1
0
mirror of https://github.com/vikstrous/pirate-get synced 2025-01-09 09:59:51 +01:00

handle mirrors with fake results table

This commit is contained in:
Michele Guerini Rocco 2019-11-24 11:01:58 +01:00
parent 757231a188
commit 6642130fc5
Signed by: rnhmjoj
GPG Key ID: BFBAF4C975F76450

View File

@ -80,14 +80,12 @@ def build_request_path(page, category, sort, mode, terms):
# this returns a list of dictionaries
def parse_page(html):
    """Extract the search results from a results page.

    Args:
        html: the page markup as a string. It is parsed with
            BeautifulSoup and also searched with a text regex.

    Returns:
        The non-empty value returned by ``parse_table`` for the first
        ``searchResult`` table that yields anything, or ``[]`` when the
        page carries the "No hits" notice.

    Raises:
        IOError: if the page has neither a ``searchResult`` table nor the
            "No hits" notice, or if none of the tables yields a result.
    """
    soup = BeautifulSoup(html, 'html.parser')
    # collect every table carrying the results id, not just the first one
    tables = soup.find_all('table', id='searchResult')
    # the wording (including "you search phrase") is matched literally
    no_results = re.search(r'No hits\. Try adding an asterisk in '
                           r'you search phrase\.', html)

    # check for a blocked mirror
    if not tables and not no_results:
        # Contradiction - we found no results,
        # but the page didn't say there were no results.
        # The page is probably not actually the pirate bay.
        raise IOError('Blocked mirror detected.')

    if no_results:
        return []

    # handle ads disguised as fake result tables:
    # the first table that parses to something non-empty wins
    for table in tables:
        results = parse_table(table)
        if results:
            return results

    # every candidate table parsed to nothing
    raise IOError('Mirror does not contain magnets.')
def parse_table(table):
results = []
# parse the rows one by one (skipping headings)
for row in table('tr')[1:]:
@ -103,11 +115,14 @@ def parse_page(html):
row_link = row.find('a', class_='detLink')
if row_link is None:
continue
id_ = row_link['href'].split('/')[2]
seeds, leechers = [i.text for i in row('td')[-2:]]
magnet = row.find(lambda tag:
tag.name == 'a' and
tag['href'].startswith('magnet'))['href']
magnet_tag = row.find(lambda tag: tag.name == 'a' and
tag['href'].startswith('magnet'))
if magnet_tag is None:
continue
magnet = magnet_tag['href']
# parse descriptions separately
description = row.find('font', class_='detDesc').text
@ -203,6 +218,7 @@ def save_magnets(printer, chosen_links, results, folder):
with open(file, 'w') as f:
f.write(magnet + '\n')
def copy_magnets(printer, chosen_links, results):
clipboard_text = ''
for link in chosen_links: