mirror of
https://github.com/vikstrous/pirate-get
synced 2025-01-10 10:04:21 +01:00
handle mirrors with fake results table
This commit is contained in:
parent
757231a188
commit
6642130fc5
@@ -80,14 +80,12 @@ def build_request_path(page, category, sort, mode, terms):
|
|||||||
# this returns a list of dictionaries
|
# this returns a list of dictionaries
|
||||||
def parse_page(html):
|
def parse_page(html):
|
||||||
soup = BeautifulSoup(html, 'html.parser')
|
soup = BeautifulSoup(html, 'html.parser')
|
||||||
table = soup.find('table', id='searchResult')
|
tables = soup.find_all('table', id='searchResult')
|
||||||
|
|
||||||
results = []
|
|
||||||
no_results = re.search(r'No hits\. Try adding an asterisk in '
|
no_results = re.search(r'No hits\. Try adding an asterisk in '
|
||||||
r'you search phrase\.', html)
|
r'you search phrase\.', html)
|
||||||
|
|
||||||
# check for a blocked mirror
|
# check for a blocked mirror
|
||||||
if not table and not no_results:
|
if not tables and not no_results:
|
||||||
# Contradiction - we found no results,
|
# Contradiction - we found no results,
|
||||||
# but the page didn't say there were no results.
|
# but the page didn't say there were no results.
|
||||||
# The page is probably not actually the pirate bay,
|
# The page is probably not actually the pirate bay,
|
||||||
@@ -95,7 +93,21 @@ def parse_page(html):
|
|||||||
raise IOError('Blocked mirror detected.')
|
raise IOError('Blocked mirror detected.')
|
||||||
|
|
||||||
if no_results:
|
if no_results:
|
||||||
return results
|
return []
|
||||||
|
|
||||||
|
# handle ads disguised as fake result tables
|
||||||
|
for table in tables:
|
||||||
|
results = parse_table(table)
|
||||||
|
if results:
|
||||||
|
break
|
||||||
|
else:
|
||||||
|
raise IOError('Mirror does not contain magnets.')
|
||||||
|
|
||||||
|
return results
|
||||||
|
|
||||||
|
|
||||||
|
def parse_table(table):
|
||||||
|
results = []
|
||||||
|
|
||||||
# parse the rows one by one (skipping headings)
|
# parse the rows one by one (skipping headings)
|
||||||
for row in table('tr')[1:]:
|
for row in table('tr')[1:]:
|
||||||
@@ -103,11 +115,14 @@ def parse_page(html):
|
|||||||
row_link = row.find('a', class_='detLink')
|
row_link = row.find('a', class_='detLink')
|
||||||
if row_link is None:
|
if row_link is None:
|
||||||
continue
|
continue
|
||||||
|
|
||||||
id_ = row_link['href'].split('/')[2]
|
id_ = row_link['href'].split('/')[2]
|
||||||
seeds, leechers = [i.text for i in row('td')[-2:]]
|
seeds, leechers = [i.text for i in row('td')[-2:]]
|
||||||
magnet = row.find(lambda tag:
|
magnet_tag = row.find(lambda tag: tag.name == 'a' and
|
||||||
tag.name == 'a' and
|
tag['href'].startswith('magnet'))
|
||||||
tag['href'].startswith('magnet'))['href']
|
if magnet_tag is None:
|
||||||
|
continue
|
||||||
|
magnet = magnet_tag['href']
|
||||||
|
|
||||||
# parse descriptions separately
|
# parse descriptions separately
|
||||||
description = row.find('font', class_='detDesc').text
|
description = row.find('font', class_='detDesc').text
|
||||||
@ -203,6 +218,7 @@ def save_magnets(printer, chosen_links, results, folder):
|
|||||||
with open(file, 'w') as f:
|
with open(file, 'w') as f:
|
||||||
f.write(magnet + '\n')
|
f.write(magnet + '\n')
|
||||||
|
|
||||||
|
|
||||||
def copy_magnets(printer, chosen_links, results):
|
def copy_magnets(printer, chosen_links, results):
|
||||||
clipboard_text = ''
|
clipboard_text = ''
|
||||||
for link in chosen_links:
|
for link in chosen_links:
|
||||||
|
Loading…
Reference in New Issue
Block a user