handle mirrors with fake results table

2025-01-10 10:04:21 +01:00 · 2019-11-24 11:01:58 +01:00 · 2019-11-24 11:01:58 +01:00 · 6642130fc5
commit 6642130fc5
parent 757231a188
1 changed file with 24 additions and 8 deletions
--- a/pirate/torrent.py
+++ b/pirate/torrent.py
@@ -80,14 +80,12 @@ def build_request_path(page, category, sort, mode, terms):
 # this returns a list of dictionaries
 def parse_page(html):
    soup = BeautifulSoup(html, 'html.parser')
-    table = soup.find('table', id='searchResult')
+    tables = soup.find_all('table', id='searchResult')
    results = []
    no_results = re.search(r'No hits\. Try adding an asterisk in '
                           r'you search phrase\.', html)
    # check for a blocked mirror
-    if not table and not no_results:
+    if not tables and not no_results:
        # Contradiction - we found no results,
        # but the page didn't say there were no results.
        # The page is probably not actually the pirate bay,
@@ -95,19 +93,36 @@ def parse_page(html):
        raise IOError('Blocked mirror detected.')
    if no_results:
        return []
    # handle ads disguised as fake result tables
    for table in tables:
        results = parse_table(table)
        if results:
            break
    else:
        raise IOError('Mirror does not contain magnets.')
    return results
 def parse_table(table):
    results = []
    # parse the rows one by one (skipping headings)
    for row in table('tr')[1:]:
        # grab info about the row
        row_link = row.find('a', class_='detLink')
        if row_link is None:
            continue
        id_ = row_link['href'].split('/')[2]
        seeds, leechers = [i.text for i in row('td')[-2:]]
-        magnet = row.find(lambda tag:
+        magnet_tag = row.find(lambda tag: tag.name == 'a' and
-                          tag.name == 'a' and
+                              tag['href'].startswith('magnet'))
-                          tag['href'].startswith('magnet'))['href']
+        if magnet_tag is None:
            continue
        magnet = magnet_tag['href']
        # parse descriptions separately
        description = row.find('font', class_='detDesc').text
@@ -203,6 +218,7 @@ def save_magnets(printer, chosen_links, results, folder):
        with open(file, 'w') as f:
            f.write(magnet + '\n')
 def copy_magnets(printer, chosen_links, results):
    clipboard_text = ''
    for link in chosen_links: