From a430fab14cc5d98da14bd078fa035aa0e78273d5 Mon Sep 17 00:00:00 2001 From: Viktor Stanchev Date: Thu, 3 Sep 2015 00:05:33 -0700 Subject: [PATCH 01/12] organize argument passing to torrent.py --- pirate/pirate.py | 20 +++++++++++++-- pirate/torrent.py | 64 ++++++++++++++++++++++++++++------------------- 2 files changed, 56 insertions(+), 28 deletions(-) diff --git a/pirate/pirate.py b/pirate/pirate.py index b3c6383..c526f17 100755 --- a/pirate/pirate.py +++ b/pirate/pirate.py @@ -123,6 +123,16 @@ def main(): help='disable colored output') args = parser.parse_args() + # figure out the mode - browse, search, top or recent + if args.browse: + args.mode = 'browse' + elif args.recent: + args.mode = 'recent' + elif len(args.search) == 0: + args.mode = 'top' + else: + args.mode = 'search' + if (config.getboolean('Misc', 'colors') and not args.color or not config.getboolean('Misc', 'colors')): pirate.data.colored_output = False @@ -182,8 +192,14 @@ def main(): for mirror in mirrors: try: print('Trying', mirror, end='... ') - mags, sizes, uploaded, ids = pirate.torrent.remote(args, - mirror) + mags, sizes, uploaded, ids = pirate.torrent.remote( + pages=args.pages, + category=pirate.torrent.parse_category(args.category), + sort=pirate.torrent.parse_sort(args.sort), + mode=args.mode, + terms=args.search, + mirror=mirror + ) except (urllib.error.URLError, socket.timeout, IOError, ValueError): print('Failed', color='WARN') diff --git a/pirate/torrent.py b/pirate/torrent.py index 6699b88..3676dce 100644 --- a/pirate/torrent.py +++ b/pirate/torrent.py @@ -11,52 +11,64 @@ from pirate.print import print from io import BytesIO + +def parse_category(category): + if str(category) in pirate.data.categories.values(): + return category + elif category in pirate.data.categories.keys(): + return pirate.data.categories[category] + else: + print('Invalid category ignored', color='WARN') + return '0' + + +def parse_sort(sort): + if str(sort) in pirate.data.sorts.values(): + return sort + elif sort in pirate.data.sorts.keys(): + return pirate.data.sorts[sort] + else: + print('Invalid sort ignored', color='WARN') + return '99' + + #todo: redo this with html parser instead of regex -def remote(args, mirror): +def remote(pages, category, sort, mode, terms, mirror): res_l = [] - pages = int(args.pages) + pages = int(pages) if pages < 1: raise ValueError('Please provide an integer greater than 0 ' 'for the number of pages to fetch.') - if str(args.category) in pirate.data.categories.values(): - category = args.category - elif args.category in pirate.data.categories.keys(): - category = pirate.data.categories[args.category] - else: - category = '0' - print('Invalid category ignored', color='WARN') - - if str(args.sort) in pirate.data.sorts.values(): - sort = args.sort - elif args.sort in pirate.data.sorts.keys(): - sort = pirate.data.sorts[args.sort] - else: - sort = '99' - print('Invalid sort ignored', color='WARN') # Catch the Ctrl-C exception and exit cleanly try: sizes = [] uploaded = [] identifiers = [] for page in range(pages): - if args.browse: + if mode == 'browse': path = '/browse/' if(category == 0): category = 100 - path = '/browse/' + '/'.join(str(i) for i in ( - category, page, sort)) - elif len(args.search) == 0: - path = '/top/48h' if args.recent else '/top/' + path = '/browse/{}/{}/{}'.format(category, page, sort) + elif mode == 'recent': + # This is not a typo. There is no / between 48h and the category. + path = '/top/48h' if(category == 0): path += 'all' else: path += str(category) + elif mode == 'top': + path = '/top/' + if(category == 0): + path += 'all' + else: + path += str(category) + elif mode == 'search': + query = urllib.parse.quote_plus(' '.join(terms)) + path = '/search/{}/{}/{}/{}'.format(query, page, sort, category) else: - path = '/search/' + '/'.join(str(i) for i in ( - '+'.join(args.search), - page, sort, - category)) + raise Exception('Unknown mode.') req = request.Request(mirror + path, headers=pirate.data.default_headers) From 4eb034e925ed2d9806b2d56b9e1fb44d916df661 Mon Sep 17 00:00:00 2001 From: Viktor Stanchev Date: Thu, 3 Sep 2015 00:18:11 -0700 Subject: [PATCH 02/12] fix bugs with categories and sorts --- pirate/torrent.py | 19 ++++++++++++++++--- 1 file changed, 16 insertions(+), 3 deletions(-) diff --git a/pirate/torrent.py b/pirate/torrent.py index 3676dce..413e1bc 100644 --- a/pirate/torrent.py +++ b/pirate/torrent.py @@ -13,7 +13,11 @@ from io import BytesIO def parse_category(category): - if str(category) in pirate.data.categories.values(): + try: + category = int(category) + except ValueError: + pass + if category in pirate.data.categories.values(): return category elif category in pirate.data.categories.keys(): return pirate.data.categories[category] @@ -23,7 +27,11 @@ def parse_category(category): def parse_sort(sort): - if str(sort) in pirate.data.sorts.values(): + try: + sort = int(sort) + except ValueError: + pass + if sort in pirate.data.sorts.values(): return sort elif sort in pirate.data.sorts.keys(): return pirate.data.sorts[sort] @@ -32,7 +40,11 @@ def parse_sort(sort): return '99' -#todo: redo this with html parser instead of regex +#TODO: redo this with html parser instead of regex +#TODO: warn users when using a sort in a mode that doesn't accept sorts +#TODO: warn users when using search terms in a mode that doesn't accept search terms +#TODO: same with page parameter for top and top48h +#TODO: warn the user if trying to use a minor category with top48h def remote(pages, category, sort, mode, terms, mirror): res_l = [] pages = int(pages) @@ -54,6 +66,7 @@ def remote(pages, category, sort, mode, terms, mirror): elif mode == 'recent': # This is not a typo. There is no / between 48h and the category. path = '/top/48h' + # only major categories can be used with this mode if(category == 0): path += 'all' else: From 0ad94fca46f867b1316bb486df1b790af20af525 Mon Sep 17 00:00:00 2001 From: Viktor Stanchev Date: Thu, 3 Sep 2015 00:25:17 -0700 Subject: [PATCH 03/12] extract parse_magnets_seeds_leachers function --- pirate/torrent.py | 43 ++++++++++++++++++++++++++----------------- 1 file changed, 26 insertions(+), 17 deletions(-) diff --git a/pirate/torrent.py b/pirate/torrent.py index 413e1bc..e2e7eca 100644 --- a/pirate/torrent.py +++ b/pirate/torrent.py @@ -12,6 +12,9 @@ from pirate.print import print from io import BytesIO +parser_regex = r'"(magnet\:\?xt=[^"]*)|([^<]+)' + + def parse_category(category): try: category = int(category) @@ -40,6 +43,25 @@ def parse_sort(sort): return '99' +def parse_magnets_seeds_leechers(found): + res = [] + state = 'seeds' + curr = ['', 0, 0] #magnet, seeds, leeches + for f in found: + if f[1] == '': + curr[0] = f[0] + else: + if state == 'seeds': + curr[1] = f[1] + state = 'leeches' + else: + curr[2] = f[1] + state = 'seeds' + res.append(curr) + curr = ['', 0, 0] + return res + + #TODO: redo this with html parser instead of regex #TODO: warn users when using a sort in a mode that doesn't accept sorts #TODO: warn users when using search terms in a mode that doesn't accept search terms @@ -90,8 +112,7 @@ def remote(pages, category, sort, mode, terms, mirror): if f.info().get('Content-Encoding') == 'gzip': f = gzip.GzipFile(fileobj=BytesIO(f.read())) res = f.read().decode('utf-8') - found = re.findall(r'"(magnet\:\?xt=[^"]*)|' - r'([^<]+)', res) + found = re.findall(parser_regex, res) # check for a blocked mirror no_results = re.search(r'No hits\. Try adding an asterisk in ' @@ -104,6 +125,7 @@ def remote(pages, category, sort, mode, terms, mirror): raise IOError('Blocked mirror detected.') # get sizes as well and substitute the   character + # TODO: use actual html decode sizes.extend([match.replace(' ', ' ').split() for match in re.findall(r'(?<=Size )[0-9.]' r'+\ \;[KMGT]*[i ]*B', res)]) @@ -116,20 +138,8 @@ def remote(pages, category, sort, mode, terms, mirror): for match in re.findall('(?<=/torrent/)' '[0-9]+(?=/)',res)]) - state = 'seeds' - curr = ['', 0, 0] #magnet, seeds, leeches - for f in found: - if f[1] == '': - curr[0] = f[0] - else: - if state == 'seeds': - curr[1] = f[1] - state = 'leeches' - else: - curr[2] = f[1] - state = 'seeds' - res_l.append(curr) - curr = ['', 0, 0] + res_l += parse_magnets_seeds_leechers(found) + except KeyboardInterrupt : print('\nCancelled.') sys.exit(0) @@ -138,7 +148,6 @@ def remote(pages, category, sort, mode, terms, mirror): return res_l, sizes, uploaded, identifiers - def get_torrent(info_hash): url = 'http://torcache.net/torrent/{:X}.torrent' req = request.Request(url.format(info_hash), From cecc8ba68b9d949ba463fdcf4364c0f3999ab07c Mon Sep 17 00:00:00 2001 From: Viktor Stanchev Date: Thu, 3 Sep 2015 20:25:24 -0700 Subject: [PATCH 04/12] factor out page parser --- pirate/pirate.py | 2 +- pirate/torrent.py | 128 +++++++++++++++++++++++++--------------------- 2 files changed, 72 insertions(+), 58 deletions(-) diff --git a/pirate/pirate.py b/pirate/pirate.py index c526f17..674ca46 100755 --- a/pirate/pirate.py +++ b/pirate/pirate.py @@ -91,7 +91,7 @@ def main(): help='list Sortable Types') parser.add_argument('-L', '--local', dest='database', help='an xml file containing the Pirate Bay database') - parser.add_argument('-p', dest='pages', default=1, + parser.add_argument('-p', dest='pages', default=1, type=int, help='the number of pages to fetch ' "(doesn't work with --local)") parser.add_argument('-0', dest='first', diff --git a/pirate/torrent.py b/pirate/torrent.py index e2e7eca..83b6202 100644 --- a/pirate/torrent.py +++ b/pirate/torrent.py @@ -62,48 +62,83 @@ def parse_magnets_seeds_leechers(found): return res -#TODO: redo this with html parser instead of regex #TODO: warn users when using a sort in a mode that doesn't accept sorts #TODO: warn users when using search terms in a mode that doesn't accept search terms #TODO: same with page parameter for top and top48h #TODO: warn the user if trying to use a minor category with top48h +def build_request_path(page, category, sort, mode, terms): + if mode == 'browse': + if(category == 0): + category = 100 + return '/browse/{}/{}/{}'.format(category, page, sort) + elif mode == 'recent': + # This is not a typo. There is no / between 48h and the category. + path = '/top/48h' + # only major categories can be used with this mode + if(category == 0): + return path + 'all' + else: + return path + str(category) + elif mode == 'top': + path = '/top/' + if(category == 0): + return path + 'all' + else: + return path + str(category) + elif mode == 'search': + query = urllib.parse.quote_plus(' '.join(terms)) + return '/search/{}/{}/{}/{}'.format(query, page, sort, category) + else: + raise Exception('Unknown mode.') + + +#TODO: redo this with html parser instead of regex +def parse_page(res): + found = re.findall(parser_regex, res) + + # check for a blocked mirror + no_results = re.search(r'No hits\. Try adding an asterisk in ' + r'you search phrase\.', res) + if found == [] and no_results is None: + # Contradiction - we found no results, + # but the page didn't say there were no results. + # The page is probably not actually the pirate bay, + # so let's try another mirror + raise IOError('Blocked mirror detected.') + + # get sizes as well and substitute the   character + # TODO: use actual html decode + sizes = [match.replace(' ', ' ').split() + for match in re.findall(r'(?<=Size )[0-9.]' + r'+\ \;[KMGT]*[i ]*B', res)] + + uploaded = [match.replace(' ', ' ') + for match in re.findall(r'(?<=Uploaded )' + r'.+(?=\, Size)',res)] + + identifiers = [match.replace(' ', ' ') + for match in re.findall('(?<=/torrent/)' + '[0-9]+(?=/)',res)] + + res_l = parse_magnets_seeds_leechers(found) + + return res_l, sizes, uploaded, identifiers + + def remote(pages, category, sort, mode, terms, mirror): res_l = [] - pages = int(pages) + sizes = [] + uploaded = [] + identifiers = [] + if pages < 1: raise ValueError('Please provide an integer greater than 0 ' 'for the number of pages to fetch.') # Catch the Ctrl-C exception and exit cleanly try: - sizes = [] - uploaded = [] - identifiers = [] for page in range(pages): - if mode == 'browse': - path = '/browse/' - if(category == 0): - category = 100 - path = '/browse/{}/{}/{}'.format(category, page, sort) - elif mode == 'recent': - # This is not a typo. There is no / between 48h and the category. - path = '/top/48h' - # only major categories can be used with this mode - if(category == 0): - path += 'all' - else: - path += str(category) - elif mode == 'top': - path = '/top/' - if(category == 0): - path += 'all' - else: - path += str(category) - elif mode == 'search': - query = urllib.parse.quote_plus(' '.join(terms)) - path = '/search/{}/{}/{}/{}'.format(query, page, sort, category) - else: - raise Exception('Unknown mode.') + path = build_request_path(page, category, sort, mode, terms) req = request.Request(mirror + path, headers=pirate.data.default_headers) @@ -112,39 +147,18 @@ def remote(pages, category, sort, mode, terms, mirror): if f.info().get('Content-Encoding') == 'gzip': f = gzip.GzipFile(fileobj=BytesIO(f.read())) res = f.read().decode('utf-8') - found = re.findall(parser_regex, res) - # check for a blocked mirror - no_results = re.search(r'No hits\. Try adding an asterisk in ' - r'you search phrase\.', res) - if found == [] and no_results is None: - # Contradiction - we found no results, - # but the page didn't say there were no results. - # The page is probably not actually the pirate bay, - # so let's try another mirror - raise IOError('Blocked mirror detected.') + page_res_l, page_sizes, page_uploaded, page_identifiers = parse_page(res) + res_l += page_res_l + sizes += page_sizes + uploaded += page_uploaded + identifiers += page_identifiers - # get sizes as well and substitute the   character - # TODO: use actual html decode - sizes.extend([match.replace(' ', ' ').split() - for match in re.findall(r'(?<=Size )[0-9.]' - r'+\ \;[KMGT]*[i ]*B', res)]) - - uploaded.extend([match.replace(' ', ' ') - for match in re.findall(r'(?<=Uploaded )' - r'.+(?=\, Size)',res)]) - - identifiers.extend([match.replace(' ', ' ') - for match in re.findall('(?<=/torrent/)' - '[0-9]+(?=/)',res)]) - - res_l += parse_magnets_seeds_leechers(found) - - except KeyboardInterrupt : + except KeyboardInterrupt: print('\nCancelled.') sys.exit(0) - # return the sizes in a spearate list + # return the sizes in a separate list return res_l, sizes, uploaded, identifiers From d0a9d0f51eba7dd08369edf0bffab0357b162b16 Mon Sep 17 00:00:00 2001 From: Viktor Stanchev Date: Thu, 3 Sep 2015 20:35:12 -0700 Subject: [PATCH 05/12] add a test for parse_page --- tests/data/dan_bull_search.html | 461 ++++++++++++++++++++++++++++++++ tests/{ => data}/rich.xml | 0 tests/test_local.py | 5 +- tests/test_torrent.py | 18 ++ tests/util.py | 8 + 5 files changed, 491 insertions(+), 1 deletion(-) create mode 100644 tests/data/dan_bull_search.html rename tests/{ => data}/rich.xml (100%) create mode 100755 tests/test_torrent.py create mode 100644 tests/util.py diff --git a/tests/data/dan_bull_search.html b/tests/data/dan_bull_search.html new file mode 100644 index 0000000..b7799c8 --- /dev/null +++ b/tests/data/dan_bull_search.html @@ -0,0 +1,461 @@ + + + + The Pirate Bay - The galaxy's most resilient bittorrent site + + + + + + + + + + + + + + + + + + + + + + + + +

Search results: dan bull Displaying hits from 0 to 15 (approx 15 found)

+ +
+ + +
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
Type
Name (Order by: Uploaded, Size, ULed by, SE, LE)
View: Single / Double 
SELE
+
+ Audio
+ (Music) +
+
+ +Magnet linkTrusted + Uploaded 04-04 2014, Size 89.33 MiB, ULed by Capajebo + 161
+
+ Audio
+ (Other) +
+
+ +Magnet linkThis torrent has 1 comments. + Uploaded 03-02 2014, Size 294 MiB, ULed by Vakume + 40
+
+ Audio
+ (Music) +
+
+ +Magnet link + Uploaded 01-19 2013, Size 54.86 MiB, ULed by blowingfish + 20
+
+ Audio
+ (Other) +
+
+ +Magnet linkThis torrent has 11 comments. + Uploaded 01-21 2010, Size 236.78 MiB, ULed by SuperSaru + 10
+
+ Audio
+ (Music) +
+
+ +Magnet link + Uploaded 09-02 2014, Size 36.27 MiB, ULed by Bazookus + 10
+
+ Audio
+ (Music) +
+
+ +Magnet linkThis torrent has 1 comments.VIP + Uploaded 09-27 2009, Size 5.51 MiB, ULed by oneanight + 01
+
+ Audio
+ (Music) +
+
+ +Magnet linkThis torrent has 1 comments. + Uploaded 11-29 2009, Size 5.07 MiB, ULed by epiclawl + 00
+
+ Audio
+ (Music) +
+
+ +Magnet link + Uploaded 11-10 2011, Size 5.34 MiB, ULed by Imperator42 + 00
+
+ Audio
+ (Music) +
+
+ +Magnet link + Uploaded 12-20 2011, Size 4.8 MiB, ULed by lerdie + 00
+
+ Audio
+ (Music) +
+
+ +Magnet linkThis torrent has 1 comments. + Uploaded 12-21 2011, Size 3.4 MiB, ULed by mattdow + 01
+
+ Audio
+ (Music) +
+
+ +Magnet linkThis torrent has 3 comments. + Uploaded 12-21 2011, Size 4.8 MiB, ULed by lerdie + 01
+
+ Audio
+ (Other) +
+
+ +Magnet linkThis torrent has 1 comments.VIP + Uploaded 03-09 2012, Size 60.72 MiB, ULed by oneanight + 01
+
+ Audio
+ (Music) +
+
+ +Magnet linkThis torrent has 1 comments. + Uploaded 10-24 2012, Size 6.29 MiB, ULed by PIRATE300 + 00
+
+ Audio
+ (Music) +
+
+ +Magnet linkThis torrent has 1 comments. + Uploaded 11-10 2012, Size 6.41 MiB, ULed by AdpoX10 + 00
+
+ Audio
+ (Other) +
+
+ +Magnet linkThis torrent has 2 comments. + Uploaded 01-19 2013, Size 54.87 MiB, ULed by blowingfish + 01
+
+
+
+ + + + + + \ No newline at end of file diff --git a/tests/rich.xml b/tests/data/rich.xml similarity index 100% rename from tests/rich.xml rename to tests/data/rich.xml diff --git a/tests/test_local.py b/tests/test_local.py index 3d6b8d9..afda395 100755 --- a/tests/test_local.py +++ b/tests/test_local.py @@ -3,10 +3,13 @@ import unittest import pirate.local import os +from tests import util + + class TestLocal(unittest.TestCase): def test_rich_xml(self): - path = os.path.join(os.path.dirname(os.path.realpath(__file__)), 'rich.xml') + path = util.data_path('rich.xml') expected = [['magnet:?xt=urn:btih:b03c8641415d3a0fc7077f5bf567634442989a74&dn=High.Chaparall.S02E02.PDTV.XViD.SWEDiSH-HuBBaTiX', '?', '?']] actual = pirate.local.search(path, ('High',)) self.assertEqual(actual, expected) diff --git a/tests/test_torrent.py b/tests/test_torrent.py new file mode 100755 index 0000000..b166359 --- /dev/null +++ b/tests/test_torrent.py @@ -0,0 +1,18 @@ +#!/usr/bin/env python3 +import unittest +import pirate.torrent +import os + +from tests import util + + +class TestTorrent(unittest.TestCase): + + def test_rich_xml(self): + res = util.read_data('dan_bull_search.html') + actual = pirate.torrent.parse_page(res) + expected = ([['magnet:?xt=urn:btih:30df4f8b42b8fd77f5e5aa34abbffe97f5e81fbf&dn=Dan+Croll+%26bull%3B+Sweet+Disarray+%5B2014%5D+320&tr=udp%3A%2F%2Ftracker.openbittorrent.com%3A80&tr=udp%3A%2F%2Fopen.demonii.com%3A1337&tr=udp%3A%2F%2Ftracker.coppersurfer.tk%3A6969&tr=udp%3A%2F%2Fexodus.desync.com%3A6969', '16', '1'], ['magnet:?xt=urn:btih:7abd3eda600996b8e6fc9a61b83288e0c6ac0d83&dn=Dan+Bull+-+Massive+Collection&tr=udp%3A%2F%2Ftracker.openbittorrent.com%3A80&tr=udp%3A%2F%2Fopen.demonii.com%3A1337&tr=udp%3A%2F%2Ftracker.coppersurfer.tk%3A6969&tr=udp%3A%2F%2Fexodus.desync.com%3A6969', '4', '0'], ['magnet:?xt=urn:btih:8f8d68fd0a51237c89692c428ed8a8f64a969c70&dn=Dan+Bull+-+Generation+Gaming+-+2013&tr=udp%3A%2F%2Ftracker.openbittorrent.com%3A80&tr=udp%3A%2F%2Fopen.demonii.com%3A1337&tr=udp%3A%2F%2Ftracker.coppersurfer.tk%3A6969&tr=udp%3A%2F%2Fexodus.desync.com%3A6969', '2', '0'], ['magnet:?xt=urn:btih:3da6a0fdc1d67a768cb32597e926abdf3e1a2fdd&dn=Dan+Bull+Collection&tr=udp%3A%2F%2Ftracker.openbittorrent.com%3A80&tr=udp%3A%2F%2Fopen.demonii.com%3A1337&tr=udp%3A%2F%2Ftracker.coppersurfer.tk%3A6969&tr=udp%3A%2F%2Fexodus.desync.com%3A6969', '1', '0'], ['magnet:?xt=urn:btih:5cd371a235317319db7da52c64422f9c2ac75d77&dn=Dan+Bull+-+The+Garden+%7B2014-Album%7D&tr=udp%3A%2F%2Ftracker.openbittorrent.com%3A80&tr=udp%3A%2F%2Fopen.demonii.com%3A1337&tr=udp%3A%2F%2Ftracker.coppersurfer.tk%3A6969&tr=udp%3A%2F%2Fexodus.desync.com%3A6969', '1', '0'], ['magnet:?xt=urn:btih:4e14dbd077c920875be4c15971b23b609ad6716a&dn=Dan+Bull+-+Dear+Lily+%5Ban+open+letter+to+Lily+Allen%5D+-+2009%5BMP3+%40&tr=udp%3A%2F%2Ftracker.openbittorrent.com%3A80&tr=udp%3A%2F%2Fopen.demonii.com%3A1337&tr=udp%3A%2F%2Ftracker.coppersurfer.tk%3A6969&tr=udp%3A%2F%2Fexodus.desync.com%3A6969', '0', '1'], ['magnet:?xt=urn:btih:5d9319cf852f7462422cb1bffc37b65174645047&dn=Dan+Bull+-+Dear+Mandy+%5Ban+open+letter+to+Lord+Mandelson%5D&tr=udp%3A%2F%2Ftracker.openbittorrent.com%3A80&tr=udp%3A%2F%2Fopen.demonii.com%3A1337&tr=udp%3A%2F%2Ftracker.coppersurfer.tk%3A6969&tr=udp%3A%2F%2Fexodus.desync.com%3A6969', '0', '0'], ['magnet:?xt=urn:btih:1c54af57426f53fdef4bbf1a9dbddf32f7b4988a&dn=Dan+Bull+-+Dear+Lily+%28Lily+Allen%29+%28Song+about+filesharing%29&tr=udp%3A%2F%2Ftracker.openbittorrent.com%3A80&tr=udp%3A%2F%2Fopen.demonii.com%3A1337&tr=udp%3A%2F%2Ftracker.coppersurfer.tk%3A6969&tr=udp%3A%2F%2Fexodus.desync.com%3A6969', '0', '0'], ['magnet:?xt=urn:btih:942c5bf3e1e9bc263939e13cea6ad7bd5f62aa36&dn=Dan+Bull+-+SOPA+Cabana.mp3&tr=udp%3A%2F%2Ftracker.openbittorrent.com%3A80&tr=udp%3A%2F%2Fopen.demonii.com%3A1337&tr=udp%3A%2F%2Ftracker.coppersurfer.tk%3A6969&tr=udp%3A%2F%2Fexodus.desync.com%3A6969', '0', '0'], ['magnet:?xt=urn:btih:d376f68a31b0db652234e790ed7256ac5e32db57&dn=Dan+Bull+-+SOPA+Cabana&tr=udp%3A%2F%2Ftracker.openbittorrent.com%3A80&tr=udp%3A%2F%2Fopen.demonii.com%3A1337&tr=udp%3A%2F%2Ftracker.coppersurfer.tk%3A6969&tr=udp%3A%2F%2Fexodus.desync.com%3A6969', '0', '1'], ['magnet:?xt=urn:btih:28163770a532eb24b9e0865878288a9bbdb7a5e6&dn=Dan+Bull+-+SOPA+Cabana+%5BWORKING%5D&tr=udp%3A%2F%2Ftracker.openbittorrent.com%3A80&tr=udp%3A%2F%2Fopen.demonii.com%3A1337&tr=udp%3A%2F%2Ftracker.coppersurfer.tk%3A6969&tr=udp%3A%2F%2Fexodus.desync.com%3A6969', '0', '1'], ['magnet:?xt=urn:btih:779ab0f13a3fbb12ba68b27721491e4d143f26eb&dn=Dan+Bull+-+Bye+Bye+BPI+2012++%5BMP3%40192%5D%28oan%29&tr=udp%3A%2F%2Ftracker.openbittorrent.com%3A80&tr=udp%3A%2F%2Fopen.demonii.com%3A1337&tr=udp%3A%2F%2Ftracker.coppersurfer.tk%3A6969&tr=udp%3A%2F%2Fexodus.desync.com%3A6969', '0', '1'], ['magnet:?xt=urn:btih:2667e4795bd5c868dedcabcb52943f4bb7212bab&dn=Dan+Bull+-+Dishonored+%5BExplicit+ver.%5D+%28Single+2012%29&tr=udp%3A%2F%2Ftracker.openbittorrent.com%3A80&tr=udp%3A%2F%2Fopen.demonii.com%3A1337&tr=udp%3A%2F%2Ftracker.coppersurfer.tk%3A6969&tr=udp%3A%2F%2Fexodus.desync.com%3A6969', '0', '0'], ['magnet:?xt=urn:btih:16364f83c556ad0fd3bb57a4a7c890e7e8087414&dn=Halo+4+EPIC+Rap+By+Dan+Bull&tr=udp%3A%2F%2Ftracker.openbittorrent.com%3A80&tr=udp%3A%2F%2Fopen.demonii.com%3A1337&tr=udp%3A%2F%2Ftracker.coppersurfer.tk%3A6969&tr=udp%3A%2F%2Fexodus.desync.com%3A6969', '0', '0'], ['magnet:?xt=urn:btih:843b466d9fd1f0bee3a476573b272dc2d6d0ebae&dn=Dan+Bull+-+Generation+Gaming+-+2013&tr=udp%3A%2F%2Ftracker.openbittorrent.com%3A80&tr=udp%3A%2F%2Fopen.demonii.com%3A1337&tr=udp%3A%2F%2Ftracker.coppersurfer.tk%3A6969&tr=udp%3A%2F%2Fexodus.desync.com%3A6969', '0', '1']], [['89.33', 'MiB'], ['294', 'MiB'], ['54.86', 'MiB'], ['236.78', 'MiB'], ['36.27', 'MiB'], ['5.51', 'MiB'], ['5.07', 'MiB'], ['5.34', 'MiB'], ['4.8', 'MiB'], ['3.4', 'MiB'], ['4.8', 'MiB'], ['60.72', 'MiB'], ['6.29', 'MiB'], ['6.41', 'MiB'], ['54.87', 'MiB']], ['04-04 2014', '03-02 2014', '01-19 2013', '01-21 2010', '09-02 2014', '09-27 2009', '11-29 2009', '11-10 2011', '12-20 2011', '12-21 2011', '12-21 2011', '03-09 2012', '10-24 2012', '11-10 2012', '01-19 2013'], ['9890864', '9684858', '8037968', '5295449', '10954408', '5101630', '5185893', '6806996', '6901871', '6902247', '6903548', '7088979', '7756344', '7812951', '8037899']) + self.assertEqual(actual, expected) + +if __name__ == '__main__': + unittest.main() diff --git a/tests/util.py b/tests/util.py new file mode 100644 index 0000000..26f0fbe --- /dev/null +++ b/tests/util.py @@ -0,0 +1,8 @@ +import os + +def data_path(name): + return os.path.join(os.path.dirname(os.path.realpath(__file__)), 'data', name) + +def read_data(name): + with open(data_path(name)) as f: + return f.read() From 43f8ffefea5617268f13c14b628ca989f716da83 Mon Sep 17 00:00:00 2001 From: Viktor Stanchev Date: Thu, 3 Sep 2015 21:44:02 -0700 Subject: [PATCH 06/12] rewrite html parser with pyquery --- pirate/torrent.py | 64 +++++-------- setup.py | 2 +- tests/data/blocked.html | 1 + tests/data/no_hits.html | 200 ++++++++++++++++++++++++++++++++++++++++ tests/test_torrent.py | 15 ++- 5 files changed, 240 insertions(+), 42 deletions(-) create mode 100644 tests/data/blocked.html create mode 100644 tests/data/no_hits.html diff --git a/pirate/torrent.py b/pirate/torrent.py index 83b6202..eb62f21 100644 --- a/pirate/torrent.py +++ b/pirate/torrent.py @@ -6,6 +6,8 @@ import urllib.parse as parse import urllib.error import os.path +from pyquery import PyQuery as pq + import pirate.data from pirate.print import print @@ -43,25 +45,6 @@ def parse_sort(sort): return '99' -def parse_magnets_seeds_leechers(found): - res = [] - state = 'seeds' - curr = ['', 0, 0] #magnet, seeds, leeches - for f in found: - if f[1] == '': - curr[0] = f[0] - else: - if state == 'seeds': - curr[1] = f[1] - state = 'leeches' - else: - curr[2] = f[1] - state = 'seeds' - res.append(curr) - curr = ['', 0, 0] - return res - - #TODO: warn users when using a sort in a mode that doesn't accept sorts #TODO: warn users when using search terms in a mode that doesn't accept search terms #TODO: same with page parameter for top and top48h @@ -92,37 +75,40 @@ def build_request_path(page, category, sort, mode, terms): raise Exception('Unknown mode.') -#TODO: redo this with html parser instead of regex -def parse_page(res): - found = re.findall(parser_regex, res) +def parse_page(html): + d = pq(html) + + # first get the magnet links and make sure there are results + magnets = list(map(lambda l: pq(l).attr('href'), + d('table#searchResult tr>td:nth-child(2)>a:nth-child(2)'))) # check for a blocked mirror no_results = re.search(r'No hits\. Try adding an asterisk in ' - r'you search phrase\.', res) - if found == [] and no_results is None: + r'you search phrase\.', html) + if len(magnets) == 0 and no_results is None: # Contradiction - we found no results, # but the page didn't say there were no results. # The page is probably not actually the pirate bay, # so let's try another mirror raise IOError('Blocked mirror detected.') - # get sizes as well and substitute the   character - # TODO: use actual html decode - sizes = [match.replace(' ', ' ').split() - for match in re.findall(r'(?<=Size )[0-9.]' - r'+\ \;[KMGT]*[i ]*B', res)] + # next get more info + seeds = list(map(lambda l: pq(l).text(), + d('table#searchResult tr>td:nth-child(3)'))) + leechers = list(map(lambda l: pq(l).text(), + d('table#searchResult tr>td:nth-child(4)'))) + identifiers = list(map(lambda l: pq(l).attr('href').split('/')[2], + d('table#searchResult .detLink'))) - uploaded = [match.replace(' ', ' ') - for match in re.findall(r'(?<=Uploaded )' - r'.+(?=\, Size)',res)] + sizes = [] + uploaded = [] + # parse descriptions separately + for node in d('font.detDesc'): + text = pq(node).text() + sizes.append(re.findall(r'(?<=Size )[0-9.]+\s[KMGT]*[i ]*B', text)[0].split()) + uploaded.append(re.findall(r'(?<=Uploaded ).+(?=\, Size)', text)[0]) - identifiers = [match.replace(' ', ' ') - for match in re.findall('(?<=/torrent/)' - '[0-9]+(?=/)',res)] - - res_l = parse_magnets_seeds_leechers(found) - - return res_l, sizes, uploaded, identifiers + return list(zip(magnets,seeds,leechers)), sizes, uploaded, identifiers def remote(pages, category, sort, mode, terms, mirror): diff --git a/setup.py b/setup.py index 8f8f6c1..42707ef 100755 --- a/setup.py +++ b/setup.py @@ -13,7 +13,7 @@ setup(name='pirate-get', entry_points={ 'console_scripts': ['pirate-get = pirate.pirate:main'] }, - install_requires=['colorama>=0.3.3'], + install_requires=['colorama>=0.3.3', 'pyquery>=1.2.9'], keywords=['torrent', 'magnet', 'download', 'tpb', 'client'], classifiers=[ 'Topic :: Utilities', diff --git a/tests/data/blocked.html b/tests/data/blocked.html new file mode 100644 index 0000000..dd4bc42 --- /dev/null +++ b/tests/data/blocked.html @@ -0,0 +1 @@ +blocked. diff --git a/tests/data/no_hits.html b/tests/data/no_hits.html new file mode 100644 index 0000000..a3a9156 --- /dev/null +++ b/tests/data/no_hits.html @@ -0,0 +1,200 @@ + + + + The Pirate Bay - The galaxy's most resilient bittorrent site + + + + + + + + + + + + + + + + + + + + + + + + +

Search results: aaaaaaaaaaaaaaaaa No hits. Try adding an asterisk in you search phrase.

+ +
+ +
+
+
+
+ + + + + + \ No newline at end of file diff --git a/tests/test_torrent.py b/tests/test_torrent.py index b166359..06c62fd 100755 --- a/tests/test_torrent.py +++ b/tests/test_torrent.py @@ -8,10 +8,21 @@ from tests import util class TestTorrent(unittest.TestCase): - def test_rich_xml(self): + def test_no_hits(self): + res = util.read_data('no_hits.html') + actual = pirate.torrent.parse_page(res) + expected = ([], [], [], []) + self.assertEqual(actual, expected) + + def test_blocked_mirror(self): + res = util.read_data('blocked.html') + with self.assertRaises(IOError): + pirate.torrent.parse_page(res) + + def test_search_results(self): res = util.read_data('dan_bull_search.html') actual = pirate.torrent.parse_page(res) - expected = ([['magnet:?xt=urn:btih:30df4f8b42b8fd77f5e5aa34abbffe97f5e81fbf&dn=Dan+Croll+%26bull%3B+Sweet+Disarray+%5B2014%5D+320&tr=udp%3A%2F%2Ftracker.openbittorrent.com%3A80&tr=udp%3A%2F%2Fopen.demonii.com%3A1337&tr=udp%3A%2F%2Ftracker.coppersurfer.tk%3A6969&tr=udp%3A%2F%2Fexodus.desync.com%3A6969', '16', '1'], ['magnet:?xt=urn:btih:7abd3eda600996b8e6fc9a61b83288e0c6ac0d83&dn=Dan+Bull+-+Massive+Collection&tr=udp%3A%2F%2Ftracker.openbittorrent.com%3A80&tr=udp%3A%2F%2Fopen.demonii.com%3A1337&tr=udp%3A%2F%2Ftracker.coppersurfer.tk%3A6969&tr=udp%3A%2F%2Fexodus.desync.com%3A6969', '4', '0'], ['magnet:?xt=urn:btih:8f8d68fd0a51237c89692c428ed8a8f64a969c70&dn=Dan+Bull+-+Generation+Gaming+-+2013&tr=udp%3A%2F%2Ftracker.openbittorrent.com%3A80&tr=udp%3A%2F%2Fopen.demonii.com%3A1337&tr=udp%3A%2F%2Ftracker.coppersurfer.tk%3A6969&tr=udp%3A%2F%2Fexodus.desync.com%3A6969', '2', '0'], ['magnet:?xt=urn:btih:3da6a0fdc1d67a768cb32597e926abdf3e1a2fdd&dn=Dan+Bull+Collection&tr=udp%3A%2F%2Ftracker.openbittorrent.com%3A80&tr=udp%3A%2F%2Fopen.demonii.com%3A1337&tr=udp%3A%2F%2Ftracker.coppersurfer.tk%3A6969&tr=udp%3A%2F%2Fexodus.desync.com%3A6969', '1', '0'], ['magnet:?xt=urn:btih:5cd371a235317319db7da52c64422f9c2ac75d77&dn=Dan+Bull+-+The+Garden+%7B2014-Album%7D&tr=udp%3A%2F%2Ftracker.openbittorrent.com%3A80&tr=udp%3A%2F%2Fopen.demonii.com%3A1337&tr=udp%3A%2F%2Ftracker.coppersurfer.tk%3A6969&tr=udp%3A%2F%2Fexodus.desync.com%3A6969', '1', '0'], ['magnet:?xt=urn:btih:4e14dbd077c920875be4c15971b23b609ad6716a&dn=Dan+Bull+-+Dear+Lily+%5Ban+open+letter+to+Lily+Allen%5D+-+2009%5BMP3+%40&tr=udp%3A%2F%2Ftracker.openbittorrent.com%3A80&tr=udp%3A%2F%2Fopen.demonii.com%3A1337&tr=udp%3A%2F%2Ftracker.coppersurfer.tk%3A6969&tr=udp%3A%2F%2Fexodus.desync.com%3A6969', '0', '1'], ['magnet:?xt=urn:btih:5d9319cf852f7462422cb1bffc37b65174645047&dn=Dan+Bull+-+Dear+Mandy+%5Ban+open+letter+to+Lord+Mandelson%5D&tr=udp%3A%2F%2Ftracker.openbittorrent.com%3A80&tr=udp%3A%2F%2Fopen.demonii.com%3A1337&tr=udp%3A%2F%2Ftracker.coppersurfer.tk%3A6969&tr=udp%3A%2F%2Fexodus.desync.com%3A6969', '0', '0'], ['magnet:?xt=urn:btih:1c54af57426f53fdef4bbf1a9dbddf32f7b4988a&dn=Dan+Bull+-+Dear+Lily+%28Lily+Allen%29+%28Song+about+filesharing%29&tr=udp%3A%2F%2Ftracker.openbittorrent.com%3A80&tr=udp%3A%2F%2Fopen.demonii.com%3A1337&tr=udp%3A%2F%2Ftracker.coppersurfer.tk%3A6969&tr=udp%3A%2F%2Fexodus.desync.com%3A6969', '0', '0'], ['magnet:?xt=urn:btih:942c5bf3e1e9bc263939e13cea6ad7bd5f62aa36&dn=Dan+Bull+-+SOPA+Cabana.mp3&tr=udp%3A%2F%2Ftracker.openbittorrent.com%3A80&tr=udp%3A%2F%2Fopen.demonii.com%3A1337&tr=udp%3A%2F%2Ftracker.coppersurfer.tk%3A6969&tr=udp%3A%2F%2Fexodus.desync.com%3A6969', '0', '0'], ['magnet:?xt=urn:btih:d376f68a31b0db652234e790ed7256ac5e32db57&dn=Dan+Bull+-+SOPA+Cabana&tr=udp%3A%2F%2Ftracker.openbittorrent.com%3A80&tr=udp%3A%2F%2Fopen.demonii.com%3A1337&tr=udp%3A%2F%2Ftracker.coppersurfer.tk%3A6969&tr=udp%3A%2F%2Fexodus.desync.com%3A6969', '0', '1'], ['magnet:?xt=urn:btih:28163770a532eb24b9e0865878288a9bbdb7a5e6&dn=Dan+Bull+-+SOPA+Cabana+%5BWORKING%5D&tr=udp%3A%2F%2Ftracker.openbittorrent.com%3A80&tr=udp%3A%2F%2Fopen.demonii.com%3A1337&tr=udp%3A%2F%2Ftracker.coppersurfer.tk%3A6969&tr=udp%3A%2F%2Fexodus.desync.com%3A6969', '0', '1'], ['magnet:?xt=urn:btih:779ab0f13a3fbb12ba68b27721491e4d143f26eb&dn=Dan+Bull+-+Bye+Bye+BPI+2012++%5BMP3%40192%5D%28oan%29&tr=udp%3A%2F%2Ftracker.openbittorrent.com%3A80&tr=udp%3A%2F%2Fopen.demonii.com%3A1337&tr=udp%3A%2F%2Ftracker.coppersurfer.tk%3A6969&tr=udp%3A%2F%2Fexodus.desync.com%3A6969', '0', '1'], ['magnet:?xt=urn:btih:2667e4795bd5c868dedcabcb52943f4bb7212bab&dn=Dan+Bull+-+Dishonored+%5BExplicit+ver.%5D+%28Single+2012%29&tr=udp%3A%2F%2Ftracker.openbittorrent.com%3A80&tr=udp%3A%2F%2Fopen.demonii.com%3A1337&tr=udp%3A%2F%2Ftracker.coppersurfer.tk%3A6969&tr=udp%3A%2F%2Fexodus.desync.com%3A6969', '0', '0'], ['magnet:?xt=urn:btih:16364f83c556ad0fd3bb57a4a7c890e7e8087414&dn=Halo+4+EPIC+Rap+By+Dan+Bull&tr=udp%3A%2F%2Ftracker.openbittorrent.com%3A80&tr=udp%3A%2F%2Fopen.demonii.com%3A1337&tr=udp%3A%2F%2Ftracker.coppersurfer.tk%3A6969&tr=udp%3A%2F%2Fexodus.desync.com%3A6969', '0', '0'], ['magnet:?xt=urn:btih:843b466d9fd1f0bee3a476573b272dc2d6d0ebae&dn=Dan+Bull+-+Generation+Gaming+-+2013&tr=udp%3A%2F%2Ftracker.openbittorrent.com%3A80&tr=udp%3A%2F%2Fopen.demonii.com%3A1337&tr=udp%3A%2F%2Ftracker.coppersurfer.tk%3A6969&tr=udp%3A%2F%2Fexodus.desync.com%3A6969', '0', '1']], [['89.33', 'MiB'], ['294', 'MiB'], ['54.86', 'MiB'], ['236.78', 'MiB'], ['36.27', 'MiB'], ['5.51', 'MiB'], ['5.07', 'MiB'], ['5.34', 'MiB'], ['4.8', 'MiB'], ['3.4', 'MiB'], ['4.8', 'MiB'], ['60.72', 'MiB'], ['6.29', 'MiB'], ['6.41', 'MiB'], ['54.87', 'MiB']], ['04-04 2014', '03-02 2014', '01-19 2013', '01-21 2010', '09-02 2014', '09-27 2009', '11-29 2009', '11-10 2011', '12-20 2011', '12-21 2011', '12-21 2011', '03-09 2012', '10-24 2012', '11-10 2012', '01-19 2013'], ['9890864', '9684858', '8037968', '5295449', '10954408', '5101630', '5185893', '6806996', '6901871', '6902247', '6903548', '7088979', '7756344', '7812951', '8037899']) + expected = ([('magnet:?xt=urn:btih:30df4f8b42b8fd77f5e5aa34abbffe97f5e81fbf&dn=Dan+Croll+%26bull%3B+Sweet+Disarray+%5B2014%5D+320&tr=udp%3A%2F%2Ftracker.openbittorrent.com%3A80&tr=udp%3A%2F%2Fopen.demonii.com%3A1337&tr=udp%3A%2F%2Ftracker.coppersurfer.tk%3A6969&tr=udp%3A%2F%2Fexodus.desync.com%3A6969', '16', '1'), ('magnet:?xt=urn:btih:7abd3eda600996b8e6fc9a61b83288e0c6ac0d83&dn=Dan+Bull+-+Massive+Collection&tr=udp%3A%2F%2Ftracker.openbittorrent.com%3A80&tr=udp%3A%2F%2Fopen.demonii.com%3A1337&tr=udp%3A%2F%2Ftracker.coppersurfer.tk%3A6969&tr=udp%3A%2F%2Fexodus.desync.com%3A6969', '4', '0'), ('magnet:?xt=urn:btih:8f8d68fd0a51237c89692c428ed8a8f64a969c70&dn=Dan+Bull+-+Generation+Gaming+-+2013&tr=udp%3A%2F%2Ftracker.openbittorrent.com%3A80&tr=udp%3A%2F%2Fopen.demonii.com%3A1337&tr=udp%3A%2F%2Ftracker.coppersurfer.tk%3A6969&tr=udp%3A%2F%2Fexodus.desync.com%3A6969', '2', '0'), ('magnet:?xt=urn:btih:3da6a0fdc1d67a768cb32597e926abdf3e1a2fdd&dn=Dan+Bull+Collection&tr=udp%3A%2F%2Ftracker.openbittorrent.com%3A80&tr=udp%3A%2F%2Fopen.demonii.com%3A1337&tr=udp%3A%2F%2Ftracker.coppersurfer.tk%3A6969&tr=udp%3A%2F%2Fexodus.desync.com%3A6969', '1', '0'), ('magnet:?xt=urn:btih:5cd371a235317319db7da52c64422f9c2ac75d77&dn=Dan+Bull+-+The+Garden+%7B2014-Album%7D&tr=udp%3A%2F%2Ftracker.openbittorrent.com%3A80&tr=udp%3A%2F%2Fopen.demonii.com%3A1337&tr=udp%3A%2F%2Ftracker.coppersurfer.tk%3A6969&tr=udp%3A%2F%2Fexodus.desync.com%3A6969', '1', '0'), ('magnet:?xt=urn:btih:4e14dbd077c920875be4c15971b23b609ad6716a&dn=Dan+Bull+-+Dear+Lily+%5Ban+open+letter+to+Lily+Allen%5D+-+2009%5BMP3+%40&tr=udp%3A%2F%2Ftracker.openbittorrent.com%3A80&tr=udp%3A%2F%2Fopen.demonii.com%3A1337&tr=udp%3A%2F%2Ftracker.coppersurfer.tk%3A6969&tr=udp%3A%2F%2Fexodus.desync.com%3A6969', '0', '1'), ('magnet:?xt=urn:btih:5d9319cf852f7462422cb1bffc37b65174645047&dn=Dan+Bull+-+Dear+Mandy+%5Ban+open+letter+to+Lord+Mandelson%5D&tr=udp%3A%2F%2Ftracker.openbittorrent.com%3A80&tr=udp%3A%2F%2Fopen.demonii.com%3A1337&tr=udp%3A%2F%2Ftracker.coppersurfer.tk%3A6969&tr=udp%3A%2F%2Fexodus.desync.com%3A6969', '0', '0'), ('magnet:?xt=urn:btih:1c54af57426f53fdef4bbf1a9dbddf32f7b4988a&dn=Dan+Bull+-+Dear+Lily+%28Lily+Allen%29+%28Song+about+filesharing%29&tr=udp%3A%2F%2Ftracker.openbittorrent.com%3A80&tr=udp%3A%2F%2Fopen.demonii.com%3A1337&tr=udp%3A%2F%2Ftracker.coppersurfer.tk%3A6969&tr=udp%3A%2F%2Fexodus.desync.com%3A6969', '0', '0'), ('magnet:?xt=urn:btih:942c5bf3e1e9bc263939e13cea6ad7bd5f62aa36&dn=Dan+Bull+-+SOPA+Cabana.mp3&tr=udp%3A%2F%2Ftracker.openbittorrent.com%3A80&tr=udp%3A%2F%2Fopen.demonii.com%3A1337&tr=udp%3A%2F%2Ftracker.coppersurfer.tk%3A6969&tr=udp%3A%2F%2Fexodus.desync.com%3A6969', '0', '0'), ('magnet:?xt=urn:btih:d376f68a31b0db652234e790ed7256ac5e32db57&dn=Dan+Bull+-+SOPA+Cabana&tr=udp%3A%2F%2Ftracker.openbittorrent.com%3A80&tr=udp%3A%2F%2Fopen.demonii.com%3A1337&tr=udp%3A%2F%2Ftracker.coppersurfer.tk%3A6969&tr=udp%3A%2F%2Fexodus.desync.com%3A6969', '0', '1'), ('magnet:?xt=urn:btih:28163770a532eb24b9e0865878288a9bbdb7a5e6&dn=Dan+Bull+-+SOPA+Cabana+%5BWORKING%5D&tr=udp%3A%2F%2Ftracker.openbittorrent.com%3A80&tr=udp%3A%2F%2Fopen.demonii.com%3A1337&tr=udp%3A%2F%2Ftracker.coppersurfer.tk%3A6969&tr=udp%3A%2F%2Fexodus.desync.com%3A6969', '0', '1'), ('magnet:?xt=urn:btih:779ab0f13a3fbb12ba68b27721491e4d143f26eb&dn=Dan+Bull+-+Bye+Bye+BPI+2012++%5BMP3%40192%5D%28oan%29&tr=udp%3A%2F%2Ftracker.openbittorrent.com%3A80&tr=udp%3A%2F%2Fopen.demonii.com%3A1337&tr=udp%3A%2F%2Ftracker.coppersurfer.tk%3A6969&tr=udp%3A%2F%2Fexodus.desync.com%3A6969', '0', '1'), ('magnet:?xt=urn:btih:2667e4795bd5c868dedcabcb52943f4bb7212bab&dn=Dan+Bull+-+Dishonored+%5BExplicit+ver.%5D+%28Single+2012%29&tr=udp%3A%2F%2Ftracker.openbittorrent.com%3A80&tr=udp%3A%2F%2Fopen.demonii.com%3A1337&tr=udp%3A%2F%2Ftracker.coppersurfer.tk%3A6969&tr=udp%3A%2F%2Fexodus.desync.com%3A6969', '0', '0'), ('magnet:?xt=urn:btih:16364f83c556ad0fd3bb57a4a7c890e7e8087414&dn=Halo+4+EPIC+Rap+By+Dan+Bull&tr=udp%3A%2F%2Ftracker.openbittorrent.com%3A80&tr=udp%3A%2F%2Fopen.demonii.com%3A1337&tr=udp%3A%2F%2Ftracker.coppersurfer.tk%3A6969&tr=udp%3A%2F%2Fexodus.desync.com%3A6969', '0', '0'), ('magnet:?xt=urn:btih:843b466d9fd1f0bee3a476573b272dc2d6d0ebae&dn=Dan+Bull+-+Generation+Gaming+-+2013&tr=udp%3A%2F%2Ftracker.openbittorrent.com%3A80&tr=udp%3A%2F%2Fopen.demonii.com%3A1337&tr=udp%3A%2F%2Ftracker.coppersurfer.tk%3A6969&tr=udp%3A%2F%2Fexodus.desync.com%3A6969', '0', '1')], [['89.33', 'MiB'], ['294', 'MiB'], ['54.86', 'MiB'], ['236.78', 'MiB'], ['36.27', 'MiB'], ['5.51', 'MiB'], ['5.07', 'MiB'], ['5.34', 'MiB'], ['4.8', 'MiB'], ['3.4', 'MiB'], ['4.8', 'MiB'], ['60.72', 'MiB'], ['6.29', 'MiB'], ['6.41', 'MiB'], ['54.87', 'MiB']],['04-04\xa02014', '03-02\xa02014', '01-19\xa02013', '01-21\xa02010', '09-02\xa02014', '09-27\xa02009', '11-29\xa02009', '11-10\xa02011', '12-20\xa02011', '12-21\xa02011', '12-21\xa02011', '03-09\xa02012', '10-24\xa02012', '11-10\xa02012', '01-19\xa02013'], ['9890864', '9684858', '8037968', '5295449', '10954408', '5101630', '5185893', '6806996', '6901871', '6902247', '6903548', '7088979', '7756344', '7812951', '8037899']) self.assertEqual(actual, expected) if __name__ == '__main__': From b8fa71d1419dd4a0a916d5529c6d0f513a70351e Mon Sep 17 00:00:00 2001 From: Viktor Stanchev Date: Thu, 3 Sep 2015 21:48:57 -0700 Subject: [PATCH 07/12] fix not flushing on time --- pirate/pirate.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pirate/pirate.py b/pirate/pirate.py index 674ca46..c9070d4 100755 --- a/pirate/pirate.py +++ b/pirate/pirate.py @@ -191,7 +191,7 @@ def main(): for mirror in mirrors: try: - print('Trying', mirror, end='... ') + print('Trying', mirror, end='... \n') mags, sizes, uploaded, ids = pirate.torrent.remote( pages=args.pages, category=pirate.torrent.parse_category(args.category), From 1f0470015829c534669838335a92351b3087e2cd Mon Sep 17 00:00:00 2001 From: Viktor Stanchev Date: Thu, 3 Sep 2015 22:18:38 -0700 Subject: [PATCH 08/12] simplify results array --- pirate/pirate.py | 28 ++++++++++++++-------------- pirate/print.py | 42 ++++++++++++++++++++++++++---------------- pirate/torrent.py | 27 ++++++++++++--------------- tests/test_torrent.py | 4 ++-- 4 files changed, 54 insertions(+), 47 deletions(-) diff --git a/pirate/pirate.py b/pirate/pirate.py index c9070d4..ab3fba8 100755 --- a/pirate/pirate.py +++ b/pirate/pirate.py @@ -171,11 +171,11 @@ def main(): path = args.database else: path = config.get('LocalDB', 'path') - mags = pirate.local.search(path, args.search) + results = pirate.local.search(path, args.search) sizes, uploaded = [], [] else: - mags, mirrors = [], {'https://thepiratebay.mn'} + results, mirrors = [], {'https://thepiratebay.mn'} try: req = request.Request('https://proxybay.co/list.txt', headers=pirate.data.default_headers) @@ -192,7 +192,7 @@ def main(): for mirror in mirrors: try: print('Trying', mirror, end='... \n') - mags, sizes, uploaded, ids = pirate.torrent.remote( + results = pirate.torrent.remote( pages=args.pages, category=pirate.torrent.parse_category(args.category), sort=pirate.torrent.parse_sort(args.sort), @@ -211,18 +211,18 @@ def main(): print('No available mirrors :(', color='WARN') return - if not mags: + if len(results) == 0: print('No results') return - pirate.print.search_results(mags, sizes, uploaded, local=args.database) + pirate.print.search_results(results, local=args.database) if args.first: print('Choosing first result') choices = [0] elif args.download_all: print('Downloading all results') - choices = range(len(mags)) + choices = range(len(results)) else: # New input loop to support different link options while True: @@ -284,16 +284,16 @@ def main(): print('Bye.', color='alt') return elif code == 'd': - pirate.print.descriptions(choices, mags, site, ids) + pirate.print.descriptions(choices, results, site) elif code == 'f': - pirate.print.file_lists(choices, mags, site, ids) + pirate.print.file_lists(choices, results, site) elif code == 'p': - pirate.print.search_results(mags, sizes, uploaded) + pirate.print.search_results(results) elif code == 'm': - pirate.torrent.save_magnets(choices, mags, config.get( + pirate.torrent.save_magnets(choices, results, config.get( 'Save', 'directory')) elif code == 't': - pirate.torrent.save_torrents(choices, mags, config.get( + pirate.torrent.save_torrents(choices, results, config.get( 'Save', 'directory')) elif not l: print('No links entered!', color='WARN') @@ -307,13 +307,13 @@ def main(): if args.save_magnets or config.getboolean('Save', 'magnets'): print('Saving selected magnets...') - pirate.torrent.save_magnets(choices, mags, config.get( + pirate.torrent.save_magnets(choices, results, config.get( 'Save', 'directory')) save_to_file = True if args.save_torrents or config.getboolean('Save', 'torrents'): print('Saving selected torrents...') - pirate.torrent.save_torrents(choices, mags, config.get( + pirate.torrent.save_torrents(choices, results, config.get( 'Save', 'directory')) save_to_file = True @@ -321,7 +321,7 @@ def main(): return for choice in choices: - url = mags[int(choice)][0] + url = results[int(choice)]['magnet'] if args.transmission or config.getboolean('Misc', 'transmission'): subprocess.call(transmission_command + ['--add', url]) diff --git a/pirate/print.py b/pirate/print.py index 9d3f4a0..7e19ba3 100644 --- a/pirate/print.py +++ b/pirate/print.py @@ -31,7 +31,7 @@ def print(*args, **kwargs): return builtins.print(*args, **kwargs) -def search_results(mags, sizes, uploaded, local=None): +def search_results(results, local=None): columns = int(os.popen('stty size', 'r').read().split()[1]) cur_color = 'zebra_0' @@ -45,21 +45,26 @@ def search_results(mags, sizes, uploaded, local=None): 'SIZE', 'UPLOAD', 'NAME', length=columns - 52), color='header') - for m, magnet in enumerate(mags): + for n, result in enumerate(results): # Alternate between colors cur_color = 'zebra_0' if cur_color == 'zebra_1' else 'zebra_1' - name = re.search(r'dn=([^\&]*)', magnet[0]) - torrent_name = parse.unquote(name.group(1)).replace('+', ' ') + name = re.search(r'dn=([^\&]*)', result['magnet']) + torrent_name = parse.unquote_plus(name.group(1)) if local: line = '{:5} {:{length}}' - content = [m, torrent_name[:columns]] + content = [n, torrent_name[:columns]] else: - no_seeders, no_leechers = map(int, magnet[1:]) - size, unit = (float(sizes[m][0]), - sizes[m][1]) if sizes else (0, '???') - date = uploaded[m] + no_seeders = int(result['seeds']) + no_leechers = int(result['leechers']) + if result['size'] != []: + size = float(result['size'][0]) + unit = result['size'][1] + else: + size = 0 + unit = '???' + date = result['uploaded'] # compute the S/L ratio (Higher is better) try: @@ -69,17 +74,17 @@ def search_results(mags, sizes, uploaded, local=None): line = ('{:4} {:5} {:5} {:5.1f} {:5.1f}' ' {:3} {:<11} {:{length}}') - content = [m, no_seeders, no_leechers, ratio, + content = [n, no_seeders, no_leechers, ratio, size, unit, date, torrent_name[:columns - 52]] # enhanced print output with justified columns print(line.format(*content, length=columns - 52), color=cur_color) -def descriptions(chosen_links, mags, site, identifiers): +def descriptions(chosen_links, results, site): for link in chosen_links: link = int(link) - path = '/torrent/%s/' % identifiers[link] + path = '/torrent/%s/' % results[link]['id'] req = request.Request(site + path, headers=pirate.data.default_headers) req.add_header('Accept-encoding', 'gzip') f = request.urlopen(req, timeout=pirate.data.default_timeout) @@ -88,7 +93,7 @@ def descriptions(chosen_links, mags, site, identifiers): f = gzip.GzipFile(fileobj=BytesIO(f.read())) res = f.read().decode('utf-8') - name = re.search(r'dn=([^\&]*)', mags[link][0]) + name = re.search(r'dn=([^\&]*)', results[link]['magnet']) torrent_name = parse.unquote(name.group(1)).replace('+', ' ') desc = re.search(r'
\s*
(.+?)(?=
)', res, re.DOTALL).group(1) @@ -101,10 +106,11 @@ def descriptions(chosen_links, mags, site, identifiers): print(desc, color='zebra_0') -def file_lists(chosen_links, mags, site, identifiers): +def file_lists(chosen_links, results, site): for link in chosen_links: + link = int(link) path = '/ajax_details_filelist.php' - query = '?id=' + identifiers[int(link)] + query = '?id=' + results[link]['id'] req = request.Request(site + path + query, headers=pirate.data.default_headers) req.add_header('Accept-encoding', 'gzip') @@ -113,10 +119,14 @@ def file_lists(chosen_links, mags, site, identifiers): if f.info().get('Content-Encoding') == 'gzip': f = gzip.GzipFile(fileobj=BytesIO(f.read())) + # TODO: proper html decoding/parsing res = f.read().decode('utf-8').replace(' ', ' ') + if 'File list not available.' in res: + print('File list not available.') + return files = re.findall(r'\s*([^<]+?)\s*\s*([^<]+?)\s*', res) - name = re.search(r'dn=([^\&]*)', mags[int(link)][0]) + name = re.search(r'dn=([^\&]*)', results[link]['magnet']) torrent_name = parse.unquote(name.group(1)).replace('+', ' ') print('Files in "%s":' % torrent_name, color='zebra_1') diff --git a/pirate/torrent.py b/pirate/torrent.py index eb62f21..ab70e8b 100644 --- a/pirate/torrent.py +++ b/pirate/torrent.py @@ -75,6 +75,7 @@ def build_request_path(page, category, sort, mode, terms): raise Exception('Unknown mode.') +# this returns a list of dictionaries def parse_page(html): d = pq(html) @@ -97,7 +98,7 @@ def parse_page(html): d('table#searchResult tr>td:nth-child(3)'))) leechers = list(map(lambda l: pq(l).text(), d('table#searchResult tr>td:nth-child(4)'))) - identifiers = list(map(lambda l: pq(l).attr('href').split('/')[2], + ids = list(map(lambda l: pq(l).attr('href').split('/')[2], d('table#searchResult .detLink'))) sizes = [] @@ -108,14 +109,13 @@ def parse_page(html): sizes.append(re.findall(r'(?<=Size )[0-9.]+\s[KMGT]*[i ]*B', text)[0].split()) uploaded.append(re.findall(r'(?<=Uploaded ).+(?=\, Size)', text)[0]) - return list(zip(magnets,seeds,leechers)), sizes, uploaded, identifiers + titles = ('magnet', 'seeds', 'leechers', 'size', 'uploaded', 'id') + rows = list(zip(magnets, seeds, leechers, sizes, uploaded, ids)) + return [dict(zip(titles,row)) for row in rows] def remote(pages, category, sort, mode, terms, mirror): res_l = [] - sizes = [] - uploaded = [] - identifiers = [] if pages < 1: raise ValueError('Please provide an integer greater than 0 ' @@ -134,18 +134,13 @@ def remote(pages, category, sort, mode, terms, mirror): f = gzip.GzipFile(fileobj=BytesIO(f.read())) res = f.read().decode('utf-8') - page_res_l, page_sizes, page_uploaded, page_identifiers = parse_page(res) - res_l += page_res_l - sizes += page_sizes - uploaded += page_uploaded - identifiers += page_identifiers + res_l += parse_page(res) except KeyboardInterrupt: print('\nCancelled.') sys.exit(0) - # return the sizes in a separate list - return res_l, sizes, uploaded, identifiers + return res_l def get_torrent(info_hash): @@ -161,9 +156,10 @@ def get_torrent(info_hash): return torrent.read() -def save_torrents(chosen_links, mags, folder): +def save_torrents(chosen_links, results, folder): for link in chosen_links: - magnet = mags[int(link)][0] + link = int(link) + magnet = results[link]['magnet'] name = re.search(r'dn=([^\&]*)', magnet) torrent_name = parse.unquote(name.group(1)).replace('+', ' ') info_hash = int(re.search(r'btih:([a-f0-9]{40})', magnet).group(1), 16) @@ -180,7 +176,8 @@ def save_torrents(chosen_links, mags, folder): def save_magnets(chosen_links, mags, folder): for link in chosen_links: - magnet = mags[int(link)][0] + link = int(link) + magnet = results[link]['magnet'] name = re.search(r'dn=([^\&]*)', magnet) torrent_name = parse.unquote(name.group(1)).replace('+', ' ') info_hash = int(re.search(r'btih:([a-f0-9]{40})', magnet).group(1), 16) diff --git a/tests/test_torrent.py b/tests/test_torrent.py index 06c62fd..6ca7c5c 100755 --- a/tests/test_torrent.py +++ b/tests/test_torrent.py @@ -11,7 +11,7 @@ class TestTorrent(unittest.TestCase): def test_no_hits(self): res = util.read_data('no_hits.html') actual = pirate.torrent.parse_page(res) - expected = ([], [], [], []) + expected = [] self.assertEqual(actual, expected) def test_blocked_mirror(self): @@ -22,7 +22,7 @@ class TestTorrent(unittest.TestCase): def test_search_results(self): res = util.read_data('dan_bull_search.html') actual = pirate.torrent.parse_page(res) - expected = ([('magnet:?xt=urn:btih:30df4f8b42b8fd77f5e5aa34abbffe97f5e81fbf&dn=Dan+Croll+%26bull%3B+Sweet+Disarray+%5B2014%5D+320&tr=udp%3A%2F%2Ftracker.openbittorrent.com%3A80&tr=udp%3A%2F%2Fopen.demonii.com%3A1337&tr=udp%3A%2F%2Ftracker.coppersurfer.tk%3A6969&tr=udp%3A%2F%2Fexodus.desync.com%3A6969', '16', '1'), ('magnet:?xt=urn:btih:7abd3eda600996b8e6fc9a61b83288e0c6ac0d83&dn=Dan+Bull+-+Massive+Collection&tr=udp%3A%2F%2Ftracker.openbittorrent.com%3A80&tr=udp%3A%2F%2Fopen.demonii.com%3A1337&tr=udp%3A%2F%2Ftracker.coppersurfer.tk%3A6969&tr=udp%3A%2F%2Fexodus.desync.com%3A6969', '4', '0'), ('magnet:?xt=urn:btih:8f8d68fd0a51237c89692c428ed8a8f64a969c70&dn=Dan+Bull+-+Generation+Gaming+-+2013&tr=udp%3A%2F%2Ftracker.openbittorrent.com%3A80&tr=udp%3A%2F%2Fopen.demonii.com%3A1337&tr=udp%3A%2F%2Ftracker.coppersurfer.tk%3A6969&tr=udp%3A%2F%2Fexodus.desync.com%3A6969', '2', '0'), ('magnet:?xt=urn:btih:3da6a0fdc1d67a768cb32597e926abdf3e1a2fdd&dn=Dan+Bull+Collection&tr=udp%3A%2F%2Ftracker.openbittorrent.com%3A80&tr=udp%3A%2F%2Fopen.demonii.com%3A1337&tr=udp%3A%2F%2Ftracker.coppersurfer.tk%3A6969&tr=udp%3A%2F%2Fexodus.desync.com%3A6969', '1', '0'), ('magnet:?xt=urn:btih:5cd371a235317319db7da52c64422f9c2ac75d77&dn=Dan+Bull+-+The+Garden+%7B2014-Album%7D&tr=udp%3A%2F%2Ftracker.openbittorrent.com%3A80&tr=udp%3A%2F%2Fopen.demonii.com%3A1337&tr=udp%3A%2F%2Ftracker.coppersurfer.tk%3A6969&tr=udp%3A%2F%2Fexodus.desync.com%3A6969', '1', '0'), ('magnet:?xt=urn:btih:4e14dbd077c920875be4c15971b23b609ad6716a&dn=Dan+Bull+-+Dear+Lily+%5Ban+open+letter+to+Lily+Allen%5D+-+2009%5BMP3+%40&tr=udp%3A%2F%2Ftracker.openbittorrent.com%3A80&tr=udp%3A%2F%2Fopen.demonii.com%3A1337&tr=udp%3A%2F%2Ftracker.coppersurfer.tk%3A6969&tr=udp%3A%2F%2Fexodus.desync.com%3A6969', '0', '1'), ('magnet:?xt=urn:btih:5d9319cf852f7462422cb1bffc37b65174645047&dn=Dan+Bull+-+Dear+Mandy+%5Ban+open+letter+to+Lord+Mandelson%5D&tr=udp%3A%2F%2Ftracker.openbittorrent.com%3A80&tr=udp%3A%2F%2Fopen.demonii.com%3A1337&tr=udp%3A%2F%2Ftracker.coppersurfer.tk%3A6969&tr=udp%3A%2F%2Fexodus.desync.com%3A6969', '0', '0'), ('magnet:?xt=urn:btih:1c54af57426f53fdef4bbf1a9dbddf32f7b4988a&dn=Dan+Bull+-+Dear+Lily+%28Lily+Allen%29+%28Song+about+filesharing%29&tr=udp%3A%2F%2Ftracker.openbittorrent.com%3A80&tr=udp%3A%2F%2Fopen.demonii.com%3A1337&tr=udp%3A%2F%2Ftracker.coppersurfer.tk%3A6969&tr=udp%3A%2F%2Fexodus.desync.com%3A6969', '0', '0'), ('magnet:?xt=urn:btih:942c5bf3e1e9bc263939e13cea6ad7bd5f62aa36&dn=Dan+Bull+-+SOPA+Cabana.mp3&tr=udp%3A%2F%2Ftracker.openbittorrent.com%3A80&tr=udp%3A%2F%2Fopen.demonii.com%3A1337&tr=udp%3A%2F%2Ftracker.coppersurfer.tk%3A6969&tr=udp%3A%2F%2Fexodus.desync.com%3A6969', '0', '0'), ('magnet:?xt=urn:btih:d376f68a31b0db652234e790ed7256ac5e32db57&dn=Dan+Bull+-+SOPA+Cabana&tr=udp%3A%2F%2Ftracker.openbittorrent.com%3A80&tr=udp%3A%2F%2Fopen.demonii.com%3A1337&tr=udp%3A%2F%2Ftracker.coppersurfer.tk%3A6969&tr=udp%3A%2F%2Fexodus.desync.com%3A6969', '0', '1'), ('magnet:?xt=urn:btih:28163770a532eb24b9e0865878288a9bbdb7a5e6&dn=Dan+Bull+-+SOPA+Cabana+%5BWORKING%5D&tr=udp%3A%2F%2Ftracker.openbittorrent.com%3A80&tr=udp%3A%2F%2Fopen.demonii.com%3A1337&tr=udp%3A%2F%2Ftracker.coppersurfer.tk%3A6969&tr=udp%3A%2F%2Fexodus.desync.com%3A6969', '0', '1'), ('magnet:?xt=urn:btih:779ab0f13a3fbb12ba68b27721491e4d143f26eb&dn=Dan+Bull+-+Bye+Bye+BPI+2012++%5BMP3%40192%5D%28oan%29&tr=udp%3A%2F%2Ftracker.openbittorrent.com%3A80&tr=udp%3A%2F%2Fopen.demonii.com%3A1337&tr=udp%3A%2F%2Ftracker.coppersurfer.tk%3A6969&tr=udp%3A%2F%2Fexodus.desync.com%3A6969', '0', '1'), ('magnet:?xt=urn:btih:2667e4795bd5c868dedcabcb52943f4bb7212bab&dn=Dan+Bull+-+Dishonored+%5BExplicit+ver.%5D+%28Single+2012%29&tr=udp%3A%2F%2Ftracker.openbittorrent.com%3A80&tr=udp%3A%2F%2Fopen.demonii.com%3A1337&tr=udp%3A%2F%2Ftracker.coppersurfer.tk%3A6969&tr=udp%3A%2F%2Fexodus.desync.com%3A6969', '0', '0'), ('magnet:?xt=urn:btih:16364f83c556ad0fd3bb57a4a7c890e7e8087414&dn=Halo+4+EPIC+Rap+By+Dan+Bull&tr=udp%3A%2F%2Ftracker.openbittorrent.com%3A80&tr=udp%3A%2F%2Fopen.demonii.com%3A1337&tr=udp%3A%2F%2Ftracker.coppersurfer.tk%3A6969&tr=udp%3A%2F%2Fexodus.desync.com%3A6969', '0', '0'), ('magnet:?xt=urn:btih:843b466d9fd1f0bee3a476573b272dc2d6d0ebae&dn=Dan+Bull+-+Generation+Gaming+-+2013&tr=udp%3A%2F%2Ftracker.openbittorrent.com%3A80&tr=udp%3A%2F%2Fopen.demonii.com%3A1337&tr=udp%3A%2F%2Ftracker.coppersurfer.tk%3A6969&tr=udp%3A%2F%2Fexodus.desync.com%3A6969', '0', '1')], [['89.33', 'MiB'], ['294', 'MiB'], ['54.86', 'MiB'], ['236.78', 'MiB'], ['36.27', 'MiB'], ['5.51', 'MiB'], ['5.07', 'MiB'], ['5.34', 'MiB'], ['4.8', 'MiB'], ['3.4', 'MiB'], ['4.8', 'MiB'], ['60.72', 'MiB'], ['6.29', 'MiB'], ['6.41', 'MiB'], ['54.87', 'MiB']],['04-04\xa02014', '03-02\xa02014', '01-19\xa02013', '01-21\xa02010', '09-02\xa02014', '09-27\xa02009', '11-29\xa02009', '11-10\xa02011', '12-20\xa02011', '12-21\xa02011', '12-21\xa02011', '03-09\xa02012', '10-24\xa02012', '11-10\xa02012', '01-19\xa02013'], ['9890864', '9684858', '8037968', '5295449', '10954408', '5101630', '5185893', '6806996', '6901871', '6902247', '6903548', '7088979', '7756344', '7812951', '8037899']) + expected = [{'uploaded': '04-04\xa02014', 'seeds': '16', 'leechers': '1', 'id': '9890864', 'magnet': 'magnet:?xt=urn:btih:30df4f8b42b8fd77f5e5aa34abbffe97f5e81fbf&dn=Dan+Croll+%26bull%3B+Sweet+Disarray+%5B2014%5D+320&tr=udp%3A%2F%2Ftracker.openbittorrent.com%3A80&tr=udp%3A%2F%2Fopen.demonii.com%3A1337&tr=udp%3A%2F%2Ftracker.coppersurfer.tk%3A6969&tr=udp%3A%2F%2Fexodus.desync.com%3A6969', 'size': ['89.33', 'MiB']}, {'uploaded': '03-02\xa02014', 'seeds': '4', 'leechers': '0', 'id': '9684858', 'magnet': 'magnet:?xt=urn:btih:7abd3eda600996b8e6fc9a61b83288e0c6ac0d83&dn=Dan+Bull+-+Massive+Collection&tr=udp%3A%2F%2Ftracker.openbittorrent.com%3A80&tr=udp%3A%2F%2Fopen.demonii.com%3A1337&tr=udp%3A%2F%2Ftracker.coppersurfer.tk%3A6969&tr=udp%3A%2F%2Fexodus.desync.com%3A6969', 'size': ['294', 'MiB']}, {'uploaded': '01-19\xa02013', 'seeds': '2', 'leechers': '0', 'id': '8037968', 'magnet': 'magnet:?xt=urn:btih:8f8d68fd0a51237c89692c428ed8a8f64a969c70&dn=Dan+Bull+-+Generation+Gaming+-+2013&tr=udp%3A%2F%2Ftracker.openbittorrent.com%3A80&tr=udp%3A%2F%2Fopen.demonii.com%3A1337&tr=udp%3A%2F%2Ftracker.coppersurfer.tk%3A6969&tr=udp%3A%2F%2Fexodus.desync.com%3A6969', 'size': ['54.86', 'MiB']}, {'uploaded': '01-21\xa02010', 'seeds': '1', 'leechers': '0', 'id': '5295449', 'magnet': 'magnet:?xt=urn:btih:3da6a0fdc1d67a768cb32597e926abdf3e1a2fdd&dn=Dan+Bull+Collection&tr=udp%3A%2F%2Ftracker.openbittorrent.com%3A80&tr=udp%3A%2F%2Fopen.demonii.com%3A1337&tr=udp%3A%2F%2Ftracker.coppersurfer.tk%3A6969&tr=udp%3A%2F%2Fexodus.desync.com%3A6969', 'size': ['236.78', 'MiB']}, {'uploaded': '09-02\xa02014', 'seeds': '1', 'leechers': '0', 'id': '10954408', 'magnet': 'magnet:?xt=urn:btih:5cd371a235317319db7da52c64422f9c2ac75d77&dn=Dan+Bull+-+The+Garden+%7B2014-Album%7D&tr=udp%3A%2F%2Ftracker.openbittorrent.com%3A80&tr=udp%3A%2F%2Fopen.demonii.com%3A1337&tr=udp%3A%2F%2Ftracker.coppersurfer.tk%3A6969&tr=udp%3A%2F%2Fexodus.desync.com%3A6969', 'size': ['36.27', 'MiB']}, {'uploaded': '09-27\xa02009', 'seeds': '0', 'leechers': '1', 'id': '5101630', 'magnet': 'magnet:?xt=urn:btih:4e14dbd077c920875be4c15971b23b609ad6716a&dn=Dan+Bull+-+Dear+Lily+%5Ban+open+letter+to+Lily+Allen%5D+-+2009%5BMP3+%40&tr=udp%3A%2F%2Ftracker.openbittorrent.com%3A80&tr=udp%3A%2F%2Fopen.demonii.com%3A1337&tr=udp%3A%2F%2Ftracker.coppersurfer.tk%3A6969&tr=udp%3A%2F%2Fexodus.desync.com%3A6969', 'size': ['5.51', 'MiB']}, {'uploaded': '11-29\xa02009', 'seeds': '0', 'leechers': '0', 'id': '5185893', 'magnet': 'magnet:?xt=urn:btih:5d9319cf852f7462422cb1bffc37b65174645047&dn=Dan+Bull+-+Dear+Mandy+%5Ban+open+letter+to+Lord+Mandelson%5D&tr=udp%3A%2F%2Ftracker.openbittorrent.com%3A80&tr=udp%3A%2F%2Fopen.demonii.com%3A1337&tr=udp%3A%2F%2Ftracker.coppersurfer.tk%3A6969&tr=udp%3A%2F%2Fexodus.desync.com%3A6969', 'size': ['5.07', 'MiB']}, {'uploaded': '11-10\xa02011', 'seeds': '0', 'leechers': '0', 'id': '6806996', 'magnet': 'magnet:?xt=urn:btih:1c54af57426f53fdef4bbf1a9dbddf32f7b4988a&dn=Dan+Bull+-+Dear+Lily+%28Lily+Allen%29+%28Song+about+filesharing%29&tr=udp%3A%2F%2Ftracker.openbittorrent.com%3A80&tr=udp%3A%2F%2Fopen.demonii.com%3A1337&tr=udp%3A%2F%2Ftracker.coppersurfer.tk%3A6969&tr=udp%3A%2F%2Fexodus.desync.com%3A6969', 'size': ['5.34', 'MiB']}, {'uploaded': '12-20\xa02011', 'seeds': '0', 'leechers': '0', 'id': '6901871', 'magnet': 'magnet:?xt=urn:btih:942c5bf3e1e9bc263939e13cea6ad7bd5f62aa36&dn=Dan+Bull+-+SOPA+Cabana.mp3&tr=udp%3A%2F%2Ftracker.openbittorrent.com%3A80&tr=udp%3A%2F%2Fopen.demonii.com%3A1337&tr=udp%3A%2F%2Ftracker.coppersurfer.tk%3A6969&tr=udp%3A%2F%2Fexodus.desync.com%3A6969', 'size': ['4.8', 'MiB']}, {'uploaded': '12-21\xa02011', 'seeds': '0', 'leechers': '1', 'id': '6902247', 'magnet': 'magnet:?xt=urn:btih:d376f68a31b0db652234e790ed7256ac5e32db57&dn=Dan+Bull+-+SOPA+Cabana&tr=udp%3A%2F%2Ftracker.openbittorrent.com%3A80&tr=udp%3A%2F%2Fopen.demonii.com%3A1337&tr=udp%3A%2F%2Ftracker.coppersurfer.tk%3A6969&tr=udp%3A%2F%2Fexodus.desync.com%3A6969', 'size': ['3.4', 'MiB']}, {'uploaded': '12-21\xa02011', 'seeds': '0', 'leechers': '1', 'id': '6903548', 'magnet': 'magnet:?xt=urn:btih:28163770a532eb24b9e0865878288a9bbdb7a5e6&dn=Dan+Bull+-+SOPA+Cabana+%5BWORKING%5D&tr=udp%3A%2F%2Ftracker.openbittorrent.com%3A80&tr=udp%3A%2F%2Fopen.demonii.com%3A1337&tr=udp%3A%2F%2Ftracker.coppersurfer.tk%3A6969&tr=udp%3A%2F%2Fexodus.desync.com%3A6969', 'size': ['4.8', 'MiB']}, {'uploaded': '03-09\xa02012', 'seeds': '0', 'leechers': '1', 'id': '7088979', 'magnet': 'magnet:?xt=urn:btih:779ab0f13a3fbb12ba68b27721491e4d143f26eb&dn=Dan+Bull+-+Bye+Bye+BPI+2012++%5BMP3%40192%5D%28oan%29&tr=udp%3A%2F%2Ftracker.openbittorrent.com%3A80&tr=udp%3A%2F%2Fopen.demonii.com%3A1337&tr=udp%3A%2F%2Ftracker.coppersurfer.tk%3A6969&tr=udp%3A%2F%2Fexodus.desync.com%3A6969', 'size': ['60.72', 'MiB']}, {'uploaded': '10-24\xa02012', 'seeds': '0', 'leechers': '0', 'id': '7756344', 'magnet': 'magnet:?xt=urn:btih:2667e4795bd5c868dedcabcb52943f4bb7212bab&dn=Dan+Bull+-+Dishonored+%5BExplicit+ver.%5D+%28Single+2012%29&tr=udp%3A%2F%2Ftracker.openbittorrent.com%3A80&tr=udp%3A%2F%2Fopen.demonii.com%3A1337&tr=udp%3A%2F%2Ftracker.coppersurfer.tk%3A6969&tr=udp%3A%2F%2Fexodus.desync.com%3A6969', 'size': ['6.29', 'MiB']}, {'uploaded': '11-10\xa02012', 'seeds': '0', 'leechers': '0', 'id': '7812951', 'magnet': 'magnet:?xt=urn:btih:16364f83c556ad0fd3bb57a4a7c890e7e8087414&dn=Halo+4+EPIC+Rap+By+Dan+Bull&tr=udp%3A%2F%2Ftracker.openbittorrent.com%3A80&tr=udp%3A%2F%2Fopen.demonii.com%3A1337&tr=udp%3A%2F%2Ftracker.coppersurfer.tk%3A6969&tr=udp%3A%2F%2Fexodus.desync.com%3A6969', 'size': ['6.41', 'MiB']}, {'uploaded': '01-19\xa02013', 'seeds': '0', 'leechers': '1', 'id': '8037899', 'magnet': 'magnet:?xt=urn:btih:843b466d9fd1f0bee3a476573b272dc2d6d0ebae&dn=Dan+Bull+-+Generation+Gaming+-+2013&tr=udp%3A%2F%2Ftracker.openbittorrent.com%3A80&tr=udp%3A%2F%2Fopen.demonii.com%3A1337&tr=udp%3A%2F%2Ftracker.coppersurfer.tk%3A6969&tr=udp%3A%2F%2Fexodus.desync.com%3A6969', 'size': ['54.87', 'MiB']}] self.assertEqual(actual, expected) if __name__ == '__main__': From 101f7e0fb1498ffb8455d8b0f33712e385cf0c71 Mon Sep 17 00:00:00 2001 From: Viktor Stanchev Date: Thu, 3 Sep 2015 22:34:08 -0700 Subject: [PATCH 09/12] refactor results parser to be row-oriented --- pirate/torrent.py | 51 ++++++++++++++++++++++++++--------------------- 1 file changed, 28 insertions(+), 23 deletions(-) diff --git a/pirate/torrent.py b/pirate/torrent.py index ab70e8b..c0f2ff4 100644 --- a/pirate/torrent.py +++ b/pirate/torrent.py @@ -79,39 +79,44 @@ def build_request_path(page, category, sort, mode, terms): def parse_page(html): d = pq(html) - # first get the magnet links and make sure there are results - magnets = list(map(lambda l: pq(l).attr('href'), - d('table#searchResult tr>td:nth-child(2)>a:nth-child(2)'))) + results = [] + # parse the rows one by one + for row in d('table#searchResult tr'): + drow = d(row) + if len(drow('th')) > 0: + continue + + # grab info about the row + magnet = pq(drow(':eq(0)>td:nth-child(2)>a:nth-child(2)')[0]).attr('href') + seeds = pq(drow(':eq(0)>td:nth-child(3)')).text() + leechers = pq(drow(':eq(0)>td:nth-child(4)')).text() + id_ = pq(drow('.detLink')).attr('href').split('/')[2] + + # parse descriptions separately + desc_text = pq(drow('font.detDesc')[0]).text() + size = re.findall(r'(?<=Size )[0-9.]+\s[KMGT]*[i ]*B', desc_text)[0].split() + uploaded = re.findall(r'(?<=Uploaded ).+(?=\, Size)', desc_text)[0] + + results.append({ + 'magnet': magnet, + 'seeds': seeds, + 'leechers': leechers, + 'size': size, + 'uploaded': uploaded, + 'id': id_ + }) # check for a blocked mirror no_results = re.search(r'No hits\. Try adding an asterisk in ' r'you search phrase\.', html) - if len(magnets) == 0 and no_results is None: + if len(results) == 0 and no_results is None: # Contradiction - we found no results, # but the page didn't say there were no results. # The page is probably not actually the pirate bay, # so let's try another mirror raise IOError('Blocked mirror detected.') - # next get more info - seeds = list(map(lambda l: pq(l).text(), - d('table#searchResult tr>td:nth-child(3)'))) - leechers = list(map(lambda l: pq(l).text(), - d('table#searchResult tr>td:nth-child(4)'))) - ids = list(map(lambda l: pq(l).attr('href').split('/')[2], - d('table#searchResult .detLink'))) - - sizes = [] - uploaded = [] - # parse descriptions separately - for node in d('font.detDesc'): - text = pq(node).text() - sizes.append(re.findall(r'(?<=Size )[0-9.]+\s[KMGT]*[i ]*B', text)[0].split()) - uploaded.append(re.findall(r'(?<=Uploaded ).+(?=\, Size)', text)[0]) - - titles = ('magnet', 'seeds', 'leechers', 'size', 'uploaded', 'id') - rows = list(zip(magnets, seeds, leechers, sizes, uploaded, ids)) - return [dict(zip(titles,row)) for row in rows] + return results def remote(pages, category, sort, mode, terms, mirror): From 0b8a27e64e3cfb54686af11c5a4c6cc0874da618 Mon Sep 17 00:00:00 2001 From: Viktor Stanchev Date: Thu, 3 Sep 2015 22:46:06 -0700 Subject: [PATCH 10/12] test parse_cmd --- tests/test_pirate.py | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) create mode 100755 tests/test_pirate.py diff --git a/tests/test_pirate.py b/tests/test_pirate.py new file mode 100755 index 0000000..dff36af --- /dev/null +++ b/tests/test_pirate.py @@ -0,0 +1,21 @@ +#!/usr/bin/env python3 +import unittest +import pirate.pirate + + +class TestPirate(unittest.TestCase): + + def test_parse_cmd(self): + tests = [ + [['abc', ''], ['abc']], + [['abc %s', 'url'], ['abc', 'url']], + [['abc "%s"', 'url'], ['abc', 'url']], + [["abc \'%s\'", 'url'], ['abc', 'url']], + [['abc bash -c "\'%s\'"', 'url'], ['abc', 'bash', '-c', "'url'"]], + [['abc %s %s', 'url'], ['abc', 'url', 'url']], + ] + for test in tests: + self.assertEqual(pirate.pirate.parse_cmd(*test[0]), test[1]) + +if __name__ == '__main__': + unittest.main() From 383e5a101dfb52e19d652200e5b31d4441660684 Mon Sep 17 00:00:00 2001 From: Viktor Stanchev Date: Thu, 3 Sep 2015 23:00:40 -0700 Subject: [PATCH 11/12] test torrent command parser --- pirate/pirate.py | 72 +++++++++++++++++++++++--------------------- pirate/print.py | 2 -- pirate/torrent.py | 2 -- tests/test_pirate.py | 19 ++++++++++++ 4 files changed, 57 insertions(+), 38 deletions(-) diff --git a/pirate/pirate.py b/pirate/pirate.py index ab3fba8..b740b00 100755 --- a/pirate/pirate.py +++ b/pirate/pirate.py @@ -66,6 +66,43 @@ def parse_cmd(cmd, url): return ret_no_quotes +def parse_torrent_command(l): + # Very permissive handling + # Check for any occurances or d, f, p, t, m, or q + cmd_code_match = re.search(r'([hdfpmtq])', l, + flags=re.IGNORECASE) + if cmd_code_match: + code = cmd_code_match.group(0).lower() + else: + code = None + + # Clean up command codes + # Substitute multiple consecutive spaces/commas for single + # comma remove anything that isn't an integer or comma. + # Turn into list + l = re.sub(r'^[hdfp, ]*|[hdfp, ]*$', '', l) + l = re.sub('[ ,]+', ',', l) + l = re.sub('[^0-9,-]', '', l) + parsed_input = l.split(',') + + # expand ranges + choices = [] + # loop will generate a list of lists + for elem in parsed_input: + left, sep, right = elem.partition('-') + if right: + choices.append(list(range(int(left), int(right) + 1))) + elif left != '': + choices.append([int(left)]) + + # flatten list + choices = sum(choices, []) + # the current code stores the choices as strings + # instead of ints. not sure if necessary + choices = [elem for elem in choices] + return code, choices + + def main(): config = load_config() @@ -235,40 +272,7 @@ def main(): return try: - # Very permissive handling - # Check for any occurances or d, f, p, t, m, or q - cmd_code_match = re.search(r'([hdfpmtq])', l, - flags=re.IGNORECASE) - if cmd_code_match: - code = cmd_code_match.group(0).lower() - else: - code = None - - # Clean up command codes - # Substitute multiple consecutive spaces/commas for single - # comma remove anything that isn't an integer or comma. - # Turn into list - l = re.sub(r'^[hdfp, ]*|[hdfp, ]*$', '', l) - l = re.sub('[ ,]+', ',', l) - l = re.sub('[^0-9,-]', '', l) - parsed_input = l.split(',') - - # expand ranges - choices = [] - # loop will generate a list of lists - for elem in parsed_input: - left, sep, right = elem.partition('-') - if right: - choices.append(list(range(int(left), int(right) + 1))) - elif left != '': - choices.append([int(left)]) - - # flatten list - choices = sum(choices, []) - # the current code stores the choices as strings - # instead of ints. not sure if necessary - choices = [str(elem) for elem in choices] - + code, choices = parse_torrent_command(l) # Act on option, if supplied print('') if code == 'h': diff --git a/pirate/print.py b/pirate/print.py index 7e19ba3..04d3a7e 100644 --- a/pirate/print.py +++ b/pirate/print.py @@ -83,7 +83,6 @@ def search_results(results, local=None): def descriptions(chosen_links, results, site): for link in chosen_links: - link = int(link) path = '/torrent/%s/' % results[link]['id'] req = request.Request(site + path, headers=pirate.data.default_headers) req.add_header('Accept-encoding', 'gzip') @@ -108,7 +107,6 @@ def descriptions(chosen_links, results, site): def file_lists(chosen_links, results, site): for link in chosen_links: - link = int(link) path = '/ajax_details_filelist.php' query = '?id=' + results[link]['id'] req = request.Request(site + path + query, diff --git a/pirate/torrent.py b/pirate/torrent.py index c0f2ff4..3c0aa16 100644 --- a/pirate/torrent.py +++ b/pirate/torrent.py @@ -163,7 +163,6 @@ def get_torrent(info_hash): def save_torrents(chosen_links, results, folder): for link in chosen_links: - link = int(link) magnet = results[link]['magnet'] name = re.search(r'dn=([^\&]*)', magnet) torrent_name = parse.unquote(name.group(1)).replace('+', ' ') @@ -181,7 +180,6 @@ def save_torrents(chosen_links, results, folder): def save_magnets(chosen_links, mags, folder): for link in chosen_links: - link = int(link) magnet = results[link]['magnet'] name = re.search(r'dn=([^\&]*)', magnet) torrent_name = parse.unquote(name.group(1)).replace('+', ' ') diff --git a/tests/test_pirate.py b/tests/test_pirate.py index dff36af..a11f696 100755 --- a/tests/test_pirate.py +++ b/tests/test_pirate.py @@ -17,5 +17,24 @@ class TestPirate(unittest.TestCase): for test in tests: self.assertEqual(pirate.pirate.parse_cmd(*test[0]), test[1]) + def test_parse_torrent_command(self): + tests = [ + [['h'], ('h', [])], + [['q'], ('q', [])], + [['d1'], ('d', [1])], + [['f1'], ('f', [1])], + [['p1'], ('p', [1])], + [['t1'], ('t', [1])], + [['m1'], ('m', [1])], + [['d 23'], ('d', [23])], + [['d 23,1'], ('d', [23, 1])], + [['d 23, 1'], ('d', [23, 1])], + [['1d'], ('d', [1])], + [['1 ... d'], ('d', [1])], + [['1-3 d'], ('d', [1,2,3])], + ] + for test in tests: + self.assertEqual(pirate.pirate.parse_torrent_command(*test[0]), test[1]) + if __name__ == '__main__': unittest.main() From 0c2415bb47554a7e7e4b67bb883ea506fa7ab9f6 Mon Sep 17 00:00:00 2001 From: Viktor Stanchev Date: Thu, 3 Sep 2015 23:29:20 -0700 Subject: [PATCH 12/12] test printing results --- pirate/print.py | 4 +++- tests/test_print.py | 29 +++++++++++++++++++++++++++++ 2 files changed, 32 insertions(+), 1 deletion(-) create mode 100755 tests/test_print.py diff --git a/pirate/print.py b/pirate/print.py index 04d3a7e..a83b89b 100644 --- a/pirate/print.py +++ b/pirate/print.py @@ -5,6 +5,7 @@ import gzip import colorama import urllib.parse as parse import urllib.request as request +import shutil from io import BytesIO import pirate.data @@ -31,8 +32,9 @@ def print(*args, **kwargs): return builtins.print(*args, **kwargs) +# TODO: extract the name from the search results instead of the magnet link when possible def search_results(results, local=None): - columns = int(os.popen('stty size', 'r').read().split()[1]) + columns = shutil.get_terminal_size((80, 20)).columns cur_color = 'zebra_0' if local: diff --git a/tests/test_print.py b/tests/test_print.py new file mode 100755 index 0000000..438170d --- /dev/null +++ b/tests/test_print.py @@ -0,0 +1,29 @@ +#!/usr/bin/env python3 +import unittest +from unittest.mock import patch +from unittest.mock import call + +import pirate.print + + +class TestPrint(unittest.TestCase): + + def test_print_results(self): + with patch('pirate.print.print') as mock: + results = [{ + 'magnet': 'dn=name', + 'seeds': 1, + 'leechers': 2, + 'size': ['3','MiB'], + 'uploaded': 'never' + }] + pirate.print.search_results(results) + actual = mock.call_args_list + expected = [ + call('LINK SEED LEECH RATIO SIZE UPLOAD NAME ', color='header'), + call(' 0 1 2 0.5 3.0 MiB never name ', color='zebra_1'), + ] + self.assertEqual(expected, actual) + +if __name__ == '__main__': + unittest.main()