diff --git a/pirate/torrent.py b/pirate/torrent.py
index 83b6202..eb62f21 100644
--- a/pirate/torrent.py
+++ b/pirate/torrent.py
@@ -6,6 +6,8 @@ import urllib.parse as parse
import urllib.error
import os.path
+from pyquery import PyQuery as pq
+
import pirate.data
from pirate.print import print
@@ -43,25 +45,6 @@ def parse_sort(sort):
return '99'
-def parse_magnets_seeds_leechers(found):
- res = []
- state = 'seeds'
- curr = ['', 0, 0] #magnet, seeds, leeches
- for f in found:
- if f[1] == '':
- curr[0] = f[0]
- else:
- if state == 'seeds':
- curr[1] = f[1]
- state = 'leeches'
- else:
- curr[2] = f[1]
- state = 'seeds'
- res.append(curr)
- curr = ['', 0, 0]
- return res
-
-
#TODO: warn users when using a sort in a mode that doesn't accept sorts
#TODO: warn users when using search terms in a mode that doesn't accept search terms
#TODO: same with page parameter for top and top48h
@@ -92,37 +75,55 @@ def build_request_path(page, category, sort, mode, terms):
raise Exception('Unknown mode.')
-#TODO: redo this with html parser instead of regex
-def parse_page(res):
- found = re.findall(parser_regex, res)
+def parse_page(html):
+    """Scrape torrent listings out of a search-result page.
+
+    ``html`` is the page markup as a string.  The return value is the
+    tuple ``(results, sizes, uploaded, identifiers)``: ``results`` is a
+    list of ``(magnet, seeds, leechers)`` tuples, ``sizes`` a list of
+    whitespace-split size strings, ``uploaded`` a list of upload-date
+    strings and ``identifiers`` the third ``/``-separated piece of each
+    ``.detLink`` href.
+
+    Raises IOError when no magnet links are found and the page does not
+    contain the "No hits" message either.
+    """
+    d = pq(html)
+
+    # first get the magnet links and make sure there are results
+    magnets = [pq(link).attr('href') for link in
+               d('table#searchResult tr>td:nth-child(2)>a:nth-child(2)')]
# check for a blocked mirror
no_results = re.search(r'No hits\. Try adding an asterisk in '
- r'you search phrase\.', res)
- if found == [] and no_results is None:
+                           r'you search phrase\.', html)
+    if not magnets and no_results is None:
# Contradiction - we found no results,
# but the page didn't say there were no results.
# The page is probably not actually the pirate bay,
# so let's try another mirror
raise IOError('Blocked mirror detected.')
- # get sizes as well and substitute the character
- # TODO: use actual html decode
- sizes = [match.replace(' ', ' ').split()
- for match in re.findall(r'(?<=Size )[0-9.]'
- r'+\ \;[KMGT]*[i ]*B', res)]
+    # next get more info
+    seeds = [pq(cell).text() for cell in
+             d('table#searchResult tr>td:nth-child(3)')]
+    leechers = [pq(cell).text() for cell in
+                d('table#searchResult tr>td:nth-child(4)')]
+    identifiers = [pq(link).attr('href').split('/')[2] for link in
+                   d('table#searchResult .detLink')]
- uploaded = [match.replace(' ', ' ')
- for match in re.findall(r'(?<=Uploaded )'
- r'.+(?=\, Size)',res)]
+    # parse descriptions separately
+    size_re = re.compile(r'(?<=Size )[0-9.]+\s[KMGT]*[i ]*B')
+    uploaded_re = re.compile(r'(?<=Uploaded ).+(?=\, Size)')
+    sizes = []
+    uploaded = []
+    for node in d('font.detDesc'):
+        text = pq(node).text()
+        sizes.append(size_re.findall(text)[0].split())
+        uploaded.append(uploaded_re.findall(text)[0])
- identifiers = [match.replace(' ', ' ')
- for match in re.findall('(?<=/torrent/)'
- '[0-9]+(?=/)',res)]
-
- res_l = parse_magnets_seeds_leechers(found)
-
- return res_l, sizes, uploaded, identifiers
+    results = list(zip(magnets, seeds, leechers))
+    return results, sizes, uploaded, identifiers
def remote(pages, category, sort, mode, terms, mirror):
diff --git a/setup.py b/setup.py
index 8f8f6c1..42707ef 100755
--- a/setup.py
+++ b/setup.py
@@ -13,7 +13,7 @@ setup(name='pirate-get',
entry_points={
'console_scripts': ['pirate-get = pirate.pirate:main']
},
- install_requires=['colorama>=0.3.3'],
+ install_requires=['colorama>=0.3.3', 'pyquery>=1.2.9'],
keywords=['torrent', 'magnet', 'download', 'tpb', 'client'],
classifiers=[
'Topic :: Utilities',
diff --git a/tests/data/blocked.html b/tests/data/blocked.html
new file mode 100644
index 0000000..dd4bc42
--- /dev/null
+++ b/tests/data/blocked.html
@@ -0,0 +1 @@
+blocked.
diff --git a/tests/data/no_hits.html b/tests/data/no_hits.html
new file mode 100644
index 0000000..a3a9156
--- /dev/null
+++ b/tests/data/no_hits.html
@@ -0,0 +1,200 @@
+
+
+
+ The Pirate Bay - The galaxy's most resilient bittorrent site
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ Search results: aaaaaaaaaaaaaaaaa No hits. Try adding an asterisk in you search phrase.
+
+
+
+
+
+
+
+
\ No newline at end of file
diff --git a/tests/test_torrent.py b/tests/test_torrent.py
index b166359..06c62fd 100755
--- a/tests/test_torrent.py
+++ b/tests/test_torrent.py
@@ -8,10 +8,21 @@ from tests import util
class TestTorrent(unittest.TestCase):
- def test_rich_xml(self):
+ def test_no_hits(self):
+ res = util.read_data('no_hits.html')
+ actual = pirate.torrent.parse_page(res)
+ expected = ([], [], [], [])
+ self.assertEqual(actual, expected)
+
+ def test_blocked_mirror(self):
+ res = util.read_data('blocked.html')
+ with self.assertRaises(IOError):
+ pirate.torrent.parse_page(res)
+
+ def test_search_results(self):
res = util.read_data('dan_bull_search.html')
actual = pirate.torrent.parse_page(res)
- expected = ([['magnet:?xt=urn:btih:30df4f8b42b8fd77f5e5aa34abbffe97f5e81fbf&dn=Dan+Croll+%26bull%3B+Sweet+Disarray+%5B2014%5D+320&tr=udp%3A%2F%2Ftracker.openbittorrent.com%3A80&tr=udp%3A%2F%2Fopen.demonii.com%3A1337&tr=udp%3A%2F%2Ftracker.coppersurfer.tk%3A6969&tr=udp%3A%2F%2Fexodus.desync.com%3A6969', '16', '1'], ['magnet:?xt=urn:btih:7abd3eda600996b8e6fc9a61b83288e0c6ac0d83&dn=Dan+Bull+-+Massive+Collection&tr=udp%3A%2F%2Ftracker.openbittorrent.com%3A80&tr=udp%3A%2F%2Fopen.demonii.com%3A1337&tr=udp%3A%2F%2Ftracker.coppersurfer.tk%3A6969&tr=udp%3A%2F%2Fexodus.desync.com%3A6969', '4', '0'], ['magnet:?xt=urn:btih:8f8d68fd0a51237c89692c428ed8a8f64a969c70&dn=Dan+Bull+-+Generation+Gaming+-+2013&tr=udp%3A%2F%2Ftracker.openbittorrent.com%3A80&tr=udp%3A%2F%2Fopen.demonii.com%3A1337&tr=udp%3A%2F%2Ftracker.coppersurfer.tk%3A6969&tr=udp%3A%2F%2Fexodus.desync.com%3A6969', '2', '0'], ['magnet:?xt=urn:btih:3da6a0fdc1d67a768cb32597e926abdf3e1a2fdd&dn=Dan+Bull+Collection&tr=udp%3A%2F%2Ftracker.openbittorrent.com%3A80&tr=udp%3A%2F%2Fopen.demonii.com%3A1337&tr=udp%3A%2F%2Ftracker.coppersurfer.tk%3A6969&tr=udp%3A%2F%2Fexodus.desync.com%3A6969', '1', '0'], ['magnet:?xt=urn:btih:5cd371a235317319db7da52c64422f9c2ac75d77&dn=Dan+Bull+-+The+Garden+%7B2014-Album%7D&tr=udp%3A%2F%2Ftracker.openbittorrent.com%3A80&tr=udp%3A%2F%2Fopen.demonii.com%3A1337&tr=udp%3A%2F%2Ftracker.coppersurfer.tk%3A6969&tr=udp%3A%2F%2Fexodus.desync.com%3A6969', '1', '0'], ['magnet:?xt=urn:btih:4e14dbd077c920875be4c15971b23b609ad6716a&dn=Dan+Bull+-+Dear+Lily+%5Ban+open+letter+to+Lily+Allen%5D+-+2009%5BMP3+%40&tr=udp%3A%2F%2Ftracker.openbittorrent.com%3A80&tr=udp%3A%2F%2Fopen.demonii.com%3A1337&tr=udp%3A%2F%2Ftracker.coppersurfer.tk%3A6969&tr=udp%3A%2F%2Fexodus.desync.com%3A6969', '0', '1'], 
['magnet:?xt=urn:btih:5d9319cf852f7462422cb1bffc37b65174645047&dn=Dan+Bull+-+Dear+Mandy+%5Ban+open+letter+to+Lord+Mandelson%5D&tr=udp%3A%2F%2Ftracker.openbittorrent.com%3A80&tr=udp%3A%2F%2Fopen.demonii.com%3A1337&tr=udp%3A%2F%2Ftracker.coppersurfer.tk%3A6969&tr=udp%3A%2F%2Fexodus.desync.com%3A6969', '0', '0'], ['magnet:?xt=urn:btih:1c54af57426f53fdef4bbf1a9dbddf32f7b4988a&dn=Dan+Bull+-+Dear+Lily+%28Lily+Allen%29+%28Song+about+filesharing%29&tr=udp%3A%2F%2Ftracker.openbittorrent.com%3A80&tr=udp%3A%2F%2Fopen.demonii.com%3A1337&tr=udp%3A%2F%2Ftracker.coppersurfer.tk%3A6969&tr=udp%3A%2F%2Fexodus.desync.com%3A6969', '0', '0'], ['magnet:?xt=urn:btih:942c5bf3e1e9bc263939e13cea6ad7bd5f62aa36&dn=Dan+Bull+-+SOPA+Cabana.mp3&tr=udp%3A%2F%2Ftracker.openbittorrent.com%3A80&tr=udp%3A%2F%2Fopen.demonii.com%3A1337&tr=udp%3A%2F%2Ftracker.coppersurfer.tk%3A6969&tr=udp%3A%2F%2Fexodus.desync.com%3A6969', '0', '0'], ['magnet:?xt=urn:btih:d376f68a31b0db652234e790ed7256ac5e32db57&dn=Dan+Bull+-+SOPA+Cabana&tr=udp%3A%2F%2Ftracker.openbittorrent.com%3A80&tr=udp%3A%2F%2Fopen.demonii.com%3A1337&tr=udp%3A%2F%2Ftracker.coppersurfer.tk%3A6969&tr=udp%3A%2F%2Fexodus.desync.com%3A6969', '0', '1'], ['magnet:?xt=urn:btih:28163770a532eb24b9e0865878288a9bbdb7a5e6&dn=Dan+Bull+-+SOPA+Cabana+%5BWORKING%5D&tr=udp%3A%2F%2Ftracker.openbittorrent.com%3A80&tr=udp%3A%2F%2Fopen.demonii.com%3A1337&tr=udp%3A%2F%2Ftracker.coppersurfer.tk%3A6969&tr=udp%3A%2F%2Fexodus.desync.com%3A6969', '0', '1'], ['magnet:?xt=urn:btih:779ab0f13a3fbb12ba68b27721491e4d143f26eb&dn=Dan+Bull+-+Bye+Bye+BPI+2012++%5BMP3%40192%5D%28oan%29&tr=udp%3A%2F%2Ftracker.openbittorrent.com%3A80&tr=udp%3A%2F%2Fopen.demonii.com%3A1337&tr=udp%3A%2F%2Ftracker.coppersurfer.tk%3A6969&tr=udp%3A%2F%2Fexodus.desync.com%3A6969', '0', '1'], 
['magnet:?xt=urn:btih:2667e4795bd5c868dedcabcb52943f4bb7212bab&dn=Dan+Bull+-+Dishonored+%5BExplicit+ver.%5D+%28Single+2012%29&tr=udp%3A%2F%2Ftracker.openbittorrent.com%3A80&tr=udp%3A%2F%2Fopen.demonii.com%3A1337&tr=udp%3A%2F%2Ftracker.coppersurfer.tk%3A6969&tr=udp%3A%2F%2Fexodus.desync.com%3A6969', '0', '0'], ['magnet:?xt=urn:btih:16364f83c556ad0fd3bb57a4a7c890e7e8087414&dn=Halo+4+EPIC+Rap+By+Dan+Bull&tr=udp%3A%2F%2Ftracker.openbittorrent.com%3A80&tr=udp%3A%2F%2Fopen.demonii.com%3A1337&tr=udp%3A%2F%2Ftracker.coppersurfer.tk%3A6969&tr=udp%3A%2F%2Fexodus.desync.com%3A6969', '0', '0'], ['magnet:?xt=urn:btih:843b466d9fd1f0bee3a476573b272dc2d6d0ebae&dn=Dan+Bull+-+Generation+Gaming+-+2013&tr=udp%3A%2F%2Ftracker.openbittorrent.com%3A80&tr=udp%3A%2F%2Fopen.demonii.com%3A1337&tr=udp%3A%2F%2Ftracker.coppersurfer.tk%3A6969&tr=udp%3A%2F%2Fexodus.desync.com%3A6969', '0', '1']], [['89.33', 'MiB'], ['294', 'MiB'], ['54.86', 'MiB'], ['236.78', 'MiB'], ['36.27', 'MiB'], ['5.51', 'MiB'], ['5.07', 'MiB'], ['5.34', 'MiB'], ['4.8', 'MiB'], ['3.4', 'MiB'], ['4.8', 'MiB'], ['60.72', 'MiB'], ['6.29', 'MiB'], ['6.41', 'MiB'], ['54.87', 'MiB']], ['04-04 2014', '03-02 2014', '01-19 2013', '01-21 2010', '09-02 2014', '09-27 2009', '11-29 2009', '11-10 2011', '12-20 2011', '12-21 2011', '12-21 2011', '03-09 2012', '10-24 2012', '11-10 2012', '01-19 2013'], ['9890864', '9684858', '8037968', '5295449', '10954408', '5101630', '5185893', '6806996', '6901871', '6902247', '6903548', '7088979', '7756344', '7812951', '8037899'])
+ expected = ([('magnet:?xt=urn:btih:30df4f8b42b8fd77f5e5aa34abbffe97f5e81fbf&dn=Dan+Croll+%26bull%3B+Sweet+Disarray+%5B2014%5D+320&tr=udp%3A%2F%2Ftracker.openbittorrent.com%3A80&tr=udp%3A%2F%2Fopen.demonii.com%3A1337&tr=udp%3A%2F%2Ftracker.coppersurfer.tk%3A6969&tr=udp%3A%2F%2Fexodus.desync.com%3A6969', '16', '1'), ('magnet:?xt=urn:btih:7abd3eda600996b8e6fc9a61b83288e0c6ac0d83&dn=Dan+Bull+-+Massive+Collection&tr=udp%3A%2F%2Ftracker.openbittorrent.com%3A80&tr=udp%3A%2F%2Fopen.demonii.com%3A1337&tr=udp%3A%2F%2Ftracker.coppersurfer.tk%3A6969&tr=udp%3A%2F%2Fexodus.desync.com%3A6969', '4', '0'), ('magnet:?xt=urn:btih:8f8d68fd0a51237c89692c428ed8a8f64a969c70&dn=Dan+Bull+-+Generation+Gaming+-+2013&tr=udp%3A%2F%2Ftracker.openbittorrent.com%3A80&tr=udp%3A%2F%2Fopen.demonii.com%3A1337&tr=udp%3A%2F%2Ftracker.coppersurfer.tk%3A6969&tr=udp%3A%2F%2Fexodus.desync.com%3A6969', '2', '0'), ('magnet:?xt=urn:btih:3da6a0fdc1d67a768cb32597e926abdf3e1a2fdd&dn=Dan+Bull+Collection&tr=udp%3A%2F%2Ftracker.openbittorrent.com%3A80&tr=udp%3A%2F%2Fopen.demonii.com%3A1337&tr=udp%3A%2F%2Ftracker.coppersurfer.tk%3A6969&tr=udp%3A%2F%2Fexodus.desync.com%3A6969', '1', '0'), ('magnet:?xt=urn:btih:5cd371a235317319db7da52c64422f9c2ac75d77&dn=Dan+Bull+-+The+Garden+%7B2014-Album%7D&tr=udp%3A%2F%2Ftracker.openbittorrent.com%3A80&tr=udp%3A%2F%2Fopen.demonii.com%3A1337&tr=udp%3A%2F%2Ftracker.coppersurfer.tk%3A6969&tr=udp%3A%2F%2Fexodus.desync.com%3A6969', '1', '0'), ('magnet:?xt=urn:btih:4e14dbd077c920875be4c15971b23b609ad6716a&dn=Dan+Bull+-+Dear+Lily+%5Ban+open+letter+to+Lily+Allen%5D+-+2009%5BMP3+%40&tr=udp%3A%2F%2Ftracker.openbittorrent.com%3A80&tr=udp%3A%2F%2Fopen.demonii.com%3A1337&tr=udp%3A%2F%2Ftracker.coppersurfer.tk%3A6969&tr=udp%3A%2F%2Fexodus.desync.com%3A6969', '0', '1'), 
('magnet:?xt=urn:btih:5d9319cf852f7462422cb1bffc37b65174645047&dn=Dan+Bull+-+Dear+Mandy+%5Ban+open+letter+to+Lord+Mandelson%5D&tr=udp%3A%2F%2Ftracker.openbittorrent.com%3A80&tr=udp%3A%2F%2Fopen.demonii.com%3A1337&tr=udp%3A%2F%2Ftracker.coppersurfer.tk%3A6969&tr=udp%3A%2F%2Fexodus.desync.com%3A6969', '0', '0'), ('magnet:?xt=urn:btih:1c54af57426f53fdef4bbf1a9dbddf32f7b4988a&dn=Dan+Bull+-+Dear+Lily+%28Lily+Allen%29+%28Song+about+filesharing%29&tr=udp%3A%2F%2Ftracker.openbittorrent.com%3A80&tr=udp%3A%2F%2Fopen.demonii.com%3A1337&tr=udp%3A%2F%2Ftracker.coppersurfer.tk%3A6969&tr=udp%3A%2F%2Fexodus.desync.com%3A6969', '0', '0'), ('magnet:?xt=urn:btih:942c5bf3e1e9bc263939e13cea6ad7bd5f62aa36&dn=Dan+Bull+-+SOPA+Cabana.mp3&tr=udp%3A%2F%2Ftracker.openbittorrent.com%3A80&tr=udp%3A%2F%2Fopen.demonii.com%3A1337&tr=udp%3A%2F%2Ftracker.coppersurfer.tk%3A6969&tr=udp%3A%2F%2Fexodus.desync.com%3A6969', '0', '0'), ('magnet:?xt=urn:btih:d376f68a31b0db652234e790ed7256ac5e32db57&dn=Dan+Bull+-+SOPA+Cabana&tr=udp%3A%2F%2Ftracker.openbittorrent.com%3A80&tr=udp%3A%2F%2Fopen.demonii.com%3A1337&tr=udp%3A%2F%2Ftracker.coppersurfer.tk%3A6969&tr=udp%3A%2F%2Fexodus.desync.com%3A6969', '0', '1'), ('magnet:?xt=urn:btih:28163770a532eb24b9e0865878288a9bbdb7a5e6&dn=Dan+Bull+-+SOPA+Cabana+%5BWORKING%5D&tr=udp%3A%2F%2Ftracker.openbittorrent.com%3A80&tr=udp%3A%2F%2Fopen.demonii.com%3A1337&tr=udp%3A%2F%2Ftracker.coppersurfer.tk%3A6969&tr=udp%3A%2F%2Fexodus.desync.com%3A6969', '0', '1'), ('magnet:?xt=urn:btih:779ab0f13a3fbb12ba68b27721491e4d143f26eb&dn=Dan+Bull+-+Bye+Bye+BPI+2012++%5BMP3%40192%5D%28oan%29&tr=udp%3A%2F%2Ftracker.openbittorrent.com%3A80&tr=udp%3A%2F%2Fopen.demonii.com%3A1337&tr=udp%3A%2F%2Ftracker.coppersurfer.tk%3A6969&tr=udp%3A%2F%2Fexodus.desync.com%3A6969', '0', '1'), 
('magnet:?xt=urn:btih:2667e4795bd5c868dedcabcb52943f4bb7212bab&dn=Dan+Bull+-+Dishonored+%5BExplicit+ver.%5D+%28Single+2012%29&tr=udp%3A%2F%2Ftracker.openbittorrent.com%3A80&tr=udp%3A%2F%2Fopen.demonii.com%3A1337&tr=udp%3A%2F%2Ftracker.coppersurfer.tk%3A6969&tr=udp%3A%2F%2Fexodus.desync.com%3A6969', '0', '0'), ('magnet:?xt=urn:btih:16364f83c556ad0fd3bb57a4a7c890e7e8087414&dn=Halo+4+EPIC+Rap+By+Dan+Bull&tr=udp%3A%2F%2Ftracker.openbittorrent.com%3A80&tr=udp%3A%2F%2Fopen.demonii.com%3A1337&tr=udp%3A%2F%2Ftracker.coppersurfer.tk%3A6969&tr=udp%3A%2F%2Fexodus.desync.com%3A6969', '0', '0'), ('magnet:?xt=urn:btih:843b466d9fd1f0bee3a476573b272dc2d6d0ebae&dn=Dan+Bull+-+Generation+Gaming+-+2013&tr=udp%3A%2F%2Ftracker.openbittorrent.com%3A80&tr=udp%3A%2F%2Fopen.demonii.com%3A1337&tr=udp%3A%2F%2Ftracker.coppersurfer.tk%3A6969&tr=udp%3A%2F%2Fexodus.desync.com%3A6969', '0', '1')], [['89.33', 'MiB'], ['294', 'MiB'], ['54.86', 'MiB'], ['236.78', 'MiB'], ['36.27', 'MiB'], ['5.51', 'MiB'], ['5.07', 'MiB'], ['5.34', 'MiB'], ['4.8', 'MiB'], ['3.4', 'MiB'], ['4.8', 'MiB'], ['60.72', 'MiB'], ['6.29', 'MiB'], ['6.41', 'MiB'], ['54.87', 'MiB']],['04-04\xa02014', '03-02\xa02014', '01-19\xa02013', '01-21\xa02010', '09-02\xa02014', '09-27\xa02009', '11-29\xa02009', '11-10\xa02011', '12-20\xa02011', '12-21\xa02011', '12-21\xa02011', '03-09\xa02012', '10-24\xa02012', '11-10\xa02012', '01-19\xa02013'], ['9890864', '9684858', '8037968', '5295449', '10954408', '5101630', '5185893', '6806996', '6901871', '6902247', '6903548', '7088979', '7756344', '7812951', '8037899'])
self.assertEqual(actual, expected)
if __name__ == '__main__':