diff --git a/pirate/torrent.py b/pirate/torrent.py
index 413e1bc..e2e7eca 100644
--- a/pirate/torrent.py
+++ b/pirate/torrent.py
@@ -12,6 +12,9 @@ from pirate.print import print
from io import BytesIO
+parser_regex = r'"(magnet\:\?xt=[^"]*)|
([^<]+) | '
+
+
def parse_category(category):
try:
category = int(category)
@@ -40,6 +43,25 @@ def parse_sort(sort):
return '99'
+def parse_magnets_seeds_leechers(found):
+ res = []
+ state = 'seeds'
+ curr = ['', 0, 0] #magnet, seeds, leeches
+ for f in found:
+ if f[1] == '':
+ curr[0] = f[0]
+ else:
+ if state == 'seeds':
+ curr[1] = f[1]
+ state = 'leeches'
+ else:
+ curr[2] = f[1]
+ state = 'seeds'
+ res.append(curr)
+ curr = ['', 0, 0]
+ return res
+
+
#TODO: redo this with html parser instead of regex
#TODO: warn users when using a sort in a mode that doesn't accept sorts
#TODO: warn users when using search terms in a mode that doesn't accept search terms
@@ -90,8 +112,7 @@ def remote(pages, category, sort, mode, terms, mirror):
if f.info().get('Content-Encoding') == 'gzip':
f = gzip.GzipFile(fileobj=BytesIO(f.read()))
res = f.read().decode('utf-8')
- found = re.findall(r'"(magnet\:\?xt=[^"]*)|'
- r'([^<]+) | ', res)
+ found = re.findall(parser_regex, res)
# check for a blocked mirror
no_results = re.search(r'No hits\. Try adding an asterisk in '
@@ -104,6 +125,7 @@ def remote(pages, category, sort, mode, terms, mirror):
raise IOError('Blocked mirror detected.')
# get sizes as well and substitute the character
+ # TODO: use actual html decode
sizes.extend([match.replace(' ', ' ').split()
for match in re.findall(r'(?<=Size )[0-9.]'
r'+\ \;[KMGT]*[i ]*B', res)])
@@ -116,20 +138,8 @@ def remote(pages, category, sort, mode, terms, mirror):
for match in re.findall('(?<=/torrent/)'
'[0-9]+(?=/)',res)])
- state = 'seeds'
- curr = ['', 0, 0] #magnet, seeds, leeches
- for f in found:
- if f[1] == '':
- curr[0] = f[0]
- else:
- if state == 'seeds':
- curr[1] = f[1]
- state = 'leeches'
- else:
- curr[2] = f[1]
- state = 'seeds'
- res_l.append(curr)
- curr = ['', 0, 0]
+ res_l += parse_magnets_seeds_leechers(found)
+
except KeyboardInterrupt :
print('\nCancelled.')
sys.exit(0)
@@ -138,7 +148,6 @@ def remote(pages, category, sort, mode, terms, mirror):
return res_l, sizes, uploaded, identifiers
-
def get_torrent(info_hash):
url = 'http://torcache.net/torrent/{:X}.torrent'
req = request.Request(url.format(info_hash),