1
0
mirror of https://github.com/vikstrous/pirate-get synced 2025-01-10 10:04:21 +01:00

extract parse_magnets_seeds_leechers function

This commit is contained in:
Viktor Stanchev 2015-09-03 00:25:17 -07:00
parent 4eb034e925
commit 0ad94fca46

View File

@ -12,6 +12,9 @@ from pirate.print import print
from io import BytesIO from io import BytesIO
parser_regex = r'"(magnet\:\?xt=[^"]*)|<td align="right">([^<]+)</td>'
def parse_category(category): def parse_category(category):
try: try:
category = int(category) category = int(category)
@ -40,6 +43,25 @@ def parse_sort(sort):
return '99' return '99'
def parse_magnets_seeds_leechers(found):
    """Group a flat list of regex matches into per-torrent records.

    Each item of *found* is indexed as a pair ``(link, count)``.  An item
    whose second element is the empty string carries a magnet link in its
    first element; any other item carries a count in its second element.
    Counts alternate: the first one seen is the seed count, the next one
    is the leech count, and the leech count completes a record.

    Returns a list of ``[magnet, seeds, leeches]`` lists.  The counts are
    stored exactly as they appear in *found* (no conversion is applied).
    A record that never receives its leech count is not emitted.
    """
    records = []
    magnet = ''
    seeds = 0
    expecting_seeds = True
    for match in found:
        link, count = match[0], match[1]

        # An empty count group means this match is a magnet link.
        if count == '':
            magnet = link
            continue

        # First count of a pair: remember it and wait for the second.
        if expecting_seeds:
            seeds = count
            expecting_seeds = False
            continue

        # Second count of a pair: the record is complete, start a new one.
        records.append([magnet, seeds, count])
        magnet = ''
        seeds = 0
        expecting_seeds = True
    return records
#TODO: redo this with html parser instead of regex #TODO: redo this with html parser instead of regex
#TODO: warn users when using a sort in a mode that doesn't accept sorts #TODO: warn users when using a sort in a mode that doesn't accept sorts
#TODO: warn users when using search terms in a mode that doesn't accept search terms #TODO: warn users when using search terms in a mode that doesn't accept search terms
@ -90,8 +112,7 @@ def remote(pages, category, sort, mode, terms, mirror):
if f.info().get('Content-Encoding') == 'gzip': if f.info().get('Content-Encoding') == 'gzip':
f = gzip.GzipFile(fileobj=BytesIO(f.read())) f = gzip.GzipFile(fileobj=BytesIO(f.read()))
res = f.read().decode('utf-8') res = f.read().decode('utf-8')
found = re.findall(r'"(magnet\:\?xt=[^"]*)|<td align="right">' found = re.findall(parser_regex, res)
r'([^<]+)</td>', res)
# check for a blocked mirror # check for a blocked mirror
no_results = re.search(r'No hits\. Try adding an asterisk in ' no_results = re.search(r'No hits\. Try adding an asterisk in '
@ -104,6 +125,7 @@ def remote(pages, category, sort, mode, terms, mirror):
raise IOError('Blocked mirror detected.') raise IOError('Blocked mirror detected.')
# get sizes as well and substitute the &nbsp; character # get sizes as well and substitute the &nbsp; character
# TODO: use actual html decode
sizes.extend([match.replace('&nbsp;', ' ').split() sizes.extend([match.replace('&nbsp;', ' ').split()
for match in re.findall(r'(?<=Size )[0-9.]' for match in re.findall(r'(?<=Size )[0-9.]'
r'+\&nbsp\;[KMGT]*[i ]*B', res)]) r'+\&nbsp\;[KMGT]*[i ]*B', res)])
@ -116,20 +138,8 @@ def remote(pages, category, sort, mode, terms, mirror):
for match in re.findall('(?<=/torrent/)' for match in re.findall('(?<=/torrent/)'
'[0-9]+(?=/)',res)]) '[0-9]+(?=/)',res)])
state = 'seeds' res_l += parse_magnets_seeds_leechers(found)
curr = ['', 0, 0] #magnet, seeds, leeches
for f in found:
if f[1] == '':
curr[0] = f[0]
else:
if state == 'seeds':
curr[1] = f[1]
state = 'leeches'
else:
curr[2] = f[1]
state = 'seeds'
res_l.append(curr)
curr = ['', 0, 0]
except KeyboardInterrupt : except KeyboardInterrupt :
print('\nCancelled.') print('\nCancelled.')
sys.exit(0) sys.exit(0)
@ -138,7 +148,6 @@ def remote(pages, category, sort, mode, terms, mirror):
return res_l, sizes, uploaded, identifiers return res_l, sizes, uploaded, identifiers
def get_torrent(info_hash): def get_torrent(info_hash):
url = 'http://torcache.net/torrent/{:X}.torrent' url = 'http://torcache.net/torrent/{:X}.torrent'
req = request.Request(url.format(info_hash), req = request.Request(url.format(info_hash),