mirror of
https://github.com/vikstrous/pirate-get
synced 2025-01-10 10:04:21 +01:00
extract parse_magnets_seeds_leechers function
This commit is contained in:
parent
4eb034e925
commit
0ad94fca46
@ -12,6 +12,9 @@ from pirate.print import print
|
|||||||
from io import BytesIO
|
from io import BytesIO
|
||||||
|
|
||||||
|
|
||||||
|
parser_regex = r'"(magnet\:\?xt=[^"]*)|<td align="right">([^<]+)</td>'
|
||||||
|
|
||||||
|
|
||||||
def parse_category(category):
|
def parse_category(category):
|
||||||
try:
|
try:
|
||||||
category = int(category)
|
category = int(category)
|
||||||
@ -40,6 +43,25 @@ def parse_sort(sort):
|
|||||||
return '99'
|
return '99'
|
||||||
|
|
||||||
|
|
||||||
|
def parse_magnets_seeds_leechers(found):
    """Group flat regex matches into ``[magnet, seeds, leechers]`` entries.

    ``found`` is an iterable of indexable matches where item 0 is a magnet
    link (or ``''``) and item 1 is the text of a right-aligned table cell
    (or ``''``) -- the shape ``re.findall`` returns for a two-group pattern.

    A match whose second item is empty sets the magnet of the entry being
    built. Matches with a non-empty second item alternate between the seeds
    value and the leeches value; once the leeches value is stored the entry
    is complete and a new one is started.

    Returns a list of ``[magnet, seeds, leeches]`` lists. Seeds and leeches
    are kept exactly as matched (strings); they are not converted to int.
    An entry that never receives its leeches value is not returned.
    """
    res = []
    curr = ['', 0, 0]  # magnet, seeds, leeches
    state = 'seeds'  # which count the next non-magnet match fills in
    for match in found:
        if match[1] == '':
            # Only the magnet group matched.
            curr[0] = match[0]
        elif state == 'seeds':
            curr[1] = match[1]
            state = 'leeches'
        else:
            curr[2] = match[1]
            res.append(curr)
            # Start a fresh list so the appended entry is never mutated.
            curr = ['', 0, 0]
            state = 'seeds'
    return res
|
||||||
|
|
||||||
|
|
||||||
#TODO: redo this with html parser instead of regex
|
#TODO: redo this with html parser instead of regex
|
||||||
#TODO: warn users when using a sort in a mode that doesn't accept sorts
|
#TODO: warn users when using a sort in a mode that doesn't accept sorts
|
||||||
#TODO: warn users when using search terms in a mode that doesn't accept search terms
|
#TODO: warn users when using search terms in a mode that doesn't accept search terms
|
||||||
@ -90,8 +112,7 @@ def remote(pages, category, sort, mode, terms, mirror):
|
|||||||
if f.info().get('Content-Encoding') == 'gzip':
|
if f.info().get('Content-Encoding') == 'gzip':
|
||||||
f = gzip.GzipFile(fileobj=BytesIO(f.read()))
|
f = gzip.GzipFile(fileobj=BytesIO(f.read()))
|
||||||
res = f.read().decode('utf-8')
|
res = f.read().decode('utf-8')
|
||||||
found = re.findall(r'"(magnet\:\?xt=[^"]*)|<td align="right">'
|
found = re.findall(parser_regex, res)
|
||||||
r'([^<]+)</td>', res)
|
|
||||||
|
|
||||||
# check for a blocked mirror
|
# check for a blocked mirror
|
||||||
no_results = re.search(r'No hits\. Try adding an asterisk in '
|
no_results = re.search(r'No hits\. Try adding an asterisk in '
|
||||||
@ -104,6 +125,7 @@ def remote(pages, category, sort, mode, terms, mirror):
|
|||||||
raise IOError('Blocked mirror detected.')
|
raise IOError('Blocked mirror detected.')
|
||||||
|
|
||||||
# get sizes as well and substitute the character
|
# get sizes as well and substitute the character
|
||||||
|
# TODO: use actual html decode
|
||||||
sizes.extend([match.replace(' ', ' ').split()
|
sizes.extend([match.replace(' ', ' ').split()
|
||||||
for match in re.findall(r'(?<=Size )[0-9.]'
|
for match in re.findall(r'(?<=Size )[0-9.]'
|
||||||
r'+\ \;[KMGT]*[i ]*B', res)])
|
r'+\ \;[KMGT]*[i ]*B', res)])
|
||||||
@ -116,20 +138,8 @@ def remote(pages, category, sort, mode, terms, mirror):
|
|||||||
for match in re.findall('(?<=/torrent/)'
|
for match in re.findall('(?<=/torrent/)'
|
||||||
'[0-9]+(?=/)',res)])
|
'[0-9]+(?=/)',res)])
|
||||||
|
|
||||||
state = 'seeds'
|
res_l += parse_magnets_seeds_leechers(found)
|
||||||
curr = ['', 0, 0] #magnet, seeds, leeches
|
|
||||||
for f in found:
|
|
||||||
if f[1] == '':
|
|
||||||
curr[0] = f[0]
|
|
||||||
else:
|
|
||||||
if state == 'seeds':
|
|
||||||
curr[1] = f[1]
|
|
||||||
state = 'leeches'
|
|
||||||
else:
|
|
||||||
curr[2] = f[1]
|
|
||||||
state = 'seeds'
|
|
||||||
res_l.append(curr)
|
|
||||||
curr = ['', 0, 0]
|
|
||||||
except KeyboardInterrupt :
|
except KeyboardInterrupt :
|
||||||
print('\nCancelled.')
|
print('\nCancelled.')
|
||||||
sys.exit(0)
|
sys.exit(0)
|
||||||
@ -138,7 +148,6 @@ def remote(pages, category, sort, mode, terms, mirror):
|
|||||||
return res_l, sizes, uploaded, identifiers
|
return res_l, sizes, uploaded, identifiers
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
def get_torrent(info_hash):
|
def get_torrent(info_hash):
|
||||||
url = 'http://torcache.net/torrent/{:X}.torrent'
|
url = 'http://torcache.net/torrent/{:X}.torrent'
|
||||||
req = request.Request(url.format(info_hash),
|
req = request.Request(url.format(info_hash),
|
||||||
|
Loading…
Reference in New Issue
Block a user