1
0
mirror of https://github.com/vikstrous/pirate-get synced 2025-01-09 09:59:51 +01:00

Unclutter main function

Why was everything inside main()?
This commit is contained in:
Rnhmjoj 2014-12-03 22:43:11 +01:00
parent edc4d4ed71
commit c823f1564e

View File

@ -32,8 +32,69 @@ import urllib.request as request
import urllib.parse as parse
from html.parser import HTMLParser
from pprint import pprint
from io import StringIO, BytesIO
from io import BytesIO
# Category names mapped to the numeric codes that remote() places in
# request paths.  Both the names and the bare codes are accepted there.
categories = {
    "All": "0",

    "Audio": "100",
    "Audio/Music": "101",
    "Audio/Audio books": "102",
    "Audio/Sound clips": "103",
    "Audio/FLAC": "104",
    "Audio/Other": "199",

    "Video": "200",
    "Video/Movies": "201",
    "Video/Movies DVDR": "202",
    "Video/Music videos": "203",
    "Video/Movie clips": "204",
    "Video/TV shows": "205",
    "Video/Handheld": "206",
    "Video/HD - Movies": "207",
    "Video/HD - TV shows": "208",
    "Video/3D": "209",
    "Video/Other": "299",

    "Applications": "300",
    "Applications/Windows": "301",
    "Applications/Mac": "302",
    "Applications/UNIX": "303",
    "Applications/Handheld": "304",
    "Applications/IOS (iPad/iPhone)": "305",
    "Applications/Android": "306",
    "Applications/Other OS": "399",

    "Games": "400",
    "Games/PC": "401",
    "Games/Mac": "402",
    "Games/PSx": "403",
    "Games/XBOX360": "404",
    "Games/Wii": "405",
    "Games/Handheld": "406",
    "Games/IOS (iPad/iPhone)": "407",
    "Games/Android": "408",
    "Games/Other": "499",

    "Porn": "500",
    "Porn/Movies": "501",
    "Porn/Movies DVDR": "502",
    "Porn/Pictures": "503",
    "Porn/Games": "504",
    "Porn/HD - Movies": "505",
    "Porn/Movie clips": "506",
    "Porn/Other": "599",

    "Other": "600",
    "Other/E-books": "601",
    "Other/Comics": "602",
    "Other/Pictures": "603",
    "Other/Covers": "604",
    "Other/Physibles": "605",
    "Other/Other": "699",
}
# Sort-order names mapped to the numeric codes that remote() places in
# request paths.  "Default" (99) is what remote() falls back to.
sorts = {
    "TitleDsc": "1",
    "TitleAsc": "2",
    "DateDsc": "3",
    "DateAsc": "4",
    "SizeDsc": "5",
    "SizeAsc": "6",
    "SeedersDsc": "7",
    "SeedersAsc": "8",
    "LeechersDsc": "9",
    "LeechersAsc": "10",
    "CategoryDsc": "13",
    "CategoryAsc": "14",
    "Default": "99",
}
class NoRedirection(request.HTTPErrorProcessor):
@ -42,6 +103,7 @@ class NoRedirection(request.HTTPErrorProcessor):
https_response = http_response
# create a subclass and override the handler methods
class MyHTMLParser(HTMLParser):
title = ''
@ -75,6 +137,237 @@ class MyHTMLParser(HTMLParser):
self.state = 'looking'
# Lazily-built ``(palette, reset_code)`` pair; stays None until the first
# coloured call succeeds in importing colorama.
_COLOR_CODES = None


def _color_codes():
    """Return ``(palette, reset_code)``, or ``None`` when colorama is missing.

    colorama is imported and initialised only once; later calls reuse the
    cached table instead of re-running ``colorama.init()``.
    """
    global _COLOR_CODES
    if _COLOR_CODES is None:
        try:
            import colorama
        except ImportError:
            # Colour is cosmetic: fall back to plain output.
            return None
        colorama.init()
        palette = {
            "default": "",
            "header": colorama.Back.BLACK + colorama.Fore.BLUE,
            "alt": colorama.Fore.YELLOW,
            "zebra_0": "",
            "zebra_1": colorama.Fore.BLUE,
            "WARN": colorama.Fore.MAGENTA,
            "ERROR": colorama.Fore.RED,
        }
        _COLOR_CODES = (palette, colorama.Style.RESET_ALL)
    return _COLOR_CODES


def print(*args, **kwargs):
    """Drop-in replacement for the builtin ``print`` with a ``color`` keyword.

    ``color`` names an entry of the palette ("default", "header", "alt",
    "zebra_0", "zebra_1", "WARN", "ERROR").  When it is given and known,
    the colour code is prefixed to the first positional argument and the
    reset code is appended as a final argument.  An unknown colour, a call
    without positional arguments, or a missing colorama installation all
    degrade to an ordinary uncoloured print.  Every other keyword is
    forwarded to ``builtins.print`` untouched.
    """
    color = kwargs.pop('color', None)
    if color and args:
        codes = _color_codes()
        if codes is not None:
            palette, reset = codes
            if color in palette:
                # str() so that a non-string first argument (e.g. an int)
                # can still be coloured instead of raising TypeError.
                args = (palette[color] + str(args[0]),) + args[1:] + (reset,)
    return builtins.print(*args, **kwargs)
#todo: redo this with html parser instead of regex
def remote(args, mirror):
    """Fetch result pages from ``mirror`` and scrape the torrents they list.

    Reads ``args.pages``, ``args.category``, ``args.sort``, ``args.browse``,
    ``args.search`` and ``args.recent``.  Depending on those it requests the
    browse listing, the top list (``/top/`` or ``/top/48h``) or a search,
    once per page.

    Returns a 4-tuple ``(results, sizes, uploaded, identifiers)``:
    ``results`` is a list of ``[magnet, seeds, leeches]`` entries (the
    counts are the strings captured from the page), the other three are
    flat lists of the size, upload-date and torrent-id strings found.

    Raises ``ValueError`` (an ``Exception`` subclass) when ``args.pages`` is
    not an integer greater than 0, and ``Exception`` when a page has neither
    results nor a "No hits." marker, which suggests a blocked or fake mirror.
    Ctrl-C while fetching prints a notice and exits with status 0.
    """
    try:
        pages = int(args.pages)
    except (TypeError, ValueError):
        pages = 0
    if pages < 1:
        raise ValueError("Please provide an integer greater than 0 "
                         "for the number of pages to fetch.")

    # Accept either a numeric code or a name; always keep the code as a
    # string because it is compared with "0" and concatenated into paths.
    if str(args.category) in categories.values():
        category = str(args.category)
    elif args.category in categories:
        category = categories[args.category]
    else:
        category = "0"
        print("Invalid category ignored", color="WARN")

    if str(args.sort) in sorts.values():
        sort = str(args.sort)
    elif args.sort in sorts:
        sort = sorts[args.sort]
    else:
        sort = "99"
        print("Invalid sort ignored", color="WARN")

    res_l = []
    sizes = []
    uploaded = []
    identifiers = []

    # Catch the Ctrl-C exception and exit cleanly
    try:
        for page in range(pages):
            if args.browse:
                # Category "0" (All) is replaced by '100' for browsing.
                if category == "0":
                    category = '100'
                path = '/browse/' + '/'.join(str(i) for i in (
                    category, page, sort))
            elif len(args.search) == 0:
                path = "/top/48h" if args.recent else "/top/"
                path += 'all' if category == "0" else category
            else:
                # Percent-encode each term so spaces and non-ASCII
                # characters produce a valid URL.
                terms = "+".join(parse.quote(term) for term in args.search)
                path = '/search/' + '/'.join(str(i) for i in (
                    terms, page, sort, category))

            req = request.Request(mirror + path)
            req.add_header('Accept-encoding', 'gzip')
            with request.urlopen(req) as response:
                payload = response.read()
                if response.info().get('Content-Encoding') == 'gzip':
                    payload = gzip.decompress(payload)
            res = payload.decode('utf-8')

            found = re.findall(r'"(magnet\:\?xt=[^"]*)|<td align="right">'
                               r'([^<]+)</td>', res)

            # check for a blocked mirror
            no_results = re.search(r"\"No hits\.", res)
            if not found and no_results is None:
                # Contradiction - we found no results,
                # but the page didn't say there were no results.
                # The page is probably not actually the pirate bay,
                # so let's try another mirror
                raise Exception("Blocked mirror detected.")

            # get sizes as well and substitute the &nbsp; character
            sizes.extend(match.replace("&nbsp;", " ")
                         for match in re.findall(r"(?<=Size )[0-9.]"
                                                 r"+\&nbsp\;[KMGT]*[i ]*B",
                                                 res))
            uploaded.extend(match.replace("&nbsp;", " ")
                            for match in re.findall(r"(?<=Uploaded )"
                                                    r".+(?=\, Size)", res))
            identifiers.extend(match.replace("&nbsp;", " ")
                               for match in re.findall("(?<=/torrent/)"
                                                       "[0-9]+(?=/)", res))

            # Each findall tuple holds either a magnet link or one
            # right-aligned cell.  Cells alternate seeds, leeches; the
            # leeches cell completes an entry.
            entry = ['', 0, 0]  # magnet, seeds, leeches
            expecting_seeds = True
            for magnet, count in found:
                if count == '':
                    entry[0] = magnet
                elif expecting_seeds:
                    entry[1] = count
                    expecting_seeds = False
                else:
                    entry[2] = count
                    expecting_seeds = True
                    res_l.append(entry)
                    entry = ['', 0, 0]
    except KeyboardInterrupt:
        print("\nCancelled.")
        sys.exit(0)

    # return the sizes in a separate list
    return res_l, sizes, uploaded, identifiers
def local(args):
    """Run the query ``args.q`` against the dump file ``args.database``.

    The whole file is read as text and fed to a ``MyHTMLParser`` built
    with the query; the parser's ``results`` attribute is returned.
    """
    with open(args.database, 'r') as dump:
        contents = dump.read()

    parser = MyHTMLParser(args.q)
    parser.feed(contents)
    return parser.results
# load user options, to override default ones
def config_to_load():
    """Return the path of the user configuration file to read.

    ``$XDG_CONFIG_HOME/pirate-get`` is used when that variable is set and
    the file exists; otherwise the result is ``~/.config/pirate-get``
    (which is returned whether or not it exists).
    """
    # Read the variable directly: os.path.expandvars() leaves an unset
    # variable as the literal text "$XDG_CONFIG_HOME", which would make the
    # check probe a relative path in the current directory.
    xdg_home = os.environ.get('XDG_CONFIG_HOME')
    if xdg_home:
        candidate = os.path.join(xdg_home, 'pirate-get')
        if os.path.isfile(candidate):
            return candidate
    return os.path.expanduser('~/.config/pirate-get')
# enhanced print output with column titles
def print_search_results(mags, sizes, uploaded):
    """Print the scraped torrents as a table with zebra-striped rows.

    ``mags`` is a list of ``[magnet, seeds, leeches]`` entries; ``sizes``
    and ``uploaded`` are parallel lists indexed the same way.  The LINK
    column shows the index of each entry in ``mags``.  The torrent name is
    taken from the ``dn=`` parameter of the magnet link and is left blank
    when the link has none.
    """
    # Local import so this function does not depend on the file's import
    # block.  get_terminal_size() has a built-in fallback, so this also
    # works when output is piped (the old `stty size` call crashed there).
    import shutil

    # 52 is the width taken by the fixed columns; never go negative.
    columns = max(shutil.get_terminal_size().columns - 52, 0)

    print("%5s %6s %6s %-5s %-11s %-11s %-*s"
          % ("LINK", "SEED", "LEECH", "RATIO", "SIZE", "UPLOAD",
             columns, "NAME"),
          color="header")

    cur_color = "zebra_0"
    for m, magnet in enumerate(mags):
        no_seeders = int(magnet[1])
        no_leechers = int(magnet[2])
        name = re.search(r"dn=([^\&]*)", magnet[0])

        # compute the S/L ratio (Higher is better)
        ratio = no_seeders / no_leechers if no_leechers else 0

        # Alternate between colors
        cur_color = "zebra_0" if (cur_color == "zebra_1") else "zebra_1"

        if name is None:
            torrent_name = ""
        else:
            torrent_name = parse.unquote(name.group(1)).replace("+", " ")

        # enhanced print output with justified columns
        print("%5d %6d %6d %5.1f %-11s %-11s %-*s" % (
            m, no_seeders, no_leechers, ratio, sizes[m],
            uploaded[m], columns, torrent_name), color=cur_color)
def print_descriptions(chosen_links, mags, site, identifiers):
    """Fetch and print the description of each chosen torrent.

    ``chosen_links`` holds indexes (ints or numeric strings) into ``mags``
    and ``identifiers``.  For each one the page ``/torrent/<id>/`` is
    requested from ``site`` and the contents of its ``nfo`` block are
    printed, with HTML links rewritten as ``[text](url)``.  A page without
    such a block prints an empty description instead of raising.
    """
    for link in chosen_links:
        link = int(link)
        path = '/torrent/%s/' % identifiers[link]

        req = request.Request(site + path)
        req.add_header('Accept-encoding', 'gzip')
        # The with-block closes the connection once the body is read.
        with request.urlopen(req) as response:
            payload = response.read()
            if response.info().get('Content-Encoding') == 'gzip':
                payload = gzip.decompress(payload)
        res = payload.decode('utf-8')

        name = re.search(r"dn=([^\&]*)", mags[link][0])
        if name is None:
            # No display name in the magnet link: show the torrent id.
            torrent_name = str(identifiers[link])
        else:
            torrent_name = parse.unquote(name.group(1)).replace("+", " ")

        found = re.search(r"<div class=\"nfo\">\s*<pre>(.+?)(?=</pre>)",
                          res, re.DOTALL)
        desc = found.group(1) if found is not None else ""

        # Replace HTML links with markdown style versions
        desc = re.sub(r"<a href=\"\s*([^\"]+?)\s*\"[^>]*>(\s*)([^<]+?)(\s*"
                      r")</a>", r"\2[\3](\1)\4", desc)

        print('Description for "' + torrent_name + '":', color="zebra_1")
        print(desc, color="zebra_0")
def print_fileLists(chosen_links, mags, site, identifiers):
    """Fetch and print the file list of each chosen torrent.

    ``chosen_links`` holds indexes (ints or numeric strings) into ``mags``
    and ``identifiers``.  For each one ``/ajax_details_filelist.php?id=<id>``
    is requested from ``site`` and every (name, size) pair scraped from the
    response is printed as ``size name`` on alternating row colours.
    """
    for link in chosen_links:
        index = int(link)
        path = '/ajax_details_filelist.php'
        query = '?id=' + identifiers[index]

        req = request.Request(site + path + query)
        req.add_header('Accept-encoding', 'gzip')
        # The with-block closes the connection once the body is read.
        with request.urlopen(req) as response:
            payload = response.read()
            if response.info().get('Content-Encoding') == 'gzip':
                payload = gzip.decompress(payload)
        res = payload.decode('utf-8').replace("&nbsp;", " ")

        files = re.findall(r"<td align=\"left\">\s*([^<]+?)\s*</td><td ali"
                           r"gn=\"right\">\s*([^<]+?)\s*</tr>", res)

        name = re.search(r"dn=([^\&]*)", mags[index][0])
        if name is None:
            # No display name in the magnet link: show the torrent id.
            torrent_name = str(identifiers[index])
        else:
            torrent_name = parse.unquote(name.group(1)).replace("+", " ")

        print('Files in "' + torrent_name + '":', color="zebra_1")
        cur_color = "zebra_0"

        for file_name, file_size in files:
            print("%-11s %s" % (file_size, file_name), color=cur_color)
            cur_color = "zebra_0" if (cur_color == "zebra_1") else "zebra_1"
def id_generator(size=6, chars=string.ascii_uppercase + string.digits):
    """Return a random string of ``size`` characters drawn from ``chars``.

    By default the result is six characters long and made of upper-case
    ASCII letters and digits.
    """
    picked = []
    for _ in range(size):
        picked.append(random.choice(chars))
    return ''.join(picked)
def main():
# new ConfigParser
config = configparser.ConfigParser()
@ -84,13 +377,6 @@ def main():
config.set('SaveToFile', 'enabled', 'false')
config.set('SaveToFile', 'directory', '~/downloads/pirate-get/')
# load user options, to override default ones
def config_to_load():
if os.path.isfile(os.path.expandvars('$XDG_CONFIG_HOME/pirate-get')):
return os.path.expandvars('$XDG_CONFIG_HOME/pirate-get')
else:
return os.path.expanduser('~/.config/pirate-get')
config.read([config_to_load()])
parser = argparse.ArgumentParser(
@ -133,214 +419,8 @@ def main():
parser.add_argument('--color', dest='color',
action='store_false', default=True,
help="disable colored output")
categories = {
"All":"0",
"Audio":"100",
"Audio/Music":"101",
"Audio/Audio books":"102",
"Audio/Sound clips":"103",
"Audio/FLAC":"104",
"Audio/Other":"199",
"Video":"200",
"Video/Movies":"201",
"Video/Movies DVDR":"202",
"Video/Music videos":"203",
"Video/Movie clips":"204",
"Video/TV shows":"205",
"Video/Handheld":"206",
"Video/HD - Movies":"207",
"Video/HD - TV shows":"208",
"Video/3D":"209",
"Video/Other":"299",
"Applications":"300",
"Applications/Windows":"301",
"Applications/Mac":"302",
"Applications/UNIX":"303",
"Applications/Handheld":"304",
"Applications/IOS (iPad/iPhone)":"305",
"Applications/Android":"306",
"Applications/Other OS":"399",
"Games":"400",
"Games/PC":"401",
"Games/Mac":"402",
"Games/PSx":"403",
"Games/XBOX360":"404",
"Games/Wii":"405",
"Games/Handheld":"406",
"Games/IOS (iPad/iPhone)":"407",
"Games/Android":"408",
"Games/Other":"499",
"Porn":"500",
"Porn/Movies":"501",
"Porn/Movies DVDR":"502",
"Porn/Pictures":"503",
"Porn/Games":"504",
"Porn/HD - Movies":"505",
"Porn/Movie clips":"506",
"Porn/Other":"599",
"Other":"600",
"Other/E-books":"601",
"Other/Comics":"602",
"Other/Pictures":"603",
"Other/Covers":"604",
"Other/Physibles":"605",
"Other/Other":"699"}
sorts = {
"TitleDsc": "1", "TitleAsc": "2",
"DateDsc": "3", "DateAsc": "4",
"SizeDsc": "5", "SizeAsc": "6",
"SeedersDsc": "7", "SeedersAsc": "8",
"LeechersDsc": "9", "LeechersAsc": "10",
"CategoryDsc": "13", "CategoryAsc": "14",
"Default": "99"}
#todo: redo this with html parser instead of regex
def remote(args, mirror):
res_l = []
try:
pages = int(args.pages)
if pages < 1:
raise Exception('')
except Exception:
raise Exception("Please provide an integer greater than 0"
"for the number of pages to fetch.")
if str(args.category) in categories.values():
category = args.category;
elif args.category in categories.keys():
category = categories[args.category]
else:
category = "0";
print("Invalid category ignored", color="WARN")
if str(args.sort) in sorts.values():
sort = args.sort;
elif args.sort in sorts.keys():
sort = sorts[args.sort]
else:
sort = "99";
print("Invalid sort ignored", color="WARN")
# Catch the Ctrl-C exception and exit cleanly
try:
sizes = []
uploaded = []
identifiers = []
for page in range(pages):
if args.browse:
path = "/browse/"
if(category == "0"):
category = '100'
path = '/browse/' + '/'.join(str(i) for i in (
category, page, sort))
elif len(args.search) == 0:
path = "/top/48h" if args.recent else "/top/"
if(category == "0"):
path += 'all'
else:
path += category
else:
path = '/search/' + '/'.join(str(i) for i in (
"+".join(args.search),
page, sort,
category))
req = request.Request(mirror + path)
req.add_header('Accept-encoding', 'gzip')
f = request.urlopen(req)
if f.info().get('Content-Encoding') == 'gzip':
f = gzip.GzipFile(fileobj=BytesIO(f.read()))
res = f.read().decode('utf-8')
found = re.findall('"(magnet\:\?xt=[^"]*)|<td align="right">'
'([^<]+)</td>', res)
# check for a blocked mirror
no_results = re.search("\"No hits\.", res)
if found == [] and not no_results is None:
# Contradiction - we found no results,
# but the page didn't say there were no results.
# The page is probably not actually the pirate bay,
# so let's try another mirror
raise Exception("Blocked mirror detected.")
# get sizes as well and substitute the &nbsp; character
sizes.extend([match.replace("&nbsp;", " ")
for match in re.findall("(?<=Size )[0-9.]"
"+\&nbsp\;[KMGT]*[i ]*B", res)])
uploaded.extend([match.replace("&nbsp;", " ")
for match in re.findall("(?<=Uploaded )"
".+(?=\, Size)",res)])
identifiers.extend([match.replace("&nbsp;", " ")
for match in re.findall("(?<=/torrent/)"
"[0-9]+(?=/)",res)])
state = "seeds"
curr = ['', 0, 0] #magnet, seeds, leeches
for f in found:
if f[1] == '':
curr[0] = f[0]
else:
if state == 'seeds':
curr[1] = f[1]
state = 'leeches'
else:
curr[2] = f[1]
state = 'seeds'
res_l.append(curr)
curr = ['', 0, 0]
except KeyboardInterrupt :
print("\nCancelled.")
exit()
# return the sizes in a spearate list
return res_l, sizes, uploaded, identifiers
args = parser.parse_args()
def make_print():
if(args.color):
import colorama
colorama.init()
color_dict = {
"default": "",
"header": colorama.Back.BLACK + colorama.Fore.BLUE,
"zebra_0": "",
"zebra_1": colorama.Fore.BLUE,
"WARN": colorama.Fore.YELLOW,
"ERROR": colorama.Fore.RED}
def n_print(*args, **kwargs):
"""Print with colors"""
try:
c = color_dict[kwargs.pop("color")]
args = (c + args[0],) + args[1:] + (colorama.Style.RESET_ALL,)
except KeyError as e:
pass
except IndexError as e:
pass
return builtins.print(*args, **kwargs)
else:
def n_print(*args, **kwargs):
if("color" in kwargs):
kwargs.pop('color')
return builtins.print(*args, **kwargs)
return n_print
print=make_print()
def local(args):
xml_str = ''
with open(args.database, 'r') as f:
xml_str += f.read()
htmlparser = MyHTMLParser(args.q)
htmlparser.feed(xml_str)
return htmlparser.results
if args.list_categories:
cur_color = "zebra_0"
for key, value in sorted(categories.iteritems()) :
@ -381,85 +461,7 @@ def main():
print("no results")
return
# enhanced print output with column titles
def print_search_results():
columns = int(os.popen('stty size', 'r').read().split()[1]) - 52
cur_color = "zebra_0"
print("%5s %6s %6s %-5s %-11s %-11s %-*s" \
% ( "LINK", "SEED", "LEECH", "RATIO", "SIZE", "UPLOAD", columns, "NAME"),
color="header")
for m in range(len(mags)):
magnet = mags[m]
no_seeders = int(magnet[1])
no_leechers = int(magnet[2])
name = re.search("dn=([^\&]*)", magnet[0])
# compute the S/L ratio (Higher is better)
try:
ratio = no_seeders/no_leechers
except ZeroDivisionError:
ratio = 0
# Alternate between colors
cur_color = "zebra_0" if (cur_color == "zebra_1") else "zebra_1"
torrent_name = parse.unquote(name.group(1)).replace("+", " ")
# enhanced print output with justified columns
print("%5d %6d %6d %5.1f %-11s %-11s %-*s" % (
m, no_seeders, no_leechers, ratio ,sizes[m],
uploaded[m], columns, torrent_name), color=cur_color)
def print_descriptions(chosen_links):
for link in chosen_links:
link = int(link)
path = '/torrent/%s/' % identifiers[link]
req = request.Request(mirror + path)
req.add_header('Accept-encoding', 'gzip')
f = request.urlopen(req)
if f.info().get('Content-Encoding') == 'gzip':
f = gzip.GzipFile(fileobj=BytesIO(f.read()))
res = f.read().decode('utf-8')
name = re.search("dn=([^\&]*)", mags[link][0])
torrent_name = parse.unquote(name.group(1)).replace("+", " ")
desc = re.search(r"<div class=\"nfo\">\s*<pre>(.+?)(?=</pre>)",
res, re.DOTALL).group(1)
# Replace HTML links with markdown style versions
desc = re.sub(r"<a href=\"\s*([^\"]+?)\s*\"[^>]*>(\s*)([^<]+?)(\s*"
r")</a>", r"\2[\3](\1)\4", desc)
print('Description for "' + torrent_name + '":', color="zebra_1")
print(desc, color="zebra_0")
def print_fileLists(chosen_links):
for link in chosen_links:
path = '/ajax_details_filelist.php'
query = '?id=' + identifiers[int(link)]
req = request.Request(mirror + path + query)
req.add_header('Accept-encoding', 'gzip')
f = request.urlopen(req)
if f.info().get('Content-Encoding') == 'gzip':
f = gzip.GzipFile(fileobj=BytesIO(f.read()))
res = f.read().decode('utf-8').replace("&nbsp;", " ")
files = re.findall(r"<td align=\"left\">\s*([^<]+?)\s*</td><td ali"
r"gn=\"right\">\s*([^<]+?)\s*</tr>", res)
name = re.search("dn=([^\&]*)", mags[int(link)][0])
torrent_name = parse.unquote(name.group(1)).replace("+", " ")
print('Files in "' + torrent_name + '":', color="zebra_1")
cur_color = "zebra_0"
for f in files:
print("%-11s %s" % (f[1], f[0]), color=cur_color)
cur_color = "zebra_0" if (cur_color == "zebra_1") else "zebra_1"
print_search_results()
print_search_results(mags, sizes, uploaded)
if args.first:
print("Choosing first result");
@ -506,11 +508,11 @@ def main():
print("User Cancelled.")
sys.exit(0)
elif code == 'd':
print_descriptions(choices)
print_descriptions(choices, mags, site, identifiers)
elif code == 'f':
print_fileLists(choices)
print_fileLists(choices, mags, site, identifiers)
elif code == 'p':
print_search_results()
print_search_results(mags, sizes, uploaded)
elif not l:
print('No links entered!')
else:
@ -550,8 +552,6 @@ def main():
else:
webbrowser.open(url)
def id_generator(size=6, chars=string.ascii_uppercase + string.digits):
return ''.join(random.choice(chars) for _ in range(size))
if __name__ == "__main__":
main()