1
0
mirror of https://github.com/vikstrous/pirate-get synced 2025-01-24 12:14:20 +01:00

Unclutter main function

Why was everything inside main()?
This commit is contained in:
Rnhmjoj 2014-12-03 22:43:11 +01:00
parent edc4d4ed71
commit c823f1564e

View File

@ -32,8 +32,69 @@ import urllib.request as request
import urllib.parse as parse import urllib.parse as parse
from html.parser import HTMLParser from html.parser import HTMLParser
from pprint import pprint from io import BytesIO
from io import StringIO, BytesIO
# Maps a human-readable category name ("Group" or "Group/Subcategory")
# to the numeric code, kept as a string, that is placed in request paths.
categories = {
    "All": "0",

    "Audio": "100",
    "Audio/Music": "101",
    "Audio/Audio books": "102",
    "Audio/Sound clips": "103",
    "Audio/FLAC": "104",
    "Audio/Other": "199",

    "Video": "200",
    "Video/Movies": "201",
    "Video/Movies DVDR": "202",
    "Video/Music videos": "203",
    "Video/Movie clips": "204",
    "Video/TV shows": "205",
    "Video/Handheld": "206",
    "Video/HD - Movies": "207",
    "Video/HD - TV shows": "208",
    "Video/3D": "209",
    "Video/Other": "299",

    "Applications": "300",
    "Applications/Windows": "301",
    "Applications/Mac": "302",
    "Applications/UNIX": "303",
    "Applications/Handheld": "304",
    "Applications/IOS (iPad/iPhone)": "305",
    "Applications/Android": "306",
    "Applications/Other OS": "399",

    "Games": "400",
    "Games/PC": "401",
    "Games/Mac": "402",
    "Games/PSx": "403",
    "Games/XBOX360": "404",
    "Games/Wii": "405",
    "Games/Handheld": "406",
    "Games/IOS (iPad/iPhone)": "407",
    "Games/Android": "408",
    "Games/Other": "499",

    "Porn": "500",
    "Porn/Movies": "501",
    "Porn/Movies DVDR": "502",
    "Porn/Pictures": "503",
    "Porn/Games": "504",
    "Porn/HD - Movies": "505",
    "Porn/Movie clips": "506",
    "Porn/Other": "599",

    "Other": "600",
    "Other/E-books": "601",
    "Other/Comics": "602",
    "Other/Pictures": "603",
    "Other/Covers": "604",
    "Other/Physibles": "605",
    "Other/Other": "699",
}
# Maps a sort-order name to the numeric code, kept as a string, that is
# placed in request paths. "Dsc"/"Asc" variants are listed in pairs.
sorts = {
    "TitleDsc": "1",
    "TitleAsc": "2",
    "DateDsc": "3",
    "DateAsc": "4",
    "SizeDsc": "5",
    "SizeAsc": "6",
    "SeedersDsc": "7",
    "SeedersAsc": "8",
    "LeechersDsc": "9",
    "LeechersAsc": "10",
    "CategoryDsc": "13",
    "CategoryAsc": "14",
    "Default": "99",
}
class NoRedirection(request.HTTPErrorProcessor): class NoRedirection(request.HTTPErrorProcessor):
@ -42,6 +103,7 @@ class NoRedirection(request.HTTPErrorProcessor):
https_response = http_response https_response = http_response
# create a subclass and override the handler methods # create a subclass and override the handler methods
class MyHTMLParser(HTMLParser): class MyHTMLParser(HTMLParser):
title = '' title = ''
@ -75,6 +137,237 @@ class MyHTMLParser(HTMLParser):
self.state = 'looking' self.state = 'looking'
# (color table, reset code); built lazily by the first coloured print so
# that colorama is only imported when colour is actually requested.
_COLOR_STATE = None


def _color_state():
    """Import and initialise colorama once; return (color table, reset code)."""
    global _COLOR_STATE
    if _COLOR_STATE is None:
        import colorama
        colorama.init()
        _COLOR_STATE = ({
            "default": "",
            "header": colorama.Back.BLACK + colorama.Fore.BLUE,
            "alt": colorama.Fore.YELLOW,
            "zebra_0": "",
            "zebra_1": colorama.Fore.BLUE,
            "WARN": colorama.Fore.MAGENTA,
            "ERROR": colorama.Fore.RED,
        }, colorama.Style.RESET_ALL)
    return _COLOR_STATE


def print(*args, **kwargs):
    """Drop-in replacement for the builtin ``print`` with a ``color`` keyword.

    ``color`` names an entry of the colour table ("default", "header",
    "alt", "zebra_0", "zebra_1", "WARN", "ERROR"). When it is missing or
    falsy the call is forwarded to ``builtins.print`` unchanged and colorama
    is never imported. An unknown colour name, or a call without positional
    arguments, prints uncoloured.

    All other positional and keyword arguments are passed through to
    ``builtins.print``, whose return value is returned.
    """
    color = kwargs.pop('color', None)
    if not color:
        return builtins.print(*args, **kwargs)

    table, reset = _color_state()
    if args and color in table:
        # str() so a non-string first argument is coloured instead of
        # raising TypeError on the concatenation.
        # The reset code stays a separate trailing argument, so the usual
        # separator is still emitted before it, as before.
        args = (table[color] + str(args[0]),) + args[1:] + (reset,)
    return builtins.print(*args, **kwargs)
#todo: redo this with html parser instead of regex
def remote(args, mirror):
    """Fetch result pages from ``mirror`` and scrape the torrent listing.

    The request path is chosen from ``args``:

    * ``args.browse`` set: ``/browse/<category>/<page>/<sort>`` (category
      "0" is replaced by "100"),
    * ``args.search`` empty: ``/top/`` or ``/top/48h`` (when ``args.recent``)
      followed by ``all`` or the category code,
    * otherwise: ``/search/<terms joined by +>/<page>/<sort>/<category>``.

    ``args.category`` and ``args.sort`` may each be either a numeric code or
    a name from ``categories`` / ``sorts``; anything else falls back to
    "0" / "99" with a warning. ``args.pages`` is the number of pages fetched
    (page numbers start at 0).

    Returns a 4-tuple ``(res_l, sizes, uploaded, identifiers)`` where
    ``res_l`` holds ``[magnet, seeds, leeches]`` lists (the counts are the
    strings scraped from the page) and the other three are lists of strings
    scraped from the same pages.

    Raises ``Exception`` when ``args.pages`` is not an integer >= 1, or when
    a page has neither results nor a "No hits." notice (blocked mirror).
    Ctrl-C during fetching prints a message and exits with status 0.
    """
    try:
        pages = int(args.pages)
    except (TypeError, ValueError, OverflowError):
        pages = 0
    if pages < 1:
        raise Exception("Please provide an integer greater than 0 "
                        "for the number of pages to fetch.")

    # Normalise to the string code: a numeric args.category would otherwise
    # break the string concatenation used for the /top/ path.
    if str(args.category) in categories.values():
        category = str(args.category)
    elif args.category in categories:
        category = categories[args.category]
    else:
        category = "0"
        print("Invalid category ignored", color="WARN")

    if str(args.sort) in sorts.values():
        sort = str(args.sort)
    elif args.sort in sorts:
        sort = sorts[args.sort]
    else:
        sort = "99"
        print("Invalid sort ignored", color="WARN")

    res_l = []
    sizes = []
    uploaded = []
    identifiers = []

    # Catch the Ctrl-C exception and exit cleanly
    try:
        for page in range(pages):
            if args.browse:
                if category == "0":
                    category = '100'
                path = '/browse/' + '/'.join(str(i) for i in (
                    category, page, sort))
            elif len(args.search) == 0:
                path = "/top/48h" if args.recent else "/top/"
                path += 'all' if category == "0" else category
            else:
                path = '/search/' + '/'.join(str(i) for i in (
                    "+".join(args.search),
                    page, sort,
                    category))

            req = request.Request(mirror + path)
            req.add_header('Accept-encoding', 'gzip')
            # Close the connection as soon as the body has been read.
            with request.urlopen(req) as response:
                body = response.read()
                if response.info().get('Content-Encoding') == 'gzip':
                    body = gzip.GzipFile(fileobj=BytesIO(body)).read()
            res = body.decode('utf-8')

            # Each match is (magnet, '') for a magnet link or ('', text)
            # for a right-aligned table cell (seeders / leechers).
            found = re.findall(r'"(magnet\:\?xt=[^"]*)|<td align="right">'
                               r'([^<]+)</td>', res)

            # check for a blocked mirror
            # The notice is matched without relying on surrounding markup.
            no_results = re.search(r"No hits\.", res)
            if not found and no_results is None:
                # Contradiction - we found no results,
                # but the page didn't say there were no results.
                # The page is probably not actually the pirate bay,
                # so let's try another mirror
                raise Exception("Blocked mirror detected.")

            # get sizes as well and substitute the &nbsp; character
            sizes.extend(match.replace("&nbsp;", " ")
                         for match in re.findall(r"(?<=Size )[0-9.]"
                                                 r"+\&nbsp\;[KMGT]*[i ]*B",
                                                 res))
            uploaded.extend(match.replace("&nbsp;", " ")
                            for match in re.findall(r"(?<=Uploaded )"
                                                    r".+(?=\, Size)", res))
            identifiers.extend(match.replace("&nbsp;", " ")
                               for match in re.findall("(?<=/torrent/)"
                                                       "[0-9]+(?=/)", res))

            # A magnet is followed by two cells: seeders, then leechers.
            state = "seeds"
            curr = ['', 0, 0]  # magnet, seeds, leeches
            for magnet, cell in found:
                if cell == '':
                    curr[0] = magnet
                elif state == 'seeds':
                    curr[1] = cell
                    state = 'leeches'
                else:
                    curr[2] = cell
                    state = 'seeds'
                    res_l.append(curr)
                    curr = ['', 0, 0]
    except KeyboardInterrupt:
        print("\nCancelled.")
        sys.exit(0)

    # return the sizes in a separate list
    return res_l, sizes, uploaded, identifiers
def local(args):
    """Search a local database file instead of a remote mirror.

    Reads the whole file at ``args.database`` as text, feeds it to a
    ``MyHTMLParser`` constructed with ``args.q`` and returns the parser's
    ``results`` attribute.
    """
    with open(args.database, 'r') as database:
        contents = '' + database.read()
    searcher = MyHTMLParser(args.q)
    searcher.feed(contents)
    return searcher.results
# load user options, to override default ones
def config_to_load():
    """Return the path of the user configuration file to read.

    ``$XDG_CONFIG_HOME/pirate-get`` is returned when, after environment
    variable expansion, it names an existing file; otherwise the
    home-expanded ``~/.config/pirate-get`` is returned (whether or not it
    exists).
    """
    xdg_candidate = os.path.expandvars('$XDG_CONFIG_HOME/pirate-get')
    if not os.path.isfile(xdg_candidate):
        return os.path.expanduser('~/.config/pirate-get')
    return xdg_candidate
# enhanced print output with column titles
def print_search_results(mags, sizes, uploaded):
    """Print the search results as a zebra-striped table.

    ``mags`` is a sequence of ``[magnet, seeders, leechers]`` entries;
    ``sizes`` and ``uploaded`` are indexed in parallel with it. The row
    index is shown in the LINK column and the torrent name is taken from
    the ``dn=`` parameter of the magnet link.
    """
    # Local import: shutil is only needed here. Unlike shelling out to
    # `stty size`, this also works without a controlling terminal and on
    # platforms that have no stty (it falls back to a default width).
    import shutil

    # 52 columns are taken by the fixed-width fields before NAME.
    columns = max(shutil.get_terminal_size().columns - 52, 0)

    print("%5s %6s %6s %-5s %-11s %-11s %-*s" \
        % ( "LINK", "SEED", "LEECH", "RATIO", "SIZE", "UPLOAD", columns, "NAME"),
        color="header")

    cur_color = "zebra_0"
    for m, magnet in enumerate(mags):
        no_seeders = int(magnet[1])
        no_leechers = int(magnet[2])
        name = re.search(r"dn=([^\&]*)", magnet[0])

        # compute the S/L ratio (Higher is better); 0 when nobody leeches
        ratio = no_seeders / no_leechers if no_leechers else 0

        # Alternate between colors
        cur_color = "zebra_0" if (cur_color == "zebra_1") else "zebra_1"

        torrent_name = parse.unquote(name.group(1)).replace("+", " ")
        # enhanced print output with justified columns
        print("%5d %6d %6d %5.1f %-11s %-11s %-*s" % (
            m, no_seeders, no_leechers, ratio, sizes[m],
            uploaded[m], columns, torrent_name), color=cur_color)
def print_descriptions(chosen_links, mags, site, identifiers):
    """Fetch and print the description of each chosen torrent.

    ``chosen_links`` holds indexes (anything ``int()`` accepts) into
    ``mags`` and ``identifiers``. For each one the page
    ``site + '/torrent/<identifier>/'`` is downloaded, the contents of its
    ``<div class="nfo"><pre>`` block extracted, and HTML links rewritten as
    ``[text](url)`` before printing. A page without such a block produces a
    warning instead of a description.
    """
    for link in chosen_links:
        link = int(link)
        path = '/torrent/%s/' % identifiers[link]
        req = request.Request(site + path)
        req.add_header('Accept-encoding', 'gzip')
        # Close the connection as soon as the body has been read.
        with request.urlopen(req) as response:
            body = response.read()
            if response.info().get('Content-Encoding') == 'gzip':
                body = gzip.GzipFile(fileobj=BytesIO(body)).read()
        res = body.decode('utf-8')

        name = re.search(r"dn=([^\&]*)", mags[link][0])
        torrent_name = parse.unquote(name.group(1)).replace("+", " ")

        found = re.search(r"<div class=\"nfo\">\s*<pre>(.+?)(?=</pre>)",
                          res, re.DOTALL)
        if found is None:
            # Keep going with the remaining links rather than crashing
            # on a page that has no description block.
            print('No description found for "' + torrent_name + '"',
                  color="WARN")
            continue
        desc = found.group(1)

        # Replace HTML links with markdown style versions
        desc = re.sub(r"<a href=\"\s*([^\"]+?)\s*\"[^>]*>(\s*)([^<]+?)(\s*"
                      r")</a>", r"\2[\3](\1)\4", desc)
        print('Description for "' + torrent_name + '":', color="zebra_1")
        print(desc, color="zebra_0")
def print_fileLists(chosen_links, mags, site, identifiers):
    """Fetch and print the file list of each chosen torrent.

    ``chosen_links`` holds indexes (anything ``int()`` accepts) into
    ``mags`` and ``identifiers``. For each one,
    ``site + '/ajax_details_filelist.php?id=<identifier>'`` is downloaded
    and every (name, size) table row is printed as ``size name`` in
    alternating colours.
    """
    for link in chosen_links:
        index = int(link)
        path = '/ajax_details_filelist.php'
        query = '?id=' + identifiers[index]
        req = request.Request(site + path + query)
        req.add_header('Accept-encoding', 'gzip')
        # Close the connection as soon as the body has been read.
        with request.urlopen(req) as response:
            body = response.read()
            if response.info().get('Content-Encoding') == 'gzip':
                body = gzip.GzipFile(fileobj=BytesIO(body)).read()
        res = body.decode('utf-8').replace("&nbsp;", " ")

        # Each match is (file name, size).
        files = re.findall(r"<td align=\"left\">\s*([^<]+?)\s*</td><td ali"
                           r"gn=\"right\">\s*([^<]+?)\s*</tr>", res)
        name = re.search(r"dn=([^\&]*)", mags[index][0])
        torrent_name = parse.unquote(name.group(1)).replace("+", " ")

        print('Files in "' + torrent_name + '":', color="zebra_1")
        cur_color = "zebra_0"
        for file_name, file_size in files:
            print("%-11s  %s" % (file_size, file_name), color=cur_color)
            cur_color = "zebra_0" if (cur_color == "zebra_1") else "zebra_1"
def id_generator(size=6, chars=string.ascii_uppercase + string.digits):
    """Return a random string of ``size`` characters drawn from ``chars``.

    By default the result is six characters long and uses uppercase ASCII
    letters and digits. Characters are picked one at a time with
    ``random.choice``.
    """
    picked = []
    for _ in range(size):
        picked.append(random.choice(chars))
    return ''.join(picked)
def main(): def main():
# new ConfigParser # new ConfigParser
config = configparser.ConfigParser() config = configparser.ConfigParser()
@ -84,13 +377,6 @@ def main():
config.set('SaveToFile', 'enabled', 'false') config.set('SaveToFile', 'enabled', 'false')
config.set('SaveToFile', 'directory', '~/downloads/pirate-get/') config.set('SaveToFile', 'directory', '~/downloads/pirate-get/')
# load user options, to override default ones
def config_to_load():
if os.path.isfile(os.path.expandvars('$XDG_CONFIG_HOME/pirate-get')):
return os.path.expandvars('$XDG_CONFIG_HOME/pirate-get')
else:
return os.path.expanduser('~/.config/pirate-get')
config.read([config_to_load()]) config.read([config_to_load()])
parser = argparse.ArgumentParser( parser = argparse.ArgumentParser(
@ -133,214 +419,8 @@ def main():
parser.add_argument('--color', dest='color', parser.add_argument('--color', dest='color',
action='store_false', default=True, action='store_false', default=True,
help="disable colored output") help="disable colored output")
categories = {
"All":"0",
"Audio":"100",
"Audio/Music":"101",
"Audio/Audio books":"102",
"Audio/Sound clips":"103",
"Audio/FLAC":"104",
"Audio/Other":"199",
"Video":"200",
"Video/Movies":"201",
"Video/Movies DVDR":"202",
"Video/Music videos":"203",
"Video/Movie clips":"204",
"Video/TV shows":"205",
"Video/Handheld":"206",
"Video/HD - Movies":"207",
"Video/HD - TV shows":"208",
"Video/3D":"209",
"Video/Other":"299",
"Applications":"300",
"Applications/Windows":"301",
"Applications/Mac":"302",
"Applications/UNIX":"303",
"Applications/Handheld":"304",
"Applications/IOS (iPad/iPhone)":"305",
"Applications/Android":"306",
"Applications/Other OS":"399",
"Games":"400",
"Games/PC":"401",
"Games/Mac":"402",
"Games/PSx":"403",
"Games/XBOX360":"404",
"Games/Wii":"405",
"Games/Handheld":"406",
"Games/IOS (iPad/iPhone)":"407",
"Games/Android":"408",
"Games/Other":"499",
"Porn":"500",
"Porn/Movies":"501",
"Porn/Movies DVDR":"502",
"Porn/Pictures":"503",
"Porn/Games":"504",
"Porn/HD - Movies":"505",
"Porn/Movie clips":"506",
"Porn/Other":"599",
"Other":"600",
"Other/E-books":"601",
"Other/Comics":"602",
"Other/Pictures":"603",
"Other/Covers":"604",
"Other/Physibles":"605",
"Other/Other":"699"}
sorts = {
"TitleDsc": "1", "TitleAsc": "2",
"DateDsc": "3", "DateAsc": "4",
"SizeDsc": "5", "SizeAsc": "6",
"SeedersDsc": "7", "SeedersAsc": "8",
"LeechersDsc": "9", "LeechersAsc": "10",
"CategoryDsc": "13", "CategoryAsc": "14",
"Default": "99"}
#todo: redo this with html parser instead of regex
def remote(args, mirror):
res_l = []
try:
pages = int(args.pages)
if pages < 1:
raise Exception('')
except Exception:
raise Exception("Please provide an integer greater than 0"
"for the number of pages to fetch.")
if str(args.category) in categories.values():
category = args.category;
elif args.category in categories.keys():
category = categories[args.category]
else:
category = "0";
print("Invalid category ignored", color="WARN")
if str(args.sort) in sorts.values():
sort = args.sort;
elif args.sort in sorts.keys():
sort = sorts[args.sort]
else:
sort = "99";
print("Invalid sort ignored", color="WARN")
# Catch the Ctrl-C exception and exit cleanly
try:
sizes = []
uploaded = []
identifiers = []
for page in range(pages):
if args.browse:
path = "/browse/"
if(category == "0"):
category = '100'
path = '/browse/' + '/'.join(str(i) for i in (
category, page, sort))
elif len(args.search) == 0:
path = "/top/48h" if args.recent else "/top/"
if(category == "0"):
path += 'all'
else:
path += category
else:
path = '/search/' + '/'.join(str(i) for i in (
"+".join(args.search),
page, sort,
category))
req = request.Request(mirror + path)
req.add_header('Accept-encoding', 'gzip')
f = request.urlopen(req)
if f.info().get('Content-Encoding') == 'gzip':
f = gzip.GzipFile(fileobj=BytesIO(f.read()))
res = f.read().decode('utf-8')
found = re.findall('"(magnet\:\?xt=[^"]*)|<td align="right">'
'([^<]+)</td>', res)
# check for a blocked mirror
no_results = re.search("\"No hits\.", res)
if found == [] and not no_results is None:
# Contradiction - we found no results,
# but the page didn't say there were no results.
# The page is probably not actually the pirate bay,
# so let's try another mirror
raise Exception("Blocked mirror detected.")
# get sizes as well and substitute the &nbsp; character
sizes.extend([match.replace("&nbsp;", " ")
for match in re.findall("(?<=Size )[0-9.]"
"+\&nbsp\;[KMGT]*[i ]*B", res)])
uploaded.extend([match.replace("&nbsp;", " ")
for match in re.findall("(?<=Uploaded )"
".+(?=\, Size)",res)])
identifiers.extend([match.replace("&nbsp;", " ")
for match in re.findall("(?<=/torrent/)"
"[0-9]+(?=/)",res)])
state = "seeds"
curr = ['', 0, 0] #magnet, seeds, leeches
for f in found:
if f[1] == '':
curr[0] = f[0]
else:
if state == 'seeds':
curr[1] = f[1]
state = 'leeches'
else:
curr[2] = f[1]
state = 'seeds'
res_l.append(curr)
curr = ['', 0, 0]
except KeyboardInterrupt :
print("\nCancelled.")
exit()
# return the sizes in a spearate list
return res_l, sizes, uploaded, identifiers
args = parser.parse_args() args = parser.parse_args()
def make_print():
if(args.color):
import colorama
colorama.init()
color_dict = {
"default": "",
"header": colorama.Back.BLACK + colorama.Fore.BLUE,
"zebra_0": "",
"zebra_1": colorama.Fore.BLUE,
"WARN": colorama.Fore.YELLOW,
"ERROR": colorama.Fore.RED}
def n_print(*args, **kwargs):
"""Print with colors"""
try:
c = color_dict[kwargs.pop("color")]
args = (c + args[0],) + args[1:] + (colorama.Style.RESET_ALL,)
except KeyError as e:
pass
except IndexError as e:
pass
return builtins.print(*args, **kwargs)
else:
def n_print(*args, **kwargs):
if("color" in kwargs):
kwargs.pop('color')
return builtins.print(*args, **kwargs)
return n_print
print=make_print()
def local(args):
xml_str = ''
with open(args.database, 'r') as f:
xml_str += f.read()
htmlparser = MyHTMLParser(args.q)
htmlparser.feed(xml_str)
return htmlparser.results
if args.list_categories: if args.list_categories:
cur_color = "zebra_0" cur_color = "zebra_0"
for key, value in sorted(categories.iteritems()) : for key, value in sorted(categories.iteritems()) :
@ -381,85 +461,7 @@ def main():
print("no results") print("no results")
return return
# enhanced print output with column titles print_search_results(mags, sizes, uploaded)
def print_search_results():
columns = int(os.popen('stty size', 'r').read().split()[1]) - 52
cur_color = "zebra_0"
print("%5s %6s %6s %-5s %-11s %-11s %-*s" \
% ( "LINK", "SEED", "LEECH", "RATIO", "SIZE", "UPLOAD", columns, "NAME"),
color="header")
for m in range(len(mags)):
magnet = mags[m]
no_seeders = int(magnet[1])
no_leechers = int(magnet[2])
name = re.search("dn=([^\&]*)", magnet[0])
# compute the S/L ratio (Higher is better)
try:
ratio = no_seeders/no_leechers
except ZeroDivisionError:
ratio = 0
# Alternate between colors
cur_color = "zebra_0" if (cur_color == "zebra_1") else "zebra_1"
torrent_name = parse.unquote(name.group(1)).replace("+", " ")
# enhanced print output with justified columns
print("%5d %6d %6d %5.1f %-11s %-11s %-*s" % (
m, no_seeders, no_leechers, ratio ,sizes[m],
uploaded[m], columns, torrent_name), color=cur_color)
def print_descriptions(chosen_links):
for link in chosen_links:
link = int(link)
path = '/torrent/%s/' % identifiers[link]
req = request.Request(mirror + path)
req.add_header('Accept-encoding', 'gzip')
f = request.urlopen(req)
if f.info().get('Content-Encoding') == 'gzip':
f = gzip.GzipFile(fileobj=BytesIO(f.read()))
res = f.read().decode('utf-8')
name = re.search("dn=([^\&]*)", mags[link][0])
torrent_name = parse.unquote(name.group(1)).replace("+", " ")
desc = re.search(r"<div class=\"nfo\">\s*<pre>(.+?)(?=</pre>)",
res, re.DOTALL).group(1)
# Replace HTML links with markdown style versions
desc = re.sub(r"<a href=\"\s*([^\"]+?)\s*\"[^>]*>(\s*)([^<]+?)(\s*"
r")</a>", r"\2[\3](\1)\4", desc)
print('Description for "' + torrent_name + '":', color="zebra_1")
print(desc, color="zebra_0")
def print_fileLists(chosen_links):
for link in chosen_links:
path = '/ajax_details_filelist.php'
query = '?id=' + identifiers[int(link)]
req = request.Request(mirror + path + query)
req.add_header('Accept-encoding', 'gzip')
f = request.urlopen(req)
if f.info().get('Content-Encoding') == 'gzip':
f = gzip.GzipFile(fileobj=BytesIO(f.read()))
res = f.read().decode('utf-8').replace("&nbsp;", " ")
files = re.findall(r"<td align=\"left\">\s*([^<]+?)\s*</td><td ali"
r"gn=\"right\">\s*([^<]+?)\s*</tr>", res)
name = re.search("dn=([^\&]*)", mags[int(link)][0])
torrent_name = parse.unquote(name.group(1)).replace("+", " ")
print('Files in "' + torrent_name + '":', color="zebra_1")
cur_color = "zebra_0"
for f in files:
print("%-11s %s" % (f[1], f[0]), color=cur_color)
cur_color = "zebra_0" if (cur_color == "zebra_1") else "zebra_1"
print_search_results()
if args.first: if args.first:
print("Choosing first result"); print("Choosing first result");
@ -506,11 +508,11 @@ def main():
print("User Cancelled.") print("User Cancelled.")
sys.exit(0) sys.exit(0)
elif code == 'd': elif code == 'd':
print_descriptions(choices) print_descriptions(choices, mags, site, identifiers)
elif code == 'f': elif code == 'f':
print_fileLists(choices) print_fileLists(choices, mags, site, identifiers)
elif code == 'p': elif code == 'p':
print_search_results() print_search_results(mags, sizes, uploaded)
elif not l: elif not l:
print('No links entered!') print('No links entered!')
else: else:
@ -550,8 +552,6 @@ def main():
else: else:
webbrowser.open(url) webbrowser.open(url)
def id_generator(size=6, chars=string.ascii_uppercase + string.digits):
    """Return a random string of ``size`` characters drawn from ``chars``.

    By default the result is six characters long and uses uppercase ASCII
    letters and digits. Characters are picked one at a time with
    ``random.choice``.
    """
    picked = []
    for _ in range(size):
        picked.append(random.choice(chars))
    return ''.join(picked)
if __name__ == "__main__": if __name__ == "__main__":
main() main()