1
0
mirror of https://github.com/vikstrous/pirate-get synced 2025-01-10 10:04:21 +01:00

Unclutter main function

Why was everything inside main()?
This commit is contained in:
Rnhmjoj 2014-12-03 22:43:11 +01:00
parent edc4d4ed71
commit c823f1564e

View File

@ -32,109 +32,9 @@ import urllib.request as request
import urllib.parse as parse import urllib.parse as parse
from html.parser import HTMLParser from html.parser import HTMLParser
from pprint import pprint from io import BytesIO
from io import StringIO, BytesIO
categories = {
class NoRedirection(request.HTTPErrorProcessor):
    """Opener processor that disables urllib's automatic error/redirect
    handling: every response is returned verbatim, so the caller can
    inspect 3xx statuses (e.g. mirror redirects) itself."""
    def http_response(self, request, response):
        # Return the response unchanged instead of raising or following
        # a redirect (the default HTTPErrorProcessor behaviour).
        return response
    # Apply the same pass-through behaviour to HTTPS responses.
    https_response = http_response
# create a subclass and override the handler methods
# create a subclass and override the handler methods
class MyHTMLParser(HTMLParser):
    """Parser for a local Pirate Bay XML database dump.

    Collects a magnet link for every <title> whose text contains the
    search query (case-insensitive).  Matches accumulate in
    ``self.results`` as ``[magnet_url, '?', '?']`` triples — seeder and
    leecher counts are unknown for local data, hence the ``'?'`` fields.
    """

    def __init__(self, q):
        HTMLParser.__init__(self)
        self.q = q.lower()
        # These used to be class attributes; the mutable ``results``
        # list was therefore shared by every instance, so matches from
        # one search leaked into the next.  Keep all state per-instance.
        self.title = ''
        self.state = 'looking'
        self.results = []

    def handle_starttag(self, tag, attrs):
        # A <title> tag starts a candidate entry; a <magnet> tag seen
        # right after a matched title carries the info-hash we want.
        if tag == 'title':
            self.state = 'title'
        if tag == 'magnet' and self.state == 'matched':
            self.state = 'magnet'

    def handle_data(self, data):
        if self.state == 'title':
            if data.lower().find(self.q) != -1:
                self.title = data
                self.state = 'matched'
            else:
                self.state = 'looking'
        if self.state == 'magnet':
            self.results.append([
                'magnet:?xt=urn:btih:' +
                parse.quote(data) +
                '&dn=' +
                parse.quote(self.title), '?', '?'])
            self.state = 'looking'
def main():
# new ConfigParser
config = configparser.ConfigParser()
# default options so we dont die later
config.add_section('SaveToFile')
config.set('SaveToFile', 'enabled', 'false')
config.set('SaveToFile', 'directory', '~/downloads/pirate-get/')
# load user options, to override default ones
def config_to_load():
if os.path.isfile(os.path.expandvars('$XDG_CONFIG_HOME/pirate-get')):
return os.path.expandvars('$XDG_CONFIG_HOME/pirate-get')
else:
return os.path.expanduser('~/.config/pirate-get')
config.read([config_to_load()])
parser = argparse.ArgumentParser(
description='finds and downloads torrents from the Pirate Bay')
parser.add_argument('-b', dest='browse',
action='store_true',
help="display in Browse mode")
parser.add_argument('search', metavar='search',
nargs="*", help="term to search for")
parser.add_argument('-c', dest='category', metavar='category',
help="specify a category to search", default="All")
parser.add_argument('-s', dest='sort', metavar='sort',
help="specify a sort option", default="SeedersDsc")
parser.add_argument('-R', dest='recent', action='store_true',
help="torrents uploaded in the last 48hours."
"*ignored in searches*")
parser.add_argument('-l', dest='list_categories',
action='store_true',
help="list categories")
parser.add_argument('--list_sorts', dest='list_sorts',
action='store_true',
help="list Sortable Types")
parser.add_argument('-t', dest='transmission',
action='store_true',
help="call transmission-remote to start the download")
parser.add_argument('--custom', dest='command',
help="call custom command, %%s will be replaced with"
"the url")
parser.add_argument('--local', dest='database',
help="an xml file containing the Pirate Bay database")
parser.add_argument('-p', dest='pages', default=1,
help="the number of pages to fetch (doesn't work with"
"--local)")
parser.add_argument('-0', dest='first',
action='store_true',
help="choose the top result")
parser.add_argument('-a', dest='download_all',
action='store_true',
help="download all results")
parser.add_argument('--color', dest='color',
action='store_false', default=True,
help="disable colored output")
categories = {
"All":"0", "All":"0",
"Audio":"100", "Audio":"100",
"Audio/Music":"101", "Audio/Music":"101",
@ -187,7 +87,7 @@ def main():
"Other/Physibles":"605", "Other/Physibles":"605",
"Other/Other":"699"} "Other/Other":"699"}
sorts = { sorts = {
"TitleDsc": "1", "TitleAsc": "2", "TitleDsc": "1", "TitleAsc": "2",
"DateDsc": "3", "DateAsc": "4", "DateDsc": "3", "DateAsc": "4",
"SizeDsc": "5", "SizeAsc": "6", "SizeDsc": "5", "SizeAsc": "6",
@ -196,8 +96,75 @@ def main():
"CategoryDsc": "13", "CategoryAsc": "14", "CategoryDsc": "13", "CategoryAsc": "14",
"Default": "99"} "Default": "99"}
#todo: redo this with html parser instead of regex
def remote(args, mirror): class NoRedirection(request.HTTPErrorProcessor):
def http_response(self, request, response):
return response
https_response = http_response
# create a subclass and override the handler methods
# create a subclass and override the handler methods
class MyHTMLParser(HTMLParser):
    """Parser for a local Pirate Bay XML database dump.

    Collects a magnet link for every <title> whose text contains the
    search query (case-insensitive).  Matches accumulate in
    ``self.results`` as ``[magnet_url, '?', '?']`` triples — seeder and
    leecher counts are unknown for local data, hence the ``'?'`` fields.
    """

    def __init__(self, q):
        HTMLParser.__init__(self)
        self.q = q.lower()
        # These used to be class attributes; the mutable ``results``
        # list was therefore shared by every instance, so matches from
        # one search leaked into the next.  Keep all state per-instance.
        self.title = ''
        self.state = 'looking'
        self.results = []

    def handle_starttag(self, tag, attrs):
        # A <title> tag starts a candidate entry; a <magnet> tag seen
        # right after a matched title carries the info-hash we want.
        if tag == 'title':
            self.state = 'title'
        if tag == 'magnet' and self.state == 'matched':
            self.state = 'magnet'

    def handle_data(self, data):
        if self.state == 'title':
            if data.lower().find(self.q) != -1:
                self.title = data
                self.state = 'matched'
            else:
                self.state = 'looking'
        if self.state == 'magnet':
            self.results.append([
                'magnet:?xt=urn:btih:' +
                parse.quote(data) +
                '&dn=' +
                parse.quote(self.title), '?', '?'])
            self.state = 'looking'
def print(*args, **kwargs):
    """Module-level replacement for the builtin print.

    An optional ``color`` keyword selects a colorama style by name
    (``header``, ``zebra_0``/``zebra_1``, ``WARN``, ``ERROR``, ...).
    With no/falsy ``color`` the call is forwarded to the builtin
    untouched.  Unknown color names and empty argument lists are
    silently printed without color.
    """
    requested = kwargs.pop('color', False)
    if not requested:
        # Plain path: no colorama involved at all.
        return builtins.print(*args, **kwargs)
    import colorama
    colorama.init()
    palette = {
        "default": "",
        "header": colorama.Back.BLACK + colorama.Fore.BLUE,
        "alt": colorama.Fore.YELLOW,
        "zebra_0": "",
        "zebra_1": colorama.Fore.BLUE,
        "WARN": colorama.Fore.MAGENTA,
        "ERROR": colorama.Fore.RED}
    try:
        # Prefix the first argument with the style, append a reset so
        # the color never bleeds into later output.
        style = palette[requested]
        args = (style + args[0],) + args[1:] + (colorama.Style.RESET_ALL,)
    except (KeyError, IndexError):
        # Unknown color name, or nothing to colorize — print as-is.
        pass
    return builtins.print(*args, **kwargs)
#todo: redo this with html parser instead of regex
def remote(args, mirror):
res_l = [] res_l = []
try: try:
pages = int(args.pages) pages = int(args.pages)
@ -208,19 +175,19 @@ def main():
"for the number of pages to fetch.") "for the number of pages to fetch.")
if str(args.category) in categories.values(): if str(args.category) in categories.values():
category = args.category; category = args.category
elif args.category in categories.keys(): elif args.category in categories.keys():
category = categories[args.category] category = categories[args.category]
else: else:
category = "0"; category = "0"
print("Invalid category ignored", color="WARN") print("Invalid category ignored", color="WARN")
if str(args.sort) in sorts.values(): if str(args.sort) in sorts.values():
sort = args.sort; sort = args.sort
elif args.sort in sorts.keys(): elif args.sort in sorts.keys():
sort = sorts[args.sort] sort = sorts[args.sort]
else: else:
sort = "99"; sort = "99"
print("Invalid sort ignored", color="WARN") print("Invalid sort ignored", color="WARN")
# Catch the Ctrl-C exception and exit cleanly # Catch the Ctrl-C exception and exit cleanly
@ -253,11 +220,11 @@ def main():
if f.info().get('Content-Encoding') == 'gzip': if f.info().get('Content-Encoding') == 'gzip':
f = gzip.GzipFile(fileobj=BytesIO(f.read())) f = gzip.GzipFile(fileobj=BytesIO(f.read()))
res = f.read().decode('utf-8') res = f.read().decode('utf-8')
found = re.findall('"(magnet\:\?xt=[^"]*)|<td align="right">' found = re.findall(r'"(magnet\:\?xt=[^"]*)|<td align="right">'
'([^<]+)</td>', res) r'([^<]+)</td>', res)
# check for a blocked mirror # check for a blocked mirror
no_results = re.search("\"No hits\.", res) no_results = re.search(r"\"No hits\.", res)
if found == [] and not no_results is None: if found == [] and not no_results is None:
# Contradiction - we found no results, # Contradiction - we found no results,
# but the page didn't say there were no results. # but the page didn't say there were no results.
@ -267,12 +234,12 @@ def main():
# get sizes as well and substitute the &nbsp; character # get sizes as well and substitute the &nbsp; character
sizes.extend([match.replace("&nbsp;", " ") sizes.extend([match.replace("&nbsp;", " ")
for match in re.findall("(?<=Size )[0-9.]" for match in re.findall(r"(?<=Size )[0-9.]"
"+\&nbsp\;[KMGT]*[i ]*B", res)]) r"+\&nbsp\;[KMGT]*[i ]*B", res)])
uploaded.extend([match.replace("&nbsp;", " ") uploaded.extend([match.replace("&nbsp;", " ")
for match in re.findall("(?<=Uploaded )" for match in re.findall(r"(?<=Uploaded )"
".+(?=\, Size)",res)]) r".+(?=\, Size)",res)])
identifiers.extend([match.replace("&nbsp;", " ") identifiers.extend([match.replace("&nbsp;", " ")
for match in re.findall("(?<=/torrent/)" for match in re.findall("(?<=/torrent/)"
@ -294,45 +261,13 @@ def main():
curr = ['', 0, 0] curr = ['', 0, 0]
except KeyboardInterrupt : except KeyboardInterrupt :
print("\nCancelled.") print("\nCancelled.")
exit() sys.exit(0)
# return the sizes in a spearate list # return the sizes in a spearate list
return res_l, sizes, uploaded, identifiers return res_l, sizes, uploaded, identifiers
args = parser.parse_args()
def make_print(): def local(args):
if(args.color):
import colorama
colorama.init()
color_dict = {
"default": "",
"header": colorama.Back.BLACK + colorama.Fore.BLUE,
"zebra_0": "",
"zebra_1": colorama.Fore.BLUE,
"WARN": colorama.Fore.YELLOW,
"ERROR": colorama.Fore.RED}
def n_print(*args, **kwargs):
"""Print with colors"""
try:
c = color_dict[kwargs.pop("color")]
args = (c + args[0],) + args[1:] + (colorama.Style.RESET_ALL,)
except KeyError as e:
pass
except IndexError as e:
pass
return builtins.print(*args, **kwargs)
else:
def n_print(*args, **kwargs):
if("color" in kwargs):
kwargs.pop('color')
return builtins.print(*args, **kwargs)
return n_print
print=make_print()
def local(args):
xml_str = '' xml_str = ''
with open(args.database, 'r') as f: with open(args.database, 'r') as f:
xml_str += f.read() xml_str += f.read()
@ -341,6 +276,151 @@ def main():
return htmlparser.results return htmlparser.results
# load user options, to override default ones
def config_to_load():
    """Return the path of the user's pirate-get config file.

    Prefers ``$XDG_CONFIG_HOME/pirate-get`` when that file exists,
    otherwise falls back to ``~/.config/pirate-get``.
    """
    xdg_path = os.path.expandvars('$XDG_CONFIG_HOME/pirate-get')
    if os.path.isfile(xdg_path):
        return xdg_path
    return os.path.expanduser('~/.config/pirate-get')
# enhanced print output with column titles
# enhanced print output with column titles
def print_search_results(mags, sizes, uploaded):
    """Print the search results as a colored, zebra-striped table.

    mags     -- list of [magnet_url, seeders, leechers] entries
    sizes    -- torrent size strings, parallel to *mags*
    uploaded -- upload date strings, parallel to *mags*
    """
    # shutil.get_terminal_size reads the real terminal width and falls
    # back to 80 columns when stdout is not a tty; the previous
    # os.popen('stty size') approach raised IndexError whenever the
    # program ran without a terminal (pipe, cron, redirected output).
    import shutil
    columns = shutil.get_terminal_size((80, 24)).columns - 52
    cur_color = "zebra_0"
    print("%5s %6s %6s %-5s %-11s %-11s %-*s" \
        % ( "LINK", "SEED", "LEECH", "RATIO", "SIZE", "UPLOAD", columns, "NAME"),
        color="header")
    for m, magnet in enumerate(mags):
        no_seeders = int(magnet[1])
        no_leechers = int(magnet[2])
        name = re.search(r"dn=([^\&]*)", magnet[0])
        # compute the S/L ratio (Higher is better); 0 when no leechers
        try:
            ratio = no_seeders / no_leechers
        except ZeroDivisionError:
            ratio = 0
        # Alternate between colors
        cur_color = "zebra_0" if (cur_color == "zebra_1") else "zebra_1"
        torrent_name = parse.unquote(name.group(1)).replace("+", " ")
        # enhanced print output with justified columns
        print("%5d %6d %6d %5.1f %-11s %-11s %-*s" % (
            m, no_seeders, no_leechers, ratio, sizes[m],
            uploaded[m], columns, torrent_name), color=cur_color)
def print_descriptions(chosen_links, mags, site, identifiers):
    """Fetch each chosen torrent's detail page from *site* and print
    its description, with HTML links rewritten as markdown."""
    for raw_link in chosen_links:
        idx = int(raw_link)
        # Request the torrent page, accepting gzip to save bandwidth.
        req = request.Request(site + '/torrent/%s/' % identifiers[idx])
        req.add_header('Accept-encoding', 'gzip')
        resp = request.urlopen(req)
        if resp.info().get('Content-Encoding') == 'gzip':
            resp = gzip.GzipFile(fileobj=BytesIO(resp.read()))
        page = resp.read().decode('utf-8')
        # Recover the display name from the magnet link's dn= field.
        name = re.search(r"dn=([^\&]*)", mags[idx][0])
        torrent_name = parse.unquote(name.group(1)).replace("+", " ")
        desc = re.search(r"<div class=\"nfo\">\s*<pre>(.+?)(?=</pre>)",
                         page, re.DOTALL).group(1)
        # Replace HTML links with markdown style versions
        desc = re.sub(r"<a href=\"\s*([^\"]+?)\s*\"[^>]*>(\s*)([^<]+?)(\s*"
                      r")</a>", r"\2[\3](\1)\4", desc)
        print('Description for "' + torrent_name + '":', color="zebra_1")
        print(desc, color="zebra_0")
def print_fileLists(chosen_links, mags, site, identifiers):
    """Fetch and print the file list of each chosen torrent, one
    "size  name" row per file, zebra-striped."""
    for raw_link in chosen_links:
        idx = int(raw_link)
        # The AJAX endpoint returns the file table for one torrent id.
        url = site + '/ajax_details_filelist.php' + '?id=' + identifiers[idx]
        req = request.Request(url)
        req.add_header('Accept-encoding', 'gzip')
        resp = request.urlopen(req)
        if resp.info().get('Content-Encoding') == 'gzip':
            resp = gzip.GzipFile(fileobj=BytesIO(resp.read()))
        page = resp.read().decode('utf-8').replace("&nbsp;", " ")
        # Each match is a (file name, file size) pair.
        files = re.findall(r"<td align=\"left\">\s*([^<]+?)\s*</td><td ali"
                           r"gn=\"right\">\s*([^<]+?)\s*</tr>", page)
        # Recover the display name from the magnet link's dn= field.
        name = re.search(r"dn=([^\&]*)", mags[idx][0])
        torrent_name = parse.unquote(name.group(1)).replace("+", " ")
        print('Files in "' + torrent_name + '":', color="zebra_1")
        cur_color = "zebra_0"
        for fname, fsize in files:
            print("%-11s %s" % (fsize, fname), color=cur_color)
            cur_color = "zebra_0" if (cur_color == "zebra_1") else "zebra_1"
def id_generator(size=6, chars=string.ascii_uppercase + string.digits):
    """Return a random identifier of *size* characters drawn from
    *chars* (default: uppercase letters and digits)."""
    picked = [random.choice(chars) for _ in range(size)]
    return ''.join(picked)
def main():
# new ConfigParser
config = configparser.ConfigParser()
# default options so we dont die later
config.add_section('SaveToFile')
config.set('SaveToFile', 'enabled', 'false')
config.set('SaveToFile', 'directory', '~/downloads/pirate-get/')
config.read([config_to_load()])
parser = argparse.ArgumentParser(
description='finds and downloads torrents from the Pirate Bay')
parser.add_argument('-b', dest='browse',
action='store_true',
help="display in Browse mode")
parser.add_argument('search', metavar='search',
nargs="*", help="term to search for")
parser.add_argument('-c', dest='category', metavar='category',
help="specify a category to search", default="All")
parser.add_argument('-s', dest='sort', metavar='sort',
help="specify a sort option", default="SeedersDsc")
parser.add_argument('-R', dest='recent', action='store_true',
help="torrents uploaded in the last 48hours."
"*ignored in searches*")
parser.add_argument('-l', dest='list_categories',
action='store_true',
help="list categories")
parser.add_argument('--list_sorts', dest='list_sorts',
action='store_true',
help="list Sortable Types")
parser.add_argument('-t', dest='transmission',
action='store_true',
help="call transmission-remote to start the download")
parser.add_argument('--custom', dest='command',
help="call custom command, %%s will be replaced with"
"the url")
parser.add_argument('--local', dest='database',
help="an xml file containing the Pirate Bay database")
parser.add_argument('-p', dest='pages', default=1,
help="the number of pages to fetch (doesn't work with"
"--local)")
parser.add_argument('-0', dest='first',
action='store_true',
help="choose the top result")
parser.add_argument('-a', dest='download_all',
action='store_true',
help="download all results")
parser.add_argument('--color', dest='color',
action='store_false', default=True,
help="disable colored output")
args = parser.parse_args()
if args.list_categories: if args.list_categories:
cur_color = "zebra_0" cur_color = "zebra_0"
for key, value in sorted(categories.iteritems()) : for key, value in sorted(categories.iteritems()) :
@ -381,85 +461,7 @@ def main():
print("no results") print("no results")
return return
# enhanced print output with column titles print_search_results(mags, sizes, uploaded)
def print_search_results():
columns = int(os.popen('stty size', 'r').read().split()[1]) - 52
cur_color = "zebra_0"
print("%5s %6s %6s %-5s %-11s %-11s %-*s" \
% ( "LINK", "SEED", "LEECH", "RATIO", "SIZE", "UPLOAD", columns, "NAME"),
color="header")
for m in range(len(mags)):
magnet = mags[m]
no_seeders = int(magnet[1])
no_leechers = int(magnet[2])
name = re.search("dn=([^\&]*)", magnet[0])
# compute the S/L ratio (Higher is better)
try:
ratio = no_seeders/no_leechers
except ZeroDivisionError:
ratio = 0
# Alternate between colors
cur_color = "zebra_0" if (cur_color == "zebra_1") else "zebra_1"
torrent_name = parse.unquote(name.group(1)).replace("+", " ")
# enhanced print output with justified columns
print("%5d %6d %6d %5.1f %-11s %-11s %-*s" % (
m, no_seeders, no_leechers, ratio ,sizes[m],
uploaded[m], columns, torrent_name), color=cur_color)
def print_descriptions(chosen_links):
for link in chosen_links:
link = int(link)
path = '/torrent/%s/' % identifiers[link]
req = request.Request(mirror + path)
req.add_header('Accept-encoding', 'gzip')
f = request.urlopen(req)
if f.info().get('Content-Encoding') == 'gzip':
f = gzip.GzipFile(fileobj=BytesIO(f.read()))
res = f.read().decode('utf-8')
name = re.search("dn=([^\&]*)", mags[link][0])
torrent_name = parse.unquote(name.group(1)).replace("+", " ")
desc = re.search(r"<div class=\"nfo\">\s*<pre>(.+?)(?=</pre>)",
res, re.DOTALL).group(1)
# Replace HTML links with markdown style versions
desc = re.sub(r"<a href=\"\s*([^\"]+?)\s*\"[^>]*>(\s*)([^<]+?)(\s*"
r")</a>", r"\2[\3](\1)\4", desc)
print('Description for "' + torrent_name + '":', color="zebra_1")
print(desc, color="zebra_0")
def print_fileLists(chosen_links):
for link in chosen_links:
path = '/ajax_details_filelist.php'
query = '?id=' + identifiers[int(link)]
req = request.Request(mirror + path + query)
req.add_header('Accept-encoding', 'gzip')
f = request.urlopen(req)
if f.info().get('Content-Encoding') == 'gzip':
f = gzip.GzipFile(fileobj=BytesIO(f.read()))
res = f.read().decode('utf-8').replace("&nbsp;", " ")
files = re.findall(r"<td align=\"left\">\s*([^<]+?)\s*</td><td ali"
r"gn=\"right\">\s*([^<]+?)\s*</tr>", res)
name = re.search("dn=([^\&]*)", mags[int(link)][0])
torrent_name = parse.unquote(name.group(1)).replace("+", " ")
print('Files in "' + torrent_name + '":', color="zebra_1")
cur_color = "zebra_0"
for f in files:
print("%-11s %s" % (f[1], f[0]), color=cur_color)
cur_color = "zebra_0" if (cur_color == "zebra_1") else "zebra_1"
print_search_results()
if args.first: if args.first:
print("Choosing first result"); print("Choosing first result");
@ -506,11 +508,11 @@ def main():
print("User Cancelled.") print("User Cancelled.")
sys.exit(0) sys.exit(0)
elif code == 'd': elif code == 'd':
print_descriptions(choices) print_descriptions(choices, mags, site, identifiers)
elif code == 'f': elif code == 'f':
print_fileLists(choices) print_fileLists(choices, mags, site, identifiers)
elif code == 'p': elif code == 'p':
print_search_results() print_search_results(mags, sizes, uploaded)
elif not l: elif not l:
print('No links entered!') print('No links entered!')
else: else:
@ -550,8 +552,6 @@ def main():
else: else:
webbrowser.open(url) webbrowser.open(url)
def id_generator(size=6, chars=string.ascii_uppercase + string.digits):
return ''.join(random.choice(chars) for _ in range(size))
if __name__ == "__main__": if __name__ == "__main__":
main() main()