# create a subclass and override the handler methods
class MyHTMLParser(HTMLParser):
    """Collect magnet links for entries whose title contains a search term.

    The parser is a small state machine driven by two tags:

    * ``<title>``  - its text is compared (case-insensitively) against the
      search term; on a hit the title is remembered.
    * ``<magnet>`` - if the preceding title matched, its text is taken as
      the torrent hash and a magnet URI is appended to ``results``.

    Attributes:
        q: The lower-cased search term.
        title: Text of the most recently matched ``<title>`` element.
        state: One of ``'looking'``, ``'title'``, ``'matched'``, ``'magnet'``.
        results: List of ``[magnet_uri, '?', '?']`` entries, one per match.
            The two ``'?'`` items are placeholders; the fields they stand in
            for are not defined by this class.
    """

    def __init__(self, q):
        """Create a parser that searches titles for *q* (case-insensitive)."""
        super().__init__()
        self.q = q.lower()
        self.title = ''
        self.state = 'looking'
        # Per-instance list: a class-level ``results = []`` would be shared
        # by every parser, leaking matches from one search into the next.
        self.results = []

    def handle_starttag(self, tag, attrs):
        """Advance the state machine when a relevant start tag is seen."""
        if tag == 'title':
            self.state = 'title'
        elif tag == 'magnet' and self.state == 'matched':
            # Only consume a magnet whose preceding title matched the query.
            self.state = 'magnet'

    def handle_data(self, data):
        """Inspect title text, or record a magnet link for a matched title."""
        if self.state == 'title':
            if self.q in data.lower():
                self.title = data
                self.state = 'matched'
            else:
                self.state = 'looking'
        elif self.state == 'magnet':
            magnet_uri = (
                'magnet:?xt=urn:btih:' + urllib.parse.quote(data)
                + '&dn=' + urllib.parse.quote(self.title)
            )
            self.results.append([magnet_uri, '?', '?'])
            self.state = 'looking'
except Exception: + raise Exception("Please provide an integer greater than 0 for the number of pages to fetch.") + + for page in range(pages): + f = urllib.request.urlopen('http://thepiratebay.se/search/' + args.q.replace(" ", "+") + '/' + str(page) + '/7/0') + res = f.read() + found = re.findall(b""""(magnet\:\?xt=[^"]*)|