From 6fd505a72221c696e390f90e2c231da2e9087f4c Mon Sep 17 00:00:00 2001
From: Viktor Stanchev
Date: Thu, 21 Feb 2013 15:19:40 -0500
Subject: [PATCH] added the ability to use a local download the db

---
Reviewer notes (text between the "---" separator and the diff is ignored by
git-am and is not part of the commit message):

 * Line breaks and indentation were reconstructed from a copy in which all
   whitespace had been collapsed. The hunk still matches its own header
   (7 context + 15 removed = 22 old lines, 7 context + 66 added = 73 new
   lines), but the 4-space indentation, the position of the three blank
   context lines, and the nesting of "else:" in handle_data under the inner
   "if" are assumptions -- TODO confirm against the repository.
 * The From: header carries no address and the removed usage string ends in
   a trailing space; presumably text in angle brackets was stripped by the
   same extraction. If so, the removed line will not match the pre-image
   until it is restored.
 * MyHTMLParser.results is a class-level list mutated through
   self.results.append(), so every instance shares one list.
 * name.group(1) is called without checking that re.search() found "dn=",
   and int(l) / mags[int(l)] are applied to raw user input unchecked.

 pirate-get.py | 81 +++++++++++++++++++++++++++++++++++++++++----------
 1 file changed, 66 insertions(+), 15 deletions(-)

diff --git a/pirate-get.py b/pirate-get.py
index af111dc..5db4434 100755
--- a/pirate-get.py
+++ b/pirate-get.py
@@ -2,22 +2,73 @@
 import webbrowser
 import urllib
 import urllib2
-import sys
 import re
+from HTMLParser import HTMLParser
+import argparse
 
-if(len(sys.argv) == 1):
-    print "usage: pirate-get "
-    exit()
-q = sys.argv[1]
-f = urllib2.urlopen('http://thepiratebay.se/search/' + q.replace(" ", "+") + '/0/7/0')
 
-mag = re.findall(""""(magnet\:\?xt=[^"]*)""", f.read())
+# create a subclass and override the handler methods
+class MyHTMLParser(HTMLParser):
+    title = ''
+    q = ''
+    state = 'looking'
+    results = []
 
-if mag:
-    for m in range(len(mag)):
-        name = re.search("dn=([^\&]*)", mag[m])
-        print str(m) + ".", urllib.unquote(name.group(1).encode('ascii')).decode('utf-8').replace("+", " ")
-    l = raw_input("Select a link: ")
-    webbrowser.open(mag[int(l)])
-else:
-    print "no results"
+    def __init__(self, q):
+        HTMLParser.__init__(self)
+        self.q = q.lower()
+
+    def handle_starttag(self, tag, attrs):
+        if tag == 'title':
+            self.state = 'title'
+        if tag == 'magnet' and self.state == 'matched':
+            self.state = 'magnet'
+
+    def handle_data(self, data):
+        if self.state == 'title':
+            if data.lower().find(self.q) != -1:
+                self.title = data
+                self.state = 'matched'
+            else:
+                self.state = 'looking'
+        if self.state == 'magnet':
+            self.results.append('magnet:?xt=urn:btih:' + urllib.quote(data) + '&dn=' + urllib.quote(self.title))
+            self.state = 'looking'
+
+
+def main():
+
+    parser = argparse.ArgumentParser(description='Finds and downloads torrents from the Pirate Bay')
+
+    def local(args):
+        xml_str = ''
+        with open(args.database, 'r') as f:
+            xml_str += f.read()
+        htmlparser = MyHTMLParser(args.q)
+        htmlparser.feed(xml_str)
+        return htmlparser.results
+
+    def remote(args):
+        f = urllib2.urlopen('http://thepiratebay.se/search/' + args.q.replace(" ", "+") + '/0/7/0')
+        return re.findall(""""(magnet\:\?xt=[^"]*)""", f.read())
+
+    parser.add_argument('q', metavar='search_term', help="The term to search for")
+    parser.add_argument('--local', dest='database', help="An xml file containing the Pirate Bay database")
+
+    args = parser.parse_args()
+    if args.database:
+        mags = local(args)
+    else:
+        mags = remote(args)
+
+    if mags:
+        for m in range(len(mags)):
+            name = re.search("dn=([^\&]*)", mags[m])
+            print str(m) + ".", urllib.unquote(name.group(1).encode('ascii')).decode('utf-8').replace("+", " ")
+        l = raw_input("Select a link: ")
+        webbrowser.open(mags[int(l)])
+    else:
+        print "no results"
+
+if __name__ == "__main__":
+    main()