2012-11-16 08:52:09 +01:00
|
|
|
#!/usr/bin/env python
|
|
|
|
import webbrowser
|
|
|
|
import urllib
|
|
|
|
import urllib2
|
|
|
|
import re
|
2013-02-21 21:19:40 +01:00
|
|
|
from HTMLParser import HTMLParser
|
|
|
|
import argparse
|
2013-12-10 14:41:56 +01:00
|
|
|
from pprint import pprint
|
2012-11-16 08:52:09 +01:00
|
|
|
|
2013-02-21 21:19:40 +01:00
|
|
|
|
|
|
|
# create a subclass and override the handler methods
|
|
|
|
class MyHTMLParser(HTMLParser):
    """Collect magnet links for entries whose title contains a query.

    The parser is a small state machine driven by the tag stream:

    * ``looking`` - waiting for the next ``<title>`` element;
    * ``title``   - a ``<title>`` was opened; its text is compared,
      case-insensitively, with the query;
    * ``matched`` - the last title contained the query, so the next
      ``<magnet>`` element belongs to a wanted entry;
    * ``magnet``  - a ``<magnet>`` was opened after a match; its text is
      recorded as the torrent hash.

    Every hit is appended to ``self.results`` as ``[magnet_uri, '?', '?']``.
    The two ``'?'`` fields occupy the positions that ``main()`` prints in its
    SEED and LEECH columns.
    """

    # Class-level defaults; all three are immutable strings, so sharing them
    # between instances is harmless.
    title = ''
    q = ''
    state = 'looking'

    def __init__(self, q):
        """Create a parser that looks for *q* (matched case-insensitively)."""
        # HTMLParser is an old-style class on Python 2, so super() cannot be
        # used here; call the base initialiser explicitly.
        HTMLParser.__init__(self)
        self.q = q.lower()
        # Per-instance list. A class-level ``results = []`` would be shared
        # by every parser and keep hits from earlier searches.
        self.results = []

    def handle_starttag(self, tag, attrs):
        """Advance the state machine when a relevant element opens."""
        if tag == 'title':
            self.state = 'title'
        elif tag == 'magnet' and self.state == 'matched':
            # Only magnets that directly follow a matching title are wanted.
            self.state = 'magnet'

    def handle_data(self, data):
        """Inspect title text for the query, or record a wanted magnet hash."""
        if self.state == 'title':
            if self.q in data.lower():
                self.title = data
                self.state = 'matched'
            else:
                self.state = 'looking'
        elif self.state == 'magnet':
            self.results.append([
                'magnet:?xt=urn:btih:' + urllib.quote(data)
                + '&dn=' + urllib.quote(self.title),
                '?',
                '?',
            ])
            self.state = 'looking'
|
|
|
|
|
|
|
|
|
|
|
|
def main():
    """Search the Pirate Bay for a term and open the chosen magnet link.

    Command-line arguments (parsed by ``argparse``):

    * ``search_term`` - the text to search for;
    * ``--local FILE`` - search an XML database dump instead of the website;
    * ``-p N`` - number of result pages to fetch (remote searches only).

    The matches are printed as a table, the user is asked for a link number,
    and the selected magnet URI is handed to ``webbrowser.open``.

    Raises:
        ValueError: if ``-p`` is not an integer greater than 0 (remote
            searches only).
    """
    parser = argparse.ArgumentParser(description='Finds and downloads torrents from the Pirate Bay')
    parser.add_argument('q', metavar='search_term', help="The term to search for")
    parser.add_argument('--local', dest='database', help="An xml file containing the Pirate Bay database")
    parser.add_argument('-p', dest='pages', help="The number of pages to fetch (doesn't work with --local)", default=1)

    def local(args):
        """Return the ``[magnet, '?', '?']`` rows found in ``args.database``."""
        with open(args.database, 'r') as database:
            xml_str = database.read()
        htmlparser = MyHTMLParser(args.q)
        htmlparser.feed(xml_str)
        return htmlparser.results

    # todo: redo this with html parser instead of regex
    def remote(args):
        """Scrape ``args.pages`` result pages from the website.

        Returns a ``(rows, sizes, uploaded)`` tuple. ``rows`` is a list of
        ``[magnet, seeds, leeches]`` with the counts kept as the strings
        found in the page; ``sizes`` and ``uploaded`` are lists of display
        strings gathered over all fetched pages.
        """
        try:
            pages = int(args.pages)
        except (TypeError, ValueError):
            pages = 0
        if pages < 1:
            raise ValueError("Please provide an integer greater than 0 for the number of pages to fetch.")

        res_l = []
        # Accumulated over every page so they stay index-aligned with res_l;
        # rebinding them per page left only the last page's entries.
        sizes = []
        uploaded = []

        # Catch the Ctrl-C exception and exit cleanly
        try:
            for page in xrange(pages):
                # quote_plus turns spaces into '+' (as before) and also
                # escapes characters such as '/', '?' or '#' that would
                # otherwise corrupt the URL.
                url = ('http://thepiratebay.se/search/' + urllib.quote_plus(args.q)
                       + '/' + str(page) + '/7/0')
                response = urllib2.urlopen(url)
                try:
                    res = response.read()
                finally:
                    response.close()

                # Each match is a (magnet, count) pair in which exactly one
                # member is non-empty: either a quoted magnet URI or the text
                # of a right-aligned table cell.
                found = re.findall(r'"(magnet\:\?xt=[^"]*)|<td align="right">([^<]+)</td>', res)

                # get sizes as well and substitute the &nbsp; character
                sizes.extend(
                    match.replace("&nbsp;", " ")
                    for match in re.findall(r"(?<=Size )[0-9.]+&nbsp;[KMGT]*[i ]*B", res)
                )
                uploaded.extend(
                    match.replace("&nbsp;", " ")
                    for match in re.findall(r"(?<=Uploaded ).+(?=\, Size)", res)
                )

                # Cells alternate seeds, leeches; a row is complete once the
                # leech count has been seen.
                state = "seeds"
                curr = ['', 0, 0]  # magnet, seeds, leeches
                for magnet_uri, count in found:
                    if count == '':
                        curr[0] = magnet_uri
                    elif state == 'seeds':
                        curr[1] = count
                        state = 'leeches'
                    else:
                        curr[2] = count
                        state = 'seeds'
                        res_l.append(curr)
                        curr = ['', 0, 0]
        except KeyboardInterrupt:
            print("\nCancelled.")
            exit()

        # return the sizes in a separate list
        return res_l, sizes, uploaded

    args = parser.parse_args()
    if args.database:
        mags = local(args)
        # The local parser yields no size or upload information; empty lists
        # make the table fall back to '?' below.
        sizes = []
        uploaded = []
    else:
        mags, sizes, uploaded = remote(args)

    if not mags:
        print("no results")
        return

    # enhanced print output with column titles
    print("\n%-5s %-6s %-6s %-5s %-11s %-11s %s" % ("LINK", "SEED", "LEECH", "RATIO", "SIZE", "UPLOAD", "NAME"))
    for m, magnet in enumerate(mags):
        name = re.search(r"dn=([^&]*)", magnet[0])

        # compute the S/L ratio (Higher is better)
        try:
            ratio = float(magnet[1]) / float(magnet[2])
        except (ValueError, ZeroDivisionError):
            # '?' placeholders from --local, or zero leeches.
            ratio = 0

        # The regex-scraped columns may be shorter than the row list.
        size = sizes[m] if m < len(sizes) else '?'
        upload = uploaded[m] if m < len(uploaded) else '?'
        if name:
            title = urllib.unquote(name.group(1).encode('ascii')).decode('utf-8').replace("+", " ")
        else:
            title = '?'

        # enhanced print output with justified columns
        print("%-5s %-6s %-6s %5.1f %-11s %-11s %s" % (m, magnet[1], magnet[2], ratio, size, upload, title))

    try:
        answer = raw_input("Select a link: ")
    except (KeyboardInterrupt, EOFError):
        print("\nCancelled.")
        exit()

    try:
        choice = int(answer)
    except ValueError:
        choice = None

    # Anything that is not a listed link number cancels. This also rejects
    # negative numbers, which would otherwise index from the end of the list.
    if choice is not None and 0 <= choice < len(mags):
        webbrowser.open(mags[choice][0])
    else:
        print("Cancelled.")
|
|
|
|
|
|
|
|
# Script entry point: run the command-line interface only when this file is
# executed directly, not when it is imported as a module.
if __name__ == "__main__":
    main()
|