2012-11-16 08:52:09 +01:00
#!/usr/bin/env python
2014-02-15 16:55:28 +01:00
from __future__ import print_function
import __builtin__
2012-11-16 08:52:09 +01:00
import webbrowser
import urllib
2014-02-01 12:53:39 +01:00
import urllib2
2012-11-16 08:52:09 +01:00
import re
2013-12-14 14:30:11 +01:00
import os
2014-04-16 20:24:11 +02:00
import ConfigParser
import string
import random
2013-02-21 21:19:40 +01:00
from HTMLParser import HTMLParser
import argparse
2013-12-10 14:41:56 +01:00
from pprint import pprint
2014-05-11 20:37:22 +02:00
from StringIO import StringIO
import gzip
2012-11-16 08:52:09 +01:00
2014-02-01 12:58:55 +01:00
class NoRedirection(urllib2.HTTPErrorProcessor):
    """Response processor that returns every HTTP(S) response untouched.

    The overridden hook hands the response straight back instead of running
    the base class's processing, so the caller sees the server's original
    status code (and can check it with ``getcode()``) rather than having
    urllib2 act on it.
    """

    def http_response(self, request, response):
        """Return ``response`` exactly as received; ``request`` is unused."""
        return response

    # HTTPS responses get the same pass-through treatment.
    https_response = http_response
2013-02-21 21:19:40 +01:00
# create a subclass and override the handler methods
class MyHTMLParser(HTMLParser):
    """Collect magnet links for entries whose <title> contains a search term.

    The parser is a small state machine driven by the tags it sees:

    * ``looking`` - waiting for the next ``<title>`` tag
    * ``title``   - inside a ``<title>``; its text is compared with the query
    * ``matched`` - the title matched; waiting for the ``<magnet>`` tag
    * ``magnet``  - inside ``<magnet>``; its text is turned into a magnet URL

    Each hit is appended to ``self.results`` as ``[magnet_url, '?', '?']``;
    the two ``'?'`` entries fill the slots the caller uses for seeders and
    leechers.
    """

    # Class-level defaults for the per-instance parsing state.
    title = ''
    q = ''
    state = 'looking'

    def __init__(self, q):
        """Create a parser that matches titles containing ``q`` (case-insensitive)."""
        # The base initialiser sets up the parser's internal buffers; without
        # it feed() cannot run. An explicit base call (rather than super())
        # matches the Python 2 HTMLParser module this file imports.
        HTMLParser.__init__(self)
        self.q = q.lower()
        # Per-instance list: a class-level list would be shared by every
        # parser ever created.
        self.results = []

    def handle_starttag(self, tag, attrs):
        """Advance the state machine on <title> and <magnet> tags."""
        if tag == 'title':
            self.state = 'title'
        if tag == 'magnet' and self.state == 'matched':
            self.state = 'magnet'

    def handle_data(self, data):
        """Check title text against the query, or record a magnet hash."""
        if self.state == 'title':
            if data.lower().find(self.q) != -1:
                self.title = data
                self.state = 'matched'
            else:
                # Only a non-matching title sends us back to 'looking';
                # resetting unconditionally would make 'matched' unreachable.
                self.state = 'looking'
        if self.state == 'magnet':
            self.results.append(['magnet:?xt=urn:btih:' + urllib.quote(data) + '&dn=' + urllib.quote(self.title), '?', '?'])
            self.state = 'looking'
def main():
    """Search the Pirate Bay (or a local XML dump) and hand off chosen magnets.

    Flow: parse the command line, collect results either from the ``--local``
    file or by scraping the first mirror that answers, print a numbered
    table, let the user pick entries (or take the first one with ``-0``),
    then either write the magnet URLs to a file (``SaveToFile`` config
    section) or pass each one to transmission-remote, a custom command, or
    the system's default URL handler.
    """
    # Function-scope import: only needed for the transmission-remote hand-off.
    import subprocess

    # new ConfigParser
    config = ConfigParser.ConfigParser()
    # default options so we dont die later; the section has to exist before
    # set() is called, otherwise ConfigParser raises NoSectionError.
    config.add_section('SaveToFile')
    # Stored as a string so getboolean() can read it (and any value that
    # later comes from a config file) uniformly.
    config.set('SaveToFile', 'enabled', 'false')
    config.set('SaveToFile', 'directory', '~/Dropbox/pirate-get/')
    # NOTE(review): nothing visible here reads a user config file, so only
    # the defaults above apply - confirm whether a config.read() is missing.

    parser = argparse.ArgumentParser(description='Finds and downloads torrents from the Pirate Bay')
    parser.add_argument('q', metavar='search_term', help="The term to search for")
    parser.add_argument('-t', dest='transmission', action='store_true', help="call transmission-remote to start the download", default=False)
    parser.add_argument('--custom', dest='command', help="call custom command, %%s will be replaced with the url")
    parser.add_argument('--local', dest='database', help="An xml file containing the Pirate Bay database")
    parser.add_argument('-p', dest='pages', help="The number of pages to fetch (doesn't work with --local)", default=1)
    parser.add_argument('-0', dest='first', action='store_true', help="choose the top result", default=False)
    parser.add_argument('--color', dest='color', action='store_true', help="use colored output", default=False)

    def cancel():
        """Exit cleanly after the user pressed Ctrl-C."""
        echo("\nCancelled.")
        raise SystemExit(0)

    #todo: redo this with html parser instead of regex
    def remote(args, mirror):
        """Scrape ``args.pages`` result pages for ``args.q`` from ``mirror``.

        Returns ``(results, sizes, uploaded)`` where ``results`` is a list of
        ``[magnet_url, seeders, leechers]`` and the other two lists hold the
        size and upload-date text found on the pages, in page order.
        Raises ``Exception`` for an invalid page count or when the page does
        not look like a Pirate Bay result page.
        """
        try:
            pages = int(args.pages)
        except (TypeError, ValueError):
            pages = 0
        if pages < 1:
            raise Exception("Please provide an integer greater than 0 for the number of pages to fetch.")

        res_l = []
        sizes = []
        uploaded = []
        # Catch the Ctrl-C exception and exit cleanly
        try:
            for page in xrange(pages):
                request = urllib2.Request(mirror + '/search/' + args.q.replace(" ", "+") + '/' + str(page) + '/7/0')
                request.add_header('Accept-encoding', 'gzip')
                response = urllib2.urlopen(request)
                if response.info().get('Content-Encoding') == 'gzip':
                    # Body is compressed: unwrap it through an in-memory buffer.
                    response = gzip.GzipFile(fileobj=StringIO(response.read()))
                res = response.read()

                # Each tuple holds either a magnet URL (group 1) or the text
                # of a right-aligned table cell (group 2).
                found = re.findall(r'"(magnet\:\?xt=[^"]*)|<td align="right">([^<]+)</td>', res)

                # check for a blocked mirror
                no_results = re.search(r'"No hits\.', res)
                if found == [] and no_results is None:
                    # Contradiction - we found no results, but the page didn't say there were no results
                    # the page is probably not actually the pirate bay, so let's try another mirror
                    raise Exception("Blocked mirror detected.")

                # get sizes as well and substitute the &nbsp; character.
                # Extend rather than assign so that rows from every fetched
                # page stay aligned with res_l.
                # NOTE(review): the "&nbsp;" literals are restored here; in
                # the reviewed text the entity had been decoded to a blank,
                # which turned the replace() into a no-op.
                sizes.extend(match.replace("&nbsp;", " ") for match in re.findall(r"(?<=Size )[0-9.]+&nbsp;[KMGT]*[i ]*B", res))
                uploaded.extend(match.replace("&nbsp;", " ") for match in re.findall(r"(?<=Uploaded ).+(?=\, Size)", res))

                state = "seeds"
                curr = ['', 0, 0]  # magnet, seeds, leeches
                for hit in found:
                    if hit[1] == '':
                        curr[0] = hit[0]
                    elif state == 'seeds':
                        curr[1] = hit[1]
                        state = 'leeches'
                    else:
                        # Second numeric cell completes the row.
                        curr[2] = hit[1]
                        state = 'seeds'
                        res_l.append(curr)
                        curr = ['', 0, 0]
        except KeyboardInterrupt:
            cancel()

        # return the sizes in a separate list
        return res_l, sizes, uploaded

    args = parser.parse_args()

    def make_print():
        """Return a print-like function that accepts an extra ``color=`` keyword.

        With ``--color`` the keyword selects an entry of the colour table;
        otherwise the keyword is simply discarded.
        """
        if args.color:
            import colorama
            # colorama's documented setup call (needed for ANSI codes on Windows).
            colorama.init()
            color_dict = {"default": "",
                          "header": colorama.Back.WHITE + colorama.Fore.BLACK,
                          "zebra_0": "",
                          "zebra_1": colorama.Style.DIM,
                          "WARN": colorama.Fore.YELLOW,
                          "ERROR": colorama.Fore.RED}

            def n_print(*args, **kwargs):
                """Print with colors"""
                try:
                    c = color_dict[kwargs.pop("color")]
                    # "%s" % ... instead of str(): str() would fail on
                    # non-ASCII unicode text under Python 2.
                    args = (c + "%s" % (args[0],),) + args[1:] + (colorama.Style.RESET_ALL,)
                except (KeyError, IndexError):
                    # No/unknown colour, or nothing to print: print plainly.
                    pass
                return __builtin__.print(*args, **kwargs)
        else:
            def n_print(*args, **kwargs):
                """Print without colors, ignoring any ``color=`` keyword."""
                kwargs.pop("color", None)
                return __builtin__.print(*args, **kwargs)
        return n_print

    # All output below goes through this wrapper so color= is always accepted.
    echo = make_print()

    def local(args):
        """Return the matches for ``args.q`` found in the ``--local`` XML file."""
        with open(args.database, 'r') as f:
            xml_str = f.read()
        htmlparser = MyHTMLParser(args.q)
        # Without feeding the document the parser never sees any data.
        htmlparser.feed(xml_str)
        return htmlparser.results

    def as_count(value):
        """Convert a seeder/leecher cell to int; unknown values count as 0."""
        try:
            return int(value)
        except (TypeError, ValueError):
            return 0

    def column(values, index):
        """Return ``values[index]``, or '?' when there is no entry for that row."""
        return values[index] if index < len(values) else '?'

    # Defined up front so later code never hits an unbound name, whichever
    # branch runs and even if every mirror fails.
    mags, sizes, uploaded = [], [], []

    if args.database:
        mags = local(args)
    else:
        mirrors = ["http://thepiratebay.se"]
        try:
            opener = urllib2.build_opener(NoRedirection)
            f = opener.open("http://proxybay.info/list.txt")
            if f.getcode() != 200:
                raise Exception("The pirate bay responded with an error.")
            res = f.read()
            # Skip the first three lines of the list and any blank lines.
            mirrors += [line for line in res.split("\n")[3:] if line.strip()]
        except Exception:
            echo("Could not fetch additional mirrors", color="WARN")

        for mirror in mirrors:
            try:
                echo("Trying " + mirror)
                mags, sizes, uploaded = remote(args, mirror)
                break
            except Exception as e:
                echo(format(e), color="WARN")
                echo("Could not contact " + mirror, color="WARN")

    if not mags:
        echo("no results")
        return

    # enhanced print output with column titles
    echo("%5s %6s %6s %-5s %-11s %-11s %s"
         % ("LINK", "SEED", "LEECH", "RATIO", "SIZE", "UPLOAD", "NAME"),
         color="header")
    cur_color = "zebra_0"
    for m, magnet in enumerate(mags):
        no_seeders = as_count(magnet[1])
        no_leechers = as_count(magnet[2])
        name = re.search(r"dn=([^\&]*)", magnet[0])

        # compute the S/L ratio (Higher is better); float() avoids Python 2
        # integer division truncating the value shown with %5.1f
        try:
            ratio = float(no_seeders) / no_leechers
        except ZeroDivisionError:
            ratio = -1

        # Alternate between colors
        cur_color = "zebra_0" if (cur_color == "zebra_1") else "zebra_1"

        if name is None:
            torrent_name = '?'
        else:
            torrent_name = urllib.unquote(name.group(1).encode('ascii')) \
                .decode('utf-8').replace("+", " ")
        # enhanced print output with justified columns
        echo("%5d %6d %6d %5.1f %-11s %-11s %s" % (
            m, no_seeders, no_leechers, ratio, column(sizes, m),
            column(uploaded, m), torrent_name), color=cur_color)

    if args.first:
        echo("Choosing first result")
        choices = [0]
    else:
        try:
            l = raw_input("Select link(s): ")
        except KeyboardInterrupt:
            cancel()
        except EOFError:
            l = ""
        # Very permissive handling
        # Substitute multiple consecutive spaces or commas for single comma
        l = re.sub("[ ,]+", ",", l)
        # Remove anything that isn't an integer or comma.
        l = re.sub("[^0-9,]", "", l)
        # Turn into list, dropping the empty tokens that empty input or
        # leading/trailing commas produce.
        choices = [int(token) for token in l.split(",") if token]

    # Keep only numbers that refer to a listed row.
    selected = []
    for choice in choices:
        if 0 <= choice < len(mags):
            selected.append(choice)
        else:
            echo("Ignoring invalid link number %d" % choice, color="WARN")

    if config.getboolean('SaveToFile', 'enabled'):
        # Save to file is enabled
        fileName = os.path.join(os.path.expanduser(config.get('SaveToFile', 'directory')),
                                id_generator() + '.magnet')
        echo("Saving to File: " + fileName)
        with open(fileName, 'w') as f:
            for choice in selected:
                f.write(mags[choice][0] + '\n')
    else:
        for choice in selected:
            url = mags[choice][0]
            if args.transmission:
                # The URL comes from a remote page, so pass it as an argument
                # vector instead of interpolating it into a shell string.
                try:
                    subprocess.call(["transmission-remote", "--add", url])
                    subprocess.call(["transmission-remote", "-l"])
                except OSError as e:
                    echo("Could not run transmission-remote: " + format(e), color="ERROR")
            elif args.command:
                # SECURITY NOTE: url originates from the scraped page and is
                # substituted into a shell command line; the user's template
                # decides the quoting, so it is left untouched here.
                os.system(args.command % (url))
            else:
                # NOTE(review): assumed fallback - hand the magnet link to the
                # system's default handler via the otherwise unused
                # webbrowser import; confirm this is the intended default.
                webbrowser.open(url)
2014-04-16 20:24:11 +02:00
def id_generator(size=6, chars=string.ascii_uppercase + string.digits):
    """Return a random string of ``size`` characters drawn from ``chars``.

    Defaults to six characters from A-Z and 0-9. Uses the ``random`` module,
    so the result is fine for file names but not for anything
    security-sensitive.
    """
    return ''.join(random.choice(chars) for _ in range(size))
2013-02-21 21:19:40 +01:00
if __name__ == "__main__":