1
0
mirror of https://github.com/vikstrous/pirate-get synced 2025-01-10 10:04:21 +01:00
pirate-get/pirate-get.py

686 lines
24 KiB
Python
Raw Normal View History

2012-11-16 08:52:09 +01:00
#!/usr/bin/env python
2014-10-28 07:48:02 +01:00
#
2015-01-31 16:21:58 +01:00
# Copyright 2015, Viktor Stanchev and contributors
2014-10-28 07:48:02 +01:00
#
# This file is part of pirate-get.
#
# pirate-get is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# pirate-get is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with pirate-get. If not, see <http://www.gnu.org/licenses/>.
2013-12-14 14:30:11 +01:00
import os
2014-12-03 21:00:49 +01:00
import sys
2014-12-03 19:38:24 +01:00
import re
import string
import gzip
import configparser
import argparse
2014-12-03 19:38:24 +01:00
import builtins
import subprocess
2014-12-03 19:38:24 +01:00
import webbrowser
import urllib.request as request
import urllib.parse as parse
from html.parser import HTMLParser
2015-03-25 00:08:35 +01:00
from urllib.error import URLError, HTTPError
2014-12-10 18:26:34 +01:00
from socket import timeout
from io import BytesIO
from os.path import expanduser, expandvars
2015-03-26 00:09:21 +01:00
# Global switch read by the print() wrapper in this file: colors are only
# emitted while this is true. main() sets it to False when colors are
# disabled in the config file or via --disable-colors.
colored_output = True
2015-03-23 23:04:34 +01:00
# Timeout (seconds) passed as `timeout=` to every urlopen()/opener.open()
# call made in this file.
default_timeout = 10
2015-01-31 16:20:28 +01:00
# Extra HTTP headers attached to the requests sent to a mirror.
headers = {'User-Agent': 'pirate get'}
# Category name -> numeric category code. remote() accepts a name from this
# table and places the code in the request path; main() lists the table for
# the -l option.
categories = {
    'All': 0,
    'Applications': 300,
    'Applications/Android': 306,
    'Applications/Handheld': 304,
    'Applications/IOS (iPad/iPhone)': 305,
    'Applications/Mac': 302,
    'Applications/Other OS': 399,
    'Applications/UNIX': 303,
    'Applications/Windows': 301,
    'Audio': 100,
    'Audio/Audio books': 102,
    'Audio/FLAC': 104,
    'Audio/Music': 101,
    'Audio/Other': 199,
    'Audio/Sound clips': 103,
    'Games': 400,
    'Games/Android': 408,
    'Games/Handheld': 406,
    'Games/IOS (iPad/iPhone)': 407,
    'Games/Mac': 402,
    'Games/Other': 499,
    'Games/PC': 401,
    'Games/PSx': 403,
    'Games/Wii': 405,
    'Games/XBOX360': 404,
    'Other': 600,
    'Other/Comics': 602,
    'Other/Covers': 604,
    'Other/E-books': 601,
    'Other/Other': 699,
    'Other/Physibles': 605,
    'Other/Pictures': 603,
    'Porn': 500,
    'Porn/Games': 504,
    'Porn/HD - Movies': 505,
    'Porn/Movie clips': 506,
    'Porn/Movies': 501,
    'Porn/Movies DVDR': 502,
    'Porn/Other': 599,
    'Porn/Pictures': 503,
    'Video': 200,
    'Video/3D': 209,
    'Video/HD - Movies': 207,
    'Video/HD - TV shows': 208,
    'Video/Handheld': 206,
    'Video/Movie clips': 204,
    'Video/Movies': 201,
    'Video/Movies DVDR': 202,
    'Video/Music videos': 203,
    'Video/Other': 299,
    'Video/TV shows': 205}
# Sort-order name -> numeric sort code, used by remote() the same way as
# `categories`; main() lists the table for the --list_sorts option.
sorts = {
    'TitleDsc': 1, 'TitleAsc': 2,
    'DateDsc': 3, 'DateAsc': 4,
    'SizeDsc': 5, 'SizeAsc': 6,
    'SeedersDsc': 7, 'SeedersAsc': 8,
    'LeechersDsc': 9, 'LeechersAsc': 10,
    'CategoryDsc': 13, 'CategoryAsc': 14,
    'Default': 99}
2012-11-16 08:52:09 +01:00
2014-02-01 12:58:55 +01:00
2014-12-03 19:40:04 +01:00
class NoRedirection(request.HTTPErrorProcessor):
    """Response processor that hands every response back untouched.

    Overriding the processor's response hooks means the opener built with
    this class returns the raw response object to the caller, who is then
    responsible for inspecting the status code (main() checks getcode()).
    """

    def http_response(self, _, res):
        # Pass-through: no status-code handling is applied here.
        return res

    # HTTPS responses get exactly the same pass-through treatment.
    https_response = http_response
# create a subclass and override the handler methods
2014-12-10 18:26:34 +01:00
class BayParser(HTMLParser):
    """Scan a local database dump for torrents whose title contains a query.

    The parser walks the markup as a small state machine:

    * ``looking`` -- waiting for the next ``<title>`` element;
    * ``title``   -- inside a ``<title>``; its text is compared with the query;
    * ``matched`` -- the title matched, waiting for the ``<magnet>`` element;
    * ``magnet``  -- inside ``<magnet>``; its text is taken as the info hash.

    Every match is appended to ``self.results`` as
    ``[magnet_link, '?', '?']`` (seeders and leechers are unknown for local
    results).
    """

    def __init__(self, q):
        """Create a parser matching titles against *q* (case-insensitive)."""
        HTMLParser.__init__(self)
        self.q = q.lower()
        self.title = ''
        self.state = 'looking'
        # Per-instance list: a class-level list would be shared by, and
        # accumulate results across, every parser ever created.
        self.results = []

    def handle_starttag(self, tag, attrs):
        if tag == 'title':
            self.state = 'title'
        if tag == 'magnet' and self.state == 'matched':
            self.state = 'magnet'

    def handle_data(self, data):
        if self.state == 'title':
            if data.lower().find(self.q) != -1:
                self.title = data
                self.state = 'matched'
            else:
                self.state = 'looking'
        if self.state == 'magnet':
            self.results.append([
                'magnet:?xt=urn:btih:' +
                parse.quote(data) +
                '&dn=' +
                parse.quote(self.title), '?', '?'])
            self.state = 'looking'
def print(*args, **kwargs):
    """Replacement for the builtin print() with an optional ``color`` keyword.

    ``color`` names one of the styles in the table below. Output is only
    colored when ``color`` is given, the module-level ``colored_output``
    switch is true and the optional ``colorama`` package can be imported;
    otherwise the call is forwarded unchanged to ``builtins.print``. All
    other arguments keep their builtin meaning.

    Raises KeyError if ``color`` is not a known style name.
    """
    color = kwargs.pop('color', None)
    # `color` is tested first so plain calls never consult the global switch.
    if not (color and colored_output):
        return builtins.print(*args, **kwargs)

    try:
        import colorama
    except ImportError:
        # colorama is optional: silently fall back to uncolored output.
        return builtins.print(*args, **kwargs)

    colorama.init()
    color_dict = {
        'default': '',
        'header': colorama.Back.BLACK + colorama.Fore.WHITE,
        'alt': colorama.Fore.YELLOW,
        'zebra_0': '',
        'zebra_1': colorama.Fore.BLUE,
        'WARN': colorama.Fore.MAGENTA,
        'ERROR': colorama.Fore.RED}
    c = color_dict[color]
    if not args:
        # Nothing to colorize; behave like a bare print().
        return builtins.print(**kwargs)
    # str() lets the first argument be any object, as with the builtin.
    # The reset code stays a separate argument, as before, so the output
    # layout that callers rely on is unchanged.
    args = (c + str(args[0]),) + args[1:] + (colorama.Style.RESET_ALL,)
    return builtins.print(*args, **kwargs)
# TODO: redo this with an HTML parser instead of regular expressions
def _resolve_code(value, table):
    """Return the numeric code for *value* in *table*, or None if unknown.

    *value* may be a name (a key of *table*) or one of the table's numeric
    codes, given either as an int or as a numeric string.
    """
    if value in table:
        return table[value]
    try:
        code = int(value)
    except (TypeError, ValueError):
        return None
    return code if code in table.values() else None


def remote(args, mirror):
    """Query one mirror and scrape the result pages.

    Parameters:
        args:   parsed command-line arguments; reads ``pages``, ``category``,
                ``sort``, ``browse``, ``recent`` and ``search``.
        mirror: base URL of the site to query (scheme and host, no path).

    Returns a 4-tuple ``(results, sizes, uploaded, identifiers)`` where
    ``results`` is a list of ``[magnet, seeders, leechers]`` and the other
    three are flat lists scraped from the same pages.

    Raises:
        ValueError: if ``args.pages`` is not an integer greater than 0.
        IOError:    if a page contains neither results nor the site's
                    "No hits." marker (the mirror is probably blocked);
                    network errors from urllib propagate as well.

    A Ctrl-C while fetching prints a message and exits the process.
    """
    res_l = []
    pages = int(args.pages)
    if pages < 1:
        raise ValueError('Please provide an integer greater than 0 '
                         'for the number of pages to fetch.')

    # Names and numeric codes are both accepted. The table values are ints,
    # so numeric input has to be compared as an int, not as a string.
    category = _resolve_code(args.category, categories)
    if category is None:
        category = 0
        print('Invalid category ignored', color='WARN')

    sort = _resolve_code(args.sort, sorts)
    if sort is None:
        sort = 99
        print('Invalid sort ignored', color='WARN')

    # Catch the Ctrl-C exception and exit cleanly
    try:
        sizes = []
        uploaded = []
        identifiers = []
        for page in range(pages):
            if args.browse:
                # Category 0 ("All") is replaced by 100 in browse mode.
                if category == 0:
                    category = 100
                path = '/browse/' + '/'.join(str(i) for i in (
                    category, page, sort))
            elif len(args.search) == 0:
                path = '/top/48h' if args.recent else '/top/'
                if category == 0:
                    path += 'all'
                else:
                    path += str(category)
            else:
                # Quote each term so spaces and reserved characters cannot
                # corrupt the URL; terms are still joined with '+'.
                query = '+'.join(parse.quote(term, safe='')
                                 for term in args.search)
                path = '/search/' + '/'.join(str(i) for i in (
                    query, page, sort, category))

            req = request.Request(mirror + path, headers=headers)
            req.add_header('Accept-encoding', 'gzip')
            # The context manager closes the connection once the body is read.
            with request.urlopen(req, timeout=default_timeout) as f:
                body = f.read()
                if f.info().get('Content-Encoding') == 'gzip':
                    body = gzip.decompress(body)
            res = body.decode('utf-8')

            # Each match is (magnet, '') for a magnet link or ('', text) for
            # a right-aligned cell (seeders, then leechers).
            found = re.findall(r'"(magnet\:\?xt=[^"]*)|<td align="right">'
                               r'([^<]+)</td>', res)

            # check for a blocked mirror
            no_results = re.search(r'"No hits\.', res)
            if not found and no_results is None:
                # Contradiction - we found no results,
                # but the page didn't say there were no results.
                # The page is probably not actually the pirate bay,
                # so let's try another mirror
                raise IOError('Blocked mirror detected.')

            # get sizes as well and substitute the &nbsp; character
            sizes.extend([match.replace('&nbsp;', ' ').split()
                          for match in re.findall(r'(?<=Size )[0-9.]'
                                                  r'+\&nbsp\;[KMGT]*[i ]*B',
                                                  res)])
            uploaded.extend([match.replace('&nbsp;', ' ')
                             for match in re.findall(r'(?<=Uploaded )'
                                                     r'.+(?=\, Size)', res)])
            identifiers.extend([match.replace('&nbsp;', ' ')
                                for match in re.findall('(?<=/torrent/)'
                                                        '[0-9]+(?=/)', res)])

            # Stitch the flat match list back into one record per torrent:
            # a magnet, then the seeders cell, then the leechers cell.
            state = 'seeds'
            curr = ['', 0, 0]  # magnet, seeds, leeches
            for magnet, cell in found:
                if cell == '':
                    curr[0] = magnet
                elif state == 'seeds':
                    curr[1] = cell
                    state = 'leeches'
                else:
                    curr[2] = cell
                    state = 'seeds'
                    res_l.append(curr)
                    curr = ['', 0, 0]
    except KeyboardInterrupt:
        print('\nCancelled.')
        sys.exit(0)

    # return the sizes in a separate list
    return res_l, sizes, uploaded, identifiers
2014-12-10 18:26:34 +01:00
def local(db, search):
    """Search a local database file for torrents matching the search terms.

    Parameters:
        db:     path of the file to read and feed to BayParser.
        search: list of search words; they are joined with spaces into one
                query string.

    Returns the parser's result list, one ``[magnet, '?', '?']`` per match.
    """
    # Read the file in one go and make sure the handle is closed afterwards.
    with open(db) as xml:
        content = xml.read()
    parser = BayParser(' '.join(search))
    parser.feed(content)
    return parser.results
def load_config():
    """Build the configuration: built-in defaults overlaid by the user file.

    The user file is ``$XDG_CONFIG_HOME/pirate-get`` when that path is an
    existing file, otherwise ``~/.config/pirate-get``. After reading,
    ``~`` and environment variables in the save directory and local
    database path are expanded.

    Returns the populated ``configparser.ConfigParser``.
    """
    # default options, in the order the sections are created
    defaults = (
        ('Save', (
            ('magnets', 'false'),
            ('torrents', 'false'),
            ('directory', os.getcwd()),
        )),
        ('LocalDB', (
            ('enabled', 'false'),
            ('path', expanduser('~/downloads/pirate-get/db')),
        )),
        ('Misc', (
            ('openCommand', ''),
            ('transmission', 'false'),
            ('colors', 'true'),
        )),
    )

    config = configparser.ConfigParser()
    for section, options in defaults:
        config.add_section(section)
        for option, value in options:
            config.set(section, option, value)

    # user-defined config file: prefer the XDG location when it exists
    xdg_file = expandvars('$XDG_CONFIG_HOME/pirate-get')
    home_file = expanduser('~/.config/pirate-get')
    chosen = xdg_file if os.path.isfile(xdg_file) else home_file
    config.read([chosen])

    # expand env variables and ~ in the two path-valued options
    for section, option in (('Save', 'Directory'), ('LocalDB', 'path')):
        expanded = expanduser(expandvars(config.get(section, option)))
        config.set(section, option, expanded)

    return config
2015-03-25 00:08:35 +01:00
def get_torrent(info_hash):
    """Download the .torrent file for *info_hash* from torcache.net.

    Parameters:
        info_hash: the torrent's info hash as an integer.

    Returns the raw bytes of the torrent file (gunzipped when the server
    sent a gzip-encoded body). urllib errors such as HTTPError propagate to
    the caller.
    """
    # Zero-pad to 40 hex digits: a plain {:X} drops leading zeros and would
    # request the wrong file for hashes that start with 0.
    url = 'http://torcache.net/torrent/{:040X}.torrent'
    req = request.Request(url.format(info_hash))
    req.add_header('Accept-encoding', 'gzip')
    # The context manager closes the connection once the body is read.
    with request.urlopen(req, timeout=default_timeout) as torrent:
        data = torrent.read()
        if torrent.info().get('Content-Encoding') == 'gzip':
            data = gzip.decompress(data)
    return data
def print_search_results(mags, sizes, uploaded, local=False):
    """Print the search results as a zebra-striped table.

    Parameters:
        mags:     list of ``[magnet, seeders, leechers]`` records.
        sizes:    list of ``[number, unit]`` pairs, parallel to *mags*
                  (unused for local results).
        uploaded: list of upload-date strings, parallel to *mags*
                  (unused for local results).
        local:    truthy to print the reduced LINK/NAME layout used for
                  local database results. Defaults to False so the function
                  can be called with three arguments.
    """
    # Function-scope import: this is the only user of shutil in the file.
    import shutil

    # Works without a controlling terminal (falls back to 80 columns),
    # unlike parsing the output of `stty size`.
    columns = shutil.get_terminal_size((80, 24)).columns
    # Width left for the NAME column; never below 1 so the format spec
    # stays valid on very narrow terminals.
    name_width = max(columns - (8 if local else 52), 1)

    cur_color = 'zebra_0'

    if local:
        print('{:>4} {:{length}}'.format(
              'LINK', 'NAME', length=name_width),
              color='header')
    else:
        print('{:>4} {:>5} {:>5} {:>5} {:9} {:11} {:{length}}'.format(
              'LINK', 'SEED', 'LEECH', 'RATIO',
              'SIZE', 'UPLOAD', 'NAME', length=name_width),
              color='header')

    for m, magnet in enumerate(mags):
        # Alternate between colors
        cur_color = 'zebra_0' if cur_color == 'zebra_1' else 'zebra_1'

        name = re.search(r'dn=([^\&]*)', magnet[0])
        if name:
            torrent_name = parse.unquote(name.group(1)).replace('+', ' ')
        else:
            # No display name in the link: show the link itself.
            torrent_name = magnet[0]

        if local:
            # Same column widths as the local header above.
            line = '{:4} {:{length}}'
            content = [m, torrent_name[:name_width]]
        else:
            no_seeders, no_leechers = map(int, magnet[1:])
            size = float(sizes[m][0])
            unit = sizes[m][1]
            date = uploaded[m]

            # compute the S/L ratio (Higher is better)
            try:
                ratio = no_seeders / no_leechers
            except ZeroDivisionError:
                ratio = float('inf')

            line = ('{:4} {:5} {:5} {:5.1f} {:5.1f}'
                    ' {:3} {:<11} {:{length}}')
            content = [m, no_seeders, no_leechers, ratio,
                       size, unit, date, torrent_name[:name_width]]

        # enhanced print output with justified columns
        print(line.format(*content, length=name_width), color=cur_color)
def print_descriptions(chosen_links, mags, site, identifiers):
    """Fetch and print the description of each chosen torrent.

    Parameters:
        chosen_links: iterable of result indices (ints or numeric strings).
        mags:         list of ``[magnet, ...]`` records; the display name is
                      taken from the magnet link.
        site:         base URL of the mirror the results came from.
        identifiers:  torrent ids, parallel to *mags*, used to build the
                      ``/torrent/<id>/`` page URL.

    A page without a description block produces a warning instead of an
    exception. Network errors from urllib propagate to the caller.
    """
    for link in chosen_links:
        link = int(link)
        path = '/torrent/%s/' % identifiers[link]
        req = request.Request(site + path, headers=headers)
        req.add_header('Accept-encoding', 'gzip')
        # The context manager closes the connection once the body is read.
        with request.urlopen(req, timeout=default_timeout) as f:
            body = f.read()
            if f.info().get('Content-Encoding') == 'gzip':
                body = gzip.decompress(body)
        res = body.decode('utf-8')

        name = re.search(r'dn=([^\&]*)', mags[link][0])
        torrent_name = parse.unquote(name.group(1)).replace('+', ' ')

        found = re.search(r'<div class="nfo">\s*<pre>(.+?)(?=</pre>)',
                          res, re.DOTALL)
        if found is None:
            print('No description found for "%s"' % torrent_name,
                  color='WARN')
            continue
        desc = found.group(1)

        # Replace HTML links with markdown style versions
        desc = re.sub(r'<a href="\s*([^"]+?)\s*"[^>]*>(\s*)([^<]+?)(\s*'
                      r')</a>', r'\2[\3](\1)\4', desc)

        print('Description for "%s":' % torrent_name, color='zebra_1')
        print(desc, color='zebra_0')
def print_file_lists(chosen_links, mags, site, identifiers):
    """Fetch and print the file list of each chosen torrent.

    Parameters:
        chosen_links: iterable of result indices (ints or numeric strings).
        mags:         list of ``[magnet, ...]`` records; the display name is
                      taken from the magnet link.
        site:         base URL of the mirror the results came from.
        identifiers:  torrent ids, parallel to *mags*, sent as the ``id``
                      query parameter.

    Network errors from urllib propagate to the caller.
    """
    for link in chosen_links:
        index = int(link)
        path = '/ajax_details_filelist.php'
        query = '?id=' + identifiers[index]
        req = request.Request(site + path + query, headers=headers)
        req.add_header('Accept-encoding', 'gzip')
        # The context manager closes the connection once the body is read.
        with request.urlopen(req, timeout=default_timeout) as f:
            body = f.read()
            if f.info().get('Content-Encoding') == 'gzip':
                body = gzip.decompress(body)
        res = body.decode('utf-8').replace('&nbsp;', ' ')

        # Each match is a pair of adjacent table cells: the left-aligned
        # cell first, then the right-aligned one.
        files = re.findall(r'<td align="left">\s*([^<]+?)\s*</td><td ali'
                           r'gn="right">\s*([^<]+?)\s*</tr>', res)
        name = re.search(r'dn=([^\&]*)', mags[index][0])
        torrent_name = parse.unquote(name.group(1)).replace('+', ' ')

        print('Files in "%s":' % torrent_name, color='zebra_1')
        cur_color = 'zebra_0'

        # A distinct loop name keeps the response handle from being shadowed.
        for entry in files:
            print('{0[0]:>11}  {0[1]}'.format(entry), color=cur_color)
            cur_color = 'zebra_0' if (cur_color == 'zebra_1') else 'zebra_1'
2015-03-25 00:11:37 +01:00
def save_torrents(chosen_links, mags, folder):
    """Download the .torrent file of each chosen result into *folder*.

    Parameters:
        chosen_links: iterable of result indices (ints or numeric strings).
        mags:         list of ``[magnet, ...]`` records.
        folder:       directory the ``<name>.torrent`` files are written to.

    A torrent for which the cache answers with an HTTP error is reported
    and skipped.
    """
    for link in chosen_links:
        magnet = mags[int(link)][0]
        name = re.search(r'dn=([^\&]*)', magnet)
        torrent_name = parse.unquote(name.group(1)).replace('+', ' ')
        # Hex digits in magnet links may be upper- or lowercase.
        info_hash = int(re.search(r'btih:([a-f0-9]{40})', magnet,
                                  re.IGNORECASE).group(1), 16)

        # The name comes from the remote site: strip path separators so it
        # cannot point outside *folder* or into a missing subdirectory.
        safe_name = torrent_name
        for sep in (os.sep, os.altsep):
            if sep:
                safe_name = safe_name.replace(sep, '_')
        file = os.path.join(folder, safe_name + '.torrent')

        try:
            torrent = get_torrent(info_hash)
        except HTTPError:
            print('There is no cached file for this torrent :(', color='ERROR')
        else:
            with open(file, 'wb') as out:
                out.write(torrent)
            # Zero-padded so hashes starting with 0 print all 40 digits.
            print('Saved {:040X} in {}'.format(info_hash, file))
def save_magnets(chosen_links, mags, folder):
    """Write the magnet link of each chosen result to a file in *folder*.

    Parameters:
        chosen_links: iterable of result indices (ints or numeric strings).
        mags:         list of ``[magnet, ...]`` records.
        folder:       directory the ``<name>.magnet`` files are written to.

    Each file contains the magnet link followed by a newline.
    """
    for link in chosen_links:
        magnet = mags[int(link)][0]
        name = re.search(r'dn=([^\&]*)', magnet)
        torrent_name = parse.unquote(name.group(1)).replace('+', ' ')
        # Hex digits in magnet links may be upper- or lowercase.
        info_hash = int(re.search(r'btih:([a-f0-9]{40})', magnet,
                                  re.IGNORECASE).group(1), 16)

        # The name comes from the remote site: strip path separators so it
        # cannot point outside *folder* or into a missing subdirectory.
        safe_name = torrent_name
        for sep in (os.sep, os.altsep):
            if sep:
                safe_name = safe_name.replace(sep, '_')
        file = os.path.join(folder, safe_name + '.magnet')

        with open(file, 'w') as f:
            f.write(magnet + '\n')
        # Report only after the write succeeded; zero-padded so hashes
        # starting with 0 print all 40 digits.
        print('Saved {:040X} in {}'.format(info_hash, file))
def main():
    """Command-line entry point.

    Parses the arguments, runs the search (local database or the first
    mirror that answers), prints the results, lets the user pick links
    interactively (unless -0 or -a is given) and finally saves or opens the
    chosen magnet links.
    """
    # Function-scope import: only main() needs shlex.
    import shlex

    global colored_output

    config = load_config()

    parser = argparse.ArgumentParser(
        description='finds and downloads torrents from the Pirate Bay')
    parser.add_argument('-b', dest='browse',
                        action='store_true',
                        help='display in Browse mode')
    parser.add_argument('search', metavar='search',
                        nargs='*', help='term to search for')
    parser.add_argument('-c', dest='category', metavar='category',
                        help='specify a category to search', default='All')
    parser.add_argument('-s', dest='sort', metavar='sort',
                        help='specify a sort option', default='SeedersDsc')
    parser.add_argument('-R', dest='recent', action='store_true',
                        help='torrents uploaded in the last 48hours.'
                             '*ignored in searches*')
    parser.add_argument('-l', dest='list_categories',
                        action='store_true',
                        help='list categories')
    parser.add_argument('--list_sorts', dest='list_sorts',
                        action='store_true',
                        help='list Sortable Types')
    parser.add_argument('-L', '--local', dest='database',
                        help='an xml file containing the Pirate Bay database')
    parser.add_argument('-p', dest='pages', default=1,
                        help='the number of pages to fetch '
                             "(doesn't work with --local)")
    parser.add_argument('-0', dest='first',
                        action='store_true',
                        help='choose the top result')
    parser.add_argument('-a', '--download-all',
                        action='store_true',
                        help='download all results')
    parser.add_argument('-t', '--transmission',
                        action='store_true',
                        help='open magnets with transmission-remote')
    # Takes the command as its value: as a bare flag the option could only
    # ever yield True, which cannot be used as a command template.
    parser.add_argument('-C', '--custom', dest='command', metavar='COMMAND',
                        help='open magnets with a custom command'
                             ' (%%s will be replaced with the url)')
    parser.add_argument('-M', '--save-magnets',
                        action='store_true',
                        help='save magnets links as files')
    parser.add_argument('-T', '--save-torrents',
                        action='store_true',
                        help='save torrent files')
    parser.add_argument('-S', '--save-directory',
                        type=str, metavar='DIRECTORY',
                        help='directory where to save downloaded files'
                             ' (if none is given $PWD will be used)')
    parser.add_argument('--disable-colors', dest='color',
                        action='store_false',
                        help='disable colored output')
    args = parser.parse_args()

    # Colors stay on only if both the config file and the command line agree.
    if not (config.getboolean('Misc', 'colors') and args.color):
        colored_output = False

    if args.save_directory:
        config.set('Save', 'directory', args.save_directory)

    use_transmission = (args.transmission or
                        config.getboolean('Misc', 'transmission'))
    if use_transmission:
        try:
            ret = subprocess.call(['transmission-remote', '-l'],
                                  stdout=subprocess.DEVNULL,
                                  stderr=subprocess.DEVNULL)
        except OSError:
            # transmission-remote could not be started at all.
            ret = 1
        if ret != 0:
            print('Transmission is not running.')
            return

    if args.list_categories:
        cur_color = 'zebra_0'
        for key, value in sorted(categories.items()):
            cur_color = 'zebra_0' if cur_color == 'zebra_1' else 'zebra_1'
            print(str(value), '\t', key, sep='', color=cur_color)
        return

    if args.list_sorts:
        cur_color = 'zebra_0'
        for key, value in sorted(sorts.items()):
            cur_color = 'zebra_0' if cur_color == 'zebra_1' else 'zebra_1'
            print(str(value), '\t', key, sep='', color=cur_color)
        return

    # One flag for "results come from a local database", whether it was
    # requested on the command line or enabled in the config file.
    use_local = bool(args.database or config.getboolean('LocalDB', 'enabled'))
    site, identifiers = None, []

    if use_local:
        path = args.database or config.get('LocalDB', 'path')
        mags = local(path, args.search)
        sizes, uploaded = [], []
    else:
        # An ordered list, so the main site is always tried first.
        mags, mirrors = [], ['https://thepiratebay.se']
        try:
            opener = request.build_opener(NoRedirection)
            with opener.open('https://proxybay.info/list.txt',
                             timeout=default_timeout) as f:
                if f.getcode() != 200:
                    raise IOError('The mirror list could not be fetched.')
                # The first three lines of the list are skipped.
                listed = [i.decode('utf-8').strip()
                          for i in f.readlines()][3:]
        except (IOError, ValueError):
            print('Could not fetch additional mirrors', color='WARN')
        else:
            # Actually add the mirrors (set.union() returned a new set that
            # was thrown away), skipping blanks and duplicates.
            for candidate in listed:
                if candidate and candidate not in mirrors:
                    mirrors.append(candidate)

        for mirror in mirrors:
            try:
                print('Trying', mirror, end='... ')
                mags, sizes, uploaded, identifiers = remote(args, mirror)
            except (URLError, IOError, ValueError, timeout):
                print('Failed', color='WARN')
            else:
                site = mirror
                print('Ok', color='alt')
                break
        else:
            print('No available mirrors :(', color='WARN')
            return

    if not mags:
        print('No results')
        return

    print_search_results(mags, sizes, uploaded, local=use_local)

    if args.first:
        print('Choosing first result')
        choices = [0]
    elif args.download_all:
        print('Downloading all results')
        choices = range(len(mags))
    else:
        # New input loop to support different link options
        while True:
            print("\nSelect links (Type 'h' for more options"
                  ", 'q' to quit)", end='\b', color='alt')
            try:
                l = input(': ')
            except (KeyboardInterrupt, EOFError):
                print('\nCancelled.')
                return

            try:
                # Very permissive handling
                # Check for any occurrences of h, d, f, p, t, m, or q
                cmd_code_match = re.search(r'([hdfpmtq])', l,
                                           flags=re.IGNORECASE)
                if cmd_code_match:
                    code = cmd_code_match.group(0).lower()
                else:
                    code = None

                # Clean up command codes
                # Substitute multiple consecutive spaces/commas for single
                # comma remove anything that isn't an integer or comma.
                # Turn into list
                l = re.sub(r'^[hdfp, ]*|[hdfp, ]*$', '', l)
                l = re.sub('[ ,]+', ',', l)
                l = re.sub('[^0-9,]', '', l)
                # Drop empty fragments left by stray commas or letters.
                choices = [c for c in l.split(',') if c]

                # Act on option, if supplied
                print('')
                if code == 'h':
                    print('Options:',
                          '<links>: Download selected torrents',
                          '[m<links>]: Save magnets as files',
                          '[t<links>]: Save .torrent files',
                          '[d<links>]: Get descriptions',
                          '[f<links>]: Get files',
                          '[p] Print search results',
                          '[q] Quit', sep='\n')
                elif code == 'q':
                    print('Bye.', color='alt')
                    return
                elif code == 'p':
                    print_search_results(mags, sizes, uploaded,
                                         local=use_local)
                elif not choices:
                    print('No links entered!', color='WARN')
                elif any(int(c) >= len(mags) for c in choices):
                    print('Link number out of range!', color='WARN')
                elif code in ('d', 'f') and use_local:
                    # Local results carry no site or torrent ids to query.
                    print('Not available for local database results.',
                          color='WARN')
                elif code == 'd':
                    print_descriptions(choices, mags, site, identifiers)
                elif code == 'f':
                    print_file_lists(choices, mags, site, identifiers)
                elif code == 'm':
                    save_magnets(choices, mags,
                                 config.get('Save', 'directory'))
                elif code == 't':
                    save_torrents(choices, mags,
                                  config.get('Save', 'directory'))
                else:
                    break
            except Exception as e:
                # Top-level boundary of the interactive loop: report the
                # problem and prompt again instead of aborting the session.
                print('Exception:', e, color='ERROR')
                choices = ()

    save_to_file = False

    if args.save_magnets or config.getboolean('Save', 'magnets'):
        print('Saving selected magnets...')
        save_magnets(choices, mags, config.get('Save', 'directory'))
        save_to_file = True

    if args.save_torrents or config.getboolean('Save', 'torrents'):
        print('Saving selected torrents...')
        save_torrents(choices, mags, config.get('Save', 'directory'))
        save_to_file = True

    if save_to_file:
        return

    for choice in choices:
        url = mags[int(choice)][0]

        # The magnet link comes from a remote site, so it is always passed
        # as a separate argument and never spliced into a shell string.
        if use_transmission:
            subprocess.call(['transmission-remote', '--add', url])
            subprocess.call(['transmission-remote', '-l'])
        elif args.command or config.get('Misc', 'openCommand'):
            command = args.command or config.get('Misc', 'openCommand')
            subprocess.call([part.replace('%s', url)
                             for part in shlex.split(command)])
        else:
            webbrowser.open(url)


if __name__ == '__main__':
    main()