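"""Search Pirate Bay mirrors: build request paths, fetch and parse result
pages, and save the chosen torrents or magnet links to disk."""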
import re
import sys
import gzip
import urllib.request as request
import urllib.parse as parse
import urllib.error
import os.path

from pyquery import PyQuery as pq

import pirate.data
from pirate.print import print

from io import BytesIO
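

# matches quoted magnet URIs and right-aligned <td> cells in the raw HTML;
# not used by parse_page below, which parses with pyquery instead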
parser_regex = r'"(magnet\:\?xt=[^"]*)|<td align="right">([^<]+)</td>'


def parse_category(category):
    try:
        category = int(category)
    except ValueError:
        pass
    if category in pirate.data.categories.values():
        return category
    elif category in pirate.data.categories.keys():
        return pirate.data.categories[category]
    else:
        print('Invalid category ignored', color='WARN')
        # return the int 0 ("any category") so that the
        # category == 0 checks in build_request_path still match
        return 0
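
# A sketch of the mapping (assuming pirate.data.categories maps names to
# numeric ids, e.g. 'audio' -> 100):
#   parse_category('audio') -> 100
#   parse_category('100')   -> 100
#   parse_category('bogus') -> 0, with a warning printed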


def parse_sort(sort):
    try:
        sort = int(sort)
    except ValueError:
        pass
    if sort in pirate.data.sorts.values():
        return sort
    elif sort in pirate.data.sorts.keys():
        return pirate.data.sorts[sort]
    else:
        print('Invalid sort ignored', color='WARN')
        # fall back to the default sort id, as an int for consistency
        return 99
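
# A sketch of the mapping (assuming pirate.data.sorts maps names to numeric
# ids, e.g. 'SeedersDsc' -> 7):
#   parse_sort('SeedersDsc') -> 7
#   parse_sort('7')          -> 7
#   parse_sort('bogus')      -> 99, with a warning printed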


# TODO: warn users when using a sort in a mode that doesn't accept sorts
# TODO: warn users when using search terms in a mode that doesn't accept them
# TODO: same with the page parameter for the top and recent modes
# TODO: warn the user when using a minor category with the recent mode
def build_request_path(page, category, sort, mode, terms):
    if mode == 'browse':
        if category == 0:
            category = 100
        return '/browse/{}/{}/{}'.format(category, page, sort)
    elif mode == 'recent':
        # This is not a typo. There is no / between 48h and the category.
        path = '/top/48h'
        # only major categories can be used with this mode
        if category == 0:
            return path + 'all'
        else:
            return path + str(category)
    elif mode == 'top':
        path = '/top/'
        if category == 0:
            return path + 'all'
        else:
            return path + str(category)
    elif mode == 'search':
        query = parse.quote_plus(' '.join(terms))
        return '/search/{}/{}/{}/{}'.format(query, page, sort, category)
    else:
        raise ValueError('Unknown mode.')
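
# Paths this produces, for illustration:
#   build_request_path(0, 0, 7, 'browse', [])             -> '/browse/100/0/7'
#   build_request_path(0, 0, 7, 'recent', [])             -> '/top/48hall'
#   build_request_path(0, 0, 7, 'search', ['foo', 'bar']) -> '/search/foo+bar/0/7/0'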


def parse_page(html):
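    """Extract torrent data from a search result page.

    Returns (results, sizes, uploaded, identifiers): results is a list of
    (magnet, seeds, leechers) tuples, sizes a list of [value, unit] pairs,
    uploaded the raw upload-date strings, and identifiers the numeric
    torrent ids taken from the detail links.
    """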
    d = pq(html)

    # first get the magnet links and make sure there are results
    magnets = list(map(lambda l: pq(l).attr('href'),
                       d('table#searchResult tr>td:nth-child(2)>a:nth-child(2)')))

    # check for a blocked mirror: a genuine page with no results must at
    # least show the site's "No hits" message ("you search phrase" is the
    # site's own typo, so the pattern keeps it)
    no_results = re.search(r'No hits\. Try adding an asterisk in '
                           r'you search phrase\.', html)
    if len(magnets) == 0 and no_results is None:
        # Contradiction - we found no results, but the page didn't say
        # there were no results. The page is probably not actually the
        # pirate bay, so let the caller try another mirror.
        raise IOError('Blocked mirror detected.')

    # next get more info
    seeds = list(map(lambda l: pq(l).text(),
                     d('table#searchResult tr>td:nth-child(3)')))
    leechers = list(map(lambda l: pq(l).text(),
                        d('table#searchResult tr>td:nth-child(4)')))
    identifiers = list(map(lambda l: pq(l).attr('href').split('/')[2],
                           d('table#searchResult .detLink')))

    sizes = []
    uploaded = []
    # sizes and upload dates live in the description blurbs
    for node in d('font.detDesc'):
        text = pq(node).text()
        # e.g. '1.2 GiB' -> ['1.2', 'GiB']
        sizes.append(
            re.findall(r'(?<=Size )[0-9.]+\s[KMGT]*[i ]*B', text)[0].split())
        uploaded.append(re.findall(r'(?<=Uploaded ).+(?=, Size)', text)[0])

    return list(zip(magnets, seeds, leechers)), sizes, uploaded, identifiers


def remote(pages, category, sort, mode, terms, mirror):
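    """Fetch `pages` pages of results from the given mirror and parse them.

    Returns aggregated (results, sizes, uploaded, identifiers) lists
    spanning all requested pages.
    """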
    res_l = []
    sizes = []
    uploaded = []
    identifiers = []

    if pages < 1:
        raise ValueError('Please provide an integer greater than 0 '
                         'for the number of pages to fetch.')

    # Catch the Ctrl-C exception and exit cleanly
    try:
        for page in range(pages):
            path = build_request_path(page, category, sort, mode, terms)

            # request the page gzipped, and decompress the response
            # if the mirror actually sent it that way
            req = request.Request(mirror + path,
                                  headers=pirate.data.default_headers)
            req.add_header('Accept-encoding', 'gzip')
            f = request.urlopen(req, timeout=pirate.data.default_timeout)
            if f.info().get('Content-Encoding') == 'gzip':
                f = gzip.GzipFile(fileobj=BytesIO(f.read()))
            res = f.read().decode('utf-8')

            page_res_l, page_sizes, page_uploaded, page_identifiers = \
                parse_page(res)
            res_l += page_res_l
            sizes += page_sizes
            uploaded += page_uploaded
            identifiers += page_identifiers

    except KeyboardInterrupt:
        print('\nCancelled.')
        sys.exit(0)

    # the four lists are aligned by result index
    return res_l, sizes, uploaded, identifiers


def get_torrent(info_hash):
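    """Fetch the cached .torrent file for an info hash (given as an int)
    from torcache.net and return its raw bytes."""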
    # pad the hash to 40 hex digits - plain {:X} would drop leading zeros
    url = 'http://torcache.net/torrent/{:040X}.torrent'
    req = request.Request(url.format(info_hash),
                          headers=pirate.data.default_headers)
    req.add_header('Accept-encoding', 'gzip')

    torrent = request.urlopen(req, timeout=pirate.data.default_timeout)
    if torrent.info().get('Content-Encoding') == 'gzip':
        torrent = gzip.GzipFile(fileobj=BytesIO(torrent.read()))

    return torrent.read()


def save_torrents(chosen_links, mags, folder):
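    """Fetch and save a .torrent file into `folder` for each chosen
    search result, skipping torrents missing from the cache."""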
    for link in chosen_links:
        magnet = mags[int(link)][0]
        name = re.search(r'dn=([^&]*)', magnet)
        torrent_name = parse.unquote(name.group(1)).replace('+', ' ')
        info_hash = int(re.search(r'btih:([a-f0-9]{40})', magnet).group(1), 16)
        file = os.path.join(folder, torrent_name + '.torrent')

        try:
            torrent = get_torrent(info_hash)
        except urllib.error.HTTPError:
            print('There is no cached file for this torrent :(', color='ERROR')
        else:
            # close the file promptly instead of leaking the handle
            with open(file, 'wb') as f:
                f.write(torrent)
            print('Saved {:040X} in {}'.format(info_hash, file))


def save_magnets(chosen_links, mags, folder):
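    """Write the magnet link of each chosen search result to a
    .magnet file in `folder`, named after the torrent."""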
    for link in chosen_links:
        magnet = mags[int(link)][0]
        name = re.search(r'dn=([^&]*)', magnet)
        torrent_name = parse.unquote(name.group(1)).replace('+', ' ')
        info_hash = int(re.search(r'btih:([a-f0-9]{40})', magnet).group(1), 16)
        file = os.path.join(folder, torrent_name + '.magnet')

        print('Saved {:040X} in {}'.format(info_hash, file))
        with open(file, 'w') as f:
            f.write(magnet + '\n')