From 7bbfac52e9d51ef88625ab90abe4f745c70e64d9 Mon Sep 17 00:00:00 2001 From: rnhmjoj Date: Tue, 10 Mar 2020 14:33:25 +0100 Subject: [PATCH] handle cookies for tpb http requests The pirate bay (or a hoster in between) added a mechanism to rate-limit requests that forces us to handle cookies. --- pirate/print.py | 20 ++++++++++++++++---- pirate/torrent.py | 11 ++++++++--- 2 files changed, 24 insertions(+), 7 deletions(-) diff --git a/pirate/print.py b/pirate/print.py index 55e241d..c7490c2 100644 --- a/pirate/print.py +++ b/pirate/print.py @@ -4,12 +4,14 @@ import gzip import urllib.parse as parse import urllib.request as request import shutil -from io import BytesIO + +import pirate.data import colorama import veryprettytable -import pirate.data +from io import BytesIO +from http.cookiejar import CookieJar class Printer: @@ -101,12 +103,17 @@ class Printer: self.print(table) def descriptions(self, chosen_links, results, site): + jar = CookieJar() + opener = request.build_opener( + request.HTTPErrorProcessor, + request.HTTPCookieProcessor(jar)) + for link in chosen_links: path = '/torrent/%s/' % results[link]['id'] req = request.Request(site + path, headers=pirate.data.default_headers) req.add_header('Accept-encoding', 'gzip') - f = request.urlopen(req, timeout=pirate.data.default_timeout) + f = opener.open(req, timeout=pirate.data.default_timeout) if f.info().get('Content-Encoding') == 'gzip': f = gzip.GzipFile(fileobj=BytesIO(f.read())) @@ -125,13 +132,18 @@ class Printer: self.print(desc, color='zebra_0') def file_lists(self, chosen_links, results, site): + jar = CookieJar() + opener = request.build_opener( + request.HTTPErrorProcessor, + request.HTTPCookieProcessor(jar)) + for link in chosen_links: path = '/ajax_details_filelist.php' query = '?id=' + results[link]['id'] req = request.Request(site + path + query, headers=pirate.data.default_headers) req.add_header('Accept-encoding', 'gzip') - f = request.urlopen(req, timeout=pirate.data.default_timeout) + f = opener.open(req, timeout=pirate.data.default_timeout) if f.info().get('Content-Encoding') == 'gzip': f = gzip.GzipFile(fileobj=BytesIO(f.read())) diff --git a/pirate/torrent.py b/pirate/torrent.py index 5547463..6e3acc6 100644 --- a/pirate/torrent.py +++ b/pirate/torrent.py @@ -7,11 +7,11 @@ import urllib.parse as parse import urllib.error import os.path -from bs4 import BeautifulSoup - import pirate.data +from bs4 import BeautifulSoup from io import BytesIO +from http.cookiejar import CookieJar parser_regex = r'"(magnet\:\?xt=[^"]*)|([^<]+)' @@ -152,13 +152,18 @@ def remote(printer, pages, category, sort, mode, terms, mirror): # Catch the Ctrl-C exception and exit cleanly try: + jar = CookieJar() + opener = request.build_opener( + request.HTTPErrorProcessor, + request.HTTPCookieProcessor(jar)) + for page in range(pages): path = build_request_path(page, category, sort, mode, terms) req = request.Request(mirror + path, headers=pirate.data.default_headers) req.add_header('Accept-encoding', 'gzip') - f = request.urlopen(req, timeout=pirate.data.default_timeout) + f = opener.open(req, timeout=pirate.data.default_timeout) if f.info().get('Content-Encoding') == 'gzip': f = gzip.GzipFile(fileobj=BytesIO(f.read())) res = f.read().decode('utf-8')