From 7bbfac52e9d51ef88625ab90abe4f745c70e64d9 Mon Sep 17 00:00:00 2001 From: rnhmjoj Date: Tue, 10 Mar 2020 14:33:25 +0100 Subject: [PATCH 1/3] handle cookies for tpb http requests The pirate bay (or a hoster in between) added a mechanism to rate-limit requests that forces us to handle cookies. --- pirate/print.py | 20 ++++++++++++++++---- pirate/torrent.py | 11 ++++++++--- 2 files changed, 24 insertions(+), 7 deletions(-) diff --git a/pirate/print.py b/pirate/print.py index 55e241d..c7490c2 100644 --- a/pirate/print.py +++ b/pirate/print.py @@ -4,12 +4,14 @@ import gzip import urllib.parse as parse import urllib.request as request import shutil -from io import BytesIO + +import pirate.data import colorama import veryprettytable -import pirate.data +from io import BytesIO +from http.cookiejar import CookieJar class Printer: @@ -101,12 +103,17 @@ class Printer: self.print(table) def descriptions(self, chosen_links, results, site): + jar = CookieJar() + opener = request.build_opener( + request.HTTPErrorProcessor, + request.HTTPCookieProcessor(jar)) + for link in chosen_links: path = '/torrent/%s/' % results[link]['id'] req = request.Request(site + path, headers=pirate.data.default_headers) req.add_header('Accept-encoding', 'gzip') - f = request.urlopen(req, timeout=pirate.data.default_timeout) + f = opener.open(req, timeout=pirate.data.default_timeout) if f.info().get('Content-Encoding') == 'gzip': f = gzip.GzipFile(fileobj=BytesIO(f.read())) @@ -125,13 +132,18 @@ class Printer: self.print(desc, color='zebra_0') def file_lists(self, chosen_links, results, site): + jar = CookieJar() + opener = request.build_opener( + request.HTTPErrorProcessor, + request.HTTPCookieProcessor(jar)) + for link in chosen_links: path = '/ajax_details_filelist.php' query = '?id=' + results[link]['id'] req = request.Request(site + path + query, headers=pirate.data.default_headers) req.add_header('Accept-encoding', 'gzip') - f = request.urlopen(req, timeout=pirate.data.default_timeout) + f = opener.open(req, timeout=pirate.data.default_timeout) if f.info().get('Content-Encoding') == 'gzip': f = gzip.GzipFile(fileobj=BytesIO(f.read())) diff --git a/pirate/torrent.py b/pirate/torrent.py index 5547463..6e3acc6 100644 --- a/pirate/torrent.py +++ b/pirate/torrent.py @@ -7,11 +7,11 @@ import urllib.parse as parse import urllib.error import os.path -from bs4 import BeautifulSoup - import pirate.data +from bs4 import BeautifulSoup from io import BytesIO +from http.cookiejar import CookieJar parser_regex = r'"(magnet\:\?xt=[^"]*)|([^<]+)' @@ -152,13 +152,18 @@ def remote(printer, pages, category, sort, mode, terms, mirror): # Catch the Ctrl-C exception and exit cleanly try: + jar = CookieJar() + opener = request.build_opener( + request.HTTPErrorProcessor, + request.HTTPCookieProcessor(jar)) + for page in range(pages): path = build_request_path(page, category, sort, mode, terms) req = request.Request(mirror + path, headers=pirate.data.default_headers) req.add_header('Accept-encoding', 'gzip') - f = request.urlopen(req, timeout=pirate.data.default_timeout) + f = opener.open(req, timeout=pirate.data.default_timeout) if f.info().get('Content-Encoding') == 'gzip': f = gzip.GzipFile(fileobj=BytesIO(f.read())) res = f.read().decode('utf-8') From 95782b0974e85fafe0edafb9e0a7043e32d7e8d5 Mon Sep 17 00:00:00 2001 From: rnhmjoj Date: Tue, 10 Mar 2020 15:39:23 +0100 Subject: [PATCH 2/3] fix print_results tests when COLUMNS is not set --- tests/test_print.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/tests/test_print.py b/tests/test_print.py index 583ebbe..7c3a005 100755 --- a/tests/test_print.py +++ b/tests/test_print.py @@ -1,4 +1,5 @@ #!/usr/bin/env python3 +import os import unittest from unittest.mock import patch, call, MagicMock @@ -6,6 +7,10 @@ from pirate.print import Printer class TestPrint(unittest.TestCase): + @classmethod + def setUpClass(cls): + # needed to display the results table + os.environ['COLUMNS'] = '80' def test_print_results_remote(self): class MockTable: From c6f742893ce0a8a12d1b9c1fbab2ca2b1b294eac Mon Sep 17 00:00:00 2001 From: rnhmjoj Date: Tue, 10 Mar 2020 17:12:09 +0100 Subject: [PATCH 3/3] fix tests with custom url opener --- tests/test_print.py | 12 ++++++++++-- tests/test_torrent.py | 10 +++++++--- 2 files changed, 17 insertions(+), 5 deletions(-) diff --git a/tests/test_print.py b/tests/test_print.py index 7c3a005..feeb59a 100755 --- a/tests/test_print.py +++ b/tests/test_print.py @@ -93,8 +93,12 @@ class TestPrint(unittest.TestCase): read = MagicMock(return_value='
stuff link
'.encode('utf8')) info = MagicMock() response_obj = MockResponse() + class MockOpener(): + open = MagicMock(return_value=response_obj) + add_handler = MagicMock() + opener_obj = MockOpener() with patch('urllib.request.Request', return_value=request_obj) as request: - with patch('urllib.request.urlopen', return_value=response_obj) as urlopen: + with patch('urllib.request.OpenerDirector', return_value=opener_obj) as opener: printer.descriptions([0], [{'id': '1', 'magnet': 'dn=name'}], 'example.com') printer.print.assert_has_calls([call('Description for "name":', color='zebra_1'),call('stuff [link](href)', color='zebra_0')]) @@ -108,8 +112,12 @@ class TestPrint(unittest.TestCase): read = MagicMock(return_value='1.filename'.encode('utf8')) info = MagicMock() response_obj = MockResponse() + class MockOpener(): + open = MagicMock(return_value=response_obj) + add_handler = MagicMock() + opener_obj = MockOpener() with patch('urllib.request.Request', return_value=request_obj) as request: - with patch('urllib.request.urlopen', return_value=response_obj) as urlopen: + with patch('urllib.request.OpenerDirector', return_value=opener_obj) as opener: printer.file_lists([0], [{'id': '1', 'magnet': 'dn=name'}], 'example.com') printer.print.assert_has_calls([call('Files in "name":', color='zebra_1'),call(' 1. filename', color='zebra_0')]) diff --git a/tests/test_torrent.py b/tests/test_torrent.py index a7236c5..9939e16 100755 --- a/tests/test_torrent.py +++ b/tests/test_torrent.py @@ -124,14 +124,18 @@ class TestTorrent(unittest.TestCase): add_header = mock.MagicMock() request_obj = MockRequest() class MockResponse(): - read = mock.MagicMock(return_value='No hits. Try adding an asterisk in you search phrase.'.encode('utf8')) + read = mock.MagicMock(return_value=b'No hits. Try adding an asterisk in you search phrase.') info = mock.MagicMock() response_obj = MockResponse() + class MockOpener(): + open = mock.MagicMock(return_value=response_obj) + add_handler = mock.MagicMock() + opener_obj = MockOpener() with patch('urllib.request.Request', return_value=request_obj) as request: - with patch('urllib.request.urlopen', return_value=response_obj) as urlopen: + with patch('urllib.request.OpenerDirector', return_value=opener_obj) as opener: res = pirate.torrent.remote(MagicMock(Printer), 1, 100, 10, 'browse', [], 'http://example.com') request.assert_called_once_with('http://example.com/browse/100/0/10', headers=pirate.data.default_headers) - urlopen.assert_called_once_with(request_obj, timeout=pirate.data.default_timeout) + opener_obj.open.assert_called_once_with(request_obj, timeout=pirate.data.default_timeout) self.assertEqual(res, []) if __name__ == '__main__':