mirror of
https://github.com/vikstrous/pirate-get
synced 2025-01-24 12:14:20 +01:00
Merge pull request #119 from vikstrous/parse-csv-dumps
add some configs and local csv db search
This commit is contained in:
commit
08848d1bd4
@ -37,7 +37,7 @@ magnets = false
|
||||
torrents = false
|
||||
|
||||
[LocalDB]
|
||||
; use a local copy of the pirate bay database
|
||||
; use a local copy of the csv formatted pirate bay database
|
||||
enabled = false
|
||||
|
||||
; path of the database
|
||||
@ -51,6 +51,10 @@ openCommand =
|
||||
|
||||
; open magnets with transmission-remote client
|
||||
transmission = false
|
||||
; set to username:password if needed
|
||||
transmission-auth =
|
||||
; set to the port number if needed
|
||||
transmission-port =
|
||||
|
||||
; use colored output
|
||||
colors = true
|
||||
@ -67,7 +71,7 @@ Any command line option will override its respective setting in the config file.
|
||||
## Local Database
|
||||
If you want to use a local copy of the Pirate Bay database, download a copy here (or wherever the latest version is currently):
|
||||
|
||||
http://thepiratebay.se/torrent/8156416
|
||||
https://thepiratebay.org/static/dump/csv/
|
||||
|
||||
## License
|
||||
pirate-get is licensed under the GNU Affero General Public License version 3 or later.
|
||||
|
@ -5,7 +5,7 @@ import pkgutil
|
||||
def get_resource(filename):
    """Load a bundled data file shipped inside this package.

    ``filename`` is resolved relative to the package's ``data/`` directory
    and the result of ``pkgutil.get_data`` is returned unchanged.
    """
    resource_path = 'data/' + filename
    return pkgutil.get_data(__package__, resource_path)
|
||||
|
||||
version = '0.2.13'
|
||||
version = '0.3.0'
|
||||
|
||||
categories = json.loads(get_resource('categories.json').decode())
|
||||
sorts = json.loads(get_resource('sorts.json').decode())
|
||||
@ -15,4 +15,4 @@ default_headers = {'User-Agent': 'pirate get'}
|
||||
default_timeout = 10
|
||||
|
||||
default_mirror = 'https://thepiratebay.org/'
|
||||
mirror_list = 'https://proxybay.co/list.txt'
|
||||
mirror_list = 'https://proxybay.bz/list.txt'
|
||||
|
@ -1,40 +1,45 @@
|
||||
import urllib.parse as parse
|
||||
import html.parser as parser
|
||||
import base64
|
||||
import csv
|
||||
|
||||
# this is used to remove null bytes from the input stream because
|
||||
# apparently they exist
|
||||
def replace_iter(iterable):
    """Lazily yield each string of *iterable* with NUL characters removed.

    Intended to wrap a text stream before it is handed to a line-oriented
    consumer; items are processed one at a time, so the input is never
    materialised in memory.

    NOTE(review): presumably needed because the consumer of the stream
    cannot cope with NUL characters in its input -- confirm against caller.
    """
    for value in iterable:
        yield value.replace("\0", "")
|
||||
|
||||
class BayParser(parser.HTMLParser):
    """Collect magnet links for entries whose title contains a query.

    The parser walks a tag stream containing ``<title>`` and ``<magnet>``
    elements. Set ``q`` to the lower-cased search string before feeding
    data; afterwards ``results`` holds one ``[magnet_uri, '?', '?']`` list
    per matching entry.

    State machine (``state``):
      * ``'looking'`` -- waiting for the next ``<title>`` tag
      * ``'title'``   -- the next text node is a title to test against ``q``
      * ``'matched'`` -- the title matched; waiting for its ``<magnet>`` tag
      * ``'magnet'``  -- the next text node is the hash of the matched entry
    """

    title = ''
    q = ''
    state = 'looking'

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        # Keep the result list per instance: a class-level list would be
        # shared by every parser and accumulate hits across searches.
        self.results = []

    def handle_starttag(self, tag, attrs):
        """Advance the state machine when a relevant tag opens."""
        if tag == 'title':
            self.state = 'title'
        if tag == 'magnet' and self.state == 'matched':
            self.state = 'magnet'

    def handle_data(self, data):
        """Test titles against the query and record matching magnet links."""
        if self.state == 'title':
            if self.q in data.lower():
                self.title = data
                self.state = 'matched'
            else:
                self.state = 'looking'
        if self.state == 'magnet':
            # The two '?' entries are placeholder fields stored alongside
            # the magnet link.
            self.results.append([
                'magnet:?xt=urn:btih:' +
                parse.quote(data) +
                '&dn=' +
                parse.quote(self.title), '?', '?'])
            self.state = 'looking'
|
||||
|
||||
# https://stackoverflow.com/questions/1094841/reusable-library-to-get-human-readable-version-of-file-size#1094933
|
||||
def sizeof_fmt(num, suffix='B'):
    """Format a size as a human-readable string with binary (1024) prefixes.

    ``num`` is the size in base units (e.g. bytes) and ``suffix`` is the
    unit symbol appended after the prefix. The value is printed with one
    decimal place, e.g. ``sizeof_fmt(1536)`` returns ``'1.5 KiB'``. Values
    of 1024 Zi-units or more are reported with the ``Yi`` prefix.
    """
    for unit in ('', 'Ki', 'Mi', 'Gi', 'Ti', 'Pi', 'Ei', 'Zi'):
        # Compare the value as it will be printed (one decimal place), so a
        # size just below a unit boundary such as 1048575 bytes is shown as
        # "1.0 MiB" rather than "1024.0 KiB".
        if abs(round(num, 1)) < 1024.0:
            return "%3.1f %s%s" % (num, unit, suffix)
        num /= 1024.0
    return "%.1f %s%s" % (num, 'Yi', suffix)
|
||||
|
||||
def search(db, terms):
|
||||
f = open(db)
|
||||
xml = f.readlines()
|
||||
f.close()
|
||||
parser = BayParser()
|
||||
parser.q = (' '.join(terms)).lower()
|
||||
parser.feed(''.join(xml))
|
||||
return parser.results
|
||||
with open(db, 'r') as f:
|
||||
results = []
|
||||
reader = csv.reader(replace_iter(f), delimiter=';')
|
||||
for row in reader:
|
||||
# skip comments
|
||||
if row[0][0] == '#':
|
||||
continue
|
||||
# 0 is date in rfc 3339 format
|
||||
# 1 magnet link hash
|
||||
# 2 is title
|
||||
# 3 is size in bytes
|
||||
if ' '.join(terms).lower() in row[2].lower():
|
||||
result = {
|
||||
'date': row[0],
|
||||
'size': sizeof_fmt(int(row[3])),
|
||||
'magnet':
|
||||
'magnet:?xt=urn:btih:' +
|
||||
base64.b16encode(base64.b64decode(row[1])).decode('utf-8') +
|
||||
'&dn=' +
|
||||
parse.quote(row[2]),
|
||||
}
|
||||
results.append(result)
|
||||
# limit page size to not print walls of results
|
||||
# TODO: consider pagination
|
||||
results = results[:30]
|
||||
return results
|
||||
|
@ -38,6 +38,8 @@ def parse_config_file(text):
|
||||
# for interpolating in the command
|
||||
config.set('Misc', 'openCommand', '')
|
||||
config.set('Misc', 'transmission', 'false')
|
||||
config.set('Misc', 'transmission-auth', '')
|
||||
config.set('Misc', 'transmission-port', '')
|
||||
config.set('Misc', 'colors', 'true')
|
||||
config.set('Misc', 'mirror', pirate.data.default_mirror)
|
||||
|
||||
@ -139,7 +141,8 @@ def parse_args(args_in):
|
||||
action='store_true',
|
||||
help='list Sortable Types')
|
||||
parser.add_argument('-L', '--local', dest='database',
|
||||
help='an xml file containing the Pirate Bay database')
|
||||
help='a csv file containing the Pirate Bay database '
|
||||
'downloaded from https://thepiratebay.org/static/dump/csv/')
|
||||
parser.add_argument('-p', dest='pages', default=1, type=int,
|
||||
help='the number of pages to fetch '
|
||||
"(doesn't work with --local)")
|
||||
@ -219,9 +222,14 @@ def combine_configs(config, args):
|
||||
args.transmission_command = ['transmission-remote']
|
||||
if args.port:
|
||||
args.transmission_command.append(args.port)
|
||||
elif config.get('Misc', 'transmission-port'):
|
||||
args.transmission_command.append(config.get('Misc', 'transmission-port'))
|
||||
if args.auth:
|
||||
args.transmission_command.append('--auth')
|
||||
args.transmission_command.append(args.auth)
|
||||
elif config.get('Misc', 'transmission-auth'):
|
||||
args.transmission_command.append('--auth')
|
||||
args.transmission_command.append(config.get('Misc', 'transmission-auth'))
|
||||
|
||||
args.output = 'browser_open'
|
||||
if args.transmission or config.getboolean('Misc', 'transmission'):
|
||||
|
@ -43,10 +43,13 @@ class Printer:
|
||||
even = True
|
||||
|
||||
if local:
|
||||
table = veryprettytable.VeryPrettyTable(['LINK', 'NAME'])
|
||||
table = veryprettytable.VeryPrettyTable(['LINK', 'DATE', 'SIZE', 'NAME'])
|
||||
|
||||
table.align['SIZE'] = 'r'
|
||||
table.align['NAME'] = 'l'
|
||||
else:
|
||||
table = veryprettytable.VeryPrettyTable(['LINK', 'SEED', 'LEECH',
|
||||
'RATIO', 'SIZE', '',
|
||||
'RATIO', 'SIZE',
|
||||
'UPLOAD', 'NAME'])
|
||||
table.align['NAME'] = 'l'
|
||||
table.align['SEED'] = 'r'
|
||||
@ -65,7 +68,7 @@ class Printer:
|
||||
torrent_name = parse.unquote_plus(name.group(1))
|
||||
|
||||
if local:
|
||||
content = [n, torrent_name[:columns - 7]]
|
||||
content = [n, result['date'], result['size'], torrent_name[:columns - 42]]
|
||||
else:
|
||||
no_seeders = int(result['seeds'])
|
||||
no_leechers = int(result['leechers'])
|
||||
@ -85,8 +88,8 @@ class Printer:
|
||||
|
||||
content = [n, no_seeders, no_leechers,
|
||||
'{:.1f}'.format(ratio),
|
||||
'{:.1f}'.format(size),
|
||||
unit, date, torrent_name[:columns - 53]]
|
||||
'{:.1f} '.format(size) + unit,
|
||||
date, torrent_name[:columns - 50]]
|
||||
|
||||
if even or not self.enable_color:
|
||||
table.add_row(content)
|
||||
|
BIN
tests/data/db.csv
Normal file
BIN
tests/data/db.csv
Normal file
Binary file not shown.
Can't render this file because it contains an unexpected character in line 5 and column 77.
|
@ -2,17 +2,36 @@
|
||||
import unittest
|
||||
import pirate.local
|
||||
import os
|
||||
import base64
|
||||
|
||||
from tests import util
|
||||
|
||||
|
||||
class TestLocal(unittest.TestCase):
|
||||
|
||||
def test_rich_xml(self):
|
||||
path = util.data_path('rich.xml')
|
||||
expected = [['magnet:?xt=urn:btih:b03c8641415d3a0fc7077f5bf567634442989a74&dn=High.Chaparall.S02E02.PDTV.XViD.SWEDiSH-HuBBaTiX', '?', '?']]
|
||||
actual = pirate.local.search(path, ('High',))
|
||||
self.assertEqual(actual, expected)
|
||||
def test_local_csv_db(self):
|
||||
path = util.data_path('db.csv')
|
||||
expected = [
|
||||
{
|
||||
'date':'2018-May-14 11:05:31',
|
||||
'magnet': 'magnet:?xt=urn:btih:34930674EF3BB9317FB5F263CCA830F52685235B&dn=ubuntu-14.04.5-desktop-amd64.iso',
|
||||
'size': '1.0 GiB',
|
||||
},
|
||||
{
|
||||
'date': '2018-Apr-15 00:04:09',
|
||||
'magnet': 'magnet:?xt=urn:btih:F07E0B0584745B7BCB35E98097488D34E68623D0&dn=Ubuntu%2017.10.1%20Desktop%20%28amd64%29',
|
||||
'size': '1.4 GiB',
|
||||
},
|
||||
{
|
||||
'date': '2017-Aug-01 15:08:07',
|
||||
'magnet': 'magnet:?xt=urn:btih:4096EC129404689CEB8056D907E384FF872C2CE9&dn=LINUX%20UBUNTU%2016.10%2032X64',
|
||||
'size': '1.5 GiB',
|
||||
},
|
||||
]
|
||||
actual = pirate.local.search(path, ('ubuntu',))
|
||||
self.assertEqual(len(actual), len(expected))
|
||||
for i in range(len(expected)):
|
||||
self.assertDictEqual(actual[i], expected[i])
|
||||
|
||||
if __name__ == '__main__':
|
||||
unittest.main()
|
||||
|
@ -23,8 +23,8 @@ class TestPrint(unittest.TestCase):
|
||||
'uploaded': 'never'
|
||||
}]
|
||||
printer.search_results(results)
|
||||
prettytable.assert_called_once_with(['LINK', 'SEED', 'LEECH', 'RATIO', 'SIZE', '', 'UPLOAD', 'NAME'])
|
||||
mock.add_row.assert_has_calls([call([0, 1, 2, '0.5', '3.0', 'MiB', 'never', 'name'])])
|
||||
prettytable.assert_called_once_with(['LINK', 'SEED', 'LEECH', 'RATIO', 'SIZE', 'UPLOAD', 'NAME'])
|
||||
mock.add_row.assert_has_calls([call([0, 1, 2, '0.5', '3.0 MiB', 'never', 'name'])])
|
||||
|
||||
def test_print_results_local(self):
|
||||
class MockTable:
|
||||
@ -36,14 +36,16 @@ class TestPrint(unittest.TestCase):
|
||||
with patch('veryprettytable.VeryPrettyTable', return_value=mock) as prettytable:
|
||||
results = [{
|
||||
'magnet': 'dn=name',
|
||||
'Name': 'name',
|
||||
'date': '1',
|
||||
'size': '1',
|
||||
},{
|
||||
'magnet': 'dn=name2',
|
||||
'Name': 'name2',
|
||||
'date': '2',
|
||||
'size': '2',
|
||||
}]
|
||||
printer.search_results(results, local=True)
|
||||
prettytable.assert_called_once_with(['LINK', 'NAME'])
|
||||
mock.add_row.assert_has_calls([call([0, 'name']), call([1, 'name2'])])
|
||||
prettytable.assert_called_once_with(['LINK', 'DATE', 'SIZE', 'NAME'])
|
||||
mock.add_row.assert_has_calls([call([0, '1', '1', 'name']), call([1, '2', '2', 'name2'])])
|
||||
|
||||
def test_print_color(self):
|
||||
printer = Printer(False)
|
||||
@ -55,7 +57,7 @@ class TestPrint(unittest.TestCase):
|
||||
printer.print('abc', color='zebra_1')
|
||||
mock_print.assert_called_once_with('\x1b[34mabc', '\x1b[0m')
|
||||
|
||||
def test_print_results_local(self):
|
||||
def test_print_results_local2(self):
|
||||
class MockTable:
|
||||
add_row = MagicMock()
|
||||
align = {}
|
||||
@ -65,14 +67,16 @@ class TestPrint(unittest.TestCase):
|
||||
with patch('veryprettytable.VeryPrettyTable', return_value=mock) as prettytable:
|
||||
results = [{
|
||||
'magnet': 'dn=name',
|
||||
'Name': 'name',
|
||||
'date': '1',
|
||||
'size': '1',
|
||||
},{
|
||||
'magnet': 'dn=name2',
|
||||
'Name': 'name2',
|
||||
'date': '2',
|
||||
'size': '2',
|
||||
}]
|
||||
printer.search_results(results, local=True)
|
||||
prettytable.assert_called_once_with(['LINK', 'NAME'])
|
||||
mock.add_row.assert_has_calls([call([0, 'name']), call([1, 'name2'], fore_color='blue')])
|
||||
prettytable.assert_called_once_with(['LINK', 'DATE', 'SIZE', 'NAME'])
|
||||
mock.add_row.assert_has_calls([call([0, '1', '1', 'name']), call([1, '2', '2', 'name2'], fore_color='blue')])
|
||||
|
||||
def test_print_descriptions(self):
|
||||
printer = Printer(False)
|
||||
|
Loading…
Reference in New Issue
Block a user