1
0
mirror of https://github.com/vikstrous/pirate-get synced 2025-01-09 09:59:51 +01:00

Merge pull request #119 from vikstrous/parse-csv-dumps

add some configs and local csv db search
This commit is contained in:
Viktor Stanchev 2018-05-29 21:18:16 -07:00 committed by GitHub
commit 08848d1bd4
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
8 changed files with 112 additions and 69 deletions

View File

@ -18,7 +18,7 @@ Watch [this](http://showterm.io/d6f7a0c2a5de1da9ea317) for an example usage.
## Configuration file
You can use a file to override pirate-get's default settings.
You can use a file to override pirate-get's default settings.
Default is `$XDG_CONFIG_HOME/pirate-get`.
If that does not exist, `$HOME/.config/pirate-get` is used.
@ -34,23 +34,27 @@ directory = $PWD
magnets = false
; save each selected torrent in a .torrent file
torrents = false
torrents = false
[LocalDB]
; use a local copy of the pirate bay database
enabled = false
; use a local copy of the csv formatted pirate bay database
enabled = false
; path of the database
; path of the database
path = ~/downloads/pirate-get/db
[Misc]
; specify a custom command for opening the magnet
; ex. myprogram --open %s
; %s represents the magnet URI
openCommand =
openCommand =
; open magnets with transmission-remote client
transmission = false
; set to username:password if needed
transmission-auth =
; set to the port number if needed
transmission-port =
; use colored output
colors = true
@ -60,15 +64,15 @@ colors = true
mirror = http://thepiratebay.org
```
Note:
Any command line option will override its respective setting in the config file.
Note:
Any command line option will override its respective setting in the config file.
## Local Database
If you want to use a local copy of the Pirate Bay database, download a copy here (or from wherever the latest version is currently hosted):
http://thepiratebay.se/torrent/8156416
https://thepiratebay.org/static/dump/csv/
## License
pirate-get is licensed under the GNU Affero General Public License version 3 or later.
pirate-get is licensed under the GNU Affero General Public License version 3 or later.
See the accompanying file LICENSE or http://www.gnu.org/licenses/agpl.html.

View File

@ -5,7 +5,7 @@ import pkgutil
def get_resource(filename):
    """Return the raw bytes of a file stored under this package's ``data/`` directory.

    ``filename`` is appended to the ``data/`` prefix and the resulting
    resource is loaded through ``pkgutil.get_data`` for the current package.
    """
    resource_path = 'data/' + filename
    return pkgutil.get_data(__package__, resource_path)
version = '0.2.13'
version = '0.3.0'
categories = json.loads(get_resource('categories.json').decode())
sorts = json.loads(get_resource('sorts.json').decode())
@ -15,4 +15,4 @@ default_headers = {'User-Agent': 'pirate get'}
default_timeout = 10
default_mirror = 'https://thepiratebay.org/'
mirror_list = 'https://proxybay.co/list.txt'
mirror_list = 'https://proxybay.bz/list.txt'

View File

@ -1,40 +1,45 @@
import urllib.parse as parse
import html.parser as parser
import base64
import csv
# this is used to remove null bytes from the input stream because
# apparently they exist
def replace_iter(iterable):
    """Lazily yield every string of ``iterable`` with NUL characters removed."""
    yield from (chunk.replace("\0", "") for chunk in iterable)
class BayParser(parser.HTMLParser):
    """Small state machine that extracts magnet links from a tagged dump.

    Set ``q`` to the lower-cased search string, then ``feed()`` the document.
    Every ``<magnet>`` element that follows a ``<title>`` whose text contains
    ``q`` adds one ``[magnet_uri, '?', '?']`` entry to ``results``.

    States:
      * ``'looking'`` - waiting for the next ``<title>`` tag
      * ``'title'``   - inside a ``<title>``; its text is about to be checked
      * ``'matched'`` - the last title contained ``q``; waiting for ``<magnet>``
      * ``'magnet'``  - inside the ``<magnet>`` of a matched title
    """

    # Immutable defaults can safely live on the class; assigning to them on
    # an instance shadows the class value.
    title = ''
    q = ''
    state = 'looking'

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        # The result list must be per instance: a class-level list would be
        # shared (and keep growing) across every parser ever created.
        self.results = []

    def handle_starttag(self, tag, attrs):
        if tag == 'title':
            self.state = 'title'
        if tag == 'magnet' and self.state == 'matched':
            self.state = 'magnet'

    def handle_data(self, data):
        if self.state == 'title':
            # Only the data is lower-cased here, so ``q`` has to be
            # lower case already for a match to be possible.
            if data.lower().find(self.q) != -1:
                self.title = data
                self.state = 'matched'
            else:
                self.state = 'looking'
        if self.state == 'magnet':
            self.results.append([
                'magnet:?xt=urn:btih:' +
                parse.quote(data) +
                '&dn=' +
                parse.quote(self.title), '?', '?'])
            self.state = 'looking'
# https://stackoverflow.com/questions/1094841/reusable-library-to-get-human-readable-version-of-file-size#1094933
def sizeof_fmt(num, suffix='B'):
    """Format a byte count as a human-readable string with binary prefixes.

    ``num`` is divided by 1024 until it fits the current unit and is then
    rendered with one decimal place, e.g. ``sizeof_fmt(1536) == '1.5 KiB'``.
    Values too large for ``Zi`` are reported in ``Yi``.

    The unit is chosen on the value *as it will be displayed* (rounded to one
    decimal), so a number such as 1048575 is shown as ``1.0 MiB`` rather than
    the misleading ``1024.0 KiB``.
    """
    for unit in ['', 'Ki', 'Mi', 'Gi', 'Ti', 'Pi', 'Ei', 'Zi']:
        # Compare the rounded magnitude: 1023.99 would otherwise pass the
        # "< 1024" test and then be printed as "1024.0".
        if round(abs(num), 1) < 1024.0:
            return "%3.1f %s%s" % (num, unit, suffix)
        num /= 1024.0
    return "%.1f %s%s" % (num, 'Yi', suffix)
def search(db, terms, limit=30):
    """Search a local CSV dump of the Pirate Bay database.

    The dump is a ``;``-delimited file whose rows are::

        0  date (RFC 3339 format)
        1  magnet link hash, base64 encoded
        2  title
        3  size in bytes

    Lines starting with ``#`` are comments. NUL bytes in the input are
    stripped before parsing.

    Args:
        db: path of the CSV file.
        terms: iterable of search words; they are joined with single spaces
            and matched case-insensitively as one substring of the title.
        limit: maximum number of results returned, to avoid printing walls of
            results (default 30). ``None`` disables the limit.

    Returns:
        A list of dicts with the keys ``'date'``, ``'size'`` (human-readable)
        and ``'magnet'`` (a magnet URI), in file order.

    Raises:
        ValueError: if a matching row has a non-numeric size or an invalid
            base64 hash (``binascii.Error`` is a ``ValueError`` subclass).
    """
    # The query does not depend on the row, so build it once.
    query = ' '.join(terms).lower()
    results = []
    with open(db, 'r') as f:
        reader = csv.reader(replace_iter(f), delimiter=';')
        for row in reader:
            # Stop reading as soon as the page is full instead of scanning
            # the rest of a potentially very large dump.
            # TODO: consider pagination
            if limit is not None and len(results) >= limit:
                break
            # skip blank lines and comments
            if not row or row[0].startswith('#'):
                continue
            # skip truncated rows that lack one of the four expected fields
            if len(row) < 4:
                continue
            if query not in row[2].lower():
                continue
            # The dump stores the info hash as base64; magnet URIs here use
            # its upper-case hexadecimal form.
            info_hash = base64.b16encode(base64.b64decode(row[1])).decode('utf-8')
            results.append({
                'date': row[0],
                'size': sizeof_fmt(int(row[3])),
                'magnet':
                    'magnet:?xt=urn:btih:' +
                    info_hash +
                    '&dn=' +
                    parse.quote(row[2]),
            })
    return results

View File

@ -38,6 +38,8 @@ def parse_config_file(text):
# for interpolating in the command
config.set('Misc', 'openCommand', '')
config.set('Misc', 'transmission', 'false')
config.set('Misc', 'transmission-auth', '')
config.set('Misc', 'transmission-port', '')
config.set('Misc', 'colors', 'true')
config.set('Misc', 'mirror', pirate.data.default_mirror)
@ -139,7 +141,8 @@ def parse_args(args_in):
action='store_true',
help='list Sortable Types')
parser.add_argument('-L', '--local', dest='database',
help='an xml file containing the Pirate Bay database')
help='a csv file containing the Pirate Bay database '
'downloaded from https://thepiratebay.org/static/dump/csv/')
parser.add_argument('-p', dest='pages', default=1, type=int,
help='the number of pages to fetch '
"(doesn't work with --local)")
@ -219,9 +222,14 @@ def combine_configs(config, args):
args.transmission_command = ['transmission-remote']
if args.port:
args.transmission_command.append(args.port)
elif config.get('Misc', 'transmission-port'):
args.transmission_command.append(config.get('Misc', 'transmission-port'))
if args.auth:
args.transmission_command.append('--auth')
args.transmission_command.append(args.auth)
elif config.get('Misc', 'transmission-auth'):
args.transmission_command.append('--auth')
args.transmission_command.append(config.get('Misc', 'transmission-auth'))
args.output = 'browser_open'
if args.transmission or config.getboolean('Misc', 'transmission'):

View File

@ -43,10 +43,13 @@ class Printer:
even = True
if local:
table = veryprettytable.VeryPrettyTable(['LINK', 'NAME'])
table = veryprettytable.VeryPrettyTable(['LINK', 'DATE', 'SIZE', 'NAME'])
table.align['SIZE'] = 'r'
table.align['NAME'] = 'l'
else:
table = veryprettytable.VeryPrettyTable(['LINK', 'SEED', 'LEECH',
'RATIO', 'SIZE', '',
'RATIO', 'SIZE',
'UPLOAD', 'NAME'])
table.align['NAME'] = 'l'
table.align['SEED'] = 'r'
@ -65,7 +68,7 @@ class Printer:
torrent_name = parse.unquote_plus(name.group(1))
if local:
content = [n, torrent_name[:columns - 7]]
content = [n, result['date'], result['size'], torrent_name[:columns - 42]]
else:
no_seeders = int(result['seeds'])
no_leechers = int(result['leechers'])
@ -85,8 +88,8 @@ class Printer:
content = [n, no_seeders, no_leechers,
'{:.1f}'.format(ratio),
'{:.1f}'.format(size),
unit, date, torrent_name[:columns - 53]]
'{:.1f} '.format(size) + unit,
date, torrent_name[:columns - 50]]
if even or not self.enable_color:
table.add_row(content)

BIN
tests/data/db.csv Normal file

Binary file not shown.
Can't render this file because it contains an unexpected character in line 5 and column 77.

View File

@ -2,17 +2,36 @@
import unittest
import pirate.local
import os
import base64
from tests import util
class TestLocal(unittest.TestCase):
    """Checks ``pirate.local.search`` against the fixture files in tests/data."""

    def test_rich_xml(self):
        path = util.data_path('rich.xml')
        expected = [['magnet:?xt=urn:btih:b03c8641415d3a0fc7077f5bf567634442989a74&dn=High.Chaparall.S02E02.PDTV.XViD.SWEDiSH-HuBBaTiX', '?', '?']]
        actual = pirate.local.search(path, ('High',))
        self.assertEqual(actual, expected)

    def test_local_csv_db(self):
        path = util.data_path('db.csv')
        expected = [
            {
                'date': '2018-May-14 11:05:31',
                'magnet': 'magnet:?xt=urn:btih:34930674EF3BB9317FB5F263CCA830F52685235B&dn=ubuntu-14.04.5-desktop-amd64.iso',
                'size': '1.0 GiB',
            },
            {
                'date': '2018-Apr-15 00:04:09',
                'magnet': 'magnet:?xt=urn:btih:F07E0B0584745B7BCB35E98097488D34E68623D0&dn=Ubuntu%2017.10.1%20Desktop%20%28amd64%29',
                'size': '1.4 GiB',
            },
            {
                'date': '2017-Aug-01 15:08:07',
                'magnet': 'magnet:?xt=urn:btih:4096EC129404689CEB8056D907E384FF872C2CE9&dn=LINUX%20UBUNTU%2016.10%2032X64',
                'size': '1.5 GiB',
            },
        ]
        actual = pirate.local.search(path, ('ubuntu',))
        # Check the length first: zip() below would silently ignore any
        # surplus entries on either side.
        self.assertEqual(len(actual), len(expected))
        for index, (got, want) in enumerate(zip(actual, expected)):
            # subTest reports which result differs without stopping at the
            # first mismatch.
            with self.subTest(result=index):
                self.assertDictEqual(got, want)
if __name__ == '__main__':
unittest.main()

View File

@ -23,8 +23,8 @@ class TestPrint(unittest.TestCase):
'uploaded': 'never'
}]
printer.search_results(results)
prettytable.assert_called_once_with(['LINK', 'SEED', 'LEECH', 'RATIO', 'SIZE', '', 'UPLOAD', 'NAME'])
mock.add_row.assert_has_calls([call([0, 1, 2, '0.5', '3.0', 'MiB', 'never', 'name'])])
prettytable.assert_called_once_with(['LINK', 'SEED', 'LEECH', 'RATIO', 'SIZE', 'UPLOAD', 'NAME'])
mock.add_row.assert_has_calls([call([0, 1, 2, '0.5', '3.0 MiB', 'never', 'name'])])
def test_print_results_local(self):
class MockTable:
@ -36,14 +36,16 @@ class TestPrint(unittest.TestCase):
with patch('veryprettytable.VeryPrettyTable', return_value=mock) as prettytable:
results = [{
'magnet': 'dn=name',
'Name': 'name',
'date': '1',
'size': '1',
},{
'magnet': 'dn=name2',
'Name': 'name2',
'date': '2',
'size': '2',
}]
printer.search_results(results, local=True)
prettytable.assert_called_once_with(['LINK', 'NAME'])
mock.add_row.assert_has_calls([call([0, 'name']), call([1, 'name2'])])
prettytable.assert_called_once_with(['LINK', 'DATE', 'SIZE', 'NAME'])
mock.add_row.assert_has_calls([call([0, '1', '1', 'name']), call([1, '2', '2', 'name2'])])
def test_print_color(self):
printer = Printer(False)
@ -55,7 +57,7 @@ class TestPrint(unittest.TestCase):
printer.print('abc', color='zebra_1')
mock_print.assert_called_once_with('\x1b[34mabc', '\x1b[0m')
def test_print_results_local(self):
def test_print_results_local2(self):
class MockTable:
add_row = MagicMock()
align = {}
@ -65,14 +67,16 @@ class TestPrint(unittest.TestCase):
with patch('veryprettytable.VeryPrettyTable', return_value=mock) as prettytable:
results = [{
'magnet': 'dn=name',
'Name': 'name',
'date': '1',
'size': '1',
},{
'magnet': 'dn=name2',
'Name': 'name2',
'date': '2',
'size': '2',
}]
printer.search_results(results, local=True)
prettytable.assert_called_once_with(['LINK', 'NAME'])
mock.add_row.assert_has_calls([call([0, 'name']), call([1, 'name2'], fore_color='blue')])
prettytable.assert_called_once_with(['LINK', 'DATE', 'SIZE', 'NAME'])
mock.add_row.assert_has_calls([call([0, '1', '1', 'name']), call([1, '2', '2', 'name2'], fore_color='blue')])
def test_print_descriptions(self):
printer = Printer(False)