diff --git a/README.md b/README.md index 6ca0749..d0c0e6c 100644 --- a/README.md +++ b/README.md @@ -7,7 +7,7 @@ Tested on Arch Linux mostly. It should work on any other Linux too. Let me know ## Installation -Make sure you have python 2 installed. +Make sure you have python 2 and pip installed. Run install.sh diff --git a/install.sh b/install.sh index c60bdb3..0db071c 100755 --- a/install.sh +++ b/install.sh @@ -14,10 +14,13 @@ TMP=$(mktemp pirate-get-XXXXXX) echo "#!/usr/bin/env python" > "$TMP" fi -sed 1d $(dirname $0)/pirate-get.py >> "$TMP" + sed 1d $(dirname $0)/pirate-get.py >> "$TMP" cp "$TMP" /usr/bin/pirate-get && chmod +x /usr/bin/pirate-get && chmod 755 /usr/bin/pirate-get && + + pip install -r requirements.txt && + rm $TMP } || rm $TMP diff --git a/pirate-get.py b/pirate-get.py index a291060..0ce5392 100755 --- a/pirate-get.py +++ b/pirate-get.py @@ -33,6 +33,8 @@ from pprint import pprint from StringIO import StringIO import gzip +from bs4 import BeautifulSoup + class NoRedirection(urllib2.HTTPErrorProcessor): def http_response(self, request, response): @@ -92,8 +94,8 @@ def main(): parser.add_argument('-b', dest='browse', action='store_true', help="Display in Browse mode", default=False) parser.add_argument('search', metavar='search', nargs="*", help="Term to search for") parser.add_argument('-c', dest='category', metavar='category', help="Specify a category to search", default="All") - parser.add_argument('-s', dest='sort', metavar='sort', help="Specify a sort option", default="SeedersDsc") - parser.add_argument('-R', dest='recent', action='store_true', help="Torrents uploaded in the last 48hours. *ignored in searches*", default=False) + parser.add_argument('-s', dest='sort', metavar='sort', help="Specify a sort option", default="seeders.desc") + parser.add_argument('-R', dest='recent', action='store_true', help="Torrents uploaded in the last two weeks. *ignored in searches*", default=False) parser.add_argument('-l', dest='list_categories', action='store_true', help="List categories", default=False) parser.add_argument('--list_sorts', dest='list_sorts', action='store_true', help="List Sortable Types", default=False) parser.add_argument('-t',dest='transmission',action='store_true', help="call transmission-remote to start the download", default=False) @@ -104,9 +106,30 @@ def main(): parser.add_argument('-a', dest='download_all', action='store_true', help="download all results", default=False) parser.add_argument('--color', dest='color', action='store_true', help="use colored output", default=False) - categories = {"All":"0","Audio":"100","Audio/Music":"101","Audio/Audio books":"102","Audio/Sound clips":"103","Audio/FLAC":"104","Audio/Other":"199","Video":"200","Video/Movies":"201","Video/Movies DVDR":"202","Video/Music videos":"203","Video/Movie clips":"204","Video/TV shows":"205","Video/Handheld":"206","Video/HD - Movies":"207","Video/HD - TV shows":"208","Video/3D":"209","Video/Other":"299","Applications":"300","Applications/Windows":"301","Applications/Mac":"302","Applications/UNIX":"303","Applications/Handheld":"304","Applications/IOS (iPad/iPhone)":"305","Applications/Android":"306","Applications/Other OS":"399","Games":"400","Games/PC":"401","Games/Mac":"402","Games/PSx":"403","Games/XBOX360":"404","Games/Wii":"405","Games/Handheld":"406","Games/IOS (iPad/iPhone)":"407","Games/Android":"408","Games/Other":"499","Porn":"500","Porn/Movies":"501","Porn/Movies DVDR":"502","Porn/Pictures":"503","Porn/Games":"504","Porn/HD - Movies":"505","Porn/Movie clips":"506","Porn/Other":"599","Other":"600","Other/E-books":"601","Other/Comics":"602","Other/Pictures":"603","Other/Covers":"604","Other/Physibles":"605","Other/Other":"699"} + categories = { + "All":"0", + "Anime":"1", + "Software":"2", + "Games":"3", + "Adult":"4", + "Movies":"5", + "Music":"6", + "Other":"7", + "Series & TV":"8", + "Books":"9", + } - sorts = {"TitleDsc":"1","TitleAsc":"2","DateDsc":"3","DateAsc":"4","SizeDsc":"5","SizeAsc":"6","SeedersDsc":"7","SeedersAsc":"8","LeechersDsc":"9","LeechersAsc":"10","CategoryDsc":"13","CategoryAsc":"14","Default":"99"} + sorts = { + "created_at":"0", + "created_at.desc":"1", + "size":"2", + "size.desc":"3", + "seeders":"4", + "seeders.desc":"5", + "leechers":"6", + "leechers.desc":"7", + } + reverse_sorts = {v: k for k, v in sorts.items()} #todo: redo this with html parser instead of regex def remote(args, mirror): @@ -126,35 +149,37 @@ def main(): category = "0"; print ("Invalid category ignored", color="WARN") - if str(args.sort) in sorts.values(): + if args.sort in sorts.keys(): sort = args.sort; - elif args.sort in sorts.keys(): - sort = sorts[args.sort] + elif args.sort in sorts.values(): + sort = reverse_sorts[args.sort] else: - sort = "99"; print ("Invalid sort ignored", color="WARN") + query_parameters = { + "iht":"0", + "age":"0", + "Torrent_sort":"", + "LTorrent_page":0, + "q":"", + } # Catch the Ctrl-C exception and exit cleanly try: sizes = [] uploaded = [] identifiers = [] + OPENBAY_PAGE_LEN = 40 for page in xrange(pages): - # - if args.browse: - path = "/browse/" - if(category == "0"): - category = '100' - path = '/browse/' + category + '/' + str(page) + '/' + str(sort) - elif len(args.search) == 0: - path = "/top/48h" if args.recent else "/top/" - if(category == "0"): - path += 'all' - else: - path += category + query_parameters["LTorrent_page"] = page * OPENBAY_PAGE_LEN + query_parameters["Torrent_sort"] = sort + query_parameters["iht"] = category + if len(args.search) == 0: + query_parameters["age"] = "14" if args.recent else "0" else: - path = '/search/' + "+".join(args.search) + '/' + str(page) + '/' + str(sort) + '/' + category + query_parameters["q"] = "+".join(args.search) + + path = "/search.php?" + '&'.join(k + "=" + str(v) for k, v in query_parameters.items()) request = urllib2.Request(mirror + path) request.add_header('Accept-encoding', 'gzip') @@ -163,39 +188,36 @@ def main(): buf = StringIO(f.read()) f = gzip.GzipFile(fileobj=buf) res = f.read() - found = re.findall(""""(magnet\:\?xt=[^"]*)|([^<]+)""", res) - # check for a blocked mirror - no_results = re.search(""""No hits\.""", res) - if found == [] and not no_results is None: - # Contradiction - we found no results, but the page didn't say there were no results - # the page is probably not actually the pirate bay, so let's try another mirror - raise Exception("Blocked mirror detected.") + soup = BeautifulSoup(res) + found = soup.select('table.table-torrents>tbody>tr') - # get sizes as well and substitute the   character - sizes.extend([match.replace(" ", " ") for match in re.findall("(?<=Size )[0-9.]+\ \;[KMGT]*[i ]*B",res)]) - uploaded.extend([match.replace(" ", " ") for match in re.findall("(?<=Uploaded ).+(?=\, Size)",res)]) - identifiers.extend([match.replace(" ", " ") for match in re.findall("(?<=/torrent/)[0-9]+(?=/)",res)]) + results_body = soup.table.tbody - state = "seeds" - curr = ['',0,0] #magnet, seeds, leeches - for f in found: - if f[1] == '': - curr[0] = f[0] - else: - if state == 'seeds': - curr[1] = f[1] - state = 'leeches' - else: - curr[2] = f[1] - state = 'seeds' - res_l.append(curr) - curr = ['', 0, 0] + get_text = lambda elements: [element.get_text() for element in elements] + get_text_by_class = lambda class_: get_text(results_body.find_all(class_=class_)) + get_links = lambda links: [link.get('href') for link in links] + + sizes.extend(get_text_by_class('size-row')) + uploaded.extend(get_text_by_class('date-row')) + identifiers.extend([ + re.search('torrent/(\d+)', link).group(1) + for link in + get_links(results_body.find_all('a', href=re.compile('/torrent/'))) + ]) + + links = get_links(results_body.find_all('a', title='MAGNET LINK')) + seeders = get_text_by_class('seeders-row') + leechers = get_text_by_class('leechers-row') + for i in xrange(len(links)): + res_l.append([links[i], seeders[i], leechers[i]]) + + if len(links) < OPENBAY_PAGE_LEN: + break except KeyboardInterrupt : print("\nCancelled.") exit() - # return the sizes in a spearate list return res_l, sizes, uploaded, identifiers args = parser.parse_args() @@ -261,7 +283,7 @@ def main(): if args.database: mags = local(args) else: - mirrors = ["http://thepiratebay.se"] + mirrors = ["https://oldpiratebay.org", "http://thepiratebay.se"] try: opener = urllib2.build_opener(NoRedirection) f = opener.open("https://proxybay.info/list.txt") @@ -275,14 +297,14 @@ def main(): try: print("Trying " + mirror) mags, sizes, uploaded, identifiers = remote(args, mirror) - break + if not mags or len(mags) == 0: + print("No results from " + mirror) + else: + break except Exception, e: print(format(e)) print("Could not contact " + mirror, color="WARN") - if not mags or len(mags) == 0: - print("no results") - return # enhanced print output with column titles def print_search_results(): print("%5s %6s %6s %-5s %-11s %-11s %s" \ @@ -312,7 +334,7 @@ def main(): uploaded[m], torrent_name), color=cur_color) def print_descriptions(chosen_links): for link in chosen_links: - path = '/torrent/' + identifiers[int(link)] + '/' + path = '/torrent/' + identifiers[int(link)] + '/pirate-get' request = urllib2.Request(mirror + path) request.add_header('Accept-encoding', 'gzip') f = urllib2.urlopen(request) @@ -320,14 +342,19 @@ def main(): buf = StringIO(f.read()) f = gzip.GzipFile(fileobj=buf) res = f.read() + name = re.search("dn=([^\&]*)", mags[int(link)][0]) torrent_name = urllib.unquote(name.group(1).encode('ascii')) \ .decode('utf-8').replace("+", " ") - desc = re.search(r"
\s*
(.+?)(?=
)", res, re.DOTALL).group(1) + + desc = re.search(r"
\s*
(.*?)(?=
)", res, re.DOTALL).group(1) # Replace HTML links with markdown style versions - desc = re.sub(r"]*>(\s*)([^<]+?)(\s*)", r"\2[\3](\1)\4", desc) - print ('Description for "' + torrent_name + '":', color="zebra_1") - print (desc, color="zebra_0") + desc = re.sub(r"]*>(\s*)([^<]+?)(\s*)", r"\2[\3](\1)\4", desc).strip() + if desc == '': + print ('No description given for "' + torrent_name + '"', color="zebra_1") + else: + print ('Description for "' + torrent_name + '":', color="zebra_1") + print (desc, color="zebra_0") def print_fileLists(chosen_links): for link in chosen_links: diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 0000000..f7bd8a1 --- /dev/null +++ b/requirements.txt @@ -0,0 +1,2 @@ +beautifulsoup4==4.3.2 +colorama==0.3.2