pirate-get/pirate-get.py

#!/usr/bin/env python
import webbrowser
import urllib
import urllib2
import re
import os
from HTMLParser import HTMLParser
import argparse
from pprint import pprint


# create a subclass and override the handler methods
class MyHTMLParser(HTMLParser):
    """Collect magnet links whose title contains a search term.

    The parser is a small state machine driven by the tags it sees:

    * ``'looking'`` -- waiting for the next ``<title>`` tag.
    * ``'title'``   -- inside ``<title>``; the next text chunk is compared
      (case-insensitively) with the search term.
    * ``'matched'`` -- the title matched; waiting for a ``<magnet>`` tag.
    * ``'magnet'``  -- inside ``<magnet>``; the next text chunk is used as
      the torrent hash.

    Every match is appended to ``self.results`` as
    ``[magnet_uri, '?', '?']``; the two ``'?'`` placeholders occupy the
    seed and leech slots that the rest of the script expects.
    """

    def __init__(self, q):
        """Create a parser that searches for ``q`` (case-insensitive)."""
        HTMLParser.__init__(self)
        self.q = q.lower()
        self.title = ''
        self.state = 'looking'
        # Per-instance list: a class-level list would be shared by every
        # parser and keep accumulating results across searches.
        self.results = []

    def handle_starttag(self, tag, attrs):
        """Advance the state machine when a relevant tag opens."""
        if tag == 'title':
            self.state = 'title'
        elif tag == 'magnet' and self.state == 'matched':
            self.state = 'magnet'

    def handle_data(self, data):
        """Inspect text content according to the current state."""
        if self.state == 'title':
            if self.q in data.lower():
                # Remember the title so it can be used as the display name.
                self.title = data
                self.state = 'matched'
            else:
                self.state = 'looking'
        elif self.state == 'magnet':
            magnet_uri = ('magnet:?xt=urn:btih:' + urllib.quote(data) +
                          '&dn=' + urllib.quote(self.title))
            self.results.append([magnet_uri, '?', '?'])
            self.state = 'looking'


def _search_local(database, query):
    """Search a local dump of the Pirate Bay database.

    Args:
        database: path of the XML file to read.
        query: the search term handed to ``MyHTMLParser``.

    Returns:
        A new list of ``[magnet_uri, seeds, leeches]`` rows as collected by
        ``MyHTMLParser``.
    """
    with open(database, 'r') as f:
        xml_str = f.read()
    htmlparser = MyHTMLParser(query)
    htmlparser.feed(xml_str)
    # Copy, so the caller never shares the parser's own list.
    return list(htmlparser.results)


def _read_url(url):
    """Return the body of ``url``, closing the connection even on error."""
    response = urllib2.urlopen(url)
    try:
        return response.read()
    finally:
        response.close()


# todo: redo this with html parser instead of regex
def _search_remote(mirror, query, pages):
    """Scrape ``pages`` search-result pages for ``query`` from ``mirror``.

    Args:
        mirror: base URL of the site, without a trailing slash.
        query: the search term; it is URL-encoded before being sent.
        pages: number of result pages to fetch (an int >= 1).

    Returns:
        ``(results, sizes, uploaded)`` where ``results`` is a list of
        ``[magnet_uri, seeds, leeches]`` rows and ``sizes`` / ``uploaded``
        are lists of display strings, all accumulated over every page.

    Raises:
        Whatever ``urllib2.urlopen`` raises when the mirror is unreachable.
    """
    results, sizes, uploaded = [], [], []
    for page in xrange(pages):
        res = _read_url(mirror + '/search/' + urllib.quote_plus(query) +
                        '/' + str(page) + '/7/0')
        # Each match is a (magnet, cell) pair in which exactly one group is
        # filled: either a magnet link or the text of a right-aligned cell.
        found = re.findall(
            r'''"(magnet\:\?xt=[^"]*)|<td align="right">([^<]+)</td>''', res)

        # Extend rather than assign, so earlier pages are not thrown away.
        sizes.extend(
            match.replace("&nbsp;", " ") for match in
            re.findall(r"(?<=Size )[0-9.]+\&nbsp\;[KMGT]*[i ]*B", res))
        uploaded.extend(
            match.replace("&nbsp;", " ") for match in
            re.findall(r"(?<=Uploaded ).+(?=\, Size)", res))

        # The right-aligned cells alternate seeds, leeches; a row is
        # complete once the leech count has been seen.
        state = 'seeds'
        curr = ['', 0, 0]  # magnet, seeds, leeches
        for magnet_link, cell in found:
            if cell == '':
                curr[0] = magnet_link
            elif state == 'seeds':
                curr[1] = cell
                state = 'leeches'
            else:
                curr[2] = cell
                state = 'seeds'
                results.append(curr)
                curr = ['', 0, 0]
    return results, sizes, uploaded


def _discover_mirrors():
    """Return the default site followed by any mirrors from proxybay.info.

    Failure to fetch the mirror list is not fatal: a message is printed and
    only the default site is returned.
    """
    mirrors = ["http://thepiratebay.se"]
    try:
        res = _read_url("http://proxybay.info/list.txt")
    except Exception:
        # Best effort only; Ctrl-C is deliberately not swallowed here.
        print("Could not fetch additional mirrors")
    else:
        # The first three lines are skipped as in the original list format;
        # blank lines would otherwise be tried as (invalid) mirrors.
        mirrors += [line.strip() for line in res.split("\n")[3:]
                    if line.strip()]
    return mirrors


def _ratio(seeds, leeches):
    """Return seeds/leeches as a float, or 0.0 when it cannot be computed."""
    try:
        return float(seeds) / float(leeches)
    except (ValueError, ZeroDivisionError):
        # '?' placeholders from local searches, or no leechers at all.
        return 0.0


def _column(values, index):
    """Return ``values[index]``, or ``'?'`` when the column has no entry."""
    return values[index] if index < len(values) else '?'


def _display_name(magnet_uri):
    """Extract a human-readable name from the ``dn=`` part of a magnet URI."""
    name = re.search(r"dn=([^\&]*)", magnet_uri)
    if name is None:
        return '?'
    decoded = urllib.unquote(name.group(1).encode('ascii'))
    return decoded.decode('utf-8', 'replace').replace("+", " ")


def _print_results(mags, sizes, uploaded):
    """Print the numbered result table with justified columns."""
    print("\n%-5s %-6s %-6s %-5s %-11s %-11s  %s" % (
        "LINK", "SEED", "LEECH", "RATIO", "SIZE", "UPLOAD", "NAME"))
    for m, magnet in enumerate(mags):
        print("%-5s %-6s %-6s %5.1f %-11s %-11s  %s" % (
            m, magnet[1], magnet[2],
            _ratio(magnet[1], magnet[2]),  # S/L ratio (higher is better)
            _column(sizes, m), _column(uploaded, m),
            _display_name(magnet[0])))


def main():
    """Command-line entry point: search, list results and start a download.

    Parses the command line, searches either a local database dump
    (``--local``) or the first reachable mirror, prints the results and
    hands the chosen magnet link to ``transmission-remote`` (``-t``) or to
    the default web browser.
    """
    # Local imports keep this function self-contained.
    import subprocess
    import sys

    parser = argparse.ArgumentParser(description='Finds and downloads torrents from the Pirate Bay')
    parser.add_argument('q', metavar='search_term', help="The term to search for")
    parser.add_argument('-t',dest='transmission',action='store_true', help="call transmission-remote to start the download", default=False)
    parser.add_argument('--local', dest='database', help="An xml file containing the Pirate Bay database")
    parser.add_argument('-p', dest='pages', help="The number of pages to fetch (doesn't work with --local)", default=1)
    parser.add_argument('-0', dest='first', action='store_true', help="choose the top result", default=False)
    args = parser.parse_args()

    # Defaults cover local searches (no size/upload data) and the case
    # where no mirror could be contacted.
    mags, sizes, uploaded = [], [], []
    if args.database:
        mags = _search_local(args.database, args.q)
    else:
        # Validate once, up front, so a bad value is reported as such
        # instead of looking like a connection failure on every mirror.
        try:
            pages = int(args.pages)
        except (TypeError, ValueError):
            pages = 0
        if pages < 1:
            parser.error("Please provide an integer greater than 0 for the number of pages to fetch.")

        # Catch the Ctrl-C exception and exit cleanly
        try:
            for mirror in _discover_mirrors():
                try:
                    print("Trying " + mirror)
                    mags, sizes, uploaded = _search_remote(mirror, args.q, pages)
                    break
                except Exception:
                    print("Could not contact " + mirror)
        except KeyboardInterrupt:
            print("\nCancelled.")
            sys.exit()

    if not mags:
        print("no results")
        return

    _print_results(mags, sizes, uploaded)

    if args.first:
        print("Choosing first result")
        choice = 0
    else:
        try:
            answer = raw_input("Select a link: ")
        except (KeyboardInterrupt, EOFError):
            print("\nCancelled.")
            sys.exit()

        try:
            choice = int(answer)
        except ValueError:
            choice = None
        # Reject numbers that do not correspond to a listed row (negative
        # values would otherwise silently index from the end).
        if choice is not None and not 0 <= choice < len(mags):
            choice = None

    if choice is None:
        print("Cancelled.")
        return

    url = mags[choice][0]
    print("")
    print("url:")
    print(url)
    if args.transmission:
        # Argument lists avoid the shell: the magnet link comes from
        # scraped content and must not be interpreted as shell syntax.
        try:
            subprocess.call(["transmission-remote", "--add", url])
            subprocess.call(["transmission-remote", "-l"])
        except OSError:
            print("Could not run transmission-remote")
    else:
        webbrowser.open(url)

# Run the command-line interface only when this file is executed as a
# script, not when it is imported as a module.
if __name__ == "__main__":
    main()
initial commit 2012-11-16 08:52:09 +01:00			`#!/usr/bin/env python`
			`import webbrowser`
			`import urllib`
Revert "Merge pull request #7 from gausie/check_error_code_for_mirrors" This reverts commit 1c8f5056a5cbba07927b6ed1a870ea71887dbd29, reversing changes made to 951fc1a28de1dfcac1c0187a126d8e21bc89b2ca. 2014-02-01 12:53:39 +01:00			`import urllib2`
initial commit 2012-11-16 08:52:09 +01:00			`import re`
-t option for transmission 2013-12-14 14:30:11 +01:00			`import os`
added the ability to use a local download the db 2013-02-21 21:19:40 +01:00			`from HTMLParser import HTMLParser`
			`import argparse`
Sizes fix Fixed the regular expression so that sizes in bytes (B) work properly. 2013-12-10 14:41:56 +01:00			`from pprint import pprint`
initial commit 2012-11-16 08:52:09 +01:00
added the ability to use a local download the db 2013-02-21 21:19:40 +01:00
			`# create a subclass and override the handler methods`
			`class MyHTMLParser(HTMLParser):`
			`title = ''`
			`q = ''`
			`state = 'looking'`
			`results = []`

			`def __init__(self, q):`
			`HTMLParser.__init__(self)`
			`self.q = q.lower()`

			`def handle_starttag(self, tag, attrs):`
			`if tag == 'title':`
			`self.state = 'title'`
			`if tag == 'magnet' and self.state == 'matched':`
			`self.state = 'magnet'`

			`def handle_data(self, data):`
			`if self.state == 'title':`
			`if data.lower().find(self.q) != -1:`
			`self.title = data`
			`self.state = 'matched'`
			`else:`
			`self.state = 'looking'`
			`if self.state == 'magnet':`
display number of seeders and leechers in results fixes #1 2013-02-26 22:48:02 +01:00			`self.results.append(['magnet:?xt=urn:btih:' + urllib.quote(data) + '&dn=' + urllib.quote(self.title), '?', '?'])`
added the ability to use a local download the db 2013-02-21 21:19:40 +01:00			`self.state = 'looking'`


			`def main():`
			`parser = argparse.ArgumentParser(description='Finds and downloads torrents from the Pirate Bay')`
rearrange a few lines 2013-02-21 21:31:19 +01:00			`parser.add_argument('q', metavar='search_term', help="The term to search for")`
fix wrong default 2013-12-14 14:48:00 +01:00			`parser.add_argument('-t',dest='transmission',action='store_true', help="call transmission-remote to start the download", default=False)`
rearrange a few lines 2013-02-21 21:31:19 +01:00			`parser.add_argument('--local', dest='database', help="An xml file containing the Pirate Bay database")`
add variable number of pages to fetch option 2013-02-27 18:14:39 +01:00			`parser.add_argument('-p', dest='pages', help="The number of pages to fetch (doesn't work with --local)", default=1)`
Added -0 flag to choose the first (0th) result 2014-01-31 23:16:37 +01:00			`parser.add_argument('-0', dest='first', action='store_true', help="choose the top result", default=False)`
added the ability to use a local download the db 2013-02-21 21:19:40 +01:00
			`def local(args):`
			`xml_str = ''`
			`with open(args.database, 'r') as f:`
			`xml_str += f.read()`
			`htmlparser = MyHTMLParser(args.q)`
			`htmlparser.feed(xml_str)`
			`return htmlparser.results`

display number of seeders and leechers in results fixes #1 2013-02-26 22:48:02 +01:00			`#todo: redo this with html parser instead of regex`
automatically discover mirrors from proxybay.info closes #5 2014-02-01 10:42:58 +01:00			`def remote(args, mirror):`
display number of seeders and leechers in results fixes #1 2013-02-26 22:48:02 +01:00			`res_l = []`
add variable number of pages to fetch option 2013-02-27 18:14:39 +01:00			`try:`
			`pages = int(args.pages)`
			`if pages < 1:`
			`raise Exception('')`
			`except Exception:`
			`raise Exception("Please provide an integer greater than 0 for the number of pages to fetch.")`

Revert 3affc91..43444db This rolls back to commit 3affc9171d0d520dca95b1e2c7ae7a607005f0e7. 2013-11-11 22:42:51 +01:00			`# Catch the Ctrl-C exception and exit cleanly`
			`try:`
			`for page in xrange(pages):`
Revert "Merge pull request #7 from gausie/check_error_code_for_mirrors" This reverts commit 1c8f5056a5cbba07927b6ed1a870ea71887dbd29, reversing changes made to 951fc1a28de1dfcac1c0187a126d8e21bc89b2ca. 2014-02-01 12:53:39 +01:00			`f = urllib2.urlopen(mirror + '/search/' + args.q.replace(" ", "+") + '/' + str(page) + '/7/0')`
			`res = f.read()`
Revert 3affc91..43444db This rolls back to commit 3affc9171d0d520dca95b1e2c7ae7a607005f0e7. 2013-11-11 22:42:51 +01:00			`found = re.findall(""""(magnet\:\?xt=[^"]*)\|<td align="right">([^<]+)</td>""", res)`

			`# get sizes as well and substitute the   character`
Fixed regex to match integer sizes 2013-11-12 11:33:32 +01:00			`# print res`
Sizes fix Fixed the regular expression so that sizes in bytes (B) work properly. 2013-12-10 14:41:56 +01:00			`sizes = [ match.replace(" ", " ") for match in re.findall("(?<=Size )[0-9.]+\&nbsp\;[KMGT][i ]B",res) ]`
Added "uploaded" column 2013-11-12 11:53:17 +01:00			`uploaded = [ match.replace(" ", " ") for match in re.findall("(?<=Uploaded ).+(?=\, Size)",res) ]`
Sizes fix Fixed the regular expression so that sizes in bytes (B) work properly. 2013-12-10 14:41:56 +01:00			`# pprint(sizes); print len(sizes)`
			`# pprint(uploaded); print len(uploaded)`
Revert 3affc91..43444db This rolls back to commit 3affc9171d0d520dca95b1e2c7ae7a607005f0e7. 2013-11-11 22:42:51 +01:00			`state = "seeds"`
			`curr = ['',0,0] #magnet, seeds, leeches`
			`for f in found:`
			`if f[1] == '':`
			`curr[0] = f[0]`
add variable number of pages to fetch option 2013-02-27 18:14:39 +01:00			`else:`
Revert 3affc91..43444db This rolls back to commit 3affc9171d0d520dca95b1e2c7ae7a607005f0e7. 2013-11-11 22:42:51 +01:00			`if state == 'seeds':`
			`curr[1] = f[1]`
			`state = 'leeches'`
			`else:`
			`curr[2] = f[1]`
			`state = 'seeds'`
			`res_l.append(curr)`
			`curr = ['', 0, 0]`
			`except KeyboardInterrupt :`
			`print "\nCancelled."`
			`exit()`

			`# return the sizes in a spearate list`
Added "uploaded" column 2013-11-12 11:53:17 +01:00			`return res_l, sizes, uploaded`
added the ability to use a local download the db 2013-02-21 21:19:40 +01:00
			`args = parser.parse_args()`
			`if args.database:`
			`mags = local(args)`
			`else:`
Revert "Merge pull request #7 from gausie/check_error_code_for_mirrors" This reverts commit 1c8f5056a5cbba07927b6ed1a870ea71887dbd29, reversing changes made to 951fc1a28de1dfcac1c0187a126d8e21bc89b2ca. 2014-02-01 12:53:39 +01:00			`mirrors = ["http://thepiratebay.se"]`
automatically discover mirrors from proxybay.info closes #5 2014-02-01 10:42:58 +01:00			`try:`
Revert "Merge pull request #7 from gausie/check_error_code_for_mirrors" This reverts commit 1c8f5056a5cbba07927b6ed1a870ea71887dbd29, reversing changes made to 951fc1a28de1dfcac1c0187a126d8e21bc89b2ca. 2014-02-01 12:53:39 +01:00			`f = urllib2.urlopen("http://proxybay.info/list.txt")`
			`res = f.read()`
automatically discover mirrors from proxybay.info closes #5 2014-02-01 10:42:58 +01:00			`mirrors += res.split("\n")[3:]`
			`except:`
			`print "Could not fetch additional mirrors"`
			`for mirror in mirrors:`
			`try:`
			`print("Trying " + mirror)`
			`mags, sizes, uploaded = remote(args, mirror)`
			`break`
			`except Exception, e:`
			`print("Could not contact " + mirror)`
added the ability to use a local download the db 2013-02-21 21:19:40 +01:00
add variable number of pages to fetch option 2013-02-27 18:14:39 +01:00			`if mags and len(mags) > 0:`
Revert 3affc91..43444db This rolls back to commit 3affc9171d0d520dca95b1e2c7ae7a607005f0e7. 2013-11-11 22:42:51 +01:00			`# enhanced print output with column titles`
Added space between last two columns 2013-11-12 11:55:37 +01:00			`print "\n%-5s %-6s %-6s %-5s %-11s %-11s %s" % ( "LINK", "SEED", "LEECH", "RATIO", "SIZE", "UPLOAD", "NAME")`
added the ability to use a local download the db 2013-02-21 21:19:40 +01:00			`for m in range(len(mags)):`
display number of seeders and leechers in results fixes #1 2013-02-26 22:48:02 +01:00			`magnet = mags[m]`
			`name = re.search("dn=([^\&]*)", magnet[0])`
Revert 3affc91..43444db This rolls back to commit 3affc9171d0d520dca95b1e2c7ae7a607005f0e7. 2013-11-11 22:42:51 +01:00
			`# compute the S/L ratio (Higher is better)`
Fixed ZeroDivisionError Fixed ZeroDivisionError if there are no leechers. 2013-11-12 10:09:43 +01:00			`try:`
			`ratio = float(magnet[1])/float(magnet[2])`
			`except ZeroDivisionError:`
			`ratio = 0`
Revert 3affc91..43444db This rolls back to commit 3affc9171d0d520dca95b1e2c7ae7a607005f0e7. 2013-11-11 22:42:51 +01:00
			`# enhanced print output with justified columns`
Added space between last two columns 2013-11-12 11:55:37 +01:00			`print "%-5s %-6s %-6s %5.1f %-11s %-11s %s" % (m, magnet[1], magnet[2], ratio ,sizes[m], uploaded[m],urllib.unquote(name.group(1).encode('ascii')).decode('utf-8').replace("+", " ") )`
Revert 3affc91..43444db This rolls back to commit 3affc9171d0d520dca95b1e2c7ae7a607005f0e7. 2013-11-11 22:42:51 +01:00
Added -0 flag to choose the first (0th) result 2014-01-31 23:16:37 +01:00			`if args.first:`
			`print "Choosing first result";`
			`choice = 0`
Revert 3affc91..43444db This rolls back to commit 3affc9171d0d520dca95b1e2c7ae7a607005f0e7. 2013-11-11 22:42:51 +01:00
Added -0 flag to choose the first (0th) result 2014-01-31 23:16:37 +01:00			`else:`
			`try:`
			`l = raw_input("Select a link: ")`
			`except KeyboardInterrupt :`
			`print "\nCancelled."`
			`exit()`

			`try:`
			`choice = int(l)`
			`except Exception:`
			`choice = None`
Revert 3affc91..43444db This rolls back to commit 3affc9171d0d520dca95b1e2c7ae7a607005f0e7. 2013-11-11 22:42:51 +01:00
display number of seeders and leechers in results fixes #1 2013-02-26 22:48:02 +01:00			`if not choice == None:`
-t option for transmission 2013-12-14 14:30:11 +01:00			`url = mags[choice][0]`
			`print`
			`print "url:"`
			`print url`
			`if args.transmission:`
			`os.system("""transmission-remote --add "%s" """ % (url))`
			`os.system("transmission-remote -l")`
			`else:`
			`webbrowser.open(url)`
display number of seeders and leechers in results fixes #1 2013-02-26 22:48:02 +01:00			`else:`
			`print "Cancelled."`
added the ability to use a local download the db 2013-02-21 21:19:40 +01:00			`else:`
			`print "no results"`

			`if __name__ == "__main__":`
			`main()`