diff --git a/qutebrowser/config/configtypes.py b/qutebrowser/config/configtypes.py index 4066c4cbf..4c6a3b4c8 100644 --- a/qutebrowser/config/configtypes.py +++ b/qutebrowser/config/configtypes.py @@ -1569,49 +1569,41 @@ class UserAgent(BaseType): def validate(self, value): self._basic_validation(value) + # To update the following list of user agents, run the script 'ua_fetch.py' + # Vim-protip: Place your cursor below this comment and run + # :r!python scripts/dev/ua_fetch.py def complete(self): """Complete a list of common user agents.""" out = [ - ('Mozilla/5.0 (Windows NT 6.1; WOW64; rv:35.0) Gecko/20100101 ' - 'Firefox/35.0', - "Firefox 35.0 Win7 64-bit"), - ('Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:35.0) Gecko/20100101 ' - 'Firefox/35.0', - "Firefox 35.0 Ubuntu"), - ('Mozilla/5.0 (Macintosh; Intel Mac OS X 10.10; rv:35.0) ' - 'Gecko/20100101 Firefox/35.0', - "Firefox 35.0 MacOSX"), + ('Mozilla/5.0 (Windows NT 6.1; WOW64; rv:41.0) Gecko/20100101 ' + 'Firefox/41.0', + "Firefox 41.0 Win7 64-bit"), + ('Mozilla/5.0 (Macintosh; Intel Mac OS X 10.10; rv:41.0) ' + 'Gecko/20100101 Firefox/41.0', + "Firefox 41.0 MacOSX"), + ('Mozilla/5.0 (X11; Linux x86_64; rv:41.0) Gecko/20100101 ' + 'Firefox/41.0', + "Firefox 41.0 Linux"), - ('Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_2) ' - 'AppleWebKit/600.3.18 (KHTML, like Gecko) Version/8.0.3 ' - 'Safari/600.3.18', - "Safari 8.0 MacOSX"), + ('Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11_1) ' + 'AppleWebKit/601.2.7 (KHTML, like Gecko) Version/9.0.1 ' + 'Safari/601.2.7', + "Safari Generic MacOSX"), + ('Mozilla/5.0 (iPad; CPU OS 9_1 like Mac OS X) ' + 'AppleWebKit/601.1.46 (KHTML, like Gecko) Version/9.0 ' + 'Mobile/13B143 Safari/601.1', + "Mobile Safari Generic iOS"), ('Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, ' - 'like Gecko) Chrome/40.0.2214.111 Safari/537.36', - "Chrome 40.0 Win7 64-bit"), - ('Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_2) ' - 'AppleWebKit/537.36 (KHTML, like Gecko) 
Chrome/40.0.2214.111 ' + 'like Gecko) Chrome/46.0.2490.80 Safari/537.36', + "Chrome 46.0 Win7 64-bit"), + ('Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_5) ' + 'AppleWebKit/537.36 (KHTML, like Gecko) Chrome/46.0.2490.80 ' 'Safari/537.36', - "Chrome 40.0 MacOSX"), - ('Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 ' - '(KHTML, like Gecko) Chrome/40.0.2214.111 Safari/537.36', - "Chrome 40.0 Linux"), - - ('Mozilla/5.0 (Windows NT 6.1; WOW64; Trident/7.0; rv:11.0) like ' - 'Gecko', - "IE 11.0 Win7 64-bit"), - - ('Mozilla/5.0 (iPhone; CPU iPhone OS 8_1_2 like Mac OS X) ' - 'AppleWebKit/600.1.4 (KHTML, like Gecko) Version/8.0 ' - 'Mobile/12B440 Safari/600.1.4', - "Mobile Safari 8.0 iOS"), - ('Mozilla/5.0 (Android; Mobile; rv:35.0) Gecko/35.0 Firefox/35.0', - "Firefox 35, Android"), - ('Mozilla/5.0 (Linux; Android 5.0.2; One Build/KTU84L.H4) ' - 'AppleWebKit/537.36 (KHTML, like Gecko) Version/4.0 ' - 'Chrome/37.0.0.0 Mobile Safari/537.36', - "Android Browser"), + "Chrome 46.0 MacOSX"), + ('Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, ' + 'like Gecko) Chrome/46.0.2490.80 Safari/537.36', + "Chrome 46.0 Linux"), ('Mozilla/5.0 (compatible; Googlebot/2.1; ' '+http://www.google.com/bot.html', @@ -1619,7 +1611,11 @@ class UserAgent(BaseType): ('Wget/1.16.1 (linux-gnu)', "wget 1.16.1"), ('curl/7.40.0', - "curl 7.40.0") + "curl 7.40.0"), + + ('Mozilla/5.0 (Windows NT 6.1; WOW64; Trident/7.0; rv:11.0) like ' + 'Gecko', + "IE 11.0 for Desktop Win7 64-bit") ] return out diff --git a/scripts/dev/ua_fetch.py b/scripts/dev/ua_fetch.py new file mode 100644 index 000000000..85df09628 --- /dev/null +++ b/scripts/dev/ua_fetch.py @@ -0,0 +1,109 @@ +#!/usr/bin/env python3 +# vim: ft=python fileencoding=utf-8 sts=4 sw=4 et: + +# Copyright 2015 lamarpavel +# Copyright 2015 Alexey Nabrodov (Averrin) +# +# This file is part of qutebrowser. 
def fetch(timeout=30):
    """Download the list of popular user agents and return its table body.

    Args:
        timeout: Seconds to wait for the server before giving up. Without a
                 timeout, requests would wait indefinitely on a stalled
                 connection.

    Returns:
        The first element matched by the table-body XPath below. Iterating
        it is expected to yield the table rows that filter_list() consumes
        (assumption based on how the script uses the result).

    Raises:
        requests.RequestException: If the request fails, times out or the
                                   server answers with an HTTP error status.
        IndexError: If the page does not contain the expected table, e.g.
                    because its layout changed.
    """
    url = 'https://techblog.willshouse.com/2012/01/03/most-common-user-agents/'
    response = requests.get(url, timeout=timeout)
    # Fail loudly on HTTP errors instead of trying to parse an error page.
    response.raise_for_status()
    tree = html.fromstring(response.text)
    path = '//*[@id="post-2229"]/div[2]/table/tbody'
    matches = tree.xpath(path)
    if not matches:
        # Same exception type as the plain [0] lookup would raise, but with
        # a message that points at the actual problem.
        raise IndexError(
            "No user agent table found at {} (XPath {})".format(url, path))
    return matches[0]
def filter_list(complete_list, browsers):
    """Pick one user agent per wanted browser/platform combination.

    Args:
        complete_list: Iterable of table rows. For every row, the items at
                       index 1 and 2 must provide text_content(), giving the
                       user-agent string and its description respectively.
        browsers: Look-up table of the format {browser: versions}, e.g.
                  {"Firefox": {"Linux", "MacOSX"}}. Browser and version are
                  both matched case-insensitively as substrings of the
                  description. The table is not modified.

    Returns:
        A dict with the same keys as browsers, mapping every browser name to
        a list of (user_agent, browser_description) tuples in the order they
        were found. Each version is used at most once per browser; browsers
        without any match map to an empty list.
    """
    # Work on copies so the caller's look-up table stays untouched.
    remaining = {name: list(versions) for name, versions in browsers.items()}
    # Pre-populate all keys so callers can index the result unconditionally.
    table = {name: [] for name in browsers}
    for entry in complete_list:
        # Tuple of (user_agent, browser_description)
        candidate = (entry[1].text_content(), entry[2].text_content())
        description = candidate[1].lower()
        for name, versions in remaining.items():
            if name.lower() not in description:
                continue
            match = next((version for version in versions
                          if version.lower() in description), None)
            if match is not None:
                table[name].append(candidate)
                # Every version should only show up once per browser.
                versions.remove(match)
                # An entry is assigned to the first matching browser only.
                break
    return table


def add_diversity(table):
    """Insert a few additional non-browser entries for diversity.

    Args:
        table: Dict as returned by filter_list(). It is modified in place by
               (re)setting its "Obscure" key.

    Returns:
        The very same table object.
    """
    table["Obscure"] = [
        # The closing parenthesis is part of Googlebot's real user agent.
        ('Mozilla/5.0 (compatible; Googlebot/2.1; '
         '+http://www.google.com/bot.html)',
         "Google Bot"),
        ('Wget/1.16.1 (linux-gnu)',
         "wget 1.16.1"),
        ('curl/7.40.0',
         "curl 7.40.0")
    ]
    return table


def main():
    """Print a complete() method which can be pasted into configtypes.py."""
    fetched = fetch()
    lut = {
        "Firefox": {"Win", "MacOSX", "Linux", "Android"},
        "Chrome": {"Win", "MacOSX", "Linux"},
        "Safari": {"MacOSX", "iOS"}
    }
    filtered = add_diversity(filter_list(fetched, lut))

    # Four-space indentation, matching the file's modeline (sts=4 sw=4).
    tab = "    "
    entry_indent = 3 * tab
    print("%sdef complete(self):" % tab)
    print("%s\"\"\"Complete a list of common user agents.\"\"\"" % (2 * tab))
    print("%sout = [" % (2 * tab))

    for browser in ["Firefox", "Safari", "Chrome", "Obscure"]:
        # A browser which was not found on the page simply has no entries.
        for it in filtered.get(browser, []):
            print("%s(\'%s\',\n%s \"%s\")," % (entry_indent, it[0],
                                               entry_indent, it[1]))
        # Blank line between the groups.
        print("")

    # IE is always appended by hand; as the last list element it carries no
    # trailing comma.
    ie_lines = [
        "('Mozilla/5.0 (Windows NT 6.1; WOW64; Trident/7.0; rv:11.0) like '",
        " 'Gecko',",
        " \"IE 11.0 for Desktop Win7 64-bit\")",
    ]
    print("\n".join(entry_indent + line for line in ie_lines))

    print("%s]\n%sreturn out\n" % (2 * tab, 2 * tab))


if __name__ == '__main__':
    main()
--output-format=colorized --reports=no --expected-line-ending-format=LF