2015-10-20 17:28:22 +02:00
|
|
|
|
#!/usr/bin/env python3
|
|
|
|
|
# vim: ft=python fileencoding=utf-8 sts=4 sw=4 et:
|
|
|
|
|
|
2017-05-09 21:37:03 +02:00
|
|
|
|
# Copyright 2015-2017 lamarpavel
|
|
|
|
|
# Copyright 2015-2017 Alexey Nabrodov (Averrin)
|
|
|
|
|
# Copyright 2015-2017 Florian Bruhin (The Compiler) <mail@qutebrowser.org>
|
2015-10-20 17:28:22 +02:00
|
|
|
|
#
|
|
|
|
|
# This file is part of qutebrowser.
|
|
|
|
|
#
|
|
|
|
|
# qutebrowser is free software: you can redistribute it and/or modify
|
|
|
|
|
# it under the terms of the GNU General Public License as published by
|
|
|
|
|
# the Free Software Foundation, either version 3 of the License, or
|
|
|
|
|
# (at your option) any later version.
|
|
|
|
|
#
|
|
|
|
|
# qutebrowser is distributed in the hope that it will be useful,
|
|
|
|
|
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
|
|
|
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
|
|
|
# GNU General Public License for more details.
|
|
|
|
|
#
|
|
|
|
|
# You should have received a copy of the GNU General Public License
|
|
|
|
|
# along with qutebrowser. If not, see <http://www.gnu.org/licenses/>.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
"""Fetch list of popular user-agents.
|
|
|
|
|
|
2015-11-17 19:46:24 +01:00
|
|
|
|
The script is based on a gist posted by github.com/averrin. The output of this
script is formatted to be pasted into configtypes.py.
|
|
|
|
|
"""
|
|
|
|
|
|
|
|
|
|
import requests
|
2016-04-27 18:30:54 +02:00
|
|
|
|
from lxml import html # pylint: disable=import-error
|
2015-10-20 17:28:22 +02:00
|
|
|
|
|
2015-11-14 15:57:24 +01:00
|
|
|
|
|
|
|
|
|
def fetch():
    """Fetch the table of popular user-agents.

    Return:
        The first element matched by the table-body XPath on the fetched
        page. NOTE(review): presumably its children are the table rows
        consumed by filter_list() - confirm against the live page layout.

    Raises:
        requests.HTTPError: The server answered with an error status.
        IndexError: The page contains no element matching the XPath.
    """
    url = 'https://techblog.willshouse.com/2012/01/03/most-common-user-agents/'
    # Without a timeout, requests waits indefinitely on a stalled server.
    response = requests.get(url, timeout=30)
    # Fail right away instead of trying to parse an error page.
    response.raise_for_status()
    tree = html.fromstring(response.text)
    path = '//*[@id="post-2229"]/div[2]/table/tbody'
    matches = tree.xpath(path)
    if not matches:
        # Same exception type as indexing an empty result, but with a hint
        # about what actually went wrong.
        raise IndexError(
            "No element matching {!r} found at {}".format(path, url))
    return matches[0]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def filter_list(complete_list, browsers):
    """Filter the received list based on a look up table.

    The LUT should be a dictionary of the format {browser: versions}, where
    'browser' is the name of the browser (e.g. "Firefox") as string and
    'versions' is a set of different versions of this browser that should be
    included when found (e.g. {"Linux", "MacOSX"}).

    Matching is a case-insensitive substring test against the browser
    description of each entry. Every (browser, version) pair is used for at
    most one entry, and every entry is assigned to at most one browser.

    Args:
        complete_list: Iterable of entries where entry[1] and entry[2] have
                       a text_content() method returning the user agent and
                       the browser description respectively.
        browsers: The LUT described above. It is left unmodified.

    Return:
        A dictionary storing lists of tuples (user_agent,
        browser_description) as values. Its keys are the LUT keys for which
        at least one entry matched.
    """
    # Versions are consumed as they are matched; do that on private copies
    # so the caller's LUT is not emptied as a side effect.
    remaining = {name: list(versions) for name, versions in browsers.items()}
    table = {}
    for entry in complete_list:
        user_agent = entry[1].text_content()
        description = entry[2].text_content()
        haystack = description.lower()
        for name, versions in remaining.items():
            if name.lower() not in haystack:
                continue
            matched = next(
                (ver for ver in versions if ver.lower() in haystack), None)
            if matched is None:
                # Right browser, but no wanted version left: try the next
                # browser name for this entry.
                continue
            table.setdefault(name, []).append((user_agent, description))
            versions.remove(matched)
            break
    return table
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def add_diversity(table):
    """Insert a few additional entries for diversity into the dict.

    Args:
        table: Dictionary as returned by filter_list(). It is modified in
               place: its "Obscure" key is set to a fixed list of
               (user_agent, description) tuples.

    Return:
        The same dictionary object that was passed in.
    """
    table["Obscure"] = [
        # The closing parenthesis is part of the user agent string.
        ('Mozilla/5.0 (compatible; Googlebot/2.1; '
         '+http://www.google.com/bot.html)',
         "Google Bot"),
        ('Wget/1.16.1 (linux-gnu)',
         "wget 1.16.1"),
        ('curl/7.40.0',
         "curl 7.40.0")
    ]
    return table
|
|
|
|
|
|
|
|
|
|
|
2015-11-17 20:28:27 +01:00
|
|
|
|
def main():
    """Generate user agent code.

    Fetches the user agent table, filters it with a fixed look up table,
    adds the extra "Obscure" entries and prints the source code of a
    complete() method returning all of them to stdout.
    """
    fetched = fetch()
    lut = {
        "Firefox": {"Win", "MacOSX", "Linux", "Android"},
        "Chrome": {"Win", "MacOSX", "Linux"},
        "Safari": {"MacOSX", "iOS"}
    }
    filtered = filter_list(fetched, lut)
    filtered = add_diversity(filtered)

    # One indentation level of the generated code. Four spaces (this file's
    # sw=4 convention) so the output can be pasted into a class body.
    tab = "    "
    entry_indent = 3 * tab
    print(tab + "def complete(self):")
    print((2 * tab) + "\"\"\"Complete a list of common user agents.\"\"\"")
    print((2 * tab) + "out = [")

    for browser in ["Firefox", "Safari", "Chrome", "Obscure"]:
        # filter_list() only creates a key for browsers with at least one
        # match, so a browser may be missing here.
        for it in filtered.get(browser, []):
            print("{}(\'{}\',\n{} \"{}\"),".format(entry_indent, it[0],
                                                   entry_indent, it[1]))
        print("")

    # Hard-coded last entry, indented like the generated ones above.
    print("{indent}('Mozilla/5.0 (Windows NT 6.1; WOW64; Trident/7.0; "
          "rv:11.0) like '\n"
          "{indent} 'Gecko',\n"
          "{indent} \"IE 11.0 for Desktop Win7 64-bit\")".format(
              indent=entry_indent))

    print("{}]\n{}return out\n".format(2 * tab, 2 * tab))
|
2015-11-17 20:28:27 +01:00
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# Only generate the code when executed as a script, not when imported.
if __name__ == "__main__":
    main()
|