Improve script to print entries grouped by browser
The ua_fetch.py script has been rewritten entirely to select specific entries for each browser based on diversity rather than popularity alone. The output is now formatted so that the entries for each browser are printed grouped together.
This commit is contained in:
parent
45f9e61815
commit
d745819715
@ -1564,6 +1564,22 @@ class UserAgent(BaseType):
|
|||||||
def complete(self):
|
def complete(self):
|
||||||
"""Complete a list of common user agents."""
|
"""Complete a list of common user agents."""
|
||||||
out = [
|
out = [
|
||||||
|
('Mozilla/5.0 (Windows NT 6.1; WOW64; rv:41.0) Gecko/20100101 '
|
||||||
|
'Firefox/41.0',
|
||||||
|
"Firefox 41.0 Win7 64-bit"),
|
||||||
|
('Mozilla/5.0 (Windows NT 6.1; WOW64; rv:40.0) Gecko/20100101 '
|
||||||
|
'Firefox/40.0',
|
||||||
|
"Firefox 40.0 Win7 64-bit"),
|
||||||
|
|
||||||
|
('Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_5) '
|
||||||
|
'AppleWebKit/600.8.9 (KHTML, like Gecko) Version/8.0.8 '
|
||||||
|
'Safari/600.8.9',
|
||||||
|
"Safari 8.0 MacOSX"),
|
||||||
|
('Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11) '
|
||||||
|
'AppleWebKit/601.1.56 (KHTML, like Gecko) Version/9.0 '
|
||||||
|
'Safari/601.1.56',
|
||||||
|
"Safari Generic MacOSX"),
|
||||||
|
|
||||||
('Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, '
|
('Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, '
|
||||||
'like Gecko) Chrome/45.0.2454.101 Safari/537.36',
|
'like Gecko) Chrome/45.0.2454.101 Safari/537.36',
|
||||||
"Chrome 45.0 Win7 64-bit"),
|
"Chrome 45.0 Win7 64-bit"),
|
||||||
@ -1571,29 +1587,12 @@ class UserAgent(BaseType):
|
|||||||
'AppleWebKit/537.36 (KHTML, like Gecko) Chrome/45.0.2454.101 '
|
'AppleWebKit/537.36 (KHTML, like Gecko) Chrome/45.0.2454.101 '
|
||||||
'Safari/537.36',
|
'Safari/537.36',
|
||||||
"Chrome 45.0 MacOSX"),
|
"Chrome 45.0 MacOSX"),
|
||||||
('Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_5) '
|
|
||||||
'AppleWebKit/600.8.9 (KHTML, like Gecko) Version/8.0.8 '
|
|
||||||
'Safari/600.8.9',
|
|
||||||
"Safari 8.0 MacOSX"),
|
|
||||||
('Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, '
|
('Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, '
|
||||||
'like Gecko) Chrome/45.0.2454.101 Safari/537.36',
|
'like Gecko) Chrome/45.0.2454.101 Safari/537.36',
|
||||||
"Chrome 45.0 Win10 64-bit"),
|
"Chrome 45.0 Win10 64-bit"),
|
||||||
('Mozilla/5.0 (Windows NT 6.1; WOW64; rv:41.0) Gecko/20100101 '
|
|
||||||
'Firefox/41.0',
|
|
||||||
"Firefox 41.0 Win7 64-bit"),
|
|
||||||
('Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11) '
|
|
||||||
'AppleWebKit/601.1.56 (KHTML, like Gecko) Version/9.0 '
|
|
||||||
'Safari/601.1.56',
|
|
||||||
"Safari Generic MacOSX"),
|
|
||||||
('Mozilla/5.0 (Windows NT 6.3; WOW64) AppleWebKit/537.36 (KHTML, '
|
('Mozilla/5.0 (Windows NT 6.3; WOW64) AppleWebKit/537.36 (KHTML, '
|
||||||
'like Gecko) Chrome/45.0.2454.101 Safari/537.36',
|
'like Gecko) Chrome/45.0.2454.101 Safari/537.36',
|
||||||
"Chrome 45.0 Win8.1 64-bit"),
|
"Chrome 45.0 Win8.1 64-bit"),
|
||||||
('Mozilla/5.0 (Windows NT 6.1; WOW64; rv:40.0) Gecko/20100101 '
|
|
||||||
'Firefox/40.0',
|
|
||||||
"Firefox 40.0 Win7 64-bit"),
|
|
||||||
('Mozilla/5.0 (Windows NT 6.1; WOW64; Trident/7.0; rv:11.0) like '
|
|
||||||
'Gecko',
|
|
||||||
"IE 11.0 for Desktop Win7 64-bit"),
|
|
||||||
('Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, '
|
('Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, '
|
||||||
'like Gecko) Chrome/45.0.2454.93 Safari/537.36',
|
'like Gecko) Chrome/45.0.2454.93 Safari/537.36',
|
||||||
"Chrome 45.0 Win7 64-bit"),
|
"Chrome 45.0 Win7 64-bit"),
|
||||||
@ -1605,6 +1604,10 @@ class UserAgent(BaseType):
|
|||||||
'like Gecko) Chrome/45.0.2454.99 Safari/537.36',
|
'like Gecko) Chrome/45.0.2454.99 Safari/537.36',
|
||||||
"Chrome 45.0 Win7 64-bit"),
|
"Chrome 45.0 Win7 64-bit"),
|
||||||
|
|
||||||
|
('Mozilla/5.0 (Windows NT 6.1; WOW64; Trident/7.0; rv:11.0) like '
|
||||||
|
'Gecko',
|
||||||
|
"IE 11.0 for Desktop Win7 64-bit"),
|
||||||
|
|
||||||
('Mozilla/5.0 (compatible; Googlebot/2.1; '
|
('Mozilla/5.0 (compatible; Googlebot/2.1; '
|
||||||
'+http://www.google.com/bot.html',
|
'+http://www.google.com/bot.html',
|
||||||
"Google Bot"),
|
"Google Bot"),
|
||||||
|
@ -29,33 +29,81 @@ script is formatted to be pasted into configtypes.py.
|
|||||||
import requests
|
import requests
|
||||||
from lxml import html # pylint: disable=import-error
|
from lxml import html # pylint: disable=import-error
|
||||||
|
|
||||||
# Fetch list of popular user-agents and store the relevant strings
|
|
||||||
url = 'https://techblog.willshouse.com/2012/01/03/most-common-user-agents/'
|
|
||||||
page = requests.get(url)
|
|
||||||
page = html.fromstring(page.text)
|
|
||||||
path = '//*[@id="post-2229"]/div[2]/table/tbody'
|
|
||||||
table = page.xpath(path)[0]
|
|
||||||
indent = " "
|
|
||||||
|
|
||||||
# Print function definition followed by an automatically fetched list of popular
|
# Fetch list of popular user-agents and return list of relevant strings
|
||||||
# user agents and a few additional entries for diversity.
|
def fetch():
|
||||||
print("%sdef complete(self):" % indent)
|
url = 'https://techblog.willshouse.com/2012/01/03/most-common-user-agents/'
|
||||||
print("%s\"\"\"Complete a list of common user agents.\"\"\"" % (2 * indent))
|
page = requests.get(url)
|
||||||
print("%sout = [" % (2 * indent))
|
page = html.fromstring(page.text)
|
||||||
for row in table[:12]:
|
path = '//*[@id="post-2229"]/div[2]/table/tbody'
|
||||||
ua = row[1].text_content()
|
return page.xpath(path)[0]
|
||||||
browser = row[2].text_content()
|
|
||||||
print("%s(\'%s\',\n%s \"%s\")," % (3 * indent, ua, 3 * indent, browser))
|
|
||||||
print("""
|
# Filter the received list based on a look up table. The LUT should be a
|
||||||
('Mozilla/5.0 (iPhone; CPU iPhone OS 8_1_2 like Mac OS X) '
|
# dictionary of the format {browser: versions}, where 'browser' is the name of
|
||||||
'AppleWebKit/600.1.4 (KHTML, like Gecko) Version/8.0 '
|
# the browser (e.g. "Firefox") as a string and 'versions' is a set of different
|
||||||
'Mobile/12B440 Safari/600.1.4',
|
# versions of this browser that should be included when found (e.g. {"Linux",
|
||||||
"Mobile Safari 8.0 iOS"),
|
# "MacOSX"}). This function returns a dictionary with the same keys as the
|
||||||
('Mozilla/5.0 (Android; Mobile; rv:35.0) Gecko/35.0 Firefox/35.0',
|
# LUT, but storing lists of tuples (user_agent, browser_description) as values.
|
||||||
"Firefox 35, Android"),
|
def filter_list(complete_list, browsers):
|
||||||
('Mozilla/5.0 (Linux; Android 5.0.2; One Build/KTU84L.H4) '
|
table = {}
|
||||||
'AppleWebKit/537.36 (KHTML, like Gecko) Version/4.0 '
|
for entry in complete_list:
|
||||||
'Chrome/37.0.0.0 Mobile Safari/537.36',
|
# Tuple of (user_agent, browser_description)
|
||||||
"Android Browser")
|
candidate = (entry[1].text_content(), entry[2].text_content())
|
||||||
""")
|
for name in browsers:
|
||||||
print("%s]\n%sreturn out\n" % (2 * indent, 2 * indent))
|
found = False
|
||||||
|
if name.lower() in candidate[1].lower():
|
||||||
|
for version in browsers[name]:
|
||||||
|
if version.lower() in candidate[1].lower():
|
||||||
|
if table.get(name) is None:
|
||||||
|
table[name] = []
|
||||||
|
table[name].append(candidate)
|
||||||
|
browsers[name].remove(version)
|
||||||
|
found = True
|
||||||
|
break
|
||||||
|
if found:
|
||||||
|
break
|
||||||
|
return table
|
||||||
|
|
||||||
|
|
||||||
|
# Insert a few additional entries for diversity into the dict (as returned by
|
||||||
|
# filter_list())
|
||||||
|
def add_diversity(table):
|
||||||
|
table["Obscure"] = [
|
||||||
|
('Mozilla/5.0 (compatible; Googlebot/2.1; '
|
||||||
|
'+http://www.google.com/bot.html',
|
||||||
|
"Google Bot"),
|
||||||
|
('Wget/1.16.1 (linux-gnu)',
|
||||||
|
"wget 1.16.1"),
|
||||||
|
('curl/7.40.0',
|
||||||
|
"curl 7.40.0")
|
||||||
|
]
|
||||||
|
return table
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == '__main__':
|
||||||
|
fetched = fetch()
|
||||||
|
lut = {
|
||||||
|
"Firefox": {"Win", "MacOSX", "Linux", "Android"},
|
||||||
|
"Chrome": {"Win", "MacOSX", "Linux"},
|
||||||
|
"Safari": {"MacOSX", "iOS"}
|
||||||
|
}
|
||||||
|
filtered = filter_list(fetched, lut)
|
||||||
|
filtered = add_diversity(filtered)
|
||||||
|
|
||||||
|
tab = " "
|
||||||
|
print("%sdef complete(self):" % tab)
|
||||||
|
print("%s\"\"\"Complete a list of common user agents.\"\"\"" % (2 * tab))
|
||||||
|
print("%sout = [" %(2 * tab))
|
||||||
|
|
||||||
|
for browser in ["Firefox", "Safari", "Chrome", "Obscure"]:
|
||||||
|
for it in filtered[browser]:
|
||||||
|
print("%s(\'%s\',\n%s \"%s\")," % (3 * tab, it[0], 3 * tab, it[1]))
|
||||||
|
print("")
|
||||||
|
|
||||||
|
print("""\
|
||||||
|
('Mozilla/5.0 (Windows NT 6.1; WOW64; Trident/7.0; rv:11.0) like '
|
||||||
|
'Gecko',
|
||||||
|
"IE 11.0 for Desktop Win7 64-bit")""")
|
||||||
|
|
||||||
|
print("%s]\n%sreturn out\n" % (2 * tab, 2 * tab))
|
||||||
|
Loading…
Reference in New Issue
Block a user