From b2f95339ce3f42f8724fdf524004b41abcfa793a Mon Sep 17 00:00:00 2001 From: Jimmy Date: Wed, 27 Dec 2017 22:04:47 +1300 Subject: [PATCH 1/4] Greasemonkey: support regexes in @include and @exclude. Like the spec says, if a value for the @include or @exclude rules starts and ends with a '/' it should be parsed as a regular expression. Technically an ECMAScript syntax regular expression, but I am not sure of the differences and I assume they are far fewer than the similarities. One that I did see mentioned was that JavaScript RegExp doesn't support Unicode, although it apparently does support a 'u' flag now. Note that this code will only be run for QtWebkit and QWebEngine < 5.8; otherwise we rely on the builtin support for metadata in QWebEngine for most things greasemonkey related. Sadly it seems that they missed the regex requirement too. I've opened a ticket to track that https://bugreports.qt.io/browse/QTBUG-65484 --- qutebrowser/browser/greasemonkey.py | 19 +++++++++++---- tests/unit/javascript/test_greasemonkey.py | 28 +++++++++++++++++++++- 2 files changed, 41 insertions(+), 6 deletions(-) diff --git a/qutebrowser/browser/greasemonkey.py b/qutebrowser/browser/greasemonkey.py index 9a82d6a93..579cca82b 100644 --- a/qutebrowser/browser/greasemonkey.py +++ b/qutebrowser/browser/greasemonkey.py @@ -23,7 +23,6 @@ import re import os import json import fnmatch -import functools import glob import attr @@ -196,11 +195,21 @@ class GreasemonkeyManager(QObject): """ if url.scheme() not in self.greaseable_schemes: return MatchingScripts(url, [], [], []) - match = functools.partial(fnmatch.fnmatch, - url.toString(QUrl.FullyEncoded)) + + def _match(pattern): + # For include and exclude rules if they start and end with '/' they + # should be treated as a (ecma syntax) regular expression. + string_url = url.toString(QUrl.FullyEncoded) + if pattern.startswith('/') and pattern.endswith('/'): + return re.search(pattern[1:-1], string_url) is not None + + # Otherwise they are glob expressions. 
+ return fnmatch.fnmatch(string_url, pattern) + tester = (lambda script: - any(match(pat) for pat in script.includes) and - not any(match(pat) for pat in script.excludes)) + any(_match(pat) for pat in script.includes) and + not any(_match(pat) for pat in script.excludes)) + return MatchingScripts( url, [script for script in self._run_start if tester(script)], diff --git a/tests/unit/javascript/test_greasemonkey.py b/tests/unit/javascript/test_greasemonkey.py index 0f5fe476c..1aaa3380f 100644 --- a/tests/unit/javascript/test_greasemonkey.py +++ b/tests/unit/javascript/test_greasemonkey.py @@ -19,6 +19,7 @@ """Tests for qutebrowser.browser.greasemonkey.""" import logging +import textwrap import pytest import py.path # pylint: disable=no-name-in-module @@ -26,7 +27,7 @@ from PyQt5.QtCore import QUrl from qutebrowser.browser import greasemonkey -test_gm_script = """ +test_gm_script = r""" // ==UserScript== // @name qutebrowser test userscript // @namespace invalid.org @@ -75,6 +76,31 @@ def test_get_scripts_by_url(url, expected_matches): expected_matches) +@pytest.mark.parametrize("url, expected_matches", [ + # included + ('https://github.com/qutebrowser/qutebrowser/', 1), + # neither included nor excluded + ('http://aaaaaaaaaa.com/', 0), + # excluded takes priority + ('http://github.com/foo', 0), +]) +def test_regex_includes_scripts_for(url, expected_matches): + """Ensure our GM @*clude support supports regular expressions.""" + gh_dark_example = textwrap.dedent(r""" + // ==UserScript== + // @include /^https?://((gist|guides|help|raw|status|developer)\.)?github\.com/((?!generated_pages\/preview).)*$/ + // @exclude /https?://github\.com/foo/ + // @run-at document-start + // ==/UserScript== + """) + _save_script(gh_dark_example, 'test.user.js') + gm_manager = greasemonkey.GreasemonkeyManager() + + scripts = gm_manager.scripts_for(QUrl(url)) + assert (len(scripts.start + scripts.end + scripts.idle) == + expected_matches) + + def test_no_metadata(caplog): """Run on all 
sites at document-end is the default.""" _save_script("var nothing = true;\n", 'nothing.user.js') From 971b41399160c26335b7a364348ca8b929e6538a Mon Sep 17 00:00:00 2001 From: Jimmy Date: Wed, 17 Jan 2018 18:17:14 +1300 Subject: [PATCH 2/4] Greasemonkey: make *clude regexes case insensitive Sometimes I don't read specs so good. --- qutebrowser/browser/greasemonkey.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/qutebrowser/browser/greasemonkey.py b/qutebrowser/browser/greasemonkey.py index 579cca82b..753d13c56 100644 --- a/qutebrowser/browser/greasemonkey.py +++ b/qutebrowser/browser/greasemonkey.py @@ -201,7 +201,8 @@ class GreasemonkeyManager(QObject): # should be treated as a (ecma syntax) regular expression. string_url = url.toString(QUrl.FullyEncoded) if pattern.startswith('/') and pattern.endswith('/'): - return re.search(pattern[1:-1], string_url) is not None + matches = re.search(pattern[1:-1], string_url, flags=re.I) + return matches is not None # Otherwise they are glob expressions. return fnmatch.fnmatch(string_url, pattern) From d5d22783eadda873e955911d17c137117a4cd9bf Mon Sep 17 00:00:00 2001 From: Jimmy Date: Sat, 20 Jan 2018 13:32:16 +1300 Subject: [PATCH 3/4] Greasemonkey: optimize pattern matching a little Moving `QUrl.toString()` out of the `_match()` function, which is called for every pattern in every script, seems to make it ~40% faster. 
--- qutebrowser/browser/greasemonkey.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/qutebrowser/browser/greasemonkey.py b/qutebrowser/browser/greasemonkey.py index 753d13c56..8a9624148 100644 --- a/qutebrowser/browser/greasemonkey.py +++ b/qutebrowser/browser/greasemonkey.py @@ -196,10 +196,11 @@ class GreasemonkeyManager(QObject): if url.scheme() not in self.greaseable_schemes: return MatchingScripts(url, [], [], []) + string_url = url.toString(QUrl.FullyEncoded) + def _match(pattern): # For include and exclude rules if they start and end with '/' they # should be treated as a (ecma syntax) regular expression. - string_url = url.toString(QUrl.FullyEncoded) if pattern.startswith('/') and pattern.endswith('/'): matches = re.search(pattern[1:-1], string_url, flags=re.I) return matches is not None From aebc1a7d48b9583ee4b246dbfb6b1afcba91048e Mon Sep 17 00:00:00 2001 From: Jimmy Date: Sat, 20 Jan 2018 13:34:51 +1300 Subject: [PATCH 4/4] Greasemonkey: don't complain about an unset run-at Apparently it is not an unusual situation to leave it unset and rely on the default. Logging a warning about this could be unnecessarily confusing for users. I'm leaving the log message in there if it is set to something weird like `window-load` or `document-complete` which scriptish may support. 
--- qutebrowser/browser/greasemonkey.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/qutebrowser/browser/greasemonkey.py b/qutebrowser/browser/greasemonkey.py index 8a9624148..7ee843b0b 100644 --- a/qutebrowser/browser/greasemonkey.py +++ b/qutebrowser/browser/greasemonkey.py @@ -177,10 +177,10 @@ class GreasemonkeyManager(QObject): elif script.run_at == 'document-idle': self._run_idle.append(script) else: - log.greasemonkey.warning("Script {} has invalid run-at " - "defined, defaulting to " - "document-end" - .format(script_path)) + if script.run_at: + log.greasemonkey.warning( + "Script {} has invalid run-at defined, " + "defaulting to document-end".format(script_path)) # Default as per # https://wiki.greasespot.net/Metadata_Block#.40run-at self._run_end.append(script)