From b2f95339ce3f42f8724fdf524004b41abcfa793a Mon Sep 17 00:00:00 2001 From: Jimmy Date: Wed, 27 Dec 2017 22:04:47 +1300 Subject: [PATCH] Greasemonkey: support regexes in @include and @exclude. Like the spec says, if a value for the @include or @exclude rules starts and ends with a '/' it should be parsed as a regular expression. Technically an ECMAScript syntax regular expression, but I am not sure of the differences and I assume they are far fewer than the similarities. One difference that I did see mentioned is that JavaScript RegExp doesn't support Unicode, although it apparently does support a 'u' flag now. Note that this code will only be run for QtWebkit and QWebEngine < 5.8; otherwise we rely on the builtin support for metadata in QWebEngine for most things Greasemonkey related. Sadly it seems that they missed the regex requirement too. I've opened a ticket to track that: https://bugreports.qt.io/browse/QTBUG-65484 --- qutebrowser/browser/greasemonkey.py | 19 +++++++++++---- tests/unit/javascript/test_greasemonkey.py | 28 +++++++++++++++++++++- 2 files changed, 41 insertions(+), 6 deletions(-) diff --git a/qutebrowser/browser/greasemonkey.py b/qutebrowser/browser/greasemonkey.py index 9a82d6a93..579cca82b 100644 --- a/qutebrowser/browser/greasemonkey.py +++ b/qutebrowser/browser/greasemonkey.py @@ -23,7 +23,6 @@ import re import os import json import fnmatch -import functools import glob import attr @@ -196,11 +195,21 @@ class GreasemonkeyManager(QObject): """ if url.scheme() not in self.greaseable_schemes: return MatchingScripts(url, [], [], []) - match = functools.partial(fnmatch.fnmatch, - url.toString(QUrl.FullyEncoded)) + + def _match(pattern): + # For include and exclude rules if they start and end with '/' they + # should be treated as a (ecma syntax) regular expression. + string_url = url.toString(QUrl.FullyEncoded) + if pattern.startswith('/') and pattern.endswith('/'): + return re.search(pattern[1:-1], string_url) is not None + + # Otherwise they are glob expressions. 
+ return fnmatch.fnmatch(string_url, pattern) + tester = (lambda script: - any(match(pat) for pat in script.includes) and - not any(match(pat) for pat in script.excludes)) + any(_match(pat) for pat in script.includes) and + not any(_match(pat) for pat in script.excludes)) + return MatchingScripts( url, [script for script in self._run_start if tester(script)], diff --git a/tests/unit/javascript/test_greasemonkey.py b/tests/unit/javascript/test_greasemonkey.py index 0f5fe476c..1aaa3380f 100644 --- a/tests/unit/javascript/test_greasemonkey.py +++ b/tests/unit/javascript/test_greasemonkey.py @@ -19,6 +19,7 @@ """Tests for qutebrowser.browser.greasemonkey.""" import logging +import textwrap import pytest import py.path # pylint: disable=no-name-in-module @@ -26,7 +27,7 @@ from PyQt5.QtCore import QUrl from qutebrowser.browser import greasemonkey -test_gm_script = """ +test_gm_script = r""" // ==UserScript== // @name qutebrowser test userscript // @namespace invalid.org @@ -75,6 +76,31 @@ def test_get_scripts_by_url(url, expected_matches): expected_matches) +@pytest.mark.parametrize("url, expected_matches", [ + # included + ('https://github.com/qutebrowser/qutebrowser/', 1), + # neither included nor excluded + ('http://aaaaaaaaaa.com/', 0), + # excluded takes priority + ('http://github.com/foo', 0), +]) +def test_regex_includes_scripts_for(url, expected_matches): + """Ensure our GM @*clude support supports regular expressions.""" + gh_dark_example = textwrap.dedent(r""" + // ==UserScript== + // @include /^https?://((gist|guides|help|raw|status|developer)\.)?github\.com/((?!generated_pages\/preview).)*$/ + // @exclude /https?://github\.com/foo/ + // @run-at document-start + // ==/UserScript== + """) + _save_script(gh_dark_example, 'test.user.js') + gm_manager = greasemonkey.GreasemonkeyManager() + + scripts = gm_manager.scripts_for(QUrl(url)) + assert (len(scripts.start + scripts.end + scripts.idle) == + expected_matches) + + def test_no_metadata(caplog): """Run on all 
sites at document-end is the default.""" _save_script("var nothing = true;\n", 'nothing.user.js')