Greasemonkey: support regexes in @include and @exclude.

Like the spec says, if a value for the @include or @exclude rules starts
and ends with a '/' it should be parsed as a regular expression.
Technically an ECMAScript-syntax regular expression, but I am not sure of
the differences and I assume they are far fewer than the similarities.
One that I did see mentioned was that JavaScript's RegExp doesn't support
Unicode, although it apparently does support a 'u' flag now.

Note that this code will only be run for QtWebKit and QtWebEngine < 5.8;
we rely on the builtin support for metadata in QtWebEngine for most
things Greasemonkey-related. Sadly it seems that they missed the regex
requirement too. I've opened a ticket to track that: https://bugreports.qt.io/browse/QTBUG-65484
This commit is contained in:
Jimmy 2017-12-27 22:04:47 +13:00
parent 2ef6e740d9
commit b2f95339ce
2 changed files with 41 additions and 6 deletions

View File

@ -23,7 +23,6 @@ import re
import os
import json
import fnmatch
import functools
import glob
import attr
@ -196,11 +195,21 @@ class GreasemonkeyManager(QObject):
"""
if url.scheme() not in self.greaseable_schemes:
return MatchingScripts(url, [], [], [])
match = functools.partial(fnmatch.fnmatch,
url.toString(QUrl.FullyEncoded))
def _match(pattern):
    """Return True if the URL being tested matches ``pattern``.

    The URL is the ``url`` object from the enclosing scope, rendered as
    a fully-encoded string.

    Args:
        pattern: An @include/@exclude rule. If it is wrapped in '/'
                 characters it is treated as a regular expression
                 (ECMAScript syntax in the spec, evaluated here with
                 Python's ``re``), otherwise as a glob expression.

    Return:
        True if the rule matches the URL, False otherwise.
    """
    string_url = url.toString(QUrl.FullyEncoded)
    # For include and exclude rules if they start and end with '/' they
    # should be treated as a (ecma syntax) regular expression.
    # The length check keeps a lone '/' out of this branch: it both
    # starts and ends with '/', and stripping the delimiters would
    # leave an empty regex which matches every URL.
    if (len(pattern) > 1 and pattern.startswith('/') and
            pattern.endswith('/')):
        # Greasemonkey evaluates regex rules case-insensitively.
        return re.search(pattern[1:-1], string_url,
                         flags=re.IGNORECASE) is not None
    # Otherwise they are glob expressions.
    return fnmatch.fnmatch(string_url, pattern)
tester = (lambda script:
any(match(pat) for pat in script.includes) and
not any(match(pat) for pat in script.excludes))
any(_match(pat) for pat in script.includes) and
not any(_match(pat) for pat in script.excludes))
return MatchingScripts(
url,
[script for script in self._run_start if tester(script)],

View File

@ -19,6 +19,7 @@
"""Tests for qutebrowser.browser.greasemonkey."""
import logging
import textwrap
import pytest
import py.path # pylint: disable=no-name-in-module
@ -26,7 +27,7 @@ from PyQt5.QtCore import QUrl
from qutebrowser.browser import greasemonkey
test_gm_script = """
test_gm_script = r"""
// ==UserScript==
// @name qutebrowser test userscript
// @namespace invalid.org
@ -75,6 +76,31 @@ def test_get_scripts_by_url(url, expected_matches):
expected_matches)
@pytest.mark.parametrize("url, expected_matches", [
    # Matched by the @include regex.
    ('https://github.com/qutebrowser/qutebrowser/', 1),
    # Matched by neither the @include nor the @exclude regex.
    ('http://aaaaaaaaaa.com/', 0),
    # Matched by both; the @exclude rule wins.
    ('http://github.com/foo', 0),
])
def test_regex_includes_scripts_for(url, expected_matches):
    """Check that regex-style @include/@exclude rules are honoured."""
    # Raw string so the backslashes in the regexes reach the script
    # file untouched.
    script_source = textwrap.dedent(r"""
        // ==UserScript==
        // @include /^https?://((gist|guides|help|raw|status|developer)\.)?github\.com/((?!generated_pages\/preview).)*$/
        // @exclude /https?://github\.com/foo/
        // @run-at document-start
        // ==/UserScript==
    """)
    _save_script(script_source, 'test.user.js')

    manager = greasemonkey.GreasemonkeyManager()
    matched = manager.scripts_for(QUrl(url))

    # Count the scripts across every run-at bucket.
    all_matches = matched.start + matched.end + matched.idle
    assert len(all_matches) == expected_matches
def test_no_metadata(caplog):
"""Run on all sites at document-end is the default."""
_save_script("var nothing = true;\n", 'nothing.user.js')