Merge branch 'greasemonkey' of https://github.com/toofar/qutebrowser into greasemonkey

This commit is contained in:
Florian Bruhin 2017-12-06 10:47:29 +01:00
commit 3cd2910fa2
14 changed files with 637 additions and 4 deletions

View File

@ -64,7 +64,7 @@ from qutebrowser.completion.models import miscmodels
from qutebrowser.commands import cmdutils, runners, cmdexc
from qutebrowser.config import config, websettings, configfiles, configinit
from qutebrowser.browser import (urlmarks, adblock, history, browsertab,
downloads)
downloads, greasemonkey)
from qutebrowser.browser.network import proxy
from qutebrowser.browser.webkit import cookies, cache
from qutebrowser.browser.webkit.network import networkmanager
@ -491,6 +491,10 @@ def _init_modules(args, crash_handler):
diskcache = cache.DiskCache(standarddir.cache(), parent=qApp)
objreg.register('cache', diskcache)
log.init.debug("Initializing Greasemonkey...")
gm_manager = greasemonkey.GreasemonkeyManager()
objreg.register('greasemonkey', gm_manager)
log.init.debug("Misc initialization...")
macros.init()
# Init backend-specific stuff

View File

@ -0,0 +1,207 @@
# vim: ft=python fileencoding=utf-8 sts=4 sw=4 et:
# Copyright 2017 Florian Bruhin (The Compiler) <mail@qutebrowser.org>
#
# This file is part of qutebrowser.
#
# qutebrowser is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# qutebrowser is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with qutebrowser. If not, see <http://www.gnu.org/licenses/>.
"""Load, parse and make avalaible greasemonkey scripts."""
import re
import os
import json
import fnmatch
import functools
import glob
import attr
from PyQt5.QtCore import pyqtSignal, QObject, QUrl
from qutebrowser.utils import log, standarddir, jinja
from qutebrowser.commands import cmdutils
def _scripts_dir():
"""Get the directory of the scripts."""
return os.path.join(standarddir.data(), 'greasemonkey')
class GreasemonkeyScript:
"""Container class for userscripts, parses metadata blocks."""
def __init__(self, properties, code):
self._code = code
self.includes = []
self.excludes = []
self.description = None
self.name = None
self.namespace = None
self.run_at = None
self.script_meta = None
self.runs_on_sub_frames = True
for name, value in properties:
if name == 'name':
self.name = value
elif name == 'namespace':
self.namespace = value
elif name == 'description':
self.description = value
elif name in ['include', 'match']:
self.includes.append(value)
elif name in ['exclude', 'exclude_match']:
self.excludes.append(value)
elif name == 'run-at':
self.run_at = value
elif name == 'noframes':
self.runs_on_sub_frames = False
HEADER_REGEX = r'// ==UserScript==|\n+// ==/UserScript==\n'
PROPS_REGEX = r'// @(?P<prop>[^\s]+)\s*(?P<val>.*)'
@classmethod
def parse(cls, source):
"""GreaseMonkeyScript factory.
Takes a userscript source and returns a GreaseMonkeyScript.
Parses the greasemonkey metadata block, if present, to fill out
attributes.
"""
matches = re.split(cls.HEADER_REGEX, source, maxsplit=2)
try:
_, props, _code = matches
except ValueError:
props = ""
script = cls(re.findall(cls.PROPS_REGEX, props), source)
script.script_meta = props
if not props:
script.includes = ['*']
return script
def code(self):
"""Return the processed javascript code of this script.
Adorns the source code with GM_* methods for greasemonkey
compatibility and wraps it in an IFFE to hide it within a
lexical scope. Note that this means line numbers in your
browser's debugger/inspector will not match up to the line
numbers in the source script directly.
"""
return jinja.js_environment.get_template(
'greasemonkey_wrapper.js').render(
scriptName="/".join([self.namespace or '', self.name]),
scriptInfo=self._meta_json(),
scriptMeta=self.script_meta,
scriptSource=self._code)
def _meta_json(self):
return json.dumps({
'name': self.name,
'description': self.description,
'matches': self.includes,
'includes': self.includes,
'excludes': self.excludes,
'run-at': self.run_at,
})
@attr.s
class MatchingScripts(object):
"""All userscripts registered to run on a particular url."""
url = attr.ib()
start = attr.ib(default=attr.Factory(list))
end = attr.ib(default=attr.Factory(list))
idle = attr.ib(default=attr.Factory(list))
class GreasemonkeyManager(QObject):
"""Manager of userscripts and a greasemonkey compatible environment.
Signals:
scripts_reloaded: Emitted when scripts are reloaded from disk.
Any any cached or already-injected scripts should be
considered obselete.
"""
scripts_reloaded = pyqtSignal()
# https://wiki.greasespot.net/Include_and_exclude_rules#Greaseable_schemes
# Limit the schemes scripts can run on due to unreasonable levels of
# exploitability
greaseable_schemes = ['http', 'https', 'ftp', 'file']
def __init__(self, parent=None):
super().__init__(parent)
self.load_scripts()
@cmdutils.register(name='greasemonkey-reload',
instance='greasemonkey')
def load_scripts(self):
"""Re-Read greasemonkey scripts from disk."""
self._run_start = []
self._run_end = []
self._run_idle = []
scripts_dir = os.path.abspath(_scripts_dir())
log.greasemonkey.debug("Reading scripts from: {}".format(scripts_dir))
for script_filename in glob.glob(os.path.join(scripts_dir, '*.js')):
if not os.path.isfile(script_filename):
continue
script_path = os.path.join(scripts_dir, script_filename)
with open(script_path, encoding='utf-8') as script_file:
script = GreasemonkeyScript.parse(script_file.read())
if not script.name:
script.name = script_filename
if script.run_at == 'document-start':
self._run_start.append(script)
elif script.run_at == 'document-end':
self._run_end.append(script)
elif script.run_at == 'document-idle':
self._run_idle.append(script)
else:
log.greasemonkey.warning("Script {} has invalid run-at "
"defined, defaulting to "
"document-end"
.format(script_path))
# Default as per
# https://wiki.greasespot.net/Metadata_Block#.40run-at
self._run_end.append(script)
log.greasemonkey.debug("Loaded script: {}".format(script.name))
self.scripts_reloaded.emit()
def scripts_for(self, url):
"""Fetch scripts that are registered to run for url.
returns a tuple of lists of scripts meant to run at (document-start,
document-end, document-idle)
"""
if url.scheme() not in self.greaseable_schemes:
return MatchingScripts(url, [], [], [])
match = functools.partial(fnmatch.fnmatch,
url.toString(QUrl.FullyEncoded))
tester = (lambda script:
any(match(pat) for pat in script.includes) and
not any(match(pat) for pat in script.excludes))
return MatchingScripts(
url,
[script for script in self._run_start if tester(script)],
[script for script in self._run_end if tester(script)],
[script for script in self._run_idle if tester(script)]
)
def all_scripts(self):
"""Return all scripts found in the configured script directory."""
return self._run_start + self._run_end + self._run_idle

View File

@ -244,6 +244,44 @@ def _init_profiles():
private_profile.setSpellCheckEnabled(True)
def inject_userscripts():
"""Register user javascript files with the global profiles."""
# The greasemonkey metadata block support in qtwebengine only starts at 5.8
# Otherwise have to handle injecting the scripts into the page at very
# early load, probs same place in view as the enableJS check.
if not qtutils.version_check('5.8'):
return
# Since we are inserting scripts into profile.scripts they won't
# just get replaced by new gm scripts like if we were injecting them
# ourselves so we need to remove all gm scripts, while not removing
# any other stuff that might have been added. Like the one for
# stylsheets.
# Could either use a different world for gm scripts, check for gm metadata
# values (would mean no non-gm userscripts), or check the code for
# _qute_script_id
for profile in [default_profile, private_profile]:
scripts = profile.scripts()
for script in scripts.toList():
if script.name().startswith("GM-"):
log.greasemonkey.debug('removing script: {}'
.format(script.name()))
scripts.remove(script)
for profile in [default_profile, private_profile]:
scripts = profile.scripts()
greasemonkey = objreg.get('greasemonkey')
for script in greasemonkey.all_scripts():
new_script = QWebEngineScript()
new_script.setWorldId(QWebEngineScript.MainWorld)
new_script.setSourceCode(script.code())
new_script.setName("GM-{}".format(script.name))
new_script.setRunsOnSubFrames(script.runs_on_sub_frames)
log.greasemonkey.debug('adding script: {}'
.format(new_script.name()))
scripts.insert(new_script)
def init(args):
"""Initialize the global QWebSettings."""
if args.enable_webengine_inspector:

View File

@ -69,6 +69,10 @@ def init():
download_manager.install(webenginesettings.private_profile)
objreg.register('webengine-download-manager', download_manager)
greasemonkey = objreg.get('greasemonkey')
greasemonkey.scripts_reloaded.connect(webenginesettings.inject_userscripts)
webenginesettings.inject_userscripts()
# Mapping worlds from usertypes.JsWorld to QWebEngineScript world IDs.
_JS_WORLD_MAP = {

View File

@ -23,12 +23,14 @@ import functools
from PyQt5.QtCore import pyqtSignal, pyqtSlot, QUrl, PYQT_VERSION
from PyQt5.QtGui import QPalette
from PyQt5.QtWebEngineWidgets import QWebEngineView, QWebEnginePage
from PyQt5.QtWebEngineWidgets import (QWebEngineView, QWebEnginePage,
QWebEngineScript)
from qutebrowser.browser import shared
from qutebrowser.browser.webengine import certificateerror, webenginesettings
from qutebrowser.config import config
from qutebrowser.utils import log, debug, usertypes, jinja, urlutils, message
from qutebrowser.utils import (log, debug, usertypes, jinja, urlutils, message,
objreg, qtutils)
class WebEngineView(QWebEngineView):
@ -135,6 +137,7 @@ class WebEnginePage(QWebEnginePage):
self._theme_color = theme_color
self._set_bg_color()
config.instance.changed.connect(self._set_bg_color)
self.urlChanged.connect(self._inject_userjs)
@config.change_filter('colors.webpage.bg')
def _set_bg_color(self):
@ -300,3 +303,42 @@ class WebEnginePage(QWebEnginePage):
message.error(msg)
return False
return True
@pyqtSlot('QUrl')
def _inject_userjs(self, url):
"""Inject userscripts registered for `url` into the current page."""
if qtutils.version_check('5.8'):
# Handled in webenginetab with the builtin greasemonkey
# support.
return
# Using QWebEnginePage.scripts() to hold the user scripts means
# we don't have to worry ourselves about where to inject the
# page but also means scripts hang around for the tab lifecycle.
# So clear them here.
scripts = self.scripts()
for script in scripts.toList():
if script.name().startswith("GM-"):
really_removed = scripts.remove(script)
log.greasemonkey.debug("Removing ({}) script: {}"
.format(really_removed, script.name()))
def _add_script(script, injection_point):
new_script = QWebEngineScript()
new_script.setInjectionPoint(injection_point)
new_script.setWorldId(QWebEngineScript.MainWorld)
new_script.setSourceCode(script.code())
new_script.setName("GM-{}".format(script.name))
new_script.setRunsOnSubFrames(script.runs_on_sub_frames)
log.greasemonkey.debug("Adding script: {}"
.format(new_script.name()))
scripts.insert(new_script)
greasemonkey = objreg.get('greasemonkey')
matching_scripts = greasemonkey.scripts_for(url)
for script in matching_scripts.start:
_add_script(script, QWebEngineScript.DocumentCreation)
for script in matching_scripts.end:
_add_script(script, QWebEngineScript.DocumentReady)
for script in matching_scripts.idle:
_add_script(script, QWebEngineScript.Deferred)

View File

@ -86,6 +86,21 @@ class BrowserPage(QWebPage):
self.on_save_frame_state_requested)
self.restoreFrameStateRequested.connect(
self.on_restore_frame_state_requested)
self.loadFinished.connect(
functools.partial(self._inject_userjs, self.mainFrame()))
self.frameCreated.connect(self._connect_userjs_signals)
@pyqtSlot('QWebFrame*')
def _connect_userjs_signals(self, frame):
"""Connect userjs related signals to `frame`.
Connect the signals used as triggers for injecting user
javascripts into the passed QWebFrame.
"""
log.greasemonkey.debug("Connecting to frame {} ({})"
.format(frame, frame.url().toDisplayString()))
frame.loadFinished.connect(
functools.partial(self._inject_userjs, frame))
def javaScriptPrompt(self, frame, js_msg, default):
"""Override javaScriptPrompt to use qutebrowser prompts."""
@ -283,6 +298,38 @@ class BrowserPage(QWebPage):
else:
self.error_occurred = False
def _inject_userjs(self, frame):
"""Inject user javascripts into the page.
Args:
frame: The QWebFrame to inject the user scripts into.
"""
url = frame.url()
if url.isEmpty():
url = frame.requestedUrl()
log.greasemonkey.debug("_inject_userjs called for {} ({})"
.format(frame, url.toDisplayString()))
greasemonkey = objreg.get('greasemonkey')
scripts = greasemonkey.scripts_for(url)
# QtWebKit has trouble providing us with signals representing
# page load progress at reasonable times, so we just load all
# scripts on the same event.
toload = scripts.start + scripts.end + scripts.idle
if url.isEmpty():
# This happens during normal usage like with view source but may
# also indicate a bug.
log.greasemonkey.debug("Not running scripts for frame with no "
"url: {}".format(frame))
assert not toload, toload
for script in toload:
if frame is self.mainFrame() or script.runs_on_sub_frames:
log.webview.debug('Running GM script: {}'.format(script.name))
frame.evaluateJavaScript(script.code())
@pyqtSlot('QWebFrame*', 'QWebPage::Feature')
def _on_feature_permission_requested(self, frame, feature):
"""Ask the user for approval for geolocation/notifications."""

View File

@ -1,2 +1,4 @@
# Upstream Mozilla's code
pac_utils.js
# Actually a jinja template so eslint chokes on the {{}} syntax.
greasemonkey_wrapper.js

View File

@ -0,0 +1,118 @@
(function () {
const _qute_script_id = "__gm_"+{{ scriptName | tojson }};
function GM_log(text) {
console.log(text);
}
const GM_info = {
'script': {{ scriptInfo }},
'scriptMetaStr': {{ scriptMeta | tojson }},
'scriptWillUpdate': false,
'version': "0.0.1",
'scriptHandler': 'Tampermonkey' // so scripts don't expect exportFunction
};
function checkKey(key, funcName) {
if (typeof key !== "string") {
throw new Error(`${funcName} requires the first parameter to be of type string, not '${typeof key}'`);
}
}
function GM_setValue(key, value) {
checkKey(key, "GM_setValue");
if (typeof value !== "string" &&
typeof value !== "number" &&
typeof value !== "boolean") {
throw new Error(`GM_setValue requires the second parameter to be of type string, number or boolean, not '${typeof value}'`);
}
localStorage.setItem(_qute_script_id + key, value);
}
function GM_getValue(key, default_) {
checkKey(key, "GM_getValue");
return localStorage.getItem(_qute_script_id + key) || default_;
}
function GM_deleteValue(key) {
checkKey(key, "GM_deleteValue");
localStorage.removeItem(_qute_script_id + key);
}
function GM_listValues() {
let keys = [];
for (let i = 0; i < localStorage.length; i++) {
if (localStorage.key(i).startsWith(_qute_script_id)) {
keys.push(localStorage.key(i).slice(_qute_script_id.length));
}
}
return keys;
}
function GM_openInTab(url) {
window.open(url);
}
// Almost verbatim copy from Eric
function GM_xmlhttpRequest(/* object */ details) {
details.method = details.method ? details.method.toUpperCase() : "GET";
if (!details.url) {
throw ("GM_xmlhttpRequest requires an URL.");
}
// build XMLHttpRequest object
let oXhr = new XMLHttpRequest();
// run it
if ("onreadystatechange" in details) {
oXhr.onreadystatechange = function () {
details.onreadystatechange(oXhr);
};
}
if ("onload" in details) {
oXhr.onload = function () { details.onload(oXhr) };
}
if ("onerror" in details) {
oXhr.onerror = function () { details.onerror(oXhr) };
}
oXhr.open(details.method, details.url, true);
if ("headers" in details) {
for (let header in details.headers) {
oXhr.setRequestHeader(header, details.headers[header]);
}
}
if ("data" in details) {
oXhr.send(details.data);
} else {
oXhr.send();
}
}
function GM_addStyle(/* String */ styles) {
let oStyle = document.createElement("style");
oStyle.setAttribute("type", "text/css");
oStyle.appendChild(document.createTextNode(styles));
let head = document.getElementsByTagName("head")[0];
if (head === undefined) {
document.onreadystatechange = function () {
if (document.readyState == "interactive") {
document.getElementsByTagName("head")[0].appendChild(oStyle);
}
}
}
else {
head.appendChild(oStyle);
}
}
const unsafeWindow = window;
//====== The actual user script source ======//
{{ scriptSource }}
//====== End User Script ======//
})();

View File

@ -136,3 +136,4 @@ def render(template, **kwargs):
environment = Environment()
js_environment = jinja2.Environment(loader=Loader('javascript'))

View File

@ -95,7 +95,8 @@ LOGGER_NAMES = [
'commands', 'signals', 'downloads',
'js', 'qt', 'rfc6266', 'ipc', 'shlexer',
'save', 'message', 'config', 'sessions',
'webelem', 'prompt', 'network', 'sql'
'webelem', 'prompt', 'network', 'sql',
'greasemonkey'
]
@ -144,6 +145,7 @@ webelem = logging.getLogger('webelem')
prompt = logging.getLogger('prompt')
network = logging.getLogger('network')
sql = logging.getLogger('sql')
greasemonkey = logging.getLogger('greasemonkey')
ram_handler = None

View File

@ -123,3 +123,25 @@ Feature: Javascript stuff
And I wait for "[*/data/javascript/windowsize.html:*] loaded" in the log
And I run :tab-next
Then the window sizes should be the same
Scenario: Have a greasemonkey script run at page start
When I have a greasemonkey file saved for document-start with noframes unset
And I run :greasemonkey-reload
And I open data/hints/iframe.html
# This second reload is required in webengine < 5.8 for scripts
# registered to run at document-start, some sort of timing issue.
And I run :reload
Then the javascript message "Script is running on /data/hints/iframe.html" should be logged
Scenario: Have a greasemonkey script running on frames
When I have a greasemonkey file saved for document-end with noframes unset
And I run :greasemonkey-reload
And I open data/hints/iframe.html
Then the javascript message "Script is running on /data/hints/html/wrapped.html" should be logged
@flaky
Scenario: Have a greasemonkey script running on noframes
When I have a greasemonkey file saved for document-end with noframes set
And I run :greasemonkey-reload
And I open data/hints/iframe.html
Then the javascript message "Script is running on /data/hints/html/wrapped.html" should not be logged

View File

@ -17,6 +17,8 @@
# You should have received a copy of the GNU General Public License
# along with qutebrowser. If not, see <http://www.gnu.org/licenses/>.
import os.path
import pytest_bdd as bdd
bdd.scenarios('javascript.feature')
@ -29,3 +31,38 @@ def check_window_sizes(quteproc):
hidden_size = hidden.message.split()[-1]
visible_size = visible.message.split()[-1]
assert hidden_size == visible_size
test_gm_script = r"""
// ==UserScript==
// @name Qutebrowser test userscript
// @namespace invalid.org
// @include http://localhost:*/data/hints/iframe.html
// @include http://localhost:*/data/hints/html/wrapped.html
// @exclude ???
// @run-at {stage}
// {frames}
// ==/UserScript==
console.log("Script is running on " + window.location.pathname);
"""
@bdd.when(bdd.parsers.parse("I have a greasemonkey file saved for {stage} "
"with noframes {frameset}"))
def create_greasemonkey_file(quteproc, stage, frameset):
script_path = os.path.join(quteproc.basedir, 'data', 'greasemonkey')
try:
os.mkdir(script_path)
except FileExistsError:
pass
file_path = os.path.join(script_path, 'test.user.js')
if frameset == "set":
frames = "@noframes"
elif frameset == "unset":
frames = ""
else:
raise ValueError("noframes can only be set or unset, "
"not {}".format(frameset))
with open(file_path, 'w', encoding='utf-8') as f:
f.write(test_gm_script.format(stage=stage,
frames=frames))

View File

@ -527,6 +527,7 @@ class QuteProc(testprocess.Process):
super().before_test()
self.send_cmd(':config-clear')
self._init_settings()
self.clear_data()
def _init_settings(self):
"""Adjust some qutebrowser settings after starting."""

View File

@ -0,0 +1,108 @@
# vim: ft=python fileencoding=utf-8 sts=4 sw=4 et:
# Copyright 2017 Florian Bruhin (The Compiler) <mail@qutebrowser.org>
# This file is part of qutebrowser.
#
# qutebrowser is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# qutebrowser is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with qutebrowser. If not, see <http://www.gnu.org/licenses/>.
"""Tests for qutebrowser.browser.greasemonkey."""
import os
import logging
import pytest
from PyQt5.QtCore import QUrl
from qutebrowser.browser import greasemonkey
test_gm_script = """
// ==UserScript==
// @name Qutebrowser test userscript
// @namespace invalid.org
// @include http://localhost:*/data/title.html
// @match http://trolol*
// @exclude https://badhost.xxx/*
// @run-at document-start
// ==/UserScript==
console.log("Script is running.");
"""
pytestmark = pytest.mark.usefixtures('data_tmpdir')
def save_script(script_text, filename):
script_path = greasemonkey._scripts_dir()
try:
os.mkdir(script_path)
except FileExistsError:
pass
file_path = os.path.join(script_path, filename)
with open(file_path, 'w', encoding='utf-8') as f:
f.write(script_text)
def test_all():
"""Test that a script gets read from file, parsed and returned."""
save_script(test_gm_script, 'test.user.js')
gm_manager = greasemonkey.GreasemonkeyManager()
assert (gm_manager.all_scripts()[0].name ==
"Qutebrowser test userscript")
@pytest.mark.parametrize("url, expected_matches", [
# included
('http://trololololololo.com/', 1),
# neither included nor excluded
('http://aaaaaaaaaa.com/', 0),
# excluded
('https://badhost.xxx/', 0),
])
def test_get_scripts_by_url(url, expected_matches):
"""Check greasemonkey include/exclude rules work."""
save_script(test_gm_script, 'test.user.js')
gm_manager = greasemonkey.GreasemonkeyManager()
scripts = gm_manager.scripts_for(QUrl(url))
assert (len(scripts.start + scripts.end + scripts.idle) ==
expected_matches)
def test_no_metadata(caplog):
"""Run on all sites at document-end is the default."""
save_script("var nothing = true;\n", 'nothing.user.js')
with caplog.at_level(logging.WARNING):
gm_manager = greasemonkey.GreasemonkeyManager()
scripts = gm_manager.scripts_for(QUrl('http://notamatch.invalid/'))
assert len(scripts.start + scripts.end + scripts.idle) == 1
assert len(scripts.end) == 1
def test_bad_scheme(caplog):
"""qute:// isn't in the list of allowed schemes."""
save_script("var nothing = true;\n", 'nothing.user.js')
with caplog.at_level(logging.WARNING):
gm_manager = greasemonkey.GreasemonkeyManager()
scripts = gm_manager.scripts_for(QUrl('qute://settings'))
assert len(scripts.start + scripts.end + scripts.idle) == 0
def test_load_emits_signal(qtbot):
gm_manager = greasemonkey.GreasemonkeyManager()
with qtbot.wait_signal(gm_manager.scripts_reloaded):
gm_manager.load_scripts()