Add qutebrowser.utils.javascript

webelem.javascript_escape got renamed to javascript.string_escape, and a
new javascript.assemble got added to make it easier to call a function
inside a .js file.
This commit is contained in:
Florian Bruhin 2016-08-04 17:53:13 +02:00
parent 10dd3135a7
commit 08b70f0f4c
11 changed files with 236 additions and 154 deletions

View File

@ -44,7 +44,7 @@ from qutebrowser.browser import urlmarks, browsertab, inspector
from qutebrowser.browser.webkit import webelem, downloads, mhtml from qutebrowser.browser.webkit import webelem, downloads, mhtml
from qutebrowser.keyinput import modeman from qutebrowser.keyinput import modeman
from qutebrowser.utils import (message, usertypes, log, qtutils, urlutils, from qutebrowser.utils import (message, usertypes, log, qtutils, urlutils,
objreg, utils, typing) objreg, utils, typing, javascript)
from qutebrowser.utils.usertypes import KeyMode from qutebrowser.utils.usertypes import KeyMode
from qutebrowser.misc import editor, guiprocess from qutebrowser.misc import editor, guiprocess
from qutebrowser.completion.models import instances, sortfilter from qutebrowser.completion.models import instances, sortfilter
@ -1485,7 +1485,7 @@ class CommandDispatcher:
var event = document.createEvent('TextEvent'); var event = document.createEvent('TextEvent');
event.initTextEvent('textInput', true, true, null, sel); event.initTextEvent('textInput', true, true, null, sel);
this.dispatchEvent(event); this.dispatchEvent(event);
""".format(webelem.javascript_escape(sel))) """.format(javascript.string_escape(sel)))
def _search_cb(self, found, *, tab, old_scroll_pos, options, text, prev): def _search_cb(self, found, *, tab, old_scroll_pos, options, text, prev):
"""Callback called from search/search_next/search_prev. """Callback called from search/search_next/search_prev.

View File

@ -23,8 +23,7 @@ import os
from PyQt5.QtCore import QUrl from PyQt5.QtCore import QUrl
from qutebrowser.browser.webkit import webelem from qutebrowser.utils import utils, javascript
from qutebrowser.utils import utils
class PDFJSNotFound(Exception): class PDFJSNotFound(Exception):
@ -65,7 +64,7 @@ def _generate_pdfjs_script(url):
return ( return (
'PDFJS.verbosity = PDFJS.VERBOSITY_LEVELS.info;\n' 'PDFJS.verbosity = PDFJS.VERBOSITY_LEVELS.info;\n'
'PDFView.open("{url}");\n' 'PDFView.open("{url}");\n'
).format(url=webelem.javascript_escape(url.toString(QUrl.FullyEncoded))) ).format(url=javascript.string_escape(url.toString(QUrl.FullyEncoded)))
def fix_urls(asset): def fix_urls(asset):

View File

@ -31,7 +31,7 @@ from PyQt5.QtWebEngineWidgets import QWebEnginePage
from qutebrowser.browser import browsertab from qutebrowser.browser import browsertab
from qutebrowser.browser.webengine import webview from qutebrowser.browser.webengine import webview
from qutebrowser.utils import usertypes, qtutils, log, utils from qutebrowser.utils import usertypes, qtutils, log, javascript
class WebEnginePrinting(browsertab.AbstractPrinting): class WebEnginePrinting(browsertab.AbstractPrinting):
@ -220,10 +220,7 @@ class WebEngineScroller(browsertab.AbstractScroller):
self._pos_px = QPoint(jsret['px']['x'], jsret['px']['y']) self._pos_px = QPoint(jsret['px']['x'], jsret['px']['y'])
self.perc_changed.emit(*self._pos_perc) self.perc_changed.emit(*self._pos_perc)
js_code = """ js_code = javascript.assemble('scroll', 'scroll_pos')
{scroll_js}
scroll_pos();
""".format(scroll_js=utils.read_file('javascript/scroll.js'))
self._tab.run_js_async(js_code, update_scroll_pos) self._tab.run_js_async(js_code, update_scroll_pos)
def pos_px(self): def pos_px(self):
@ -233,12 +230,7 @@ class WebEngineScroller(browsertab.AbstractScroller):
return self._pos_perc return self._pos_perc
def to_perc(self, x=None, y=None): def to_perc(self, x=None, y=None):
js_code = """ js_code = javascript.assemble('scroll', 'scroll_to_perc', x, y)
{scroll_js}
scroll_to_perc({x}, {y});
""".format(scroll_js=utils.read_file('javascript/scroll.js'),
x='undefined' if x is None else x,
y='undefined' if y is None else y)
self._tab.run_js_async(js_code) self._tab.run_js_async(js_code)
def to_point(self, point): def to_point(self, point):
@ -249,10 +241,7 @@ class WebEngineScroller(browsertab.AbstractScroller):
self._tab.run_js_async("window.scrollBy({x}, {y});".format(x=x, y=y)) self._tab.run_js_async("window.scrollBy({x}, {y});".format(x=x, y=y))
def delta_page(self, x=0, y=0): def delta_page(self, x=0, y=0):
js_code = """ js_code = javascript.assemble('scroll', 'scroll_delta_page', x, y)
{scroll_js}
scroll_delta_page({x}, {y});
""".format(scroll_js=utils.read_file('javascript/scroll.js'), x=x, y=y)
self._tab.run_js_async(js_code) self._tab.run_js_async(js_code)
def up(self, count=1): def up(self, count=1):

View File

@ -33,7 +33,7 @@ from PyQt5.QtCore import QRect, QUrl
from PyQt5.QtWebKit import QWebElement from PyQt5.QtWebKit import QWebElement
from qutebrowser.config import config from qutebrowser.config import config
from qutebrowser.utils import log, usertypes, utils from qutebrowser.utils import log, usertypes, utils, javascript
Group = usertypes.enum('Group', ['all', 'links', 'images', 'url', 'prevnext', Group = usertypes.enum('Group', ['all', 'links', 'images', 'url', 'prevnext',
@ -202,7 +202,7 @@ class WebElementWrapper(collections.abc.MutableMapping):
else: else:
log.misc.debug("Filling element {} via javascript.".format( log.misc.debug("Filling element {} via javascript.".format(
self.debug_text())) self.debug_text()))
text = javascript_escape(text) text = javascript.string_escape(text)
self._elem.evaluateJavaScript("this.value='{}'".format(text)) self._elem.evaluateJavaScript("this.value='{}'".format(text))
def set_inner_xml(self, xml): def set_inner_xml(self, xml):
@ -501,34 +501,6 @@ class WebElementWrapper(collections.abc.MutableMapping):
return all([visible_on_screen, visible_in_frame]) return all([visible_on_screen, visible_in_frame])
def javascript_escape(text):
    """Return text with the characters special to javascript strings escaped.

    The result can be placed between quotes in javascript source, e.g.:

        elem.evaluateJavaScript("this.value='{}'".format(javascript_escape(...)))
    """
    # Each key is exactly one character, so looking every character up once
    # gives the same result as replacing them one after another with the
    # backslash handled first.
    escaped_forms = {
        '\\': r'\\',
        "'": r"\'",
        '"': r'\"',
        '\n': r'\n',
        '\r': r'\r',
        '\x00': r'\x00',
        '\ufeff': r'\ufeff',
        # http://stackoverflow.com/questions/2965293/
        '\u2028': r'\u2028',
        '\u2029': r'\u2029',
    }
    return ''.join(escaped_forms.get(char, char) for char in text)
def get_child_frames(startframe): def get_child_frames(startframe):
"""Get all children recursively of a given QWebFrame. """Get all children recursively of a given QWebFrame.

View File

@ -0,0 +1,72 @@
# vim: ft=python fileencoding=utf-8 sts=4 sw=4 et:
# Copyright 2016 Florian Bruhin (The Compiler) <mail@qutebrowser.org>
#
# This file is part of qutebrowser.
#
# qutebrowser is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# qutebrowser is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with qutebrowser. If not, see <http://www.gnu.org/licenses/>.
"""Utilities related to javascript interaction."""
from qutebrowser.utils import utils
def string_escape(text):
    """Return text with the characters special to javascript strings escaped.

    The result can be placed between quotes in javascript source, e.g.:

        elem.evaluateJavaScript("this.value='{}'".format(string_escape(...)))
    """
    # Every key is a single character, so one translate() pass produces the
    # same text as replacing them sequentially with the backslash going first:
    # none of the replacement texts contains a character that is escaped by a
    # later entry.
    escape_table = str.maketrans({
        '\\': r'\\',
        # Both quote styles are escaped; escaping the "wrong" one is harmless.
        "'": r"\'",
        '"': r'\"',
        '\n': r'\n',
        '\r': r'\r',
        '\x00': r'\x00',
        '\ufeff': r'\ufeff',
        # http://stackoverflow.com/questions/2965293/
        '\u2028': r'\u2028',
        '\u2029': r'\u2029',
    })
    return text.translate(escape_table)
def _convert_js_arg(arg):
    """Convert the given Python value to the text used for it in JS code.

    None becomes 'undefined', booleans become 'true'/'false', ints and floats
    are converted with str() and strings are passed through string_escape.

    Raises TypeError for a value of any other type.
    """
    if arg is None:
        return 'undefined'
    elif isinstance(arg, str):
        # NOTE(review): the escaped text is returned without surrounding
        # quotes, so a str argument ends up as bare source text rather than a
        # javascript string literal. tests/unit/utils/test_javascript.py pins
        # the unquoted form, so it is kept here - confirm whether quoting
        # should be added (together with the test).
        return string_escape(arg)
    elif isinstance(arg, bool):
        # bool is a subclass of int, so this has to be checked before int:
        # str(True) is 'True', which is not a javascript literal.
        return 'true' if arg else 'false'
    elif isinstance(arg, (int, float)):
        # Floats are accepted because the scroll helpers used to format their
        # x/y values into the script without any type restriction.
        # Non-finite floats get no special treatment.
        return str(arg)
    else:
        raise TypeError("Don't know how to handle {!r} of type {}!".format(
            arg, type(arg).__name__))
def assemble(name, function, *args):
    """Build javascript source from a .js file followed by a function call.

    The contents of javascript/<name>.js are read via utils.read_file, then a
    call to function is appended, with every value in args converted by
    _convert_js_arg and the results joined with ', '.
    """
    js_source = utils.read_file('javascript/{}.js'.format(name))
    call_args = [_convert_js_arg(arg) for arg in args]
    return "{code}\n{function}({args});".format(
        code=js_source,
        function=function,
        args=', '.join(call_args),
    )

View File

@ -145,6 +145,9 @@ PERFECT_FILES = [
'qutebrowser/utils/error.py'), 'qutebrowser/utils/error.py'),
('tests/unit/utils/test_typing.py', ('tests/unit/utils/test_typing.py',
'qutebrowser/utils/typing.py'), 'qutebrowser/utils/typing.py'),
('tests/unit/utils/test_javascript.py',
'qutebrowser/utils/javascript.py'),
('tests/unit/completion/test_models.py', ('tests/unit/completion/test_models.py',
'qutebrowser/completion/models/base.py'), 'qutebrowser/completion/models/base.py'),

View File

@ -36,8 +36,7 @@ import pytest
from PyQt5.QtCore import pyqtSignal, QUrl from PyQt5.QtCore import pyqtSignal, QUrl
from qutebrowser.misc import ipc from qutebrowser.misc import ipc
from qutebrowser.utils import log, utils from qutebrowser.utils import log, utils, javascript
from qutebrowser.browser.webkit import webelem
from helpers import utils as testutils from helpers import utils as testutils
from end2end.fixtures import testprocess from end2end.fixtures import testprocess
@ -527,7 +526,7 @@ class QuteProc(testprocess.Process):
'else if (_es.snapshotLength > 1) {{ console.log("qute:ambiguous ' 'else if (_es.snapshotLength > 1) {{ console.log("qute:ambiguous '
'elems") }} ' 'elems") }} '
'else {{ console.log("qute:okay"); _es.snapshotItem(0).click() }}' 'else {{ console.log("qute:okay"); _es.snapshotItem(0).click() }}'
).format(text=webelem.javascript_escape(_xpath_escape(text))) ).format(text=javascript.string_escape(_xpath_escape(text)))
self.send_cmd(':jseval ' + script, escape=False) self.send_cmd(':jseval ' + script, escape=False)
message = self.wait_for_js('qute:*').message message = self.wait_for_js('qute:*').message
if message.endswith('qute:no elems'): if message.endswith('qute:no elems'):
@ -562,8 +561,8 @@ class QuteProc(testprocess.Process):
def _xpath_escape(text): def _xpath_escape(text):
"""Escape a string to be used in an XPath expression. """Escape a string to be used in an XPath expression.
The resulting string should still be escaped with javascript_escape, to The resulting string should still be escaped with javascript.string_escape,
prevent javascript from interpreting the quotes. to prevent javascript from interpreting the quotes.
This function is needed because XPath does not provide any character This function is needed because XPath does not provide any character
escaping mechanisms, so to get the string escaping mechanisms, so to get the string

View File

@ -26,7 +26,7 @@ from qutebrowser.browser import pdfjs
# Note that we got double protection, once because we use QUrl.FullyEncoded and # Note that we got double protection, once because we use QUrl.FullyEncoded and
# because we use qutebrowser.browser.webelem.javascript_escape. Characters # because we use qutebrowser.utils.javascript.string_escape. Characters
# like " are already replaced by QUrl. # like " are already replaced by QUrl.
@pytest.mark.parametrize('url, expected', [ @pytest.mark.parametrize('url, expected', [
('http://foo.bar', "http://foo.bar"), ('http://foo.bar', "http://foo.bar"),

View File

@ -23,12 +23,8 @@ from unittest import mock
import collections.abc import collections.abc
import operator import operator
import itertools import itertools
import binascii
import os.path
import hypothesis from PyQt5.QtCore import QRect, QPoint
import hypothesis.strategies
from PyQt5.QtCore import PYQT_VERSION, QRect, QPoint
from PyQt5.QtWebKit import QWebElement from PyQt5.QtWebKit import QWebElement
import pytest import pytest
@ -818,98 +814,6 @@ class TestRectOnView:
assert rect == QRect(20, 20, 8, 8) assert rect == QRect(20, 20, 8, 8)
class TestJavascriptEscape:

    """Check webelem.javascript_escape with fixed and generated inputs."""

    # Maps an input text to the escaped text expected for it.
    TESTS = {
        'foo\\bar': r'foo\\bar',
        'foo\nbar': r'foo\nbar',
        'foo\rbar': r'foo\rbar',
        "foo'bar": r"foo\'bar",
        'foo"bar': r'foo\"bar',
        'one\\two\rthree\nfour\'five"six': r'one\\two\rthree\nfour\'five\"six',
        '\x00': r'\x00',
        'hellö': 'hellö',
        '': '',
        '\x80Ā': '\x80Ā',
        '𐀀\x00𐀀\x00': r'𐀀\x00𐀀\x00',
        '𐀀\ufeff': r'𐀀\ufeff',
        '\ufeff': r'\ufeff',
        # http://stackoverflow.com/questions/2965293/
        '\u2028': r'\u2028',
        '\u2029': r'\u2029',
    }

    # Once there was this warning here:
    #   load glyph failed err=6 face=0x2680ba0, glyph=1912
    # http://qutebrowser.org:8010/builders/debian-jessie/builds/765/steps/unittests/
    # Should that be ignored?

    @pytest.mark.parametrize('before, after', sorted(TESTS.items()), ids=repr)
    def test_fake_escape(self, before, after):
        """Compare the escaped text with the hand-written expectation."""
        assert webelem.javascript_escape(before) == after

    def _test_escape(self, text, qtbot, webframe):
        """Run the simple check and fall back to the hexlified one."""
        try:
            self._test_escape_simple(text, webframe)
        except AssertionError:
            # The simple method failed, so try the other one. The docstring
            # of _test_escape_hexlified explains why that is necessary.
            self._test_escape_hexlified(text, qtbot, webframe)

    def _test_escape_hexlified(self, text, qtbot, webframe):
        """Check the escaping by comparing hex dumps instead of strings.

        The conversion of QStrings to Python strings is broken in some corner
        cases with some older PyQt versions. So the escaped text is inserted
        into a helper HTML page, and the page's window.qute_test_result (read
        as "<hex>|<text>") is compared against the hexlified UTF-8 bytes of
        the input.
        """
        js_literal = webelem.javascript_escape(text)
        html_dir = os.path.dirname(__file__)
        html_path = os.path.join(html_dir, 'test_webelem_jsescape.html')
        with open(html_path, encoding='utf-8') as html_file:
            html_source = html_file.read().replace('%INPUT%', js_literal)

        with qtbot.waitSignal(webframe.loadFinished) as blocker:
            webframe.setHtml(html_source)
        assert blocker.args == [True]

        js_result = webframe.evaluateJavaScript('window.qute_test_result')
        assert js_result is not None
        assert '|' in js_result
        actual_hex, actual_text = js_result.split('|', maxsplit=1)
        expected_hex = binascii.hexlify(text.encode('utf-8')).decode('ascii')
        assert actual_hex == expected_hex, actual_text

    def _test_escape_simple(self, text, webframe):
        """Check that evaluating the quoted escaped text gives text back."""
        js_literal = webelem.javascript_escape(text)
        evaluated = webframe.evaluateJavaScript('"{}";'.format(js_literal))
        assert evaluated == text

    @pytest.mark.parametrize('text', sorted(TESTS), ids=repr)
    def test_real_escape(self, webframe, qtbot, text):
        """Test javascript escaping of the TESTS inputs with a real QWebPage."""
        self._test_escape(text, qtbot, webframe)

    @pytest.mark.qt_log_ignore('^OpenType support missing for script')
    @hypothesis.given(hypothesis.strategies.text())
    def test_real_escape_hypothesis(self, webframe, qtbot, text):
        """Test javascript escaping with a real QWebPage and hypothesis."""
        # self._test_escape can't simply be used here because of this:
        # https://github.com/pytest-dev/pytest-qt/issues/69
        try:
            self._test_escape_simple(text, webframe)
        except AssertionError:
            # The fallback is only attempted from PyQt 5.3 on.
            if PYQT_VERSION < 0x050300:
                return
            self._test_escape_hexlified(text, qtbot, webframe)
class TestGetChildFrames: class TestGetChildFrames:
"""Check get_child_frames.""" """Check get_child_frames."""

View File

@ -0,0 +1,144 @@
# vim: ft=python fileencoding=utf-8 sts=4 sw=4 et:
# Copyright 2016 Florian Bruhin (The Compiler) <mail@qutebrowser.org>
#
# This file is part of qutebrowser.
#
# qutebrowser is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# qutebrowser is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with qutebrowser. If not, see <http://www.gnu.org/licenses/>.
"""Tests for qutebrowser.utils.javascript."""
import binascii
import os.path
import pytest
import hypothesis
import hypothesis.strategies
from PyQt5.QtCore import PYQT_VERSION
from qutebrowser.utils import javascript
class TestStringEscape:

    """Check javascript.string_escape with fixed and generated inputs."""

    # Maps an input text to the escaped text expected for it.
    TESTS = {
        'foo\\bar': r'foo\\bar',
        'foo\nbar': r'foo\nbar',
        'foo\rbar': r'foo\rbar',
        "foo'bar": r"foo\'bar",
        'foo"bar': r'foo\"bar',
        'one\\two\rthree\nfour\'five"six': r'one\\two\rthree\nfour\'five\"six',
        '\x00': r'\x00',
        'hellö': 'hellö',
        '': '',
        '\x80Ā': '\x80Ā',
        '𐀀\x00𐀀\x00': r'𐀀\x00𐀀\x00',
        '𐀀\ufeff': r'𐀀\ufeff',
        '\ufeff': r'\ufeff',
        # http://stackoverflow.com/questions/2965293/
        '\u2028': r'\u2028',
        '\u2029': r'\u2029',
    }

    # Once there was this warning here:
    #   load glyph failed err=6 face=0x2680ba0, glyph=1912
    # http://qutebrowser.org:8010/builders/debian-jessie/builds/765/steps/unittests/
    # Should that be ignored?

    @pytest.mark.parametrize('before, after', sorted(TESTS.items()), ids=repr)
    def test_fake_escape(self, before, after):
        """Compare the escaped text with the hand-written expectation."""
        assert javascript.string_escape(before) == after

    def _test_escape(self, text, qtbot, webframe):
        """Run the simple check and fall back to the hexlified one."""
        try:
            self._test_escape_simple(text, webframe)
        except AssertionError:
            # The simple method failed, so try the other one. The docstring
            # of _test_escape_hexlified explains why that is necessary.
            self._test_escape_hexlified(text, qtbot, webframe)

    def _test_escape_hexlified(self, text, qtbot, webframe):
        """Check the escaping by comparing hex dumps instead of strings.

        The conversion of QStrings to Python strings is broken in some corner
        cases with some older PyQt versions. So the escaped text is inserted
        into a helper HTML page, and the page's window.qute_test_result (read
        as "<hex>|<text>") is compared against the hexlified UTF-8 bytes of
        the input.
        """
        js_literal = javascript.string_escape(text)
        html_dir = os.path.dirname(__file__)
        html_path = os.path.join(html_dir,
                                 'test_javascript_string_escape.html')
        with open(html_path, encoding='utf-8') as html_file:
            html_source = html_file.read().replace('%INPUT%', js_literal)

        with qtbot.waitSignal(webframe.loadFinished) as blocker:
            webframe.setHtml(html_source)
        assert blocker.args == [True]

        js_result = webframe.evaluateJavaScript('window.qute_test_result')
        assert js_result is not None
        assert '|' in js_result
        actual_hex, actual_text = js_result.split('|', maxsplit=1)
        expected_hex = binascii.hexlify(text.encode('utf-8')).decode('ascii')
        assert actual_hex == expected_hex, actual_text

    def _test_escape_simple(self, text, webframe):
        """Check that evaluating the quoted escaped text gives text back."""
        js_literal = javascript.string_escape(text)
        evaluated = webframe.evaluateJavaScript('"{}";'.format(js_literal))
        assert evaluated == text

    @pytest.mark.parametrize('text', sorted(TESTS), ids=repr)
    def test_real_escape(self, webframe, qtbot, text):
        """Test javascript escaping of the TESTS inputs with a real QWebPage."""
        self._test_escape(text, qtbot, webframe)

    @pytest.mark.qt_log_ignore('^OpenType support missing for script')
    @hypothesis.given(hypothesis.strategies.text())
    def test_real_escape_hypothesis(self, webframe, qtbot, text):
        """Test javascript escaping with a real QWebPage and hypothesis."""
        # self._test_escape can't simply be used here because of this:
        # https://github.com/pytest-dev/pytest-qt/issues/69
        try:
            self._test_escape_simple(text, webframe)
        except AssertionError:
            # The fallback is only attempted from PyQt 5.3 on.
            if PYQT_VERSION < 0x050300:
                return
            self._test_escape_hexlified(text, qtbot, webframe)
@pytest.mark.parametrize('arg, expected', [
    ('foobar', 'foobar'),
    ('foo\\bar', r'foo\\bar'),
    (42, '42'),
    (None, 'undefined'),
    (object(), TypeError),
])
def test_convert_js_arg(arg, expected):
    """Check the conversion result, or the TypeError for unsupported types.

    An expected value of TypeError means the conversion has to raise it; any
    other expected value is compared with the returned text.
    """
    if expected is not TypeError:
        assert javascript._convert_js_arg(arg) == expected
        return

    with pytest.raises(TypeError):
        javascript._convert_js_arg(arg)
def test_assemble(monkeypatch):
    """Check the text assemble builds, with utils.read_file stubbed out."""

    def fake_read_file(name):
        # Stand-in for the real file contents which shows the requested path.
        return '<code from {}>'.format(name)

    monkeypatch.setattr(javascript.utils, 'read_file', fake_read_file)

    assembled = javascript.assemble('foo', 'func', 23)
    assert assembled == '<code from javascript/foo.js>\nfunc(23);'

View File

@ -1,5 +1,5 @@
<!-- <!--
Helper file for test_javascript_escape() in test_webelem.py. Helper file for string_escape() in test_javascript.py.
Since the conversion from QStrings to Python strings is broken in some corner Since the conversion from QStrings to Python strings is broken in some corner
cases in PyQt < 5.4 we hexlify the string we got in javascript here and test cases in PyQt < 5.4 we hexlify the string we got in javascript here and test