From 08b70f0f4cfb102f41b1105b1b06681d30362c59 Mon Sep 17 00:00:00 2001 From: Florian Bruhin Date: Thu, 4 Aug 2016 17:53:13 +0200 Subject: [PATCH] Add qutebrowser.utils.javascript webelem.javascript_escape got renamed to javascript.string_escape, and a new javascript.assemble got added to make it easier to call a function inside a .js file. --- qutebrowser/browser/commands.py | 4 +- qutebrowser/browser/pdfjs.py | 5 +- qutebrowser/browser/webengine/webenginetab.py | 19 +-- qutebrowser/browser/webkit/webelem.py | 32 +--- qutebrowser/utils/javascript.py | 72 +++++++++ scripts/dev/check_coverage.py | 3 + tests/end2end/fixtures/quteprocess.py | 9 +- tests/unit/browser/test_pdfjs.py | 2 +- tests/unit/browser/webkit/test_webelem.py | 98 +----------- tests/unit/utils/test_javascript.py | 144 ++++++++++++++++++ .../test_javascript_string_escape.html} | 2 +- 11 files changed, 236 insertions(+), 154 deletions(-) create mode 100644 qutebrowser/utils/javascript.py create mode 100644 tests/unit/utils/test_javascript.py rename tests/unit/{browser/webkit/test_webelem_jsescape.html => utils/test_javascript_string_escape.html} (96%) diff --git a/qutebrowser/browser/commands.py b/qutebrowser/browser/commands.py index 416719c67..2ffa647a3 100644 --- a/qutebrowser/browser/commands.py +++ b/qutebrowser/browser/commands.py @@ -44,7 +44,7 @@ from qutebrowser.browser import urlmarks, browsertab, inspector from qutebrowser.browser.webkit import webelem, downloads, mhtml from qutebrowser.keyinput import modeman from qutebrowser.utils import (message, usertypes, log, qtutils, urlutils, - objreg, utils, typing) + objreg, utils, typing, javascript) from qutebrowser.utils.usertypes import KeyMode from qutebrowser.misc import editor, guiprocess from qutebrowser.completion.models import instances, sortfilter @@ -1485,7 +1485,7 @@ class CommandDispatcher: var event = document.createEvent('TextEvent'); event.initTextEvent('textInput', true, true, null, sel); this.dispatchEvent(event); - 
""".format(webelem.javascript_escape(sel))) + """.format(javascript.string_escape(sel))) def _search_cb(self, found, *, tab, old_scroll_pos, options, text, prev): """Callback called from search/search_next/search_prev. diff --git a/qutebrowser/browser/pdfjs.py b/qutebrowser/browser/pdfjs.py index 398f0494a..51ed2dfbe 100644 --- a/qutebrowser/browser/pdfjs.py +++ b/qutebrowser/browser/pdfjs.py @@ -23,8 +23,7 @@ import os from PyQt5.QtCore import QUrl -from qutebrowser.browser.webkit import webelem -from qutebrowser.utils import utils +from qutebrowser.utils import utils, javascript class PDFJSNotFound(Exception): @@ -65,7 +64,7 @@ def _generate_pdfjs_script(url): return ( 'PDFJS.verbosity = PDFJS.VERBOSITY_LEVELS.info;\n' 'PDFView.open("{url}");\n' - ).format(url=webelem.javascript_escape(url.toString(QUrl.FullyEncoded))) + ).format(url=javascript.string_escape(url.toString(QUrl.FullyEncoded))) def fix_urls(asset): diff --git a/qutebrowser/browser/webengine/webenginetab.py b/qutebrowser/browser/webengine/webenginetab.py index 05b7bc6ac..800d5b3bc 100644 --- a/qutebrowser/browser/webengine/webenginetab.py +++ b/qutebrowser/browser/webengine/webenginetab.py @@ -31,7 +31,7 @@ from PyQt5.QtWebEngineWidgets import QWebEnginePage from qutebrowser.browser import browsertab from qutebrowser.browser.webengine import webview -from qutebrowser.utils import usertypes, qtutils, log, utils +from qutebrowser.utils import usertypes, qtutils, log, javascript class WebEnginePrinting(browsertab.AbstractPrinting): @@ -220,10 +220,7 @@ class WebEngineScroller(browsertab.AbstractScroller): self._pos_px = QPoint(jsret['px']['x'], jsret['px']['y']) self.perc_changed.emit(*self._pos_perc) - js_code = """ - {scroll_js} - scroll_pos(); - """.format(scroll_js=utils.read_file('javascript/scroll.js')) + js_code = javascript.assemble('scroll', 'scroll_pos') self._tab.run_js_async(js_code, update_scroll_pos) def pos_px(self): @@ -233,12 +230,7 @@ class 
WebEngineScroller(browsertab.AbstractScroller): return self._pos_perc def to_perc(self, x=None, y=None): - js_code = """ - {scroll_js} - scroll_to_perc({x}, {y}); - """.format(scroll_js=utils.read_file('javascript/scroll.js'), - x='undefined' if x is None else x, - y='undefined' if y is None else y) + js_code = javascript.assemble('scroll', 'scroll_to_perc', x, y) self._tab.run_js_async(js_code) def to_point(self, point): @@ -249,10 +241,7 @@ class WebEngineScroller(browsertab.AbstractScroller): self._tab.run_js_async("window.scrollBy({x}, {y});".format(x=x, y=y)) def delta_page(self, x=0, y=0): - js_code = """ - {scroll_js} - scroll_delta_page({x}, {y}); - """.format(scroll_js=utils.read_file('javascript/scroll.js'), x=x, y=y) + js_code = javascript.assemble('scroll', 'scroll_delta_page', x, y) self._tab.run_js_async(js_code) def up(self, count=1): diff --git a/qutebrowser/browser/webkit/webelem.py b/qutebrowser/browser/webkit/webelem.py index d73341178..213ee435c 100644 --- a/qutebrowser/browser/webkit/webelem.py +++ b/qutebrowser/browser/webkit/webelem.py @@ -33,7 +33,7 @@ from PyQt5.QtCore import QRect, QUrl from PyQt5.QtWebKit import QWebElement from qutebrowser.config import config -from qutebrowser.utils import log, usertypes, utils +from qutebrowser.utils import log, usertypes, utils, javascript Group = usertypes.enum('Group', ['all', 'links', 'images', 'url', 'prevnext', @@ -202,7 +202,7 @@ class WebElementWrapper(collections.abc.MutableMapping): else: log.misc.debug("Filling element {} via javascript.".format( self.debug_text())) - text = javascript_escape(text) + text = javascript.string_escape(text) self._elem.evaluateJavaScript("this.value='{}'".format(text)) def set_inner_xml(self, xml): @@ -501,34 +501,6 @@ class WebElementWrapper(collections.abc.MutableMapping): return all([visible_on_screen, visible_in_frame]) - - -def javascript_escape(text): - """Escape values special to javascript in strings. 
- - With this we should be able to use something like: - elem.evaluateJavaScript("this.value='{}'".format(javascript_escape(...))) - And all values should work. - """ - # This is a list of tuples because order matters, and using OrderedDict - # makes no sense because we don't actually need dict-like properties. - replacements = ( - ('\\', r'\\'), # First escape all literal \ signs as \\. - ("'", r"\'"), # Then escape ' and " as \' and \". - ('"', r'\"'), # (note it won't hurt when we escape the wrong one). - ('\n', r'\n'), # We also need to escape newlines for some reason. - ('\r', r'\r'), - ('\x00', r'\x00'), - ('\ufeff', r'\ufeff'), - # http://stackoverflow.com/questions/2965293/ - ('\u2028', r'\u2028'), - ('\u2029', r'\u2029'), - ) - for orig, repl in replacements: - text = text.replace(orig, repl) - return text - - def get_child_frames(startframe): """Get all children recursively of a given QWebFrame. diff --git a/qutebrowser/utils/javascript.py b/qutebrowser/utils/javascript.py new file mode 100644 index 000000000..19e6b22c2 --- /dev/null +++ b/qutebrowser/utils/javascript.py @@ -0,0 +1,72 @@ +# vim: ft=python fileencoding=utf-8 sts=4 sw=4 et: + +# Copyright 2016 Florian Bruhin (The Compiler) +# +# This file is part of qutebrowser. +# +# qutebrowser is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# qutebrowser is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with qutebrowser. If not, see <http://www.gnu.org/licenses/>. 
+ +"""Utilities related to javascript interaction.""" + + +from qutebrowser.utils import utils + + +def string_escape(text): + """Escape values special to javascript in strings. + + With this we should be able to use something like: + elem.evaluateJavaScript("this.value='{}'".format(string_escape(...))) + And all values should work. + """ + # This is a list of tuples because order matters, and using OrderedDict + # makes no sense because we don't actually need dict-like properties. + replacements = ( + ('\\', r'\\'), # First escape all literal \ signs as \\. + ("'", r"\'"), # Then escape ' and " as \' and \". + ('"', r'\"'), # (note it won't hurt when we escape the wrong one). + ('\n', r'\n'), # We also need to escape newlines for some reason. + ('\r', r'\r'), + ('\x00', r'\x00'), + ('\ufeff', r'\ufeff'), + # http://stackoverflow.com/questions/2965293/ + ('\u2028', r'\u2028'), + ('\u2029', r'\u2029'), + ) + for orig, repl in replacements: + text = text.replace(orig, repl) + return text + + +def _convert_js_arg(arg): + """Convert the given argument so it's the equivalent in JS.""" + if arg is None: + return 'undefined' + elif isinstance(arg, str): + return string_escape(arg) + elif isinstance(arg, int): + return str(arg) + else: + raise TypeError("Don't know how to handle {!r} of type {}!".format( + arg, type(arg).__name__)) + + +def assemble(name, function, *args): + """Assemble a javascript file and a function call.""" + code = "{code}\n{function}({args});".format( + code=utils.read_file('javascript/{}.js'.format(name)), + function=function, + args=', '.join(_convert_js_arg(arg) for arg in args), + ) + return code diff --git a/scripts/dev/check_coverage.py b/scripts/dev/check_coverage.py index 49c4ba443..457f472ba 100644 --- a/scripts/dev/check_coverage.py +++ b/scripts/dev/check_coverage.py @@ -145,6 +145,9 @@ PERFECT_FILES = [ 'qutebrowser/utils/error.py'), ('tests/unit/utils/test_typing.py', 'qutebrowser/utils/typing.py'), + 
('tests/unit/utils/test_javascript.py', + 'qutebrowser/utils/javascript.py'), + ('tests/unit/completion/test_models.py', 'qutebrowser/completion/models/base.py'), diff --git a/tests/end2end/fixtures/quteprocess.py b/tests/end2end/fixtures/quteprocess.py index 49fd9f27c..bff71ce49 100644 --- a/tests/end2end/fixtures/quteprocess.py +++ b/tests/end2end/fixtures/quteprocess.py @@ -36,8 +36,7 @@ import pytest from PyQt5.QtCore import pyqtSignal, QUrl from qutebrowser.misc import ipc -from qutebrowser.utils import log, utils -from qutebrowser.browser.webkit import webelem +from qutebrowser.utils import log, utils, javascript from helpers import utils as testutils from end2end.fixtures import testprocess @@ -527,7 +526,7 @@ class QuteProc(testprocess.Process): 'else if (_es.snapshotLength > 1) {{ console.log("qute:ambiguous ' 'elems") }} ' 'else {{ console.log("qute:okay"); _es.snapshotItem(0).click() }}' - ).format(text=webelem.javascript_escape(_xpath_escape(text))) + ).format(text=javascript.string_escape(_xpath_escape(text))) self.send_cmd(':jseval ' + script, escape=False) message = self.wait_for_js('qute:*').message if message.endswith('qute:no elems'): @@ -562,8 +561,8 @@ class QuteProc(testprocess.Process): def _xpath_escape(text): """Escape a string to be used in an XPath expression. - The resulting string should still be escaped with javascript_escape, to - prevent javascript from interpreting the quotes. + The resulting string should still be escaped with javascript.string_escape, + to prevent javascript from interpreting the quotes. 
This function is needed because XPath does not provide any character escaping mechanisms, so to get the string diff --git a/tests/unit/browser/test_pdfjs.py b/tests/unit/browser/test_pdfjs.py index 7293ff522..ad489bc7a 100644 --- a/tests/unit/browser/test_pdfjs.py +++ b/tests/unit/browser/test_pdfjs.py @@ -26,7 +26,7 @@ from qutebrowser.browser import pdfjs # Note that we got double protection, once because we use QUrl.FullyEncoded and -# because we use qutebrowser.browser.webelem.javascript_escape. Characters +# because we use qutebrowser.utils.javascript.string_escape. Characters # like " are already replaced by QUrl. @pytest.mark.parametrize('url, expected', [ ('http://foo.bar', "http://foo.bar"), diff --git a/tests/unit/browser/webkit/test_webelem.py b/tests/unit/browser/webkit/test_webelem.py index a14666943..f97402a9a 100644 --- a/tests/unit/browser/webkit/test_webelem.py +++ b/tests/unit/browser/webkit/test_webelem.py @@ -23,12 +23,8 @@ from unittest import mock import collections.abc import operator import itertools -import binascii -import os.path -import hypothesis -import hypothesis.strategies -from PyQt5.QtCore import PYQT_VERSION, QRect, QPoint +from PyQt5.QtCore import QRect, QPoint from PyQt5.QtWebKit import QWebElement import pytest @@ -818,98 +814,6 @@ class TestRectOnView: assert rect == QRect(20, 20, 8, 8) -class TestJavascriptEscape: - - TESTS = { - 'foo\\bar': r'foo\\bar', - 'foo\nbar': r'foo\nbar', - 'foo\rbar': r'foo\rbar', - "foo'bar": r"foo\'bar", - 'foo"bar': r'foo\"bar', - 'one\\two\rthree\nfour\'five"six': r'one\\two\rthree\nfour\'five\"six', - '\x00': r'\x00', - 'hellö': 'hellö', - '☃': '☃', - '\x80Ā': '\x80Ā', - '𐀀\x00𐀀\x00': r'𐀀\x00𐀀\x00', - '𐀀\ufeff': r'𐀀\ufeff', - '\ufeff': r'\ufeff', - # http://stackoverflow.com/questions/2965293/ - '\u2028': r'\u2028', - '\u2029': r'\u2029', - } - - # Once there was this warning here: - # load glyph failed err=6 face=0x2680ba0, glyph=1912 - # 
http://qutebrowser.org:8010/builders/debian-jessie/builds/765/steps/unittests/ - # Should that be ignored? - - @pytest.mark.parametrize('before, after', sorted(TESTS.items()), ids=repr) - def test_fake_escape(self, before, after): - """Test javascript escaping with some expected outcomes.""" - assert webelem.javascript_escape(before) == after - - def _test_escape(self, text, qtbot, webframe): - """Helper function for test_real_escape*.""" - try: - self._test_escape_simple(text, webframe) - except AssertionError: - # Try another method if the simple method failed. - # - # See _test_escape_hexlified documentation on why this is - # necessary. - self._test_escape_hexlified(text, qtbot, webframe) - - def _test_escape_hexlified(self, text, qtbot, webframe): - """Test conversion by hexlifying in javascript. - - Since the conversion of QStrings to Python strings is broken in some - older PyQt versions in some corner cases, we load an HTML file which - generates an MD5 of the escaped text and use that for comparisons. 
- """ - escaped = webelem.javascript_escape(text) - path = os.path.join(os.path.dirname(__file__), - 'test_webelem_jsescape.html') - with open(path, encoding='utf-8') as f: - html_source = f.read().replace('%INPUT%', escaped) - - with qtbot.waitSignal(webframe.loadFinished) as blocker: - webframe.setHtml(html_source) - assert blocker.args == [True] - - result = webframe.evaluateJavaScript('window.qute_test_result') - assert result is not None - assert '|' in result - result_md5, result_text = result.split('|', maxsplit=1) - text_md5 = binascii.hexlify(text.encode('utf-8')).decode('ascii') - assert result_md5 == text_md5, result_text - - def _test_escape_simple(self, text, webframe): - """Test conversion by using evaluateJavaScript.""" - escaped = webelem.javascript_escape(text) - result = webframe.evaluateJavaScript('"{}";'.format(escaped)) - assert result == text - - @pytest.mark.parametrize('text', sorted(TESTS), ids=repr) - def test_real_escape(self, webframe, qtbot, text): - """Test javascript escaping with a real QWebPage.""" - self._test_escape(text, qtbot, webframe) - - @pytest.mark.qt_log_ignore('^OpenType support missing for script') - @hypothesis.given(hypothesis.strategies.text()) - def test_real_escape_hypothesis(self, webframe, qtbot, text): - """Test javascript escaping with a real QWebPage and hypothesis.""" - # We can't simply use self._test_escape because of this: - # https://github.com/pytest-dev/pytest-qt/issues/69 - - # self._test_escape(text, qtbot, webframe) - try: - self._test_escape_simple(text, webframe) - except AssertionError: - if PYQT_VERSION >= 0x050300: - self._test_escape_hexlified(text, qtbot, webframe) - - class TestGetChildFrames: """Check get_child_frames.""" diff --git a/tests/unit/utils/test_javascript.py b/tests/unit/utils/test_javascript.py new file mode 100644 index 000000000..3cf649f6d --- /dev/null +++ b/tests/unit/utils/test_javascript.py @@ -0,0 +1,144 @@ +# vim: ft=python fileencoding=utf-8 sts=4 sw=4 et: + +# Copyright 
2016 Florian Bruhin (The Compiler) +# +# This file is part of qutebrowser. +# +# qutebrowser is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# qutebrowser is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with qutebrowser. If not, see <http://www.gnu.org/licenses/>. + +"""Tests for qutebrowser.utils.javascript.""" + +import binascii +import os.path + +import pytest +import hypothesis +import hypothesis.strategies +from PyQt5.QtCore import PYQT_VERSION + +from qutebrowser.utils import javascript + + +class TestStringEscape: + + TESTS = { + 'foo\\bar': r'foo\\bar', + 'foo\nbar': r'foo\nbar', + 'foo\rbar': r'foo\rbar', + "foo'bar": r"foo\'bar", + 'foo"bar': r'foo\"bar', + 'one\\two\rthree\nfour\'five"six': r'one\\two\rthree\nfour\'five\"six', + '\x00': r'\x00', + 'hellö': 'hellö', + '☃': '☃', + '\x80Ā': '\x80Ā', + '𐀀\x00𐀀\x00': r'𐀀\x00𐀀\x00', + '𐀀\ufeff': r'𐀀\ufeff', + '\ufeff': r'\ufeff', + # http://stackoverflow.com/questions/2965293/ + '\u2028': r'\u2028', + '\u2029': r'\u2029', + } + + # Once there was this warning here: + # load glyph failed err=6 face=0x2680ba0, glyph=1912 + # http://qutebrowser.org:8010/builders/debian-jessie/builds/765/steps/unittests/ + # Should that be ignored? 
+ + @pytest.mark.parametrize('before, after', sorted(TESTS.items()), ids=repr) + def test_fake_escape(self, before, after): + """Test javascript escaping with some expected outcomes.""" + assert javascript.string_escape(before) == after + + def _test_escape(self, text, qtbot, webframe): + """Helper function for test_real_escape*.""" + try: + self._test_escape_simple(text, webframe) + except AssertionError: + # Try another method if the simple method failed. + # + # See _test_escape_hexlified documentation on why this is + # necessary. + self._test_escape_hexlified(text, qtbot, webframe) + + def _test_escape_hexlified(self, text, qtbot, webframe): + """Test conversion by hexlifying in javascript. + + Since the conversion of QStrings to Python strings is broken in some + older PyQt versions in some corner cases, we load an HTML file which + generates an MD5 of the escaped text and use that for comparisons. + """ + escaped = javascript.string_escape(text) + path = os.path.join(os.path.dirname(__file__), + 'test_javascript_string_escape.html') + with open(path, encoding='utf-8') as f: + html_source = f.read().replace('%INPUT%', escaped) + + with qtbot.waitSignal(webframe.loadFinished) as blocker: + webframe.setHtml(html_source) + assert blocker.args == [True] + + result = webframe.evaluateJavaScript('window.qute_test_result') + assert result is not None + assert '|' in result + result_md5, result_text = result.split('|', maxsplit=1) + text_md5 = binascii.hexlify(text.encode('utf-8')).decode('ascii') + assert result_md5 == text_md5, result_text + + def _test_escape_simple(self, text, webframe): + """Test conversion by using evaluateJavaScript.""" + escaped = javascript.string_escape(text) + result = webframe.evaluateJavaScript('"{}";'.format(escaped)) + assert result == text + + @pytest.mark.parametrize('text', sorted(TESTS), ids=repr) + def test_real_escape(self, webframe, qtbot, text): + """Test javascript escaping with a real QWebPage.""" + self._test_escape(text, 
qtbot, webframe) + + @pytest.mark.qt_log_ignore('^OpenType support missing for script') + @hypothesis.given(hypothesis.strategies.text()) + def test_real_escape_hypothesis(self, webframe, qtbot, text): + """Test javascript escaping with a real QWebPage and hypothesis.""" + # We can't simply use self._test_escape because of this: + # https://github.com/pytest-dev/pytest-qt/issues/69 + + # self._test_escape(text, qtbot, webframe) + try: + self._test_escape_simple(text, webframe) + except AssertionError: + if PYQT_VERSION >= 0x050300: + self._test_escape_hexlified(text, qtbot, webframe) + + +@pytest.mark.parametrize('arg, expected', [ + ('foobar', 'foobar'), + ('foo\\bar', r'foo\\bar'), + (42, '42'), + (None, 'undefined'), + (object(), TypeError), +]) +def test_convert_js_arg(arg, expected): + if expected is TypeError: + with pytest.raises(TypeError): + javascript._convert_js_arg(arg) + else: + assert javascript._convert_js_arg(arg) == expected + + +def test_assemble(monkeypatch): + monkeypatch.setattr(javascript.utils, 'read_file', + lambda name: '<code from {}>'.format(name)) + expected = '<code from javascript/foo.js>\nfunc(23);' + assert javascript.assemble('foo', 'func', 23) == expected diff --git a/tests/unit/browser/webkit/test_webelem_jsescape.html b/tests/unit/utils/test_javascript_string_escape.html similarity index 96% rename from tests/unit/browser/webkit/test_webelem_jsescape.html rename to tests/unit/utils/test_javascript_string_escape.html index 937499258..31bda35ef 100644 --- a/tests/unit/browser/webkit/test_webelem_jsescape.html +++ b/tests/unit/utils/test_javascript_string_escape.html @@ -1,5 +1,5 @@