diff --git a/qutebrowser/browser/downloads.py b/qutebrowser/browser/downloads.py index 58b99a1d2..fddaa37ed 100644 --- a/qutebrowser/browser/downloads.py +++ b/qutebrowser/browser/downloads.py @@ -750,7 +750,7 @@ class AbstractDownloadItem(QObject): if filename is None: # pragma: no cover log.downloads.error("No filename to open the download!") return - self.pdfjs_requested.emit(filename) + self.pdfjs_requested.emit(os.path.basename(filename)) def set_target(self, target): """Set the target for a given download. @@ -1233,7 +1233,7 @@ class TempDownloadManager: "directory") self._tmpdir = None - def _get_tmpdir(self): + def get_tmpdir(self): """Return the temporary directory that is used for downloads. The directory is created lazily on first access. @@ -1259,7 +1259,7 @@ class TempDownloadManager: Return: A tempfile.NamedTemporaryFile that should be used to save the file. """ - tmpdir = self._get_tmpdir() + tmpdir = self.get_tmpdir() encoding = sys.getfilesystemencoding() suggested_name = utils.force_encoding(suggested_name, encoding) # Make sure that the filename is not too long diff --git a/qutebrowser/browser/pdfjs.py b/qutebrowser/browser/pdfjs.py index 602dda7e6..a0bad1cac 100644 --- a/qutebrowser/browser/pdfjs.py +++ b/qutebrowser/browser/pdfjs.py @@ -27,6 +27,7 @@ from PyQt5.QtCore import QUrl, QUrlQuery from qutebrowser.utils import utils, javascript, jinja, qtutils, usertypes from qutebrowser.misc import objects from qutebrowser.config import config +from qutebrowser.browser import downloads class PDFJSNotFound(Exception): @@ -43,31 +44,37 @@ class PDFJSNotFound(Exception): super().__init__(message) -def generate_pdfjs_page(url): - """Return the html content of a page that displays url with pdfjs. +def generate_pdfjs_page(filename, url): + """Return the html content of a page that displays a file with pdfjs. Returns a string. Args: - url: The url of the pdf as QUrl. + filename: The filename of the PDF to open. + url: The URL being opened. """ if not is_available(): return jinja.render('no_pdfjs.html', url=url.toDisplayString(), title="PDF.js not found") viewer = get_pdfjs_res('web/viewer.html').decode('utf-8') - script = _generate_pdfjs_script(url) + script = _generate_pdfjs_script(filename) html_page = viewer.replace('', ''.format(script)) return html_page -def _generate_pdfjs_script(url): +def _generate_pdfjs_script(filename): """Generate the script that shows the pdf with pdf.js. Args: - url: The url of the pdf page as QUrl. + filename: The name of the file to open. """ + url = QUrl('qute://pdfjs/file') + url_query = QUrlQuery() + url_query.addQueryItem('filename', filename) + url.setQuery(url_query) + return jinja.js_environment.from_string(""" document.addEventListener("DOMContentLoaded", function() { {% if disable_create_object_url %} @@ -199,13 +206,8 @@ def should_use_pdfjs(mimetype): def get_main_url(filename): """Get the URL to be opened to view a local PDF.""" url = QUrl('qute://pdfjs/web/viewer.html') - - file_url = QUrl('qute://pdfjs/file') - file_url_query = QUrlQuery() - file_url_query.addQueryItem('filename', filename) - file_url.setQuery(file_url_query) - query = QUrlQuery() - query.addQueryItem('file', file_url.toString()) + query.addQueryItem('filename', filename) # read from our JS + query.addQueryItem('file', '') # to avoid pdfjs opening the default PDF url.setQuery(query) return url diff --git a/qutebrowser/browser/qutescheme.py b/qutebrowser/browser/qutescheme.py index dc952f0f8..5ff3f6fb0 100644 --- a/qutebrowser/browser/qutescheme.py +++ b/qutebrowser/browser/qutescheme.py @@ -43,7 +43,7 @@ import pkg_resources from PyQt5.QtCore import QUrlQuery, QUrl import qutebrowser -from qutebrowser.browser import pdfjs +from qutebrowser.browser import pdfjs, downloads from qutebrowser.config import config, configdata, configexc, configdiff from qutebrowser.utils import (version, utils, jinja, log, message, docutils, objreg, urlutils) @@ -520,17 +520,27 @@ def qute_pastebin_version(_url): @add_handler('pdfjs') def qute_pdfjs(url): """Handler for qute://pdfjs. Return the pdf.js viewer.""" - # FIXME be more strict about allowed files here if url.path() == '/file': filename = QUrlQuery(url).queryItemValue('filename') - with open(filename, 'rb') as f: + if not filename: + raise UrlInvalidError("Missing filename") + if '/' in filename or os.sep in filename: + raise RequestDeniedError("Path separator in filename.") + + path = os.path.join(downloads.temp_download_manager.get_tmpdir().name, + filename) + + with open(path, 'rb') as f: data = f.read() + mimetype = utils.guess_mimetype(filename, fallback=True) return mimetype, data if url.path() == '/web/viewer.html': - filepath = QUrlQuery(url).queryItemValue("file") - data = pdfjs.generate_pdfjs_page(QUrl.fromLocalFile(filepath)) + filename = QUrlQuery(url).queryItemValue("filename") + if not filename: + raise UrlInvalidError("Missing filename") + data = pdfjs.generate_pdfjs_page(filename, url) return 'text/html', data try: diff --git a/tests/unit/browser/test_pdfjs.py b/tests/unit/browser/test_pdfjs.py index 02306c0ad..be2070292 100644 --- a/tests/unit/browser/test_pdfjs.py +++ b/tests/unit/browser/test_pdfjs.py @@ -30,7 +30,7 @@ from qutebrowser.utils import usertypes ]) def test_generate_pdfjs_page(available, snippet, monkeypatch): monkeypatch.setattr(pdfjs, 'is_available', lambda: available) - content = pdfjs.generate_pdfjs_page(QUrl('https://example.com/')) + content = pdfjs.generate_pdfjs_page('example.pdf', QUrl()) print(content) assert snippet in content @@ -38,17 +38,16 @@ def test_generate_pdfjs_page(available, snippet, monkeypatch): # Note that we got double protection, once because we use QUrl.FullyEncoded and # because we use qutebrowser.utils.javascript.string_escape. Characters # like " are already replaced by QUrl. -@pytest.mark.parametrize('url, expected', [ - ('http://foo.bar', "http://foo.bar"), - ('http://"', ''), - ('\0', '%00'), - ('http://foobar/");alert("attack!");', - 'http://foobar/%22);alert(%22attack!%22);'), +@pytest.mark.parametrize('filename, expected', [ + ('foo.bar', "foo.bar"), + ('foo"bar', "foo%22bar"), + ('foo\0bar', 'foo%00bar'), + ('foobar");alert("attack!");', + 'foobar%22);alert(%22attack!%22);'), ]) -def test_generate_pdfjs_script(url, expected): - expected_open = 'open("{}");'.format(expected) - url = QUrl(url) - actual = pdfjs._generate_pdfjs_script(url) +def test_generate_pdfjs_script(filename, expected): + expected_open = 'open("qute://pdfjs/file?filename={}");'.format(expected) + actual = pdfjs._generate_pdfjs_script(filename) assert expected_open in actual assert 'PDFView' in actual @@ -64,7 +63,7 @@ def test_generate_pdfjs_script_disable_object_url(monkeypatch, monkeypatch.setattr(pdfjs.qtutils, 'version_check', lambda _v: new_qt) monkeypatch.setattr(pdfjs.objects, 'backend', backend) - script = pdfjs._generate_pdfjs_script(QUrl('https://example.com/')) + script = pdfjs._generate_pdfjs_script('testfile') assert ('PDFJS.disableCreateObjectURL' in script) == expected @@ -162,6 +161,6 @@ def test_should_use_pdfjs(mimetype, enabled, expected, config_stub): def test_get_main_url(): - expected = ('qute://pdfjs/web/viewer.html?file=' - 'qute://pdfjs/file?filename%3Dhello?world.pdf') + expected = ('qute://pdfjs/web/viewer.html?filename=' + 'hello?world.pdf&file=') assert pdfjs.get_main_url('hello?world.pdf') == QUrl(expected) diff --git a/tests/unit/browser/test_qutescheme.py b/tests/unit/browser/test_qutescheme.py index 83cfdf1a6..322939db0 100644 --- a/tests/unit/browser/test_qutescheme.py +++ b/tests/unit/browser/test_qutescheme.py @@ -22,10 +22,11 @@ import os import time import logging +import py.path from PyQt5.QtCore import QUrl import pytest -from qutebrowser.browser import qutescheme, pdfjs +from qutebrowser.browser import qutescheme, pdfjs, downloads class TestJavascriptHandler: @@ -185,6 +186,12 @@ class TestPDFJSHandler: monkeypatch.setattr(pdfjs, 'get_pdfjs_res', get_pdfjs_res) + @pytest.fixture + def download_tmpdir(self): + tdir = downloads.temp_download_manager.get_tmpdir() + yield py.path.local(tdir.name) + tdir.cleanup() + def test_existing_resource(self): """Test with a resource that exists.""" _mimetype, data = qutescheme.data_for_url( @@ -199,3 +206,16 @@ class TestPDFJSHandler: assert len(caplog.records) == 1 assert (caplog.records[0].message == 'pdfjs resource requested but not found: /no/file.html') + + def test_viewer_page(self): + """Load the /web/viewer.html page.""" + _mimetype, data = qutescheme.data_for_url( + QUrl('qute://pdfjs/web/viewer.html?filename=foobar')) + assert b'PDF.js' in data + + def test_file(self, download_tmpdir): + """Load a file via qute://pdfjs/file.""" + (download_tmpdir / 'testfile').write_binary(b'foo') + _mimetype, data = qutescheme.data_for_url( + QUrl('qute://pdfjs/file?filename=testfile')) + assert data == b'foo'