Simplify and lock down PDF.js filename handling

This commit is contained in:
Florian Bruhin 2018-09-09 17:32:33 +02:00
parent 231c1fbe59
commit b96898db37
5 changed files with 67 additions and 36 deletions

View File

@ -750,7 +750,7 @@ class AbstractDownloadItem(QObject):
if filename is None: # pragma: no cover if filename is None: # pragma: no cover
log.downloads.error("No filename to open the download!") log.downloads.error("No filename to open the download!")
return return
self.pdfjs_requested.emit(filename) self.pdfjs_requested.emit(os.path.basename(filename))
def set_target(self, target): def set_target(self, target):
"""Set the target for a given download. """Set the target for a given download.
@ -1233,7 +1233,7 @@ class TempDownloadManager:
"directory") "directory")
self._tmpdir = None self._tmpdir = None
def _get_tmpdir(self): def get_tmpdir(self):
"""Return the temporary directory that is used for downloads. """Return the temporary directory that is used for downloads.
The directory is created lazily on first access. The directory is created lazily on first access.
@ -1259,7 +1259,7 @@ class TempDownloadManager:
Return: Return:
A tempfile.NamedTemporaryFile that should be used to save the file. A tempfile.NamedTemporaryFile that should be used to save the file.
""" """
tmpdir = self._get_tmpdir() tmpdir = self.get_tmpdir()
encoding = sys.getfilesystemencoding() encoding = sys.getfilesystemencoding()
suggested_name = utils.force_encoding(suggested_name, encoding) suggested_name = utils.force_encoding(suggested_name, encoding)
# Make sure that the filename is not too long # Make sure that the filename is not too long

View File

@ -27,6 +27,7 @@ from PyQt5.QtCore import QUrl, QUrlQuery
from qutebrowser.utils import utils, javascript, jinja, qtutils, usertypes from qutebrowser.utils import utils, javascript, jinja, qtutils, usertypes
from qutebrowser.misc import objects from qutebrowser.misc import objects
from qutebrowser.config import config from qutebrowser.config import config
from qutebrowser.browser import downloads
class PDFJSNotFound(Exception): class PDFJSNotFound(Exception):
@ -43,31 +44,37 @@ class PDFJSNotFound(Exception):
super().__init__(message) super().__init__(message)
def generate_pdfjs_page(url): def generate_pdfjs_page(filename, url):
"""Return the html content of a page that displays url with pdfjs. """Return the html content of a page that displays a file with pdfjs.
Returns a string. Returns a string.
Args: Args:
url: The url of the pdf as QUrl. filename: The filename of the PDF to open.
url: The URL being opened.
""" """
if not is_available(): if not is_available():
return jinja.render('no_pdfjs.html', return jinja.render('no_pdfjs.html',
url=url.toDisplayString(), url=url.toDisplayString(),
title="PDF.js not found") title="PDF.js not found")
viewer = get_pdfjs_res('web/viewer.html').decode('utf-8') viewer = get_pdfjs_res('web/viewer.html').decode('utf-8')
script = _generate_pdfjs_script(url) script = _generate_pdfjs_script(filename)
html_page = viewer.replace('</body>', html_page = viewer.replace('</body>',
'</body><script>{}</script>'.format(script)) '</body><script>{}</script>'.format(script))
return html_page return html_page
def _generate_pdfjs_script(url): def _generate_pdfjs_script(filename):
"""Generate the script that shows the pdf with pdf.js. """Generate the script that shows the pdf with pdf.js.
Args: Args:
url: The url of the pdf page as QUrl. filename: The name of the file to open.
""" """
url = QUrl('qute://pdfjs/file')
url_query = QUrlQuery()
url_query.addQueryItem('filename', filename)
url.setQuery(url_query)
return jinja.js_environment.from_string(""" return jinja.js_environment.from_string("""
document.addEventListener("DOMContentLoaded", function() { document.addEventListener("DOMContentLoaded", function() {
{% if disable_create_object_url %} {% if disable_create_object_url %}
@ -199,13 +206,8 @@ def should_use_pdfjs(mimetype):
def get_main_url(filename): def get_main_url(filename):
"""Get the URL to be opened to view a local PDF.""" """Get the URL to be opened to view a local PDF."""
url = QUrl('qute://pdfjs/web/viewer.html') url = QUrl('qute://pdfjs/web/viewer.html')
file_url = QUrl('qute://pdfjs/file')
file_url_query = QUrlQuery()
file_url_query.addQueryItem('filename', filename)
file_url.setQuery(file_url_query)
query = QUrlQuery() query = QUrlQuery()
query.addQueryItem('file', file_url.toString()) query.addQueryItem('filename', filename) # read from our JS
query.addQueryItem('file', '') # to avoid pdfjs opening the default PDF
url.setQuery(query) url.setQuery(query)
return url return url

View File

@ -43,7 +43,7 @@ import pkg_resources
from PyQt5.QtCore import QUrlQuery, QUrl from PyQt5.QtCore import QUrlQuery, QUrl
import qutebrowser import qutebrowser
from qutebrowser.browser import pdfjs from qutebrowser.browser import pdfjs, downloads
from qutebrowser.config import config, configdata, configexc, configdiff from qutebrowser.config import config, configdata, configexc, configdiff
from qutebrowser.utils import (version, utils, jinja, log, message, docutils, from qutebrowser.utils import (version, utils, jinja, log, message, docutils,
objreg, urlutils) objreg, urlutils)
@ -520,17 +520,27 @@ def qute_pastebin_version(_url):
@add_handler('pdfjs') @add_handler('pdfjs')
def qute_pdfjs(url): def qute_pdfjs(url):
"""Handler for qute://pdfjs. Return the pdf.js viewer.""" """Handler for qute://pdfjs. Return the pdf.js viewer."""
# FIXME be more strict about allowed files here
if url.path() == '/file': if url.path() == '/file':
filename = QUrlQuery(url).queryItemValue('filename') filename = QUrlQuery(url).queryItemValue('filename')
with open(filename, 'rb') as f: if not filename:
raise UrlInvalidError("Missing filename")
if '/' in filename or os.sep in filename:
raise RequestDeniedError("Path separator in filename.")
path = os.path.join(downloads.temp_download_manager.get_tmpdir().name,
filename)
with open(path, 'rb') as f:
data = f.read() data = f.read()
mimetype = utils.guess_mimetype(filename, fallback=True) mimetype = utils.guess_mimetype(filename, fallback=True)
return mimetype, data return mimetype, data
if url.path() == '/web/viewer.html': if url.path() == '/web/viewer.html':
filepath = QUrlQuery(url).queryItemValue("file") filename = QUrlQuery(url).queryItemValue("filename")
data = pdfjs.generate_pdfjs_page(QUrl.fromLocalFile(filepath)) if not filename:
raise UrlInvalidError("Missing filename")
data = pdfjs.generate_pdfjs_page(filename, url)
return 'text/html', data return 'text/html', data
try: try:

View File

@ -30,7 +30,7 @@ from qutebrowser.utils import usertypes
]) ])
def test_generate_pdfjs_page(available, snippet, monkeypatch): def test_generate_pdfjs_page(available, snippet, monkeypatch):
monkeypatch.setattr(pdfjs, 'is_available', lambda: available) monkeypatch.setattr(pdfjs, 'is_available', lambda: available)
content = pdfjs.generate_pdfjs_page(QUrl('https://example.com/')) content = pdfjs.generate_pdfjs_page('example.pdf', QUrl())
print(content) print(content)
assert snippet in content assert snippet in content
@ -38,17 +38,16 @@ def test_generate_pdfjs_page(available, snippet, monkeypatch):
# Note that we got double protection, once because we use QUrl.FullyEncoded and # Note that we got double protection, once because we use QUrl.FullyEncoded and
# because we use qutebrowser.utils.javascript.string_escape. Characters # because we use qutebrowser.utils.javascript.string_escape. Characters
# like " are already replaced by QUrl. # like " are already replaced by QUrl.
@pytest.mark.parametrize('url, expected', [ @pytest.mark.parametrize('filename, expected', [
('http://foo.bar', "http://foo.bar"), ('foo.bar', "foo.bar"),
('http://"', ''), ('foo"bar', "foo%22bar"),
('\0', '%00'), ('foo\0bar', 'foo%00bar'),
('http://foobar/");alert("attack!");', ('foobar");alert("attack!");',
'http://foobar/%22);alert(%22attack!%22);'), 'foobar%22);alert(%22attack!%22);'),
]) ])
def test_generate_pdfjs_script(url, expected): def test_generate_pdfjs_script(filename, expected):
expected_open = 'open("{}");'.format(expected) expected_open = 'open("qute://pdfjs/file?filename={}");'.format(expected)
url = QUrl(url) actual = pdfjs._generate_pdfjs_script(filename)
actual = pdfjs._generate_pdfjs_script(url)
assert expected_open in actual assert expected_open in actual
assert 'PDFView' in actual assert 'PDFView' in actual
@ -64,7 +63,7 @@ def test_generate_pdfjs_script_disable_object_url(monkeypatch,
monkeypatch.setattr(pdfjs.qtutils, 'version_check', lambda _v: new_qt) monkeypatch.setattr(pdfjs.qtutils, 'version_check', lambda _v: new_qt)
monkeypatch.setattr(pdfjs.objects, 'backend', backend) monkeypatch.setattr(pdfjs.objects, 'backend', backend)
script = pdfjs._generate_pdfjs_script(QUrl('https://example.com/')) script = pdfjs._generate_pdfjs_script('testfile')
assert ('PDFJS.disableCreateObjectURL' in script) == expected assert ('PDFJS.disableCreateObjectURL' in script) == expected
@ -162,6 +161,6 @@ def test_should_use_pdfjs(mimetype, enabled, expected, config_stub):
def test_get_main_url(): def test_get_main_url():
expected = ('qute://pdfjs/web/viewer.html?file=' expected = ('qute://pdfjs/web/viewer.html?filename='
'qute://pdfjs/file?filename%3Dhello?world.pdf') 'hello?world.pdf&file=')
assert pdfjs.get_main_url('hello?world.pdf') == QUrl(expected) assert pdfjs.get_main_url('hello?world.pdf') == QUrl(expected)

View File

@ -22,10 +22,11 @@ import os
import time import time
import logging import logging
import py.path
from PyQt5.QtCore import QUrl from PyQt5.QtCore import QUrl
import pytest import pytest
from qutebrowser.browser import qutescheme, pdfjs from qutebrowser.browser import qutescheme, pdfjs, downloads
class TestJavascriptHandler: class TestJavascriptHandler:
@ -185,6 +186,12 @@ class TestPDFJSHandler:
monkeypatch.setattr(pdfjs, 'get_pdfjs_res', get_pdfjs_res) monkeypatch.setattr(pdfjs, 'get_pdfjs_res', get_pdfjs_res)
@pytest.fixture
def download_tmpdir(self):
tdir = downloads.temp_download_manager.get_tmpdir()
yield py.path.local(tdir.name)
tdir.cleanup()
def test_existing_resource(self): def test_existing_resource(self):
"""Test with a resource that exists.""" """Test with a resource that exists."""
_mimetype, data = qutescheme.data_for_url( _mimetype, data = qutescheme.data_for_url(
@ -199,3 +206,16 @@ class TestPDFJSHandler:
assert len(caplog.records) == 1 assert len(caplog.records) == 1
assert (caplog.records[0].message == assert (caplog.records[0].message ==
'pdfjs resource requested but not found: /no/file.html') 'pdfjs resource requested but not found: /no/file.html')
def test_viewer_page(self):
"""Load the /web/viewer.html page."""
_mimetype, data = qutescheme.data_for_url(
QUrl('qute://pdfjs/web/viewer.html?filename=foobar'))
assert b'PDF.js' in data
def test_file(self, download_tmpdir):
"""Load a file via qute://pdfjs/file."""
(download_tmpdir / 'testfile').write_binary(b'foo')
_mimetype, data = qutescheme.data_for_url(
QUrl('qute://pdfjs/file?filename=testfile'))
assert data == b'foo'