Simplify and lock down PDF.js filename handling

This commit is contained in:
Florian Bruhin 2018-09-09 17:32:33 +02:00
parent 231c1fbe59
commit b96898db37
5 changed files with 67 additions and 36 deletions

View File

@ -750,7 +750,7 @@ class AbstractDownloadItem(QObject):
if filename is None: # pragma: no cover
log.downloads.error("No filename to open the download!")
return
self.pdfjs_requested.emit(filename)
self.pdfjs_requested.emit(os.path.basename(filename))
def set_target(self, target):
"""Set the target for a given download.
@ -1233,7 +1233,7 @@ class TempDownloadManager:
"directory")
self._tmpdir = None
def _get_tmpdir(self):
def get_tmpdir(self):
"""Return the temporary directory that is used for downloads.
The directory is created lazily on first access.
@ -1259,7 +1259,7 @@ class TempDownloadManager:
Return:
A tempfile.NamedTemporaryFile that should be used to save the file.
"""
tmpdir = self._get_tmpdir()
tmpdir = self.get_tmpdir()
encoding = sys.getfilesystemencoding()
suggested_name = utils.force_encoding(suggested_name, encoding)
# Make sure that the filename is not too long

View File

@ -27,6 +27,7 @@ from PyQt5.QtCore import QUrl, QUrlQuery
from qutebrowser.utils import utils, javascript, jinja, qtutils, usertypes
from qutebrowser.misc import objects
from qutebrowser.config import config
from qutebrowser.browser import downloads
class PDFJSNotFound(Exception):
@ -43,31 +44,37 @@ class PDFJSNotFound(Exception):
super().__init__(message)
def generate_pdfjs_page(url):
"""Return the html content of a page that displays url with pdfjs.
def generate_pdfjs_page(filename, url):
"""Return the html content of a page that displays a file with pdfjs.
Returns a string.
Args:
url: The url of the pdf as QUrl.
filename: The filename of the PDF to open.
url: The URL being opened.
"""
if not is_available():
return jinja.render('no_pdfjs.html',
url=url.toDisplayString(),
title="PDF.js not found")
viewer = get_pdfjs_res('web/viewer.html').decode('utf-8')
script = _generate_pdfjs_script(url)
script = _generate_pdfjs_script(filename)
html_page = viewer.replace('</body>',
'</body><script>{}</script>'.format(script))
return html_page
def _generate_pdfjs_script(url):
def _generate_pdfjs_script(filename):
"""Generate the script that shows the pdf with pdf.js.
Args:
url: The url of the pdf page as QUrl.
filename: The name of the file to open.
"""
url = QUrl('qute://pdfjs/file')
url_query = QUrlQuery()
url_query.addQueryItem('filename', filename)
url.setQuery(url_query)
return jinja.js_environment.from_string("""
document.addEventListener("DOMContentLoaded", function() {
{% if disable_create_object_url %}
@ -199,13 +206,8 @@ def should_use_pdfjs(mimetype):
def get_main_url(filename):
"""Get the URL to be opened to view a local PDF."""
url = QUrl('qute://pdfjs/web/viewer.html')
file_url = QUrl('qute://pdfjs/file')
file_url_query = QUrlQuery()
file_url_query.addQueryItem('filename', filename)
file_url.setQuery(file_url_query)
query = QUrlQuery()
query.addQueryItem('file', file_url.toString())
query.addQueryItem('filename', filename) # read from our JS
query.addQueryItem('file', '') # to avoid pdfjs opening the default PDF
url.setQuery(query)
return url

View File

@ -43,7 +43,7 @@ import pkg_resources
from PyQt5.QtCore import QUrlQuery, QUrl
import qutebrowser
from qutebrowser.browser import pdfjs
from qutebrowser.browser import pdfjs, downloads
from qutebrowser.config import config, configdata, configexc, configdiff
from qutebrowser.utils import (version, utils, jinja, log, message, docutils,
objreg, urlutils)
@ -520,17 +520,27 @@ def qute_pastebin_version(_url):
@add_handler('pdfjs')
def qute_pdfjs(url):
"""Handler for qute://pdfjs. Return the pdf.js viewer."""
# FIXME be more strict about allowed files here
if url.path() == '/file':
filename = QUrlQuery(url).queryItemValue('filename')
with open(filename, 'rb') as f:
if not filename:
raise UrlInvalidError("Missing filename")
if '/' in filename or os.sep in filename:
raise RequestDeniedError("Path separator in filename.")
path = os.path.join(downloads.temp_download_manager.get_tmpdir().name,
filename)
with open(path, 'rb') as f:
data = f.read()
mimetype = utils.guess_mimetype(filename, fallback=True)
return mimetype, data
if url.path() == '/web/viewer.html':
filepath = QUrlQuery(url).queryItemValue("file")
data = pdfjs.generate_pdfjs_page(QUrl.fromLocalFile(filepath))
filename = QUrlQuery(url).queryItemValue("filename")
if not filename:
raise UrlInvalidError("Missing filename")
data = pdfjs.generate_pdfjs_page(filename, url)
return 'text/html', data
try:

View File

@ -30,7 +30,7 @@ from qutebrowser.utils import usertypes
])
def test_generate_pdfjs_page(available, snippet, monkeypatch):
monkeypatch.setattr(pdfjs, 'is_available', lambda: available)
content = pdfjs.generate_pdfjs_page(QUrl('https://example.com/'))
content = pdfjs.generate_pdfjs_page('example.pdf', QUrl())
print(content)
assert snippet in content
@ -38,17 +38,16 @@ def test_generate_pdfjs_page(available, snippet, monkeypatch):
# Note that we got double protection, once because we use QUrl.FullyEncoded and
# because we use qutebrowser.utils.javascript.string_escape. Characters
# like " are already replaced by QUrl.
@pytest.mark.parametrize('url, expected', [
('http://foo.bar', "http://foo.bar"),
('http://"', ''),
('\0', '%00'),
('http://foobar/");alert("attack!");',
'http://foobar/%22);alert(%22attack!%22);'),
@pytest.mark.parametrize('filename, expected', [
('foo.bar', "foo.bar"),
('foo"bar', "foo%22bar"),
('foo\0bar', 'foo%00bar'),
('foobar");alert("attack!");',
'foobar%22);alert(%22attack!%22);'),
])
def test_generate_pdfjs_script(url, expected):
expected_open = 'open("{}");'.format(expected)
url = QUrl(url)
actual = pdfjs._generate_pdfjs_script(url)
def test_generate_pdfjs_script(filename, expected):
expected_open = 'open("qute://pdfjs/file?filename={}");'.format(expected)
actual = pdfjs._generate_pdfjs_script(filename)
assert expected_open in actual
assert 'PDFView' in actual
@ -64,7 +63,7 @@ def test_generate_pdfjs_script_disable_object_url(monkeypatch,
monkeypatch.setattr(pdfjs.qtutils, 'version_check', lambda _v: new_qt)
monkeypatch.setattr(pdfjs.objects, 'backend', backend)
script = pdfjs._generate_pdfjs_script(QUrl('https://example.com/'))
script = pdfjs._generate_pdfjs_script('testfile')
assert ('PDFJS.disableCreateObjectURL' in script) == expected
@ -162,6 +161,6 @@ def test_should_use_pdfjs(mimetype, enabled, expected, config_stub):
def test_get_main_url():
expected = ('qute://pdfjs/web/viewer.html?file='
'qute://pdfjs/file?filename%3Dhello?world.pdf')
expected = ('qute://pdfjs/web/viewer.html?filename='
'hello?world.pdf&file=')
assert pdfjs.get_main_url('hello?world.pdf') == QUrl(expected)

View File

@ -22,10 +22,11 @@ import os
import time
import logging
import py.path
from PyQt5.QtCore import QUrl
import pytest
from qutebrowser.browser import qutescheme, pdfjs
from qutebrowser.browser import qutescheme, pdfjs, downloads
class TestJavascriptHandler:
@ -185,6 +186,12 @@ class TestPDFJSHandler:
monkeypatch.setattr(pdfjs, 'get_pdfjs_res', get_pdfjs_res)
@pytest.fixture
def download_tmpdir(self):
tdir = downloads.temp_download_manager.get_tmpdir()
yield py.path.local(tdir.name)
tdir.cleanup()
def test_existing_resource(self):
"""Test with a resource that exists."""
_mimetype, data = qutescheme.data_for_url(
@ -199,3 +206,16 @@ class TestPDFJSHandler:
assert len(caplog.records) == 1
assert (caplog.records[0].message ==
'pdfjs resource requested but not found: /no/file.html')
def test_viewer_page(self):
"""Load the /web/viewer.html page."""
_mimetype, data = qutescheme.data_for_url(
QUrl('qute://pdfjs/web/viewer.html?filename=foobar'))
assert b'PDF.js' in data
def test_file(self, download_tmpdir):
"""Load a file via qute://pdfjs/file."""
(download_tmpdir / 'testfile').write_binary(b'foo')
_mimetype, data = qutescheme.data_for_url(
QUrl('qute://pdfjs/file?filename=testfile'))
assert data == b'foo'