Revert "Simplify getting pdfjs main page"

This reverts commit 9c731bde85627308fdde4730b0181a014096cb47.

We need to set some PDF.js options, so we can't just use the default viewer
with ?file=...
This commit is contained in:
Florian Bruhin 2018-09-06 00:52:52 +02:00
parent a0b4f09f02
commit 7206dde19c
3 changed files with 105 additions and 1 deletions

View File

@ -42,6 +42,66 @@ class PDFJSNotFound(Exception):
super().__init__(message)
def generate_pdfjs_page(url):
"""Return the html content of a page that displays url with pdfjs.
Returns a string.
Args:
url: The url of the pdf as QUrl.
"""
if not is_available():
return jinja.render('no_pdfjs.html',
url=url.toDisplayString(),
title="PDF.js not found")
viewer = get_pdfjs_res('web/viewer.html').decode('utf-8')
script = _generate_pdfjs_script(url)
html_page = viewer.replace('</body>',
'</body><script>{}</script>'.format(script))
return html_page
def _generate_pdfjs_script(url):
"""Generate the script that shows the pdf with pdf.js.
Args:
url: The url of the pdf page as QUrl.
"""
return (
'document.addEventListener("DOMContentLoaded", function() {{\n'
' PDFJS.verbosity = PDFJS.VERBOSITY_LEVELS.info;\n'
' (window.PDFView || window.PDFViewerApplication).open("{url}");\n'
'}});\n'
).format(url=javascript.string_escape(url.toString(QUrl.FullyEncoded)))
def fix_urls(asset):
"""Take an html page and replace each relative URL with an absolute.
This is specialized for pdf.js files and not a general purpose function.
Args:
asset: js file or html page as string.
"""
new_urls = [
('viewer.css', 'qute://pdfjs/web/viewer.css'),
('compatibility.js', 'qute://pdfjs/web/compatibility.js'),
('locale/locale.properties',
'qute://pdfjs/web/locale/locale.properties'),
('l10n.js', 'qute://pdfjs/web/l10n.js'),
('../build/pdf.js', 'qute://pdfjs/build/pdf.js'),
('debugger.js', 'qute://pdfjs/web/debugger.js'),
('viewer.js', 'qute://pdfjs/web/viewer.js'),
('compressed.tracemonkey-pldi-09.pdf', ''),
('./images/', 'qute://pdfjs/web/images/'),
('../build/pdf.worker.js', 'qute://pdfjs/build/pdf.worker.js'),
('../web/cmaps/', 'qute://pdfjs/web/cmaps/'),
]
for original, new in new_urls:
asset = asset.replace(original, new)
return asset
SYSTEM_PDFJS_PATHS = [
# Debian pdf.js-common
# Arch Linux pdfjs (AUR)
@ -154,7 +214,7 @@ def should_use_pdfjs(mimetype):
def get_main_url(filename):
"""Get the URL to be opened to view a local PDF."""
url = QUrl('qute://pdfjs/web/viewer.html')
url = QUrl('qute://pdfjs/')
file_url = QUrl('qute://pdfjs/file')
file_url_query = QUrlQuery()

View File

@ -533,6 +533,11 @@ def qute_pdfjs(url):
mimetype = 'application/octet-stream'
return mimetype, data
if url.path() == '/':
filepath = QUrlQuery(url).queryItemValue("file")
data = pdfjs.generate_pdfjs_page(QUrl.fromLocalFile(filepath))
return 'text/html', data
try:
data = pdfjs.get_pdfjs_res(url.path())
except pdfjs.PDFJSNotFound as e:

View File

@ -22,6 +22,45 @@ import pytest
from qutebrowser.browser import pdfjs
# Note that we got double protection, once because we use QUrl.FullyEncoded and
# because we use qutebrowser.utils.javascript.string_escape. Characters
# like " are already replaced by QUrl.
@pytest.mark.parametrize('url, expected', [
('http://foo.bar', "http://foo.bar"),
('http://"', ''),
('\0', '%00'),
('http://foobar/");alert("attack!");',
'http://foobar/%22);alert(%22attack!%22);'),
])
def test_generate_pdfjs_script(url, expected):
expected_open = 'open("{}");'.format(expected)
url = QUrl(url)
actual = pdfjs._generate_pdfjs_script(url)
assert expected_open in actual
assert 'PDFView' in actual
def test_fix_urls():
page = textwrap.dedent("""
<html>
<script src="viewer.js"></script>
<link href="viewer.css">
<script src="unrelated.js"></script>
</html>
""").strip()
expected = textwrap.dedent("""
<html>
<script src="qute://pdfjs/web/viewer.js"></script>
<link href="qute://pdfjs/web/viewer.css">
<script src="unrelated.js"></script>
</html>
""").strip()
actual = pdfjs.fix_urls(page)
assert actual == expected
@pytest.mark.parametrize('path, expected', [
('web/viewer.js', 'viewer.js'),
('build/locale/foo.bar', 'locale/foo.bar'),