Merge branch 'pdfjs' of https://github.com/Kingdread/qutebrowser into Kingdread-pdfjs

This commit is contained in:
Florian Bruhin 2016-01-05 07:21:26 +01:00
commit a97ba9aa09
17 changed files with 538 additions and 5 deletions

1
.eslintignore Normal file
View File

@ -0,0 +1 @@
qutebrowser/3rdparty/pdfjs/*

View File

@ -3,6 +3,7 @@ recursive-include qutebrowser/html *.html
recursive-include qutebrowser/img *.svg *.png
recursive-include qutebrowser/test *.py
recursive-include qutebrowser/javascript *.js
graft qutebrowser/3rdparty
graft icons
graft doc/img
graft misc
@ -27,6 +28,7 @@ exclude qutebrowser.rcc
exclude .coveragerc
exclude .pylintrc
exclude .eslintrc
exclude .eslintignore
exclude doc/help
exclude .appveyor.yml
exclude .travis.yml

View File

@ -272,3 +272,13 @@ GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
pdf.js
------
qutebrowser uses https://github.com/mozilla/pdf.js/[pdf.js] to display
PDF files in the browser.
pdf.js is distributed under the terms of the Apache License. You can
find a copy of the license in `qutebrowser/3rdparty/pdfjs/LICENSE` or online
http://www.apache.org/licenses/LICENSE-2.0.html[here].

View File

@ -31,11 +31,13 @@ Module attributes:
import functools
import configparser
import mimetypes
from PyQt5.QtCore import pyqtSlot, QObject
from PyQt5.QtNetwork import QNetworkReply
import qutebrowser
from qutebrowser.browser import pdfjs
from qutebrowser.browser.network import schemehandler, networkreply
from qutebrowser.utils import (version, utils, jinja, log, message, docutils,
objreg)
@ -93,8 +95,11 @@ class QuteSchemeHandler(schemehandler.SchemeHandler):
return networkreply.ErrorNetworkReply(
request, str(e), QNetworkReply.ContentNotFoundError,
self.parent())
mimetype, _encoding = mimetypes.guess_type(request.url().fileName())
if mimetype is None:
mimetype = 'text/html'
return networkreply.FixedDataNetworkReply(
request, data, 'text/html', self.parent())
request, data, mimetype, self.parent())
class JSBridge(QObject):
@ -201,3 +206,10 @@ def qute_settings(win_id, _request):
win_id=win_id, title='settings', config=configdata,
confget=config_getter)
return html.encode('UTF-8', errors='xmlcharrefreplace')
@add_handler('pdfjs')
def qute_pdfjs(_win_id, request):
    """Handler for qute://pdfjs. Return a resource of the pdf.js viewer.

    The path component of the requested URL selects the resource, which is
    looked up via pdfjs.get_pdfjs_res and returned unmodified.
    """
    return pdfjs.get_pdfjs_res(request.url().path())

View File

@ -0,0 +1,175 @@
# vim: ft=python fileencoding=utf-8 sts=4 sw=4 et:
# Copyright 2015 Daniel Schadt
#
# This file is part of qutebrowser.
#
# qutebrowser is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# qutebrowser is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with qutebrowser. If not, see <http://www.gnu.org/licenses/>.
"""pdf.js integration for qutebrowser."""
import os
from PyQt5.QtCore import QUrl
from qutebrowser.browser import webelem
from qutebrowser.utils import utils
class PDFJSNotFound(Exception):

    """Signals that no usable pdf.js installation could be located."""
def generate_pdfjs_page(url):
    """Build the HTML of a pdf.js viewer page which displays url.

    The pdf.js viewer.html is loaded and a <script> element opening the PDF
    is inserted directly after the closing </body> tag.

    Args:
        url: The url of the pdf as QUrl.

    Return:
        The HTML of the page as a string.
    """
    viewer_html = get_pdfjs_res('web/viewer.html').decode('utf-8')
    open_script = _generate_pdfjs_script(url)
    injected = '</body><script>{}</script>'.format(open_script)
    return viewer_html.replace('</body>', injected)
def _generate_pdfjs_script(url):
    """Create the JavaScript snippet which opens a PDF in the pdf.js viewer.

    The URL is fully percent-encoded and additionally passed through
    webelem.javascript_escape before being placed into the script.

    Args:
        url: The url of the pdf page as QUrl.

    Return:
        The script as a string.
    """
    escaped_url = webelem.javascript_escape(url.toString(QUrl.FullyEncoded))
    template = (
        'PDFJS.verbosity = PDFJS.VERBOSITY_LEVELS.info;\n'
        'PDFView.open("{url}");\n'
    )
    return template.format(url=escaped_url)
def fix_urls(asset):
    """Replace the known relative URLs in a pdf.js asset with absolute ones.

    This is specialized for pdf.js files and not a general purpose function:
    it does plain substring replacement for a fixed list of paths, pointing
    them at their qute://pdfjs/ counterparts.

    Args:
        asset: js file or html page as string.

    Return:
        The asset with the replacements applied, as string.
    """
    replacements = (
        ('viewer.css', 'qute://pdfjs/web/viewer.css'),
        ('compatibility.js', 'qute://pdfjs/web/compatibility.js'),
        ('locale/locale.properties',
         'qute://pdfjs/web/locale/locale.properties'),
        ('l10n.js', 'qute://pdfjs/web/l10n.js'),
        ('../build/pdf.js', 'qute://pdfjs/build/pdf.js'),
        ('debugger.js', 'qute://pdfjs/web/debugger.js'),
        ('viewer.js', 'qute://pdfjs/web/viewer.js'),
        # This file name is replaced by an empty string (presumably the
        # viewer's default demo document - TODO confirm).
        ('compressed.tracemonkey-pldi-09.pdf', ''),
        ('./images/', 'qute://pdfjs/web/images/'),
        ('../build/pdf.worker.js', 'qute://pdfjs/build/pdf.worker.js'),
        ('../web/cmaps/', 'qute://pdfjs/web/cmaps/'),
    )
    fixed = asset
    for relative, absolute in replacements:
        fixed = fixed.replace(relative, absolute)
    return fixed
# Directories which are searched (in this order) for a pdf.js installation
# before falling back to the copy bundled with qutebrowser.
SYSTEM_PDFJS_PATHS = [
    '/usr/share/pdf.js/',  # Debian pdf.js-common
    '/usr/share/javascript/pdf/',  # Debian libjs-pdf
    os.path.expanduser('~/.local/share/qutebrowser/pdfjs/'),  # fallback
]
def get_pdfjs_res(path):
    """Get a pdf.js resource in binary format.

    The directories in SYSTEM_PDFJS_PATHS are tried first, falling back to
    the pdf.js copy bundled with qutebrowser. Resources which can be decoded
    as UTF-8 get their relative URLs rewritten via fix_urls, everything else
    is returned unchanged.

    Args:
        path: The path inside the pdfjs directory.

    Return:
        The content of the resource as bytes.

    Raises:
        PDFJSNotFound: If the resource can't be found in any location, or if
                       path contains '..' components.
    """
    path = path.lstrip('/')
    # The path may come from a requested URL, so never let it escape the
    # pdf.js directories by walking up via parent-directory components.
    if os.path.pardir in path.replace(os.sep, '/').split('/'):
        raise PDFJSNotFound
    content = None

    # First try a system wide installation
    # System installations might strip off the 'build/' or 'web/' prefixes.
    # qute expects them, so we need to adjust for it.
    # The additional lstrip keeps a path like 'web//foo' from turning into
    # the absolute '/foo', which os.path.join would use in place of the
    # system directory.
    names_to_try = [path, _remove_prefix(path).lstrip('/')]
    for system_path in SYSTEM_PDFJS_PATHS:
        content = _read_from_system(system_path, names_to_try)
        if content is not None:
            break

    # Fallback to bundled pdf.js
    if content is None:
        res_path = '3rdparty/pdfjs/{}'.format(path)
        try:
            content = utils.read_file(res_path, binary=True)
        except FileNotFoundError:
            raise PDFJSNotFound

    try:
        # Might be script/html or might be binary
        text_content = content.decode('utf-8')
    except UnicodeDecodeError:
        return content
    text_content = fix_urls(text_content)
    return text_content.encode('utf-8')
def _remove_prefix(path):
    """Strip a leading 'web/' or 'build/' from a pdfjs-file-path.

    Args:
        path: Path as string where the prefix should be stripped off.

    Return:
        The path without the prefix, or the unchanged path if it starts with
        neither of the prefixes.
    """
    for prefix in ('web/', 'build/'):
        if path.startswith(prefix):
            return path[len(prefix):]
    return path
def _read_from_system(system_path, names):
    """Read the first existing file out of names from system_path.

    All entries in names are treated as alternative names for the same
    resource; they are tried in order and the binary content of the first
    one which can be opened and read is returned.

    Args:
        system_path: The folder where the file should be searched.
        names: List of possible file names.

    Return:
        The content of the file as bytes, or None if no file could be read.
    """
    for filename in names:
        candidate = os.path.join(system_path, filename)
        try:
            with open(candidate, 'rb') as res_file:
                data = res_file.read()
        except OSError:
            # Missing or unreadable, go on with the next possible name.
            continue
        return data
    return None
def is_available():
    """Check whether a pdfjs installation can be found.

    Return:
        True if the 'build/pdf.js' resource could be loaded, False if looking
        it up raised PDFJSNotFound.
    """
    try:
        get_pdfjs_res('build/pdf.js')
        return True
    except PDFJSNotFound:
        return False

View File

@ -30,7 +30,7 @@ from PyQt5.QtPrintSupport import QPrintDialog
from PyQt5.QtWebKitWidgets import QWebPage
from qutebrowser.config import config
from qutebrowser.browser import http, tabhistory
from qutebrowser.browser import http, tabhistory, pdfjs
from qutebrowser.browser.network import networkmanager
from qutebrowser.utils import (message, usertypes, log, jinja, qtutils, utils,
objreg, debug)
@ -218,6 +218,19 @@ class BrowserPage(QWebPage):
q.deleteLater()
return q.answer
def _show_pdfjs(self, reply):
    """Display the PDF behind reply in the main frame using pdf.js.

    If no pdf.js installation is found, the no_pdfjs.html error page is
    rendered instead. The reply is scheduled for deletion afterwards.

    Args:
        reply: The network reply for the PDF.
    """
    url = reply.url()
    try:
        html = pdfjs.generate_pdfjs_page(url)
    except pdfjs.PDFJSNotFound:
        # pylint: disable=no-member
        # WORKAROUND for https://bitbucket.org/logilab/pylint/issue/490/
        template = jinja.env.get_template('no_pdfjs.html')
        html = template.render(url=url.toDisplayString())
    self.mainFrame().setContent(html.encode('utf-8'), 'text/html', url)
    reply.deleteLater()
def shutdown(self):
"""Prepare the web page for being deleted."""
self._is_shutting_down = True
@ -305,6 +318,10 @@ class BrowserPage(QWebPage):
else:
reply.finished.connect(functools.partial(
self.display_content, reply, 'image/jpeg'))
elif (mimetype in {'application/pdf', 'application/x-pdf'} and
config.get('content', 'enable-pdfjs')):
# Use pdf.js to display the page
self._show_pdfjs(reply)
else:
# Unknown mimetype, so download anyways.
download_manager.fetch(reply,

View File

@ -841,6 +841,11 @@ def data(readonly=False):
"required to exactly match the requested domain.\n\n"
"Local domains are always exempt from hostblocking."),
('enable-pdfjs', SettingValue(typ.Bool(), 'false'),
"Enable pdf.js to view PDF files in the browser.\n\n"
"Note that the files can still be downloaded by clicking"
" the download button in the pdf.js viewer."),
readonly=readonly
)),

View File

@ -0,0 +1,129 @@
{% extends "base.html" %}
{% block style %}
{{ super() }}
* {
margin: 0px 0px;
padding: 0px 0px;
}
body {
font-family: "Helvetica Neue", Helvetica, Arial, sans-serif;
-webkit-text-size-adjust: none;
color: #333333;
background-color: #EEEEEE;
font-size: 1.2em;
}
#error-container {
margin-left: 20px;
margin-right: 20px;
margin-top: 20px;
border: 1px solid #CCCCCC;
box-shadow: 0px 0px 6px rgba(0, 0, 0, 0.20);
border-radius: 5px;
background-color: #FFFFFF;
padding: 20px 20px;
}
#header {
border-bottom: 1px solid #CCC;
}
.qutebrowser-broken {
display: block;
width: 100%;
}
/* Highlights the "Warning:" label of the manual-installation hint. */
span.warning {
    font-weight: bold;
    color: red;
}
td {
margin-top: 20px;
color: #555;
}
h1, h2 {
font-weight: normal;
color: #1e89c6;
margin-bottom: 10px;
}
ul {
margin-left: 20px;
margin-top: 20px;
margin-bottom: 20px;
}
li {
margin-top: 10px;
margin-bottom: 10px;
}
{% endblock %}
{% block content %}
<div id="error-container">
<table>
<tr>
<td style="width: 10%; vertical-align: top;">
<img style="width: 100%; display: block; max-width: 256px;" src="{{ resource_url('img/broken_qutebrowser_logo.png') }}" />
</td>
<td style="padding-left: 40px;">
<h1>No pdf.js installation found</h1>
<p>Error while opening {{ url }}: <br>
<p id="error-message-text" style="color: #a31a1a;">qutebrowser can't find a suitable pdf.js installation</p></p>
<p>It looks like you set <code>content -> enable-pdfjs</code>
to <em>true</em> but qutebrowser can't find the required files.</p>
<br>
<h2>Possible fixes</h2>
<ul>
<li>
Disable <code>content -> enable-pdfjs</code> and reload the page.
You will need to download the pdf-file and open it with an external
tool instead.
</li>
<li>
If you have installed a packaged version of qutebrowser, make sure
the required packages for pdf.js are also installed.
</li>
<li>
If you have installed a pdf.js package and qutebrowser still can't
find it, please send us a report with your system and the package
name, so we can add it to the list of supported packages.
</li>
<li>
If you're running a self-built version or the source version, make
sure you have pdf.js in <code>qutebrowser/3rdparty/pdfjs</code>.
You can use the <code>scripts/dev/update_3rdparty.py</code> script
to download the latest version.
</li>
<li>
You can manually download the pdf.js archive
<a href="https://mozilla.github.io/pdf.js/getting_started/#download">here</a>
and extract it to <code>~/.local/share/qutebrowser/pdfjs</code>
<br>
<span class="warning">Warning:</span> Using this method you are
responsible for yourself to keep the installation updated! If a
vulnerability is found in pdf.js, neither qutebrowser nor your
system's package manager will update your pdf.js installation.
Use it at your own risk!
</li>
</ul>
<p>
If none of these fixes work for you, please send us a bug report so
we can fix the issue.
</p>
</td>
</tr>
</table>
</div>
{% endblock %}

View File

@ -35,6 +35,7 @@ import cx_Freeze as cx # pylint: disable=import-error,useless-suppression
sys.path.insert(0, os.path.join(os.path.dirname(__file__), os.pardir,
os.pardir))
from scripts import setupcommon
from scripts.dev import update_3rdparty
BASEDIR = os.path.join(os.path.dirname(os.path.realpath(__file__)),
@ -65,6 +66,7 @@ def get_build_exe_options(skip_html=False):
('qutebrowser/git-commit-id', 'git-commit-id'),
('qutebrowser/utils/testfile', 'utils/testfile'),
('qutebrowser/html', 'html'),
('qutebrowser/3rdparty/pdfjs', '3rdparty/pdfjs'),
]
if not skip_html:
@ -119,6 +121,8 @@ def main():
'bundle_name': 'qutebrowser',
}
update_3rdparty.main()
try:
setupcommon.write_git_file()
cx.setup(

View File

@ -35,7 +35,7 @@ import httpbin
sys.path.insert(0, os.path.join(os.path.dirname(__file__), os.pardir,
os.pardir))
from scripts import setupcommon
from scripts.dev import freeze
from scripts.dev import freeze, update_3rdparty
@contextlib.contextmanager
@ -72,6 +72,7 @@ def get_build_exe_options():
def main():
"""Main entry point."""
base = 'Win32GUI' if sys.platform.startswith('win') else None
update_3rdparty.main()
with temp_git_commit_file():
cx.setup(
executables=[cx.Executable('scripts/dev/run_frozen_tests.py',

View File

@ -96,12 +96,19 @@ def check_spelling():
'[Ss]tatemachine', '[Mm]etaobject', '[Ll]ogrecord',
'[Ff]iletype'}
# Files which should be ignored, e.g. because they come from another
# package
ignored = [
os.path.join('.', 'scripts', 'dev', 'misc_checks.py'),
os.path.join('.', 'qutebrowser', '3rdparty', 'pdfjs'),
]
seen = collections.defaultdict(list)
try:
ok = True
for fn in _get_files():
with tokenize.open(fn) as f:
if fn == os.path.join('.', 'scripts', 'dev', 'misc_checks.py'):
if any(fn.startswith(i) for i in ignored):
continue
for line in f:
for w in words:

View File

@ -89,6 +89,7 @@ def whitelist_generator():
yield 'qutebrowser.utils.log.VDEBUG'
yield 'qutebrowser.utils.log.QtWarningFilter.filter'
yield 'logging.LogRecord.log_color'
yield 'qutebrowser.browser.pdfjs.is_available'
# vulture doesn't notice the hasattr() and thus thinks netrc_used is unused
# in NetworkManager.on_authentication_required
yield 'PyQt5.QtNetwork.QNetworkReply.netrc_used'

64
scripts/dev/update_3rdparty.py Executable file
View File

@ -0,0 +1,64 @@
#!/usr/bin/env python3
# vim: ft=python fileencoding=utf-8 sts=4 sw=4 et:
# Copyright 2015 Daniel Schadt
#
# This file is part of qutebrowser.
#
# qutebrowser is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# qutebrowser is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with qutebrowser. If not, see <http://www.gnu.org/licenses/>.
"""Update all third-party-modules."""
import urllib.request
import shutil
import json
import os
def get_latest_pdfjs_url():
    """Look up the latest pdf.js prebuilt package via the GitHub API.

    The 'releases/latest' endpoint of the mozilla/pdf.js repository is
    queried and the first asset of that release is used as download.

    Returns a (version, url)-tuple.
    """
    request_url = '{}/{}'.format('https://api.github.com',
                                 'repos/mozilla/pdf.js/releases/latest')
    with urllib.request.urlopen(request_url) as response:
        raw = response.read()
    release = json.loads(raw.decode('utf-8'))

    download_url = release['assets'][0]['browser_download_url']
    version_name = release['name']
    return (version_name, download_url)
def update_pdfjs():
    """Download and extract the latest pdf.js version.

    The prebuilt archive is downloaded to a temporary file and unpacked into
    qutebrowser/3rdparty/pdfjs (relative to the current working directory).
    A previous version in that directory is removed first.
    """
    version, url = get_latest_pdfjs_url()
    target_path = os.path.join('qutebrowser', '3rdparty', 'pdfjs')
    print("=> Downloading pdf.js {}".format(version))
    try:
        (archive_path, _headers) = urllib.request.urlretrieve(url)
        if os.path.isdir(target_path):
            print("Removing old version in {}".format(target_path))
            shutil.rmtree(target_path)
        os.makedirs(target_path)
        print("Extracting new version")
        # unpack_archive is documented to take the archive's path; passing an
        # open file object is rejected by newer Python versions.
        shutil.unpack_archive(archive_path, target_path, 'zip')
    finally:
        # Remove the temporary download even if something above failed.
        urllib.request.urlcleanup()
def main():
    """Update all third-party modules (currently only pdf.js)."""
    update_pdfjs()


# Only run the update when executed as a script, not when imported.
if __name__ == '__main__':
    main()

Binary file not shown.

View File

@ -245,3 +245,17 @@ Feature: Various utility commands.
When I set general -> startpage to http://localhost:(port)/data/numbers/1.txt,http://localhost:(port)/data/numbers/2.txt
And I run :home
Then data/numbers/1.txt should be loaded
# pdfjs support
Scenario: pdfjs is used for pdf files
Given pdfjs is available
When I set content -> enable-pdfjs to true
And I open data/misc/test.pdf
Then the javascript message "PDF * [*] (PDF.js: *)" should be logged
Scenario: pdfjs is not used when disabled
When I set content -> enable-pdfjs to false
And I set storage -> prompt-download-directory to false
And I open data/misc/test.pdf
Then "Download finished" should be logged

View File

@ -23,10 +23,13 @@ import subprocess
import pytest
import pytest_bdd as bdd
bdd.scenarios('misc.feature')
import qutebrowser
from qutebrowser.utils import docutils
from qutebrowser.browser import pdfjs
bdd.scenarios('misc.feature')
@bdd.when("the documentation is up to date")
@ -51,3 +54,9 @@ def update_documentation():
update_script = os.path.join(script_path, 'asciidoc2html.py')
subprocess.call([sys.executable, update_script])
@bdd.given('pdfjs is available')
def pdfjs_available():
    """Skip the scenario unless a pdf.js installation can be found."""
    if pdfjs.is_available():
        return
    pytest.skip("No pdfjs installation found.")

View File

@ -0,0 +1,82 @@
# vim: ft=python fileencoding=utf-8 sts=4 sw=4 et:
# Copyright 2015 Daniel Schadt
#
# This file is part of qutebrowser.
#
# qutebrowser is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# qutebrowser is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with qutebrowser. If not, see <http://www.gnu.org/licenses/>.
"""Tests for qutebrowser.browser.pdfjs"""
import textwrap
import pytest
from PyQt5.QtCore import QUrl
from qutebrowser.browser import pdfjs
# Note that we have double protection here: once because we use
# QUrl.FullyEncoded, and once because we use
# qutebrowser.browser.webelem.javascript_escape. Characters like " are
# already replaced by QUrl.
@pytest.mark.parametrize('url, expected', [
    ('http://foo.bar',
     'PDFJS.verbosity = PDFJS.VERBOSITY_LEVELS.info;\n'
     'PDFView.open("http://foo.bar");\n'),
    ('http://"',
     'PDFJS.verbosity = PDFJS.VERBOSITY_LEVELS.info;\n'
     'PDFView.open("");\n'),
    ('\0',
     'PDFJS.verbosity = PDFJS.VERBOSITY_LEVELS.info;\n'
     'PDFView.open("%00");\n'),
    ('http://foobar/");alert("attack!");',
     'PDFJS.verbosity = PDFJS.VERBOSITY_LEVELS.info;\n'
     'PDFView.open("http://foobar/%22);alert(%22attack!%22);");\n'),
])
def test_generate_pdfjs_script(url, expected):
    """Check the generated script, including URLs which try to break out."""
    assert pdfjs._generate_pdfjs_script(QUrl(url)) == expected
def test_fix_urls():
    """Known pdf.js URLs are made absolute, unrelated ones are kept."""
    original = textwrap.dedent("""
        <html>
        <script src="viewer.js"></script>
        <link href="viewer.css">
        <script src="unrelated.js"></script>
        </html>
    """).strip()

    fixed = textwrap.dedent("""
        <html>
        <script src="qute://pdfjs/web/viewer.js"></script>
        <link href="qute://pdfjs/web/viewer.css">
        <script src="unrelated.js"></script>
        </html>
    """).strip()

    assert pdfjs.fix_urls(original) == fixed
@pytest.mark.parametrize('path, expected', [
    ('web/viewer.js', 'viewer.js'),
    ('build/locale/foo.bar', 'locale/foo.bar'),
    ('viewer.js', 'viewer.js'),
    ('foo/viewer.css', 'foo/viewer.css'),
])
def test_remove_prefix(path, expected):
    """Only a leading web/ or build/ is stripped from the path."""
    actual = pdfjs._remove_prefix(path)
    assert actual == expected