2015-11-23 21:33:22 +01:00
|
|
|
# vim: ft=python fileencoding=utf-8 sts=4 sw=4 et:
|
|
|
|
|
|
|
|
# Copyright 2015 Daniel Schadt
|
|
|
|
#
|
|
|
|
# This file is part of qutebrowser.
|
|
|
|
#
|
|
|
|
# qutebrowser is free software: you can redistribute it and/or modify
|
|
|
|
# it under the terms of the GNU General Public License as published by
|
|
|
|
# the Free Software Foundation, either version 3 of the License, or
|
|
|
|
# (at your option) any later version.
|
|
|
|
#
|
|
|
|
# qutebrowser is distributed in the hope that it will be useful,
|
|
|
|
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
|
|
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
|
|
# GNU General Public License for more details.
|
|
|
|
#
|
|
|
|
# You should have received a copy of the GNU General Public License
|
|
|
|
# along with qutebrowser. If not, see <http://www.gnu.org/licenses/>.
|
|
|
|
|
|
|
|
"""pdf.js integration for qutebrowser."""
|
|
|
|
|
2015-11-23 22:10:35 +01:00
|
|
|
import os
|
|
|
|
|
2015-11-24 14:24:20 +01:00
|
|
|
from PyQt5.QtCore import QUrl
|
|
|
|
|
2015-11-23 21:33:22 +01:00
|
|
|
from qutebrowser.browser import webelem
|
|
|
|
from qutebrowser.utils import utils
|
|
|
|
|
|
|
|
|
2015-11-23 22:10:35 +01:00
|
|
|
class PDFJSNotFound(Exception):

    """Error raised when a pdf.js resource can't be found anywhere."""
|
|
|
|
|
|
|
|
|
2015-11-23 21:33:22 +01:00
|
|
|
def generate_pdfjs_page(url):
    """Build the html of a pdf.js viewer page which opens the given url.

    The 'web/viewer.html' resource is loaded and a <script> element which
    opens the pdf is inserted right after every closing </body> tag.

    Returns a string.

    Args:
        url: The url of the pdf as QUrl.
    """
    viewer_html = get_pdfjs_res('web/viewer.html').decode('utf-8')
    script_tag = '</body><script>{}</script>'.format(
        _generate_pdfjs_script(url))
    return viewer_html.replace('</body>', script_tag)
|
|
|
|
|
|
|
|
|
|
|
|
def _generate_pdfjs_script(url):
    """Build the javascript snippet which makes pdf.js open the given url.

    The url is fully encoded and javascript-escaped before it is embedded
    into the snippet.

    Args:
        url: The url of the pdf page as QUrl.
    """
    encoded_url = url.toString(QUrl.FullyEncoded)
    template = (
        'PDFJS.verbosity = PDFJS.VERBOSITY_LEVELS.info;\n'
        'PDFView.open("{url}");\n'
    )
    return template.format(url=webelem.javascript_escape(encoded_url))
|
2015-11-23 21:33:22 +01:00
|
|
|
|
|
|
|
|
|
|
|
def fix_urls(asset):
    """Rewrite the relative URLs in a pdf.js asset to absolute qute:// URLs.

    Every occurrence of a known relative URL is substituted by plain string
    replacement, so this only works for pdf.js files and is not a general
    purpose function.

    Args:
        asset: js file or html page as string.
    """
    # (relative url, absolute replacement) pairs, applied in this order.
    replacements = [
        ('viewer.css', 'qute://pdfjs/web/viewer.css'),
        ('compatibility.js', 'qute://pdfjs/web/compatibility.js'),
        ('locale/locale.properties',
         'qute://pdfjs/web/locale/locale.properties'),
        ('l10n.js', 'qute://pdfjs/web/l10n.js'),
        ('../build/pdf.js', 'qute://pdfjs/build/pdf.js'),
        ('debugger.js', 'qute://pdfjs/web/debugger.js'),
        ('viewer.js', 'qute://pdfjs/web/viewer.js'),
        # The reference to this pdf is dropped entirely.
        ('compressed.tracemonkey-pldi-09.pdf', ''),
        ('./images/', 'qute://pdfjs/web/images/'),
        ('../build/pdf.worker.js', 'qute://pdfjs/build/pdf.worker.js'),
        ('../web/cmaps/', 'qute://pdfjs/web/cmaps/'),
    ]
    for relative, absolute in replacements:
        asset = asset.replace(relative, absolute)
    return asset
|
2015-11-23 22:10:35 +01:00
|
|
|
|
|
|
|
|
|
|
|
# Directories which are searched, in this order, for a pdf.js installation.
SYSTEM_PDFJS_PATHS = [
    # Debian pdf.js-common and Arch Linux pdfjs (AUR)
    '/usr/share/pdf.js/',
    # Debian libjs-pdf
    '/usr/share/javascript/pdf/',
    # Fallback inside the user's home directory
    os.path.expanduser('~/.local/share/qutebrowser/pdfjs/'),
]
|
|
|
|
|
|
|
|
|
2016-02-01 17:28:18 +01:00
|
|
|
def get_pdfjs_res_and_path(path):
    """Get a pdf.js resource in binary format.

    The directories in SYSTEM_PDFJS_PATHS are searched first; if the resource
    is in none of them, the bundled version is read via utils.read_file.
    Content which decodes as UTF-8 is passed through fix_urls before it is
    returned, other content is returned unchanged.

    Returns a (content, path) tuple, where content is the file content and path
    is the path where the file was found. If path is None, the bundled version
    was used.

    Args:
        path: The path inside the pdfjs directory.

    Raises:
        PDFJSNotFound: If the resource is found neither in a system path nor
                       in the bundled version. The requested path is passed
                       as the exception's argument.
    """
    # Leading slashes are removed so the path stays relative when it's joined
    # to a system path or to the bundled resource directory.
    # NOTE(review): '..' components are not rejected here - confirm callers
    # never pass untrusted paths.
    path = path.lstrip('/')
    content = None
    file_path = None

    # First try a system wide installation
    # System installations might strip off the 'build/' or 'web/' prefixes.
    # qute expects them, so we need to adjust for it.
    names_to_try = [path, _remove_prefix(path)]
    for system_path in SYSTEM_PDFJS_PATHS:
        content, file_path = _read_from_system(system_path, names_to_try)
        if content is not None:
            break

    # Fallback to bundled pdf.js
    if content is None:
        res_path = '3rdparty/pdfjs/{}'.format(path)
        try:
            content = utils.read_file(res_path, binary=True)
        except FileNotFoundError as e:
            # Tell the caller which resource was missing and keep the
            # original error as the explicit cause.
            raise PDFJSNotFound(path) from e

    try:
        # Might be script/html or might be binary
        text_content = content.decode('utf-8')
    except UnicodeDecodeError:
        return (content, file_path)
    text_content = fix_urls(text_content)
    return (text_content.encode('utf-8'), file_path)
|
|
|
|
|
|
|
|
|
|
|
|
def get_pdfjs_res(path):
    """Get only the content of a pdf.js resource, in binary format.

    This is get_pdfjs_res_and_path without the path in the return value.

    Args:
        path: The path inside the pdfjs directory.
    """
    return get_pdfjs_res_and_path(path)[0]
|
2015-11-23 22:10:35 +01:00
|
|
|
|
|
|
|
|
2015-12-08 16:46:18 +01:00
|
|
|
def _remove_prefix(path):
|
|
|
|
"""Remove the web/ or build/ prefix of a pdfjs-file-path.
|
|
|
|
|
|
|
|
Args:
|
|
|
|
path: Path as string where the prefix should be stripped off.
|
|
|
|
"""
|
|
|
|
prefixes = {'web/', 'build/'}
|
|
|
|
if any(path.startswith(prefix) for prefix in prefixes):
|
|
|
|
return path.split('/', maxsplit=1)[1]
|
|
|
|
# Return the unchanged path if no prefix is found
|
|
|
|
return path
|
|
|
|
|
|
|
|
|
2015-11-23 22:10:35 +01:00
|
|
|
def _read_from_system(system_path, names):
|
|
|
|
"""Try to read a file with one of the given names in system_path.
|
|
|
|
|
2016-02-01 17:28:18 +01:00
|
|
|
Returns a (content, path) tuple, where the path is the filepath that was
|
|
|
|
used.
|
|
|
|
|
2015-11-23 22:10:35 +01:00
|
|
|
Each file in names is considered equal, the first file that is found
|
|
|
|
is read and its binary content returned.
|
|
|
|
|
2016-02-01 17:28:18 +01:00
|
|
|
Returns (None, None) if no file could be found
|
2015-11-23 22:10:35 +01:00
|
|
|
|
|
|
|
Args:
|
|
|
|
system_path: The folder where the file should be searched.
|
|
|
|
names: List of possible file names.
|
|
|
|
"""
|
|
|
|
for name in names:
|
|
|
|
try:
|
2016-02-01 17:28:18 +01:00
|
|
|
full_path = os.path.join(system_path, name)
|
|
|
|
with open(full_path, 'rb') as f:
|
|
|
|
return (f.read(), full_path)
|
2015-11-23 22:10:35 +01:00
|
|
|
except OSError:
|
|
|
|
continue
|
2016-02-01 17:28:18 +01:00
|
|
|
return (None, None)
|
2015-12-20 20:00:56 +01:00
|
|
|
|
|
|
|
|
|
|
|
def is_available():
    """Check whether pdf.js can be used.

    Returns True if the 'build/pdf.js' resource can be loaded, False if
    loading it raises PDFJSNotFound.
    """
    try:
        get_pdfjs_res('build/pdf.js')
    except PDFJSNotFound:
        return False
    return True
|