Merge branch 'download-page' of https://github.com/Kingdread/qutebrowser into Kingdread-download-page
commit 7cddd52b2d
@@ -100,6 +100,9 @@ The following software and libraries are required to run qutebrowser:
* http://pygments.org/[pygments]
* http://pyyaml.org/wiki/PyYAML[PyYAML]

The following libraries are optional and provide a better user experience:
* http://cthedot.de/cssutils/[cssutils]

To generate the documentation for the `:help` command, when using the git
repository (rather than a release), http://asciidoc.org/[asciidoc] is needed.

@@ -145,13 +145,19 @@ Close the current window.

[[download]]
=== download
Syntax: +:download ['url'] ['dest']+
Syntax: +:download [*--mhtml*] [*--dest* 'DEST'] ['url'] ['dest-old']+

Download a given URL, or current page if no URL given.

The form `:download [url] [dest]` is deprecated, use `:download --dest [dest] [url]` instead.

==== positional arguments
* +'url'+: The URL to download. If not given, download the current page.
* +'dest'+: The file path to write the download to, or not given to ask.
* +'dest-old'+: (deprecated) Same as dest.

==== optional arguments
* +*-m*+, +*--mhtml*+: Download the current page and all assets as mhtml file.
* +*-d*+, +*--dest*+: The file path to write the download to, or not given to ask.
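For illustration, two possible invocations (the destination path and URL below are made-up example values, not part of the generated help):

    :download --mhtml
    :download --dest ~/downloads/page.html http://example.com/

The first form saves the current page together with its assets as a single MHTML file; the second downloads a specific URL to an explicit destination.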

[[download-cancel]]
=== download-cancel

@@ -37,7 +37,7 @@ import pygments.formatters

from qutebrowser.commands import userscripts, cmdexc, cmdutils, runners
from qutebrowser.config import config, configexc
from qutebrowser.browser import webelem, inspector, urlmarks
from qutebrowser.browser import webelem, inspector, urlmarks, downloads, mhtml
from qutebrowser.keyinput import modeman
from qutebrowser.utils import (message, usertypes, log, qtutils, urlutils,
                               objreg, utils)

@@ -1140,22 +1140,68 @@ class CommandDispatcher:
            cur.inspector.show()

    @cmdutils.register(instance='command-dispatcher', scope='window')
    def download(self, url=None, dest=None):
    def download(self, url=None, dest_old=None, *, mhtml_=False, dest=None):
        """Download a given URL, or current page if no URL given.

        The form `:download [url] [dest]` is deprecated, use `:download --dest
        [dest] [url]` instead.

        Args:
            url: The URL to download. If not given, download the current page.
            dest_old: (deprecated) Same as dest.
            dest: The file path to write the download to, or None to ask.
            mhtml_: Download the current page and all assets as mhtml file.
        """
        if dest_old is not None:
            message.warning(
                self._win_id, ":download [url] [dest] is deprecated - use"
                " download --dest [dest] [url]")
            if dest is not None:
                raise cmdexc.CommandError("Can't give two destinations for the"
                                          " download.")
            dest = dest_old

        download_manager = objreg.get('download-manager', scope='window',
                                      window=self._win_id)
        if url:
            if mhtml_:
                raise cmdexc.CommandError("Can only download the current page"
                                          " as mhtml.")
            url = urlutils.qurl_from_user_input(url)
            urlutils.raise_cmdexc_if_invalid(url)
            download_manager.get(url, filename=dest)
        else:
            page = self._current_widget().page()
            download_manager.get(self._current_url(), page=page)
            if mhtml_:
                self._download_mhtml(dest)
            else:
                page = self._current_widget().page()
                download_manager.get(self._current_url(), page=page,
                                     filename=dest)

    def _download_mhtml(self, dest=None):
        """Download the current page as a MHTML file, including all assets.

        Args:
            dest: The file path to write the download to.
        """
        tab_id = self._current_index()
        if dest is None:
            suggested_fn = self._current_title() + ".mht"
            suggested_fn = utils.sanitize_filename(suggested_fn)
            q = usertypes.Question()
            q.text = "Save page to: "
            q.mode = usertypes.PromptMode.text
            q.completed.connect(q.deleteLater)
            q.default = downloads.path_suggestion(suggested_fn)
            q.answered.connect(functools.partial(
                mhtml.start_download_checked, win_id=self._win_id,
                tab_id=tab_id))
            message_bridge = objreg.get("message-bridge", scope="window",
                                        window=self._win_id)
            message_bridge.ask(q, blocking=False)
        else:
            mhtml.start_download_checked(dest, win_id=self._win_id,
                                         tab_id=tab_id)

    @cmdutils.register(instance='command-dispatcher', scope='window',
                       deprecated="Use :download instead.")

@@ -49,7 +49,7 @@ ModelRole = usertypes.enum('ModelRole', ['item'], start=Qt.UserRole,
RetryInfo = collections.namedtuple('RetryInfo', ['request', 'manager'])

# Remember the last used directory
_last_used_directory = None
last_used_directory = None


# All REFRESH_INTERVAL milliseconds, speeds will be recalculated and downloads
@@ -57,20 +57,20 @@ _last_used_directory = None
REFRESH_INTERVAL = 500


def _download_dir():
def download_dir():
    """Get the download directory to use."""
    directory = config.get('storage', 'download-directory')
    remember_dir = config.get('storage', 'remember-download-directory')

    if remember_dir and _last_used_directory is not None:
        return _last_used_directory
    if remember_dir and last_used_directory is not None:
        return last_used_directory
    elif directory is None:
        return standarddir.download()
    else:
        return directory


def _path_suggestion(filename):
def path_suggestion(filename):
    """Get the suggested file path.

    Args:
@@ -79,15 +79,36 @@ def _path_suggestion(filename):
    suggestion = config.get('completion', 'download-path-suggestion')
    if suggestion == 'path':
        # add trailing '/' if not present
        return os.path.join(_download_dir(), '')
        return os.path.join(download_dir(), '')
    elif suggestion == 'filename':
        return filename
    elif suggestion == 'both':
        return os.path.join(_download_dir(), filename)
        return os.path.join(download_dir(), filename)
    else:
        raise ValueError("Invalid suggestion value {}!".format(suggestion))


def create_full_filename(basename, filename):
    """Create a full filename based on the given basename and filename.

    Args:
        basename: The basename to use if filename is a directory.
        filename: The path to a folder or file where you want to save.

    Return:
        The full absolute path, or None if filename creation was not possible.
    """
    if os.path.isabs(filename) and os.path.isdir(filename):
        # We got an absolute directory from the user, so we save it under
        # the default filename in that directory.
        return os.path.join(filename, basename)
    elif os.path.isabs(filename):
        # We got an absolute filename from the user, so we save it under
        # that filename.
        return filename
    return None

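To make the branches above concrete, a small sketch with made-up paths (note that the first branch only applies when the given path actually exists as a directory on disk):

    # assuming /home/user/dl exists as a directory:
    create_full_filename('page.mht', '/home/user/dl')        # '/home/user/dl/page.mht'
    create_full_filename('page.mht', '/home/user/save.mht')  # '/home/user/save.mht'
    create_full_filename('page.mht', 'save.mht')             # None: relative path, callers
                                                             # retry with download_dir()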
class DownloadItemStats(QObject):

    """Statistics (bytes done, total bytes, time, etc.) about a download.
@@ -201,6 +222,7 @@ class DownloadItem(QObject):
        fileobj: The file object to download the file to.
        reply: The QNetworkReply associated with this download.
        retry_info: A RetryInfo instance.
        raw_headers: The headers sent by the server.
        _filename: The filename of the download.
        _redirects: How many times we were redirected already.
        _buffer: A BytesIO object to buffer incoming data until we know the
@@ -255,6 +277,7 @@ class DownloadItem(QObject):
        self._filename = None
        self.init_reply(reply)
        self._win_id = win_id
        self.raw_headers = {}

    def __repr__(self):
        return utils.get_repr(self, basename=self.basename)
@@ -354,6 +377,7 @@ class DownloadItem(QObject):
        reply.finished.connect(self.on_reply_finished)
        reply.error.connect(self.on_reply_error)
        reply.readyRead.connect(self.on_ready_read)
        reply.metaDataChanged.connect(self.on_meta_data_changed)
        self.retry_info = RetryInfo(request=reply.request(),
                                    manager=reply.manager())
        if not self.fileobj:
@@ -444,7 +468,7 @@ class DownloadItem(QObject):
            filename: The full filename to save the download to.
                      None: special value to stop the download.
        """
        global _last_used_directory
        global last_used_directory
        if self.fileobj is not None:
            raise ValueError("fileobj was already set! filename: {}, "
                             "existing: {}, fileobj {}".format(
@@ -454,13 +478,16 @@ class DownloadItem(QObject):
        # See https://github.com/The-Compiler/qutebrowser/issues/427
        encoding = sys.getfilesystemencoding()
        filename = utils.force_encoding(filename, encoding)
        if not self._create_full_filename(filename):
        self._filename = create_full_filename(self.basename, filename)
        if self._filename is None:
            # We only got a filename (without directory) or a relative path
            # from the user, so we append that to the default directory and
            # try again.
            self._create_full_filename(os.path.join(_download_dir(), filename))
            self._filename = create_full_filename(
                self.basename, os.path.join(download_dir(), filename))

        _last_used_directory = os.path.dirname(self._filename)
        self.basename = os.path.basename(self._filename)
        last_used_directory = os.path.dirname(self._filename)

        log.downloads.debug("Setting filename to {}".format(filename))
        if os.path.isfile(self._filename):
@@ -477,25 +504,6 @@ class DownloadItem(QObject):
        else:
            self._create_fileobj()

    def _create_full_filename(self, filename):
        """Try to create the full filename.

        Return:
            True if the full filename was created, False otherwise.
        """
        if os.path.isabs(filename) and os.path.isdir(filename):
            # We got an absolute directory from the user, so we save it under
            # the default filename in that directory.
            self._filename = os.path.join(filename, self.basename)
            return True
        elif os.path.isabs(filename):
            # We got an absolute filename from the user, so we save it under
            # that filename.
            self._filename = filename
            self.basename = os.path.basename(self._filename)
            return True
        return False

    def set_fileobj(self, fileobj):
        """Set the file object to write the download to.

@@ -593,6 +601,15 @@ class DownloadItem(QObject):
        if data is not None:
            self._buffer.write(data)

    @pyqtSlot()
    def on_meta_data_changed(self):
        """Update the download's metadata."""
        if self.reply is None:
            return
        self.raw_headers = {}
        for key, value in self.reply.rawHeaderPairs():
            self.raw_headers[bytes(key)] = bytes(value)

    def _handle_redirect(self):
        """Handle a HTTP redirect.

@@ -720,7 +737,7 @@ class DownloadManager(QAbstractListModel):
        prompt_download_directory = config.get(
            'storage', 'prompt-download-directory')
        if not prompt_download_directory and not fileobj:
            filename = _download_dir()
            filename = download_dir()

        if fileobj is not None or filename is not None:
            return self.fetch_request(request,
@@ -735,7 +752,7 @@ class DownloadManager(QAbstractListModel):
        suggested_fn = utils.force_encoding(suggested_fn, encoding)

        q = self._prepare_question()
        q.default = _path_suggestion(suggested_fn)
        q.default = path_suggestion(suggested_fn)
        message_bridge = objreg.get('message-bridge', scope='window',
                                    window=self._win_id)
        q.answered.connect(
@@ -820,7 +837,7 @@ class DownloadManager(QAbstractListModel):
        prompt_download_directory = config.get('storage',
                                               'prompt-download-directory')
        if not prompt_download_directory and not fileobj:
            filename = _download_dir()
            filename = download_dir()

        if filename is not None:
            download.set_filename(filename)
@@ -829,7 +846,7 @@ class DownloadManager(QAbstractListModel):
            download.autoclose = False
        else:
            q = self._prepare_question()
            q.default = _path_suggestion(suggested_filename)
            q.default = path_suggestion(suggested_filename)
            q.answered.connect(download.set_filename)
            q.cancelled.connect(download.cancel)
            download.cancelled.connect(q.abort)

qutebrowser/browser/mhtml.py (new file, 511 lines)
@@ -0,0 +1,511 @@
# vim: ft=python fileencoding=utf-8 sts=4 sw=4 et:

# Copyright 2015 Daniel Schadt
#
# This file is part of qutebrowser.
#
# qutebrowser is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# qutebrowser is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with qutebrowser. If not, see <http://www.gnu.org/licenses/>.

"""Utils for writing a MHTML file."""

import functools
import io
import os
import re
import sys
import collections
import uuid
import email.policy
import email.generator
import email.encoders
import email.mime.multipart

from PyQt5.QtCore import QUrl

from qutebrowser.browser import webelem, downloads
from qutebrowser.utils import log, objreg, message, usertypes, utils, urlutils

try:
    import cssutils
except (ImportError, re.error):
    # Catching re.error because cssutils in earlier releases (<= 1.0) is broken
    # on Python 3.5
    # See https://bitbucket.org/cthedot/cssutils/issues/52
    cssutils = None

_File = collections.namedtuple('_File',
                               ['content', 'content_type', 'content_location',
                                'transfer_encoding'])


_CSS_URL_PATTERNS = [re.compile(x) for x in [
    r"@import\s+'(?P<url>[^']+)'",
    r'@import\s+"(?P<url>[^"]+)"',
    r'''url\((?P<url>[^'"][^)]*)\)''',
    r'url\("(?P<url>[^"]+)"\)',
    r"url\('(?P<url>[^']+)'\)",
]]


def _get_css_imports_regex(data):
    """Return all assets that are referenced in the given CSS document.

    The returned URLs are relative to the stylesheet's URL.

    Args:
        data: The content of the stylesheet to scan as string.
    """
    urls = []
    for pattern in _CSS_URL_PATTERNS:
        for match in pattern.finditer(data):
            url = match.group("url")
            if url:
                urls.append(url)
    return urls


def _get_css_imports_cssutils(data, inline=False):
    """Return all assets that are referenced in the given CSS document.

    The returned URLs are relative to the stylesheet's URL.

    Args:
        data: The content of the stylesheet to scan as string.
        inline: True if the argument is an inline HTML style attribute.
    """
    # We don't care about invalid CSS data, this will only litter the log
    # output with CSS errors
    parser = cssutils.CSSParser(loglevel=100,
                                fetcher=lambda url: (None, ""), validate=False)
    if not inline:
        sheet = parser.parseString(data)
        return list(cssutils.getUrls(sheet))
    else:
        urls = []
        declaration = parser.parseStyle(data)
        # prop = background, color, margin, ...
        for prop in declaration:
            # value = red, 10px, url(foobar), ...
            for value in prop.propertyValue:
                if isinstance(value, cssutils.css.URIValue):
                    if value.uri:
                        urls.append(value.uri)
        return urls


def _get_css_imports(data, inline=False):
    """Return all assets that are referenced in the given CSS document.

    The returned URLs are relative to the stylesheet's URL.

    Args:
        data: The content of the stylesheet to scan as string.
        inline: True if the argument is an inline HTML style attribute.
    """
    if cssutils is None:
        return _get_css_imports_regex(data)
    else:
        return _get_css_imports_cssutils(data, inline)

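A few illustrative calls, mirroring the parametrized cases in the new test_mhtml.py further down; the results are the same whether the cssutils backend or the regex fallback is used:

    _get_css_imports("@import 'default.css'")
    # ['default.css']
    _get_css_imports('body { background: url("/bg-img.png") }')
    # ['/bg-img.png']
    _get_css_imports('background: url(folder/file.png) no-repeat', inline=True)
    # ['folder/file.png']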
def _check_rel(element):
    """Return true if the element's rel attribute fits our criteria.

    rel has to contain 'stylesheet' or 'icon'. Also returns True if the rel
    attribute is unset.

    Args:
        element: The WebElementWrapper which should be checked.
    """
    if 'rel' not in element:
        return True
    must_have = {'stylesheet', 'icon'}
    rels = [rel.lower() for rel in element['rel'].split(' ')]
    return any(rel in rels for rel in must_have)


MHTMLPolicy = email.policy.default.clone(linesep='\r\n', max_line_length=0)


# Encode the file using base64 encoding.
E_BASE64 = email.encoders.encode_base64


# Encode the file using MIME quoted-printable encoding.
E_QUOPRI = email.encoders.encode_quopri


class MHTMLWriter():

    """A class for outputting multiple files to a MHTML document.

    Attributes:
        root_content: The root content as bytes.
        content_location: The url of the page as str.
        content_type: The MIME-type of the root content as str.
        _files: Mapping of location->_File namedtuple.
    """

    def __init__(self, root_content, content_location, content_type):
        self.root_content = root_content
        self.content_location = content_location
        self.content_type = content_type
        self._files = {}

    def add_file(self, location, content, content_type=None,
                 transfer_encoding=E_QUOPRI):
        """Add a file to the given MHTML collection.

        Args:
            location: The original location (URL) of the file.
            content: The binary content of the file.
            content_type: The MIME-type of the content (if available)
            transfer_encoding: The transfer encoding to use for this file.
        """
        self._files[location] = _File(
            content=content, content_type=content_type,
            content_location=location, transfer_encoding=transfer_encoding,
        )

    def write_to(self, fp):
        """Output the MHTML file to the given file-like object.

        Args:
            fp: The file-object, opened in "wb" mode.
        """
        msg = email.mime.multipart.MIMEMultipart(
            'related', '---=_qute-{}'.format(uuid.uuid4()))

        root = self._create_root_file()
        msg.attach(root)

        for _, file_data in sorted(self._files.items()):
            msg.attach(self._create_file(file_data))

        gen = email.generator.BytesGenerator(fp, policy=MHTMLPolicy)
        gen.flatten(msg)

    def _create_root_file(self):
        """Return the root document as MIMEMultipart."""
        root_file = _File(
            content=self.root_content, content_type=self.content_type,
            content_location=self.content_location, transfer_encoding=E_QUOPRI,
        )
        return self._create_file(root_file)

    def _create_file(self, f):
        """Return the single given file as MIMEMultipart."""
        msg = email.mime.multipart.MIMEMultipart()
        msg['Content-Location'] = f.content_location
        # Get rid of the default type multipart/mixed
        del msg['Content-Type']
        if f.content_type:
            msg.set_type(f.content_type)
        msg.set_payload(f.content)
        f.transfer_encoding(msg)
        return msg

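As a rough usage sketch of the writer defined above (the contents and URLs are placeholders; the unit tests added below exercise the same calls):

    import io

    fp = io.BytesIO()
    writer = MHTMLWriter(root_content=b'<html>hello</html>',
                         content_location='http://example.com/',
                         content_type='text/html; charset="UTF-8"')
    writer.add_file(location='http://example.com/image.png',
                    content=b'<raw png bytes>',
                    content_type='image/png',
                    transfer_encoding=E_BASE64)
    writer.write_to(fp)  # fp now holds the complete MHTML document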
class _Downloader():

    """A class to download whole websites.

    Attributes:
        web_view: The QWebView which contains the website that will be saved.
        dest: Destination filename.
        writer: The MHTMLWriter object which is used to save the page.
        loaded_urls: A set of QUrls of finished asset downloads.
        pending_downloads: A set of unfinished (url, DownloadItem) tuples.
        _finished: A flag indicating if the file has already been written.
        _used: A flag indicating if the downloader has already been used.
    """

    def __init__(self, web_view, dest):
        self.web_view = web_view
        self.dest = dest
        self.writer = None
        self.loaded_urls = {web_view.url()}
        self.pending_downloads = set()
        self._finished = False
        self._used = False

    def run(self):
        """Download and save the page.

        The object must not be reused, you should create a new one if
        you want to download another page.
        """
        if self._used:
            raise ValueError("Downloader already used")
        self._used = True
        web_url = self.web_view.url()
        web_frame = self.web_view.page().mainFrame()

        self.writer = MHTMLWriter(
            web_frame.toHtml().encode('utf-8'),
            content_location=urlutils.encoded_url(web_url),
            # I've found no way of getting the content type of a QWebView, but
            # since we're using .toHtml, it's probably safe to say that the
            # content-type is HTML
            content_type='text/html; charset="UTF-8"',
        )
        # Currently only downloading <link> (stylesheets), <script>
        # (javascript) and <img> (image) elements.
        elements = web_frame.findAllElements('link, script, img')

        for element in elements:
            element = webelem.WebElementWrapper(element)
            # Websites are free to set whatever rel=... attribute they want.
            # We just care about stylesheets and icons.
            if not _check_rel(element):
                continue
            if 'src' in element:
                element_url = element['src']
            elif 'href' in element:
                element_url = element['href']
            else:
                # Might be a local <script> tag or something else
                continue
            absolute_url = web_url.resolved(QUrl(element_url))
            self.fetch_url(absolute_url)

        styles = web_frame.findAllElements('style')
        for style in styles:
            style = webelem.WebElementWrapper(style)
            if 'type' in style and style['type'] != 'text/css':
                continue
            for element_url in _get_css_imports(str(style)):
                self.fetch_url(web_url.resolved(QUrl(element_url)))

        # Search for references in inline styles
        for element in web_frame.findAllElements('[style]'):
            element = webelem.WebElementWrapper(element)
            style = element['style']
            for element_url in _get_css_imports(style, inline=True):
                self.fetch_url(web_url.resolved(QUrl(element_url)))

        # Shortcut if no assets need to be downloaded, otherwise the file would
        # never be saved. Also might happen if the downloads are fast enough to
        # complete before connecting their finished signal.
        self.collect_zombies()
        if not self.pending_downloads and not self._finished:
            self.finish_file()

    def fetch_url(self, url):
        """Download the given url and add the file to the collection.

        Args:
            url: The file to download as QUrl.
        """
        if url.scheme() not in {'http', 'https'}:
            return
        # Prevent loading an asset twice
        if url in self.loaded_urls:
            return
        self.loaded_urls.add(url)

        log.downloads.debug("loading asset at %s", url)

        # Using the download manager to download host-blocked urls might crash
        # qute, see the comments/discussion on
        # https://github.com/The-Compiler/qutebrowser/pull/962#discussion_r40256987
        # and https://github.com/The-Compiler/qutebrowser/issues/1053
        host_blocker = objreg.get('host-blocker')
        if host_blocker.is_blocked(url):
            log.downloads.debug("Skipping %s, host-blocked", url)
            # We still need an empty file in the output, QWebView can be pretty
            # picky about displaying a file correctly when not all assets are
            # at least referenced in the mhtml file.
            self.writer.add_file(urlutils.encoded_url(url), b'')
            return

        download_manager = objreg.get('download-manager', scope='window',
                                      window='current')
        item = download_manager.get(url, fileobj=_NoCloseBytesIO(),
                                    auto_remove=True)
        self.pending_downloads.add((url, item))
        item.finished.connect(
            functools.partial(self.finished, url, item))
        item.error.connect(
            functools.partial(self.error, url, item))
        item.cancelled.connect(
            functools.partial(self.error, url, item))

    def finished(self, url, item):
        """Callback when a single asset is downloaded.

        Args:
            url: The original url of the asset as QUrl.
            item: The DownloadItem given by the DownloadManager
        """
        self.pending_downloads.remove((url, item))
        mime = item.raw_headers.get(b'Content-Type', b'')

        # Note that this decoding always works and doesn't produce errors
        # RFC 7230 (https://tools.ietf.org/html/rfc7230) states:
        # Historically, HTTP has allowed field content with text in the
        # ISO-8859-1 charset [ISO-8859-1], supporting other charsets only
        # through use of [RFC2047] encoding. In practice, most HTTP header
        # field values use only a subset of the US-ASCII charset [USASCII].
        # Newly defined header fields SHOULD limit their field values to
        # US-ASCII octets. A recipient SHOULD treat other octets in field
        # content (obs-text) as opaque data.
        mime = mime.decode('iso-8859-1')

        if mime.lower() == 'text/css':
            # We can't always assume that CSS files are UTF-8, but CSS files
            # shouldn't contain many non-ASCII characters anyway (in most
            # cases). Using "ignore" lets us decode the file even if it's
            # invalid UTF-8 data.
            # The file written to the MHTML file won't be modified by this
            # decoding, since there we're taking the original bytestream.
            try:
                css_string = item.fileobj.getvalue().decode('utf-8')
            except UnicodeDecodeError:
                log.downloads.warning("Invalid UTF-8 data in %s", url)
                css_string = item.fileobj.getvalue().decode('utf-8', 'ignore')
            import_urls = _get_css_imports(css_string)
            for import_url in import_urls:
                absolute_url = url.resolved(QUrl(import_url))
                self.fetch_url(absolute_url)

        encode = E_QUOPRI if mime.startswith('text/') else E_BASE64
        # Our MHTML handler refuses non-ASCII headers. This will replace every
        # non-ASCII char with '?'. This is probably okay, as official Content-
        # Type headers contain ASCII only anyway. Anything else is madness.
        mime = utils.force_encoding(mime, 'ascii')
        self.writer.add_file(urlutils.encoded_url(url),
                             item.fileobj.getvalue(), mime, encode)
        item.fileobj.actual_close()
        if self.pending_downloads:
            return
        self.finish_file()

    def error(self, url, item, *_args):
        """Callback when a download error occurred.

        Args:
            url: The original url of the asset as QUrl.
            item: The DownloadItem given by the DownloadManager.
        """
        try:
            self.pending_downloads.remove((url, item))
        except KeyError:
            # This might happen if .collect_zombies() calls .finished() and the
            # error handler will be called after .collect_zombies
            log.downloads.debug("Oops! Download already gone: %s", item)
            return
        item.fileobj.actual_close()
        # Add a stub file, see comment in .fetch_url() for more information
        self.writer.add_file(urlutils.encoded_url(url), b'')
        if self.pending_downloads:
            return
        self.finish_file()

    def finish_file(self):
        """Save the file to the filename given in __init__."""
        if self._finished:
            log.downloads.debug("finish_file called twice, ignored!")
            return
        self._finished = True
        log.downloads.debug("All assets downloaded, ready to finish off!")
        with open(self.dest, 'wb') as file_output:
            self.writer.write_to(file_output)
        message.info('current', "Page saved as {}".format(self.dest))

    def collect_zombies(self):
        """Collect done downloads and add their data to the MHTML file.

        This is needed if a download finishes before attaching its
        finished signal.
        """
        items = set((url, item) for url, item in self.pending_downloads
                    if item.done)
        log.downloads.debug("Zombie downloads: %s", items)
        for url, item in items:
            self.finished(url, item)


class _NoCloseBytesIO(io.BytesIO):  # pylint: disable=no-init

    """BytesIO that can't be .closed().

    This is needed to prevent the DownloadManager from closing the stream, thus
    discarding the data.
    """

    def close(self):
        """Do nothing."""
        pass

    def actual_close(self):
        """Close the stream."""
        super().close()


def _start_download(dest, win_id, tab_id):
    """Start downloading the current page and all assets to a MHTML file.

    This will overwrite dest if it already exists.

    Args:
        dest: The filename where the resulting file should be saved.
        win_id, tab_id: Specify the tab whose page should be loaded.
    """
    web_view = objreg.get('webview', scope='tab', window=win_id, tab=tab_id)
    loader = _Downloader(web_view, dest)
    loader.run()


def start_download_checked(dest, win_id, tab_id):
    """First check if dest is already a file, then start the download.

    Args:
        dest: The filename where the resulting file should be saved.
        win_id, tab_id: Specify the tab whose page should be loaded.
    """
    # The default name is 'page title.mht'
    title = (objreg.get('webview', scope='tab', window=win_id, tab=tab_id)
             .title())
    default_name = utils.sanitize_filename(title + '.mht')

    # Remove characters which cannot be expressed in the file system encoding
    encoding = sys.getfilesystemencoding()
    default_name = utils.force_encoding(default_name, encoding)
    dest = utils.force_encoding(dest, encoding)

    dest = os.path.expanduser(dest)

    # See if we already have an absolute path
    path = downloads.create_full_filename(default_name, dest)
    if path is None:
        # We still only have a relative path, prepend download_dir and
        # try again.
        path = downloads.create_full_filename(
            default_name, os.path.join(downloads.download_dir(), dest))
    downloads.last_used_directory = os.path.dirname(path)

    if not os.path.isfile(path):
        _start_download(path, win_id=win_id, tab_id=tab_id)
        return

    q = usertypes.Question()
    q.mode = usertypes.PromptMode.yesno
    q.text = "{} exists. Overwrite?".format(path)
    q.completed.connect(q.deleteLater)
    q.answered_yes.connect(functools.partial(
        _start_download, path, win_id=win_id, tab_id=tab_id))
    message_bridge = objreg.get('message-bridge', scope='window',
                                window=win_id)
    message_bridge.ask(q, blocking=False)

@@ -438,6 +438,15 @@ def same_domain(url1, url2):
    return domain1 == domain2


def encoded_url(url):
    """Return the fully encoded url as string.

    Args:
        url: The url to encode as QUrl.
    """
    return bytes(url.toEncoded()).decode('ascii')


class IncDecError(Exception):

    """Exception raised by incdec_number on problems.

@@ -611,6 +611,27 @@ def force_encoding(text, encoding):
    return text.encode(encoding, errors='replace').decode(encoding)


def sanitize_filename(name, replacement='_'):
    """Replace invalid filename characters.

    Note: This should be used for the basename, as it also removes the path
    separator.

    Args:
        name: The filename.
        replacement: The replacement character (or None).
    """
    if replacement is None:
        replacement = ''
    # Bad characters taken from Windows, there are even fewer on Linux
    # See also
    # https://en.wikipedia.org/wiki/Filename#Reserved_characters_and_words
    bad_chars = '\\/:*?"<>|'
    for bad_char in bad_chars:
        name = name.replace(bad_char, replacement)
    return name


def newest_slice(iterable, count):
    """Get an iterable for the n newest items of the given iterable.

@@ -133,6 +133,7 @@ def _module_versions():
        ('jinja2', ['__version__']),
        ('pygments', ['__version__']),
        ('yaml', ['__version__']),
        ('cssutils', ['__version__']),
    ])
    for name, attributes in modules.items():
        try:

@@ -5,3 +5,4 @@ pyPEG2==2.15.2
PyYAML==3.11
colorama==0.3.3
colorlog==2.6.0
cssutils==1.0.1

@@ -80,6 +80,7 @@ def whitelist_generator():
    # https://bitbucket.org/jendrikseipp/vulture/issues/10/
    yield 'qutebrowser.misc.utilcmds.pyeval_output'
    yield 'utils.use_color'
    yield 'qutebrowser.browser.mhtml.last_used_directory'

    # Other false-positives
    yield ('qutebrowser.completion.models.sortfilter.CompletionFilterModel().'

tests/unit/browser/test_mhtml.py (new file, 277 lines)
@@ -0,0 +1,277 @@
# vim: ft=python fileencoding=utf-8 sts=4 sw=4 et:
import io
import textwrap
import re
import pytest

from qutebrowser.browser import mhtml

@pytest.fixture(autouse=True)
def patch_uuid(monkeypatch):
    monkeypatch.setattr("uuid.uuid4", lambda: "UUID")


class Checker:

    """A helper to check mhtml output.

    Attrs:
        fp: A BytesIO object for passing to MHTMLWriter.write_to.
    """

    def __init__(self):
        self.fp = io.BytesIO()

    @property
    def value(self):
        return self.fp.getvalue()

    def expect(self, expected):
        actual = self.value.decode('ascii')
        # Make sure there are no stray \r or \n
        assert re.search(r'\r[^\n]', actual) is None
        assert re.search(r'[^\r]\n', actual) is None
        actual = actual.replace('\r\n', '\n')
        expected = textwrap.dedent(expected).lstrip('\n')
        assert expected == actual


@pytest.fixture
def checker():
    return Checker()


def test_quoted_printable_umlauts(checker):
    content = 'Die süße Hündin läuft in die Höhle des Bären'
    content = content.encode('iso-8859-1')
    writer = mhtml.MHTMLWriter(root_content=content,
                               content_location='localhost',
                               content_type='text/plain')
    writer.write_to(checker.fp)
    checker.expect("""
        Content-Type: multipart/related; boundary="---=_qute-UUID"
        MIME-Version: 1.0

        -----=_qute-UUID
        Content-Location: localhost
        MIME-Version: 1.0
        Content-Type: text/plain
        Content-Transfer-Encoding: quoted-printable

        Die=20s=FC=DFe=20H=FCndin=20l=E4uft=20in=20die=20H=F6hle=20des=20B=E4ren
        -----=_qute-UUID--
        """)


@pytest.mark.parametrize('header, value', [
    ('content_location', 'http://brötli.com'),
    ('content_type', 'text/pläin'),
])
def test_refuses_non_ascii_header_value(checker, header, value):
    defaults = {
        'root_content': b'',
        'content_location': 'http://example.com',
        'content_type': 'text/plain',
    }
    defaults[header] = value
    writer = mhtml.MHTMLWriter(**defaults)
    with pytest.raises(UnicodeEncodeError) as excinfo:
        writer.write_to(checker.fp)
    assert "'ascii' codec can't encode" in str(excinfo.value)


def test_file_encoded_as_base64(checker):
    content = b'Image file attached'
    writer = mhtml.MHTMLWriter(root_content=content, content_type='text/plain',
                               content_location='http://example.com')
    writer.add_file(location='http://a.example.com/image.png',
                    content='\U0001F601 image data'.encode('utf-8'),
                    content_type='image/png',
                    transfer_encoding=mhtml.E_BASE64)
    writer.write_to(checker.fp)
    checker.expect("""
        Content-Type: multipart/related; boundary="---=_qute-UUID"
        MIME-Version: 1.0

        -----=_qute-UUID
        Content-Location: http://example.com
        MIME-Version: 1.0
        Content-Type: text/plain
        Content-Transfer-Encoding: quoted-printable

        Image=20file=20attached
        -----=_qute-UUID
        Content-Location: http://a.example.com/image.png
        MIME-Version: 1.0
        Content-Type: image/png
        Content-Transfer-Encoding: base64

        8J+YgSBpbWFnZSBkYXRh

        -----=_qute-UUID--
        """)


@pytest.mark.parametrize('transfer_encoding', [mhtml.E_BASE64, mhtml.E_QUOPRI],
                         ids=['base64', 'quoted-printable'])
def test_payload_lines_wrap(checker, transfer_encoding):
    payload = b'1234567890' * 10
    writer = mhtml.MHTMLWriter(root_content=b'', content_type='text/plain',
                               content_location='http://example.com')
    writer.add_file(location='http://example.com/payload', content=payload,
                    content_type='text/plain',
                    transfer_encoding=transfer_encoding)
    writer.write_to(checker.fp)
    for line in checker.value.split(b'\r\n'):
        assert len(line) < 77


def test_files_appear_sorted(checker):
    writer = mhtml.MHTMLWriter(root_content=b'root file',
                               content_type='text/plain',
                               content_location='http://www.example.com/')
    for subdomain in 'ahgbizt':
        writer.add_file(location='http://{}.example.com/'.format(subdomain),
                        content='file {}'.format(subdomain).encode('utf-8'),
                        content_type='text/plain',
                        transfer_encoding=mhtml.E_QUOPRI)
    writer.write_to(checker.fp)
    checker.expect("""
        Content-Type: multipart/related; boundary="---=_qute-UUID"
        MIME-Version: 1.0

        -----=_qute-UUID
        Content-Location: http://www.example.com/
        MIME-Version: 1.0
        Content-Type: text/plain
        Content-Transfer-Encoding: quoted-printable

        root=20file
        -----=_qute-UUID
        Content-Location: http://a.example.com/
        MIME-Version: 1.0
        Content-Type: text/plain
        Content-Transfer-Encoding: quoted-printable

        file=20a
        -----=_qute-UUID
        Content-Location: http://b.example.com/
        MIME-Version: 1.0
        Content-Type: text/plain
        Content-Transfer-Encoding: quoted-printable

        file=20b
        -----=_qute-UUID
        Content-Location: http://g.example.com/
        MIME-Version: 1.0
        Content-Type: text/plain
        Content-Transfer-Encoding: quoted-printable

        file=20g
        -----=_qute-UUID
        Content-Location: http://h.example.com/
        MIME-Version: 1.0
        Content-Type: text/plain
        Content-Transfer-Encoding: quoted-printable

        file=20h
        -----=_qute-UUID
        Content-Location: http://i.example.com/
        MIME-Version: 1.0
        Content-Type: text/plain
        Content-Transfer-Encoding: quoted-printable

        file=20i
        -----=_qute-UUID
        Content-Location: http://t.example.com/
        MIME-Version: 1.0
        Content-Type: text/plain
        Content-Transfer-Encoding: quoted-printable

        file=20t
        -----=_qute-UUID
        Content-Location: http://z.example.com/
        MIME-Version: 1.0
        Content-Type: text/plain
        Content-Transfer-Encoding: quoted-printable

        file=20z
        -----=_qute-UUID--
        """)


def test_empty_content_type(checker):
    writer = mhtml.MHTMLWriter(root_content=b'',
                               content_location='http://example.com/',
                               content_type='text/plain')
    writer.add_file('http://example.com/file', b'file content')
    writer.write_to(checker.fp)
    checker.expect("""
        Content-Type: multipart/related; boundary="---=_qute-UUID"
        MIME-Version: 1.0

        -----=_qute-UUID
        Content-Location: http://example.com/
        MIME-Version: 1.0
        Content-Type: text/plain
        Content-Transfer-Encoding: quoted-printable


        -----=_qute-UUID
        MIME-Version: 1.0
        Content-Location: http://example.com/file
        Content-Transfer-Encoding: quoted-printable

        file=20content
        -----=_qute-UUID--
        """)


@pytest.mark.parametrize('has_cssutils', [
    pytest.mark.skipif(mhtml.cssutils is None,
                       reason="requires cssutils")(True),
    False,
], ids=['with_cssutils', 'no_cssutils'])
@pytest.mark.parametrize('inline, style, expected_urls', [
    (False, "@import 'default.css'", ['default.css']),
    (False, '@import "default.css"', ['default.css']),
    (False, "@import \t 'tabbed.css'", ['tabbed.css']),
    (False, "@import url('default.css')", ['default.css']),
    (False, """body {
        background: url("/bg-img.png")
    }""", ['/bg-img.png']),
    (True, 'background: url(folder/file.png) no-repeat', ['folder/file.png']),
    (True, 'content: url()', []),
])
def test_css_url_scanner(monkeypatch, has_cssutils, inline, style,
                         expected_urls):
    if not has_cssutils:
        monkeypatch.setattr('qutebrowser.browser.mhtml.cssutils', None)
    expected_urls.sort()
    urls = mhtml._get_css_imports(style, inline=inline)
    urls.sort()
    assert urls == expected_urls


class TestNoCloseBytesIO:
    # WORKAROUND for https://bitbucket.org/logilab/pylint/issues/540/
    # pylint: disable=no-member

    def test_fake_close(self):
        fp = mhtml._NoCloseBytesIO()
        fp.write(b'Value')
        fp.close()
        assert fp.getvalue() == b'Value'
        fp.write(b'Eulav')
        assert fp.getvalue() == b'ValueEulav'

    def test_actual_close(self):
        fp = mhtml._NoCloseBytesIO()
        fp.write(b'Value')
        fp.actual_close()
        with pytest.raises(ValueError) as excinfo:
            fp.getvalue()
        assert str(excinfo.value) == 'I/O operation on closed file.'
        with pytest.raises(ValueError) as excinfo:
            fp.write(b'Closed')
        assert str(excinfo.value) == 'I/O operation on closed file.'

@@ -527,6 +527,19 @@ def test_same_domain_invalid_url(url1, url2):
    with pytest.raises(urlutils.InvalidUrlError):
        urlutils.same_domain(QUrl(url1), QUrl(url2))


@pytest.mark.parametrize('url, expected', [
    ('http://example.com', 'http://example.com'),
    ('http://ünicode.com', 'http://xn--nicode-2ya.com'),
    ('http://foo.bar/?header=text/pläin',
     'http://foo.bar/?header=text/pl%C3%A4in'),
])
def test_encoded_url(url, expected):
    """Test encoded_url"""
    url = QUrl(url)
    assert urlutils.encoded_url(url) == expected


class TestIncDecNumber:

    """Tests for urlutils.incdec_number()."""

@@ -839,6 +839,20 @@ def test_force_encoding(inp, enc, expected):
    assert utils.force_encoding(inp, enc) == expected


@pytest.mark.parametrize('inp, expected', [
    ('normal.txt', 'normal.txt'),
    ('user/repo issues.mht', 'user_repo issues.mht'),
    ('<Test\\File> - "*?:|', '_Test_File_ - _____'),
])
def test_sanitize_filename(inp, expected):
    assert utils.sanitize_filename(inp) == expected


def test_sanitize_filename_empty_replacement():
    name = '/<Bad File>/'
    assert utils.sanitize_filename(name, replacement=None) == 'Bad File'


class TestNewestSlice:

    """Test newest_slice."""

@@ -324,6 +324,7 @@ class ImportFake:
            'jinja2': True,
            'pygments': True,
            'yaml': True,
            'cssutils': True,
        }
        self.version_attribute = '__version__'
        self.version = '1.2.3'
@@ -383,12 +384,13 @@ class TestModuleVersions:
        """Test with all modules present in version 1.2.3."""
        expected = ['sip: yes', 'colorlog: yes', 'colorama: 1.2.3',
                    'pypeg2: 1.2.3', 'jinja2: 1.2.3', 'pygments: 1.2.3',
                    'yaml: 1.2.3']
                    'yaml: 1.2.3', 'cssutils: 1.2.3']
        assert version._module_versions() == expected

    @pytest.mark.parametrize('module, idx, expected', [
        ('colorlog', 1, 'colorlog: no'),
        ('colorama', 2, 'colorama: no'),
        ('cssutils', 7, 'cssutils: no'),
    ])
    def test_missing_module(self, module, idx, expected, import_fake):
        """Test with a module missing.
@@ -404,12 +406,13 @@ class TestModuleVersions:
    @pytest.mark.parametrize('value, expected', [
        ('VERSION', ['sip: yes', 'colorlog: yes', 'colorama: 1.2.3',
                     'pypeg2: yes', 'jinja2: yes', 'pygments: yes',
                     'yaml: yes']),
                     'yaml: yes', 'cssutils: yes']),
        ('SIP_VERSION_STR', ['sip: 1.2.3', 'colorlog: yes', 'colorama: yes',
                             'pypeg2: yes', 'jinja2: yes', 'pygments: yes',
                             'yaml: yes']),
                             'yaml: yes', 'cssutils: yes']),
        (None, ['sip: yes', 'colorlog: yes', 'colorama: yes', 'pypeg2: yes',
                'jinja2: yes', 'pygments: yes', 'yaml: yes']),
                'jinja2: yes', 'pygments: yes', 'yaml: yes',
                'cssutils: yes']),
    ])
    def test_version_attribute(self, value, expected, import_fake):
        """Test with a different version attribute.
@@ -432,6 +435,7 @@ class TestModuleVersions:
        ('jinja2', True),
        ('pygments', True),
        ('yaml', True),
        ('cssutils', True),
    ])
    def test_existing_attributes(self, name, has_version):
        """Check if all dependencies have an expected __version__ attribute.