Style fixes

This commit is contained in:
Daniel Schadt 2015-11-21 00:10:49 +01:00
parent b776aeac84
commit c12011c84d
9 changed files with 93 additions and 71 deletions

View File

@ -1184,24 +1184,21 @@ class CommandDispatcher:
Args: Args:
dest: The file path to write the download to. dest: The file path to write the download to.
""" """
tab_id = self._current_widget().tab_id web_view = self._current_widget()
if dest is None: if dest is None:
suggested_fn = self._current_title() + ".mht" suggested_fn = self._current_title() + ".mht"
suggested_fn = utils.sanitize_filename(suggested_fn) suggested_fn = utils.sanitize_filename(suggested_fn)
filename, q = downloads.ask_for_filename( filename, q = downloads.ask_for_filename(
suggested_fn, self._win_id, parent=self, suggested_fn, self._win_id, parent=web_view,
) )
if filename is not None: if filename is not None:
mhtml.start_download_checked(filename, win_id=self._win_id, mhtml.start_download_checked(filename, web_view=web_view)
tab_id=tab_id)
else: else:
q.answered.connect(functools.partial( q.answered.connect(functools.partial(
mhtml.start_download_checked, win_id=self._win_id, mhtml.start_download_checked, web_view=web_view))
tab_id=tab_id))
q.ask() q.ask()
else: else:
mhtml.start_download_checked(dest, win_id=self._win_id, mhtml.start_download_checked(dest, web_view=web_view)
tab_id=tab_id)
@cmdutils.register(instance='command-dispatcher', scope='window', @cmdutils.register(instance='command-dispatcher', scope='window',
deprecated="Use :download instead.") deprecated="Use :download instead.")

View File

@ -139,7 +139,7 @@ def ask_for_filename(suggested_filename, win_id, *, parent=None,
'prompt-download-directory') 'prompt-download-directory')
if not prompt_download_directory: if not prompt_download_directory:
return DownloadPath(download_dir(), None) return DownloadPath(filename=download_dir(), question=None)
encoding = sys.getfilesystemencoding() encoding = sys.getfilesystemencoding()
suggested_filename = utils.force_encoding(suggested_filename, suggested_filename = utils.force_encoding(suggested_filename,
@ -154,7 +154,7 @@ def ask_for_filename(suggested_filename, win_id, *, parent=None,
message_bridge = objreg.get('message-bridge', scope='window', message_bridge = objreg.get('message-bridge', scope='window',
window=win_id) window=win_id)
q.ask = lambda: message_bridge.ask(q, blocking=False) q.ask = lambda: message_bridge.ask(q, blocking=False)
return DownloadPath(None, q) return DownloadPath(filename=None, question=q)
class DownloadItemStats(QObject): class DownloadItemStats(QObject):

View File

@ -146,7 +146,7 @@ E_BASE64 = email.encoders.encode_base64
E_QUOPRI = email.encoders.encode_quopri E_QUOPRI = email.encoders.encode_quopri
class MHTMLWriter(): class MHTMLWriter:
"""A class for outputting multiple files to a MHTML document. """A class for outputting multiple files to a MHTML document.
@ -217,7 +217,7 @@ class MHTMLWriter():
return msg return msg
class _Downloader(): class _Downloader:
"""A class to download whole websites. """A class to download whole websites.
@ -227,20 +227,21 @@ class _Downloader():
writer: The MHTMLWriter object which is used to save the page. writer: The MHTMLWriter object which is used to save the page.
loaded_urls: A set of QUrls of finished asset downloads. loaded_urls: A set of QUrls of finished asset downloads.
pending_downloads: A set of unfinished (url, DownloadItem) tuples. pending_downloads: A set of unfinished (url, DownloadItem) tuples.
_finished: A flag indicating if the file has already been written. _finished_file: A flag indicating if the file has already been
written.
_used: A flag indicating if the downloader has already been used. _used: A flag indicating if the downloader has already been used.
_win_id: The window this downloader belongs to. _win_id: The window this downloader belongs to.
""" """
def __init__(self, web_view, dest, win_id): def __init__(self, web_view, dest):
self.web_view = web_view self.web_view = web_view
self.dest = dest self.dest = dest
self.writer = None self.writer = None
self.loaded_urls = {web_view.url()} self.loaded_urls = {web_view.url()}
self.pending_downloads = set() self.pending_downloads = set()
self._finished = False self._finished_file = False
self._used = False self._used = False
self._win_id = win_id self._win_id = web_view.win_id
def run(self): def run(self):
"""Download and save the page. """Download and save the page.
@ -280,31 +281,36 @@ class _Downloader():
# Might be a local <script> tag or something else # Might be a local <script> tag or something else
continue continue
absolute_url = web_url.resolved(QUrl(element_url)) absolute_url = web_url.resolved(QUrl(element_url))
self.fetch_url(absolute_url) self._fetch_url(absolute_url)
styles = web_frame.findAllElements('style') styles = web_frame.findAllElements('style')
for style in styles: for style in styles:
style = webelem.WebElementWrapper(style) style = webelem.WebElementWrapper(style)
# The Mozilla Developer Network says:
# type: This attribute defines the styling language as a MIME type
# (charset should not be specified). This attribute is optional and
# default to text/css if it's missing.
# https://developer.mozilla.org/en/docs/Web/HTML/Element/style
if 'type' in style and style['type'] != 'text/css': if 'type' in style and style['type'] != 'text/css':
continue continue
for element_url in _get_css_imports(str(style)): for element_url in _get_css_imports(str(style)):
self.fetch_url(web_url.resolved(QUrl(element_url))) self._fetch_url(web_url.resolved(QUrl(element_url)))
# Search for references in inline styles # Search for references in inline styles
for element in web_frame.findAllElements('[style]'): for element in web_frame.findAllElements('[style]'):
element = webelem.WebElementWrapper(element) element = webelem.WebElementWrapper(element)
style = element['style'] style = element['style']
for element_url in _get_css_imports(style, inline=True): for element_url in _get_css_imports(style, inline=True):
self.fetch_url(web_url.resolved(QUrl(element_url))) self._fetch_url(web_url.resolved(QUrl(element_url)))
# Shortcut if no assets need to be downloaded, otherwise the file would # Shortcut if no assets need to be downloaded, otherwise the file would
# never be saved. Also might happen if the downloads are fast enough to # never be saved. Also might happen if the downloads are fast enough to
# complete before connecting their finished signal. # complete before connecting their finished signal.
self.collect_zombies() self._collect_zombies()
if not self.pending_downloads and not self._finished: if not self.pending_downloads and not self._finished_file:
self.finish_file() self._finish_file()
def fetch_url(self, url): def _fetch_url(self, url):
"""Download the given url and add the file to the collection. """Download the given url and add the file to the collection.
Args: Args:
@ -317,7 +323,7 @@ class _Downloader():
return return
self.loaded_urls.add(url) self.loaded_urls.add(url)
log.downloads.debug("loading asset at %s", url) log.downloads.debug("loading asset at {}".format(url))
# Using the download manager to download host-blocked urls might crash # Using the download manager to download host-blocked urls might crash
# qute, see the comments/discussion on # qute, see the comments/discussion on
@ -325,7 +331,7 @@ class _Downloader():
# and https://github.com/The-Compiler/qutebrowser/issues/1053 # and https://github.com/The-Compiler/qutebrowser/issues/1053
host_blocker = objreg.get('host-blocker') host_blocker = objreg.get('host-blocker')
if host_blocker.is_blocked(url): if host_blocker.is_blocked(url):
log.downloads.debug("Skipping %s, host-blocked", url) log.downloads.debug("Skipping {}, host-blocked".format(url))
# We still need an empty file in the output, QWebView can be pretty # We still need an empty file in the output, QWebView can be pretty
# picky about displaying a file correctly when not all assets are # picky about displaying a file correctly when not all assets are
# at least referenced in the mhtml file. # at least referenced in the mhtml file.
@ -333,18 +339,18 @@ class _Downloader():
return return
download_manager = objreg.get('download-manager', scope='window', download_manager = objreg.get('download-manager', scope='window',
window='current') window=self._win_id)
item = download_manager.get(url, fileobj=_NoCloseBytesIO(), item = download_manager.get(url, fileobj=_NoCloseBytesIO(),
auto_remove=True) auto_remove=True)
self.pending_downloads.add((url, item)) self.pending_downloads.add((url, item))
item.finished.connect( item.finished.connect(
functools.partial(self.finished, url, item)) functools.partial(self._finished, url, item))
item.error.connect( item.error.connect(
functools.partial(self.error, url, item)) functools.partial(self._error, url, item))
item.cancelled.connect( item.cancelled.connect(
functools.partial(self.error, url, item)) functools.partial(self._error, url, item))
def finished(self, url, item): def _finished(self, url, item):
"""Callback when a single asset is downloaded. """Callback when a single asset is downloaded.
Args: Args:
@ -375,12 +381,12 @@ class _Downloader():
try: try:
css_string = item.fileobj.getvalue().decode('utf-8') css_string = item.fileobj.getvalue().decode('utf-8')
except UnicodeDecodeError: except UnicodeDecodeError:
log.downloads.warning("Invalid UTF-8 data in %s", url) log.downloads.warning("Invalid UTF-8 data in {}".format(url))
css_string = item.fileobj.getvalue().decode('utf-8', 'ignore') css_string = item.fileobj.getvalue().decode('utf-8', 'ignore')
import_urls = _get_css_imports(css_string) import_urls = _get_css_imports(css_string)
for import_url in import_urls: for import_url in import_urls:
absolute_url = url.resolved(QUrl(import_url)) absolute_url = url.resolved(QUrl(import_url))
self.fetch_url(absolute_url) self._fetch_url(absolute_url)
encode = E_QUOPRI if mime.startswith('text/') else E_BASE64 encode = E_QUOPRI if mime.startswith('text/') else E_BASE64
# Our MHTML handler refuses non-ASCII headers. This will replace every # Our MHTML handler refuses non-ASCII headers. This will replace every
@ -392,9 +398,9 @@ class _Downloader():
item.fileobj.actual_close() item.fileobj.actual_close()
if self.pending_downloads: if self.pending_downloads:
return return
self.finish_file() self._finish_file()
def error(self, url, item, *_args): def _error(self, url, item, *_args):
"""Callback when a download error occurred. """Callback when a download error occurred.
Args: Args:
@ -406,40 +412,33 @@ class _Downloader():
except KeyError: except KeyError:
# This might happen if .collect_zombies() calls .finished() and the # This might happen if .collect_zombies() calls .finished() and the
# error handler will be called after .collect_zombies # error handler will be called after .collect_zombies
log.downloads.debug("Oops! Download already gone: %s", item) log.downloads.debug("Oops! Download already gone: {}".format(item))
return return
item.fileobj.actual_close() item.fileobj.actual_close()
# Add a stub file, see comment in .fetch_url() for more information # Add a stub file, see comment in .fetch_url() for more information
self.writer.add_file(urlutils.encoded_url(url), b'') self.writer.add_file(urlutils.encoded_url(url), b'')
if self.pending_downloads: if self.pending_downloads:
return return
self.finish_file() self._finish_file()
def finish_file(self): def _finish_file(self):
"""Save the file to the filename given in __init__.""" """Save the file to the filename given in __init__."""
if self._finished: if self._finished_file:
log.downloads.debug("finish_file called twice, ignored!") log.downloads.debug("finish_file called twice, ignored!")
return return
self._finished = True self._finished_file = True
log.downloads.debug("All assets downloaded, ready to finish off!") log.downloads.debug("All assets downloaded, ready to finish off!")
try: try:
file_output = open(self.dest, 'wb') with open(self.dest, 'wb') as file_output:
self.writer.write_to(file_output)
except OSError as error: except OSError as error:
message.error(self._win_id, message.error(self._win_id,
"Could not save file: {}".format(error)) "Could not save file: {}".format(error))
return return
else:
# Yes, ugly nesting of try-blocks, but necessary because there's
# no way to let the try block only affect the with-statement-
# entry-code and not the with-statement-body.
try:
self.writer.write_to(file_output)
finally:
file_output.close()
log.downloads.debug("File successfully written.") log.downloads.debug("File successfully written.")
message.info(self._win_id, "Page saved as {}".format(self.dest)) message.info(self._win_id, "Page saved as {}".format(self.dest))
def collect_zombies(self): def _collect_zombies(self):
"""Collect done downloads and add their data to the MHTML file. """Collect done downloads and add their data to the MHTML file.
This is needed if a download finishes before attaching its This is needed if a download finishes before attaching its
@ -447,9 +446,9 @@ class _Downloader():
""" """
items = set((url, item) for url, item in self.pending_downloads items = set((url, item) for url, item in self.pending_downloads
if item.done) if item.done)
log.downloads.debug("Zombie downloads: %s", items) log.downloads.debug("Zombie downloads: {}".format(items))
for url, item in items: for url, item in items:
self.finished(url, item) self._finished(url, item)
class _NoCloseBytesIO(io.BytesIO): # pylint: disable=no-init class _NoCloseBytesIO(io.BytesIO): # pylint: disable=no-init
@ -469,7 +468,7 @@ class _NoCloseBytesIO(io.BytesIO): # pylint: disable=no-init
super().close() super().close()
def _start_download(dest, win_id, tab_id): def _start_download(dest, web_view):
"""Start downloading the current page and all assets to a MHTML file. """Start downloading the current page and all assets to a MHTML file.
This will overwrite dest if it already exists. This will overwrite dest if it already exists.
@ -478,21 +477,19 @@ def _start_download(dest, win_id, tab_id):
dest: The filename where the resulting file should be saved. dest: The filename where the resulting file should be saved.
win_id, tab_id: Specify the tab whose page should be loaded. win_id, tab_id: Specify the tab whose page should be loaded.
""" """
web_view = objreg.get('webview', scope='tab', window=win_id, tab=tab_id) loader = _Downloader(web_view, dest)
loader = _Downloader(web_view, dest, win_id)
loader.run() loader.run()
def start_download_checked(dest, win_id, tab_id): def start_download_checked(dest, web_view):
"""First check if dest is already a file, then start the download. """First check if dest is already a file, then start the download.
Args: Args:
dest: The filename where the resulting file should be saved. dest: The filename where the resulting file should be saved.
win_id, tab_id: Specify the tab whose page should be loaded. web_view: Specify the webview whose page should be loaded.
""" """
# The default name is 'page title.mht' # The default name is 'page title.mht'
title = (objreg.get('webview', scope='tab', window=win_id, tab=tab_id) title = web_view.title()
.title())
default_name = utils.sanitize_filename(title + '.mht') default_name = utils.sanitize_filename(title + '.mht')
# Remove characters which cannot be expressed in the file system encoding # Remove characters which cannot be expressed in the file system encoding
@ -516,11 +513,12 @@ def start_download_checked(dest, win_id, tab_id):
# saving the file anyway. # saving the file anyway.
if not os.path.isdir(os.path.dirname(path)): if not os.path.isdir(os.path.dirname(path)):
folder = os.path.dirname(path) folder = os.path.dirname(path)
message.error(win_id, "Directory {} does not exist.".format(folder)) message.error(web_view.win_id,
"Directory {} does not exist.".format(folder))
return return
if not os.path.isfile(path): if not os.path.isfile(path):
_start_download(path, win_id=win_id, tab_id=tab_id) _start_download(path, web_view=web_view)
return return
q = usertypes.Question() q = usertypes.Question()
@ -528,7 +526,7 @@ def start_download_checked(dest, win_id, tab_id):
q.text = "{} exists. Overwrite?".format(path) q.text = "{} exists. Overwrite?".format(path)
q.completed.connect(q.deleteLater) q.completed.connect(q.deleteLater)
q.answered_yes.connect(functools.partial( q.answered_yes.connect(functools.partial(
_start_download, path, win_id=win_id, tab_id=tab_id)) _start_download, path, web_view=web_view))
message_bridge = objreg.get('message-bridge', scope='window', message_bridge = objreg.get('message-bridge', scope='window',
window=win_id) window=web_view.win_id)
message_bridge.ask(q, blocking=False) message_bridge.ask(q, blocking=False)

View File

@ -50,7 +50,7 @@ Feature: Downloading things from a website.
Scenario: Two destinations given Scenario: Two destinations given
When I run :download --dest destination2 http://localhost:(port)/ destination1 When I run :download --dest destination2 http://localhost:(port)/ destination1
Then the warning ":download [url] [dest] is deprecated - use download --dest [dest] [url]" should be shown. Then the warning ":download [url] [dest] is deprecated - use download --dest [dest] [url]" should be shown.
Then the error "Can't give two destinations for the download." should be shown. And the error "Can't give two destinations for the download." should be shown.
Scenario: :download --mhtml with an URL given Scenario: :download --mhtml with an URL given
When I run :download --mhtml http://foobar/ When I run :download --mhtml http://foobar/

View File

@ -83,10 +83,11 @@ def test_mhtml(test_name, download_dir, quteproc, httpbin):
quteproc.open_path('{}/{}.html'.format(test_path, test_name)) quteproc.open_path('{}/{}.html'.format(test_path, test_name))
download_dest = os.path.join(download_dir.location, download_dest = os.path.join(download_dir.location,
'{}-downloaded.mht'.format(test_name)) '{}-downloaded.mht'.format(test_name))
httpbin.after_test() # Discard all requests that were necessary to display the page
httpbin.clear_data()
quteproc.send_cmd(':download --mhtml --dest "{}"'.format(download_dest)) quteproc.send_cmd(':download --mhtml --dest "{}"'.format(download_dest))
quteproc.wait_for(category='downloads', module='mhtml', quteproc.wait_for(category='downloads', module='mhtml',
function='finish_file', function='_finish_file',
message='File successfully written.') message='File successfully written.')
expected_file = os.path.join(test_dir, '{}.mht'.format(test_name)) expected_file = os.path.join(test_dir, '{}.mht'.format(test_name))

View File

@ -182,13 +182,17 @@ class Process(QObject):
time.sleep(1) time.sleep(1)
# Exit the process to make sure we're in a defined state again # Exit the process to make sure we're in a defined state again
self.terminate() self.terminate()
self._data.clear() self.clear_data()
raise InvalidLine(self._invalid) raise InvalidLine(self._invalid)
self._data.clear() self.clear_data()
if not self.is_running(): if not self.is_running():
raise ProcessExited raise ProcessExited
def clear_data(self):
"""Clear the collected data."""
self._data.clear()
def terminate(self): def terminate(self):
"""Clean up and shut down the process.""" """Clean up and shut down the process."""
self.proc.terminate() self.proc.terminate()

View File

@ -117,10 +117,9 @@ class ExpectedRequest:
else: else:
return NotImplemented return NotImplemented
def __str__(self): def __repr__(self):
return '<ExpectedRequest {} "{}">'.format(self.verb, self.path) return ('ExpectedRequest(verb={!r}, path={!r})'
.format(self.verb, self.path))
__repr__ = __str__
class HTTPBin(testprocess.Process): class HTTPBin(testprocess.Process):

View File

@ -73,6 +73,7 @@ class WSGIServer(cherrypy.wsgiserver.CherryPyWSGIServer):
"""A custom WSGIServer that prints a line on stderr when it's ready.""" """A custom WSGIServer that prints a line on stderr when it's ready."""
# pylint: disable=no-member # pylint: disable=no-member
# WORKAROUND for https://bitbucket.org/logilab/pylint/issues/702
def __init__(self, *args, **kwargs): def __init__(self, *args, **kwargs):
super().__init__(*args, **kwargs) super().__init__(*args, **kwargs)
@ -94,6 +95,7 @@ class WSGIServer(cherrypy.wsgiserver.CherryPyWSGIServer):
def main(): def main():
# pylint: disable=no-member # pylint: disable=no-member
# WORKAROUND for https://bitbucket.org/logilab/pylint/issues/702
# "Instance of 'WSGIServer' has no 'start' member (no-member)" # "Instance of 'WSGIServer' has no 'start' member (no-member)"
# "Instance of 'WSGIServer' has no 'stop' member (no-member)" # "Instance of 'WSGIServer' has no 'stop' member (no-member)"

View File

@ -1,7 +1,28 @@
# vim: ft=python fileencoding=utf-8 sts=4 sw=4 et: # vim: ft=python fileencoding=utf-8 sts=4 sw=4 et:
# Copyright 2015 Daniel Schadt
#
# This file is part of qutebrowser.
#
# qutebrowser is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# qutebrowser is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with qutebrowser. If not, see <http://www.gnu.org/licenses/>.
"""Tests for qutebrowser.browser.mhtml."""
import io import io
import textwrap import textwrap
import re import re
import pytest import pytest
from qutebrowser.browser import mhtml from qutebrowser.browser import mhtml