Use rfc6266 to handle Content-Disposition in downloads

This commit is contained in:
Florian Bruhin 2014-06-19 20:52:44 +02:00
parent 52d1645479
commit 6ce5afccbe
7 changed files with 86 additions and 34 deletions

View File

@ -9,7 +9,7 @@ arch=(any)
url="http://www.qutebrowser.org/"
license=('GPL')
depends=('python>=3.3' 'python-setuptools' 'python-pyqt5>=5.2' 'qt5-base>=5.2'
'qt5-webkit>=5.2' 'libxkbcommon-x11')
'qt5-webkit>=5.2' 'libxkbcommon-x11' 'python-rfc6266')
makedepends=('python' 'python-setuptools')
optdepends=('python-colorlog: colored logging output'
'python-ipdb: better debugging')

View File

@ -24,6 +24,7 @@ import os.path
from functools import partial
from collections import deque
import rfc6266
from PyQt5.QtCore import pyqtSlot, pyqtSignal, QObject, QCoreApplication
from PyQt5.QtNetwork import QNetworkRequest, QNetworkReply
@ -32,12 +33,16 @@ import qutebrowser.utils.message as message
import qutebrowser.utils.url as urlutils
import qutebrowser.commands.utils as cmdutils
from qutebrowser.utils.log import downloads as logger
from qutebrowser.utils.log import fix_rfc2622
from qutebrowser.utils.usertypes import PromptMode, Question, Timer
from qutebrowser.utils.misc import (interpolate_color, format_seconds,
format_size, get_http_header)
format_size)
from qutebrowser.commands.exceptions import CommandError
fix_rfc2622()
class DownloadItem(QObject):
"""A single download currently running.
@ -328,7 +333,11 @@ class DownloadManager(QObject):
"""
# First check if the Content-Disposition header has a filename
# attribute.
filename = get_http_header(reply, 'Content-Disposition', 'filename')
if reply.hasRawHeader('Content-Disposition'):
# We use the unsafe variant of the filename as we sanitize it via
# os.path.basename later.
filename = rfc6266.parse_headers(
bytes(reply.rawHeader('Content-Disposition'))).filename_unsafe
# Then try to get filename from url
if not filename:
filename = reply.url().path()

View File

@ -163,6 +163,7 @@ class BrowserPage(QWebPage):
start_download: Emitted with the QNetworkReply associated with the
passed request.
"""
from qutebrowser.utils.debug import set_trace; set_trace()
reply = self.networkAccessManager().get(request)
self.start_download.emit(reply)

View File

@ -218,3 +218,37 @@ def check_pkg_resources():
msgbox.exec_()
app.quit()
sys.exit(1)
def check_rfc6266():
    """Check if rfc6266 is installed.

    If the import succeeds, nothing happens. If it fails, a critical
    QMessageBox with installation hints is shown and the process exits with
    status 1. When '--debug' is in sys.argv, the ImportError traceback is
    additionally printed to stderr before the dialog is shown.
    """
    from PyQt5.QtWidgets import QApplication, QMessageBox
    try:
        import rfc6266  # pylint: disable=unused-variable
    except ImportError:
        # A QApplication has to exist before a message box can be shown.
        app = QApplication(sys.argv)
        msgbox = QMessageBox(QMessageBox.Critical, "qutebrowser: Fatal error!",
                             textwrap.dedent("""
            Fatal error: rfc6266 is required to run qutebrowser but could
            not be imported! Maybe it's not installed?
            On Debian/Ubuntu:
            No package available, try:
            pip install rfc6266
            On Archlinux:
            pacman -S python-rfc6266
            On Windows:
            pip install rfc6266
            For other distributions:
            Check your package manager for similarly named packages.
            """).strip())
        if '--debug' in sys.argv:
            # Blank line to separate the traceback from earlier output.
            print(file=sys.stderr)
            traceback.print_exc()
        msgbox.resize(msgbox.sizeHint())
        msgbox.exec_()
        app.quit()
        sys.exit(1)

View File

@ -72,6 +72,7 @@ downloads = getLogger('downloads')
js = getLogger('js')
qt = getLogger('qt')
style = getLogger('style')
rfc6266 = getLogger('rfc6266')
ram_handler = None
@ -86,18 +87,37 @@ def init_log(args):
raise ValueError("Invalid log level: {}".format(args.loglevel))
console, ram = _init_handlers(numeric_level, args.color, args.loglines)
if args.logfilter is not None and numeric_level <= logging.DEBUG:
console.addFilter(LogFilter(args.logfilter.split(',')))
root = getLogger()
if console is not None:
if args.logfilter is not None and numeric_level <= logging.DEBUG:
console.addFilter(LogFilter(args.logfilter.split(',')))
console.addFilter(LeplFilter())
root.addHandler(console)
if ram is not None:
root.addHandler(ram)
console.addFilter(LeplFilter())
root.setLevel(logging.NOTSET)
logging.captureWarnings(True)
qInstallMessageHandler(qt_message_handler)
def fix_rfc2622():
    """Remove the broken NullHandler from the rfc6266 logger.

    In rfc6266 <= v0.04, the NullHandler class itself is added as a handler
    instead of an instance of it, which causes an exception later.

    This was fixed upstream in [1], but since v0.05 is not out yet, we work
    around the issue by deleting the wrong handler.

    This should be executed after init_log is done and rfc6266 is imported,
    but before using it.

    Note: the function name contains a typo ("rfc2622" instead of
    "rfc6266"); it is kept because other modules import it by this name.

    [1]: https://github.com/g2p/rfc6266/commit/cad58963ed13f5e1068fcc9e4326123b6b2bdcf8
    """
    # "rfc6266" here is this module's logger object for the 'rfc6266' name,
    # not the library. The offending "handler" is the class, not an instance.
    broken_handler = logging.NullHandler
    rfc6266.removeHandler(broken_handler)
def _init_handlers(level, color, ram_capacity):
"""Init log handlers.
@ -255,6 +275,22 @@ class LogFilter(logging.Filter):
return False
class LeplFilter(logging.Filter):

    """Logging filter which hides noisy records from the lepl library.

    Records are dropped in two cases:
        - The INFO record of the 'lepl.lexer.rewriters.AddLexer' logger.
        - Any record at DEBUG level or below whose logger name starts with
          'lepl.'.

    Everything else passes through.
    """

    def filter(self, record):
        """Return True if the record should be logged, False to drop it."""
        is_addlexer_info = (
            record.name == 'lepl.lexer.rewriters.AddLexer' and
            record.levelno == logging.INFO)
        if is_addlexer_info:
            # Special useless info message triggered by rfc6266
            return False
        if record.levelno <= logging.DEBUG:
            # Only debug-level (or lower) records of lepl loggers are hidden.
            return not record.name.startswith('lepl.')
        # More important than DEBUG, so we don't filter at all.
        return True
class RAMHandler(logging.Handler):
"""Logging handler which keeps the messages in a deque in RAM.

View File

@ -28,8 +28,6 @@ import os
import re
import sys
import shlex
import email
import email.policy
import os.path
import operator
import urllib.request
@ -397,33 +395,6 @@ def check_print_compat():
return not (os.name == 'nt' and qt_version_check('5.3.0', operator.lt))
def get_http_header(reply, headername, param=None):
    """Get a HTTP header, or one of its parameters, from a reply.

    The header is parsed with the email package, because both HTTP and email
    headers are MIME headers and email supports that.

    Args:
        reply: The QNetworkReply to get the header from.
        headername: The name of the header.
        param: The name of the param to get, or None to get the whole contents.

    Return:
        The data as a string, or None if the data wasn't found.

    FIXME add tests
    """
    if not reply.hasRawHeader(headername):
        return None
    # Rebuild a "Name: value" line so the email parser can read it.
    name_bytes = headername.encode('ascii')
    value_bytes = bytes(reply.rawHeader(headername))
    header_line = name_bytes + b': ' + value_bytes
    parsed = email.message_from_bytes(header_line, policy=email.policy.HTTP)
    if param is None:
        return parsed.get(headername, None)
    return parsed.get_param(param, header=headername)
class EventLoop(QEventLoop):
"""A thin wrapper around QEventLoop.

View File

@ -92,6 +92,7 @@ setupdata = {
"QtWebKit."),
'long_description': read_file('README'),
'url': 'http://www.qutebrowser.org/',
'requires': 'rfc6266',
'author': _get_constant('author'),
'author_email': _get_constant('email'),
'license': _get_constant('license'),