Use rfc6266 to handle Content-Disposition in downloads

This commit is contained in:
Florian Bruhin 2014-06-19 20:52:44 +02:00
parent 52d1645479
commit 6ce5afccbe
7 changed files with 86 additions and 34 deletions

View File

@ -9,7 +9,7 @@ arch=(any)
url="http://www.qutebrowser.org/" url="http://www.qutebrowser.org/"
license=('GPL') license=('GPL')
depends=('python>=3.3' 'python-setuptools' 'python-pyqt5>=5.2' 'qt5-base>=5.2' depends=('python>=3.3' 'python-setuptools' 'python-pyqt5>=5.2' 'qt5-base>=5.2'
'qt5-webkit>=5.2' 'libxkbcommon-x11') 'qt5-webkit>=5.2' 'libxkbcommon-x11' 'python-rfc6266')
makedepends=('python' 'python-setuptools') makedepends=('python' 'python-setuptools')
optdepends=('python-colorlog: colored logging output' optdepends=('python-colorlog: colored logging output'
'python-ipdb: better debugging') 'python-ipdb: better debugging')

View File

@ -24,6 +24,7 @@ import os.path
from functools import partial from functools import partial
from collections import deque from collections import deque
import rfc6266
from PyQt5.QtCore import pyqtSlot, pyqtSignal, QObject, QCoreApplication from PyQt5.QtCore import pyqtSlot, pyqtSignal, QObject, QCoreApplication
from PyQt5.QtNetwork import QNetworkRequest, QNetworkReply from PyQt5.QtNetwork import QNetworkRequest, QNetworkReply
@ -32,12 +33,16 @@ import qutebrowser.utils.message as message
import qutebrowser.utils.url as urlutils import qutebrowser.utils.url as urlutils
import qutebrowser.commands.utils as cmdutils import qutebrowser.commands.utils as cmdutils
from qutebrowser.utils.log import downloads as logger from qutebrowser.utils.log import downloads as logger
from qutebrowser.utils.log import fix_rfc2622
from qutebrowser.utils.usertypes import PromptMode, Question, Timer from qutebrowser.utils.usertypes import PromptMode, Question, Timer
from qutebrowser.utils.misc import (interpolate_color, format_seconds, from qutebrowser.utils.misc import (interpolate_color, format_seconds,
format_size, get_http_header) format_size)
from qutebrowser.commands.exceptions import CommandError from qutebrowser.commands.exceptions import CommandError
fix_rfc2622()
class DownloadItem(QObject): class DownloadItem(QObject):
"""A single download currently running. """A single download currently running.
@ -328,7 +333,11 @@ class DownloadManager(QObject):
""" """
# First check if the Content-Disposition header has a filename # First check if the Content-Disposition header has a filename
# attribute. # attribute.
filename = get_http_header(reply, 'Content-Disposition', 'filename') if reply.hasRawHeader('Content-Disposition'):
# We use the unsafe variant of the filename as we sanitize it via
# os.path.basename later.
filename = rfc6266.parse_headers(
bytes(reply.rawHeader('Content-Disposition'))).filename_unsafe
# Then try to get filename from url # Then try to get filename from url
if not filename: if not filename:
filename = reply.url().path() filename = reply.url().path()

View File

@ -163,6 +163,7 @@ class BrowserPage(QWebPage):
start_download: Emitted with the QNetworkReply associated with the start_download: Emitted with the QNetworkReply associated with the
passed request. passed request.
""" """
from qutebrowser.utils.debug import set_trace; set_trace()
reply = self.networkAccessManager().get(request) reply = self.networkAccessManager().get(request)
self.start_download.emit(reply) self.start_download.emit(reply)

View File

@ -218,3 +218,37 @@ def check_pkg_resources():
msgbox.exec_() msgbox.exec_()
app.quit() app.quit()
sys.exit(1) sys.exit(1)
def check_rfc6266():
    """Check if rfc6266 is installed.

    Tries to import rfc6266. If the import fails, a critical QMessageBox
    with installation hints is shown and the process exits with status 1.
    When '--debug' is in sys.argv, the ImportError traceback is printed
    before the message box is displayed.

    Raises:
        SystemExit: If rfc6266 could not be imported.
    """
    from PyQt5.QtWidgets import QApplication, QMessageBox
    try:
        import rfc6266  # pylint: disable=unused-variable
    except ImportError:
        # A QApplication has to exist before any widget can be shown.
        app = QApplication(sys.argv)
        # Every message line carries the same indent, so textwrap.dedent
        # strips it completely and the displayed text has no leading spaces.
        msgbox = QMessageBox(QMessageBox.Critical, "qutebrowser: Fatal error!",
                             textwrap.dedent("""
            Fatal error: rfc6266 is required to run qutebrowser but could
            not be imported! Maybe it's not installed?
            On Debian/Ubuntu:
            No package available, try:
            pip install rfc6266
            On Archlinux:
            pacman -S python-rfc6266
            On Windows:
            pip install rfc6266
            For other distributions:
            Check your package manager for similarly named packages.
            """).strip())
        if '--debug' in sys.argv:
            # Empty line on stderr to separate the traceback visually.
            print(file=sys.stderr)
            traceback.print_exc()
        msgbox.resize(msgbox.sizeHint())
        msgbox.exec_()
        app.quit()
        sys.exit(1)

View File

@ -72,6 +72,7 @@ downloads = getLogger('downloads')
js = getLogger('js') js = getLogger('js')
qt = getLogger('qt') qt = getLogger('qt')
style = getLogger('style') style = getLogger('style')
rfc6266 = getLogger('rfc6266')
ram_handler = None ram_handler = None
@ -86,18 +87,37 @@ def init_log(args):
raise ValueError("Invalid log level: {}".format(args.loglevel)) raise ValueError("Invalid log level: {}".format(args.loglevel))
console, ram = _init_handlers(numeric_level, args.color, args.loglines) console, ram = _init_handlers(numeric_level, args.color, args.loglines)
if args.logfilter is not None and numeric_level <= logging.DEBUG:
console.addFilter(LogFilter(args.logfilter.split(',')))
root = getLogger() root = getLogger()
if console is not None: if console is not None:
if args.logfilter is not None and numeric_level <= logging.DEBUG:
console.addFilter(LogFilter(args.logfilter.split(',')))
console.addFilter(LeplFilter())
root.addHandler(console) root.addHandler(console)
if ram is not None: if ram is not None:
root.addHandler(ram) root.addHandler(ram)
console.addFilter(LeplFilter())
root.setLevel(logging.NOTSET) root.setLevel(logging.NOTSET)
logging.captureWarnings(True) logging.captureWarnings(True)
qInstallMessageHandler(qt_message_handler) qInstallMessageHandler(qt_message_handler)
def fix_rfc2622():
    """Drop the broken NullHandler entry from the rfc6266 logger.

    rfc6266 <= v0.04 registers the NullHandler *class* as a log handler
    instead of an instance of it, which causes an exception later on.

    Upstream fixed this in [1], but v0.05 has not been released yet, so the
    wrong handler is removed here as a workaround.

    Call this after init_log has finished and rfc6266 has been imported, but
    before rfc6266 is actually used.

    [1]: https://github.com/g2p/rfc6266/commit/cad58963ed13f5e1068fcc9e4326123b6b2bdcf8
    """
    # The class itself (not an instance) is what got registered, so the
    # class object is what has to be removed.
    bogus_handler = logging.NullHandler
    rfc6266.removeHandler(bogus_handler)
def _init_handlers(level, color, ram_capacity): def _init_handlers(level, color, ram_capacity):
"""Init log handlers. """Init log handlers.
@ -255,6 +275,22 @@ class LogFilter(logging.Filter):
return False return False
class LeplFilter(logging.Filter):

    """Log filter which hides noisy records emitted by the lepl library.

    Dropped records:
        - The INFO record of the 'lepl.lexer.rewriters.AddLexer' logger.
        - Records at DEBUG level or below from any 'lepl.*' logger.

    Everything else is passed through.
    """

    def filter(self, record):
        """Return True if the record should be logged, False to drop it."""
        logger_name = record.name
        severity = record.levelno
        # Special useless info message triggered by rfc6266
        is_addlexer_info = (severity == logging.INFO and
                            logger_name == 'lepl.lexer.rewriters.AddLexer')
        if is_addlexer_info:
            return False
        if severity <= logging.DEBUG:
            # Only DEBUG-or-lower records are filtered by logger name.
            return not logger_name.startswith('lepl.')
        # More important than DEBUG, so it is never filtered.
        return True
class RAMHandler(logging.Handler): class RAMHandler(logging.Handler):
"""Logging handler which keeps the messages in a deque in RAM. """Logging handler which keeps the messages in a deque in RAM.

View File

@ -28,8 +28,6 @@ import os
import re import re
import sys import sys
import shlex import shlex
import email
import email.policy
import os.path import os.path
import operator import operator
import urllib.request import urllib.request
@ -397,33 +395,6 @@ def check_print_compat():
return not (os.name == 'nt' and qt_version_check('5.3.0', operator.lt)) return not (os.name == 'nt' and qt_version_check('5.3.0', operator.lt))
def get_http_header(reply, headername, param=None):
    """Read a HTTP header, or one of its parameters, from a reply.

    The header line is rebuilt from the raw header bytes and parsed with the
    email package, as HTTP and email headers are both MIME headers.

    Args:
        reply: The QNetworkReply to get the header from.
        headername: The name of the header.
        param: The name of the param to get, or None to get the whole contents.

    Return:
        The data as a string, or None if the data wasn't found.

    FIXME add tests
    """
    if not reply.hasRawHeader(headername):
        return None
    name_bytes = headername.encode('ascii')
    value_bytes = bytes(reply.rawHeader(headername))
    # Rebuild a single "Name: value" line which the email parser understands.
    header_line = name_bytes + b': ' + value_bytes
    parsed = email.message_from_bytes(header_line, policy=email.policy.HTTP)
    if param is None:
        return parsed.get(headername, None)
    return parsed.get_param(param, header=headername)
class EventLoop(QEventLoop): class EventLoop(QEventLoop):
"""A thin wrapper around QEventLoop. """A thin wrapper around QEventLoop.

View File

@ -92,6 +92,7 @@ setupdata = {
"QtWebKit."), "QtWebKit."),
'long_description': read_file('README'), 'long_description': read_file('README'),
'url': 'http://www.qutebrowser.org/', 'url': 'http://www.qutebrowser.org/',
'requires': 'rfc6266',
'author': _get_constant('author'), 'author': _get_constant('author'),
'author_email': _get_constant('email'), 'author_email': _get_constant('email'),
'license': _get_constant('license'), 'license': _get_constant('license'),