Handle non-ASCII in headers/url better

This commit is contained in:
Daniel 2015-10-19 14:05:59 +02:00
parent 8bb887ddab
commit ae8a9b8798
3 changed files with 31 additions and 5 deletions

View File

@ -33,7 +33,7 @@ import email.mime.multipart
from PyQt5.QtCore import QUrl
from qutebrowser.browser import webelem
from qutebrowser.utils import log, objreg, message, usertypes
from qutebrowser.utils import log, objreg, message, usertypes, utils, urlutils
try:
import cssutils
@ -237,7 +237,7 @@ class _Downloader():
self.writer = MHTMLWriter(
web_frame.toHtml().encode('utf-8'),
content_location=web_url.toString(),
content_location=urlutils.encoded_url(web_url),
# I've found no way of getting the content type of a QWebView, but
# since we're using .toHtml, it's probably safe to say that the
# content-type is HTML
@ -347,8 +347,12 @@ class _Downloader():
self.fetch_url(absolute_url)
encode = E_QUOPRI if mime.startswith('text/') else E_BASE64
self.writer.add_file(url.toString(), item.fileobj.getvalue(), mime,
encode)
# Our MHTML handler refuses non-ASCII headers. This will replace every
# non-ASCII char with '?'. This is probably okay, as official Content-
# Type headers contain ASCII only anyway. Anything else is madness.
mime = utils.force_encoding(mime, 'ascii')
self.writer.add_file(urlutils.encoded_url(url),
item.fileobj.getvalue(), mime, encode)
item.fileobj.actual_close()
if self.pending_downloads:
return
@ -369,7 +373,7 @@ class _Downloader():
log.downloads.debug("Oops! Download already gone: %s", item)
return
item.fileobj.actual_close()
self.writer.add_file(url.toString(), b'')
# Register the URL with an empty body, using the ASCII-encoded URL form.
self.writer.add_file(urlutils.encoded_url(url), b'')
if self.pending_downloads:
return
self.finish_file()

View File

@ -438,6 +438,15 @@ def same_domain(url1, url2):
return domain1 == domain2
def encoded_url(url):
    """Return the fully encoded url as string.

    Args:
        url: The url to encode as QUrl.

    Return:
        The result of url.toEncoded() decoded as ASCII, as a str.
    """
    # toEncoded() yields a bytes-like object; converting it to bytes first
    # lets us decode it into a regular str.
    return bytes(url.toEncoded()).decode('ascii')
class IncDecError(Exception):
"""Exception raised by incdec_number on problems.

View File

@ -527,6 +527,19 @@ def test_same_domain_invalid_url(url1, url2):
with pytest.raises(urlutils.InvalidUrlError):
urlutils.same_domain(QUrl(url1), QUrl(url2))
@pytest.mark.parametrize('url, expected', [
    ('http://example.com', 'http://example.com'),
    ('http://ünicode.com', 'http://xn--nicode-2ya.com'),
    ('http://foo.bar/?header=text/pläin',
     'http://foo.bar/?header=text/pl%C3%A4in'),
])
def test_encoded_url(url, expected):
    """Check that encoded_url returns the expected encoded string.

    Covers a plain ASCII URL, a non-ASCII host and a non-ASCII query value.
    """
    url = QUrl(url)
    assert urlutils.encoded_url(url) == expected
class TestIncDecNumber:
"""Tests for urlutils.incdec_number()."""