Handle non-ASCII in headers/url better
This commit is contained in:
parent
8bb887ddab
commit
ae8a9b8798
@ -33,7 +33,7 @@ import email.mime.multipart
|
||||
from PyQt5.QtCore import QUrl
|
||||
|
||||
from qutebrowser.browser import webelem
|
||||
from qutebrowser.utils import log, objreg, message, usertypes
|
||||
from qutebrowser.utils import log, objreg, message, usertypes, utils, urlutils
|
||||
|
||||
try:
|
||||
import cssutils
|
||||
@ -237,7 +237,7 @@ class _Downloader():
|
||||
|
||||
self.writer = MHTMLWriter(
|
||||
web_frame.toHtml().encode('utf-8'),
|
||||
content_location=web_url.toString(),
|
||||
content_location=urlutils.encoded_url(web_url),
|
||||
# I've found no way of getting the content type of a QWebView, but
|
||||
# since we're using .toHtml, it's probably safe to say that the
|
||||
# content-type is HTML
|
||||
@ -347,8 +347,12 @@ class _Downloader():
|
||||
self.fetch_url(absolute_url)
|
||||
|
||||
encode = E_QUOPRI if mime.startswith('text/') else E_BASE64
|
||||
self.writer.add_file(url.toString(), item.fileobj.getvalue(), mime,
|
||||
encode)
|
||||
# Our MHTML handler refuses non-ASCII headers. This will replace every
|
||||
# non-ASCII char with '?'. This is probably okay, as official Content-
|
||||
# Type headers contain ASCII only anyway. Anything else is madness.
|
||||
mime = utils.force_encoding(mime, 'ascii')
|
||||
self.writer.add_file(urlutils.encoded_url(url),
|
||||
item.fileobj.getvalue(), mime, encode)
|
||||
item.fileobj.actual_close()
|
||||
if self.pending_downloads:
|
||||
return
|
||||
@ -369,7 +373,7 @@ class _Downloader():
|
||||
log.downloads.debug("Oops! Download already gone: %s", item)
|
||||
return
|
||||
item.fileobj.actual_close()
|
||||
self.writer.add_file(url.toString(), b'')
|
||||
self.writer.add_file(urlutils.encoded_url(url), b'')
|
||||
if self.pending_downloads:
|
||||
return
|
||||
self.finish_file()
|
||||
|
@ -438,6 +438,15 @@ def same_domain(url1, url2):
|
||||
return domain1 == domain2
|
||||
|
||||
|
||||
def encoded_url(url):
    """Return the fully encoded form of a URL as a string.

    Args:
        url: The QUrl to encode.

    Return:
        The bytes produced by url.toEncoded(), decoded as ASCII into a str.
    """
    # toEncoded() hands back a byte container; convert it to plain bytes
    # first so it can be decoded into a str.
    raw = url.toEncoded()
    return bytes(raw).decode('ascii')
|
||||
|
||||
|
||||
class IncDecError(Exception):
|
||||
|
||||
"""Exception raised by incdec_number on problems.
|
||||
|
@ -527,6 +527,19 @@ def test_same_domain_invalid_url(url1, url2):
|
||||
with pytest.raises(urlutils.InvalidUrlError):
|
||||
urlutils.same_domain(QUrl(url1), QUrl(url2))
|
||||
|
||||
|
||||
@pytest.mark.parametrize('url, expected', [
    ('http://example.com', 'http://example.com'),
    ('http://ünicode.com', 'http://xn--nicode-2ya.com'),
    ('http://foo.bar/?header=text/pläin',
     'http://foo.bar/?header=text/pl%C3%A4in'),
])
def test_encoded_url(url, expected):
    """Check that encoded_url returns the expected string for each URL.

    The cases are a plain ASCII URL, a URL with a non-ASCII host and a URL
    with a non-ASCII character in the query string.
    """
    assert urlutils.encoded_url(QUrl(url)) == expected
|
||||
|
||||
|
||||
class TestIncDecNumber:
|
||||
|
||||
"""Tests for urlutils.incdec_number()."""
|
||||
|
Loading…
Reference in New Issue
Block a user