diff --git a/qutebrowser/browser/mhtml.py b/qutebrowser/browser/mhtml.py
index e7bdca516..82bb20ed0 100644
--- a/qutebrowser/browser/mhtml.py
+++ b/qutebrowser/browser/mhtml.py
@@ -33,7 +33,7 @@ import email.mime.multipart
from PyQt5.QtCore import QUrl
from qutebrowser.browser import webelem
-from qutebrowser.utils import log, objreg, message, usertypes
+from qutebrowser.utils import log, objreg, message, usertypes, utils, urlutils
try:
import cssutils
@@ -237,7 +237,7 @@ class _Downloader():
self.writer = MHTMLWriter(
web_frame.toHtml().encode('utf-8'),
- content_location=web_url.toString(),
+ content_location=urlutils.encoded_url(web_url),
# I've found no way of getting the content type of a QWebView, but
# since we're using .toHtml, it's probably safe to say that the
# content-type is HTML
@@ -347,8 +347,12 @@ class _Downloader():
self.fetch_url(absolute_url)
encode = E_QUOPRI if mime.startswith('text/') else E_BASE64
- self.writer.add_file(url.toString(), item.fileobj.getvalue(), mime,
- encode)
+ # Our MHTML handler refuses non-ASCII headers, so replace every non-ASCII
+ # char with '?'. That is probably okay, as official Content-Type headers
+ # contain ASCII only anyway. Anything else is madness.
+ mime = utils.force_encoding(mime, 'ascii')
+ self.writer.add_file(urlutils.encoded_url(url),
+ item.fileobj.getvalue(), mime, encode)
item.fileobj.actual_close()
if self.pending_downloads:
return
@@ -369,7 +373,7 @@ class _Downloader():
log.downloads.debug("Oops! Download already gone: %s", item)
return
item.fileobj.actual_close()
- self.writer.add_file(url.toString(), b'')
+ self.writer.add_file(urlutils.encoded_url(url), b'')
if self.pending_downloads:
return
self.finish_file()
diff --git a/qutebrowser/utils/urlutils.py b/qutebrowser/utils/urlutils.py
index e372dc65f..01c366c25 100644
--- a/qutebrowser/utils/urlutils.py
+++ b/qutebrowser/utils/urlutils.py
@@ -438,6 +438,15 @@ def same_domain(url1, url2):
return domain1 == domain2
+def encoded_url(url):
+    """Return the fully encoded form of a URL as an ASCII-only str.
+
+    Args:
+        url: The QUrl to encode.
+    """
+    return str(bytes(url.toEncoded()), encoding='ascii')
+
+
class IncDecError(Exception):
"""Exception raised by incdec_number on problems.
diff --git a/tests/unit/utils/test_urlutils.py b/tests/unit/utils/test_urlutils.py
index 4a19df689..5496a019b 100644
--- a/tests/unit/utils/test_urlutils.py
+++ b/tests/unit/utils/test_urlutils.py
@@ -527,6 +527,19 @@ def test_same_domain_invalid_url(url1, url2):
with pytest.raises(urlutils.InvalidUrlError):
urlutils.same_domain(QUrl(url1), QUrl(url2))
+
+@pytest.mark.parametrize('url, expected', [
+    ('http://example.com', 'http://example.com'),
+    ('http://ünicode.com', 'http://xn--nicode-2ya.com'),
+    ('http://foo.bar/?header=text/pläin',
+     'http://foo.bar/?header=text/pl%C3%A4in'),
+])
+def test_encoded_url(url, expected):
+    """Check encoded_url() punycodes hosts and percent-encodes queries."""
+    qurl = QUrl(url)
+    assert urlutils.encoded_url(qurl) == expected
+
+
class TestIncDecNumber:
"""Tests for urlutils.incdec_number()."""