Handle non-ASCII in headers/url better
This commit is contained in:
parent
8bb887ddab
commit
ae8a9b8798
@ -33,7 +33,7 @@ import email.mime.multipart
|
|||||||
from PyQt5.QtCore import QUrl
|
from PyQt5.QtCore import QUrl
|
||||||
|
|
||||||
from qutebrowser.browser import webelem
|
from qutebrowser.browser import webelem
|
||||||
from qutebrowser.utils import log, objreg, message, usertypes
|
from qutebrowser.utils import log, objreg, message, usertypes, utils, urlutils
|
||||||
|
|
||||||
try:
|
try:
|
||||||
import cssutils
|
import cssutils
|
||||||
@ -237,7 +237,7 @@ class _Downloader():
|
|||||||
|
|
||||||
self.writer = MHTMLWriter(
|
self.writer = MHTMLWriter(
|
||||||
web_frame.toHtml().encode('utf-8'),
|
web_frame.toHtml().encode('utf-8'),
|
||||||
content_location=web_url.toString(),
|
content_location=urlutils.encoded_url(web_url),
|
||||||
# I've found no way of getting the content type of a QWebView, but
|
# I've found no way of getting the content type of a QWebView, but
|
||||||
# since we're using .toHtml, it's probably safe to say that the
|
# since we're using .toHtml, it's probably safe to say that the
|
||||||
# content-type is HTML
|
# content-type is HTML
|
||||||
@ -347,8 +347,12 @@ class _Downloader():
|
|||||||
self.fetch_url(absolute_url)
|
self.fetch_url(absolute_url)
|
||||||
|
|
||||||
encode = E_QUOPRI if mime.startswith('text/') else E_BASE64
|
encode = E_QUOPRI if mime.startswith('text/') else E_BASE64
|
||||||
self.writer.add_file(url.toString(), item.fileobj.getvalue(), mime,
|
# Our MHTML handler refuses non-ASCII headers. This will replace every
|
||||||
encode)
|
# non-ASCII char with '?'. This is probably okay, as official Content-
|
||||||
|
# Type headers contain ASCII only anyway. Anything else is madness.
|
||||||
|
mime = utils.force_encoding(mime, 'ascii')
|
||||||
|
self.writer.add_file(urlutils.encoded_url(url),
|
||||||
|
item.fileobj.getvalue(), mime, encode)
|
||||||
item.fileobj.actual_close()
|
item.fileobj.actual_close()
|
||||||
if self.pending_downloads:
|
if self.pending_downloads:
|
||||||
return
|
return
|
||||||
@ -369,7 +373,7 @@ class _Downloader():
|
|||||||
log.downloads.debug("Oops! Download already gone: %s", item)
|
log.downloads.debug("Oops! Download already gone: %s", item)
|
||||||
return
|
return
|
||||||
item.fileobj.actual_close()
|
item.fileobj.actual_close()
|
||||||
self.writer.add_file(url.toString(), b'')
|
# Register the failed download under its fully encoded (ASCII-only) URL.
self.writer.add_file(urlutils.encoded_url(url), b'')
|
||||||
if self.pending_downloads:
|
if self.pending_downloads:
|
||||||
return
|
return
|
||||||
self.finish_file()
|
self.finish_file()
|
||||||
|
@ -438,6 +438,15 @@ def same_domain(url1, url2):
|
|||||||
return domain1 == domain2
|
return domain1 == domain2
|
||||||
|
|
||||||
|
|
||||||
|
def encoded_url(url):
    """Convert a QUrl into its fully encoded string form.

    Args:
        url: The QUrl to encode.

    Return:
        The result of url.toEncoded(), decoded as ASCII into a str.
    """
    raw = url.toEncoded()
    return str(bytes(raw), 'ascii')
|
||||||
|
|
||||||
|
|
||||||
class IncDecError(Exception):
|
class IncDecError(Exception):
|
||||||
|
|
||||||
"""Exception raised by incdec_number on problems.
|
"""Exception raised by incdec_number on problems.
|
||||||
|
@ -527,6 +527,19 @@ def test_same_domain_invalid_url(url1, url2):
|
|||||||
with pytest.raises(urlutils.InvalidUrlError):
|
with pytest.raises(urlutils.InvalidUrlError):
|
||||||
urlutils.same_domain(QUrl(url1), QUrl(url2))
|
urlutils.same_domain(QUrl(url1), QUrl(url2))
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.parametrize('url, expected', [
    ('http://example.com', 'http://example.com'),
    ('http://ünicode.com', 'http://xn--nicode-2ya.com'),
    ('http://foo.bar/?header=text/pläin',
     'http://foo.bar/?header=text/pl%C3%A4in'),
])
def test_encoded_url(url, expected):
    """Check encoded_url() against the expected encoded strings."""
    encoded = urlutils.encoded_url(QUrl(url))
    assert encoded == expected
|
||||||
|
|
||||||
|
|
||||||
class TestIncDecNumber:
|
class TestIncDecNumber:
|
||||||
|
|
||||||
"""Tests for urlutils.incdec_number()."""
|
"""Tests for urlutils.incdec_number()."""
|
||||||
|
Loading…
Reference in New Issue
Block a user