diff --git a/qutebrowser/misc/mhtml.py b/qutebrowser/misc/mhtml.py
index 3afc32c02..795b102e6 100644
--- a/qutebrowser/misc/mhtml.py
+++ b/qutebrowser/misc/mhtml.py
@@ -20,11 +20,13 @@
"""Utils for writing a MHTML file."""
import functools
+import quopri
import io
from collections import namedtuple
from base64 import b64encode
from urllib.parse import urljoin
+from uuid import uuid4
from PyQt5.QtCore import QUrl
from PyQt5.QtNetwork import QNetworkRequest, QNetworkReply
@@ -36,18 +38,44 @@ _File = namedtuple("_File",
"content content_type content_location transfer_encoding")
+def _chunked_base64(data, maxlen=76, linesep=b"\r\n"):
+    """Just like b64encode, except that it breaks long lines.
+
+    Args:
+        data: The bytes to encode.
+        maxlen: Maximum length of a line, not including the line separator.
+                Must be at least 1.
+        linesep: Line separator to use as bytes.
+
+    Return:
+        The base64 encoded data as bytes, with linesep between the lines
+        (but not after the last one).
+
+    Raises:
+        ValueError: If maxlen is smaller than 1.
+    """
+    if maxlen < 1:
+        # A negative step would make range() yield nothing, silently dropping
+        # all data; fail loudly instead.
+        raise ValueError("maxlen must be at least 1, got {}".format(maxlen))
+    encoded = b64encode(data)
+    return linesep.join(encoded[i:i + maxlen]
+                        for i in range(0, len(encoded), maxlen))
+
+def _rn_quopri(data):
+    """Return a quoted-printable representation of data.
+
+    Line breaks in the result are always CRLF.
+
+    Args:
+        data: The bytes to encode.
+
+    Return:
+        The quoted-printable encoded data as bytes.
+    """
+    orig_funcs = (quopri.b2a_qp, quopri.a2b_qp)
+    # Workaround for quopri mixing \n and \r\n: with the accelerated helpers
+    # set to None, quopri.encodestring falls back to its own encoder.
+    quopri.b2a_qp = quopri.a2b_qp = None
+    try:
+        encoded = quopri.encodestring(data)
+    finally:
+        # Always undo the monkey-patch, even if encoding raised, so the
+        # quopri module is not left modified for the rest of the process.
+        quopri.b2a_qp, quopri.a2b_qp = orig_funcs
+    return encoded.replace(b"\n", b"\r\n")
+
+
+# Each transfer encoding is a (header value, encoder) pair: the value written
+# after "Content-Transfer-Encoding: " (a falsy value omits the header) and a
+# function that maps the content bytes to the bytes written to the file.
E_NONE = (None, lambda x: x)
"""No transfer encoding, copy the bytes from input to output"""
-E_BASE64 = ("BASE64", b64encode)
+E_BASE64 = ("base64", _chunked_base64)
"""Encode the file using base64 encoding"""
+E_QUOPRI = ("quoted-printable", _rn_quopri)
+"""Encode the file using MIME quoted-printable encoding."""
+
class MHTMLWriter(object):
"""A class for aggregating multiple files and outputting them to a MHTML
file."""
- BOUNDARY = b"qute-mhtml"
+ BOUNDARY = b"---qute-mhtml-" + str(uuid4()).encode("ascii")
def __init__(self, root_content=None, content_location=None,
content_type=None):
@@ -90,6 +118,9 @@ class MHTMLWriter(object):
self._output_root_file(fp)
for file_data in self._files.values():
self._output_file(fp, file_data)
+ fp.write(b"\r\n--")
+ fp.write(self.BOUNDARY)
+ fp.write(b"--")
def _output_header(self, fp):
if self.content_location is None:
@@ -99,34 +130,34 @@ class MHTMLWriter(object):
fp.write(b"Content-Location: ")
fp.write(self.content_location.encode("utf-8"))
- fp.write(b'\nContent-Type: multipart/related;boundary="')
+ fp.write(b'\r\nContent-Type: multipart/related;boundary="')
fp.write(self.BOUNDARY)
fp.write(b'";type="')
fp.write(self.content_type.encode("utf-8"))
- fp.write(b'"\n\n')
+ fp.write(b'"\r\n\r\n')
def _output_root_file(self, fp):
+ """Write the root content to fp as one part, quoted-printable encoded."""
root_file = _File(
content=self.root_content, content_type=self.content_type,
- content_location=self.content_location, transfer_encoding=E_BASE64
+ content_location=self.content_location, transfer_encoding=E_QUOPRI,
)
self._output_file(fp, root_file)
def _output_file(self, fp, file_struct):
+ """Write a single part to fp.
+
+ Writes the "--" + BOUNDARY delimiter, the Content-Location header, the
+ Content-Type and Content-Transfer-Encoding headers when they are set, an
+ empty line, the encoded content and a trailing empty line. All line
+ breaks are CRLF.
+
+ Args:
+ fp: Object whose write() is called with bytes.
+ file_struct: A _File tuple; its transfer_encoding is unpacked into a
+ (name, function) pair and the function is applied to content.
+ """
fp.write(b"--")
fp.write(self.BOUNDARY)
- fp.write(b"\nContent-Location: ")
+ fp.write(b"\r\nContent-Location: ")
fp.write(file_struct.content_location.encode("utf-8"))
if file_struct.content_type is not None:
- fp.write(b"\nContent-Type: ")
+ fp.write(b"\r\nContent-Type: ")
fp.write(file_struct.content_type.encode("utf-8"))
encoding_name, encoding_func = file_struct.transfer_encoding
if encoding_name:
- fp.write(b"\nContent-Transfer-Encoding: ")
+ fp.write(b"\r\nContent-Transfer-Encoding: ")
fp.write(encoding_name.encode("utf-8"))
- fp.write(b"\n\n")
+ fp.write(b"\r\n\r\n")
fp.write(encoding_func(file_struct.content))
- fp.write(b"\n\n")
+ fp.write(b"\r\n\r\n")
def start_download(dest):
@@ -163,13 +194,15 @@ def start_download(dest):
pending_downloads.remove(item)
mime = item.raw_headers.get(b"Content-Type", b"")
mime = mime.decode("ascii", "ignore")
- writer.add_file(name, item.fileobj.getvalue(), mime)
+ encode = E_QUOPRI if mime.startswith("text/") else E_BASE64
+ writer.add_file(name, item.fileobj.getvalue(), mime, encode)
if pending_downloads:
return
finish_file()
- def error(item, *args):
+ def error(name, item, *args):
pending_downloads.remove(item)
+ writer.add_file(name, b"")
if pending_downloads:
return
finish_file()
@@ -190,7 +223,6 @@ def start_download(dest):
# Might be a local