Add tests for mhtml

This also makes the output of MHTMLWriter deterministic by (1) generating the boundary when the file is written instead of at class definition time, which allows uuid.uuid4 to be monkeypatched, and (2) writing the files in sorted order (sorted by location), since Python dicts are unordered by default.
2015-09-24 17:56:33 +02:00 · 2015-09-24 17:56:33 +02:00 · bf90c8c06b
commit bf90c8c06b
parent 5fcbc839bb
2 changed files with 271 additions and 4 deletions
--- a/qutebrowser/misc/mhtml.py
+++ b/qutebrowser/misc/mhtml.py
@ -87,8 +87,6 @@ class MHTMLWriter():
        _files: Mapping of location->_File struct.
    """

-    BOUNDARY = "---=_qute-" + str(uuid.uuid4())
-
    def __init__(self, root_content, content_location, content_type):
        self.root_content = root_content
        self.content_location = content_location
@ -125,12 +123,13 @@ class MHTMLWriter():
        Args:
            fp: The file-object, openend in "wb" mode.
        """
-        msg = multipart.MIMEMultipart("related", self.BOUNDARY)
+        msg = multipart.MIMEMultipart("related",
+                                      "---=_qute-{}".format(uuid.uuid4()))

        root = self._create_root_file()
        msg.attach(root)

-        for file_data in self._files.values():
+        for _, file_data in sorted(self._files.items()):
            msg.attach(self._create_file(file_data))

        gen = generator.BytesGenerator(fp, policy=MHTMLPolicy)
--- a/tests/unit/misc/test_mhtml.py
+++ b/tests/unit/misc/test_mhtml.py
@ -0,0 +1,268 @@
+# vim: ft=python fileencoding=utf-8 sts=4 sw=4 et:
+import io
+import textwrap
+import re
+
+import pytest
+
+from qutebrowser.misc import mhtml
+
+@pytest.fixture(autouse=True)
+def patch_uuid(monkeypatch):
+    monkeypatch.setattr("uuid.uuid4", lambda: "UUID")
+
+
+class Checker:
+
+    """A helper to check mhtml output.
+
+    Attrs:
+        fp: A BytesIO object for passing to MHTMLWriter.write_to.
+    """
+
+    def __init__(self):
+        self.fp = io.BytesIO()
+
+    @property
+    def value(self):
+        return self.fp.getvalue()
+
+    def expect(self, expected):
+        actual = self.value.decode("ascii")
+        # Make sure there are no stray \r or \n
+        assert re.search(r"\r[^\n]", actual) is None
+        assert re.search(r"[^\r]\n", actual) is None
+        actual = actual.replace("\r\n", "\n")
+        expected = textwrap.dedent(expected).lstrip("\n")
+        assert expected == actual
+
+
+@pytest.fixture
+def checker():
+    return Checker()
+
+
+def test_quoted_printable_umlauts(checker):
+    content = "Die süße Hündin läuft in die Höhle des Bären"
+    content = content.encode("iso-8859-1")
+    writer = mhtml.MHTMLWriter(root_content=content,
+                               content_location="localhost",
+                               content_type="text/plain")
+    writer.write_to(checker.fp)
+    checker.expect("""
+    Content-Type: multipart/related; boundary="---=_qute-UUID"
+    MIME-Version: 1.0
+
+    -----=_qute-UUID
+    Content-Location: localhost
+    MIME-Version: 1.0
+    Content-Type: text/plain
+    Content-Transfer-Encoding: quoted-printable
+
+    Die=20s=FC=DFe=20H=FCndin=20l=E4uft=20in=20die=20H=F6hle=20des=20B=E4ren
+    -----=_qute-UUID--
+    """)
+
+
+@pytest.mark.parametrize("header, value", [
+    ("content_location", "http://brötli.com"),
+    ("content_type", "text/pläin"),
+])
+def test_refuses_non_ascii_header_value(checker, header, value):
+    defaults = {
+        "root_content": b"",
+        "content_location": "http://example.com",
+        "content_type": "text/plain",
+    }
+    defaults[header] = value
+    writer = mhtml.MHTMLWriter(**defaults)
+    with pytest.raises(UnicodeEncodeError) as e:
+        writer.write_to(checker.fp)
+        assert "'ascii' codec can't encode" in str(e)
+
+
+def test_file_encoded_as_base64(checker):
+    content = b"Image file attached"
+    writer = mhtml.MHTMLWriter(root_content=content, content_type="text/plain",
+                               content_location="http://example.com")
+    writer.add_file(location="http://a.example.com/image.png",
+                    content="\U0001F601 image data".encode("utf-8"),
+                    content_type="image/png",
+                    transfer_encoding=mhtml.E_BASE64)
+    writer.write_to(checker.fp)
+    checker.expect("""
+    Content-Type: multipart/related; boundary="---=_qute-UUID"
+    MIME-Version: 1.0
+
+    -----=_qute-UUID
+    Content-Location: http://example.com
+    MIME-Version: 1.0
+    Content-Type: text/plain
+    Content-Transfer-Encoding: quoted-printable
+
+    Image=20file=20attached
+    -----=_qute-UUID
+    Content-Location: http://a.example.com/image.png
+    MIME-Version: 1.0
+    Content-Type: image/png
+    Content-Transfer-Encoding: base64
+
+    8J+YgSBpbWFnZSBkYXRh
+
+    -----=_qute-UUID--
+    """)
+
+
+@pytest.mark.parametrize("transfer_encoding", [mhtml.E_BASE64, mhtml.E_QUOPRI],
+                         ids=["base64", "quoted-printable"])
+def test_payload_lines_wrap(checker, transfer_encoding):
+    payload = b"1234567890" * 10
+    writer = mhtml.MHTMLWriter(root_content=b"", content_type="text/plain",
+                               content_location="http://example.com")
+    writer.add_file(location="http://example.com/payload", content=payload,
+                    content_type="text/plain",
+                    transfer_encoding=transfer_encoding)
+    writer.write_to(checker.fp)
+    for line in checker.value.split(b"\r\n"):
+        assert len(line) < 77
+
+
+def test_files_appear_sorted(checker):
+    writer = mhtml.MHTMLWriter(root_content=b"root file",
+                               content_type="text/plain",
+                               content_location="http://www.example.com/")
+    for subdomain in "ahgbizt":
+        writer.add_file(location="http://{}.example.com/".format(subdomain),
+                        content="file {}".format(subdomain).encode("utf-8"),
+                        content_type="text/plain",
+                        transfer_encoding=mhtml.E_QUOPRI)
+    writer.write_to(checker.fp)
+    checker.expect("""
+    Content-Type: multipart/related; boundary="---=_qute-UUID"
+    MIME-Version: 1.0
+
+    -----=_qute-UUID
+    Content-Location: http://www.example.com/
+    MIME-Version: 1.0
+    Content-Type: text/plain
+    Content-Transfer-Encoding: quoted-printable
+
+    root=20file
+    -----=_qute-UUID
+    Content-Location: http://a.example.com/
+    MIME-Version: 1.0
+    Content-Type: text/plain
+    Content-Transfer-Encoding: quoted-printable
+
+    file=20a
+    -----=_qute-UUID
+    Content-Location: http://b.example.com/
+    MIME-Version: 1.0
+    Content-Type: text/plain
+    Content-Transfer-Encoding: quoted-printable
+
+    file=20b
+    -----=_qute-UUID
+    Content-Location: http://g.example.com/
+    MIME-Version: 1.0
+    Content-Type: text/plain
+    Content-Transfer-Encoding: quoted-printable
+
+    file=20g
+    -----=_qute-UUID
+    Content-Location: http://h.example.com/
+    MIME-Version: 1.0
+    Content-Type: text/plain
+    Content-Transfer-Encoding: quoted-printable
+
+    file=20h
+    -----=_qute-UUID
+    Content-Location: http://i.example.com/
+    MIME-Version: 1.0
+    Content-Type: text/plain
+    Content-Transfer-Encoding: quoted-printable
+
+    file=20i
+    -----=_qute-UUID
+    Content-Location: http://t.example.com/
+    MIME-Version: 1.0
+    Content-Type: text/plain
+    Content-Transfer-Encoding: quoted-printable
+
+    file=20t
+    -----=_qute-UUID
+    Content-Location: http://z.example.com/
+    MIME-Version: 1.0
+    Content-Type: text/plain
+    Content-Transfer-Encoding: quoted-printable
+
+    file=20z
+    -----=_qute-UUID--
+    """)
+
+
+def test_empty_content_type(checker):
+    writer = mhtml.MHTMLWriter(root_content=b"",
+                               content_location="http://example.com/",
+                               content_type="text/plain")
+    writer.add_file("http://example.com/file", b"file content")
+    writer.write_to(checker.fp)
+    checker.expect("""
+    Content-Type: multipart/related; boundary="---=_qute-UUID"
+    MIME-Version: 1.0
+
+    -----=_qute-UUID
+    Content-Location: http://example.com/
+    MIME-Version: 1.0
+    Content-Type: text/plain
+    Content-Transfer-Encoding: quoted-printable
+
+
+    -----=_qute-UUID
+    MIME-Version: 1.0
+    Content-Location: http://example.com/file
+    Content-Transfer-Encoding: quoted-printable
+
+    file=20content
+    -----=_qute-UUID--
+    """)
+
+
+def test_removing_file_from_mhtml(checker):
+    writer = mhtml.MHTMLWriter(root_content=b"root",
+                               content_location="http://example.com/",
+                               content_type="text/plain")
+    writer.add_file("http://evil.com/", b"file content")
+    writer.remove_file("http://evil.com/")
+    writer.write_to(checker.fp)
+    checker.expect("""
+    Content-Type: multipart/related; boundary="---=_qute-UUID"
+    MIME-Version: 1.0
+
+    -----=_qute-UUID
+    Content-Location: http://example.com/
+    MIME-Version: 1.0
+    Content-Type: text/plain
+    Content-Transfer-Encoding: quoted-printable
+
+    root
+    -----=_qute-UUID--
+    """)
+
+
+@pytest.mark.parametrize("style, expected_urls", [
+    ("@import 'default.css'", ["default.css"]),
+    ('@import "default.css"', ["default.css"]),
+    ("@import \t 'tabbed.css'", ["tabbed.css"]),
+    ("@import url('default.css')", ["default.css"]),
+    ("""body {
+    background: url("/bg-img.png")
+    }""", ["/bg-img.png"]),
+    ("background: url(folder/file.png)", ["folder/file.png"]),
+    ("content: url()", []),
+])
+def test_css_url_scanner(style, expected_urls):
+    expected_urls.sort()
+    urls = mhtml._get_css_imports(style)
+    urls.sort()
+    assert urls == expected_urls