From 729cc61152335674b970e641c9afd4f168f9cf05 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Maciej=20Wo=C5=82czyk?= Date: Tue, 29 Nov 2016 04:03:35 +0100 Subject: [PATCH 1/3] Fixed mhtml quopri encoding with spaces and tabs (see: #2063) --- qutebrowser/browser/webkit/mhtml.py | 19 +- .../data/downloads/mhtml/complex/complex.mht | 298 ++++++++---------- .../data/downloads/mhtml/simple/simple.mht | 15 +- 3 files changed, 164 insertions(+), 168 deletions(-) diff --git a/qutebrowser/browser/webkit/mhtml.py b/qutebrowser/browser/webkit/mhtml.py index 237898809..2e828dd3b 100644 --- a/qutebrowser/browser/webkit/mhtml.py +++ b/qutebrowser/browser/webkit/mhtml.py @@ -32,6 +32,7 @@ import email.generator import email.encoders import email.mime.multipart import email.message +from quopri import encodestring from PyQt5.QtCore import QUrl @@ -138,6 +139,22 @@ def _check_rel(element): return any(rel in rels for rel in must_have) +def _encode_quopri_mhtml(msg): + """Encode the message's payload in quoted-printable. + + Substitute for quopri's default 'encode_quopri' method, which needlessly + encodes all spaces and tabs, instead of only those at the end on the + line. + + Args: + msg: Email message to quote. + """ + orig = msg.get_payload(decode=True) + encdata = encodestring(orig, quotetabs=False) + msg.set_payload(encdata) + msg['Content-Transfer-Encoding'] = 'quoted-printable' + + MHTMLPolicy = email.policy.default.clone(linesep='\r\n', max_line_length=0) @@ -146,7 +163,7 @@ E_BASE64 = email.encoders.encode_base64 # Encode the file using MIME quoted-printable encoding. -E_QUOPRI = email.encoders.encode_quopri +E_QUOPRI = _encode_quopri_mhtml class MHTMLWriter: diff --git a/tests/end2end/data/downloads/mhtml/complex/complex.mht b/tests/end2end/data/downloads/mhtml/complex/complex.mht index d7988cb63..f4cfb4649 100644 --- a/tests/end2end/data/downloads/mhtml/complex/complex.mht +++ b/tests/end2end/data/downloads/mhtml/complex/complex.mht @@ -8,143 +8,125 @@ Content-Type: text/html; charset="UTF-8" Content-Transfer-Encoding: quoted-printable -=20=20=20=20=20=20=20=20 -=20=20=20=20=20=20=20=20qutebrowser=20mhtml=20test -=20=20=20=20 -=20=20=20=20=20=20=20=20 -=20=20=20=20=20=20=20=20 -=20=20=20=20=20=20=20=20 -=20=20=20=20=20=20=20=20 -=20=20=20=20=20=20=20=20 -=20=20=20=20=20=20=20=20 -=20=20=20=20=20=20=20=20 -=20=20=20=20=20=20=20=20 -=20=20=20=20=20=20=20=20@import=20"actually-it's-css"; -=20=20=20=20=20=20=20=20 -=20=20=20=20=20=20=20=20 -=20=20=20=20=20=20=20=20 -=20=20=20=20=20=20=20=20 -=20=20=20=20=20=20=20=20 -=20=20=20=20=20=20=20=20 -=20=20=20=20=20=20=20=20 -=20=20=20=20=20=20=20=20 -=20=20=20=20=20=20=20=20 -=20=20=20=20=20=20=20=20 -=20=20=20=20=20=20=20=20 -=20=20=20=20=20=20=20=20=20 -=20=20=20=20=20=20=20=20 -=20=20=20=20 -=20=20=20=20 -=20=20=20=20=20=20=20=20 -=20=20=20=20=20=20=20=20 -=20=20=20=20=20=20=20=20 -=20=20=20=20=20=20=20=20

Welcome=20to=20the=20qutebrowser=20mhtml=20test= -=20page

-=20=20=20=20=20=20=20=20 -=20=20=20=20=20=20=20=20 -=20=20=20=20=20=20=20=20...that=20the=20word=20qutebrowser=20is=20= -a=20word=20play=20on=20Qt,=20the -=20=20=20=20=20=20=20=20framework=20the=20browser=20is=20built=20with? -=20=20=20=20=20=20=20=20 -=20=20=20=20=20=20=20=20 -=20=20=20=20=20=20=20=20

What=20is=20this=20page?

-=20=20=20=20=20=20=20=20 -=20=20=20=20=20=20=20=20

This=20page=20is=20a=20test-case=20for=20the=20m= -html=20download=20feature=20of -=20=20=20=20=20=20=20=20qutebrowser.=20Under=20normal=20circumstances,=20yo= -u=20won't=20see=20this=20page,=20except -=20=20=20=20=20=20=20=20if=20you're=20a=20qutebrowser=20developer=20or<= -/em>=20you're=20attending=20one=20of -=20=20=20=20=20=20=20=20The-Compiler's=20pytest=20demos.

-=20=20=20=20=20=20=20=20 -=20=20=20=20=20=20=20=20 -=20=20=20=20=20=20=20=20...that=20this=20page=20was=20once=20a=20monstrosit= -y=20with=20"this=20weird=20pixelated -=20=20=20=20=20=20=20=20globe=20with=20the=20geocities-like=20background"?=20You=20can=20find=20the=20old -=20=20=20=20=20=20=20=20page=20in=20the=20old=20commits=20and=20indeed,=20i= -t=20was=20quite=20atrocious.=20But=20hey, -=20=20=20=20=20=20=20=20every=20browser=20needs=20a=20globe... -=20=20=20=20=20=20=20=20 -=20=20=20=20=20=20=20=20 -=20=20=20=20=20=20=20=20

This=20page=20references=20other=20assets=20and= -=20when=20the=20page=20is=20downloaded, -=20=20=20=20=20=20=20=20qutebrowser=20checks=20if=20each=20asset=20was=20do= -wnloaded.=20If=20some=20assets=20are -=20=20=20=20=20=20=20=20missing,=20the=20test=20fails=20and=20the=20poor=20= -developers=20have=20to=20search=20for=20the -=20=20=20=20=20=20=20=20error.

-=20=20=20=20=20=20=20=20 -=20=20=20=20=20=20=20=20

Can=20I=20contribute=20to=20qutebrowser?

-=20=20=20=20=20=20=20=20 -=20=20=20=20=20=20=20=20

Yes!

-=20=20=20=20=20=20=20=20 -=20=20=20=20=20=20=20=20 -=20=20=20=20=20=20=20=20...that=20qutebrowser=20is=20free=20software?=20Fre= -e=20as=20in=20free=20beer=20and -=20=20=20=20=20=20=20=20free=20speech!=20Isn't=20that=20great? -=20=20=20=20=20=20=20=20 -=20=20=20=20=20=20=20=20 -=20=20=20=20=20=20=20=20

...and=20how?

-=20=20=20=20=20=20=20=20 -=20=20=20=20=20=20=20=20

See=20 -=20=20=20=20=20=20=20=20here=20for=20more=20information.

-=20=20=20=20=20=20=20=20 -=20=20=20=20=20=20=20=20

More=20useless=20trivia!

-=20=20=20=20=20=20=20=20=20=20=20=20=20=20=20=20 -=20=20=20=20=20=20=20=20 -=20=20=20=20=20=20=20=20...that=20the=20font=20in=20the=20header=20is=20Com= -ic=20Sans? -=20=20=20=20=20=20=20=20 -=20=20=20=20=20=20=20=20 -=20=20=20=20=20=20=20=20 -=20=20=20=20=20=20=20=20...the=20IRC=20channel=20for=20qutebrowser=20is=20<= -code>#qutebrowser=20on -=20=20=20=20=20=20=20=20irc.freenode.net -=20=20=20=20=20=20=20=20 -=20=20=20=20=20=20=20=20 -=20=20=20=20=20=20=20=20 -=20=20=20=20=20=20=20=20...the=20area=20of=20a=20circle=20is=20=CF=80*r2? -=20=20=20=20=20=20=20=20 -=20=20=20=20=20=20=20=20 -=20=20=20=20=20=20=20=20

To=20make=20this=20page=20a=20bit=20useful,=20I'= -ve=20included=20a=20chessboard,=20so=20you -=20=20=20=20=20=20=20=20can=20play=20chess.=20Just=20turn=20your=20screen= -=2090=20degrees,=20such=20that=20it=20forms=20a -=20=20=20=20=20=20=20=20flat,=20horizontal=20surface=20(you=20can=20skip=20= -this=20step=20if=20you're=20using=20a -=20=20=20=20=20=20=20=20tablet).=20Next,=20zoom=20the=20page=20until=20it= -=20fits=20your=20needs.=20Enjoy=20your=20round -=20=20=20=20=20=20=20=20of=20chess!

-=20=20=20=20=20=20=20=20 -=20=20=20=20=20=20=20=20 -=20=20=20=20 + + qutebrowser mhtml test + =20 + + + =20 + + + =20 + + + =20 + + + =20 + + + =20 + + + =20 + + + + + + + =20 +

Welcome to the qutebrowser mhtml test page

+ =20 +
+ ...that the word qutebrowser is a word play on Qt, the + framework the browser is built with? +
+ =20 +

What is this page?

+ =20 +

This page is a test-case for the mhtml download feature of + qutebrowser. Under normal circumstances, you won't see this page, e= +xcept + if you're a qutebrowser developer or you're attending one = +of + The-Compiler's pytest demos.

+ =20 +
+ ...that this page was once a monstrosity with "this weird pixel= +ated + globe with the geocities-like background"? You can find the old + page in the old commits and indeed, it was quite atrocious. But hey, + every browser needs a globe... +
+ =20 +

This page references other assets and when the page is downloade= +d, + qutebrowser checks if each asset was downloaded. If some assets are + missing, the test fails and the poor developers have to search for = +the + error.

+ =20 +

Can I contribute to qutebrowser?

+ =20 +

Yes!

+ =20 +
+ ...that qutebrowser is free software? Free as in free beer= + and + free speech! Isn't that great? +
+ =20 +

...and how?

+ =20 +

See + here for more information.

+ =20 +

More useless trivia!

+ =20 +
+ ...that the font in the header is Comic Sans? +
+ =20 +
+ ...the IRC channel for qutebrowser is #qutebrowser on + irc.freenode.net +
+ =20 +
+ ...the area of a circle is =CF=80*r2? +
+ =20 +

To make this page a bit useful, I've included a chessboard, so y= +ou + can play chess. Just turn your screen 90 degrees, such that it form= +s a + flat, horizontal surface (you can skip this step if you're using a + tablet). Next, zoom the page until it fits your needs. Enjoy your r= +ound + of chess!

+ +
+ =20 -----=_qute-5314618b-e51d-46e1-9598-103536e86b59 @@ -709,35 +691,33 @@ MIME-Version: 1.0 Content-Type: text/css; charset=utf-8 Content-Transfer-Encoding: quoted-printable -@import=20'external-in-extern.css'; -/*=20We=20want=20to=20make=20sure=20that=20assets=20referenced=20in=20exter= -nal=20css=20files=20are -=20*=20properly=20included -=20*/ -div.dyk=20{ -=20=20=20=20/*=20Did=20you=20know?=20*/ -=20=20=20=20background-image:=20url('DYK.png'); -=20=20=20=20background-repeat:=20no-repeat; -=20=20=20=20/*=20Image=20is=20128px=20wide=20*/ -=20=20=20=20min-height:=20128px; -=20=20=20=20padding-left:=20148px; -=20=20=20=20margin-top:=2010px; -=20=20=20=20margin-bottom:=2010px; -=20=20=20=20border:=202px=20solid=20#474747; -=20=20=20=20border-radius:=2064px; +@import 'external-in-extern.css'; +/* We want to make sure that assets referenced in external css files are + * properly included + */ +div.dyk { + /* Did you know? */ + background-image: url('DYK.png'); + background-repeat: no-repeat; + /* Image is 128px wide */ + min-height: 128px; + padding-left: 148px; + margin-top: 10px; + margin-bottom: 10px; + border: 2px solid #474747; + border-radius: 64px; } -=20=20=20=20 + =20 -----=_qute-5314618b-e51d-46e1-9598-103536e86b59 Content-Location: http://localhost:1337/data/downloads/mhtml/complex/external-in-extern.css MIME-Version: 1.0 Content-Type: text/css; charset=utf-8 Content-Transfer-Encoding: quoted-printable -/*=20Just=20making=20sure=20that=20more=20than=20one=20level=20of=20externa= -l=20css=20is=20included=20*/ -h1,=20h2,=20h3,=20h4,=20h5,=20h6=20{ -=20=20=20=20color:=20#0A396E; -=20=20=20=20border-bottom:=201px=20dotted=20#474747; +/* Just making sure that more than one level of external css is included */ +h1, h2, h3, h4, h5, h6 { + color: #0A396E; + border-bottom: 1px dotted #474747; } -----=_qute-5314618b-e51d-46e1-9598-103536e86b59 Content-Location: http://localhost:1337/data/downloads/mhtml/complex/favicon.png diff --git a/tests/end2end/data/downloads/mhtml/simple/simple.mht b/tests/end2end/data/downloads/mhtml/simple/simple.mht index d0b7a7c48..ef4431362 100644 --- a/tests/end2end/data/downloads/mhtml/simple/simple.mht +++ b/tests/end2end/data/downloads/mhtml/simple/simple.mht @@ -7,14 +7,13 @@ MIME-Version: 1.0 Content-Type: text/html; charset="UTF-8" Content-Transfer-Encoding: quoted-printable - -=20=20=20=20=20=20=20=20 -=20=20=20=20=20=20=20=20Simple=20MHTML=20test -=20=20=20=20 -=20=20=20=20 -=20=20=20=20=20=20=20=20normal=20link=20to=20another=20page= - -=20=20=20=20 + + + Simple MHTML test + + + normal link to another page + =20 -----=_qute-6d584056-b1e4-4882-91e6-d4a6d23adb67-- From 2cbea5015246b7d4f05bace313ff71ccd83f8cee Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Maciej=20Wo=C5=82czyk?= Date: Tue, 29 Nov 2016 19:10:36 +0100 Subject: [PATCH 2/3] Fixed style issues, corrected unit test --- qutebrowser/browser/webkit/mhtml.py | 10 +++++----- tests/unit/browser/webkit/test_mhtml.py | 22 +++++++++++----------- 2 files changed, 16 insertions(+), 16 deletions(-) diff --git a/qutebrowser/browser/webkit/mhtml.py b/qutebrowser/browser/webkit/mhtml.py index 2e828dd3b..fbfeafef4 100644 --- a/qutebrowser/browser/webkit/mhtml.py +++ b/qutebrowser/browser/webkit/mhtml.py @@ -32,7 +32,7 @@ import email.generator import email.encoders import email.mime.multipart import email.message -from quopri import encodestring +import quopri from PyQt5.QtCore import QUrl @@ -142,15 +142,15 @@ def _check_rel(element): def _encode_quopri_mhtml(msg): """Encode the message's payload in quoted-printable. - Substitute for quopri's default 'encode_quopri' method, which needlessly - encodes all spaces and tabs, instead of only those at the end on the - line. + Substitute for quopri's default 'encode_quopri' method, which needlessly + encodes all spaces and tabs, instead of only those at the end on the + line. Args: msg: Email message to quote. """ orig = msg.get_payload(decode=True) - encdata = encodestring(orig, quotetabs=False) + encdata = quopri.encodestring(orig, quotetabs=False) msg.set_payload(encdata) msg['Content-Transfer-Encoding'] = 'quoted-printable' diff --git a/tests/unit/browser/webkit/test_mhtml.py b/tests/unit/browser/webkit/test_mhtml.py index 3b33857e9..fd642b46f 100644 --- a/tests/unit/browser/webkit/test_mhtml.py +++ b/tests/unit/browser/webkit/test_mhtml.py @@ -87,7 +87,7 @@ def test_quoted_printable_umlauts(checker): Content-Type: text/plain Content-Transfer-Encoding: quoted-printable - Die=20s=FC=DFe=20H=FCndin=20l=E4uft=20in=20die=20H=F6hle=20des=20B=E4ren + Die s=FC=DFe H=FCndin l=E4uft in die H=F6hle des B=E4ren -----=_qute-UUID-- """) @@ -128,7 +128,7 @@ def test_file_encoded_as_base64(checker): Content-Type: text/plain Content-Transfer-Encoding: quoted-printable - Image=20file=20attached + Image file attached -----=_qute-UUID Content-Location: http://a.example.com/image.png MIME-Version: 1.0 @@ -175,56 +175,56 @@ def test_files_appear_sorted(checker): Content-Type: text/plain Content-Transfer-Encoding: quoted-printable - root=20file + root file -----=_qute-UUID Content-Location: http://a.example.com/ MIME-Version: 1.0 Content-Type: text/plain Content-Transfer-Encoding: quoted-printable - file=20a + file a -----=_qute-UUID Content-Location: http://b.example.com/ MIME-Version: 1.0 Content-Type: text/plain Content-Transfer-Encoding: quoted-printable - file=20b + file b -----=_qute-UUID Content-Location: http://g.example.com/ MIME-Version: 1.0 Content-Type: text/plain Content-Transfer-Encoding: quoted-printable - file=20g + file g -----=_qute-UUID Content-Location: http://h.example.com/ MIME-Version: 1.0 Content-Type: text/plain Content-Transfer-Encoding: quoted-printable - file=20h + file h -----=_qute-UUID Content-Location: http://i.example.com/ MIME-Version: 1.0 Content-Type: text/plain Content-Transfer-Encoding: quoted-printable - file=20i + file i -----=_qute-UUID Content-Location: http://t.example.com/ MIME-Version: 1.0 Content-Type: text/plain Content-Transfer-Encoding: quoted-printable - file=20t + file t -----=_qute-UUID Content-Location: http://z.example.com/ MIME-Version: 1.0 Content-Type: text/plain Content-Transfer-Encoding: quoted-printable - file=20z + file z -----=_qute-UUID-- """) @@ -251,7 +251,7 @@ def test_empty_content_type(checker): Content-Location: http://example.com/file Content-Transfer-Encoding: quoted-printable - file=20content + file content -----=_qute-UUID-- """) From f6802272c7c50574cfddd9c23511179299fcfa8e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Maciej=20Wo=C5=82czyk?= Date: Thu, 1 Dec 2016 01:17:00 +0100 Subject: [PATCH 3/3] Added a test for quopri with spaces at the end of the line --- tests/unit/browser/webkit/test_mhtml.py | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/tests/unit/browser/webkit/test_mhtml.py b/tests/unit/browser/webkit/test_mhtml.py index fd642b46f..e5fdf4ffd 100644 --- a/tests/unit/browser/webkit/test_mhtml.py +++ b/tests/unit/browser/webkit/test_mhtml.py @@ -283,6 +283,28 @@ def test_css_url_scanner(monkeypatch, has_cssutils, inline, style, assert urls == expected_urls +def test_quoted_printable_spaces(checker): + content = b' ' * 100 + writer = mhtml.MHTMLWriter(root_content=content, + content_location='localhost', + content_type='text/plain') + writer.write_to(checker.fp) + checker.expect(""" + Content-Type: multipart/related; boundary="---=_qute-UUID" + MIME-Version: 1.0 + + -----=_qute-UUID + Content-Location: localhost + MIME-Version: 1.0 + Content-Type: text/plain + Content-Transfer-Encoding: quoted-printable + + {}= + {}=20 + -----=_qute-UUID-- + """.format(' ' * 75, ' ' * 24)) + + class TestNoCloseBytesIO: def test_fake_close(self):