String quote style changes
Use "" for user-facing strings and '' for internal strings, except when quotes appear inside a string (to avoid escaping them).
This commit is contained in:
parent
9bf9124324
commit
a092ef1fe6
@ -35,9 +35,9 @@ from qutebrowser.browser import webelem
|
||||
from qutebrowser.utils import log, objreg, message
|
||||
|
||||
|
||||
_File = collections.namedtuple("_File",
|
||||
["content", "content_type", "content_location",
|
||||
"transfer_encoding"])
|
||||
_File = collections.namedtuple('_File',
|
||||
['content', 'content_type', 'content_location',
|
||||
'transfer_encoding'])
|
||||
|
||||
|
||||
_CSS_URL_PATTERNS = [re.compile(x) for x in [
|
||||
@ -51,9 +51,7 @@ _CSS_URL_PATTERNS = [re.compile(x) for x in [
|
||||
|
||||
def _get_css_imports(data):
|
||||
"""Return all assets that are referenced in the given CSS document.
|
||||
|
||||
The returned URLs are relative to the stylesheet's URL.
|
||||
|
||||
Args:
|
||||
data: The content of the stylesheet to scan as string.
|
||||
"""
|
||||
@ -66,7 +64,7 @@ def _get_css_imports(data):
|
||||
return urls
|
||||
|
||||
|
||||
MHTMLPolicy = policy.default.clone(linesep="\r\n", max_line_length=0)
|
||||
MHTMLPolicy = policy.default.clone(linesep='\r\n', max_line_length=0)
|
||||
|
||||
|
||||
E_BASE64 = encoders.encode_base64
|
||||
@ -123,8 +121,8 @@ class MHTMLWriter():
|
||||
Args:
|
||||
fp: The file-object, opened in "wb" mode.
|
||||
"""
|
||||
msg = multipart.MIMEMultipart("related",
|
||||
"---=_qute-{}".format(uuid.uuid4()))
|
||||
msg = multipart.MIMEMultipart('related',
|
||||
'---=_qute-{}'.format(uuid.uuid4()))
|
||||
|
||||
root = self._create_root_file()
|
||||
msg.attach(root)
|
||||
@ -146,9 +144,9 @@ class MHTMLWriter():
|
||||
def _create_file(self, f):
|
||||
"""Return the single given file as MIMEMultipart."""
|
||||
msg = multipart.MIMEMultipart()
|
||||
msg["Content-Location"] = f.content_location
|
||||
msg['Content-Location'] = f.content_location
|
||||
# Get rid of the default type multipart/mixed
|
||||
del msg["Content-Type"]
|
||||
del msg['Content-Type']
|
||||
if f.content_type:
|
||||
msg.set_type(f.content_type)
|
||||
msg.set_payload(f.content)
|
||||
@ -192,7 +190,7 @@ class _Downloader():
|
||||
web_frame = self.web_view.page().mainFrame()
|
||||
|
||||
self.writer = MHTMLWriter(
|
||||
web_frame.toHtml().encode("utf-8"),
|
||||
web_frame.toHtml().encode('utf-8'),
|
||||
content_location=web_url.toString(),
|
||||
# I've found no way of getting the content type of a QWebView, but
|
||||
# since we're using .toHtml, it's probably safe to say that the
|
||||
@ -201,32 +199,32 @@ class _Downloader():
|
||||
)
|
||||
# Currently only downloading <link> (stylesheets), <script>
|
||||
# (javascript) and <img> (image) elements.
|
||||
elements = web_frame.findAllElements("link, script, img")
|
||||
elements = web_frame.findAllElements('link, script, img')
|
||||
|
||||
for element in elements:
|
||||
element = webelem.WebElementWrapper(element)
|
||||
if "src" in element:
|
||||
element_url = element["src"]
|
||||
elif "href" in element:
|
||||
element_url = element["href"]
|
||||
if 'src' in element:
|
||||
element_url = element['src']
|
||||
elif 'href' in element:
|
||||
element_url = element['href']
|
||||
else:
|
||||
# Might be a local <script> tag or something else
|
||||
continue
|
||||
absolute_url = web_url.resolved(QUrl(element_url))
|
||||
self.fetch_url(absolute_url)
|
||||
|
||||
styles = web_frame.findAllElements("style")
|
||||
styles = web_frame.findAllElements('style')
|
||||
for style in styles:
|
||||
style = webelem.WebElementWrapper(style)
|
||||
if "type" in style and style["type"] != "text/css":
|
||||
if 'type' in style and style['type'] != 'text/css':
|
||||
continue
|
||||
for element_url in _get_css_imports(str(style)):
|
||||
self.fetch_url(web_url.resolved(QUrl(element_url)))
|
||||
|
||||
# Search for references in inline styles
|
||||
for element in web_frame.findAllElements("[style]"):
|
||||
for element in web_frame.findAllElements('[style]'):
|
||||
element = webelem.WebElementWrapper(element)
|
||||
style = element["style"]
|
||||
style = element['style']
|
||||
for element_url in _get_css_imports(style):
|
||||
self.fetch_url(web_url.resolved(QUrl(element_url)))
|
||||
|
||||
@ -243,7 +241,7 @@ class _Downloader():
|
||||
Args:
|
||||
url: The file to download as QUrl.
|
||||
"""
|
||||
if url.scheme() == "data":
|
||||
if url.scheme() == 'data':
|
||||
return
|
||||
# Prevent loading an asset twice
|
||||
if url in self.loaded_urls:
|
||||
@ -252,8 +250,8 @@ class _Downloader():
|
||||
|
||||
log.downloads.debug("loading asset at %s", url)
|
||||
|
||||
download_manager = objreg.get("download-manager", scope="window",
|
||||
window="current")
|
||||
download_manager = objreg.get('download-manager', scope='window',
|
||||
window='current')
|
||||
item = download_manager.get(url, fileobj=_NoCloseBytesIO(),
|
||||
auto_remove=True)
|
||||
self.pending_downloads.add((url, item))
|
||||
@ -272,7 +270,7 @@ class _Downloader():
|
||||
item: The DownloadItem given by the DownloadManager
|
||||
"""
|
||||
self.pending_downloads.remove((url, item))
|
||||
mime = item.raw_headers.get(b"Content-Type", b"")
|
||||
mime = item.raw_headers.get(b'Content-Type', b'')
|
||||
|
||||
# Note that this decoding always works and doesn't produce errors
|
||||
# RFC 7230 (https://tools.ietf.org/html/rfc7230) states:
|
||||
@ -283,9 +281,9 @@ class _Downloader():
|
||||
# Newly defined header fields SHOULD limit their field values to
|
||||
# US-ASCII octets. A recipient SHOULD treat other octets in field
|
||||
# content (obs-text) as opaque data.
|
||||
mime = mime.decode("iso-8859-1")
|
||||
mime = mime.decode('iso-8859-1')
|
||||
|
||||
if mime.lower() == "text/css":
|
||||
if mime.lower() == 'text/css':
|
||||
# We can't always assume that CSS files are UTF-8, but CSS files
|
||||
# shouldn't contain many non-ASCII characters anyway (in most
|
||||
# cases). Using "ignore" lets us decode the file even if it's
|
||||
@ -293,16 +291,16 @@ class _Downloader():
|
||||
# The file written to the MHTML file won't be modified by this
|
||||
# decoding, since there we're taking the original bytestream.
|
||||
try:
|
||||
css_string = item.fileobj.getvalue().decode("utf-8")
|
||||
css_string = item.fileobj.getvalue().decode('utf-8')
|
||||
except UnicodeDecodeError:
|
||||
log.downloads.warning("Invalid UTF-8 data in %s", url)
|
||||
css_string = item.fileobj.getvalue().decode("utf-8", "ignore")
|
||||
css_string = item.fileobj.getvalue().decode('utf-8', 'ignore')
|
||||
import_urls = _get_css_imports(css_string)
|
||||
for import_url in import_urls:
|
||||
absolute_url = url.resolved(QUrl(import_url))
|
||||
self.fetch_url(absolute_url)
|
||||
|
||||
encode = E_QUOPRI if mime.startswith("text/") else E_BASE64
|
||||
encode = E_QUOPRI if mime.startswith('text/') else E_BASE64
|
||||
self.writer.add_file(url.toString(), item.fileobj.getvalue(), mime,
|
||||
encode)
|
||||
item.fileobj.actual_close()
|
||||
@ -325,7 +323,7 @@ class _Downloader():
|
||||
log.downloads.debug("Oops! Download already gone: %s", item)
|
||||
return
|
||||
item.fileobj.actual_close()
|
||||
self.writer.add_file(url.toString(), b"")
|
||||
self.writer.add_file(url.toString(), b'')
|
||||
if self.pending_downloads:
|
||||
return
|
||||
self.finish_file()
|
||||
@ -337,9 +335,9 @@ class _Downloader():
|
||||
return
|
||||
self._finished = True
|
||||
log.downloads.debug("All assets downloaded, ready to finish off!")
|
||||
with open(self.dest, "wb") as file_output:
|
||||
with open(self.dest, 'wb') as file_output:
|
||||
self.writer.write_to(file_output)
|
||||
message.info("current", "Page saved as {}".format(self.dest))
|
||||
message.info('current', "Page saved as {}".format(self.dest))
|
||||
|
||||
def collect_zombies(self):
|
||||
"""Collect done downloads and add their data to the MHTML file.
|
||||
@ -378,6 +376,6 @@ def start_download(dest):
|
||||
dest: The filename where the resulting file should be saved.
|
||||
"""
|
||||
dest = os.path.expanduser(dest)
|
||||
web_view = objreg.get("webview", scope="tab", tab="current")
|
||||
web_view = objreg.get('webview', scope='tab', tab='current')
|
||||
loader = _Downloader(web_view, dest)
|
||||
loader.run()
|
||||
|
@ -28,12 +28,12 @@ class Checker:
|
||||
return self.fp.getvalue()
|
||||
|
||||
def expect(self, expected):
|
||||
actual = self.value.decode("ascii")
|
||||
actual = self.value.decode('ascii')
|
||||
# Make sure there are no stray \r or \n
|
||||
assert re.search(r"\r[^\n]", actual) is None
|
||||
assert re.search(r"[^\r]\n", actual) is None
|
||||
actual = actual.replace("\r\n", "\n")
|
||||
expected = textwrap.dedent(expected).lstrip("\n")
|
||||
assert re.search(r'\r[^\n]', actual) is None
|
||||
assert re.search(r'[^\r]\n', actual) is None
|
||||
actual = actual.replace('\r\n', '\n')
|
||||
expected = textwrap.dedent(expected).lstrip('\n')
|
||||
assert expected == actual
|
||||
|
||||
|
||||
@ -43,11 +43,11 @@ def checker():
|
||||
|
||||
|
||||
def test_quoted_printable_umlauts(checker):
|
||||
content = "Die süße Hündin läuft in die Höhle des Bären"
|
||||
content = content.encode("iso-8859-1")
|
||||
content = 'Die süße Hündin läuft in die Höhle des Bären'
|
||||
content = content.encode('iso-8859-1')
|
||||
writer = mhtml.MHTMLWriter(root_content=content,
|
||||
content_location="localhost",
|
||||
content_type="text/plain")
|
||||
content_location='localhost',
|
||||
content_type='text/plain')
|
||||
writer.write_to(checker.fp)
|
||||
checker.expect("""
|
||||
Content-Type: multipart/related; boundary="---=_qute-UUID"
|
||||
@ -64,15 +64,15 @@ def test_quoted_printable_umlauts(checker):
|
||||
""")
|
||||
|
||||
|
||||
@pytest.mark.parametrize("header, value", [
|
||||
("content_location", "http://brötli.com"),
|
||||
("content_type", "text/pläin"),
|
||||
@pytest.mark.parametrize('header, value', [
|
||||
('content_location', 'http://brötli.com'),
|
||||
('content_type', 'text/pläin'),
|
||||
])
|
||||
def test_refuses_non_ascii_header_value(checker, header, value):
|
||||
defaults = {
|
||||
"root_content": b"",
|
||||
"content_location": "http://example.com",
|
||||
"content_type": "text/plain",
|
||||
'root_content': b'',
|
||||
'content_location': 'http://example.com',
|
||||
'content_type': 'text/plain',
|
||||
}
|
||||
defaults[header] = value
|
||||
writer = mhtml.MHTMLWriter(**defaults)
|
||||
@ -82,12 +82,12 @@ def test_refuses_non_ascii_header_value(checker, header, value):
|
||||
|
||||
|
||||
def test_file_encoded_as_base64(checker):
|
||||
content = b"Image file attached"
|
||||
writer = mhtml.MHTMLWriter(root_content=content, content_type="text/plain",
|
||||
content_location="http://example.com")
|
||||
writer.add_file(location="http://a.example.com/image.png",
|
||||
content="\U0001F601 image data".encode("utf-8"),
|
||||
content_type="image/png",
|
||||
content = b'Image file attached'
|
||||
writer = mhtml.MHTMLWriter(root_content=content, content_type='text/plain',
|
||||
content_location='http://example.com')
|
||||
writer.add_file(location='http://a.example.com/image.png',
|
||||
content='\U0001F601 image data'.encode('utf-8'),
|
||||
content_type='image/png',
|
||||
transfer_encoding=mhtml.E_BASE64)
|
||||
writer.write_to(checker.fp)
|
||||
checker.expect("""
|
||||
@ -113,28 +113,28 @@ def test_file_encoded_as_base64(checker):
|
||||
""")
|
||||
|
||||
|
||||
@pytest.mark.parametrize("transfer_encoding", [mhtml.E_BASE64, mhtml.E_QUOPRI],
|
||||
ids=["base64", "quoted-printable"])
|
||||
@pytest.mark.parametrize('transfer_encoding', [mhtml.E_BASE64, mhtml.E_QUOPRI],
|
||||
ids=['base64', 'quoted-printable'])
|
||||
def test_payload_lines_wrap(checker, transfer_encoding):
|
||||
payload = b"1234567890" * 10
|
||||
writer = mhtml.MHTMLWriter(root_content=b"", content_type="text/plain",
|
||||
content_location="http://example.com")
|
||||
writer.add_file(location="http://example.com/payload", content=payload,
|
||||
content_type="text/plain",
|
||||
payload = b'1234567890' * 10
|
||||
writer = mhtml.MHTMLWriter(root_content=b'', content_type='text/plain',
|
||||
content_location='http://example.com')
|
||||
writer.add_file(location='http://example.com/payload', content=payload,
|
||||
content_type='text/plain',
|
||||
transfer_encoding=transfer_encoding)
|
||||
writer.write_to(checker.fp)
|
||||
for line in checker.value.split(b"\r\n"):
|
||||
for line in checker.value.split(b'\r\n'):
|
||||
assert len(line) < 77
|
||||
|
||||
|
||||
def test_files_appear_sorted(checker):
|
||||
writer = mhtml.MHTMLWriter(root_content=b"root file",
|
||||
content_type="text/plain",
|
||||
content_location="http://www.example.com/")
|
||||
for subdomain in "ahgbizt":
|
||||
writer.add_file(location="http://{}.example.com/".format(subdomain),
|
||||
content="file {}".format(subdomain).encode("utf-8"),
|
||||
content_type="text/plain",
|
||||
writer = mhtml.MHTMLWriter(root_content=b'root file',
|
||||
content_type='text/plain',
|
||||
content_location='http://www.example.com/')
|
||||
for subdomain in 'ahgbizt':
|
||||
writer.add_file(location='http://{}.example.com/'.format(subdomain),
|
||||
content='file {}'.format(subdomain).encode('utf-8'),
|
||||
content_type='text/plain',
|
||||
transfer_encoding=mhtml.E_QUOPRI)
|
||||
writer.write_to(checker.fp)
|
||||
checker.expect("""
|
||||
@ -202,10 +202,10 @@ def test_files_appear_sorted(checker):
|
||||
|
||||
|
||||
def test_empty_content_type(checker):
|
||||
writer = mhtml.MHTMLWriter(root_content=b"",
|
||||
content_location="http://example.com/",
|
||||
content_type="text/plain")
|
||||
writer.add_file("http://example.com/file", b"file content")
|
||||
writer = mhtml.MHTMLWriter(root_content=b'',
|
||||
content_location='http://example.com/',
|
||||
content_type='text/plain')
|
||||
writer.add_file('http://example.com/file', b'file content')
|
||||
writer.write_to(checker.fp)
|
||||
checker.expect("""
|
||||
Content-Type: multipart/related; boundary="---=_qute-UUID"
|
||||
@ -229,11 +229,11 @@ def test_empty_content_type(checker):
|
||||
|
||||
|
||||
def test_removing_file_from_mhtml(checker):
|
||||
writer = mhtml.MHTMLWriter(root_content=b"root",
|
||||
content_location="http://example.com/",
|
||||
content_type="text/plain")
|
||||
writer.add_file("http://evil.com/", b"file content")
|
||||
writer.remove_file("http://evil.com/")
|
||||
writer = mhtml.MHTMLWriter(root_content=b'root',
|
||||
content_location='http://example.com/',
|
||||
content_type='text/plain')
|
||||
writer.add_file('http://evil.com/', b'file content')
|
||||
writer.remove_file('http://evil.com/')
|
||||
writer.write_to(checker.fp)
|
||||
checker.expect("""
|
||||
Content-Type: multipart/related; boundary="---=_qute-UUID"
|
||||
@ -250,16 +250,16 @@ def test_removing_file_from_mhtml(checker):
|
||||
""")
|
||||
|
||||
|
||||
@pytest.mark.parametrize("style, expected_urls", [
|
||||
("@import 'default.css'", ["default.css"]),
|
||||
('@import "default.css"', ["default.css"]),
|
||||
("@import \t 'tabbed.css'", ["tabbed.css"]),
|
||||
("@import url('default.css')", ["default.css"]),
|
||||
@pytest.mark.parametrize('style, expected_urls', [
|
||||
("@import 'default.css'", ['default.css']),
|
||||
('@import "default.css"', ['default.css']),
|
||||
("@import \t 'tabbed.css'", ['tabbed.css']),
|
||||
("@import url('default.css')", ['default.css']),
|
||||
("""body {
|
||||
background: url("/bg-img.png")
|
||||
}""", ["/bg-img.png"]),
|
||||
("background: url(folder/file.png)", ["folder/file.png"]),
|
||||
("content: url()", []),
|
||||
}""", ['/bg-img.png']),
|
||||
('background: url(folder/file.png)', ['folder/file.png']),
|
||||
('content: url()', []),
|
||||
])
|
||||
def test_css_url_scanner(style, expected_urls):
|
||||
expected_urls.sort()
|
||||
|
Loading…
Reference in New Issue
Block a user