String quote style changes

Use "" for user-facing strings.
Use '' for internal strings.
Exception: when a string itself contains quote characters, use whichever quote style avoids having to escape them.
This commit is contained in:
Daniel 2015-09-24 21:56:02 +02:00
parent 9bf9124324
commit a092ef1fe6
2 changed files with 84 additions and 86 deletions

View File

@ -35,9 +35,9 @@ from qutebrowser.browser import webelem
from qutebrowser.utils import log, objreg, message from qutebrowser.utils import log, objreg, message
_File = collections.namedtuple("_File", _File = collections.namedtuple('_File',
["content", "content_type", "content_location", ['content', 'content_type', 'content_location',
"transfer_encoding"]) 'transfer_encoding'])
_CSS_URL_PATTERNS = [re.compile(x) for x in [ _CSS_URL_PATTERNS = [re.compile(x) for x in [
@ -51,9 +51,7 @@ _CSS_URL_PATTERNS = [re.compile(x) for x in [
def _get_css_imports(data): def _get_css_imports(data):
"""Return all assets that are referenced in the given CSS document. """Return all assets that are referenced in the given CSS document.
The returned URLs are relative to the stylesheet's URL. The returned URLs are relative to the stylesheet's URL.
Args: Args:
data: The content of the stylesheet to scan as string. data: The content of the stylesheet to scan as string.
""" """
@ -66,7 +64,7 @@ def _get_css_imports(data):
return urls return urls
MHTMLPolicy = policy.default.clone(linesep="\r\n", max_line_length=0) MHTMLPolicy = policy.default.clone(linesep='\r\n', max_line_length=0)
E_BASE64 = encoders.encode_base64 E_BASE64 = encoders.encode_base64
@ -123,8 +121,8 @@ class MHTMLWriter():
Args: Args:
fp: The file-object, opened in "wb" mode. fp: The file-object, opened in "wb" mode.
""" """
msg = multipart.MIMEMultipart("related", msg = multipart.MIMEMultipart('related',
"---=_qute-{}".format(uuid.uuid4())) '---=_qute-{}'.format(uuid.uuid4()))
root = self._create_root_file() root = self._create_root_file()
msg.attach(root) msg.attach(root)
@ -146,9 +144,9 @@ class MHTMLWriter():
def _create_file(self, f): def _create_file(self, f):
"""Return the single given file as MIMEMultipart.""" """Return the single given file as MIMEMultipart."""
msg = multipart.MIMEMultipart() msg = multipart.MIMEMultipart()
msg["Content-Location"] = f.content_location msg['Content-Location'] = f.content_location
# Get rid of the default type multipart/mixed # Get rid of the default type multipart/mixed
del msg["Content-Type"] del msg['Content-Type']
if f.content_type: if f.content_type:
msg.set_type(f.content_type) msg.set_type(f.content_type)
msg.set_payload(f.content) msg.set_payload(f.content)
@ -192,7 +190,7 @@ class _Downloader():
web_frame = self.web_view.page().mainFrame() web_frame = self.web_view.page().mainFrame()
self.writer = MHTMLWriter( self.writer = MHTMLWriter(
web_frame.toHtml().encode("utf-8"), web_frame.toHtml().encode('utf-8'),
content_location=web_url.toString(), content_location=web_url.toString(),
# I've found no way of getting the content type of a QWebView, but # I've found no way of getting the content type of a QWebView, but
# since we're using .toHtml, it's probably safe to say that the # since we're using .toHtml, it's probably safe to say that the
@ -201,32 +199,32 @@ class _Downloader():
) )
# Currently only downloading <link> (stylesheets), <script> # Currently only downloading <link> (stylesheets), <script>
# (javascript) and <img> (image) elements. # (javascript) and <img> (image) elements.
elements = web_frame.findAllElements("link, script, img") elements = web_frame.findAllElements('link, script, img')
for element in elements: for element in elements:
element = webelem.WebElementWrapper(element) element = webelem.WebElementWrapper(element)
if "src" in element: if 'src' in element:
element_url = element["src"] element_url = element['src']
elif "href" in element: elif 'href' in element:
element_url = element["href"] element_url = element['href']
else: else:
# Might be a local <script> tag or something else # Might be a local <script> tag or something else
continue continue
absolute_url = web_url.resolved(QUrl(element_url)) absolute_url = web_url.resolved(QUrl(element_url))
self.fetch_url(absolute_url) self.fetch_url(absolute_url)
styles = web_frame.findAllElements("style") styles = web_frame.findAllElements('style')
for style in styles: for style in styles:
style = webelem.WebElementWrapper(style) style = webelem.WebElementWrapper(style)
if "type" in style and style["type"] != "text/css": if 'type' in style and style['type'] != 'text/css':
continue continue
for element_url in _get_css_imports(str(style)): for element_url in _get_css_imports(str(style)):
self.fetch_url(web_url.resolved(QUrl(element_url))) self.fetch_url(web_url.resolved(QUrl(element_url)))
# Search for references in inline styles # Search for references in inline styles
for element in web_frame.findAllElements("[style]"): for element in web_frame.findAllElements('[style]'):
element = webelem.WebElementWrapper(element) element = webelem.WebElementWrapper(element)
style = element["style"] style = element['style']
for element_url in _get_css_imports(style): for element_url in _get_css_imports(style):
self.fetch_url(web_url.resolved(QUrl(element_url))) self.fetch_url(web_url.resolved(QUrl(element_url)))
@ -243,7 +241,7 @@ class _Downloader():
Args: Args:
url: The file to download as QUrl. url: The file to download as QUrl.
""" """
if url.scheme() == "data": if url.scheme() == 'data':
return return
# Prevent loading an asset twice # Prevent loading an asset twice
if url in self.loaded_urls: if url in self.loaded_urls:
@ -252,8 +250,8 @@ class _Downloader():
log.downloads.debug("loading asset at %s", url) log.downloads.debug("loading asset at %s", url)
download_manager = objreg.get("download-manager", scope="window", download_manager = objreg.get('download-manager', scope='window',
window="current") window='current')
item = download_manager.get(url, fileobj=_NoCloseBytesIO(), item = download_manager.get(url, fileobj=_NoCloseBytesIO(),
auto_remove=True) auto_remove=True)
self.pending_downloads.add((url, item)) self.pending_downloads.add((url, item))
@ -272,7 +270,7 @@ class _Downloader():
item: The DownloadItem given by the DownloadManager item: The DownloadItem given by the DownloadManager
""" """
self.pending_downloads.remove((url, item)) self.pending_downloads.remove((url, item))
mime = item.raw_headers.get(b"Content-Type", b"") mime = item.raw_headers.get(b'Content-Type', b'')
# Note that this decoding always works and doesn't produce errors # Note that this decoding always works and doesn't produce errors
# RFC 7230 (https://tools.ietf.org/html/rfc7230) states: # RFC 7230 (https://tools.ietf.org/html/rfc7230) states:
@ -283,9 +281,9 @@ class _Downloader():
# Newly defined header fields SHOULD limit their field values to # Newly defined header fields SHOULD limit their field values to
# US-ASCII octets. A recipient SHOULD treat other octets in field # US-ASCII octets. A recipient SHOULD treat other octets in field
# content (obs-text) as opaque data. # content (obs-text) as opaque data.
mime = mime.decode("iso-8859-1") mime = mime.decode('iso-8859-1')
if mime.lower() == "text/css": if mime.lower() == 'text/css':
# We can't always assume that CSS files are UTF-8, but CSS files # We can't always assume that CSS files are UTF-8, but CSS files
# shouldn't contain many non-ASCII characters anyway (in most # shouldn't contain many non-ASCII characters anyway (in most
# cases). Using "ignore" lets us decode the file even if it's # cases). Using "ignore" lets us decode the file even if it's
@ -293,16 +291,16 @@ class _Downloader():
# The file written to the MHTML file won't be modified by this # The file written to the MHTML file won't be modified by this
# decoding, since there we're taking the original bytestream. # decoding, since there we're taking the original bytestream.
try: try:
css_string = item.fileobj.getvalue().decode("utf-8") css_string = item.fileobj.getvalue().decode('utf-8')
except UnicodeDecodeError: except UnicodeDecodeError:
log.downloads.warning("Invalid UTF-8 data in %s", url) log.downloads.warning("Invalid UTF-8 data in %s", url)
css_string = item.fileobj.getvalue().decode("utf-8", "ignore") css_string = item.fileobj.getvalue().decode('utf-8', 'ignore')
import_urls = _get_css_imports(css_string) import_urls = _get_css_imports(css_string)
for import_url in import_urls: for import_url in import_urls:
absolute_url = url.resolved(QUrl(import_url)) absolute_url = url.resolved(QUrl(import_url))
self.fetch_url(absolute_url) self.fetch_url(absolute_url)
encode = E_QUOPRI if mime.startswith("text/") else E_BASE64 encode = E_QUOPRI if mime.startswith('text/') else E_BASE64
self.writer.add_file(url.toString(), item.fileobj.getvalue(), mime, self.writer.add_file(url.toString(), item.fileobj.getvalue(), mime,
encode) encode)
item.fileobj.actual_close() item.fileobj.actual_close()
@ -325,7 +323,7 @@ class _Downloader():
log.downloads.debug("Oops! Download already gone: %s", item) log.downloads.debug("Oops! Download already gone: %s", item)
return return
item.fileobj.actual_close() item.fileobj.actual_close()
self.writer.add_file(url.toString(), b"") self.writer.add_file(url.toString(), b'')
if self.pending_downloads: if self.pending_downloads:
return return
self.finish_file() self.finish_file()
@ -337,9 +335,9 @@ class _Downloader():
return return
self._finished = True self._finished = True
log.downloads.debug("All assets downloaded, ready to finish off!") log.downloads.debug("All assets downloaded, ready to finish off!")
with open(self.dest, "wb") as file_output: with open(self.dest, 'wb') as file_output:
self.writer.write_to(file_output) self.writer.write_to(file_output)
message.info("current", "Page saved as {}".format(self.dest)) message.info('current', "Page saved as {}".format(self.dest))
def collect_zombies(self): def collect_zombies(self):
"""Collect done downloads and add their data to the MHTML file. """Collect done downloads and add their data to the MHTML file.
@ -378,6 +376,6 @@ def start_download(dest):
dest: The filename where the resulting file should be saved. dest: The filename where the resulting file should be saved.
""" """
dest = os.path.expanduser(dest) dest = os.path.expanduser(dest)
web_view = objreg.get("webview", scope="tab", tab="current") web_view = objreg.get('webview', scope='tab', tab='current')
loader = _Downloader(web_view, dest) loader = _Downloader(web_view, dest)
loader.run() loader.run()

View File

@ -28,12 +28,12 @@ class Checker:
return self.fp.getvalue() return self.fp.getvalue()
def expect(self, expected): def expect(self, expected):
actual = self.value.decode("ascii") actual = self.value.decode('ascii')
# Make sure there are no stray \r or \n # Make sure there are no stray \r or \n
assert re.search(r"\r[^\n]", actual) is None assert re.search(r'\r[^\n]', actual) is None
assert re.search(r"[^\r]\n", actual) is None assert re.search(r'[^\r]\n', actual) is None
actual = actual.replace("\r\n", "\n") actual = actual.replace('\r\n', '\n')
expected = textwrap.dedent(expected).lstrip("\n") expected = textwrap.dedent(expected).lstrip('\n')
assert expected == actual assert expected == actual
@ -43,11 +43,11 @@ def checker():
def test_quoted_printable_umlauts(checker): def test_quoted_printable_umlauts(checker):
content = "Die süße Hündin läuft in die Höhle des Bären" content = 'Die süße Hündin läuft in die Höhle des Bären'
content = content.encode("iso-8859-1") content = content.encode('iso-8859-1')
writer = mhtml.MHTMLWriter(root_content=content, writer = mhtml.MHTMLWriter(root_content=content,
content_location="localhost", content_location='localhost',
content_type="text/plain") content_type='text/plain')
writer.write_to(checker.fp) writer.write_to(checker.fp)
checker.expect(""" checker.expect("""
Content-Type: multipart/related; boundary="---=_qute-UUID" Content-Type: multipart/related; boundary="---=_qute-UUID"
@ -64,15 +64,15 @@ def test_quoted_printable_umlauts(checker):
""") """)
@pytest.mark.parametrize("header, value", [ @pytest.mark.parametrize('header, value', [
("content_location", "http://brötli.com"), ('content_location', 'http://brötli.com'),
("content_type", "text/pläin"), ('content_type', 'text/pläin'),
]) ])
def test_refuses_non_ascii_header_value(checker, header, value): def test_refuses_non_ascii_header_value(checker, header, value):
defaults = { defaults = {
"root_content": b"", 'root_content': b'',
"content_location": "http://example.com", 'content_location': 'http://example.com',
"content_type": "text/plain", 'content_type': 'text/plain',
} }
defaults[header] = value defaults[header] = value
writer = mhtml.MHTMLWriter(**defaults) writer = mhtml.MHTMLWriter(**defaults)
@ -82,12 +82,12 @@ def test_refuses_non_ascii_header_value(checker, header, value):
def test_file_encoded_as_base64(checker): def test_file_encoded_as_base64(checker):
content = b"Image file attached" content = b'Image file attached'
writer = mhtml.MHTMLWriter(root_content=content, content_type="text/plain", writer = mhtml.MHTMLWriter(root_content=content, content_type='text/plain',
content_location="http://example.com") content_location='http://example.com')
writer.add_file(location="http://a.example.com/image.png", writer.add_file(location='http://a.example.com/image.png',
content="\U0001F601 image data".encode("utf-8"), content='\U0001F601 image data'.encode('utf-8'),
content_type="image/png", content_type='image/png',
transfer_encoding=mhtml.E_BASE64) transfer_encoding=mhtml.E_BASE64)
writer.write_to(checker.fp) writer.write_to(checker.fp)
checker.expect(""" checker.expect("""
@ -113,28 +113,28 @@ def test_file_encoded_as_base64(checker):
""") """)
@pytest.mark.parametrize("transfer_encoding", [mhtml.E_BASE64, mhtml.E_QUOPRI], @pytest.mark.parametrize('transfer_encoding', [mhtml.E_BASE64, mhtml.E_QUOPRI],
ids=["base64", "quoted-printable"]) ids=['base64', 'quoted-printable'])
def test_payload_lines_wrap(checker, transfer_encoding): def test_payload_lines_wrap(checker, transfer_encoding):
payload = b"1234567890" * 10 payload = b'1234567890' * 10
writer = mhtml.MHTMLWriter(root_content=b"", content_type="text/plain", writer = mhtml.MHTMLWriter(root_content=b'', content_type='text/plain',
content_location="http://example.com") content_location='http://example.com')
writer.add_file(location="http://example.com/payload", content=payload, writer.add_file(location='http://example.com/payload', content=payload,
content_type="text/plain", content_type='text/plain',
transfer_encoding=transfer_encoding) transfer_encoding=transfer_encoding)
writer.write_to(checker.fp) writer.write_to(checker.fp)
for line in checker.value.split(b"\r\n"): for line in checker.value.split(b'\r\n'):
assert len(line) < 77 assert len(line) < 77
def test_files_appear_sorted(checker): def test_files_appear_sorted(checker):
writer = mhtml.MHTMLWriter(root_content=b"root file", writer = mhtml.MHTMLWriter(root_content=b'root file',
content_type="text/plain", content_type='text/plain',
content_location="http://www.example.com/") content_location='http://www.example.com/')
for subdomain in "ahgbizt": for subdomain in 'ahgbizt':
writer.add_file(location="http://{}.example.com/".format(subdomain), writer.add_file(location='http://{}.example.com/'.format(subdomain),
content="file {}".format(subdomain).encode("utf-8"), content='file {}'.format(subdomain).encode('utf-8'),
content_type="text/plain", content_type='text/plain',
transfer_encoding=mhtml.E_QUOPRI) transfer_encoding=mhtml.E_QUOPRI)
writer.write_to(checker.fp) writer.write_to(checker.fp)
checker.expect(""" checker.expect("""
@ -202,10 +202,10 @@ def test_files_appear_sorted(checker):
def test_empty_content_type(checker): def test_empty_content_type(checker):
writer = mhtml.MHTMLWriter(root_content=b"", writer = mhtml.MHTMLWriter(root_content=b'',
content_location="http://example.com/", content_location='http://example.com/',
content_type="text/plain") content_type='text/plain')
writer.add_file("http://example.com/file", b"file content") writer.add_file('http://example.com/file', b'file content')
writer.write_to(checker.fp) writer.write_to(checker.fp)
checker.expect(""" checker.expect("""
Content-Type: multipart/related; boundary="---=_qute-UUID" Content-Type: multipart/related; boundary="---=_qute-UUID"
@ -229,11 +229,11 @@ def test_empty_content_type(checker):
def test_removing_file_from_mhtml(checker): def test_removing_file_from_mhtml(checker):
writer = mhtml.MHTMLWriter(root_content=b"root", writer = mhtml.MHTMLWriter(root_content=b'root',
content_location="http://example.com/", content_location='http://example.com/',
content_type="text/plain") content_type='text/plain')
writer.add_file("http://evil.com/", b"file content") writer.add_file('http://evil.com/', b'file content')
writer.remove_file("http://evil.com/") writer.remove_file('http://evil.com/')
writer.write_to(checker.fp) writer.write_to(checker.fp)
checker.expect(""" checker.expect("""
Content-Type: multipart/related; boundary="---=_qute-UUID" Content-Type: multipart/related; boundary="---=_qute-UUID"
@ -250,16 +250,16 @@ def test_removing_file_from_mhtml(checker):
""") """)
@pytest.mark.parametrize("style, expected_urls", [ @pytest.mark.parametrize('style, expected_urls', [
("@import 'default.css'", ["default.css"]), ("@import 'default.css'", ['default.css']),
('@import "default.css"', ["default.css"]), ('@import "default.css"', ['default.css']),
("@import \t 'tabbed.css'", ["tabbed.css"]), ("@import \t 'tabbed.css'", ['tabbed.css']),
("@import url('default.css')", ["default.css"]), ("@import url('default.css')", ['default.css']),
("""body { ("""body {
background: url("/bg-img.png") background: url("/bg-img.png")
}""", ["/bg-img.png"]), }""", ['/bg-img.png']),
("background: url(folder/file.png)", ["folder/file.png"]), ('background: url(folder/file.png)', ['folder/file.png']),
("content: url()", []), ('content: url()', []),
]) ])
def test_css_url_scanner(style, expected_urls): def test_css_url_scanner(style, expected_urls):
expected_urls.sort() expected_urls.sort()