diff --git a/qutebrowser/browser/mhtml.py b/qutebrowser/browser/mhtml.py
index cc394b21b..7e0422eaf 100644
--- a/qutebrowser/browser/mhtml.py
+++ b/qutebrowser/browser/mhtml.py
@@ -35,6 +35,10 @@ from PyQt5.QtCore import QUrl
from qutebrowser.browser import webelem
from qutebrowser.utils import log, objreg, message
+try:
+ import cssutils
+except ImportError:
+ cssutils = None
_File = collections.namedtuple('_File',
['content', 'content_type', 'content_location',
@@ -50,7 +54,7 @@ _CSS_URL_PATTERNS = [re.compile(x) for x in [
]]
-def _get_css_imports(data):
+def _get_css_imports_regex(data):
"""Return all assets that are referenced in the given CSS document.
The returned URLs are relative to the stylesheet's URL.
@@ -67,6 +71,47 @@ def _get_css_imports(data):
return urls
+def _get_css_imports_cssutils(data, inline=False):
+    """Collect the asset URLs referenced by a CSS document via cssutils.
+
+    The returned URLs are relative to the stylesheet's URL.
+
+    Args:
+        data: The content of the stylesheet to scan as string.
+        inline: True if the argument is an inline HTML style attribute.
+    """
+    # The fetcher stub answers every URL with empty content; presumably this
+    # keeps cssutils from loading @import targets (confirm in cssutils docs).
+    css_parser = cssutils.CSSParser(fetcher=lambda url: (None, ""),
+                                    validate=False)
+    if inline:
+        # An inline style is a bare declaration block: walk each property
+        # (background, color, ...) and keep the non-empty url(...) values.
+        style = css_parser.parseStyle(data)
+        return [
+            val.uri
+            for prop in style
+            for val in prop.propertyValue
+            if isinstance(val, cssutils.css.URIValue) and val.uri
+        ]
+    return list(cssutils.getUrls(css_parser.parseString(data)))
+
+
+def _get_css_imports(data, inline=False):
+    """Return all assets that are referenced in the given CSS document.
+
+    Dispatches to the cssutils scanner when cssutils could be imported and
+    to the regex scanner otherwise. URLs are relative to the stylesheet.
+
+    Args:
+        data: The content of the stylesheet to scan as string.
+        inline: True if the argument is an inline HTML style attribute.
+    """
+    if cssutils is not None:
+        return _get_css_imports_cssutils(data, inline)
+    return _get_css_imports_regex(data)
+
+
MHTMLPolicy = email.policy.default.clone(linesep='\r\n', max_line_length=0)
@@ -228,7 +273,7 @@ class _Downloader():
for element in web_frame.findAllElements('[style]'):
element = webelem.WebElementWrapper(element)
style = element['style']
- for element_url in _get_css_imports(style):
+ for element_url in _get_css_imports(style, inline=True):
self.fetch_url(web_url.resolved(QUrl(element_url)))
# Shortcut if no assets need to be downloaded, otherwise the file would
diff --git a/requirements.txt b/requirements.txt
index fd5f81a0b..9e07256bd 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -5,3 +5,4 @@ pyPEG2==2.15.2
PyYAML==3.11
colorama==0.3.3
colorlog==2.6.0
+cssutils==1.0
diff --git a/tests/unit/browser/test_mhtml.py b/tests/unit/browser/test_mhtml.py
index 37d1131b3..71c6ca2d6 100644
--- a/tests/unit/browser/test_mhtml.py
+++ b/tests/unit/browser/test_mhtml.py
@@ -250,20 +250,26 @@ def test_removing_file_from_mhtml(checker):
""")
-@pytest.mark.parametrize('style, expected_urls', [
- ("@import 'default.css'", ['default.css']),
- ('@import "default.css"', ['default.css']),
- ("@import \t 'tabbed.css'", ['tabbed.css']),
- ("@import url('default.css')", ['default.css']),
- ("""body {
+@pytest.mark.parametrize('has_cssutils', [True, False])
+@pytest.mark.parametrize('inline, style, expected_urls', [
+ (False, "@import 'default.css'", ['default.css']),
+ (False, '@import "default.css"', ['default.css']),
+ (False, "@import \t 'tabbed.css'", ['tabbed.css']),
+ (False, "@import url('default.css')", ['default.css']),
+ (False, """body {
background: url("/bg-img.png")
}""", ['/bg-img.png']),
- ('background: url(folder/file.png)', ['folder/file.png']),
- ('content: url()', []),
+ (True, 'background: url(folder/file.png) no-repeat', ['folder/file.png']),
+ (True, 'content: url()', []),
])
-def test_css_url_scanner(style, expected_urls):
+def test_css_url_scanner(monkeypatch, has_cssutils, inline, style,
+ expected_urls):
+ if has_cssutils:  # this leg needs the real cssutils import to have worked
+ assert mhtml.cssutils is not None
+ else:  # pretend cssutils is missing so the regex scanner is used
+ monkeypatch.setattr('qutebrowser.browser.mhtml.cssutils', None)
expected_urls.sort()
- urls = mhtml._get_css_imports(style)
+ urls = mhtml._get_css_imports(style, inline=inline)
urls.sort()
assert urls == expected_urls