diff --git a/qutebrowser/browser/mhtml.py b/qutebrowser/browser/mhtml.py index cc394b21b..7e0422eaf 100644 --- a/qutebrowser/browser/mhtml.py +++ b/qutebrowser/browser/mhtml.py @@ -35,6 +35,10 @@ from PyQt5.QtCore import QUrl from qutebrowser.browser import webelem from qutebrowser.utils import log, objreg, message +try: + import cssutils +except ImportError: + cssutils = None _File = collections.namedtuple('_File', ['content', 'content_type', 'content_location', @@ -50,7 +54,7 @@ _CSS_URL_PATTERNS = [re.compile(x) for x in [ ]] -def _get_css_imports(data): +def _get_css_imports_regex(data): """Return all assets that are referenced in the given CSS document. The returned URLs are relative to the stylesheet's URL. @@ -67,6 +71,47 @@ def _get_css_imports(data): return urls +def _get_css_imports_cssutils(data, inline=False): + """Return all assets that are referenced in the given CSS document. + + The returned URLs are relative to the stylesheet's URL. + + Args: + data: The content of the stylesheet to scan as a string. + inline: True if the argument is an inline HTML style attribute. + """ + parser = cssutils.CSSParser(fetcher=lambda url: (None, ""), validate=False) + if not inline: + sheet = parser.parseString(data) + return list(cssutils.getUrls(sheet)) + else: + urls = [] + declaration = parser.parseStyle(data) + # prop = background, color, margin, ... + for prop in declaration: + # value = red, 10px, url(foobar), ... + for value in prop.propertyValue: + if isinstance(value, cssutils.css.URIValue): + if value.uri: + urls.append(value.uri) + return urls + + +def _get_css_imports(data, inline=False): + """Return all assets that are referenced in the given CSS document. + + The returned URLs are relative to the stylesheet's URL. + + Args: + data: The content of the stylesheet to scan as a string. + inline: True if the argument is an inline HTML style attribute. 
+ """ + if cssutils is None: + return _get_css_imports_regex(data) + else: + return _get_css_imports_cssutils(data, inline) + + MHTMLPolicy = email.policy.default.clone(linesep='\r\n', max_line_length=0) @@ -228,7 +273,7 @@ class _Downloader(): for element in web_frame.findAllElements('[style]'): element = webelem.WebElementWrapper(element) style = element['style'] - for element_url in _get_css_imports(style): + for element_url in _get_css_imports(style, inline=True): self.fetch_url(web_url.resolved(QUrl(element_url))) # Shortcut if no assets need to be downloaded, otherwise the file would diff --git a/requirements.txt b/requirements.txt index fd5f81a0b..9e07256bd 100644 --- a/requirements.txt +++ b/requirements.txt @@ -5,3 +5,4 @@ pyPEG2==2.15.2 PyYAML==3.11 colorama==0.3.3 colorlog==2.6.0 +cssutils==1.0 diff --git a/tests/unit/browser/test_mhtml.py b/tests/unit/browser/test_mhtml.py index 37d1131b3..71c6ca2d6 100644 --- a/tests/unit/browser/test_mhtml.py +++ b/tests/unit/browser/test_mhtml.py @@ -250,20 +250,26 @@ def test_removing_file_from_mhtml(checker): """) -@pytest.mark.parametrize('style, expected_urls', [ - ("@import 'default.css'", ['default.css']), - ('@import "default.css"', ['default.css']), - ("@import \t 'tabbed.css'", ['tabbed.css']), - ("@import url('default.css')", ['default.css']), - ("""body { +@pytest.mark.parametrize('has_cssutils', [True, False]) +@pytest.mark.parametrize('inline, style, expected_urls', [ + (False, "@import 'default.css'", ['default.css']), + (False, '@import "default.css"', ['default.css']), + (False, "@import \t 'tabbed.css'", ['tabbed.css']), + (False, "@import url('default.css')", ['default.css']), + (False, """body { background: url("/bg-img.png") }""", ['/bg-img.png']), - ('background: url(folder/file.png)', ['folder/file.png']), - ('content: url()', []), + (True, 'background: url(folder/file.png) no-repeat', ['folder/file.png']), + (True, 'content: url()', []), ]) -def test_css_url_scanner(style, expected_urls): +def 
test_css_url_scanner(monkeypatch, has_cssutils, inline, style, + expected_urls): + if has_cssutils: + assert mhtml.cssutils is not None + else: + monkeypatch.setattr('qutebrowser.browser.mhtml.cssutils', None) expected_urls.sort() - urls = mhtml._get_css_imports(style) + urls = mhtml._get_css_imports(style, inline=inline) urls.sort() assert urls == expected_urls