Revert "Remove cssutils from mhtml.py"

This reverts commit 22a0f0952704d284846ab2572790d99a85515c57.
This commit is contained in:
Daniel 2015-10-07 11:21:47 +02:00
parent ce1a99cc7c
commit 957d68c477
3 changed files with 57 additions and 6 deletions

View File

@ -35,6 +35,10 @@ from PyQt5.QtCore import QUrl
from qutebrowser.browser import webelem
from qutebrowser.utils import log, objreg, message, usertypes
try:
import cssutils
except ImportError:
cssutils = None
_File = collections.namedtuple('_File',
['content', 'content_type', 'content_location',
@ -50,18 +54,14 @@ _CSS_URL_PATTERNS = [re.compile(x) for x in [
]]
def _get_css_imports(data, inline=False):
def _get_css_imports_regex(data):
"""Return all assets that are referenced in the given CSS document.
The returned URLs are relative to the stylesheet's URL.
Args:
data: The content of the stylesheet to scan as string.
inline: True if data is a HTML inline style (style="...").
"""
# We keep the inline argument to stay consistent with the cssutils
# interface, in case we reintroduce cssutils.
# pylint: disable=unused-argument
urls = []
for pattern in _CSS_URL_PATTERNS:
for match in pattern.finditer(data):
@ -71,6 +71,50 @@ def _get_css_imports(data, inline=False):
return urls
def _get_css_imports_cssutils(data, inline=False):
    """Collect the asset URLs referenced by a piece of CSS, using cssutils.

    The returned URLs are relative to the stylesheet's URL.

    Args:
        data: The CSS text to scan, as a string.
        inline: True if data is an inline HTML style attribute
                (style="..."), False if it is a complete stylesheet.

    Return:
        A list with the URLs found in the CSS.
    """
    # Invalid CSS is of no interest here; reporting it would only litter the
    # log output with CSS errors, hence the very high loglevel and
    # validate=False.
    # NOTE(review): the fetcher always answers (None, ""), presumably so that
    # @import'ed sheets are never actually retrieved - confirm against the
    # cssutils documentation.
    css_parser = cssutils.CSSParser(loglevel=100,
                                    fetcher=lambda url: (None, ""),
                                    validate=False)

    if inline:
        style = css_parser.parseStyle(data)
        # Each property (background, color, margin, ...) holds one or more
        # values (red, 10px, url(foobar), ...); only non-empty url() values
        # are kept.
        return [
            item.uri
            for css_property in style
            for item in css_property.propertyValue
            if isinstance(item, cssutils.css.URIValue) and item.uri
        ]

    stylesheet = css_parser.parseString(data)
    return list(cssutils.getUrls(stylesheet))
def _get_css_imports(data, inline=False):
    """Collect the asset URLs referenced by a piece of CSS.

    Delegates to the cssutils-based scanner when the cssutils module could
    be imported, and to the regex-based scanner otherwise.

    The returned URLs are relative to the stylesheet's URL.

    Args:
        data: The CSS text to scan, as a string.
        inline: True if data is an inline HTML style attribute
                (style="..."). Only forwarded to the cssutils scanner; the
                regex scanner does not take it.
    """
    if cssutils is not None:
        return _get_css_imports_cssutils(data, inline)
    return _get_css_imports_regex(data)
# Clone of the default email policy with CRLF line separators;
# max_line_length=0 turns off line wrapping (see the email.policy docs).
MHTMLPolicy = email.policy.default.clone(linesep='\r\n', max_line_length=0)

View File

@ -5,3 +5,4 @@ pyPEG2==2.15.2
PyYAML==3.11
colorama==0.3.3
colorlog==2.6.0
cssutils==1.0

View File

@ -250,6 +250,7 @@ def test_removing_file_from_mhtml(checker):
""")
@pytest.mark.parametrize('has_cssutils', [True, False])
@pytest.mark.parametrize('inline, style, expected_urls', [
(False, "@import 'default.css'", ['default.css']),
(False, '@import "default.css"', ['default.css']),
@ -261,7 +262,12 @@ def test_removing_file_from_mhtml(checker):
(True, 'background: url(folder/file.png) no-repeat', ['folder/file.png']),
(True, 'content: url()', []),
])
def test_css_url_scanner(monkeypatch, inline, style, expected_urls):
def test_css_url_scanner(monkeypatch, has_cssutils, inline, style,
expected_urls):
if has_cssutils:
assert mhtml.cssutils is not None
else:
monkeypatch.setattr('qutebrowser.browser.mhtml.cssutils', None)
expected_urls.sort()
urls = mhtml._get_css_imports(style, inline=inline)
urls.sort()