Revert "Remove cssutils from mhtml.py"

This reverts commit 22a0f0952704d284846ab2572790d99a85515c57.
This commit is contained in:
Daniel 2015-10-07 11:21:47 +02:00
parent ce1a99cc7c
commit 957d68c477
3 changed files with 57 additions and 6 deletions

View File

@ -35,6 +35,10 @@ from PyQt5.QtCore import QUrl
from qutebrowser.browser import webelem from qutebrowser.browser import webelem
from qutebrowser.utils import log, objreg, message, usertypes from qutebrowser.utils import log, objreg, message, usertypes
try:
import cssutils
except ImportError:
cssutils = None
_File = collections.namedtuple('_File', _File = collections.namedtuple('_File',
['content', 'content_type', 'content_location', ['content', 'content_type', 'content_location',
@ -50,18 +54,14 @@ _CSS_URL_PATTERNS = [re.compile(x) for x in [
]] ]]
def _get_css_imports(data, inline=False): def _get_css_imports_regex(data):
"""Return all assets that are referenced in the given CSS document. """Return all assets that are referenced in the given CSS document.
The returned URLs are relative to the stylesheet's URL. The returned URLs are relative to the stylesheet's URL.
Args: Args:
data: The content of the stylesheet to scan as string. data: The content of the stylesheet to scan as string.
inline: True if data is a HTML inline style (style="...").
""" """
# We keep the inline argument to stay consistent with the cssutils
# interface, in case we reintroduce cssutils.
# pylint: disable=unused-argument
urls = [] urls = []
for pattern in _CSS_URL_PATTERNS: for pattern in _CSS_URL_PATTERNS:
for match in pattern.finditer(data): for match in pattern.finditer(data):
@ -71,6 +71,50 @@ def _get_css_imports(data, inline=False):
return urls return urls
def _get_css_imports_cssutils(data, inline=False):
    """Collect all assets referenced in the given CSS document using cssutils.

    The returned URLs are relative to the stylesheet's URL.

    Args:
        data: The content of the stylesheet to scan as string.
        inline: True if data is an inline HTML style attribute.
    """
    # Invalid CSS data is of no interest here; reporting it would only
    # litter the log output with CSS errors, hence the high loglevel and
    # the disabled validation.
    css_parser = cssutils.CSSParser(loglevel=100,
                                    fetcher=lambda url: (None, ""),
                                    validate=False)
    if inline:
        # Every property (background, color, margin, ...) carries a list of
        # values (red, 10px, url(foobar), ...); keep the non-empty URIs.
        return [
            value.uri
            for prop in css_parser.parseStyle(data)
            for value in prop.propertyValue
            if isinstance(value, cssutils.css.URIValue) and value.uri
        ]
    stylesheet = css_parser.parseString(data)
    return list(cssutils.getUrls(stylesheet))
def _get_css_imports(data, inline=False):
    """Return all assets that are referenced in the given CSS document.

    The cssutils-based scanner is used when cssutils is available, the
    regex-based scanner otherwise.

    The returned URLs are relative to the stylesheet's URL.

    Args:
        data: The content of the stylesheet to scan as string.
        inline: True if data is an inline HTML style attribute. Only
                forwarded to the cssutils-based scanner.
    """
    if cssutils is not None:
        return _get_css_imports_cssutils(data, inline)
    # No cssutils: fall back to the regex scanner, which takes no inline flag.
    return _get_css_imports_regex(data)
MHTMLPolicy = email.policy.default.clone(linesep='\r\n', max_line_length=0) MHTMLPolicy = email.policy.default.clone(linesep='\r\n', max_line_length=0)

View File

@ -5,3 +5,4 @@ pyPEG2==2.15.2
PyYAML==3.11 PyYAML==3.11
colorama==0.3.3 colorama==0.3.3
colorlog==2.6.0 colorlog==2.6.0
cssutils==1.0

View File

@ -250,6 +250,7 @@ def test_removing_file_from_mhtml(checker):
""") """)
@pytest.mark.parametrize('has_cssutils', [True, False])
@pytest.mark.parametrize('inline, style, expected_urls', [ @pytest.mark.parametrize('inline, style, expected_urls', [
(False, "@import 'default.css'", ['default.css']), (False, "@import 'default.css'", ['default.css']),
(False, '@import "default.css"', ['default.css']), (False, '@import "default.css"', ['default.css']),
@ -261,7 +262,12 @@ def test_removing_file_from_mhtml(checker):
(True, 'background: url(folder/file.png) no-repeat', ['folder/file.png']), (True, 'background: url(folder/file.png) no-repeat', ['folder/file.png']),
(True, 'content: url()', []), (True, 'content: url()', []),
]) ])
def test_css_url_scanner(monkeypatch, inline, style, expected_urls): def test_css_url_scanner(monkeypatch, has_cssutils, inline, style,
expected_urls):
if has_cssutils:
assert mhtml.cssutils is not None
else:
monkeypatch.setattr('qutebrowser.browser.mhtml.cssutils', None)
expected_urls.sort() expected_urls.sort()
urls = mhtml._get_css_imports(style, inline=inline) urls = mhtml._get_css_imports(style, inline=inline)
urls.sort() urls.sort()