use cssutils
This commit is contained in:
parent
749b1c02cc
commit
420c087373
@ -35,6 +35,10 @@ from PyQt5.QtCore import QUrl
|
|||||||
from qutebrowser.browser import webelem
|
from qutebrowser.browser import webelem
|
||||||
from qutebrowser.utils import log, objreg, message
|
from qutebrowser.utils import log, objreg, message
|
||||||
|
|
||||||
|
try:
|
||||||
|
import cssutils
|
||||||
|
except ImportError:
|
||||||
|
cssutils = None
|
||||||
|
|
||||||
_File = collections.namedtuple('_File',
|
_File = collections.namedtuple('_File',
|
||||||
['content', 'content_type', 'content_location',
|
['content', 'content_type', 'content_location',
|
||||||
@ -50,7 +54,7 @@ _CSS_URL_PATTERNS = [re.compile(x) for x in [
|
|||||||
]]
|
]]
|
||||||
|
|
||||||
|
|
||||||
def _get_css_imports(data):
|
def _get_css_imports_regex(data):
|
||||||
"""Return all assets that are referenced in the given CSS document.
|
"""Return all assets that are referenced in the given CSS document.
|
||||||
|
|
||||||
The returned URLs are relative to the stylesheet's URL.
|
The returned URLs are relative to the stylesheet's URL.
|
||||||
@ -67,6 +71,47 @@ def _get_css_imports(data):
|
|||||||
return urls
|
return urls
|
||||||
|
|
||||||
|
|
||||||
|
def _get_css_imports_cssutils(data, inline=False):
    """Collect the URLs referenced by a piece of CSS, using cssutils.

    The returned URLs are relative to the stylesheet's URL.

    Args:
        data: The CSS source to scan, as a string.
        inline: True if data is the content of an inline HTML style
                attribute, False if it is a complete stylesheet.

    Return:
        A list of the URLs found in the CSS.
    """
    # The fetcher hands back empty content for every URL instead of
    # retrieving anything (presumably consulted by cssutils for @import
    # rules -- confirm against the cssutils docs).
    css_parser = cssutils.CSSParser(fetcher=lambda url: (None, ""),
                                    validate=False)

    if inline:
        style_decl = css_parser.parseStyle(data)
        # Outer loop: properties (background, color, margin, ...).
        # Inner loop: their values (red, 10px, url(foobar), ...).
        # Only URI values with a non-empty target are kept.
        return [
            val.uri
            for css_property in style_decl
            for val in css_property.propertyValue
            if isinstance(val, cssutils.css.URIValue) and val.uri
        ]

    stylesheet = css_parser.parseString(data)
    return list(cssutils.getUrls(stylesheet))
|
||||||
|
|
||||||
|
|
||||||
|
def _get_css_imports(data, inline=False):
    """Collect the URLs of all assets referenced by a piece of CSS.

    Dispatches to the cssutils-based scanner when cssutils could be
    imported, and to the regex-based scanner otherwise.

    The returned URLs are relative to the stylesheet's URL.

    Args:
        data: The CSS source to scan, as a string.
        inline: True if data is the content of an inline HTML style
                attribute. Only forwarded to the cssutils-based scanner;
                the regex-based scanner is called without it.
    """
    if cssutils is None:
        # cssutils is unavailable, so use the regex-based scanner.
        return _get_css_imports_regex(data)
    return _get_css_imports_cssutils(data, inline)
|
||||||
|
|
||||||
|
|
||||||
MHTMLPolicy = email.policy.default.clone(linesep='\r\n', max_line_length=0)
|
MHTMLPolicy = email.policy.default.clone(linesep='\r\n', max_line_length=0)
|
||||||
|
|
||||||
|
|
||||||
@ -228,7 +273,7 @@ class _Downloader():
|
|||||||
for element in web_frame.findAllElements('[style]'):
|
for element in web_frame.findAllElements('[style]'):
|
||||||
element = webelem.WebElementWrapper(element)
|
element = webelem.WebElementWrapper(element)
|
||||||
style = element['style']
|
style = element['style']
|
||||||
for element_url in _get_css_imports(style):
|
for element_url in _get_css_imports(style, inline=True):
|
||||||
self.fetch_url(web_url.resolved(QUrl(element_url)))
|
self.fetch_url(web_url.resolved(QUrl(element_url)))
|
||||||
|
|
||||||
# Shortcut if no assets need to be downloaded, otherwise the file would
|
# Shortcut if no assets need to be downloaded, otherwise the file would
|
||||||
|
@ -5,3 +5,4 @@ pyPEG2==2.15.2
|
|||||||
PyYAML==3.11
|
PyYAML==3.11
|
||||||
colorama==0.3.3
|
colorama==0.3.3
|
||||||
colorlog==2.6.0
|
colorlog==2.6.0
|
||||||
|
cssutils==1.0
|
||||||
|
@ -250,20 +250,26 @@ def test_removing_file_from_mhtml(checker):
|
|||||||
""")
|
""")
|
||||||
|
|
||||||
|
|
||||||
@pytest.mark.parametrize('style, expected_urls', [
|
@pytest.mark.parametrize('has_cssutils', [True, False])
|
||||||
("@import 'default.css'", ['default.css']),
|
@pytest.mark.parametrize('inline, style, expected_urls', [
|
||||||
('@import "default.css"', ['default.css']),
|
(False, "@import 'default.css'", ['default.css']),
|
||||||
("@import \t 'tabbed.css'", ['tabbed.css']),
|
(False, '@import "default.css"', ['default.css']),
|
||||||
("@import url('default.css')", ['default.css']),
|
(False, "@import \t 'tabbed.css'", ['tabbed.css']),
|
||||||
("""body {
|
(False, "@import url('default.css')", ['default.css']),
|
||||||
|
(False, """body {
|
||||||
background: url("/bg-img.png")
|
background: url("/bg-img.png")
|
||||||
}""", ['/bg-img.png']),
|
}""", ['/bg-img.png']),
|
||||||
('background: url(folder/file.png)', ['folder/file.png']),
|
(True, 'background: url(folder/file.png) no-repeat', ['folder/file.png']),
|
||||||
('content: url()', []),
|
(True, 'content: url()', []),
|
||||||
])
|
])
|
||||||
def test_css_url_scanner(style, expected_urls):
|
def test_css_url_scanner(monkeypatch, has_cssutils, inline, style,
|
||||||
|
expected_urls):
|
||||||
|
if has_cssutils:
|
||||||
|
assert mhtml.cssutils is not None
|
||||||
|
else:
|
||||||
|
monkeypatch.setattr('qutebrowser.browser.mhtml.cssutils', None)
|
||||||
expected_urls.sort()
|
expected_urls.sort()
|
||||||
urls = mhtml._get_css_imports(style)
|
urls = mhtml._get_css_imports(style, inline=inline)
|
||||||
urls.sort()
|
urls.sort()
|
||||||
assert urls == expected_urls
|
assert urls == expected_urls
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user