diff --git a/qutebrowser/misc/mhtml.py b/qutebrowser/misc/mhtml.py
index 2353602fc..b45f0f6a9 100644
--- a/qutebrowser/misc/mhtml.py
+++ b/qutebrowser/misc/mhtml.py
@@ -22,6 +22,7 @@
import functools
import io
import os
+import re
from collections import namedtuple
from base64 import b64encode
@@ -37,6 +38,32 @@ _File = namedtuple("_File",
"content content_type content_location transfer_encoding")
+_CSS_URL_PATTERNS = [re.compile(x) for x in [  # target: group "url"
+    rb"@import '(?P<url>[^']+)'",  # @import 'x'
+    rb'@import "(?P<url>[^"]+)"',  # @import "x"
+    rb'''url\((?P<url>[^'"][^)]*)\)''',  # url(x), unquoted
+    rb'url\("(?P<url>[^"]+)"\)',  # url("x")
+    rb"url\('(?P<url>[^']+)'\)",  # url('x')
+]]
+
+
+def _get_css_imports(data):
+    """Return a list of all asset URLs referenced in the given CSS document.
+
+    URLs are returned as bytes, unresolved: relative to the stylesheet's URL.
+
+    Args:
+        data: The content of the stylesheet to scan as bytes.
+    """
+    urls = []
+    for pattern in _CSS_URL_PATTERNS:
+        for match in pattern.finditer(data):
+            url = match.group("url")
+            if url:
+                urls.append(url)
+    return urls
+
+
def _chunked_base64(data, maxlen=76, linesep=b"\r\n"):
"""Just like b64encode, except that it breaks long lines.
@@ -191,7 +218,7 @@ class _Downloader(object):
self.web_view = web_view
self.dest = dest
self.writer = MHTMLWriter()
- self.loaded_urls = set()
+ self.loaded_urls = {web_view.url()}
self.pending_downloads = set()
def run(self):
@@ -200,8 +227,6 @@ class _Downloader(object):
The object must not be reused, you should create a new one if
you want to download another page.
"""
- download_manager = objreg.get("download-manager", scope="window",
- window="current")
web_url_str = self.web_view.url().toString()
web_frame = self.web_view.page().mainFrame()
@@ -225,22 +250,32 @@ class _Downloader(object):
# Might be a local