Use WebElementWrapper instead of QWebElement
* also don't derive from object * also set the _used flag on _Downloader
This commit is contained in:
parent
a3cc71e317
commit
e5bfb9884b
@ -32,6 +32,7 @@ from email.mime import multipart
|
|||||||
|
|
||||||
from PyQt5.QtCore import QUrl
|
from PyQt5.QtCore import QUrl
|
||||||
|
|
||||||
|
from qutebrowser.browser import webelem
|
||||||
from qutebrowser.utils import log, objreg, message
|
from qutebrowser.utils import log, objreg, message
|
||||||
|
|
||||||
|
|
||||||
@ -125,7 +126,7 @@ E_QUOPRI = ("quoted-printable", _rn_quopri)
|
|||||||
"""Encode the file using MIME quoted-printable encoding."""
|
"""Encode the file using MIME quoted-printable encoding."""
|
||||||
|
|
||||||
|
|
||||||
class MHTMLWriter(object):
|
class MHTMLWriter():
|
||||||
|
|
||||||
"""A class for outputting multiple files to a MHTML document.
|
"""A class for outputting multiple files to a MHTML document.
|
||||||
|
|
||||||
@ -209,7 +210,7 @@ class MHTMLWriter(object):
|
|||||||
return msg
|
return msg
|
||||||
|
|
||||||
|
|
||||||
class _Downloader(object):
|
class _Downloader():
|
||||||
|
|
||||||
"""A class to download whole websites.
|
"""A class to download whole websites.
|
||||||
|
|
||||||
@ -220,6 +221,7 @@ class _Downloader(object):
|
|||||||
loaded_urls: A set of QUrls of finished asset downloads.
|
loaded_urls: A set of QUrls of finished asset downloads.
|
||||||
pending_downloads: A set of unfinished (url, DownloadItem) tuples.
|
pending_downloads: A set of unfinished (url, DownloadItem) tuples.
|
||||||
_finished: A flag indicating if the file has already been written.
|
_finished: A flag indicating if the file has already been written.
|
||||||
|
_used: A flag indicating if the downloader has already been used.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
def __init__(self, web_view, dest):
|
def __init__(self, web_view, dest):
|
||||||
@ -229,6 +231,7 @@ class _Downloader(object):
|
|||||||
self.loaded_urls = {web_view.url()}
|
self.loaded_urls = {web_view.url()}
|
||||||
self.pending_downloads = set()
|
self.pending_downloads = set()
|
||||||
self._finished = False
|
self._finished = False
|
||||||
|
self._used = False
|
||||||
|
|
||||||
def run(self):
|
def run(self):
|
||||||
"""Download and save the page.
|
"""Download and save the page.
|
||||||
@ -236,6 +239,9 @@ class _Downloader(object):
|
|||||||
The object must not be reused, you should create a new one if
|
The object must not be reused, you should create a new one if
|
||||||
you want to download another page.
|
you want to download another page.
|
||||||
"""
|
"""
|
||||||
|
if self._used:
|
||||||
|
raise ValueError("Downloader already used")
|
||||||
|
self._used = True
|
||||||
web_url = self.web_view.url()
|
web_url = self.web_view.url()
|
||||||
web_frame = self.web_view.page().mainFrame()
|
web_frame = self.web_view.page().mainFrame()
|
||||||
|
|
||||||
@ -252,10 +258,12 @@ class _Downloader(object):
|
|||||||
web_frame.findAllElements("img"))
|
web_frame.findAllElements("img"))
|
||||||
|
|
||||||
for element in elements:
|
for element in elements:
|
||||||
element_url = element.attribute("src")
|
element = webelem.WebElementWrapper(element)
|
||||||
if not element_url:
|
if "src" in element:
|
||||||
element_url = element.attribute("href")
|
element_url = element["src"]
|
||||||
if not element_url:
|
elif "href" in element:
|
||||||
|
element_url = element["href"]
|
||||||
|
else:
|
||||||
# Might be a local <script> tag or something else
|
# Might be a local <script> tag or something else
|
||||||
continue
|
continue
|
||||||
absolute_url = web_url.resolved(QUrl(element_url))
|
absolute_url = web_url.resolved(QUrl(element_url))
|
||||||
@ -263,17 +271,19 @@ class _Downloader(object):
|
|||||||
|
|
||||||
styles = web_frame.findAllElements("style")
|
styles = web_frame.findAllElements("style")
|
||||||
for style in styles:
|
for style in styles:
|
||||||
if style.attribute("type", "text/css") != "text/css":
|
style = webelem.WebElementWrapper(style)
|
||||||
|
if "type" in style and style["type"] != "text/css":
|
||||||
continue
|
continue
|
||||||
for element_url in _get_css_imports(style.toPlainText()):
|
for element_url in _get_css_imports(str(style)):
|
||||||
element_url = element_url.decode("ascii")
|
element_url = element_url.decode("ascii")
|
||||||
self.fetch_url(web_url.resolved(QUrl(element_url)))
|
self.fetch_url(web_url.resolved(QUrl(element_url)))
|
||||||
|
|
||||||
# Search for references in inline styles
|
# Search for references in inline styles
|
||||||
for element in web_frame.findAllElements("*"):
|
for element in web_frame.findAllElements("*"):
|
||||||
style = element.attribute("style")
|
element = webelem.WebElementWrapper(element)
|
||||||
if not style:
|
if "style" not in element:
|
||||||
continue
|
continue
|
||||||
|
style = element["style"]
|
||||||
for element_url in _get_css_imports(style):
|
for element_url in _get_css_imports(style):
|
||||||
element_url = element_url.decode("ascii")
|
element_url = element_url.decode("ascii")
|
||||||
self.fetch_url(web_url.resolved(QUrl(element_url)))
|
self.fetch_url(web_url.resolved(QUrl(element_url)))
|
||||||
@ -367,7 +377,7 @@ class _Downloader(object):
|
|||||||
log.misc.debug("All assets downloaded, ready to finish off!")
|
log.misc.debug("All assets downloaded, ready to finish off!")
|
||||||
with open(self.dest, "wb") as file_output:
|
with open(self.dest, "wb") as file_output:
|
||||||
self.writer.write_to(file_output)
|
self.writer.write_to(file_output)
|
||||||
message.info("current", "Page saved as {}".format(self.dest), True)
|
message.info("current", "Page saved as {}".format(self.dest))
|
||||||
|
|
||||||
def collect_zombies(self):
|
def collect_zombies(self):
|
||||||
"""Collect done downloads and add their data to the MHTML file.
|
"""Collect done downloads and add their data to the MHTML file.
|
||||||
|
Loading…
Reference in New Issue
Block a user