Move the check for whether a URL is blocked to adblock.py.

This commit is contained in:
Florian Bruhin 2015-09-17 06:46:27 +02:00
parent accbf157e0
commit ee2b9adce4
2 changed files with 33 additions and 28 deletions

View File

@ -24,6 +24,7 @@ import os.path
import functools import functools
import posixpath import posixpath
import zipfile import zipfile
import fnmatch
from qutebrowser.config import config from qutebrowser.config import config
from qutebrowser.utils import objreg, standarddir, log, message from qutebrowser.utils import objreg, standarddir, log, message
@ -59,6 +60,22 @@ def get_fileobj(byte_io):
return io.TextIOWrapper(byte_io, encoding='utf-8') return io.TextIOWrapper(byte_io, encoding='utf-8')
def is_whitelisted_host(host):
    """Check if the given host is on the adblock whitelist.

    Matching is case-insensitive on every platform, since hostnames
    themselves are case-insensitive.

    Args:
        host: The host of the request as string.

    Return:
        True if the host matches one of the whitelist patterns, False
        otherwise (including when no whitelist is configured).
    """
    whitelist = config.get('content', 'host-blocking-whitelist')
    if whitelist is None:
        return False

    # fnmatch.fnmatch() only folds case where os.path.normcase() does, so a
    # host with uppercase letters would never match the lowercased patterns
    # on POSIX. Lowercase both sides and compare with fnmatchcase() to get
    # the same result everywhere.
    host = host.lower()
    return any(fnmatch.fnmatchcase(host, pattern.lower())
               for pattern in whitelist)
class FakeDownload: class FakeDownload:
"""A download stub to use on_download_finished with local files.""" """A download stub to use on_download_finished with local files."""
@ -74,7 +91,7 @@ class HostBlocker:
"""Manage blocked hosts based from /etc/hosts-like files. """Manage blocked hosts based from /etc/hosts-like files.
Attributes: Attributes:
blocked_hosts: A set of blocked hosts. _blocked_hosts: A set of blocked hosts.
_in_progress: The DownloadItems which are currently downloading. _in_progress: The DownloadItems which are currently downloading.
_done_count: How many files have been read successfully. _done_count: How many files have been read successfully.
_hosts_file: The path to the blocked-hosts file. _hosts_file: The path to the blocked-hosts file.
@ -87,7 +104,7 @@ class HostBlocker:
'local') 'local')
def __init__(self): def __init__(self):
self.blocked_hosts = set() self._blocked_hosts = set()
self._in_progress = [] self._in_progress = []
self._done_count = 0 self._done_count = 0
data_dir = standarddir.data() data_dir = standarddir.data()
@ -97,16 +114,23 @@ class HostBlocker:
self._hosts_file = os.path.join(data_dir, 'blocked-hosts') self._hosts_file = os.path.join(data_dir, 'blocked-hosts')
objreg.get('config').changed.connect(self.on_config_changed) objreg.get('config').changed.connect(self.on_config_changed)
def is_blocked(self, url):
    """Tell whether requests to the given URL should be blocked.

    Args:
        url: The URL to check; host() is called on it to get the hostname.

    Return:
        True if host blocking is enabled, the URL's host is in the set of
        blocked hosts and the host is not whitelisted; False otherwise.
    """
    if config.get('content', 'host-blocking-enabled'):
        hostname = url.host()
        if hostname in self._blocked_hosts:
            # A whitelist entry overrides the block list.
            return not is_whitelisted_host(hostname)
    return False
def read_hosts(self): def read_hosts(self):
"""Read hosts from the existing blocked-hosts file.""" """Read hosts from the existing blocked-hosts file."""
self.blocked_hosts = set() self._blocked_hosts = set()
if self._hosts_file is None: if self._hosts_file is None:
return return
if os.path.exists(self._hosts_file): if os.path.exists(self._hosts_file):
try: try:
with open(self._hosts_file, 'r', encoding='utf-8') as f: with open(self._hosts_file, 'r', encoding='utf-8') as f:
for line in f: for line in f:
self.blocked_hosts.add(line.strip()) self._blocked_hosts.add(line.strip())
except OSError: except OSError:
log.misc.exception("Failed to read host blocklist!") log.misc.exception("Failed to read host blocklist!")
else: else:
@ -121,7 +145,7 @@ class HostBlocker:
"""Update the adblock block lists.""" """Update the adblock block lists."""
if self._hosts_file is None: if self._hosts_file is None:
raise cmdexc.CommandError("No data storage is configured!") raise cmdexc.CommandError("No data storage is configured!")
self.blocked_hosts = set() self._blocked_hosts = set()
self._done_count = 0 self._done_count = 0
urls = config.get('content', 'host-block-lists') urls = config.get('content', 'host-block-lists')
download_manager = objreg.get('download-manager', scope='window', download_manager = objreg.get('download-manager', scope='window',
@ -189,7 +213,7 @@ class HostBlocker:
error_count += 1 error_count += 1
continue continue
if host not in self.WHITELISTED: if host not in self.WHITELISTED:
self.blocked_hosts.add(host) self._blocked_hosts.add(host)
log.misc.debug("{}: read {} lines".format(byte_io.name, line_count)) log.misc.debug("{}: read {} lines".format(byte_io.name, line_count))
if error_count > 0: if error_count > 0:
message.error('current', "adblock: {} read errors for {}".format( message.error('current', "adblock: {} read errors for {}".format(
@ -198,10 +222,10 @@ class HostBlocker:
def on_lists_downloaded(self): def on_lists_downloaded(self):
"""Install block lists after files have been downloaded.""" """Install block lists after files have been downloaded."""
with open(self._hosts_file, 'w', encoding='utf-8') as f: with open(self._hosts_file, 'w', encoding='utf-8') as f:
for host in sorted(self.blocked_hosts): for host in sorted(self._blocked_hosts):
f.write(host + '\n') f.write(host + '\n')
message.info('current', "adblock: Read {} hosts from {} sources." message.info('current', "adblock: Read {} hosts from {} sources."
.format(len(self.blocked_hosts), self._done_count)) .format(len(self._blocked_hosts), self._done_count))
@config.change_filter('content', 'host-block-lists') @config.change_filter('content', 'host-block-lists')
def on_config_changed(self): def on_config_changed(self):

View File

@ -20,7 +20,6 @@
"""Our own QNetworkAccessManager.""" """Our own QNetworkAccessManager."""
import collections import collections
import fnmatch
from PyQt5.QtCore import (pyqtSlot, pyqtSignal, PYQT_VERSION, QCoreApplication, from PyQt5.QtCore import (pyqtSlot, pyqtSignal, PYQT_VERSION, QCoreApplication,
QUrl, QByteArray) QUrl, QByteArray)
@ -50,22 +49,6 @@ def init():
QSslSocket.setDefaultCiphers(good_ciphers) QSslSocket.setDefaultCiphers(good_ciphers)
def is_whitelisted_domain(host):
    """Tell whether the given host matches an adblock whitelist pattern.

    Args:
        host: The host of the request as string.

    Return:
        True if any whitelist pattern (lowercased) matches the host, False
        if none does or no whitelist is configured.
    """
    patterns = config.get('content', 'host-blocking-whitelist')
    if patterns is None:
        return False
    return any(fnmatch.fnmatch(host, entry.lower()) for entry in patterns)
class SslError(QSslError): class SslError(QSslError):
"""A QSslError subclass which provides __hash__ on Qt < 5.4.""" """A QSslError subclass which provides __hash__ on Qt < 5.4."""
@ -363,9 +346,7 @@ class NetworkManager(QNetworkAccessManager):
host_blocker = objreg.get('host-blocker') host_blocker = objreg.get('host-blocker')
if (op == QNetworkAccessManager.GetOperation and if (op == QNetworkAccessManager.GetOperation and
req.url().host() in host_blocker.blocked_hosts and host_blocker.is_blocked(req.url())):
config.get('content', 'host-blocking-enabled') and
not is_whitelisted_domain(req.url().host())):
log.webview.info("Request to {} blocked by host blocker.".format( log.webview.info("Request to {} blocked by host blocker.".format(
req.url().host())) req.url().host()))
return networkreply.ErrorNetworkReply( return networkreply.ErrorNetworkReply(