From ccdb59cce10f385b0079a32508fa3a2afaeaa96d Mon Sep 17 00:00:00 2001 From: Daniel Date: Wed, 16 Sep 2015 16:31:17 +0200 Subject: [PATCH 1/4] Add adblock host whitelisting The config option "content host-blocking-whitelist" may contain comma-separated domains that are exempt from host blocking. The listed domains may contain the wildcards "*" and "?" to match any number of characters and exactly one character, respectively. You need to run :adblock-update after modifying the list. --- doc/help/settings.asciidoc | 11 +++++++++++ qutebrowser/browser/adblock.py | 23 ++++++++++++++++++++++- qutebrowser/config/configdata.py | 8 ++++++++ 3 files changed, 41 insertions(+), 1 deletion(-) diff --git a/doc/help/settings.asciidoc b/doc/help/settings.asciidoc index 796199a19..499c9c88b 100644 --- a/doc/help/settings.asciidoc +++ b/doc/help/settings.asciidoc @@ -159,6 +159,7 @@ |<<content-cookies-store,cookies-store>>|Whether to store cookies. |<<content-host-block-lists,host-block-lists>>|List of URLs of lists which contain hosts to block. |<<content-host-blocking-enabled,host-blocking-enabled>>|Whether host blocking is enabled. +|<<content-host-blocking-whitelist,host-blocking-whitelist>>|List of domains that should always be loaded, despite being ad-blocked. |============== .Quick reference for section ``hints'' @@ -1433,6 +1434,16 @@ Valid values: Default: +pass:[true]+ +[[content-host-blocking-whitelist]] +=== host-blocking-whitelist +List of domains that should always be loaded, despite being ad-blocked. + +Domains may contain * and ? wildcards and are otherwise required to exactly match the requested domain. + +Local domains are always exempt from hostblocking. + +Default: empty + == hints Hinting settings. 
diff --git a/qutebrowser/browser/adblock.py b/qutebrowser/browser/adblock.py index bec74f096..0f4b50b75 100644 --- a/qutebrowser/browser/adblock.py +++ b/qutebrowser/browser/adblock.py @@ -24,6 +24,8 @@ import os.path import functools import posixpath import zipfile +import fnmatch +import re from qutebrowser.config import config from qutebrowser.utils import objreg, standarddir, log, message @@ -59,6 +61,24 @@ def get_fileobj(byte_io): return io.TextIOWrapper(byte_io, encoding='utf-8') +def is_whitelisted_domain(host): + """Check if the given host is on the adblock whitelist. + + Args: + host: The host as given by the adblocker as string. + """ + whitelist = objreg.get('config').get('content', 'host-blocking-whitelist') + if whitelist is None: + return False + + for domain in whitelist: + fnmatch_translated = fnmatch.translate(domain) + domain_regex = re.compile(fnmatch_translated, re.IGNORECASE) + if domain_regex.match(host): + return True + return False + + class FakeDownload: """A download stub to use on_download_finished with local files.""" @@ -188,7 +208,8 @@ class HostBlocker: else: error_count += 1 continue - if host not in self.WHITELISTED: + if (host not in self.WHITELISTED + and not is_whitelisted_domain(host)): self.blocked_hosts.add(host) log.misc.debug("{}: read {} lines".format(byte_io.name, line_count)) if error_count > 0: diff --git a/qutebrowser/config/configdata.py b/qutebrowser/config/configdata.py index c9d82838d..cbede4a00 100644 --- a/qutebrowser/config/configdata.py +++ b/qutebrowser/config/configdata.py @@ -735,6 +735,14 @@ def data(readonly=False): SettingValue(typ.Bool(), 'true'), "Whether host blocking is enabled."), + ('host-blocking-whitelist', + SettingValue(typ.List(none_ok=True), ''), + "List of domains that should always be loaded, despite being " + "ad-blocked.\n\n" + "Domains may contain * and ? 
wildcards and are otherwise " + "required to exactly match the requested domain.\n\n" + "Local domains are always exempt from hostblocking."), + readonly=readonly )), From 523e071a9725f4f17e2980d754a634e795e11d41 Mon Sep 17 00:00:00 2001 From: Daniel Date: Wed, 16 Sep 2015 17:04:19 +0200 Subject: [PATCH 2/4] Stylistic changes to is_whitelisted_domain --- qutebrowser/browser/adblock.py | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/qutebrowser/browser/adblock.py b/qutebrowser/browser/adblock.py index 0f4b50b75..774e154fd 100644 --- a/qutebrowser/browser/adblock.py +++ b/qutebrowser/browser/adblock.py @@ -25,7 +25,6 @@ import functools import posixpath import zipfile import fnmatch -import re from qutebrowser.config import config from qutebrowser.utils import objreg, standarddir, log, message @@ -67,14 +66,12 @@ def is_whitelisted_domain(host): Args: host: The host as given by the adblocker as string. """ - whitelist = objreg.get('config').get('content', 'host-blocking-whitelist') + whitelist = config.get('content', 'host-blocking-whitelist') if whitelist is None: return False - for domain in whitelist: - fnmatch_translated = fnmatch.translate(domain) - domain_regex = re.compile(fnmatch_translated, re.IGNORECASE) - if domain_regex.match(host): + for pattern in whitelist: + if fnmatch.fnmatch(host, pattern.lower()): return True return False From fc806525a266c207e69286c13490651837dfa44d Mon Sep 17 00:00:00 2001 From: Daniel Date: Wed, 16 Sep 2015 17:10:03 +0200 Subject: [PATCH 3/4] Move whitelist host code to networkmanager This means :adblock-update no longer needs to be run after modifying the whitelist. 
--- qutebrowser/browser/adblock.py | 20 +------------------ qutebrowser/browser/network/networkmanager.py | 20 ++++++++++++++++++- 2 files changed, 20 insertions(+), 20 deletions(-) diff --git a/qutebrowser/browser/adblock.py b/qutebrowser/browser/adblock.py index 774e154fd..bec74f096 100644 --- a/qutebrowser/browser/adblock.py +++ b/qutebrowser/browser/adblock.py @@ -24,7 +24,6 @@ import os.path import functools import posixpath import zipfile -import fnmatch from qutebrowser.config import config from qutebrowser.utils import objreg, standarddir, log, message @@ -60,22 +59,6 @@ def get_fileobj(byte_io): return io.TextIOWrapper(byte_io, encoding='utf-8') -def is_whitelisted_domain(host): - """Check if the given host is on the adblock whitelist. - - Args: - host: The host as given by the adblocker as string. - """ - whitelist = config.get('content', 'host-blocking-whitelist') - if whitelist is None: - return False - - for pattern in whitelist: - if fnmatch.fnmatch(host, pattern.lower()): - return True - return False - - class FakeDownload: """A download stub to use on_download_finished with local files.""" @@ -205,8 +188,7 @@ class HostBlocker: else: error_count += 1 continue - if (host not in self.WHITELISTED - and not is_whitelisted_domain(host)): + if host not in self.WHITELISTED: self.blocked_hosts.add(host) log.misc.debug("{}: read {} lines".format(byte_io.name, line_count)) if error_count > 0: diff --git a/qutebrowser/browser/network/networkmanager.py b/qutebrowser/browser/network/networkmanager.py index e433b5e66..1642ea746 100644 --- a/qutebrowser/browser/network/networkmanager.py +++ b/qutebrowser/browser/network/networkmanager.py @@ -20,6 +20,7 @@ """Our own QNetworkAccessManager.""" import collections +import fnmatch from PyQt5.QtCore import (pyqtSlot, pyqtSignal, PYQT_VERSION, QCoreApplication, QUrl, QByteArray) @@ -49,6 +50,22 @@ def init(): QSslSocket.setDefaultCiphers(good_ciphers) +def is_whitelisted_domain(host): + """Check if the given host is on 
the adblock whitelist. + + Args: + host: The host as given by the adblocker as string. + """ + whitelist = config.get('content', 'host-blocking-whitelist') + if whitelist is None: + return False + + for pattern in whitelist: + if fnmatch.fnmatch(host, pattern.lower()): + return True + return False + + class SslError(QSslError): """A QSslError subclass which provides __hash__ on Qt < 5.4.""" @@ -347,7 +364,8 @@ class NetworkManager(QNetworkAccessManager): host_blocker = objreg.get('host-blocker') if (op == QNetworkAccessManager.GetOperation and req.url().host() in host_blocker.blocked_hosts and - config.get('content', 'host-blocking-enabled')): + config.get('content', 'host-blocking-enabled') and + not is_whitelisted_domain(req.url().host())): log.webview.info("Request to {} blocked by host blocker.".format( req.url().host())) return networkreply.ErrorNetworkReply( From dd679c6c14685cdd20f192fbf0367f2616294ab4 Mon Sep 17 00:00:00 2001 From: Daniel Date: Wed, 16 Sep 2015 17:16:35 +0200 Subject: [PATCH 4/4] Fix is_whitelisted_domain docstring --- qutebrowser/browser/network/networkmanager.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/qutebrowser/browser/network/networkmanager.py b/qutebrowser/browser/network/networkmanager.py index 1642ea746..c2eee80e9 100644 --- a/qutebrowser/browser/network/networkmanager.py +++ b/qutebrowser/browser/network/networkmanager.py @@ -54,7 +54,7 @@ def is_whitelisted_domain(host): """Check if the given host is on the adblock whitelist. Args: - host: The host as given by the adblocker as string. + host: The host of the request as string. """ whitelist = config.get('content', 'host-blocking-whitelist') if whitelist is None: