Bring back separate is_blocked method

This commit is contained in:
Florian Bruhin 2018-12-10 16:25:36 +01:00
parent 007250033a
commit 7b1bcea306
3 changed files with 62 additions and 75 deletions

View File

@ -108,25 +108,25 @@ class HostBlocker:
self._config_hosts_file = str(config_dir / 'blocked-hosts') self._config_hosts_file = str(config_dir / 'blocked-hosts')
def filter_request(self, info: requests.Request) -> None: def _is_blocked(self, request_url: QUrl,
"""Block the given request if necessary.""" first_party_url: QUrl = None) -> None:
if info.first_party_url is None: """Check whether the given request is blocked."""
if first_party_url is not None and not first_party_url.isValid():
first_party_url = None first_party_url = None
elif not info.first_party_url.isValid():
first_party_url = None
else:
first_party_url = info.first_party_url
if not config.get('content.host_blocking.enabled', if not config.get('content.host_blocking.enabled',
url=first_party_url): url=first_party_url):
return return False
host = info.request_url.host() host = request_url.host()
blocked = ((host in self._blocked_hosts or return ((host in self._blocked_hosts or
host in self._config_blocked_hosts) and host in self._config_blocked_hosts) and
not _is_whitelisted_url(info.request_url)) not _is_whitelisted_url(request_url))
if blocked: def filter_request(self, info: requests.Request) -> None:
"""Block the given request if necessary."""
if self._is_blocked(request_url=info.request_url,
first_party_url=info.first_party_url):
logger.info("Request to {} blocked by host blocker." logger.info("Request to {} blocked by host blocker."
.format(info.request_url.host())) .format(info.request_url.host()))
info.block() info.block()

View File

@ -44,6 +44,7 @@ from PyQt5.QtNetwork import QNetworkCookieJar
import helpers.stubs as stubsmod import helpers.stubs as stubsmod
from qutebrowser.config import (config, configdata, configtypes, configexc, from qutebrowser.config import (config, configdata, configtypes, configexc,
configfiles, configcache) configfiles, configcache)
from qutebrowser.api import config as configapi
from qutebrowser.utils import objreg, standarddir, utils, usertypes from qutebrowser.utils import objreg, standarddir, utils, usertypes
from qutebrowser.browser import greasemonkey, history, qutescheme from qutebrowser.browser import greasemonkey, history, qutescheme
from qutebrowser.browser.webkit import cookies from qutebrowser.browser.webkit import cookies
@ -306,6 +307,7 @@ def config_stub(stubs, monkeypatch, configdata_init, yaml_config_stub):
container = config.ConfigContainer(conf) container = config.ConfigContainer(conf)
monkeypatch.setattr(config, 'val', container) monkeypatch.setattr(config, 'val', container)
monkeypatch.setattr(configapi, 'val', container)
cache = configcache.ConfigCache() cache = configcache.ConfigCache()
monkeypatch.setattr(config, 'cache', cache) monkeypatch.setattr(config, 'cache', cache)

View File

@ -33,7 +33,7 @@ from qutebrowser.utils import urlmatch
from tests.helpers import utils from tests.helpers import utils
pytestmark = pytest.mark.usefixtures('qapp', 'config_tmpdir') pytestmark = pytest.mark.usefixtures('qapp')
# TODO See ../utils/test_standarddirutils for OSError and caplog assertion # TODO See ../utils/test_standarddirutils for OSError and caplog assertion
@ -58,18 +58,13 @@ URLS_TO_CHECK = ('http://localhost',
'http://veryverygoodhost.edu') 'http://veryverygoodhost.edu')
class BaseDirStub:
"""Mock for objreg.get('args') called in adblock.HostBlocker.read_hosts."""
def __init__(self):
self.basedir = None
@pytest.fixture @pytest.fixture
def basedir(fake_args): def host_blocker_factory(config_tmpdir, data_tmpdir, download_stub,
"""Register a Fake basedir.""" config_stub):
fake_args.basedir = None def factory():
return adblock.HostBlocker(config_dir=config_tmpdir,
data_dir=data_tmpdir)
return factory
def create_zipfile(directory, files, zipname='test'): def create_zipfile(directory, files, zipname='test'):
@ -133,9 +128,9 @@ def assert_urls(host_blocker, blocked=BLOCKLIST_HOSTS,
url = QUrl(str_url) url = QUrl(str_url)
host = url.host() host = url.host()
if host in blocked and host not in whitelisted: if host in blocked and host not in whitelisted:
assert host_blocker.is_blocked(url) assert host_blocker._is_blocked(url)
else: else:
assert not host_blocker.is_blocked(url) assert not host_blocker._is_blocked(url)
def blocklist_to_url(filename): def blocklist_to_url(filename):
@ -202,13 +197,13 @@ def generic_blocklists(directory):
blocklist5.toString()] blocklist5.toString()]
def test_disabled_blocking_update(basedir, config_stub, download_stub, def test_disabled_blocking_update(config_stub, tmpdir, caplog,
data_tmpdir, tmpdir, win_registry, caplog): host_blocker_factory):
"""Ensure no URL is blocked when host blocking is disabled.""" """Ensure no URL is blocked when host blocking is disabled."""
config_stub.val.content.host_blocking.lists = generic_blocklists(tmpdir) config_stub.val.content.host_blocking.lists = generic_blocklists(tmpdir)
config_stub.val.content.host_blocking.enabled = False config_stub.val.content.host_blocking.enabled = False
host_blocker = adblock.HostBlocker() host_blocker = host_blocker_factory()
host_blocker.adblock_update() host_blocker.adblock_update()
while host_blocker._in_progress: while host_blocker._in_progress:
current_download = host_blocker._in_progress[0] current_download = host_blocker._in_progress[0]
@ -217,10 +212,10 @@ def test_disabled_blocking_update(basedir, config_stub, download_stub,
current_download.finished.emit() current_download.finished.emit()
host_blocker.read_hosts() host_blocker.read_hosts()
for str_url in URLS_TO_CHECK: for str_url in URLS_TO_CHECK:
assert not host_blocker.is_blocked(QUrl(str_url)) assert not host_blocker._is_blocked(QUrl(str_url))
def test_disabled_blocking_per_url(config_stub, data_tmpdir): def test_disabled_blocking_per_url(config_stub, host_blocker_factory):
example_com = 'https://www.example.com/' example_com = 'https://www.example.com/'
config_stub.val.content.host_blocking.lists = [] config_stub.val.content.host_blocking.lists = []
@ -230,36 +225,34 @@ def test_disabled_blocking_per_url(config_stub, data_tmpdir):
url = QUrl('blocked.example.com') url = QUrl('blocked.example.com')
host_blocker = adblock.HostBlocker() host_blocker = host_blocker_factory()
host_blocker._blocked_hosts.add(url.host()) host_blocker._blocked_hosts.add(url.host())
assert host_blocker.is_blocked(url) assert host_blocker._is_blocked(url)
assert not host_blocker.is_blocked(url, first_party_url=QUrl(example_com)) assert not host_blocker._is_blocked(url, first_party_url=QUrl(example_com))
def test_no_blocklist_update(config_stub, download_stub, def test_no_blocklist_update(config_stub, download_stub, host_blocker_factory):
data_tmpdir, basedir, tmpdir, win_registry):
"""Ensure no URL is blocked when no block list exists.""" """Ensure no URL is blocked when no block list exists."""
config_stub.val.content.host_blocking.lists = None config_stub.val.content.host_blocking.lists = None
config_stub.val.content.host_blocking.enabled = True config_stub.val.content.host_blocking.enabled = True
host_blocker = adblock.HostBlocker() host_blocker = host_blocker_factory()
host_blocker.adblock_update() host_blocker.adblock_update()
host_blocker.read_hosts() host_blocker.read_hosts()
for dl in download_stub.downloads: for dl in download_stub.downloads:
dl.successful = True dl.successful = True
for str_url in URLS_TO_CHECK: for str_url in URLS_TO_CHECK:
assert not host_blocker.is_blocked(QUrl(str_url)) assert not host_blocker._is_blocked(QUrl(str_url))
def test_successful_update(config_stub, basedir, download_stub, def test_successful_update(config_stub, tmpdir, caplog, host_blocker_factory):
data_tmpdir, tmpdir, win_registry, caplog):
"""Ensure hosts from host_blocking.lists are blocked after an update.""" """Ensure hosts from host_blocking.lists are blocked after an update."""
config_stub.val.content.host_blocking.lists = generic_blocklists(tmpdir) config_stub.val.content.host_blocking.lists = generic_blocklists(tmpdir)
config_stub.val.content.host_blocking.enabled = True config_stub.val.content.host_blocking.enabled = True
config_stub.val.content.host_blocking.whitelist = None config_stub.val.content.host_blocking.whitelist = None
host_blocker = adblock.HostBlocker() host_blocker = host_blocker_factory()
host_blocker.adblock_update() host_blocker.adblock_update()
# Simulate download is finished # Simulate download is finished
while host_blocker._in_progress: while host_blocker._in_progress:
@ -271,11 +264,9 @@ def test_successful_update(config_stub, basedir, download_stub,
assert_urls(host_blocker, whitelisted=[]) assert_urls(host_blocker, whitelisted=[])
def test_parsing_multiple_hosts_on_line(config_stub, basedir, download_stub, def test_parsing_multiple_hosts_on_line(host_blocker_factory):
data_tmpdir, tmpdir, win_registry,
caplog):
"""Ensure multiple hosts on a line get parsed correctly.""" """Ensure multiple hosts on a line get parsed correctly."""
host_blocker = adblock.HostBlocker() host_blocker = host_blocker_factory()
bytes_host_line = ' '.join(BLOCKLIST_HOSTS).encode('utf-8') bytes_host_line = ' '.join(BLOCKLIST_HOSTS).encode('utf-8')
host_blocker._parse_line(bytes_host_line) host_blocker._parse_line(bytes_host_line)
assert_urls(host_blocker, whitelisted=[]) assert_urls(host_blocker, whitelisted=[])
@ -299,17 +290,15 @@ def test_parsing_multiple_hosts_on_line(config_stub, basedir, download_stub,
('127.0.1.1', 'myhostname'), ('127.0.1.1', 'myhostname'),
('127.0.0.53', 'myhostname'), ('127.0.0.53', 'myhostname'),
]) ])
def test_whitelisted_lines(config_stub, basedir, download_stub, data_tmpdir, def test_whitelisted_lines(host_blocker_factory, ip, host):
tmpdir, win_registry, caplog, ip, host):
"""Make sure we don't block hosts we don't want to.""" """Make sure we don't block hosts we don't want to."""
host_blocker = adblock.HostBlocker() host_blocker = host_blocker_factory()
line = ('{} {}'.format(ip, host)).encode('ascii') line = ('{} {}'.format(ip, host)).encode('ascii')
host_blocker._parse_line(line) host_blocker._parse_line(line)
assert host not in host_blocker._blocked_hosts assert host not in host_blocker._blocked_hosts
def test_failed_dl_update(config_stub, basedir, download_stub, def test_failed_dl_update(config_stub, tmpdir, caplog, host_blocker_factory):
data_tmpdir, tmpdir, win_registry, caplog):
"""One blocklist fails to download. """One blocklist fails to download.
Ensure hosts from this list are not blocked. Ensure hosts from this list are not blocked.
@ -323,7 +312,7 @@ def test_failed_dl_update(config_stub, basedir, download_stub,
config_stub.val.content.host_blocking.enabled = True config_stub.val.content.host_blocking.enabled = True
config_stub.val.content.host_blocking.whitelist = None config_stub.val.content.host_blocking.whitelist = None
host_blocker = adblock.HostBlocker() host_blocker = host_blocker_factory()
host_blocker.adblock_update() host_blocker.adblock_update()
while host_blocker._in_progress: while host_blocker._in_progress:
current_download = host_blocker._in_progress[0] current_download = host_blocker._in_progress[0]
@ -339,8 +328,8 @@ def test_failed_dl_update(config_stub, basedir, download_stub,
@pytest.mark.parametrize('location', ['content', 'comment']) @pytest.mark.parametrize('location', ['content', 'comment'])
def test_invalid_utf8(config_stub, download_stub, tmpdir, data_tmpdir, def test_invalid_utf8(config_stub, tmpdir, caplog, host_blocker_factory,
caplog, location): location):
"""Make sure invalid UTF-8 is handled correctly. """Make sure invalid UTF-8 is handled correctly.
See https://github.com/qutebrowser/qutebrowser/issues/2301 See https://github.com/qutebrowser/qutebrowser/issues/2301
@ -359,7 +348,7 @@ def test_invalid_utf8(config_stub, download_stub, tmpdir, data_tmpdir,
config_stub.val.content.host_blocking.enabled = True config_stub.val.content.host_blocking.enabled = True
config_stub.val.content.host_blocking.whitelist = None config_stub.val.content.host_blocking.whitelist = None
host_blocker = adblock.HostBlocker() host_blocker = host_blocker_factory()
host_blocker.adblock_update() host_blocker.adblock_update()
current_download = host_blocker._in_progress[0] current_download = host_blocker._in_progress[0]
@ -379,26 +368,25 @@ def test_invalid_utf8(config_stub, download_stub, tmpdir, data_tmpdir,
def test_invalid_utf8_compiled(config_stub, config_tmpdir, data_tmpdir, def test_invalid_utf8_compiled(config_stub, config_tmpdir, data_tmpdir,
monkeypatch, caplog): monkeypatch, caplog, host_blocker_factory):
"""Make sure invalid UTF-8 in the compiled file is handled.""" """Make sure invalid UTF-8 in the compiled file is handled."""
config_stub.val.content.host_blocking.lists = [] config_stub.val.content.host_blocking.lists = []
# Make sure the HostBlocker doesn't delete blocked-hosts in __init__ # Make sure the HostBlocker doesn't delete blocked-hosts in __init__
monkeypatch.setattr(adblock.HostBlocker, '_update_files', monkeypatch.setattr(adblock.HostBlocker, 'update_files',
lambda _self: None) lambda _self: None)
(config_tmpdir / 'blocked-hosts').write_binary( (config_tmpdir / 'blocked-hosts').write_binary(
b'https://www.example.org/\xa0') b'https://www.example.org/\xa0')
(data_tmpdir / 'blocked-hosts').ensure() (data_tmpdir / 'blocked-hosts').ensure()
host_blocker = adblock.HostBlocker() host_blocker = host_blocker_factory()
with caplog.at_level(logging.ERROR): with caplog.at_level(logging.ERROR):
host_blocker.read_hosts() host_blocker.read_hosts()
assert caplog.messages[-1] == "Failed to read host blocklist!" assert caplog.messages[-1] == "Failed to read host blocklist!"
def test_blocking_with_whitelist(config_stub, basedir, download_stub, def test_blocking_with_whitelist(config_stub, data_tmpdir, host_blocker_factory):
data_tmpdir, tmpdir):
"""Ensure hosts in content.host_blocking.whitelist are never blocked.""" """Ensure hosts in content.host_blocking.whitelist are never blocked."""
# Simulate adblock_update has already been run # Simulate adblock_update has already been run
# by creating a file named blocked-hosts, # by creating a file named blocked-hosts,
@ -412,13 +400,12 @@ def test_blocking_with_whitelist(config_stub, basedir, download_stub,
config_stub.val.content.host_blocking.enabled = True config_stub.val.content.host_blocking.enabled = True
config_stub.val.content.host_blocking.whitelist = list(WHITELISTED_HOSTS) config_stub.val.content.host_blocking.whitelist = list(WHITELISTED_HOSTS)
host_blocker = adblock.HostBlocker() host_blocker = host_blocker_factory()
host_blocker.read_hosts() host_blocker.read_hosts()
assert_urls(host_blocker) assert_urls(host_blocker)
def test_config_change_initial(config_stub, basedir, download_stub, def test_config_change_initial(config_stub, tmpdir, host_blocker_factory):
data_tmpdir, tmpdir):
"""Test emptying host_blocking.lists with existing blocked_hosts. """Test emptying host_blocking.lists with existing blocked_hosts.
- A blocklist is present in host_blocking.lists and blocked_hosts is - A blocklist is present in host_blocking.lists and blocked_hosts is
@ -432,14 +419,13 @@ def test_config_change_initial(config_stub, basedir, download_stub,
config_stub.val.content.host_blocking.enabled = True config_stub.val.content.host_blocking.enabled = True
config_stub.val.content.host_blocking.whitelist = None config_stub.val.content.host_blocking.whitelist = None
host_blocker = adblock.HostBlocker() host_blocker = host_blocker_factory()
host_blocker.read_hosts() host_blocker.read_hosts()
for str_url in URLS_TO_CHECK: for str_url in URLS_TO_CHECK:
assert not host_blocker.is_blocked(QUrl(str_url)) assert not host_blocker._is_blocked(QUrl(str_url))
def test_config_change(config_stub, basedir, download_stub, def test_config_change(config_stub, tmpdir, host_blocker_factory):
data_tmpdir, tmpdir):
"""Ensure blocked-hosts resets if host-block-list is changed to None.""" """Ensure blocked-hosts resets if host-block-list is changed to None."""
filtered_blocked_hosts = BLOCKLIST_HOSTS[1:] # Exclude localhost filtered_blocked_hosts = BLOCKLIST_HOSTS[1:] # Exclude localhost
blocklist = blocklist_to_url(create_blocklist( blocklist = blocklist_to_url(create_blocklist(
@ -449,16 +435,15 @@ def test_config_change(config_stub, basedir, download_stub,
config_stub.val.content.host_blocking.enabled = True config_stub.val.content.host_blocking.enabled = True
config_stub.val.content.host_blocking.whitelist = None config_stub.val.content.host_blocking.whitelist = None
host_blocker = adblock.HostBlocker() host_blocker = host_blocker_factory()
host_blocker.read_hosts() host_blocker.read_hosts()
config_stub.val.content.host_blocking.lists = None config_stub.val.content.host_blocking.lists = None
host_blocker.read_hosts() host_blocker.read_hosts()
for str_url in URLS_TO_CHECK: for str_url in URLS_TO_CHECK:
assert not host_blocker.is_blocked(QUrl(str_url)) assert not host_blocker._is_blocked(QUrl(str_url))
def test_add_directory(config_stub, basedir, download_stub, def test_add_directory(config_stub, tmpdir, host_blocker_factory):
data_tmpdir, tmpdir):
"""Ensure adblocker can import all files in a directory.""" """Ensure adblocker can import all files in a directory."""
blocklist_hosts2 = [] blocklist_hosts2 = []
for i in BLOCKLIST_HOSTS[1:]: for i in BLOCKLIST_HOSTS[1:]:
@ -471,18 +456,18 @@ def test_add_directory(config_stub, basedir, download_stub,
config_stub.val.content.host_blocking.lists = [tmpdir.strpath] config_stub.val.content.host_blocking.lists = [tmpdir.strpath]
config_stub.val.content.host_blocking.enabled = True config_stub.val.content.host_blocking.enabled = True
host_blocker = adblock.HostBlocker() host_blocker = host_blocker_factory()
host_blocker.adblock_update() host_blocker.adblock_update()
assert len(host_blocker._blocked_hosts) == len(blocklist_hosts2) * 2 assert len(host_blocker._blocked_hosts) == len(blocklist_hosts2) * 2
def test_adblock_benchmark(config_stub, data_tmpdir, basedir, benchmark): def test_adblock_benchmark(data_tmpdir, benchmark, host_blocker_factory):
blocked_hosts = os.path.join(utils.abs_datapath(), 'blocked-hosts') blocked_hosts = os.path.join(utils.abs_datapath(), 'blocked-hosts')
shutil.copy(blocked_hosts, str(data_tmpdir)) shutil.copy(blocked_hosts, str(data_tmpdir))
url = QUrl('https://www.example.org/') url = QUrl('https://www.example.org/')
blocker = adblock.HostBlocker() blocker = host_blocker_factory()
blocker.read_hosts() blocker.read_hosts()
assert blocker._blocked_hosts assert blocker._blocked_hosts
benchmark(lambda: blocker.is_blocked(url)) benchmark(lambda: blocker._is_blocked(url))