qutebrowser/tests/unit/browser/test_adblock.py

397 lines
16 KiB
Python
Raw Normal View History

2015-12-03 22:00:39 +01:00
# vim: ft=python fileencoding=utf-8 sts=4 sw=4 et:
2015-12-04 01:52:00 +01:00
#!/usr/bin/env python3
2015-12-03 22:00:39 +01:00
2016-01-23 16:15:19 +01:00
# Copyright 2015 Corentin Julé <corentinjule@gmail.com>
2015-12-03 22:00:39 +01:00
#
# This file is part of qutebrowser.
#
# qutebrowser is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# qutebrowser is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with qutebrowser. If not, see <http://www.gnu.org/licenses/>.
2015-12-04 01:52:00 +01:00
import os
import os.path
2015-12-04 01:52:00 +01:00
import zipfile
import logging
2015-12-04 01:52:00 +01:00
import pytest
2015-12-04 01:52:00 +01:00
from PyQt5.QtCore import QUrl
2015-12-06 00:10:56 +01:00
from qutebrowser.browser import adblock
2015-12-06 00:10:56 +01:00
pytestmark = pytest.mark.usefixtures('qapp', 'config_tmpdir')
2016-05-07 22:08:55 +02:00
2016-05-07 22:09:43 +02:00
# TODO See ../utils/test_standarddirutils for OSError and caplog assertion
WHITELISTED_HOSTS = ('qutebrowser.org', 'mediumhost.io')
2015-12-08 00:52:51 +01:00
BLOCKLIST_HOSTS = ('localhost',
2015-12-08 00:52:51 +01:00
'mediumhost.io',
'malware.badhost.org',
'4-verybadhost.com',
'ads.worsthostever.net')
2015-12-08 00:52:51 +01:00
CLEAN_HOSTS = ('goodhost.gov', 'verygoodhost.com')
URLS_TO_CHECK = ('http://localhost',
'http://mediumhost.io',
2016-01-23 16:15:19 +01:00
'ftp://malware.badhost.org',
'http://4-verybadhost.com',
'http://ads.worsthostever.net',
2015-12-08 00:52:51 +01:00
'http://goodhost.gov',
2016-01-23 16:34:05 +01:00
'ftp://verygoodhost.com',
'http://qutebrowser.org')
2015-12-06 19:33:35 +01:00
2016-01-23 16:34:05 +01:00
2015-12-06 15:35:10 +01:00
class BaseDirStub:
2015-12-08 00:52:51 +01:00
"""Mock for objreg.get('args') called in adblock.HostBlocker.read_hosts."""
2015-12-06 15:35:10 +01:00
def __init__(self):
self.basedir = None
2015-12-06 00:51:44 +01:00
2015-12-08 00:52:51 +01:00
2016-07-08 19:30:17 +02:00
@pytest.fixture
def basedir(fake_args):
2015-12-06 19:33:35 +01:00
"""Register a Fake basedir."""
2016-07-08 19:30:17 +02:00
fake_args.basedir = None
2015-12-06 19:33:35 +01:00
2015-12-06 00:10:56 +01:00
2015-12-08 00:52:51 +01:00
def create_zipfile(directory, files, zipname='test'):
2016-05-07 22:15:27 +02:00
"""Return a path to a newly created zip file.
2015-12-08 00:52:51 +01:00
2016-05-07 22:15:27 +02:00
Args:
directory: path object where to create the zip file.
files: list of filenames (relative to directory) to each file to add.
2016-05-07 22:15:27 +02:00
zipname: name to give to the zip file.
2015-12-08 00:52:51 +01:00
"""
2016-01-23 16:15:19 +01:00
zipfile_path = directory / zipname + '.zip'
with zipfile.ZipFile(str(zipfile_path), 'w') as new_zipfile:
for file_path in files:
new_zipfile.write(str(directory / file_path),
2016-01-23 16:15:19 +01:00
arcname=os.path.basename(str(file_path)))
# Removes path from file name
return str(zipname + '.zip')
2015-12-08 00:52:51 +01:00
2016-01-23 16:15:19 +01:00
def create_blocklist(directory, blocked_hosts=BLOCKLIST_HOSTS,
name='hosts', line_format='one_per_line'):
"""Return a path to a blocklist file.
2015-12-08 00:52:51 +01:00
2016-05-08 11:14:42 +02:00
Args:
directory: path object where to create the blocklist file
blocked_hosts: an iterable of string hosts to add to the blocklist
name: name to give to the blocklist file
line_format: 'etc_hosts' --> /etc/hosts format
'one_per_line' --> one host per line format
'not_correct' --> Not a correct hosts file format.
2015-12-08 00:52:51 +01:00
"""
2016-01-23 16:15:19 +01:00
blocklist_file = directory / name
with open(str(blocklist_file), 'w', encoding='UTF-8') as blocklist:
2015-12-08 00:52:51 +01:00
# ensure comments are ignored when processing blocklist
2016-01-23 16:15:19 +01:00
blocklist.write('# Blocked Hosts List #\n\n')
2016-01-23 16:34:05 +01:00
if line_format == 'etc_hosts': # /etc/hosts like format
2016-01-23 16:15:19 +01:00
for host in blocked_hosts:
blocklist.write('127.0.0.1 ' + host + '\n')
2015-12-08 00:52:51 +01:00
elif line_format == 'one_per_line':
2016-01-23 16:15:19 +01:00
for host in blocked_hosts:
blocklist.write(host + '\n')
2015-12-08 00:52:51 +01:00
elif line_format == 'not_correct':
2016-01-23 16:15:19 +01:00
for host in blocked_hosts:
blocklist.write(host + ' This is not a correct hosts file\n')
else:
raise ValueError('Incorrect line_format argument')
return name
2015-12-08 00:52:51 +01:00
2016-01-23 16:15:19 +01:00
def assert_urls(host_blocker, blocked=BLOCKLIST_HOSTS,
whitelisted=WHITELISTED_HOSTS, urls_to_check=URLS_TO_CHECK):
2016-05-07 22:15:27 +02:00
"""Test if Urls to check are blocked or not by HostBlocker.
Ensure URLs in 'blocked' and not in 'whitelisted' are blocked.
All other URLs must not be blocked.
"""
2016-01-23 16:15:19 +01:00
whitelisted = list(whitelisted) + list(host_blocker.WHITELISTED)
2015-12-06 15:35:10 +01:00
for str_url in urls_to_check:
url = QUrl(str_url)
host = url.host()
2015-12-08 00:52:51 +01:00
if host in blocked and host not in whitelisted:
2015-12-06 15:35:10 +01:00
assert host_blocker.is_blocked(url)
else:
assert not host_blocker.is_blocked(url)
2015-12-03 22:00:39 +01:00
def blocklist_to_url(filename):
"""Get an example.com-URL with the given filename as path."""
assert not os.path.isabs(filename), filename
url = QUrl('http://example.com/')
url.setPath('/' + filename)
assert url.isValid(), url.errorString()
return url
2015-12-08 00:52:51 +01:00
def generic_blocklists(directory):
"""Return a generic list of files to be used in hosts-block-lists option.
2016-05-07 22:15:27 +02:00
This list contains :
- a remote zip file with 1 hosts file and 2 useless files
- a remote zip file with only useless files
(Should raise a FileNotFoundError)
- a remote zip file with only one valid hosts file
- a local text file with valid hosts
- a remote text file without valid hosts format.
"""
2015-12-08 00:52:51 +01:00
# remote zip file with 1 hosts file and 2 useless files
2016-01-23 16:15:19 +01:00
file1 = create_blocklist(directory, blocked_hosts=CLEAN_HOSTS,
name='README', line_format='not_correct')
file2 = create_blocklist(directory, blocked_hosts=BLOCKLIST_HOSTS[:3],
name='hosts', line_format='etc_hosts')
file3 = create_blocklist(directory, blocked_hosts=CLEAN_HOSTS,
name='false_positive', line_format='one_per_line')
files_to_zip = [file1, file2, file3]
blocklist1 = blocklist_to_url(
create_zipfile(directory, files_to_zip, 'block1'))
2015-12-08 00:52:51 +01:00
# remote zip file without file named hosts
# (Should raise a FileNotFoundError)
2016-01-23 16:15:19 +01:00
file1 = create_blocklist(directory, blocked_hosts=CLEAN_HOSTS,
name='md5sum', line_format='etc_hosts')
file2 = create_blocklist(directory, blocked_hosts=CLEAN_HOSTS,
name='README', line_format='not_correct')
file3 = create_blocklist(directory, blocked_hosts=CLEAN_HOSTS,
name='false_positive', line_format='one_per_line')
files_to_zip = [file1, file2, file3]
blocklist2 = blocklist_to_url(
create_zipfile(directory, files_to_zip, 'block2'))
2015-12-08 00:52:51 +01:00
# remote zip file with only one valid hosts file inside
file1 = create_blocklist(directory, blocked_hosts=[BLOCKLIST_HOSTS[3]],
name='malwarelist', line_format='etc_hosts')
blocklist3 = blocklist_to_url(create_zipfile(directory, [file1], 'block3'))
2015-12-08 00:52:51 +01:00
# local text file with valid hosts
blocklist4 = QUrl.fromLocalFile(str(directory / create_blocklist(
directory, blocked_hosts=[BLOCKLIST_HOSTS[4]],
name='mycustomblocklist', line_format='one_per_line')))
assert blocklist4.isValid(), blocklist4.errorString()
2015-12-08 00:52:51 +01:00
# remote text file without valid hosts format
blocklist5 = blocklist_to_url(create_blocklist(
directory, blocked_hosts=CLEAN_HOSTS, name='notcorrectlist',
line_format='not_correct'))
2015-12-08 00:52:51 +01:00
return [blocklist1.toString(), blocklist2.toString(),
blocklist3.toString(), blocklist4.toString(),
blocklist5.toString()]
2015-12-08 00:52:51 +01:00
def test_disabled_blocking_update(basedir, config_stub, download_stub,
data_tmpdir, tmpdir, win_registry, caplog):
2016-05-07 22:39:53 +02:00
"""Ensure no URL is blocked when host blocking is disabled."""
config_stub.val.content.host_blocking.lists = generic_blocklists(tmpdir)
config_stub.val.content.host_blocking.enabled = False
2015-12-08 00:52:51 +01:00
host_blocker = adblock.HostBlocker()
host_blocker.adblock_update()
2015-12-08 00:52:51 +01:00
while host_blocker._in_progress:
current_download = host_blocker._in_progress[0]
with caplog.at_level(logging.ERROR):
current_download.successful = True
current_download.finished.emit()
2015-12-08 00:52:51 +01:00
host_blocker.read_hosts()
for str_url in URLS_TO_CHECK:
assert not host_blocker.is_blocked(QUrl(str_url))
def test_no_blocklist_update(config_stub, download_stub,
data_tmpdir, basedir, tmpdir, win_registry):
2016-05-07 22:15:27 +02:00
"""Ensure no URL is blocked when no block list exists."""
config_stub.val.content.host_blocking.lists = None
config_stub.val.content.host_blocking.enabled = True
2015-12-08 00:52:51 +01:00
host_blocker = adblock.HostBlocker()
host_blocker.adblock_update()
2015-12-08 00:52:51 +01:00
host_blocker.read_hosts()
for dl in download_stub.downloads:
dl.successful = True
2015-12-08 00:52:51 +01:00
for str_url in URLS_TO_CHECK:
assert not host_blocker.is_blocked(QUrl(str_url))
def test_successful_update(config_stub, basedir, download_stub,
data_tmpdir, tmpdir, win_registry, caplog):
"""Ensure hosts from host_blocking.lists are blocked after an update."""
config_stub.val.content.host_blocking.lists = generic_blocklists(tmpdir)
config_stub.val.content.host_blocking.enabled = True
config_stub.val.content.host_blocking.whitelist = None
2015-12-08 00:52:51 +01:00
host_blocker = adblock.HostBlocker()
host_blocker.adblock_update()
2015-12-08 00:52:51 +01:00
# Simulate download is finished
while host_blocker._in_progress:
current_download = host_blocker._in_progress[0]
with caplog.at_level(logging.ERROR):
current_download.successful = True
current_download.finished.emit()
2015-12-08 00:52:51 +01:00
host_blocker.read_hosts()
2016-01-23 16:15:19 +01:00
assert_urls(host_blocker, whitelisted=[])
2015-12-08 00:52:51 +01:00
def test_failed_dl_update(config_stub, basedir, download_stub,
data_tmpdir, tmpdir, win_registry, caplog):
2016-05-07 22:15:27 +02:00
"""One blocklist fails to download.
Ensure hosts from this list are not blocked.
"""
dl_fail_blocklist = blocklist_to_url(create_blocklist(
tmpdir, blocked_hosts=CLEAN_HOSTS, name='download_will_fail',
line_format='one_per_line'))
hosts_to_block = (generic_blocklists(tmpdir) +
[dl_fail_blocklist.toString()])
config_stub.val.content.host_blocking.lists = hosts_to_block
config_stub.val.content.host_blocking.enabled = True
config_stub.val.content.host_blocking.whitelist = None
2015-12-08 00:52:51 +01:00
host_blocker = adblock.HostBlocker()
host_blocker.adblock_update()
2015-12-08 00:52:51 +01:00
while host_blocker._in_progress:
current_download = host_blocker._in_progress[0]
# if current download is the file we want to fail, make it fail
if current_download.name == dl_fail_blocklist.path():
current_download.successful = False
else:
current_download.successful = True
with caplog.at_level(logging.ERROR):
current_download.finished.emit()
2015-12-08 00:52:51 +01:00
host_blocker.read_hosts()
2016-01-23 16:15:19 +01:00
assert_urls(host_blocker, whitelisted=[])
2015-12-08 00:52:51 +01:00
@pytest.mark.parametrize('location', ['content', 'comment'])
def test_invalid_utf8(config_stub, download_stub, tmpdir, data_tmpdir,
caplog, location):
"""Make sure invalid UTF-8 is handled correctly.
See https://github.com/qutebrowser/qutebrowser/issues/2301
"""
blocklist = tmpdir / 'blocklist'
if location == 'comment':
blocklist.write_binary(b'# nbsp: \xa0\n')
else:
assert location == 'content'
blocklist.write_binary(b'https://www.example.org/\xa0')
for url in BLOCKLIST_HOSTS:
blocklist.write(url + '\n', mode='a')
url = blocklist_to_url('blocklist')
config_stub.val.content.host_blocking.lists = [url.toString()]
config_stub.val.content.host_blocking.enabled = True
config_stub.val.content.host_blocking.whitelist = None
host_blocker = adblock.HostBlocker()
host_blocker.adblock_update()
current_download = host_blocker._in_progress[0]
if location == 'content':
with caplog.at_level(logging.ERROR):
current_download.successful = True
current_download.finished.emit()
expected = (r"Failed to decode: "
2017-03-13 07:01:13 +01:00
r"b'https://www.example.org/\xa0localhost")
assert caplog.records[-2].message.startswith(expected)
else:
current_download.successful = True
current_download.finished.emit()
host_blocker.read_hosts()
assert_urls(host_blocker, whitelisted=[])
def test_invalid_utf8_compiled(config_stub, config_tmpdir, data_tmpdir,
monkeypatch, caplog):
"""Make sure invalid UTF-8 in the compiled file is handled."""
config_stub.val.content.host_blocking.lists = []
# Make sure the HostBlocker doesn't delete blocked-hosts in __init__
monkeypatch.setattr(adblock.HostBlocker, '_update_files',
lambda _self: None)
(config_tmpdir / 'blocked-hosts').write_binary(
b'https://www.example.org/\xa0')
(data_tmpdir / 'blocked-hosts').ensure()
host_blocker = adblock.HostBlocker()
with caplog.at_level(logging.ERROR):
host_blocker.read_hosts()
assert caplog.records[-1].message == "Failed to read host blocklist!"
2015-12-08 00:52:51 +01:00
def test_blocking_with_whitelist(config_stub, basedir, download_stub,
data_tmpdir, tmpdir):
"""Ensure hosts in content.host_blocking.whitelist are never blocked."""
2016-01-23 16:15:19 +01:00
# Simulate adblock_update has already been run
# by creating a file named blocked-hosts,
# Exclude localhost from it, since localhost is in HostBlocker.WHITELISTED
2015-12-08 00:52:51 +01:00
filtered_blocked_hosts = BLOCKLIST_HOSTS[1:]
blocklist = create_blocklist(data_tmpdir,
2015-12-08 00:52:51 +01:00
blocked_hosts=filtered_blocked_hosts,
name='blocked-hosts',
line_format='one_per_line')
config_stub.val.content.host_blocking.lists = [blocklist]
config_stub.val.content.host_blocking.enabled = True
config_stub.val.content.host_blocking.whitelist = list(WHITELISTED_HOSTS)
2015-12-08 00:52:51 +01:00
host_blocker = adblock.HostBlocker()
host_blocker.read_hosts()
assert_urls(host_blocker)
2016-05-07 23:31:35 +02:00
def test_config_change_initial(config_stub, basedir, download_stub,
data_tmpdir, tmpdir):
"""Test emptying host_blocking.lists with existing blocked_hosts.
2016-05-07 23:31:35 +02:00
- A blocklist is present in host_blocking.lists and blocked_hosts is
populated
- User quits qutebrowser, empties host_blocking.lists from his config
2016-05-07 23:31:35 +02:00
- User restarts qutebrowser, does adblock-update
"""
create_blocklist(tmpdir, blocked_hosts=BLOCKLIST_HOSTS,
name='blocked-hosts', line_format='one_per_line')
config_stub.val.content.host_blocking.lists = None
config_stub.val.content.host_blocking.enabled = True
config_stub.val.content.host_blocking.whitelist = None
2016-05-07 23:31:35 +02:00
host_blocker = adblock.HostBlocker()
host_blocker.read_hosts()
for str_url in URLS_TO_CHECK:
assert not host_blocker.is_blocked(QUrl(str_url))
2015-12-08 00:52:51 +01:00
def test_config_change(config_stub, basedir, download_stub,
data_tmpdir, tmpdir):
2016-01-23 16:15:19 +01:00
"""Ensure blocked-hosts resets if host-block-list is changed to None."""
2016-01-23 16:34:05 +01:00
filtered_blocked_hosts = BLOCKLIST_HOSTS[1:] # Exclude localhost
blocklist = blocklist_to_url(create_blocklist(
tmpdir, blocked_hosts=filtered_blocked_hosts, name='blocked-hosts',
line_format='one_per_line'))
config_stub.val.content.host_blocking.lists = [blocklist.toString()]
config_stub.val.content.host_blocking.enabled = True
config_stub.val.content.host_blocking.whitelist = None
2015-12-08 00:52:51 +01:00
host_blocker = adblock.HostBlocker()
host_blocker.read_hosts()
config_stub.set_obj('content.host_blocking.lists', None)
2015-12-08 00:52:51 +01:00
host_blocker.read_hosts()
for str_url in URLS_TO_CHECK:
assert not host_blocker.is_blocked(QUrl(str_url))