From c8db9e1c762c5d526cea327448c774b8b235891f Mon Sep 17 00:00:00 2001 From: George Edward Bulmer Date: Sat, 24 Mar 2018 19:42:34 +0000 Subject: [PATCH 01/10] Remove WHITELISTED, making file parsing satisfy: 1) 'dotless' hosts, e.g. localhost, cannot be blocked by a file 2) hosts ending in '.localdomain' cannot be blocked by a file --- qutebrowser/browser/adblock.py | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) diff --git a/qutebrowser/browser/adblock.py b/qutebrowser/browser/adblock.py index f0462a778..961994a9a 100644 --- a/qutebrowser/browser/adblock.py +++ b/qutebrowser/browser/adblock.py @@ -94,14 +94,8 @@ class HostBlocker: _done_count: How many files have been read successfully. _local_hosts_file: The path to the blocked-hosts file. _config_hosts_file: The path to a blocked-hosts in ~/.config - - Class attributes: - WHITELISTED: Hosts which never should be blocked. """ - WHITELISTED = ('localhost', 'localhost.localdomain', 'broadcasthost', - 'local') - def __init__(self): self._blocked_hosts = set() self._config_blocked_hosts = set() @@ -242,7 +236,7 @@ class HostBlocker: log.misc.error("Failed to parse: {!r}".format(line)) return False - if host not in self.WHITELISTED: + if '.' not in host and not host.endswith('.localdomain'): self._blocked_hosts.add(host) return True From 3f37fcf8fadb96658cf58fea64f371735a5bf144 Mon Sep 17 00:00:00 2001 From: George Edward Bulmer Date: Sat, 24 Mar 2018 20:15:34 +0000 Subject: [PATCH 02/10] Modify tests, localhost should never be blocked --- qutebrowser/browser/adblock.py | 2 +- tests/unit/browser/test_adblock.py | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/qutebrowser/browser/adblock.py b/qutebrowser/browser/adblock.py index 961994a9a..8328a8a76 100644 --- a/qutebrowser/browser/adblock.py +++ b/qutebrowser/browser/adblock.py @@ -236,7 +236,7 @@ class HostBlocker: log.misc.error("Failed to parse: {!r}".format(line)) return False - if '.' 
not in host and not host.endswith('.localdomain'): + if '.' in host and not host.endswith('.localdomain'): self._blocked_hosts.add(host) return True diff --git a/tests/unit/browser/test_adblock.py b/tests/unit/browser/test_adblock.py index 5b353efb9..d6ebb3877 100644 --- a/tests/unit/browser/test_adblock.py +++ b/tests/unit/browser/test_adblock.py @@ -114,14 +114,14 @@ def create_blocklist(directory, blocked_hosts=BLOCKLIST_HOSTS, return name -def assert_urls(host_blocker, blocked=BLOCKLIST_HOSTS, +def assert_urls(host_blocker, blocked=BLOCKLIST_HOSTS[1:], whitelisted=WHITELISTED_HOSTS, urls_to_check=URLS_TO_CHECK): """Test if Urls to check are blocked or not by HostBlocker. Ensure URLs in 'blocked' and not in 'whitelisted' are blocked. All other URLs must not be blocked. """ - whitelisted = list(whitelisted) + list(host_blocker.WHITELISTED) + whitelisted = list(whitelisted) for str_url in urls_to_check: url = QUrl(str_url) host = url.host() @@ -341,7 +341,7 @@ def test_blocking_with_whitelist(config_stub, basedir, download_stub, """Ensure hosts in content.host_blocking.whitelist are never blocked.""" # Simulate adblock_update has already been run # by creating a file named blocked-hosts, - # Exclude localhost from it, since localhost is in HostBlocker.WHITELISTED + # Exclude localhost from it, since localhost is never blocked by list filtered_blocked_hosts = BLOCKLIST_HOSTS[1:] blocklist = create_blocklist(data_tmpdir, blocked_hosts=filtered_blocked_hosts, From 8809ef02a109e3dc590020301cf7858a5414679a Mon Sep 17 00:00:00 2001 From: George Edward Bulmer Date: Sat, 24 Mar 2018 20:20:16 +0000 Subject: [PATCH 03/10] Add support for more than 1 host on a given line --- qutebrowser/browser/adblock.py | 13 +++++-------- 1 file changed, 5 insertions(+), 8 deletions(-) diff --git a/qutebrowser/browser/adblock.py b/qutebrowser/browser/adblock.py index 8328a8a76..1cee34f25 100644 --- a/qutebrowser/browser/adblock.py +++ b/qutebrowser/browser/adblock.py @@ -228,16 +228,13 @@ 
class HostBlocker: parts = line.split() if len(parts) == 1: # "one host per line" format - host = parts[0] - elif len(parts) == 2: - # /etc/hosts format - host = parts[1] + hosts = [parts[0]] else: - log.misc.error("Failed to parse: {!r}".format(line)) - return False + hosts = parts[1:] - if '.' in host and not host.endswith('.localdomain'): - self._blocked_hosts.add(host) + for host in hosts: + if '.' in host and not host.endswith('.localdomain'): + self._blocked_hosts.add(host) return True From 1380fef600fd70f35677567e4bec7c6e42f99013 Mon Sep 17 00:00:00 2001 From: George Edward Bulmer Date: Sat, 24 Mar 2018 21:08:55 +0000 Subject: [PATCH 04/10] Add test for parsing multiple lines --- tests/unit/browser/test_adblock.py | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/tests/unit/browser/test_adblock.py b/tests/unit/browser/test_adblock.py index d6ebb3877..573ac3ad8 100644 --- a/tests/unit/browser/test_adblock.py +++ b/tests/unit/browser/test_adblock.py @@ -94,6 +94,7 @@ def create_blocklist(directory, blocked_hosts=BLOCKLIST_HOSTS, name: name to give to the blocklist file line_format: 'etc_hosts' --> /etc/hosts format 'one_per_line' --> one host per line format + 'all_on_one_line' --> pathological example with one line 'not_correct' --> Not a correct hosts file format. 
""" blocklist_file = directory / name @@ -106,6 +107,8 @@ def create_blocklist(directory, blocked_hosts=BLOCKLIST_HOSTS, elif line_format == 'one_per_line': for host in blocked_hosts: blocklist.write(host + '\n') + elif line_format == 'all_on_one_line': + blocklist.write('127.0.0.1 ' + ' '.join(blocked_hosts) + '\n') elif line_format == 'not_correct': for host in blocked_hosts: blocklist.write(host + ' This is not a correct hosts file\n') @@ -247,6 +250,15 @@ def test_successful_update(config_stub, basedir, download_stub, assert_urls(host_blocker, whitelisted=[]) +def test_parsing_multiple_hosts_on_line(config_stub, basedir, download_stub, + data_tmpdir, tmpdir, win_registry, caplog): + """Ensure multiple hosts on a line get parsed correctly""" + host_blocker = adblock.HostBlocker() + bytes_host_line = ' '.join(BLOCKLIST_HOSTS).encode('utf-8') + host_blocker._parse_line(bytes_host_line) + assert_urls(host_blocker, whitelisted=[]) + + def test_failed_dl_update(config_stub, basedir, download_stub, data_tmpdir, tmpdir, win_registry, caplog): """One blocklist fails to download. From 64b01cc07674773b32d96f30a84cb39ef6121edf Mon Sep 17 00:00:00 2001 From: George Edward Bulmer Date: Sat, 24 Mar 2018 21:10:23 +0000 Subject: [PATCH 05/10] Remove extraneous part --- tests/unit/browser/test_adblock.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/tests/unit/browser/test_adblock.py b/tests/unit/browser/test_adblock.py index 573ac3ad8..464ae87bd 100644 --- a/tests/unit/browser/test_adblock.py +++ b/tests/unit/browser/test_adblock.py @@ -94,7 +94,6 @@ def create_blocklist(directory, blocked_hosts=BLOCKLIST_HOSTS, name: name to give to the blocklist file line_format: 'etc_hosts' --> /etc/hosts format 'one_per_line' --> one host per line format - 'all_on_one_line' --> pathological example with one line 'not_correct' --> Not a correct hosts file format. 
""" blocklist_file = directory / name @@ -107,8 +106,6 @@ def create_blocklist(directory, blocked_hosts=BLOCKLIST_HOSTS, elif line_format == 'one_per_line': for host in blocked_hosts: blocklist.write(host + '\n') - elif line_format == 'all_on_one_line': - blocklist.write('127.0.0.1 ' + ' '.join(blocked_hosts) + '\n') elif line_format == 'not_correct': for host in blocked_hosts: blocklist.write(host + ' This is not a correct hosts file\n') From b9bcad9c147a19535fe45f4e46d541ccea82e7ac Mon Sep 17 00:00:00 2001 From: George Edward Bulmer Date: Sat, 24 Mar 2018 21:13:22 +0000 Subject: [PATCH 06/10] Grammar change --- tests/unit/browser/test_adblock.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/unit/browser/test_adblock.py b/tests/unit/browser/test_adblock.py index 464ae87bd..ab40fa543 100644 --- a/tests/unit/browser/test_adblock.py +++ b/tests/unit/browser/test_adblock.py @@ -350,7 +350,7 @@ def test_blocking_with_whitelist(config_stub, basedir, download_stub, """Ensure hosts in content.host_blocking.whitelist are never blocked.""" # Simulate adblock_update has already been run # by creating a file named blocked-hosts, - # Exclude localhost from it, since localhost is never blocked by list + # Exclude localhost from it as localhost is never blocked via list filtered_blocked_hosts = BLOCKLIST_HOSTS[1:] blocklist = create_blocklist(data_tmpdir, blocked_hosts=filtered_blocked_hosts, From eb5684e5f7aa0ec1d49077ef390220b9fb491573 Mon Sep 17 00:00:00 2001 From: George Edward Bulmer Date: Sat, 24 Mar 2018 21:52:26 +0000 Subject: [PATCH 07/10] Pylint fix --- tests/unit/browser/test_adblock.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tests/unit/browser/test_adblock.py b/tests/unit/browser/test_adblock.py index ab40fa543..39c83d698 100644 --- a/tests/unit/browser/test_adblock.py +++ b/tests/unit/browser/test_adblock.py @@ -248,7 +248,8 @@ def test_successful_update(config_stub, basedir, download_stub, def 
test_parsing_multiple_hosts_on_line(config_stub, basedir, download_stub, - data_tmpdir, tmpdir, win_registry, caplog): + data_tmpdir, tmpdir, win_registry, + caplog): """Ensure multiple hosts on a line get parsed correctly""" host_blocker = adblock.HostBlocker() bytes_host_line = ' '.join(BLOCKLIST_HOSTS).encode('utf-8') From a85ac1725faaa8186c27c76976e1fc9ce64490de Mon Sep 17 00:00:00 2001 From: George Edward Bulmer Date: Sat, 24 Mar 2018 22:56:47 +0000 Subject: [PATCH 08/10] Missing fullstop in a docstring --- tests/unit/browser/test_adblock.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/unit/browser/test_adblock.py b/tests/unit/browser/test_adblock.py index 39c83d698..69392fc7b 100644 --- a/tests/unit/browser/test_adblock.py +++ b/tests/unit/browser/test_adblock.py @@ -250,7 +250,7 @@ def test_successful_update(config_stub, basedir, download_stub, def test_parsing_multiple_hosts_on_line(config_stub, basedir, download_stub, data_tmpdir, tmpdir, win_registry, caplog): - """Ensure multiple hosts on a line get parsed correctly""" + """Ensure multiple hosts on a line get parsed correctly.""" host_blocker = adblock.HostBlocker() bytes_host_line = ' '.join(BLOCKLIST_HOSTS).encode('utf-8') host_blocker._parse_line(bytes_host_line) From 1ccb464d1cb9d713915cc054f6a45075aac5950a Mon Sep 17 00:00:00 2001 From: George Edward Bulmer Date: Wed, 28 Mar 2018 14:17:13 +0100 Subject: [PATCH 09/10] Return removed comment about hosts format --- qutebrowser/browser/adblock.py | 1 + 1 file changed, 1 insertion(+) diff --git a/qutebrowser/browser/adblock.py b/qutebrowser/browser/adblock.py index 1cee34f25..f42d1a1db 100644 --- a/qutebrowser/browser/adblock.py +++ b/qutebrowser/browser/adblock.py @@ -230,6 +230,7 @@ class HostBlocker: # "one host per line" format hosts = [parts[0]] else: + # /etc/hosts format hosts = parts[1:] for host in hosts: From 2789bec1e79621d78ba2716e4a28dd8e52301367 Mon Sep 17 00:00:00 2001 From: George Edward Bulmer Date: Wed, 28 Mar 
2018 14:27:17 +0100 Subject: [PATCH 10/10] Modify assert_urls to treat localhost differently --- tests/unit/browser/test_adblock.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/tests/unit/browser/test_adblock.py b/tests/unit/browser/test_adblock.py index 69392fc7b..8ab3b930d 100644 --- a/tests/unit/browser/test_adblock.py +++ b/tests/unit/browser/test_adblock.py @@ -114,14 +114,16 @@ def create_blocklist(directory, blocked_hosts=BLOCKLIST_HOSTS, return name -def assert_urls(host_blocker, blocked=BLOCKLIST_HOSTS[1:], +def assert_urls(host_blocker, blocked=BLOCKLIST_HOSTS, whitelisted=WHITELISTED_HOSTS, urls_to_check=URLS_TO_CHECK): """Test if Urls to check are blocked or not by HostBlocker. Ensure URLs in 'blocked' and not in 'whitelisted' are blocked. All other URLs must not be blocked. + + localhost is an example of a special case that shouldn't be blocked. """ - whitelisted = list(whitelisted) + whitelisted = list(whitelisted) + ['localhost'] for str_url in urls_to_check: url = QUrl(str_url) host = url.host()