From 0134646c6753f309af01ebecc10e879ae5198825 Mon Sep 17 00:00:00 2001 From: Florian Bruhin Date: Mon, 24 Nov 2014 21:05:09 +0100 Subject: [PATCH] Basic host blocker implementation. Squashed commit of the following: commit 8d38e9c2f13778fe21cc2c1d61c5d26907b469ef Author: Florian Bruhin Date: Mon Nov 24 21:04:30 2014 +0100 Fix default adblock lists. commit a734b87e753daec28fa0c6643af8de088041179a Author: Florian Bruhin Date: Mon Nov 24 21:04:14 2014 +0100 Fix error output on adblock errors. commit 15bc10f605ebb86c12523bb81b55d1440fcacdbf Author: Florian Bruhin Date: Mon Nov 24 07:38:47 2014 +0100 Update adblock docs commit 475d530ee50f52295c2171208c6a0278750cf4b4 Author: Florian Bruhin Date: Mon Nov 24 07:38:39 2014 +0100 Remove adblock ram calculation commit 0e52cc3ec93912fd3fa3ecf6de833aef0c540a60 Author: Florian Bruhin Date: Mon Nov 24 07:03:11 2014 +0100 Adjust docs for hosts-file. commit 941df242d38300d7e42a26f2e67cfd668c6f8590 Author: Florian Bruhin Date: Mon Nov 24 06:55:40 2014 +0100 Add logging for host blocking. commit 4ca1561ccf32affb8439113f4f1c222d7dee1e14 Author: Florian Bruhin Date: Mon Nov 24 00:11:59 2014 +0100 Don't do adblock updates automatically after changing config. commit 26d4e86ca393d4a1a5fd8fac7729ce9c2e7cd240 Author: Florian Bruhin Date: Thu Nov 20 19:58:32 2014 +0100 adblock: Don't delete blocked hosts between files. commit c7af08c68adf3c135d5a2fc746eb6d3c715b0711 Author: Florian Bruhin Date: Thu Nov 20 19:58:22 2014 +0100 adblock: Add comments for rewinding. commit 9af87313088cd79ab8c022bcee9308511d6328f0 Author: Florian Bruhin Date: Thu Nov 20 19:56:39 2014 +0100 adblock: Rewind files properly. commit 7f3dadc04ba68c92d6789ecf5822f7b2dfc367a0 Author: Florian Bruhin Date: Thu Nov 20 19:40:18 2014 +0100 Add untested file support. 
commit 334c01867d510b599d94c9397097a1d789c0c64c Author: Florian Bruhin Date: Wed Nov 19 22:33:25 2014 +0100 adblock: Handle download errors correctly commit 3a9af4cd4a903386dc323640f1d52e03383b3aa7 Author: Florian Bruhin Date: Wed Nov 19 22:23:04 2014 +0100 adblock: Add error handling for invalid input. commit 5934c3c027ff9205adb512aab396ca913b31e50d Author: Florian Bruhin Date: Wed Nov 19 22:22:47 2014 +0100 adblock: Handle done_count correctly. commit 3ba5e83fda9b7d764bfcf1e4b1aa479daae39791 Author: Florian Bruhin Date: Wed Nov 19 21:18:10 2014 +0100 Read adblock files when they finished downloading. commit 86a5ecf194445e2ebe0cf22b59e24223387880c5 Author: Florian Bruhin Date: Wed Nov 19 20:51:25 2014 +0100 Add hosts-file.net to host-block-lists. commit 121f0bff953497449c32e45e671b71d954beed4c Author: Florian Bruhin Date: Wed Nov 19 20:51:13 2014 +0100 Add zip-file support to adblock.py. commit f1c0e67cb9d890a44a1d1f545ace997f0a453f91 Author: Florian Bruhin Date: Mon Nov 17 07:41:14 2014 +0100 print line counts for debugging commit 75dbb8964fd44862abc378ef26fb47f8cda6061e Author: Florian Bruhin Date: Fri Nov 14 19:22:20 2014 +0100 Use statusbar messages for adblock commit 74d9142d831496e02c5baae72c2723e320af1778 Author: Florian Bruhin Date: Fri Nov 14 08:34:46 2014 +0100 Add basic adblock functionality. 
--- doc/help/commands.asciidoc | 5 + doc/help/settings.asciidoc | 13 ++ qutebrowser/app.py | 6 +- qutebrowser/browser/adblock.py | 214 ++++++++++++++++++++++++++ qutebrowser/config/configdata.py | 16 ++ qutebrowser/network/networkmanager.py | 7 + qutebrowser/utils/message.py | 6 +- 7 files changed, 264 insertions(+), 3 deletions(-) create mode 100644 qutebrowser/browser/adblock.py diff --git a/doc/help/commands.asciidoc b/doc/help/commands.asciidoc index ded0fc739..82f0e3b75 100644 --- a/doc/help/commands.asciidoc +++ b/doc/help/commands.asciidoc @@ -5,6 +5,7 @@ [options="header",width="75%",cols="25%,75%"] |============== |Command|Description +|<<adblock-update,adblock-update>>|Update the adblock block lists. |<<back,back>>|Go back in the history of the current tab. |<>|Bind a key to a command. |<>|Cancel the first/[count]th download. @@ -50,6 +51,10 @@ |<>|Increase the zoom level for the current tab. |<>|Decrease the zoom level for the current tab. |============== +[[adblock-update]] +=== adblock-update +Update the adblock block lists. + [[back]] === back Syntax: +:back [*--tab*] [*--bg*] [*--window*]+ diff --git a/doc/help/settings.asciidoc b/doc/help/settings.asciidoc index bcbc4a30a..2305cc3a3 100644 --- a/doc/help/settings.asciidoc +++ b/doc/help/settings.asciidoc @@ -121,6 +121,7 @@ |<>|Whether locally loaded documents are allowed to access other local urls. |<>|Whether to accept cookies. |<>|Whether to store cookies. +|<<permissions-host-block-lists,host-block-lists>>|List of URLs of lists which contain hosts to block. |============== .Quick reference for section ``hints'' @@ -789,6 +790,18 @@ Whether to store cookies. Default: +pass:[true]+ +[[permissions-host-block-lists]] +=== host-block-lists +List of URLs of lists which contain hosts to block. + +The file can be in one of the following formats: + +- An '/etc/hosts'-like file +- One host per line +- A zip-file of any of the above, with either only one file, or a file named 'hosts' (with any extension). 
+ +Default: +pass:[http://www.malwaredomainlist.com/hostslist/hosts.txt,http://someonewhocares.org/hosts/hosts,http://winhelp2002.mvps.org/hosts.zip,http://malwaredomains.lehigh.edu/files/justdomains.zip,http://pgl.yoyo.org/adservers/serverlist.php?hostformat=hosts&mimetype=plaintext,http://hosts-file.net/ad_servers.asp]+ + == hints Hinting settings. diff --git a/qutebrowser/app.py b/qutebrowser/app.py index 92810346f..22ec7ab38 100644 --- a/qutebrowser/app.py +++ b/qutebrowser/app.py @@ -40,7 +40,7 @@ import qutebrowser from qutebrowser.commands import cmdutils, runners from qutebrowser.config import style, config, websettings from qutebrowser.network import qutescheme, proxy -from qutebrowser.browser import quickmarks, cookies, cache +from qutebrowser.browser import quickmarks, cookies, cache, adblock from qutebrowser.widgets import mainwindow, crash from qutebrowser.keyinput import modeman from qutebrowser.utils import (log, version, message, readline, utils, qtutils, @@ -156,6 +156,10 @@ class Application(QApplication): self._handle_segfault() log.init.debug("Initializing websettings...") websettings.init() + log.init.debug("Initializing adblock...") + host_blocker = adblock.HostBlocker() + host_blocker.read_hosts() + objreg.register('host-blocker', host_blocker) log.init.debug("Initializing quickmarks...") quickmark_manager = quickmarks.QuickmarkManager() objreg.register('quickmark-manager', quickmark_manager) diff --git a/qutebrowser/browser/adblock.py b/qutebrowser/browser/adblock.py new file mode 100644 index 000000000..5029c0f65 --- /dev/null +++ b/qutebrowser/browser/adblock.py @@ -0,0 +1,214 @@ +# vim: ft=python fileencoding=utf-8 sts=4 sw=4 et: + +# Copyright 2014 Florian Bruhin (The Compiler) +# +# This file is part of qutebrowser. 
+# +# qutebrowser is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# qutebrowser is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with qutebrowser. If not, see <http://www.gnu.org/licenses/>. + +"""Functions related to adblocking.""" + +import io +import os.path +import functools +import posixpath +import zipfile + +from PyQt5.QtCore import QStandardPaths + +from qutebrowser.config import config +from qutebrowser.utils import objreg, standarddir, log, message, utils +from qutebrowser.commands import cmdutils + + +class FakeDownload: + + """A download stub to use on_download_finished with local files.""" + + def __init__(self, fileobj): + self.basename = os.path.basename(fileobj.name) + self.fileobj = fileobj + self.successful = True + + +class HostBlocker: + + """Manage blocked hosts based from /etc/hosts-like files. + + Attributes: + blocked_hosts: A set of blocked hosts. + _in_progress: The DownloadItems which are currently downloading. + _done_count: How many files have been read successfully. + _hosts_file: The path to the blocked-hosts file. + + Class attributes: + WHITELISTED: Hosts which never should be blocked. 
+ """ + + WHITELISTED = ('localhost', 'localhost.localdomain', 'broadcasthost', + 'local') + + def __init__(self): + self.blocked_hosts = set() + self._in_progress = [] + self._done_count = 0 + data_dir = standarddir.get(QStandardPaths.DataLocation) + self._hosts_file = os.path.join(data_dir, 'blocked-hosts') + objreg.get('config').changed.connect(self.on_config_changed) + + def read_hosts(self): + """Read hosts from the existing blocked-hosts file.""" + self.blocked_hosts = set() + if os.path.exists(self._hosts_file): + with open(self._hosts_file, 'r', encoding='utf-8') as f: + for line in f: + self.blocked_hosts.add(line.strip()) + else: + if config.get('permissions', 'host-block-lists') is not None: + message.info('last-focused', + "Run :adblock-update to get adblock lists.") + + @cmdutils.register(instance='host-blocker') + def adblock_update(self): + """Update the adblock block lists.""" + self.blocked_hosts = set() + self._done_count = 0 + urls = config.get('permissions', 'host-block-lists') + download_manager = objreg.get('download-manager', scope='window', + window='last-focused') + if urls is None: + return + for url in urls: + if url.scheme() == 'file': + fileobj = open(url.path(), 'rb') + download = FakeDownload(fileobj) + self._in_progress.append(download) + self.on_download_finished(download) + else: + fobj = io.BytesIO() + fobj.name = 'adblock: ' + url.host() + download = download_manager.get(url, fileobj=fobj) + self._in_progress.append(download) + download.finished.connect( + functools.partial(self.on_download_finished, download)) + + def _guess_zip_filename(self, zf): + """Guess which file to use inside a zip file. + + Args: + zf: A ZipFile instance. 
+ """ + files = zf.namelist() + if len(files) == 1: + return files[0] + else: + for e in files: + if posixpath.splitext(e)[0].lower() == 'hosts': + return e + raise FileNotFoundError("No hosts file found in zip") + + def _get_fileobj(self, byte_io): + """Get an usable file object to read the hosts file from.""" + byte_io.seek(0) # rewind downloaded file + if zipfile.is_zipfile(byte_io): + byte_io.seek(0) # rewind what zipfile.is_zipfile did + zf = zipfile.ZipFile(byte_io) + filename = self._guess_zip_filename(zf) + byte_io = zf.open(filename, mode='r') + else: + byte_io.seek(0) # rewind what zipfile.is_zipfile did + return io.TextIOWrapper(byte_io, encoding='utf-8') + + def _merge_file(self, byte_io): + """Read and merge host files. + + Args: + byte_io: The BytesIO object of the completed download. + + Return: + A set of the merged hosts. + """ + error_count = 0 + line_count = 0 + try: + f = self._get_fileobj(byte_io) + except (FileNotFoundError, UnicodeDecodeError, zipfile.BadZipFile, + zipfile.LargeZipFile) as e: + message.error('last-focused', "adblock: Error while reading {}: " + "{} - {}".format( + byte_io.name, e.__class__.__name__, e)) + return + for line in f: + line_count += 1 + # Remove comments + try: + hash_idx = line.index('#') + line = line[:hash_idx] + except ValueError: + pass + line = line.strip() + # Skip empty lines + if not line: + continue + parts = line.split() + if len(parts) == 1: + # "one host per line" format + host = parts[0] + elif len(parts) == 2: + # /etc/hosts format + host = parts[1] + else: + error_count += 1 + continue + if host not in self.WHITELISTED: + self.blocked_hosts.add(host) + log.misc.debug("{}: read {} lines".format(byte_io.name, line_count)) + if error_count > 0: + message.error('last-focused', "adblock: {} read errors for " + "{}".format(error_count, byte_io.name)) + + def on_lists_downloaded(self): + """Install block lists after files have been downloaded.""" + with open(self._hosts_file, 'w', encoding='utf-8') as f: 
+ for host in sorted(self.blocked_hosts): + f.write(host + '\n') + message.info('last-focused', "adblock: Read {} hosts from {} " + "sources.".format(len(self.blocked_hosts), + self._done_count)) + + @config.change_filter('permissions', 'host-block-lists') + def on_config_changed(self): + """Update files when the config changed.""" + urls = config.get('permissions', 'host-block-lists') + if urls is None: + try: + os.remove(self._hosts_file) + except IOError: + log.misc.exception("Failed to delete hosts file.") + + def on_download_finished(self, download): + """Check if all downloads are finished and if so, trigger reading. + + Arguments: + download: The finished DownloadItem. + """ + self._in_progress.remove(download) + if download.successful: + self._done_count += 1 + try: + self._merge_file(download.fileobj) + finally: + download.fileobj.close() + if not self._in_progress: + self.on_lists_downloaded() diff --git a/qutebrowser/config/configdata.py b/qutebrowser/config/configdata.py index ce017e96c..99a9f28ee 100644 --- a/qutebrowser/config/configdata.py +++ b/qutebrowser/config/configdata.py @@ -497,6 +497,22 @@ DATA = collections.OrderedDict([ ('cookies-store', SettingValue(typ.Bool(), 'true'), "Whether to store cookies."), + + ('host-block-lists', + SettingValue(typ.UrlList(none_ok=True), + 'http://www.malwaredomainlist.com/hostslist/hosts.txt,' + 'http://someonewhocares.org/hosts/hosts,' + 'http://winhelp2002.mvps.org/hosts.zip,' + 'http://malwaredomains.lehigh.edu/files/justdomains.zip,' + 'http://pgl.yoyo.org/adservers/serverlist.php?' 
+ 'hostformat=hosts&mimetype=plaintext,' + 'http://hosts-file.net/ad_servers.asp'), + "List of URLs of lists which contain hosts to block.\n\n" + "The file can be in one of the following formats:\n\n" + "- An '/etc/hosts'-like file\n" + "- One host per line\n" + "- A zip-file of any of the above, with either only one file, or a " + "file named 'hosts' (with any extension)."), )), ('hints', sect.KeyValue( diff --git a/qutebrowser/network/networkmanager.py b/qutebrowser/network/networkmanager.py index 4e57d5211..73b901b72 100644 --- a/qutebrowser/network/networkmanager.py +++ b/qutebrowser/network/networkmanager.py @@ -154,6 +154,13 @@ class NetworkManager(QNetworkAccessManager): elif scheme in self._scheme_handlers: return self._scheme_handlers[scheme].createRequest( op, req, outgoing_data) + if (op == QNetworkAccessManager.GetOperation and + req.url().host() in objreg.get('host-blocker').blocked_hosts): + log.webview.info("Request to {} blocked by host blocker.".format( + req.url().host())) + return networkreply.ErrorNetworkReply( + req, "Request was blocked by host blocker.", + QNetworkReply.ContentAccessDenied) if config.get('network', 'do-not-track'): dnt = '1'.encode('ascii') else: diff --git a/qutebrowser/utils/message.py b/qutebrowser/utils/message.py index e30ad6ede..b00f0c4a3 100644 --- a/qutebrowser/utils/message.py +++ b/qutebrowser/utils/message.py @@ -36,7 +36,8 @@ def error(win_id, message, immediately=False): win_id: The ID of the window which is calling this function. others: See MessageBridge.error. """ - _get_bridge(win_id).error(message, immediately) + QTimer.singleShot( + 0, lambda: _get_bridge(win_id).error(message, immediately)) def info(win_id, message, immediately=True): @@ -46,7 +47,8 @@ def info(win_id, message, immediately=True): win_id: The ID of the window which is calling this function. others: See MessageBridge.info. 
""" - _get_bridge(win_id).info(message, immediately) + QTimer.singleShot( + 0, lambda: _get_bridge(win_id).info(message, immediately)) def set_cmd_text(win_id, txt):