Refactor adblock parsing

This commit is contained in:
Florian Bruhin 2017-03-07 21:44:32 +01:00
parent 3258ef7e3f
commit 4c3c86081f

View File

@ -206,6 +206,54 @@ class HostBlocker:
download.finished.connect(
functools.partial(self.on_download_finished, download))
def _parse_line(self, line):
    """Parse a line from a host file.

    Two layouts are understood: the "one host per line" format and the
    /etc/hosts format ("<address> <host>"). A host which is not listed in
    self.WHITELISTED is added to self._blocked_hosts.

    Args:
        line: The bytes object to parse.

    Returns:
        True if parsing succeeded (this includes comment-only and empty
        lines), False otherwise.
    """
    # Drop comments while the line is still bytes, so encoding errors inside
    # a comment (leading, indented or trailing) can't make the line fail.
    # Splitting on the raw byte is safe: 0x23 ('#') never occurs inside a
    # multi-byte UTF-8 sequence.
    content = line.partition(b'#')[0]

    try:
        text = content.decode('utf-8')
    except UnicodeDecodeError:
        # Log the complete original line to ease finding it in the file.
        log.misc.error("Failed to decode: {!r}".format(line))
        return False

    text = text.strip()

    # Skip empty lines
    if not text:
        return True

    parts = text.split()
    if len(parts) == 1:
        # "one host per line" format
        host = parts[0]
    elif len(parts) == 2:
        # /etc/hosts format
        host = parts[1]
    else:
        log.misc.error("Failed to parse: {!r}".format(text))
        return False

    if host not in self.WHITELISTED:
        self._blocked_hosts.add(host)

    return True
def _merge_file(self, byte_io):
"""Read and merge host files.
@ -226,43 +274,10 @@ class HostBlocker:
return
for line in f:
if line.startswith(b'#'):
# Ignoring comments early so we don't have to care about
# encoding errors in them.
continue
line_count += 1
try:
line = line.decode('utf-8')
except UnicodeDecodeError:
log.misc.error("Failed to decode: {!r}".format(line))
ok = self._parse_line(line)
if not ok:
error_count += 1
continue
# Remove comments
try:
hash_idx = line.index('#')
line = line[:hash_idx]
except ValueError:
pass
line = line.strip()
# Skip empty lines
if not line:
continue
parts = line.split()
if len(parts) == 1:
# "one host per line" format
host = parts[0]
elif len(parts) == 2:
# /etc/hosts format
host = parts[1]
else:
error_count += 1
log.misc.error("Failed to parse: {!r}".format(line))
continue
if host not in self.WHITELISTED:
self._blocked_hosts.add(host)
log.misc.debug("{}: read {} lines".format(byte_io.name, line_count))
if error_count > 0: