Refactor adblock parsing
This commit is contained in:
parent
3258ef7e3f
commit
4c3c86081f
@ -206,6 +206,54 @@ class HostBlocker:
|
||||
download.finished.connect(
|
||||
functools.partial(self.on_download_finished, download))
|
||||
|
||||
def _parse_line(self, line):
    """Parse a line from a host file.

    Full-line comments, trailing comments and blank lines are skipped.
    What remains is either a single hostname ("one host per line"
    format) or an address followed by one or more hostnames (/etc/hosts
    format, which allows aliases after the canonical name). Every
    hostname which is not in self.WHITELISTED is added to
    self._blocked_hosts.

    Args:
        line: The bytes object to parse.

    Returns:
        True if parsing succeeded, False if the line could not be
        decoded as UTF-8.
    """
    if line.startswith(b'#'):
        # Ignoring comments early so we don't have to care about
        # encoding errors in them.
        return True

    try:
        text = line.decode('utf-8')
    except UnicodeDecodeError:
        # Log the raw bytes, as there is no decoded text to show.
        log.misc.error("Failed to decode: {!r}".format(line))
        return False

    # Drop a trailing comment, then split on whitespace. split() without
    # arguments ignores surrounding whitespace and returns an empty list
    # for a blank line.
    parts = text.partition('#')[0].split()
    if not parts:
        return True

    if len(parts) == 1:
        # "one host per line" format
        hosts = parts
    else:
        # /etc/hosts format: the first field is the address, every
        # following field is a hostname (canonical name, then aliases).
        hosts = parts[1:]

    for host in hosts:
        if host not in self.WHITELISTED:
            self._blocked_hosts.add(host)

    return True
|
||||
|
||||
def _merge_file(self, byte_io):
|
||||
"""Read and merge host files.
|
||||
|
||||
@ -226,43 +274,10 @@ class HostBlocker:
|
||||
return
|
||||
|
||||
for line in f:
|
||||
if line.startswith(b'#'):
|
||||
# Ignoring comments early so we don't have to care about
|
||||
# encoding errors in them.
|
||||
continue
|
||||
|
||||
line_count += 1
|
||||
|
||||
try:
|
||||
line = line.decode('utf-8')
|
||||
except UnicodeDecodeError:
|
||||
log.misc.error("Failed to decode: {!r}".format(line))
|
||||
ok = self._parse_line(line)
|
||||
if not ok:
|
||||
error_count += 1
|
||||
continue
|
||||
|
||||
# Remove comments
|
||||
try:
|
||||
hash_idx = line.index('#')
|
||||
line = line[:hash_idx]
|
||||
except ValueError:
|
||||
pass
|
||||
line = line.strip()
|
||||
# Skip empty lines
|
||||
if not line:
|
||||
continue
|
||||
parts = line.split()
|
||||
if len(parts) == 1:
|
||||
# "one host per line" format
|
||||
host = parts[0]
|
||||
elif len(parts) == 2:
|
||||
# /etc/hosts format
|
||||
host = parts[1]
|
||||
else:
|
||||
error_count += 1
|
||||
log.misc.error("Failed to parse: {!r}".format(line))
|
||||
continue
|
||||
if host not in self.WHITELISTED:
|
||||
self._blocked_hosts.add(host)
|
||||
|
||||
log.misc.debug("{}: read {} lines".format(byte_io.name, line_count))
|
||||
if error_count > 0:
|
||||
|
Loading…
Reference in New Issue
Block a user