Add AppendLineParser and use it in WebHistory.

The former approach (always reading the whole history from disk) was rather
inefficient, and we had performance problems e.g. when marking text in Qt
documentation.
This commit is contained in:
Florian Bruhin 2015-03-02 22:44:43 +01:00
parent 5b4b793538
commit 27f4ada799
3 changed files with 98 additions and 17 deletions

View File

@ -20,7 +20,6 @@
"""Simple history which gets written to disk.""" """Simple history which gets written to disk."""
import time import time
import functools
from PyQt5.QtCore import pyqtSignal from PyQt5.QtCore import pyqtSignal
from PyQt5.QtWebKit import QWebHistoryInterface from PyQt5.QtWebKit import QWebHistoryInterface
@ -58,29 +57,52 @@ class HistoryEntry:
class WebHistory(QWebHistoryInterface): class WebHistory(QWebHistoryInterface):
"""A QWebHistoryInterface which supports being written to disk.""" """A QWebHistoryInterface which supports being written to disk.
Attributes:
_lineparser: The AppendLineParser used to save the history.
_old_urls: A set of URLs read from the on-disk history.
_new_history: A list of HistoryEntry items of the current session.
_saved_count: How many HistoryEntries have been written to disk.
_old_hit: How many times an URL was found in _old_urls.
_old_miss: How many times an URL was not found in _old_urls.
"""
changed = pyqtSignal() changed = pyqtSignal()
def __init__(self, parent=None): def __init__(self, parent=None):
super().__init__(parent) super().__init__(parent)
self._lineparser = lineparser.LineParser( self._lineparser = lineparser.AppendLineParser(
standarddir.data(), 'history', parent=self) standarddir.data(), 'history', parent=self)
self._history = [HistoryEntry.from_str(e) self._old_urls = set()
for e in self._lineparser.data] with self._lineparser.open():
objreg.get('save-manager').add_saveable('history', self.save, for line in self._lineparser:
self.changed) _time, url = line.rstrip().split(maxsplit=1)
self._old_urls.add(url)
self._new_history = []
self._saved_count = 0
self._old_hit = 0
self._old_miss = 0
objreg.get('save-manager').add_saveable(
'history', self.save, self.changed)
def __repr__(self): def __repr__(self):
return utils.get_repr(self, length=len(self._history)) return utils.get_repr(self, new_length=len(self._new_history))
def __getitem__(self, key): def __getitem__(self, key):
return self._history[key] return self._new_history[key]
def get_recent(self):
"""Get the most recent history entries."""
old = self._lineparser.get_recent()
return old + [str(e) for e in self._new_history]
def save(self): def save(self):
"""Save the history to disk.""" """Save the history to disk."""
self._lineparser.data = (str(e) for e in self._history) new = (str(e) for e in self._new_history[self._saved_count:])
self._lineparser.new_data = new
self._lineparser.save() self._lineparser.save()
self._saved_count = len(self._new_history)
def addHistoryEntry(self, url_string): def addHistoryEntry(self, url_string):
"""Called by WebKit when an URL should be added to the history. """Called by WebKit when an URL should be added to the history.
@ -90,11 +112,9 @@ class WebHistory(QWebHistoryInterface):
""" """
if not config.get('general', 'private-browsing'): if not config.get('general', 'private-browsing'):
entry = HistoryEntry(time.time(), url_string) entry = HistoryEntry(time.time(), url_string)
self._history.append(entry) self._new_history.append(entry)
self.historyContains.cache_clear()
self.changed.emit() self.changed.emit()
@functools.lru_cache()
def historyContains(self, url_string): def historyContains(self, url_string):
"""Called by WebKit to determine if an URL is contained in the history. """Called by WebKit to determine if an URL is contained in the history.
@ -104,7 +124,12 @@ class WebHistory(QWebHistoryInterface):
Return: Return:
True if the url is in the history, False otherwise. True if the url is in the history, False otherwise.
""" """
return url_string in (entry.url for entry in self._history) if url_string in self._old_urls:
self._old_hit += 1
return True
else:
self._old_miss += 1
return url_string in (entry.url for entry in self._new_history)
def init(): def init():

View File

@ -461,9 +461,8 @@ class FatalCrashDialog(_CrashDialog):
super()._gather_crash_info() super()._gather_crash_info()
if self._chk_history.isChecked(): if self._chk_history.isChecked():
try: try:
history = objreg.get('web-history')[-10:] history = objreg.get('web-history').get_recent()
history_str = '\n'.join(str(e) for e in history) self._crash_info.append(("History", ''.join(history)))
self._crash_info.append(("History", history_str))
except Exception: except Exception:
self._crash_info.append(("History", traceback.format_exc())) self._crash_info.append(("History", traceback.format_exc()))

View File

@ -21,6 +21,8 @@
import os import os
import os.path import os.path
import itertools
import contextlib
from PyQt5.QtCore import pyqtSlot, pyqtSignal, QObject from PyQt5.QtCore import pyqtSlot, pyqtSignal, QObject
@ -85,14 +87,69 @@ class BaseLineParser(QObject):
""" """
if self._binary: if self._binary:
fp.write(b'\n'.join(data)) fp.write(b'\n'.join(data))
fp.write(b'\n')
else: else:
fp.write('\n'.join(data)) fp.write('\n'.join(data))
fp.write('\n')
def save(self): def save(self):
"""Save the history to disk.""" """Save the history to disk."""
raise NotImplementedError raise NotImplementedError
class AppendLineParser(BaseLineParser):
"""LineParser which reads lazily and appends data to existing one.
Attributes:
_new_data: The data which was added in this session.
"""
def __init__(self, configdir, fname, *, parent=None):
super().__init__(configdir, fname, binary=False, parent=parent)
self.new_data = []
self._fileobj = None
def __iter__(self):
if self._fileobj is None:
raise ValueError("Iterating without open() being called!")
return itertools.chain(iter(self._fileobj), iter(self.new_data))
@contextlib.contextmanager
def open(self):
"""Open the on-disk history file. Needed for __iter__."""
try:
with self._open_for_reading() as f:
self._fileobj = f
yield
except FileNotFoundError:
self._fileobj = []
yield
finally:
self._fileobj = None
def get_recent(self, count=4096):
"""Get the last count bytes from the underlying file."""
with self._open_for_reading() as f:
f.seek(0, os.SEEK_END)
size = f.tell()
try:
if size - count > 0:
offset = size - count
else:
offset = 0
f.seek(offset)
data = f.readlines()
finally:
f.seek(0, os.SEEK_END)
return data
def save(self):
with open(self._configfile, 'a', encoding='utf-8') as f:
self._write(f, self.new_data)
self.new_data = []
class LineParser(BaseLineParser): class LineParser(BaseLineParser):
"""Parser for configuration files which are simply line-based. """Parser for configuration files which are simply line-based.