From 27f4ada799e312a6a9d8c1e404d8ba7652a1e5a5 Mon Sep 17 00:00:00 2001 From: Florian Bruhin Date: Mon, 2 Mar 2015 22:44:43 +0100 Subject: [PATCH] Add AppendLineParser and use it in WebHistory. The former approach (always reading the whole history from disk) was rather inefficient, and we had performance problems e.g. when marking text in Qt documentation. --- qutebrowser/browser/history.py | 53 ++++++++++++++++++++++-------- qutebrowser/misc/crashdialog.py | 5 ++- qutebrowser/misc/lineparser.py | 57 +++++++++++++++++++++++++++++++++ 3 files changed, 98 insertions(+), 17 deletions(-) diff --git a/qutebrowser/browser/history.py b/qutebrowser/browser/history.py index 3f6a75e50..72982499e 100644 --- a/qutebrowser/browser/history.py +++ b/qutebrowser/browser/history.py @@ -20,7 +20,6 @@ """Simple history which gets written to disk.""" import time -import functools from PyQt5.QtCore import pyqtSignal from PyQt5.QtWebKit import QWebHistoryInterface @@ -58,29 +57,52 @@ class HistoryEntry: class WebHistory(QWebHistoryInterface): - """A QWebHistoryInterface which supports being written to disk.""" + """A QWebHistoryInterface which supports being written to disk. + + Attributes: + _lineparser: The AppendLineParser used to save the history. + _old_urls: A set of URLs read from the on-disk history. + _new_history: A list of HistoryEntry items of the current session. + _saved_count: How many HistoryEntries have been written to disk. + _old_hit: How many times an URL was found in _old_urls. + _old_miss: How many times an URL was not found in _old_urls. + """ changed = pyqtSignal() def __init__(self, parent=None): super().__init__(parent) - self._lineparser = lineparser.LineParser( + self._lineparser = lineparser.AppendLineParser( standarddir.data(), 'history', parent=self) - self._history = [HistoryEntry.from_str(e) - for e in self._lineparser.data] - objreg.get('save-manager').add_saveable('history', self.save, - self.changed) + self._old_urls = set() + with self._lineparser.open(): + for line in self._lineparser: + _time, url = line.rstrip().split(maxsplit=1) + self._old_urls.add(url) + self._new_history = [] + self._saved_count = 0 + self._old_hit = 0 + self._old_miss = 0 + objreg.get('save-manager').add_saveable( + 'history', self.save, self.changed) def __repr__(self): - return utils.get_repr(self, length=len(self._history)) + return utils.get_repr(self, new_length=len(self._new_history)) def __getitem__(self, key): - return self._history[key] + return self._new_history[key] + + def get_recent(self): + """Get the most recent history entries.""" + old = self._lineparser.get_recent() + return old + [str(e) for e in self._new_history] def save(self): """Save the history to disk.""" - self._lineparser.data = (str(e) for e in self._history) + new = (str(e) for e in self._new_history[self._saved_count:]) + self._lineparser.new_data = new self._lineparser.save() + self._saved_count = len(self._new_history) def addHistoryEntry(self, url_string): """Called by WebKit when an URL should be added to the history. @@ -90,11 +112,9 @@ class WebHistory(QWebHistoryInterface): """ if not config.get('general', 'private-browsing'): entry = HistoryEntry(time.time(), url_string) - self._history.append(entry) - self.historyContains.cache_clear() + self._new_history.append(entry) self.changed.emit() - @functools.lru_cache() def historyContains(self, url_string): """Called by WebKit to determine if an URL is contained in the history. @@ -104,7 +124,12 @@ class WebHistory(QWebHistoryInterface): Return: True if the url is in the history, False otherwise. """ - return url_string in (entry.url for entry in self._history) + if url_string in self._old_urls: + self._old_hit += 1 + return True + else: + self._old_miss += 1 + return url_string in (entry.url for entry in self._new_history) def init(): diff --git a/qutebrowser/misc/crashdialog.py b/qutebrowser/misc/crashdialog.py index 1cf5a75e8..6f527ade1 100644 --- a/qutebrowser/misc/crashdialog.py +++ b/qutebrowser/misc/crashdialog.py @@ -461,9 +461,8 @@ class FatalCrashDialog(_CrashDialog): super()._gather_crash_info() if self._chk_history.isChecked(): try: - history = objreg.get('web-history')[-10:] - history_str = '\n'.join(str(e) for e in history) - self._crash_info.append(("History", history_str)) + history = objreg.get('web-history').get_recent() + self._crash_info.append(("History", ''.join(history))) except Exception: self._crash_info.append(("History", traceback.format_exc())) diff --git a/qutebrowser/misc/lineparser.py b/qutebrowser/misc/lineparser.py index b4db5ec13..db1e71bc1 100644 --- a/qutebrowser/misc/lineparser.py +++ b/qutebrowser/misc/lineparser.py @@ -21,6 +21,8 @@ import os import os.path +import itertools +import contextlib from PyQt5.QtCore import pyqtSlot, pyqtSignal, QObject @@ -85,14 +87,69 @@ class BaseLineParser(QObject): """ if self._binary: fp.write(b'\n'.join(data)) + fp.write(b'\n') else: fp.write('\n'.join(data)) + fp.write('\n') def save(self): """Save the history to disk.""" raise NotImplementedError +class AppendLineParser(BaseLineParser): + + """LineParser which reads lazily and appends data to existing one. + + Attributes: + _new_data: The data which was added in this session. + """ + + def __init__(self, configdir, fname, *, parent=None): + super().__init__(configdir, fname, binary=False, parent=parent) + self.new_data = [] + self._fileobj = None + + def __iter__(self): + if self._fileobj is None: + raise ValueError("Iterating without open() being called!") + return itertools.chain(iter(self._fileobj), iter(self.new_data)) + + @contextlib.contextmanager + def open(self): + """Open the on-disk history file. Needed for __iter__.""" + try: + with self._open_for_reading() as f: + self._fileobj = f + yield + except FileNotFoundError: + self._fileobj = [] + yield + finally: + self._fileobj = None + + def get_recent(self, count=4096): + """Get the last count bytes from the underlying file.""" + with self._open_for_reading() as f: + f.seek(0, os.SEEK_END) + size = f.tell() + try: + if size - count > 0: + offset = size - count + else: + offset = 0 + f.seek(offset) + data = f.readlines() + finally: + f.seek(0, os.SEEK_END) + return data + + def save(self): + with open(self._configfile, 'a', encoding='utf-8') as f: + self._write(f, self.new_data) + self.new_data = [] + + class LineParser(BaseLineParser): """Parser for configuration files which are simply line-based.