Use SQL for history storage.

The browser-wide in-memory web history is now stored in an in-memory sql
database instead of a python dict. Long-term storage is not affected, it
is still persisted in a text file of the same format.

This will set the stage for SQL-based history completion.
See #1765.
This commit is contained in:
Ryan Roden-Corrent 2017-01-15 21:42:35 -05:00
parent f43f78c40f
commit 9477a2eeb2
3 changed files with 65 additions and 107 deletions

View File

@ -27,7 +27,7 @@ from PyQt5.QtCore import pyqtSignal, pyqtSlot, QUrl, QObject
from qutebrowser.commands import cmdutils
from qutebrowser.utils import (utils, objreg, standarddir, log, qtutils,
usertypes, message)
from qutebrowser.misc import lineparser, objects
from qutebrowser.misc import lineparser, objects, sql
class Entry:
@ -103,19 +103,16 @@ class Entry:
return cls(atime, url, title, redirect=redirect)
class WebHistory(QObject):
class WebHistory(sql.SqlTable):
"""The global history of visited pages.
This is a little more complex as you'd expect so the history can be read
from disk async while new history is already arriving.
self.history_dict is the main place where the history is stored, in an
OrderedDict (sorted by time) of URL strings mapped to Entry objects.
While reading from disk is still ongoing, the history is saved in
self._temp_history instead, and then appended to self.history_dict once
that's fully populated.
self._temp_history instead, and then inserted into the sql table once
the async read completes.
All history which is new in this session (rather than read from disk from a
previous browsing session) is also stored in self._new_history.
@ -123,52 +120,34 @@ class WebHistory(QObject):
disk, so we can always append to the existing data.
Attributes:
history_dict: An OrderedDict of URLs read from the on-disk history.
_lineparser: The AppendLineParser used to save the history.
_new_history: A list of Entry items of the current session.
_saved_count: How many HistoryEntries have been written to disk.
_initial_read_started: Whether async_read was called.
_initial_read_done: Whether async_read has completed.
_temp_history: OrderedDict of temporary history entries before
async_read was called.
_temp_history: List of history entries from before async_read finished.
Signals:
add_completion_item: Emitted before a new Entry is added.
Used to sync with the completion.
arg: The new Entry.
item_added: Emitted after a new Entry is added.
Used to tell the savemanager that the history is dirty.
arg: The new Entry.
cleared: Emitted after the history is cleared.
"""
add_completion_item = pyqtSignal(Entry)
item_added = pyqtSignal(Entry)
cleared = pyqtSignal()
async_read_done = pyqtSignal()
def __init__(self, hist_dir, hist_name, parent=None):
super().__init__(parent)
super().__init__("History", ['url', 'title', 'atime', 'redirect'],
primary_key='url', parent=parent)
self._initial_read_started = False
self._initial_read_done = False
self._lineparser = lineparser.AppendLineParser(hist_dir, hist_name,
parent=self)
self.history_dict = collections.OrderedDict()
self._temp_history = collections.OrderedDict()
self._temp_history = []
self._new_history = []
self._saved_count = 0
objreg.get('save-manager').add_saveable(
'history', self.save, self.item_added)
def __repr__(self):
return utils.get_repr(self, length=len(self))
def __iter__(self):
return iter(self.history_dict.values())
def __len__(self):
return len(self.history_dict)
def async_read(self):
"""Read the initial history."""
if self._initial_read_started:
@ -200,21 +179,18 @@ class WebHistory(QObject):
self._initial_read_done = True
self.async_read_done.emit()
objreg.get('save-manager').add_saveable(
'history', self.save, self.changed)
for entry in self._temp_history.values():
for entry in self._temp_history:
self._add_entry(entry)
self._new_history.append(entry)
if not entry.redirect:
self.add_completion_item.emit(entry)
self._temp_history.clear()
def _add_entry(self, entry, target=None):
"""Add an entry to self.history_dict or another given OrderedDict."""
if target is None:
target = self.history_dict
url_str = entry.url_str()
target[url_str] = entry
target.move_to_end(url_str)
def _add_entry(self, entry):
"""Add an entry to the in-memory database."""
self.insert(entry.url_str(), entry.title, entry.atime, entry.redirect,
replace=True)
def get_recent(self):
"""Get the most recent history entries."""
@ -247,7 +223,7 @@ class WebHistory(QObject):
def _do_clear(self):
self._lineparser.clear()
self.history_dict.clear()
self.delete_all()
self._temp_history.clear()
self._new_history.clear()
self._saved_count = 0
@ -291,11 +267,8 @@ class WebHistory(QObject):
if self._initial_read_done:
self._add_entry(entry)
self._new_history.append(entry)
self.item_added.emit(entry)
if not entry.redirect:
self.add_completion_item.emit(entry)
else:
self._add_entry(entry, target=self._temp_history)
self._temp_history.append(entry)
def init(parent=None):

View File

@ -48,7 +48,7 @@ class WebHistoryInterface(QWebHistoryInterface):
Return:
True if the url is in the history, False otherwise.
"""
return url_string in self._history.history_dict
return url_string in self._history
def init(history):

View File

@ -27,24 +27,33 @@ from hypothesis import strategies
from PyQt5.QtCore import QUrl
from qutebrowser.browser import history
from qutebrowser.utils import objreg, urlutils, usertypes
from qutebrowser.utils import objreg, urlutils
from qutebrowser.misc import sql
class FakeWebHistory:
"""A fake WebHistory object."""
def __init__(self, history_dict):
self.history_dict = history_dict
@pytest.fixture(autouse=True)
def prerequisites(config_stub, fake_save_manager):
"""Make sure everything is ready to initialize a WebHistory."""
config_stub.data = {'general': {'private-browsing': False}}
sql.init()
yield
sql.close()
@pytest.fixture()
def hist(tmpdir, fake_save_manager):
def hist(tmpdir):
return history.WebHistory(hist_dir=str(tmpdir), hist_name='history')
def test_async_read_twice(monkeypatch, qtbot, tmpdir, caplog,
def test_register_saveable(monkeypatch, qtbot, tmpdir, caplog,
fake_save_manager):
(tmpdir / 'filled-history').write('12345 http://example.com/ title')
hist = history.WebHistory(hist_dir=str(tmpdir), hist_name='filled-history')
list(hist.async_read())
assert fake_save_manager.add_saveable.called
def test_async_read_twice(monkeypatch, qtbot, tmpdir, caplog):
(tmpdir / 'filled-history').write('\n'.join([
'12345 http://example.com/ title',
'67890 http://example.com/',
@ -55,42 +64,30 @@ def test_async_read_twice(monkeypatch, qtbot, tmpdir, caplog,
with pytest.raises(StopIteration):
next(hist.async_read())
expected = "Ignoring async_read() because reading is started."
assert len(caplog.records) == 1
assert caplog.records[0].msg == expected
assert expected in (record.msg for record in caplog.records)
@pytest.mark.parametrize('redirect', [True, False])
def test_adding_item_during_async_read(qtbot, hist, redirect):
"""Check what happens when adding URL while reading the history."""
url = QUrl('http://www.example.com/')
url = 'http://www.example.com/'
hist.add_url(QUrl(url), redirect=redirect, atime=12345)
with qtbot.assertNotEmitted(hist.add_completion_item), \
qtbot.assertNotEmitted(hist.item_added):
hist.add_url(url, redirect=redirect, atime=12345)
if redirect:
with qtbot.assertNotEmitted(hist.add_completion_item):
with qtbot.waitSignal(hist.async_read_done):
list(hist.async_read())
else:
with qtbot.waitSignals([hist.add_completion_item,
hist.async_read_done], order='strict'):
list(hist.async_read())
with qtbot.waitSignal(hist.async_read_done):
list(hist.async_read())
assert not hist._temp_history
expected = history.Entry(url=url, atime=12345, redirect=redirect, title="")
assert list(hist.history_dict.values()) == [expected]
assert list(hist) == [(url, '', 12345, redirect)]
def test_iter(hist):
list(hist.async_read())
url = QUrl('http://www.example.com/')
urlstr = 'http://www.example.com/'
url = QUrl(urlstr)
hist.add_url(url, atime=12345)
entry = history.Entry(url=url, atime=12345, redirect=False, title="")
assert list(hist) == [entry]
assert list(hist) == [(urlstr, '', 12345, False)]
def test_len(hist):
@ -110,38 +107,39 @@ def test_len(hist):
' ',
'',
])
def test_read(hist, tmpdir, line):
def test_read(tmpdir, line):
(tmpdir / 'filled-history').write(line + '\n')
hist = history.WebHistory(hist_dir=str(tmpdir), hist_name='filled-history')
list(hist.async_read())
def test_updated_entries(hist, tmpdir):
def test_updated_entries(tmpdir):
(tmpdir / 'filled-history').write('12345 http://example.com/\n'
'67890 http://example.com/\n')
hist = history.WebHistory(hist_dir=str(tmpdir), hist_name='filled-history')
list(hist.async_read())
assert hist.history_dict['http://example.com/'].atime == 67890
assert hist['http://example.com/'] == ('http://example.com/', '', 67890,
False)
hist.add_url(QUrl('http://example.com/'), atime=99999)
assert hist.history_dict['http://example.com/'].atime == 99999
assert hist['http://example.com/'] == ('http://example.com/', '', 99999,
False)
def test_invalid_read(hist, tmpdir, caplog):
def test_invalid_read(tmpdir, caplog):
(tmpdir / 'filled-history').write('foobar\n12345 http://example.com/')
hist = history.WebHistory(hist_dir=str(tmpdir), hist_name='filled-history')
with caplog.at_level(logging.WARNING):
list(hist.async_read())
entries = list(hist.history_dict.values())
entries = list(hist)
assert len(entries) == 1
assert len(caplog.records) == 1
msg = "Invalid history entry 'foobar': 2 or 3 fields expected!"
assert caplog.records[0].msg == msg
assert msg in (rec.msg for rec in caplog.records)
def test_get_recent(hist, tmpdir):
def test_get_recent(tmpdir):
(tmpdir / 'filled-history').write('12345 http://example.com/')
hist = history.WebHistory(hist_dir=str(tmpdir), hist_name='filled-history')
list(hist.async_read())
@ -154,7 +152,7 @@ def test_get_recent(hist, tmpdir):
assert lines == expected
def test_save(hist, tmpdir):
def test_save(tmpdir):
hist_file = tmpdir / 'filled-history'
hist_file.write('12345 http://example.com/\n')
@ -177,7 +175,7 @@ def test_save(hist, tmpdir):
assert lines == expected
def test_clear(qtbot, hist, tmpdir):
def test_clear(qtbot, tmpdir):
hist_file = tmpdir / 'filled-history'
hist_file.write('12345 http://example.com/\n')
@ -190,7 +188,6 @@ def test_clear(qtbot, hist, tmpdir):
hist._do_clear()
assert not hist_file.read()
assert not hist.history_dict
assert not hist._new_history
hist.add_url(QUrl('http://www.the-compiler.org/'), atime=67890)
@ -204,24 +201,17 @@ def test_add_item(qtbot, hist):
list(hist.async_read())
url = 'http://www.example.com/'
with qtbot.waitSignals([hist.add_completion_item, hist.item_added],
order='strict'):
hist.add_url(QUrl(url), atime=12345, title="the title")
hist.add_url(QUrl(url), atime=12345, title="the title")
entry = history.Entry(url=QUrl(url), redirect=False, atime=12345,
title="the title")
assert hist.history_dict[url] == entry
assert hist[url] == (url, 'the title', 12345, False)
def test_add_item_redirect(qtbot, hist):
list(hist.async_read())
url = 'http://www.example.com/'
with qtbot.assertNotEmitted(hist.add_completion_item):
with qtbot.waitSignal(hist.item_added):
hist.add_url(QUrl(url), redirect=True, atime=12345)
hist.add_url(QUrl(url), redirect=True, atime=12345)
entry = history.Entry(url=QUrl(url), redirect=True, atime=12345, title="")
assert hist.history_dict[url] == entry
assert hist[url] == (url, '', 12345, True)
def test_add_item_redirect_update(qtbot, tmpdir, fake_save_manager):
@ -233,12 +223,9 @@ def test_add_item_redirect_update(qtbot, tmpdir, fake_save_manager):
hist = history.WebHistory(hist_dir=str(tmpdir), hist_name='filled-history')
list(hist.async_read())
with qtbot.assertNotEmitted(hist.add_completion_item):
with qtbot.waitSignal(hist.item_added):
hist.add_url(QUrl(url), redirect=True, atime=67890)
hist.add_url(QUrl(url), redirect=True, atime=67890)
entry = history.Entry(url=QUrl(url), redirect=True, atime=67890, title="")
assert hist.history_dict[url] == entry
assert hist[url] == (url, '', 67890, True)
@pytest.mark.parametrize('line, expected', [
@ -333,8 +320,7 @@ def hist_interface():
entry = history.Entry(atime=0, url=QUrl('http://www.example.com/'),
title='example')
history_dict = {'http://www.example.com/': entry}
fake_hist = FakeWebHistory(history_dict)
interface = webkithistory.WebHistoryInterface(fake_hist)
interface = webkithistory.WebHistoryInterface(history_dict)
QWebHistoryInterface.setDefaultInterface(interface)
yield
QWebHistoryInterface.setDefaultInterface(None)
@ -349,7 +335,7 @@ def test_history_interface(qtbot, webview, hist_interface):
@pytest.mark.parametrize('backend', [usertypes.Backend.QtWebEngine,
usertypes.Backend.QtWebKit])
def test_init(backend, qapp, tmpdir, monkeypatch, fake_save_manager):
def test_init(backend, qapp, tmpdir, monkeypatch):
if backend == usertypes.Backend.QtWebKit:
pytest.importorskip('PyQt5.QtWebKitWidgets')
else:
@ -379,5 +365,4 @@ def test_init(backend, qapp, tmpdir, monkeypatch, fake_save_manager):
# before (so we need to test webengine before webkit)
assert default_interface is None
assert fake_save_manager.add_saveable.called
objreg.delete('web-history')