Store history in an on-disk sqlite database.

Instead of reading sqlite history from a file and storing it in an in-memory
database, just directly use an on-disk database. This resolves #755, where
history entries don't pop in to the completion menu immediately as they are
still being read asynchronously for a few seconds after the browser starts.
This commit is contained in:
Ryan Roden-Corrent 2017-03-26 16:52:10 -04:00
parent b47c3b6a60
commit de5be0dc5a
7 changed files with 92 additions and 231 deletions

View File

@ -158,8 +158,6 @@ def init(args, crash_handler):
QDesktopServices.setUrlHandler('https', open_desktopservices_url) QDesktopServices.setUrlHandler('https', open_desktopservices_url)
QDesktopServices.setUrlHandler('qute', open_desktopservices_url) QDesktopServices.setUrlHandler('qute', open_desktopservices_url)
QTimer.singleShot(10, functools.partial(_init_late_modules, args))
log.init.debug("Init done!") log.init.debug("Init done!")
crash_handler.raise_crashdlg() crash_handler.raise_crashdlg()
@ -428,7 +426,7 @@ def _init_modules(args, crash_handler):
keyconf.init(qApp) keyconf.init(qApp)
log.init.debug("Initializing sql...") log.init.debug("Initializing sql...")
sql.init() sql.init(os.path.join(standarddir.data(), 'history.sqlite'))
log.init.debug("Initializing web history...") log.init.debug("Initializing web history...")
history.init(qApp) history.init(qApp)
@ -476,23 +474,6 @@ def _init_modules(args, crash_handler):
browsertab.init() browsertab.init()
def _init_late_modules(args):
"""Initialize modules which can be inited after the window is shown."""
log.init.debug("Reading web history...")
reader = objreg.get('web-history').async_read()
with debug.log_time(log.init, 'Reading history'):
while True:
QApplication.processEvents()
try:
next(reader)
except StopIteration:
break
except (OSError, UnicodeDecodeError) as e:
error.handle_fatal_exc(e, args, "Error while initializing!",
pre_text="Error while initializing")
sys.exit(usertypes.Exit.err_init)
class Quitter: class Quitter:
"""Utility class to quit/restart the QApplication. """Utility class to quit/restart the QApplication.

View File

@ -26,7 +26,7 @@ from PyQt5.QtCore import pyqtSignal, pyqtSlot, QUrl
from qutebrowser.commands import cmdutils from qutebrowser.commands import cmdutils
from qutebrowser.utils import (utils, objreg, standarddir, log, qtutils, from qutebrowser.utils import (utils, objreg, standarddir, log, qtutils,
usertypes, message) usertypes, message)
from qutebrowser.misc import lineparser, objects, sql from qutebrowser.misc import objects, sql
class Entry: class Entry:
@ -118,14 +118,6 @@ class WebHistory(sql.SqlTable):
self._saved_count tracks how many of those entries were already written to self._saved_count tracks how many of those entries were already written to
disk, so we can always append to the existing data. disk, so we can always append to the existing data.
Attributes:
_lineparser: The AppendLineParser used to save the history.
_new_history: A list of Entry items of the current session.
_saved_count: How many HistoryEntries have been written to disk.
_initial_read_started: Whether async_read was called.
_initial_read_done: Whether async_read has completed.
_temp_history: List of history entries from before async_read finished.
Signals: Signals:
cleared: Emitted after the history is cleared. cleared: Emitted after the history is cleared.
""" """
@ -133,59 +125,13 @@ class WebHistory(sql.SqlTable):
cleared = pyqtSignal() cleared = pyqtSignal()
async_read_done = pyqtSignal() async_read_done = pyqtSignal()
def __init__(self, hist_dir, hist_name, parent=None): def __init__(self, parent=None):
super().__init__("History", ['url', 'title', 'atime', 'redirect'], super().__init__("History", ['url', 'title', 'atime', 'redirect'],
primary_key='url', parent=parent) primary_key='url', parent=parent)
self._initial_read_started = False
self._initial_read_done = False
self._lineparser = lineparser.AppendLineParser(hist_dir, hist_name,
parent=self)
self._temp_history = []
self._new_history = []
self._saved_count = 0
def __repr__(self): def __repr__(self):
return utils.get_repr(self, length=len(self)) return utils.get_repr(self, length=len(self))
def async_read(self):
"""Read the initial history."""
if self._initial_read_started:
log.init.debug("Ignoring async_read() because reading is started.")
return
self._initial_read_started = True
with self._lineparser.open():
for line in self._lineparser:
yield
line = line.rstrip()
if not line:
continue
try:
entry = Entry.from_str(line)
except ValueError as e:
log.init.warning("Invalid history entry {!r}: {}!".format(
line, e))
continue
# This de-duplicates history entries; only the latest
# entry for each URL is kept. If you want to keep
# information about previous hits change the items in
# old_urls to be lists or change Entry to have a
# list of atimes.
self._add_entry(entry)
self._initial_read_done = True
self.async_read_done.emit()
objreg.get('save-manager').add_saveable(
'history', self.save, self.changed)
for entry in self._temp_history:
self._add_entry(entry)
self._new_history.append(entry)
self._temp_history.clear()
def _add_entry(self, entry): def _add_entry(self, entry):
"""Add an entry to the in-memory database.""" """Add an entry to the in-memory database."""
self.insert([entry.url_str(), entry.title, entry.atime, self.insert([entry.url_str(), entry.title, entry.atime,
@ -193,15 +139,7 @@ class WebHistory(sql.SqlTable):
def get_recent(self): def get_recent(self):
"""Get the most recent history entries.""" """Get the most recent history entries."""
old = self._lineparser.get_recent() return self.select(sort_by='atime', sort_order='desc', limit=100)
return old + [str(e) for e in self._new_history]
def save(self):
"""Save the history to disk."""
new = (str(e) for e in self._new_history[self._saved_count:])
self._lineparser.new_data = new
self._lineparser.save()
self._saved_count = len(self._new_history)
@cmdutils.register(name='history-clear', instance='web-history') @cmdutils.register(name='history-clear', instance='web-history')
def clear(self, force=False): def clear(self, force=False):
@ -221,11 +159,7 @@ class WebHistory(sql.SqlTable):
"history?") "history?")
def _do_clear(self): def _do_clear(self):
self._lineparser.clear()
self.delete_all() self.delete_all()
self._temp_history.clear()
self._new_history.clear()
self._saved_count = 0
self.cleared.emit() self.cleared.emit()
@pyqtSlot(QUrl, QUrl, str) @pyqtSlot(QUrl, QUrl, str)
@ -263,11 +197,7 @@ class WebHistory(sql.SqlTable):
if atime is None: if atime is None:
atime = time.time() atime = time.time()
entry = Entry(atime, url, title, redirect=redirect) entry = Entry(atime, url, title, redirect=redirect)
if self._initial_read_done: self._add_entry(entry)
self._add_entry(entry)
self._new_history.append(entry)
else:
self._temp_history.append(entry)
def init(parent=None): def init(parent=None):
@ -276,8 +206,7 @@ def init(parent=None):
Args: Args:
parent: The parent to use for WebHistory. parent: The parent to use for WebHistory.
""" """
history = WebHistory(hist_dir=standarddir.data(), hist_name='history', history = WebHistory(parent=parent)
parent=parent)
objreg.register('web-history', history) objreg.register('web-history', history)
if objects.backend == usertypes.Backend.QtWebKit: if objects.backend == usertypes.Backend.QtWebKit:

View File

@ -34,14 +34,13 @@ class SqlException(Exception):
pass pass
def init(): def init(db_path):
"""Initialize the SQL database connection.""" """Initialize the SQL database connection."""
database = QSqlDatabase.addDatabase('QSQLITE') database = QSqlDatabase.addDatabase('QSQLITE')
# In-memory database, see https://sqlite.org/inmemorydb.html database.setDatabaseName(db_path)
database.setDatabaseName(':memory:')
if not database.open(): if not database.open():
raise SqlException("Failed to open in-memory sqlite database: {}" raise SqlException("Failed to open sqlite database at {}: {}"
.format(database.lastError().text())) .format(db_path, database.lastError().text()))
def close(): def close():
@ -103,8 +102,8 @@ class SqlTable(QObject):
super().__init__(parent) super().__init__(parent)
self._name = name self._name = name
self._primary_key = primary_key self._primary_key = primary_key
run_query("CREATE TABLE {} ({}, PRIMARY KEY ({}))".format( run_query("CREATE TABLE IF NOT EXISTS {} ({}, PRIMARY KEY ({}))"
name, ','.join(fields), primary_key)) .format(name, ','.join(fields), primary_key))
# pylint: disable=invalid-name # pylint: disable=invalid-name
self.Entry = collections.namedtuple(name + '_Entry', fields) self.Entry = collections.namedtuple(name + '_Entry', fields)
@ -177,3 +176,11 @@ class SqlTable(QObject):
"""Remove all row from the table.""" """Remove all row from the table."""
run_query("DELETE FROM {}".format(self._name)) run_query("DELETE FROM {}".format(self._name))
self.changed.emit() self.changed.emit()
def select(self, sort_by, sort_order, limit):
"""Remove all row from the table."""
result = run_query('SELECT * FROM {} ORDER BY {} {} LIMIT {}'
.format(self._name, sort_by, sort_order, limit))
while result.next():
rec = result.record()
yield self.Entry(*[rec.value(i) for i in range(rec.count())])

View File

@ -55,6 +55,8 @@ PERFECT_FILES = [
'browser/history.py'), 'browser/history.py'),
('tests/unit/browser/webkit/test_history.py', ('tests/unit/browser/webkit/test_history.py',
'browser/webkit/webkithistory.py'), 'browser/webkit/webkithistory.py'),
('tests/unit/browser/webkit/test_history.py',
'browser/history.py'),
('tests/unit/browser/webkit/http/test_http.py', ('tests/unit/browser/webkit/http/test_http.py',
'browser/webkit/http.py'), 'browser/webkit/http.py'),
('tests/unit/browser/webkit/http/test_content_disposition.py', ('tests/unit/browser/webkit/http/test_content_disposition.py',

View File

@ -476,8 +476,9 @@ def short_tmpdir():
@pytest.fixture @pytest.fixture
def init_sql(): def init_sql(data_tmpdir):
"""Initialize the SQL module, and shut it down after the test.""" """Initialize the SQL module, and shut it down after the test."""
sql.init() path = str(data_tmpdir / 'test.db')
sql.init(path)
yield yield
sql.close() sql.close()

View File

@ -38,47 +38,17 @@ def prerequisites(config_stub, fake_save_manager, init_sql):
@pytest.fixture() @pytest.fixture()
def hist(tmpdir): def hist(tmpdir):
return history.WebHistory(hist_dir=str(tmpdir), hist_name='history') return history.WebHistory()
def test_register_saveable(monkeypatch, qtbot, tmpdir, caplog, @pytest.fixture()
fake_save_manager): def mock_time(mocker):
(tmpdir / 'filled-history').write('12345 http://example.com/ title') m = mocker.patch('qutebrowser.browser.history.time')
hist = history.WebHistory(hist_dir=str(tmpdir), hist_name='filled-history') m.time.return_value = 12345
list(hist.async_read()) return 12345
assert fake_save_manager.add_saveable.called
def test_async_read_twice(monkeypatch, qtbot, tmpdir, caplog):
(tmpdir / 'filled-history').write('\n'.join([
'12345 http://example.com/ title',
'67890 http://example.com/',
'12345 http://qutebrowser.org/ blah',
]))
hist = history.WebHistory(hist_dir=str(tmpdir), hist_name='filled-history')
next(hist.async_read())
with pytest.raises(StopIteration):
next(hist.async_read())
expected = "Ignoring async_read() because reading is started."
assert expected in (record.msg for record in caplog.records)
@pytest.mark.parametrize('redirect', [True, False])
def test_adding_item_during_async_read(qtbot, hist, redirect):
"""Check what happens when adding URL while reading the history."""
url = 'http://www.example.com/'
hist.add_url(QUrl(url), redirect=redirect, atime=12345)
with qtbot.waitSignal(hist.async_read_done):
list(hist.async_read())
assert not hist._temp_history
assert list(hist) == [(url, '', 12345, redirect)]
def test_iter(hist): def test_iter(hist):
list(hist.async_read())
urlstr = 'http://www.example.com/' urlstr = 'http://www.example.com/'
url = QUrl(urlstr) url = QUrl(urlstr)
hist.add_url(url, atime=12345) hist.add_url(url, atime=12345)
@ -88,7 +58,6 @@ def test_iter(hist):
def test_len(hist): def test_len(hist):
assert len(hist) == 0 assert len(hist) == 0
list(hist.async_read())
url = QUrl('http://www.example.com/') url = QUrl('http://www.example.com/')
hist.add_url(url) hist.add_url(url)
@ -96,101 +65,49 @@ def test_len(hist):
assert len(hist) == 1 assert len(hist) == 1
@pytest.mark.parametrize('line', [ def test_updated_entries(tmpdir, hist):
'12345 http://example.com/ title', # with title hist.add_url(QUrl('http://example.com/'), atime=67890)
'67890 http://example.com/', # no title assert list(hist) == [('http://example.com/', '', 67890, False)]
'12345 http://qutebrowser.org/ ', # trailing space
' ',
'',
])
def test_read(tmpdir, line):
(tmpdir / 'filled-history').write(line + '\n')
hist = history.WebHistory(hist_dir=str(tmpdir), hist_name='filled-history')
list(hist.async_read())
def test_updated_entries(tmpdir):
(tmpdir / 'filled-history').write('12345 http://example.com/\n'
'67890 http://example.com/\n')
hist = history.WebHistory(hist_dir=str(tmpdir), hist_name='filled-history')
list(hist.async_read())
assert hist['http://example.com/'] == ('http://example.com/', '', 67890,
False)
hist.add_url(QUrl('http://example.com/'), atime=99999) hist.add_url(QUrl('http://example.com/'), atime=99999)
assert hist['http://example.com/'] == ('http://example.com/', '', 99999, assert list(hist) == [('http://example.com/', '', 99999, False)]
False)
def test_invalid_read(tmpdir, caplog): def test_get_recent(hist):
(tmpdir / 'filled-history').write('foobar\n12345 http://example.com/')
hist = history.WebHistory(hist_dir=str(tmpdir), hist_name='filled-history')
with caplog.at_level(logging.WARNING):
list(hist.async_read())
entries = list(hist)
assert len(entries) == 1
msg = "Invalid history entry 'foobar': 2 or 3 fields expected!"
assert msg in (rec.msg for rec in caplog.records)
def test_get_recent(tmpdir):
(tmpdir / 'filled-history').write('12345 http://example.com/')
hist = history.WebHistory(hist_dir=str(tmpdir), hist_name='filled-history')
list(hist.async_read())
hist.add_url(QUrl('http://www.qutebrowser.org/'), atime=67890) hist.add_url(QUrl('http://www.qutebrowser.org/'), atime=67890)
lines = hist.get_recent() hist.add_url(QUrl('http://example.com/'), atime=12345)
assert list(hist.get_recent()) == [
expected = ['12345 http://example.com/', ('http://www.qutebrowser.org/', '', 67890 , False),
'67890 http://www.qutebrowser.org/'] ('http://example.com/', '', 12345, False),
assert lines == expected ]
def test_save(tmpdir): def test_save(tmpdir, hist):
hist_file = tmpdir / 'filled-history' hist.add_url(QUrl('http://example.com/'), atime=12345)
hist_file.write('12345 http://example.com/\n')
hist = history.WebHistory(hist_dir=str(tmpdir), hist_name='filled-history')
list(hist.async_read())
hist.add_url(QUrl('http://www.qutebrowser.org/'), atime=67890) hist.add_url(QUrl('http://www.qutebrowser.org/'), atime=67890)
hist.save()
lines = hist_file.read().splitlines() hist2 = history.WebHistory()
expected = ['12345 http://example.com/', assert list(hist2) == [('http://example.com/', '', 12345, False),
'67890 http://www.qutebrowser.org/'] ('http://www.qutebrowser.org/', '', 67890, False)]
assert lines == expected
hist.add_url(QUrl('http://www.the-compiler.org/'), atime=99999)
hist.save()
expected.append('99999 http://www.the-compiler.org/')
lines = hist_file.read().splitlines()
assert lines == expected
def test_clear(qtbot, tmpdir): def test_clear(qtbot, tmpdir, hist, mocker):
hist_file = tmpdir / 'filled-history' hist.add_url(QUrl('http://example.com/'))
hist_file.write('12345 http://example.com/\n') hist.add_url(QUrl('http://www.qutebrowser.org/'))
hist = history.WebHistory(hist_dir=str(tmpdir), hist_name='filled-history') m = mocker.patch('qutebrowser.browser.history.message.confirm_async')
list(hist.async_read()) hist.clear()
m.assert_called()
def test_clear_force(qtbot, tmpdir, hist):
hist.add_url(QUrl('http://example.com/'))
hist.add_url(QUrl('http://www.qutebrowser.org/')) hist.add_url(QUrl('http://www.qutebrowser.org/'))
with qtbot.waitSignal(hist.cleared): with qtbot.waitSignal(hist.cleared):
hist._do_clear() hist.clear(force=True)
assert not hist_file.read() assert not len(hist)
assert not hist._new_history
hist.add_url(QUrl('http://www.the-compiler.org/'), atime=67890)
hist.save()
lines = hist_file.read().splitlines()
assert lines == ['67890 http://www.the-compiler.org/']
@pytest.mark.parametrize('item', [ @pytest.mark.parametrize('item', [
@ -198,21 +115,34 @@ def test_clear(qtbot, tmpdir):
('http://www.example.com', 12346, 'the title', True) ('http://www.example.com', 12346, 'the title', True)
]) ])
def test_add_item(qtbot, hist, item): def test_add_item(qtbot, hist, item):
list(hist.async_read())
(url, atime, title, redirect) = item (url, atime, title, redirect) = item
hist.add_url(QUrl(url), atime=atime, title=title, redirect=redirect) hist.add_url(QUrl(url), atime=atime, title=title, redirect=redirect)
assert hist[url] == (url, title, atime, redirect) assert hist[url] == (url, title, atime, redirect)
def test_add_item_redirect_update(qtbot, tmpdir, fake_save_manager): def test_add_item_invalid(qtbot, hist, caplog):
with caplog.at_level(logging.WARNING):
hist.add_url(QUrl())
assert not list(hist)
@pytest.mark.parametrize('level, url, req_url, expected', [
(logging.DEBUG, 'a.com', 'a.com', [('a.com', 'title', 12345, False)]),
(logging.DEBUG, 'a.com', 'b.com', [('a.com', 'title', 12345, False),
('b.com', 'title', 12345, True)]),
(logging.WARNING, 'a.com', '', [('a.com', 'title', 12345, False)]),
(logging.WARNING, '', '', []),
])
def test_add_from_tab(hist, level, url, req_url, expected, mock_time, caplog):
with caplog.at_level(level):
hist.add_from_tab(QUrl(url), QUrl(req_url), 'title')
assert set(list(hist)) == set(expected)
def test_add_item_redirect_update(qtbot, tmpdir, hist):
"""A redirect update added should override a non-redirect one.""" """A redirect update added should override a non-redirect one."""
url = 'http://www.example.com/' url = 'http://www.example.com/'
hist.add_url(QUrl(url), atime=5555)
hist_file = tmpdir / 'filled-history'
hist_file.write('12345 {}\n'.format(url))
hist = history.WebHistory(hist_dir=str(tmpdir), hist_name='filled-history')
list(hist.async_read())
hist.add_url(QUrl(url), redirect=True, atime=67890) hist.add_url(QUrl(url), redirect=True, atime=67890)
assert hist[url] == (url, '', 67890, True) assert hist[url] == (url, '', 67890, True)

View File

@ -28,9 +28,8 @@ pytestmark = pytest.mark.usefixtures('init_sql')
def test_init(): def test_init():
sql.SqlTable('Foo', ['name', 'val', 'lucky'], primary_key='name') sql.SqlTable('Foo', ['name', 'val', 'lucky'], primary_key='name')
with pytest.raises(sql.SqlException): # should not error if table already exists
# table name collision on 'Foo' sql.SqlTable('Foo', ['name', 'val', 'lucky'], primary_key='name')
sql.SqlTable('Foo', ['foo', 'bar'], primary_key='foo')
def test_insert(qtbot): def test_insert(qtbot):
@ -54,6 +53,18 @@ def test_iter():
('thirteen', 13, True)] ('thirteen', 13, True)]
@pytest.mark.parametrize('rows, sort_by, sort_order, limit, result', [
([[2, 5], [1, 6], [3, 4]], 'a', 'asc', 5, [(1, 6), (2, 5), (3, 4)]),
([[2, 5], [1, 6], [3, 4]], 'a', 'desc', 3, [(3, 4), (2, 5), (1, 6)]),
([[2, 5], [1, 6], [3, 4]], 'b', 'desc', 2, [(1, 6), (2, 5)])
])
def test_select(rows, sort_by, sort_order, limit, result):
table = sql.SqlTable('Foo', ['a', 'b'], primary_key='a')
for row in rows:
table.insert(row)
assert list(table.select(sort_by, sort_order, limit)) == result
def test_replace(qtbot): def test_replace(qtbot):
table = sql.SqlTable('Foo', ['name', 'val', 'lucky'], primary_key='name') table = sql.SqlTable('Foo', ['name', 'val', 'lucky'], primary_key='name')
table.insert(['one', 1, False]) table.insert(['one', 1, False])