Store history in an on-disk sqlite database.

Instead of reading sqlite history from a file and storing it in an in-memory
database, just directly use an on-disk database. This resolves #755, where
history entries don't pop in to the completion menu immediately as they are
still being read asynchronously for a few seconds after the browser starts.
This commit is contained in:
Ryan Roden-Corrent 2017-03-26 16:52:10 -04:00
parent b47c3b6a60
commit de5be0dc5a
7 changed files with 92 additions and 231 deletions

View File

@ -158,8 +158,6 @@ def init(args, crash_handler):
QDesktopServices.setUrlHandler('https', open_desktopservices_url)
QDesktopServices.setUrlHandler('qute', open_desktopservices_url)
QTimer.singleShot(10, functools.partial(_init_late_modules, args))
log.init.debug("Init done!")
crash_handler.raise_crashdlg()
@ -428,7 +426,7 @@ def _init_modules(args, crash_handler):
keyconf.init(qApp)
log.init.debug("Initializing sql...")
sql.init()
sql.init(os.path.join(standarddir.data(), 'history.sqlite'))
log.init.debug("Initializing web history...")
history.init(qApp)
@ -476,23 +474,6 @@ def _init_modules(args, crash_handler):
browsertab.init()
def _init_late_modules(args):
"""Initialize modules which can be inited after the window is shown."""
log.init.debug("Reading web history...")
reader = objreg.get('web-history').async_read()
with debug.log_time(log.init, 'Reading history'):
while True:
QApplication.processEvents()
try:
next(reader)
except StopIteration:
break
except (OSError, UnicodeDecodeError) as e:
error.handle_fatal_exc(e, args, "Error while initializing!",
pre_text="Error while initializing")
sys.exit(usertypes.Exit.err_init)
class Quitter:
"""Utility class to quit/restart the QApplication.

View File

@ -26,7 +26,7 @@ from PyQt5.QtCore import pyqtSignal, pyqtSlot, QUrl
from qutebrowser.commands import cmdutils
from qutebrowser.utils import (utils, objreg, standarddir, log, qtutils,
usertypes, message)
from qutebrowser.misc import lineparser, objects, sql
from qutebrowser.misc import objects, sql
class Entry:
@ -118,14 +118,6 @@ class WebHistory(sql.SqlTable):
self._saved_count tracks how many of those entries were already written to
disk, so we can always append to the existing data.
Attributes:
_lineparser: The AppendLineParser used to save the history.
_new_history: A list of Entry items of the current session.
_saved_count: How many HistoryEntries have been written to disk.
_initial_read_started: Whether async_read was called.
_initial_read_done: Whether async_read has completed.
_temp_history: List of history entries from before async_read finished.
Signals:
cleared: Emitted after the history is cleared.
"""
@ -133,59 +125,13 @@ class WebHistory(sql.SqlTable):
cleared = pyqtSignal()
async_read_done = pyqtSignal()
def __init__(self, hist_dir, hist_name, parent=None):
def __init__(self, parent=None):
super().__init__("History", ['url', 'title', 'atime', 'redirect'],
primary_key='url', parent=parent)
self._initial_read_started = False
self._initial_read_done = False
self._lineparser = lineparser.AppendLineParser(hist_dir, hist_name,
parent=self)
self._temp_history = []
self._new_history = []
self._saved_count = 0
def __repr__(self):
return utils.get_repr(self, length=len(self))
def async_read(self):
"""Read the initial history."""
if self._initial_read_started:
log.init.debug("Ignoring async_read() because reading is started.")
return
self._initial_read_started = True
with self._lineparser.open():
for line in self._lineparser:
yield
line = line.rstrip()
if not line:
continue
try:
entry = Entry.from_str(line)
except ValueError as e:
log.init.warning("Invalid history entry {!r}: {}!".format(
line, e))
continue
# This de-duplicates history entries; only the latest
# entry for each URL is kept. If you want to keep
# information about previous hits change the items in
# old_urls to be lists or change Entry to have a
# list of atimes.
self._add_entry(entry)
self._initial_read_done = True
self.async_read_done.emit()
objreg.get('save-manager').add_saveable(
'history', self.save, self.changed)
for entry in self._temp_history:
self._add_entry(entry)
self._new_history.append(entry)
self._temp_history.clear()
def _add_entry(self, entry):
"""Add an entry to the in-memory database."""
self.insert([entry.url_str(), entry.title, entry.atime,
@ -193,15 +139,7 @@ class WebHistory(sql.SqlTable):
def get_recent(self):
"""Get the most recent history entries."""
old = self._lineparser.get_recent()
return old + [str(e) for e in self._new_history]
def save(self):
"""Save the history to disk."""
new = (str(e) for e in self._new_history[self._saved_count:])
self._lineparser.new_data = new
self._lineparser.save()
self._saved_count = len(self._new_history)
return self.select(sort_by='atime', sort_order='desc', limit=100)
@cmdutils.register(name='history-clear', instance='web-history')
def clear(self, force=False):
@ -221,11 +159,7 @@ class WebHistory(sql.SqlTable):
"history?")
def _do_clear(self):
self._lineparser.clear()
self.delete_all()
self._temp_history.clear()
self._new_history.clear()
self._saved_count = 0
self.cleared.emit()
@pyqtSlot(QUrl, QUrl, str)
@ -263,11 +197,7 @@ class WebHistory(sql.SqlTable):
if atime is None:
atime = time.time()
entry = Entry(atime, url, title, redirect=redirect)
if self._initial_read_done:
self._add_entry(entry)
self._new_history.append(entry)
else:
self._temp_history.append(entry)
def init(parent=None):
@ -276,8 +206,7 @@ def init(parent=None):
Args:
parent: The parent to use for WebHistory.
"""
history = WebHistory(hist_dir=standarddir.data(), hist_name='history',
parent=parent)
history = WebHistory(parent=parent)
objreg.register('web-history', history)
if objects.backend == usertypes.Backend.QtWebKit:

View File

@ -34,14 +34,13 @@ class SqlException(Exception):
pass
def init():
def init(db_path):
"""Initialize the SQL database connection."""
database = QSqlDatabase.addDatabase('QSQLITE')
# In-memory database, see https://sqlite.org/inmemorydb.html
database.setDatabaseName(':memory:')
database.setDatabaseName(db_path)
if not database.open():
raise SqlException("Failed to open in-memory sqlite database: {}"
.format(database.lastError().text()))
raise SqlException("Failed to open sqlite database at {}: {}"
.format(db_path, database.lastError().text()))
def close():
@ -103,8 +102,8 @@ class SqlTable(QObject):
super().__init__(parent)
self._name = name
self._primary_key = primary_key
run_query("CREATE TABLE {} ({}, PRIMARY KEY ({}))".format(
name, ','.join(fields), primary_key))
run_query("CREATE TABLE IF NOT EXISTS {} ({}, PRIMARY KEY ({}))"
.format(name, ','.join(fields), primary_key))
# pylint: disable=invalid-name
self.Entry = collections.namedtuple(name + '_Entry', fields)
@ -177,3 +176,11 @@ class SqlTable(QObject):
"""Remove all row from the table."""
run_query("DELETE FROM {}".format(self._name))
self.changed.emit()
def select(self, sort_by, sort_order, limit):
"""Remove all row from the table."""
result = run_query('SELECT * FROM {} ORDER BY {} {} LIMIT {}'
.format(self._name, sort_by, sort_order, limit))
while result.next():
rec = result.record()
yield self.Entry(*[rec.value(i) for i in range(rec.count())])

View File

@ -55,6 +55,8 @@ PERFECT_FILES = [
'browser/history.py'),
('tests/unit/browser/webkit/test_history.py',
'browser/webkit/webkithistory.py'),
('tests/unit/browser/webkit/test_history.py',
'browser/history.py'),
('tests/unit/browser/webkit/http/test_http.py',
'browser/webkit/http.py'),
('tests/unit/browser/webkit/http/test_content_disposition.py',

View File

@ -476,8 +476,9 @@ def short_tmpdir():
@pytest.fixture
def init_sql():
def init_sql(data_tmpdir):
"""Initialize the SQL module, and shut it down after the test."""
sql.init()
path = str(data_tmpdir / 'test.db')
sql.init(path)
yield
sql.close()

View File

@ -38,47 +38,17 @@ def prerequisites(config_stub, fake_save_manager, init_sql):
@pytest.fixture()
def hist(tmpdir):
return history.WebHistory(hist_dir=str(tmpdir), hist_name='history')
return history.WebHistory()
def test_register_saveable(monkeypatch, qtbot, tmpdir, caplog,
fake_save_manager):
(tmpdir / 'filled-history').write('12345 http://example.com/ title')
hist = history.WebHistory(hist_dir=str(tmpdir), hist_name='filled-history')
list(hist.async_read())
assert fake_save_manager.add_saveable.called
def test_async_read_twice(monkeypatch, qtbot, tmpdir, caplog):
(tmpdir / 'filled-history').write('\n'.join([
'12345 http://example.com/ title',
'67890 http://example.com/',
'12345 http://qutebrowser.org/ blah',
]))
hist = history.WebHistory(hist_dir=str(tmpdir), hist_name='filled-history')
next(hist.async_read())
with pytest.raises(StopIteration):
next(hist.async_read())
expected = "Ignoring async_read() because reading is started."
assert expected in (record.msg for record in caplog.records)
@pytest.mark.parametrize('redirect', [True, False])
def test_adding_item_during_async_read(qtbot, hist, redirect):
"""Check what happens when adding URL while reading the history."""
url = 'http://www.example.com/'
hist.add_url(QUrl(url), redirect=redirect, atime=12345)
with qtbot.waitSignal(hist.async_read_done):
list(hist.async_read())
assert not hist._temp_history
assert list(hist) == [(url, '', 12345, redirect)]
@pytest.fixture()
def mock_time(mocker):
m = mocker.patch('qutebrowser.browser.history.time')
m.time.return_value = 12345
return 12345
def test_iter(hist):
list(hist.async_read())
urlstr = 'http://www.example.com/'
url = QUrl(urlstr)
hist.add_url(url, atime=12345)
@ -88,7 +58,6 @@ def test_iter(hist):
def test_len(hist):
assert len(hist) == 0
list(hist.async_read())
url = QUrl('http://www.example.com/')
hist.add_url(url)
@ -96,101 +65,49 @@ def test_len(hist):
assert len(hist) == 1
@pytest.mark.parametrize('line', [
'12345 http://example.com/ title', # with title
'67890 http://example.com/', # no title
'12345 http://qutebrowser.org/ ', # trailing space
' ',
'',
])
def test_read(tmpdir, line):
(tmpdir / 'filled-history').write(line + '\n')
hist = history.WebHistory(hist_dir=str(tmpdir), hist_name='filled-history')
list(hist.async_read())
def test_updated_entries(tmpdir, hist):
hist.add_url(QUrl('http://example.com/'), atime=67890)
assert list(hist) == [('http://example.com/', '', 67890, False)]
def test_updated_entries(tmpdir):
(tmpdir / 'filled-history').write('12345 http://example.com/\n'
'67890 http://example.com/\n')
hist = history.WebHistory(hist_dir=str(tmpdir), hist_name='filled-history')
list(hist.async_read())
assert hist['http://example.com/'] == ('http://example.com/', '', 67890,
False)
hist.add_url(QUrl('http://example.com/'), atime=99999)
assert hist['http://example.com/'] == ('http://example.com/', '', 99999,
False)
assert list(hist) == [('http://example.com/', '', 99999, False)]
def test_invalid_read(tmpdir, caplog):
(tmpdir / 'filled-history').write('foobar\n12345 http://example.com/')
hist = history.WebHistory(hist_dir=str(tmpdir), hist_name='filled-history')
with caplog.at_level(logging.WARNING):
list(hist.async_read())
entries = list(hist)
assert len(entries) == 1
msg = "Invalid history entry 'foobar': 2 or 3 fields expected!"
assert msg in (rec.msg for rec in caplog.records)
def test_get_recent(tmpdir):
(tmpdir / 'filled-history').write('12345 http://example.com/')
hist = history.WebHistory(hist_dir=str(tmpdir), hist_name='filled-history')
list(hist.async_read())
def test_get_recent(hist):
hist.add_url(QUrl('http://www.qutebrowser.org/'), atime=67890)
lines = hist.get_recent()
expected = ['12345 http://example.com/',
'67890 http://www.qutebrowser.org/']
assert lines == expected
hist.add_url(QUrl('http://example.com/'), atime=12345)
assert list(hist.get_recent()) == [
('http://www.qutebrowser.org/', '', 67890 , False),
('http://example.com/', '', 12345, False),
]
def test_save(tmpdir):
hist_file = tmpdir / 'filled-history'
hist_file.write('12345 http://example.com/\n')
hist = history.WebHistory(hist_dir=str(tmpdir), hist_name='filled-history')
list(hist.async_read())
def test_save(tmpdir, hist):
hist.add_url(QUrl('http://example.com/'), atime=12345)
hist.add_url(QUrl('http://www.qutebrowser.org/'), atime=67890)
hist.save()
lines = hist_file.read().splitlines()
expected = ['12345 http://example.com/',
'67890 http://www.qutebrowser.org/']
assert lines == expected
hist.add_url(QUrl('http://www.the-compiler.org/'), atime=99999)
hist.save()
expected.append('99999 http://www.the-compiler.org/')
lines = hist_file.read().splitlines()
assert lines == expected
hist2 = history.WebHistory()
assert list(hist2) == [('http://example.com/', '', 12345, False),
('http://www.qutebrowser.org/', '', 67890, False)]
def test_clear(qtbot, tmpdir):
hist_file = tmpdir / 'filled-history'
hist_file.write('12345 http://example.com/\n')
def test_clear(qtbot, tmpdir, hist, mocker):
hist.add_url(QUrl('http://example.com/'))
hist.add_url(QUrl('http://www.qutebrowser.org/'))
hist = history.WebHistory(hist_dir=str(tmpdir), hist_name='filled-history')
list(hist.async_read())
m = mocker.patch('qutebrowser.browser.history.message.confirm_async')
hist.clear()
m.assert_called()
def test_clear_force(qtbot, tmpdir, hist):
hist.add_url(QUrl('http://example.com/'))
hist.add_url(QUrl('http://www.qutebrowser.org/'))
with qtbot.waitSignal(hist.cleared):
hist._do_clear()
hist.clear(force=True)
assert not hist_file.read()
assert not hist._new_history
hist.add_url(QUrl('http://www.the-compiler.org/'), atime=67890)
hist.save()
lines = hist_file.read().splitlines()
assert lines == ['67890 http://www.the-compiler.org/']
assert not len(hist)
@pytest.mark.parametrize('item', [
@ -198,21 +115,34 @@ def test_clear(qtbot, tmpdir):
('http://www.example.com', 12346, 'the title', True)
])
def test_add_item(qtbot, hist, item):
list(hist.async_read())
(url, atime, title, redirect) = item
hist.add_url(QUrl(url), atime=atime, title=title, redirect=redirect)
assert hist[url] == (url, title, atime, redirect)
def test_add_item_redirect_update(qtbot, tmpdir, fake_save_manager):
def test_add_item_invalid(qtbot, hist, caplog):
with caplog.at_level(logging.WARNING):
hist.add_url(QUrl())
assert not list(hist)
@pytest.mark.parametrize('level, url, req_url, expected', [
(logging.DEBUG, 'a.com', 'a.com', [('a.com', 'title', 12345, False)]),
(logging.DEBUG, 'a.com', 'b.com', [('a.com', 'title', 12345, False),
('b.com', 'title', 12345, True)]),
(logging.WARNING, 'a.com', '', [('a.com', 'title', 12345, False)]),
(logging.WARNING, '', '', []),
])
def test_add_from_tab(hist, level, url, req_url, expected, mock_time, caplog):
with caplog.at_level(level):
hist.add_from_tab(QUrl(url), QUrl(req_url), 'title')
assert set(list(hist)) == set(expected)
def test_add_item_redirect_update(qtbot, tmpdir, hist):
"""A redirect update added should override a non-redirect one."""
url = 'http://www.example.com/'
hist_file = tmpdir / 'filled-history'
hist_file.write('12345 {}\n'.format(url))
hist = history.WebHistory(hist_dir=str(tmpdir), hist_name='filled-history')
list(hist.async_read())
hist.add_url(QUrl(url), atime=5555)
hist.add_url(QUrl(url), redirect=True, atime=67890)
assert hist[url] == (url, '', 67890, True)

View File

@ -28,9 +28,8 @@ pytestmark = pytest.mark.usefixtures('init_sql')
def test_init():
sql.SqlTable('Foo', ['name', 'val', 'lucky'], primary_key='name')
with pytest.raises(sql.SqlException):
# table name collision on 'Foo'
sql.SqlTable('Foo', ['foo', 'bar'], primary_key='foo')
# should not error if table already exists
sql.SqlTable('Foo', ['name', 'val', 'lucky'], primary_key='name')
def test_insert(qtbot):
@ -54,6 +53,18 @@ def test_iter():
('thirteen', 13, True)]
@pytest.mark.parametrize('rows, sort_by, sort_order, limit, result', [
([[2, 5], [1, 6], [3, 4]], 'a', 'asc', 5, [(1, 6), (2, 5), (3, 4)]),
([[2, 5], [1, 6], [3, 4]], 'a', 'desc', 3, [(3, 4), (2, 5), (1, 6)]),
([[2, 5], [1, 6], [3, 4]], 'b', 'desc', 2, [(1, 6), (2, 5)])
])
def test_select(rows, sort_by, sort_order, limit, result):
table = sql.SqlTable('Foo', ['a', 'b'], primary_key='a')
for row in rows:
table.insert(row)
assert list(table.select(sort_by, sort_order, limit)) == result
def test_replace(qtbot):
table = sql.SqlTable('Foo', ['name', 'val', 'lucky'], primary_key='name')
table.insert(['one', 1, False])