Convert old history file to sqlite.

If qutebrowser detects a history text file when it starts
(~/.local/share/qutebrowser/history by default on Linux), it will import this
file into the new sqlite database, then delete it.

The read is done as a coroutine as it can take some time.
This commit is contained in:
Ryan Roden-Corrent 2017-03-29 08:59:16 -04:00
parent de5be0dc5a
commit 80647b062a
4 changed files with 135 additions and 119 deletions

View File

@ -158,6 +158,8 @@ def init(args, crash_handler):
QDesktopServices.setUrlHandler('https', open_desktopservices_url)
QDesktopServices.setUrlHandler('qute', open_desktopservices_url)
_import_history()
log.init.debug("Init done!")
crash_handler.raise_crashdlg()
@ -474,6 +476,27 @@ def _init_modules(args, crash_handler):
browsertab.init()
def _import_history():
"""Import a history text file into sqlite if it exists.
In older versions of qutebrowser, history was stored in a text format.
This converts that file into the new sqlite format and removes it.
"""
path = os.path.join(standarddir.data(), 'history')
if not os.path.isfile(path):
return
def action():
with debug.log_time(log.init, 'Converting old history file to sqlite'):
objreg.get('web-history').read(path)
message.info('History import complete. Removing {}'.format(path))
os.remove(path)
# delay to give message time to appear before locking down for import
message.info('Converting {} to sqlite...'.format(path))
QTimer.singleShot(100, action)
class Quitter:
"""Utility class to quit/restart the QApplication.

View File

@ -20,6 +20,7 @@
"""Simple history which gets written to disk."""
import time
import os
from PyQt5.QtCore import pyqtSignal, pyqtSlot, QUrl
@ -70,37 +71,6 @@ class Entry:
"""Get the URL as a lossless string."""
return self.url.toString(QUrl.FullyEncoded | QUrl.RemovePassword)
@classmethod
def from_str(cls, line):
"""Parse a history line like '12345 http://example.com title'."""
data = line.split(maxsplit=2)
if len(data) == 2:
atime, url = data
title = ""
elif len(data) == 3:
atime, url, title = data
else:
raise ValueError("2 or 3 fields expected")
url = QUrl(url)
if not url.isValid():
raise ValueError("Invalid URL: {}".format(url.errorString()))
# https://github.com/qutebrowser/qutebrowser/issues/670
atime = atime.lstrip('\0')
if '-' in atime:
atime, flags = atime.split('-')
else:
flags = ''
if not set(flags).issubset('r'):
raise ValueError("Invalid flags {!r}".format(flags))
redirect = 'r' in flags
return cls(atime, url, title, redirect=redirect)
class WebHistory(sql.SqlTable):
@ -199,6 +169,48 @@ class WebHistory(sql.SqlTable):
entry = Entry(atime, url, title, redirect=redirect)
self._add_entry(entry)
def _parse_entry(self, line):
"""Parse a history line like '12345 http://example.com title'."""
data = line.split(maxsplit=2)
if len(data) == 2:
atime, url = data
title = ""
elif len(data) == 3:
atime, url, title = data
else:
raise ValueError("2 or 3 fields expected")
url = QUrl(url)
if not url.isValid():
raise ValueError("Invalid URL: {}".format(url.errorString()))
# https://github.com/qutebrowser/qutebrowser/issues/670
atime = atime.lstrip('\0')
if '-' in atime:
atime, flags = atime.split('-')
else:
flags = ''
if not set(flags).issubset('r'):
raise ValueError("Invalid flags {!r}".format(flags))
redirect = 'r' in flags
return (url, title, float(atime), bool(redirect))
def read(self, path):
"""Import a text file into the sql database."""
with open(path, 'r') as f:
rows = []
for line in f:
try:
row = self._parse_entry(line.strip())
rows.append(row)
except ValueError:
log.init.warning('Skipping history line {}'.format(line))
self.insert_batch(rows)
def init(parent=None):
"""Initialize the web history.

View File

@ -55,16 +55,21 @@ def version():
return result.record().value(0)
def _prepare_query(querystr):
log.sql.debug('Preparing SQL query: "{}"'.format(querystr))
database = QSqlDatabase.database()
query = QSqlQuery(database)
query.prepare(querystr)
return query
def run_query(querystr, values=None):
"""Run the given SQL query string on the database.
Args:
values: A list of positional parameter bindings.
"""
log.sql.debug('Running SQL query: "{}"'.format(querystr))
database = QSqlDatabase.database()
query = QSqlQuery(database)
query.prepare(querystr)
query = _prepare_query(querystr)
for val in values or []:
query.addBindValue(val)
log.sql.debug('Query bindings: {}'.format(query.boundValues()))
@ -74,6 +79,28 @@ def run_query(querystr, values=None):
return query
def run_batch(querystr, values):
"""Run the given SQL query string on the database in batch mode.
Args:
values: A list of lists, where each inner list contains positional
bindings for one run of the batch.
"""
query = _prepare_query(querystr)
transposed = [list(row) for row in zip(*values)]
for val in transposed:
query.addBindValue(val)
log.sql.debug('Batch Query bindings: {}'.format(query.boundValues()))
db = QSqlDatabase.database()
db.transaction()
if not query.execBatch():
raise SqlException('Failed to exec query "{}": "{}"'.format(
querystr, query.lastError().text()))
db.commit()
return query
class SqlTable(QObject):
"""Interface to a sql table.
@ -102,8 +129,8 @@ class SqlTable(QObject):
super().__init__(parent)
self._name = name
self._primary_key = primary_key
run_query("CREATE TABLE IF NOT EXISTS {} ({}, PRIMARY KEY ({}))"
.format(name, ','.join(fields), primary_key))
run_query("CREATE TABLE IF NOT EXISTS {} ({})"
.format(name, ','.join(fields)))
# pylint: disable=invalid-name
self.Entry = collections.namedtuple(name + '_Entry', fields)
@ -172,6 +199,19 @@ class SqlTable(QObject):
values)
self.changed.emit()
def insert_batch(self, rows, replace=False):
"""Performantly append multiple rows to the table.
Args:
rows: A list of lists, where each sub-list is a row.
replace: If true, allow inserting over an existing primary key.
"""
cmd = "REPLACE" if replace else "INSERT"
paramstr = ','.join(['?'] * len(rows[0]))
run_batch("{} INTO {} values({})".format(cmd, self._name, paramstr),
rows)
self.changed.emit()
def delete_all(self):
"""Remove all row from the table."""
run_query("DELETE FROM {}".format(self._name))

View File

@ -148,88 +148,6 @@ def test_add_item_redirect_update(qtbot, tmpdir, hist):
assert hist[url] == (url, '', 67890, True)
@pytest.mark.parametrize('line, expected', [
(
# old format without title
'12345 http://example.com/',
history.Entry(atime=12345, url=QUrl('http://example.com/'), title='',)
),
(
# trailing space without title
'12345 http://example.com/ ',
history.Entry(atime=12345, url=QUrl('http://example.com/'), title='',)
),
(
# new format with title
'12345 http://example.com/ this is a title',
history.Entry(atime=12345, url=QUrl('http://example.com/'),
title='this is a title')
),
(
# weird NUL bytes
'\x0012345 http://example.com/',
history.Entry(atime=12345, url=QUrl('http://example.com/'), title=''),
),
(
# redirect flag
'12345-r http://example.com/ this is a title',
history.Entry(atime=12345, url=QUrl('http://example.com/'),
title='this is a title', redirect=True)
),
])
def test_entry_parse_valid(line, expected):
entry = history.Entry.from_str(line)
assert entry == expected
@pytest.mark.parametrize('line', [
'12345', # one field
'12345 ::', # invalid URL
'xyz http://www.example.com/', # invalid timestamp
'12345-x http://www.example.com/', # invalid flags
'12345-r-r http://www.example.com/', # double flags
])
def test_entry_parse_invalid(line):
with pytest.raises(ValueError):
history.Entry.from_str(line)
@hypothesis.given(strategies.text())
def test_entry_parse_hypothesis(text):
"""Make sure parsing works or gives us ValueError."""
try:
history.Entry.from_str(text)
except ValueError:
pass
@pytest.mark.parametrize('entry, expected', [
# simple
(
history.Entry(12345, QUrl('http://example.com/'), "the title"),
"12345 http://example.com/ the title",
),
# timestamp as float
(
history.Entry(12345.678, QUrl('http://example.com/'), "the title"),
"12345 http://example.com/ the title",
),
# no title
(
history.Entry(12345.678, QUrl('http://example.com/'), ""),
"12345 http://example.com/",
),
# redirect flag
(
history.Entry(12345.678, QUrl('http://example.com/'), "",
redirect=True),
"12345-r http://example.com/",
),
])
def test_entry_str(entry, expected):
assert str(entry) == expected
@pytest.fixture
def hist_interface():
# pylint: disable=invalid-name
@ -298,3 +216,26 @@ def test_init(backend, qapp, tmpdir, monkeypatch, cleanup_init):
# For this to work, nothing can ever have called setDefaultInterface
# before (so we need to test webengine before webkit)
assert default_interface is None
def test_read(hist, tmpdir, caplog):
histfile = tmpdir / 'history'
histfile.write('''12345 http://example.com/ title
12346 http://qutebrowser.org/
67890 http://example.com/path
xyz http://example.com/bad-timestamp
12345
http://example.com/no-timestamp
68891-r http://example.com/path/other
68891-r-r http://example.com/double-flag''')
with caplog.at_level(logging.WARNING):
hist.read(str(histfile))
assert list(hist) == [
('http://example.com/', 'title', 12345, False),
('http://qutebrowser.org/', '', 12346, False),
('http://example.com/path', '', 67890, False),
('http://example.com/path/other', '', 68891, True)
]