Convert old history file to sqlite.

If qutebrowser detects a history text file when it starts
(~/.local/share/qutebrowser/history by default on Linux), it will import this
file into the new sqlite database, then delete it.

The read is done as a coroutine as it can take some time.
This commit is contained in:
Ryan Roden-Corrent 2017-03-29 08:59:16 -04:00
parent de5be0dc5a
commit 80647b062a
4 changed files with 135 additions and 119 deletions

View File

@ -158,6 +158,8 @@ def init(args, crash_handler):
QDesktopServices.setUrlHandler('https', open_desktopservices_url) QDesktopServices.setUrlHandler('https', open_desktopservices_url)
QDesktopServices.setUrlHandler('qute', open_desktopservices_url) QDesktopServices.setUrlHandler('qute', open_desktopservices_url)
_import_history()
log.init.debug("Init done!") log.init.debug("Init done!")
crash_handler.raise_crashdlg() crash_handler.raise_crashdlg()
@ -474,6 +476,27 @@ def _init_modules(args, crash_handler):
browsertab.init() browsertab.init()
def _import_history():
"""Import a history text file into sqlite if it exists.
In older versions of qutebrowser, history was stored in a text format.
This converts that file into the new sqlite format and removes it.
"""
path = os.path.join(standarddir.data(), 'history')
if not os.path.isfile(path):
return
def action():
with debug.log_time(log.init, 'Converting old history file to sqlite'):
objreg.get('web-history').read(path)
message.info('History import complete. Removing {}'.format(path))
os.remove(path)
# delay to give message time to appear before locking down for import
message.info('Converting {} to sqlite...'.format(path))
QTimer.singleShot(100, action)
class Quitter: class Quitter:
"""Utility class to quit/restart the QApplication. """Utility class to quit/restart the QApplication.

View File

@ -20,6 +20,7 @@
"""Simple history which gets written to disk.""" """Simple history which gets written to disk."""
import time import time
import os
from PyQt5.QtCore import pyqtSignal, pyqtSlot, QUrl from PyQt5.QtCore import pyqtSignal, pyqtSlot, QUrl
@ -70,37 +71,6 @@ class Entry:
"""Get the URL as a lossless string.""" """Get the URL as a lossless string."""
return self.url.toString(QUrl.FullyEncoded | QUrl.RemovePassword) return self.url.toString(QUrl.FullyEncoded | QUrl.RemovePassword)
@classmethod
def from_str(cls, line):
"""Parse a history line like '12345 http://example.com title'."""
data = line.split(maxsplit=2)
if len(data) == 2:
atime, url = data
title = ""
elif len(data) == 3:
atime, url, title = data
else:
raise ValueError("2 or 3 fields expected")
url = QUrl(url)
if not url.isValid():
raise ValueError("Invalid URL: {}".format(url.errorString()))
# https://github.com/qutebrowser/qutebrowser/issues/670
atime = atime.lstrip('\0')
if '-' in atime:
atime, flags = atime.split('-')
else:
flags = ''
if not set(flags).issubset('r'):
raise ValueError("Invalid flags {!r}".format(flags))
redirect = 'r' in flags
return cls(atime, url, title, redirect=redirect)
class WebHistory(sql.SqlTable): class WebHistory(sql.SqlTable):
@ -199,6 +169,48 @@ class WebHistory(sql.SqlTable):
entry = Entry(atime, url, title, redirect=redirect) entry = Entry(atime, url, title, redirect=redirect)
self._add_entry(entry) self._add_entry(entry)
def _parse_entry(self, line):
"""Parse a history line like '12345 http://example.com title'."""
data = line.split(maxsplit=2)
if len(data) == 2:
atime, url = data
title = ""
elif len(data) == 3:
atime, url, title = data
else:
raise ValueError("2 or 3 fields expected")
url = QUrl(url)
if not url.isValid():
raise ValueError("Invalid URL: {}".format(url.errorString()))
# https://github.com/qutebrowser/qutebrowser/issues/670
atime = atime.lstrip('\0')
if '-' in atime:
atime, flags = atime.split('-')
else:
flags = ''
if not set(flags).issubset('r'):
raise ValueError("Invalid flags {!r}".format(flags))
redirect = 'r' in flags
return (url, title, float(atime), bool(redirect))
def read(self, path):
"""Import a text file into the sql database."""
with open(path, 'r') as f:
rows = []
for line in f:
try:
row = self._parse_entry(line.strip())
rows.append(row)
except ValueError:
log.init.warning('Skipping history line {}'.format(line))
self.insert_batch(rows)
def init(parent=None): def init(parent=None):
"""Initialize the web history. """Initialize the web history.

View File

@ -55,16 +55,21 @@ def version():
return result.record().value(0) return result.record().value(0)
def _prepare_query(querystr):
log.sql.debug('Preparing SQL query: "{}"'.format(querystr))
database = QSqlDatabase.database()
query = QSqlQuery(database)
query.prepare(querystr)
return query
def run_query(querystr, values=None): def run_query(querystr, values=None):
"""Run the given SQL query string on the database. """Run the given SQL query string on the database.
Args: Args:
values: A list of positional parameter bindings. values: A list of positional parameter bindings.
""" """
log.sql.debug('Running SQL query: "{}"'.format(querystr)) query = _prepare_query(querystr)
database = QSqlDatabase.database()
query = QSqlQuery(database)
query.prepare(querystr)
for val in values or []: for val in values or []:
query.addBindValue(val) query.addBindValue(val)
log.sql.debug('Query bindings: {}'.format(query.boundValues())) log.sql.debug('Query bindings: {}'.format(query.boundValues()))
@ -74,6 +79,28 @@ def run_query(querystr, values=None):
return query return query
def run_batch(querystr, values):
"""Run the given SQL query string on the database in batch mode.
Args:
values: A list of lists, where each inner list contains positional
bindings for one run of the batch.
"""
query = _prepare_query(querystr)
transposed = [list(row) for row in zip(*values)]
for val in transposed:
query.addBindValue(val)
log.sql.debug('Batch Query bindings: {}'.format(query.boundValues()))
db = QSqlDatabase.database()
db.transaction()
if not query.execBatch():
raise SqlException('Failed to exec query "{}": "{}"'.format(
querystr, query.lastError().text()))
db.commit()
return query
class SqlTable(QObject): class SqlTable(QObject):
"""Interface to a sql table. """Interface to a sql table.
@ -102,8 +129,8 @@ class SqlTable(QObject):
super().__init__(parent) super().__init__(parent)
self._name = name self._name = name
self._primary_key = primary_key self._primary_key = primary_key
run_query("CREATE TABLE IF NOT EXISTS {} ({}, PRIMARY KEY ({}))" run_query("CREATE TABLE IF NOT EXISTS {} ({})"
.format(name, ','.join(fields), primary_key)) .format(name, ','.join(fields)))
# pylint: disable=invalid-name # pylint: disable=invalid-name
self.Entry = collections.namedtuple(name + '_Entry', fields) self.Entry = collections.namedtuple(name + '_Entry', fields)
@ -172,6 +199,19 @@ class SqlTable(QObject):
values) values)
self.changed.emit() self.changed.emit()
def insert_batch(self, rows, replace=False):
"""Performantly append multiple rows to the table.
Args:
rows: A list of lists, where each sub-list is a row.
replace: If true, allow inserting over an existing primary key.
"""
cmd = "REPLACE" if replace else "INSERT"
paramstr = ','.join(['?'] * len(rows[0]))
run_batch("{} INTO {} values({})".format(cmd, self._name, paramstr),
rows)
self.changed.emit()
def delete_all(self): def delete_all(self):
"""Remove all row from the table.""" """Remove all row from the table."""
run_query("DELETE FROM {}".format(self._name)) run_query("DELETE FROM {}".format(self._name))

View File

@ -148,88 +148,6 @@ def test_add_item_redirect_update(qtbot, tmpdir, hist):
assert hist[url] == (url, '', 67890, True) assert hist[url] == (url, '', 67890, True)
@pytest.mark.parametrize('line, expected', [
(
# old format without title
'12345 http://example.com/',
history.Entry(atime=12345, url=QUrl('http://example.com/'), title='',)
),
(
# trailing space without title
'12345 http://example.com/ ',
history.Entry(atime=12345, url=QUrl('http://example.com/'), title='',)
),
(
# new format with title
'12345 http://example.com/ this is a title',
history.Entry(atime=12345, url=QUrl('http://example.com/'),
title='this is a title')
),
(
# weird NUL bytes
'\x0012345 http://example.com/',
history.Entry(atime=12345, url=QUrl('http://example.com/'), title=''),
),
(
# redirect flag
'12345-r http://example.com/ this is a title',
history.Entry(atime=12345, url=QUrl('http://example.com/'),
title='this is a title', redirect=True)
),
])
def test_entry_parse_valid(line, expected):
entry = history.Entry.from_str(line)
assert entry == expected
@pytest.mark.parametrize('line', [
'12345', # one field
'12345 ::', # invalid URL
'xyz http://www.example.com/', # invalid timestamp
'12345-x http://www.example.com/', # invalid flags
'12345-r-r http://www.example.com/', # double flags
])
def test_entry_parse_invalid(line):
with pytest.raises(ValueError):
history.Entry.from_str(line)
@hypothesis.given(strategies.text())
def test_entry_parse_hypothesis(text):
"""Make sure parsing works or gives us ValueError."""
try:
history.Entry.from_str(text)
except ValueError:
pass
@pytest.mark.parametrize('entry, expected', [
# simple
(
history.Entry(12345, QUrl('http://example.com/'), "the title"),
"12345 http://example.com/ the title",
),
# timestamp as float
(
history.Entry(12345.678, QUrl('http://example.com/'), "the title"),
"12345 http://example.com/ the title",
),
# no title
(
history.Entry(12345.678, QUrl('http://example.com/'), ""),
"12345 http://example.com/",
),
# redirect flag
(
history.Entry(12345.678, QUrl('http://example.com/'), "",
redirect=True),
"12345-r http://example.com/",
),
])
def test_entry_str(entry, expected):
assert str(entry) == expected
@pytest.fixture @pytest.fixture
def hist_interface(): def hist_interface():
# pylint: disable=invalid-name # pylint: disable=invalid-name
@ -298,3 +216,26 @@ def test_init(backend, qapp, tmpdir, monkeypatch, cleanup_init):
# For this to work, nothing can ever have called setDefaultInterface # For this to work, nothing can ever have called setDefaultInterface
# before (so we need to test webengine before webkit) # before (so we need to test webengine before webkit)
assert default_interface is None assert default_interface is None
def test_read(hist, tmpdir, caplog):
histfile = tmpdir / 'history'
histfile.write('''12345 http://example.com/ title
12346 http://qutebrowser.org/
67890 http://example.com/path
xyz http://example.com/bad-timestamp
12345
http://example.com/no-timestamp
68891-r http://example.com/path/other
68891-r-r http://example.com/double-flag''')
with caplog.at_level(logging.WARNING):
hist.read(str(histfile))
assert list(hist) == [
('http://example.com/', 'title', 12345, False),
('http://qutebrowser.org/', '', 12346, False),
('http://example.com/path', '', 67890, False),
('http://example.com/path/other', '', 68891, True)
]