305 lines
10 KiB
Python
305 lines
10 KiB
Python
# vim: ft=python fileencoding=utf-8 sts=4 sw=4 et:
|
|
|
|
# Copyright 2015-2016 Florian Bruhin (The Compiler) <mail@qutebrowser.org>
|
|
#
|
|
# This file is part of qutebrowser.
|
|
#
|
|
# qutebrowser is free software: you can redistribute it and/or modify
|
|
# it under the terms of the GNU General Public License as published by
|
|
# the Free Software Foundation, either version 3 of the License, or
|
|
# (at your option) any later version.
|
|
#
|
|
# qutebrowser is distributed in the hope that it will be useful,
|
|
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
# GNU General Public License for more details.
|
|
#
|
|
# You should have received a copy of the GNU General Public License
|
|
# along with qutebrowser. If not, see <http://www.gnu.org/licenses/>.
|
|
|
|
"""Simple history which gets written to disk."""
|
|
|
|
import time
|
|
import collections
|
|
|
|
from PyQt5.QtCore import pyqtSignal, QUrl, QObject
|
|
from PyQt5.QtWebKit import QWebHistoryInterface
|
|
|
|
from qutebrowser.commands import cmdutils
|
|
from qutebrowser.utils import utils, objreg, standarddir, log, qtutils
|
|
from qutebrowser.config import config
|
|
from qutebrowser.misc import lineparser
|
|
|
|
|
|
class Entry:

    """A single entry in the web history.

    Attributes:
        atime: The time the page was accessed, stored as a float.
        url: The URL which was accessed as QUrl.
        title: The title of the page; may be an empty string.
        hidden: If True, don't save this entry to disk
    """

    # Entries are mutable and compare by value, so they must not be hashable.
    # Python already does this implicitly for classes defining only __eq__;
    # spelling it out documents the intent.
    __hash__ = None

    def __init__(self, atime, url, title, hidden=False):
        # float() accepts both numbers and numeric strings (as parsed from
        # a history line) and raises ValueError for anything else.
        self.atime = float(atime)
        self.url = url
        self.title = title
        self.hidden = hidden
        # NOTE(review): presumably raises for an invalid QUrl - confirm
        # against qtutils.ensure_valid.
        qtutils.ensure_valid(url)

    def __repr__(self):
        return utils.get_repr(self, constructor=True, atime=self.atime,
                              url=self.url_str(), title=self.title,
                              hidden=self.hidden)

    def __str__(self):
        """Get the entry in the 'atime url [title]' on-disk line format."""
        elems = [str(int(self.atime)), self.url_str()]
        if self.title:
            elems.append(self.title)
        return ' '.join(elems)

    def __eq__(self, other):
        """Compare two entries by value.

        Returns NotImplemented for non-Entry objects so that comparisons
        like `entry == None` evaluate to False instead of raising
        AttributeError.
        """
        if not isinstance(other, Entry):
            return NotImplemented
        return (self.atime == other.atime and
                self.title == other.title and
                self.url == other.url and
                self.hidden == other.hidden)

    def url_str(self):
        """Get the URL as a lossless string."""
        return self.url.toString(QUrl.FullyEncoded | QUrl.RemovePassword)

    @classmethod
    def from_str(cls, line):
        """Parse a history line like '12345 http://example.com title'.

        Args:
            line: The line to parse, with the title being optional.

        Return:
            A new Entry built from the line.

        Raises:
            ValueError: If the line doesn't have 2 or 3 fields, the URL is
                        invalid or the access time is not a number.
        """
        # maxsplit=2 keeps whitespace inside the title intact.
        data = line.split(maxsplit=2)
        if len(data) == 2:
            atime, url = data
            title = ""
        elif len(data) == 3:
            atime, url, title = data
        else:
            raise ValueError("2 or 3 fields expected")

        url = QUrl(url)
        if not url.isValid():
            raise ValueError("Invalid URL: {}".format(url.errorString()))

        if atime.startswith('\0'):
            log.init.debug(
                "Removing NUL bytes from entry {!r} - see "
                "https://github.com/The-Compiler/qutebrowser/issues/"
                "670".format(data))
            atime = atime.lstrip('\0')
        return cls(atime, url, title)
|
|
|
|
|
|
class WebHistoryInterface(QWebHistoryInterface):

    """Adapter exposing a WebHistory via Qt's QWebHistoryInterface.

    Attributes:
        _history: The WebHistory object which is queried.
    """

    def __init__(self, webhistory, parent):
        super().__init__(parent)
        self._history = webhistory

    def addHistoryEntry(self, url_string):
        """Required for a QWebHistoryInterface impl, obsoleted by add_url."""
        # Deliberately a no-op, the docstring is the whole body.

    def historyContains(self, url_string):
        """Tell WebKit whether the given URL is part of the history.

        Args:
            url_string: The URL (as string) to look up.

        Return:
            True if the URL is a key of the history dict, False otherwise.
        """
        known_urls = self._history.history_dict
        return url_string in known_urls
|
|
|
|
|
|
class WebHistory(QObject):

    """The global history of visited pages.

    This is a little more complex as you'd expect so the history can be read
    from disk async while new history is already arriving.

    self.history_dict is the main place where the history is stored, in an
    OrderedDict (sorted by time) of URL strings mapped to Entry objects.

    While reading from disk is still ongoing, the history is saved in
    self._temp_history instead, and then appended to self.history_dict once
    that's fully populated.

    All history which is new in this session (rather than read from disk from a
    previous browsing session) is also stored in self._new_history.
    self._saved_count tracks how many of those entries were already written to
    disk, so we can always append to the existing data.

    Attributes:
        history_dict: An OrderedDict mapping URL strings to Entry objects,
                      holding entries read from disk as well as entries
                      added in this session.
        _hist_dir: The directory to store the history in
        _lineparser: The AppendLineParser used to save the history.
        _new_history: A list of non-hidden Entry items of the current
                      session.
        _saved_count: How many entries of _new_history have been written to
                      disk.
        _initial_read_started: Whether async_read was called.
        _initial_read_done: Whether async_read has completed.
        _temp_history: OrderedDict of entries added via add_url before
                       async_read has completed.

    Signals:
        add_completion_item: Emitted before a new Entry is added.
                             Used to sync with the completion.
                             arg: The new Entry.
        item_added: Emitted after a new Entry is added.
                    Used to tell the savemanager that the history is dirty.
                    arg: The new Entry.
        cleared: Emitted after the history is cleared.
        async_read_done: Emitted when async_read is done with the on-disk
                         history (or when there is no history directory to
                         read from).
    """

    add_completion_item = pyqtSignal(Entry)
    item_added = pyqtSignal(Entry)
    cleared = pyqtSignal()
    async_read_done = pyqtSignal()

    def __init__(self, hist_dir, hist_name, parent=None):
        super().__init__(parent)
        self._initial_read_started = False
        self._initial_read_done = False
        self._hist_dir = hist_dir
        self._lineparser = lineparser.AppendLineParser(hist_dir, hist_name,
                                                       parent=self)
        self.history_dict = collections.OrderedDict()
        self._temp_history = collections.OrderedDict()
        self._new_history = []
        self._saved_count = 0
        objreg.get('save-manager').add_saveable(
            'history', self.save, self.item_added)

    def __repr__(self):
        return utils.get_repr(self, length=len(self))

    def __iter__(self):
        return iter(self.history_dict.values())

    def __len__(self):
        return len(self.history_dict)

    def async_read(self):
        """Read the initial history.

        This is a generator: it yields (without a value) before handling
        each line of the on-disk history, so whoever drives it decides when
        the next line gets processed. Calling it again after reading was
        started is a no-op.

        Entries which were added via add_url while reading was not done yet
        are merged into the history at the end, both with and without a
        history directory.
        """
        if self._initial_read_started:
            log.init.debug("Ignoring async_read() because reading is started.")
            return
        self._initial_read_started = True

        # Without a history directory the line parser is not touched at all.
        if self._hist_dir is not None:
            with self._lineparser.open():
                for line in self._lineparser:
                    yield

                    line = line.rstrip()
                    if not line:
                        continue

                    try:
                        entry = Entry.from_str(line)
                    except ValueError as e:
                        log.init.warning(
                            "Invalid history entry {!r}: {}!".format(line, e))
                        continue

                    # This de-duplicates history entries; only the latest
                    # entry for each URL is kept. If you want to keep
                    # information about previous hits change the items in
                    # history_dict to be lists or change Entry to have a
                    # list of atimes.
                    self._add_entry(entry)

        self._initial_read_done = True
        self.async_read_done.emit()

        # add_url() stashes entries in _temp_history until the read is done,
        # no matter whether there is a history directory - so always merge
        # them instead of merely asserting that there are none.
        self._merge_temp_history()

    def _merge_temp_history(self):
        """Move the entries stashed in _temp_history to the real history."""
        for entry in self._temp_history.values():
            self._add_entry(entry)
            if not entry.hidden:
                self._new_history.append(entry)
                self.add_completion_item.emit(entry)
        self._temp_history.clear()

    def _add_entry(self, entry, target=None):
        """Add an entry to self.history_dict or another given OrderedDict."""
        if target is None:
            target = self.history_dict
        url_str = entry.url_str()
        target[url_str] = entry
        # Assigning to an existing key keeps its old position, so explicitly
        # move the URL to the end to keep the dict ordered by insertion time.
        target.move_to_end(url_str)

    def get_recent(self):
        """Get the most recent history entries.

        Return:
            What the line parser reports as recent, followed by the string
            form of all entries in _new_history.
        """
        old = self._lineparser.get_recent()
        return old + [str(e) for e in self._new_history]

    def save(self):
        """Save the history to disk.

        Only the entries of _new_history which were not handed to the line
        parser by an earlier save are passed on.
        """
        new = (str(e) for e in self._new_history[self._saved_count:])
        self._lineparser.new_data = new
        self._lineparser.save()
        self._saved_count = len(self._new_history)

    @cmdutils.register(name='history-clear', instance='web-history')
    def clear(self):
        """Clear all browsing history.

        Note this only clears the global history
        (e.g. `~/.local/share/qutebrowser/history` on Linux) but not cookies,
        the back/forward history of a tab, cache or other persistent data.
        """
        self._lineparser.clear()
        self.history_dict.clear()
        self._temp_history.clear()
        self._new_history.clear()
        self._saved_count = 0
        self.cleared.emit()

    def add_url(self, url, title="", hidden=False):
        """Called by WebKit when an URL should be added to the history.

        Nothing is recorded while private browsing is enabled.

        Args:
            url: An url (as QUrl) to add to the history.
            title: The title of the page.
            hidden: Whether to hide the entry from the on-disk history
        """
        if config.get('general', 'private-browsing'):
            return
        entry = Entry(time.time(), url, title, hidden=hidden)
        if self._initial_read_done:
            self._add_entry(entry)
            if not entry.hidden:
                self.add_completion_item.emit(entry)
                self._new_history.append(entry)
                self.item_added.emit(entry)
        else:
            # Still reading from disk (or not started yet): stash the entry
            # so it gets merged after the older on-disk entries.
            self._add_entry(entry, target=self._temp_history)
|
|
|
|
|
|
def init(parent=None):
    """Create the global WebHistory and install it as WebKit's history.

    The history is registered in objreg as 'web-history' and wrapped in a
    WebHistoryInterface which becomes the default QWebHistoryInterface.

    Args:
        parent: The parent to use for WebHistory.
    """
    web_history = WebHistory(hist_dir=standarddir.data(),
                             hist_name='history',
                             parent=parent)
    objreg.register('web-history', web_history)

    glue = WebHistoryInterface(web_history, parent=web_history)
    QWebHistoryInterface.setDefaultInterface(glue)
|