2017-10-02 06:26:47 +02:00
|
|
|
#!/usr/bin/env python3
|
|
|
|
# vim: ft=python fileencoding=utf-8 sts=4 sw=4 et:
|
|
|
|
|
2018-02-05 12:19:50 +01:00
|
|
|
# Copyright 2017-2018 Florian Bruhin (The Compiler) <mail@qutebrowser.org>
|
|
|
|
# Copyright 2017-2018 Josefson Souza <josefson.br@gmail.com>
|
2017-10-02 06:26:47 +02:00
|
|
|
|
2017-11-17 06:38:56 +01:00
|
|
|
# This file is part of qutebrowser.
|
|
|
|
#
|
2017-10-02 06:26:47 +02:00
|
|
|
# qutebrowser is free software: you can redistribute it and/or modify
|
|
|
|
# it under the terms of the GNU General Public License as published by
|
|
|
|
# the Free Software Foundation, either version 3 of the License, or
|
|
|
|
# (at your option) any later version.
|
|
|
|
#
|
|
|
|
# qutebrowser is distributed in the hope that it will be useful,
|
|
|
|
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
|
|
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
|
|
# GNU General Public License for more details.
|
|
|
|
#
|
|
|
|
# You should have received a copy of the GNU General Public License
|
|
|
|
# along with qutebrowser. If not, see <http://www.gnu.org/licenses/>.
|
|
|
|
|
|
|
|
|
2017-11-17 06:38:56 +01:00
|
|
|
"""Tool to import browser history from other browsers."""
|
2017-10-02 06:26:47 +02:00
|
|
|
|
|
|
|
|
|
|
|
import argparse
|
|
|
|
import sqlite3
|
|
|
|
import sys
|
2017-11-17 06:38:56 +01:00
|
|
|
import os
|
2017-10-02 06:26:47 +02:00
|
|
|
|
|
|
|
|
2017-12-15 16:03:34 +01:00
|
|
|
class Error(Exception):

    """Base error raised for failures detected by this script."""
|
|
|
|
|
2017-12-15 16:03:34 +01:00
|
|
|
|
2017-11-17 06:38:56 +01:00
|
|
|
def parse():
    """Parse command line arguments.

    Returns:
        The argparse namespace holding the required 'browser', 'source' and
        'dest' string attributes.
    """
    description = ("This program is meant to extract browser history from your"
                   " previous browser and import them into qutebrowser.")
    # The epilog is shown verbatim (RawTextHelpFormatter), so every fragment
    # must carry its own separating whitespace.
    epilog = ("Databases:\n\n\tqutebrowser: Is named 'history.sqlite' and can "
              "be found at your --basedir. In order to find where your "
              "basedir is you can run ':open qute:version' inside qutebrowser."
              "\n\n\tFirefox: Is named 'places.sqlite', and can be found at "
              "your system's profile folder. Check this link for where it is "
              "located: http://kb.mozillazine.org/Profile_folder"
              "\n\n\tChrome: Is named 'History', and can be found at the "
              "respective User Data Directory. Check this link for where it "
              "is located: https://chromium.googlesource.com/chromium/src/+/"
              "master/docs/user_data_dir.md\n\n"
              "Example: hist_importer.py -b firefox -s /Firefox/Profile/"
              "places.sqlite -d /qutebrowser/data/history.sqlite")
    parser = argparse.ArgumentParser(
        description=description, epilog=epilog,
        formatter_class=argparse.RawTextHelpFormatter
    )
    parser.add_argument('-b', '--browser', dest='browser', required=True,
                        type=str, help='Browsers: {firefox, chrome}')
    parser.add_argument('-s', '--source', dest='source', required=True,
                        type=str, help='Source: Full path to the sqlite data'
                        'base file from the source browser.')
    parser.add_argument('-d', '--dest', dest='dest', required=True, type=str,
                        help='\nDestination: Full path to the qutebrowser '
                        'sqlite database')
    return parser.parse_args()
|
2017-10-02 06:26:47 +02:00
|
|
|
|
|
|
|
|
2017-10-02 19:54:24 +02:00
|
|
|
def open_db(data_base):
    """Return a sqlite3 connection to an already existing database file.

    Args:
        data_base: File path of the sqlite database to open.

    Raises:
        Error: If data_base is not an existing file.
    """
    if not os.path.isfile(data_base):
        raise Error('The file {} does not exist.'.format(data_base))
    return sqlite3.connect(data_base)
|
2017-10-02 06:26:47 +02:00
|
|
|
|
|
|
|
|
|
|
|
def extract(source, query):
    """Get records from source database.

    Args:
        source: File path to the source database where we want to extract the
        data from.
        query: The query string to be executed in order to retrieve relevant
        attributes as (url, title, atime) from the source database according
        to the browser chosen.

    Returns:
        A list with one tuple per row returned by the query.

    Raises:
        Error: If the source file does not exist or the query could not be
               performed on it.
    """
    try:
        conn = open_db(source)
        try:
            cursor = conn.cursor()
            cursor.execute(query)
            return cursor.fetchall()
        finally:
            # Close the connection even when the query fails.
            conn.close()
    except sqlite3.OperationalError as op_e:
        raise Error('Could not perform queries on the source database: '
                    '{}'.format(op_e)) from op_e
|
2017-10-02 06:26:47 +02:00
|
|
|
|
|
|
|
|
|
|
|
def clean(history):
    """Clean up records from source database.

    Receives a list of records and sanitizes them in order for them to be
    properly imported to qutebrowser. Sanitation replaces a missing title
    with an empty string, purges all records that still contain a NULL/None
    attribute, and adds a 4th attribute 'redirect' which is filled with '0's.

    The list passed in is left untouched; a new list is built.

    Args:
        history: List of records (url, title, atime) from source database.

    Returns:
        A new list of [url, title, atime, '0'] lists.
    """
    cleaned = []
    for record in history:
        fields = list(record)
        # A missing title is harmless, so keep the record with an empty one.
        if fields[1] is None:
            fields[1] = ''
        # Any other missing attribute makes the record unusable.
        if None in fields:
            continue
        fields.append('0')
        cleaned.append(fields)
    return cleaned
|
|
|
|
|
|
|
|
|
|
|
|
def insert_qb(history, dest):
    """Insert history into dest database.

    Args:
        history: List of records, each one as (url, title, atime, redirect).
        dest: File path to the destination database, where history will be
        inserted.

    Raises:
        Error: If the destination file does not exist or the records could
               not be written to it.
    """
    conn = open_db(dest)
    try:
        cursor = conn.cursor()
        cursor.executemany(
            'INSERT INTO History (url,title,atime,redirect) VALUES (?,?,?,?)',
            history
        )
        # Presumably dropped so that qutebrowser rebuilds it from History on
        # its next start - TODO confirm. IF EXISTS keeps a second import run
        # (before qutebrowser recreated the table) from failing.
        cursor.execute('DROP TABLE IF EXISTS CompletionHistory')
        conn.commit()
    except sqlite3.OperationalError as op_e:
        # Nothing was committed, so closing below discards the pending rows.
        raise Error('Could not write to the destination database: '
                    '{}'.format(op_e)) from op_e
    finally:
        conn.close()
|
|
|
|
|
|
|
|
|
2017-12-15 16:03:34 +01:00
|
|
|
def run():
    """Main control flow of the script.

    Parses the command line, extracts the history of the selected browser,
    cleans it up and inserts it into the qutebrowser database.

    Raises:
        Error: If the selected browser is not supported, or if extracting or
               opening one of the databases failed.
    """
    args = parse()
    browser = args.browser.lower()
    source, dest = args.source, args.dest
    # Each query yields (url, title, date), with date in seconds since the
    # Unix epoch.
    query = {
        # last_visit_date is in microseconds since the Unix epoch.
        'firefox': "select url,title,last_visit_date/1000000 as date "
                   "from moz_places where url like 'http%' or url "
                   "like 'ftp%' or url like 'file://%'",
        # last_visit_time is in microseconds since 1601-01-01, which is
        # 11644473600 seconds before the Unix epoch. Never-visited entries
        # (0) are clamped to 0 instead of becoming negative.
        'chrome': 'select url,title,'
                  'max(last_visit_time/1000000-11644473600,0) as date '
                  'from urls',
    }
    if browser not in query:
        raise Error('Sorry, the selected browser: "{}" is not '
                    'supported.'.format(browser))
    history = extract(source, query[browser])
    history = clean(history)
    insert_qb(history, dest)
|
|
|
|
|
|
|
|
|
2017-12-15 16:03:34 +01:00
|
|
|
def main():
    """Run the importer, exiting with the message of any Error raised."""
    try:
        run()
    except Error as err:
        sys.exit(str(err))
|
|
|
|
|
|
|
|
|
2017-10-02 06:26:47 +02:00
|
|
|
# Only start the import when executed as a script, not when imported.
if __name__ == '__main__':
    main()
|