2014-10-11 23:32:50 +02:00
|
|
|
#!/usr/bin/env python3
|
2014-10-15 20:43:47 +02:00
|
|
|
# vim: ft=python fileencoding=utf-8 sts=4 sw=4 et:
|
2014-10-11 23:32:50 +02:00
|
|
|
|
2017-05-09 21:37:03 +02:00
|
|
|
# Copyright 2014-2017 Claude (longneck) <longneck@scratchbook.ch>
|
|
|
|
# Copyright 2014-2017 Florian Bruhin (The Compiler) <mail@qutebrowser.org>
|
2014-10-11 23:32:50 +02:00
|
|
|
|
|
|
|
# This file is part of qutebrowser.
|
|
|
|
#
|
|
|
|
# qutebrowser is free software: you can redistribute it and/or modify
|
|
|
|
# it under the terms of the GNU General Public License as published by
|
|
|
|
# the Free Software Foundation, either version 3 of the License, or
|
|
|
|
# (at your option) any later version.
|
|
|
|
#
|
|
|
|
# qutebrowser is distributed in the hope that it will be useful,
|
|
|
|
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
|
|
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
|
|
# GNU General Public License for more details.
|
|
|
|
#
|
|
|
|
# You should have received a copy of the GNU General Public License
|
|
|
|
# along with qutebrowser. If not, see <http://www.gnu.org/licenses/>.
|
2017-10-11 03:07:29 +02:00
|
|
|
|
|
|
|
|
2014-10-15 06:20:54 +02:00
|
|
|
"""Tool to import data from other browsers.
|
|
|
|
|
2016-03-20 10:49:33 +01:00
|
|
|
Currently only importing bookmarks from Netscape Bookmark files is supported.
|
2014-10-15 06:20:54 +02:00
|
|
|
"""
|
|
|
|
|
2017-10-11 03:07:29 +02:00
|
|
|
|
2014-10-15 06:16:12 +02:00
|
|
|
import argparse
|
2017-10-17 21:47:45 +02:00
|
|
|
import sys
|
2017-10-11 03:07:29 +02:00
|
|
|
|
2017-09-24 13:25:13 +02:00
|
|
|
# Maps each supported browser name to the input format assumed for it when
# no explicit input format is given on the command line.
browser_default_input_format = dict.fromkeys(
    ('chromium', 'ie', 'firefox', 'seamonkey', 'palemoon'),
    'netscape',
)
|
|
|
|
|
2014-10-11 23:32:50 +02:00
|
|
|
|
|
|
|
def main():
    """Parse the command line and run the matching importer.

    Decides which entry types to import ('search', 'bookmark', 'keyword')
    and which output format to print them in, based on the parsed
    arguments, then dispatches to the import function registered for the
    selected input format.

    Exits via sys.exit() with an error message when neither a browser nor
    an explicit input format was given.
    """
    args = get_args()
    bookmark_types = []
    output_format = None
    input_format = args.input_format

    # The flag is registered as '--search-output', so argparse stores it
    # under 'search_output' (there is no 'import_search' attribute).
    if args.search_output:
        # Search engine output replaces the -B/-K selection entirely.
        bookmark_types = ['search']
        output_format = 'oldsearch' if args.oldconfig else 'search'
    else:
        if args.bookmark_output:
            output_format = 'bookmark'
        elif args.quickmark_output:
            output_format = 'quickmark'
        if args.import_bookmarks:
            bookmark_types.append('bookmark')
        if args.import_keywords:
            bookmark_types.append('keyword')

    # Fall back to the defaults when nothing was selected explicitly.
    if not bookmark_types:
        bookmark_types = ['bookmark', 'keyword']
    if not output_format:
        output_format = 'quickmark'
    if not input_format:
        if not args.browser:
            sys.exit("Must specify either browser or input format")
        input_format = browser_default_input_format[args.browser]

    import_function = {'netscape': import_netscape_bookmarks}
    import_function[input_format](args.bookmarks, bookmark_types,
                                  output_format)
|
2014-10-11 23:32:50 +02:00
|
|
|
|
2014-10-15 06:16:12 +02:00
|
|
|
|
2014-10-11 23:32:50 +02:00
|
|
|
def get_args():
    """Parse the command line arguments.

    Return:
        The argparse namespace with the attributes browser, input_format,
        bookmark_output, quickmark_output, search_output, oldconfig,
        import_bookmarks, import_keywords and bookmarks.
    """
    parser = argparse.ArgumentParser(
        epilog="To import bookmarks from Chromium, Firefox or IE, "
        "export them to HTML in your browser's bookmark manager. ")
    # The browser is optional (nargs='?') because -i can be used instead.
    parser.add_argument(
        'browser',
        help="Which browser? {%(choices)s}",
        choices=browser_default_input_format.keys(),
        nargs='?',
        metavar='browser')
    parser.add_argument(
        '-i',
        '--input-format',
        help='Which input format? (overrides browser default)',
        # Sorted so the choices are listed in a stable order in the help.
        choices=sorted(set(browser_default_input_format.values())))
    parser.add_argument(
        '-b',
        '--bookmark-output',
        help="Output in bookmark format.",
        action='store_true')
    parser.add_argument(
        '-q',
        '--quickmark-output',
        help="Output in quickmark format (default).",
        action='store_true')
    parser.add_argument(
        '-s',
        '--search-output',
        help="Output config.py search engine format (negates -B and -K)",
        action='store_true')
    parser.add_argument(
        '--oldconfig',
        help="Output search engine format for old qutebrowser.conf format",
        action='store_true')
    parser.add_argument(
        '-B',
        '--import-bookmarks',
        help="Import plain bookmarks (can be combined with -K)",
        action='store_true')
    parser.add_argument(
        '-K',
        '--import-keywords',
        help="Import keywords (can be combined with -B)",
        action='store_true')
    parser.add_argument('bookmarks', help="Bookmarks file (html format)")
    return parser.parse_args()
|
|
|
|
|
2016-03-20 13:02:04 +01:00
|
|
|
|
2017-09-14 16:50:52 +02:00
|
|
|
def import_netscape_bookmarks(bookmarks_file, bookmark_types, output_format):
    """Import bookmarks from a NETSCAPE-Bookmark-file v1.

    Generated by Chromium, Firefox, IE and possibly more browsers.

    Every matching <a> tag is formatted according to output_format and
    printed to stdout, one entry per line. An URL which was already
    emitted is skipped, so each URL appears at most once in the output.

    Args:
        bookmarks_file: Path of the exported HTML file (read as UTF-8).
        bookmark_types: The kinds of entries to import, any of 'search',
                        'keyword' and 'bookmark'.
        output_format: One of 'search', 'oldsearch', 'bookmark' or
                       'quickmark'; it must provide a template for every
                       requested bookmark type.
    """
    import bs4
    with open(bookmarks_file, encoding='utf-8') as f:
        soup = bs4.BeautifulSoup(f, 'html.parser')

    # All queries require a href attribute, as anchors without one can't be
    # imported and would raise a KeyError when formatting the output.
    bookmark_query = {
        'search': lambda tag: (
            (tag.name == 'a') and
            ('shortcuturl' in tag.attrs) and
            ('href' in tag.attrs) and
            ('%s' in tag['href'])),
        'keyword': lambda tag: (
            (tag.name == 'a') and
            ('shortcuturl' in tag.attrs) and
            ('href' in tag.attrs) and
            ('%s' not in tag['href'])),
        'bookmark': lambda tag: (
            (tag.name == 'a') and
            ('shortcuturl' not in tag.attrs) and
            ('href' in tag.attrs) and
            (tag.string)),
    }
    output_template = {
        'search': {
            'search':
            "c.url.searchengines['{tag[shortcuturl]}'] = "
            "'{tag[href]}' #{tag.string}"
        },
        'oldsearch': {
            'search': '{tag[shortcuturl]} = {tag[href]} #{tag.string}',
        },
        'bookmark': {
            'bookmark': '{tag[href]} {tag.string}',
            'keyword': '{tag[href]} {tag.string}'
        },
        'quickmark': {
            'bookmark': '{tag.string} {tag[href]}',
            'keyword': '{tag[shortcuturl]} {tag[href]}'
        }
    }

    bookmarks = []
    # URLs are tracked separately from the formatted output lines, since a
    # raw href never equals a formatted line and thus can't be looked up in
    # the bookmarks list.
    seen_urls = set()
    for typ in bookmark_types:
        for tag in soup.find_all(bookmark_query[typ]):
            if typ == 'search':
                # qutebrowser uses {} instead of %s as search placeholder.
                tag['href'] = tag['href'].replace('%s', '{}')
            url = tag['href']
            if url in seen_urls:
                continue
            seen_urls.add(url)
            bookmarks.append(
                output_template[output_format][typ].format(tag=tag))

    for bookmark in bookmarks:
        print(bookmark)
|
|
|
|
|
2014-10-15 06:16:12 +02:00
|
|
|
|
2014-10-11 23:32:50 +02:00
|
|
|
# Only run the importer when executed as a script, not when imported.
if __name__ == '__main__':
    main()
|