importer support for keywords and search engines

This allows importer.py to process Netscape HTML exports from Firefox
(and other Mozilla browsers) with three distinct types:
	* bookmarks (sans shortcuturl attribute)
	* keywords (bookmarks with a shortcuturl attribute)
	* searches (keywords with a URL containing a %s substitution)
The first two can be freely combined in either the quickmark or the
bookmark output format. The only difference is that, when exporting to
quickmark format, a keyword's shortcut is used in place of its title.
Searches are exported either in qutebrowser.conf format or in the new
config.py format.

The import function uses dictionaries (keyed by bookmark type and by
output format) for readability's sake. The command line arguments keep
the existing style of true/false flags, which now select both the input
bookmark types and the output format.
This commit is contained in:
Ryan Farley 2017-09-14 09:50:52 -05:00
parent e90a5f509e
commit 31bbc8c5b3

View File

@ -31,8 +31,30 @@ import argparse
def main():
    """Turn the command-line flags into import settings and run the import.

    Search engines are handled exclusively: if either search flag is set,
    only 'search' entries are imported and the output is 'ncsearch' (with
    --newconfig) or 'search'. Otherwise the requested entry types are
    collected, defaulting to both 'bookmark' and 'keyword', and the output
    is 'bookmark' if requested, else 'quickmark'.
    """
    args = get_args()

    if args.search_query or args.search_output:
        bookmark_types = ['search']
        output_format = 'ncsearch' if args.newconfig else 'search'
    else:
        requested = (
            ('bookmark', args.bookmark_query),
            ('keyword', args.keyword_query),
        )
        bookmark_types = [name for name, wanted in requested if wanted]
        if not bookmark_types:
            # No type was selected explicitly: import both kinds.
            bookmark_types = ['bookmark', 'keyword']
        # Bookmark output takes precedence; quickmark is both the explicit
        # alternative and the default when no output flag is given.
        output_format = 'bookmark' if args.bookmark_output else 'quickmark'

    if args.browser in ('chromium', 'firefox', 'ie'):
        import_netscape_bookmarks(args.bookmarks, bookmark_types,
                                  output_format)
def get_args(): def get_args():
@ -45,14 +67,31 @@ def get_args():
choices=['chromium', 'firefox', 'ie'], choices=['chromium', 'firefox', 'ie'],
metavar='browser') metavar='browser')
parser.add_argument('-b', help="Output in bookmark format.", parser.add_argument('-b', help="Output in bookmark format.",
dest='bookmark_format', action='store_true', dest='bookmark_output', action='store_true',
default=False, required=False) default=False, required=False)
parser.add_argument('-q', help="Output in quickmark format (default).",
dest='quickmark_output', action='store_true',
default=False,required=False)
parser.add_argument('-s', help="Output search engine format",
dest='search_output', action='store_true',
default=False,required=False)
parser.add_argument('--newconfig', help="Output search engine format for new config.py format",
default=False,action='store_true',required=False)
parser.add_argument('-S', help="Import search engines",
dest='search_query', action='store_true',
default=False,required=False)
parser.add_argument('-B', help="Import plain bookmarks (no keywords)",
dest='bookmark_query', action='store_true',
default=False,required=False)
parser.add_argument('-K', help="Import keywords (no search)",
dest='keyword_query', action='store_true',
default=False,required=False)
parser.add_argument('bookmarks', help="Bookmarks file (html format)") parser.add_argument('bookmarks', help="Bookmarks file (html format)")
args = parser.parse_args() args = parser.parse_args()
return args return args
def import_netscape_bookmarks(bookmarks_file, bookmark_types, output_format):
    """Import bookmarks from a NETSCAPE-Bookmark-file v1 and print them.

    Generated by Chromium, Firefox, IE and possibly more browsers.

    Entries are <a> tags, classified as follows:
        * 'search': has a shortcuturl attribute and '%s' in its href
        * 'keyword': has a shortcuturl attribute and no '%s' in its href
        * 'bookmark': has no shortcuturl attribute and a non-empty title

    Args:
        bookmarks_file: Path of the HTML export, read as UTF-8.
        bookmark_types: Iterable of entry types to extract ('bookmark',
            'keyword', 'search'); output follows this order.
        output_format: One of 'quickmark', 'bookmark', 'search' or
            'ncsearch'.

    Raises:
        KeyError: If a requested type is unknown or output_format has no
            template for it.
    """
    import bs4
    with open(bookmarks_file, encoding='utf-8') as f:
        soup = bs4.BeautifulSoup(f, 'html.parser')

    def is_link(tag):
        # Anchors without an href cannot be exported, so they never match.
        return tag.name == 'a' and 'href' in tag.attrs

    def has_keyword(tag):
        return 'shortcuturl' in tag.attrs

    bookmark_query = {
        'search':
            lambda tag: (is_link(tag) and has_keyword(tag) and
                         '%s' in tag['href']),
        'keyword':
            lambda tag: (is_link(tag) and has_keyword(tag) and
                         '%s' not in tag['href']),
        'bookmark':
            lambda tag: (is_link(tag) and not has_keyword(tag) and
                         bool(tag.string)),
    }
    output_template = {
        'ncsearch': {
            'search': "config.val.url.searchengines['{tag[shortcuturl]}'] = '{tag[href]}' #{tag.string}"
        },
        'search': {
            'search': '{tag[shortcuturl]} = {tag[href]} #{tag.string}',
        },
        'bookmark': {
            'bookmark': '{tag[href]} {tag.string}',
            'keyword': '{tag[href]} {tag.string}'
        },
        'quickmark': {
            'bookmark': '{tag.string} {tag[href]}',
            'keyword': '{tag[shortcuturl]} {tag[href]}'
        }
    }

    # Track URLs separately from the formatted lines: comparing a URL
    # against the output lines can never match, so it filtered nothing.
    seen_urls = set()
    bookmarks = []
    for typ in bookmark_types:
        template = output_template[output_format][typ]
        for tag in soup.find_all(bookmark_query[typ]):
            if typ == 'search':
                # qutebrowser uses {} as the search-term placeholder.
                tag['href'] = tag['href'].replace('%s', '{}')
            url = tag['href']
            if url in seen_urls:
                continue
            seen_urls.add(url)
            bookmarks.append(template.format(tag=tag))

    for bookmark in bookmarks:
        print(bookmark)