From 31bbc8c5b39abfe7f5d0a28df94b821d6055baa7 Mon Sep 17 00:00:00 2001 From: Ryan Farley Date: Thu, 14 Sep 2017 09:50:52 -0500 Subject: [PATCH] importer support for keywords and search engines This allows importer.py to process Netscape HTML exports from Firefox (and other Mozilla browsers) with three distinct types: * bookmarks (sans shortcuturl attribute) * keywords (bookmarks with a shortcuturl attribute) * searches (keywords with a URL containing a %s substitution) The first two can be combined at will in either quickmark or bookmark output formats, the only difference being that keywords will be used in place of titles when exporting to quickmark format. Searches are exported to qutebrowser.conf format, or the new config.py format. Dictionaries are used in the import function for readability's sake, but the command line arguments follow the same general formula of true-false flags used to select input bookmark types and the output format. --- scripts/importer.py | 87 +++++++++++++++++++++++++++++++++++++-------- 1 file changed, 73 insertions(+), 14 deletions(-) diff --git a/scripts/importer.py b/scripts/importer.py index 1b3be4d32..d140fa2a7 100755 --- a/scripts/importer.py +++ b/scripts/importer.py @@ -31,8 +31,30 @@ import argparse def main(): args = get_args() + bookmark_types = [] + output_format = '' + if args.search_query or args.search_output: + bookmark_types = ['search'] + if args.newconfig: + output_format = 'ncsearch' + else: + output_format = 'search' + else: + if args.bookmark_output: + output_format = 'bookmark' + elif args.quickmark_output: + output_format = 'quickmark' + if args.bookmark_query: + bookmark_types.append('bookmark') + if args.keyword_query: + bookmark_types.append('keyword') + if not bookmark_types: + bookmark_types = ['bookmark','keyword'] + if not output_format: + output_format = 'quickmark' + if args.browser in ['chromium', 'firefox', 'ie']: - import_netscape_bookmarks(args.bookmarks, args.bookmark_format) + import_netscape_bookmarks(args.bookmarks,bookmark_types,output_format) def get_args(): @@ -45,14 +67,31 @@ def get_args(): choices=['chromium', 'firefox', 'ie'], metavar='browser') parser.add_argument('-b', help="Output in bookmark format.", - dest='bookmark_format', action='store_true', + dest='bookmark_output', action='store_true', default=False, required=False) + parser.add_argument('-q', help="Output in quickmark format (default).", + dest='quickmark_output', action='store_true', + default=False,required=False) + parser.add_argument('-s', help="Output search engine format", + dest='search_output', action='store_true', + default=False,required=False) + parser.add_argument('--newconfig', help="Output search engine format for new config.py format", + default=False,action='store_true',required=False) + parser.add_argument('-S', help="Import search engines", + dest='search_query', action='store_true', + default=False,required=False) + parser.add_argument('-B', help="Import plain bookmarks (no keywords)", + dest='bookmark_query', action='store_true', + default=False,required=False) + parser.add_argument('-K', help="Import keywords (no search)", + dest='keyword_query', action='store_true', + default=False,required=False) parser.add_argument('bookmarks', help="Bookmarks file (html format)") args = parser.parse_args() return args -def import_netscape_bookmarks(bookmarks_file, is_bookmark_format): +def import_netscape_bookmarks(bookmarks_file, bookmark_types, output_format): """Import bookmarks from a NETSCAPE-Bookmark-file v1. Generated by Chromium, Firefox, IE and possibly more browsers @@ -60,18 +99,38 @@ def import_netscape_bookmarks(bookmarks_file, is_bookmark_format): import bs4 with open(bookmarks_file, encoding='utf-8') as f: soup = bs4.BeautifulSoup(f, 'html.parser') - - html_tags = soup.findAll('a') - if is_bookmark_format: - output_template = '{tag[href]} {tag.string}' - else: - output_template = '{tag.string} {tag[href]}' - + bookmark_query = { + 'search': + lambda tag: (tag.name == 'a') and ('shortcuturl' in tag.attrs) and ('%s' in tag['href']), + 'keyword': + lambda tag: (tag.name == 'a') and ('shortcuturl' in tag.attrs) and ('%s' not in tag['href']), + 'bookmark': + lambda tag: (tag.name == 'a') and ('shortcuturl' not in tag.attrs) and (tag.string) + } + output_template = { + 'ncsearch': { + 'search': "config.val.url.searchengines['{tag[shortcuturl]}'] = '{tag[href]}' #{tag.string}" + }, + 'search': { + 'search': '{tag[shortcuturl]} = {tag[href]} #{tag.string}', + }, + 'bookmark': { + 'bookmark': '{tag[href]} {tag.string}', + 'keyword': '{tag[href]} {tag.string}' + }, + 'quickmark': { + 'bookmark': '{tag.string} {tag[href]}', + 'keyword': '{tag[shortcuturl]} {tag[href]}' + } + } bookmarks = [] - for tag in html_tags: - if tag['href'] not in bookmarks: - bookmarks.append(output_template.format(tag=tag)) - + for typ in bookmark_types: + tags = soup.findAll(bookmark_query[typ]) + for tag in tags: + if typ=='search': + tag['href'] = tag['href'].replace('%s','{}') + if tag['href'] not in bookmarks: + bookmarks.append(output_template[output_format][typ].format(tag=tag)) for bookmark in bookmarks: print(bookmark)