importer: Chrome support

This adds Chrome/Chromium support to the importer (which ought to be the last of these). Bookmarks are read from JSON, while keywords/search engines (the same thing here) are read from the Web Data sqlite3 database and converted from OpenSearch format.

importer: add tests for opensearch
2017-11-06 00:51:41 -06:00 · 2017-11-06 00:51:41 -06:00 · f5d719dfd4
commit f5d719dfd4
parent 4d8ac7486c
2 changed files with 125 additions and 2 deletions
--- a/scripts/importer.py
+++ b/scripts/importer.py
@@ -30,9 +30,13 @@ profiles is supported.
 import argparse
 import sqlite3
 import os
+import urllib.parse
+import json
+import string

 browser_default_input_format = {
-    'chromium': 'netscape',
+    'chromium': 'chrome',
+    'chrome': 'chrome',
    'ie': 'netscape',
    'firefox': 'mozilla',
    'seamonkey': 'mozilla',
@@ -73,7 +77,8 @@ def main():

    import_function = {
        'netscape': import_netscape_bookmarks,
-        'mozilla': import_moz_places
+        'mozilla': import_moz_places,
+        'chrome': import_chrome
    }
    import_function[input_format](args.bookmarks, bookmark_types,
                                  output_format)
@@ -154,6 +159,33 @@ def search_escape(url):
    return url.replace('{', '{{').replace('}', '}}')


+def opensearch_convert(url):
+    """Translate an OpenSearch URL template into a qutebrowser search URL.
+
+    Known parameters are filled in with fixed values, the search terms
+    parameter ends up as the '{}' placeholder, and optional parameters
+    (those marked with a trailing '?') are dropped whether they are known
+    or not. The result is passed through search_escape() before the
+    placeholder is inserted.
+
+    Exceptions:
+        KeyError:
+            The URL contains a required parameter that is missing from the
+            substitution table, which usually means browser/addon specific
+            functionality would be needed to build the URL.
+    """
+    known_params = {
+        'searchTerms': '%s',  # stand-in, swapped for '{}' after escaping
+        'language': '*',
+        'inputEncoding': 'UTF-8',
+        'outputEncoding': 'UTF-8'
+    }
+
+    # Strip every optional parameter: '{name?}' as well as '{prefix:name?}'.
+    for _text, field, spec, _conversion in string.Formatter().parse(url):
+        if not field:
+            continue
+        if field.endswith('?'):
+            optional = '{' + field + '}'
+        elif spec and spec.endswith('?'):
+            optional = '{' + field + ':' + spec + '}'
+        else:
+            continue
+        url = url.replace(optional, '')
+
+    filled = url.format(**known_params)
+    return search_escape(filled).replace('%s', '{}')
+
+
 def import_netscape_bookmarks(bookmarks_file, bookmark_types, output_format):
    """Import bookmarks from a NETSCAPE-Bookmark-file v1.

@@ -268,5 +300,49 @@ def import_moz_places(profile, bookmark_types, output_format):
            print(out_template[output_format][typ].format(**row))


+def import_chrome(profile, bookmark_types, output_format):
+    """Import bookmarks and search keywords from Chrome-type profiles.
+
+    On Chrome, keywords and search engines are the same thing and handled in
+    their own database table; bookmarks cannot have associated keywords. This
+    is why the dictionary lookups here are much simpler.
+
+    Args:
+        profile: Path of the profile directory; the 'Web Data' database and
+                 the 'Bookmarks' file are looked up inside it.
+        bookmark_types: Collection of types to import. If it contains
+                        'search', only search keywords are imported;
+                        otherwise only bookmarks are.
+        output_format: Key selecting the output template ('bookmark',
+                       'quickmark', 'search' or 'oldsearch').
+
+    The converted entries are printed to stdout, one per line.
+
+    Exceptions:
+        KeyError:
+            output_format is not one of the known templates.
+    """
+    out_template = {
+        'bookmark': '{url} {name}',
+        'quickmark': '{name} {url}',
+        'search': "c.url.searchengines['{keyword}'] = '{url}'",
+        'oldsearch': '{keyword} {url}'
+    }
+    # Looked up once, outside of any try block, so that an unknown output
+    # format is reported as such instead of as an unsupported URL parameter.
+    template = out_template[output_format]
+
+    if 'search' in bookmark_types:
+        webdata = sqlite3.connect(os.path.join(profile, 'Web Data'))
+        try:
+            cursor = webdata.cursor()
+            cursor.execute('SELECT keyword,url FROM keywords;')
+            for keyword, url in cursor:
+                try:
+                    url = opensearch_convert(url)
+                except (KeyError, IndexError, ValueError):
+                    # KeyError: required parameter we can't provide.
+                    # IndexError/ValueError: positional fields or malformed
+                    # braces that str.format() rejects. Skip the entry
+                    # rather than aborting the whole import.
+                    print('# Unsupported parameter in url for {}; '
+                          'skipping....'.format(keyword))
+                else:
+                    print(template.format(keyword=keyword, url=url))
+        finally:
+            # Always release the database handle, even on errors.
+            webdata.close()
+
+    else:
+        with open(os.path.join(profile, 'Bookmarks'), encoding='utf-8') as f:
+            bookmarks = json.load(f)
+
+        def bm_tree_walk(bm, template):
+            """Print every URL bookmark at or below bm using template."""
+            assert 'type' in bm
+            if bm['type'] == 'url':
+                # bookmarks using the chrome:// scheme are left out
+                if urllib.parse.urlparse(bm['url']).scheme != 'chrome':
+                    print(template.format(**bm))
+            elif bm['type'] == 'folder':
+                for child in bm['children']:
+                    bm_tree_walk(child, template)
+
+        for root in bookmarks['roots'].values():
+            bm_tree_walk(root, template)
+
+
 if __name__ == '__main__':
    main()
--- a/tests/unit/scripts/test_importer.py
+++ b/tests/unit/scripts/test_importer.py
@@ -0,0 +1,47 @@
+#!/usr/bin/env python3
+# vim: ft=python fileencoding=utf-8 sts=4 sw=4 et:
+
+# Copyright 2017 Florian Bruhin (The Compiler) <mail@qutebrowser.org>
+
+# This file is part of qutebrowser.
+#
+# qutebrowser is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# qutebrowser is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with qutebrowser.  If not, see <http://www.gnu.org/licenses/>.
+
+import pytest
+from scripts import importer
+
+
+def test_opensearch_convert():
+    """Convert sample OpenSearch URLs and compare with the expected output."""
+    conversions = {
+        # plain search query
+        'http://foo.bar/s?q={searchTerms}': 'http://foo.bar/s?q={}',
+        # search query plus a supported extra parameter
+        'http://foo.bar/s?q={searchTerms}&enc={inputEncoding}':
+            'http://foo.bar/s?q={}&enc=UTF-8',
+        # the same supported parameter, but marked optional
+        'http://foo.bar/s?q={searchTerms}&enc={inputEncoding?}':
+            'http://foo.bar/s?q={}&enc=',
+        # optional parameter that is not supported
+        'http://foo.bar/s?q={searchTerms}&opt={unsupported?}':
+            'http://foo.bar/s?q={}&opt=',
+        # optional, unsupported parameter in prefixed (subset) form
+        'http://foo.bar/s?q={searchTerms}&opt={unsupported:unsupported?}':
+            'http://foo.bar/s?q={}&opt=',
+    }
+    for opensearch_url, expected in conversions.items():
+        assert importer.opensearch_convert(opensearch_url) == expected
+
+    # a required parameter that is not supported has to be rejected
+    bad_url = 'http://foo.bar/s?q={searchTerms}&req={unsupported}'
+    with pytest.raises(KeyError):
+        importer.opensearch_convert(bad_url)