From c2197102a38c9eee128d57db406682b51448b1e0 Mon Sep 17 00:00:00 2001
From: Michal Siedlaczek
Date: Sun, 6 Aug 2017 16:10:12 -0700
Subject: [PATCH] Enable spell checking and installing dictionaries for QtWebEngine

---
 MANIFEST.in                                   |   1 +
 misc/lang_list                                |  42 +++++
 qutebrowser/browser/webengine/spell.py        | 146 ++++++++++++++++++
 .../browser/webengine/webenginesettings.py    |  20 +++
 scripts/install_dict.py                       |  68 ++++++++
 tests/unit/browser/webengine/test_spell.py    |  69 +++++++++
 6 files changed, 346 insertions(+)
 create mode 100644 misc/lang_list
 create mode 100644 qutebrowser/browser/webengine/spell.py
 create mode 100755 scripts/install_dict.py
 create mode 100644 tests/unit/browser/webengine/test_spell.py

diff --git a/MANIFEST.in b/MANIFEST.in
index ec906aaf4..d6f9e713a 100644
--- a/MANIFEST.in
+++ b/MANIFEST.in
@@ -18,6 +18,7 @@ include tox.ini
 include qutebrowser.py
 include misc/cheatsheet.svg
 include qutebrowser/config/configdata.yml
+include misc/lang_list
 
 prune www
 prune scripts/dev
diff --git a/misc/lang_list b/misc/lang_list
new file mode 100644
index 000000000..ea5a1b17b
--- /dev/null
+++ b/misc/lang_list
@@ -0,0 +1,42 @@
+af-ZA	Afrikaans (South Africa)	af-ZA-3-0.bdic
+bg-BG	Bulgarian (Bulgaria)	bg-BG-3-0.bdic
+ca-ES	Catalan (Spain)	ca-ES-3-0.bdic
+cs-CZ	Czech (Czech Republic)	cs-CZ-3-0.bdic
+da-DK	Danish (Denmark)	da-DK-3-0.bdic
+de-DE	German (Germany)	de-DE-3-0.bdic
+el-GR	Greek (Greece)	el-GR-3-0.bdic
+en-CA	English (Canada)	en-CA-7-1.bdic
+en-GB	English (United Kingdom)	en-GB-7-1.bdic
+en-US	English (United States)	en-US-7-1.bdic
+es-ES	Spanish (Spain)	es-ES-3-0.bdic
+et-EE	Estonian (Estonia)	et-EE-3-0.bdic
+fa-IR	Farsi (Iran)	fa-IR-7-0.bdic
+fo-FO	Faroese (Faroe Islands)	fo-FO-3-0.bdic
+fr-FR	French (France)	fr-FR-3-0.bdic
+he-IL	Hebrew (Israel)	he-IL-3-0.bdic
+hi-IN	Hindi (India)	hi-IN-3-0.bdic
+hr-HR	Croatian (Croatia)	hr-HR-3-0.bdic
+hu-HU	Hungarian (Hungary)	hu-HU-3-0.bdic
+id-ID	Indonesian (Indonesia)	id-ID-3-0.bdic
+it-IT	Italian (Italy)	it-IT-3-0.bdic
+ko	Korean	ko-3-0.bdic
+lt-LT	Lithuanian (Lithuania)	lt-LT-3-0.bdic
+lv-LV	Latvian (Latvia)	lv-LV-3-0.bdic
+nb-NO	Norwegian (Norway)	nb-NO-3-0.bdic
+nl-NL	Dutch (Netherlands)	nl-NL-3-0.bdic
+pl-PL	Polish (Poland)	pl-PL-3-0.bdic
+pt-BR	Portuguese (Brazil)	pt-BR-3-0.bdic
+pt-PT	Portuguese (Portugal)	pt-PT-3-0.bdic
+ro-RO	Romanian (Romania)	ro-RO-3-0.bdic
+ru-RU	Russian (Russia)	ru-RU-3-0.bdic
+sh	Serbo-Croatian	sh-3-0.bdic
+sk-SK	Slovak (Slovakia)	sk-SK-3-0.bdic
+sl-SI	Slovenian (Slovenia)	sl-SI-3-0.bdic
+sq	Albanian	sq-3-0.bdic
+sr	Serbian	sr-3-0.bdic
+sv-SE	Swedish (Sweden)	sv-SE-3-0.bdic
+ta-IN	Tamil (India)	ta-IN-3-0.bdic
+tg-TG	Tajik (Tajikistan)	tg-TG-5-0.bdic
+tr-TR	Turkish (Turkey)	tr-TR-4-0.bdic
+uk-UA	Ukrainian (Ukraine)	uk-UA-3-0.bdic
+vi-VN	Vietnamese (Viet Nam)	vi-VN-3-0.bdic
diff --git a/qutebrowser/browser/webengine/spell.py b/qutebrowser/browser/webengine/spell.py
new file mode 100644
index 000000000..5a5df344b
--- /dev/null
+++ b/qutebrowser/browser/webengine/spell.py
@@ -0,0 +1,146 @@
+# vim: ft=python fileencoding=utf-8 sts=4 sw=4 et
+
+# Copyright 2017 Michal Siedlaczek
+
+# This file is part of qutebrowser.
+#
+# qutebrowser is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# qutebrowser is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with qutebrowser.  If not, see <http://www.gnu.org/licenses/>.
+
+"""Installing and configuring spell-checking for QtWebEngine."""
+
+import os
+from urllib.request import urlretrieve
+
+from PyQt5.QtCore import QLibraryInfo
+
+
+class Language:
+
+    """Dictionary language specs."""
+
+    def __init__(self, code, name, file):
+        self.code = code
+        self.name = name
+        self.file = file
+
+    @staticmethod
+    def from_array(lang_array):
+        """Create Language object from an array.
+
+        Args:
+            lang_array: an array of strings containing
+                        the specs of the language in the following format:
+                        [code, name, file]
+        """
+        return Language.from_tuple(tuple(lang_array))
+
+    @staticmethod
+    def from_tuple(lang_tuple):
+        """Create Language object from a tuple.
+
+        Args:
+            lang_tuple: a tuple of strings containing
+                        the specs of the language in the following format:
+                        (code, name, file)
+        """
+        code, name, file = lang_tuple
+        return Language(code, name, file)
+
+    @staticmethod
+    def from_tsv_string(tsv_string):
+        """Create Language object from a string in tab-separated values format.
+
+        Args:
+            tsv_string: a string containing
+                        the specs of the language in the following format:
+                        "code name file"
+        """
+        lang_array = tsv_string.split('\t')
+        return Language.from_array(lang_array)
+
+    def __repr__(self):
+        return 'Language({}, {}, {})'.format(self.code, self.name, self.file)
+
+
+def get_dictionary_dir():
+    """Return the path to the QtWebEngine's dictionaries directory."""
+    return QLibraryInfo.location(QLibraryInfo.DataPath) + \
+        '/qtwebengine_dictionaries'
+
+
+def get_language_list_file():
+    """Return the path to the file with the list of all available languages."""
+    root_dir = os.path.dirname(os.path.abspath(__file__))
+    # TODO: not sure how to determine the following path 'the right way'
+    return os.path.join(root_dir, '../../../', 'misc', 'lang_list')
+
+
+def get_available_languages():
+    """Return a list of Language objects of all available languages."""
+    with open(get_language_list_file(), 'r', encoding='UTF-8') as file:
+        return [Language.from_tsv_string(line.rstrip('\n'))
+                for line in file if line.strip()]
+
+
+def get_installed_languages():
+    """Return a list of Language objects of all installed languages."""
+    installed_files = [os.path.basename(file)
+                       for file in os.listdir(get_dictionary_dir())]
+    all_languages = get_available_languages()
+    return filter_languages(all_languages, installed_files,
+                            by=lambda lang: lang.file,
+                            fail_on_unknown=False)
+
+
+def filter_languages(languages, selected, by=lambda lang: lang.code,
+                     fail_on_unknown=True):
+    """Filter a list of languages based on an inclusion list.
+
+    Args:
+        languages: a list of languages to filter
+        selected: a list of keys to select (left unmodified)
+        by: a function returning the selection key (code by default)
+        fail_on_unknown: whether to raise an error if there is an unknown
+                         key in selected
+    """
+    filtered_languages = []
+    # Consume keys from a copy so the caller's list is never mutated.
+    remaining = list(selected)
+    for language in languages:
+        key = by(language)
+        if key in remaining:
+            filtered_languages.append(language)
+            remaining.remove(key)
+    if fail_on_unknown and remaining:
+        unknown = ', '.join(remaining)
+        raise ValueError('unknown languages found: {}'.format(unknown))
+    return filtered_languages
+
+
+def install(languages):
+    """Install languages."""
+    repository_url = 'https://redirector.gvt1.com/edgedl/chrome/dict'
+    for lang in languages:
+        try:
+            print('Installing {}: {}'.format(lang.code, lang.name))
+            lang_url = '{}/{}'.format(repository_url, lang.file)
+            if not os.path.isdir(get_dictionary_dir()):
+                print('WARN: {} does not exist, creating the directory'.format(
+                    get_dictionary_dir()))
+                os.makedirs(get_dictionary_dir())
+            print('Downloading {}'.format(lang_url))
+            urlretrieve(lang_url, get_dictionary_dir() + '/' + lang.file)
+            print('Done.')
+        except PermissionError as e:
+            print(e)
diff --git a/qutebrowser/browser/webengine/webenginesettings.py b/qutebrowser/browser/webengine/webenginesettings.py
index 7b4ece0e8..8130ecbde 100644
--- a/qutebrowser/browser/webengine/webenginesettings.py
+++ b/qutebrowser/browser/webengine/webenginesettings.py
@@ -36,6 +36,7 @@ from PyQt5.QtWebEngineWidgets import (QWebEngineSettings, QWebEngineProfile,
                                       QWebEngineScript)
 
 from qutebrowser.browser import shared
+from qutebrowser.browser.webengine.spell import get_installed_languages
 from qutebrowser.config import config, websettings
 from qutebrowser.utils import utils, standarddir, javascript, qtutils
 
@@ -127,6 +128,22 @@ class PersistentCookiePolicy(DefaultProfileSetter):
         )
 
 
+class DictionaryLanguageSetter(DefaultProfileSetter):
+
+    """Sets paths to dictionary files based on language codes."""
+
+    def __init__(self):
+        super().__init__('setSpellCheckLanguages')
+
+    def _set(self, value, settings=None):
+        if settings is not None:
+            raise ValueError("'settings' may not be set with "
+                             "DictionaryLanguageSetter!")
+        files = [lang.file[:-5]
+                 for lang in get_installed_languages() if lang.code in value]
+        super()._set(files, settings)
+
+
 def _init_stylesheet(profile):
     """Initialize custom stylesheets.
 
@@ -299,6 +316,9 @@ MAPPINGS = {
 
     'scrolling.smooth':
         Attribute(QWebEngineSettings.ScrollAnimatorEnabled),
+
+    'spell': DefaultProfileSetter('setSpellCheckEnabled'),
+    'spell-languages': DictionaryLanguageSetter()
 }
 
 try:
diff --git a/scripts/install_dict.py b/scripts/install_dict.py
new file mode 100755
index 000000000..8d45f2f6b
--- /dev/null
+++ b/scripts/install_dict.py
@@ -0,0 +1,68 @@
+#!/usr/bin/env python3
+# vim: ft=python fileencoding=utf-8 sts=4 sw=4 et
+
+# Copyright 2017 Michal Siedlaczek
+
+# This file is part of qutebrowser.
+#
+# qutebrowser is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# qutebrowser is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with qutebrowser.  If not, see <http://www.gnu.org/licenses/>.
+
+"""A script installing Hunspell dictionaries.
+
+Use: python -m scripts.install_dict [--list] [lang [lang [...]]]
+"""
+
+import argparse
+import sys
+
+from qutebrowser.browser.webengine import spell
+
+
+def get_argparser():
+    """Get the argparse parser."""
+    desc = 'Install Hunspell dictionaries for QtWebEngine.'
+    parser = argparse.ArgumentParser(prog='install_dict',
+                                     description=desc)
+    parser.add_argument('-l', '--list', action='store_true',
+                        help="Display the list of available languages.")
+    parser.add_argument('languages', nargs='*',
+                        help="A list of languages to install.")
+    return parser
+
+
+def print_list(languages):
+    """Print the code and name of each language, separated by a tab."""
+    for lang in languages:
+        print('{1}\t{0}'.format(lang.name, lang.code))
+
+
+def main():
+    """Parse the command line, then list or install dictionaries."""
+    parser = get_argparser()
+    argv = sys.argv[1:]
+    args = parser.parse_args(argv)
+    languages = spell.get_available_languages()
+    if args.list:
+        print_list(languages)
+    elif not args.languages:
+        parser.print_usage()
+    else:
+        try:
+            spell.install(spell.filter_languages(languages, args.languages))
+        except ValueError as e:
+            print(e)
+
+
+if __name__ == '__main__':
+    main()
diff --git a/tests/unit/browser/webengine/test_spell.py b/tests/unit/browser/webengine/test_spell.py
new file mode 100644
index 000000000..d042a5991
--- /dev/null
+++ b/tests/unit/browser/webengine/test_spell.py
@@ -0,0 +1,69 @@
+# vim: ft=python fileencoding=utf-8 sts=4 sw=4 et:
+
+# Copyright 2017 Michal Siedlaczek
+
+# This file is part of qutebrowser.
+#
+# qutebrowser is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# qutebrowser is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with qutebrowser.  If not, see <http://www.gnu.org/licenses/>.
+
+
+from os.path import basename
+
+import pytest
+
+from qutebrowser.browser.webengine import spell
+
+AFRIKAANS = spell.Language('af-ZA',
+                           'Afrikaans (South Africa)',
+                           'af-ZA-3-0.bdic')
+ENGLISH = spell.Language('en-US',
+                         'English (United States)',
+                         'en-US-7-1.bdic')
+POLISH = spell.Language('pl-PL',
+                        'Polish (Poland)',
+                        'pl-PL-3-0.bdic')
+
+LANGUAGE_LIST = [AFRIKAANS, ENGLISH, POLISH]
+
+
+def test_get_available_languages():
+    language_list = spell.get_available_languages()
+    assert len(language_list) == 42
+    first_lang = language_list[0]
+    assert (first_lang.code, first_lang.name, first_lang.file) ==\
+        (AFRIKAANS.code, AFRIKAANS.name, AFRIKAANS.file)
+
+
+def test_filter_languages():
+    filtered_languages = spell.filter_languages(LANGUAGE_LIST, ['af-ZA'])
+    assert filtered_languages == [AFRIKAANS]
+    filtered_languages = spell.filter_languages(LANGUAGE_LIST,
+                                                ['pl-PL', 'en-US'])
+    assert filtered_languages == [ENGLISH, POLISH]
+    with pytest.raises(ValueError):
+        spell.filter_languages(LANGUAGE_LIST, ['pl-PL', 'en-GB'])
+    filtered_languages = spell.filter_languages(LANGUAGE_LIST,
+                                                ['pl-PL-3-0.bdic'],
+                                                by=lambda lang: lang.file)
+    assert filtered_languages == [POLISH]
+
+
+def test_install(tmpdir, mocker):
+    mocker.patch('qutebrowser.browser.webengine.spell.get_dictionary_dir',
+                 lambda: str(tmpdir))
+    all_languages = spell.get_available_languages()
+    spell.install(all_languages)
+    installed_files = [basename(file) for file in tmpdir.listdir()]
+    expected_files = [lang.file for lang in all_languages]
+    assert sorted(installed_files) == sorted(expected_files)