def enchant_sanity_test(language: str = '', word: str = '') -> bool:
    '''Return True if python3-enchant produces suggestions for a word.

    :param language: The language of the dictionary to try
    :param word: The word to give to enchant to ask for suggestions

    This is used as a sanity check whether python3-enchant works at all.
    For example, if a Czech dictionary is opened like

        d = enchant.Dict('cs_CZ')

    and then something like

        retval = d.suggest('Praha')

    returns an empty list instead of a list of some words, then
    something is seriously wrong with python3-enchant and it is better
    to skip the test case which relies on python3-enchant working for
    that language.
    '''
    # Guard clauses: no language or no word, or no hunspell dictionary
    # available for the language, means the check cannot succeed.
    if not language or not word:
        return False
    if not itb_util.get_hunspell_dictionary_wordlist(language)[0]:
        return False
    return bool(enchant.Dict(language).suggest(word))
# 示例#2 (sample separator from the original scrape — commented out so the file stays valid Python)
# 0
 def test_korean(self):
     '''Type Korean with the ko-romaja input method and check preedit,
     candidate list, and committed text.'''
     if not itb_util.get_hunspell_dictionary_wordlist('ko_KR')[0]:
         # No Korean dictionary file could be found, skip this
         # test.  On some systems, like 'Arch' or 'FreeBSD', there
         # is no ko_KR.dic hunspell dictionary available, therefore
         # there is no way to run this test on these systems.
         # On systems where a Korean hunspell dictionary is available,
         # make sure it is installed to make this test case run.
         # In the ibus-typing-booster.spec file for Fedora,
         # a "BuildRequires:  hunspell-ko" exists for that purpose
         # to make sure this test runs when building the rpm package.
         return
     self.engine.set_current_imes(['ko-romaja'])
     self.engine.set_dictionary_names(['ko_KR'])

     def type_keys(characters):
         # Send one key event per ASCII character typed.
         for character in characters:
             self.engine.do_process_key_event(
                 getattr(IBus, 'KEY_' + character), 0, 0)

     type_keys('annyeonghasey')
     self.assertEqual(self.engine.mock_preedit_text, 'ьХИыЕХэХШьД╕ьЭ┤')
     candidates = [
         unicodedata.normalize('NFC', candidate[0])
         for candidate in self.engine._candidates]
     self.assertEqual(True, 'ьХИыЕХэХШьД╕ьЪФ' in candidates)
     type_keys('o')
     self.assertEqual(self.engine.mock_preedit_text, 'ьХИыЕХэХШьД╕ьЪФ')
     self.engine.do_process_key_event(IBus.KEY_space, 0, 0)
     self.assertEqual(self.engine.mock_preedit_text, '')
     self.assertEqual(self.engine.mock_committed_text, 'ьХИыЕХэХШьД╕ьЪФ ')
     type_keys('annyeonghasey')
     self.assertEqual(self.engine.mock_preedit_text, 'ьХИыЕХэХШьД╕ьЭ┤')
     candidates = [
         unicodedata.normalize('NFC', candidate[0])
         for candidate in self.engine._candidates]
     self.assertEqual(True, 'ьХИыЕХэХШьД╕ьЪФ' in candidates)
     self.assertEqual('ьХИыЕХэХШьД╕ьЪФ', candidates[0])
 def test_korean(self):
     '''Type Korean with the ko-romaja input method and check preedit,
     candidate list, and committed text.

     NOTE(review): this method name duplicates an earlier definition
     in this file; Python keeps only the last one — confirm the
     duplication is intentional.
     '''
     if not itb_util.get_hunspell_dictionary_wordlist('ko_KR')[0]:
         # No Korean dictionary file could be found, skip this
         # test.  On some systems, like 'Arch' or 'FreeBSD', there
         # is no ko_KR.dic hunspell dictionary available, therefore
         # there is no way to run this test on these systems.
         # On systems where a Korean hunspell dictionary is available,
         # make sure it is installed to make this test case run.
         # In the ibus-typing-booster.spec file for Fedora,
         # a "BuildRequires:  hunspell-ko" exists for that purpose
         # to make sure this test runs when building the rpm package.
         return
     self.engine.set_current_imes(['ko-romaja'])
     self.engine.set_dictionary_names(['ko_KR'])
     # The romaja key sequence for the greeting, minus the final 'o':
     romaja_keys = [
         IBus.KEY_a, IBus.KEY_n, IBus.KEY_n, IBus.KEY_y, IBus.KEY_e,
         IBus.KEY_o, IBus.KEY_n, IBus.KEY_g, IBus.KEY_h, IBus.KEY_a,
         IBus.KEY_s, IBus.KEY_e, IBus.KEY_y]
     for key in romaja_keys:
         self.engine.do_process_key_event(key, 0, 0)
     self.assertEqual(self.engine.mock_preedit_text, 'ьХИыЕХэХШьД╕ьЭ┤')
     candidates = [unicodedata.normalize('NFC', item[0])
                   for item in self.engine._candidates]
     self.assertEqual(True, 'ьХИыЕХэХШьД╕ьЪФ' in candidates)
     self.engine.do_process_key_event(IBus.KEY_o, 0, 0)
     self.assertEqual(self.engine.mock_preedit_text, 'ьХИыЕХэХШьД╕ьЪФ')
     self.engine.do_process_key_event(IBus.KEY_space, 0, 0)
     self.assertEqual(self.engine.mock_preedit_text, '')
     self.assertEqual(self.engine.mock_committed_text, 'ьХИыЕХэХШьД╕ьЪФ ')
     for key in romaja_keys:
         self.engine.do_process_key_event(key, 0, 0)
     self.assertEqual(self.engine.mock_preedit_text, 'ьХИыЕХэХШьД╕ьЭ┤')
     candidates = [unicodedata.normalize('NFC', item[0])
                   for item in self.engine._candidates]
     self.assertEqual(True, 'ьХИыЕХэХШьД╕ьЪФ' in candidates)
     self.assertEqual('ьХИыЕХэХШьД╕ьЪФ', candidates[0])
    def load_dictionary(self):
        '''Load a hunspell dictionary and instantiate a
        enchant.Dict() or a hunspell.Hunspell() object.

        '''
        if DEBUG_LEVEL > 0:
            sys.stderr.write("load_dictionary() ...\n")
        # get_hunspell_dictionary_wordlist() returns the path of the
        # .dic file, its encoding, and the list of words read from it:
        (self.dic_path,
         self.encoding,
         self.words) = itb_util.get_hunspell_dictionary_wordlist(self.name)
        if self.words:
            # List of languages where accent insensitive matching makes sense:
            accent_languages = (
                'af', 'ast', 'az', 'be', 'bg', 'br', 'bs', 'ca', 'cs', 'csb',
                'cv', 'cy', 'da', 'de', 'dsb', 'el', 'en', 'es', 'eu', 'fo',
                'fr', 'fur', 'fy', 'ga', 'gd', 'gl', 'grc', 'gv', 'haw', 'hr',
                'hsb', 'ht', 'hu', 'ia', 'is', 'it', 'kk', 'ku', 'ky', 'lb',
                'ln', 'lv', 'mg', 'mi', 'mk', 'mn', 'mos', 'mt', 'nb', 'nds',
                'nl', 'nn', 'nr', 'nso', 'ny', 'oc', 'pl', 'plt', 'pt', 'qu',
                'quh', 'ru', 'sc', 'se', 'sh', 'shs', 'sk', 'sl', 'smj', 'sq',
                'sr', 'ss', 'st', 'sv', 'tet', 'tk', 'tn', 'ts', 'uk', 'uz',
                've', 'vi', 'wa', 'xh',
            )
            if self.name.split('_')[0] in accent_languages:
                # Keep (word, accent-stripped word) pairs so matching
                # can be done accent insensitively:
                self.word_pairs = [
                    (x, itb_util.remove_accents(x))
                    for x in self.words
                ]
            # Remember the length of the longest word in this dictionary:
            for word in self.words:
                if len(word) > self.max_word_len:
                    self.max_word_len = len(word)
            if DEBUG_LEVEL > 1:
                sys.stderr.write(
                    'load_dictionary() max_word_len = %s\n'
                    % self.max_word_len)
            # Prefer enchant for spellchecking, fall back to pyhunspell
            # when enchant could not be imported:
            if IMPORT_ENCHANT_SUCCESSFUL:
                self.enchant_dict = enchant.Dict(self.name)
            elif IMPORT_HUNSPELL_SUCCESSFUL and self.dic_path:
                aff_path = self.dic_path.replace('.dic', '.aff')
                self.pyhunspell_object = hunspell.HunSpell(
                    self.dic_path, aff_path)
# 示例#5 (sample separator from the original scrape — commented out so the file stays valid Python)
# 0
    def load_dictionary(self):
        '''Load a hunspell dictionary and instantiate a
        enchant.Dict() or a hunspell.Hunspell() object.

        '''
        if DEBUG_LEVEL > 0:
            sys.stderr.write("load_dictionary() ...\n")
        (self.dic_path,
         self.encoding,
         self.words) = itb_util.get_hunspell_dictionary_wordlist(self.name)
        if not self.words:
            return
        # Languages for which accent insensitive matching makes sense:
        accent_languages = (
            'af', 'ast', 'az', 'be', 'bg', 'br', 'bs', 'ca', 'cs', 'csb',
            'cv', 'cy', 'da', 'de', 'dsb', 'el', 'en', 'es', 'eu', 'fo',
            'fr', 'fur', 'fy', 'ga', 'gd', 'gl', 'grc', 'gv', 'haw', 'hr',
            'hsb', 'ht', 'hu', 'ia', 'is', 'it', 'kk', 'ku', 'ky', 'lb',
            'ln', 'lv', 'mg', 'mi', 'mk', 'mn', 'mos', 'mt', 'nb', 'nds',
            'nl', 'nn', 'nr', 'nso', 'ny', 'oc', 'pl', 'plt', 'pt', 'qu',
            'quh', 'ru', 'sc', 'se', 'sh', 'shs', 'sk', 'sl', 'smj', 'sq',
            'sr', 'ss', 'st', 'sv', 'tet', 'tk', 'tn', 'ts', 'uk', 'uz',
            've', 'vi', 'wa', 'xh',
        )
        if self.name.split('_')[0] in accent_languages:
            self.word_pairs = [
                (word, itb_util.remove_accents(word))
                for word in self.words]
        # self.words is not empty here, so the inner max() is safe:
        self.max_word_len = max(
            self.max_word_len, max(len(word) for word in self.words))
        if DEBUG_LEVEL > 1:
            sys.stderr.write(
                'load_dictionary() max_word_len = %s\n'
                % self.max_word_len)
        if IMPORT_ENCHANT_SUCCESSFUL:
            self.enchant_dict = enchant.Dict(self.name)
        elif IMPORT_HUNSPELL_SUCCESSFUL and self.dic_path:
            aff_path = self.dic_path.replace('.dic', '.aff')
            self.pyhunspell_object = hunspell.HunSpell(self.dic_path, aff_path)
# 示例#6 (sample separator from the original scrape — commented out so the file stays valid Python)
# 0
    def load_dictionary(self):
        '''Load a hunspell dictionary and set up a spellchecking backend.

        Reads the word list from the hunspell .dic file and then,
        depending on which modules could be imported, instantiates a
        libvoikko.Voikko() (Finnish only), an enchant.Dict(), or a
        hunspell.HunSpell() object.
        '''
        if DEBUG_LEVEL > 0:
            # logging appends the newline itself, no trailing '\n':
            LOGGER.debug('load_dictionary() ...')
        (self.dic_path, self.encoding,
         self.words) = itb_util.get_hunspell_dictionary_wordlist(self.name)
        if not self.words:
            return
        # List of languages where accent insensitive matching makes sense:
        accent_languages = (
            'af', 'ast', 'az', 'be', 'bg', 'br', 'bs', 'ca', 'cs', 'csb',
            'cv', 'cy', 'da', 'de', 'dsb', 'el', 'en', 'es', 'eu', 'fi',
            'fo', 'fr', 'fur', 'fy', 'ga', 'gd', 'gl', 'grc', 'gv', 'haw',
            'hr', 'hsb', 'ht', 'hu', 'ia', 'is', 'it', 'kk', 'ku', 'ky',
            'lb', 'ln', 'lv', 'mg', 'mi', 'mk', 'mn', 'mos', 'mt', 'nb',
            'nds', 'nl', 'nn', 'nr', 'nso', 'ny', 'oc', 'pl', 'plt', 'pt',
            'qu', 'quh', 'ru', 'sc', 'se', 'sh', 'shs', 'sk', 'sl', 'smj',
            'sq', 'sr', 'ss', 'st', 'sv', 'tet', 'tk', 'tn', 'ts', 'uk',
            'uz', 've', 'vi', 'wa', 'xh',
        )
        if self.name.split('_')[0] in accent_languages:
            # Keep (word, accent-stripped word) pairs so matching can
            # be done accent insensitively:
            self.word_pairs = [
                (word, itb_util.remove_accents(word))
                for word in self.words]
        # Remember the length of the longest word in this dictionary
        # (self.words is not empty here, so the inner max() is safe):
        self.max_word_len = max(
            self.max_word_len, max(len(word) for word in self.words))
        if DEBUG_LEVEL > 1:
            LOGGER.debug('max_word_len = %s', self.max_word_len)
        if self.name.split('_')[0] == 'fi':
            # Finnish uses libvoikko, not enchant nor pyhunspell:
            self.enchant_dict = None
            self.pyhunspell_object = None
            if IMPORT_LIBVOIKKO_SUCCESSFUL:
                self.voikko = libvoikko.Voikko('fi')
            return
        if IMPORT_ENCHANT_SUCCESSFUL:
            try:
                self.enchant_dict = enchant.Dict(self.name)
            except enchant.errors.DictNotFoundError:
                LOGGER.exception('Error initializing enchant for %s',
                                 self.name)
                self.enchant_dict = None
            except Exception:
                LOGGER.exception(
                    'Unknown error initializing enchant for %s', self.name)
                self.enchant_dict = None
        elif IMPORT_HUNSPELL_SUCCESSFUL and self.dic_path:
            aff_path = self.dic_path.replace('.dic', '.aff')
            try:
                self.pyhunspell_object = hunspell.HunSpell(
                    self.dic_path, aff_path)
            except hunspell.HunSpellError:
                # Log with exception() (not debug()) so setup failures
                # are visible, consistent with the enchant branch:
                LOGGER.exception('Error initializing hunspell for %s',
                                 self.name)
                self.pyhunspell_object = None
            except Exception:
                LOGGER.exception(
                    'Unknown error initializing hunspell for %s', self.name)
                self.pyhunspell_object = None
class HunspellSuggestTestCase(unittest.TestCase):
    '''Tests for hunspell_suggest: suggestions and spellchecking via
    enchant, pyhunspell, and libvoikko.  Many expected strings are
    spelled with combining diacritics (e.g. '\\u0308').'''

    def setUp(self):
        # Show full diffs on assertion failures:
        self.maxDiff = None

    def tearDown(self):
        pass

    def test_dummy(self):
        '''Trivial check that the test harness itself runs.'''
        self.assertEqual(True, True)

    @unittest.skipUnless(
        IMPORT_ENCHANT_SUCCESSFUL,
        "Skipping because this test requires python3-enchant to work.")
    def test_de_DE_cs_CZ_enchant(self):
        '''German and Czech suggestions using the enchant backend.'''
        h = hunspell_suggest.Hunspell(['de_DE', 'cs_CZ'])
        self.assertEqual(
            h.suggest('Geschwindigkeitsubertre')[0],
            ('Geschwindigkeitsu\u0308bertretungsverfahren', 0))
        self.assertEqual(
            h.suggest('Geschwindigkeitsübertretungsverfahren')[0],
            ('Geschwindigkeitsu\u0308bertretungsverfahren', 0))
        self.assertEqual(
            h.suggest('Glühwürmchen')[0], ('Glu\u0308hwu\u0308rmchen', 0))
        self.assertEqual(h.suggest('Alpengluhen')[0], ('Alpenglu\u0308hen', 0))
        self.assertEqual(h.suggest('filosofictejsi'),
                         [('filosofic\u030Cte\u030Cjs\u030Ci\u0301', 0),
                          ('filosofic\u030Cte\u030Cji', -1)])
        self.assertEqual(
            h.suggest('filosofictejs')[0],
            ('filosofic\u030Cte\u030Cjs\u030Ci\u0301', 0))
        self.assertEqual(
            h.suggest('filosofičtější')[0],
            ('filosofic\u030Cte\u030Cjs\u030Ci\u0301', 0))
        self.assertEqual(
            h.suggest('filosofičtějš')[0],
            ('filosofic\u030Cte\u030Cjs\u030Ci\u0301', 0))

    @unittest.skipUnless(
        IMPORT_HUNSPELL_SUCCESSFUL and not IMPORT_ENCHANT_SUCCESSFUL,
        "Skipping because this test requires python3-pyhunspell to work.")
    def test_de_DE_cs_CZ_pyhunspell(self):
        '''Same as the enchant variant, but with the pyhunspell backend
        (the second Czech suggestion differs between the backends).'''
        h = hunspell_suggest.Hunspell(['de_DE', 'cs_CZ'])
        self.assertEqual(
            h.suggest('Geschwindigkeitsubertre')[0],
            ('Geschwindigkeitsu\u0308bertretungsverfahren', 0))
        self.assertEqual(
            h.suggest('Geschwindigkeitsübertretungsverfahren')[0],
            ('Geschwindigkeitsu\u0308bertretungsverfahren', 0))
        self.assertEqual(
            h.suggest('Glühwürmchen')[0], ('Glu\u0308hwu\u0308rmchen', 0))
        self.assertEqual(h.suggest('Alpengluhen')[0], ('Alpenglu\u0308hen', 0))
        self.assertEqual(h.suggest('filosofictejsi'),
                         [('filosofic\u030Cte\u030Cjs\u030Ci\u0301', 0),
                          ('filosofie\u0300ti\u0300ji', -1)])
        self.assertEqual(
            h.suggest('filosofictejs')[0],
            ('filosofic\u030Cte\u030Cjs\u030Ci\u0301', 0))
        self.assertEqual(
            h.suggest('filosofičtější')[0],
            ('filosofic\u030Cte\u030Cjs\u030Ci\u0301', 0))
        self.assertEqual(
            h.suggest('filosofičtějš')[0],
            ('filosofic\u030Cte\u030Cjs\u030Ci\u0301', 0))

    def test_it_IT(self):
        '''Italian suggestions.'''
        h = hunspell_suggest.Hunspell(['it_IT'])
        self.assertEqual(h.suggest('principianti'), [('principianti', 0),
                                                     ('principiati', -1),
                                                     ('principiante', -1),
                                                     ('principiarti', -1),
                                                     ('principiasti', -1)])

    def test_es_ES(self):
        '''Spanish suggestions.'''
        h = hunspell_suggest.Hunspell(['es_ES'])
        self.assertEqual(h.suggest('teneis'), [('tene\u0301is', 0),
                                               ('tenes', -1), ('tenis', -1),
                                               ('teneos', -1), ('tienes', -1),
                                               ('te neis', -1),
                                               ('te-neis', -1)])
        self.assertEqual(h.suggest('tenéis')[0], ('tene\u0301is', 0))

    def test_en_US(self):
        '''US English suggestions.'''
        h = hunspell_suggest.Hunspell(['en_US'])
        self.assertEqual(h.suggest('camel'), [('camel', 0), ('camellia', 0),
                                              ('camelhair', 0), ('came', -1),
                                              ('Camel', -1), ('cameo', -1),
                                              ('came l', -1), ('camels', -1)])

    def test_fr_FR(self):
        '''French suggestions.'''
        h = hunspell_suggest.Hunspell(['fr_FR'])
        self.assertEqual(h.suggest('differemmen'), [('diffe\u0301remment', 0)])

    @unittest.skipUnless(
        itb_util.get_hunspell_dictionary_wordlist('el_GR')[0],
        "Skipping because no Greek dictionary could be found. ")
    def test_el_GR(self):
        '''Greek suggestions.'''
        h = hunspell_suggest.Hunspell(['el_GR'])
        self.assertEqual(h.suggest('αλφαβητο')[0], ('αλφάβητο', 0))

    def test_fi_FI_dictionary_file(self):
        '''Finnish suggestions from the word list alone.'''
        # dictionary file is included in ibus-typing-booster
        h = hunspell_suggest.Hunspell(['fi_FI'])
        self.assertEqual(h.suggest('kissa'), [('kissa', 0), ('kissaa', 0),
                                              ('kissani', 0), ('kissassa', 0),
                                              ('kissajuttu', 0),
                                              ('kissamaiseksi', 0)])
        self.assertEqual(h.suggest('Pariisin-suurlahettila'),
                         [('Pariisin-suurla\u0308hettila\u0308s', 0)])

    @unittest.skipUnless(
        IMPORT_LIBVOIKKO_SUCCESSFUL,
        "Skipping because this test requires python3-libvoikko to work.")
    def test_fi_FI_voikko(self):
        '''Finnish suggestions via the libvoikko backend.'''
        d = hunspell_suggest.Dictionary('fi_FI')
        self.assertEqual(d.has_spellchecking(), True)
        h = hunspell_suggest.Hunspell(['fi_FI'])
        self.assertEqual(h.suggest('kisssa'), [('kissa', -1), ('kissaa', -1),
                                               ('kisassa', -1),
                                               ('kisussa', -1)])

    @unittest.skipUnless(
        IMPORT_ENCHANT_SUCCESSFUL,
        "Skipping because this test requires python3-enchant to work.")
    def test_en_US_spellcheck_enchant(self):
        '''Spellcheck a correct and an incorrect word via enchant.'''
        d = hunspell_suggest.Dictionary('en_US')
        self.assertEqual(d.spellcheck_enchant('winter'), True)
        self.assertEqual(d.spellcheck_enchant('winxer'), False)

    @unittest.skipUnless(
        IMPORT_ENCHANT_SUCCESSFUL,
        "Skipping because this test requires python3-enchant to work.")
    def test_en_US_spellcheck_suggest_enchant(self):
        '''Spellcheck suggestions via enchant.'''
        d = hunspell_suggest.Dictionary('en_US')
        self.assertEqual(d.spellcheck_suggest_enchant('kamel'),
                         ['camel', 'Camel'])

    @unittest.skipUnless(
        IMPORT_HUNSPELL_SUCCESSFUL and not IMPORT_ENCHANT_SUCCESSFUL,
        "Skipping because this test requires python3-pyhunspell to work.")
    def test_en_US_spellcheck_pyhunspell(self):
        '''Spellcheck a correct and an incorrect word via pyhunspell.'''
        d = hunspell_suggest.Dictionary('en_US')
        self.assertEqual(d.spellcheck_pyhunspell('winter'), True)
        self.assertEqual(d.spellcheck_pyhunspell('winxer'), False)

    @unittest.skipUnless(
        IMPORT_HUNSPELL_SUCCESSFUL and not IMPORT_ENCHANT_SUCCESSFUL,
        "Skipping because this test requires python3-pyhunspell to work.")
    def test_en_US_spellcheck_suggest_pyhunspell(self):
        '''Spellcheck suggestions via pyhunspell.'''
        d = hunspell_suggest.Dictionary('en_US')
        self.assertEqual(d.spellcheck_suggest_pyhunspell('kamel'),
                         ['camel', 'Camel'])

    @unittest.skipUnless(
        IMPORT_LIBVOIKKO_SUCCESSFUL,
        "Skipping because this test requires python3-libvoikko to work.")
    def test_fi_FI_spellcheck_voikko(self):
        '''Spellcheck Finnish words via libvoikko.'''
        d = hunspell_suggest.Dictionary('fi_FI')
        self.assertEqual(d.spellcheck_voikko('kissa'), True)
        self.assertEqual(d.spellcheck_voikko('kisssa'), False)
        self.assertEqual(d.spellcheck_voikko('Päiviä'), True)
        self.assertEqual(d.spellcheck_voikko('Päivia'), False)

    @unittest.skipUnless(
        IMPORT_LIBVOIKKO_SUCCESSFUL,
        "Skipping because this test requires python3-libvoikko to work.")
    def test_fi_FI_spellcheck_suggest_voikko(self):
        '''Spellcheck suggestions for Finnish via libvoikko.'''
        d = hunspell_suggest.Dictionary('fi_FI')
        self.assertEqual(d.spellcheck_suggest_voikko('kisssa'),
                         ['kissa', 'kissaa', 'kisassa', 'kisussa'])
# 示例#8 (sample separator from the original scrape — commented out so the file stays valid Python)
# 0
class TabSqliteDbTestCase(unittest.TestCase):
    '''Tests for tabsqlitedb.TabSqliteDb.

    Trains the user database with text files and measures how many
    keystrokes the completion would have saved when typing those
    files.
    '''

    def setUp(self) -> None:
        pass

    def tearDown(self) -> None:
        pass

    def init_database(
            self,
            user_db_file: str = ':memory:',
            dictionary_names: Iterable[str] = ('en_US',)) -> None:
        '''Create a TabSqliteDb and set its dictionaries.

        :param user_db_file: Path of the sqlite database file;
                             ':memory:' creates an in-memory database.
        :param dictionary_names: The hunspell dictionaries to use.
        '''
        self.database = tabsqlitedb.TabSqliteDb(user_db_file=user_db_file)
        self.database.hunspell_obj.set_dictionary_names(
            list(dictionary_names))

    def read_training_data_from_file(self, filename: str) -> bool:
        '''Train the user database from a text file.

        :param filename: Training file; a bare file name is looked up
                         in the directory of this test file.  If the
                         file does not exist, a '.gz' suffix is tried.
        :return: True if the file was found and read, False otherwise.
        '''
        # Bug fix: 'path' used to be left unassigned when 'filename'
        # contained a '/', raising UnboundLocalError below.
        path = filename
        if '/' not in filename:
            path = os.path.join(os.path.dirname(__file__), filename)
        path = os.path.expanduser(path)
        if not os.path.isfile(path):
            path += '.gz'
        if not os.path.isfile(path):
            return False
        return self.database.read_training_data_from_file(path)

    def simulate_typing_file(
            self,
            path: str,
            verbose: bool = True) -> Dict[str, Union[int, float]]:
        '''Simulate typing the text of a file and count saved keystrokes.

        Each token is typed character by character until the first
        candidate returned by the database equals the token; the
        remaining characters count as "saved".

        :param path: File to type; a bare file name is looked up in
                     the directory of this test file, with a '.gz'
                     fallback.
        :param verbose: Whether to log progress for every token.
        :return: Statistics dict with the keys 'typed', 'committed',
                 'saved', and 'percent'.
        '''
        stats: Dict[str, Union[int, float]] = {
            'typed': 0, 'committed': 0, 'saved': 0, 'percent': 0.0}
        if '/' not in path:
            path = os.path.join(os.path.dirname(__file__), path)
        path = os.path.expanduser(path)
        if not os.path.isfile(path):
            path += '.gz'
        if not os.path.isfile(path):
            self.assertFalse(True)
            return stats
        open_function: Callable[[Any], Any] = open
        if path.endswith('.gz'):
            open_function = gzip.open
        with open_function( # type: ignore
                path, mode='rt', encoding='UTF-8') as file_handle:
            lines = file_handle.readlines()
        # NOTE(review): p_token/pp_token are never updated inside the
        # loop, so the context passed to select_words() is always
        # empty.  The expected percentages in the tests below depend
        # on this behavior — confirm before changing it.
        p_token = ''
        pp_token = ''
        total_length_typed = 0
        total_length_committed = 0
        total_length_saved = 0
        total_percent_saved = 0.0
        current_line = 0
        total_lines = len(lines)
        for line in lines:
            current_line += 1
            for token in itb_util.tokenize(line):
                length_typed = 0
                length_saved = 0
                percent_saved = 0.0
                for i in range(1, len(token)):
                    candidates = self.database.select_words(
                        token[:i], p_phrase=p_token, pp_phrase=pp_token)
                    if candidates and candidates[0][0] == token:
                        # Completion found, the rest need not be typed:
                        length_typed = i
                        break
                    if i == len(token) - 1:
                        # No completion, the whole token was typed:
                        length_typed = len(token)
                length_saved = length_typed - len(token)
                percent_saved = 100.0 * length_saved / len(token)
                total_length_typed += length_typed
                total_length_committed += len(token)
                total_length_saved += length_saved
                total_percent_saved = (
                    100.0 * total_length_saved / total_length_committed)
                if verbose:
                    LOGGER.info(
                        'line %s/%s: %s -> %s %s %2.1f%% '
                        'total: %s -> %s %s %2.1f%%',
                        current_line,
                        total_lines,
                        token[:length_typed],
                        token,
                        length_saved,
                        percent_saved,
                        total_length_typed,
                        total_length_committed,
                        total_length_saved,
                        total_percent_saved)
        stats['typed'] = total_length_typed
        stats['committed'] = total_length_committed
        stats['saved'] = total_length_saved
        stats['percent'] = total_percent_saved
        return stats

    def test_dummy(self) -> None:
        '''Trivial check that the test harness itself runs.'''
        self.assertEqual(True, True)

    @unittest.skipUnless(
        itb_util.get_hunspell_dictionary_wordlist('en_US')[0],
        'Skipping because no en_US hunspell dictionary could be found.')
    def test_empty_database_only_dictionary(self) -> None:
        '''With an empty database, results come from the dictionary.'''
        self.init_database(
            user_db_file=':memory:', dictionary_names=['en_US'])
        self.assertEqual(
            'Baltimore',
            self.database.select_words(
                'baltim', p_phrase='foo', pp_phrase='bar')[0][0])

    @unittest.skipUnless(
        itb_util.get_hunspell_dictionary_wordlist('en_US')[0],
        'Skipping because no en_US hunspell dictionary could be found.')
    @unittest.skipUnless(
        itb_util.get_hunspell_dictionary_wordlist('fr_FR')[0],
        'Skipping because no fr_FR hunspell dictionary could be found.')
    @unittest.skipUnless(
        IMPORT_DISTRO_SUCCESSFUL
        and distro.id() == 'fedora',
        'Skipping on other distros then Fedora, '
        'French dictionary might be too different on other distributions.')
    def test_english_poem(self) -> None:
        '''Type an English poem, untrained and trained.'''
        training_file = 'the_road_not_taken.txt'
        self.init_database(
            user_db_file=':memory:', dictionary_names=['fr_FR'])
        stats = self.simulate_typing_file(training_file, verbose=False)
        LOGGER.info('stats=%s', repr(stats))
        # -2.5% saved when typing the English poem with the fr_FR dictionary:
        self.assertEqual(-2.5, round(stats['percent'], 1))
        # Set the en_US dictionary and see whether the result is better:
        self.database.hunspell_obj.set_dictionary_names(['en_US'])
        stats = self.simulate_typing_file(training_file, verbose=False)
        LOGGER.info('stats=%s', repr(stats))
        # -9.3% saved when typing the English poem with the en_US dictionary:
        self.assertEqual(-9.3, round(stats['percent'], 1))
        self.assertEqual(
            'undergrad',
            self.database.select_words(
                'undergr', p_phrase='in', pp_phrase='the')[0][0])
        self.assertEqual(0, self.database.number_of_rows_in_database())
        self.assertEqual(
            True, self.read_training_data_from_file(training_file))
        # Now the database should have rows:
        self.assertEqual(148, self.database.number_of_rows_in_database())
        # Now that the training data has been read into the database
        # the result should change:
        self.assertEqual(
            'undergrowth',
            self.database.select_words(
                'undergr', p_phrase='in', pp_phrase='the')[0][0])
        stats = self.simulate_typing_file(training_file, verbose=False)
        LOGGER.info('stats=%s', repr(stats))
        # -51.3% saved when typing the English poem with the trained database
        # and the en_US dictionary:
        self.assertEqual(-51.3, round(stats['percent'], 1))
        # Set the fr_FR dictionary and see whether that makes the result worse:
        self.database.hunspell_obj.set_dictionary_names(['fr_FR'])
        stats = self.simulate_typing_file(training_file, verbose=False)
        LOGGER.info('stats=%s', repr(stats))
        # -51.3% saved when typing the English poem with the trained database
        # and the fr_FR dictionary. When the database is trained so well,
        # the dictionary almost doesn’t matter anymore:
        self.assertEqual(-51.3, round(stats['percent'], 1))

    @unittest.skipUnless(
        itb_util.get_hunspell_dictionary_wordlist('en_US')[0],
        'Skipping because no en_US hunspell dictionary could be found.')
    @unittest.skipUnless(
        itb_util.get_hunspell_dictionary_wordlist('fr_FR')[0],
        'Skipping because no fr_FR hunspell dictionary could be found.')
    @unittest.skipUnless(
        IMPORT_DISTRO_SUCCESSFUL
        and distro.id() == 'fedora',
        'Skipping on other distros then Fedora, '
        'French dictionary might be too different on other distributions.')
    def test_french_poem(self) -> None:
        '''Type a French poem, untrained and trained.'''
        training_file = 'chant_d_automne.txt'
        self.init_database(
            user_db_file=':memory:', dictionary_names=['en_US'])
        stats = self.simulate_typing_file(training_file, verbose=False)
        LOGGER.info('stats=%s', repr(stats))
        # -2.3% saved when typing the French poem with the en_US dictionary:
        self.assertEqual(-2.3, round(stats['percent'], 1))
        # Set the fr_FR dictionary and see whether the result is better:
        self.database.hunspell_obj.set_dictionary_names(['fr_FR'])
        stats = self.simulate_typing_file(training_file, verbose=False)
        LOGGER.info('stats=%s', repr(stats))
        # -7.3% saved on Fedora 35 when typing the French poem with
        # the fr_FR dictionary. On openSUSE Tumbleweed (2021-11-23)
        # it is -8.2%.
        self.assertEqual(-7.3, round(stats['percent'], 1))
        self.assertEqual(
            'plonge',
            self.database.select_words(
                'plong', p_phrase='nous', pp_phrase='Bientôt')[0][0])
        self.assertEqual(0, self.database.number_of_rows_in_database())
        self.assertEqual(
            True, self.read_training_data_from_file(training_file))
        # Now the database should have rows:
        self.assertEqual(224, self.database.number_of_rows_in_database())
        # Now that the training data has been read into the database
        # the result should change:
        self.assertEqual(
            'plongerons',
            self.database.select_words(
                'plong', p_phrase='nous', pp_phrase='Bientôt')[0][0])
        stats = self.simulate_typing_file(training_file, verbose=False)
        LOGGER.info('stats=%s', repr(stats))
        # -37.6% saved when typing the French poem with the trained database
        # and the fr_FR dictionary:
        self.assertEqual(-37.6, round(stats['percent'], 1))
        # Set the fr_FR dictionary and see whether that makes the result worse:
        self.database.hunspell_obj.set_dictionary_names(['en_US'])
        stats = self.simulate_typing_file(training_file, verbose=False)
        LOGGER.info('stats=%s', repr(stats))
        # -37.6% saved when typing the French poem with the trained database
        # and the en_US dictionary. When the database is trained so well,
        # the dictionary almost doesn’t matter anymore:
        self.assertEqual(-37.6, round(stats['percent'], 1))

    @unittest.skipUnless(
        itb_util.get_hunspell_dictionary_wordlist('fr_FR')[0],
        'Skipping because no fr_FR hunspell dictionary could be found.')
    @unittest.skipUnless(
        IMPORT_DISTRO_SUCCESSFUL
        and distro.id() == 'fedora',
        'Skipping on other distros then Fedora, '
        'French dictionary might be too different on other distributions.')
    def test_french_book(self) -> None:
        '''Train with a whole French book and type it back.'''
        training_file = 'victor_hugo_notre_dame_de_paris.txt'
        self.init_database(
            user_db_file=':memory:', dictionary_names=['fr_FR'])
        self.assertEqual(0, self.database.number_of_rows_in_database())
        if not self.read_training_data_from_file(training_file):
            self.skipTest('Training file %s not available' % training_file)
        # Now the database should have rows:
        self.assertEqual(156245, self.database.number_of_rows_in_database())
        self.database.cleanup_database(thread=False)
        self.assertEqual(50000, self.database.number_of_rows_in_database())
        stats = self.simulate_typing_file(training_file, verbose=True)
        LOGGER.info('stats=%s', repr(stats))
        # -27% saved when typing the French poem with the trained database
        # and the fr_FR dictionary:
        self.assertEqual(-24, round(stats['percent'], 0))
class HunspellSuggestTestCase(unittest.TestCase):
    '''Test cases for word completion suggestions from hunspell
    dictionaries (via enchant, pyhunspell, or voikko back ends).'''

    def setUp(self) -> None:
        '''Show full diffs when assertions on long suggestion lists fail.'''
        self.maxDiff = None

    def tearDown(self) -> None:
        '''No per-test cleanup is needed.'''

    def test_dummy(self) -> None:
        '''Trivial sanity check that the test framework itself runs.'''
        self.assertTrue(True)

    @unittest.skipUnless(
        IMPORT_ENCHANT_SUCCESSFUL,
        "Skipping because this test requires python3-enchant to work.")
    @unittest.skipUnless(
        itb_util.get_hunspell_dictionary_wordlist('cs_CZ')[0],
        'Skipping because no Czech hunspell dictionary could be found.')
    @unittest.skipUnless(
        itb_util.get_hunspell_dictionary_wordlist('de_DE')[0],
        'Skipping because no German hunspell dictionary could be found.')
    @unittest.skipUnless(
        testutils.enchant_sanity_test(language='cs_CZ', word='Praha'),
        'Skipping because python3-enchant seems broken for cs_CZ.')
    def test_de_DE_cs_CZ_enchant(self) -> None:
        '''German and Czech suggestions using the enchant back end.
        Expected candidates are spelled in NFD (combining diacritics).'''
        hunspell = hunspell_suggest.Hunspell(['de_DE', 'cs_CZ'])
        # German inputs (with and without umlauts typed) whose first
        # candidate must match exactly:
        for typed, expected_first in (
                ('Geschwindigkeitsubertre',
                 'Geschwindigkeitsu\u0308bertretungsverfahren'),
                ('Geschwindigkeitsübertretungsverfahren',
                 'Geschwindigkeitsu\u0308bertretungsverfahren'),
                ('Glühwürmchen', 'Glu\u0308hwu\u0308rmchen'),
                ('Alpengluhen', 'Alpenglu\u0308hen')):
            self.assertEqual(hunspell.suggest(typed)[0], (expected_first, 0))
        # Czech: full candidate list for one input:
        self.assertEqual(
            hunspell.suggest('filosofictejsi'),
            [('filosofic\u030Cte\u030Cjs\u030Ci\u0301', 0),
             ('filosofic\u030Cte\u030Cji', -1)])
        # Czech inputs (accented and unaccented) sharing one expected
        # first candidate:
        for typed in ('filosofictejs', 'filosofičtější', 'filosofičtějš'):
            self.assertEqual(
                hunspell.suggest(typed)[0],
                ('filosofic\u030Cte\u030Cjs\u030Ci\u0301', 0))

    @unittest.skipUnless(
        IMPORT_HUNSPELL_SUCCESSFUL and not IMPORT_ENCHANT_SUCCESSFUL,
        "Skipping because this test requires python3-pyhunspell to work.")
    @unittest.skipUnless(
        itb_util.get_hunspell_dictionary_wordlist('cs_CZ')[0],
        'Skipping because no Czech hunspell dictionary could be found.')
    @unittest.skipUnless(
        itb_util.get_hunspell_dictionary_wordlist('de_DE')[0],
        'Skipping because no German hunspell dictionary could be found.')
    def test_de_DE_cs_CZ_pyhunspell(self) -> None:
        '''German and Czech suggestions using the pyhunspell back end.
        Expected candidates are spelled in NFD (combining diacritics).'''
        hunspell = hunspell_suggest.Hunspell(['de_DE', 'cs_CZ'])
        # German inputs (with and without umlauts typed) whose first
        # candidate must match exactly:
        for typed, expected_first in (
                ('Geschwindigkeitsubertre',
                 'Geschwindigkeitsu\u0308bertretungsverfahren'),
                ('Geschwindigkeitsübertretungsverfahren',
                 'Geschwindigkeitsu\u0308bertretungsverfahren'),
                ('Glühwürmchen', 'Glu\u0308hwu\u0308rmchen'),
                ('Alpengluhen', 'Alpenglu\u0308hen')):
            self.assertEqual(hunspell.suggest(typed)[0], (expected_first, 0))
        # Czech: full candidate list for one input (the second candidate
        # differs from what the enchant back end returns):
        self.assertEqual(
            hunspell.suggest('filosofictejsi'),
            [('filosofic\u030Cte\u030Cjs\u030Ci\u0301', 0),
             ('filosofie\u0300ti\u0300ji', -1)])
        # Czech inputs (accented and unaccented) sharing one expected
        # first candidate:
        for typed in ('filosofictejs', 'filosofičtější', 'filosofičtějš'):
            self.assertEqual(
                hunspell.suggest(typed)[0],
                ('filosofic\u030Cte\u030Cjs\u030Ci\u0301', 0))

    @unittest.skipUnless(
        itb_util.get_hunspell_dictionary_wordlist('it_IT')[0],
        'Skipping because no Italian hunspell dictionary could be found.')
    def test_it_IT(self) -> None:
        '''Italian: one exact match followed by near-miss candidates.'''
        hunspell = hunspell_suggest.Hunspell(['it_IT'])
        expected = [('principianti', 0)] + [
            (word, -1) for word in (
                'principiati', 'principiante', 'principiarti',
                'principiasti')]
        self.assertEqual(expected, hunspell.suggest('principianti'))

    @unittest.skipUnless(
        itb_util.get_hunspell_dictionary_wordlist('es_ES')[0],
        'Skipping because no Spanish hunspell dictionary could be found.')
    def test_es_ES(self) -> None:
        '''Spanish: accent completion for “teneis”/“tenéis”.'''
        hunspell = hunspell_suggest.Hunspell(['es_ES'])
        expected = [
            ('tene\u0301is', 0),
            ('tenes', -1),
            ('tenis', -1),
            ('teneos', -1),
            ('tienes', -1),
            ('te neis', -1),
            ('te-neis', -1),
        ]
        self.assertEqual(expected, hunspell.suggest('teneis'))
        # Typing the accent precomposed yields the same NFD first candidate:
        self.assertEqual(('tene\u0301is', 0), hunspell.suggest('tenéis')[0])

    @unittest.skipUnless(
        itb_util.get_hunspell_dictionary_wordlist('en_US')[0],
        'Skipping because no US English hunspell dictionary could be found.')
    @unittest.skipUnless(
        testutils.enchant_sanity_test(language='cs_CZ', word='Praha'),
        'Skipping because python3-enchant seems broken for cs_CZ.')
    @unittest.skipUnless(
        testutils.enchant_working_as_expected(),
        'Skipping because of an unexpected change in the enchant behaviour.')
    def test_en_US(self) -> None:
        # Checks the complete, ordered candidate list for the input
        # “camel”: prefix completions (weight 0) first, then spelling
        # corrections (weight -1).  The exact list and order depend on
        # the installed en_US dictionary and the enchant version, hence
        # the enchant_working_as_expected() guard above.
        h = hunspell_suggest.Hunspell(['en_US'])
        self.assertEqual(
            [('Camel', 0),
             ('camel', 0),
             ('Camelot', 0),
             ('camellia', 0),
             ('camelhair', 0),
             ('Camelopardalis', 0),
             ('CAM', -1),
             ('Cal', -1),
             ('Mel', -1),
             ('cal', -1),
             ('cam', -1),
             ('Carl', -1),
             ('Gael', -1),
             ('Jame', -1),
             ('call', -1),
             ('came', -1),
             ('come', -1),
             ('game', -1),
             ('Jamal', -1),
             ('Jamel', -1),
             ('Ocaml', -1),
             ('cable', -1),
             ('cameo', -1),
             ('calmer', -1),
             ('camels', -1),
             ('comely', -1),
             ('compel', -1),
             ('gamely', -1),
             ("Camel's", -1),
             ('Camilla', -1),
             ('Camille', -1),
             ('Carmela', -1),
             ('Carmelo', -1),
             ("Jamel's", -1),
             ("camel's", -1),
             ('caramel', -1),
             ('Carmella', -1)],
            h.suggest('camel'))

    @unittest.skipUnless(
        itb_util.get_hunspell_dictionary_wordlist('fr_FR')[0],
        'Skipping because no French hunspell dictionary could be found.')
    def test_fr_FR(self) -> None:
        '''French: completing “differemmen” adds the missing accent.'''
        hunspell = hunspell_suggest.Hunspell(['fr_FR'])
        suggestions = hunspell.suggest('differemmen')
        self.assertEqual([('diffe\u0301remment', 0)], suggestions)

    @unittest.skipUnless(
        itb_util.get_hunspell_dictionary_wordlist('el_GR')[0],
        "Skipping because no Greek dictionary could be found. ")
    def test_el_GR(self) -> None:
        '''Greek: the unaccented input gets the accented word first.'''
        hunspell = hunspell_suggest.Hunspell(['el_GR'])
        first_suggestion = hunspell.suggest('αλφαβητο')[0]
        self.assertEqual(('αλφάβητο', 0), first_suggestion)

    def test_fi_FI_dictionary_file(self) -> None:
        '''Finnish completions from the dictionary file included in
        ibus-typing-booster.  This should work with and without voikko.'''
        hunspell = hunspell_suggest.Hunspell(['fi_FI'])
        expected = [(word, 0) for word in (
            'kissa', 'kissaa', 'kissani', 'kissassa',
            'kissajuttu', 'kissamaiseksi')]
        self.assertEqual(hunspell.suggest('kissa'), expected)

    @unittest.skipUnless(
        testutils.get_libvoikko_version() >= '4.3',
        "Skipping, requires python3-libvoikko version >= 4.3.")
    @unittest.skipIf(
        testutils.init_libvoikko_error(),
        "Skipping, %s" % testutils.init_libvoikko_error())
    def test_fi_FI_voikko(self) -> None:
        '''Finnish suggestions using the voikko back end.'''
        dictionary = hunspell_suggest.Dictionary('fi_FI')
        self.assertEqual(True, dictionary.has_spellchecking())
        hunspell = hunspell_suggest.Hunspell(['fi_FI'])
        # A misspelling gets only spelling corrections (weight -1):
        self.assertEqual(
            [('kissa', -1),
             ('Kiassa', -1),
             ('kissaa', -1),
             ('kisassa', -1),
             ('kisussa', -1)],
            hunspell.suggest('kisssa'))
        # A truncated compound word gets a completion (weight 0) first,
        # then corrections:
        self.assertEqual(
            [('Pariisin-suurla\u0308hettila\u0308s', 0),
             ('Pariisin-suurlähetetila', -1),
             ('Pariisin-suurlähettiala', -1)],
            hunspell.suggest('Pariisin-suurlähettila'))

    @unittest.skipUnless(
        IMPORT_ENCHANT_SUCCESSFUL,
        "Skipping because this test requires python3-enchant to work.")
    @unittest.skipUnless(
        itb_util.get_hunspell_dictionary_wordlist('en_US')[0],
        'Skipping because no US English hunspell dictionary could be found.')
    def test_en_US_spellcheck_enchant(self) -> None:
        '''Spell checking single English words via enchant.'''
        dictionary = hunspell_suggest.Dictionary('en_US')
        for word, is_correct in (('winter', True), ('winxer', False)):
            self.assertEqual(dictionary.spellcheck_enchant(word), is_correct)

    @unittest.skipUnless(
        IMPORT_ENCHANT_SUCCESSFUL,
        "Skipping because this test requires python3-enchant to work.")
    @unittest.skipUnless(
        itb_util.get_hunspell_dictionary_wordlist('en_US')[0],
        'Skipping because no US English hunspell dictionary could be found.')
    @unittest.skipUnless(
        testutils.enchant_sanity_test(language='cs_CZ', word='Praha'),
        'Skipping because python3-enchant seems broken for cs_CZ.')
    @unittest.skipUnless(
        testutils.enchant_working_as_expected(),
        'Skipping because of an unexpected change in the enchant behaviour.')
    def test_en_US_spellcheck_suggest_enchant(self) -> None:
        '''Spelling corrections for a misspelled word via enchant.'''
        dictionary = hunspell_suggest.Dictionary('en_US')
        suggestions = dictionary.spellcheck_suggest_enchant('kamel')
        self.assertEqual(
            ['Jamel', 'Camel', 'camel', 'Jamal', 'gamely'], suggestions)

    @unittest.skipUnless(
        IMPORT_HUNSPELL_SUCCESSFUL and not IMPORT_ENCHANT_SUCCESSFUL,
        "Skipping because this test requires python3-pyhunspell to work.")
    @unittest.skipUnless(
        itb_util.get_hunspell_dictionary_wordlist('en_US')[0],
        'Skipping because no US English hunspell dictionary could be found.')
    def test_en_US_spellcheck_pyhunspell(self) -> None:
        '''Spell checking single English words via pyhunspell.'''
        dictionary = hunspell_suggest.Dictionary('en_US')
        for word, is_correct in (('winter', True), ('winxer', False)):
            self.assertEqual(
                dictionary.spellcheck_pyhunspell(word), is_correct)

    @unittest.skipUnless(
        IMPORT_HUNSPELL_SUCCESSFUL and not IMPORT_ENCHANT_SUCCESSFUL,
        "Skipping because this test requires python3-pyhunspell to work.")
    @unittest.skipUnless(
        itb_util.get_hunspell_dictionary_wordlist('en_US')[0],
        'Skipping because no US English hunspell dictionary could be found.')
    def test_en_US_spellcheck_suggest_pyhunspell(self) -> None:
        '''Spelling corrections for a misspelled word via pyhunspell
        (a shorter list than enchant returns for the same input).'''
        dictionary = hunspell_suggest.Dictionary('en_US')
        suggestions = dictionary.spellcheck_suggest_pyhunspell('kamel')
        self.assertEqual(['camel', 'Camel'], suggestions)

    @unittest.skipUnless(
        testutils.get_libvoikko_version() >= '4.3',
        "Skipping, requires python3-libvoikko version >= 4.3.")
    @unittest.skipIf(
        testutils.init_libvoikko_error(),
        "Skipping, %s" % testutils.init_libvoikko_error())
    def test_fi_FI_spellcheck_voikko(self) -> None:
        '''Spell checking single Finnish words via voikko.'''
        dictionary = hunspell_suggest.Dictionary('fi_FI')
        for word, is_correct in (('kissa', True),
                                 ('kisssa', False),
                                 ('Päiviä', True),
                                 ('Päivia', False)):
            self.assertEqual(dictionary.spellcheck_voikko(word), is_correct)

    @unittest.skipUnless(
        testutils.get_libvoikko_version() >= '4.3',
        "Skipping, requires python3-libvoikko version >= 4.3.")
    @unittest.skipIf(
        testutils.init_libvoikko_error(),
        "Skipping, %s" % testutils.init_libvoikko_error())
    def test_fi_FI_spellcheck_suggest_voikko(self) -> None:
        '''Spelling corrections for a misspelled Finnish word via voikko.'''
        dictionary = hunspell_suggest.Dictionary('fi_FI')
        self.assertEqual(
            ['kissa', 'kissaa', 'kisassa', 'kisussa', 'Kiassa'],
            dictionary.spellcheck_suggest_voikko('kisssa'))

    @unittest.skipUnless(
        itb_util.get_hunspell_dictionary_wordlist('sv_SE')[0],
        'Skipping because no Swedish dictionary could be found.')
    def test_sv_SE(self) -> None:
        '''Swedish completions: “o” may match both “o” and “ö” while
        typing, but once “ö” is typed only “ö” words remain.'''
        h = hunspell_suggest.Hunspell(['sv_SE'])
        # Unaccented prefix matches both 'o' and 'ö' words:
        self.assertEqual(
            h.suggest('östgo'),
            [('östgot', 0),
             ('Östgöta', 0),
             ('östgöte', 0),
             ('östgotisk', 0),
             ('östgötsk', 0),
             ('östgötska', 0)])
        self.assertEqual(
            h.suggest('östgot'),
            [('östgot', 0),
             ('östgotisk', 0),
             ('Östgot', -1)])
        # Accented prefix matches only 'ö' words:
        self.assertEqual(
            h.suggest('östgö'),
            [('Östgöta', 0),
             ('östgöte', 0),
             ('östgötsk', 0),
             ('östgötska', 0)])
        # Only the first five candidates are checked here; further
        # corrections may follow after the weight -1 entry:
        self.assertEqual(
            h.suggest('östgöt')[0:5],
            [('Östgöta', 0),
             ('östgöte', 0),
             ('östgötsk', 0),
             ('östgötska', 0),
             ('östgot', -1)])