Exemplo n.º 1
0
    def test_pickle_compressed(self):
        """Round-trip a loaded dictionary through save_pickle/load_pickle.

        Loads ``self.dictionary_path`` into one SymSpell instance, pickles it
        to ``dictionary.pickle`` under ``self.fortests_path``, loads that file
        into a second instance built with the same settings, and checks that
        ``deletes``, ``words`` and ``_max_length`` are equal on both.

        NOTE(review): the test name implies the compressed pickle format, but
        no compression flag is passed here, so this relies on the library
        default for save_pickle/load_pickle -- confirm.
        """
        pickle_path = os.path.join(self.fortests_path, "dictionary.pickle")
        edit_distance_max = 2
        prefix_length = 7
        sym_spell = SymSpell(edit_distance_max, prefix_length)
        sym_spell.load_dictionary(self.dictionary_path, 0, 1)
        try:
            sym_spell.save_pickle(pickle_path)

            sym_spell_2 = SymSpell(edit_distance_max, prefix_length)
            sym_spell_2.load_pickle(pickle_path)
            self.assertEqual(sym_spell.deletes, sym_spell_2.deletes)
            self.assertEqual(sym_spell.words, sym_spell_2.words)
            self.assertEqual(sym_spell._max_length, sym_spell_2._max_length)
        finally:
            # Clean up even when an assertion or load_pickle fails, so a
            # failed run does not leave the artifact in the fixtures folder.
            # The existence check keeps a save_pickle failure from being
            # masked by a FileNotFoundError raised here.
            if os.path.exists(pickle_path):
                os.remove(pickle_path)
Exemplo n.º 2
0
def init_sym_spell():
    """Create the SymSpell instance used for spell checking, with a disk cache.

    If ``words.pkl.gz`` exists in the ``music_manager`` user cache directory,
    the instance is restored from that pickle.  Otherwise the dictionary is
    built from the ``frequency_dictionary_en_82_765.txt`` resource of the
    ``symspellpy`` package plus every line of the xz-compressed word list at
    ``etc/scowl/words.xz``; words that are not already present are added with
    the lowest count found in the frequency dictionary.  The result is then
    pickled to the cache path so later calls take the fast branch.

    Returns:
        The populated ``SymSpell`` instance.

    Raises:
        ValueError: If no entries are present after loading the frequency
            dictionary (previously this surfaced as an opaque
            "min() arg is an empty sequence" error).
    """
    from pathlib import Path
    from symspellpy import SymSpell
    from ds_tools.fs.paths import get_user_cache_dir

    sym_spell = SymSpell(max_dictionary_edit_distance=0, prefix_length=1)
    dict_path_pkl = Path(
        get_user_cache_dir('music_manager')).joinpath('words.pkl.gz')
    if dict_path_pkl.exists():
        log.debug(f'Loading pickled spellcheck dictionary: {dict_path_pkl}')
        # NOTE(review): this unpickles whatever is stored at the cache path;
        # unpickling is only safe if that file cannot be written by an
        # untrusted party -- confirm the cache directory's permissions.
        sym_spell.load_pickle(dict_path_pkl)
        return sym_spell

    import lzma
    import pkg_resources

    dict_path = pkg_resources.resource_filename(
        'symspellpy', 'frequency_dictionary_en_82_765.txt')
    sym_spell.load_dictionary(dict_path, 0, 1)
    word_list_path_xz = Path(
        pkg_resources.resource_filename(
            'music', '../../etc/scowl/words.xz')).resolve()
    log.debug(
        f'Loading default dictionary + word list from {word_list_path_xz}')
    with lzma.open(word_list_path_xz, 'rt', encoding='utf-8') as f:
        word_list = f.read().splitlines()

    # NOTE(review): _words is a private attribute of SymSpell; it is used
    # here as the mapping of known words to their counts.
    loaded = sym_spell._words
    if not loaded:
        # Fail with an actionable message instead of letting min() raise on
        # an empty sequence further down.
        raise ValueError(
            f'No dictionary entries were loaded from {dict_path}')

    # Give the extra words the lowest count seen in the frequency dictionary.
    min_count = min(loaded.values())
    add_word = sym_spell.create_dictionary_entry
    for word in word_list:
        # A plain membership test avoids raising and catching KeyError for
        # every word that is not yet known.
        if word not in loaded:
            add_word(word, min_count)

    fmt = 'Saving pickled spellcheck dictionary (this is a one-time action that may take about 15 seconds): {}'
    log.info(fmt.format(dict_path_pkl))
    sym_spell.save_pickle(dict_path_pkl)

    return sym_spell