Пример #1
0
def make_tries(wordlist):
    """Build one CompletionDAWG per language from ``wordlist``.

    Each entry of ``wordlist`` must expose a ``lang`` attribute and unpack
    into a ``(lang, word)`` pair. Entries are sorted by language first
    because ``itertools.groupby`` only merges adjacent items.

    Returns a dict mapping each language to a CompletionDAWG of its words.
    """
    def language_of(entry):
        return entry.lang

    ordered = sorted(wordlist, key=language_of)
    return {
        language: CompletionDAWG([word for _, word in entries])
        for language, entries in itertools.groupby(ordered, key=language_of)
    }
Пример #2
0
    def __init__(self):
        """Create a new lexicon backed by three DAWGs.

        Loads ``self.word_list`` from the file ``csw.dawg`` inside
        ``sys.path[0]`` and then derives two more structures from
        ``self.list_words()``:

        * ``self.suffix_list`` -- every non-empty proper suffix of every
          word (a word with at least one leading character removed).
        * ``self.reverse_list`` -- every word spelled backwards.
        """
        self.word_list = CompletionDAWG()
        self.word_list.load(os.path.join(sys.path[0], 'csw.dawg'))

        # Enumerate the words once; both derived DAWGs are built from them.
        words = list(self.list_words())

        # word[start:] with 1 <= start < len(word) drops at least one leading
        # character and always leaves at least one, so this is exactly the
        # set of non-empty proper suffixes. The set removes duplicates shared
        # between words.
        suffixes = {
            word[start:] for word in words for start in range(1, len(word))
        }
        self.suffix_list = CompletionDAWG(sorted(suffixes))

        self.reverse_list = CompletionDAWG(
            sorted(word[::-1] for word in words))
Пример #3
0
    def _create_dawg(self) -> CompletionDAWG:
        """Build a CompletionDAWG from the cache keys and their derived forms.

        For every key in ``self.cache`` the DAWG receives the key itself plus
        the ``vse`` and ``evs`` values of an ``Edge`` parsed from that key.
        The ``vbs`` values are collected in a set and emitted once at the
        end, so each distinct ``vbs`` is supplied a single time.
        """
        def iter_keys():
            pending_vbs = set()
            for cache_key in self.cache.keys():
                yield cache_key

                # Parsing a shallow edge straight from the key is faster
                # than fetching the full edge from the cache index.
                shallow = Edge.from_line(cache_key, ts=TS.sve)
                yield shallow.vse
                yield shallow.evs
                pending_vbs.add(shallow.vbs)

            yield from pending_vbs

        return CompletionDAWG(iter_keys())
Пример #4
0
    def _create_dawg(self) -> CompletionDAWG:
        """Build a CompletionDAWG of lookup keys for every cached node.

        Per node the generator emits, in order:

        * for each term: a ``_tky``-separated key of (normalized term,
          node key) and a ``_ltk``-separated key of (label, normalized
          term, node key);
        * one ``_lky``-separated key of (label, node key);
        * the first time a label is seen, ``_lbl`` followed by that label.

        Joining with a leading empty string makes every joined key start
        with its separator.
        """
        def iter_keys():
            seen_labels = set()
            for entry in self.cache.values():
                for raw_term in entry.terms:
                    normalized = self.normalizer.normalize(raw_term)
                    yield self._tky.join(["", normalized, entry.key])
                    yield self._ltk.join(
                        ["", entry.label, normalized, entry.key])

                yield self._lky.join(["", entry.label, entry.key])

                # Each label gets only one standalone entry.
                if entry.label in seen_labels:
                    continue
                seen_labels.add(entry.label)
                yield f"{self._lbl}{entry.label}"

        return CompletionDAWG(iter_keys())
Пример #5
0
def save_lex_dawg(dictionary_files=('dictionaries/sowpods.txt', ),
                  outfile=DAWGS_PATH + 'sowpods.dawg'):
    """Build a CompletionDAWG from dictionary files and save it to disk.

    ``dictionary_files`` is passed unchanged to ``get_words``; whatever
    that returns is used to build the DAWG, which is then written to
    ``outfile``.
    """
    CompletionDAWG(get_words(dictionary_files)).save(outfile)
Пример #6
0
 def _load_dawg(self):
     """Return the DAWG stored at ``self.dawg_path``, or an empty one.

     When ``self.dawg_path`` is not an existing file, a CompletionDAWG
     built from an empty list is returned. Otherwise the result of
     ``CompletionDAWG().load(...)`` on the stringified path is returned.
     """
     if not self.dawg_path.is_file():
         return CompletionDAWG([])
     stored = CompletionDAWG()
     return stored.load(str(self.dawg_path))