Exemplo n.º 1
0
    def prepare(self, word_type):
        """
        Collect (reversed form, suffix rewrite) pairs for words read from clp.

        Walks a fixed range of word ids, skips the ids filtered out by the
        ``Util`` checks below and, whenever the base form differs from the one
        seen for the previously accepted id, emits one entry per non-empty
        form returned by ``self.plp.forms``. The pairs are presumably what the
        caller inserts into the trie -- confirm against the caller.

        :param word_type: part of speech used to filter the words read from
            clp data
        :return: ``zip(keys, values)`` where each key is ``Util.reverse(form)``
            and each value is a ``(to_remove, to_add)`` pair of UTF-8 encoded
            strings: the tail of the form and the tail of the base form that
            follow the part returned by ``Util.substring(form, base_form)``
        """
        previous_base = ""
        keys = []
        values = []
        # NOTE(review): hard-coded id range, presumably the span of word ids
        # available in the clp data -- TODO confirm and name these constants.
        for i in range(16777231, 18663982):
            if Util.is_word_unneeded(i):
                continue
            # NOTE(review): ids for which this check is true are skipped; given
            # the helper's name this may be inverted -- verify against Util.
            if Util.is_word_appropriate_type(self.plp.label(i)[0], word_type):
                continue
            form = self.plp.bform(i)

            # Consecutive ids sharing a base form are expanded only once.
            if previous_base != form:
                for s in self.plp.forms(i):
                    if not s:
                        continue
                    # Presumably the common leading part of the inflected form
                    # and the base form; only its length is used here.
                    shared = Util.substring(s, form)
                    to_remove = s[len(shared):]
                    to_add = form[len(shared):]
                    keys.append(Util.reverse(s))
                    values.append((
                        unicode(to_remove).encode('utf-8'),
                        unicode(to_add).encode('utf-8'),
                    ))

            # Reuse the value fetched above instead of calling bform(i) again.
            previous_base = form
        return zip(keys, values)