示例#1
0
    def __init__(self, verbs):
        self.identify_word_cache = {}
        self.verb_cache = {}

        self.verb_derivations, self.lemma2deriv_index = \
            collect_verb_derivations(verbs)

        self.deriv_index_picker = \
            SuffixGeneralizingMap(self.lemma2deriv_index, min)
示例#2
0
class Conjugator(object):
    """
    Conjugates and un-conjugates verbs.
    """

    def __init__(self, verbs):
        self.identify_word_cache = {}
        self.verb_cache = {}

        self.verb_derivations, self.lemma2deriv_index = \
            collect_verb_derivations(verbs)

        self.deriv_index_picker = \
            SuffixGeneralizingMap(self.lemma2deriv_index, min)

    @staticmethod
    def from_file(f):
        vv = conjugations_from_file(f)
        return Conjugator(vv)

    def create_verb(self, lemma):
        """
        lemma -> Verb
        """
        verb = self.verb_cache.get(lemma)
        if verb:
            return verb

        if lemma == MAGIC_INTS_LEMMA:
            verb = Verb('0', '1', '2', map(str, range(3, 3 + 6)),
                        map(str, range(9, 9 + 6)))
        else:
            deriv_index = self.deriv_index_picker.get(lemma)
            deriv = self.verb_derivations[deriv_index]
            verb = deriv.derive_verb(lemma)
        self.verb_cache[lemma] = verb
        return verb

    def identify_word(self, word, is_picky_about_verbs=True):
        """
        conjugated word, is picky -> list of (lemma, verb field index)
        """
        key = (word, is_picky_about_verbs)
        lemmas_indexes = self.identify_word_cache.get(key)
        if lemmas_indexes is not None:
            return lemmas_indexes

        # For each verb derivation, undo the conjugation in order to get the
        # hypothetical original lemma.
        #
        # If the generalizing suffix map picks the same verb derivation we used
        # for that lemma, it's a match.
        lemmas_indexes = []
        for i, deriv in enumerate(self.verb_derivations):
            for lemma, field_index in deriv.identify_word(word):
                if self.deriv_index_picker.get(lemma) == i:
                    lemmas_indexes.append((lemma, field_index))

        # If we're picky, and we have some results that contain known verbs,
        # forget about the results with unknown verbs.
        if is_picky_about_verbs:
            has_known = False
            for lemma, field_index in lemmas_indexes:
                if lemma in self.lemma2deriv_index:
                    has_known = True
                    break
            if has_known:
                lemmas_indexes = filter(
                    lambda (lemma, index): lemma in self.lemma2deriv_index,
                    lemmas_indexes)

        lemmas_indexes.append((word, 0))

        key = (word, is_picky_about_verbs)
        self.identify_word_cache[key] = lemmas_indexes
        return lemmas_indexes