def find_lemmata(tokens):
    """Append a lowercase lemma to every token and return the same list.

    Each token is a mutable ``[word, part-of-speech]`` list. It is extended
    in place with a third item, the lemma, chosen from the tag:

    * ``DT*`` / ``PR*`` / ``WP*``: ``singularize(word, pos=pos)``.
    * ``RB*`` / ``IN*``: ``singularize(word, pos=pos)``, but only when the
      word ends in an apostrophe (straight or typographic) or is ``"du"``.
    * ``JJ*``: ``predicative(word)``.
    * ``NNS`` (exact match): ``singularize(word)``.
    * ``VB*`` / ``MD*``: ``conjugate(word, INFINITIVE)``, falling back to
      the word itself when that result is falsy.
    * Anything else: the word itself.

    The chosen lemma is lowercased before being appended.
    """
    for token in tokens:
        word, pos = token[0], token[1]
        # The tag prefixes below are mutually exclusive, so at most one
        # branch can apply to a given token.
        if pos.startswith(("DT", "PR", "WP")):
            lemma = singularize(word, pos=pos)
        elif pos.startswith(("RB", "IN")):
            # Only apostrophe-terminated forms and "du" are rewritten;
            # every other adverb/preposition keeps its surface form.
            special_form = word.endswith(("'", u"’")) or word == "du"
            lemma = singularize(word, pos=pos) if special_form else word
        elif pos.startswith(("JJ",)):
            lemma = predicative(word)
        elif pos == "NNS":
            lemma = singularize(word)
        elif pos.startswith(("VB", "MD")):
            lemma = conjugate(word, INFINITIVE) or word
        else:
            lemma = word
        token.append(lemma.lower())
    return tokens