def test_parse_lemma(self):
    # Assert the accuracy of the verb lemmatization algorithm.
    # Every infinitive in de.inflect.VERBS is conjugated in every tense in
    # de.inflect.VERBS.TENSES; the conjugated form is fed back through
    # de.inflect._parse_lemma() and the result is compared to the infinitive.
    # Note: the accuracy is higher (88%) when measured on CELEX word forms
    # (presumably because de.inflect.VERBS has high percentage irregular verbs).
    correct, total = 0, 0
    for v in de.inflect.VERBS.infinitives:
        for tense in de.inflect.VERBS.TENSES:
            # Skip combinations for which conjugate(..., parse=False) yields
            # nothing, so they do not count towards the accuracy.
            if de.conjugate(v, tense, parse=False) is None:
                continue
            if de.inflect._parse_lemma(de.conjugate(v, tense)) == v:
                correct += 1
            total += 1
    # float() keeps this a true division on Python 2 as well.
    self.assertTrue(float(correct) / total > 0.85)
    # Call form of print: valid on Python 3 and prints the same text on Python 2.
    print("pattern.de.inflect._parse_lemma()")
def test_conjugate(self):
    # Assert different tenses with different conjugations.
    # Each case is (verb to conjugate, expected form, tense specification).
    cases = (
        ("sein", "sein", de.INFINITIVE),
        # Present tense.
        ("sein", "bin", (de.PRESENT, 1, de.SINGULAR)),
        ("sein", "bist", (de.PRESENT, 2, de.SINGULAR)),
        ("sein", "ist", (de.PRESENT, 3, de.SINGULAR)),
        ("sein", "sind", (de.PRESENT, 1, de.PLURAL)),
        ("sein", "seid", (de.PRESENT, 2, de.PLURAL)),
        ("sein", "sind", (de.PRESENT, 3, de.PLURAL)),
        # The participle specs are single "+" expressions, not tuples.
        ("sein", "seiend", de.PRESENT + de.PARTICIPLE),
        # Past tense.
        ("sein", "war", (de.PAST, 1, de.SINGULAR)),
        ("sein", "warst", (de.PAST, 2, de.SINGULAR)),
        ("sein", "war", (de.PAST, 3, de.SINGULAR)),
        ("sein", "waren", (de.PAST, 1, de.PLURAL)),
        ("sein", "wart", (de.PAST, 2, de.PLURAL)),
        ("sein", "waren", (de.PAST, 3, de.PLURAL)),
        ("sein", "gewesen", de.PAST + de.PARTICIPLE),
        # Imperative mood.
        ("sein", "sei", (de.PRESENT, 2, de.SINGULAR, de.IMPERATIVE)),
        ("sein", "seien", (de.PRESENT, 1, de.PLURAL, de.IMPERATIVE)),
        ("sein", "seid", (de.PRESENT, 2, de.PLURAL, de.IMPERATIVE)),
        # Present subjunctive.
        ("sein", u"sei", (de.PRESENT, 1, de.SINGULAR, de.SUBJUNCTIVE)),
        ("sein", u"seiest", (de.PRESENT, 2, de.SINGULAR, de.SUBJUNCTIVE)),
        ("sein", u"sei", (de.PRESENT, 3, de.SINGULAR, de.SUBJUNCTIVE)),
        ("sein", u"seien", (de.PRESENT, 1, de.PLURAL, de.SUBJUNCTIVE)),
        ("sein", u"seiet", (de.PRESENT, 2, de.PLURAL, de.SUBJUNCTIVE)),
        ("sein", u"seien", (de.PRESENT, 3, de.PLURAL, de.SUBJUNCTIVE)),
        # Past subjunctive.
        ("sein", u"wäre", (de.PAST, 1, de.SINGULAR, de.SUBJUNCTIVE)),
        ("sein", u"wärest", (de.PAST, 2, de.SINGULAR, de.SUBJUNCTIVE)),
        ("sein", u"wäre", (de.PAST, 3, de.SINGULAR, de.SUBJUNCTIVE)),
        ("sein", u"wären", (de.PAST, 1, de.PLURAL, de.SUBJUNCTIVE)),
        ("sein", u"wäret", (de.PAST, 2, de.PLURAL, de.SUBJUNCTIVE)),
        ("sein", u"wären", (de.PAST, 3, de.PLURAL, de.SUBJUNCTIVE)),
    )
    for verb, expected, tense in cases:
        self.assertEqual(de.conjugate(verb, tense), expected)
    print("pattern.de.conjugate()")
def test_conjugate(self):
    # Assert different tenses with different conjugations.
    # Each case is (verb to conjugate, expected form, tense specification);
    # the tense is either a single value or a tuple of
    # (tense, person, number[, mood]).
    for (v1, v2, tense) in (
            ("sein", "sein", de.INFINITIVE),
            ("sein", "bin", (de.PRESENT, 1, de.SINGULAR)),
            ("sein", "bist", (de.PRESENT, 2, de.SINGULAR)),
            ("sein", "ist", (de.PRESENT, 3, de.SINGULAR)),
            ("sein", "sind", (de.PRESENT, 1, de.PLURAL)),
            ("sein", "seid", (de.PRESENT, 2, de.PLURAL)),
            ("sein", "sind", (de.PRESENT, 3, de.PLURAL)),
            # Parenthesized "+" expression, not a tuple.
            ("sein", "seiend", (de.PRESENT + de.PARTICIPLE)),
            ("sein", "war", (de.PAST, 1, de.SINGULAR)),
            ("sein", "warst", (de.PAST, 2, de.SINGULAR)),
            ("sein", "war", (de.PAST, 3, de.SINGULAR)),
            ("sein", "waren", (de.PAST, 1, de.PLURAL)),
            ("sein", "wart", (de.PAST, 2, de.PLURAL)),
            ("sein", "waren", (de.PAST, 3, de.PLURAL)),
            ("sein", "gewesen", (de.PAST + de.PARTICIPLE)),
            ("sein", "sei", (de.PRESENT, 2, de.SINGULAR, de.IMPERATIVE)),
            ("sein", "seien", (de.PRESENT, 1, de.PLURAL, de.IMPERATIVE)),
            ("sein", "seid", (de.PRESENT, 2, de.PLURAL, de.IMPERATIVE)),
            ("sein", u"sei", (de.PRESENT, 1, de.SINGULAR, de.SUBJUNCTIVE)),
            ("sein", u"seiest", (de.PRESENT, 2, de.SINGULAR, de.SUBJUNCTIVE)),
            ("sein", u"sei", (de.PRESENT, 3, de.SINGULAR, de.SUBJUNCTIVE)),
            ("sein", u"seien", (de.PRESENT, 1, de.PLURAL, de.SUBJUNCTIVE)),
            ("sein", u"seiet", (de.PRESENT, 2, de.PLURAL, de.SUBJUNCTIVE)),
            ("sein", u"seien", (de.PRESENT, 3, de.PLURAL, de.SUBJUNCTIVE)),
            ("sein", u"wäre", (de.PAST, 1, de.SINGULAR, de.SUBJUNCTIVE)),
            ("sein", u"wärest", (de.PAST, 2, de.SINGULAR, de.SUBJUNCTIVE)),
            ("sein", u"wäre", (de.PAST, 3, de.SINGULAR, de.SUBJUNCTIVE)),
            ("sein", u"wären", (de.PAST, 1, de.PLURAL, de.SUBJUNCTIVE)),
            ("sein", u"wäret", (de.PAST, 2, de.PLURAL, de.SUBJUNCTIVE)),
            ("sein", u"wären", (de.PAST, 3, de.PLURAL, de.SUBJUNCTIVE))):
        self.assertEqual(de.conjugate(v1, tense), v2)
    # Call form of print: valid on Python 3 and prints the same text on Python 2.
    print("pattern.de.conjugate()")
def lemma_via_patternlib(token, pos):
    """Return the base form of *token*, chosen by its part-of-speech tag.

    token -- the word form to reduce
    pos   -- its part-of-speech tag (a string)

    Tags that match none of the cases below leave the token unchanged.
    """
    # Noun tagged exactly 'NP': singularize it.
    if pos == 'NP':
        return singularize(token)
    # Verb tags: conjugate() without a tense, to get the infinitive.
    if pos.startswith('V'):
        return conjugate(token)
    # Adjective or adverb tags: get the base (predicative) form.
    if pos.startswith(('ADJ', 'ADV')):
        return predicative(token)
    return token
def test_conjugate(self):
    # Assert different tenses with different conjugations.
    # Each case is (verb to conjugate, expected form, tense constant).
    for (v1, v2, tense) in (
            ("sein", "sein", de.INFINITIVE),
            ("sein", "bin", de.PRESENT_1ST_PERSON_SINGULAR),
            ("sein", "bist", de.PRESENT_2ND_PERSON_SINGULAR),
            ("sein", "ist", de.PRESENT_3RD_PERSON_SINGULAR),
            ("sein", "sind", de.PRESENT_1ST_PERSON_PLURAL),
            ("sein", "seid", de.PRESENT_2ND_PERSON_PLURAL),
            ("sein", "sind", de.PRESENT_3RD_PERSON_PLURAL),
            ("sein", "seiend", de.PRESENT_PARTICIPLE),
            ("sein", "war", de.PAST_1ST_PERSON_SINGULAR),
            ("sein", "warst", de.PAST_2ND_PERSON_SINGULAR),
            ("sein", "war", de.PAST_3RD_PERSON_SINGULAR),
            ("sein", "waren", de.PAST_1ST_PERSON_PLURAL),
            ("sein", "wart", de.PAST_2ND_PERSON_PLURAL),
            ("sein", "waren", de.PAST_3RD_PERSON_PLURAL),
            ("sein", "gewesen", de.PAST_PARTICIPLE),
            ("sein", "sei", de.IMPERATIVE_2ND_PERSON_SINGULAR),
            ("sein", "seien", de.IMPERATIVE_1ST_PERSON_PLURAL),
            ("sein", "seid", de.IMPERATIVE_2ND_PERSON_PLURAL),
            ("sein", "seien", de.IMPERATIVE_3RD_PERSON_PLURAL),
            ("sein", u"sei", de.PRESENT_SUBJUNCTIVE_1ST_PERSON_SINGULAR),
            ("sein", u"seiest", de.PRESENT_SUBJUNCTIVE_2ND_PERSON_SINGULAR),
            ("sein", u"sei", de.PRESENT_SUBJUNCTIVE_3RD_PERSON_SINGULAR),
            ("sein", u"seien", de.PRESENT_SUBJUNCTIVE_1ST_PERSON_PLURAL),
            ("sein", u"seiet", de.PRESENT_SUBJUNCTIVE_2ND_PERSON_PLURAL),
            ("sein", u"seien", de.PRESENT_SUBJUNCTIVE_3RD_PERSON_PLURAL),
            ("sein", u"wäre", de.PAST_SUBJUNCTIVE_1ST_PERSON_SINGULAR),
            ("sein", u"wärest", de.PAST_SUBJUNCTIVE_2ND_PERSON_SINGULAR),
            ("sein", u"wäre", de.PAST_SUBJUNCTIVE_3RD_PERSON_SINGULAR),
            ("sein", u"wären", de.PAST_SUBJUNCTIVE_1ST_PERSON_PLURAL),
            ("sein", u"wäret", de.PAST_SUBJUNCTIVE_2ND_PERSON_PLURAL),
            ("sein", u"wären", de.PAST_SUBJUNCTIVE_3RD_PERSON_PLURAL)):
        self.assertEqual(de.conjugate(v1, tense), v2)
    # Call form of print: valid on Python 3 and prints the same text on Python 2.
    print("pattern.de.conjugate()")
def check_if_subjunctive(word):
    """Return True if a word is subjunctive, False otherwise.

    word -- a Blob Word object that is a verb

    The word is lemmatized, and the lemma is conjugated in the SUBJUNCTIVE
    mood for every combination of tense (PRESENT, PAST), number (SG, PL) and
    person (1, 2, 3). The word counts as subjunctive when it equals any of
    those twelve forms.

    TODO: must the word be a verb? Saya?
    TODO: write tests for this
    """
    # lemmatize() result is indexed [0][0] to obtain the lemma of the word.
    lemma = PatternParserLemmatizer().lemmatize(word)[0][0]
    # Loop nesting reproduces the original check order (all present forms
    # before past ones, singular before plural, persons 1-3); any() stops
    # at the first matching form.
    return any(
        conjugate(lemma, tense, person, number, mood=SUBJUNCTIVE) == word
        for tense in (PRESENT, PAST)
        for number in (SG, PL)
        for person in (1, 2, 3)
    )
f = open('besser_gehts_nicht.txt') f = open('lives_short.txt') #python 3 syntax f = open('besser_gehts_nicht.txt', encoding='utf-8') raw = f.read() print raw from pattern.de import gender, MALE, FEMALE, NEUTRAL from pattern.de import article, DEFINITE, FEMALE, OBJECT print gender('Katze') print article('Katze', DEFINITE, gender=FEMALE, role=OBJECT) from pattern.de import conjugate from pattern.de import INFINITIVE, PRESENT, SG, SUBJUNCTIVE print conjugate('sehe', INFINITIVE) print conjugate('sehen', PRESENT, 1, SG, mood=SUBJUNCTIVE) from pattern.de import parse, parsetree, split """ import pattern.de pattern.de.verbs - 1962 verbs. pattern.de.tenses pattern.de.tenses('erblicken') pattern.de.conjugate.__doc__ """ """ lst=parse(raw) (Pdb) split(lst)[0]