def test_generation(test_file, lexicon_file):
    """Compare forms generated by a lexicon with the forms listed in a YAML file.

    The YAML document is expected to load as a sequence of mappings. From
    each mapping the "lemma" key is removed (required) and the "location"
    key is removed (optional, defaults to ""); every remaining key/value
    pair is treated as ``parse -> expected form``.

    For each pair the lexicon is asked to generate the form and the outcome
    is reported to a ``Counter``:

    * fail    -- the lexicon returned ``None``, or the expected form matches
                 neither the whole prediction nor any "/"-separated
                 alternative in it;
    * success -- the expected form equals the whole prediction;
    * skip    -- the expected form only matches one of the "/"-separated
                 alternatives of the prediction.

    All comparisons are made after applying ``strip_length`` to both sides.

    Args:
        test_file: Path of the YAML file holding the test entries.
        lexicon_file: Path handed to ``Lexicon`` to build the lexicon.

    Returns:
        None. ``counter.results()`` is called once all entries are processed.
    """
    lexicon = Lexicon(lexicon_file)
    counter = Counter()

    # Explicit encoding: the platform default is not guaranteed to be UTF-8,
    # which would break non-ASCII test data.
    with open(test_file, encoding="utf-8") as f:
        # NOTE(review): this used to be a bare ``yaml.load(f)``. Without a
        # Loader argument that call raises TypeError on PyYAML >= 6 and can
        # construct arbitrary Python objects on older versions. safe_load
        # accepts plain mappings/sequences/scalars only -- confirm the test
        # files do not rely on custom YAML tags.
        # An empty document loads as None; treat it as "no tests".
        tests = yaml.safe_load(f) or []

    for test in tests:
        lemma = test.pop("lemma")
        location = test.pop("location", "")

        for parse, form in test.items():
            predicted = lexicon.generate(lemma, parse, context=location)

            if predicted is None:
                counter.fail("didn't know how to work out {} {} {}".format(lemma, parse, form))
                continue

            expected = strip_length(form)
            if expected == strip_length(predicted):
                counter.success()
            elif expected not in [strip_length(p) for p in predicted.split("/")]:
                counter.fail("{} {} got {} instead of {} in {}".format(lemma, parse, predicted, form, location))
            else:
                # Matches one alternative of an ambiguous prediction.
                counter.skip("{} {} {} {} {}".format(lemma, parse, form, predicted, location))

    counter.results()
#!/usr/bin/env python3

from pysblgnt import morphgnt_rows
from characters import strip_length
from utils import Counter
from verbs import Lexicon

lexicon = Lexicon("lexicons/morphgnt.yaml")
counter = Counter()

# Rows are requested for book numbers 1 through 27.
for book_num in range(1, 28):
    for row in morphgnt_rows(book_num):
        ccat_pos = row["ccat-pos"]
        ccat_parse = row["ccat-parse"]
        norm = row["norm"]
        lemma = row["lemma"]

        # Only rows whose ccat-pos is "V-" are processed further.
        if ccat_pos != "V-":
            continue

        # The character at index 3 of ccat-parse decides how the parse code
        # is assembled; any value other than "N", "P" or "I" skips the row.
        selector = ccat_parse[3]
        head = ccat_parse[1:4]

        if selector == "N":
            parse = head
        elif selector == "P":
            # head + "." + characters 4..6
            parse = ".".join((head, ccat_parse[4:7]))
        elif selector == "I":
            # head + "." + character 0 + character 5
            parse = "{}.{}{}".format(head, ccat_parse[0], ccat_parse[5])
        else:
            continue
words[lemma]["present.mp1.actual"].add(norm) if ccat_parse[1:4] == "PPN": words[lemma]["present.mp1.actual"].add(norm) if ccat_parse[1:4] == "AAN": words[lemma]["aorist.act.actual"].add(norm) if ccat_parse[1:4] == "AMN": words[lemma]["aorist.mp1.actual"].add(norm) if ccat_parse[1:4] == "APN": words[lemma]["aorist.mp2.actual"].add(norm) lexicon = Lexicon("lexicons/morphgnt.yaml") for k in sorted(words.keys(), key=collator.sort_key): PAN_generated = lexicon.generate(k, "PAN") PMN_generated = lexicon.generate(k, "PMN") PPN_generated = lexicon.generate(k, "PPN") AAN_generated = lexicon.generate(k, "AAN") AMN_generated = lexicon.generate(k, "AMN") APN_generated = lexicon.generate(k, "APN") if PAN_generated: if "present.act.actual" not in words[k]: words[k]["present.act.generated"].add(strip_length(PAN_generated)) if PMN_generated: if "present.mp1.actual" not in words[k]: words[k]["present.mp1.generated"].add(strip_length(PMN_generated))
# NOTE(review): the original indentation of this fragment was lost. The
# checks below read ccat_parse, lemma, norm and words, none of which are
# assigned in this fragment; they presumably run once per row inside a loop
# that is not visible here -- confirm the nesting before relying on this
# flat layout.

# Store the observed form (norm) for the lemma under a key chosen by
# characters 1..3 of ccat_parse.
if ccat_parse[1:4] == "PMN":
    words[lemma]["present.mp1.actual"].add(norm)
# "PPN" is stored under the same "present.mp1.actual" key as "PMN".
if ccat_parse[1:4] == "PPN":
    words[lemma]["present.mp1.actual"].add(norm)
if ccat_parse[1:4] == "AAN":
    words[lemma]["aorist.act.actual"].add(norm)
if ccat_parse[1:4] == "AMN":
    words[lemma]["aorist.mp1.actual"].add(norm)
if ccat_parse[1:4] == "APN":
    words[lemma]["aorist.mp2.actual"].add(norm)

lexicon = Lexicon("lexicons/morphgnt.yaml")

# Visit the collected lemmas in the order defined by collator.sort_key.
for k in sorted(words.keys(), key=collator.sort_key):
    # Ask the lexicon to generate a form of lemma k for each of six parse codes.
    PAN_generated = lexicon.generate(k, "PAN")
    PMN_generated = lexicon.generate(k, "PMN")
    # PPN/AAN/AMN/APN results are not used within this fragment; presumably
    # they are consumed by code that follows it -- TODO confirm.
    PPN_generated = lexicon.generate(k, "PPN")
    AAN_generated = lexicon.generate(k, "AAN")
    AMN_generated = lexicon.generate(k, "AMN")
    APN_generated = lexicon.generate(k, "APN")

    # A generated form (after strip_length) is stored only when the lexicon
    # returned something truthy and words[k] has no matching ".actual" key.
    if PAN_generated:
        if "present.act.actual" not in words[k]:
            words[k]["present.act.generated"].add(strip_length(PAN_generated))
    if PMN_generated:
        if "present.mp1.actual" not in words[k]:
            words[k]["present.mp1.generated"].add(strip_length(PMN_generated))
#!/usr/bin/env python3 from pysblgnt import morphgnt_rows from characters import strip_length from utils import Counter from verbs import Lexicon lexicon = Lexicon("lexicons/morphgnt.yaml") counter = Counter() for book_num in range(1, 28): for row in morphgnt_rows(book_num): ccat_pos = row["ccat-pos"] ccat_parse = row["ccat-parse"] norm = row["norm"] lemma = row["lemma"] if ccat_pos != "V-": continue if ccat_parse[3] == "N": parse = ccat_parse[1:4] elif ccat_parse[3] == "P": parse = ccat_parse[1:4] + "." + ccat_parse[4:7] elif ccat_parse[3] == "I": parse = ccat_parse[1:4] + "." + ccat_parse[0] + ccat_parse[5] else: