Пример #1
0
def test_generation(test_file, lexicon_file):
    """Check the forms a lexicon generates against a YAML file of expected forms.

    Every entry of the YAML document is treated as a mapping holding a
    ``lemma`` key, an optional ``location`` key (handed to the lexicon as
    ``context``; defaults to ``""``) and any number of ``parse: form``
    pairs.  For each pair the lexicon's output is compared with the
    expected form after both have gone through ``strip_length``, and the
    outcome is recorded on the counter:

    * fail    -- the lexicon returned ``None``, or none of the
                 ``/``-separated alternatives it returned matches;
    * success -- the whole generated string matches the expected form;
    * skip    -- the expected form is only one of several alternatives.

    The accumulated results are reported through ``counter.results()``.

    Args:
        test_file: Path of the YAML file with the expected forms.
        lexicon_file: Path passed to ``Lexicon`` to load the lexicon.
    """
    lexicon = Lexicon(lexicon_file)
    counter = Counter()

    # Read as UTF-8 explicitly so that non-ASCII test data does not depend
    # on the platform's default encoding.
    with open(test_file, encoding="utf-8") as f:
        # NOTE: this used to be a bare ``yaml.load(f)``.  That call is
        # rejected by PyYAML >= 6 (a Loader is mandatory) and, on older
        # versions, can construct arbitrary Python objects from the file.
        # ``safe_load`` only builds plain YAML types.  An empty document
        # loads as None, which is treated as "no tests".
        tests = yaml.safe_load(f) or []

    for test in tests:
        lemma = test.pop("lemma")
        location = test.pop("location", "")
        # Whatever is left in the mapping is a parse -> expected form pair.
        for parse, form in test.items():
            predicted = lexicon.generate(lemma, parse, context=location)
            if predicted is None:
                counter.fail("didn't know how to work out {} {} {}".format(lemma, parse, form))
                continue

            expected = strip_length(form)
            if expected == strip_length(predicted):
                counter.success()
                continue

            # The lexicon may return several alternatives joined by "/".
            alternatives = [strip_length(p) for p in predicted.split("/")]
            if expected in alternatives:
                counter.skip("{} {} {} {} {}".format(lemma, parse, form, predicted, location))
            else:
                counter.fail("{} {} got {} instead of {} in {}".format(lemma, parse, predicted, form, location))

    counter.results()
Пример #2
0
        # Pull the fields used by the check out of the current row.
        ccat_parse = row["ccat-parse"]
        norm = row["norm"]
        lemma = row["lemma"]
        # Only rows whose part-of-speech code is "V-" are checked.
        # NOTE(review): ccat_pos is assigned above this excerpt, presumably
        # from row["ccat-pos"] -- confirm against the full file.
        if ccat_pos != "V-":
            continue

        # Build the parse key for the lexicon.  The character at index 3 of
        # the parse code selects which other positions are included; any
        # value other than "N", "P" or "I" is skipped without being counted.
        if ccat_parse[3] == "N":
            # Characters 1-3 only.
            parse = ccat_parse[1:4]
        elif ccat_parse[3] == "P":
            # Characters 1-3, a dot, then characters 4-6.
            parse = ccat_parse[1:4] + "." + ccat_parse[4:7]
        elif ccat_parse[3] == "I":
            # Characters 1-3, a dot, then characters 0 and 5.
            parse = ccat_parse[1:4] + "." + ccat_parse[0] + ccat_parse[5]
        else:
            continue

        # Compare the generated form with the row's "norm" value; both sides
        # go through strip_length before being compared.
        predicted = lexicon.generate(lemma, parse)
        if predicted is None:
            # The lexicon produced nothing for this lemma/parse.
            counter.fail("didn't know how to work out {} {} {}".format(
                lemma, parse, norm))
        elif strip_length(norm) == strip_length(predicted):
            # The whole generated string matches.
            counter.success()
            continue
        elif strip_length(norm) not in [
                strip_length(p) for p in predicted.split("/")
        ]:
            # None of the "/"-separated alternatives matches.
            counter.fail("{} {} got {} instead of {} in {}".format(
                lemma, parse, predicted, norm, row["bcv"]))
        else:
            # The form matches one of several alternatives: counted as a
            # skip rather than a success.
            counter.skip("{} {} {} {} {}".format(lemma, parse, norm, predicted,
                                                 row["bcv"]))
Пример #3
0
            # Add norm to this lemma's "present.mp1.actual" collection.
            # NOTE(review): the condition guarding this line is above the
            # excerpt and not visible here.
            words[lemma]["present.mp1.actual"].add(norm)

        # The remaining checks file norm by characters 1-3 of the parse code.
        # "AAN" goes to the "aorist.act.actual" collection.
        if ccat_parse[1:4] == "AAN":
            words[lemma]["aorist.act.actual"].add(norm)

        # "AMN" goes to the "aorist.mp1.actual" collection.
        if ccat_parse[1:4] == "AMN":
            words[lemma]["aorist.mp1.actual"].add(norm)

        # "APN" goes to the "aorist.mp2.actual" collection.
        if ccat_parse[1:4] == "APN":
            words[lemma]["aorist.mp2.actual"].add(norm)


# Lexicon used to generate the forms that are compared with the collected ones.
lexicon = Lexicon("lexicons/morphgnt.yaml")

# Walk the collected lemmas in collation order.
for k in sorted(words.keys(), key=collator.sort_key):
    # Ask the lexicon for all six parse codes up front, in a fixed order.
    (
        PAN_generated,
        PMN_generated,
        PPN_generated,
        AAN_generated,
        AMN_generated,
        APN_generated,
    ) = [
        lexicon.generate(k, code)
        for code in ("PAN", "PMN", "PPN", "AAN", "AMN", "APN")
    ]

    # A generated form is recorded only when the lemma has no entry under
    # the corresponding "actual" key.  "PMN" and "PPN" both feed the
    # "present.mp1" entries.
    if PAN_generated and "present.act.actual" not in words[k]:
        words[k]["present.act.generated"].add(strip_length(PAN_generated))
    if PMN_generated and "present.mp1.actual" not in words[k]:
        words[k]["present.mp1.generated"].add(strip_length(PMN_generated))
    if PPN_generated and "present.mp1.actual" not in words[k]:
        words[k]["present.mp1.generated"].add(strip_length(PPN_generated))
Пример #4
0
        # File norm under the lemma according to characters 1-3 of the parse
        # code.  "PPN" goes to the "present.mp1.actual" collection.
        if ccat_parse[1:4] == "PPN":
            words[lemma]["present.mp1.actual"].add(norm)

        # "AAN" goes to the "aorist.act.actual" collection.
        if ccat_parse[1:4] == "AAN":
            words[lemma]["aorist.act.actual"].add(norm)

        # "AMN" goes to the "aorist.mp1.actual" collection.
        if ccat_parse[1:4] == "AMN":
            words[lemma]["aorist.mp1.actual"].add(norm)

        # "APN" goes to the "aorist.mp2.actual" collection.
        if ccat_parse[1:4] == "APN":
            words[lemma]["aorist.mp2.actual"].add(norm)

# Lexicon used to generate the forms that are compared with the collected ones.
lexicon = Lexicon("lexicons/morphgnt.yaml")

# Walk the collected lemmas in collation order.
for k in sorted(words.keys(), key=collator.sort_key):
    # Ask the lexicon for all six parse codes up front, in a fixed order.
    (
        PAN_generated,
        PMN_generated,
        PPN_generated,
        AAN_generated,
        AMN_generated,
        APN_generated,
    ) = [
        lexicon.generate(k, code)
        for code in ("PAN", "PMN", "PPN", "AAN", "AMN", "APN")
    ]

    # A generated form is recorded only when the lemma has no entry under
    # the corresponding "actual" key.  "PMN" and "PPN" both feed the
    # "present.mp1" entries.
    if PAN_generated and "present.act.actual" not in words[k]:
        words[k]["present.act.generated"].add(strip_length(PAN_generated))
    if PMN_generated and "present.mp1.actual" not in words[k]:
        words[k]["present.mp1.generated"].add(strip_length(PMN_generated))
    if PPN_generated and "present.mp1.actual" not in words[k]:
        words[k]["present.mp1.generated"].add(strip_length(PPN_generated))
# Compare every row tagged "V-" in books 1-27 with the form the lexicon
# generates for the same lemma and parse key, tallying the outcome on counter.
for book_num in range(1, 28):
    for row in morphgnt_rows(book_num):
        ccat_pos = row["ccat-pos"]
        ccat_parse = row["ccat-parse"]
        norm = row["norm"]
        lemma = row["lemma"]
        if ccat_pos != "V-":
            continue

        # The character at index 3 decides how the parse key is assembled;
        # rows with any value other than "N", "P" or "I" are not checked.
        form_code = ccat_parse[3]
        if form_code not in ("N", "P", "I"):
            continue
        parse = ccat_parse[1:4]
        if form_code == "P":
            parse += "." + ccat_parse[4:7]
        elif form_code == "I":
            parse += "." + ccat_parse[0] + ccat_parse[5]

        predicted = lexicon.generate(lemma, parse)
        if predicted is None:
            counter.fail("didn't know how to work out {} {} {}".format(lemma, parse, norm))
            continue

        expected = strip_length(norm)
        if expected == strip_length(predicted):
            counter.success()
            continue

        # The lexicon may return several alternatives joined by "/": a match
        # against one of them is counted as a skip, no match as a failure.
        alternatives = [strip_length(p) for p in predicted.split("/")]
        if expected in alternatives:
            counter.skip("{} {} {} {} {}".format(lemma, parse, norm, predicted, row["bcv"]))
        else:
            counter.fail("{} {} got {} instead of {} in {}".format(lemma, parse, predicted, norm, row["bcv"]))

counter.results()