def test_generate(stemming_file,
                  lexicon_file,
                  test_file,
                  global_tags=None,
                  debug=False):
    """
    generates all the forms in the test_file using the lexicon_file and
    stemming_file and outputs any discrepancies (or everything if debug on)
    """

    ginflexion = GreekInflexion(stemming_file, lexicon_file)

    with open(test_file) as f:
        for test in yaml.safe_load(f):
            source = test.pop("source", None)
            test.pop("test_length", False)
            lemma = test.pop("lemma")
            tags = set(test.pop("tags", []))
            if source:
                tags.update({source})
            if global_tags:
                tags.update(global_tags)

            segmented_lemma = ginflexion.segmented_lemmas.get(lemma)
            for key, form in sorted(test.items()):
                stem = ginflexion.find_stems(lemma, key, tags)
                generated = ginflexion.generate(lemma, key, tags)

                if stem:
                    stem_guess = None
                else:
                    stem_guess = [
                        stem for key, stem in ginflexion.possible_stems(
                            form, "^" + key + "$")
                    ]

                if [strip_length(w) for w in sorted(generated)] == \
                        [strip_length(w) for w in sorted(form.split("/"))]:
                    correct = "✓"
                else:
                    correct = "✕"
                if debug or correct == "✕":
                    output_item(lemma, segmented_lemma, key, None, form, None,
                                stem, stem_guess, None, None, generated,
                                correct)
示例#2
0
def test_generate(
    stemming_file, lexicon_file, test_file,
    global_tags=None, debug=False
):
    """
    generates all the forms in the test_file using the lexicon_file and
    stemming_file and outputs any discrepancies (or everything if debug on)
    """

    ginflexion = GreekInflexion(stemming_file, lexicon_file)

    with open(test_file) as f:
        for test in yaml.load(f):
            source = test.pop("source", None)
            test.pop("test_length", False)
            lemma = test.pop("lemma")
            tags = set(test.pop("tags", []))
            if source:
                tags.update({source})
            if global_tags:
                tags.update(global_tags)

            for key, form in sorted(test.items()):
                stem = ginflexion.find_stems(lemma, key, tags)
                generated = ginflexion.generate(lemma, key, tags)

                if stem:
                    stem_guess = None
                else:
                    stem_guess = [
                        stem for key, stem in
                        ginflexion.possible_stems(form, "^" + key + "$")]

                if [strip_length(w) for w in sorted(generated)] == \
                        [strip_length(w) for w in sorted(form.split("/"))]:
                    correct = "✓"
                else:
                    correct = "✕"
                if debug or correct == "✕":
                    output_item(
                        lemma, key, None, form, None, stem,
                        stem_guess, None, None, generated, correct)
def test_generate(
    stemming_file, lexicon_file, test_file,
    global_tags=None, debug=False
):
    """
    generates all the forms in the test_file using the lexicon_file and
    stemming_file and outputs any discrepancies (or everything if debug on)
    """

    ginflexion = GreekInflexion(stemming_file, lexicon_file)

    with open(test_file) as f:
        for test in yaml.load(f):
            source = test.pop("source", None)
            test.pop("test_length", False)
            lemma = test.pop("lemma")
            tags = set(test.pop("tags", []))
            if source:
                tags.update({source})
            if global_tags:
                tags.update(global_tags)

            for key, form in sorted(test.items()):
                c = form.count("/") + 1
                stem = ginflexion.find_stems(lemma, key, tags)
                generated = ginflexion.generate(lemma, key, tags)

                if [strip_length(w) for w in sorted(generated)] == \
                        [strip_length(w) for w in sorted(form.split("/"))]:
                    correct = "✓"
                else:
                    correct = "✕"
                if debug or correct == "✕":
                    print()
                    print(lemma, key, form)
                    print("stem: {}".format(stem))
                    print("generate[{}/{}{}]:".format(
                        len(generated), c, correct))
                    for generated_form, details in generated.items():
                        print("    - {}".format(generated_form))
                        for detail in details:
                            print("        {}".format(detail))
示例#4
0
                continue

            tags = set([
                "final-nu-aai.3s",
                "oida-yai3p-variant",
                "no-final-nu-yai.3s",
                "late-pluperfect-singulars",
                "sigma-loss-pmd.2s",
                "HGrk",
            ])

            c = form.count("/") + 1
            stem = ginflexion.find_stems(lemma, key, tags)
            generated = ginflexion.generate(lemma, key, tags)

            if strip_length(form) in [
                    strip_length(w) for w in sorted(generated)
            ]:
                correct = "✓"
                stem_guess = None
            else:
                correct = "✕"
                incorrect_count += 1
                possible_stems = [(key_to_part(a), b, a)
                                  for a, b in ginflexion.possible_stems(form)]
                likely_stems = [(key_to_part(a), b)
                                for a, b in ginflexion.possible_stems(
                                    form, "^" + key + "$")]
                possible_parses = ginflexion.parse(form)

            if debug or correct == "✕":
                "HGrk",
            ])

            c = form.count("/") + 1
            stem = ginflexion.find_stems(lemma, key, tags)
            generated = ginflexion.generate(lemma, key, tags)

            if stem:
                stem_guess = None
            else:
                stem_guess = [
                    stem for key, stem in ginflexion.possible_stems(
                        form, "^" + key + "$")
                ]

            if [strip_length(w) for w in sorted(generated)] == \
                    [strip_length(w) for w in sorted(form.split("/"))]:
                correct = "✓"
            else:
                correct = "✕"
            if correct == "✕":
                if stem_guess:
                    STEM_GUESSES[lemma][key_to_part(key)].add(
                        frozenset(stem_guess))

for lemma, parts in sorted(STEM_GUESSES.items()):
    print()
    print("{}:".format(lemma))
    print("    stems:".format(lemma))
    for part, stem_sets in sorted(parts.items()):
        stem = set.intersection(*(set(s) for s in stem_sets))