Пример #1
0
                "late-pluperfect-singulars",
                "sigma-loss-pmd.2s",
                "HGrk",
            ])

            # One more than the number of "/" separators in the attested form.
            # NOTE(review): `c` is not read anywhere in the visible code.
            c = form.count("/") + 1
            stem = ginflexion.find_stems(lemma, key, tags)
            generated = ginflexion.generate(lemma, key, tags)

            # The form passes when its strip_length() value equals the
            # strip_length() value of at least one generated form.
            if strip_length(form) in [
                    strip_length(w) for w in sorted(generated)
            ]:
                correct = "✓"
                stem_guess = None
            else:
                correct = "✕"
                incorrect_count += 1
                # Diagnostics gathered only for failing forms: every candidate
                # stem for the form, the candidates obtained when `key`
                # (wrapped in "^"..."$") is passed as second argument, and the
                # parses of the form.
                possible_stems = [(key_to_part(a), b, a)
                                  for a, b in ginflexion.possible_stems(form)]
                likely_stems = [(key_to_part(a), b)
                                for a, b in ginflexion.possible_stems(
                                    form, "^" + key + "$")]
                possible_parses = ginflexion.parse(form)

            # NOTE(review): possible_stems, likely_stems and possible_parses
            # are assigned only on the failing branch above. With `debug`
            # truthy and a passing form they are read here without having
            # been set in this pass -- confirm they are initialised earlier.
            if debug or correct == "✕":
                output_item(lemma, key, key_to_part(key), form, None, stem,
                            possible_stems, likely_stems, possible_parses,
                            generated, correct)

# Final tally of failing forms against the total.
print("{}/{} incorrect".format(incorrect_count, total_count))
            # Guess stems only when none was found; candidates come from
            # possible_stems() called with `key` wrapped in "^"..."$".
            stem_guess = None if stem else [
                candidate
                for _, candidate in ginflexion.possible_stems(
                    form, "^" + key + "$")
            ]

            # Compare the full sorted sets of forms, after strip_length():
            # everything generated versus every "/"-separated attested form.
            produced_forms = [strip_length(w) for w in sorted(generated)]
            attested_forms = [strip_length(w) for w in sorted(form.split("/"))]
            correct = "✓" if produced_forms == attested_forms else "✕"

            # Record the guesses for failing forms, grouped by lemma and part.
            if correct == "✕" and stem_guess:
                STEM_GUESSES[lemma][key_to_part(key)].add(
                    frozenset(stem_guess))

# Print, for every lemma and part, the stems suggested by the collected
# guesses. Each line is tagged with how many stems all guess sets share:
# "@0" (none, the raw sets are shown), "@1" (exactly one) or "@m" (several).
for lemma, parts in sorted(STEM_GUESSES.items()):
    print()
    print("{}:".format(lemma))
    # Fixed heading: the string has no placeholder, so nothing is formatted.
    print("    stems:")
    for part, stem_sets in sorted(parts.items()):
        # Stems present in every guess set recorded for this part.
        stem = set.intersection(*(set(s) for s in stem_sets))
        if not stem:
            print("        {}: {}  # @0".format(part, stem_sets))
        elif len(stem) == 1:
            print("        {}: {}  # @1".format(part, stem.pop()))
        else:
            print("        {}: {}  # @m".format(part, stem))
                "sigma-loss-pmd.2s",
                "HGrk",
            ])

            # One more than the number of "/" separators in the attested form.
            # NOTE(review): `c` is not read anywhere in the visible code.
            c = form.count("/") + 1
            stem = ginflexion.find_stems(lemma, key, tags)
            generated = ginflexion.generate(lemma, key, tags)
            # Looked up with .get(), so this is None when the lemma has no
            # entry in ginflexion.segmented_lemmas.
            segmented_lemma = ginflexion.segmented_lemmas.get(lemma)

            # The form passes when its strip_length() value equals the
            # strip_length() value of at least one generated form.
            if strip_length(form) in [
                    strip_length(w) for w in sorted(generated)
            ]:
                correct = "✓"
                stem_guess = None
            else:
                correct = "✕"
                incorrect_count += 1
                # Diagnostics gathered only for failing forms: every candidate
                # stem for the form, the candidates obtained when `key`
                # (wrapped in "^"..."$") is passed as second argument, and the
                # parses of the form.
                possible_stems = [(key_to_part(a), b, a)
                                  for a, b in ginflexion.possible_stems(form)]
                likely_stems = [(key_to_part(a), b)
                                for a, b in ginflexion.possible_stems(
                                    form, "^" + key + "$")]
                possible_parses = ginflexion.parse(form)

            # NOTE(review): possible_stems, likely_stems and possible_parses
            # are assigned only on the failing branch above. With `debug`
            # truthy and a passing form they are read here without having
            # been set in this pass -- confirm they are initialised earlier.
            if debug or correct == "✕":
                output_item(lemma, segmented_lemma, key, key_to_part(key),
                            form, None, stem, possible_stems, likely_stems,
                            possible_parses, generated, correct)

# Final tally of failing forms against the total.
print("{}/{} incorrect".format(incorrect_count, total_count))
    # Guess stems only when none was found; candidates come from
    # possible_stems2() called with `key` wrapped in "^"..."$".
    stem_guess = None if stem else [
        candidate
        for _, candidate in ginflexion.possible_stems2(form, "^" + key + "$")
    ]

    # Compare the full sorted sets of forms, after strip_length():
    # everything generated versus every "/"-separated attested form.
    produced_forms = [strip_length(w) for w in sorted(generated)]
    attested_forms = [strip_length(w) for w in sorted(form.split("/"))]
    correct = "✓" if produced_forms == attested_forms else "✕"

    # Record the guesses for failing forms, grouped by lemma and part.
    if correct == "✕" and stem_guess:
        STEM_GUESSES[lemma][key_to_part(key)].add(frozenset(stem_guess))

# Rank used to order parts by their second element: "-" sorts before "+".
# Built once instead of on every sort-key call.
part_sign_rank = {"-": 0, "+": 1}

# Print, for every lemma and part, the stems suggested by the collected
# guesses. Each line is tagged with how many stems all guess sets share:
# "@0" (none, the raw sets are shown) or "@1" (exactly one).
for lemma, parts in sorted(STEM_GUESSES.items()):
    print()
    print("{}:".format(lemma))
    # Fixed heading: the string has no placeholder, so nothing is formatted.
    print("    stems:")
    # Parts are ordered by their first element, then by the rank of their
    # second element; any second element other than "-" or "+" raises
    # KeyError.
    for part, stem_sets in sorted(
            parts.items(),
            key=lambda x: (x[0][0], part_sign_rank[x[0][1]])):
        # Stems present in every guess set recorded for this part.
        stem = set.intersection(*(set(s) for s in stem_sets))
        # NOTE(review): nothing is printed when several stems are shared --
        # confirm that is intended.
        if not stem:
            print("        {}: {}  # @0".format(part, stem_sets))
        elif len(stem) == 1:
            print("        {}: {}  # @1".format(part, stem.pop()))
            "HGrk",
        ])

        # One more than the number of "/" separators in the attested form.
        # NOTE(review): `c` is not read anywhere in the visible code.
        c = form.count("/") + 1
        stem = ginflexion.find_stems(lemma, key, tags)
        generated = ginflexion.generate(lemma, key, tags)

        # The form passes when its strip_length() value equals the
        # strip_length() value of at least one generated form.
        if strip_length(form) in [
                strip_length(w) for w in sorted(generated)]:
            correct = "✓"
        else:
            correct = "✕"
            incorrect_count += 1
            # Remember which keys failed for this lemma.
            summary_by_lemma[lemma].add(key)
            # Every candidate stem for the form, as (part, stem, key) triples.
            possible_stems = [
                (key_to_part(a), b, a)
                for a, b in ginflexion.possible_stems(form)
            ]
            # Candidates obtained when `key`, wrapped in "^"..."$", is passed
            # as the second argument.
            likely_stems = [
                (key_to_part(a), b)
                for a, b in ginflexion.possible_stems(form, "^" + key + "$")
            ]
            # For each parse of the form, also keep the set of forms that
            # generate() yields for that parse's lemma and key under the
            # current tags.
            # NOTE(review): possible_stems, likely_stems and possible_parses
            # are assigned only on this failing branch; any use on the
            # passing path needs them set elsewhere -- confirm.
            possible_parses = []
            for plemma, pparse in ginflexion.parse(form):
                possible_parses.append((
                    plemma,
                    pparse,
                    set(ginflexion.generate(plemma, pparse, tags))
                ))

        if debug or correct == "✕":