Exemplo n.º 1
0
def test_generate(stemming_file,
                  lexicon_file,
                  test_file,
                  global_tags=None,
                  debug=False):
    """
    Generate every form listed in test_file and report discrepancies.

    A GreekInflexion is built from stemming_file and lexicon_file. Each
    entry loaded from test_file is a mapping from which the optional
    "source", "test_length" and "tags" entries and the required "lemma"
    entry are removed; every remaining item is treated as a parse key
    mapped to the expected form(s), with alternatives separated by "/".

    The tags passed to the generator are the entry's own tags, plus its
    source (if any), plus global_tags (if any).

    A result counts as correct when the sorted generated forms and the
    sorted expected forms are equal after strip_length has been applied to
    each. Incorrect results (or all results, when debug is true) are
    reported through output_item. Nothing is returned.
    """

    ginflexion = GreekInflexion(stemming_file, lexicon_file)

    # The test data contains Greek text, so decode it explicitly instead of
    # depending on the platform's default encoding (assumes the YAML file is
    # stored as UTF-8, YAML's default encoding).
    with open(test_file, encoding="utf-8") as f:
        for test in yaml.safe_load(f):
            # Strip the metadata entries; whatever is left is key -> form.
            source = test.pop("source", None)
            test.pop("test_length", False)
            lemma = test.pop("lemma")
            tags = set(test.pop("tags", []))
            if source:
                tags.add(source)
            if global_tags:
                tags.update(global_tags)

            segmented_lemma = ginflexion.segmented_lemmas.get(lemma)
            for key, form in sorted(test.items()):
                stem = ginflexion.find_stems(lemma, key, tags)
                generated = ginflexion.generate(lemma, key, tags)

                if stem:
                    stem_guess = None
                else:
                    # No stem found: collect candidate stems for this exact
                    # key so they can be shown alongside the failure.
                    stem_guess = [
                        guess for _, guess in ginflexion.possible_stems(
                            form, "^" + key + "$")
                    ]

                if [strip_length(w) for w in sorted(generated)] == \
                        [strip_length(w) for w in sorted(form.split("/"))]:
                    correct = "✓"
                else:
                    correct = "✕"
                if debug or correct == "✕":
                    output_item(lemma, segmented_lemma, key, None, form, None,
                                stem, stem_guess, None, None, generated,
                                correct)
Exemplo n.º 2
0
def test_generate(
    stemming_file, lexicon_file, test_file,
    global_tags=None, debug=False
):
    """
    Generate every form listed in test_file and report discrepancies.

    A GreekInflexion is built from stemming_file and lexicon_file. Each
    entry loaded from test_file is a mapping from which the optional
    "source", "test_length" and "tags" entries and the required "lemma"
    entry are removed; every remaining item is treated as a parse key
    mapped to the expected form(s), with alternatives separated by "/".

    The tags passed to the generator are the entry's own tags, plus its
    source (if any), plus global_tags (if any).

    A result counts as correct when the sorted generated forms and the
    sorted expected forms are equal after strip_length has been applied to
    each. Incorrect results (or all results, when debug is true) are
    reported through output_item. Nothing is returned.
    """

    ginflexion = GreekInflexion(stemming_file, lexicon_file)

    # The test data contains Greek text, so decode it explicitly instead of
    # depending on the platform's default encoding (assumes the YAML file is
    # stored as UTF-8, YAML's default encoding).
    with open(test_file, encoding="utf-8") as f:
        # safe_load: the file is plain data, and yaml.load() without an
        # explicit Loader is unsafe and rejected by current PyYAML releases.
        for test in yaml.safe_load(f):
            # Strip the metadata entries; whatever is left is key -> form.
            source = test.pop("source", None)
            test.pop("test_length", False)
            lemma = test.pop("lemma")
            tags = set(test.pop("tags", []))
            if source:
                tags.add(source)
            if global_tags:
                tags.update(global_tags)

            for key, form in sorted(test.items()):
                stem = ginflexion.find_stems(lemma, key, tags)
                generated = ginflexion.generate(lemma, key, tags)

                if stem:
                    stem_guess = None
                else:
                    # No stem found: collect candidate stems for this exact
                    # key so they can be shown alongside the failure.
                    stem_guess = [
                        guess for _, guess in
                        ginflexion.possible_stems(form, "^" + key + "$")]

                if [strip_length(w) for w in sorted(generated)] == \
                        [strip_length(w) for w in sorted(form.split("/"))]:
                    correct = "✓"
                else:
                    correct = "✕"
                if debug or correct == "✕":
                    output_item(
                        lemma, key, None, form, None, stem,
                        stem_guess, None, None, generated, correct)
Exemplo n.º 3
0
def test_generate(
    stemming_file, lexicon_file, test_file,
    global_tags=None, debug=False
):
    """
    Generate every form listed in test_file and print discrepancies.

    A GreekInflexion is built from stemming_file and lexicon_file. Each
    entry loaded from test_file is a mapping from which the optional
    "source", "test_length" and "tags" entries and the required "lemma"
    entry are removed; every remaining item is treated as a parse key
    mapped to the expected form(s), with alternatives separated by "/".

    The tags passed to the generator are the entry's own tags, plus its
    source (if any), plus global_tags (if any).

    A result counts as correct when the sorted generated forms and the
    sorted expected forms are equal after strip_length has been applied to
    each. For incorrect results (or all results, when debug is true) the
    lemma, key, expected form, stem and each generated form with its
    details are printed to standard output. Nothing is returned.
    """

    ginflexion = GreekInflexion(stemming_file, lexicon_file)

    # The test data contains Greek text, so decode it explicitly instead of
    # depending on the platform's default encoding (assumes the YAML file is
    # stored as UTF-8, YAML's default encoding).
    with open(test_file, encoding="utf-8") as f:
        # safe_load: the file is plain data, and yaml.load() without an
        # explicit Loader is unsafe and rejected by current PyYAML releases.
        for test in yaml.safe_load(f):
            # Strip the metadata entries; whatever is left is key -> form.
            source = test.pop("source", None)
            test.pop("test_length", False)
            lemma = test.pop("lemma")
            tags = set(test.pop("tags", []))
            if source:
                tags.add(source)
            if global_tags:
                tags.update(global_tags)

            for key, form in sorted(test.items()):
                # Number of expected alternatives ("a/b" -> 2).
                c = form.count("/") + 1
                stem = ginflexion.find_stems(lemma, key, tags)
                generated = ginflexion.generate(lemma, key, tags)

                if [strip_length(w) for w in sorted(generated)] == \
                        [strip_length(w) for w in sorted(form.split("/"))]:
                    correct = "✓"
                else:
                    correct = "✕"
                if debug or correct == "✕":
                    print()
                    print(lemma, key, form)
                    print("stem: {}".format(stem))
                    # Header shows generated count / expected count + mark.
                    print("generate[{}/{}{}]:".format(
                        len(generated), c, correct))
                    for generated_form, details in generated.items():
                        print("    - {}".format(generated_form))
                        for detail in details:
                            print("        {}".format(detail))
Exemplo n.º 4
0
# --lexicon: path of the stem lexicon; its value is passed to GreekInflexion
# below. Note the help text abbreviates the default, which is actually
# "STEM_DATA/morphgnt_lexicon.yaml".
argparser.add_argument("--lexicon",
                       dest="lexicon",
                       default="STEM_DATA/morphgnt_lexicon.yaml",
                       help="path to stem lexicon file "
                       "(defaults to morphgnt_lexicon.yaml)")

# --stemming: path of the stemming rules file, also passed to GreekInflexion.
argparser.add_argument("--stemming",
                       dest="stemming",
                       default="stemming.yaml",
                       help="path to stemming rules file "
                       "(defaults to stemming.yaml)")

args = argparser.parse_args()

# Inflexion engine built from the two files selected on the command line.
ginflexion = GreekInflexion(args.stemming, args.lexicon)

# NOTE(review): debug and the two counters are only initialised here; their
# use is outside this fragment — presumably verbose output and running
# totals of checked/failed forms. Confirm against the rest of the script.
debug = False

incorrect_count = 0
total_count = 0

# NOTE(review): presumably forms to be skipped by the loop that follows;
# the code that consults this list is not visible here — confirm.
IGNORE_LIST = [
    "κουμ",
    "εφφαθα",
    "σαβαχθάνι",
    "θά",
]

for book_num in args.books:
    for row in morphgnt_rows(book_num):
# --lexicon: path of the initial stem lexicon; its value is passed to
# GreekInflexion below. Note the help text abbreviates the default, which is
# actually "STEM_DATA/morphgnt_lexicon.yaml".
argparser.add_argument("--lexicon",
                       dest="lexicon",
                       default="STEM_DATA/morphgnt_lexicon.yaml",
                       help="path to initial stem lexicon file "
                       "(defaults to morphgnt_lexicon.yaml)")

# --stemming: path of the stemming rules file, also passed to GreekInflexion.
argparser.add_argument("--stemming",
                       dest="stemming",
                       default="stemming.yaml",
                       help="path to stemming rules file "
                       "(defaults to stemming.yaml)")

args = argparser.parse_args()

# NOTE(review): strip_length=True is forwarded to GreekInflexion; its exact
# effect is defined in greek_inflexion, not here — confirm.
ginflexion = GreekInflexion(args.stemming, args.lexicon, strip_length=True)

# Two-level mapping whose inner mappings and innermost sets are created on
# first access. NOTE(review): the keys used to fill it are outside this
# fragment — presumably stem guesses grouped per lemma; confirm.
STEM_GUESSES = defaultdict(lambda: defaultdict(set))

for book_num in args.books:
    for row in morphgnt_rows(book_num):
        b, c, v = bcv_tuple(row["bcv"])
        if row["ccat-pos"] == "V-":
            lemma = row["lemma"]
            key = convert_parse(row["ccat-parse"])
            form = row["norm"]

            tags = set([
                "final-nu-aai.3s",
                "oida-yai3p-variant",
                "no-final-nu-yai.3s",
#!/usr/bin/env python3

from collections import defaultdict

from accent import strip_length
from greek_inflexion import GreekInflexion
from morphgnt_utils import key_to_part
from normalise import convert as norm_convert
from lxxmorph_utils import get_words, convert_parse, trim_multiples

# Inflexion engine built from the shared stemming rules and the LXX lexicon.
# NOTE(review): strip_length=True is forwarded to GreekInflexion; its exact
# effect is defined in greek_inflexion, not here — confirm.
ginflexion = GreekInflexion("stemming.yaml",
                            "STEM_DATA/lxx_lexicon.yaml",
                            strip_length=True)

# Input file whose words are read with get_words() below.
LXX_FILENAME = "lxxmorph/12.1Sam.mlxx"

# Two-level mapping whose inner mappings and innermost sets are created on
# first access. NOTE(review): the keys used to fill it are outside this
# fragment — presumably stem guesses grouped per lemma; confirm.
STEM_GUESSES = defaultdict(lambda: defaultdict(set))

for row in get_words(LXX_FILENAME):
    form = row["word"]
    preverb = row["preverb"]
    lemma = row["lemma"]
    key = convert_parse(row["parse"])
    if preverb:
        lemma = "+".join(preverb.split()) + "++" + lemma

    form = norm_convert(form, lemma, key)

    tags = set([
        "final-nu-aai.3s",
        "alt-apo-pl",
#!/usr/bin/env python3

from collections import defaultdict

from accent import strip_length
from greek_inflexion import GreekInflexion
from homer_utils import key_to_part, trim_multiples

# Inflexion engine built from the shared stemming rules and the Homer lexicon.
ginflexion = GreekInflexion("stemming.yaml", "STEM_DATA/homer_lexicon.yaml")

# Two-level mapping whose inner mappings and innermost sets are created on
# first access. NOTE(review): the keys used to fill it are outside this
# fragment — presumably stem guesses grouped per lemma; confirm.
STEM_GUESSES = defaultdict(lambda: defaultdict(set))

with open("homer-data/verbs.tsv") as f:
    for row in f:
        lemma, key, form = row.strip().split("\t")

        tags = set([
            "fixed-final-nu-aai.3s",
            "no-final-nu-aai.3s",
            "no-final-nu-aao.3s",
            "no-final-nu-fai.3p",
            "no-final-nu-pai.3p",
            "no-final-nu-iai.3s",
            "no-final-nu-xai.3s",
            "no-final-nu-xai.3p",
            "no-final-nu-yai.3s",
            "no-final-nu-aps.3p",
            "no-final-nu-pai.3s",
            "no-final-nu-aas.3p",
            "no-final-nu-xas.3p",
            "no-sigma-loss-imi.2s",
Exemplo n.º 8
0
from accent import strip_length
from greek_inflexion import GreekInflexion
from test_generate import output_item
from homer_utils import key_to_part


# NOTE(review): debug is only initialised here; its use is outside this
# fragment — presumably it enables verbose output. Confirm.
debug = False

# incorrect_count is only initialised in this fragment; total_count is
# incremented once per input row by the loop below.
incorrect_count = 0
total_count = 0

# Mapping whose values are sets created on first access.
# NOTE(review): defaultdict is not among the imports visible in this
# fragment; confirm it is imported at the top of the file.
summary_by_lemma = defaultdict(set)

# Inflexion engine built from the shared stemming rules and the Homer lexicon.
ginflexion = GreekInflexion(
    "stemming.yaml", "STEM_DATA/homer_lexicon.yaml"
)

# NOTE(review): flag whose use is outside this fragment — confirm its purpose.
first = True

# Input file opened and read row by row below.
FILENAME = "homer-data/paradigms.tsv"


with open(FILENAME) as f:
    for row in f:
        total_count += 1

        lemma, key, form = row.strip().split()

        tags = set([
            "fixed-final-nu-aai.3s",
Exemplo n.º 9
0
 def setUp(self):
     # Build the GreekInflexion under test from the stemming rules and the
     # Pratt lexicon, and keep it on the test case as self.inflexion.
     stemming_path = "stemming.yaml"
     lexicon_path = "STEM_DATA/pratt_lexicon.yaml"
     self.inflexion = GreekInflexion(stemming_path, lexicon_path)
Exemplo n.º 10
0
class InflexionTest(unittest.TestCase):
    """Tests for GreekInflexion using stemming.yaml and the Pratt lexicon."""

    def setUp(self):
        # Build the GreekInflexion under test and keep it on the test case.
        stemming_path = "stemming.yaml"
        lexicon_path = "STEM_DATA/pratt_lexicon.yaml"
        self.inflexion = GreekInflexion(stemming_path, lexicon_path)

    def test_generate(self):
        # Only calls generate(); the result is not checked.
        lemma, key = 'λύω', 'AAI.1S'
        self.inflexion.generate(lemma, key)
        # @@@

    def test_find_stems(self):
        # The stem set found for this lemma/key must match exactly.
        found = self.inflexion.find_stems('λύω', 'AAI.1S')
        self.assertEqual(found, {'ἐλυσ'})

    def test_parse1(self):
        # A form that parses to exactly one (lemma, key) pair.
        analyses = self.inflexion.parse('ἔλυσα')
        self.assertEqual(analyses, {('λύω', 'AAI.1S')})

    def test_parse2(self):
        # A form for which parse() yields no analyses.
        analyses = self.inflexion.parse('ποιοῦμαι')
        self.assertEqual(analyses, set())

    def test_possible_stems1(self):
        # Without a key filter, all candidate (key, stem) pairs come back.
        candidates = sorted(self.inflexion.possible_stems('ποιοῦμαι'))
        expected = [
            ('AAN', 'ποιουμ'),
            ('AAO.3S', 'ποιουμ'),
            ('AMD.2S', 'ποιουμ'),
            ('FMI.1S', 'ποι{contract}'),
            ('PMI.1S', 'ποιε'),
            ('PMI.1S', 'ποιο'),
            ('XMI.1S', 'ποιου'),
            ('ZMI.1S', 'ποι{contract}'),
        ]
        self.assertEqual(candidates, expected)

    def test_possible_stems2(self):
        # With a key pattern as second argument, fewer pairs come back.
        candidates = sorted(
            self.inflexion.possible_stems('ποιοῦμαι', '.+1S$'))
        expected = [
            ('FMI.1S', 'ποι{contract}'),
            ('PMI.1S', 'ποιε'),
            ('PMI.1S', 'ποιο'),
            ('XMI.1S', 'ποιου'),
            ('ZMI.1S', 'ποι{contract}'),
        ]
        self.assertEqual(candidates, expected)

    def test_conjugate(self):
        # Only calls conjugate(); the result is not checked.
        extra_tags = {"final-nu-aai.3s"}
        self.inflexion.conjugate("λύω", "PAI", "AAI", tags=extra_tags)
        # @@@

    def test_decline(self):
        # Only calls decline(); the result is not checked.
        lemma, key = "λύω", "PAP"
        self.inflexion.decline(lemma, key)
Exemplo n.º 11
0
 def setUp(self):
     # Build the GreekInflexion under test from the stemming rules and the
     # Pratt lexicon kept under test_data/, and store it on the test case.
     stemming_path = "stemming.yaml"
     lexicon_path = "test_data/pratt_lexicon.yaml"
     self.inflexion = GreekInflexion(stemming_path, lexicon_path)
Exemplo n.º 12
0
#!/usr/bin/env python3

import sys

from greek_inflexion import GreekInflexion

# Inflexion engine shared by every test() call below, built from the
# stemming rules and the MorphGNT lexicon.
mi = GreekInflexion("stemming.yaml", "STEM_DATA/morphgnt_lexicon.yaml")


# Number of failed checks so far; incremented by test() on each mismatch.
incorrect_count = 0


def test(ref, inflexion, lemma, key, expected):
    """
    Check that inflexion generates exactly the expected forms.

    Calls inflexion.generate(lemma, key), turns the result into a set and
    compares it with expected (a set of forms). On a mismatch a "failed"
    line naming ref, lemma, key, the expected set and the actual set is
    printed, and the module-level incorrect_count is incremented.
    Nothing is returned.
    """
    global incorrect_count
    result = set(inflexion.generate(lemma, key))
    if result != expected:
        # The message previously ended with a stray, unbalanced ")".
        print(f"failed {ref} {lemma} {key} {expected} (got {result})")
        incorrect_count += 1


# Regression checks as (reference, lemma, parse key, expected forms) rows,
# run in order against the shared `mi` engine.
_REGRESSION_CASES = (
    ("#3", "ἀνίστημι", "AMD.2S", {"ἀνάστησαι"}),
    ("#3", "ἀνίστημι", "AMD.3S", {"ἀναστησάσθω"}),
    ("#3", "ἀνίστημι", "AMD.2P", {"ἀναστήσασθε"}),
    ("#3", "ἀνίστημι", "AMD.3P", {"ἀναστησάσθων"}),

    ("#29", "δίδωμι", "PAP.DPF", {"διδούσαις"}),

    ("#30", "τίθημι", "AAS.3P", {"θῶσι(ν)", "θήσωσι(ν)"}),
    ("#30", "τίθημι", "AMP.APF", {"θεμένᾱς", "θησαμένᾱς"}),
    ("#30", "τίθημι", "AMP.APM", {"θεμένους", "θησαμένους"}),
    ("#30", "τίθημι", "AMP.APN", {"θέμενα", "θησάμενα"}),
)

for _ref, _lemma, _key, _expected in _REGRESSION_CASES:
    test(_ref, mi, _lemma, _key, _expected)
Exemplo n.º 13
0
from greek_inflexion import GreekInflexion
import paradigm_tools as pu

# Inflexion engine built from the stemming rules and the Pratt lexicon.
# NOTE(review): `inflexion` is not passed to any call visible below —
# confirm whether paradigm_tools relies on it some other way or it is unused.
inflexion = GreekInflexion('stemming.yaml', 'STEM_DATA/pratt_lexicon.yaml')

# Labels loaded once and reused by the layout_* calls further down.
labels = pu.load_labels("labels.yaml", "el")

# The calls below exercise the paradigm_tools entry points in sequence; their
# return values are discarded in this fragment.
pu.decline_html("λύω", "PAP")
pu.decline_md("λύω", "PAP")
pu.conjugate_html("λύω", "PAD", "AAD", tags={"final-nu-aai.3s"}, merge_paradigms=False)
pu.conjugate_html("λύω", "PAI", "AAI", tags={"final-nu-aai.3s"})
pu.conjugate_html("λύω", "PAI", "PMI", "FAI", "FMI", tags={"final-nu-aai.3s"})
pu.conjugate_html("λύω", "PAI", "PMI", tags={"final-nu-aai.3s"}, merge_paradigms=False)
# Layout helpers called directly with placeholder cell values and a
# placeholder title.
pu.layout_merged_verb_paradigm_html([["1", "2", "3", "4", "5", "6"]], ["Random"], labels)
pu.layout_merged_verb_paradigm_md([["1", "2", "3", "4", "5", "6"]], ["Random"], labels)
pu.layout_non_merged_verb_paradigm_md(["1", "2", "3", "4", "5", "6"], "Random", labels)
pu.conjugate_md("λύω", "PAI", "PMI", tags={"final-nu-aai.3s"}, merge_paradigms=True)
pu.conjugate_md("λύω", "PAI", "PMI", tags={"final-nu-aai.3s"}, merge_paradigms=False)
Exemplo n.º 14
0
 def setUp(self):
     # Build the GreekInflexion under test from the stemming rules and the
     # Pratt lexicon kept under test_data/, and store it on the test case.
     stemming_path = "stemming.yaml"
     lexicon_path = "test_data/pratt_lexicon.yaml"
     self.inflexion = GreekInflexion(stemming_path, lexicon_path)
Exemplo n.º 15
0
class InflexionTest(unittest.TestCase):
    """Tests for GreekInflexion using stemming.yaml and the Pratt lexicon."""

    def setUp(self):
        # Build the GreekInflexion under test and keep it on the test case.
        stemming_path = "stemming.yaml"
        lexicon_path = "test_data/pratt_lexicon.yaml"
        self.inflexion = GreekInflexion(stemming_path, lexicon_path)

    def test_generate(self):
        # Only calls generate(); the result is not checked.
        lemma, key = 'λύω', 'AAI.1S'
        self.inflexion.generate(lemma, key)
        # @@@

    def test_find_stems(self):
        # The stem set found for this lemma/key must match exactly.
        found = self.inflexion.find_stems('λύω', 'AAI.1S')
        self.assertEqual(found, {'ἐλυσ'})

    def test_parse1(self):
        # A form that parses to exactly one (lemma, key) pair.
        analyses = self.inflexion.parse('ἔλυσα')
        self.assertEqual(analyses, {('λύω', 'AAI.1S')})

    def test_parse2(self):
        # A form for which parse() yields no analyses.
        analyses = self.inflexion.parse('ποιοῦμαι')
        self.assertEqual(analyses, set())

    def test_possible_stems1(self):
        # Without a key filter, all candidate (key, stem) pairs come back.
        candidates = sorted(self.inflexion.possible_stems('ποιοῦμαι'))
        expected = [
            ('AAN', 'ποιουμ'),
            ('AAO.3S', 'ποιουμ'),
            ('AMD.2S', 'ποιουμ'),
            ('FMI.1S', 'ποι{contract}'),
            ('PMI.1S', 'ποιε'),
            ('PMI.1S', 'ποιο'),
            ('XMI.1S', 'ποιου'),
        ]
        self.assertEqual(candidates, expected)

    def test_possible_stems2(self):
        # With a key pattern as second argument, fewer pairs come back.
        candidates = sorted(
            self.inflexion.possible_stems('ποιοῦμαι', '.+1S$'))
        expected = [
            ('FMI.1S', 'ποι{contract}'),
            ('PMI.1S', 'ποιε'),
            ('PMI.1S', 'ποιο'),
            ('XMI.1S', 'ποιου'),
        ]
        self.assertEqual(candidates, expected)

    def test_conjugate(self):
        # Only calls conjugate(); the result is not checked.
        extra_tags = {"final-nu-aai.3s"}
        self.inflexion.conjugate("λύω", "PAI", "AAI", tags=extra_tags)
        # @@@

    def test_decline(self):
        # Only calls decline(); the result is not checked.
        lemma, key = "λύω", "PAP"
        self.inflexion.decline(lemma, key)