示例#1
0
 def setUpClass(cls):
     """Builds the noun paradigm fixture and stores it on `cls.paradigm`.

     The category combines a case feature and a number feature. Ten slots
     are defined, one per case/number combination, each pairing a suffixed
     stem pattern with its feature vector; the nominative singular vector
     is also used as the lemma feature vector.
     """
     super().setUpClass()
     case_feature = features.Feature("case", "nom", "gen", "dat", "acc",
                                     "abl")
     number_feature = features.Feature("num", "sg", "pl")
     # Ignoring gender since gender is a property of the stem rather than the
     # ending.
     noun_category = features.Category(case_feature, number_feature)
     lemma_vector = features.FeatureVector(noun_category, "case=nom",
                                           "num=sg")
     any_stem = paradigms.make_byte_star_except_boundary()
     # (ending, case spec, number spec) for every slot after the lemma slot,
     # in slot order.
     remaining_endings = (
         ("+ae", "case=gen", "num=sg"),
         ("+ae", "case=dat", "num=sg"),
         ("+am", "case=acc", "num=sg"),
         ("+ā", "case=abl", "num=sg"),
         ("+ae", "case=nom", "num=pl"),
         ("+ārum", "case=gen", "num=pl"),
         ("+īs", "case=dat", "num=pl"),
         ("+ās", "case=acc", "num=pl"),
         ("+īs", "case=abl", "num=pl"),
     )
     # The lemma slot reuses the very same vector object that is passed as
     # `lemma_feature_vector` below.
     slots = [(paradigms.suffix("+a", any_stem), lemma_vector)]
     slots.extend(
         (paradigms.suffix(ending, any_stem),
          features.FeatureVector(noun_category, case_spec, number_spec))
         for ending, case_spec, number_spec in remaining_endings)
     known_stems = ["aqu", "bell", "caus", "cicād", "mens", "naut", "puell"]
     cls.paradigm = paradigms.Paradigm(
         category=noun_category,
         slots=slots,
         lemma_feature_vector=lemma_vector,
         stems=known_stems)
示例#2
0
 def setUpClass(cls):
     """Builds a noun paradigm whose stems are given by a pattern.

     The slots match the ten case/number combinations of the category.
     Instead of listing concrete stems, the single entry in `stems` is an
     FST accepting one or more of the vowel/consonant letters listed below.
     The result is stored on `cls.paradigm`.
     """
     super().setUpClass()
     case_feature = features.Feature("case", "nom", "gen", "dat", "acc",
                                     "abl")
     number_feature = features.Feature("num", "sg", "pl")
     # Ignoring gender since gender is a property of the stem rather than the
     # ending.
     noun_category = features.Category(case_feature, number_feature)
     lemma_vector = features.FeatureVector(noun_category, "case=nom",
                                           "num=sg")
     any_stem = paradigms.make_byte_star_except_boundary()
     # (ending, case spec, number spec) for every slot after the lemma slot,
     # in slot order.
     remaining_endings = (
         ("+ae", "case=gen", "num=sg"),
         ("+ae", "case=dat", "num=sg"),
         ("+am", "case=acc", "num=sg"),
         ("+ā", "case=abl", "num=sg"),
         ("+ae", "case=nom", "num=pl"),
         ("+ārum", "case=gen", "num=pl"),
         ("+īs", "case=dat", "num=pl"),
         ("+ās", "case=acc", "num=pl"),
         ("+īs", "case=abl", "num=pl"),
     )
     slots = [(paradigms.suffix("+a", any_stem), lemma_vector)]
     slots.extend(
         (paradigms.suffix(ending, any_stem),
          features.FeatureVector(noun_category, case_spec, number_spec))
         for ending, case_spec, number_spec in remaining_endings)
     vowel = pynini.union("a", "i", "e", "o", "u")
     consonant = pynini.union("b", "c", "d", "f", "g", "h", "l", "m", "n",
                              "p", "q", "r", "s", "t")
     # One or more letters drawn from either class.
     letter = vowel | consonant
     stem_pattern = letter.closure(1)
     cls.paradigm = paradigms.Paradigm(
         category=noun_category,
         slots=slots,
         lemma_feature_vector=lemma_vector,
         stems=[stem_pattern])
示例#3
0
 def setUpClass(cls):
     """Builds the noun paradigm fixture whose stems carry stem-ID markers.

     Sets three class attributes:
       * `cls.noun`: the case/number category.
       * `cls.paradigm`: the paradigm built from the slots, the ID-tagged
         stems and the three rewrite rules below.
       * `cls.delete_stem_ids`: a rewrite that deletes stem-ID markers.
     """
     super().setUpClass()
     case = features.Feature("case", "nom", "gen", "dat", "acc", "abl")
     number = features.Feature("num", "sg", "pl")
     cls.noun = features.Category(case, number)
     nomsg = features.FeatureVector(cls.noun, "case=nom", "num=sg")
     stem = paradigms.make_byte_star_except_boundary()
     slots = [(paradigms.suffix("+s", stem), nomsg),
              (paradigms.suffix("+is", stem),
               features.FeatureVector(cls.noun, "case=gen", "num=sg")),
              (paradigms.suffix("+ī", stem),
               features.FeatureVector(cls.noun, "case=dat", "num=sg")),
              (paradigms.suffix("+em", stem),
               features.FeatureVector(cls.noun, "case=acc", "num=sg")),
              (paradigms.suffix("+e", stem),
               features.FeatureVector(cls.noun, "case=abl", "num=sg")),
              (paradigms.suffix("+ēs", stem),
               features.FeatureVector(cls.noun, "case=nom", "num=pl")),
              (paradigms.suffix("+um", stem),
               features.FeatureVector(cls.noun, "case=gen", "num=pl")),
              (paradigms.suffix("+ibus", stem),
               features.FeatureVector(cls.noun, "case=dat", "num=pl")),
              (paradigms.suffix("+ēs", stem),
               features.FeatureVector(cls.noun, "case=acc", "num=pl")),
              (paradigms.suffix("+ibus", stem),
               features.FeatureVector(cls.noun, "case=abl", "num=pl"))]
     velar = pynini.union("c", "ct", "g")
     v = pynini.union("a", "i", "ī", "e", "ē", "u")
     # Builds way more stem IDs than we need to show that this is efficient.
     stem_ids = paradigms.build_stem_ids(1000, 101000)
     rules = [
         # c, ct, g -> x in nominative singular. Note the spelling of "cs" as "x"
         # in Latin breaks the segmentation. One might also consider representing
         # this as "c+s".
         # The stem ID sits between the stem and the boundary, so it is passed
         # through unchanged in the middle of the rewrite.
         pynini.cdrewrite(
             pynini.cross(velar, "x") + stem_ids + pynini.cross("+s", "+"),
             "", "", cls.noun.sigma_star),
         # s -> r / V __ V.
         pynini.cdrewrite(pynini.cross("s", "r"), "", stem_ids + "+" + v,
                          cls.noun.sigma_star),
         # s+s -> s.
         pynini.cdrewrite(pynini.cross("s", ""), "s" + stem_ids + "+", "",
                          cls.noun.sigma_star)
     ]
     cls.paradigm = paradigms.Paradigm(
         category=cls.noun,
         slots=slots,
         lemma_feature_vector=nomsg,
         stems=["noct__1000__", "ōs__1001__", "pac__1002__", "rēg__1003__"],
         rules=rules)
     # Kept on the class so tests can strip the stem-ID markers from strings.
     cls.delete_stem_ids = pynini.cdrewrite(pynutil.delete(stem_ids), "",
                                            "", cls.noun.sigma_star)
示例#4
0
 def setUpClass(cls):
     """Builds the noun paradigm fixture with three stem-final rewrite rules.

     Ten case/number slots are paired with suffixed stem patterns; the
     rewrite rules adjust the stem/ending boundary. The paradigm is stored
     on `cls.paradigm`.
     """
     super().setUpClass()
     case_feature = features.Feature("case", "nom", "gen", "dat", "acc",
                                     "abl")
     number_feature = features.Feature("num", "sg", "pl")
     # Ignoring gender since gender is a property of the stem rather than the
     # ending.
     noun_category = features.Category(case_feature, number_feature)
     lemma_vector = features.FeatureVector(noun_category, "case=nom",
                                           "num=sg")
     any_stem = paradigms.make_byte_star_except_boundary()
     # (ending, case spec, number spec) for every slot after the lemma slot,
     # in slot order.
     remaining_endings = (
         ("+is", "case=gen", "num=sg"),
         ("+ī", "case=dat", "num=sg"),
         ("+em", "case=acc", "num=sg"),
         ("+e", "case=abl", "num=sg"),
         ("+ēs", "case=nom", "num=pl"),
         ("+um", "case=gen", "num=pl"),
         ("+ibus", "case=dat", "num=pl"),
         ("+ēs", "case=acc", "num=pl"),
         ("+ibus", "case=abl", "num=pl"),
     )
     slots = [(paradigms.suffix("+s", any_stem), lemma_vector)]
     slots.extend(
         (paradigms.suffix(ending, any_stem),
          features.FeatureVector(noun_category, case_spec, number_spec))
         for ending, case_spec, number_spec in remaining_endings)
     velar = pynini.union("c", "ct", "g")
     vowel = pynini.union("a", "i", "ī", "e", "ē", "u")
     sigma_star = noun_category.sigma_star
     # c, ct, g -> x in nominative singular. Note the spelling of "cs" as "x"
     # in Latin breaks the segmentation. One might also consider representing
     # this as "c+s".
     velar_to_x = pynini.cdrewrite(pynini.cross(velar + "+s", "x+"), "", "",
                                   sigma_star)
     # Rhotacize /s/ prevocalically: a non-Gorman theory of this alternation.
     rhotacism = pynini.cdrewrite(pynini.cross("s", "r"), "", "+" + vowel,
                                  sigma_star)
     # s+s -> s.
     degemination = pynini.cdrewrite(pynini.cross("s+s", "s+"), "", "",
                                     sigma_star)
     cls.paradigm = paradigms.Paradigm(
         category=noun_category,
         slots=slots,
         lemma_feature_vector=lemma_vector,
         stems=["noct", "ōs", "pac", "rēg"],
         rules=[velar_to_x, rhotacism, degemination])
示例#5
0
 def setUpClass(cls):
     """Builds a two-slot verb paradigm and stores it on `cls.paradigm`.

     The "focus=none" slot is the bare stem. The "focus=actor" slot inserts
     "um" before the stem's first vowel: after the initial consonant(s) when
     the stem starts with a consonant, otherwise at the very beginning.
     """
     super().setUpClass()
     focus = features.Feature("focus", "none", "actor")
     verb = features.Category(focus)
     unfocused = features.FeatureVector(verb, "focus=none")
     vowel = pynini.union("a", "e", "i", "o", "u")
     # NOTE(review): "k" is listed twice below; kept as-is to leave the
     # constructed FST untouched.
     consonant = pynini.union("b", "d", "f", "g", "h", "k", "l", "ly", "k",
                              "m", "n", "ng", "ny", "p", "r", "s", "t", "ts",
                              "w", "y", "z")
     any_stem = paradigms.make_byte_star_except_boundary()
     # Consonant-initial stems: "+um+" goes between the onset and the vowel.
     after_onset = (consonant.plus + pynutil.insert("+um+") + vowel +
                    any_stem)
     # Vowel-initial stems: "um+" goes in front of the stem.
     at_start = pynutil.insert("um+") + vowel + any_stem
     um = pynini.union(after_onset, at_start)
     actor_focus = features.FeatureVector(verb, "focus=actor")
     slots = [(any_stem, unfocused), (um, actor_focus)]
     cls.paradigm = paradigms.Paradigm(
         category=verb,
         slots=slots,
         lemma_feature_vector=unfocused,
         stems=["bilang", "ibig", "lipad", "kopya", "punta"])
示例#6
0
 def testFeatureVector(self):
     """Checks feature-vector expansion and rejection of invalid specs.

     A vector that leaves a feature unspecified, once composed with
     `self.fm`, must yield one output string per value of that feature.
     Invalid feature/value specifications must raise `features.Error`.
     """
     # Gender left unspecified: expect all three genders in the output.
     dative_singular = features.FeatureVector(self.noun, "num=sg", "case=dat")
     expanded = dative_singular.acceptor @ self.fm
     expected_dative = (
         "[case=dat][gen=fem][num=sg]",
         "[case=dat][gen=mas][num=sg]",
         "[case=dat][gen=neu][num=sg]",
     )
     self.assertSameElements(expanded.paths().ostrings(), expected_dative)
     # Number left unspecified: expect both numbers in the output.
     feminine_nominative = features.FeatureVector(self.noun, "gen=fem",
                                                  "case=nom")
     expanded = feminine_nominative.acceptor @ self.fm
     expected_nominative = (
         "[case=nom][gen=fem][num=sg]",
         "[case=nom][gen=fem][num=pl]",
     )
     self.assertSameElements(expanded.paths().ostrings(),
                             expected_nominative)
     # Checks that we fail appropriately when we pass an illegal combo.
     for bad_spec in ("gen=acc", "gen=foofoo", "wiggywoggy=fem"):
         with self.assertRaises(features.Error):
             features.FeatureVector(self.noun, bad_spec, "case=nom")
示例#7
0
 def setUpClass(cls):
     """Builds a verb paradigm whose slots reshape the stem by template.

     Four "aspect" slots are defined. Two of them compose the stem pattern
     with a template transducer (CVCC^? or CVCVVC^?) before the suffix is
     attached. The paradigm is stored on `cls.paradigm`.
     """
     super().setUpClass()
     # Not clear "aspect" is exactly the right concept.
     aspect = features.Feature("aspect", "root", "dubitative", "gerundial",
                               "durative")
     verb = features.Category(aspect)
     root_vector = features.FeatureVector(verb, "aspect=root")
     any_stem = paradigms.make_byte_star_except_boundary()
     vowel_letters = ("a", "i", "o", "u")
     any_vowel = pynini.union(*vowel_letters)
     consonant = pynini.union("c", "m", "h", "l", "y", "k", "ʔ", "d", "n",
                              "w", "t")
     # First template: apply Procrustean transformation to CVCC^?.
     cvcc_template = (consonant + any_vowel +
                      pynutil.delete(any_vowel).ques + consonant +
                      pynutil.delete(any_vowel).star +
                      consonant.ques).optimize()

     def copy_template(vowel):
         # One branch of the second template, specialised to a single vowel
         # letter so that the inserted long vowel copies the stem's vowel.
         return (consonant + vowel + pynutil.delete(vowel).ques + consonant +
                 pynutil.delete(vowel).star + pynutil.insert(vowel + vowel) +
                 consonant.ques)

     # Second template: apply Procrustean transformation to CVCVVC^?. The
     # CVCVVC^? case involves copying vowels, which is most easily achieved by
     # iterating over the vowels in the construction.
     cvcvvc_template = pynini.Fst()
     for vowel_letter in vowel_letters:
         cvcvvc_template.union(copy_template(vowel_letter))
     cvcvvc_template.optimize()
     slots = [
         (any_stem, root_vector),
         (paradigms.suffix("+al", any_stem),
          features.FeatureVector(verb, "aspect=dubitative")),
         (paradigms.suffix("+inay", any_stem @ cvcc_template),
          features.FeatureVector(verb, "aspect=gerundial")),
         (paradigms.suffix("+ʔaa", any_stem @ cvcvvc_template),
          features.FeatureVector(verb, "aspect=durative")),
     ]
     cls.paradigm = paradigms.Paradigm(
         category=verb,
         slots=slots,
         lemma_feature_vector=root_vector,
         stems=["caw", "cuum", "hoyoo", "diiyl", "ʔilk", "hiwiit"])
示例#8
0
 def testUnification(self):
     """Checks `FeatureVector.unify` on equal, clashing and partial vectors.

     Also checks that vectors from different categories do not unify.
     """
     dative_singular = features.FeatureVector(self.noun, "num=sg", "case=dat")
     same_again = features.FeatureVector(self.noun, "num=sg", "case=dat")
     # Identical should unify to the same.
     self.assertEqual(dative_singular.unify(same_again), dative_singular)
     # Feature clash should fail.
     nominative_singular = features.FeatureVector(self.noun, "num=sg",
                                                  "case=nom")
     self.assertFalse(dative_singular.unify(nominative_singular))
     singular_only = features.FeatureVector(self.noun, "num=sg")
     dative_only = features.FeatureVector(self.noun, "case=dat")
     # Free values for features unify with any particular specification.
     self.assertEqual(singular_only.unify(dative_only), dative_singular)
     unrelated_category = features.Category(self.number, self.gender)
     # Mismatched categories should fail.
     foreign_singular = features.FeatureVector(unrelated_category, "num=sg")
     self.assertFalse(singular_only.unify(foreign_singular))
示例#9
0
 def setUpClass(cls):
     """Builds two related noun paradigms, accent A and accent B.

     `cls.paradigm_a` defines all twelve case/number slots with unaccented
     endings. `cls.paradigm_b` names paradigm A as its parent, redefines ten
     slots with accented endings, and adds a deaccentuation rule.
     """
     super().setUpClass()
     case = features.Feature("case", "nom", "gen", "dat", "acc", "ins",
                             "prp")
     num = features.Feature("num", "sg", "pl")
     noun = features.Category(case, num)
     stem = paradigms.make_byte_star_except_boundary()

     def vector(*specs):
         # Feature vector over the noun category.
         return features.FeatureVector(noun, *specs)

     def ending(affix):
         # Stem pattern with `affix` attached via `paradigms.suffix`.
         return paradigms.suffix(affix, stem)

     nomsg = vector("case=nom", "num=sg")
     # Accent A has stem stress.
     slots_a = [
         (stem, nomsg),
         (ending("+a"), vector("case=gen", "num=sg")),
         (ending("+u"), vector("case=dat", "num=sg")),
         (stem, vector("case=acc", "num=sg")),
         (ending("+om"), vector("case=ins", "num=sg")),
         (ending("+e"), vector("case=prp", "num=sg")),
         (ending("+y"), vector("case=nom", "num=pl")),
         (ending("+ov"), vector("case=gen", "num=pl")),
         (ending("+am"), vector("case=dat", "num=pl")),
         (ending("+y"), vector("case=acc", "num=pl")),
         (ending("+ami"), vector("case=ins", "num=pl")),
         (ending("+ax"), vector("case=prp", "num=pl")),
     ]
     cls.paradigm_a = paradigms.Paradigm(
         category=noun,
         name="hard stem masculine accent A",
         slots=slots_a,
         lemma_feature_vector=nomsg,
         stems=["grádus", "žurnál"],
     )
     # Accent B has stress-shift to the desinence except in the nom./acc.
     accent_pairs = [
         ("á", "a"),
         ("é", "e"),
         ("í", "i"),
         ("ó", "o"),
         ("ú", "u"),
         ("ý", "y"),
     ]
     deaccentuation_map = pynini.string_map(accent_pairs)
     acc_v = pynini.project(deaccentuation_map, "input")
     # An accented vowel loses its accent whenever another accented vowel
     # occurs anywhere to its right.
     right_context = noun.sigma_star + acc_v
     deaccentuation = pynini.cdrewrite(deaccentuation_map, "", right_context,
                                       noun.sigma_star).optimize()
     # No nom. sg. or acc. sg. slot here; presumably those come from the
     # parent paradigm -- confirm against `paradigms.Paradigm`.
     slots_b = [
         (ending("+á"), vector("case=gen", "num=sg")),
         (ending("+ú"), vector("case=dat", "num=sg")),
         (ending("+óm"), vector("case=ins", "num=sg")),
         (ending("+é"), vector("case=prp", "num=sg")),
         (ending("+ý"), vector("case=nom", "num=pl")),
         (ending("+óv"), vector("case=gen", "num=pl")),
         (ending("+ám"), vector("case=dat", "num=pl")),
         (ending("+ý"), vector("case=acc", "num=pl")),
         (ending("+ámi"), vector("case=ins", "num=pl")),
         (ending("+áx"), vector("case=prp", "num=pl")),
     ]
     cls.paradigm_b = paradigms.Paradigm(
         category=noun,
         name="hard stem masculine accent B",
         slots=slots_b,
         parent_paradigm=cls.paradigm_a,
         lemma_feature_vector=nomsg,
         stems=["górb", "stól"],
         rules=[deaccentuation])