Exemplo n.º 1
0
 def setUp(self):
     """Build a new Lemmatizer and store it on the test instance as ``self.l``."""
     lemmatizer = Lemmatizer()
     self.l = lemmatizer
Exemplo n.º 2
0
class TestLemmatizer(unittest.TestCase):
    """Table-driven checks of ``Lemmatizer.lemmatize`` and ``lemmatize_vn``.

    Every expectation runs inside its own ``subTest`` so that one failing
    surface form is reported with its inputs and does not prevent the
    remaining forms in the same test method from being checked.
    """

    def setUp(self):
        """Create the ``Lemmatizer`` under test as ``self.l``."""
        self.l = Lemmatizer()

    def tearDown(self):
        """Drop the reference to the lemmatizer created in ``setUp``."""
        self.l = None

    def _check_lemmas(self, cases):
        """Assert ``self.l.lemmatize(form, tag) == lemma`` for each case.

        Args:
            cases: iterable of ``(form, tag, lemma)`` triples.
        """
        for form, tag, lemma in cases:
            with self.subTest(form=form, tag=tag):
                self.assertEqual(self.l.lemmatize(form, tag), lemma)

    def test_bi(self):
        """Each (form, tag) pair below must lemmatize to "bi"."""
        forms = (
            ("tha", "V-p"),
            ("thà", "V-p"),
            ("Tha", "V-p"),
            ("th'", "V-p"),
            ("bha", "V-s"),
            ("bh'", "V-s"),
            ("eil", "V-p--d"),
            ("robh", "V-s--d"),
            ("bhith", "Nv"),
            ("bhiodh", "V-h"),
            ("bhi", "V-f--d"),
            ("bi", "V-f--d"),
            ("bhios", "V-f--r"),
            ("bidh", "V-f"),
            ("biodh", "V-h--d"),
        )
        self._check_lemmas((form, tag, "bi") for form, tag in forms)

    def test_copula(self):
        """Each (form, tag) pair below must lemmatize to "is"."""
        forms = (
            ("an", "Wpdqa"),
            ("B'", "Ws"),
            ("b'", "Ws"),
            ("bu", "Ws"),
            ("cha", "Wp-in"),
            ("chan", "Wp-in"),
            ("gur", "Wpdia"),
            ("'S", "Wp-i"),
            ("'s", "Wp-i"),
            ("is", "Wp-i"),
            ("nach", "Wpdqn"),
            ("'se", "Wp-i-3"),
            ("as", "Wpr"),
        )
        self._check_lemmas((form, tag, "is") for form, tag in forms)

    def test_irregulars(self):
        """Check the expected lemma for each (form, tag) pair below."""
        cases = (
            ("Thuirt", "V-s", "abair"),
            ("thuirt", "V-s", "abair"),
            ("ràdh", "Nv", "abair"),
            # "\xe2\x80\x99" spells out the three UTF-8 bytes of U+2019 (right
            # single quotation mark) as separate characters.
            # NOTE(review): presumably a deliberately mis-decoded apostrophe;
            # kept byte-for-byte -- confirm against Lemmatizer before changing.
            ("ars\xe2\x80\x99", "V-s", "arsa"),
            ("ars'", "V-s", "arsa"),
            ("as", "V-s", "arsa"),
            ("chuala", "V-s", "cluinn"),
            ("dèanamh", "Nv", "dèan"),
            ("dhèanamh", "Nv", "dèan"),
            ("nì", "V-f", "dèan"),
            ("Rinn", "V-s", "dèan"),
            ("rinn", "V-s", "dèan"),
            ("chunnaic", "V-s", "faic"),
            ("faicinn", "Nv", "faic"),
            ("fhaicinn", "Nv", "faic"),
            ("faigheadh", "V-h--d", "faigh"),
            ("faighinn", "Nv", "faigh"),
            ("fhuair", "V-s", "faigh"),
            ("gheibh", "V-f", "faigh"),
            ("Chaidh", "V-s", "rach"),
            ("chaidh", "V-s", "rach"),
            ("deach", "V-s--d", "rach"),
            ("dhol", "Nv", "rach"),
            ("dol", "Nv", "rach"),
            ("Thèid", "V-f", "rach"),
            ("thèid", "V-f", "rach"),
            ("tèid", "V-f--d", "rach"),
            ("thàinig", "V-s", "thig"),
            ("tighinn", "Nv", "thig"),
            ("thug", "V-s", "thoir"),
            ("tug", "V-s--d", "thoir"),
            ("bheir", "V-f", "thoir"),
            ("thoirt", "Nv", "thoir"),
            ("toirt", "Nv", "thoir"),
        )
        self._check_lemmas(cases)

    def test_regulars(self):
        """Check the expected lemma for each (form, tag) pair below."""
        cases = (
            ("bhuail", "V-s", "buail"),
            ("choinnich", "V-s", "coinnich"),
            ("chuir", "V-s", "cuir"),
            ("fhàg", "V-s", "fàg"),
            ("faodaidh", "V-f", "faod"),
            ("feuch", "Vm-2s", "feuch"),
            ("fheuch", "V-s", "feuch"),
            ("feumaidh", "V-f", "feum"),
            ("ghabh", "V-s", "gabh"),
            ("ràinig", "V-s", "ruig"),
            ("thachair", "V-s", "tachair"),
            ("thòisich", "V-s", "tòisich"),
        )
        self._check_lemmas(cases)

    def nv(self, vn, root):
        """Assert that ``self.l.lemmatize_vn(vn)`` returns ``root``."""
        self.assertEqual(self.l.lemmatize_vn(vn), root)

    def test_nvs(self):
        """Check ``lemmatize_vn`` directly and ``lemmatize`` with the "Nv" tag."""
        # Forms passed straight to lemmatize_vn() through the nv() helper.
        direct = (
            ("àicheadh", "àicheidh"),
            ("amas", "amais"),
            ("amharc", "amhairc"),
            ("agairt", "agair"),
            ("bagairt", "bagair"),
            ("baisteadh", "baist"),
            ("blasad", "blais"),
            ("blasadh", "blais"),
            ("brìodal", "brìodail"),
            ("briseadh", "bris"),
            ("bristeadh", "bris"),
            ("bruich", "bruich"),
            ("bruthadh", "brùth"),
            ("buntainn", "buin"),
            ("cinntinn", "cinn"),
            ("cagar", "cagair"),
            ("cagarsaich", "cagair"),
            ("cagartaich", "cagair"),
            ("casgairt", "casgair"),
            ("casgradh", "casgair"),
            ("cluiche", "cluich"),
            ("ithe", "ith"),
            ("laighe", "laigh"),
            ("leigheas", "leighis"),
            ("tòiseachadh", "tòisich"),
            ("tionndadh", "tionndaidh"),
        )
        # Forms passed to lemmatize() together with the "Nv" tag.
        tagged = (
            ("bruidhinn", "bruidhinn"),
            ("coimhead", "coimhead"),
            ("cur", "cuir"),
            ("chur", "cuir"),
            ("creidsinn", "creid"),
            ("cumail", "cùm"),
            ("falbh", "falbh"),
            ("fàs", "fàs"),
            ("feuchainn", "feuch"),
            ("fhalbh", "falbh"),
            ("fuireach", "fuirich"),
            ("gabhail", "gabh"),
            ("gluasad", "gluais"),
            ("iarraidh", "iarr"),
            ("innse", "inns"),
            ("leigeil", "leig"),
            ("obair", "obraich"),
            ("ruith", "ruith"),
            ("sealltainn", "seall"),
            ("smaoineachadh", "smaoinich"),
            ("tachairt", "tachair"),
        )
        for vn, root in direct:
            with self.subTest(vn=vn):
                self.nv(vn, root)
        self._check_lemmas((vn, "Nv", root) for vn, root in tagged)