def test_sent_histories_1gram(self):
    """A 1-gram MEMM yields one history per token, each with no previous tags."""
    words = 'el gato come pescado .'.split()
    # With n=1 the previous-tags tuple is empty; only the position varies.
    expected = [History(words, (), position) for position in range(len(words))]

    memm = MEMM(1, self.tagged_sents)
    histories = list(memm.sent_histories(self.tagged_sents[0]))

    self.assertEqual(histories, expected)
def test_sent_histories_3gram(self):
    """A 3-gram MEMM yields histories carrying the two preceding tags."""
    words = 'el gato come pescado .'.split()
    # Previous-tag pairs expected at positions 0..4; the sentence start is
    # padded with '<s>' markers.
    contexts = [
        ('<s>', '<s>'),
        ('<s>', 'D'),
        ('D', 'N'),
        ('N', 'V'),
        ('V', 'N'),
    ]
    expected = [
        History(words, prev_tags, position)
        for position, prev_tags in enumerate(contexts)
    ]

    memm = MEMM(3, self.tagged_sents)
    histories = list(memm.sent_histories(self.tagged_sents[0]))

    self.assertEqual(histories, expected)
def test_tag(self):
    """Models with n = 1, 2, 3 all tag the sample sentence with 'D N V N P'."""
    words = 'el gato come pescado .'.split()
    expected = 'D N V N P'.split()

    # Train every model up front, then check each one's output.
    trained = [MEMM(order, self.tagged_sents) for order in [1, 2, 3]]
    for memm in trained:
        self.assertEqual(memm.tag(words), expected)
def test_tag_history(self):
    """tag_history returns the expected tag for each history of the first
    training sentence, for models with n = 1, 2, 3.
    """
    models = [MEMM(i, self.tagged_sents) for i in [1, 2, 3]]
    result = 'D N V N P'.split()
    for model in models:
        # Materialise the histories so their count can be checked: zip()
        # stops at the shorter input, so without this check the test would
        # pass vacuously if sent_histories yielded too few (or no) items.
        hs = list(model.sent_histories(self.tagged_sents[0]))
        self.assertEqual(len(hs), len(result))
        for h, r in zip(hs, result):
            self.assertEqual(model.tag_history(h), r)
def test_sents_tags(self):
    """sents_tags yields the tags of all training sentences as one flat stream."""
    # The expected stream is the sequence 'D N V N P' twice in a row.
    expected = 'D N V N P'.split() * 2

    memm = MEMM(3, self.tagged_sents)
    produced = list(memm.sents_tags(self.tagged_sents))

    self.assertEqual(produced, expected)
def MEM_trainer(tagged_sents):
    """Build a MEMM from ``tagged_sents``.

    ``n`` and ``c`` are not parameters; they are looked up in the
    surrounding scope when the function is called.
    NOTE(review): ``c`` is presumably the classifier name -- confirm against
    the MEMM constructor signature.
    """
    trained = MEMM(n, tagged_sents, c)
    return trained
# Load the whole tagged corpus into memory.
sents = list(corpus.tagged_sents())

# type of model to train ("base", "mlhmm" or "memm")
m = str(opts['-m'])

# path the trained model is pickled to
filename = opts['-o']

# train the model
if m == "base":
    print("Baseline Model selected")
    model = BaselineTagger(tagged_sents=sents)
elif m == "mlhmm":
    n = int(opts['-n'])
    print("Maximum Likelihood Hidden Markov Model selected, n=%d" % n)
    model = MLHMM(n=n, tagged_sents=sents, addone=True)
elif m == 'memm':
    n = int(opts['-n'])
    c = str(opts['-c'])
    # Reject unknown classifier names before training starts.
    if c not in ['logreg', 'nb', 'svc']:
        print("Bad classifier type, use --help option for help")
        exit()
    print("Maximum Entropy Markov Model selected, n=%d, c=%s" % (n, c))
    model = MEMM(n=n, tagged_sents=sents, classifier=c)
else:
    print("Bad model type, use --help option for help")
    exit()

# save it; the context manager closes the file even if pickling fails
with open(filename, 'wb') as f:
    pickle.dump(model, f)