def test_prob_2gram(self):
    """Check prob() and log_prob() of a 2-gram MLHMM built with addone=False.

    The joint probability of the sentence and its tagging is compared
    against the product of a hand-computed tag-sequence probability and a
    hand-computed output (word given tag) probability.
    """
    model = MLHMM(2, self.tagged_sents, addone=False)

    words = 'el gato come pescado .'.split()
    tags = 'D N V N P'.split()

    joint = model.prob(words, tags)

    # Tag transitions: V after N and P after N each have probability 0.5;
    # every other transition has probability 1.0.
    expected_tag = 0.5 * 0.5
    # Output probabilities, one factor per word/tag pair.
    expected_out = (
        0.5      # el/D
        * 0.25   # gato/N
        * 1.0    # come/V
        * 0.25   # pescado/N
        * 1.0    # ./P
    )
    self.assertAlmostEqual(joint, expected_tag * expected_out)

    # The log-probability must equal the sum of the logs of both parts.
    log_joint = model.log_prob(words, tags)
    self.assertAlmostEqual(
        log_joint,
        log2(expected_tag) + log2(expected_out),
    )
def test_prob_1gram(self):
    """Check prob() and log_prob() of a 1-gram MLHMM built with addone=False.

    The joint probability of the sentence and its tagging is compared
    against the product of a hand-computed tag-sequence probability and a
    hand-computed output (word given tag) probability.
    """
    model = MLHMM(1, self.tagged_sents, addone=False)

    words = 'el gato come pescado .'.split()
    tags = 'D N V N P'.split()

    joint = model.prob(words, tags)

    # D, V, P and </s> each have probability 2.0 / 12.0;
    # N has probability 4.0 / 12.0 and occurs twice in the tagging.
    p_other = 2.0 / 12.0
    p_noun = 4.0 / 12.0
    expected_tag = p_other**4 * p_noun**2
    # Output probabilities, one factor per word/tag pair.
    expected_out = (
        0.5      # el/D
        * 0.25   # gato/N
        * 1.0    # come/V
        * 0.25   # pescado/N
        * 1.0    # ./P
    )
    self.assertAlmostEqual(joint, expected_tag * expected_out)

    # The log-probability must equal the sum of the logs of both parts.
    log_joint = model.log_prob(words, tags)
    self.assertAlmostEqual(
        log_joint,
        log2(expected_tag) + log2(expected_out),
    )