Пример #1
0
    def test_unknown(self):
        """Check that hmm.unknown() is False for known words, True otherwise.

        The words in ``known`` are expected to be recognised by a model built
        from the self.tagged_sents fixture; that fixture is defined outside
        this method, so keep both word sets in sync with it.
        """
        hmm = MLHMM(2, self.tagged_sents)

        known = {'el', 'gato', 'come', 'pescado', '.', 'la', 'gata', 'salmón'}
        for w in known:
            # Pass the word as msg so a failure names the offending word
            # instead of only reporting "True is not false".
            self.assertFalse(hmm.unknown(w), msg=w)

        unknown = {'perro', 'salame'}
        for w in unknown:
            self.assertTrue(hmm.unknown(w), msg=w)
Пример #2
0
    def test_tag_prob_2gram(self):
        """Check tag_prob() and tag_log_prob() of a tag sequence (2-gram model).

        The expected value depends on the self.tagged_sents fixture, which is
        defined outside this method.
        """
        model = MLHMM(2, self.tagged_sents, addone=False)
        tags = 'D N V N P'.split()

        # Expected probability of the whole tag sequence.
        expected = 0.5 * 0.5

        self.assertAlmostEqual(model.tag_prob(tags), expected)
        # The log variant must match log2 of the same expected value.
        self.assertAlmostEqual(model.tag_log_prob(tags), log2(expected))
Пример #3
0
    def test_tag_prob_1gram(self):
        """Check tag_prob() and tag_log_prob() of a tag sequence (1-gram model).

        The expected value depends on the self.tagged_sents fixture, which is
        defined outside this method.
        """
        model = MLHMM(1, self.tagged_sents, addone=False)
        tags = 'D N V N P'.split()

        # D, V, P and </s> each have prob 2.0 / 12.0 (four factors);
        # N has prob 4.0 / 12.0 and appears twice in the sequence.
        expected = (2.0 / 12.0)**4 * (4.0 / 12.0)**2

        self.assertAlmostEqual(model.tag_prob(tags), expected)
        # The log variant must match log2 of the same expected value.
        self.assertAlmostEqual(model.tag_log_prob(tags), log2(expected))
Пример #4
0
    def test_trans_prob_2gram(self):
        """Check trans_prob(tag, prev_tags) for each listed pair (2-gram model).

        The expected values depend on the self.tagged_sents fixture, which is
        defined outside this method.
        """
        model = MLHMM(2, self.tagged_sents, addone=False)

        # Maps (tag, tuple of previous tags) -> expected transition probability.
        expected = {
            ('D', ('<s>',)): 1.0,
            ('N', ('D',)): 1.0,
            ('V', ('N',)): 0.5,
            ('N', ('V',)): 1.0,
            ('P', ('N',)): 0.5,
            ('</s>', ('P',)): 1.0,
        }
        for args, prob in expected.items():
            tag, prev_tags = args
            # msg carries the argument pair so a failure identifies the case.
            self.assertAlmostEqual(
                model.trans_prob(tag, prev_tags), prob, msg=args)
Пример #5
0
    def test_tcount_1gram(self):
        """Check tcount() for the empty gram and each listed 1-gram.

        The expected counts depend on the self.tagged_sents fixture, which is
        defined outside this method.
        """
        model = MLHMM(1, self.tagged_sents)

        # Maps a tag tuple -> expected count. The count for the empty tuple
        # equals the sum of the five 1-gram counts listed below.
        expected_counts = {
            (): 12,
            ('D',): 2,
            ('N',): 4,
            ('V',): 2,
            ('P',): 2,
            ('</s>',): 2,
        }
        for gram, count in expected_counts.items():
            # msg carries the gram so a failure identifies the case.
            self.assertEqual(model.tcount(gram), count, msg=gram)
Пример #6
0
    def test_prob_2gram(self):
        """Check prob() and log_prob() of a tagged sentence (2-gram model).

        The expected values depend on the self.tagged_sents fixture, which is
        defined outside this method.
        """
        model = MLHMM(2, self.tagged_sents, addone=False)
        words = 'el gato come pescado .'.split()
        tags = 'D N V N P'.split()

        # Tagging part: V after N and P after N have prob 0.5, the rest 1.0.
        expected_tag = 0.5 * 0.5
        # Output part: probs for el/D gato/N come/V pescado/N ./P.
        expected_out = 0.5 * 0.25 * 1.0 * 0.25 * 1.0

        self.assertAlmostEqual(
            model.prob(words, tags), expected_tag * expected_out)
        # In log space the two parts add instead of multiply.
        self.assertAlmostEqual(
            model.log_prob(words, tags),
            log2(expected_tag) + log2(expected_out))
Пример #7
0
    def test_prob_1gram(self):
        """Check prob() and log_prob() of a tagged sentence (1-gram model).

        The expected values depend on the self.tagged_sents fixture, which is
        defined outside this method.
        """
        model = MLHMM(1, self.tagged_sents, addone=False)
        words = 'el gato come pescado .'.split()
        tags = 'D N V N P'.split()

        # Tagging part: D, V, P and </s> each have prob 2.0 / 12.0 (four
        # factors); N has prob 4.0 / 12.0 and appears twice.
        expected_tag = (2.0 / 12.0)**4 * (4.0 / 12.0)**2
        # Output part: probs for el/D gato/N come/V pescado/N ./P.
        expected_out = 0.5 * 0.25 * 1.0 * 0.25 * 1.0

        self.assertAlmostEqual(
            model.prob(words, tags), expected_tag * expected_out)
        # In log space the two parts add instead of multiply.
        self.assertAlmostEqual(
            model.log_prob(words, tags),
            log2(expected_tag) + log2(expected_out))