# Example #1
# 0
class IceCreamHMM(TestCase):
    def setUp(self):
        """Build the Eisner ice cream HMM (J & M, Figure 6.3)."""
        # Model parameters in J & M's notation: pi (initial), A
        # (transition), B (emission), Q (states), V (vocabulary).
        # Any aliases work, as long as the trainer accepts them.
        pi = [.8, .2]        # P(Hot), P(Cold)
        a = [[.7, .3],       # P(Hot|Hot), P(Cold|Hot)
             [.4, .6]]       # P(Hot|Cold), P(Cold|Cold)
        b = [[.2, .4, .4],   # P(1|Hot), P(2|Hot), P(3|Hot)
             [.5, .4, .1]]   # P(1|Cold), P(2|Cold), P(3|Cold)
        self.hmm = HMM()
        self.hmm.train(
            [],
            initial_probabilities=pi,
            transition_probabilities=a,
            emission_probabilities=b,
            states=("Hot", "Cold"),
            vocabulary=(1, 2, 3))

    def test_likelihood(self):
        """Forward likelihood matches J & M, Figure 6.7."""
        # Figure 6.7 of J & M (slide 15 of Lecture6_Handout.pdf,
        # 2014-10-15) has a known erratum in the computation of
        # alpha_2(2): .7 * .2 = .14, not .014.
        alpha_2_hot = .32 * .14 + .02 * .08
        alpha_2_cold = .32 * .15 + .02 * .30
        self.assertAlmostEqual(
            self.hmm.likelihood(IceCreamCones([3, 1])),
            alpha_2_hot + alpha_2_cold)

    def test_decoding(self):
        """Viterbi decoding matches J & M, Section 6.4."""
        # Figure 6.10 repeats the Figure 6.7 erratum, but the value it
        # gives for the Viterbi variable v_2(2), .0448, is correct (as
        # you should verify manually and perhaps add a test for here).
        viterbi_t1 = self.hmm.classify(IceCreamCones([3, 1, 3]),
                                       Test=True, T=1)
        self.assertAlmostEqual(viterbi_t1[0], max(.32 * .14, .02 * .08))
        self.assertAlmostEqual(viterbi_t1[1], max(.32 * .15, .02 * .30))
        # Full decode of the observation sequence 3, 1, 3.
        self.assertEqual(self.hmm.classify(IceCreamCones([3, 1, 3])),
                         ["Hot", "Hot", "Hot"])
# Example #2
# 0
class TagHMM(TestCase):
    """Train and test an HMM POS tagger."""
    def setUp(self):
        self.train, self.test = self.split_sents()
        self.hmm = HMM()
        self.hmm.train(self.train)

    def split_sents(self,
                    train=0.95,
                    total=3500,
                    document_class=TaggedSentence):
        sents = tagged_corpus.tagged_sents()[:total]
        total = len(sents) if total is None else total
        i = int(round(train * total))
        j = i + int(round(total - train * total))
        return (map(document_class, sents[0:i]), map(document_class,
                                                     sents[i:j]))

    def accuracy(self, test_sents, verbose=sys.stderr):
        """Compute accuracy of the HMM tagger on the given sentences."""
        total = correct = 0
        for sent in test_sents:
            tags = self.hmm.classify(sent)
            total += len(tags)
            for guess, tag in zip(tags, sent.label):
                correct += (guess == tag)
        if verbose:
            print >> verbose, "%.2d%% " % (100 * correct / total),
        return correct / total

    @skip("too slow")
    def test_tag_train(self):
        """Tag the training data"""
        self.assertGreater(self.accuracy(self.train), 0.85)

    def test_tag(self):
        """Tag the test data"""
        self.assertGreater(self.accuracy(self.test), 0.85)
# Example #3
# 0
class TagHMM(TestCase):
    """Train and test an HMM POS tagger."""

    def setUp(self):
        self.train, self.test = self.split_sents()
        self.hmm = HMM()
        self.hmm.train(self.train)

    def split_sents(self, train=0.95, total=3500,
                    document_class=TaggedSentence):
        sents = tagged_corpus.tagged_sents()[:total]
        total = len(sents) if total is None else total
        i = int(round(train * total))
        j = i + int(round(total - train * total))
        return (map(document_class, sents[0:i]),
                map(document_class, sents[i:j]))

    def accuracy(self, test_sents, verbose=sys.stderr):
        """Compute accuracy of the HMM tagger on the given sentences."""
        total = correct = 0
        for sent in test_sents:
            tags = self.hmm.classify(sent)
            total += len(tags)
            for guess, tag in zip(tags, sent.label):
                correct += (guess == tag)
        if verbose:
            print >> verbose, "%.2d%% " % (100 * correct / total),
        return correct / total

    @skip("too slow")
    def test_tag_train(self):
        """Tag the training data"""
        self.assertGreater(self.accuracy(self.train), 0.85)

    def test_tag(self):
        """Tag the test data"""
        self.assertGreater(self.accuracy(self.test), 0.85)
# Example #4
# 0

if __name__ == "__main__":
    # Two-state gene-finder HMM over DNA: state "1" = non-coding,
    # state "2" = coding. The original comments were copied verbatim
    # from the ice-cream example (Hot/Cold, observations 1-3) and did
    # not describe this model; they are corrected below.
    hmm = HMM()
    hmm.train(
        [],
        initial_probabilities=[.5, .5],  # P(non-coding), P(coding)
        transition_probabilities=[
            [.95, .05],  # P(non-coding, coding | non-coding)
            [.15, .85]
        ],  # P(non-coding, coding | coding)
        emission_probabilities=[
            [.4, .1, .1, .4],  # P(A, C, G, T | non-coding)
            [.2, .3, .3, .2]
        ],  # P(A, C, G, T | coding)
        states=("1", "2"),  # "1" = non-coding, "2" = coding
        vocabulary=('A', 'C', 'G', 'T'))
    # Parenthesized single-argument print is valid in both Python 2
    # and Python 3 (the bare `print x` statement is Python 2 only).
    print(hmm.likelihood(Gene(['T', 'G', 'C', 'A'])))
    print(hmm.classify(
        Gene([
            'G', 'C', 'G', 'C', 'A', 'T', 'T', 'A', 'A', 'T', 'C', 'G', 'T',
            'C', 'G', 'T', 'C', 'G', 'T', 'A', 'G', 'T', 'T', 'C', 'C', 'T',
            'T'
        ])))
    # Same sequence with one extra 'G' inserted mid-stream.
    print(hmm.classify(
        Gene([
            'G', 'C', 'G', 'C', 'A', 'T', 'T', 'A', 'A', 'T', 'C', 'G', 'T',
            'C', 'G', 'G', 'T', 'C', 'G', 'T', 'A', 'G', 'T', 'T', 'C', 'C',
            'T', 'T'
        ])))