Exemplo n.º 1
0
    def test_fit_left_right(self):
        """Fit a left-to-right model and check its zero entries survive.

        The model is given a start distribution concentrated on the first
        state and a transition matrix in which each state can only stay
        put or move to its direct successor.  After fitting, every entry
        that was zero beforehand must still be zero, the posteriors must
        be NaN-free and row-normalized, and Viterbi decoding must yield
        a finite score.
        """
        n_states = self.n_components

        # Left-to-right topology: 0.5 on the main diagonal and on the first
        # super-diagonal; the last state has no successor, so it keeps all
        # of its probability mass.
        topology = 0.5 * (np.eye(n_states) + np.eye(n_states, k=1))
        topology[-1, -1] = 1.0

        # Always start in first state
        initial = np.zeros(n_states)
        initial[0] = 1.0

        # Only the total number of samples is used below; the data is
        # fitted as one single sequence.
        lengths = [10, 8, 1]
        n_samples = sum(lengths)
        X = self.prng.rand(n_samples, self.n_features)

        model = hmm.GaussianHMM(self.n_components,
                                covariance_type="diag",
                                params="mct",
                                init_params="cm")
        # Copies are handed over so the local arrays stay untouched and can
        # serve as the reference for the zero-pattern checks.
        model.startprob_ = initial.copy()
        model.transmat_ = topology.copy()
        model.fit(X)

        start_zeros_kept = model.startprob_[initial == 0.0] == 0.0
        trans_zeros_kept = model.transmat_[topology == 0.0] == 0.0
        self.assertTrue(start_zeros_kept.all())
        self.assertTrue(trans_zeros_kept.all())

        posteriors = model.predict_proba(X)
        self.assertFalse(np.isnan(posteriors).any())
        row_totals = posteriors.sum(axis=1)
        self.assertTrue(np.allclose(row_totals, 1.))

        score, _decoded_states = model.decode(X, algorithm="viterbi")
        self.assertTrue(np.isfinite(score))
Exemplo n.º 2
0
    def test_fit_with_priors(self, params='stmc', n_iter=5):
        """Sample from a model carrying priors, then re-learn its parameters.

        Parameters
        ----------
        params : str
            Forwarded as ``params`` to the model that is re-fitted.
        n_iter : int
            Forwarded to ``log_likelihood_increasing``.
        """
        self.setUp()

        # Model used to generate the data.
        h = hmm.GaussianHMM(self.n_components, self.covariance_type)
        h.startprob_ = self.startprob
        noisy_transmat = (
            self.transmat + np.diag(self.prng.rand(self.n_components)))
        h.transmat_ = normalized(noisy_transmat, 1)
        h.means_ = 20 * self.means
        h.covars_ = self.covars

        # Priors attached to the generating model.
        h.startprob_prior = 10 * self.startprob + 2.0
        h.transmat_prior = 10 * self.transmat + 2.0
        h.means_prior = self.means
        h.means_weight = 2.0
        h.covars_prior = self.covars
        # 'full' and 'tied' covariances get the number of features added
        # on top of the base weight.
        if self.covariance_type in ('full', 'tied'):
            h.covars_weight = 2.0 + self.n_features
        else:
            h.covars_weight = 2.0

        lengths = [200] * 10
        X, _state_sequence = h.sample(sum(lengths), random_state=self.prng)

        # Re-initialize the parameters and check that we can converge to the
        # original parameter values.
        h_learn = hmm.GaussianHMM(self.n_components,
                                  self.covariance_type,
                                  params=params)
        h_learn.n_iter = 0
        h_learn.fit(X, lengths=lengths)

        likelihood_ok = log_likelihood_increasing(h_learn, X, lengths, n_iter)
        self.assertTrue(likelihood_ok)

        # Make sure we've converged to the right parameters.  Rows are sorted
        # before the comparison so the order of the states does not matter.
        # a) means
        true_means = sorted(h.means_.tolist())
        learned_means = sorted(h_learn.means_.tolist())
        self.assertTrue(np.allclose(true_means, learned_means, rtol=0.01))
        # b) covars are hard to estimate precisely from a relatively small
        #    sample, thus the large threshold
        true_covars = sorted(h._covars_.tolist())
        learned_covars = sorted(h_learn._covars_.tolist())
        self.assertTrue(np.allclose(true_covars, learned_covars, rtol=10))
Exemplo n.º 3
0
    def test_fit_sequences_of_different_length(self):
        """Fitting several sequences of unequal length must not fail."""
        self.setUp()
        lengths = [3, 4, 5]
        n_samples = sum(lengths)
        X = self.prng.rand(n_samples, self.n_features)

        model = hmm.GaussianHMM(self.n_components, self.covariance_type)
        # This shouldn't raise
        # ValueError: setting an array element with a sequence.
        fit_result = model.fit(X, lengths=lengths)
        self.assertIsNotNone(fit_result)
Exemplo n.º 4
0
 def test_bad_covariance_type(self):
     """Expect a ValueError when an unknown covariance type is used.

     The whole sequence (construction, parameter assignment and the
     ``_check()`` call) runs inside the ``assertRaises`` block, so the
     test passes as soon as any one of these steps raises ValueError.
     """
     self.setUp()
     with self.assertRaises(ValueError):
         model = hmm.GaussianHMM(
             n_components=20,
             covariance_type='badcovariance_type',
         )
         model.means_ = self.means
         model.covars_ = []
         model.startprob_ = self.startprob
         model.transmat_ = self.transmat
         # NOTE(review): presumably this is the call meant to reject the
         # covariance type -- confirm against the library.
         model._check()
Exemplo n.º 5
0
    def test_fit_with_length_one_signal(self):
        """Fitting must cope with a sequence that has a single sample."""
        self.setUp()
        # The last sequence consists of exactly one observation.
        lengths = [10, 8, 1]
        n_samples = sum(lengths)
        X = self.prng.rand(n_samples, self.n_features)

        model = hmm.GaussianHMM(self.n_components, self.covariance_type)
        # This shouldn't raise
        # ValueError: zero-size array to reduction operation maximum which
        #             has no identity
        fit_result = model.fit(X, lengths=lengths)
        self.assertIsNotNone(fit_result)
Exemplo n.º 6
0
    def test_covar_is_writeable(self):
        """The covariance array must be writeable after ``_init``."""
        model = hmm.GaussianHMM(n_components=1,
                                covariance_type="diag",
                                init_params="c")
        observations = np.random.normal(size=(1000, 5))
        model._init(observations)

        # np.diag returns a read-only view of the array in NumPy 1.9.X.
        # Make sure this doesn't prevent us from fitting an HMM with
        # diagonal covariance matrix. See PR#44 on GitHub for details
        # and discussion.
        covars_flags = model._covars_.flags
        self.assertTrue(covars_flags.writeable)
Exemplo n.º 7
0
    def test_sample(self, n=1000):
        """Draw ``n`` samples and verify the sizes of the returned values.

        Parameters
        ----------
        n : int
            Number of samples requested from the model.
        """
        self.setUp()
        model = hmm.GaussianHMM(self.n_components, self.covariance_type)
        model.startprob_ = self.startprob
        model.transmat_ = self.transmat
        # Make sure the means are far apart so posteriors.argmax()
        # picks the actual component used to generate the observations.
        model.means_ = 20 * self.means
        # Covariance entries are clipped from below at 0.1.
        model.covars_ = np.maximum(self.covars, 0.1)

        observations, states = model.sample(n, random_state=self.prng)
        expected_shape = (n, self.n_features)
        self.assertEqual(observations.shape, expected_shape)
        self.assertEqual(len(states), n)
Exemplo n.º 8
0
    def test_fit(self, params='stmc', n_iter=5, **kwargs):
        """Sample sequences from a model and fit that same model on them.

        ``params``, ``n_iter`` and ``kwargs`` are accepted but are not used
        by the body at present.
        """
        self.setUp()
        model = hmm.GaussianHMM(self.n_components, self.covariance_type)
        model.startprob_ = self.startprob
        # Add random mass to the diagonal of the transition matrix, then
        # pass the result through ``normalized``.
        diagonal_noise = np.diag(self.prng.rand(self.n_components))
        model.transmat_ = normalized(self.transmat + diagonal_noise, 1)
        model.means_ = 20 * self.means
        model.covars_ = self.covars

        lengths = [10] * 10
        total_samples = sum(lengths)
        X, _state_sequence = model.sample(total_samples,
                                          random_state=self.prng)

        # Mess up the parameters and see if we can re-learn them.
        # TODO: change the params and uncomment the check
        fit_result = model.fit(X, lengths=lengths)
        self.assertIsNotNone(fit_result)
Exemplo n.º 9
0
    def test_fit_zero_variance(self):
        """Fit data whose first feature is constant across all samples."""
        self.setUp()
        # Example from issue #2 on GitHub.  The first column is the same
        # value (715) in every row, i.e. it has zero variance.
        observations = np.asarray([
            [715.0, 585.0, 0.0, 0.0],
            [715.0, 520.0, 1.04705811, -60.3696289],
            [715.0, 455.0, 0.720886230, -52.7055664],
            [715.0, 390.0, -0.457946777, -78.0605469],
            [715.0, 325.0, -6.43127441, -55.9954834],
            [715.0, 260.0, -2.90063477, -78.0220947],
            [715.0, 195.0, 8.45532227, -70.3294373],
            [715.0, 130.0, 4.09387207, -58.3621216],
            [715.0, 65.0, -1.21667480, -44.8131409],
        ])

        model = hmm.GaussianHMM(3, self.covariance_type)
        fit_result = model.fit(observations)
        self.assertIsNotNone(fit_result)
Exemplo n.º 10
0
    def test_score_samples_and_decode(self):
        """Check posteriors and the decoded path on well-separated data.

        Five observations are generated around the mean of each state, in
        state order.  The posteriors returned by ``score_samples`` must
        have one row per sample, one column per state, and rows summing
        to one; ``decode`` must return the generating state for every
        sample.
        """
        self.setUp()
        h = hmm.GaussianHMM(self.n_components,
                            self.covariance_type,
                            init_params="st")
        # Make sure the means are far apart so posteriors.argmax()
        # picks the actual component used to generate the observations.
        h.means_ = 20 * self.means
        h.covars_ = self.covars

        # State index behind every sample: 0,0,0,0,0,1,1,1,1,1,...
        gaussidx = np.repeat(np.arange(self.n_components), 5)
        n_samples = len(gaussidx)
        X = self.prng.randn(n_samples, self.n_features) + h.means_[gaussidx]
        # NOTE(review): with init_params="st" this presumably initializes
        # only the start probabilities and the transition matrix, keeping
        # the means and covariances assigned above -- confirm.
        h._init(X)
        _ll, posteriors = h.score_samples(X)

        self.assertEqual(posteriors.shape, (n_samples, self.n_components))
        # unittest assertions are used instead of bare ``assert`` so the
        # checks are not stripped when Python runs with -O.
        self.assertTrue(
            np.allclose(posteriors.sum(axis=1), np.ones(n_samples)))

        _viterbi_ll, stateseq = h.decode(X)
        self.assertTrue(np.allclose(stateseq, gaussidx))