Example #1
    def test_fit(self, params='ste', n_iter=15, verbose=False, **kwargs):
        h = self.h

        # Create training data by sampling from the HMM.
        train_obs = [h.sample(n=10)[0] for x in xrange(10)]

        # Mess up the parameters and see if we can re-learn them.
        h.startprob_ = hmm.normalize(self.prng.rand(self.n_components))
        h.transmat_ = hmm.normalize(self.prng.rand(self.n_components,
                                                   self.n_components),
                                    axis=1)
        h.emissionprob_ = hmm.normalize(self.prng.rand(self.n_components,
                                                       self.n_symbols),
                                        axis=1)

        trainll = train_hmm_and_keep_track_of_log_likelihood(h,
                                                             train_obs,
                                                             n_iter=n_iter,
                                                             params=params,
                                                             **kwargs)[1:]

        # Check that the loglik is always increasing during training
        if not np.all(np.diff(trainll) > 0) and verbose:
            print
            print 'Test train: (%s)\n  %s\n  %s' % (params, trainll,
                                                    np.diff(trainll))
        self.assertTrue(np.all(np.diff(trainll) > -1.e-3))
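
Note: the helper train_hmm_and_keep_track_of_log_likelihood used throughout these examples is not part of the listing. A minimal sketch of what it might look like, assuming the older scikit-learn hmm API in which fit() accepts n_iter, init_params and other parameter overrides as keyword arguments (an illustration, not the library's actual test helper):

def train_hmm_and_keep_track_of_log_likelihood(hmm, obs, n_iter=1, **kwargs):
    # Fit once to initialize, then run EM one iteration at a time and
    # record the total log likelihood of the training sequences.
    hmm.fit(obs, n_iter=1, **kwargs)
    loglikelihoods = []
    for n in range(n_iter):
        hmm.fit(obs, n_iter=1, init_params='', **kwargs)
        loglikelihoods.append(sum(hmm.score(x) for x in obs))
    return loglikelihoods
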
Example #2
    def test_fit(self, params='stmwc', n_iter=5, verbose=False, **kwargs):
        h = hmm.GMMHMM(self.n_components)
        h.startprob = self.startprob
        h.transmat = hmm.normalize(
            self.transmat + np.diag(self.prng.rand(self.n_components)), 1)
        h.gmms = self.gmms

        # Create training data by sampling from the HMM.
        train_obs = [h.rvs(n=10, random_state=self.prng) for x in xrange(10)]

        # Mess up the parameters and see if we can re-learn them.
        h.fit(train_obs, n_iter=0)
        h.transmat = hmm.normalize(self.prng.rand(self.n_components,
                                                  self.n_components),
                                   axis=1)
        h.startprob = hmm.normalize(self.prng.rand(self.n_components))

        trainll = train_hmm_and_keep_track_of_log_likelihood(h,
                                                             train_obs,
                                                             n_iter=n_iter,
                                                             params=params,
                                                             covars_prior=1.0,
                                                             **kwargs)[1:]
        if not np.all(np.diff(trainll) > 0) and verbose:
            print
            print 'Test train: (%s)\n  %s\n  %s' % (params, trainll,
                                                    np.diff(trainll))
        self.assertTrue(np.all(np.diff(trainll) > -0.5))
Example #3
    def test_fit(self, params='stmwc', n_iter=5, verbose=False, **kwargs):
        h = hmm.GMMHMM(self.n_components, covars_prior=1.0)
        h.startprob_ = self.startprob
        h.transmat_ = hmm.normalize(
            self.transmat + np.diag(self.prng.rand(self.n_components)), 1)
        h.gmms = self.gmms

        # Create training data by sampling from the HMM.
        train_obs = [h.sample(n=10,
            random_state=self.prng)[0] for x in xrange(10)]

        # Mess up the parameters and see if we can re-learn them.
        h.n_iter = 0
        h.fit(train_obs)
        h.transmat_ = hmm.normalize(self.prng.rand(self.n_components,
                                                   self.n_components), axis=1)
        h.startprob_ = hmm.normalize(self.prng.rand(self.n_components))

        trainll = train_hmm_and_keep_track_of_log_likelihood(
            h, train_obs, n_iter=n_iter, params=params)[1:]

        if not np.all(np.diff(trainll) > 0) and verbose:
            print
            print 'Test train: (%s)\n  %s\n  %s' % (params, trainll,
                                                    np.diff(trainll))

        # XXX: this test appears to check that training log likelihood should
        # never be decreasing (up to a tolerance of 0.5, why?) but this is not
        # the case when the seed changes.
        raise SkipTest("Unstable test: trainll is not always increasing "
                       "depending on seed")

        self.assertTrue(np.all(np.diff(trainll) > -0.5))
Example #4
    def test_fit(self, params='ste', n_iter=15, verbose=False, **kwargs):
        np.random.seed(0)
        h = hmm.MultinomialHMM(self.n_components,
                               startprob=self.startprob,
                               transmat=self.transmat)
        h.emissionprob = self.emissionprob

        # Create training data by sampling from the HMM.
        train_obs = [h.rvs(n=10) for x in xrange(10)]

        # Mess up the parameters and see if we can re-learn them.
        h.startprob = hmm.normalize(self.prng.rand(self.n_components))
        h.transmat = hmm.normalize(self.prng.rand(self.n_components,
                                                  self.n_components),
                                   axis=1)
        h.emissionprob = hmm.normalize(self.prng.rand(self.n_components,
                                                      self.n_symbols),
                                       axis=1)

        trainll = train_hmm_and_keep_track_of_log_likelihood(h,
                                                             train_obs,
                                                             n_iter=n_iter,
                                                             params=params,
                                                             **kwargs)[1:]
        if not np.all(np.diff(trainll) > 0) and verbose:
            print
            print 'Test train: (%s)\n  %s\n  %s' % (params, trainll,
                                                    np.diff(trainll))
        self.assertTrue(np.all(np.diff(trainll) > 0))
Example #5
    def test_fit(self, params='stmwc', n_iter=5, verbose=False, **kwargs):
        h = hmm.GMMHMM(self.n_components, covars_prior=1.0)
        h.startprob_ = self.startprob
        h.transmat_ = hmm.normalize(
            self.transmat + np.diag(self.prng.rand(self.n_components)), 1)
        h.gmms_ = self.gmms_

        # Create training data by sampling from the HMM.
        train_obs = [h.sample(n=10, random_state=self.prng)[0]
                     for x in range(10)]

        # Mess up the parameters and see if we can re-learn them.
        h.n_iter = 0
        h.fit(train_obs)
        h.transmat_ = hmm.normalize(self.prng.rand(self.n_components,
                                                   self.n_components), axis=1)
        h.startprob_ = hmm.normalize(self.prng.rand(self.n_components))

        trainll = train_hmm_and_keep_track_of_log_likelihood(
            h, train_obs, n_iter=n_iter, params=params)[1:]

        if not np.all(np.diff(trainll) > 0) and verbose:
            print('Test train: (%s)\n  %s\n  %s' % (params, trainll,
                                                    np.diff(trainll)))

        # XXX: this test appears to check that training log likelihood should
        # never be decreasing (up to a tolerance of 0.5, why?) but this is not
        # the case when the seed changes.
        raise SkipTest("Unstable test: trainll is not always increasing "
                       "depending on seed")

        self.assertTrue(np.all(np.diff(trainll) > -0.5))
Example #6
 def fromJSON(self, jsonObj):
     """Initializes the prototype with a JSON dictionary."""
     super(PrototypeHMM, self).fromJSON(jsonObj)
     self.N = jsonObj["N"]
     self.model = WeightedGaussianHMM(self.N, "diag", algorithm="map", params="mc")
     self.model.n_features = jsonObj["n_features"]
     self.model.transmat_ = normalize(jsonObj["transmat"], axis=1)
     self.model.startprob_ = normalize(jsonObj["startprob"], axis=0)
     self.model._means_ = np.asarray(jsonObj["means"])
     self.model._covars_ = np.asarray(jsonObj["covars"])
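
The JSON dictionary expected by fromJSON carries the keys read above (the base class may read additional ones). An example with hypothetical values for a 2-state, 1-feature model:

jsonObj = {
    "N": 2,
    "n_features": 1,
    "transmat": [[0.9, 0.1], [0.0, 1.0]],
    "startprob": [1.0, 0.0],
    "means": [[0.0], [5.0]],
    "covars": [[1.0], [1.0]],
}
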
Example #7
    def _do_mstep(self, stats, params):
        if self.startprob_prior is None:
            self.startprob_prior = 1.0
        if self.transmat_prior is None:
            self.transmat_prior = 1.0

        if 's' in params:
            self.startprob_ = normalize(
                np.maximum(self.startprob_prior - 1.0 + stats['start'], 1e-20))

        if 't' in params:
            self.transmat_ = normalize(
                np.maximum(self.transmat_prior - 1.0 + stats['trans'], 1e-20),
                axis=1)
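
The update in Example #7 is the standard MAP estimate under a Dirichlet prior (an interpretation of the code above, not stated in it): with prior parameters alpha and expected counts c from the E-step, startprob_i is proportional to max(alpha_i - 1 + c_i, 1e-20), and each row of transmat_ is re-normalized the same way. With a prior of 1.0 this reduces to the plain maximum-likelihood update from the expected counts.
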
Example #8
    def test_fit(self, params='stmc', n_iter=25, verbose=False, **kwargs):
        h = hmm.GaussianHMM(self.n_components, self.covariance_type)
        h.startprob_ = self.startprob
        h.transmat_ = hmm.normalize(
            self.transmat + np.diag(self.prng.rand(self.n_components)), 1)
        h.means_ = 20 * self.means
        h.covars_ = self.covars[self.covariance_type]

        # Create training data by sampling from the HMM.
        train_obs = [h.sample(n=10)[0] for x in xrange(10)]

        # Mess up the parameters and see if we can re-learn them.
        h.n_iter = 0
        h.fit(train_obs)

        trainll = train_hmm_and_keep_track_of_log_likelihood(
            h, train_obs, n_iter=n_iter, params=params, **kwargs)[1:]

        # Check that the loglik is always increasing during training
        if not np.all(np.diff(trainll) > 0) and verbose:
            print
            print ('Test train: %s (%s)\n  %s\n  %s'
                   % (self.covariance_type, params, trainll, np.diff(trainll)))

        delta_min = np.diff(trainll).min()
        self.assertTrue(
            delta_min > -0.8,
            "The min nll increase is %f which is lower than the admissible"
            " threshold of %f, for model %s. The likelihoods are %s."
            % (delta_min, -0.8, self.covariance_type, trainll))
Example #9
    def test_fit(self, params='stmc', n_iter=25, verbose=False, **kwargs):
        h = hmm.GaussianHMM(self.n_components, self.covariance_type)
        h.startprob_ = self.startprob
        h.transmat_ = hmm.normalize(
            self.transmat + np.diag(self.prng.rand(self.n_components)), 1)
        h.means_ = 20 * self.means
        h.covars_ = self.covars[self.covariance_type]

        # Create training data by sampling from the HMM.
        train_obs = [h.sample(n=10)[0] for x in xrange(10)]

        # Mess up the parameters and see if we can re-learn them.
        h.fit(train_obs, n_iter=0)

        trainll = train_hmm_and_keep_track_of_log_likelihood(h,
                                                             train_obs,
                                                             n_iter=n_iter,
                                                             params=params,
                                                             **kwargs)[1:]

        # Check that the loglik is always increasing during training
        if not np.all(np.diff(trainll) > 0) and verbose:
            print
            print('Test train: %s (%s)\n  %s\n  %s' %
                  (self.covariance_type, params, trainll, np.diff(trainll)))

        delta_min = np.diff(trainll).min()
        self.assertTrue(
            delta_min > -0.8,
            "The min nll increase is %f which is lower than the admissible"
            " threshold of %f, for model %s. The likelihoods are %s." %
            (delta_min, -0.8, self.covariance_type, trainll))
Example #10
    def test_fit(self, params="ste", n_iter=5, verbose=False, **kwargs):
        h = self.h

        # Create training data by sampling from the HMM.
        train_obs = [h.sample(n=10)[0] for x in range(10)]

        # Mess up the parameters and see if we can re-learn them.
        h.startprob_ = hmm.normalize(self.prng.rand(self.n_components))
        h.transmat_ = hmm.normalize(self.prng.rand(self.n_components, self.n_components), axis=1)
        h.emissionprob_ = hmm.normalize(self.prng.rand(self.n_components, self.n_symbols), axis=1)

        trainll = train_hmm_and_keep_track_of_log_likelihood(h, train_obs, n_iter=n_iter, params=params, **kwargs)[1:]

        # Check that the loglik is always increasing during training
        if not np.all(np.diff(trainll) > 0) and verbose:
            print("Test train: (%s)\n  %s\n  %s" % (params, trainll, np.diff(trainll)))
        self.assertTrue(np.all(np.diff(trainll) > -1.0e-3))
Example #11
    def test_fit(self, params='ste', n_iter=15, verbose=False, **kwargs):
        np.random.seed(0)
        h = hmm.MultinomialHMM(self.n_components,
                               startprob=self.startprob,
                               transmat=self.transmat)
        h.emissionprob = self.emissionprob

        # Create training data by sampling from the HMM.
        train_obs = [h.rvs(n=10) for x in xrange(10)]

        # Mess up the parameters and see if we can re-learn them.
        h.startprob = hmm.normalize(self.prng.rand(self.n_components))
        h.transmat = hmm.normalize(self.prng.rand(self.n_components,
                                                  self.n_components), axis=1)
        h.emissionprob = hmm.normalize(
            self.prng.rand(self.n_components, self.n_symbols), axis=1)

        trainll = train_hmm_and_keep_track_of_log_likelihood(
            h, train_obs, n_iter=n_iter, params=params, **kwargs)[1:]
        if not np.all(np.diff(trainll) > 0) and verbose:
            print
            print 'Test train: (%s)\n  %s\n  %s' % (params, trainll,
                                                    np.diff(trainll))
        self.assertTrue(np.all(np.diff(trainll) > 0))
Example #12
def create_random_gmm(n_mix, n_features, covariance_type, prng=0):
    prng = check_random_state(prng)
    g = mixture.GMM(n_mix, covariance_type=covariance_type)
    g.means_ = prng.randint(-20, 20, (n_mix, n_features))
    mincv = 0.1
    g.covars_ = {
        "spherical": (mincv + mincv * np.dot(prng.rand(n_mix, 1), np.ones((1, n_features)))) ** 2,
        "tied": (make_spd_matrix(n_features, random_state=prng) + mincv * np.eye(n_features)),
        "diag": (mincv + mincv * prng.rand(n_mix, n_features)) ** 2,
        "full": np.array(
            [make_spd_matrix(n_features, random_state=prng) + mincv * np.eye(n_features) for x in range(n_mix)]
        ),
    }[covariance_type]
    g.weights_ = hmm.normalize(prng.rand(n_mix))
    return g
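
A hypothetical usage, assuming mixture, make_spd_matrix, check_random_state and hmm are imported as in the surrounding test module; the list of mixtures is the kind of object assigned to h.gmms in the GMMHMM examples above:

gmms = [create_random_gmm(n_mix=3, n_features=2, covariance_type='full', prng=0)
        for _ in range(4)]   # one GMM per hidden state
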
Example #13
    def test_fit(self, params='stmwc', n_iter=5, verbose=False, **kwargs):
        h = hmm.GMMHMM(self.n_components)
        h.startprob = self.startprob
        h.transmat = hmm.normalize(
            self.transmat + np.diag(self.prng.rand(self.n_components)), 1)
        h.gmms = self.gmms

        # Create training data by sampling from the HMM.
        train_obs = [h.rvs(n=10, random_state=self.prng) for x in xrange(10)]

        # Mess up the parameters and see if we can re-learn them.
        h.fit(train_obs, n_iter=0)
        h.transmat = hmm.normalize(self.prng.rand(self.n_components,
                                                  self.n_components), axis=1)
        h.startprob = hmm.normalize(self.prng.rand(self.n_components))

        trainll = train_hmm_and_keep_track_of_log_likelihood(
            h, train_obs, n_iter=n_iter, params=params,
            covars_prior=1.0, **kwargs)[1:]
        if not np.all(np.diff(trainll) > 0) and verbose:
            print
            print 'Test train: (%s)\n  %s\n  %s' % (params, trainll,
                                                    np.diff(trainll))
        self.assertTrue(np.all(np.diff(trainll) > -0.5))
Example #14
def create_random_gmm(n_mix, n_features, cvtype, prng=prng):
    from sklearn import mixture

    g = mixture.GMM(n_mix, cvtype=cvtype)
    g.means = prng.randint(-20, 20, (n_mix, n_features))
    mincv = 0.1
    g.covars = {
        'spherical': (mincv + mincv * prng.rand(n_mix)) ** 2,
        'tied': (make_spd_matrix(n_features, random_state=prng)
                 + mincv * np.eye(n_features)),
        'diag': (mincv + mincv * prng.rand(n_mix, n_features)) ** 2,
        'full': np.array(
            [make_spd_matrix(n_features, random_state=prng)
             + mincv * np.eye(n_features) for x in xrange(n_mix)])
    }[cvtype]
    g.weights = hmm.normalize(prng.rand(n_mix))
    return g
Example #15
def create_random_gmm(n_mix, n_features, covariance_type, prng=0):
    prng = check_random_state(prng)
    g = mixture.GMM(n_mix, covariance_type=covariance_type)
    g.means_ = prng.randint(-20, 20, (n_mix, n_features))
    mincv = 0.1
    g.covars_ = {
        'spherical': (mincv + mincv * np.dot(prng.rand(n_mix, 1),
                                             np.ones((1, n_features)))) ** 2,
        'tied': (make_spd_matrix(n_features, random_state=prng)
                 + mincv * np.eye(n_features)),
        'diag': (mincv + mincv * prng.rand(n_mix, n_features)) ** 2,
        'full': np.array(
            [make_spd_matrix(n_features, random_state=prng)
             + mincv * np.eye(n_features) for x in range(n_mix)])
    }[covariance_type]
    g.weights_ = hmm.normalize(prng.rand(n_mix))
    return g
Example #16
def create_random_gmm(n_mix, n_features, cvtype, prng=prng):
    from sklearn import mixture

    g = mixture.GMM(n_mix, cvtype=cvtype)
    g.means = prng.randint(-20, 20, (n_mix, n_features))
    mincv = 0.1
    g.covars = {
        'spherical': (mincv + mincv * prng.rand(n_mix))**2,
        'tied': (make_spd_matrix(n_features, random_state=prng) +
                 mincv * np.eye(n_features)),
        'diag': (mincv + mincv * prng.rand(n_mix, n_features))**2,
        'full':
        np.array([
            make_spd_matrix(n_features, random_state=prng) +
            mincv * np.eye(n_features) for x in xrange(n_mix)
        ])
    }[cvtype]
    g.weights = hmm.normalize(prng.rand(n_mix))
    return g
Example #17
    def test_fit_with_priors(self, params='stmc', n_iter=5, verbose=False):
        startprob_prior = 10 * self.startprob + 2.0
        transmat_prior = 10 * self.transmat + 2.0
        means_prior = self.means
        means_weight = 2.0
        covars_weight = 2.0
        if self.covariance_type in ('full', 'tied'):
            covars_weight += self.n_features
        covars_prior = self.covars[self.covariance_type]

        h = hmm.GaussianHMM(self.n_components, self.covariance_type)
        h.startprob_ = self.startprob
        h.startprob_prior = startprob_prior
        h.transmat_ = hmm.normalize(
            self.transmat + np.diag(self.prng.rand(self.n_components)), 1)
        h.transmat_prior = transmat_prior
        h.means_ = 20 * self.means
        h.means_prior = means_prior
        h.means_weight = means_weight
        h.covars_ = self.covars[self.covariance_type]
        h.covars_prior = covars_prior
        h.covars_weight = covars_weight

        # Create training data by sampling from the HMM.
        train_obs = [h.sample(n=10)[0] for x in xrange(10)]

        # Mess up the parameters and see if we can re-learn them.
        h.n_iter = 0
        h.fit(train_obs[:1])

        trainll = train_hmm_and_keep_track_of_log_likelihood(h,
                                                             train_obs,
                                                             n_iter=n_iter,
                                                             params=params)[1:]

        # Check that the loglik is always increasing during training
        if not np.all(np.diff(trainll) > 0) and verbose:
            print
            print('Test MAP train: %s (%s)\n  %s\n  %s' %
                  (self.covariance_type, params, trainll, np.diff(trainll)))
        # XXX: Why such a large tolerance?
        self.assertTrue(np.all(np.diff(trainll) > -0.5))
Example #18
    def test_fit_with_priors(self, params='stmc', n_iter=10, verbose=False):
        startprob_prior = 10 * self.startprob + 2.0
        transmat_prior = 10 * self.transmat + 2.0
        means_prior = self.means
        means_weight = 2.0
        covars_weight = 2.0
        if self.covariance_type in ('full', 'tied'):
            covars_weight += self.n_features
        covars_prior = self.covars[self.covariance_type]

        h = hmm.GaussianHMM(self.n_components, self.covariance_type)
        h.startprob_ = self.startprob
        h.startprob_prior = startprob_prior
        h.transmat_ = hmm.normalize(
            self.transmat + np.diag(self.prng.rand(self.n_components)), 1)
        h.transmat_prior = transmat_prior
        h.means_ = 20 * self.means
        h.means_prior = means_prior
        h.means_weight = means_weight
        h.covars_ = self.covars[self.covariance_type]
        h.covars_prior = covars_prior
        h.covars_weight = covars_weight

        # Create training data by sampling from the HMM.
        train_obs = [h.sample(n=10)[0] for x in xrange(10)]

        # Mess up the parameters and see if we can re-learn them.
        h.n_iter = 0
        h.fit(train_obs[:1])

        trainll = train_hmm_and_keep_track_of_log_likelihood(
            h, train_obs, n_iter=n_iter, params=params)[1:]

        # Check that the loglik is always increasing during training
        if not np.all(np.diff(trainll) > 0) and verbose:
            print
            print ('Test MAP train: %s (%s)\n  %s\n  %s'
                   % (self.covariance_type, params, trainll, np.diff(trainll)))
        # XXX: Why such a large tolerance?
        self.assertTrue(np.all(np.diff(trainll) > -0.5))
Example #19
    def train(self, obs, obs_weights=None, max_N=15):
        """Estimates the prototype from a set of observations.
        
        Parameters
        ----------
        max_N : int
           The maximum lenght of the HMM.
      
        """
        if obs_weights is None:
            obs_weights = np.ones(len(obs))
        else:
            obs_weights = np.asarray(obs_weights)

        # set the number of states
        if self.num_states >= 1.0:
            self.N = int(self.num_states)
        else:
            mean_length = np.mean([each_obs.shape[0] for each_obs in obs])
            self.N = min(int(self.num_states * mean_length), max_N)

        # transition prob: left-to-right
        self.transmat = np.zeros((self.N, self.N))
        for i in range(self.N):
            self.transmat[i, i] = self.self_transprob
            if i + 1 < self.N:
                self.transmat[i, i + 1] = self.next_transprob
            for j in range(i + 2, self.N):
                self.transmat[i, j] = self.skip_transprob

        self.transmat = normalize(self.transmat, axis=1)

        # state prior prob: left-most only
        self.startprob = np.zeros(self.N)
        self.startprob[0] = 1.0

        self.model = WeightedGaussianHMM(self.N, "diag", self.startprob, self.transmat, algorithm="map", params="mc")
        self.num_obs = len(obs)

        return self.model.fit(obs, obs_weights=obs_weights)
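
For illustration, a minimal sketch of the left-to-right band structure the loop above produces, assuming hypothetical probabilities self_transprob=0.8, next_transprob=0.15, skip_transprob=0.05 and N=3:

import numpy as np

N, self_p, next_p, skip_p = 3, 0.8, 0.15, 0.05   # hypothetical values
transmat = np.zeros((N, N))
for i in range(N):
    transmat[i, i] = self_p              # stay in the current state
    if i + 1 < N:
        transmat[i, i + 1] = next_p      # advance to the next state
    for j in range(i + 2, N):
        transmat[i, j] = skip_p          # skip ahead over states
# Before row normalization:
# [[0.80, 0.15, 0.05],
#  [0.00, 0.80, 0.15],
#  [0.00, 0.00, 0.80]]

After normalize(transmat, axis=1) each row sums to one; the last row has only the self-transition, so the final state is absorbing.
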
Example #20
def test_normalize_3D():
    A = rng.rand(2, 2, 2) + 1.0
    for axis in range(3):
        Anorm = hmm.normalize(A, axis)
        assert np.all(np.allclose(Anorm.sum(axis), 1.0))
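
The test above only exercises the contract of hmm.normalize: after normalizing along an axis, sums along that axis equal 1. A minimal sketch of a function with that behaviour (an illustration, not the library's actual implementation):

import numpy as np

def normalize_sketch(A, axis=None):
    # Scale A so it sums to 1.0 along `axis` (or in total when axis is None).
    A = np.asarray(A, dtype=float)
    if axis is None:
        return A / A.sum()
    return A / A.sum(axis=axis, keepdims=True)

For the strictly positive array in the test, Anorm.sum(axis) is then 1.0 along every axis, which is exactly what the assertion checks.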