def test_fit(self, params='ste', n_iter=15, verbose=False, **kwargs):
    h = self.h

    # Create training data by sampling from the HMM.
    train_obs = [h.sample(n=10)[0] for x in xrange(10)]

    # Mess up the parameters and see if we can re-learn them.
    h.startprob_ = hmm.normalize(self.prng.rand(self.n_components))
    h.transmat_ = hmm.normalize(self.prng.rand(self.n_components,
                                               self.n_components), axis=1)
    h.emissionprob_ = hmm.normalize(self.prng.rand(self.n_components,
                                                   self.n_symbols), axis=1)

    trainll = train_hmm_and_keep_track_of_log_likelihood(
        h, train_obs, n_iter=n_iter, params=params, **kwargs)[1:]

    # Check that the loglik is always increasing during training
    if not np.all(np.diff(trainll) > 0) and verbose:
        print
        print 'Test train: (%s)\n %s\n %s' % (params, trainll,
                                              np.diff(trainll))
    self.assertTrue(np.all(np.diff(trainll) > -1.e-3))
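# NOTE: train_hmm_and_keep_track_of_log_likelihood() is called throughout the
# test_fit variants in this section but is not defined here. The following is
# a minimal sketch of what such a helper is assumed to do: run EM one
# iteration at a time and record the total training log likelihood after each
# iteration. The attribute names (n_iter, init_params) and methods (fit,
# score) follow the old sklearn.hmm-style API; the original helper's exact
# signature and handling of the extra keyword arguments may differ.
def train_hmm_and_keep_track_of_log_likelihood(hmm, obs, n_iter=1, **kwargs):
    # Extra keyword arguments (params, covars_prior, ...) are ignored in
    # this sketch.
    hmm.n_iter = 1
    hmm.fit(obs)
    loglikelihoods = []
    for n in range(n_iter):
        hmm.n_iter = 1
        hmm.init_params = ''   # keep the parameters learned so far
        hmm.fit(obs)
        loglikelihoods.append(sum(hmm.score(x) for x in obs))
    return loglikelihoods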
def test_fit(self, params='stmwc', n_iter=5, verbose=False, **kwargs):
    h = hmm.GMMHMM(self.n_components)
    h.startprob = self.startprob
    h.transmat = hmm.normalize(
        self.transmat + np.diag(self.prng.rand(self.n_components)), 1)
    h.gmms = self.gmms

    # Create training data by sampling from the HMM.
    train_obs = [h.rvs(n=10, random_state=self.prng) for x in xrange(10)]

    # Mess up the parameters and see if we can re-learn them.
    h.fit(train_obs, n_iter=0)
    h.transmat = hmm.normalize(self.prng.rand(self.n_components,
                                              self.n_components), axis=1)
    h.startprob = hmm.normalize(self.prng.rand(self.n_components))

    trainll = train_hmm_and_keep_track_of_log_likelihood(
        h, train_obs, n_iter=n_iter, params=params, covars_prior=1.0,
        **kwargs)[1:]

    if not np.all(np.diff(trainll) > 0) and verbose:
        print
        print 'Test train: (%s)\n %s\n %s' % (params, trainll,
                                              np.diff(trainll))
    self.assertTrue(np.all(np.diff(trainll) > -0.5))
def test_fit(self, params='stmwc', n_iter=5, verbose=False, **kwargs):
    h = hmm.GMMHMM(self.n_components, covars_prior=1.0)
    h.startprob_ = self.startprob
    h.transmat_ = hmm.normalize(
        self.transmat + np.diag(self.prng.rand(self.n_components)), 1)
    h.gmms = self.gmms

    # Create training data by sampling from the HMM.
    train_obs = [h.sample(n=10, random_state=self.prng)[0]
                 for x in xrange(10)]

    # Mess up the parameters and see if we can re-learn them.
    h.n_iter = 0
    h.fit(train_obs)
    h.transmat_ = hmm.normalize(self.prng.rand(self.n_components,
                                               self.n_components), axis=1)
    h.startprob_ = hmm.normalize(self.prng.rand(self.n_components))

    trainll = train_hmm_and_keep_track_of_log_likelihood(
        h, train_obs, n_iter=n_iter, params=params)[1:]

    if not np.all(np.diff(trainll) > 0) and verbose:
        print
        print 'Test train: (%s)\n %s\n %s' % (params, trainll,
                                              np.diff(trainll))

    # XXX: this test appears to check that training log likelihood should
    # never be decreasing (up to a tolerance of 0.5, why?) but this is not
    # the case when the seed changes.
    raise SkipTest("Unstable test: trainll is not always increasing "
                   "depending on seed")

    self.assertTrue(np.all(np.diff(trainll) > -0.5))
def test_fit(self, params='ste', n_iter=15, verbose=False, **kwargs):
    np.random.seed(0)
    h = hmm.MultinomialHMM(self.n_components, startprob=self.startprob,
                           transmat=self.transmat)
    h.emissionprob = self.emissionprob

    # Create training data by sampling from the HMM.
    train_obs = [h.rvs(n=10) for x in xrange(10)]

    # Mess up the parameters and see if we can re-learn them.
    h.startprob = hmm.normalize(self.prng.rand(self.n_components))
    h.transmat = hmm.normalize(self.prng.rand(self.n_components,
                                              self.n_components), axis=1)
    h.emissionprob = hmm.normalize(self.prng.rand(self.n_components,
                                                  self.n_symbols), axis=1)

    trainll = train_hmm_and_keep_track_of_log_likelihood(
        h, train_obs, n_iter=n_iter, params=params, **kwargs)[1:]

    if not np.all(np.diff(trainll) > 0) and verbose:
        print
        print 'Test train: (%s)\n %s\n %s' % (params, trainll,
                                              np.diff(trainll))
    self.assertTrue(np.all(np.diff(trainll) > 0))
def test_fit(self, params='stmwc', n_iter=5, verbose=False, **kwargs):
    h = hmm.GMMHMM(self.n_components, covars_prior=1.0)
    h.startprob_ = self.startprob
    h.transmat_ = hmm.normalize(
        self.transmat + np.diag(self.prng.rand(self.n_components)), 1)
    h.gmms_ = self.gmms_

    # Create training data by sampling from the HMM.
    train_obs = [h.sample(n=10, random_state=self.prng)[0]
                 for x in range(10)]

    # Mess up the parameters and see if we can re-learn them.
    h.n_iter = 0
    h.fit(train_obs)
    h.transmat_ = hmm.normalize(self.prng.rand(self.n_components,
                                               self.n_components), axis=1)
    h.startprob_ = hmm.normalize(self.prng.rand(self.n_components))

    trainll = train_hmm_and_keep_track_of_log_likelihood(
        h, train_obs, n_iter=n_iter, params=params)[1:]

    if not np.all(np.diff(trainll) > 0) and verbose:
        print('Test train: (%s)\n %s\n %s' % (params, trainll,
                                              np.diff(trainll)))

    # XXX: this test appears to check that training log likelihood should
    # never be decreasing (up to a tolerance of 0.5, why?) but this is not
    # the case when the seed changes.
    raise SkipTest("Unstable test: trainll is not always increasing "
                   "depending on seed")

    self.assertTrue(np.all(np.diff(trainll) > -0.5))
def fromJSON(self, jsonObj):
    """Initializes the prototype with a JSON dictionary."""
    super(PrototypeHMM, self).fromJSON(jsonObj)
    self.N = jsonObj["N"]
    self.model = WeightedGaussianHMM(self.N, "diag",
                                     algorithm="map", params="mc")
    self.model.n_features = jsonObj["n_features"]
    self.model.transmat_ = normalize(jsonObj["transmat"], axis=1)
    self.model.startprob_ = normalize(jsonObj["startprob"], axis=0)
    self.model._means_ = np.asarray(jsonObj["means"])
    self.model._covars_ = np.asarray(jsonObj["covars"])
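# Hypothetical example of the dictionary layout fromJSON() above expects
# (values are illustrative only, for a 2-state, 1-feature model; the base
# class may require additional keys handled by super().fromJSON()):
#
# example_json = {
#     "N": 2,
#     "n_features": 1,
#     "transmat": [[0.9, 0.1], [0.0, 1.0]],
#     "startprob": [1.0, 0.0],
#     "means": [[0.0], [1.0]],
#     "covars": [[1.0], [1.0]],
# }
# proto.fromJSON(example_json)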
def _do_mstep(self, stats, params):
    if self.startprob_prior is None:
        self.startprob_prior = 1.0
    if self.transmat_prior is None:
        self.transmat_prior = 1.0

    if 's' in params:
        self.startprob_ = normalize(
            np.maximum(self.startprob_prior - 1.0 + stats['start'], 1e-20))
    if 't' in params:
        self.transmat_ = normalize(
            np.maximum(self.transmat_prior - 1.0 + stats['trans'], 1e-20),
            axis=1)
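# Small numeric illustration of the MAP-style M-step above. With the default
# flat prior of 1.0, the (prior - 1.0 + stats) term reduces to the raw
# sufficient statistics, so the update is plain normalization (values below
# are hypothetical):
#
# stats = {'start': np.array([3.0, 1.0]),
#          'trans': np.array([[8.0, 2.0],
#                             [1.0, 9.0]])}
# normalize(np.maximum(1.0 - 1.0 + stats['start'], 1e-20))
#     -> approximately [0.75, 0.25]
# normalize(np.maximum(1.0 - 1.0 + stats['trans'], 1e-20), axis=1)
#     -> approximately [[0.8, 0.2],
#                       [0.1, 0.9]]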
def test_fit(self, params='stmc', n_iter=25, verbose=False, **kwargs):
    h = hmm.GaussianHMM(self.n_components, self.covariance_type)
    h.startprob_ = self.startprob
    h.transmat_ = hmm.normalize(
        self.transmat + np.diag(self.prng.rand(self.n_components)), 1)
    h.means_ = 20 * self.means
    h.covars_ = self.covars[self.covariance_type]

    # Create training data by sampling from the HMM.
    train_obs = [h.sample(n=10)[0] for x in xrange(10)]

    # Mess up the parameters and see if we can re-learn them.
    h.n_iter = 0
    h.fit(train_obs)

    trainll = train_hmm_and_keep_track_of_log_likelihood(
        h, train_obs, n_iter=n_iter, params=params, **kwargs)[1:]

    # Check that the loglik is always increasing during training
    if not np.all(np.diff(trainll) > 0) and verbose:
        print
        print ('Test train: %s (%s)\n %s\n %s' % (self.covariance_type,
                                                  params, trainll,
                                                  np.diff(trainll)))

    delta_min = np.diff(trainll).min()
    self.assertTrue(
        delta_min > -0.8,
        "The min nll increase is %f which is lower than the admissible"
        " threshold of %f, for model %s. The likelihoods are %s."
        % (delta_min, -0.8, self.covariance_type, trainll))
def test_fit(self, params='stmc', n_iter=25, verbose=False, **kwargs):
    h = hmm.GaussianHMM(self.n_components, self.covariance_type)
    h.startprob_ = self.startprob
    h.transmat_ = hmm.normalize(
        self.transmat + np.diag(self.prng.rand(self.n_components)), 1)
    h.means_ = 20 * self.means
    h.covars_ = self.covars[self.covariance_type]

    # Create training data by sampling from the HMM.
    train_obs = [h.sample(n=10)[0] for x in xrange(10)]

    # Mess up the parameters and see if we can re-learn them.
    h.fit(train_obs, n_iter=0)

    trainll = train_hmm_and_keep_track_of_log_likelihood(
        h, train_obs, n_iter=n_iter, params=params, **kwargs)[1:]

    # Check that the loglik is always increasing during training
    if not np.all(np.diff(trainll) > 0) and verbose:
        print
        print('Test train: %s (%s)\n %s\n %s' % (self.covariance_type,
                                                 params, trainll,
                                                 np.diff(trainll)))

    delta_min = np.diff(trainll).min()
    self.assertTrue(
        delta_min > -0.8,
        "The min nll increase is %f which is lower than the admissible"
        " threshold of %f, for model %s. The likelihoods are %s."
        % (delta_min, -0.8, self.covariance_type, trainll))
def test_fit(self, params="ste", n_iter=5, verbose=False, **kwargs): h = self.h # Create training data by sampling from the HMM. train_obs = [h.sample(n=10)[0] for x in range(10)] # Mess up the parameters and see if we can re-learn them. h.startprob_ = hmm.normalize(self.prng.rand(self.n_components)) h.transmat_ = hmm.normalize(self.prng.rand(self.n_components, self.n_components), axis=1) h.emissionprob_ = hmm.normalize(self.prng.rand(self.n_components, self.n_symbols), axis=1) trainll = train_hmm_and_keep_track_of_log_likelihood(h, train_obs, n_iter=n_iter, params=params, **kwargs)[1:] # Check that the loglik is always increasing during training if not np.all(np.diff(trainll) > 0) and verbose: print("Test train: (%s)\n %s\n %s" % (params, trainll, np.diff(trainll))) self.assertTrue(np.all(np.diff(trainll) > -1.0e-3))
def test_fit(self, params='ste', n_iter=15, verbose=False, **kwargs):
    np.random.seed(0)
    h = hmm.MultinomialHMM(self.n_components, startprob=self.startprob,
                           transmat=self.transmat)
    h.emissionprob = self.emissionprob

    # Create training data by sampling from the HMM.
    train_obs = [h.rvs(n=10) for x in xrange(10)]

    # Mess up the parameters and see if we can re-learn them.
    h.startprob = hmm.normalize(self.prng.rand(self.n_components))
    h.transmat = hmm.normalize(self.prng.rand(self.n_components,
                                              self.n_components), axis=1)
    h.emissionprob = hmm.normalize(
        self.prng.rand(self.n_components, self.n_symbols), axis=1)

    trainll = train_hmm_and_keep_track_of_log_likelihood(
        h, train_obs, n_iter=n_iter, params=params, **kwargs)[1:]

    if not np.all(np.diff(trainll) > 0) and verbose:
        print
        print 'Test train: (%s)\n %s\n %s' % (params, trainll,
                                              np.diff(trainll))
    self.assertTrue(np.all(np.diff(trainll) > 0))
def create_random_gmm(n_mix, n_features, covariance_type, prng=0):
    prng = check_random_state(prng)
    g = mixture.GMM(n_mix, covariance_type=covariance_type)
    g.means_ = prng.randint(-20, 20, (n_mix, n_features))
    mincv = 0.1
    g.covars_ = {
        "spherical": (mincv + mincv * np.dot(prng.rand(n_mix, 1),
                                             np.ones((1, n_features)))) ** 2,
        "tied": (make_spd_matrix(n_features, random_state=prng)
                 + mincv * np.eye(n_features)),
        "diag": (mincv + mincv * prng.rand(n_mix, n_features)) ** 2,
        "full": np.array(
            [make_spd_matrix(n_features, random_state=prng)
             + mincv * np.eye(n_features) for x in range(n_mix)]),
    }[covariance_type]
    g.weights_ = hmm.normalize(prng.rand(n_mix))
    return g
def test_fit(self, params='stmwc', n_iter=5, verbose=False, **kwargs):
    h = hmm.GMMHMM(self.n_components)
    h.startprob = self.startprob
    h.transmat = hmm.normalize(
        self.transmat + np.diag(self.prng.rand(self.n_components)), 1)
    h.gmms = self.gmms

    # Create training data by sampling from the HMM.
    train_obs = [h.rvs(n=10, random_state=self.prng) for x in xrange(10)]

    # Mess up the parameters and see if we can re-learn them.
    h.fit(train_obs, n_iter=0)
    h.transmat = hmm.normalize(self.prng.rand(self.n_components,
                                              self.n_components), axis=1)
    h.startprob = hmm.normalize(self.prng.rand(self.n_components))

    trainll = train_hmm_and_keep_track_of_log_likelihood(
        h, train_obs, n_iter=n_iter, params=params, covars_prior=1.0,
        **kwargs)[1:]

    if not np.all(np.diff(trainll) > 0) and verbose:
        print
        print 'Test train: (%s)\n %s\n %s' % (params, trainll,
                                              np.diff(trainll))
    self.assertTrue(np.all(np.diff(trainll) > -0.5))
def create_random_gmm(n_mix, n_features, cvtype, prng=prng):
    from sklearn import mixture
    g = mixture.GMM(n_mix, cvtype=cvtype)
    g.means = prng.randint(-20, 20, (n_mix, n_features))
    mincv = 0.1
    g.covars = {
        'spherical': (mincv + mincv * prng.rand(n_mix)) ** 2,
        'tied': (make_spd_matrix(n_features, random_state=prng)
                 + mincv * np.eye(n_features)),
        'diag': (mincv + mincv * prng.rand(n_mix, n_features)) ** 2,
        'full': np.array(
            [make_spd_matrix(n_features, random_state=prng)
             + mincv * np.eye(n_features) for x in xrange(n_mix)])
    }[cvtype]
    g.weights = hmm.normalize(prng.rand(n_mix))
    return g
def create_random_gmm(n_mix, n_features, covariance_type, prng=0):
    prng = check_random_state(prng)
    g = mixture.GMM(n_mix, covariance_type=covariance_type)
    g.means_ = prng.randint(-20, 20, (n_mix, n_features))
    mincv = 0.1
    g.covars_ = {
        'spherical': (mincv + mincv * np.dot(prng.rand(n_mix, 1),
                                             np.ones((1, n_features)))) ** 2,
        'tied': (make_spd_matrix(n_features, random_state=prng)
                 + mincv * np.eye(n_features)),
        'diag': (mincv + mincv * prng.rand(n_mix, n_features)) ** 2,
        'full': np.array(
            [make_spd_matrix(n_features, random_state=prng)
             + mincv * np.eye(n_features) for x in range(n_mix)])
    }[covariance_type]
    g.weights_ = hmm.normalize(prng.rand(n_mix))
    return g
def create_random_gmm(n_mix, n_features, cvtype, prng=prng):
    from sklearn import mixture
    g = mixture.GMM(n_mix, cvtype=cvtype)
    g.means = prng.randint(-20, 20, (n_mix, n_features))
    mincv = 0.1
    g.covars = {
        'spherical': (mincv + mincv * prng.rand(n_mix))**2,
        'tied': (make_spd_matrix(n_features, random_state=prng)
                 + mincv * np.eye(n_features)),
        'diag': (mincv + mincv * prng.rand(n_mix, n_features))**2,
        'full': np.array([
            make_spd_matrix(n_features, random_state=prng)
            + mincv * np.eye(n_features) for x in xrange(n_mix)
        ])
    }[cvtype]
    g.weights = hmm.normalize(prng.rand(n_mix))
    return g
def test_fit_with_priors(self, params='stmc', n_iter=5, verbose=False):
    startprob_prior = 10 * self.startprob + 2.0
    transmat_prior = 10 * self.transmat + 2.0
    means_prior = self.means
    means_weight = 2.0
    covars_weight = 2.0
    if self.covariance_type in ('full', 'tied'):
        covars_weight += self.n_features
    covars_prior = self.covars[self.covariance_type]

    h = hmm.GaussianHMM(self.n_components, self.covariance_type)
    h.startprob_ = self.startprob
    h.startprob_prior = startprob_prior
    h.transmat_ = hmm.normalize(
        self.transmat + np.diag(self.prng.rand(self.n_components)), 1)
    h.transmat_prior = transmat_prior
    h.means_ = 20 * self.means
    h.means_prior = means_prior
    h.means_weight = means_weight
    h.covars_ = self.covars[self.covariance_type]
    h.covars_prior = covars_prior
    h.covars_weight = covars_weight

    # Create training data by sampling from the HMM.
    train_obs = [h.sample(n=10)[0] for x in xrange(10)]

    # Mess up the parameters and see if we can re-learn them.
    h.n_iter = 0
    h.fit(train_obs[:1])

    trainll = train_hmm_and_keep_track_of_log_likelihood(
        h, train_obs, n_iter=n_iter, params=params)[1:]

    # Check that the loglik is always increasing during training
    if not np.all(np.diff(trainll) > 0) and verbose:
        print
        print('Test MAP train: %s (%s)\n %s\n %s' % (self.covariance_type,
                                                     params, trainll,
                                                     np.diff(trainll)))
    # XXX: Why such a large tolerance?
    self.assertTrue(np.all(np.diff(trainll) > -0.5))
def test_fit_with_priors(self, params='stmc', n_iter=10, verbose=False):
    startprob_prior = 10 * self.startprob + 2.0
    transmat_prior = 10 * self.transmat + 2.0
    means_prior = self.means
    means_weight = 2.0
    covars_weight = 2.0
    if self.covariance_type in ('full', 'tied'):
        covars_weight += self.n_features
    covars_prior = self.covars[self.covariance_type]

    h = hmm.GaussianHMM(self.n_components, self.covariance_type)
    h.startprob_ = self.startprob
    h.startprob_prior = startprob_prior
    h.transmat_ = hmm.normalize(
        self.transmat + np.diag(self.prng.rand(self.n_components)), 1)
    h.transmat_prior = transmat_prior
    h.means_ = 20 * self.means
    h.means_prior = means_prior
    h.means_weight = means_weight
    h.covars_ = self.covars[self.covariance_type]
    h.covars_prior = covars_prior
    h.covars_weight = covars_weight

    # Create training data by sampling from the HMM.
    train_obs = [h.sample(n=10)[0] for x in xrange(10)]

    # Mess up the parameters and see if we can re-learn them.
    h.n_iter = 0
    h.fit(train_obs[:1])

    trainll = train_hmm_and_keep_track_of_log_likelihood(
        h, train_obs, n_iter=n_iter, params=params)[1:]

    # Check that the loglik is always increasing during training
    if not np.all(np.diff(trainll) > 0) and verbose:
        print
        print ('Test MAP train: %s (%s)\n %s\n %s' % (self.covariance_type,
                                                      params, trainll,
                                                      np.diff(trainll)))
    # XXX: Why such a large tolerance?
    self.assertTrue(np.all(np.diff(trainll) > -0.5))
def train(self, obs, obs_weights=None, max_N=15):
    """Estimates the prototype from a set of observations.

    Parameters
    ----------
    max_N : int
        The maximum length of the HMM.
    """
    if obs_weights is None:
        obs_weights = np.ones(len(obs))
    else:
        obs_weights = np.asarray(obs_weights)

    # Set the number of states.
    if self.num_states >= 1.0:
        self.N = int(self.num_states)
    else:
        mean_length = np.mean([each_obs.shape[0] for each_obs in obs])
        self.N = min(int(self.num_states * mean_length), max_N)

    # Transition prob: left-to-right.
    self.transmat = np.zeros((self.N, self.N))
    for i in range(self.N):
        self.transmat[i, i] = self.self_transprob
        if i + 1 < self.N:
            self.transmat[i, i + 1] = self.next_transprob
        for j in range(i + 2, self.N):
            self.transmat[i, j] = self.skip_transprob
    self.transmat = normalize(self.transmat, axis=1)

    # State prior prob: left-most state only.
    self.startprob = np.zeros(self.N)
    self.startprob[0] = 1.0

    self.model = WeightedGaussianHMM(self.N, "diag",
                                     self.startprob, self.transmat,
                                     algorithm="map", params="mc")
    self.num_obs = len(obs)
    return self.model.fit(obs, obs_weights=obs_weights)
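# Illustration of the left-to-right transition structure built in train()
# above, for a hypothetical 3-state prototype with self_transprob=0.8,
# next_transprob=0.15 and skip_transprob=0.05. Before normalize(..., axis=1)
# the matrix is:
#
#     [[0.80, 0.15, 0.05],
#      [0.00, 0.80, 0.15],
#      [0.00, 0.00, 0.80]]
#
# Row normalization then rescales each row to sum to 1, so a state can only
# stay, advance by one, or skip ahead, never move backwards.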
def test_normalize_3D():
    A = rng.rand(2, 2, 2) + 1.0
    for axis in range(3):
        Anorm = hmm.normalize(A, axis)
        assert np.all(np.allclose(Anorm.sum(axis), 1.0))
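# For reference, a minimal sketch of the normalize() helper exercised by
# test_normalize_3D() above, assuming it rescales an array so that it sums to
# 1 along the given axis. The actual library implementation may differ in how
# it guards against zero sums.
def normalize(A, axis=None):
    A = np.asarray(A, dtype=float) + np.finfo(float).eps
    Asum = A.sum(axis)
    if axis and A.ndim > 1:
        # Reshape the sums so they broadcast along the normalized axis
        # (for axis=0 or axis=None plain broadcasting already works).
        shape = list(A.shape)
        shape[axis] = 1
        Asum = Asum.reshape(shape)
    return A / Asum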