def train(self, nstates, nmix, niter, covtype, data, labels):
    train_pos = self.tensor_to_list(data[labels == 1, :, :])
    train_neg = self.tensor_to_list(data[labels == 0, :, :])
    print("Start training the models...")
    success = False
    while not success:
        try:
            print("Training with covariance type %s" % covtype)
            model_pos = hmm.GMMHMM(nstates, nmix, covariance_type=covtype, n_iter=niter)
            model_neg = hmm.GMMHMM(nstates, nmix, covariance_type=covtype, n_iter=niter)
            model_pos.fit(train_pos)
            model_neg.fit(train_neg)
            success = True
        except Exception:
            # Fall back to progressively simpler covariance structures.
            if covtype == 'full':
                covtype = 'diag'
            elif covtype == 'diag':
                covtype = 'tied'
            elif covtype == 'tied':
                covtype = 'spherical'
            else:
                raise RuntimeError('Training failed for every covariance type')
    return model_pos, model_neg
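# `tensor_to_list` is not defined in this snippet; a minimal hypothetical
# sketch of what it likely does: split a (n_sequences, n_frames, n_features)
# tensor into the list of 2-D per-sequence arrays that the legacy GMMHMM.fit
# accepted.
import numpy as np

def tensor_to_list(data):
    # One (n_frames, n_features) array per sequence.
    return [np.asarray(seq) for seq in data]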
def __init__(self, model_name='GaussianHMM', n_components=10, cov_type='diag', n_iter=100):
    # Model name: hmmlearn implements three HMM classes. GaussianHMM and GMMHMM
    # handle continuous observations; MultinomialHMM handles discrete observations.
    self.model = None
    self.model_name = model_name
    # Number of hidden states
    self.n_components = n_components
    # Covariance type of the Gaussian emissions
    self.cov_type = cov_type
    # Number of training iterations
    self.n_iter = n_iter
    self.models = []
    # self.states = ["als", "control", "hunt", "park"]
    # self.n_states = len(self.states)
    self.observations = []
    self.n_observations = len(self.observations)
    if self.model_name == 'GaussianHMM':
        self.model = hmm.GaussianHMM(n_components=self.n_components,
                                     covariance_type=self.cov_type,
                                     n_iter=self.n_iter)
    else:
        # self.model = hmm.MultinomialHMM(n_components=self.n_components, n_iter=self.n_iter, tol=0.01)
        self.model = hmm.GMMHMM(n_components=self.n_components, n_iter=self.n_iter, tol=0.01)
def __init__(self, model_name='GaussianHMM', n_components=4, cov_type='diag', n_iter=1000, n_mix=2):
    self.model_name = model_name
    self.n_components = n_components
    self.cov_type = cov_type
    self.n_iter = n_iter
    self.n_mix = n_mix
    self.models = []
    # This stuff is deprecated. Disabling the warnings for now.
    warnings.filterwarnings("ignore", category=DeprecationWarning)
    if self.model_name == 'GaussianHMM':
        self.model = hmm.GaussianHMM(n_components=self.n_components,
                                     covariance_type=self.cov_type,
                                     n_iter=self.n_iter)
    elif self.model_name == 'GMMHMM':
        self.model = hmm.GMMHMM(n_components=self.n_components,
                                covariance_type=self.cov_type,
                                n_iter=self.n_iter,
                                n_mix=self.n_mix)
    else:
        raise TypeError('Invalid model type')
def train_lr(seqs):
    # select features
    seqs = [select_feature(s) for s in seqs]
    # convert to hmmlearn formats
    feats = np.concatenate(seqs)
    lengths = [s.shape[0] for s in seqs]
    nc = 9
    nm = 3
    # Left-right topology: 0.9 self-loop, 0.1 step forward, absorbing last state.
    t = np.diag(np.ones(nc)) * .9
    t[:-1, 1:] += np.diag(np.ones(nc - 1)) * .1
    t[-1, -1] = 1
    s = np.zeros(nc)
    s[0] = 1.0
    # model = hmm.GaussianHMM(n_components=2, covariance_type='diag').fit(feats, lengths)
    model = hmm.GMMHMM(n_components=nc, n_mix=nm, covariance_type='diag',
                       min_covar=1e-6, init_params="mcw", params="tmcw",
                       verbose=False)
    model.startprob_ = s
    model.transmat_ = t
    model.fit(feats, lengths)
    return model
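# A quick self-contained look at the left-right transition matrix built
# above, shown for nc = 4 so the band structure is visible: 0.9 self-loop,
# 0.1 forward hop, absorbing final state.
import numpy as np

nc = 4
t = np.diag(np.ones(nc)) * .9
t[:-1, 1:] += np.diag(np.ones(nc - 1)) * .1
t[-1, -1] = 1
print(t)
# [[0.9 0.1 0.  0. ]
#  [0.  0.9 0.1 0. ]
#  [0.  0.  0.9 0.1]
#  [0.  0.  0.  1. ]]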
def main():
    states = task_name
    n_task = len(states)
    # don't know the task prior here
    start_probability = np.array([1.0 / n_task] * n_task)
    # don't know the task transition probability here
    transition_probability = np.array([1.0 / n_task] * n_task * n_task).reshape([n_task, n_task])
    gmm_model_full = [GaussianMixture(n_components=3) for i in range(n_task)]
    task_data_full = []
    for task_idx, task_data in enumerate(datasets_raw):
        demo_data_full = np.array([]).reshape(0, num_joints)
        for demo_data in task_data:
            h = np.hstack([demo_data['left_hand'], demo_data['left_joints']])
            demo_data_full = np.vstack([demo_data_full, h])
        task_data_full.append(demo_data_full)
    for task_data, gmm_model in zip(task_data_full, gmm_model_full):
        gmm_model.fit(task_data)
    # hmm_model = hmm.GMMHMM()
    # hmm_model.n_components = n_task
    # hmm_model.startprob_ = start_probability
    # hmm_model.transmat_ = transition_probability
    # hmm_model.means_ = gmm_model_full[0].means_
    # testing
    hmm_model1 = hmm.GMMHMM()
    hmm_model1.n_components = 10
    hmm_model1.fit(task_data_full[0])
    print(1)
def test_score_samples_and_decode(self):
    h = hmm.GMMHMM(self.n_components)
    h.startprob_ = self.startprob
    h.transmat_ = self.transmat
    h.gmms_ = self.gmms
    # Make sure the means are far apart so posteriors.argmax()
    # picks the actual component used to generate the observations.
    for g in h.gmms_:
        g.means_ *= 20
    refstateseq = np.repeat(np.arange(self.n_components), 5)
    n_samples = len(refstateseq)
    X = [h.gmms_[x].sample(1, random_state=self.prng).flatten()
         for x in refstateseq]
    _ll, posteriors = h.score_samples(X)
    self.assertEqual(posteriors.shape, (n_samples, self.n_components))
    assert_array_almost_equal(posteriors.sum(axis=1), np.ones(n_samples))
    _logprob, stateseq = h.decode(X)
    assert_array_equal(stateseq, refstateseq)
def train_GMMHMM(dataset):
    GMMHMM_Models = {}
    states_num = 5
    GMM_mix_num = 3
    tmp_p = 1.0 / (states_num - 2)
    # Left-right prior: each state may stay or move at most two states forward.
    transmatPrior = np.array([[tmp_p, tmp_p, tmp_p, 0, 0],
                              [0, tmp_p, tmp_p, tmp_p, 0],
                              [0, 0, tmp_p, tmp_p, tmp_p],
                              [0, 0, 0, 0.5, 0.5],
                              [0, 0, 0, 0, 1]], dtype=float)
    startprobPrior = np.array([0.5, 0.5, 0, 0, 0], dtype=float)
    for label in dataset.keys():
        model = hmm.GMMHMM(n_components=states_num, n_mix=GMM_mix_num,
                           transmat_prior=transmatPrior,
                           startprob_prior=startprobPrior,
                           covariance_type='diag', n_iter=10)
        trainData = dataset[label]
        length = np.zeros([len(trainData), ], dtype=int)
        for m in range(len(trainData)):
            length[m] = trainData[m].shape[0]
        trainData = np.vstack(trainData)
        model.fit(trainData, lengths=length)  # get optimal parameters
        GMMHMM_Models[label] = model
    return GMMHMM_Models
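# Hedged usage sketch for train_GMMHMM: `dataset` maps a label to a list of
# (n_frames, 13) feature arrays (e.g. MFCC sequences). The data below is
# synthetic and purely illustrative; fitting random noise may emit
# convergence warnings.
import numpy as np

rng = np.random.RandomState(0)
dataset = {label: [rng.randn(40, 13) for _ in range(5)]
           for label in ("yes", "no")}
models = train_GMMHMM(dataset)
# Classify a new sequence by the model with the highest log-likelihood.
test_seq = rng.randn(40, 13)
best_label = max(models, key=lambda lbl: models[lbl].score(test_seq))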
def get_hmm_model(self):
    """Get an hmm model from the training data."""
    # Gaussian Mixture HMM
    model = hmm.GMMHMM(n_components=self.n_hidden_states, n_mix=self.n_mixtures,
                       covariance_type=self.covariance_type, n_iter=self.n_iter)
    train = self.get_training_data()
    # GMMHMM.fit(X, lengths=None) expects X of shape (n_samples, n_features)
    # and lengths of shape (n_sequences,), with sum(lengths) == n_samples.
    lengths = [example.shape[0] for example in train]
    data_combined = np.concatenate(train)
    model.fit(data_combined, lengths)
    self.wordhmm = model
def test_fit(self, params='stmwc', n_iter=5, verbose=False, **kwargs):
    h = hmm.GMMHMM(self.n_components, covars_prior=1.0)
    h.startprob_ = self.startprob
    h.transmat_ = normalize(
        self.transmat + np.diag(self.prng.rand(self.n_components)), 1)
    h.gmms_ = self.gmms
    lengths = [10] * 10
    X, _state_sequence = h.sample(sum(lengths), random_state=self.prng)
    # Mess up the parameters and see if we can re-learn them.
    h.n_iter = 0
    h.fit(X, lengths=lengths)
    h.transmat_ = normalize(self.prng.rand(self.n_components,
                                           self.n_components), axis=1)
    h.startprob_ = normalize(self.prng.rand(self.n_components))
    trainll = fit_hmm_and_monitor_log_likelihood(h, X, lengths=lengths,
                                                 n_iter=n_iter)
    if not np.all(np.diff(trainll) > 0) and verbose:
        print('Test train: (%s)\n %s\n %s' % (params, trainll,
                                              np.diff(trainll)))
    # XXX: this test appears to check that training log likelihood should
    # never be decreasing (up to a tolerance of 0.5, why?) but this is not
    # the case when the seed changes.
    raise SkipTest("Unstable test: trainll is not always increasing "
                   "depending on seed")
    self.assertTrue(np.all(np.diff(trainll) > -0.5))
def test_fit(self, params='stmwc', n_iter=5, verbose=False, **kwargs):
    h = hmm.GMMHMM(self.n_components, covars_prior=1.0)
    h.startprob_ = self.startprob
    h.transmat_ = normalize(
        self.transmat + np.diag(self.prng.rand(self.n_components)), 1)
    h.gmms_ = self.gmms_
    # Create training data by sampling from the HMM.
    train_obs = [h.sample(n=10, random_state=self.prng)[0]
                 for x in range(10)]
    # Mess up the parameters and see if we can re-learn them.
    h.n_iter = 0
    h.fit(train_obs)
    h.transmat_ = normalize(self.prng.rand(self.n_components,
                                           self.n_components), axis=1)
    h.startprob_ = normalize(self.prng.rand(self.n_components))
    trainll = train_hmm_and_keep_track_of_log_likelihood(
        h, train_obs, n_iter=n_iter, params=params)[1:]
    if not np.all(np.diff(trainll) > 0) and verbose:
        print('Test train: (%s)\n %s\n %s' % (params, trainll,
                                              np.diff(trainll)))
    # XXX: this test appears to check that training log likelihood should
    # never be decreasing (up to a tolerance of 0.5, why?) but this is not
    # the case when the seed changes.
    raise SkipTest("Unstable test: trainll is not always increasing "
                   "depending on seed")
    self.assertTrue(np.all(np.diff(trainll) > -0.5))
def __init__(self, Class, label):
    self.traindata = np.zeros((0, 6))
    self.Class = Class
    self.label = label
    self.model = hmm.GMMHMM(n_components=m_num_of_HMMStates, n_mix=m_num_of_mixtures,
                            transmat_prior=m_transmatPrior, startprob_prior=m_startprobPrior,
                            covariance_type=m_covarianceType, n_iter=m_n_iter)
def get_models(self):
    for i in range(self.label_num):
        model = hmm.GMMHMM(n_components=self.n_components, n_mix=self.n_mix,
                           covariance_type=self.covariance_type, n_iter=self.n_iter)
        self.models.append(model)
def main():
    k = 5
    list_x, list_y = data_split(xtrain, ytrain, k)
    acc = []
    for i in range(k):
        testx = list_x[i]
        testy = list_y[i]
        trainx = list_x[:i] + list_x[i + 1:]
        trainy = list_y[:i] + list_y[i + 1:]
        trainx_fix = cv_fix(trainx, trainy)
        model_list = [hmm.GMMHMM(n_components=10, n_mix=10, covariance_type='diag',
                                 algorithm='viterbi', n_iter=30,
                                 params='stmc', init_params='stmc')
                      for _ in range(20)]
        for j in range(20):
            X = transpose(trainx_fix[j])
            model_list[j].fit(X)
        predic = predict(model_list, testx)
        accurac = accuracy(testy, predic)
        acc.append(accurac)
        print(accurac)
    return acc
def train(self, train_list, length_list, n_components, n_mixs, name_list=None):
    """
    :param train_list: list of training data sets, one per model
    :param length_list: list of sequence-length arrays
    :param name_list: names of the individual hmm models, defaults to None
    :return: hmms: list of trained hmm models
    Note: the number of models is determined by the number of entries in train_list.
    """
    self.model_name = name_list
    print('Starting HMM training!')
    for i in range(len(train_list)):
        if name_list is not None:
            print('\tTraining model %s...' % name_list[i])
        model = hmm.GMMHMM(n_components=n_components[i], n_mix=n_mixs[i],
                           covariance_type="diag")
        model.n_iter = self.n_iter
        model.fit(train_list[i], length_list[i])
        # # Reset the initial state probabilities
        # model.startprob_ = self.startprob
        self.hmms.append(model)
    print('All models trained!')
def test_sample(self, n=1000):
    h = hmm.GMMHMM(self.n_components, self.covariance_type,
                   startprob=self.startprob, transmat=self.transmat,
                   gmms=self.gmms_)
    samples = h.sample(n)[0]
    self.assertEqual(samples.shape, (n, self.n_features))
def train(self, train_data):
    self.train_obs = self.data_prep(train_data)
    self.model = hmm.GMMHMM(n_components=self.n_components, n_mix=self.n_mix,
                            algorithm=self.algorithm, n_iter=self.n_iter)
    self.model.fit(self.train_obs)
def test_sample(self, n_samples=1000):
    h = hmm.GMMHMM(self.n_components,
                   covariance_type=self.covariance_type)
    h.startprob_ = self.startprob
    h.transmat_ = self.transmat
    h.gmms_ = self.gmms
    X, state_sequence = h.sample(n_samples)
    assert X.shape == (n_samples, self.n_features)
    assert len(state_sequence) == n_samples
def test_fit_works_on_sequences_of_different_length(self):
    lengths = [3, 4, 5]
    X = self.prng.rand(sum(lengths), self.n_features)
    h = hmm.GMMHMM(self.n_components,
                   covariance_type=self.covariance_type)
    # This shouldn't raise
    # ValueError: setting an array element with a sequence.
    h.fit(X, lengths=lengths)
def test_sample(self, n=1000):
    h = hmm.GMMHMM(self.n_components,
                   covariance_type=self.covariance_type)
    h.startprob_ = self.startprob
    h.transmat_ = self.transmat
    h.gmms_ = self.gmms
    X, state_sequence = h.sample(n, random_state=self.prng)
    self.assertEqual(X.shape, (n, self.n_features))
    self.assertEqual(len(state_sequence), n)
def fit(self):
    # data_loader: DataLoader Class
    start, trans = initByBakis(self.n_com, 2)
    for word in self.data_loader.spoken:
        self.classes.append(word)
        # init_params="mcw" keeps fit() from re-initializing the Bakis
        # start/transition matrices assigned below.
        self.models[word] = hmm.GMMHMM(n_components=self.n_com, n_mix=self.n_mix,
                                       covariance_type="diag", n_iter=500,
                                       init_params="mcw")
        self.models[word].transmat_ = trans
        self.models[word].startprob_ = start
        self.models[word].fit(self.data_loader.train_features[word],
                              self.data_loader.train_lengths[word])
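# initByBakis is not defined in this snippet; a hypothetical sketch of such a
# helper: a left-right ("Bakis") topology where state i may stay or jump up to
# `delta` states forward, always starting in state 0.
import numpy as np

def initByBakis(n_states, delta):
    start = np.zeros(n_states)
    start[0] = 1.0
    trans = np.zeros((n_states, n_states))
    for i in range(n_states):
        j_max = min(i + delta, n_states - 1)
        # Uniform probability over the allowed forward moves.
        trans[i, i:j_max + 1] = 1.0 / (j_max - i + 1)
    return start, trans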
def train_HMM(dataset):
    Models = {}
    for label in dataset.keys():
        model = hmm.GMMHMM(n_components=10)
        trainData = dataset[label]
        trData = np.vstack(trainData)
        model.fit(trData)
        Models[label] = model
    return Models
def test_fit_works_on_sequences_of_different_length(self):
    obs = [self.prng.rand(3, self.n_features),
           self.prng.rand(4, self.n_features),
           self.prng.rand(5, self.n_features)]
    h = hmm.GMMHMM(self.n_components,
                   covariance_type=self.covariance_type)
    # This shouldn't raise
    # ValueError: setting an array element with a sequence.
    h.fit(obs)
def train_model(data):
    learned_hmm = dict()
    for label in data.keys():
        model = hmm.GMMHMM(n_components=4, covariance_type="full")
        # Start from an empty (0, 13) array; np.ndarray(shape=(1, 13)) would
        # prepend an uninitialized garbage row to the training data.
        feature = np.empty(shape=(0, 13))
        for list_feature in data[label]:
            feature = np.vstack((feature, list_feature))
        obj = model.fit(feature)
        learned_hmm[label] = obj
    return learned_hmm
def gmm_hmm(pca_output, n_comps, cov_type):
    ghmm = hmm.GMMHMM(n_components=n_comps, covariance_type=cov_type, n_iter=100)
    scores = pca_output['pc_scores']
    ghmm.fit(scores)
    pred_labels = ghmm.predict(scores)
    # Project hidden state mean vectors to original dimensions
    mean_maps = np.squeeze(ghmm.means_) @ pca_output['Va']
    return ghmm, pred_labels, mean_maps
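# Hedged sketch of the `pca_output` dict gmm_hmm expects: the key names come
# from the function body, but the data shapes and the PCA step here are
# illustrative assumptions, not the original pipeline.
import numpy as np
from sklearn.decomposition import PCA

data_matrix = np.random.randn(500, 2000)    # e.g. timepoints x voxels (assumed)
pca = PCA(n_components=10)
pc_scores = pca.fit_transform(data_matrix)
pca_output = {'pc_scores': pc_scores, 'Va': pca.components_}
ghmm, labels, mean_maps = gmm_hmm(pca_output, n_comps=5, cov_type='diag')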
def train_model(data):
    learned_hmm = dict()
    for label in data.keys():
        model = hmm.GMMHMM(verbose=False, n_components=100, n_iter=10000)
        # As above, start from an empty (0, 13) array rather than an
        # uninitialized one-row np.ndarray.
        feature = np.empty(shape=(0, 13))
        for list_feature in data[label]:
            feature = np.vstack((feature, list_feature))
        obj = model.fit(feature)
        learned_hmm[label] = obj
    return learned_hmm
def train_samad_hmm(train_data_path, states_num):
    GMMHMM_Models = {}
    train_data = create_train_set(train_data_path)
    for label in train_data.keys():
        model = hmm.GMMHMM(n_components=states_num)
        t_Data = train_data[label]
        t_Data = np.vstack(t_Data)
        model = model.fit(t_Data)  # get optimal parameters
        GMMHMM_Models[label] = model
    return GMMHMM_Models
def fit(self, data, num_gaussians, num_states, n_iterations=10, trans_mat=None, start_prob=None):
    # @TODO play with covariance types and compare results
    frame_0 = data[0][0]
    num_features = frame_0.shape[1]
    num_classes = len(data)
    # Add one column so each row can also store its decoded state.
    self.mapping_observation_state = np.empty((0, num_features + 1))
    # Iterate through each class and train an HMM.
    for class_index, frames in enumerate(data):
        # Train this model; each array in `frames` holds one observation per row.
        X = np.concatenate(frames)
        lengths = np.zeros(len(frames), dtype=np.int32)
        for i, arr in enumerate(frames):
            lengths[i] = arr.shape[0]
        scores = np.zeros(n_iterations)
        HMM_list = []
        for j in np.arange(n_iterations):
            lr = hmm.GMMHMM(n_components=num_states,
                            n_mix=num_gaussians[class_index],
                            init_params="cm", params="cmt")
            # lr.startprob_ = np.array([1.0, 0.0, 0.0])
            # lr.transmat_ = np.array([[0.5, 0.5, 0.0], [0.0, 0.5, 0.5], [0.0, 0.0, 1.0]])
            lr.transmat_ = trans_mat
            lr.startprob_ = start_prob
            lr.fit(X=X, lengths=lengths)
            # temp_HMM = GMMHMM(n_components=num_states, n_mix=num_gaussians[class_index])
            # temp_HMM.startprob_ = [1.0, 0.0, 0.0]
            # temp_HMM.transmat_ = trans_mat
            # temp_HMM.fit(X=X, lengths=lengths)
            scores[j] = lr.score(X, lengths=lengths)
            HMM_list.append(lr)
        # Keep the HMM with the best fit.
        cur_HMM = HMM_list[np.argmax(scores)]
        self.HMMList.append(cur_HMM)
        temp = cur_HMM.decode(X, lengths)
        states = temp[1]
        # Offset the state numbers so they differ across classes.
        states += num_states * class_index
        a = np.concatenate((X, states.reshape(-1, 1)), axis=1)
        self.mapping_observation_state = np.concatenate(
            (self.mapping_observation_state, a))
def getHmmModel(self):
    '''Get an hmm model from training data.'''
    # GaussianHMM
    # model = hmm.GaussianHMM(numStates, "diag")  # initialize hmm model
    # Gaussian Mixture HMM
    model = hmm.GMMHMM(n_components=self.nComp, n_mix=self.nMix,
                       transmat_prior=self.transmatPrior,
                       startprob_prior=self.startprobPrior,
                       covariance_type=self.covarianceType,
                       n_iter=self.n_iter)
    model.fit(self.trainData)  # get optimal parameters
    self.hmmModel = model
def create_model(n_components, n_mix):
    model = hmm.GMMHMM(n_components=n_components, n_mix=n_mix,
                       init_params='mcw')
    # Uniform initial state distribution.
    start_probability = np.ones(n_components) / n_components
    # Uniform transition matrix.
    transition_probability = np.ones((n_components, n_components)) / n_components
    model.startprob_ = start_probability
    model.transmat_ = transition_probability
    return model
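# Minimal usage sketch (assumes numpy as np and hmmlearn's hmm are imported).
# Because init_params='mcw' omits 's' and 't', fit() starts from the uniform
# startprob_/transmat_ set above instead of re-initializing them, while the
# default `params` still lets EM update them during training.
model = create_model(n_components=4, n_mix=2)
X = np.random.randn(200, 6)     # one synthetic observation sequence
model.fit(X, lengths=[200])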
def load_one_model(self, filename):
    # allow_pickle=True is required to load a pickled dict with numpy >= 1.16.3.
    hmm_dict = np.load(filename, allow_pickle=True)
    hmm_dict = hmm_dict.item()
    model = hmm.GMMHMM(n_components=hmm_dict['n_components'],
                       n_mix=hmm_dict['n_mix'],
                       covariance_type=hmm_dict['covariance_type'])
    model.startprob_ = hmm_dict['startprob']
    model.transmat_ = hmm_dict['transmat']
    model.means_ = hmm_dict['means']
    model.covars_ = hmm_dict['covars']
    model.weights_ = hmm_dict['weights']
    self.hmms.append(model)
    self.model_name.append(hmm_dict['model_name'])
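# The matching save helper is not shown; a hypothetical sketch that writes the
# same keys load_one_model reads back (np.save pickles the dict into an .npy file):
import numpy as np

def save_one_model(model, model_name, filename):
    hmm_dict = {
        'model_name': model_name,
        'n_components': model.n_components,
        'n_mix': model.n_mix,
        'covariance_type': model.covariance_type,
        'startprob': model.startprob_,
        'transmat': model.transmat_,
        'means': model.means_,
        'covars': model.covars_,
        'weights': model.weights_,
    }
    np.save(filename, hmm_dict)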