def learn_likelihoods_progress(i, n, m, A, B, pi, F, X_train, nEmissionDim, g_mu, g_sig, nState):
    """Compute Gaussian-progress-weighted likelihood statistics for one progress bin.

    Builds an HMM from (A, B, pi), then for every training sample weights the
    partial-sequence log-likelihood and state posterior by a Gaussian centered
    at progress point g_mu with width g_sig.

    Returns (i, averaged state posterior, mean log-likelihood, std of log-likelihood).
    """
    if nEmissionDim >= 2:
        dist = ghmm.MultivariateGaussianDistribution(F)
    else:
        dist = ghmm.GaussianDistribution(F)
    hmm = ghmm.HMMFromMatrices(F, dist, A, B, pi)

    # frozen progress-weighting distribution (same pdf as re-creating it per step)
    weight_dist = norm(loc=g_mu, scale=g_sig)

    post_acc = np.zeros(nState)
    lhood_acc = 0.0
    lhood_sq_acc = 0.0

    for sample_idx in xrange(n):
        sample_post = np.zeros(nState)
        sample_lhood = 0.0
        sample_lhood_sq = 0.0
        weight_total = 0.0

        # walk the sequence prefix by prefix (t observations long)
        for t in xrange(1, m):
            seq = ghmm.EmissionSequence(F, X_train[sample_idx][:t * nEmissionDim])
            logp = hmm.loglikelihoods(seq)[0]
            post = np.array(hmm.posterior(seq))

            w = weight_dist.pdf(t)
            sample_post += post[t - 1] * w
            sample_lhood += logp * w
            sample_lhood_sq += logp * logp * w
            weight_total += w

        # normalize by the weight mass and average over the n samples
        post_acc += sample_post / weight_total / float(n)
        lhood_acc += sample_lhood / weight_total / float(n)
        lhood_sq_acc += sample_lhood_sq / weight_total / float(n)

    return i, post_acc, lhood_acc, np.sqrt(lhood_sq_acc - lhood_acc ** 2)
def markov_model(self):
    """Build a GHMM multivariate-Gaussian HMM from the stored parameter matrices."""
    distribution = ghmm.MultivariateGaussianDistribution(self.F)
    return ghmm.HMMFromMatrices(self.F,
                                distribution,
                                self.transition_probabilities,
                                self.observation_probabilities,
                                self.initial_probabilities)
def set_hmm_object(self, A, B, pi, out_a_num=None, vec_num=None, mat_num=None, u_denom=None): """Set HMM's hyper parameters """ if self.nEmissionDim == 1: self.ml = ghmm.HMMFromMatrices(self.F, ghmm.GaussianDistribution(self.F), \ A, B, pi) else: self.ml = ghmm.HMMFromMatrices(self.F, ghmm.MultivariateGaussianDistribution(self.F), \ A, B, pi) self.A = A self.B = B self.pi = pi try: self.ml.setBaumWelchParams(out_a_num, vec_num, mat_num, u_denom) except: print "Install Daehyung's custom ghmm if you want partial fit functionalities." return self.ml
def get_hidden_markov_model(mixture_model, guess_t_matrix):
    """Get an (unoptomized) hidden markov model from the mixture model and a
    guess at the transition matrix.

    The guess transition matrix is typically created by summing over the
    outer product of time-pairs of membership vectors.
    """
    n = mixture_model.n_components

    # ghmm wants each state's emission as [mean vector, flattened covariance]
    emissions = [[mixture_model.means_[s], mixture_model.covars_[s].flatten()]
                 for s in xrange(n)]

    # ghmm needs a dense, plain-list transition matrix
    if isinstance(guess_t_matrix, scipy.sparse.csr.csr_matrix):
        guess_t_matrix = guess_t_matrix.todense()
    guess_t_matrix = guess_t_matrix.tolist()

    # uniform initial occupancy
    # Todo: figure out if initial occupancy matters
    initial_occupancy = [1.0 / n] * n

    g_float = ghmm.Float()
    g_distribution = ghmm.MultivariateGaussianDistribution(g_float)

    return ghmm.HMMFromMatrices(g_float, g_distribution, guess_t_matrix,
                                emissions, initial_occupancy)
def create_model(self, flag, number_states):
    """Generate a normalized multivariate-Gaussian HMM from computed (A, B, pi)."""
    A, B, pi = self.calculate_A_B_pi(number_states, flag)
    hmm = ghmm.HMMFromMatrices(self.F,
                               ghmm.MultivariateGaussianDistribution(self.F),
                               A, B, pi)
    hmm.normalize()
    return hmm
def test_ghmm(self): # this is being extended to also support mixtures of multivariate gaussians # Interpretation of B matrix for the multivariate gaussian case # (Example with three states and two mixture components with two dimensions): # B = [ # [["mu111","mu112"],["sig1111","sig1112","sig1121","sig1122"], # ["mu121","mu122"],["sig1211","sig1212","sig1221","sig1222"], # ["w11","w12"] ], # [["mu211","mu212"],["sig2111","sig2112","sig2121","sig2122"], # ["mu221","mu222"],["sig2211","sig2212","sig2221","sig2222"], # ["w21","w22"] ], # [["mu311","mu312"],["sig3111","sig3112","sig3121","sig3122"], # ["mu321","mu322"],["sig3211","sig3212","sig3221","sig3222"], # ["w31","w32"] ], # ] # # ["mu311","mu312"] is the mean vector of the two dimensional # gaussian in state 3, mixture component 1 # ["sig1211","sig1212","sig1221","sig1222"] is the covariance # matrix of the two dimensional gaussian in state 1, mixture component 2 # ["w21","w22"] are the weights of the mixture components # in state 2 # For states with only one mixture component, a implicit weight # of 1.0 is assumed import ghmm F = ghmm.Float() Abig = [[0.0, 1.0], [1.0, 0.0]] Bbig = [[[1.0, 1.0, 1.0], [0.9, 0.4, 0.2, 0.4, 2.2, 0.5, 0.2, 0.5, 1.0]], [[10.0, 10.0, 10.0], [1.0, 0.2, 0.8, 0.2, 2.0, 0.6, 0.8, 0.6, 0.9]]] piBig = [0.5, 0.5] modelBig = ghmm.HMMFromMatrices( F, ghmm.MultivariateGaussianDistribution(F), Abig, Bbig, piBig) modelBig.sample(10, 100, seed=3586662) e = modelBig.sampleSingle(1) print[x for x in e] # get log P(seq | model) logp = model.loglikelihood(seq) print logp # cacluate viterbi path path = model.viterbi(seq) print path # train model parameters model.baumWelch(seq_set, 500, 0.0001)
def _new_model(n_features, n_states, means, covars, topology):
    """Assemble a multivariate-Gaussian HMM from per-state means/covariances
    and a topology-derived transition structure."""
    # Per-state emission spec: [mean vector, row-major flattened covariance]
    emissions = [[means[s].tolist(), covars[s].ravel().tolist()]
                 for s in range(n_states)]

    domain = impl.Float()
    dist = impl.MultivariateGaussianDistribution(domain)
    transitions = transition_matrix(n_states, topology).tolist()
    pi = start_probabilities(n_states, topology)

    return impl.HMMFromMatrices(domain, dist, transitions, emissions, pi)
def reset(self):
    """Reset the HMM object by rebuilding it from its current (A, B, pi)."""
    A, B, pi = self.ml.asMatrices()
    if self.nEmissionDim == 1:
        dist = ghmm.GaussianDistribution(self.F)
    else:
        dist = ghmm.MultivariateGaussianDistribution(self.F)
    self.ml = ghmm.HMMFromMatrices(self.F, dist, A, B, pi)
    self.A = A
    self.B = B
    self.pi = pi
def ghmm_from_multivariate_continuous_hmm(hmm):
    """Convert a project HMM object into an equivalent ghmm model."""
    hmm = deepcopy(hmm)
    domain = ghmm.Float()
    # [mean vector, flattened covariance] per state, as ghmm expects
    emission_params = [[d.mean.tolist(), d.variance.flatten().tolist()]
                       for d in hmm.emissionDistributions]
    return ghmm.HMMFromMatrices(
        emissionDomain=domain,
        distribution=ghmm.MultivariateGaussianDistribution(domain),
        A=hmm.transitionMatrix.tolist(),
        B=emission_params,
        pi=hmm.initialProbabilities.tolist())
def computeLikelihood(idx, A, B, pi, F, X, nEmissionDim, nState, startIdx=1, \
                      bPosterior=False, converted_X=False, cov_type='full'):
    '''
    This function will be deprecated. Please, use computeLikelihoods.

    Computes the log-likelihood (and optionally the state posterior) of every
    prefix of sequence X under the HMM defined by (A, B, pi).
    Returns (idx, prefix indices, log-likelihoods[, posteriors]).
    '''
    # Build the scoring HMM; multivariate emissions get the optional
    # diagonal-covariance restriction.
    if nEmissionDim >= 2:
        ml = ghmm.HMMFromMatrices(F, ghmm.MultivariateGaussianDistribution(F), A, B, pi)
        if cov_type == 'diag' or cov_type.find('diag') >= 0: ml.setDiagonalCovariance(1)
    else:
        ml = ghmm.HMMFromMatrices(F, ghmm.GaussianDistribution(F), A, B, pi)

    # Flatten X into the interleaved 1-D emission list unless the caller
    # already converted it.
    if converted_X is False:
        X_test = util.convert_sequence(X, emission=False)
        X_test = np.squeeze(X_test)
        X_test = X_test.tolist()
    else:
        X_test = X

    l_idx = []
    l_likelihood = []
    l_posterior = []

    # Score each prefix of i observations (i * nEmissionDim interleaved values).
    for i in xrange(startIdx, len(X_test) / nEmissionDim):
        final_ts_obj = ghmm.EmissionSequence(F, X_test[:i * nEmissionDim])
        try:
            logp = ml.loglikelihood(final_ts_obj)
            if bPosterior: post = np.array(ml.posterior(final_ts_obj))
        except:
            # GHMM underflows on very unlikely prefixes; substitute a large
            # negative likelihood and carry the previous posterior forward.
            print "Unexpected profile!! GHMM cannot handle too low probability. Underflow?"
            l_idx.append(i)
            l_likelihood.append(-100000000)
            if bPosterior:
                if len(l_posterior) == 0: l_posterior.append(list(pi))
                else: l_posterior.append(l_posterior[-1])
            ## return False, False # anomaly
            continue

        l_idx.append(i)
        l_likelihood.append(logp)
        if bPosterior: l_posterior.append(post[i - 1])

    if bPosterior:
        return idx, l_idx, l_likelihood, l_posterior
    else:
        return idx, l_idx, l_likelihood
def conditional_prob(self, x): ''' Input @ x: dim x length Output @ A list of conditional probabilities P(x_t|x_s,lambda) Only single sample works ''' from scipy.stats import norm, entropy # logp from all features X_test = util.convert_sequence2(x, emission=False) X_test = np.squeeze(X_test) final_ts_obj = ghmm.EmissionSequence(self.F, X_test.tolist()) logp_all = self.ml.loglikelihood(final_ts_obj) # feature-wise conditional probability cond_prob = [] for i in xrange(self.nEmissionDim): # per feature B = copy.copy(self.B) for j in xrange(self.nState): B[j][0] = [b for idx, b in enumerate(B[j][0]) if idx != i] B_arr = copy.copy(B[j][1]) B_arr = np.array(B_arr).reshape( (self.nEmissionDim, self.nEmissionDim)) B_arr = np.delete(B_arr, (i), axis=0) B_arr = np.delete(B_arr, (i), axis=1) B[j][1] = B_arr.flatten().tolist() ml_src = ghmm.HMMFromMatrices(self.F, ghmm.MultivariateGaussianDistribution(self.F), \ self.A, B, self.pi) # logp from remains X_test = util.convert_sequence2([ x[j] for j in xrange(len(x)) if j != i ], \ emission=False) X_test = np.squeeze(X_test) final_ts_obj = ghmm.EmissionSequence(self.F, X_test.tolist()) logp_src = ml_src.loglikelihood(final_ts_obj) cond_prob.append(logp_all - logp_src) if np.isnan(cond_prob[-1]) or np.isinf(cond_prob[-1]): print "NaN in conditional probabilities: ", np.shape(x) return None return np.array(cond_prob)
def computeLikelihoods(idx, A, B, pi, F, X, nEmissionDim, nState, startIdx=2, \
                       bPosterior=False, converted_X=False, cov_type='full'):
    '''
    Input:
    - X: dimension x length

    Scores every prefix of X under the HMM (A, B, pi); optionally also
    collects the per-prefix state posterior.
    Returns (idx, prefix indices, log-likelihoods[, posteriors]).

    NOTE(review): the `converted_X` parameter is accepted but never used here
    (X is always converted) — confirm against the sibling computeLikelihood.
    '''
    if nEmissionDim >= 2:
        ml = ghmm.HMMFromMatrices(F, ghmm.MultivariateGaussianDistribution(F), A, B, pi)
        if cov_type == 'diag': ml.setDiagonalCovariance(1)
    else:
        ml = ghmm.HMMFromMatrices(F, ghmm.GaussianDistribution(F), A, B, pi)

    # Interleave the multi-dimensional sequence into ghmm's flat emission list.
    X_test = util.convert_sequence(X, emission=False)
    X_test = np.squeeze(X_test)

    l_idx = []
    l_likelihood = []
    l_posterior = []

    for i in xrange(startIdx, len(X[0])):
        final_ts_obj = ghmm.EmissionSequence(
            F, X_test[:i * nEmissionDim].tolist())
        try:
            logp = ml.loglikelihood(final_ts_obj)
            if bPosterior: post = np.array(ml.posterior(final_ts_obj))
            l_likelihood.append(logp)
            if bPosterior: l_posterior.append(post[i - 1])
        except:
            # GHMM underflow on a very unlikely prefix: substitute a large
            # negative value and repeat the previous posterior (or pi).
            print "Unexpected profile!! GHMM cannot handle too low probability. Underflow?"
            ## return False, False # anomaly
            ## continue # we keep the state as the previous one
            l_likelihood.append(-1000000000000)
            if bPosterior:
                if len(l_posterior) == 0: l_posterior.append(list(pi))
                else: l_posterior.append(l_posterior[-1])
        l_idx.append(i)

    if bPosterior:
        return idx, l_idx, l_likelihood, l_posterior
    else:
        return idx, l_idx, l_likelihood
def computeLikelihood(F, k, data, g_mu, g_sig, nEmissionDim, A, B, pi):
    """Score one sequence prefix and weight the results by a Gaussian
    progress prior centered at g_mu.

    Returns (weighted posterior row, weighted logp, weighted logp^2, weight).
    """
    if nEmissionDim >= 2:
        dist = ghmm.MultivariateGaussianDistribution(F)
    else:
        dist = ghmm.GaussianDistribution(F)
    hmm_ml = ghmm.HMMFromMatrices(F, dist, A, B, pi)

    seq = ghmm.EmissionSequence(F, data)
    logp = hmm_ml.loglikelihoods(seq)[0]
    post = np.array(hmm_ml.posterior(seq))

    # Gaussian weight of this time step under the progress prior
    weight = norm(loc=g_mu, scale=g_sig).pdf(k)

    g_post = post[k - 1] * weight
    g_lhood = logp * weight
    g_lhood2 = logp * logp * weight
    return g_post, g_lhood, g_lhood2, weight
def fit(self, xData, A=None, B=None, pi=None, cov_mult=None, ml_pkl=None, use_pkl=False):
    """Train the multivariate-Gaussian HMM with Baum-Welch, then learn the
    anomaly-detection thresholds selected by self.check_method
    ('change'/'globalChange', 'global', or 'progress')."""
    if ml_pkl is None:
        ml_pkl = os.path.join(os.path.dirname(__file__), 'ml_temp_n.pkl')
    if cov_mult is None:
        cov_mult = [1.0]*(self.nEmissionDim**2)

    # Daehyung: What is the shape and type of input data?
    X = [np.array(data) for data in xData]

    if A is None:
        if self.verbose: print "Generating a new A matrix"
        # Transition probability matrix (Initial transition probability, TODO?)
        A = self.init_trans_mat(self.nState).tolist()

    if B is None:
        if self.verbose: print "Generating a new B matrix"
        # We should think about multivariate Gaussian pdf.
        mus, cov = self.vectors_to_mean_cov(X, self.nState)
        # Scale each covariance entry by its caller-supplied multiplier.
        # NOTE(review): indexing is cov[:, j, i] with multiplier [dim*i + j] —
        # the transposed order relative to the sibling fit(); confirm intended.
        for i in xrange(self.nEmissionDim):
            for j in xrange(self.nEmissionDim):
                cov[:, j, i] *= cov_mult[self.nEmissionDim*i + j]

        if self.verbose:
            for i, mu in enumerate(mus):
                print 'mu%i' % i, mu
            print 'cov', cov

        # Emission probability matrix: per state [mean vector, flattened cov]
        B = [0] * self.nState
        for i in range(self.nState):
            B[i] = [[mu[i] for mu in mus]]
            B[i].append(cov[i].flatten())

    if pi is None:
        # pi - initial probabilities per state
        ## pi = [1.0/float(self.nState)] * self.nState
        pi = [0.0] * self.nState
        pi[0] = 1.0

    # print 'Generating HMM'
    # HMM model object
    self.ml = ghmm.HMMFromMatrices(self.F, ghmm.MultivariateGaussianDistribution(self.F), A, B, pi)
    # print 'Creating Training Data'
    X_train = self.convert_sequence(X)  # Training input
    X_train = X_train.tolist()

    if self.verbose: print 'Run Baum Welch method with (samples, length)', np.shape(X_train)
    final_seq = ghmm.SequenceSet(self.F, X_train)
    ## ret = self.ml.baumWelch(final_seq, loglikelihoodCutoff=2.0)
    ret = self.ml.baumWelch(final_seq, 10000)
    print 'Baum Welch return:', ret
    if np.isnan(ret): return 'Failure'

    [self.A, self.B, self.pi] = self.ml.asMatrices()
    self.A = np.array(self.A)
    self.B = np.array(self.B)

    #--------------- learning for anomaly detection ----------------------------
    [A, B, pi] = self.ml.asMatrices()
    n, m = np.shape(X[0])  # n samples of length m (per feature)
    self.nGaussian = self.nState

    if self.check_method == 'change' or self.check_method == 'globalChange':
        # Get maximum change of loglikelihood over whole time
        ll_delta_logp = []
        for j in xrange(n):
            l_logp = []
            for k in xrange(1, m):
                final_ts_obj = ghmm.EmissionSequence(self.F, X_train[j][:k*self.nEmissionDim])
                logp = self.ml.loglikelihoods(final_ts_obj)[0]
                l_logp.append(logp)
            # successive differences of the prefix log-likelihood curve
            l_delta_logp = np.array(l_logp[1:]) - np.array(l_logp[:-1])
            ll_delta_logp.append(l_delta_logp)

        self.l_mean_delta = np.mean(abs(np.array(ll_delta_logp).flatten()))
        self.l_std_delta = np.std(abs(np.array(ll_delta_logp).flatten()))
        if self.verbose:
            print "mean_delta: ", self.l_mean_delta, " std_delta: ", self.l_std_delta

    if self.check_method == 'global' or self.check_method == 'globalChange':
        # Get average loglikelihood threshold over whole time
        l_logp = []
        for j in xrange(n):
            for k in xrange(1, m):
                final_ts_obj = ghmm.EmissionSequence(self.F, X_train[j][:k*self.nEmissionDim])
                logp = self.ml.loglikelihoods(final_ts_obj)[0]
                l_logp.append(logp)
        self.l_mu = np.mean(l_logp)
        self.l_std = np.std(l_logp)
    elif self.check_method == 'progress':
        # Get average loglikelihood threshold wrt progress
        if os.path.isfile(ml_pkl) and use_pkl:
            if self.verbose: print 'Load detector parameters'
            d = ut.load_pickle(ml_pkl)
            self.l_statePosterior = d['state_post']  # time x state division
            self.ll_mu = d['ll_mu']
            self.ll_std = d['ll_std']
        else:
            if self.cluster_type == 'time':
                if self.verbose: print 'Begining parallel job'
                self.std_coff = 1.0
                # one Gaussian progress bin per state, spread over the sequence
                g_mu_list = np.linspace(0, m-1, self.nGaussian)  #, dtype=np.dtype(np.int16))
                g_sig = float(m) / float(self.nGaussian) * self.std_coff
                r = Parallel(n_jobs=-1)(delayed(learn_likelihoods_progress)(i, n, m, A, B, pi, self.F, X_train, self.nEmissionDim, g_mu_list[i], g_sig, self.nState) for i in xrange(self.nGaussian))
                if self.verbose: print 'Completed parallel job'
                l_i, self.l_statePosterior, self.ll_mu, self.ll_std = zip(*r)
            elif self.cluster_type == 'state':
                self.km = None
                self.ll_mu = None
                self.ll_std = None
                self.ll_mu, self.ll_std = self.state_clustering(X)
                # NOTE(review): path_mat/likelihood_mat are allocated but never
                # used afterwards in this block — confirm dead code.
                path_mat = np.zeros((self.nState, m*n))
                likelihood_mat = np.zeros((1, m*n))
                self.l_statePosterior = None

            # cache the learned thresholds for later runs with use_pkl=True
            d = dict()
            d['state_post'] = self.l_statePosterior
            d['ll_mu'] = self.ll_mu
            d['ll_std'] = self.ll_std
            ut.save_pickle(d, ml_pkl)
def fit(self, xData1, xData2, xData3, A=None, B=None, pi=None, cov_mult=[100.0] * 9,
        verbose=False, ml_pkl='ml_temp.pkl', use_pkl=False):
    """Train a 3-modality (3-D emission) HMM with Baum-Welch, then learn the
    progress-based likelihood thresholds (possibly cached in ml_pkl).

    NOTE(review): cov_mult uses a mutable default argument; it is only read
    here, but callers should not rely on mutating it.
    """
    ml_pkl = os.path.join(os.path.dirname(__file__), ml_pkl)
    X1 = np.array(xData1)
    X2 = np.array(xData2)
    X3 = np.array(xData3)

    if A is None:
        if verbose: print "Generating a new A matrix"
        # Transition probability matrix (Initial transition probability, TODO?)
        A = self.init_trans_mat(self.nState).tolist()
        # print 'A', A

    if B is None:
        if verbose: print "Generating a new B matrix"
        # We should think about multivariate Gaussian pdf.
        mu1, mu2, mu3, cov = self.vectors_to_mean_cov(X1, X2, X3, self.nState)
        # Inflate each covariance entry to avoid convergence problems.
        cov[:, 0, 0] *= cov_mult[0]  #1.5 # to avoid No convergence warning
        cov[:, 1, 0] *= cov_mult[1]  #5.5 # to avoid No convergence warning
        cov[:, 2, 0] *= cov_mult[2]
        cov[:, 0, 1] *= cov_mult[3]
        cov[:, 1, 1] *= cov_mult[4]
        cov[:, 2, 1] *= cov_mult[5]
        cov[:, 0, 2] *= cov_mult[6]
        cov[:, 1, 2] *= cov_mult[7]
        cov[:, 2, 2] *= cov_mult[8]

        print 'mu1:', mu1
        print 'mu2:', mu2
        print 'mu3:', mu3
        print 'cov', cov

        # Emission probability matrix: [mean vector, flattened 3x3 covariance]
        B = [0.0] * self.nState
        for i in range(self.nState):
            B[i] = [[mu1[i], mu2[i], mu3[i]],
                    [cov[i, 0, 0], cov[i, 0, 1], cov[i, 0, 2],
                     cov[i, 1, 0], cov[i, 1, 1], cov[i, 1, 2],
                     cov[i, 2, 0], cov[i, 2, 1], cov[i, 2, 2]]]

    if pi is None:
        # pi - initial probabilities per state
        ## pi = [1.0/float(self.nState)] * self.nState
        pi = [0.0] * self.nState
        pi[0] = 1.0

    # HMM model object
    self.ml = ghmm.HMMFromMatrices(
        self.F, ghmm.MultivariateGaussianDistribution(self.F), A, B, pi)

    X_train = self.convert_sequence(X1, X2, X3)  # Training input
    X_train = X_train.tolist()

    print 'Run Baum Welch method with (samples, length)', np.shape(X_train)
    final_seq = ghmm.SequenceSet(self.F, X_train)
    ## ret = self.ml.baumWelch(final_seq, loglikelihoodCutoff=2.0)
    ret = self.ml.baumWelch(final_seq, 10000)
    print 'Baum Welch return:', ret

    [self.A, self.B, self.pi] = self.ml.asMatrices()
    self.A = np.array(self.A)
    self.B = np.array(self.B)

    # print 'B\'s shape:', self.B.shape, self.B[0].shape, self.B[1].shape
    # print B[0]
    # print B[1]

    #--------------- learning for anomaly detection ----------------------------
    [A, B, pi] = self.ml.asMatrices()
    n, m = np.shape(X1)  # n samples of length m
    self.nGaussian = self.nState

    # Get average loglikelihood threshold wrt progress
    self.std_coff = 1.0
    g_mu_list = np.linspace(0, m - 1, self.nGaussian)  #, dtype=np.dtype(np.int16))
    g_sig = float(m) / float(self.nGaussian) * self.std_coff

    print 'g_mu_list:', g_mu_list
    print 'g_sig:', g_sig

    ######################################################################################
    if os.path.isfile(ml_pkl) and use_pkl:
        # reuse cached thresholds
        with open(ml_pkl, 'rb') as f:
            d = pickle.load(f)
        self.l_statePosterior = d['state_post']  # time x state division
        self.ll_mu = d['ll_mu']
        self.ll_std = d['ll_std']
    else:
        n_jobs = -1
        print 'Begining parallel job'
        r = Parallel(n_jobs=n_jobs)(delayed(learn_likelihoods_progress)(
            i, n, m, A, B, pi, self.F, X_train, self.nEmissionDim,
            g_mu_list[i], g_sig, self.nState) for i in xrange(self.nGaussian))
        print 'Completed parallel job'
        l_i, self.l_statePosterior, self.ll_mu, self.ll_std = zip(*r)

        # cache the learned thresholds
        d = dict()
        d['state_post'] = self.l_statePosterior
        d['ll_mu'] = self.ll_mu
        d['ll_std'] = self.ll_std
        with open(ml_pkl, 'wb') as f:
            pickle.dump(d, f, protocol=pickle.HIGHEST_PROTOCOL)
def fit(self, xData1, xData2=None, A=None, B=None, pi=None, cov_mult=(1.0, 1.0, 1.0, 1.0),
        verbose=False, ml_pkl='ml_temp.pkl', use_pkl=False):
    """Train a 2-modality (2-D emission) HMM with Baum-Welch, then learn the
    thresholds selected by self.check_method (change/global/progress).

    NOTE(review): xData2 defaults to None but np.array(None) is passed on
    unconditionally — confirm callers always supply it."""
    ml_pkl = os.path.join(os.path.dirname(__file__), ml_pkl)
    X1 = np.array(xData1)
    X2 = np.array(xData2)

    if A is None:
        if verbose: print "Generating a new A matrix"
        # Transition probability matrix (Initial transition probability, TODO?)
        A = self.init_trans_mat(self.nState).tolist()

    if B is None:
        if verbose: print "Generating a new B matrix"
        # We should think about multivariate Gaussian pdf.
        mu1, mu2, cov = self.vectors_to_mean_cov(X1, X2, self.nState)
        cov[:, 0, 0] *= cov_mult[0]  #1.5 # to avoid No convergence warning
        cov[:, 1, 0] *= cov_mult[1]  #5.5 # to avoid No convergence warning
        cov[:, 0, 1] *= cov_mult[2]  #5.5 # to avoid No convergence warning
        cov[:, 1, 1] *= cov_mult[3]  #5.5 # to avoid No convergence warning

        # Emission probability matrix: [mean vector, flattened 2x2 covariance]
        B = [0.0] * self.nState
        for i in range(self.nState):
            B[i] = [[mu1[i], mu2[i]],
                    [cov[i, 0, 0], cov[i, 0, 1],
                     cov[i, 1, 0], cov[i, 1, 1]]]

    if pi is None:
        # pi - initial probabilities per state
        ## pi = [1.0/float(self.nState)] * self.nState
        pi = [0.0] * self.nState
        pi[0] = 1.0

    # HMM model object
    self.ml = ghmm.HMMFromMatrices(
        self.F, ghmm.MultivariateGaussianDistribution(self.F), A, B, pi)

    X_train = self.convert_sequence(X1, X2)  # Training input
    X_train = X_train.tolist()

    print 'Run Baum Welch method with (samples, length)', np.shape(X_train)
    final_seq = ghmm.SequenceSet(self.F, X_train)
    ## ret = self.ml.baumWelch(final_seq, loglikelihoodCutoff=2.0)
    ret = self.ml.baumWelch(final_seq, 10000)
    print 'Baum Welch return:', ret

    [self.A, self.B, self.pi] = self.ml.asMatrices()
    self.A = np.array(self.A)
    self.B = np.array(self.B)

    #--------------- learning for anomaly detection ----------------------------
    [A, B, pi] = self.ml.asMatrices()
    n, m = np.shape(X1)  # n samples of length m
    self.nGaussian = self.nState

    if self.check_method == 'change' or self.check_method == 'globalChange':
        # Get maximum change of loglikelihood over whole time
        ll_delta_logp = []
        for j in xrange(n):
            l_logp = []
            for k in xrange(1, m):
                final_ts_obj = ghmm.EmissionSequence(
                    self.F, X_train[j][:k * self.nEmissionDim])
                logp = self.ml.loglikelihoods(final_ts_obj)[0]
                l_logp.append(logp)
            # successive differences of the prefix log-likelihood curve
            l_delta_logp = np.array(l_logp[1:]) - np.array(l_logp[:-1])
            ll_delta_logp.append(l_delta_logp)

        self.l_mean_delta = np.mean(abs(np.array(ll_delta_logp).flatten()))
        self.l_std_delta = np.std(abs(np.array(ll_delta_logp).flatten()))
        print "mean_delta: ", self.l_mean_delta, " std_delta: ", self.l_std_delta

    if self.check_method == 'global' or self.check_method == 'globalChange':
        # Get average loglikelihood threshold over whole time
        l_logp = []
        for j in xrange(n):
            for k in xrange(1, m):
                final_ts_obj = ghmm.EmissionSequence(
                    self.F, X_train[j][:k * self.nEmissionDim])
                logp = self.ml.loglikelihoods(final_ts_obj)[0]
                l_logp.append(logp)
        self.l_mu = np.mean(l_logp)
        self.l_std = np.std(l_logp)
    elif self.check_method == 'progress':
        # Get average loglikelihood threshold wrt progress
        self.std_coff = 1.0
        g_mu_list = np.linspace(
            0, m - 1, self.nGaussian)  #, dtype=np.dtype(np.int16))
        g_sig = float(m) / float(self.nGaussian) * self.std_coff

        ######################################################################################
        if os.path.isfile(ml_pkl) and use_pkl:
            # reuse cached thresholds
            with open(ml_pkl, 'rb') as f:
                d = pickle.load(f)
            self.l_statePosterior = d['state_post']  # time x state division
            self.ll_mu = d['ll_mu']
            self.ll_std = d['ll_std']
        else:
            n_jobs = -1
            r = Parallel(n_jobs=n_jobs)(
                delayed(learn_likelihoods_progress)(
                    i, n, m, A, B, pi, self.F, X_train, self.nEmissionDim,
                    g_mu_list[i], g_sig, self.nState)
                for i in xrange(self.nGaussian))
            l_i, self.l_statePosterior, self.ll_mu, self.ll_std = zip(*r)

            # cache the learned thresholds
            d = dict()
            d['state_post'] = self.l_statePosterior
            d['ll_mu'] = self.ll_mu
            d['ll_std'] = self.ll_std
            with open(ml_pkl, 'wb') as f:
                pickle.dump(d, f, protocol=pickle.HIGHEST_PROTOCOL)
def fit(self, xData, A=None, B=None, pi=None, cov_mult=None, ml_pkl=None, use_pkl=False, cov_type='full', fixed_trans=0,\
        shuffle=False):
    '''
    Input :
    - xData: dimension x sample x length
    Issues:
    - If NaN is returned, the reason can be one of followings,
    -- lower cov
    -- small range of xData (you have to scale it up.)

    Trains the HMM with Baum-Welch (or loads it from ml_pkl when
    use_pkl=True) and stores A/B/pi plus optional Baum-Welch accumulators
    in param_dict.  Returns True (loaded), 'Failure' (NaN), or the
    per-sample Baum-Welch score.
    '''
    # Daehyung: What is the shape and type of input data?
    if shuffle:
        # shuffle samples (axis 1) while keeping dimension/time structure
        X = xData
        X = np.swapaxes(X, 0, 1)
        id_list = range(len(X))
        random.shuffle(id_list)
        X = np.array(X)[id_list]
        X = np.swapaxes(X, 0, 1)
    else:
        X = [np.array(data) for data in xData]
    nData = len(xData[0])

    param_dict = {}

    # Load pre-trained HMM without training
    if use_pkl and ml_pkl is not None and os.path.isfile(ml_pkl):
        if self.verbose: print "Load HMM parameters without train the hmm"
        param_dict = ut.load_pickle(ml_pkl)
        self.A = param_dict['A']
        self.B = param_dict['B']
        self.pi = param_dict['pi']
        if self.nEmissionDim == 1:
            self.ml = ghmm.HMMFromMatrices(self.F, ghmm.GaussianDistribution(self.F), \
                                           self.A, self.B, self.pi)
        else:
            self.ml = ghmm.HMMFromMatrices(self.F, ghmm.MultivariateGaussianDistribution(self.F), \
                                           self.A, self.B, self.pi)

        # restore partial-fit accumulators if the custom ghmm stored them
        out_a_num = param_dict.get('out_a_num', None)
        vec_num = param_dict.get('vec_num', None)
        mat_num = param_dict.get('mat_num', None)
        u_denom = param_dict.get('u_denom', None)
        if out_a_num is not None:
            self.ml.setBaumWelchParams(out_a_num, vec_num, mat_num, u_denom)

        return True
    else:
        if ml_pkl is None:
            ml_pkl = os.path.join(os.path.dirname(__file__), 'ml_temp_n.pkl')

        if cov_mult is None:
            cov_mult = [1.0] * (self.nEmissionDim**2)

        if A is None:
            if self.verbose: print "Generating a new A matrix"
            # Transition probability matrix (Initial transition probability, TODO?)
            A = util.init_trans_mat(self.nState).tolist()

        if B is None:
            if self.verbose: print "Generating a new B matrix"
            # We should think about multivariate Gaussian pdf.
            mus, cov = util.vectors_to_mean_cov(X, self.nState, self.nEmissionDim, cov_type=cov_type)
            ## print np.shape(mus), np.shape(cov)

            # cov: state x dim x dim
            for i in xrange(self.nEmissionDim):
                for j in xrange(self.nEmissionDim):
                    cov[:, i, j] *= cov_mult[self.nEmissionDim * i + j]

            if self.verbose:
                for i, mu in enumerate(mus):
                    print 'mu%i' % i, mu
                ## print 'cov', cov

            # Emission probability matrix
            B = [0] * self.nState
            for i in range(self.nState):
                if self.nEmissionDim > 1:
                    # multivariate: [mean vector, flattened covariance]
                    B[i] = [[mu[i] for mu in mus]]
                    B[i].append(cov[i].flatten().tolist())
                else:
                    # univariate: [mean, variance]
                    B[i] = [np.squeeze(mus[0][i]), float(cov[i])]

        if pi is None:
            # pi - initial probabilities per state
            ## pi = [1.0/float(self.nState)] * self.nState
            pi = [0.0] * self.nState
            pi[0] = 1.0

        # print 'Generating HMM'
        # HMM model object
        if self.nEmissionDim == 1:
            self.ml = ghmm.HMMFromMatrices(self.F, ghmm.GaussianDistribution(self.F), \
                                           A, B, pi)
        else:
            self.ml = ghmm.HMMFromMatrices(self.F, ghmm.MultivariateGaussianDistribution(self.F), \
                                           A, B, pi)
        if cov_type == 'diag': self.ml.setDiagonalCovariance(1)

        # print 'Creating Training Data'
        X_train = util.convert_sequence(X)  # Training input
        X_train = X_train.tolist()
        if self.verbose: print "training data size: ", np.shape(X_train)

        if self.verbose: print 'Run Baum Welch method with (samples, length)', np.shape(
            X_train)
        final_seq = ghmm.SequenceSet(self.F, X_train)
        ## ret = self.ml.baumWelch(final_seq, loglikelihoodCutoff=2.0)
        ret = self.ml.baumWelch(final_seq, 10000)  #, fixedTrans=fixed_trans)
        if np.isnan(ret):
            print 'Baum Welch return:', ret
            return 'Failure'
        print 'Baum Welch return:', ret / float(nData)

        [self.A, self.B, self.pi] = self.ml.asMatrices()
        self.A = np.array(self.A)
        self.B = np.array(self.B)

        param_dict['A'] = self.A
        param_dict['B'] = self.B
        param_dict['pi'] = self.pi

        # persist the Baum-Welch accumulators when the custom ghmm provides them
        try:
            [out_a_num, vec_num, mat_num, u_denom] = self.ml.getBaumWelchParams()
            param_dict['out_a_num'] = out_a_num
            param_dict['vec_num'] = vec_num
            param_dict['mat_num'] = mat_num
            param_dict['u_denom'] = u_denom
        except:
            print "Install new ghmm!!"

        if ml_pkl is not None:
            ut.save_pickle(param_dict, ml_pkl)
        return ret / float(nData)
def _get_hmm(self, hmm):
    """Wrap an (A, B, pi) parameter container in a ghmm model object."""
    dist = ghmm.MultivariateGaussianDistribution(DOMAIN)
    return ghmm.HMMFromMatrices(DOMAIN, dist, hmm.A, hmm.B, hmm.pi)
# B = [ # [["mu111","mu112"],["sig1111","sig1112","sig1121","sig1122"], # ["mu121","mu122"],["sig1211","sig1212","sig1221","sig1222"], # ["w11","w12"] ], # [["mu211","mu212"],["sig2111","sig2112","sig2121","sig2122"], # ["mu221","mu222"],["sig2211","sig2212","sig2221","sig2222"], # ["w21","w22"] ], # [["mu311","mu312"],["sig3111","sig3112","sig3121","sig3122"], # ["mu321","mu322"],["sig3211","sig3212","sig3221","sig3222"], # ["w31","w32"] ], # ] B = [[[5.0], [2.0], [6.0], [1.0], [0.3, 0.7]], [[2.0], [1.0], [1.5], [0.4], [0.4, 0.7]]] # parameters of mixture models pi = [0.1, 0.1] # initial probabilities per state model = ghmm.HMMFromMatrices(F, ghmm.MultivariateGaussianDistribution(F), A, B, pi) # modify model parameters (examples) p = model.getInitial(0) model.setInitial(0, 0.5) # re-set transition from state 0 to state 1 trans = model.getTransition(0, 1) model.setTransition(0, 1, 0.6) # re-setting emission of state 0 component 1 model.setEmission(0, 1, [[4.0], [2.0]]) model.normalize() # re-normalize model parameters print model # for 2 dimensional data, the data structure is like this [x11, x12, x21, x22, x31, x32...] seq = EmissionSequence(F, [5.5, 0.1]) # sample single sequence of length 50