def fit_batch(traj_data, n_components=2, subsample_factor=1, features=None, **kwargs):
    '''
    Fits one GaussianHMM to the concatenated features of several trajectory files.

    Args:
        traj_data - list of paths of training dataset (trajectory csv)
        n_components - number of hidden states
        subsample_factor - subsample factor to apply to all files
        features - columns to fit model to; None means ['speed', 'rotation']
        **kwargs passed to GaussianHMM

    Returns:
        model - fitted model

    Raises:
        ValueError - if traj_data is empty
    '''
    # A None sentinel avoids sharing one mutable default list between calls.
    if features is None:
        features = ['speed', 'rotation']

    traj_data = list(traj_data)
    if not traj_data:
        raise ValueError('traj_data is empty: nothing to fit')

    # Collect the per-file feature matrices and their sequence lengths.
    feature_list = []
    lengths_list = []
    for path in traj_data:
        X, l = features_from_csv(path, features=features, subsample_factor=subsample_factor)
        feature_list.append(X)
        lengths_list.append(l)

    print('Concatenating features...')
    X = np.vstack(feature_list)
    l = np.hstack(lengths_list)

    # Fit a single HMM; `lengths` tells hmmlearn where each sequence ends.
    print('Fitting model...')
    model = GaussianHMM(n_components, **kwargs)
    model.fit(X, lengths=l)
    return model
def addModel(self, nom, data, nbEtats, n_iter, startprob_prior=None, transmat_prior=None):
    '''
    Train a diagonal-covariance GaussianHMM and register it in self.tabModels.

    self.tabModels is a list of lists of the form [name, model, model, ...].
    The new model is appended to the entry whose first element equals `nom`;
    if there is no such entry a new one is created.

    Parameters:
        nom = name of the model
        data = training data (described by the original author as a
               three-dimensional array: a cluster of movements, each made
               of positions)
        nbEtats = number of hidden states of the model
        n_iter = number of iterations for the Baum-Welch algorithm
        startprob_prior = prior for the initial state distribution
        transmat_prior = prior for the state transition matrix
    '''
    hmm = GaussianHMM(nbEtats,
                      covariance_type="diag",
                      n_iter=n_iter,
                      startprob_prior=startprob_prior,
                      transmat_prior=transmat_prior)
    hmm.fit(data)
    verif_set_transMat(hmm)

    # Reuse the existing entry for this name when there is one.
    for entry in self.tabModels:
        if entry[0] == nom:
            entry.append(hmm)
            return

    # First model registered under this name.
    self.tabModels.append([nom, hmm])
def fit(self):
    """Fit one GaussianHMM on all demonstrations and segment each of them.

    All arrays in self._demonstrations are stacked along axis 0 and a single
    GaussianHMM with self.n_components states is fitted on the result, with
    the per-demonstration lengths marking the sequence boundaries.

    Side effects:
        self.segmentation - one entry per demonstration, the result of
            self.findTransitions on that demonstration's predicted states
        self.model - the same fitted model repeated once per demonstration

    Raises:
        IndexError - if there are no demonstrations (as the original
            indexing of self._demonstrations[0] did)
    """
    if self.verbose:
        print("[Clustering] Clearing old model and segmentation")

    self.segmentation = []
    self.model = []

    demos = self._demonstrations
    if len(demos) == 0:
        raise IndexError("no demonstrations to fit")

    g = GaussianHMM(n_components=self.n_components)

    # Concatenate once instead of re-copying the growing array per demonstration.
    lens = [np.shape(d)[0] for d in demos]
    all_demos = np.concatenate(list(demos))
    g.fit(all_demos, lens)

    self.segmentation = [self.findTransitions(g.predict(d)) for d in demos]
    self.model = [g for _ in demos]
def main(args):
    """Fit a 3-state left-to-right Gaussian HMM and write the decoded states.

    Reads (x, X) from args.diffFile via loadDiffRows, fits the model on X and
    writes a tab-separated table (exp, bin, reads, state) to args.outFile.
    The `exp` column is the part of the output file name before the first
    underscore.
    """
    x, X = loadDiffRows(args.diffFile)

    # init_params deliberately omits 't': with the default ('stmc') fit()
    # re-initialises transmat_ and silently discards the left-to-right
    # structure assigned below.
    model = GaussianHMM(n_components=3,
                        covariance_type="diag",
                        n_iter=100000000000,
                        init_params="smc")
    model.transmat_ = numpy.array([[0.5, 0.5, 0.0],
                                   [0.0, 0.5, 0.5],
                                   [0.0, 0.0, 1.0]])
    model.fit(X)
    print(model.transmat_)

    # Re-impose the forbidden transitions, then renormalise so every row is
    # still a probability distribution (hmmlearn rejects rows not summing to 1).
    model.transmat_[0][2] = 0.
    model.transmat_[1][0] = 0.
    model.transmat_[2][0] = 0.
    model.transmat_[2][1] = 0.
    model.transmat_ /= model.transmat_.sum(axis=1, keepdims=True)

    exp = args.outFile.split('/')[-1].split('_')[0]
    with open(args.outFile, 'w') as fout:
        print('exp\tbin\treads\tstate', file=fout)
        for seq in X:
            # NOTE(review): predict() expects a 2-D array; this assumes each
            # element of X is itself a (n_samples, n_features) sequence - confirm.
            hiddenStates = model.predict(seq)
            for idx, (r, h) in enumerate(zip(x, hiddenStates)):
                print(exp + '\t' + str(idx) + '\t' + str(r) + '\t' + str(h), file=fout)
def select(self):
    """Select the model with the best DIC score over the allowed state counts.

    For each state count in [min_n_components, max_n_components] a
    GaussianHMM is fitted on this word's data (self.X, self.lengths) and
    scored as

        DIC = logL(this word) - (1 / M) * sum(logL(other word))

    where the sum runs over the M entries of self.hwords other than
    self.this_word.

    Returns:
        The fitted model with the highest DIC, or an unfitted default
        GaussianHMM() when no candidate could be scored.
    """
    warnings.filterwarnings("ignore", category=DeprecationWarning)

    best_dic = -math.inf
    best_model = GaussianHMM()

    for num_hidden_states in range(self.min_n_components, self.max_n_components + 1):
        try:
            model = GaussianHMM(n_components=num_hidden_states, n_iter=100)
            model.fit(self.X, self.lengths)

            # Log-likelihood of the word this selector is trained for.
            logL = model.score(self.X, self.lengths)

            # Log-likelihood of every competing word under the same model.
            sigma_scores = 0
            M = 0
            for word in self.hwords:
                if word != self.this_word:
                    temp_X, temp_length = self.hwords[word]
                    sigma_scores += model.score(temp_X, temp_length)
                    M += 1
        except Exception:
            # hmmlearn rejects some state counts for small data sets; such a
            # candidate is skipped. Unlike the former bare `except:` this no
            # longer swallows KeyboardInterrupt / SystemExit.
            continue

        if M == 0:
            # DIC is undefined without competing words. The original reached
            # the same outcome through a swallowed ZeroDivisionError.
            continue

        current_dic = logL - (1.0 / (float(M)) * sigma_scores)
        if current_dic >= best_dic:
            best_model, best_dic = model, current_dic

    return best_model
def train():
    """Train one GaussianHMM per label directory and pickle the result.

    Every sub-directory of `training_files` is treated as a label. The
    arrays returned by count_mfcc for each file in it are stacked along
    axis 0 and a 3-state GaussianHMM is fitted on the stack. The list of
    (model, label) tuples is pickled to `pickle_1`.

    Returns:
        The string 'done training'.

    Raises:
        ValueError - from np.concatenate when a label directory has no files
    """
    models = []
    labels = [
        x for x in os.listdir(training_files)
        if os.path.isdir(os.path.join(training_files, x))
    ]

    for label in labels:
        print('calculating', label)
        label_dir = os.path.join(training_files, label)
        filenames = os.listdir(label_dir)

        # Gather first, concatenate once: np.append inside the loop re-copied
        # the whole accumulated array for every file.
        per_file = [count_mfcc(os.path.join(label_dir, filename)) for filename in filenames]
        print(len(filenames))
        features = np.concatenate(per_file, axis=0)

        model = GaussianHMM(n_components=3, n_iter=1000)
        model.fit(features)
        models.append((model, label))

    # Save the trained models; `with` guarantees the file is flushed and closed.
    with open(pickle_1, 'wb') as fout:
        pickle.dump(models, fout)

    print('done training')
    return 'done training'
def train(self, k, train_set, valid_set):
    """Fit a GaussianHMM on one training split and report purity on validation.

    Args:
        k: identifier of the split; only used in the saved model file name.
        train_set: iterable of iterables of (wav, fold, label) triples; it is
            flattened with itertools.chain before use.
        valid_set: iterable of (wav, fold, label) triples.

    Side effects:
        Dumps the fitted model to "<self.model_path>/hmm10-<k>.pkl" and prints
        the purity score.
    """
    train_wavs, train_folds, train_labels = zip(*list(chain(*train_set)))
    train_wavs = np.array(train_wavs)
    train_folds = np.array(train_folds)
    train_labels = np.array(train_labels)
    train_x, _ = self.fix_frame(len(train_wavs), train_wavs, train_folds, train_labels)

    valid_wavs, valid_folds, valid_labels = zip(*valid_set)
    valid_wavs = np.array(valid_wavs)
    valid_folds = np.array(valid_folds)
    valid_labels = np.array(valid_labels)
    valid_x, valid_y = self.fix_frame(len(valid_wavs), valid_wavs, valid_folds, valid_labels)

    if config.isPCA:
        # The projection is learned on the training data only.
        pca = PCA(n_components=config.n_pca)
        pca.fit(train_x)
        train_x = pca.transform(train_x)
        valid_x = pca.transform(valid_x)

    hmm = GaussianHMM(n_components=self.component)
    hmm.fit(train_x)
    joblib.dump(hmm, f"{self.model_path}/hmm10-{k}.pkl")

    # axis=1 picks the most probable state per sample. Without it argmax
    # returns one flat index for the whole posterior matrix, which cannot be
    # compared with the per-sample labels.
    predicted_states = np.argmax(hmm.predict_proba(valid_x), axis=1)
    score = purity_score(np.argmax(valid_y, axis=1), predicted_states)
    print('Accuracy:{0:.3f}'.format(score))
def hidden_markov_model(hidden_states_count, train, test, time, sample_size, data_name, f):
    """Fit a Gaussian HMM on `train`, decode `test`, then plot and log the result.

    Args:
        hidden_states_count: number of hidden states of the model.
        train: observations the model is fitted on.
        test: observations whose hidden-state sequence is decoded.
        time: unused by this function.
        sample_size: number of samples drawn from the fitted model.
        data_name: label used in plot titles and in the written report.
        f: writable text stream receiving the report.
    """
    hmm = GaussianHMM(algorithm='map',
                      n_components=hidden_states_count,
                      covariance_type='diag',
                      n_iter=10000)
    hmm.fit(train)

    # Decoded state sequence for the test data.
    decoded = hmm.predict(test)

    # Transition probabilities out of the last decoded state.
    last_state = decoded[-1]
    prob_next_step = hmm.transmat_[last_state, :]

    # Draw a sample (observations, states); the result is not used further.
    _sampled_obs, _sampled_states = hmm.sample(sample_size)

    plot_time_series(test, decoded, hidden_states_count, None, data_name + ' - Predict')
    points = get_points(hmm)
    plot_gaussians(train, points, hidden_states_count, data_name + ' - Gaussian Predict')

    # Report.
    f.write('\n' + data_name + '\n')
    f.write('Transition Matrix:\n' + str(hmm.transmat_) + '\n')
    f.write('\nNext Step ' + str(prob_next_step) + '\n')
    for point in points:
        number = str(point['no'])
        mean = str(point['mean'])
        sigma = str(point['sigma'])
        f.write('\nHidden Variable NO° ' + number + '\n\tMean: ' + mean + '\n\tSigma: ' + sigma + '\n')
    f.write('\n#######################################\n')
def clusterMatrix(Cij, n_components, covariance_type='diag', n_iter=1000):
    '''
    Cluster the rows of a contact matrix with a Gaussian HMM.

    A GaussianHMM is fitted on Cij (with verbose output enabled) and the same
    matrix is then decoded to obtain one state per row.

    :param Cij: processed genomewide interchromosomal contact matrix as
                generated by constructClusterContactMatrix
    :param n_components: number of clusters (hidden states) to find
    :param covariance_type: covariance matrix type passed to GaussianHMM
                            (see the hmmlearn documentation)
    :param n_iter: maximum number of fitting iterations
    :return: tuple (clusters, model): the state predicted for each row of
             Cij and the fitted GaussianHMM object
    '''
    hmm_options = dict(n_components=n_components,
                       covariance_type=covariance_type,
                       n_iter=n_iter,
                       verbose=True)
    fitted = GaussianHMM(**hmm_options)
    fitted.fit(Cij)

    # Decode the state sequence of the data the model was fitted on.
    return fitted.predict(Cij), fitted
def fit_HMM(self, error_metric):
    """Fit full-covariance GaussianHMMs with 2, 3 and 4 states and keep the best.

    Each candidate is fitted on self.X_train and evaluated with
    error_metric(candidate, self.X_test). When error_metric is HMM_MAD the
    lowest value wins; for any other metric the highest value wins.

    Side effects:
        Sets self.model and self.n_states to the winning model and its
        number of states, and prints a summary line.
    """
    # print() with a single argument behaves the same on Python 2 and 3.
    print("Looking for optimal number of states and fitting HMM")

    lower_is_better = error_metric == HMM_MAD
    best_model = None
    best_guess = None
    opt_n_states = None

    for n_states in range(2, 5):
        candidate = GaussianHMM(n_components=n_states, covariance_type="full", n_iter=1000)
        candidate.fit(self.X_train)
        error = error_metric(candidate, self.X_test)

        if best_model is None:
            improved = True  # the first candidate is always the baseline
        elif lower_is_better:
            improved = error < best_guess
        else:
            improved = error > best_guess

        if improved:
            best_guess = error
            best_model = candidate
            opt_n_states = n_states

    self.model = best_model
    self.n_states = opt_n_states
    print("Done. Lowest error of {} achieved with {} states".format(best_guess, opt_n_states))
def train(self):
    """Fit a full-covariance GaussianHMM on the sequences in self.p_state.

    The sequences are concatenated into one observation list. The number of
    hidden states is the rounded mean of the number of distinct values per
    sequence.

    Side effects:
        Sets self.Ostate, self.Hstate_num, self.n and self.model. Also binds
        the module-level globals `value` and `index` (kept from the original
        code in case other code reads them).
    """
    print('start train')
    n_sequences = len(self.p_state)
    Hstate_num = list(range(n_sequences))
    Ostate_num = list(range(n_sequences))
    Ostate = []

    # NOTE(review): the loop variables are declared global, as in the
    # original; this looks accidental but is preserved for compatibility.
    global value, index
    for (index, value) in enumerate(self.p_state):
        Ostate += value  # observed state sequence
        Hstate_num[index] = len(set(np.array(value).reshape(1, len(value))[0]))
        Ostate_num[index] = len(value)

    self.Ostate = Ostate
    self.Hstate_num = Hstate_num
    self.n = int(round(np.array(Hstate_num).mean()))  # number of hidden states

    model = GaussianHMM(n_components=self.n,
                        n_iter=1000,
                        init_params="mcs",
                        covariance_type="full")
    model.fit(np.array(Ostate), lengths=Ostate_num)

    print('transmat')
    # Replace every all-zero transition row with a uniform distribution.
    # The previous code (`s.index(0.0)`) only repaired the first such row.
    zero_rows = model.transmat_.sum(axis=1) == 0.0
    model.transmat_[zero_rows] = 1.0 / self.n

    self.model = model
def select(self):
    """Select the number of states by 3-fold cross-validated log-likelihood.

    For every state count in [min_n_components, max_n_components] a
    GaussianHMM is trained on each training fold and scored on the matching
    test fold; the state count with the highest mean score wins.

    Returns:
        self.base_model(best state count), or
        self.base_model(self.min_n_components) when there are fewer
        sequences than folds or no candidate could be scored.
    """
    warnings.filterwarnings("ignore", category=DeprecationWarning)

    n_splits = 3
    best_score = float('-Inf')
    best_num_components = None
    split_method = KFold(n_splits=n_splits)

    # With fewer sequences than folds no candidate can be evaluated; the
    # check is loop-invariant, so it is done once.
    if len(self.sequences) >= n_splits:
        for num_components in range(self.min_n_components, self.max_n_components + 1):
            n_scores = []
            for train_idx, test_idx in split_method.split(self.sequences):
                x_train, lengths_train = combine_sequences(train_idx, self.sequences)
                x_test, lengths_test = combine_sequences(test_idx, self.sequences)
                try:
                    model = GaussianHMM(n_components=num_components,
                                        n_iter=1000,
                                        random_state=self.random_state)
                    model.fit(x_train, lengths_train)
                    n_scores.append(model.score(x_test, lengths_test))
                except Exception:
                    # Non-viable model for this fold: skip the fold.
                    continue

            # Skip state counts that failed on every fold instead of taking
            # the mean of an empty list (NaN plus a RuntimeWarning).
            if not n_scores:
                continue

            n_mean = np.mean(n_scores)
            if n_mean > best_score:
                best_num_components = num_components
                best_score = n_mean

    if best_num_components is not None:
        return self.base_model(best_num_components)
    return self.base_model(self.min_n_components)
def test_GaussHMM_decode(cases: str) -> None:
    """Compare GaussHMM.decode with hmmlearn's GaussianHMM.decode on random data.

    Runs int(cases) - 1 rounds. Each round fits the hmmlearn model on random
    sequences, builds a GaussHMM from the fitted parameters and checks that
    the summed log-probability and the concatenated state sequence agree with
    hmmlearn to 4 decimals.
    """
    np.random.seed(12346)
    n_rounds = int(cases)
    N_decimal = 4
    max_iter = 100
    tol = 1e-3

    for _round in range(1, n_rounds):
        n_samples = np.random.randint(10, 50)
        hidden_states = np.random.randint(3, 6)
        n_features = np.random.randint(4, 9)

        # Random sequences of random length.
        X = []
        lengths = []
        for _ in range(n_samples):
            seq_length = np.random.randint(4, 9)
            X.append(np.random.rand(seq_length, n_features))
            lengths.append(seq_length)

        # Reference model.
        X_gold = np.concatenate(X)
        hmm_gold = GaussianHMM(n_components=hidden_states,
                               covariance_type='full',
                               algorithm='viterbi',
                               n_iter=max_iter,
                               tol=tol)
        hmm_gold.fit(X_gold, lengths)

        # Model under test, initialised with the reference parameters.
        hmm_mine = GaussHMM(hidden_states=hidden_states,
                            A=hmm_gold.transmat_,
                            n_features=hmm_gold.n_features,
                            means=hmm_gold.means_,
                            covar=hmm_gold.covars_,
                            pi=hmm_gold.startprob_,
                            tol=tol,
                            max_iter=max_iter)

        gold_logprob, gold_state_seq = hmm_gold.decode(X_gold, lengths)

        # Decode each sequence separately and combine the results.
        decoded = [hmm_mine.decode(this_x) for this_x in X]
        mine_logprob = sum([logprob for logprob, _ in decoded])
        mine_state_seq = np.concatenate([state_seq for _, state_seq in decoded])

        assert_almost_equal(mine_logprob, gold_logprob, decimal=N_decimal)
        assert_almost_equal(mine_state_seq, gold_state_seq, decimal=N_decimal)

    print('Successfully testing the decode function in Gaussian HMM!')
def HMM_algo():
    """Fit and evaluate a GaussianHMM for every (hiddenstates, timestamp) pair.

    For each combination a fresh model is fitted on the features extracted
    from the module-level `train_data`, and the value returned by
    predict_for_next_n_days on `test_data` is printed.
    """
    for n_states in hiddenstates:
        for ts in timestamp:
            print("Combination: ", (n_states, ts))
            model = GaussianHMM(n_components=n_states)
            model.fit(feature_extraction(train_data))
            print(predict_for_next_n_days(test_data, 150, ts, model))
def HMM_feat(ts, hmm_n=4):
    """Summarise a series by the parameters of a Gaussian HMM fitted to it.

    The states are reordered by ascending mean of the first feature so the
    output does not depend on the arbitrary state numbering of the fit.

    Args:
        ts: series to fit; it is turned into a column matrix with
            np.column_stack.
        hmm_n: number of hidden states.

    Returns:
        1-D array: flattened reordered transition matrix, followed by the
        flattened covariances and the flattened means of the reordered states.
    """
    observations = np.column_stack([ts])
    hmm = GaussianHMM(n_components=hmm_n, covariance_type="diag", n_iter=1000)
    hmm.fit(observations)

    # Permutation that sorts the states by their mean.
    order = np.argsort(hmm.means_, axis=0).T[0]

    transitions = hmm.transmat_[order, :][:, order]
    covariances = hmm.covars_[order]
    means = hmm.means_[order]
    flattened = [np.reshape(part, -1) for part in (transitions, covariances, means)]
    return np.concatenate(flattened)
def getHiddenStatus(data):
    """
    Model the data with a Gaussian HMM and return the predicted hidden states.

    A 3-state full-covariance GaussianHMM (random_state=2010) is fitted on
    the columns "r_5", "r_20", "a_5" and "a_20" of `data`; the state
    predicted for each row of those columns is returned.
    """
    feature_cols = ["r_5", "r_20", "a_5", "a_20"]
    observations = data[feature_cols]

    hmm = GaussianHMM(n_components=3,
                      covariance_type="full",
                      n_iter=1000,
                      random_state=2010)
    hmm.fit(observations)
    return hmm.predict(observations)
def select(self):
    """Select a model by cross-validated average log-likelihood.

    For every state count in [min_n_components, max_n_components] one
    GaussianHMM is created, re-fitted on each training fold and scored on
    the matching test fold. The candidate with the highest average score is
    kept.

    Returns:
        The best GaussianHMM as left after its last fold fit, or None if
        every candidate failed.
    """
    bestLogL = None
    bestModel = None

    for num_states in range(self.min_n_components, self.max_n_components + 1):
        try:
            # Use 3 folds where possible. KFold raises for n_splits < 2, so
            # this has to stay inside the try block.
            n_splits = min(3, len(self.sequences))
            split_method = KFold(n_splits=n_splits)

            hmm_model = GaussianHMM(n_components=num_states,
                                    covariance_type="diag",
                                    n_iter=1000,
                                    random_state=self.random_state,
                                    verbose=self.verbose)
            if self.verbose:
                print("model created for {} with {} states".format(self.this_word, num_states))

            cnt = 0
            sumLogL = 0
            for cv_train_idx, cv_test_idx in split_method.split(self.sequences):
                hmm_model.fit(*combine_sequences(cv_train_idx, self.sequences))
                sumLogL += hmm_model.score(*combine_sequences(cv_test_idx, self.sequences))
                cnt += 1

            avgLogL = sumLogL / cnt
            if bestLogL is None or avgLogL > bestLogL:
                bestModel = hmm_model
                bestLogL = avgLogL
        except Exception:
            # Candidate not viable for this data. `except Exception` keeps
            # the best-effort behaviour without swallowing KeyboardInterrupt
            # or SystemExit as the former bare `except:` did.
            if self.verbose:
                print("failure on {} with {} states".format(self.this_word, num_states))

    return bestModel
class GaussHMM:
    """Gaussian HMM whose parameters are computed from labelled data.

    `init` is the initial state distribution; its length fixes the number of
    hidden states. Means, covariances and the transition matrix are derived
    from a 1-D signal and its per-sample state labels (`channels`).
    """

    def __init__(self, init):
        self.init = init

    def fit(self, signals, channels):
        """Build the underlying GaussianHMM from labelled signals.

        Args:
            signals: 1-D array of observations.
            channels: state label of every observation; labels are assumed
                to be the integers 0..k-1.
        """
        self.hmm = GaussianHMM(n_components=len(self.init),
                               covariance_type="full",
                               n_iter=100)
        # Short EM run on the first 100 samples. Every parameter it estimates
        # is overwritten below - presumably it only serves to initialise
        # hmmlearn's internal state (TODO confirm).
        self.hmm.fit(np.array(signals).reshape([-1, 1])[:100])
        self.hmm.means_ = self.get_mean(signals, channels)
        self.hmm.covars_ = self.get_cov(signals, channels)
        self.hmm.startprob_ = self.init
        self.hmm.transmat_ = self.markov_p_trans(channels)

    def predict(self, signals):
        """Return the predicted state for each sample of a 1-D signal."""
        return self.hmm.predict(signals.reshape([-1, 1]))

    def predict_proba(self, signals):
        """Return the state posteriors per sample, rounded to 3 decimals."""
        return self.hmm.predict_proba(signals.reshape([-1, 1])).round(3)

    def get_mean(self, signals, channels):
        """Return the per-state signal mean as an array of shape (k, 1)."""
        n_states = len(np.unique(channels))
        means = [signals[channels == state].mean() for state in range(n_states)]
        return np.array(means).reshape([-1, 1])

    def get_cov(self, signals, channels):
        """Return the per-state signal variance as an array of shape (k, 1, 1)."""
        n_states = len(np.unique(channels))
        covs = [np.cov(signals[channels == state]) for state in range(n_states)]
        return np.array(covs).reshape([-1, 1, 1])

    def markov_p_trans(self, states):
        """Estimate the transition matrix from a sequence of state labels.

        Only consecutive pairs (states[t], states[t + 1]) are counted. The
        earlier np.roll-based version also counted a spurious transition from
        the last sample back to the first one.

        Returns:
            Array of shape (max_state + 1, max_state + 1) whose rows sum to 1.
            A state without any outgoing transition gets a uniform row.
        """
        states = np.asarray(states)
        max_state = np.max(states)
        current, following = states[:-1], states[1:]
        bins = np.arange(max_state + 2)

        matrix = []
        for i in range(max_state + 1):
            counts = np.histogram(following[current == i], bins=bins)[0]
            total = np.sum(counts)
            if total == 0:
                # State never left (or never seen): use uniform probability.
                row = np.ones(max_state + 1) / (max_state + 1)
            else:
                row = counts / total  # normalise to 1
            matrix.append(row)
        return np.array(matrix)
def NMF_HMM(N, X):
    """Factorise X with NMF and decode its components with an HMM.

    An NMF with N + 4 components is fitted on X. The transposed component
    matrix is then used both to fit an N-state HMM and as the sequence that
    is decoded.

    Returns:
        Tuple (P, Transition): the predicted states and the NMF component
        matrix (nmf.components_).
    """
    factoriser = NMF(N + 4)
    state_model = HMM(N)

    factoriser.fit(X)
    components = factoriser.components_

    observations = components.T
    state_model.fit(observations)
    return (state_model.predict(observations), components)
def select(self):
    """Select the number of states by cross-validation and refit on all data.

    With two or fewer sequences cross-validation is skipped and
    self.base_model(self.n_constant) is returned. Otherwise every state
    count in [min_n_components, max_n_components] is scored by the mean
    test-fold log-likelihood of a 3-fold split. The best count (3 if every
    candidate failed) is used to fit a fresh model on self.X / self.lengths.

    Returns:
        The selected GaussianHMM.
    """
    warnings.filterwarnings("ignore", category=DeprecationWarning)

    if len(self.sequences) <= 2:
        return self.base_model(self.n_constant)

    max_score = float('-inf')
    best_n = None
    for n in range(self.min_n_components, self.max_n_components + 1):
        try:
            model = GaussianHMM(n_components=n,
                                covariance_type="diag",
                                n_iter=1000,
                                random_state=self.random_state,
                                verbose=False)
            scores = []
            split_method = KFold(n_splits=min(3, len(self.sequences)))
            for cv_train_idx, cv_test_idx in split_method.split(self.sequences):
                train_X, train_lengths = combine_sequences(cv_train_idx, self.sequences)
                test_X, test_lengths = combine_sequences(cv_test_idx, self.sequences)
                model.fit(train_X, train_lengths)
                scores.append(model.score(test_X, test_lengths))

            cv_score = np.mean(scores)
            if max_score < cv_score:
                max_score = cv_score
                best_n = n
        except Exception:
            # Candidate not viable on some fold: drop it. Narrower than the
            # former bare `except:`, which also swallowed KeyboardInterrupt.
            continue

    if best_n is None:
        # Every candidate failed: fall back to 3 states.
        best_n = 3

    best_model = GaussianHMM(n_components=best_n,
                             covariance_type="diag",
                             n_iter=1000,
                             random_state=self.random_state,
                             verbose=False)
    best_model.fit(self.X, self.lengths)
    return best_model
def select(self):
    """Select a model by mean cross-validated log-likelihood.

    Up to 3 folds are used. With a single sequence the model is trained and
    scored on that same sequence. For every state count in
    [min_n_components, max_n_components] the mean score over the folds that
    could be trained and scored is compared; the best candidate is returned.

    Returns:
        The best GaussianHMM as left after its last successful fit, or None
        if no candidate could be scored.
    """
    warnings.filterwarnings("ignore", category=DeprecationWarning)

    n_splits = min(3, len(self.sequences))
    folds = []
    if n_splits > 1:
        folds = list(KFold(n_splits).split(self.sequences))

    best_score = -float('inf')
    best_model = None

    for n in range(self.min_n_components, self.max_n_components + 1):
        model = GaussianHMM(n_components=n,
                            n_iter=1000,
                            random_state=self.random_state,
                            verbose=False)

        if n_splits == 1:
            # Only one sequence: train and score on the same data.
            try:
                model.fit(self.X, self.lengths)
                mean_logL = model.score(self.X, self.lengths)
            except Exception:
                continue
        else:
            total_logL = 0.0
            count = 0  # folds that were trained and scored successfully
            for cv_train, cv_test in folds:
                train_X, train_lengths = combine_sequences(cv_train, self.sequences)
                test_X, test_lengths = combine_sequences(cv_test, self.sequences)
                try:
                    model.fit(train_X, train_lengths)
                    total_logL += model.score(test_X, test_lengths)
                    count += 1
                except Exception:
                    continue

            # A candidate that failed on every fold has no score. Previously
            # it kept the placeholder 0.0, which beats real (typically
            # negative) log-likelihoods and selected a stale or undefined
            # model.
            if count == 0:
                continue
            mean_logL = total_logL / count

        if mean_logL > best_score:
            best_score = mean_logL
            best_model = model

    return best_model
def HMMSe(self, globalstatenumber, X):
    """Produce a hidden-state sequence for X with a Gaussian HMM.

    A diagonal-covariance GaussianHMM with `globalstatenumber` states and a
    fixed random_state of 1 is fitted on X; the states it predicts for X
    are returned.
    """
    hmm = GaussianHMM(n_components=globalstatenumber,
                      n_iter=1000,
                      random_state=1,
                      covariance_type='diag',
                      params='stcm',
                      init_params='stcm')
    hmm.fit(X)
    return hmm.predict(X)
def select(self):
    """
    select the best model for self.this_word based on average log Likelihood
    of cross-validation folds, for n between self.min_n_components and
    self.max_n_components

    With fewer than 3 sequences, or when no candidate could be scored on any
    fold, self.base_model(self.n_constant) is returned instead.

    :return: GaussianHMM object
    """
    warnings.filterwarnings("ignore", category=DeprecationWarning)

    if len(self.sequences) < 3:
        logging.warning(
            "Not enough sequences to split into folds - using self.n_constant states"
        )
        return self.base_model(self.n_constant)

    results = []
    # n_splits is explicit: the KFold default changed from 3 to 5 in
    # scikit-learn 0.22, which would make split() raise for 3 or 4 sequences.
    split_method = KFold(n_splits=3)

    for num_components in range(self.min_n_components, self.max_n_components + 1):
        score = 0
        model = GaussianHMM(n_components=num_components,
                            covariance_type="diag",
                            n_iter=1000,
                            random_state=self.random_state,
                            verbose=False)
        logging.debug("Built model with {} hidden states".format(num_components))

        folds_run = 0
        for cv_train_idx, cv_test_idx in split_method.split(self.sequences):
            try:
                logging.debug(
                    "Train fold indices:{} Test fold indices:{}".format(
                        cv_train_idx, cv_test_idx))
                train_X, train_lengths = combine_sequences(cv_train_idx, self.sequences)
                cv_X, cv_lengths = combine_sequences(cv_test_idx, self.sequences)
                model.fit(train_X, train_lengths)
                score += model.score(cv_X, cv_lengths)
                folds_run += 1
            except Exception as e:
                logging.warning("{} caught: {}".format(type(e), e))

        logging.debug("Adding score={} and model to results list".format(score))
        # Log-likelihoods are usually negative, so the former `score > 0`
        # filter rejected nearly every valid candidate; having scored at
        # least one fold is the real requirement.
        if folds_run > 0:
            results.append((score / folds_run, model))

    if not results:
        return self.base_model(self.n_constant)

    # Compare on the score only: plain max() on the tuples falls back to
    # comparing model objects when two scores tie, which raises TypeError.
    return max(results, key=lambda result: result[0])[1]
def select(self):
    """Select the model with the highest DIC score.

    For every state count in [min_n_components, max_n_components] a
    GaussianHMM is fitted on this word's data and scored as

        DIC = logL(this word) - sum(logL(other words)) / (M - 1)

    with M = len(self.words). Other words that cannot be scored are left
    out of the sum.

    Returns:
        The fitted model with the highest DIC, or None if no candidate could
        be fitted or there is no other word to compare against.
    """
    warnings.filterwarnings("ignore", category=DeprecationWarning)

    M = len(self.words)
    best_model = None
    best_dic = float("-inf")

    # The upper bound is inclusive: max_n_components itself is a candidate.
    for n in range(self.min_n_components, self.max_n_components + 1):
        try:
            model = GaussianHMM(n_components=n, n_iter=1000).fit(self.X, self.lengths)
            logL = model.score(self.X, self.lengths)
        except Exception:
            continue

        if M - 1 == 0:
            # DIC is undefined without competing words. The original reached
            # the same outcome through a swallowed ZeroDivisionError.
            continue

        # Score the other words with the model trained on THIS word. The
        # previous code re-fitted the model on each other word (corrupting
        # the candidate that was later returned) and scored with the wrong
        # `lengths`.
        other_total = 0
        for word in self.words:
            if word == self.this_word:
                continue
            try:
                other_X, other_lengths = self.all_word_Xlengths[word]
                other_total += model.score(other_X, other_lengths)
            except Exception:
                continue

        dic = logL - other_total / (M - 1)
        if best_dic < dic:
            best_dic = dic
            best_model = model

    return best_model
def select(self):
    """Select the model with the best held-out log-likelihood.

    With fewer than 3 sequences every candidate is trained and scored on all
    data (self.X, self.lengths). Otherwise each candidate is trained on every
    training fold of a 3-fold split and scored on the matching test fold.
    The single (fold, state count) model with the highest score wins.

    Returns:
        The best fitted GaussianHMM, or None if no candidate could be scored.
    """
    warnings.filterwarnings("ignore", category=DeprecationWarning)

    best_score = -float("inf")
    best_model = None

    if len(self.sequences) < 3:
        # Too few sequences for KFold: train and test on everything.
        splits = [(self.X, self.lengths, self.X, self.lengths)]
    else:
        # n_splits is explicit: the KFold default changed from 3 to 5 in
        # scikit-learn 0.22, which would make split() raise for 3 or 4
        # sequences.
        splits = []
        for cv_train_idx, cv_test_idx in KFold(n_splits=3).split(self.sequences):
            train_X, train_lengths = combine_sequences(cv_train_idx, self.sequences)
            test_X, test_lengths = combine_sequences(cv_test_idx, self.sequences)
            splits.append((train_X, train_lengths, test_X, test_lengths))

    for train_X, train_lengths, test_X, test_lengths in splits:
        # The upper bound is inclusive: max_n_components itself is a candidate.
        for n in range(self.min_n_components, self.max_n_components + 1):
            try:
                model = GaussianHMM(n_components=n, random_state=self.random_state)
                model = model.fit(train_X, train_lengths)
                # Plain log-likelihood: the bigger the better.
                score = model.score(test_X, test_lengths)
            except Exception:
                # Candidate not viable for this split; skip it without
                # swallowing KeyboardInterrupt / SystemExit.
                continue
            if score > best_score:
                best_score = score
                best_model = model

    return best_model
def HMM_Stack_Transform(k, X):
    """Replace the columns of X by the HMM state means of a compressed path.

    A k-state HMM (random_state=1) is fitted on X.T and used to predict a
    state per row of X.T. Compress_Progression reduces that state sequence;
    the result has one column per entry of the compressed sequence, holding
    the mean vector of that entry's state.

    Returns:
        The stacked matrix passed through normalize(ST, 'l1', 0).
    """
    observations = X.T
    hmm = HMM(k, random_state=1)
    hmm.fit(observations)

    compressed = Compress_Progression(hmm.predict(observations))

    n_features = np.shape(hmm.means_)[1]
    stacked = np.zeros((n_features, len(compressed)))
    for column, state in enumerate(compressed):
        stacked[:, column] = hmm.means_[state]

    return normalize(stacked, 'l1', 0)
def hmmlearnHMM():
    """Fit a 3-state full-covariance HMM on `input_data` and print its accuracy.

    All columns of the module-level `input_data` except the last are the
    features; the last column holds the true labels. Predicted states are
    relabelled (2 -> 2, 1 -> 7, 0 -> 3) before being compared with the labels.
    """
    features = input_data.iloc[:, :-1]
    actual = input_data.iloc[:, -1]

    model = GaussianHMM(3, "full")
    model.fit(features)

    # Map HMM state indices to the label values used in the data.
    relabel = {2: 2, 1: 7, 0: 3}
    pred = [relabel.get(state, state) for state in model.predict(features)]

    accuracy = sum(pred == actual) / float(len(actual))
    # print() with a single argument behaves the same on Python 2 and 3.
    print(accuracy)
def model_run(name):
    """Fit a 4-state GaussianHMM on segmented price data of collection `name`.

    Prices recorded at or after 2020-06-15 09:30 are read from db[name],
    segmented top-down, converted to box features and split (unshuffled,
    33% held out). The model is fitted on the features extracted from the
    training part. Nothing is returned; the intermediate features are printed.
    """
    collection = db[name]
    cursor = collection.find({})
    start = datetime.datetime(2020, 6, 15, 9, 30, 00)

    # Prices are stored as strings with thousands separators.
    prices = np.array([
        float(record['price'].replace(',', ''))
        for record in cursor
        if record['time'] >= start
    ])

    segments = segment.topdownsegment(prices, fit.interpolate, fit.sumsquared_error, max_error)
    box_feature = build_box_feature(segments, prices)
    print(box_feature)

    train_data, _test_data = train_test_split(box_feature, test_size=0.33, shuffle=False)
    feature_vector = feature_extract(train_data)
    print(feature_vector)

    hmm = GaussianHMM(n_components=4)
    hmm.fit(feature_vector)

    # The results of these two calls are not used here; the calls are kept
    # in case they have side effects.
    _outcomes = compute_all_possible_outcomes(n_steps_frac_low, n_steps_frac_high, n_steps_frac_change)
    _prediction = predict_close_price_for_days(500, name)
def select(self):
    '''
    Pick the model with the highest mean cross-validation score.

    For every state count in [min_n_components, max_n_components] one
    GaussianHMM is re-fitted on each training fold and evaluated with
    self.score on the matching test fold. A ValueError anywhere in the
    cross-validation discards all scores of that candidate.

    Returns the best GaussianHMM (as left after its last fold fit), or None
    when no candidate produced a score.
    '''
    warnings.filterwarnings("ignore", category=DeprecationWarning)

    top_score = float("-inf")
    top_model = None

    for num_states in range(self.min_n_components, self.max_n_components + 1):
        candidate = GaussianHMM(n_components=num_states,
                                covariance_type="diag",
                                n_iter=1000,
                                random_state=self.random_state,
                                verbose=False)
        fold_scores = []
        try:
            folds = KFold(n_splits=self.get_n_splits()).split(self.sequences)
            for cv_train_idx, cv_test_idx in folds:
                # train
                train_x, train_length = combine_sequences(cv_train_idx, self.sequences)
                candidate.fit(train_x, train_length)
                # test
                test_x, test_length = combine_sequences(cv_test_idx, self.sequences)
                # evaluate
                fold_score = self.score(candidate, test_x, test_length)
                fold_scores.append(fold_score)
                if self.verbose:
                    print("model created for {} with {} states, withs score: {}"
                          .format(self.this_word, num_states, fold_score))
        except ValueError as e:
            if self.verbose:
                print("ValueError error({0}):".format(e))
                print("failure on {} with {} states".format(self.this_word, num_states))
            fold_scores = []

        if not fold_scores:
            continue

        mean_score = np.mean(fold_scores)
        if mean_score > top_score:
            top_score = mean_score
            top_model = candidate

    return top_model
def hmm_gen_TS(N, hmm_n=4):
    """Generate N standardised synthetic series with a Gaussian HMM.

    For each output row a random row of the module-level `data_all` is
    picked (columns from '0' onwards), the HMM is fitted on it, a series of
    module-level length T is sampled and then standardised to zero mean and
    unit standard deviation.

    Returns:
        Array of shape (N, T).
    """
    generated = np.zeros([N, T])
    hmm = GaussianHMM(n_components=hmm_n, covariance_type="diag", n_iter=1000)

    for row in range(N):
        if row % 10 == 0:
            p('generating {0}th sample by HMM'.format(row))

        # Fit on one randomly chosen real series.
        source_row = np.random.randint(0, data_all.shape[0])
        real_series = np.array(data_all.loc[source_row, '0':])
        hmm.fit(np.column_stack([real_series]))

        # Sample a new series and standardise it.
        sampled, _states = hmm.sample(T)
        series = sampled[:, 0]
        generated[row, :] = (series - np.mean(series)) / np.std(series)

    return generated
def test_GaussHMM_posterior(cases: str) -> None:
    """Compare GaussHMM.posterior with hmmlearn's score_samples on random data.

    Runs int(cases) - 1 rounds. Each round fits the hmmlearn model on random
    sequences, builds a GaussHMM from the fitted parameters and checks that
    the concatenated per-sequence posteriors agree with hmmlearn to 4
    decimals.
    """
    np.random.seed(12346)
    n_rounds = int(cases)
    N_decimal = 4
    max_iter = 100
    tol = 1e-3

    for _round in range(1, n_rounds):
        n_samples = np.random.randint(10, 50)
        hidden_states = np.random.randint(3, 6)
        n_features = np.random.randint(4, 9)

        # Random sequences of random length.
        X = []
        lengths = []
        for _ in range(n_samples):
            seq_length = np.random.randint(4, 9)
            X.append(np.random.rand(seq_length, n_features))
            lengths.append(seq_length)

        # Reference model.
        X_gold = np.concatenate(X)
        hmm_gold = GaussianHMM(n_components=hidden_states,
                               covariance_type='full',
                               n_iter=max_iter,
                               tol=tol)
        hmm_gold.fit(X_gold, lengths)

        # Model under test, initialised with the reference parameters.
        hmm_mine = GaussHMM(hidden_states=hidden_states,
                            A=hmm_gold.transmat_,
                            n_features=hmm_gold.n_features,
                            means=hmm_gold.means_,
                            covar=hmm_gold.covars_,
                            pi=hmm_gold.startprob_,
                            tol=tol,
                            max_iter=max_iter)

        _, gold_posteriors = hmm_gold.score_samples(X_gold, lengths)
        mine_posteriors = np.concatenate([hmm_mine.posterior(this_x) for this_x in X])

        assert_almost_equal(mine_posteriors, gold_posteriors, decimal=N_decimal)

    print('Successfully testing the function of computing posteriors in Gaussian HMM!')
def select(self):
    """Select the number of states by 2-fold cross-validation.

    For every state count in [min_n_components, max_n_components] a
    GaussianHMM is trained on each training fold and scored on the matching
    test fold; the count with the highest average score wins. If no
    candidate can be scored, self.n_constant is used.

    Returns:
        self.base_model(best state count), or None when there are fewer
        than 2 sequences.
    """
    warnings.filterwarnings("ignore", category=DeprecationWarning)

    num_splits = 2
    if num_splits > len(self.sequences):
        return None  # Cannot proceed, not enough data

    best_num_states = self.n_constant
    best_average_logL_score = float("-inf")
    split_method = KFold(n_splits=num_splits)  # loop-invariant, built once

    for num_states in range(self.min_n_components, self.max_n_components + 1):
        all_scores = []
        for cv_train_idx, cv_test_idx in split_method.split(self.sequences):
            train_X, train_lengths = combine_sequences(cv_train_idx, self.sequences)
            test_X, test_lengths = combine_sequences(cv_test_idx, self.sequences)
            try:
                hmm_model = GaussianHMM(n_components=num_states,
                                        covariance_type="diag",
                                        n_iter=1000,
                                        random_state=self.random_state,
                                        verbose=False)
                hmm_model.fit(train_X, train_lengths)
                all_scores.append(hmm_model.score(test_X, test_lengths))
            except Exception:
                # Fold not usable for this state count; skip it without
                # swallowing KeyboardInterrupt / SystemExit.
                continue

        if not all_scores:
            continue

        logL = np.average(all_scores)
        if logL > best_average_logL_score:
            best_average_logL_score = logL
            best_num_states = num_states

    return self.base_model(best_num_states)
def select(self):
    """Select the model with the highest DIC score.

    For every state count in [min_n_components, max_n_components] a
    GaussianHMM is fitted on this word's data and scored as

        DIC = logL(this word) - 1 / (M - 1) * sum(logL(other word))

    with M = len(self.hwords). A candidate that fails at any step (fit,
    scoring, or the division when M == 1) is skipped.

    Returns:
        The fitted model with the highest DIC, or None if every candidate
        failed.
    """
    bestDIC = None
    bestModel = None

    for num_states in range(self.min_n_components, self.max_n_components + 1):
        try:
            hmm_model = GaussianHMM(n_components=num_states,
                                    covariance_type="diag",
                                    n_iter=1000,
                                    random_state=self.random_state,
                                    verbose=self.verbose)
            if self.verbose:
                print("model created for {} with {} states".format(self.this_word, num_states))

            # Fit on, and score, the word this selector is trained for.
            hmm_model.fit(self.X, self.lengths)
            logL = hmm_model.score(self.X, self.lengths)

            # Sum of the log-likelihoods of all other words.
            otherScores = 0
            for otherWord in self.hwords:
                if otherWord != self.this_word:
                    otherScores += hmm_model.score(*self.hwords[otherWord])

            dic = logL - (float(1) / (len(self.hwords) - 1)) * otherScores

            if bestDIC is None or dic > bestDIC:
                bestModel = hmm_model
                bestDIC = dic
        except Exception:
            # Best-effort: skip this state count. `except Exception` no
            # longer swallows KeyboardInterrupt / SystemExit as the former
            # bare `except:` did.
            if self.verbose:
                print("failure on {} with {} states".format(self.this_word, num_states))

    return bestModel
def train(start_date, end_date, mode, input_days, span, n_components):
    """Fit a GaussianHMM on generated training features and pickle it.

    Args:
        start_date, end_date, mode, input_days, span: forwarded unchanged to
            ``DataGenerator.make_features`` (called with ``is_training=True``).
        n_components: number of hidden states of the HMM.

    Side effects:
        Writes the fitted model to ``team11_model.pkl`` in the current
        working directory.
    """
    training_x, _ = DataGenerator.make_features(start_date, end_date,
                                                mode=mode,
                                                input_days=input_days,
                                                span=span,
                                                is_training=True)

    # TODO: set model parameters
    model = GaussianHMM(n_components, n_iter=100)
    model.fit(training_x)

    # TODO: fix pickle file name
    filename = 'team11_model.pkl'
    # The context manager guarantees the file is flushed and closed even if
    # pickling fails part-way.
    with open(filename, 'wb') as model_file:
        pickle.dump(model, model_file)
def fit_hmm(df, n_components, features=['speed', 'rotation'], **kwargs):
    '''
    Train a Gaussian HMM on feature columns extracted from a dataframe.

    Args:
        df - dataframe holding the positional data to process
        n_components - number of hidden states
        features - names of the features handed to get_features
        **kwargs - forwarded untouched to the GaussianHMM constructor

    Returns:
        the fitted GaussianHMM instance
    '''
    observations, seq_lengths = get_features(df, features=features)

    hmm = GaussianHMM(n_components, **kwargs)
    hmm.fit(observations, lengths=seq_lengths)
    return hmm
def test_backward_with_hmmlearn(self):
    """Check fhmmc's single-chain backward pass against hmmlearn's."""
    r = np.random.randn

    # Ten noisy copies of the same three-frame, two-feature trajectory.
    # Noise is drawn frame by frame, first feature before second.
    centers = ((-600, 100), (-300, 200), (0, 300))
    obs = []
    for _ in xrange(10):
        frames = [[first + r(), second + r()] for first, second in centers]
        obs.append(np.array(frames))

    hmm = GaussianHMM(n_components=3)
    hmm.fit(obs)

    # Reference lattice from hmmlearn's own backward pass.
    framelogprob = hmm._compute_log_likelihood(obs[0])
    tic = timeit.default_timer()
    expected = hmm._do_backward_pass(framelogprob)
    print('hmmlearn took %fs' % (timeit.default_timer() - tic))

    # Same lattice from the fhmm algorithm with #chains = 1; it should
    # yield the exact same results.
    tic = timeit.default_timer()
    actual = np.zeros(expected.shape)
    n_frames = obs[0].shape[0]
    state_tuples = [(x,) for x in xrange(hmm.n_components)]
    log_startprob = hmm._log_startprob.reshape(1, 3)
    log_transmat = hmm._log_transmat.reshape(1, 3, 3)
    fhmmc._backward(n_frames, 1, hmm.n_components, state_tuples,
                    log_startprob, log_transmat, framelogprob, actual)
    print('fhmm took %fs' % (timeit.default_timer() - tic))

    self.assertTrue(np.allclose(expected, actual))
class HmmClassifier():
    """Classify an input sequence by the reference sequence that explains it best.

    A single GaussianHMM is re-fitted on each reference sequence in turn and
    the input sequence is scored under each fit.
    """

    def __init__(self, referenceSeqs, inputSeq):
        """Store the sequences and create the (unfitted) model.

        Args:
            referenceSeqs: iterable of reference sequences, each accepted by
                ``GaussianHMM.fit``.
            inputSeq: the sequence to classify, accepted by
                ``GaussianHMM.score``.
        """
        self.referenceSeqs = referenceSeqs
        self.inputSeq = inputSeq

        # feel free to change this model
        self.model = GaussianHMM(n_components=2, covariance_type="full",
                                 n_iter=2000)

    def predict(self):
        """Return the index of the best-matching reference sequence.

        Returns:
            The position in ``referenceSeqs`` whose fitted model gives
            ``inputSeq`` the highest score; the first one wins on ties.

        Raises:
            ValueError: if ``referenceSeqs`` is empty.
        """
        probs = []
        for referenceSeq in self.referenceSeqs:
            self.model.fit(referenceSeq)
            # Only the score of the input is needed; decoding the reference
            # sequence itself was wasted work and has been dropped.
            probs.append(self.model.score(self.inputSeq))

        if not probs:
            raise ValueError("referenceSeqs is empty; nothing to compare against")

        # return the index of the max prob
        return probs.index(max(probs))
class HMM:
    """Thin wrapper around a one-dimensional GaussianHMM with plotting helpers."""

    # ``train`` stores the data and the decoded states on the instance, so
    # they must be listed here; with only "model" those assignments raise
    # AttributeError on new-style classes.
    __slots__ = [
        "model",
        "data",
        "hidden_states",
    ]

    def __init__(self):
        pass

    def draw(self, data):
        """Plot ``data`` against its index as a red line and show the figure."""
        figure()
        plot(range(len(data)), data, alpha=0.8, color='red')
        show()

    def train(self, data, n_components):
        """Fit the HMM on ``data`` and decode its hidden-state sequence.

        Args:
            data: one-dimensional sequence of observations.
            n_components: number of hidden states.
        """
        # Wrap in a 1-tuple so tuple-valued data is printed instead of being
        # unpacked as format arguments.
        print("Training Data: %s" % (data,))
        self.data = data
        self.model = GaussianHMM(n_components, algorithm='viterbi',
                                 covariance_type='diag')
        # Column vector: one observation per row, a single feature.
        X = np.reshape(data, (len(data), 1))
        self.model = self.model.fit([X])

        self.hidden_states = self.model.predict(X)
        # The format string previously had no placeholder, which raised
        # TypeError instead of printing the states.
        print("Sequence of States: %s" % (self.hidden_states,))

    def eval(self, obs):
        """Print the score of ``obs`` under the trained model."""
        print("Testing Data: %s" % (obs,))
        X = np.reshape(obs, (len(obs), 1))
        print("Eval: %s" % str(self.model.score(X)))

    def plot(self):
        """Scatter the training data, one series per decoded hidden state."""
        fig = figure(facecolor="white")
        ax = fig.add_subplot(111)

        positions = np.arange(len(self.data))
        values = np.array(self.data)
        for i in range(self.model.n_components):
            # use fancy indexing to plot data in each state
            idx = (self.hidden_states == i)
            ax.plot(positions[idx], values[idx], '.',
                    label="State %d" % (i + 1))

        ax.legend()
        show()
def runHmm(patient_record,date_list,group_id,empirical_states):
    """Decode a two-state HMM over a patient's records and pad with a forecast.

    Args:
        patient_record: mapping from date to a mapping of lab values; each
            inner mapping must provide exactly 20 values.
        date_list: dates to read from ``patient_record``, in row order.
        group_id: determines the output length, ``(group_id + 1) * 10``.
        empirical_states: reference 0/1 labels used to resolve which HMM
            state is called 0 and which 1; must be compatible in length with
            the decoded sequence.

    Returns:
        list of int: the decoded states (label-swapped when that is closer to
        ``empirical_states``), followed by the predicted future state repeated
        until the list has ``(group_id + 1) * 10`` entries.  A decoded
        sequence that is already at least that long is returned as is.
    """
    max_state_number = (group_id + 1) * 10

    ###########################################################################
    # Processing the data
    # One row per record, never fewer than two rows; rows that are not filled
    # below stay all-zero.  (The former shape == 0 / shape == 1 branches
    # could never run because of the max(..., 2).)
    # NOTE(review): rows are sized by len(patient_record) but filled from
    # date_list - confirm the two always agree.
    X = np.zeros(shape=(max(len(patient_record), 2), 20))
    for index, date in enumerate(date_list):
        X[index] = np.array(list(patient_record[date].values()))

    ###########################################################################
    # Run Gaussian HMM
    print("fitting to HMM and decoding ...")
    n_components = 2

    # make an HMM instance and execute fit
    model = GaussianHMM(n_components, covariance_type="diag", n_iter=1000)

    # Train the HMM several times to avoid a local optimum and keep the run
    # with the best decoding log-probability.  range(1, n) runs n - 1 times,
    # i.e. once for n = 2.
    max_score = 0
    max_proba_states = []
    transmat = [[]]
    n = 2
    for i in range(1, n):
        model.fit([X])
        # decode() returns (log-probability, state sequence); reusing its
        # states avoids the second, identical decoding pass predict() did.
        score, states = model.decode(X)
        if i == 1 or max_score < score:
            max_score = score
            max_proba_states = states
            transmat = model.transmat_

    # Compare the states with the empirical states: HMM state labels are
    # arbitrary, so keep whichever labelling is closer in squared error.
    max_proba_states = max_proba_states.tolist()
    max_proba_states_inver = [0 if s == 1 else 1 for s in max_proba_states]

    reference = np.array(empirical_states)
    difference = np.sum(
        np.power(np.subtract(np.array(max_proba_states), reference), 2))
    difference_inver = np.sum(
        np.power(np.subtract(np.array(max_proba_states_inver), reference), 2))
    if difference_inver < difference:
        max_proba_states = max_proba_states_inver

    # Predict future state: distribution over next states when starting
    # from state 1.
    # NOTE(review): the start vector is fixed at [0, 1] regardless of the
    # last decoded state or of the label swap above - confirm intent.
    future_states_proba = np.dot([0, 1], transmat)
    future_state = 1 if future_states_proba[1] > future_states_proba[0] else 0

    # A negative repeat count yields an empty padding list.
    padding = [future_state] * (max_state_number - len(max_proba_states))
    return max_proba_states + padding
class CasmlApproximator(FunctionApproximator):
    """Function approximator backed by two case bases and a Gaussian HMM.

    Observed transitions are retained in a basis case base (state, action,
    delta_state), queried (state, action) pairs in a query case base, and
    a GaussianHMM is re-fitted on the observed state sequences to weight
    candidate successor states.
    """

    class _RetentionMethod(RetentionMethod):
        """The retention method for the transition case base implementation for :class:`Casml`.

        When the new problem-solving experience can be stored or not stored in memory,
        depending on the revision outcomes and the CBR policy regarding case retention.

        Parameters
        ----------
        owner : CaseBase
            A pointer to the owning case base.
        tau : float, optional
            The maximum permitted error when comparing most similar solution.
            Default is 0.8.
        sigma : float, optional
            The maximum permitted error when comparing actual with estimated
            transitions. Default is 0.2
        plot_retention_method : callable, optional
            Callback function plotting the retention step. Default is None.

        Notes
        -----
        The Casml retention method for the transition case base considers query cases as
        predicted correctly if both:

        1. the difference between the actual and the estimated transitions are less
           than or equal to the permitted error :math:`\\sigma`:

           .. math::

              d(\\text{case}.\\Delta_\\text{state}, T(s_{i-1}, a_{i-1}) <= \\sigma

        2. and the query case is within the maximum permitted error :math:`\\tau` of
           the most similar solution case:

           .. math::

              d(\\text{case}, 1\\text{NN}(C_T, \\text{case})) <= \\tau

        """

        def __init__(self, owner, tau=None, sigma=None, plot_retention_method=None):
            super(CasmlApproximator._RetentionMethod, self).__init__(owner, plot_retention_method, {'tau': tau, 'sigma': sigma})

            # None selects the documented defaults (0.8 / 0.2).
            self._tau = tau if tau is not None else 0.8
            """:type: float"""

            self._sigma = sigma if sigma is not None else 0.2
            """:type: float"""

        def execute(self, features, matches, plot=True):
            """Execute the retention step.

            Parameters
            ----------
            features : list[tuple[str, ndarray]]
                A list of features of the form (`feature_name`, `data_points`).
            matches : dict[str, dict[int, tuple[float, ndarray]]]
                The solution identified through the similarity measure.
            plot: bool, optional
                Plot the data during the retention step.

            Returns
            -------
            int :
                The case id if the case was retained, -1 otherwise.

            """
            f = dict(features)

            do_add = True
            if matches:
                for id_, val in matches['state'].iteritems():
                    delta_error = np.linalg.norm(self._owner.get_feature('delta_state', id_).value - f['delta_state'])
                    if delta_error <= self._sigma:
                        # At least one of the cases in the case base correctly estimated the query case,
                        # the query case does not add any new information, do not add.
                        do_add = False
                        break

            basis_id = -1
            # When `matches` is empty do_add is still True, so the right-hand
            # operand is never evaluated on an empty mapping.
            # NOTE(review): indexing `.values()` requires a list (Python 2);
            # the first element is presumably the closest match's distance - confirm.
            if do_add or matches['state'].values()[0][0] > self._tau:
                basis_id = self._owner.insert(features, matches)

            if plot:
                self.plot_data(features, matches)

            return basis_id

    class Approximation(FunctionApproximator.Approximation):
        """Approximation of one (state, action) query from neighboring basis cases.

        Keeps the neighbors retrieved from the approximator's basis case base
        together with their state deltas and derives successor-state weights
        from them.
        """

        def __init__(self, approximator, state, act, kernelfn):
            super(CasmlApproximator.Approximation, self).__init__(state)

            self._act = act
            self._approximator = approximator
            """:type: CasmlApproximator"""
            self._kernelfn = kernelfn

            # Running sum of kernel weights of the accepted neighbors.
            self._sum = 0.0
            # (distance, state) pairs; `include` keeps them sorted.
            self._neighbors = []
            """:type: list"""
            # Delta state of each neighbor, index-aligned with `_neighbors`.
            self._deltas = []
            """:type: list"""

            self.update(state.features, act.features)

        def __del__(self):
            """Remove this query from the query case base unless it is also a basis."""
            assert (self.state, Hashable(self._act.features)) not in self._approximator._queries

            # noinspection PyTypeChecker
            # if not next((True for elem in self._approximator._fit_X if np.all(elem == self.state.features)), False):
            if (self.state, Hashable(self._act.features)) not in self._approximator._bases:
                self._approximator._querycb.remove([('state', self.state.features), ('act', self._act.features)])

        def include(self, d, state, delta):
            """Consider a new basis at distance `d` as a neighbor of this query.

            Parameters
            ----------
            d : float
                Distance of the new basis to the query; must be non-negative.
            state :
                Features of the new basis state.
            delta :
                State delta associated with the new basis.

            """
            assert d >= 0

            val = (d, state)

            if len(self._neighbors) <= 0 or val < self._neighbors[-1]:
                # Closer than the current farthest neighbor: insert in sorted
                # position and keep `_deltas` aligned via the same index.
                # noinspection PyTypeChecker
                # if not next((True for (dist, v) in self._neighbors if dist == d and np.all(v == state)), False):
                ind = bisect.bisect_left(self._neighbors, val)
                bisect.insort(self._neighbors, val)
                self._deltas.insert(ind, delta)
                self._compute_weights()
                self.dispatch('average_change')
            else:
                # Farther than every neighbor: append only if its kernel weight
                # is still a large enough fraction of the accumulated weight.
                assert self._sum > 0.0

                w = self._kernelfn(d)
                if w / self._sum >= self._approximator._minfraction:
                    self._neighbors.append(val)
                    self._deltas.append(delta)
                    self._compute_weights()
                    self.dispatch('average_change')

        def update(self, state, act):
            """Re-retrieve the neighbors of (state, act) from the basis case base."""
            neighbors = dict(self._approximator._basiscb.retrieve([('state', state), ('act', act)]))
            if 'state' in neighbors:
                self._deltas = [self._approximator._basiscb.get_feature('delta_state', id_).value for id_ in neighbors['state'].iterkeys()]
                # NOTE(review): relies on keys and values iterating in the same
                # order, and on `.values()` being a list (Python 2) - confirm.
                self._neighbors = neighbors['state'].values()
                self._compute_weights()

        def _compute_weights(self):
            """Recompute successor weights and truncate negligible neighbors."""
            self._weights.clear()

            self._sum = 0.0

            # i counts accepted neighbors; total accumulates HMM probabilities.
            i = 0
            total = 0

            # calculate successor states from the current state and solution delta state
            for (d, succ), delta in zip(self._neighbors, self._deltas):
                w = self._kernelfn(d)
                if self._sum == 0.0 or w / self._sum >= self._approximator._minfraction:
                    # Two-step sequence <current state, current state + delta>
                    # scored by the HMM; exp() turns the score into a probability.
                    sequence = [np.asarray(self._state.features), np.asarray(self._state.features + delta)]
                    proba = np.exp(self._approximator._hmm.score(sequence))

                    self._weights[MDPState.create(succ)] = (w, proba)  # proba
                    self._sum += w
                    total += proba
                    i += 1
                else:
                    break

            # Drop every neighbor from the first rejected one onwards.
            del self._neighbors[i:]
            del self._deltas[i:]

            # Normalize the probabilities by their total.
            # NOTE(review): nothing guards against total == 0 here - confirm.
            for succ, (w, p) in self._weights.iteritems():
                self._weights[succ] = (w, p / total)  # total
            pass

            # sequences = np.zeros((len(self._neighbors), 2, len(self._state)), dtype=float)
            #
            # for i, delta in enumerate(self._deltas):
            #     sequences[i, 0] = np.array(self._state.features)
            #     sequences[i, 1] = np.asarray(self._state.features + delta)
            #
            # # use HMM to calculate probability for observing sequence <current_state, next_state>
            # # noinspection PyTypeChecker
            # weights = np.exp(self._approximator._hmm.score(sequences))
            # for (_, succ), w in zip(self._neighbors, weights):
            #     self._weights[MDPState.create(succ)] = w
            #
            # sum_ = weights.sum()
            # for (_, succ), w in zip(self._neighbors, weights):
            #     if len(weights) <= 1:
            #         w *= 0.9
            #     self._weights[MDPState.create(succ)] = w / sum_

    # -----------------------------
    # CasmlApproximator
    # -----------------------------
    def __init__(self, feature_metadata, minfraction, scale, kernelfn, tau=None, sigma=None, ncomponents=1, n_iter=1):
        """Create the case bases and the hidden Markov model.

        Parameters
        ----------
        feature_metadata :
            Feature description handed to both case bases. The
            'delta_state' entry is deleted from it in place.
        minfraction : float
            Smallest fraction of the accumulated kernel weight a further
            neighbor must contribute to be kept.
        scale :
            Stored as ``_scale``.
        kernelfn : callable
            Maps a distance to a kernel weight.
        tau, sigma : float, optional
            Passed to the retention method of the basis case base.
        ncomponents : int, optional
            Number of hidden states of the GaussianHMM. Default is 1.
        n_iter : int, optional
            ``n_iter`` argument of the GaussianHMM. Default is 1.

        """
        super(CasmlApproximator, self).__init__()

        self._minfraction = minfraction
        self._scale = scale
        self._kernelfn = kernelfn

        # True until the first observation of an episode has been recorded.
        self._new_sequence = True

        #: Contains all the existing CasmlAppoximations created by
        #: this CasmlApproximator. The keys serve as both queries and
        #: bases (queries are a superset of bases), so a datum may be
        #: None if the associated key is just a basis, not a query.
        self._queries = weakref.WeakValueDictionary()
        """:type: dict[tuple[MDPState, MDPAction], Approximation]"""

        #: The subset of keys of queries that are also bases.
        #: The order in which the bases have been received is preserved
        self._bases = set()
        """:type: set[tuple[MDPState, Hashable]"""

        # One list of state features per observed sequence (see _fit_hmm).
        self._fit_X = []
        """:type: list[ndarray]"""

        #: The case base maintaining the observations in the form
        #:     c = <s, a, ds>, where ds = s_{i+1} - s_i
        #: to identify possible successor states.
        self._basiscb = CaseBase(feature_metadata, retention_method=self._RetentionMethod, retention_method_params=(tau, sigma), name='basiscb')
        """:type: CaseBase"""

        # NOTE(review): this mutates the caller's feature_metadata argument.
        del feature_metadata['delta_state']

        #: Invariant: contains all the keys in queries
        self._querycb = CaseBase(feature_metadata, name='querycb')
        """:type: CaseBase"""

        #: The hidden Markov model maintaining the observations in the form
        #:     seq = <s_{i}, s_{i+1}>
        #: to reason on the transition probabilities of successor states.
        self._hmm = GaussianHMM(ncomponents, n_iter=n_iter)  # , covariance_type='full'
        # self._hmm = GaussianHMM(ncomponents)
        """:type: GaussianHMM"""

        # Counters: bases skipped because already known / rejected by retention.
        self._not_add_bases = 0
        self._not_add_count = 0

    def initialize(self):
        """Prepare for a new episode."""
        self._new_sequence = True

    def add_basis(self, state, act, succ=None):
        """Adds a state to the set of bases used to approximate query states.

        Parameters
        ----------
        state : MDPState
            The state to add
        act : MDPAction
            The action performed in that state
        succ : MDPState:
            The successor state.

        Returns
        -------
        MDPState :
            The approximated state.

        """
        # update the hmm with the new sequence
        self._fit_hmm(state, succ)

        # retain the case in the query case base
        features = [('state', state.features), ('act', act.features)]
        self._querycb.retain(features)

        a = Hashable(act.features)
        if (state, a) in self._bases:
            # Already a basis: only the counter changes.
            self._not_add_bases += 1
            return state

        self._bases.add((state, a))

        # retain the case in the basis case base
        if succ is None:
            # Without a successor the transition is a self-loop (zero delta).
            succ = state

        delta = succ - state
        features.append(('delta_state', delta))
        basis_id = self._basiscb.run(features)

        if basis_id <= -1:
            self._not_add_count += 1

        if basis_id >= 0:
            # The basis was retained: bring the affected live approximations
            # up to date. Queries without a live Approximation raise KeyError
            # on the weak dictionary and are skipped.
            if self._querycb.similarity_uses_knn:
                for c in self._querycb.itervalues():
                    try:
                        approx = self._queries[(MDPState.create(c['state'].value), Hashable(c['act'].value))]
                    except KeyError:
                        pass
                    else:
                        approx.update(c['state'].value, c['act'].value)
            else:
                neighbors = dict(self._querycb.retrieve([('state', state.features), ('act', act.features)]))
                for id_, (d, s) in neighbors['state'].iteritems():
                    try:
                        approx = self._queries[(MDPState.create(s), Hashable(neighbors['act'][id_][1]))]
                    except KeyError:
                        pass
                    else:
                        approx.include(d, state.features, delta)

        return state

    def approximate(self, state, act):
        """Approximates a given state using an Approximation.

        Parameters
        ----------
        state : MDPState
            The state to approximate.
        act : MDPAction
            The action performed in that state

        Returns
        -------
        Approximation :
            The Approximation approximating state.

        """
        self._querycb.retain([('state', state.features), ('act', act.features)])

        a = Hashable(act.features)
        try:
            approx = self._queries[(state, a)]
        except KeyError:
            # No live approximation for this query: create and register one.
            approx = CasmlApproximator.Approximation(self, state, act, self._kernelfn)
            self._queries[(state, a)] = approx

        return approx

    def _fit_hmm(self, state, succ):
        """Record the observed transition and re-fit the HMM on all sequences."""
        # try:
        #     x = self._hmm._fit_X.copy()
        # except AttributeError:
        #     x = np.zeros(1, dtype=np.object)
        # else:
        #     if self._new_sequence:
        #         x = self._hmm._fit_X.tolist()
        #         x.append(np.zeros(1))
        #         x = np.array(x)
        #
        # if self._new_sequence:
        #     self._new_sequence = False
        #     x[-1] = np.hstack([np.reshape(state.features, (-1, state._nfeatures)).T])
        #
        # x[-1] = np.hstack([x[-1].tolist(), np.reshape(succ.features, (-1, succ._nfeatures)).T])
        # self._hmm.fit(x, n_init=1)

        # NOTE(review): the nesting below was reconstructed from a
        # whitespace-collapsed source - confirm that the start state is only
        # appended when a new sequence begins and that fit() runs on every call.
        if self._new_sequence:
            self._new_sequence = False
            self._fit_X.append([])
            self._fit_X[-1].append(state.features)
        if succ is not None:
            self._fit_X[-1].append(succ.features)
        # All sequences are stacked into one array; `lengths` marks their boundaries.
        self._hmm.fit(np.concatenate(self._fit_X), lengths=[len(x) for x in self._fit_X])
def predict_states(X,group_id,empirical_states):
    """Decode a two-state HMM over ``X`` and pad the result with a forecast.

    Args:
        X: observation matrix accepted by ``GaussianHMM.fit`` / ``decode``.
        group_id: determines the output length, ``(group_id + 1) * 10``.
        empirical_states: reference 0/1 labels used to resolve which HMM
            state is called 0 and which 1; must be compatible in length with
            the decoded sequence.

    Returns:
        list of int: the decoded states (label-swapped when that is closer to
        ``empirical_states``), followed by the predicted future state repeated
        until the list has ``(group_id + 1) * 10`` entries.  A decoded
        sequence that is already at least that long is returned as is.
    """
    max_state_number = (group_id + 1) * 10
    n_components = 2

    # make an HMM instance and execute fit
    model = GaussianHMM(n_components, covariance_type="diag", n_iter=1000)

    # Train the HMM several times to avoid a local optimum and keep the run
    # with the best decoding log-probability.  range(1, n) runs n - 1 times,
    # i.e. once for n = 2.
    max_score = 0
    max_proba_states = []
    transmat = [[]]
    n = 2
    for i in range(1, n):
        model.fit([X])
        # decode() returns (log-probability, state sequence); reusing its
        # states avoids the second, identical decoding pass predict() did.
        score, states = model.decode(X)
        if i == 1 or max_score < score:
            max_score = score
            max_proba_states = states
            transmat = model.transmat_

    # Compare the states with the empirical states: HMM state labels are
    # arbitrary, so keep whichever labelling is closer in squared error.
    max_proba_states = max_proba_states.tolist()
    max_proba_states_inver = [0 if s == 1 else 1 for s in max_proba_states]

    reference = np.array(empirical_states)
    difference = np.sum(
        np.power(np.subtract(np.array(max_proba_states), reference), 2))
    difference_inver = np.sum(
        np.power(np.subtract(np.array(max_proba_states_inver), reference), 2))
    if difference_inver < difference:
        max_proba_states = max_proba_states_inver

    # Predict future state: distribution over next states when starting
    # from state 1.
    # NOTE(review): the start vector is fixed at [0, 1] regardless of the
    # last decoded state or of the label swap above - confirm intent.
    future_states_proba = np.dot([0, 1], transmat)
    future_state = 1 if future_states_proba[1] > future_states_proba[0] else 0

    # A negative repeat count yields an empty padding list.
    padding = [future_state] * (max_state_number - len(max_proba_states))
    return max_proba_states + padding
y_test = test_set[1:] # HMMMLearn #################################################################################### #################################################################################### #################################################################################### import numpy as np from hmmlearn.hmm import GaussianHMM new_x = np.asarray(x_train) n_comps = 6 model = GaussianHMM(n_comps) model.fit([new_x]) hidden_states = model.predict(new_x) ############################################################################### # print trained parameters and plot import pylab as pl from matplotlib.finance import quotes_historical_yahoo from matplotlib.dates import YearLocator, MonthLocator, DateFormatter print("Transition matrix") print(model.transmat_) print() print("means and vars of each hidden state")
# Standard deviation of the per-sample feature counts, one entry per word.
std_devs = []
# Train one three-state HMM per entry of `spoken`.
for i in range(len(spoken)):
    #print "fitting to HMM and decoding ..."
    n_components = 3
    # Feature-sequence length of every sample of this word.
    arr = []
    # make an HMM instance and execute fit
    model = GaussianHMM(n_components, covariance_type="diag", n_iter=1000)
    for j in range(n_samples):
        (rate,sig) = wav.read(fpaths[i][j])
        features = get_features(sig)
        arr.append(len(features))
        # NOTE(review): fit() is called once per sample on the same model;
        # whether a later call discards what earlier calls learned depends on
        # the HMM library's initialisation behaviour - confirm.
        model.fit([features])
    models.append(model)
    means.append(np.mean(arr))
    std_devs.append(np.std(arr))
    #print("done\n")

# Expected answers: one whitespace-split line of answer.txt per entry.
correct_answers = []
with open('Test/'+test_folder+'/answer.txt') as answers:
    for entry in answers:
        correct_answers.append(entry.split())
tot_words = len(correct_answers)
right = 0.0
threshold = 1.5
# Day-over-day change of the closing value; it has one element fewer than
# close_v, so dates and close_v drop their first element to stay aligned.
diff = close_v[1:] - close_v[:-1]
dates = dates[1:]
close_v = close_v[1:]

# pack diff and volume for training
# presumably `volume` was already trimmed to the same length - verify upstream
X = np.column_stack([diff, volume])

###############################################################################
# Run Gaussian HMM
print("fitting to HMM and decoding ...", end="")
n_components = 5

# make an HMM instance and execute fit
model = GaussianHMM(n_components, covariance_type="diag", n_iter=1000)
# fit() receives a one-element list holding the whole observation matrix.
model.fit([X])

# predict the optimal sequence of internal hidden state
hidden_states = model.predict(X)
print("done\n")

###############################################################################
# print trained parameters and plot
print("Transition matrix")
print(model.transmat_)
print()
print("means and vars of each hidden state")
for i in range(n_components):
    print("%dth hidden state" % i)
# Drop the first row, transpose and standardise the observations.
obs = obs[1:]
obs = obs.T
obs = scale(obs)

# Three-state Gaussian HMM with every constructor argument spelled out.
# NOTE(review): params='mc' / init_params='mc' presumably restrict both
# initialisation and training to means and covariances, leaving the start
# and transition probabilities set below untouched - confirm against the
# HMM library documentation.
model = GaussianHMM(algorithm='viterbi', covariance_type='diag', covars_prior=0.01, covars_weight=1, init_params='mc', means_prior=0, means_weight=0, min_covar=0.001, n_components=3, n_iter=1000, params='mc', random_state=None, startprob_prior=1.0, tol=0.01, transmat_prior=1.0, verbose=False)

# Always start in state 0; each state may only stay or advance to the next
# one, and state 2 never leaves.
model.startprob_ = numpy.array([1., 0, 0])
model.startprob_prior = model.startprob_
model.transmat_ = numpy.array([[0.9, 0.1, 0], [0, 0.9, 0.1], [0, 0, 1]])
model.transmat_prior = model.transmat_
model.fit(obs)

# Copy the fitted parameters into the arrays handed to GMHMM below.
pi = model.startprob_
A = model.transmat_
w = numpy.ones((n, m), dtype=numpy.double)
hmm_means = numpy.ones((n, m, d), dtype=numpy.double)
# Only index 0 along the second axis is filled for each of the three states.
hmm_means[0][0] = model.means_[0]
hmm_means[1][0] = model.means_[1]
hmm_means[2][0] = model.means_[2]
# Start every covariance as a d x d identity, then overwrite index 0 along
# the second axis with the fitted covariance of each state.
hmm_covars = numpy.array([[ numpy.matrix(numpy.eye(d,d)) for j in xrange(m)] for i in xrange(n)])
hmm_covars[0][0] = model.covars_[0]
hmm_covars[1][0] = model.covars_[1]
hmm_covars[2][0] = model.covars_[2]
gmmhmm = GMHMM(n,m,d,A,hmm_means,hmm_covars,w,pi,init_type='user',verbose=False)
# hidden_state = model.predict(obs)
import numpy as np
import matplotlib.pyplot as plt
from hmmlearn.hmm import GaussianHMM

# Load the data from the input file
input_file = 'CNY.csv'
data = np.loadtxt(input_file, delimiter=',')

# Extract the required columns: closing value and traded volume.  The last
# volume entry is dropped so both series have the same length below.
closing_values = np.array(data[:, 6])
volume_of_shares = np.array(data[:, 8])[:-1]

# Day-over-day change of the closing value, in percent of the previous close
price_change = closing_values[1:] - closing_values[:-1]
diff_percentage = 100.0 * price_change / closing_values[:-1]

# Combine the percentage change with the traded volume, one row per day
X = np.column_stack([diff_percentage, volume_of_shares])

# Create and train the Gaussian hidden Markov model
print(u"训练高斯隐马尔科夫模型中......")
model = GaussianHMM(n_components=5, covariance_type='diag', n_iter=1000)
model.fit(X)

# Generate data from the trained model
num_samples = 500
samples, _ = model.sample(num_samples)

# One figure per generated column: the first on the implicit figure, the
# second on a fresh one.
time_axis = np.arange(num_samples)
for column, colour in enumerate(('black', 'red')):
    if column:
        plt.figure()
    plt.plot(time_axis, samples[:, column], c=colour)

plt.show()
import pickle
import pylab as pl
import numpy as np
from hmmlearn.hmm import GaussianHMM
from matplotlib.dates import YearLocator, MonthLocator, DateFormatter
import nyc

###############################################################################
# Fit a six-state Gaussian HMM, then print trained parameters and plot
###############################################################################
new_x = np.asarray(train_set)
n_comps = 6
model = GaussianHMM(n_comps)
# fit() receives a one-element list holding the whole training array.
model.fit([new_x])
# Hidden-state sequence the fitted model assigns to the training data.
hidden_states = model.predict(new_x)

print("means and vars of each hidden state")
for i in range(n_comps):
    print("%dth hidden state" % i)
    print("mean = ", model.means_[i])
    # Only the diagonal of each state's covariance is reported.
    print("var = ", np.diag(model.covars_[i]))
    print()

years = YearLocator()   # every year
months = MonthLocator()  # every month
yearsFmt = DateFormatter('%Y')
fig = pl.figure()
ax = fig.add_subplot(111)