def get_hmms(self): for gesture_type in self.gesture_types: print_status("Get_Hmms", "Fitting for gesture_type: " + gesture_type) ### Step 1: fill hmm_examples appropriately ### hmm_examples = [] for gesture in self.gestures[gesture_type]: hmm_rep = gesture.get_hmm_rep() hmm_examples.append(hmm_rep) ### Step 2: fit parameters for the hmm ### hmm = GaussianHMM(self.num_hmm_states) hmm.fit(hmm_examples) ### Step 3: store the hmm in self.hmms ### self.hmms[gesture_type] = hmm print_inner_status( gesture_type, "predicted the following sequences: (score: sequence)") for example in hmm_examples: print " ", hmm.score(example), ": ", hmm.predict(example)
def use_hmm(img_times, change_vals, fps=10, min_secs_for_train_to_pass=8):
    """Label frames with a 2-state GaussianHMM and keep the long motions.

    The series in ``change_vals`` are stacked into one observation matrix, a
    two-state diagonal-covariance HMM is fitted and decoded, the learned
    parameters are printed, and the decoded labels are flipped when state 0
    has the larger first-feature mean.  The labels are then passed through
    ``filter_out_short_motions``, plotted, and the selected frames are copied
    with ``utils.copy_image_subset``.

    Returns:
        The result of ``filter_out_short_motions`` for the decoded labels.
    """
    from sklearn.hmm import GaussianHMM

    n_components = 2
    # One row per frame, one column per change-value series.
    observations = np.column_stack(change_vals).T

    model = GaussianHMM(n_components, covariance_type="diag", n_iter=1000)
    model.fit([observations])
    #thresh = 10**-15
    #model.transmat_ = np.array([[1-thresh,thresh],[1-thresh,thresh]])
    hidden_states = model.predict(observations)

    # print trained parameters and plot
    print("Transition matrix")
    print(model.transmat_)
    print()
    print("means and vars of each hidden state")
    for state in range(n_components):
        print("%dth hidden state" % state)
        print("mean = ", model.means_[state])
        print("var = ", np.diag(model.covars_[state]))
        print()

    # assume most frames have no train, switch labels if necessary
    state0_mean, state1_mean = model.means_[0][0], model.means_[1][0]
    if state0_mean > state1_mean:
        hidden_states = 1 - hidden_states

    train_spotted = filter_out_short_motions(
        hidden_states, min_secs_for_train_to_pass, fps)
    plot_timeline(img_times, change_vals, hidden_states, train_spotted)
    spotted_frames = np.nonzero(train_spotted)[0]
    utils.copy_image_subset(config.experiment_data_frames,
                            config.experiment_output_frames_hmm,
                            spotted_frames)
    return train_spotted
def predictWithHMM(index, window=252):
    """Predict the sign of the target at ``index`` with a rolling-window HMM.

    A GaussianHMM is fitted on the ``window`` rows of the module-level ``X``
    that precede ``index`` (PCA-reduced first when ``perform_pca`` is set).
    The hidden state decoded for row ``index`` is looked up among the
    training rows; the prediction is 1 when the mean of ``actual_y`` over the
    training rows that share that state is positive, otherwise 0.

    Args:
        index: Row of ``X`` / ``y`` to predict.
        window: Number of preceding rows used for training.

    Returns:
        tuple: ``(prediction, testing_y, prob)`` where ``prediction`` is 0 or
        1, ``testing_y`` is ``y[index]`` and ``prob`` is always 0 (the
        probabilistic variant is disabled).
    """
    window_rows = range(index - window, index)
    training_X = X[window_rows, :]
    training_y = actual_y[window_rows]
    testing_X = X[index, :].reshape(1, training_X.shape[1])
    testing_y = y[index]

    # PCA DATA
    if perform_pca:
        pca = PCA(n_components=pca_components)
        pca.fit(training_X)
        training_X = pca.transform(training_X)
        testing_X = pca.transform(testing_X)

    model = GaussianHMM(n_components, "diag", n_iter=1000)
    model.fit([training_X])

    hidden_states = model.predict(training_X)
    predicted_hidden_state = model.predict(testing_X)

    # DO PROBALISTIC APPROACH
    # pr = model.predict_proba(testing_X)
    # print pr
    prob = 0

    state_idx = (hidden_states == predicted_hidden_state)
    if not state_idx.any():
        # No training row was decoded into the predicted state.  The mean of
        # an empty selection is NaN (with a RuntimeWarning) and NaN > 0 is
        # False, so return the same 0 explicitly and skip the warning.
        return 0, testing_y, prob

    # This is a mean, not a median, despite the name the original used.
    mean_val = np.mean(training_y[state_idx])
    return int(mean_val > 0), testing_y, prob
def train(X, n_components):
    """Fit a diagonal-covariance GaussianHMM to ``X`` and decode it.

    The learned transition matrix and the mean / variance of every hidden
    state are printed.

    Args:
        X: Observation matrix passed to the model as a single sequence.
        n_components: Number of hidden states.

    Returns:
        tuple: ``(hidden_states, model)`` - the decoded state sequence for
        ``X`` and the fitted model.
    """
    ###########################################################################
    # Run Gaussian HMM
    print("fitting to HMM and decoding ...")

    # make an HMM instance and execute fit
    fitted = GaussianHMM(n_components, covariance_type="diag", n_iter=2000)
    fitted.fit([X])

    # predict the optimal sequence of internal hidden state
    decoded = fitted.predict(X)
    print("done\n")

    ###########################################################################
    # print trained parameters and plot
    print("Transition matrix")
    print(fitted.transmat_)
    print()

    print("means and vars of each hidden state")
    for state in range(n_components):
        print("%dth hidden state" % state)
        print("mean = ", fitted.means_[state])
        print("var = ", np.diag(fitted.covars_[state]))
        print()

    return decoded, fitted
def predictWithHMM(index, window=252):
    """Predict the sign of the target at ``index`` with a rolling-window HMM.

    A GaussianHMM is fitted on the ``window`` rows of the module-level ``X``
    that precede ``index`` (PCA-reduced first when ``perform_pca`` is set).
    The hidden state decoded for row ``index`` is looked up among the
    training rows; the prediction is 1 when the mean of ``actual_y`` over the
    training rows that share that state is positive, otherwise 0.

    Args:
        index: Row of ``X`` / ``y`` to predict.
        window: Number of preceding rows used for training.

    Returns:
        tuple: ``(prediction, testing_y, prob)`` where ``prediction`` is 0 or
        1, ``testing_y`` is ``y[index]`` and ``prob`` is always 0 (the
        probabilistic variant is disabled).
    """
    window_rows = range(index - window, index)
    training_X = X[window_rows, :]
    training_y = actual_y[window_rows]
    testing_X = X[index, :].reshape(1, training_X.shape[1])
    testing_y = y[index]

    # PCA DATA
    if perform_pca:
        pca = PCA(n_components=pca_components)
        pca.fit(training_X)
        training_X = pca.transform(training_X)
        testing_X = pca.transform(testing_X)

    model = GaussianHMM(n_components, "diag", n_iter=1000)
    model.fit([training_X])

    hidden_states = model.predict(training_X)
    predicted_hidden_state = model.predict(testing_X)

    # DO PROBALISTIC APPROACH
    # pr = model.predict_proba(testing_X)
    # print pr
    prob = 0

    state_idx = (hidden_states == predicted_hidden_state)
    if not state_idx.any():
        # No training row was decoded into the predicted state.  The mean of
        # an empty selection is NaN (with a RuntimeWarning) and NaN > 0 is
        # False, so return the same 0 explicitly and skip the warning.
        return 0, testing_y, prob

    # This is a mean, not a median, despite the name the original used.
    mean_val = np.mean(training_y[state_idx])
    return int(mean_val > 0), testing_y, prob
def create_hmm_by_label(label):
    """Fit a 3-state diagonal GaussianHMM to the sequences of one label.

    Args:
        label: Label forwarded to ``get_sequences_by_label``.

    Returns:
        The fitted ``GaussianHMM``.
    """
    label_sequences = get_sequences_by_label(label)
    model = GaussianHMM(3, covariance_type="diag", n_iter=1000)
    # The looked-up result is handed to fit() as a single training sequence.
    model.fit([label_sequences])
    return model
def run(self, protos):
    """Train one GaussianHMM per prototype and remember the results.

    Each prototype is a ``(nstate, label, seq)`` triple.  The sequence is run
    through ``self._training`` and ``self._feature``; column 1 of the first
    feature block and the second feature block are stacked into the
    observation matrix, and a model initialised from
    ``self.init_left_right_model(nstate)`` is fitted to it.

    Returns:
        list: ``{'id': label, 'model': fitted_model}`` dictionaries, also
        stored in ``self._models``.
    """
    trained = []
    for nstate, label, seq in protos:
        prepared = self._training.run(seq)
        first_block, second_block = self._feature.run(prepared, True)
        observations = np.vstack((first_block[:, 1], second_block)).T

        startprob, transmat = self.init_left_right_model(nstate)
        classifier = GaussianHMM(
            n_components=nstate,
            covariance_type=self._covar,
            transmat=transmat,
            startprob=startprob,
        )
        classifier.fit(np.array([observations]))
        trained.append(dict(id=label, model=classifier))

    self._models = trained
    return trained
def create_hmm_by_labels(labels, dbs):
    """Fit one 3-state full-covariance GaussianHMM to the sequences of all labels.

    The previous implementation collected the sequences of every label but
    then kept only ``np.array(seqs_all)[0]``, i.e. the first label's
    sequences, silently ignoring the remaining labels.  The sequences of all
    labels are now pooled into one training list.

    Args:
        labels: Labels whose sequences are used for training.
        dbs: Databases forwarded to ``get_sequences_by_label_multi_dbs``.

    Returns:
        The fitted ``GaussianHMM``.

    Raises:
        ValueError: If no training sequence was found for the given labels.
    """
    # assumes get_sequences_by_label_multi_dbs returns an iterable of
    # observation sequences for one label - TODO confirm against its definition
    seqs_all = []
    for label in labels:
        seqs_all.extend(get_sequences_by_label_multi_dbs(label, dbs))

    if not seqs_all:
        raise ValueError("no training sequences found for labels %r" % (labels,))

    n_states = 3
    hmm = GaussianHMM(n_states, covariance_type="full", n_iter=1000)
    hmm.fit(seqs_all)
    return hmm
def run(self, protos):
    """Train one GaussianHMM per prototype and remember the results.

    Each prototype is a ``(nstate, label, seq)`` triple.  The sequence is run
    through ``self._training`` and ``self._feature``; column 1 of the first
    feature block and the second feature block are stacked into the
    observation matrix, and a model initialised from
    ``self.init_left_right_model(nstate)`` is fitted to it.

    Returns:
        list: ``{'id': label, 'model': fitted_model}`` dictionaries, also
        stored in ``self._models``.
    """
    trained = []
    for nstate, label, seq in protos:
        prepared = self._training.run(seq)
        first_block, second_block = self._feature.run(prepared, True)
        observations = np.vstack((first_block[:, 1], second_block)).T

        startprob, transmat = self.init_left_right_model(nstate)
        classifier = GaussianHMM(
            n_components=nstate,
            covariance_type=self._covar,
            transmat=transmat,
            startprob=startprob,
        )
        classifier.fit(np.array([observations]))
        trained.append(dict(id=label, model=classifier))

    self._models = trained
    return trained
def HMM(data, sid, means_prior=None):
    """Fit a diagonal GaussianHMM to the variation and volume of one security.

    ``data`` is _not_ an event-frame, but an array of the most recent trade
    events.  The model is created with ``means_prior`` (the means of the
    previous model) as a prior.

    Args:
        data: Object exposing ``variation[sid]`` and ``volume[sid]``.
        sid: Security identifier used to select the two series.
        means_prior: Optional prior for the state means.

    Returns:
        The fitted model, or ``None`` when there are fewer observations than
        ``HIDDEN_STATES``.
    """
    estimator = GaussianHMM(HIDDEN_STATES,
                            covariance_type="diag",
                            n_iter=10,
                            means_prior=means_prior,
                            means_weight=0.5)

    # Extract variation and volume
    variation = data.variation[sid].values
    traded_volume = data.volume[sid].values
    observations = np.column_stack([variation, traded_volume])

    # Too little data to estimate HIDDEN_STATES states.
    if len(variation) < HIDDEN_STATES:
        return None

    # Estimate model
    estimator.fit([observations])
    return estimator
def get_hmms (self): for gesture_type in self.gesture_types: print_status ("Get_Hmms", "Fitting for gesture_type: " + gesture_type) ### Step 1: fill hmm_examples appropriately ### hmm_examples = [] for gesture in self.gestures[gesture_type]: hmm_rep = gesture.get_hmm_rep () hmm_examples.append (hmm_rep) ### Step 2: fit parameters for the hmm ### hmm = GaussianHMM (self.num_hmm_states) hmm.fit (hmm_examples) ### Step 3: store the hmm in self.hmms ### self.hmms[gesture_type] = hmm print_inner_status (gesture_type, "predicted the following sequences: (score: sequence)") for example in hmm_examples: print " ", hmm.score (example), ": ", hmm.predict (example)
def gaussian_hmm_model(stock_market_quote, n_components=5): close_v = np.asarray(stock_market_quote.get_closing_price()) volume = np.asanyarray(stock_market_quote.get_volume()) volume = volume[:-1] diff = close_v[1:] - close_v[:-1] close_v = close_v[1:] X = np.column_stack([diff, volume]) model = GaussianHMM(n_components, covariance_type="diag") model.fit([X]) hidden_states = model.predict(X) print "Transition matrix" print model.transmat_ print "" print "means and vars of each hidden state" for i in xrange(n_components): print "%dth hidden state" % i print "mean = ", model.means_[i] print "var = ", np.diag(model.covars_[i]) print "" '''Visualization of Closing Price with respect to Volume, clustered by hidden states of data ''' fig = mlp.figure() ax = fig.add_subplot(111) for i in xrange(n_components): idx = (hidden_states == i) ax.plot(volume[idx], close_v[idx], 'o', label="%dth hidden state" % i) ax.legend() ax.set_xlabel('Volume of Stock', fontsize=20) ax.set_ylabel('Closing Price of Stock', fontsize=20) ax.set_title("""Quote's Volume and closing volume change in different hidden states""") ax.grid(True) mlp.show()
def hmm(samples):
    """Fit a 3-state GaussianHMM and return its per-frame log-probabilities.

    NaN rows are dropped first.  One-dimensional input is reshaped into a
    single-feature column before fitting.

    The previous implementation stopped in ``pdb.set_trace()`` right before
    returning, which blocks every non-interactive caller; that breakpoint and
    the unused ``predict`` frame it was inspecting have been removed.

    Args:
        samples: pandas object holding the observations (must provide
            ``dropna()``, ``index`` and ``values``).

    Returns:
        tuple: ``(model, frame)`` where ``frame`` is a DataFrame indexed like
        the cleaned samples with columns ``frame_<k>`` (per-state frame
        log-likelihood) and ``forward_<k>`` (forward lattice).
    """
    model = GaussianHMM(n_components=3)
    samples = samples.dropna()
    idx = samples.index
    if samples.values.ndim < 2:
        # The model expects a 2-D (n_samples, n_features) observation matrix.
        samples = samples.values.reshape(samples.values.shape[0], 1)
    model.fit([samples])

    # NOTE(review): relies on private sklearn.hmm helpers.
    framelogprob = model._compute_log_likelihood(samples)
    _, fwdlattice = model._do_forward_pass(framelogprob)

    n_states = model.means_.shape[0]
    # Real lists (not lazy map objects) so the labels are valid on Python 3 too.
    frame = pd.DataFrame(
        framelogprob,
        index=idx,
        columns=["frame_" + str(k) for k in range(n_states)],
    )
    forward = pd.DataFrame(
        fwdlattice,
        index=idx,
        columns=["forward_" + str(k) for k in range(n_states)],
    )
    return model, frame.join(forward)
def main():
    """Train a full-covariance GaussianHMM and pickle it.

    First ARG: list of training files (one path per line)
    Second ARG: save name for model

    The transition matrix comes from ``calc_transmat`` and is kept fixed:
    only means, covariances and start probabilities are initialised and
    re-estimated (``'mcs'``).  After fitting, ``audio.arff`` is decoded and
    the number of frames whose decoded label matches the reference is
    printed.

    Changes from the previous version: both files are opened in ``with``
    blocks so they are closed (and the pickle flushed) deterministically;
    only the trailing newline is stripped from each listed path, so a final
    line without a newline no longer loses its last character; the training
    data is loaded once instead of twice.
    """
    file1 = sys.argv[1]
    outname = sys.argv[2]

    with open(file1, 'r') as listing:
        file_list = [line.rstrip('\n') for line in listing]

    models, transitions, priors = calc_transmat(file_list)
    hmm = GaussianHMM(
        transitions.shape[0],
        "full",
        #startprob=priors,
        n_iter=500,
        transmat=transitions,
        init_params='mcs',
        params='mcs',
    )

    feat, lab = load_feats_labels(file_list)
    #hmm.means_ = np.transpose(models['mean'])
    #hmm.covars_ = models['sigma']

    print('Fitting')
    start = timeit.default_timer()
    hmm.fit([np.transpose(feat)])
    stop = timeit.default_timer()
    print('Training Time: ' + str(stop - start))

    features, labels = load_feats_labels(['audio.arff'])
    _, seq = hmm.decode(np.transpose(features))

    # Number of frames whose decoded state maps back to the reference label.
    print(sum(1 for expected, state in zip(labels, seq)
              if expected == int2label(state)))

    with open(outname, "wb") as model_file:
        pickle.dump(hmm, model_file)

    plt.imshow(transitions, interpolation='nearest')
    plt.show()
def main():
    """Train a full-covariance GaussianHMM and pickle it.

    First ARG: list of training files (one path per line)
    Second ARG: save name for model

    The transition matrix comes from ``calc_transmat`` and is kept fixed:
    only means, covariances and start probabilities are initialised and
    re-estimated (``'mcs'``).  After fitting, ``audio.arff`` is decoded and
    the number of frames whose decoded label matches the reference is
    printed.

    Changes from the previous version: both files are opened in ``with``
    blocks so they are closed (and the pickle flushed) deterministically;
    only the trailing newline is stripped from each listed path, so a final
    line without a newline no longer loses its last character; the training
    data is loaded once instead of twice.
    """
    file1 = sys.argv[1]
    outname = sys.argv[2]

    with open(file1, 'r') as listing:
        file_list = [line.rstrip('\n') for line in listing]

    models, transitions, priors = calc_transmat(file_list)
    hmm = GaussianHMM(
        transitions.shape[0],
        "full",
        #startprob=priors,
        n_iter=500,
        transmat=transitions,
        init_params='mcs',
        params='mcs',
    )

    feat, lab = load_feats_labels(file_list)
    #hmm.means_ = np.transpose(models['mean'])
    #hmm.covars_ = models['sigma']

    print('Fitting')
    start = timeit.default_timer()
    hmm.fit([np.transpose(feat)])
    stop = timeit.default_timer()
    print('Training Time: ' + str(stop - start))

    features, labels = load_feats_labels(['audio.arff'])
    _, seq = hmm.decode(np.transpose(features))

    # Number of frames whose decoded state maps back to the reference label.
    print(sum(1 for expected, state in zip(labels, seq)
              if expected == int2label(state)))

    with open(outname, "wb") as model_file:
        pickle.dump(hmm, model_file)

    plt.imshow(transitions, interpolation='nearest')
    plt.show()
class GaussianHmmLib:
    """Gaussian-HMM regime analysis of one stock (closing-price change and volume).

    ref: http://scikit-learn.org/0.14/auto_examples/applications/plot_hmm_stock_analysis.html
         https://www.quantopian.com/posts/inferring-latent-states-using-a-gaussian-hidden-markov-model

    bear market: smaller mean, higher variance
    bull market: higher mean, smaller variance
    """

    def __init__(self, dbhandler, *args, **kwargs):
        """Store the handler and read the optional model settings.

        Keyword Args:
            n_components: Number of hidden states (default 5).
            n_iter: Maximum number of EM iterations (default 1000).

        Previously a missing keyword raised ``KeyError`` because
        ``kwargs.pop(key)`` had no default, so the ``or 5`` / ``or 1000``
        fallbacks could never apply.  Missing, ``None`` and zero values now
        all fall back to the default.
        """
        self.dbhandler = dbhandler
        self.sids = self.dbhandler.stock.ids
        self.n_components = self._int_option(kwargs, 'n_components', 5)
        self.n_iter = self._int_option(kwargs, 'n_iter', 1000)

    @staticmethod
    def _int_option(options, key, default):
        """Pop ``key`` from ``options`` as an int; absent/None/0 give ``default``."""
        raw = options.pop(key, None)
        if raw is None:
            return default
        return int(raw) or default

    def run(self, data):
        """Fit the HMM on the first stock id and decode its hidden states."""
        sid = self.sids[0]
        # NOTE(review): the dates are read from the 'price' column - confirm
        # that this column really holds the dates.
        self.dates = data[sid]['price'].values
        self.close_v = data[sid]['close_v'].values
        self.volume = data[sid]['volume'].values[1:]

        # take diff of close value
        # this makes len(diff) = len(close_t) - 1
        # therefore, others quantity also need to be shifted
        self.diff = self.close_v[1:] - self.close_v[:-1]

        # pack diff and volume for training
        self.X = np.column_stack([self.diff, self.volume])

        # make an HMM instance and execute fit; n_iter is already configured
        # on the estimator, so it is not repeated as a (deprecated) fit kwarg
        self.model = GaussianHMM(self.n_components,
                                 covariance_type="diag",
                                 n_iter=self.n_iter)
        self.model.fit([self.X])

        # predict the optimal sequence of internal hidden state
        self.hidden_states = self.model.predict(self.X)

    def report(self):
        """Print the trained parameters and save a per-state price plot."""
        # print trained parameters and plot
        # (single-argument print calls emit the same text on Python 2 and 3)
        print("Transition matrix")
        print(self.model.transmat_)
        print("")

        print("means and vars of each hidden state")
        for i in range(self.n_components):
            print("%dth hidden state" % i)
            print("mean =  %s" % (self.model.means_[i],))
            print("var =  %s" % (np.diag(self.model.covars_[i]),))
            print("")

        years = YearLocator()   # every year
        months = MonthLocator()  # every month
        yearsFmt = DateFormatter('%Y')
        fig = plt.figure()
        ax = fig.add_subplot(111)

        # hidden_states has one entry per diff, i.e. one fewer than the raw
        # series; drop the first date/close so the state masks line up.
        dates = self.dates[1:]
        close_v = self.close_v[1:]

        for i in range(self.n_components):
            # use fancy indexing to plot data in each state
            idx = (self.hidden_states == i)
            ax.plot_date(dates[idx], close_v[idx], 'o',
                         label="%dth hidden state" % i)
        ax.legend()

        # format the ticks
        ax.xaxis.set_major_locator(years)
        ax.xaxis.set_major_formatter(yearsFmt)
        ax.xaxis.set_minor_locator(months)
        ax.autoscale_view()

        # format the coords message box
        ax.fmt_xdata = DateFormatter('%Y-%m-%d')
        ax.fmt_ydata = lambda x: '$%1.2f' % x
        ax.grid(True)

        fig.autofmt_xdate()
        plt.savefig("gaussianhmm_%s.png" % (self.sids[0]))
# therefore, others quantity also need to be shifted diff = close_v[1:] - close_v[:-1] dates = dates[1:] close_v = close_v[1:] # pack diff and volume for training X = np.column_stack([diff, volume]) ############################################################################### # Run Gaussian HMM print "fitting to HMM and decoding ...", n_components = 5 # make an HMM instance and execute fit model = GaussianHMM(n_components, "diag") model.fit([X], n_iter=1000) # predict the optimal sequence of internal hidden state hidden_states = model.predict(X) print "done\n" ############################################################################### # print trained parameters and plot print "Transition matrix" print model.transmat_ print "" print "means and vars of each hidden state" for i in xrange(n_components): print "%dth hidden state" % i
# Fragment of a training routine: `source` and `output_dir` are defined before
# this excerpt, and the tagging loop continues past it.
# Convert every matching source file and save the non-empty results as CSV.
adaptor = XMLAdaptorMultiWindow1()
for file_path in glob.glob(source):
    observation_sequence = adaptor.convert(file_path)
    if observation_sequence:
        observation_sequence.save(output_dir + "training_observations/" + os.path.basename(file_path) + '.csv')

print "Loading observations from CSV..."
# Load observation sequences from CSV
observation_sequences, filenames = readObservationSequences(output_dir + "training_observations/*.csv", return_filenames=True)
training_sequences = [ observation_sequence.getNumpyArray() for observation_sequence in observation_sequences ]

print "Training Multivariate Gaussian HMM model..."
n_components = 3
model = GaussianHMM(n_components, covariance_type="full", n_iter=10)
model.fit(training_sequences)

# save Gaussian HMM model to file
# (the directory name embeds the number of states)
model_dir = output_dir + '%sstates/' % n_components
mkdir_p(model_dir)
serialiser = HMMSerialiser(model, feature_names=adaptor.getFeatures())
serialiser.saveXML(model_dir + 'model.xml')

print "Tagging observation sequences..."
tagged_sequences_dir = model_dir + "tagged_sequences/"
mkdir_p(tagged_sequences_dir)
for i, training_sequence in enumerate(training_sequences):
    # Decode the sequence and label each observation "H<state>".
    hidden_state_sequence = model.predict(training_sequence)
    for j, state in enumerate(hidden_state_sequence):
        observation_sequences[i].getObservation(j).setState( "H%s" % state )
    # save tagged sequence to file
# Fragment. The first statements are the tail of a helper whose header lies
# before this excerpt (presumably tagTrainingData, which is called below -
# confirm).  NOTE(review): the original indentation was lost; the nesting
# shown here is reconstructed and must be checked against the real file.
        if save:
            # Store the tagged copy next to the other outputs, renamed to *.tagged.csv.
            filename = filenames[i].replace('.csv', '.tagged.csv')
            observation_sequences[i].save(save + os.sep + filename, include_state=True)
    return likelihood_of_training_data, observations_per_state


### PROTOTYPE ROUTINE
print "Loading training data..."
# Load observation sequences from CSV
observation_sequences, filenames = readObservationSequences(training_data, return_filenames=True)
training_sequences = [ observation_sequence.getNumpyArray() for observation_sequence in observation_sequences ]

print "Training multivariate Gaussian HMM (base model)..."
# Implements (1.), (2.)
base_model = GaussianHMM(n_states, covariance_type=covariance_type, n_iter=num_EM_iterations)
base_model.fit(training_sequences)

# save base model
print "\tSaving base model to file..."
saveModel(base_model, 'base_model', observation_sequences[0].getFeatureNames())

# tag training data using base model
print "\tTagging training data using base model..."
# Implements (3.), (4.)
likelihood_of_training_data, observations_per_state = tagTrainingData(
    base_model,
    training_sequences,
    list(observation_sequences), # pass a copy
    save='base_model/tagged_training_data',
    filenames=filenames
)
print "\tTotal log lokelihood of the training data according to base model: %.4f" % likelihood_of_training_data

# The loop body lies beyond this excerpt.
previous_model = base_model
for i in range(num_GMM_models):
def makeGaussHMM(d):
    """Normalize the sequences in ``d`` in place and fit a 4-state GaussianHMM.

    Args:
        d: Mutable sequence of observation sequences; every entry is replaced
            by ``normalize(entry)``.

    Returns:
        Whatever ``GaussianHMM.fit`` returns for the normalized sequences.
    """
    for position, sequence in enumerate(d):
        d[position] = normalize(sequence)
    return GaussianHMM(4, n_iter=10000).fit(d)
#n_features = sum(good_features2) n_features = X_new.shape[1] print(n_features) # # clf = svm.SVC() # # clf.fit(X_new, y) # hmm = MultinomialHMM() # pos = np.where(np.diff(y) != 0)[0] # d = np.hstack([0, pos+1, len(y)]) # lens = np.diff(d) # hmm.fit(X_new, y, lens) hmm = GaussianHMM(n_components=20) hmm.fit([X_new]) clusters = pred = hmm.predict(X_new) # neigh = KNeighborsClassifier(n_neighbors=10, weights='distance') # scores = cross_validation.cross_val_score(neigh, X_new, y, cv=5) # print(scores) # # neigh.fit(X_new, y) # good_features = ETC.feature_importances_ >= 0.0005 # print(np.sum(good_features)) # X_new2 = X[..., good_features] # n_features = 20 # pca = PCA(n_components = n_features) # pca.fit(X_new)
# Fragment: the call these keyword arguments belong to opens before this
# excerpt (the object is later used as `model`).
    algorithm="viterbi",
    covariance_type="full",
    covars_prior=0.01,
    covars_weight=1,
    means_prior=None,
    means_weight=0,
    n_components=5,
    random_state=None,
    startprob=None,
    startprob_prior=1.0,
    transmat=None,
    transmat_prior=1.0,
)

print "Fitting model..."
# NOTE(review): farm1.get_output() is called once for fitting and again for
# prediction - confirm both calls return the same data.
model.fit([farm1.get_output()], n_iter=1000)

print "Predicting hidden states..."
hidden_states = model.predict(farm1.get_output())

print "Transition matrix"
print model.transmat_
print ""

print "mean and vars of the hidden states"
# The literal 5 matches n_components=5 above.
for i in range(5):
    print "%dth hidden state" % i
    print "mean = ", model.means_[i]
    print "var = ", np.diag(model.covars_[i])
    print ""
class GaussianHmmLib:
    """Gaussian-HMM regime analysis of one stock (closing-price change and volume).

    ref: http://scikit-learn.org/0.14/auto_examples/applications/plot_hmm_stock_analysis.html
         https://www.quantopian.com/posts/inferring-latent-states-using-a-gaussian-hidden-markov-model

    bear market: smaller mean, higher variance
    bull market: higher mean, smaller variance
    """

    def __init__(self, dbhandler, *args, **kwargs):
        """Store the handler and read the optional model settings.

        Keyword Args:
            n_components: Number of hidden states (default 5).
            n_iter: Maximum number of EM iterations (default 1000).

        Previously a missing keyword raised ``KeyError`` because
        ``kwargs.pop(key)`` had no default, so the ``or 5`` / ``or 1000``
        fallbacks could never apply.  Missing, ``None`` and zero values now
        all fall back to the default.
        """
        self.dbhandler = dbhandler
        self.sids = self.dbhandler.stock.ids
        self.n_components = self._int_option(kwargs, 'n_components', 5)
        self.n_iter = self._int_option(kwargs, 'n_iter', 1000)

    @staticmethod
    def _int_option(options, key, default):
        """Pop ``key`` from ``options`` as an int; absent/None/0 give ``default``."""
        raw = options.pop(key, None)
        if raw is None:
            return default
        return int(raw) or default

    def run(self, data):
        """Fit the HMM on the first stock id and decode its hidden states."""
        sid = self.sids[0]
        # NOTE(review): the dates are read from the 'price' column - confirm
        # that this column really holds the dates.
        self.dates = data[sid]['price'].values
        self.close_v = data[sid]['close_v'].values
        self.volume = data[sid]['volume'].values[1:]

        # take diff of close value
        # this makes len(diff) = len(close_t) - 1
        # therefore, others quantity also need to be shifted
        self.diff = self.close_v[1:] - self.close_v[:-1]

        # pack diff and volume for training
        self.X = np.column_stack([self.diff, self.volume])

        # make an HMM instance and execute fit; n_iter is already configured
        # on the estimator, so it is not repeated as a (deprecated) fit kwarg
        self.model = GaussianHMM(self.n_components,
                                 covariance_type="diag",
                                 n_iter=self.n_iter)
        self.model.fit([self.X])

        # predict the optimal sequence of internal hidden state
        self.hidden_states = self.model.predict(self.X)

    def report(self):
        """Print the trained parameters and save a per-state price plot."""
        # print trained parameters and plot
        # (single-argument print calls emit the same text on Python 2 and 3)
        print("Transition matrix")
        print(self.model.transmat_)
        print("")

        print("means and vars of each hidden state")
        for i in range(self.n_components):
            print("%dth hidden state" % i)
            print("mean =  %s" % (self.model.means_[i],))
            print("var =  %s" % (np.diag(self.model.covars_[i]),))
            print("")

        years = YearLocator()   # every year
        months = MonthLocator()  # every month
        yearsFmt = DateFormatter('%Y')
        fig = plt.figure()
        ax = fig.add_subplot(111)

        # hidden_states has one entry per diff, i.e. one fewer than the raw
        # series; drop the first date/close so the state masks line up.
        dates = self.dates[1:]
        close_v = self.close_v[1:]

        for i in range(self.n_components):
            # use fancy indexing to plot data in each state
            idx = (self.hidden_states == i)
            ax.plot_date(dates[idx], close_v[idx], 'o',
                         label="%dth hidden state" % i)
        ax.legend()

        # format the ticks
        ax.xaxis.set_major_locator(years)
        ax.xaxis.set_major_formatter(yearsFmt)
        ax.xaxis.set_minor_locator(months)
        ax.autoscale_view()

        # format the coords message box
        ax.fmt_xdata = DateFormatter('%Y-%m-%d')
        ax.fmt_ydata = lambda x: '$%1.2f' % x
        ax.grid(True)

        fig.autofmt_xdate()
        plt.savefig("gaussianhmm_%s.png" % (self.sids[0]))
# Fragment: the first statements finish a plot that was started before this
# excerpt, and plot_HMM below continues past it.
ax.set_xlabel("No. of Clusters")
ax.set_ylabel("Information Loss")
ax.set_xticks(range(start,end+1),minor=True)
ax.legend()
ax.grid(True,which='both')
plt.show()

##############################################################################
# Run HMM
# Observation matrix: the target first, then three columns of X_train.
X_hmm = np.column_stack((y_train,X_train[['hour_of_day','weather','day_of_week']]))
#X_hmm = np.column_stack((y_train,X_train[['hour_of_day','weather']]))
#X_hmm = y_train
from sklearn.hmm import GaussianHMM
n_clusters = 9
#n_clusters = 17
model = GaussianHMM(n_clusters,covariance_type='diag',n_iter=1000)
model.fit([X_hmm])
hidden_states = model.predict(X_hmm)
viterbi_states = model.decode(X_hmm)

# NOTE(review): x_ax is assigned three times in a row; only the last
# assignment (datetimes taken from the index) is still in effect afterwards.
x_ax = np.asarray(range(len(X_hmm)))
x_ax = X_train['hour_of_day'] + X_train['day_of_week']*24
#x_ax = X_train['hour_of_day']
x_ax = np.asarray([item.to_datetime() for item in X_train.index])

# Plot y_ax against x_ax with one series per hidden state (definition is
# truncated in this excerpt).
def plot_HMM(n_clusters,hidden_states,x_ax,y_ax):
    #PLOT HIDDEN STATES
    fig = plt.figure()
    ax = fig.add_subplot(111)
    for i in xrange(n_clusters):
        print i
        idx = (hidden_states==i)
        if i<7:
            ax.plot(x_ax[idx],y_ax[idx],'o',label='%dth state'%i)
# Fragment of a script comparing two series: the inputs are prepared before
# this excerpt, and the final loop body lies beyond it.
close_v1 = close_v1[1:]
diff2 = close_v2[1:] - close_v2[:-1]
close_v2 = close_v2[1:]

# pack diff and volume for training
X1 = np.column_stack([diff1, volume1])
X2 = np.column_stack([diff2, volume2])

###############################################################################
# Run Gaussian HMM
print("fitting to HMM and decoding ...", end='')
n_components = 5

# make an HMM instance and execute fit
# (one independent model per series, with identical settings)
model1 = GaussianHMM(n_components, covariance_type="diag", n_iter=1000)
model2 = GaussianHMM(n_components, covariance_type="diag", n_iter=1000)
model1.fit([X1])
model2.fit([X2])

# predict the optimal sequence of internal hidden state
hidden_states1 = model1.predict(X1)
hidden_states2 = model2.predict(X2)
print("done\n")

# calculate similarity measure
# states2 enumerates every permutation of the state labels
# (n_components! tuples).
states1 = range(n_components)
states2 = list(itertools.permutations(states1))
print(states1)
print(len(states2))
sims = []
for i in range(len(states2)):
# Load one trimmed data file and pull its columns out as arrays.
data = t1dmread('trimmedDataFiles/MYFILE101.no_gaps_trimmed.csv')
timeStamps101 = np.array(data['timestamp'])
skinTemp101 = np.array(data['skin temp'])
airTemp101 = np.array(data['air temp'])
steps101 = np.array(data['steps'])
hr101 = np.array(data['hr'])
cgm101 = np.array(data['cgm'])

# Skin temperature relative to air temperature.
normskintemp101 = skinTemp101 - airTemp101

# Two features per sample: cgm and the normalised skin temperature.
toFit = np.column_stack([cgm101,normskintemp101])

print("Fitting to HMM")
n_components = 4
model = GaussianHMM(n_components,covariance_type='diag',n_iter=1000)
model.fit([toFit])
hidden_states = model.predict(toFit)
print("done\n")

print("Transition Matrix for Normed Skin Temperature")
print(model.transmat_)
print("\nMeans and variances of each hidden state: \n")
for i in range(n_components):
    print("%dth hidden state:" % i)
    print("Mean = ",model.means_[i])
    print("Variance = ",np.diag(model.covars_[i]))
    print()
# Fragment: the first statements are the tail of a loop body whose header lies
# before this excerpt.  NOTE(review): the original indentation was lost; the
# nesting shown here (including which writes sit under `if args.verbose`) is
# reconstructed and must be checked against the real file.
    if count < args.maf:
        trimmed_count = 0
    counts.append(trimmed_count)
    kmer_stash.append(kmer)
    i += 1

if not len(counts):
    sys.exit("No k-mer counts remain after filtering; check thresholds and try again.")

## fit HMM to counts
if len(args.mu) != len(args.sigmasq):
    sys.exit("Vectors of prior means and variances must be same length.")

# log1p-transform the counts and reshape them into a single-feature column.
counts = np.reshape(np.log1p(np.array(counts, dtype = "int")), (-1,1))
# Only the length of args.mu is used here, as the number of hidden states.
hmm = GaussianHMM( len(args.mu) )
hmm.fit([counts])

# NOTE(review): the "Fitting HMM..." message is written after the fit has
# already completed.
if args.verbose:
    sys.stderr.write("Fitting HMM to k-mer counts, assuming {} hidden states...\n".format(len(args.mu)))
    sys.stderr.write("means:\n" + str(hmm.means_) + "\n")
    sys.stderr.write("covariances:\n" + str(hmm.covars_) + "\n")
    sys.stderr.write("\n")
    sys.stderr.write("Processing possible variant sites...\n")
    sys.stderr.write("\trejecting haplotypes with read count < {}\n".format(args.maf))
    sys.stderr.write("\taccepting as TE/ME any haplotype with max count > {}\n".format(args.maxhits))

## find positions of transitions
# Indices where the decoded state differs from the next one.
states = hmm.predict(counts)
breaks = np.where(np.diff(states))[0]
break_coords = []
break_kmers = []
def test_1():
    """Yield one fit check per model type on random 100x2 data.

    Generator-style test: each yielded callable fits one model (first the
    VonMisesHMM, then the GaussianHMM) on the same random sequence.
    """
    vm = VonMisesHMM(n_states=5)
    gm = GaussianHMM(n_components=5)
    X1 = np.random.randn(100, 2)

    def fit_von_mises():
        return vm.fit([X1])

    def fit_gaussian():
        return gm.fit([X1])

    yield fit_von_mises
    yield fit_gaussian
# Fragment: names such as repInx, numReps, numState, indata, num_data and data
# come from code before this excerpt (presumably enclosing loops - confirm);
# the original indentation was lost, so everything is shown at one level.
print "Doing replicate", repInx, "/", numReps, "with", numState, "states"
sys.stdout.flush()

# cluster all the available data and use that as initial point
means = cluster.KMeans(n_clusters=numState).fit(indata.iloc[:,0:num_data]).cluster_centers_
cv = np.cov(indata.iloc[:,0:num_data].T)
covars = mixture.distribute_covar_matrix_to_match_covariance_type(cv, "tied", num_data)
# Replace exact zeros in the covariance with a small positive value.
covars[covars==0] = 1e-5

# init_params lists every ASCII letter except 'c'/'C' and 'm'/'M'; presumably
# this keeps fit() from re-initialising the means and covariances set below -
# verify against the sklearn.hmm documentation.
model = GaussianHMM(numState, covariance_type="tied", n_iter=1000, init_params='abdefghijklnopqrstuvwxyzABDEFGHIJKLNOPQRSTUVWXYZ')
model.means_ = means
model.covars_ = covars

print("Fitting model...")
sys.stdout.flush()
model.fit(data)

print("Decoding states...")
sys.stdout.flush()
# do a loop over everything and record in one long array
states = np.array([])
score = 0
for i in range(0, len(data)):
    # decode() returns a pair; element 1 is appended to the state array.
    hidden_states = model.decode(data[i])
    states = np.append(states, hidden_states[1])
    score = score + model.score(data[i])

print("Saving data...")
sys.stdout.flush()
# save the states and LLH
# Fragment: classlabels, quantized_set, datasample, testingset and kmms come
# from code before this excerpt, and the final loop continues past it.
# NOTE(review): the original indentation was lost; the nesting is reconstructed.
nclasses = len(np.unique(classlabels))
hmmclass = []
#print classlabels
print quantized_set.shape

# Train one model per class from the columns of quantized_set whose label
# equals the class index.
for i in range(0, nclasses):
    newtrainset = []
    for k in range(0, len(classlabels)):
        if classlabels[k] == i:
            #print i
            #print k
            newtrainset.append(quantized_set[:, k])
    newtrainset = np.asarray(newtrainset)
    #print newtrainset.shape
    hmm = HMM(64)
    hmm.fit([newtrainset])
    hmmclass.append(hmm)

#print testingset.shape
# Cut the test set into consecutive blocks of rowdivision rows and quantize
# each block.
rowdivision = datasample.shape[0]
t = []
for i in xrange(int(round(testingset.shape[0] / rowdivision))):
    t.append(
        quantize_data(testingset[rowdivision * i:rowdivision * (i + 1), :], kmms))
#print t.shape
t = np.asarray(t)

rlabels = []
for ts in t:
    i = 0
    index = 0
# Fragment: file_path, incident_time, model, means and covs come from code
# before this excerpt (presumably a loop over files - confirm).
# NOTE(review): the original indentation was lost; the nesting is reconstructed.
t, last_index = overlapped_samples(file_path, incident_reported_time=int(incident_time), overlap=5, window=10, with_end=2)
if t is None:
    print file_path, 'is bad'
else:
    # Seed the model with the current means/covariances before refitting.
    model.means_ = means
    model.covars_ = covs
    print 'shape intial', np.shape(covs)
    # The string literal below is disabled debugging code.
    ''' best_seq = model.decode(t) print 'intial,', best_seq print 'final means', model.means_ print 'initial trans', tmat print 'initial startprobs', smat, sum(smat) '''
    model.fit([t])
    best_seq = model.decode(t)
    print 'file', file_path
    print 'final,', best_seq
    #print 'final means', model.means_
    #print 'final trans', model.transmat_
    #print 'final startprob', model.startprob_
    # Carry the fitted parameters forward only when none of them is NaN.
    if np.isnan(model.means_).any() == False and np.isnan(model.covars_).any() == False:
        means = model.means_
        # Stack the diagonal of every state's covariance matrix, one row per state.
        covs = np.array([np.diag(model.covars_[0])])
        for i in range(1, model.n_components):
            covs = np.vstack((covs, [np.diag(model.covars_[i])]))
        print 'shape after', np.shape(covs)
        tmat = model.transmat_
class HMM(object):
    '''
    Create and manipulate a Gaussian HMM over steam-load and weather series.

    Keyword Args:
        steam_obj: object exposing the steam time series as ``ts``
            (default: a new ``Steam()``).
        weather_obj: object exposing the weather time series as ``ts``
            (default: a new ``Weather()``).
        n_states: number of hidden states.  When omitted, an AIC/BIC elbow
            plot is shown and ``n_states`` stays ``None`` until assigned.
    '''

    def __init__(self, **kwargs):
        # Build the default objects only when they are really needed.
        if 'steam_obj' in kwargs:
            self.steam_obj = kwargs['steam_obj']
        else:
            self.steam_obj = Steam()
        if 'weather_obj' in kwargs:
            self.weather_obj = kwargs['weather_obj']
        else:
            self.weather_obj = Weather()

        # its imp that the order for columns is maintain
        # while slicing the HMM model
        self.df_hmm, self.X_hmm = self.gen_meta_data(self.steam_obj,
                                                     self.weather_obj)

        # Always define n_states so later code can test it instead of
        # hitting an AttributeError.
        self.n_states = kwargs.get('n_states')
        if 'n_states' not in kwargs:
            self.plot_elbow(3, 15)

    def __len__(self):
        return len(self.X_hmm)

    def build_model(self):
        '''
        Fit a diagonal GaussianHMM with ``self.n_states`` states to
        ``self.X_hmm`` and decode the hidden states.

        Raises ValueError when ``n_states`` has not been chosen yet.
        '''
        if self.n_states is None:
            raise ValueError("n_states is not set; pass n_states=... or "
                             "assign it after inspecting the elbow plot")
        X_hmm = self.X_hmm
        self.model = GaussianHMM(self.n_states, covariance_type='diag',
                                 n_iter=1000)
        self.model.fit([X_hmm])
        self.hidden_states = self.model.predict(X_hmm)

    def build_forecast_model(self):
        '''
        Derive ``self.model_forecast`` from the fitted model by dropping the
        first feature column from its means and covariances.
        '''
        model = self.model
        model_forecast = copy.deepcopy(model)
        model_forecast.n_features = model.n_features - 1
        # NOTE(review): writes the estimator's private _means_/_covars_.
        model_forecast._means_ = model.means_[:, 1:]
        model_forecast._covars_ = model._covars_[:, 1:]
        self.model_forecast = model_forecast

    def gen_meta_data(self, steam_obj=None, weather_obj=None):
        '''
        Build the feature frame and the observation matrix.

        With a steam object the columns are steam, weather, hour_of_day (in
        that order); without one they are weather, hour_of_day.

        Returns (df_hmm, X_hmm).
        '''
        if steam_obj is not None:
            index = steam_obj.ts.index
            hour_of_day = index.map(lambda x: x.hour + (x.minute / 60.0))
            df_hmm = pd.DataFrame({'steam': steam_obj.ts,
                                   'weather': weather_obj.ts,
                                   'hour_of_day': hour_of_day},
                                  index=index)
            columns = ['steam', 'weather', 'hour_of_day']
        else:
            index = weather_obj.ts.index
            hour_of_day = index.map(lambda x: x.hour + (x.minute / 60.0))
            df_hmm = pd.DataFrame({'weather': weather_obj.ts,
                                   'hour_of_day': hour_of_day},
                                  index=index)
            columns = ['weather', 'hour_of_day']

        # Explicit column selection keeps the column order fixed;
        # .values replaces as_matrix(), which newer pandas no longer has.
        X_hmm = df_hmm[columns].values
        return df_hmm, X_hmm

    def plot_model(self, x_ax=None, y_ax=None):
        '''
        Plot ``y_ax`` against ``x_ax`` with one series per hidden state.

        Defaults: the steam index as datetimes on x and the first column of
        ``X_hmm`` on y.
        '''
        X_hmm = self.X_hmm
        steam_ts = self.steam_obj.ts
        # `is None` instead of `== None`: comparing an array with == is
        # elementwise and has no single truth value.
        if x_ax is None:
            x_ax = np.asarray([item.to_datetime() for item in steam_ts.index])
        if y_ax is None:
            y_ax = X_hmm[:, 0]
        hidden_states = self.hidden_states
        n_states = self.n_states

        # One marker per group of seven states; the cycle wraps so states
        # beyond the 28th are still drawn instead of being skipped.
        markers = ('o', 'x', '+', '*')

        fig = plt.figure()
        ax = fig.add_subplot(111)
        for i in range(n_states):
            print(i)
            idx = (hidden_states == i)
            marker = markers[(i // 7) % len(markers)]
            ax.plot(x_ax[idx], y_ax[idx], marker, label='%dth state' % i)
        ax.set_title('%d State HMM' % (n_states))
        ax.legend()
        ax.set_ylabel('Load (Mlb/Hr)')
        ax.set_xlabel('Time')
        ax.grid(True)
        plt.show()

    def plot_elbow(self, start, end):
        '''
        Fit GMM and plot elbow using AIC & BIC
        '''
        from sklearn.mixture import GMM
        obs = self.X_hmm
        aics = []
        bics = []
        for i in range(start, end + 1):
            n_iter = 1000
            # Up to ten attempts, adding 1000 iterations after each failure.
            for j in range(1, 11):
                g = GMM(n_components=i, n_iter=n_iter)
                g.fit(obs)
                print(i)
                converged = g.converged_
                if converged:
                    print('j:%d' % (j))
                    break
                n_iter += 1000
            aics.append(g.aic(obs))
            bics.append(g.bic(obs))
            if not converged:
                print('Not Converged!!')
        fig = plt.figure()
        ax = fig.add_subplot(111)
        ax.plot(range(start, end + 1), aics, label='AIC')
        ax.plot(range(start, end + 1), bics, label='BIC')
        ax.set_xlabel("No. of Clusters")
        ax.set_ylabel("Information Loss")
        ax.set_xticks(range(start, end + 1), minor=True)
        ax.legend()
        ax.grid(True, which='both')
        plt.show()
#===[ Local Data ]=== self.motion_sequences = _motion_sequences self.dataframes = [r.get_dataframe () for r in self.motion_sequences] self.feature_extractor = FeatureExtractor () self.train () # Function: fit # ------------- # fits a generative model (HMM) to the data, returns it def fit_hmm (self): X = [self.feature_extractor.extract(x) for x in self.dataframes] print [x.shape for x in X] hmm = GaussianHMM () hmm.fit (X) return hmm # Function: score_df # ------------------ # scores a dataframe def score_df (self, df): return self.hmm.score (self.feature_extractor.extract (df)) # Function: score # --------------- # returns a score of the current example, rep. as motion sequence def score (self, ms):
# Fragment: starts inside an if/else (presumably within a loop over
# feats_as_list - confirm) whose header lies before this excerpt.
# NOTE(review): the original indentation was lost; the nesting is reconstructed.
        initial_map, initial_best_sep_map = model.decode(feat_from_list, algorithm='map')
        sum_initial_score += model.score(feat_from_list)
        sum_inital_ll += initial_ll
        sum_initial_map += initial_map
    else:
        # Remember the index so the entry can be dropped before fitting.
        remove_idx.append(idx)
        print 'too few samples in file', list_of_patient_file_paths[idx], np.shape(feat_from_list)

print 'initial viterbi log-likelihood,', sum_inital_ll
print 'initial score log-likelihood,', sum_initial_score
print 'initial map log-likelihood', sum_initial_map

# Delete from the highest index down so the remaining indices stay valid.
remove_idx.sort()
remove_idx.reverse()
print 'removing...', remove_idx
for r in remove_idx:
    del feats_as_list[r]

model.fit(feats_as_list)

# Sum the decode and score log-likelihoods over all remaining sequences.
sum_final_ll = 0.0
sum_final_score = 0.0
for feat_from_list in feats_as_list:
    print np.shape(feat_from_list)
    final_ll, final_best_seq = model.decode(feat_from_list)
    final_score = model.score(feat_from_list)
    sum_final_ll += final_ll
    sum_final_score += final_score
print 'final viterbi log-likelihood,', sum_final_ll
print 'final score log-likelihood,', sum_final_score

#save all the files that have been generated by training
mean_name = root + save_model_to + condition + '/' + condition + '-cond-' + feature + '-feat-' + str(n_states) + '-states-' + str(
    model.n_iter) + '-iter-mean.txt'
# Fragment of a script: a, b, Resul, comp and target are defined before this
# excerpt.
c = 0.0014999999999999458
e = 0.001
d = 0.050

##EX transitions_prob = np.mat([row0 = [a,c,d,c,d], row1 = [ e,a,b,e,e], row2 = [c,d,a,c,d] , row3 = [d,c,c,a,d] , row4 [d,c,d,c ,a]])
# 5x5 transition matrix with `a` on the diagonal.
transitions_prob = np.mat([[a, c, d, c, d], [e, a, b, e, e], [c, d, a, c, d], [d, c, c, a, d], [d, c, d, c, a]])
HMM = GaussianHMM(n_components=5, covariance_type="diag", transmat=transitions_prob)

#
# Must always fit the obs data before change means and covars
#
HMM.fit([Resul])

# Overwrite the fitted emission parameters with fixed values.
HMM.means_ = np.identity(5)
HMM.covars_ = 0.2 * np.ones((5, 5))

# Use of LR probability to predict the states.
HResul = HMM.predict(Resul)

# Get the probability of success HMM
Hscore = comp(HResul, target)

# print HResul
print "HMM = "
print Hscore
def train_hmm(X):
    """Fit an 8-component Gaussian HMM to ``X`` and return the fitted model.

    ``X`` is passed to ``GaussianHMM.fit`` unchanged. After fitting, the
    model's score for ``X[0]`` and the shape of ``X[0]`` are printed as a
    quick sanity check.

    NOTE(review): assumes ``X`` is an indexable collection of observation
    sequences -- confirm against the callers.
    """
    model = GaussianHMM(n_components=8)
    model.fit(X)

    # Report on the first sequence only.
    first_sequence = X[0]
    print(model.score(first_sequence))
    print(np.shape(first_sequence))

    return model
def test_1():
    """Generator test: yield one zero-argument fit check per HMM class.

    A 5-state ``VonMisesHMM`` and a 5-component ``GaussianHMM`` are built,
    then each is yielded as a callable that fits the model to the same
    single sequence of 100 two-dimensional standard-normal samples.
    """
    models = (VonMisesHMM(n_states=5), GaussianHMM(n_components=5))
    samples = np.random.randn(100, 2)

    for model in models:
        # Bind the current model as a default argument so every yielded
        # callable keeps its own model even if it is invoked later.
        yield lambda model=model: model.fit([samples])
# Day-over-day change of the closing values. The first entry of dates and
# close_v is dropped so both have one element fewer, like diff.
diff = close_v[1:] - close_v[:-1]
dates = dates[1:]
close_v = close_v[1:]

# Pack diff and volume side by side as the two feature columns for training.
# NOTE(review): volume is not trimmed here -- presumably it already matches
# diff in length; confirm against the code that loads it.
X = np.column_stack([diff, volume])

###############################################################################
# Run Gaussian HMM
# The trailing comma (Python 2 print statement) suppresses the newline so the
# later "done" message continues on the same output line.
print "fitting to HMM and decoding ...",
n_components = 2

# Make an HMM instance and fit it; X is wrapped in a list because fit is
# given a list of observation sequences (here a single one).
model = GaussianHMM(n_components, covariance_type="diag", n_iter=1000)

model.fit([X])

# Predict the most likely hidden-state sequence for the same observations.
hidden_states = model.predict(X)

print "done\n"

###############################################################################
# Print trained parameters and plot
print "Transition matrix"
print model.transmat_
print ""

print "means and vars of each hidden state"
for i in xrange(n_components):
    # Header line for hidden state i.
    print "%dth hidden state" % i
# Five-component Gaussian HMM with full covariance matrices; every other
# constructor option is spelled out explicitly.
model = GaussianHMM(
    n_components=5,
    covariance_type='full',
    algorithm='viterbi',
    startprob=None,
    startprob_prior=1.0,
    transmat=None,
    transmat_prior=1.0,
    means_prior=None,
    means_weight=0,
    covars_prior=0.01,
    covars_weight=1,
    random_state=None,
)

print("Fitting model...")
# The farm output is passed as a one-element list of observation sequences.
model.fit([farm1.get_output()], n_iter=1000)

print("Predicting hidden states...")
hidden_states = model.predict(farm1.get_output())

print("Transition matrix")
print(model.transmat_)
print("")

print("mean and vars of the hidden states")
for i in range(5):
    print("%dth hidden state" % i)
    # Label and value are joined with one space, the separator a
    # comma-separated print statement puts between its items.
    print(" ".join(("mean = ", str(model.means_[i]))))
    print(" ".join(("var = ", str(np.diag(model.covars_[i])))))
    print("")
if row[1] != 'close': #list = [] #for i in range(len(row)-2): # list.append(float(row[i+1])) label = float(row[7]) volume.append(float(row[2])) if label > 0: indices.append(1) else: indices.append(0) #matrix.append(list) X = numpy.column_stack([numpy.array(indices), numpy.array(volume)]) model = GaussianHMM(2, covariance_type="diag", n_iter=1000) model.fit([X]) """ reading the dato to be classified """ with open('hackathon-master/AAPL-test.csv', 'rb') as csvfile: data = csv.reader(csvfile, delimiter=',') #matrix = [] volume = [] labels = [] for row in data: if row[1] != 'close': list = [] volume.append(float(row[2])) #for i in range(len(row)-2):
counts.append(trimmed_count) kmer_stash.append(kmer) i += 1 if not len(counts): sys.exit( "No k-mer counts remain after filtering; check thresholds and try again." ) ## fit HMM to counts if len(args.mu) != len(args.sigmasq): sys.exit("Vectors of prior means and variances must be same length.") counts = np.reshape(np.log1p(np.array(counts, dtype="int")), (-1, 1)) hmm = GaussianHMM(len(args.mu)) hmm.fit([counts]) if args.verbose: sys.stderr.write( "Fitting HMM to k-mer counts, assuming {} hidden states...\n".format( len(args.mu))) sys.stderr.write("means:\n" + str(hmm.means_) + "\n") sys.stderr.write("covariances:\n" + str(hmm.covars_) + "\n") sys.stderr.write("\n") sys.stderr.write("Processing possible variant sites...\n") sys.stderr.write("\trejecting haplotypes with read count < {}\n".format( args.maf)) sys.stderr.write( "\taccepting as TE/ME any haplotype with max count > {}\n".format( args.maxhits))