def main():
    """Check the PyTorch HMM log-marginals against hmmlearn on sampled data.

    Random row-normalised start, transition and emission parameters are
    installed on an hmmlearn ``MultinomialHMM``; 20 sequences of length 25
    are sampled from it.  The same parameters are copied into the PyTorch
    ``HMM`` (the matrices are passed transposed) and the per-sequence
    log-likelihoods of both implementations must agree to within 1e-4.

    Raises:
        AssertionError: if the two implementations disagree.
    """
    n_states, n_symbols = 5, 10
    n_seqs, seq_len = 20, 25

    hmm = MultinomialHMM(n_components=n_states)

    # Draw order (T, pi, emit) is kept so seeded runs stay reproducible.
    T = np.random.random(size=(n_states, n_states))
    T = T / T.sum(axis=1).reshape((n_states, 1))
    hmm.transmat_ = T

    pi = np.random.random(size=(n_states,))
    pi = pi / pi.sum()
    hmm.startprob_ = pi

    emit = np.random.random(size=(n_states, n_symbols))
    emit = emit / emit.sum(axis=1).reshape((n_states, 1))
    hmm.emissionprob_ = emit

    # ``np.int`` was removed in NumPy 1.24; the builtin ``int`` is the same dtype.
    X = np.zeros((n_seqs, seq_len), dtype=int)
    for i in range(n_seqs):
        x, _ = hmm.sample(n_samples=seq_len)
        X[i] = x.reshape((seq_len,))

    # load the PyTorch HMM
    phmm = HMM(z_dim=n_states, x_dim=n_symbols)
    phmm.T = torch.Tensor(T.T)
    phmm.pi = torch.Tensor(pi)
    phmm.emit = torch.Tensor(emit.T)

    # compute PyTorch HMM forward-backward
    my_marginals = phmm.log_marginal(torch.Tensor(X.T))

    # compute hmmlearn version
    true_marginals = np.zeros(n_seqs)
    for i in range(n_seqs):
        true_marginals[i] = hmm.score(X[i].reshape((-1, 1)))

    assert np.abs(true_marginals - my_marginals.numpy()).max() < 1e-4
def buildHMM(HMMFactory):
    """Return a two-state ``MultinomialHMM`` populated from ``HMMFactory``.

    The factory is queried, in this order, for the start distribution
    (``hiddenProb``), the transition matrix (``transMatrix``) and the
    emission matrix (``emissionMatrix``).
    """
    hmm = MultinomialHMM(n_components=2, n_iter=200)
    wiring = (
        ("startprob_", "hiddenProb"),
        ("transmat_", "transMatrix"),
        ("emissionprob_", "emissionMatrix"),
    )
    for attribute, factory_method in wiring:
        setattr(hmm, attribute, getattr(HMMFactory, factory_method)())
    return hmm
def create_hmm_data(N, seq_len, x_dim, z_dim, params=None):
    """Sample ``N`` discrete sequences of length ``seq_len`` from an HMM.

    Args:
        N: number of sequences to sample.
        seq_len: number of observations per sequence.
        x_dim: number of observation symbols.
        z_dim: number of hidden states.
        params: optional ``(T, pi, emit)`` tuple of transition matrix, start
            distribution and emission matrix.  When ``None``, random
            row-normalised parameters are drawn.

    Returns:
        ``((T, pi, emit), HMMData(X))`` where ``X`` is an ``(N, seq_len)``
        integer array of sampled symbols.
    """
    # Imported lazily: hmmlearn introduces a lot of dependencies.
    from hmmlearn.hmm import MultinomialHMM

    hmm = MultinomialHMM(n_components=z_dim)

    if params is None:
        # Draw order (T, pi, emit) is kept so seeded runs stay reproducible.
        T = np.random.random(size=(z_dim, z_dim))
        T = T / T.sum(axis=1).reshape((z_dim, 1))

        pi = np.random.random(size=(z_dim,))
        pi = pi / pi.sum()

        emit = np.random.random(size=(z_dim, x_dim))
        emit = emit / emit.sum(axis=1).reshape((z_dim, 1))
    else:
        T, pi, emit = params

    hmm.transmat_ = T
    hmm.startprob_ = pi
    hmm.emissionprob_ = emit

    # ``np.int`` was removed in NumPy 1.24; the builtin ``int`` is the same dtype.
    X = np.zeros((N, seq_len), dtype=int)
    for i in range(N):
        x, _ = hmm.sample(n_samples=seq_len)
        X[i] = x.reshape((seq_len,))

    return (T, pi, emit), HMMData(X)
def predict(self, x, init_prob=None, method='hmmlearn', window=-1):
    """Predict the hidden-state sequence for ``x``.

    Args:
        x: observation sequence; indexed along its first axis.
        init_prob: initial state distribution.  Defaults to a uniform
            distribution over ``self.num_states`` states.
        method: ``'hmmlearn'`` decodes with an hmmlearn ``MultinomialHMM``
            built from ``self.A`` / ``self.B``; any other value decodes with
            ``self.decode(sequence, init_prob)``.
        window: ``-1`` decodes the whole sequence at once.  Otherwise the
            first ``window`` states come from decoding ``x[:window]`` and
            every later state ``i`` is the last state of decoding the
            ``window`` observations that end at ``i``.

    Returns:
        The decoder's output when ``window == -1``; otherwise an integer
        array of length ``x.shape[0]``.
    """
    if init_prob is None:
        # Array division keeps this a float result on every Python version.
        init_prob = np.ones(self.num_states) / self.num_states

    if method == 'hmmlearn':
        model = MultinomialHMM(self.num_states, n_iter=100)
        model.n_features = self.num_observations
        model.startprob_ = init_prob
        model.emissionprob_ = self.B
        model.transmat_ = self.A
        decode = model.predict
    else:
        def decode(sequence):
            return self.decode(sequence, init_prob)

    if window == -1:
        return decode(x)

    # ``np.int`` was removed in NumPy 1.24; the builtin ``int`` is the same dtype.
    result = np.zeros(x.shape[0], dtype=int)
    result[0:window] = decode(x[0:window])
    for i in range(window, x.shape[0]):
        # Only the newest state of each sliding window is kept.
        result[i] = decode(x[i - window + 1:i + 1])[-1]
    return result
def test_viterbi_case_random(self):
    """Compare ``HMM.viterbi`` with hmmlearn on 1000 random models.

    Each round draws a random number of states, symbols and steps (stored on
    ``self``), random normalised parameters and a random observation
    sequence, then requires both decoders to return the same path.
    """
    for _ in range(1000):
        # init
        self.n_state = np.random.randint(1, 10)
        self.n_output = np.random.randint(1, 10)
        self.step = np.random.randint(1, 200)

        raw_start = np.random.random(self.n_state)
        startprob = raw_start / raw_start.sum()

        raw_trans = np.random.random((self.n_state, self.n_state))
        transmat = raw_trans / raw_trans.sum(axis=1).reshape(-1, 1)

        raw_emit = np.random.random((self.n_state, self.n_output))
        emissionprob = raw_emit / raw_emit.sum(axis=1).reshape(-1, 1)

        X = np.random.choice(self.n_output, self.step).reshape(-1, 1)

        # hmmlearn
        reference = MultinomialHMM(n_components=self.n_state,)
        reference.startprob_ = startprob
        reference.transmat_ = transmat
        reference.emissionprob_ = emissionprob
        expected = reference.predict(X)

        # my hmm
        actual = HMM().viterbi(startprob, transmat, emissionprob, X)
        self.assertTrue(np.array_equal(expected, actual))
def initHMM(self, length):
    """Build the four-state HMM (Pre, HQ, PostQuiet, PostActive).

    Args:
        length: sets the per-step probability ``a = 1 / length`` of leaving
            the Pre state and of leaving the HQ state.

    Returns:
        A ``MultinomialHMM`` with the transition, start and emission
        parameters below installed.

    Raises:
        ZeroDivisionError: if ``length`` is 0.
    """
    a = 1.0 / length

    # Transition probabilities
    trans = np.array([[1 - a, a,     0,     0],       # Pre ->
                      [0,     1 - a, a / 2, a / 2],   # HQ ->
                      [0,     0,     1,     0],       # PostQuiet ->
                      [0,     0,     0,     1]])      # PostActive ->

    # emission probabilities
    eps = 1e-4
    #                 A0    A1          A2
    emit = np.array([[0.25, 0.25,       0.50],   # Emit | Pre
                     [0.16, 0.84 - eps, eps],    # Emit | HQ
                     [0.90, 0.10 - eps, eps],    # Emit | PostQuiet
                     [0.25, 0.25,       0.50]])  # Emit | PostActive

    # Start state distribution
    start = np.array([0.34, 0.33, 0.33, 0])

    # Take the state count from the matrix itself so it cannot drift out of
    # sync with an externally defined constant.
    hmm = MultinomialHMM(n_components=trans.shape[0])
    hmm.transmat_ = trans
    hmm.startprob_ = start
    hmm.emissionprob_ = emit
    return hmm
def get_model(self):
    """Initialize the HMM model from the probability tables stored on self.

    The model has one hidden state per entry of ``self.states``; start,
    transition and emission parameters come from ``self.init_p``,
    ``self.trans_p`` and ``self.emit_p``.
    """
    hmm = MultinomialHMM(n_components=len(self.states))
    for hmm_attribute, own_attribute in (
        ("startprob_", "init_p"),
        ("transmat_", "trans_p"),
        ("emissionprob_", "emit_p"),
    ):
        setattr(hmm, hmm_attribute, getattr(self, own_attribute))
    return hmm
def get_hmm(df, n_components, n_features):
    """Build a ``MultinomialHMM`` from the label and prediction columns of ``df``.

    The transition matrix is derived from the state list returned by
    ``get_ubie_label(df["label"])``; the emission matrix from that state list
    together with ``get_pred_for_hmm(df["pred"])``.  The start distribution
    is a fixed four-entry vector.
    """
    fixed_startprob = [0.5, 0.05, 0.4, 0.05]

    _, states = get_ubie_label(df["label"])
    predictions = get_pred_for_hmm(df["pred"])

    hmm = MultinomialHMM(n_components=n_components)
    hmm.n_features = n_features
    hmm.transmat_ = get_transmat(states)
    hmm.emissionprob_ = get_emission(predictions, states)
    hmm.startprob_ = np.array(fixed_startprob)
    return hmm
def get_model(self):
    """Return a multinomial HMM configured from this object's getters.

    The model is created with ``params='e'`` and ``init_params=''``; start,
    transition and emission parameters are read, in that order, from
    ``get_start``, ``get_transition`` and ``get_emission``.
    """
    n_states = self.get_max()
    hmm = MultinomialHMM(n_components=n_states, params='e', init_params='')
    for attribute, getter_name in (
        ("startprob_", "get_start"),
        ("transmat_", "get_transition"),
        ("emissionprob_", "get_emission"),
    ):
        setattr(hmm, attribute, getattr(self, getter_name)())
    return hmm
def detect_events_hmm(mahal_timeseries, c_timeseries, global_pace_timeseries, threshold_quant=.95):
    """Detect events by decoding an outlier sequence with a two-state HMM.

    Every date (key of ``mahal_timeseries``, in sorted order) becomes symbol 1
    when its Mahalanobis value exceeds the ``threshold_quant`` quantile or its
    ``c_timeseries`` value equals 1, and symbol 0 otherwise.  The symbols are
    decoded with a fixed transition/emission model.

    Returns:
        ``(events, predictions)`` with ``events`` sorted by their third field
        in descending order (longest events first).
    """
    dates = sorted(mahal_timeseries)
    expected_pace_timeseries, _sd_pace_timeseries = getExpectedPace(global_pace_timeseries)

    def in_date_order(series):
        return [series[day] for day in dates]

    # Values of R(t) and companions, chronologically aligned
    mahal_list = in_date_order(mahal_timeseries)
    c_list = in_date_order(c_timeseries)
    global_pace_list = in_date_order(global_pace_timeseries)
    expected_pace_list = in_date_order(expected_pace_timeseries)

    # The quantile of the sorted values is the outlier threshold
    threshold = getQuantile(sorted(mahal_list), threshold_quant)

    # "1" marks an outlier, "0" a regular observation
    symbols = [
        1 if (mahal > threshold or c_flag == 1) else 0
        for mahal, c_flag in zip(mahal_list, c_list)
    ]

    # Hidden states: "0" = no event, "1" = event.  The heavy diagonal makes
    # the model prefer staying in its state, which smooths the predictions.
    trans_matrix = array([[.999, .001],
                          [.001, .999]])

    # State 0 mostly emits symbol 0 and vice versa, i.e. events are likely
    # to be outliers.
    emission_matrix = array([[.95, .05],
                             [.4, .6]])

    hmm = MultinomialHMM(n_components=2, transmat=trans_matrix)
    hmm.emissionprob_ = emission_matrix

    _log_likelihood, predictions = hmm.decode(symbols)

    events = get_all_events(predictions, dates, mahal_list, global_pace_list,
                            expected_pace_list)
    # Longest events first
    events.sort(key=lambda event: event[2], reverse=True)
    return events, predictions
def predict_prob(self, x, init_prob=None, window=-1):
    """Return hmmlearn's ``predict_proba`` output for ``x``.

    Args:
        x: observation sequence handed to hmmlearn unchanged.
        init_prob: initial state distribution; defaults to uniform over
            ``self.num_states`` states.
        window: accepted but not used by this method.
    """
    if init_prob is None:
        n_states = self.num_states
        init_prob = np.array([1 / n_states for _ in range(n_states)])

    hmm = MultinomialHMM(self.num_states)
    hmm.n_features = self.num_observations
    hmm.startprob_ = init_prob
    hmm.emissionprob_ = self.B
    hmm.transmat_ = self.A
    posterior = hmm.predict_proba(x)
    return posterior
def run_hmm_model(input_df, n_unique, A_df, Eta, n_iter = 10000, tol=1e-2, verbose = False, params = 'e', init_params = ''):
    '''
    Fit the HMM on the keypress clusters and decode them.

    input_df    : dataframe of keypresses; its 'cluster' column is the observation sequence
    n_unique    : number of unique characters (hidden states)
    A_df        : dataframe holding the transition matrix
    Eta         : emission matrix
    n_iter      : maximum number of fitting iterations
    tol         : fitting stops once the score improves by less than this
    verbose     : whether hmmlearn prints progress
    params      : which parameters are updated during fitting
    init_params : which parameters are initialised before fitting

    Returns (score, results, hmm): the decode log-probability, the decoded
    state sequence and the fitted model.
    '''
    # Proportion of characters starting words in English
    char_counts = get_char_counts()

    # Construct model
    settings = dict(
        n_components=n_unique,
        startprob_prior=np.append(0, char_counts.values),
        transmat_prior=A_df.values,
        algorithm='viterbi',
        random_state=None,
        n_iter=n_iter,
        tol=tol,
        verbose=verbose,
        params=params,
        init_params=init_params,
    )
    hmm = MultinomialHMM(**settings)

    # Set values
    hmm.emissionprob_ = Eta
    hmm.transmat_ = A_df.values
    hmm.startprob_ = np.append(0, char_counts.values)

    # The clusters are the observed sequence; hmmlearn wants a column vector
    observations = input_df['cluster'].values
    if len(observations.shape) == 1:
        observations = observations.reshape((len(observations), 1))

    # Fit, then decode the same sequence
    hmm = hmm.fit(observations)
    score, results = hmm.decode(observations)
    return score, results, hmm
def get_hmm_model(state):
    """Create a MultinomialHMM (sklearn-style interface) from stored matrices.

    Input:
    - state: dictionary keyed by the names of the
      HiddenMarkovModelProbability members; each value is the probability
      matrix or array describing that part of the hidden Markov model.

    Returns: a MultinomialHMM with one state per SleepStage member and its
    emission, start and transition parameters set from ``state``.
    """
    hmm_model = MultinomialHMM(n_components=len(SleepStage))
    hmm_model.emissionprob_ = state[HiddenMarkovModelProbability.emission.name]
    for attribute, member_name in (
        ("startprob_", "start"),
        ("transmat_", "transition"),
    ):
        member = getattr(HiddenMarkovModelProbability, member_name)
        setattr(hmm_model, attribute, state[member.name])
    return hmm_model
def detect_events_hmm(mahal_timeseries, c_timeseries, global_pace_timeseries,
                      threshold_quant=.95, trans_matrix=DEFAULT_TRANS_MATRIX,
                      emission_matrix=DEFAULT_EMISSION_MATRIX, initial_state=None):
    """Detect events by decoding an outlier sequence with a two-state HMM.

    Every date (key of ``mahal_timeseries``, in sorted order) becomes symbol 1
    when its Mahalanobis value exceeds the ``threshold_quant`` quantile or its
    ``c_timeseries`` value equals 1, and symbol 0 otherwise.  The symbols are
    decoded with the given transition matrix, emission matrix and initial
    state distribution.

    Returns:
        ``(events, predictions)`` with ``events`` sorted by their third field
        in descending order (longest events first).
    """
    dates = sorted(mahal_timeseries)
    expected_pace_timeseries, _sd_pace_timeseries = getExpectedPace(global_pace_timeseries)

    def in_date_order(series):
        return [series[day] for day in dates]

    # Values of R(t) and companions, chronologically aligned
    mahal_list = in_date_order(mahal_timeseries)
    c_list = in_date_order(c_timeseries)
    global_pace_list = in_date_order(global_pace_timeseries)
    expected_pace_list = in_date_order(expected_pace_timeseries)

    # The quantile of the sorted values is the outlier threshold
    threshold = getQuantile(sorted(mahal_list), threshold_quant)

    # "1" marks an outlier, "0" a regular observation
    symbols = [
        1 if (mahal > threshold or c_flag == 1) else 0
        for mahal, c_flag in zip(mahal_list, c_list)
    ]

    hmm = MultinomialHMM(n_components=2, transmat=trans_matrix,
                         startprob=initial_state)
    hmm.emissionprob_ = emission_matrix

    _log_likelihood, predictions = hmm.decode(symbols)

    events = get_all_events(predictions, dates, mahal_list, global_pace_list,
                            expected_pace_list)
    # Longest events first
    events.sort(key=lambda event: event[2], reverse=True)
    return events, predictions
def detect_events_hmm(mahal_timeseries, c_timeseries, global_pace_timeseries,
                      threshold_quant=.95, trans_matrix = DEFAULT_TRANS_MATRIX,
                      emission_matrix=DEFAULT_EMISSION_MATRIX, initial_state=None):
    """Flag outlier dates and decode them into event / non-event states.

    A date emits symbol 1 when its Mahalanobis value is above the
    ``threshold_quant`` quantile of all values or when its ``c_timeseries``
    entry equals 1; otherwise it emits 0.  A two-state ``MultinomialHMM``
    with the supplied matrices and initial distribution decodes the symbols.

    Returns:
        ``(events, predictions)``; ``events`` is ordered by descending third
        field (duration).
    """
    # Chronological order of the series keys
    sorted_dates = sorted(mahal_timeseries)
    expected_pace_timeseries, _unused_sd = getExpectedPace(global_pace_timeseries)

    # One aligned value list per series, built in the listed order
    mahal_list, c_list, global_pace_list, expected_pace_list = (
        [series[day] for day in sorted_dates]
        for series in (mahal_timeseries, c_timeseries,
                       global_pace_timeseries, expected_pace_timeseries)
    )

    # Threshold taken from the requested quantile
    threshold = getQuantile(sorted(mahal_list), threshold_quant)

    # Symbol 1 = outlier, symbol 0 = not an outlier
    symbols = []
    for mahal, c_flag in zip(mahal_list, c_list):
        is_outlier = mahal > threshold or c_flag == 1
        symbols.append(1 if is_outlier else 0)

    # Set up the HMM and decode
    decoder = MultinomialHMM(n_components=2, transmat=trans_matrix,
                             startprob=initial_state)
    decoder.emissionprob_ = emission_matrix
    _lnl, predictions = decoder.decode(symbols)

    events = get_all_events(predictions, sorted_dates, mahal_list,
                            global_pace_list, expected_pace_list)

    # Long events first
    events.sort(key=lambda event: event[2], reverse=True)
    return events, predictions
def test_viterbi_case_handcraft(self):
    """Compare ``HMM.viterbi`` with hmmlearn on one hand-written model."""
    # init
    startprob = np.array([0.6, 0.4])
    transmat = np.array([[0.7, 0.3],
                         [0.4, 0.6]])
    emissionprob = np.array([[0.1, 0.4, 0.5],
                             [0.6, 0.3, 0.1]])
    observed = [1, 0, 2, 0, 2, 1, 0, 1, 1]
    X = np.array(observed).reshape(-1, 1)

    # hmmlearn
    reference = MultinomialHMM(n_components=2)
    reference.startprob_ = startprob
    reference.transmat_ = transmat
    reference.emissionprob_ = emissionprob
    expected = reference.predict(X)

    # my hmm
    actual = HMM().viterbi(startprob, transmat, emissionprob, X)
    self.assertTrue(np.array_equal(expected, actual))
def test_DiscreteHMM_fit(cases: str) -> None:
    """Compare ``DiscreteHMM.fit`` with hmmlearn on random problems.

    Runs ``int(cases) - 1`` rounds.  Each round draws random sequences (every
    sequence is a permutation of all symbols), starts both models from the
    same uniform transition/start parameters and Dirichlet-sampled emission
    rows, fits both, and requires the fitted parameters to agree to 4
    decimals.
    """
    np.random.seed(12346)
    total_cases = int(cases)
    decimals = 4
    max_iter = 100
    tol = 1e-3

    for _round in range(1, total_cases):
        n_samples = np.random.randint(10, 50)
        hidden_states = np.random.randint(3, 6)
        # symbols is the number of unique observation types.
        symbols = np.random.randint(4, 9)

        # Every sequence uses each symbol exactly once.
        seq_length = symbols
        X = [
            np.random.choice(range(symbols), size=seq_length, replace=False)
            for _sample in range(n_samples)
        ]
        lengths = [seq_length for _sample in range(n_samples)]

        A = np.full((hidden_states, hidden_states), 1 / hidden_states)
        B = np.array([
            np.random.dirichlet(np.ones(symbols), size=1)[0]
            for _state in range(hidden_states)
        ])
        pi = np.ones(hidden_states)
        pi = pi / hidden_states

        # Reference fit with hmmlearn
        hmm_gold = MultinomialHMM(n_components=hidden_states,
                                  startprob_prior=1,
                                  transmat_prior=1,
                                  init_params='',
                                  n_iter=max_iter,
                                  tol=tol)
        hmm_gold.transmat_ = A
        hmm_gold.emissionprob_ = B
        hmm_gold.startprob_ = pi
        hmm_gold.fit(np.concatenate(X).reshape((-1, 1)), lengths)

        # Fit under test
        hmm_mine = DiscreteHMM(hidden_states=hidden_states,
                               symbols=symbols,
                               A=A,
                               B=B,
                               pi=pi,
                               tol=tol,
                               max_iter=max_iter)
        hmm_mine.fit(X)

        assert_almost_equal(hmm_mine.pi, hmm_gold.startprob_, decimal=decimals)
        assert_almost_equal(hmm_mine.A, hmm_gold.transmat_, decimal=decimals)
        assert_almost_equal(hmm_mine.B, hmm_gold.emissionprob_, decimal=decimals)

    print('Successfully testing the function of estimating parameters in discrete HMM!')
# Report on the trained model: convergence monitor, transition matrix and
# emission matrix (transposed before printing and before being handed to
# seven_most_probabe).
print("Training Done")
print("Model = ",model.monitor_)
print("The transition prob of this trained model : ")
print(model.transmat_)
emiso = np.transpose(model.emissionprob_)
print("\nThe emmision prob of this trained model : ")
print(" State-0 State-1")
print(emiso)
seven_most_probabe(emiso) # printing the 7 most likely characters
print("Stationary probbabilities : ",model. get_stationary_distribution())
print("So seeing the emission probabilities we can say that State 1 is Consonant and State 0 is Vowel")

# Task 4: score a hand-specified ("natural") model on the same data and
# compare it with the trained one.  emmis_prob is transposed before being
# used as the emission matrix.
print("\nTask - 4")
model_nat = MultinomialHMM(n_components=2)
model_nat.transmat_ = trans_prob
model_nat.emissionprob_ = np.transpose(emmis_prob)
# The natural model always starts in state 1.
model_nat.startprob_ = np.array([0, 1])
scr2 = (model_nat.score(Data_arr))
scr1 = (model.score(Data_arr))
print("Log_Prob of Trained one is = " , scr1)
print("Log_Prob of Natural one is = " , scr2)
# A higher log-probability wins; ties are reported as the natural model.
if(scr1 > scr2):
    print("Trained Model is better")
else:
    print("Natural Model is better")

# Second model: same natural parameters, with up to 200 iterations allowed
# for the training that follows.
print("Intializing the params of model from natural model")
model2 = MultinomialHMM(n_components=2,n_iter = 200)
model2.transmat_ = trans_prob
model2.emissionprob_ = np.transpose(emmis_prob)
model2.startprob_ = np.array([0, 1])
print("Training started")
# Transition probability as specified above transition_matrix = np.array([[0.2, 0.6, 0.15, 0.05], [0.2, 0.3, 0.3, 0.2], [0.05, 0.05, 0.7, 0.2], [0.005, 0.045, 0.15, 0.8]]) # Setting the transition probability model_multinomial.transmat_ = transition_matrix # Initial state probability initial_state_prob = np.array([0.1, 0.4, 0.4, 0.1]) # Setting initial state probability model_multinomial.startprob_ = initial_state_prob # Here the emission prob is required to be in the shape of # (n_components, n_symbols). So instead of directly feeding the # CPD we would using the transpose of it. emission_prob = np.array([[0.045, 0.15, 0.2, 0.6, 0.005], [0.2, 0.2, 0.2, 0.3, 0.1], [0.3, 0.1, 0.1, 0.05, 0.45], [0.1, 0.1, 0.2, 0.05, 0.55]]) # Setting the emission probability model_multinomial.emissionprob_ = emission_prob # model.sample returns both observations as well as hidden states # the first return argument being the observation and the second # being the hidden states Z, X = model_multinomial.sample(100)
# One emission row per known word, in emission_dict order.
emission_matrix = np.array([emission_dict[key] for key in emission_dict])

# Adding one row for unknowns
# NOTE(review): the unknown row is all zeros, so an unknown word has zero
# emission probability in every state -- confirm this is intended.
unk = np.zeros((1, 9))
emission_matrix = np.vstack((emission_matrix, unk))
# After the transpose rows are states and columns are symbols (words).
emission_matrix = emission_matrix.T

model = MultinomialHMM(n_components=n_states, algorithm='viterbi')
model.startprob_ = np.array(start_prob)
model.transmat_ = trans_mat
model.emissionprob_ = emission_matrix

# Word -> symbol index, built once instead of an O(n) list search per word.
word_index = {known_word: idx for idx, known_word in enumerate(emission_dict)}
# The unknown symbol is the LAST COLUMN of the transposed matrix; the
# previous `shape[0] - 1` was the last state row, i.e. a real word's index.
unk_index = emission_matrix.shape[1] - 1

# format is: word gold pred
nexcept = 0
with open("results.txt", "w") as out:
    for sent in test_sents:
        inp = []
        for i in range(len(sent)):
            word = sent[i][0]
            k = word_index.get(word)
            if k is None:
                # Unknown word: count it and map it to the unknown symbol.
                nexcept += 1
                k = unk_index
            inp.append(k)
import numpy as np
import math
from hmmlearn.hmm import MultinomialHMM

# Two hidden states (venue) emitting one of three match results.
model_man_derby = MultinomialHMM(n_components=2)
states = ["Home", "Away"]
observations = ["Win", "Lose", "Draw"]

initial_vector = np.array([0.5, 0.5])
model_man_derby.startprob_ = initial_vector

transition_matrix = np.array([[0.2, 0.8],
                              [0.8, 0.2]])
model_man_derby.transmat_ = transition_matrix

# Rows: Home, Away.  Columns: Win, Lose, Draw.
emission_matrix = np.array([[0.4, 0.467, 0.133],
                            [0.4, 0.4, 0.2]])
model_man_derby.emissionprob_ = emission_matrix

# Every ordered pair of results; after the transpose each COLUMN is one
# two-match sequence.
result = np.array([[0, 0], [0, 1], [0, 2],
                   [1, 0], [1, 1], [1, 2],
                   [2, 0], [2, 1], [2, 2]]).T
titles = ["WW", "WL", "WD", "LW", "LL", "LD", "DW", "DL", "DD"]

for title, sequence in zip(titles, result.T):
    # hmmlearn expects observations as a column: shape (n_samples, 1), one
    # symbol per row.  The previous reshape(1, -1) produced a single sample
    # with two features.
    logprob = model_man_derby.score(sequence.reshape(-1, 1))
    print(title, ':', math.exp(logprob))
# coding: utf-8 import numpy as np from hmmlearn.hmm import MultinomialHMM from hmm import DiscreteHMM if __name__ == "__main__": start_probability = np.array([0.2, 0.4, 0.4]) transition_probability = np.array([[0.5, 0.2, 0.3], [0.3, 0.5, 0.2], [0.2, 0.3, 0.5]]) emission_probability = np.array([[0.5, 0.5], [0.4, 0.6], [0.7, 0.3]]) disc_hmm = MultinomialHMM(n_components=3) disc_hmm.startprob_ = start_probability disc_hmm.transmat_ = transition_probability disc_hmm.emissionprob_ = emission_probability X, Z = disc_hmm.sample(100) my_model = DiscreteHMM(n_obs=2, n_state=3) my_model.train(X, Z) print(X)
[ 0.0, 0.0, 0.3, 0.0, 0.7, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0 ], [ 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.6, 0.4, 0.0, 0.0 ], [ 0.0, 0.0, 0.2, 0.0, 0.8, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0 ]]) hmmBol.n_features = 16 hmmBol.startprob_ = startprob hmmBol.transmat_ = transmat hmmBol.emissionprob_ = emmBol # Position HMM emmPos = np.array([[0.0, 0.0, 0.0, 0.0, 1.0], [0.0, 0.0, 0.0, 0.0, 1.0], [0.0, 0.0, 0.3, 0.7, 0.0], [0.0, 0.0, 0.0, 0.0, 1.0], [0.5, 0.5, 0.0, 0.0, 0.0], [0.5, 0.5, 0.0, 0.0, 0.0], [0.0, 0.0, 0.8, 0.2, 0.0], [0.0, 1.0, 0.0, 0.0, 0.0]]) hmmPos.n_features = 5 hmmPos.startprob_ = startprob hmmPos.transmat_ = transmat hmmPos.emissionprob_ = emmPos # Object HMM emmObj = np.array([[0.1, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.9, 0.0], [0.3, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.7], [0.0, 0.0, 0.0, 0.3, 0.2, 0.2, 0.3, 0.0, 0.0],
'T': 0 }, 'I': { 'A': 0.4, 'C': 0.1, 'G': 0.1, 'T': 0.4 } } model = MultinomialHMM(n_components=3) model.startprob_ = np.array([1, 0, 0]) model.endprob_ = np.array([0, 0, 0.1]) model.transmat_ = np.array([[0.9, 0.1, 0], [0, 0, 1], [0, 0, 1]]) model.emissionprob_ = np.array([[0.25, 0.25, 0.25, 0.25], [0.05, 0, 0.95, 0], [0.4, 0.1, 0.1, 0.4]]) # In[121]: #"CTTCATGTGAAAGCAGACGTAAGTCA" A = 0 , C = 1 , G = 2 , T = 3 sequence = [ 1, 3, 3, 1, 0, 3, 2, 3, 2, 0, 0, 0, 2, 1, 0, 2, 0, 1, 2, 3, 0, 0, 2, 3, 1, 0 ] logprob, seq = model.decode(np.array([sequence]).transpose()) print(logprob) print(seq) # E = 0 , 5 = 1 , I = 2 print("following sequence correspond to :") print("EEEEEEEEEEEEEEEEEE5IIIIIII")
def train(self, data, labels, tp=None):
    # Train one MultinomialHMM per class and store it in self.models[class].
    #
    # data:   examples; indexed with the positions where labels == class
    # labels: class label of every example
    # tp:     falsy -> uniform start/transition initialisation;
    #         truthy -> start/transition/emission initialised from the data
    #
    # NOTE(review): the original indentation of this function was lost; the
    # nesting below (the data-driven initialisation living inside the `else`
    # branch) is inferred -- confirm against the source repository.
    # NOTE(review): Python 2 code (print statement; `/` on ints is used as
    # integer division for bucket_len).
    labels = np.array(labels)
    for i in range(self.nb_class):
        print "Class", i
        # Examples belonging to class i
        ind = np.where(labels == i)
        digit_data = np.array(data)[ind]
        # Fit the per-class encoder, then encode the examples into symbol
        # sequences (sks) with their lengths.
        self.fit_encode_class(digit_data, i)
        sks, lengths = self.transform_encode_class(digit_data, i)
        if not tp:
            # Only the emissions are initialised by hmmlearn
            # (init_params='e'); start and transition are set to uniform.
            model = MultinomialHMM(n_components=self.nb_components,
                                   n_iter=self.max_iter,
                                   tol=self.tol,
                                   verbose=True,
                                   params='ste',
                                   init_params='e')
            init = 1. / self.nb_components
            model.startprob_ = np.full(self.nb_components, init)
            model.transmat_ = np.full((self.nb_components, self.nb_components), init)
        else:
            model = model = MultinomialHMM(n_components=self.nb_components,
                                           n_iter=self.max_iter,
                                           tol=self.tol,
                                           verbose=True,
                                           params='ste')
            # Number of distinct centroids
            num_obs = len(np.unique(np.concatenate(sks)))
            model.emissionprob_ = np.zeros((self.nb_components, num_obs))
            # hist maps each state to the contiguous bucket of symbols
            # assigned to it; curr is the first symbol of the next bucket.
            hist = {}
            curr = 0
            bucket_len = num_obs / self.nb_components
            for j in range(self.nb_components):
                if j == self.nb_components - 1 and curr + bucket_len < num_obs:
                    # Last state: also absorb the symbols left over by the
                    # division above.
                    offset = num_obs - curr - bucket_len
                    for k in range(curr, curr + bucket_len + offset):
                        if not j in hist:
                            hist[j] = []
                        hist[j].append(k)
                        model.emissionprob_[j, k] = 1
                    curr += bucket_len + offset
                else:
                    for k in range(curr, curr + bucket_len):
                        if not j in hist:
                            hist[j] = []
                        hist[j].append(k)
                        model.emissionprob_[j, k] = 1
                    curr += bucket_len
            model.startprob_ = np.zeros(self.nb_components)
            # always ends by penup
            model.startprob_[-1] = 1
            # Count state-to-state transitions between consecutive
            # observations of every example.
            model.transmat_ = np.zeros((self.nb_components, self.nb_components))
            state_occ_count = np.zeros(self.nb_components)
            for example in digit_data:
                j = 0
                prevobs = 0
                for obs in example:
                    le = self.les[i]
                    val = le.transform(obs)
                    if j == 0:
                        # First observation: nothing to transition from yet.
                        prevobs = val
                        j += 1
                        continue
                    # Find the states whose buckets contain the previous and
                    # the current symbol.
                    prevobs_state = None
                    obs_state = None
                    for k in range(self.nb_components):
                        if (prevobs_state != None and obs_state != None):
                            break
                        if prevobs in hist[k]:
                            prevobs_state = k
                        if val in hist[k]:
                            obs_state = k
                    state_occ_count[prevobs_state] += 1
                    model.transmat_[prevobs_state, obs_state] += 1
                    prevobs = val
                    j += 1
            # Turn the counts into per-row frequencies.
            # NOTE(review): a state that never occurs as a source has
            # state_occ_count 0 here -- confirm this cannot happen.
            for j in range(self.nb_components):
                for k in range(self.nb_components):
                    model.transmat_[j, k] = model.transmat_[j, k] / state_occ_count[j]
        model.fit(sks, lengths)
        self.models[i] = model
# generate emission matrix E = [] for e in es: dist = HMM_graph.frequency_distribution(seq2int(e)) E.append(dist) # generate transition matrix A = np.zeros((max(labels), max(labels))) for t in ts: A[t[0] - 1, t[1] - 1] += 1 A = [HMM_graph.norm(row) for row in A] hmm = MultinomialHMM(max(labels)) hmm.startprob_ = np.array(I) hmm.emissionprob_ = np.array(E) hmm.transmat_ = A # Try unsupervised stuff type1 = [ s.seq.tostring() for s in SeqIO.parse(open('fasta/type1.fasta'), 'fasta') ] type2 = [ s.seq.tostring() for s in SeqIO.parse(open('fasta/type2.fasta'), 'fasta') ] training_seqs = type1[:len(type1) / 2] + type2[:len(type2) / 2] print training_seqs training_seqs = map(hmmseq, training_seqs) tA, tE = fit(hmm, training_seqs)
print( "By observing the most likely charcters it seems like I should have used vowels and consonants as two sepaerate states" ) # In[109]: #task 4 evaluate_hmm_model = hmm_model.score(training_data) print("The score of the inbuilt trained model is") print(evaluate_hmm_model) print("\n") hmm_natural_model = MultinomialHMM(n_components=2) hmm_natural_model.transmat_ = transition_prob hmm_natural_model.emissionprob_ = np.transpose(emission_prob) hmm_natural_model.startprob_ = np.array([0, 1]) evaluate_hmm_natural = hmm_natural_model.score(training_data) print("The score of my designed natural hmm is") print(evaluate_hmm_natural) print(hmm_natural_model.monitor_) print("\n") print( "Since, the score of the inbulit hmm model is more than the natural model") print("Therefore the performance of inbuilt hmm model is good\n") print("Training the natural hmm") hmm_natural_model1 = MultinomialHMM(n_components=2, n_iter=500) hmm_natural_model1.transmat_ = transition_prob hmm_natural_model1.emissionprob_ = np.transpose(emission_prob) hmm_natural_model1.startprob_ = np.array([0, 1])
def computeHMM(dataset, alphabet, num_matchstates=9):
    """Fit a profile-style HMM with 10 random restarts and keep the best.

    Args:
        dataset: iterable of sequences whose elements are members of
            ``alphabet``.
        alphabet: the residue symbols; converted to a list and indexed by
            position.
        num_matchstates: number of match states in the profile.

    Returns:
        The fitted ``MultinomialHMM`` with the highest ``score`` on the
        dataset among the 10 randomly initialised attempts.
    """
    num_sequences = len(dataset)
    best_score = None
    best_model = None
    alphabet = list(alphabet)
    # residue -> integer symbol index
    residue_mapper = {alphabet[j]: j for j in range(0, len(alphabet))}
    #one begin, one end, num_matchstates + 1 insert states, num_matchstates match states, num_matchstates deletion states.
    num_states = 3 + 3 * num_matchstates
    # All sequences stacked into one column of symbol indices, plus the
    # per-sequence lengths, as passed to fit() and score() below.
    concat_dataset = np.concatenate([[[residue_mapper[x]] for x in y]
                                     for y in dataset])
    dataset_lengths = [len(x) for x in dataset]
    # 10 random restarts
    for x in range(0, 10):
        transition_matrix = np.zeros((num_states, num_states))
        emission_matrix = np.zeros((num_states, len(alphabet)))
        #first num_matchstates + 2 are the matchstates (including beginning and end, though those two are mute
        #first do B, then M_1,...,M_m
        #B goes to either I_0 or M_1.
        b_row = ProfileHMM.compute_random_row(2)
        transition_matrix[0][1] = b_row[0]
        transition_matrix[0][2] = b_row[1]
        for i in range(1, num_matchstates + 1):
            #go to either match state, insertion state, or delete state.
            m_row = ProfileHMM.compute_random_row(3)
            #next match state
            transition_matrix[i][i + 1] = m_row[0]
            #insert state
            transition_matrix[i][i + num_matchstates + 2] = m_row[1]
            #deletion state
            # NOTE(review): debug print left in the loop.
            print('i: %d' % i)
            transition_matrix[i][i + 2 * num_matchstates + 2] = m_row[2]
            emission_matrix[i] = ProfileHMM.compute_random_row(
                len(alphabet))
        #now we do the insertion states.
        for i in range(num_matchstates + 2, 2 * num_matchstates + 3):
            #either go to self, or next match state.
            row = ProfileHMM.compute_random_row(2)
            transition_matrix[i][i] = row[0]
            transition_matrix[i][i - (num_matchstates + 1)] = row[1]
            emission_matrix[i] = ProfileHMM.compute_random_row(
                len(alphabet))
        #now do deletion states. In the loop, do all but the last one
        # NOTE(review): rows not written by any loop above stay all-zero in
        # transition_matrix / emission_matrix -- confirm hmmlearn accepts
        # that (the ValueError handler below suggests it may not).
        for i in range(2 * num_matchstates + 3, 3 * num_matchstates + 2):
            row = ProfileHMM.compute_random_row(2)
            transition_matrix[i][i] = row[0]
            transition_matrix[i][i - 2 * num_matchstates - 1] = row[1]
        model = MultinomialHMM(num_states, params="ets")
        model.n_features = len(alphabet)
        # Always start in state 0 (the begin state).
        start_prob = np.zeros(num_states)
        start_prob[0] = 1.0
        print('start prob array')
        print(start_prob)
        model.startprob_ = start_prob
        model.transmat_ = transition_matrix
        model.emissionprob_ = emission_matrix
        try:
            model.fit(concat_dataset, dataset_lengths)
        except ValueError:
            # NOTE(review): drops into the interactive debugger on failure;
            # this is a debugging leftover and blocks non-interactive runs.
            pdb.set_trace()
        print('model')
        print(model)
        # The string below is disabled pseudocount-smoothing code kept as a
        # no-op expression statement.
        """ for row in range(0, len(model.emissionprob_)): for col in range(0, len(model.emissionprob_[row])): count = model.emissionprob_[row][col]*num_sequences model.emissionprob_[row][col] = (count + 0.01)/(num_sequences + len(alphabet)*0.01) """
        print('emission probabilities')
        print(model.emissionprob_)
        score = model.score(concat_dataset, dataset_lengths)
        # Keep the highest-scoring restart.
        if x == 0:
            best_score = score
            best_model = model
        elif score > best_score:
            best_score = score
            best_model = model
    return best_model
import numpy as np from hmmlearn.hmm import MultinomialHMM from pattern.ge_params import GEParams from pattern.read_losses_from_csv import read_losses_from_csv, SEQUENCE_COL, RECEIVED_COL startprob_prior = np.array([0.99, 0.01]) transmat_prior = np.array([[0.95, 0.05], [0.95, 0.05]]) emissionprob_prior = np.array([[0.9, 0.1], [0.1, 0.9]]) model = MultinomialHMM(n_components=2, verbose=False, n_iter=1000, tol=1e-3) model.startprob_ = startprob_prior model.transmat_ = transmat_prior model.emissionprob_ = emissionprob_prior model.init_params = 'st' def fit_ge_params(losses: np.array) -> GEParams: model.fit(losses) return GEParams.from_hmm(model) def main(csv_path: str, max_length: int, use_received: bool = False, verbose: bool = False): out_dir, csv_name = os.path.split(csv_path) expected = None try:
high = high + 1 elif percent >= .50: highMid = highMid + 1 elif percent >= .25: lowMid = lowMid + 1 else: low = low + 1 matrix[1, 0] = low / len(wins) matrix[1, 1] = lowMid / len(wins) matrix[1, 2] = highMid / len(wins) matrix[1, 3] = high / len(wins) return matrix # Load Data filename = 'data.csv' X = np.loadtxt(filename, delimiter=',') player1 = X[:, 0] player2 = X[:, 1] record = X[:, 2] print "stateProbs(record)", stateProbs(record) print "eProbs(player1, record", eProbs(player1, record) clf = MultinomialHMM(n_components=2) clf.transmat_ = stateProbs(record) clf.emissionprob_ = eProbs(player1, record) print "here" clf.fit(clf.transmat_, clf.emissionprob_) clf.predict(player1)
Convert UMDHMM .hmm and .key files to pickle dumps of hmmlearn.MultinomialHMM and LabelEncoder objects. """) args.add_argument("hmm", help="path to source UMDHMM model file)") args.add_argument("key", help="path to source .key file") args.add_argument("out", help="basename for output files") args = args.parse_args() with open(args.hmm) as f: umd = UmdhmmFile(f) with open(args.key) as f: kf = KeyFile(f) mhmm = MultinomialHMM(n_components=umd.n, init_params='') mhmm.startprob_ = umd.startprob_ mhmm.transmat_ = umd.transmat_ mhmm.emissionprob_ = umd.emissionprob_ le = LabelEncoder() le.classes_ = np.array(kf.classes) out_pkl = '{0}.pkl'.format(args.out) out_le = '{0}.le'.format(args.out) joblib.dump(mhmm, out_pkl) with open(out_le, "wb") as f: pickle.dump(le, f) print("Output written to:\n\t- {0}\n\t- {1}".format(out_pkl, out_le), file=sys.stderr)