def predict(self, x, init_prob=None, method='hmmlearn', window=-1):
    """Predict the hidden-state sequence for the observations ``x``.

    Args:
        x: observation array; it is sliced along its first axis and
            ``x.shape[0]`` is taken as the number of time steps.
        init_prob: initial state distribution. Defaults to a uniform
            distribution over ``self.num_states`` states.
        method: ``'hmmlearn'`` decodes with a ``MultinomialHMM`` configured
            from ``self.A`` (transitions) and ``self.B`` (emissions); any
            other value decodes with ``self.decode(x, init_prob)``.
        window: ``-1`` decodes the whole sequence in one call. Otherwise the
            first ``window`` steps are decoded together and every later step
            ``i`` takes the last state decoded from ``x[i - window + 1:i + 1]``.

    Returns:
        The decoder's output when ``window == -1``; otherwise an integer
        array of length ``x.shape[0]``.
    """
    if init_prob is None:
        init_prob = np.array(
            [1 / self.num_states for _ in range(self.num_states)])

    # Choose the decoding back-end once so the windowing logic below is
    # written a single time for both of them.
    if method == 'hmmlearn':
        model = MultinomialHMM(self.num_states, n_iter=100)
        model.n_features = self.num_observations
        model.startprob_ = init_prob
        model.emissionprob_ = self.B
        model.transmat_ = self.A
        decode = model.predict
    else:
        def decode(observations):
            return self.decode(observations, init_prob)

    if window == -1:
        return decode(x)

    # ``np.int`` was removed in NumPy 1.24; the builtin ``int`` is the same
    # dtype that alias used to resolve to.
    result = np.zeros(x.shape[0], dtype=int)
    result[0:window] = decode(x[0:window])
    for i in range(window, x.shape[0]):
        # Only the newest state of each sliding window is kept.
        result[i] = decode(x[i - window + 1:i + 1])[-1]
    return result
def get_hmm(df, n_components, n_features, startprob=None):
    """Assemble a ``MultinomialHMM`` whose parameters are derived from ``df``.

    The transition matrix comes from ``get_transmat`` applied to the state
    list extracted from ``df["label"]``; the emission matrix comes from
    ``get_emission`` applied to the predictions extracted from ``df["pred"]``
    together with that state list. The model is not fitted here.

    Args:
        df: object indexable by ``"label"`` and ``"pred"``.
        n_components: number of hidden states passed to ``MultinomialHMM``.
        n_features: value assigned to the model's ``n_features``.
        startprob: optional initial state distribution. When omitted, the
            historical prior ``[0.5, 0.05, 0.4, 0.05]`` is used; it has four
            entries, so pass an explicit vector for any other
            ``n_components``.

    Returns:
        The configured ``MultinomialHMM`` instance.
    """
    _, state_list = get_ubie_label(df["label"])
    pred_list = get_pred_for_hmm(df["pred"])

    if startprob is None:
        startprob = [0.5, 0.05, 0.4, 0.05]

    clf = MultinomialHMM(n_components=n_components)
    clf.n_features = n_features
    clf.transmat_ = get_transmat(state_list)
    clf.emissionprob_ = get_emission(pred_list, state_list)
    # Copy so the model never aliases a caller-owned array.
    clf.startprob_ = np.array(startprob, dtype=float)
    return clf
def predict_prob(self, x, init_prob=None, window=-1):
    """Return ``predict_proba(x)`` from an HMM built from this object.

    A ``MultinomialHMM`` with ``self.num_states`` states is configured with
    ``self.A`` as transition matrix, ``self.B`` as emission matrix and
    ``self.num_observations`` as its feature count.

    Args:
        x: observations forwarded unchanged to ``predict_proba``.
        init_prob: initial state distribution; a uniform distribution over
            the states is used when it is ``None``.
        window: accepted for signature compatibility; not used here.

    Returns:
        Whatever ``MultinomialHMM.predict_proba`` returns for ``x``.
    """
    start = init_prob
    if start is None:
        state_count = self.num_states
        start = np.array([1 / state_count for _ in range(state_count)])

    hmm = MultinomialHMM(self.num_states)
    settings = (
        ("n_features", self.num_observations),
        ("startprob_", start),
        ("emissionprob_", self.B),
        ("transmat_", self.A),
    )
    for attribute, value in settings:
        setattr(hmm, attribute, value)
    return hmm.predict_proba(x)
0.0, 0.0, 0.2, 0.0, 0.3, 0.0, 0.0, 0.0, 0.0, 0.0, 0.2, 0.0, 0.3, 0.0, 0.0, 0.0 ], [ 0.0, 0.0, 0.3, 0.0, 0.7, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0 ], [ 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.6, 0.4, 0.0, 0.0 ], [ 0.0, 0.0, 0.2, 0.0, 0.8, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0 ]]) hmmBol.n_features = 16 hmmBol.startprob_ = startprob hmmBol.transmat_ = transmat hmmBol.emissionprob_ = emmBol # Position HMM emmPos = np.array([[0.0, 0.0, 0.0, 0.0, 1.0], [0.0, 0.0, 0.0, 0.0, 1.0], [0.0, 0.0, 0.3, 0.7, 0.0], [0.0, 0.0, 0.0, 0.0, 1.0], [0.5, 0.5, 0.0, 0.0, 0.0], [0.5, 0.5, 0.0, 0.0, 0.0], [0.0, 0.0, 0.8, 0.2, 0.0], [0.0, 1.0, 0.0, 0.0, 0.0]]) hmmPos.n_features = 5 hmmPos.startprob_ = startprob hmmPos.transmat_ = transmat hmmPos.emissionprob_ = emmPos # Object HMM
def computeHMM(dataset, alphabet, num_matchstates=9, num_restarts=10):
    """Fit a profile-style ``MultinomialHMM`` from random initialisations.

    State layout, with ``m = num_matchstates`` and ``3 + 3 * m`` states:

    * ``0``                 begin state B
    * ``1 .. m``            match states M_1..M_m
    * ``m + 1``             end state
    * ``m + 2 .. 2m + 2``   insert states I_0..I_m
    * ``2m + 3 .. 3m + 2``  deletion states

    For every restart, random transition and emission rows are drawn with
    ``ProfileHMM.compute_random_row``, assigned to a fresh model, and the
    model is fitted to all sequences and scored on the same data.

    Args:
        dataset: sequence of sequences of symbols; it is iterated twice and
            every symbol must be a member of ``alphabet``.
        alphabet: iterable of symbols; a symbol's position becomes its
            integer code.
        num_matchstates: number of match states in the profile.
        num_restarts: number of random initialisations to try.

    Returns:
        The fitted model with the highest score, or ``None`` when
        ``num_restarts`` is 0.

    Raises:
        ValueError: re-raised from the last failed ``fit`` when no restart
            could be fitted.
    """
    alphabet = list(alphabet)
    num_symbols = len(alphabet)
    residue_mapper = {residue: code for code, residue in enumerate(alphabet)}

    # one begin, one end, num_matchstates + 1 insert states,
    # num_matchstates match states, num_matchstates deletion states.
    num_states = 3 + 3 * num_matchstates
    first_insert = num_matchstates + 2
    first_delete = 2 * num_matchstates + 3

    concat_dataset = np.concatenate(
        [[[residue_mapper[residue]] for residue in sequence]
         for sequence in dataset])
    dataset_lengths = [len(sequence) for sequence in dataset]

    best_score = None
    best_model = None
    last_error = None

    for _ in range(num_restarts):
        transition_matrix = np.zeros((num_states, num_states))
        emission_matrix = np.zeros((num_states, num_symbols))

        # The first num_matchstates + 2 states are B, M_1..M_m and the end
        # state; B and the end state emit nothing.
        # B goes to either M_1 or I_0. I_0 is the first insert state, so it
        # lives at index num_matchstates + 2 (column 2 would be M_2).
        b_row = ProfileHMM.compute_random_row(2)
        transition_matrix[0][1] = b_row[0]
        transition_matrix[0][first_insert] = b_row[1]

        for i in range(1, num_matchstates + 1):
            # Go to the next match state, an insert state or a delete state.
            m_row = ProfileHMM.compute_random_row(3)
            # next match state
            transition_matrix[i][i + 1] = m_row[0]
            # insert state
            transition_matrix[i][i + num_matchstates + 2] = m_row[1]
            # deletion state
            print('i: %d' % i)
            transition_matrix[i][i + 2 * num_matchstates + 2] = m_row[2]
            emission_matrix[i] = ProfileHMM.compute_random_row(num_symbols)

        # Insert states: either loop on self or move to the next match state.
        for i in range(first_insert, first_delete):
            row = ProfileHMM.compute_random_row(2)
            transition_matrix[i][i] = row[0]
            transition_matrix[i][i - (num_matchstates + 1)] = row[1]
            emission_matrix[i] = ProfileHMM.compute_random_row(num_symbols)

        # Deletion states: all but the last one get outgoing transitions.
        for i in range(first_delete, 3 * num_matchstates + 2):
            row = ProfileHMM.compute_random_row(2)
            transition_matrix[i][i] = row[0]
            transition_matrix[i][i - 2 * num_matchstates - 1] = row[1]

        model = MultinomialHMM(num_states, params="ets")
        model.n_features = num_symbols
        start_prob = np.zeros(num_states)
        start_prob[0] = 1.0  # every sequence starts in B
        print('start prob array')
        print(start_prob)
        model.startprob_ = start_prob
        model.transmat_ = transition_matrix
        model.emissionprob_ = emission_matrix

        try:
            model.fit(concat_dataset, dataset_lengths)
        except ValueError as err:
            # A rejected initialisation must not drop into a debugger or be
            # scored; remember the error and try the next restart.
            last_error = err
            continue

        print('model')
        print(model)
        # NOTE: pseudocount smoothing of emissionprob_ was sketched here in
        # the past but has never been enabled.
        print('emission probabilities')
        print(model.emissionprob_)

        score = model.score(concat_dataset, dataset_lengths)
        if best_score is None or score > best_score:
            best_score = score
            best_model = model

    if best_model is None and last_error is not None:
        raise last_error
    return best_model
def buildHMM(num_states, n_iter=10, tol=0.01, n_features=3):
    """Create an unfitted ``MultinomialHMM``.

    Args:
        num_states: number of hidden states (``n_components``).
        n_iter: value forwarded as the model's ``n_iter``.
        tol: value forwarded as the model's ``tol``.
        n_features: value assigned to the model's ``n_features``; defaults
            to 3, the value that used to be hard-coded.

    Returns:
        The configured, unfitted model.
    """
    model = MultinomialHMM(n_components=num_states, n_iter=n_iter, tol=tol)
    model.n_features = n_features
    return model