def log_cond_proba(self, x_future, x_past):
    '''Calculate the log-probability of observing the sequence x_future
    given that we have just observed x_past, under the self.model parameters.

    @param x_future: {0, ..., n_obs_states}-valued list
    @param x_past: {0, ..., n_obs_states}-valued list
    '''
    return hmm.forward(self.model, np.hstack((x_past, x_future)), scaling=True)[0] \
        - hmm.forward(self.model, np.array(x_past), scaling=True)[0]
def test(data_folder):
    all_tests = True
    for filename in sorted(list(os.listdir(data_folder))):
        with open(os.path.join(data_folder, filename), 'rb') as pickle_file:
            data = pickle.load(pickle_file)
        A = data['A']
        B = data['B']
        pi = data['pi']
        O = data['O']
        forward_answer = data['forward_answer']
        viterbi_answer = data['viterbi_answer']
        forward_valid = (abs(forward(A, B, pi, O) - forward_answer)
                         <= forward_answer * 10**-4)
        viterbi_valid = ((viterbi_answer == viterbi(A, B, pi, O))
                         .astype(int).sum() == viterbi_answer.shape[0])
        all_tests = all_tests and forward_valid and viterbi_valid
        print(filename)
        print('Forward:', forward_valid)
        print('Viterbi:', viterbi_valid)
        print()
    if all_tests:
        print('PASSED all tests')
    else:
        print('Some of the tests FAILED')
def test_dishonest_casino_larger_transition_p(self):
    '''Dishonest Casino Example with larger transition probabilities.'''
    # Create transition probability matrix
    A = np.array([[0.9, 0.1],
                  [0.1, 0.9]])
    # Create observation probability matrix; the casino is biased toward "6" in state "1"
    B = statutil.scale_row_sums(np.array([[1.0, 1.0, 1.0, 1.0, 1.0, 1.0],
                                          [1.0, 1.0, 1.0, 1.0, 1.0, 5.0]]))
    # Create the set of all observable symbols
    V = [1, 2, 3, 4, 5, 6]
    # Instantiate an HMM; note that Pi is a uniform distribution by default
    m = hmm.HMM(2, A=A, B=B, V=V)
    Obs = [1, 2, 3, 4, 5, 2, 1, 6, 6, 6, 5, 6]

    log_prob_Obs, Alpha, c = hmm.forward(m, Obs, scaling=1)
    assert_almost_equal(log_prob_Obs, -20.124, decimal=3,
                        err_msg='Wrong observation probability')

    Q_star, _, _ = hmm.viterbi(m, Obs, scaling=1)
    assert_equal(Q_star, [0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1],
                 err_msg='Wrong Viterbi path')

    Beta = hmm.backward(m, Obs, c)
    Gamma, Q_star = hmm.individually_optimal_states(Alpha, Beta)
    assert_almost_equal(
        Gamma,
        [[0.8189770516168013, 0.8482906260695058, 0.8525027084764197,
          0.8329611652077556, 0.7834127024175411, 0.6880018120129073,
          0.5161970090643716, 0.2130207566284025, 0.12024202874950358,
          0.10797060639721641, 0.15902649827833876, 0.14930464162738483],
         [0.18102294838319855, 0.15170937393049422, 0.14749729152358024,
          0.16703883479224435, 0.21658729758245884, 0.31199818798709256,
          0.4838029909356284, 0.7869792433715975, 0.8797579712504964,
          0.8920293936027837, 0.8409735017216613, 0.8506953583726152]],
        decimal=5, err_msg='Wrong state probabilities')
    assert_equal(Q_star, [0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1],
                 'Wrong individually-optimal states')
def test_dishonest_casino(self):
    '''Dishonest Casino Example.'''
    # Create transition probability matrix
    A = np.array([[0.99, 0.01],
                  [0.01, 0.99]])
    # Create observation probability matrix; the casino is biased toward "6" in state "1"
    B = statutil.scale_row_sums(np.array([[1.0, 1.0, 1.0, 1.0, 1.0, 1.0],
                                          [1.0, 1.0, 1.0, 1.0, 1.0, 5.0]]))
    # Create the set of all observable symbols
    V = [1, 2, 3, 4, 5, 6]
    # Instantiate an HMM; note that Pi is a uniform distribution by default
    m = hmm.HMM(2, A=A, B=B, V=V)
    Obs = [1, 2, 3, 4, 5, 2, 1, 6, 6, 6, 5, 6]

    log_prob_Obs, Alpha, c = hmm.forward(m, Obs, scaling=1)
    assert_almost_equal(log_prob_Obs, -20.9468006, decimal=5,
                        err_msg='Wrong observation probability')

    Q_star, _, _ = hmm.viterbi(m, Obs, scaling=1)
    assert_equal(Q_star, [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1],
                 'Wrong Viterbi path')

    Beta = hmm.backward(m, Obs, c)
    Gamma, Q_star = hmm.individually_optimal_states(Alpha, Beta)
    assert_almost_equal(
        Gamma,
        [[0.63711364302936, 0.6348934929050587, 0.6271179131667495,
          0.6117100305977996, 0.5845543683193845, 0.5383975935172204,
          0.46091113744414974, 0.3313982095474306, 0.28864618346708165,
          0.27562909135388625, 0.27498372625848855, 0.26932891011973825],
         [0.36288635697064003, 0.3651065070949412, 0.3728820868332506,
          0.38828996940220045, 0.4154456316806155, 0.4616024064827796,
          0.5390888625558502, 0.6686017904525694, 0.7113538165329184,
          0.7243709086461138, 0.7250162737415115, 0.7306710898802617]],
        decimal=5, err_msg='Wrong state probabilities')
    assert_equal(Q_star, [0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1],
                 'Wrong individually-optimal states')
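# Both casino tests pass scaling=1 because raw forward probabilities shrink
# geometrically with sequence length and underflow for long inputs; the
# standard fix renormalizes each alpha column and recovers the log-likelihood
# from the scale factors. Below is a minimal sketch of that (Rabiner-style)
# recursion; it is NOT the internals of the `hmm` module used above, and the
# symbol remapping in the usage note is an assumption about how V maps to
# 0-based indices.
import numpy as np

def scaled_forward_sketch(A, B, pi, obs):
    """Return (log P(obs), scaled alpha of shape (T, N), scale factors c)."""
    T, N = len(obs), len(pi)
    alpha = np.zeros((T, N))
    c = np.zeros(T)
    alpha[0] = pi * B[:, obs[0]]        # initialization: pi_i * b_i(o_1)
    c[0] = alpha[0].sum()
    alpha[0] /= c[0]                    # renormalize so the row sums to 1
    for t in range(1, T):
        alpha[t] = (alpha[t - 1] @ A) * B[:, obs[t]]
        c[t] = alpha[t].sum()
        alpha[t] /= c[t]
    return np.log(c).sum(), alpha, c    # P(obs) is the product of the c_t

# Usage note: with the A, the row-normalized B, and a uniform pi from
# test_dishonest_casino, and Obs remapped to 0-based symbol indices
# ([o - 1 for o in Obs]), the returned log-likelihood should match the
# asserted -20.9468006 up to rounding.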
def log_proba(self, observation):
    '''Calculate the log-probability of observing the sequence observation
    under the self.model parameters.

    @param observation: {0, ..., n_obs_states}-valued list
    '''
    return hmm.forward(self.model, np.array(observation), scaling=True)[0]
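# A hedged usage sketch for the two methods above. `DiscreteHMMWrapper` is a
# hypothetical stand-in for whatever class owns log_proba/log_cond_proba; the
# point is only the chain-rule identity the conditional method implements:
#   log p(x_future | x_past) = log p(x_past ++ x_future) - log p(x_past),
# i.e. the difference of two forward-pass log-likelihoods.
#
# wrapper = DiscreteHMMWrapper(...)                 # hypothetical constructor
# joint = wrapper.log_proba([1, 1, 0, 0, 1])        # log p(x_past ++ x_future)
# past = wrapper.log_proba([1, 1, 0])               # log p(x_past)
# cond = wrapper.log_cond_proba([0, 1], [1, 1, 0])  # log p(x_future | x_past)
# assert np.isclose(cond, joint - past)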
def test_alpha_values(grid, observations, test_values):
    print("Testing alpha values...")
    p, alpha = forward(observations, grid.get_hmm())
    assert alpha.shape == (3, grid.states_no), "Bad shape!"
    assert np.allclose(alpha, test_values), "Bad alpha values!"
    assert np.allclose(p, sum(test_values[2])), "Bad probability!"
    print(colored(">>> Alpha matrix looks right!", "green"))
    print("\n")
def get_alignment_posteriors(src_tokens, trg_tokens, transition_model, translation_model):
    "Compute the posterior alignment probability p(a_j=i | f, e) for each target token f_j."
    if isinstance(transition_model, TransitionModel):
        initial, transition = transition_model.get_parameters_for_sentence_pair(
            len(src_tokens))
        translation = translation_model.get_parameters_for_sentence_pair(
            src_tokens, trg_tokens)
        posteriors = np.zeros(
            (len(trg_tokens) - 1, len(src_tokens), len(src_tokens)))
        params = (initial, transition, translation)
        observations = np.arange(len(trg_tokens))
        alpha = forward(params, observations)
        beta = backward(params, observations)
        answers = viterbi(*params)
        # Pairwise posteriors over adjacent alignment positions
        for t in range(len(trg_tokens) - 1):
            numerator = ((alpha[t, :] * transition.T).T
                         * translation[:, t + 1] * beta[t + 1, :])
            posteriors[t] = numerator / np.sum(numerator)
        # Per-position posteriors p(a_t = i | f, e)
        numerator = alpha * beta
        single_posteriors = (numerator.T / np.sum(numerator, axis=1)).T
        log_likelihood = (
            np.log(initial[answers[0]])
            + np.sum(np.log(transition[answers[:-1], answers[1:]]))
            + np.sum(np.log(translation[answers, np.arange(len(trg_tokens))])))
        return (posteriors, single_posteriors), log_likelihood, answers
    else:
        # Here transition_model is a prior model
        prior = transition_model.get_parameters_for_sentence_pair(
            len(src_tokens), len(trg_tokens))
        translation = translation_model.get_parameters_for_sentence_pair(
            src_tokens, trg_tokens)
        numerator = prior * translation
        denominator = np.sum(numerator, axis=0)
        alignment_posteriors = numerator / denominator
        answers = np.argmax(alignment_posteriors, axis=0)
        arange = np.arange(len(trg_tokens))
        log_likelihood = (np.log(prior[answers, arange]).sum()
                          + np.log(translation[answers, arange]).sum())
        return [len(trg_tokens), alignment_posteriors.T], log_likelihood, answers
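# The pairwise loop above computes the textbook HMM edge posterior
#   xi_t(i, j) = alpha_t(i) * a_ij * b_j(o_{t+1}) * beta_{t+1}(j) / P(O),
# specialized to word alignment: hidden states are source positions and the
# emission at step t is the target token f_t. Reading the numerator line
# shape by shape (my inference from the code, not documented in it):
#   alpha[t, :]                     (I,)    forward scores over source positions
#   (alpha[t, :] * transition.T).T  (I, I)  entry (i, j) = alpha_t(i) * a_ij
#   * translation[:, t + 1]                 scales column j by b_j(f_{t+1})
#   * beta[t + 1, :]                        scales column j by beta_{t+1}(j)
# and dividing by the grand total of the matrix plays the role of 1 / P(O).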
def test_forward_algorithm(grids):
    print("Testing forward...")
    true_grid = np.random.choice(grids)
    print("The real model is \033[1m" + true_grid.name + "\033[0m")
    T = np.random.randint(2, 10)
    observations, _ = true_grid.get_sequence(T)
    print("The observed sequence is",
          ", ".join([Grid.COLORS[i] for i in observations]))
    best_grid, best_p = None, None
    for grid in grids:
        p, _ = forward(observations, grid.get_hmm())
        print("Probability that it comes from " + grid.name + " is %.7f." % p)
        if best_grid is None or best_p < p:
            best_grid, best_p = grid.name, p
    print("Most probably the sequence was generated from " + best_grid + ".")
    print("\n")
def test_forward_by_sequence_length(grids, runs_no=1000):
    print("See how sequence length influences p...")
    for T in range(1, 11):
        correct = 0
        for _ in range(runs_no):
            true_grid = np.random.choice(grids)
            observations, _ = true_grid.get_sequence(T)
            best_grid, best_p = None, None
            for grid in grids:
                p, _ = forward(observations, grid.get_hmm())
                if best_grid is None or best_p < p:
                    best_grid, best_p = grid.name, p
            correct += best_grid == true_grid.name
        perc = float(correct * 100) / runs_no
        print("%5d / %d (%5.2f%%) for T = %2d" % (correct, runs_no, perc, T))
    print("\n")
# `hmm = HMM(` below is an assumed opening for this call: the snippet begins
# mid-constructor, and the object is used as `hmm` afterwards.
hmm = HMM(
    pi=np.array([0.8, 0.2]),
    A=np.array([[0.9, 0.1],
                [0.1, 0.9]]),
    B=np.array([
        [1 / 6, 1 / 6, 1 / 6, 1 / 6, 1 / 6, 1 / 6],
        [1 / 10, 1 / 10, 1 / 10, 1 / 10, 1 / 10, 1 / 2],
    ]),
    state_names=["fair", "loaded"],
    obs_names=["1", "2", "3", "4", "5", "6"],
)
hmm.visualize()

obs = np.array([1, 4, 3, 6, 6, 4]) - 1  # -1 because die sides map to 0-based indices
p, alpha = forward(obs, hmm)
q, beta = backward(obs, hmm)
print("p = %f, q = %f" % (p, q))
print("alpha")
for l in alpha:
    print("%f %f" % (l[0], l[1]))
print()
print("beta")
for l in beta:
    print("%f %f" % (l[0], l[1]))
print()
states, delta = viterbi(obs, hmm)
# A (the transition matrix) is not part of this snippet; the value below is
# reconstructed from alpha_gt and delta_gt, which it reproduces exactly.
A = np.asarray([[0.3, 0.7],
                [0.8, 0.2]])
B = np.asarray([[0.5, 0.3, 0.2],
                [0.1, 0.1, 0.8]])
pi = np.asarray([0.6, 0.4])
O = np.asarray([1, 2, 1, 0])
alpha_gt = np.asarray([[0.18, 0.0172, 0.027276, 0.0054306],
                       [0.04, 0.1072, 0.003348, 0.00197628]])
forward_result_gt = 0.00740688
delta_gt = np.asarray([[0.18, 0.0108, 0.024192, 0.0036288],
                       [0.04, 0.1008, 0.002016, 0.00169344]])
viterbi_result_gt = np.asarray([0, 1, 0, 0])

forward_result, alpha = forward(A, B, pi, O)
print('Forward result test: {}'.format(
    abs(forward_result_gt - forward_result) < 10**-5))
print('Forward alpha test: {}'.format(
    np.all(np.abs(alpha - alpha_gt) < 10**-5)))

viterbi_result, delta = viterbi(A, B, pi, O)
print('Viterbi result test: {}'.format(
    (viterbi_result_gt == viterbi_result).all()))
print('Viterbi delta test: {}'.format(
    np.all(np.abs(delta - delta_gt) < 10**-5)))

test_A = np.array([[0.5, 0.5],
                   [0.4, 0.6]])
test_B = np.array([[0.2, 0.3, 0.3, 0.2],
                   [0.3, 0.2, 0.2, 0.3]])
test_pi = np.array([0.5, 0.5])
test_O = np.array([2, 2, 1, 0, 1, 3, 2, 0, 0])
test_viterbi, test_delta = viterbi(test_a3, test_b3, test_pi3, test_o3)
# print("test delta")
# print("delta3 = " + str(list(test_delta)))
# print("test_viterbi")
# print("viterbi3 = " + str(list(np.uint8(test_viterbi))))
print("********************************************")
print('Viterbi3 result test: {}'.format((test_viterbi == viterbi3).all()))
print('Viterbi3 delta test: {}'.format(
    np.all(np.abs(test_delta - delta3) < 10**-5)))
print("********************************************")
print("--------------------------------------------")
print("********************************************")
print("TESTING FORWARD ALGORITHM: 1")
test_forward, test_alpha = forward(test_a1, test_b1, test_pi1, test_o1)
# print("test alpha")
# print("alpha1 = " + str(list(test_alpha)))
# print("test_forward")
# print("forward1 = " + str(test_forward))
print("********************************************")
print('Forward1 result test: {}'.format(
    abs(test_forward - forward1) < 10**-5))
print('Forward1 alpha test: {}'.format(
    np.all(np.abs(test_alpha - alpha1) < 10**-5)))
print("********************************************")
print("TESTING FORWARD ALGORITHM: 2")
test_forward, test_alpha = forward(test_a2, test_b2, test_pi2, test_o2)
# print("test alpha")
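# A self-contained reference sketch of the two functions these tests exercise,
# matching the (A, B, pi, O) -> (result, trellis) call contract used above.
# These are the textbook recursions, not the implementation under test:
import numpy as np

def forward_sketch(A, B, pi, O):
    """Forward algorithm: returns (P(O), alpha) with alpha of shape (N, T)."""
    N, T = A.shape[0], len(O)
    alpha = np.zeros((N, T))
    alpha[:, 0] = pi * B[:, O[0]]
    for t in range(1, T):
        alpha[:, t] = (alpha[:, t - 1] @ A) * B[:, O[t]]
    return alpha[:, -1].sum(), alpha

def viterbi_sketch(A, B, pi, O):
    """Viterbi algorithm: returns (best path, delta) with delta of shape (N, T)."""
    N, T = A.shape[0], len(O)
    delta = np.zeros((N, T))
    psi = np.zeros((N, T), dtype=int)
    delta[:, 0] = pi * B[:, O[0]]
    for t in range(1, T):
        scores = delta[:, t - 1][:, None] * A   # (i, j) = delta_{t-1}(i) * a_ij
        psi[:, t] = scores.argmax(axis=0)
        delta[:, t] = scores.max(axis=0) * B[:, O[t]]
    path = np.zeros(T, dtype=int)
    path[-1] = delta[:, -1].argmax()
    for t in range(T - 2, -1, -1):              # backtrack through psi
        path[t] = psi[path[t + 1], t + 1]
    return path, delta

# Plugging in the A, B, pi, O from the ground-truth block above reproduces
# alpha_gt, forward_result_gt, delta_gt and viterbi_result_gt.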
####################
# HMM
####################
(a, b, pi) = datasets.getHMMData()

hmm.viterbi(array([0, 1, 1, 2]), a, b, pi)  # array([0, 0, 0, 1])
hmm.viterbi(array([0, 2, 1, 2]), a, b, pi)  # array([0, 1, 1, 1])

### WU 8
# example 1
hmm.viterbi(array([0, 1, 1, 1]), a, b, pi)  # 0 0 0 0
hmm.viterbi(array([0, 1, 2, 1]), a, b, pi)  # 0 0 1 1

al = hmm.forward(array([0, 1, 1, 2]), a, b, pi)
be = hmm.backward(array([0, 1, 1, 2]), a, b, pi)
hmm.sanityCheck(al, be)

##########
# parameter re-estimation
al = hmm.forward(array([0, 1, 1, 2]), a, b, pi)
be = hmm.backward(array([0, 1, 1, 2]), a, b, pi)
(a_new, b_new, pi_new) = hmm.reestimate(array([0, 1, 1, 2]), al, be, a, b, pi)
# >>> a_new
# array([[ 0.53662942,  0.46337058],
#        [ 0.39886289,  0.60113711]])
# >>> b_new
# array([[ 0.35001693,  0.55333559,  0.09664748],
#        [ 0.14235731,  0.44259786,  0.41504483]])
coef_fin = COEFS_FIN[c]
coef_fin3 = COEFS_FIN3[c]
Ptran, States = hmm.calculate_matrix(all_Ptran[c], all_States[c], N_PROCESSES[c])
for jb, b in enumerate(BATCHES):
    if b == 70 or CATEGORIES[jb] != c + 1:
        continue
    end_b = BATCHES[jb + 1] if b != 65 else 70
    sig = cleaned_signal[100_000 * b:100_000 * end_b]
    nstates = Ptran.shape[0]
    for k in range(len(sig) // 100_000):
        sub_sig = sig[100_000 * k:(k + 1) * 100_000]
        Psig = hmm.get_Psig(sub_sig, States, kexp)
        # One plain forward pass, one pass on the reversed signal (a backward
        # pass), and a second forward pass seeded with the reversed estimates.
        alpha0, etat0 = hmm.forward(Psig, Ptran, normalize=False)
        alpha1, etat1 = hmm.forward(Psig[::-1], np.transpose(Ptran),
                                    etat_in=etat0[::-1], coef=coefback)
        alpha2, etat2 = hmm.forward(Psig, Ptran, etat_in=etat1[::-1], coef=coeffor)
        # Forward-backward-style combination, renormalized per time step.
        alpha3 = etat1[::-1] * etat2 * Psig**kexpp
        for j, alp in enumerate(alpha3):
            alpha3[j] /= alp.sum()
        # Blend the three posterior estimates, then map state posteriors back
        # to signal values via the state matrix.
        pred = (coef_fin * alpha1[::-1]
                + (1 - coef_fin - coef_fin3) * alpha2
                + coef_fin3 * alpha3)
        full_pred[(b + k) * 100_000:(b + k + 1) * 100_000] = pred @ States

Ys = full_pred.copy()
Yopt, Thres = processing.optimize_thres_unsupervised(Ys.copy(),
cov_bis0 = 2.36 * np.eye(2)
cov_bis1 = 1.65 * np.eye(2)
cov_bis2 = 6.57 * np.eye(2)
cov_bis3 = 3.18 * np.eye(2)
covariances_init_bis = np.asarray([cov_bis0, cov_bis1, cov_bis2, cov_bis3])

states = [0, 1, 2, 3]
start_proba_init = np.ones(4) / 4
transition_proba_init = np.asarray([[1/2, 1/6, 1/6, 1/6],
                                    [1/6, 1/2, 1/6, 1/6],
                                    [1/6, 1/6, 1/2, 1/6],
                                    [1/6, 1/6, 1/6, 1/2]])

# 2
alpha_scaled, scale_alpha = hmm.forward(data_test, states, start_proba_init,
                                        transition_proba_init, means_init,
                                        covariances_init)
beta_scaled = hmm.backward(data_test, states, transition_proba_init,
                           means_init, covariances_init, scale_alpha)
gamma = hmm.gammas(data_test, states, alpha_scaled, beta_scaled, scale_alpha)

for i in states:
    y = np.zeros(100)
    for t in range(100):
        y[t] = gamma[t][i]
    plt.figure()
    plt.plot(y)
    plt.title("State %i" % (i + 1))
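# What the four plots above show: gamma[t][i] is the smoothed posterior
# p(state_t = i | all of data_test), so each figure traces one latent state's
# responsibility over the 100 time steps. A minimal sketch of that posterior
# (an assumption about what hmm.gammas computes: with per-step normalization
# the alpha/beta scale factors cancel, though the module's exact convention
# may differ since it also receives scale_alpha):
import numpy as np

def gammas_sketch(alpha_scaled, beta_scaled):
    g = np.asarray(alpha_scaled) * np.asarray(beta_scaled)
    return g / g.sum(axis=1, keepdims=True)  # normalize over states at each t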