def test_dishonest_casino_larger_transition_p(self):
    '''Dishonest casino example with a 0.1 probability of switching state.

    Runs forward, Viterbi, backward and the individually-optimal-state
    computation on one fixed roll sequence and compares each result with
    stored reference values.
    '''
    # Transition probabilities between the two hidden states.
    transition = np.array([[0.9, 0.1],
                           [0.1, 0.9]])
    # Emission weights, normalised by statutil.scale_row_sums; the second row
    # (state "1") is biased toward rolling a "6".
    emission = statutil.scale_row_sums(np.array([
        [1.0, 1.0, 1.0, 1.0, 1.0, 1.0],
        [1.0, 1.0, 1.0, 1.0, 1.0, 5.0],
    ]))
    # All observable symbols.
    symbols = [1, 2, 3, 4, 5, 6]
    # Pi is not passed; the original author notes it defaults to uniform.
    model = hmm.HMM(2, A=transition, B=emission, V=symbols)
    rolls = [1, 2, 3, 4, 5, 2, 1, 6, 6, 6, 5, 6]

    expected_gamma = [
        [0.8189770516168013, 0.8482906260695058, 0.8525027084764197,
         0.8329611652077556, 0.7834127024175411, 0.6880018120129073,
         0.5161970090643716, 0.2130207566284025, 0.12024202874950358,
         0.10797060639721641, 0.15902649827833876, 0.14930464162738483],
        [0.18102294838319855, 0.15170937393049422, 0.14749729152358024,
         0.16703883479224435, 0.21658729758245884, 0.31199818798709256,
         0.4838029909356284, 0.7869792433715975, 0.8797579712504964,
         0.8920293936027837, 0.8409735017216613, 0.8506953583726152],
    ]

    log_prob, alpha, scale = hmm.forward(model, rolls, scaling=1)
    assert_almost_equal(log_prob, -20.124, decimal=3,
                        err_msg='Wrong observation probability')

    viterbi_path, _, _ = hmm.viterbi(model, rolls, scaling=1)
    assert_equal(viterbi_path, [0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1],
                 err_msg='Wrong Viterbi path')

    beta = hmm.backward(model, rolls, scale)
    gamma, best_states = hmm.individually_optimal_states(alpha, beta)
    assert_almost_equal(gamma, expected_gamma, decimal=5,
                        err_msg='Wrong state probabilities')
    assert_equal(best_states, [0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1],
                 'Wrong individually-optimal states')
def test_forward_backward(self):
    """Check that forward and backward passes agree on the likelihood.

    Builds the HMM for the 'SIL' phone, runs ``hmm.forward_evaluate`` over
    frames 0..49 and ``hmm.backward_evaluate`` over frames 50..1, and asserts
    that the log of ``sum(alpha * beta)`` changes by less than 0.1 between
    consecutive frames. The per-frame vectors are kept on ``self.alpha`` and
    ``self.beta``.
    """
    pid = self.acmod.mdef.phone_id('SIL')
    h1 = hmm.HMM(self.acmod.mdef.pid2sseq(pid),
                 self.acmod.tmat[self.acmod.mdef.pid2tmat(pid)])
    mfcc = s2mfc.open(os.path.join(self.testdir, 'man.ah.111a.mfc')).getall()
    mfcc -= mfcc.mean(0)  # subtract the per-column mean in place
    feat = _1s_c_d_dd.compute(mfcc)

    alpha = None
    self.alpha = []
    for f in feat[0:50]:
        senscr = self.acmod.senone_compute(h1.iter_senones(), f)
        alpha = hmm.forward_evaluate(h1, senscr, alpha)
        self.alpha.append(alpha)

    beta = None
    self.beta = []
    for f in feat[50:0:-1]:
        # Note that this is time-shifted by one from the forward pass above
        senscr = self.acmod.senone_compute(h1.iter_senones(), f)
        beta = hmm.backward_evaluate(h1, senscr, beta)
        self.beta.append(beta)
    self.beta.reverse()  # restore chronological order

    ll = 0
    for a, b in zip(self.alpha, self.beta):
        newll = sum(a * b)
        if ll != 0:
            # assertTrue replaces the deprecated assert_ alias, which was
            # removed from unittest in Python 3.12.
            self.assertTrue(abs(log(ll) - log(newll)) < 0.1)
        ll = newll
def main(args):
    """Train an HMM with Baum-Welch and write parameters, likelihoods and decodings.

    Args:
        args: two-element sequence ``[sequences_file, initial_parameters_file]``.

    Exits with a non-zero status and a usage message on standard error when
    the number of arguments is not exactly two.

    Output files written (through ``fileHandler``):
    ``estimated_parameters.txt``, ``likelihoods.txt``,
    ``decodings_initial.txt`` and ``decodings_estimated.txt``.
    """
    import sys  # local import: only needed for the usage-error exit

    if len(args) != 2:
        # sys.exit(str) writes the message to stderr and exits with status 1;
        # the previous bare exit() reported success to the calling shell.
        sys.exit("Error. main.py needs two arguments\n"
                 "Example: python main.py sequences.fasta initial_parameters.txt")

    s = [1, 2, 3, 4]
    pParser = parser.pparser()
    parameters = pParser.parse_Parameters(args[1])
    p = parameters[0]
    a = parameters[1]
    e = parameters[2]
    q = ['I', 'D']
    x = util.compareSequences(args[0])

    markovModel = hmm.HMM(False, s, q, a, e, p)
    newModel = algorithms.baum_welch_log(markovModel, [x], 10)
    fileHandler.outputEstimatedParameters(newModel, 'estimated_parameters.txt')

    # Likelihood of the sequence under the initial and the re-estimated model.
    likelihoods = [algorithms.forward_log(markovModel, x),
                   algorithms.forward_log(newModel, x)]
    fileHandler.outputLikelihoods(likelihoods, 'likelihoods.txt')

    # Each decoding call receives its own copy of x.
    decodings_initial = algorithms.decodings(markovModel, x[:])
    fileHandler.outputDecodings(decodings_initial, 'decodings_initial.txt')
    decodings_estimated = algorithms.decodings(newModel, x[:])
    fileHandler.outputDecodings(decodings_estimated, 'decodings_estimated.txt')
def test_dishonest_casino(self):
    '''Dishonest casino example with a 0.01 probability of switching state.

    Runs forward, Viterbi, backward and the individually-optimal-state
    computation on one fixed roll sequence and compares each result with
    stored reference values.
    '''
    # Transition probabilities between the two hidden states.
    transition = np.array([[0.99, 0.01],
                           [0.01, 0.99]])
    # Emission weights, normalised by statutil.scale_row_sums; the second row
    # (state "1") is biased toward rolling a "6".
    emission = statutil.scale_row_sums(np.array([
        [1.0, 1.0, 1.0, 1.0, 1.0, 1.0],
        [1.0, 1.0, 1.0, 1.0, 1.0, 5.0],
    ]))
    # All observable symbols.
    symbols = [1, 2, 3, 4, 5, 6]
    # Pi is not passed; the original author notes it defaults to uniform.
    model = hmm.HMM(2, A=transition, B=emission, V=symbols)
    rolls = [1, 2, 3, 4, 5, 2, 1, 6, 6, 6, 5, 6]

    expected_gamma = [
        [0.63711364302936, 0.6348934929050587, 0.6271179131667495,
         0.6117100305977996, 0.5845543683193845, 0.5383975935172204,
         0.46091113744414974, 0.3313982095474306, 0.28864618346708165,
         0.27562909135388625, 0.27498372625848855, 0.26932891011973825],
        [0.36288635697064003, 0.3651065070949412, 0.3728820868332506,
         0.38828996940220045, 0.4154456316806155, 0.4616024064827796,
         0.5390888625558502, 0.6686017904525694, 0.7113538165329184,
         0.7243709086461138, 0.7250162737415115, 0.7306710898802617],
    ]

    log_prob, alpha, scale = hmm.forward(model, rolls, scaling=1)
    assert_almost_equal(log_prob, -20.9468006, decimal=5,
                        err_msg='Wrong observation probability')

    viterbi_path, _, _ = hmm.viterbi(model, rolls, scaling=1)
    assert_equal(viterbi_path, [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1],
                 'Wrong Viterbi path')

    beta = hmm.backward(model, rolls, scale)
    gamma, best_states = hmm.individually_optimal_states(alpha, beta)
    assert_almost_equal(gamma, expected_gamma, decimal=5,
                        err_msg='Wrong state probabilities')
    assert_equal(best_states, [0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1],
                 'Wrong individually-optimal states')
def setUp(self):
    """Build the two-state fixture and the ``hmm.HMM`` instance under test.

    Stores the label tuples, the probability maps, the label/index lookup
    tables and the derived ``A``, ``B`` and ``pi`` arrays on ``self``,
    printing each derived value as it is produced.
    """
    # Hidden states
    self.states = ('健康', '感冒')
    # Observable states
    self.observations = ('正常', '发冷', '发烧')

    self.start_probability = {'健康': 0.6, '感冒': 0.4}
    self.transition_probability = {
        '健康': {'健康': 0.7, '感冒': 0.3},
        '感冒': {'健康': 0.4, '感冒': 0.6},
    }
    self.emission_probability = {
        '健康': {'正常': 0.5, '发冷': 0.4, '发烧': 0.1},
        '感冒': {'正常': 0.1, '发冷': 0.3, '发烧': 0.6},
    }

    # Label <-> index lookup tables for states and observations.
    state_maps = generate_index_map(self.states)
    self.states_label_index, self.states_index_label = state_maps
    observation_maps = generate_index_map(self.observations)
    self.observations_label_index, self.observations_index_label = observation_maps
    print("states_label_index", self.states_label_index)
    print("states_index_label", self.states_index_label)
    print("observations_label_index", self.observations_label_index)
    print("observations_index_label", self.observations_index_label)

    # Convert the nested probability maps into array form.
    self.A = convert_map_to_matrix(self.transition_probability,
                                   self.states_label_index,
                                   self.states_label_index)
    print("A", self.A)
    self.B = convert_map_to_matrix(self.emission_probability,
                                   self.states_label_index,
                                   self.observations_label_index)
    print("B", self.B)
    self.pi = convert_map_to_vector(self.start_probability,
                                    self.states_label_index)
    print("Pi", self.pi)

    self.hmm = hmm.HMM(self.A, self.B, self.pi)
def init_model(self):
    '''
    Create ``self.model`` as an ``hmm.HMM`` built from ``self.n_obs_states``,
    ``self.n_markov_states``, ``self.ini_markov_state``,
    ``self.ini_trans_matrix`` and ``self.ini_b``.
    '''
    # Observation symbols are the integers 0 .. n_obs_states - 1.
    symbols = np.arange(self.n_obs_states)
    self.model = hmm.HMM(
        n_states=self.n_markov_states,
        Pi=self.ini_markov_state,
        V=symbols,
        A=self.ini_trans_matrix,
        B=self.ini_b,
    )
def main():
    """Create two grids and train a two-state HMM on a fixed observation list."""
    ocGrid = createGrid(100)
    dCube = createGrid(10)
    # 10 x 10 x 10 nested list filled with ones.
    dcube = [[[1 for _ in range(10)] for _ in range(10)] for _ in range(10)]
    #shiftCube(ocGrid,dCube,5)

    initial = np.array([0.5, 0.5])            # initial distribution
    transition = np.array([[0.5, 0.5],
                           [0.5, 0.5]])       # state transition matrix
    emission = np.array([[0.2, 0.4, 0.4],
                         [0.7, 0.2, 0.1]])    # observation matrix
    sequence = np.array([0, 1, 0, 1, 0, 1, 2, 0, 1, 0, 1, 0, 1, 2, 0, 1, 0])

    hdmm = hmm.HMM(transition, emission, initial)
    hdmm.train(sequence, 0.1)
def main():
    """Build an HMM network from the command-line inputs and feed it the HTK data.

    Reads the dictionary and phoneme files named on the command line, builds
    the model network, passes every item from ``htk.readhtk`` to
    ``model.step`` and finally calls ``model.print_result``.
    """
    options = init_argparse().parse_args()
    lexicon = read_dictionary(options.dict)
    phone_set = read_phonemes(options.phonemes)

    model = hmm.HMM(phone_set, lexicon)
    model.build_network()

    for item in htk.readhtk(options.input):
        model.step(item)

    model.print_result(options.frames)
def __init__(self):
    """Allocate the feature buffers, the HMM, the filterbank and prediction state."""
    self.count = 0
    # Flat buffer of 50 * 36 values: MFCC(12) + Delta1(12) + Delta2(12)
    self.buffer = np.zeros(50 * 36)
    self.c_buffer = np.zeros((5, 12))    # MFCC
    self.d_buffer = np.zeros((5, 12))    # Delta1
    self.melbuffer = np.zeros((3, 160))
    self.HMM = hmm.HMM()
    self.filterbank = self.mel()
    self.prediction_buffer = []
    # Twenty slots, each initialised to -1.
    self.space_buffer = np.zeros(20) - 1
    self.command = 5  # Stop
def fit(self, text):
    """Create ``self._model`` and populate it from statistics provided by *text*.

    Sets, in order: all transitions, the emission data for every tag in the
    tag set, and the initial probabilities.
    """
    tagset, tag_index = text.get_tagset()
    self._model = hmm.HMM(tagset, tag_index)

    self._model.set_all_transitions(text.calculate_transition_matrix())

    for tag in tagset:
        self._model.set_emission(tag, text.count_emission(tag))

    self._model.set_initial(text.calculate_initial_probability())
def test_create_model(self):
    '''Construct a two-state model (based on Mike's DC example) and check its matrices.'''
    # Transition probabilities
    transition = np.array([[.5, .5],
                           [.5, .5]])
    # Emission probabilities
    emission = np.array([
        [1.0, 1.0, 1.0, 1.0, 1.0, 1.0],
        [1.0, 1.0, 1.0, 1.0, 1.0, 1.0 / 2],
    ])
    # Symbols
    symbols = [1, 2, 3, 4, 5, 6]
    # Model
    model = hmm.HMM(2, A=transition, B=emission, V=symbols)

    expected = (transition, emission, [0.5, 0.5])
    TestHmm.assert_model_matrices_almost_equal(model, expected)
def kfold_cross_validate(directory, k):
    """Run k-fold cross validation over eight HMM variants and two baselines.

    For every fold, the fold's subset is held out for validation and the
    remaining subsets form the training set. Result slots are:

    * 0-3: HMMs without resampling (smooth both, emission only,
      transition only, no smoothing);
    * 4-7: the same four smoothing settings with resampling;
    * 8-9: baseline without and with resampling.

    Args:
        directory: passed through to the model constructors and validators.
        k: number of folds, passed to ``breakup_training`` and
            ``get_avg_results`` and used for the progress percentage.

    Returns:
        A list with one ``get_avg_results`` value per slot, in slot order.
    """
    print('Beginning k-fold cross validation...')
    subset_list = breakup_training(directory, k)

    # (smooth_trans, smooth_emiss) in the order the result slots expect.
    smoothing_settings = [(True, True), (False, True), (True, False), (False, False)]
    resample_settings = (False, True)
    hmm_slots = len(smoothing_settings) * len(resample_settings)

    # Outer list = each model, inner list = results per iteration.
    results = [[] for _ in range(hmm_slots + len(resample_settings))]

    # Loop through each subset list, run training + validation.
    for i, held_out in enumerate(subset_list):
        # Split the training docs into training + validation.
        validation_set = set(held_out)
        train_set = set(
            index
            for j, subset in enumerate(subset_list) if j != i
            for index in subset
        )

        slot = 0
        for resample in resample_settings:
            # Build all four models of this group before validating any of
            # them, matching the original construction/evaluation order.
            models = [
                hmm.HMM(directory, train_set, smooth_trans=smooth_trans,
                        smooth_emiss=smooth_emiss, resample=resample)
                for smooth_trans, smooth_emiss in smoothing_settings
            ]
            for model in models:
                results[slot].append(
                    cross_validate_hmm(directory, model, validation_set))
                slot += 1

        # Baseline with and without resampling.
        baselines = [baseline.Baseline(directory, train_set, resample=resample)
                     for resample in resample_settings]
        for offset, model in enumerate(baselines):
            results[hmm_slots + offset].append(
                cross_validate_baseline(directory, model, validation_set))

        # Status update.
        print(str((float(i + 1) / k) * 100) + '% complete')

    # Average results for each model across all cross-validation rounds.
    return [get_avg_results(model_results, k) for model_results in results]
def loadmodel(K, modelversion=2):
    """Load the pickled HMM parameters for *K* and build the requested model.

    Args:
        K: value substituted into ``experiments/data/hmm_k_{}.pkl``.
        modelversion: ``1`` builds ``hmm1.HMM`` from the natural log of the
            transition matrix and start probabilities; ``2`` (default) builds
            ``hmm2.HMM`` from the stored values unchanged.

    Returns:
        The constructed ``hmm1.HMM`` or ``hmm2.HMM`` instance.

    Raises:
        ValueError: if *modelversion* is neither 1 nor 2. Previously this
            case silently returned ``None``.
    """
    if modelversion not in (1, 2):
        raise ValueError('Unsupported modelversion: {!r}'.format(modelversion))

    # NOTE(security): pickle.load can execute arbitrary code; only load
    # parameter files that come from a trusted source.
    with open('experiments/data/hmm_k_{}.pkl'.format(K), 'rb') as f:
        d = pickle.load(f)

    transition_matrix = d['transition_matrix']
    start_prob = d['start_prob']
    if modelversion == 1:
        # This model version requires log-probabilities.
        transition_matrix = np.log(transition_matrix)
        start_prob = np.log(start_prob)
        model_class = hmm1.HMM
    else:
        model_class = hmm2.HMM

    return model_class(d['num_states'], transition_matrix, start_prob,
                       d['means'], d['stds'])
def build_hmm(self, model, init, srange, Nrange, times, nop=129):
    """Create ``self.hmm`` from the given parameters.

    Also records ``self.method_name`` as ``model + '-' + init``. The ``h``,
    ``u`` and ``v`` arguments of ``hmm.HMM`` are fixed to one-element arrays
    (0.5, 0 and 0 respectively).
    """
    self.method_name = model + '-' + init

    settings = dict(
        times=times,
        model=model,
        init=init,
        h=np.array([0.5]),
        s=srange,
        N=Nrange,
        u=np.array([0]),
        v=np.array([0]),
        nop=nop,
    )
    self.hmm = hmm.HMM(**settings)
def testBaumWelchTrain(self):
    """Train a model on simulated data and print how often the decoded state matches.

    Simulates 100 steps from ``self.hmm``, trains a separately constructed
    model with ``baum_welch_train``, decodes the state path and prints the
    fraction of positions where it equals the simulated state. No assertion
    is made on that fraction.
    """
    # Run a baum_welch_train on simulated data.
    observations_data, states_data = self.hmm.simulate(100)
    print('observations_data', observations_data)
    print('states_data', states_data)

    initial_transition = np.array([[0.5, 0.5], [0.5, 0.5]])
    initial_emission = np.array([[0.3, 0.3, 0.3], [0.3, 0.3, 0.3]])
    initial_start = np.array([0.5, 0.5])
    guess = hmm.HMM(initial_transition, initial_emission, initial_start)
    guess.baum_welch_train(observations_data)

    # Second element of state_path's result; consumed one item per true
    # state with next().
    decoded = guess.state_path(observations_data)[1]
    matches = 0.0
    for true_state in states_data:
        if next(decoded) == true_state:
            matches += 1
    print(matches / len(states_data))
class PredictHMM:
    """Predict the next state of a sequence using HMM parameters stored on disk."""

    def __init__(self):
        """Create the predictor; no per-instance state is needed."""
        # The original __init__ had no body at all, which is a syntax error.

    def predict(self, seq):
        """Return ``dpf.predict_next_state`` for *seq*.

        The transition, emission and prior arrays are loaded from
        ``resources/models/hmm_model/<len(seq)>_{a,b,pi}.npy``, so a model
        file set must exist for the length of *seq*.

        Args:
            seq: the observed sequence; it is copied into a list before use.

        Returns:
            Whatever ``dpf.predict_next_state`` returns for the loaded model.
        """
        N = 25
        M = 19
        T = len(seq)
        temp = list(seq)  # private copy handed to the predictor

        trms = np.load('resources/models/hmm_model/' + str(T) + '_a.npy')
        emis = np.load('resources/models/hmm_model/' + str(T) + '_b.npy')
        pri = np.load('resources/models/hmm_model/' + str(T) + '_pi.npy')

        model = hmm.HMM(N, M, T, transmission=trms, emission=emis, prior=pri)
        res = dpf.predict_next_state(model, temp, T)
        return res
def ch3Ensemble(V0=-65, V1=20, tau01=2., tau12=4., Vchar01=1., Vchar12=1.,
                Vhalf01=-20., Vhalf12=-25, nchannels=5):
    """Build an HMM for an ensemble of *nchannels* ``hmm.ch3hmm`` channels.

    The single-channel model is created by ``hmm.ch3hmm`` from the voltage and
    time-constant arguments, wrapped in an ``Ensemble``, and the ensemble's
    ``pstates``, ``output`` and ``Q`` are used to construct the returned
    ``hmm.HMM``.
    """
    channel = hmm.ch3hmm(
        V0=V0, V1=V1,
        tau01=tau01, tau12=tau12,
        Vhalf01=Vhalf01, Vhalf12=Vhalf12,
        Vchar01=Vchar01, Vchar12=Vchar12,
    )
    ensemble = Ensemble(channel, nchannels)
    return hmm.HMM(ensemble.pstates, ensemble.output, ensemble.Q)
def test_train_model(self):
    '''Dishonest casino example: 15 epochs of Baum-Welch (EM) training.

    Trains the model in place on a single observation sequence and compares
    the resulting matrices with stored reference values.
    '''
    # Transition probabilities between the two hidden states.
    transition = np.array([[0.99, 0.01],
                           [0.01, 0.99]])
    # Emission weights, normalised by statutil.scale_row_sums; the second row
    # (state "1") is biased toward rolling a "6".
    emission = statutil.scale_row_sums(np.array([
        [1.0, 1.0, 1.0, 1.0, 1.0, 1.0],
        [1.0, 1.0, 1.0, 1.0, 1.0, 5.0],
    ]))
    # All observable symbols.
    symbols = [1, 2, 3, 4, 5, 6]
    # Pi is not passed; the original author notes it defaults to uniform.
    model = hmm.HMM(2, A=transition, B=emission, V=symbols)

    rolls = [1, 2, 3, 4, 5, 2, 1, 6, 6, 6, 5, 6]
    training_set = [rolls]
    hmm.baum_welch(model, training_set, epochs=15, graph=False)

    expected_A = [[0.856658708052639, 0.14334129194736125],
                  [2.454940916925095e-16, 1.0]]
    expected_B = [
        [0.28329354031233306, 0.2866825838637413, 0.14334129194736112,
         0.14334129194736112, 0.14334129192821368, 9.896623857864685e-13],
        [0.004706380704415612, 4.3023359620169447e-11, 3.2510873580469717e-111,
         1.2201233032249015e-54, 0.19905872387205914, 0.7962348953805019],
    ]
    expected_Pi = [1.0, 4.364785210913299e-122]
    TestHmm.assert_model_matrices_almost_equal(
        model, (expected_A, expected_B, expected_Pi))
def simulate(membership, TM, TM0, TI0, Z, T1, s0, rho, _actions=None):
    """Simulate a warm-up period and estimate personalised T and Z.

    Args:
        membership: array whose row 0 holds the three mixing weights applied
            to ``TM[0]``, ``TM[1]`` and ``TM[2]``.
        TM: the three transition matrices that are mixed into ``TMn``.
        TM0, TI0: initial transition matrices given to the HMM as ``A``.
        Z: observation matrix; row ``s`` is the observation distribution of
            state ``s`` (three states and three observations are assumed).
        T1: number of warm-up steps; ``0`` skips the warm-up and training.
        s0: initial state.
        rho: passed to ``util.interaction_effect``.
        _actions: optional pre-defined actions, indexed by step; when absent
            each action is drawn from ``Binomial(1, 0.3)``.

    Returns:
        ``(TMn, TIn, actions, observations, s, b, T_hat, Z_hat)`` where ``b``
        is the normalised column of ``Z`` for the last observation.
    """
    # natural (true) transition
    TMn = (membership[0, 0] * TM[0]
           + membership[0, 1] * TM[1]
           + membership[0, 2] * TM[2])
    TIn = util.interaction_effect(TMn, rho)

    # t=1...T1
    actions = []
    observations = []
    s = s0

    # warm up loop
    if T1 > 0:
        for t in range(T1):
            if _actions is not None:
                a = _actions[t]
            else:
                a = np.random.binomial(1, 0.3, 1)[0]
            actions.append(a)
            s = np.random.choice(3, 1, p=TMn[s])[0]  # assumes 3 states
            o = np.random.choice(3, 1, p=Z[s])[0]
            observations.append(o)

        # The model must not be bound to the name ``hmm``: assigning to
        # ``hmm`` anywhere in this function makes it a local, so the original
        # ``hmm = hmm.HMM()`` raised UnboundLocalError.
        model = hmm.HMM()
        model.pi = np.array([0.5, 0.3, 0.2])  # ASSUMPTION
        model.A = np.array([TM0, TI0])
        model.B = np.copy(Z)
        model.train(observations, actions, 0.01)
        T_hat = model.A
        Z_hat = model.B
    else:
        o = np.random.choice(3, 1, p=Z[s])[0]
        T_hat = np.array([TMn, TIn])
        Z_hat = Z

    # initialize belief from the last observation
    b = Z[:, o]
    b = b / b.sum()

    # personalize T, Z
    return TMn, TIn, actions, observations, s, b, T_hat, Z_hat
def setUp(self):
    """Create ``self.d``, a 16-state, 2-symbol ``hmm.HMM`` used by the tests.

    Parameters come from https://github.com/phvu/misc/blob/master/viterbi/test1.py;
    that test is partly taken from cuHMM (https://code.google.com/p/chmm/).
    """
    # Initial distribution, transposed into a column.
    initial = np.array([[0.04, 0.02, 0.06, 0.04, 0.11, 0.11, 0.01, 0.09,
                         0.03, 0.05, 0.06, 0.11, 0.05, 0.11, 0.03, 0.08]]).T

    # 16 x 16 transition matrix.
    transition = np.array([
        [0.08, 0.02, 0.10, 0.05, 0.07, 0.08, 0.07, 0.04, 0.08, 0.10, 0.07, 0.02, 0.01, 0.10, 0.09, 0.01],
        [0.06, 0.10, 0.11, 0.01, 0.04, 0.11, 0.04, 0.07, 0.08, 0.10, 0.08, 0.02, 0.09, 0.05, 0.02, 0.02],
        [0.08, 0.07, 0.08, 0.07, 0.01, 0.03, 0.10, 0.02, 0.07, 0.03, 0.06, 0.08, 0.03, 0.10, 0.10, 0.08],
        [0.08, 0.04, 0.04, 0.05, 0.07, 0.08, 0.01, 0.08, 0.10, 0.07, 0.11, 0.01, 0.05, 0.04, 0.11, 0.06],
        [0.03, 0.03, 0.08, 0.10, 0.11, 0.04, 0.06, 0.03, 0.03, 0.08, 0.03, 0.07, 0.10, 0.11, 0.07, 0.03],
        [0.02, 0.05, 0.01, 0.09, 0.05, 0.09, 0.05, 0.12, 0.09, 0.07, 0.01, 0.07, 0.05, 0.05, 0.11, 0.06],
        [0.11, 0.05, 0.10, 0.07, 0.01, 0.08, 0.05, 0.03, 0.03, 0.10, 0.01, 0.10, 0.08, 0.09, 0.07, 0.02],
        [0.03, 0.02, 0.16, 0.01, 0.05, 0.01, 0.14, 0.14, 0.02, 0.05, 0.01, 0.09, 0.07, 0.14, 0.03, 0.01],
        [0.01, 0.09, 0.13, 0.01, 0.02, 0.04, 0.05, 0.03, 0.10, 0.05, 0.06, 0.06, 0.11, 0.06, 0.03, 0.14],
        [0.09, 0.03, 0.04, 0.05, 0.04, 0.03, 0.12, 0.04, 0.07, 0.02, 0.07, 0.10, 0.11, 0.03, 0.06, 0.09],
        [0.09, 0.04, 0.06, 0.06, 0.05, 0.07, 0.05, 0.01, 0.05, 0.10, 0.04, 0.08, 0.05, 0.08, 0.08, 0.10],
        [0.07, 0.06, 0.01, 0.07, 0.06, 0.09, 0.01, 0.06, 0.07, 0.07, 0.08, 0.06, 0.01, 0.11, 0.09, 0.05],
        [0.03, 0.04, 0.06, 0.06, 0.06, 0.05, 0.02, 0.10, 0.11, 0.07, 0.09, 0.05, 0.05, 0.05, 0.11, 0.08],
        [0.04, 0.03, 0.04, 0.09, 0.10, 0.09, 0.08, 0.06, 0.04, 0.07, 0.09, 0.02, 0.05, 0.08, 0.04, 0.09],
        [0.05, 0.07, 0.02, 0.08, 0.06, 0.08, 0.05, 0.05, 0.07, 0.06, 0.10, 0.07, 0.03, 0.05, 0.06, 0.10],
        [0.11, 0.03, 0.02, 0.11, 0.11, 0.01, 0.02, 0.08, 0.05, 0.08, 0.11, 0.03, 0.02, 0.10, 0.01, 0.11],
    ])

    # 16 x 2 observation matrix.
    emission = np.array([
        [0.01, 0.99],
        [0.58, 0.42],
        [0.48, 0.52],
        [0.58, 0.42],
        [0.37, 0.63],
        [0.33, 0.67],
        [0.51, 0.49],
        [0.28, 0.72],
        [0.35, 0.65],
        [0.61, 0.39],
        [0.97, 0.03],
        [0.87, 0.13],
        [0.46, 0.54],
        [0.55, 0.45],
        [0.23, 0.77],
        [0.76, 0.24],
    ])

    self.d = hmm.HMM(initial, transition, emission)
def build_hmm_from_feature_matrices(self, feature_matrices, nstates,
                                    max_iterations=200,
                                    convergence_threshold=0.001,
                                    show_plots=False):
    """Train HMM parameters on a worker thread and return them as a new ``hmm.HMM``.

    The three internal work arrays are reset to ``nstates`` rows by
    ``feature_matrices[0].shape[1]`` columns of the log-zero value, training
    runs in a separate thread that delivers its result through a queue, and,
    when *show_plots* is true, ``plt.show()`` is called while training runs.

    Returns:
        A new ``hmm.HMM`` initialised from the trained parameters.
    """
    grid_shape = (nstates, feature_matrices[0].shape[1])
    self.__a = np.full(grid_shape, self.__log_zero)
    self.__b = np.full(self.__a.shape, self.__log_zero)
    self.__g = np.full(self.__a.shape, self.__log_zero)
    self.__iteration = 0

    if show_plots:
        self.__create_plots()
        self.__animation = animation.FuncAnimation(
            self.__fig, self.__update_plots,
            interval=1000, blit=False, repeat=False)

    # The worker puts the trained parameters on this queue.
    result = Queue.Queue()
    worker_args = [feature_matrices, nstates, result,
                   max_iterations, convergence_threshold]
    training_thread = Thread(target=self.__train_hmm, args=worker_args)
    training_thread.start()

    if show_plots:
        plt.show()

    training_thread.join()

    trained = hmm.HMM()
    trained.initialize_from_hmm_parameters(result.get())
    return trained
def main(): logging.basicConfig(stream=sys.stdout, level=logging.DEBUG if DEBUG else logging.INFO) #hmm.test_hmm() transition_probs = [ [0.7, 0.3], [0.4, 0.6] ] emission_probs = [[0.4, 0.2, 0.3, 0.1], [0.2, 0.4, 0.1, 0.3]] initial_probs = [0.6, 0.4] state_labels = ['S1', 'S2'] emission_labels = ['a', 'c', 'g', 't'] model = hmm.HMM(initial_probs, transition_probs, emission_probs, state_labels, emission_labels) emission_seq_labels = [c for c in 'accgta'] emission_idx_list = model._get_emission_idx_seq_from_label_seq(emission_seq_labels) print("O/p prob", model.calc_prob_output_sequence(emission_seq_labels)) print(model.get_likelihood(5, 'S1', emission_seq_labels)) assert hmm.isclose(model.get_likelihood(5, 'S1', emission_seq_labels), model.alpha_t_helper(5, 0, emission_idx_list)/model.calc_prob_output_sequence(emission_seq_labels)) print(model.get_likelihood(5, 'S2', emission_seq_labels)) print(model.get_likelihood(3, 'S1', emission_seq_labels)) print(model.get_likelihood(3, 'S2', emission_seq_labels)) #print(model.alpha_t_helper(5, 1, emission_idx_list)) pretty_print_header("Viterbi algorith on ACCGTA to get most likely sequence of states:") print(model.get_most_likely_state_seq_from_labels(emission_seq_labels)) """
observations_label_index, observations_index_label = generate_index_map( observations) # {'cold': 1, 'dizzy': 2, 'normal': 0} A = convert_map_to_matrix(transition_probability, states_label_index, states_label_index) print A B = convert_map_to_matrix(emission_probability, states_label_index, observations_label_index) print B observations_index = convert_observations_to_index( observations, observations_label_index) Pi = convert_map_to_vector(start_probability, states_label_index) print Pi h = hmm.HMM(A, B, Pi) V, p = h.viterbi(observations_index) print " " * 7, " ".join( ("%10s" % observations_index_label[i]) for i in observations_index) for s in range(0, 2): print "%7s: " % states_index_label[s] + " ".join("%10s" % ("%f" % v) for v in V[s]) print '\nThe most possible states and probability are:' p, ss = h.state_path(observations_index) for s in ss: print states_index_label[s], print p # run a baum_welch_train observations_data, states_data = h.simulate(100) # print observations_data
import hmm as HiddenMarkov import gc import utils as utls import sys datasetFile = "dataset.txt" outFile = "out.txt" testDataSize = 200 datasetFile = sys.argv[1] outFile = sys.argv[2] print("initializing hmm...") hiddenMarkovModel = HiddenMarkov.HMM(datasetFile) print("Correcting the sentences...") results = list() data = hiddenMarkovModel.errorFullDataSet[:testDataSize] dataLength = len(data) for i in range(dataLength): temp = hiddenMarkovModel.viterbi(data[i]) results.append(temp) if not (i % 100): gc.collect() #evaluation correctEstimatedWordCount = 0 wrongTypedWordCount = 0 for i in range(dataLength): counts = utls.evaluateSentence(data[i], results[i]) correctEstimatedWordCount = correctEstimatedWordCount + counts[1]
def main():
    """Run the HMM self-check harness and print a verdict for each stage.

    Stages, in order: model read/write round trip, forward algorithm, Viterbi
    decoding, backward algorithm, and unsupervised (EM) training. Each stage
    loads its model, writes its output file and compares it with a gold file
    via ``compareFiles``. The process exits with status -1 if the read/write
    round trip fails.

    Messages are emitted with single-argument ``print(...)`` calls, which
    behave identically on Python 2 and Python 3.
    """
    model = hmm.HMM()

    print("-------------Preliminary setup----------------")
    if True:
        existingFile = 'models/two_english'
        newFile = "two_english_test"
        model.load(existingFile)
        model.dump(newFile)
        eq = compareFiles(newFile + ".emit", existingFile + ".emit", True)
        if eq:
            eq2 = compareFiles(newFile + ".trans", existingFile + ".trans", True)
        # eq2 is only evaluated when eq is true (short-circuit).
        if eq and eq2:
            print("HMM read/write works correctly")
        else:
            print("HMM read/write failed!")
            sys.exit(-1)

    print("-------------Forward Algorithm----------------")
    if True:
        model.load("models/partofspeech.browntags.trained")
        obsfilebase = "data/ambiguous_sents"
        corpus = observations.load_observations(obsfilebase + ".obs")
        outputfile = obsfilebase + '.forwardprob'
        o2 = []
        with open(outputfile, 'w') as o:
            for observation in corpus:
                res = model.forward(observation)
                if res is not None:
                    o2.append(res[2]['VERB'])
                o.write(str(model.forward_probability(observation)) + '\n')
        refo2 = [
            0.0, 0.0, 0.0, 3.653679756807993e-11, 0.0, 0.0,
            4.312565970191802e-12, 3.654779278846958e-11,
            1.6086166116798018e-07, 0.0, 0.0
        ]
        for i, expected in enumerate(refo2):
            if len(o2) <= i:
                print("Error: Nothing returned from Forward Algorithm!")
            elif abs(o2[i] - expected) > 1e-14:
                print("Error in Forward Algorithm: Probability of Verb at t=2 should be "
                      + str(expected) + " not " + str(o2[i]))
        eq = compareFiles(outputfile, "gold/ambiguous_sents.prob")
        if eq:
            print("Forward Algorithm passed basic sanity check")
        else:
            print("Error in Overall Forward Probability")

    print("-------------Viterbi Algorithm----------------")
    if True:
        model.load("models/partofspeech.browntags.trained")
        obsfilebase = "data/ambiguous_sents"
        corpus = observations.load_observations(obsfilebase + ".obs")
        outputfile = obsfilebase + '.tagged.obs'
        with codecs.open(outputfile, 'w', 'utf8') as o:
            for observation in corpus:
                stateseq = model.viterbi(observation)
                if stateseq is None:
                    continue
                # adds most likely states as 'tags' on observation
                observation.stateseq = stateseq
                o.write(str(observation))
        eq = compareFiles(outputfile, "gold/ambiguous_sents.tagged.obs")
        if eq:
            print("Viterbi Completed Successfully")
        else:
            print("Error in Viterbi")

    print("-------------Backwards Algorithm----------------")
    if True:
        model.load("models/partofspeech.browntags.trained")
        obsfilebase = "data/ambiguous_sents"
        corpus = observations.load_observations(obsfilebase + ".obs")
        outputfile = obsfilebase + '.backwardprob'
        o2 = []
        with open(outputfile, 'w') as o:
            for observation in corpus:
                res = model.backward(observation)
                if res is not None:
                    o2.append(res[2]['VERB'])
                o.write(str(model.backward_probability(observation)) + '\n')
        refo2 = [
            2.3589871535491068e-07, 1.8514313765823803e-13,
            2.140512612882977e-06, 2.0333825508441356e-06,
            4.339252852607301e-10, 1.4033802247403003e-09,
            1.4162117145319527e-08, 5.011761202650785e-06,
            2.0776974177243364e-09, 5.391970636677047e-07,
            2.147210857790581e-07
        ]
        for i, expected in enumerate(refo2):
            if len(o2) <= i:
                print("Error: Nothing returned from Backward Algorithm!")
            elif abs(o2[i] - expected) > 1e-14:
                print("Error in Backward Algorithm: Probability of Verb at t=2 should be "
                      + str(expected) + " not " + str(o2[i]))
        eq = compareFiles(outputfile, "gold/ambiguous_sents.prob")
        if eq:
            print("Backward Algorithm passed basic sanity check")
        else:
            print("Error in Overall Backward Probability")

    print("------------------EM--------------------")
    if True:
        modelbase = "models/two_english"
        model.load(modelbase)
        obsfilename = "english_words"
        obsfilebase = "data/" + obsfilename
        corpus = observations.load_observations(obsfilebase + ".obs")
        log_likelihood = model.learn_unsupervised(corpus)
        # write the trained model
        ref_likelihood = -105954.94191  # -152860.669251 in base 2
        if log_likelihood is None or abs(log_likelihood - ref_likelihood) > 0.05:
            print("Error: likelihood should be " + str(ref_likelihood) +
                  " but is " + str(log_likelihood))
        finalprefix = modelbase + '.' + obsfilename + '.trained'
        model.dump(finalprefix)
        goldprefix = "gold/two_english.english_words.trained"
        learnedModel = hmm.HMM()
        learnedModel.load(finalprefix)
        refModel = hmm.HMM()
        refModel.load(goldprefix)
        eq = learnedModel.isEqual(refModel, 1e-13)
        if eq:
            print("EM implemented correctly!")
        else:
            print("Error in EM")
def test_create(self):
    """Check that the factory and a directly constructed HMM agree on item 0.

    Builds one HMM directly from the model definition for the
    ('OW_four', 'F_four', 'R_four') phone and one through ``self.factory``,
    then compares their first elements.
    """
    pid = self.acmod.mdef.phone_id('OW_four', 'F_four', 'R_four')
    h1 = hmm.HMM(self.acmod.mdef.pid2sseq(pid),
                 self.acmod.tmat[self.acmod.mdef.pid2tmat(pid)])
    h2 = self.factory.create('OW_four', 'F_four', 'R_four')
    # assertEqual replaces the deprecated assertEquals alias, which was
    # removed from unittest in Python 3.12.
    self.assertEqual(h1[0], h2[0])
# -*- coding:utf-8 -*- # Filename: test_weather.py # Author:hankcs # Date: 2016-08-06 PM6:04 import numpy as np import hmm import random A = np.array([[0.5, 0.5], [0.5, 0.5]]) B = np.array([[0.16, 0.16, 0.16, 0.16, 0.16, 0.16], [0.16, 0.16, 0.16, 0.16, 0.16, 0.16]]) pi = np.array([0.5, 0.5]) h = hmm.HMM(A, B, pi) # print observations_data # print states_data for i in range(100): size = 100 observations_data = np.empty([size], dtype=int) for j in range(size): rand = random.randint(1, 100) if rand <= 10: observations_data[j] = 0 elif rand <= 20: observations_data[j] = 1 elif rand <= 30: observations_data[j] = 2 elif rand <= 40: observations_data[j] = 3 elif rand <= 50:
def test_create(self):
    """Construct an HMM for phone id 352; passes if construction does not raise."""
    mdef = self.acmod.mdef
    senone_sequence = mdef.pid2sseq(352)
    transition_matrix = self.acmod.tmat[mdef.pid2tmat(352)]
    h1 = hmm.HMM(senone_sequence, transition_matrix)
return m A = convert_map_to_matrix(transition_probability, states_label_index, states_label_index) print(A) B = convert_map_to_matrix(emission_probability, states_label_index, observations_label_index) print(B) observations_index = convert_observations_to_index(observations, observations_label_index) print(observations_index) pi = convert_map_to_vector(start_probability, states_label_index) print(pi) h = hmm.HMM(A, B, pi) V, p = h.viterbi(observations_index) print( " " * 7, " ".join( ("%10s" % observations_index_label[i]) for i in observations_index)) for s in range(0, 2): print("%7s: " % states_index_label[s] + " ".join("%10s" % ("%f" % v) for v in V[s])) print('\nThe most possible states and probability are:') p, ss = h.state_path(observations_index) for s in ss: print(states_index_label[s], ) print(p) # run a baum_welch_train observations_data, states_data = h.simulate(10)
def main():
    """Run the HMM self-check harness and print a verdict for each stage.

    Stages, in order: model read/write round trip, forward algorithm,
    supervised learning, Viterbi decoding, backward algorithm, and
    unsupervised (EM) training. Each stage loads its model, writes its output
    and compares it with a gold file or gold model. The process exits with
    status -1 if the read/write round trip fails.
    """
    model = hmm.HMM()

    print("-------------Preliminary setup----------------")
    if True:
        source_prefix = 'models/two_english'
        copy_prefix = "two_english_test"
        model.load(source_prefix)
        model.dump(copy_prefix)
        emit_ok = compareFiles(copy_prefix + ".emit", source_prefix + ".emit", True)
        if emit_ok:
            trans_ok = compareFiles(copy_prefix + ".trans", source_prefix + ".trans", True)
        # trans_ok is only evaluated when emit_ok is true (short-circuit).
        if emit_ok and trans_ok:
            print("HMM read/write works correctly")
        else:
            print("HMM read/write failed!")
            sys.exit(-1)

    print("-------------Forward Algorithm----------------")
    if True:
        model.load("models/encoding.message.trained")
        obs_base = "data/message_short"
        corpus = observations.load_observations(obs_base + ".obs")
        out_path = obs_base + '.forwardprob'
        observed = []
        with open(out_path, 'w') as out:
            for observation in corpus:
                trellis = model.forward(observation)
                if trellis is not None:
                    observed.append(trellis[1]['e'])
                out.write(str(model.forward_probability(observation)) + '\n')
        reference = [
            2.281674056874541e-61, 8.258943021002516e-289,
            4.463852239881595e-71, 7.902572774713472e-90,
            2.8349675683293275e-292, 0.00021461328799181662,
            2.8704406377717645e-145, 5.979495606734988e-294,
            1.9000669411442752e-05, 2.02982042347432e-87,
            2.116898876762055e-70, 0.0, 0.002086363130808398,
            3.156654312658676e-293
        ]
        for idx, expected in enumerate(reference):
            if idx >= len(observed):
                print("Error: Nothing returned from Forward Algorithm!")
            elif abs(observed[idx] - expected) > 1e-12:
                print(
                    "Error in Forward Algorithm: Probability of e at t=1 should be "
                    + str(expected) + " not " + str(observed[idx]))
        if compareFiles(out_path, "gold/message_short.forwardprob"):
            print("Forward Algorithm passed basic sanity check")
        else:
            print("Error in Overall Forward Probability")

    print("-------------Supervised Learning----------------")
    if True:
        model_base = "models/partofspeech"
        model.load(model_base)
        obs_base = "data/browntags"
        corpus = observations.load_observations(obs_base + ".obs")
        model.learn_supervised(corpus)
        trained_prefix = model_base + '.student.trained'
        model.dump(trained_prefix)
        gold_prefix = "gold/partofspeech.browntags.trained"
        learned = hmm.HMM()
        learned.load(trained_prefix)
        gold = hmm.HMM()
        gold.load(gold_prefix)
        if learned.isEqual(gold, 1e-8):
            print("Supervised learning implemented correctly!")
        else:
            print("Error in supervisedlearning")

    print("-------------Viterbi Algorithm----------------")
    if True:
        model.load("models/encoding.message.trained")
        obs_base = "data/message"
        corpus = observations.load_observations(obs_base + ".obs")
        out_path = obs_base + '.tagged.obs'
        with codecs.open(out_path, 'w', 'utf8') as out:
            for observation in corpus:
                best_path = model.viterbi(observation)
                if best_path is None:
                    continue
                # adds most likely states as 'tags' on observation
                observation.stateseq = best_path
                out.write(str(observation))
        if compareFiles(out_path, "gold/message.tagged.obs"):
            print("Viterbi Completed Successfully")
        else:
            print("Error in Viterbi")

    print("-------------Backwards Algorithm----------------")
    if True:
        model.load("models/encoding.message.trained")
        obs_base = "data/message_short"
        corpus = observations.load_observations(obs_base + ".obs")
        out_path = obs_base + '.backwardprob'
        observed = []
        with open(out_path, 'w') as out:
            for observation in corpus:
                trellis = model.backward(observation)
                if trellis is not None:
                    observed.append(trellis[1]['e'])
                out.write(str(model.backward_probability(observation)) + '\n')
        reference = [
            1.316154528121009e-06, 1.0, 1.0, 1.2842129295716715e-05, 1.0,
            1.0, 0.0001629601877945561, 0.03761379913095424,
            2.464196513136796e-06, 4.079841777243271e-12, 1.0,
            4.2966181326644535e-08, 1.0, 1.0
        ]
        for idx, expected in enumerate(reference):
            if idx >= len(observed):
                print("Error: Nothing returned from Backward Algorithm!")
            elif abs(observed[idx] - expected) > 1e-10:
                print(
                    "Error in Backward Algorithm: Probability of e at t=1 should be "
                    + str(expected) + " not " + str(observed[idx]))
        if compareFiles(out_path, "gold/message_short.backwardprob"):
            print("Backward Algorithm passed basic sanity check")
        else:
            print("Error in Overall Backward Probability")

    print("------------------EM--------------------")
    if True:
        model_base = "models/two_english"
        model.load(model_base)
        obs_name = "english_words"
        obs_base = "data/" + obs_name
        corpus = observations.load_observations(obs_base + ".obs")
        log_likelihood = model.learn_unsupervised(corpus)
        # write the trained model
        ref_likelihood = -105954.94191  # -152860.669251 in base 2
        if log_likelihood is None or abs(log_likelihood - ref_likelihood) > 0.05:
            print("Error: likelihood should be " + str(ref_likelihood) +
                  " but is " + str(log_likelihood))
        trained_prefix = model_base + '.' + obs_name + '.trained'
        model.dump(trained_prefix)
        gold_prefix = "gold/two_english.english_words.trained"
        learned = hmm.HMM()
        learned.load(trained_prefix)
        gold = hmm.HMM()
        gold.load(gold_prefix)
        if learned.isEqual(gold, 1e-10):
            print("EM implemented correctly!")
        else:
            print("Error in EM")