def train_model(dataset, threshold):
    """Build initial HMM parameters from the grid constants, then retrain repeatedly.

    Parameters
    ----------
    dataset : object
        Must provide ``read_file()`` and the ``xyToInt`` / ``obsToInt``
        lookups indexed below.
    threshold : float
        An iteration whose absolute log-likelihood change is below this value
        counts towards the stopping counter.

    Returns
    -------
    tuple
        ``(model, llikes)``: the HMM built in the last iteration that ran and
        the log-likelihood returned by ``model.train()`` in every iteration.

    Notes
    -----
    NOTE(review): ``model`` is only bound inside the loop, so the final
    ``return`` raises NameError if ``N_ITER`` is 0.
    """
    ### Set up ###
    states, outputs = dataset.read_file()  # `states` is not used below
    num_states = dataset.xyToInt.ravel().shape[0]
    num_outputs = len(dataset.obsToInt.keys())
    measure_p = np.zeros((num_outputs, num_states))
    # NOTE(review): the prior is hard-coded to 1/16 per state; presumably the
    # grid has 16 states -- confirm, otherwise it does not sum to 1.
    start_p = np.ones((num_states,1)) * 1.0/16.0
    # make the matrix of transition probs
    trans_p = np.identity(num_states)
    trans_p *= INITIAL_STAY_PROB
    for each_loc in VALID_LOCATIONS:
        # locations are indexed from 1 in VALID_LOCATIONS but from 0 in xyToInt
        int_repres = int(dataset.xyToInt[each_loc[0] - 1,each_loc[1] - 1])
        # distribute probs to neighbours
        neighbours = NEIGHBOURS[each_loc[0]][each_loc[1]]
        num_neighbours = float(len(neighbours))
        # do measurement probs
        int_col_repres = int(dataset.obsToInt[ACTUAL_COLOURS[each_loc[0]][each_loc[1]]])
        # every observation gets a third of the error mass first, then the
        # true colour's entry is overwritten with the accuracy probability
        measure_p[:,int_repres] = TOTAL_CAMERA_ERR_PROB / 3.0
        measure_p[int_col_repres,int_repres] = CAMERA_ACC_PROB
        for each_neighbour in neighbours:
            int_repres_neigh = int(dataset.xyToInt[each_neighbour[0] - 1 ,each_neighbour[1] - 1])
            # the probability of leaving is shared equally among the neighbours
            trans_p[int_repres, int_repres_neigh] += (1.0 - INITIAL_STAY_PROB) * (1.0/num_neighbours)
    ### Model training ###
    llikes = []
    ll_old = 10e10  # large sentinel so the first difference is never below threshold in practice
    print "\nTRANSITION P\n", trans_p
    print "\nMEASURE P\n", measure_p
    print "\nSTART P\n", start_p
    asym_cnt = 0  # number of sub-threshold iterations seen so far (never reset)
    for _ in range(N_ITER):
        # a fresh model is built each round from the previous round's parameters
        model = HMM(num_states, num_outputs, outputs, trans_p, measure_p, start_p)
        ll = model.train()
        print "Log Likelihood is ", ll
        llikes.append(ll)
        trans_p = model.transition_p
        measure_p = model.measure_p
        start_p = model.start_p
        diff = abs(ll_old - ll)
        print "Difference is", diff
        if diff < threshold:
            # the break happens on a sub-threshold iteration that finds the
            # counter already at 5, i.e. on the sixth such iteration
            if asym_cnt >= 5:
                print "Threshold change reached 5 times, stopping"
                break
            else:
                asym_cnt += 1
        ll_old = ll
    return model, llikes
def test_2():
    """Run the straight-maze scenario of test_1 with the regular noisy sensor.

    The highest probability at the end should still be at (3,0), but other
    cells are expected to keep lower, non-zero probabilities as well.
    """
    print('----------- Test 2: Noisy Sensor in Simple Robot Maze. -----------')
    evidence = [0, 2, 1, 3]
    problem = RobotProblem('maze_straight.maz', deterministic_sensor=False)
    print(HMM(problem).reason(evidence))
def toy(self):
    """Set up the toy simulation.

    One HMM/CMRF pair is configured with the toy parameters. For each of the
    ``self.ntimes`` repetitions a ``Task`` is created, filled with the
    brute-force enumeration and the Pareto frontier (plus the elapsed time of
    each) and appended to ``self.tasklist``.
    """
    self.tasklist = []
    features = self.get_feats_standard()
    model = HMM()
    self._set_params_toy(model)
    field = CMRF(model)

    for run_idx in range(self.ntimes):
        label = 'sim' + STUDY + '_' + self.name + '_' + str(run_idx)
        task = Task(label, field, features)

        # Brute force: enumerate all sequences together with their energies.
        with benchmark(task.name + 'brute') as timer:
            task.all_seq, task.all_seq_energy = self.bruteforce(field, features)
        task.brute_time = timer.elapsed

        # Pareto run of the toy simulation.
        with benchmark(task.name + 'pareto') as timer:
            task.frontier, task.frontier_energy = pareto_frontier(field, features)
        if self.plot_all:
            task.plot_frontier()
        task.pareto_time = timer.elapsed

        self.tasklist.append(task)
def program1(phones): path = r"C:\Users\Nicole Schwartz\Anaconda3\seniorProject\new\darpa-timit-acousticphonetic-continuous-speech\data\\" gmms = GMMs(phones, path) start = timeit.default_timer() gmms.train() stop = timeit.default_timer() elapsed = stop - start print("GMM training time: " + str(int(elapsed) / 60) + "m " + str(int(elapsed) % 60) + "s") start = timeit.default_timer() accuracyGMM = gmms.test() stop = timeit.default_timer() elapsed = stop - start print("GMM testing time: " + str(int(elapsed) / 60) + "m " + str(int(elapsed) % 60) + "s") print("Accuracy of GMMs alone= ", round(accuracyGMM * 100, 3)) hmm = HMM(phones, gmms.models, path) start = timeit.default_timer() hmm.train(400) stop = timeit.default_timer() elapsed = stop - start print("HMM training time: " + str(int(elapsed) / 60) + "m " + str(int(elapsed) % 60) + "s") start = timeit.default_timer() hmm.test() stop = timeit.default_timer() elapsed = stop - start print("HMM testing time: " + str(int(elapsed) / 60) + "m " + str(int(elapsed) % 60) + "s")
def featspacelen(self):
    """Vary the feature space and the sequence length.

    Reads ``featspace``, ``seqlen`` and ``run_brute`` from ``self.kwdargs``.
    Every repetition builds its own HMM/CMRF and feature set, optionally runs
    the brute-force enumeration, always runs the Pareto frontier, and stores
    the resulting ``Task`` in ``self.tasklist``.
    """
    self.tasklist = []
    n_feat = self.kwdargs['featspace']
    n_latent = 20
    length = self.kwdargs['seqlen']
    dims = [(n_latent, n_feat)] * length

    # One independent task per repetition
    for run_idx in range(self.ntimes):
        model = HMM()
        self._set_params_generic(model, length, dims)
        field = CMRF(model)
        features = self._gen_feats_generic(length, n_feat)
        label = ('sim' + STUDY + '_' + self.name + '_' + str(length)
                 + '_' + str(n_feat) + '_' + str(run_idx))
        task = Task(label, field, features)

        # The exhaustive enumeration is optional
        if self.kwdargs['run_brute']:
            with benchmark(task.name + 'brute') as timer:
                task.all_seq, task.all_seq_energy = self.bruteforce(field, features)
            task.brute_time = timer.elapsed

        # Pareto frontier run
        with benchmark(task.name + 'pareto') as timer:
            task.frontier, task.frontier_energy = pareto_frontier(field, features)
        if self.plot_all:
            task.plot_frontier(frontier_only=True)
        task.pareto_time = timer.elapsed

        self.tasklist.append(task)
def ziftied(self):
    """Set up the ziftied simulation.

    Takes the features from ``self.kwdargs['feats']``, configures a single
    HMM/CMRF pair and, per repetition, records a sampling run and a Pareto
    frontier run (each with its elapsed time) on a ``Task`` appended to
    ``self.tasklist``. The brute-force enumeration is not run here.
    """
    self.tasklist = []
    features = self.kwdargs['feats']
    # Not used below; the lookup is kept so a missing key still raises.
    weights = self.kwdargs['weights']
    model = HMM()
    self._set_params_ziftied(model)
    field = CMRF(model)

    for run_idx in range(self.ntimes):
        label = 'bio' + str(STUDY) + '_' + self.name + '_' + str(run_idx)
        task = Task(label, field, features)

        # Sample the frontier
        with benchmark(task.name + 'sample') as timer:
            task.sample_seq, task.sample_seq_energy = self.sample(field, features)
        task.sample_time = timer.elapsed

        # Pareto frontier run
        with benchmark(task.name + 'pareto') as timer:
            task.frontier, task.frontier_energy = pareto_frontier(field, features)
        if self.plot_all:
            task.plot_frontier(frontier_only=True, plot_samples=True)
        task.pareto_time = timer.elapsed

        self.tasklist.append(task)
def randfeatsuntied(self):
    """Run many iterations of toy with random probs.

    Each repetition gets a freshly parameterised HMM/CMRF and a new random
    feature set; the brute-force enumeration and the Pareto frontier are both
    run and timed, and the ``Task`` is appended to ``self.tasklist``.
    """
    self.tasklist = []
    # The result is replaced inside the loop; the call itself is kept.
    features = self.get_feats_standard()

    for run_idx in range(self.ntimes):
        model = HMM()
        self._set_params_randprobsuntied(model)
        field = CMRF(model)
        features = self._gen_feats_random()
        label = 'sim' + STUDY + '_' + self.name + '_' + str(run_idx)
        task = Task(label, field, features)

        # Brute force: enumerate all sequences together with their energies.
        with benchmark(task.name + 'brute') as timer:
            task.all_seq, task.all_seq_energy = self.bruteforce(field, features)
        task.brute_time = timer.elapsed

        # Pareto frontier run
        with benchmark(task.name + 'pareto') as timer:
            task.frontier, task.frontier_energy = pareto_frontier(field, features)
        if self.plot_all:
            task.plot_frontier()
        task.pareto_time = timer.elapsed

        self.tasklist.append(task)
def main():
    """Fit a GMM and an HMM on the EMGaussian data and report how well they fit.

    Loads ``EMGaussian.data`` / ``EMGaussian.test`` from
    ``classification_data_HWK2`` under the current working directory, keeps the
    first two columns, fits each model with ``K = 4`` clusters, saves the
    cluster plots under ``figs/`` and prints the mean log-likelihood on both
    sets.
    """
    pref_path = os.getcwd() + "/classification_data_HWK2/EMGaussian"
    # Context managers make sure the data files are closed once parsed.
    with open(pref_path + ".data", "rb") as train_file:
        train_data = np.loadtxt(train_file, delimiter=" ")
    with open(pref_path + ".test", "rb") as test_file:
        test_data = np.loadtxt(test_file, delimiter=" ")
    Xtrain = train_data[:, :2]
    Xtest = test_data[:, :2]

    models = {"GMM": GMM(isotropic=False), "HMM": HMM()}
    K = 4  # number of clusters
    for name in ["GMM", "HMM"]:
        print(name)
        model = models[name]
        model.fit(Xtrain, K, eps=pow(10, -2))

        # visualize clusters and frontiers
        model.plot_clusters(Xtrain, "figs/" + name + " on train", save=True)
        model.plot_clusters(Xtest, "figs/" + name + " on test", save=True)
        print("")

        lik = model.compute_log_likelihood(Xtrain)
        print("mean log-likelihood on training set : ", lik / Xtrain.shape[0])
        lik = model.compute_log_likelihood(Xtest)
        print("mean log-likelihood on test set : ", lik / Xtest.shape[0])
        print("\n------------------------\n")
def train(params: Dict):
    """
    Build the ASR model described by ``params``, train it, save it and release it.

    :param params: training configuration. Must contain "model_type" (a member
        of SUPPORTED_MODEL) and "trainset_id"; the model id is derived from
        "set_model_name" when present, otherwise from "structure_id"
    :return: the id of the newly trained model
    :raises AssertionError: if a required key is missing or the model type is
        not supported
    """
    assert "model_type" in params, "model_type is not specified"
    model_type = params["model_type"]
    assert model_type in SUPPORTED_MODEL, \
        "model_type not supported: {}, try with {}".format(model_type, str(SUPPORTED_MODEL))
    assert "trainset_id" in params, "trainset_id is not specified"

    trainset_path = join(TRAIN_PATH, params["trainset_id"])

    # An explicit model name takes precedence over the structure id.
    id_source = "set_model_name" if "set_model_name" in params else "structure_id"
    model_id = get_new_model_id(params[id_source])

    if model_type == "CNN":
        asrmodel = CNN(join(MODEL_PATH, model_id), input_param=params)
    elif model_type == "HMM":
        asrmodel = HMM(join(MODEL_PATH, model_id))
    else:
        # Unreachable unless SUPPORTED_MODEL lists a type with no branch here.
        raise AssertionError("model_type not recognised: {} check {}".format(model_type, SUPPORTED_MODEL))

    asrmodel.train(trainset_path)
    asrmodel.save_model()
    del asrmodel  # free memory
    return model_id
def task3(input_file):
    """Run ``baum_welch`` on the episodes ten times and print each resulting HMM.

    :param input_file: passed to ``read_file`` to obtain the episodes
    :return: None
    """
    episodes = read_file(input_file)
    for i in range(10):
        print '\nEM run number', (i + 1)
        # a new model is constructed with rand_init=True for every run
        hmm = HMM(rand_init=True)
        hmm.baum_welch(episodes)
        print hmm
def test_0():
    """Run forward-backward on the umbrella world with two positive observations.

    Prints the solution, then its forward updates and its smoothed (backward)
    updates.
    """
    print('---------- Test 0: Umbrella World -------------')
    evidence = list(map(int, (True, True)))
    result = HMM(UmbrellaProblem()).forward_backward(evidence)
    print(result)
    print('Forward Updates:')
    print(result.updates)
    print('Backward Updates:')
    print(result.updates_smoothed)
def multi_dim_observation():
    """Draw a length-100 sample from a two-state HMM with 2-D Gaussian outputs.

    :return: the ``(X, S)`` pair produced by ``HMM.rand``
    """
    chain = MarkovChain(
        np.matrix([[0.75], [0.25]]),
        np.matrix([[0.99, 0.01], [0.03, 0.97]]),
    )
    # Both output distributions use the same covariance values but differ in mean.
    low = GaussD(mean=np.matrix([[0], [0]]), cov=np.matrix([[2, 1], [1, 4]]))
    high = GaussD(mean=np.matrix([[3], [3]]), cov=np.matrix([[2, 1], [1, 4]]))
    model = HMM(chain, np.matrix([[low], [high]]))
    X, S = model.rand(model, 100)
    return X, S
def finite_duration():
    """Draw up to 100 samples from a two-state HMM whose transition matrix is 2x3.

    NOTE(review): the extra third column presumably models an exit state
    (hence "finite duration") -- confirm against ``MarkovChain``.

    :return: the ``(X, S)`` pair produced by ``HMM.rand``
    """
    chain = MarkovChain(
        np.matrix([[0.75], [0.25]]),
        np.matrix([[0.4, 0.4, 0.2], [0.1, 0.6, 0.3]]),
    )
    narrow = GaussD(mean=np.matrix([0]), stdev=np.matrix([1]))
    wide = GaussD(mean=np.matrix([3]), stdev=np.matrix([2]))
    model = HMM(chain, np.matrix([[narrow], [wide]]))
    X, S = model.rand(model, 100)
    return X, S
def __init__(self, entry='train'):
    """Set up paths, configuration, data managers and the HMM for one mode.

    :param entry: ``'train'`` builds the vocabulary maps, saves them and saves
        a freshly constructed model; ``'test'`` builds the same managers and
        restores the saved model parameters instead. Any other value stops
        after the configuration has been loaded.
    """
    self.data_map_path = os.path.join('models', 'HMM_data.pkl')
    self.model_config_path = os.path.join('models', 'HMM_config.yml')
    self.model_param_path = os.path.join('models', 'HMM_model_params.pkl')
    # Expected to provide self.tags, which is read below.
    self.load_config()

    if entry not in ('train', 'test'):
        return
    training = entry == 'train'

    self.train_manager = DataManager(data_type='train', tags=self.tags, model_name='HMM')
    if training:
        # The data map must be on disk before the dev manager reads it.
        self.save_data_map({
            # length of word_to_ix, used to initialise the HMM model
            "word_to_ix_size": self.train_manager.word_to_ix_size,
            # length of tag_to_ix, used to initialise the HMM model
            "tag_to_ix_size": self.train_manager.tag_to_ix_size,
            "word_to_ix": self.train_manager.word_to_ix,
            "tag_to_ix": self.train_manager.tag_to_ix,
            "ix_to_word": self.train_manager.ix_to_word,
            "ix_to_tag": self.train_manager.ix_to_tag,
        })

    self.dev_manager = DataManager(data_type='dev',
                                   data_map_path=self.data_map_path,
                                   model_name='HMM')
    self.model = HMM(
        hidden_state_num=self.train_manager.tag_to_ix_size,
        observable_state_num=self.train_manager.word_to_ix_size)

    if training:
        self.save_model()
    else:
        self.restore_model()
def test_1():
    """Straight 4x1 maze with a deterministic sensor.

    With the evidence RED, GREEN, BLUE, YELLOW the position should be known
    exactly: the only way to produce that evidence is to start at (0,0) and
    travel east three times.
    """
    print('---------- Test 1: Deterministic Simple Robot Maze-------------')
    evidence = [0, 2, 1, 3]
    problem = RobotProblem('maze_straight.maz', deterministic_sensor=True)
    print(HMM(problem).reason(evidence))
def test_HMM(maze, start_loc, step_num):
    """Simulate a random walk with a noisy colour sensor and print the filter output.

    :param maze: maze object providing ``is_floor`` and ``color_at``
    :param start_loc: starting ``(x, y)`` location
    :param step_num: number of random moves to simulate

    The filter result is printed once before any move and once after every
    move. A move into a non-floor cell leaves the robot where it was. When
    ``random.random()`` exceeds 0.88 the reading is replaced by one of the
    three other colours.
    """
    readings = []
    path = [start_loc]
    step = 0

    print("step: " + str(step) + "\n")
    print("current location: " + str(start_loc) + "\n")
    print(maze)
    print_result(maze, HMM(maze, readings, path).filter())

    while step < step_num:
        step += 1
        move = random.choice([(1, 0), (-1, 0), (0, 1), (0, -1)])
        print("move: " + str(move) + "\n")

        previous = path[step - 1]
        candidate = (previous[0] + move[0], previous[1] + move[1])
        path.append(candidate if maze.is_floor(candidate[0], candidate[1]) else previous)

        current = path[step]
        color = maze.color_at(current[0], current[1])
        wrong_colors = ["r", "g", "y", "b"]
        wrong_colors.remove(color)
        if random.random() > 0.88:
            color = random.choice(wrong_colors)
        readings.append(color)

        belief = HMM(maze, readings, path).filter()
        print("step: " + str(step) + "\n")
        print("current location: " + str(current) + "\n")
        print("sensor reading: " + str(color) + "\n")
        print(maze)
        print_result(maze, belief)
def test_1(self):
    """Check the evaluation probability of '1','2','1' on a 3-state, 2-symbol model."""
    start_prob = np.array([0.2, 0.4, 0.4])
    print(start_prob)
    transition = np.array([[0.5, 0.2, 0.3],
                           [0.3, 0.5, 0.2],
                           [0.2, 0.3, 0.5]])
    emission = np.array([[0.5, 0.5],
                         [0.4, 0.6],
                         [0.7, 0.3]])
    states = ['1', '2', '3']
    symbols = ['1', '2']
    model = HMM(start_prob, transition, emission, states, symbols)

    likelihood = model.evaluation(np.array(['1', '2', '1']))
    print(likelihood)
    self.assertAlmostEqual(likelihood, 0.130218)
def test_evalution_assignment(self):
    """Check the evaluation probability of the three-symbol sequence on the 2-state model."""
    start_prob = np.array([0.3, 0.7])
    print(start_prob)
    transition = np.array([[0.1, 0.9],
                           [0.8, 0.2]])
    emission = np.array([[0.7, 0.1, 0.2],
                         [0.3, 0.5, 0.2]])
    states = ['吃', '睡']
    symbols = ["哭", "没精神", "找妈妈"]
    model = HMM(start_prob, transition, emission, states, symbols)

    likelihood = model.evaluation(np.array(['哭', '没精神', '找妈妈']))
    print(likelihood)
    self.assertAlmostEqual(likelihood, 0.026880000000000005)
def test_decode_assignment(self):
    """Check the decoded state path of the three-symbol sequence on the 2-state model."""
    start_prob = np.array([0.3, 0.7])
    print(start_prob)
    transition = np.array([[0.1, 0.9],
                           [0.8, 0.2]])
    emission = np.array([[0.7, 0.1, 0.2],
                         [0.3, 0.5, 0.2]])
    states = ['吃', '睡']
    symbols = ["哭", "没精神", "找妈妈"]
    model = HMM(start_prob, transition, emission, states, symbols)

    path = model.decode(np.array(['哭', '没精神', '找妈妈']))
    print(path)
    self.assertEqual(path, ['吃', '睡', '吃'])
def trainModel():
    """Train the tagger HMM on a prefix of the Brown corpus and cache its parameters.

    The first ``TRAIN_NUM`` tagged words are used, scaled by
    ``TRAIN_NUM_RATE`` when ``ENABLE_RATE`` is set. Words are lower-cased
    before training. The value returned by ``output_to_viterbi`` is pickled to
    ``MODEL_PATH``.

    :return: the parameters produced by ``HMM.output_to_viterbi``
    """
    limit = int(TRAIN_NUM * TRAIN_NUM_RATE) if ENABLE_RATE else TRAIN_NUM
    tagged = brown.tagged_words(tagset='universal')[:limit]
    samples = [[pair[0].lower(), pair[1]] for pair in tagged]

    paras = HMM(args=samples).output_to_viterbi()

    # cache model
    with open(MODEL_PATH, 'wb') as cache_file:
        pickle.dump(paras, cache_file)
    return paras
def task1(input_file):
    """Estimate HMM parameters by counting over fully observed episodes and print the model.

    Each episode is indexed as a sequence of timesteps where ``timestep[0]`` is
    the state and ``timestep[1]`` the reward/observation. The tables are stored
    on the HMM as ``initial[state]``, ``transition[next_state][state]`` and
    ``emission[reward][state]``.

    :param input_file: passed to ``read_input_file``
    :return: None
    """
    episodes, state_visit_count = read_input_file(input_file)  # state_visit_count is not used below
    hmm = HMM()
    E = len(episodes)
    N = hmm.hidden_states
    V = hmm.visible_states
    # Compute initial probabilities
    hmm.initial = [0 for i in range(N)]
    for episode in episodes:
        # each episode contributes 1/E to the state of its first timestep
        hmm.initial[episode[0][0]] += 1.0 / E
    # Compute transition probabilities
    hmm.transition = [[0 for i in range(N)] for j in range(N)]
    norm = [0 for i in range(N)]  # number of transitions leaving each state
    for episode in episodes:
        for t in range(len(episode) - 1):
            state = episode[t][0]
            nextState = episode[t + 1][0]
            hmm.transition[nextState][state] += 1.0
            norm[state] += 1
    for nextState in range(N):
        for state in range(N):
            try:
                hmm.transition[nextState][state] /= norm[state]
            except ZeroDivisionError:
                # state never left: its column stays all zero
                continue
    # Compute emission probabilities
    hmm.emission = [[0 for i in range(N)] for j in range(V)]
    norm = [0 for i in range(N)]  # number of timesteps spent in each state
    for episode in episodes:
        for timestep in episode:
            reward = timestep[1]
            state = timestep[0]
            hmm.emission[reward][state] += 1.0
            norm[state] += 1
    for reward in range(V):
        for state in range(N):
            try:
                hmm.emission[reward][state] /= norm[state]
            except ZeroDivisionError:
                # state never visited: its column stays all zero
                continue
    print hmm
    return
def test_decode_ppt(self):
    """Check the decoded state path of 'A','B','A','B' on a 3-state left-to-right model."""
    start_prob = np.array([1, 0, 0])
    print(start_prob)
    transition = np.array([[0.4, 0.6, 0],
                           [0, 0.8, 0.2],
                           [0, 0, 1.0]])
    emission = np.array([[0.7, 0.3],
                         [0.4, 0.6],
                         [0.8, 0.2]])
    states = ['1', '2', '3']
    symbols = ["A", "B"]
    model = HMM(start_prob, transition, emission, states, symbols)

    path = model.decode(np.array(['A', 'B', 'A', 'B']))
    print(path)
    self.assertEqual(path, ['1', '2', '2', '2'])
def test_forward():
    """Print forward-pass and log-probability results next to their expected values.

    Nothing is asserted; the expected numbers are embedded in the printed
    text for manual comparison. The first part uses one chain and one HMM,
    the second part evaluates the same sequence under two HMMs at once.
    """
    initMatrix = np.matrix([[1.0], [0]])
    transitionMatrix = np.matrix([[0.9, 0.1, 0], [0, 0.9, 0.1]])
    mc = MarkovChain(initMatrix, transitionMatrix)
    g1 = GaussD(mean=np.matrix([0]), stdev=np.matrix([1]))
    g2 = GaussD(mean=np.matrix([3]), stdev=np.matrix([2]))
    # output sequence
    x = np.matrix([-0.2, 2.6, 1.3])
    # the API takes the object (or a matrix of objects) as explicit first argument
    pX, logS = g1.prob(np.matrix([g1, g2]), x)
    alphaHat, c = mc.forward(mc, pX)
    print 'alphaHat:', alphaHat, 'expected: [1 0.3847 0.4189; 0 0.6153 0.5811]'
    print 'c:', c, 'expected: [1 0.1625 0.8266 0.0581]'
    h = HMM(mc, np.matrix([[g1], [g2]]))
    # logP = P(X|h)
    logP = h.logprob(h, x)
    print 'logP: ', logP, 'expected: -9.1877'
    # second scenario: two HMMs that differ only in their transition matrix
    initMatrix = np.matrix([[1.0], [0]])
    transitionMatrix = np.matrix([[0.0, 1.0, 0.0], [0.0, 0.7, 0.3]])
    x = np.matrix([-0.2, 2.6, 1.3])
    mc = MarkovChain(initMatrix, transitionMatrix)
    g1 = GaussD(mean=np.matrix([0]), stdev=np.matrix([1]))
    g2 = GaussD(mean=np.matrix([3]), stdev=np.matrix([1]))
    h1 = HMM(mc, np.matrix([[g1], [g2]]))
    transitionMatrix = np.matrix([[0.5, 0.5, 0.0], [0.0, 0.5, 0.5]])
    mc2 = MarkovChain(initMatrix, transitionMatrix)
    g3 = GaussD(mean=np.matrix([0]), stdev=np.matrix([1]))
    g4 = GaussD(mean=np.matrix([3]), stdev=np.matrix([1]))
    h2 = HMM(mc2, np.matrix([[g3], [g4]]))
    # one call scores x under both models
    logP = h1.logprob(np.matrix([h1, h2]), x)
    print 'logP:', logP, 'expected: [-5.562463348 -6.345037882]'
def makeLeftRightHMM(self, nStates, pD, obsData, lData=None):
    """Create, initialise and train a left-right HMM for the given observations.

    :param nStates: number of states; must be greater than 0
    :param pD: output distribution(s) handed to the ``HMM`` constructor
    :param obsData: observation data; its second dimension is used as the
        sequence length when ``lData`` is not given
    :param lData: length(s) of the training sequences; defaults to
        ``obsData.shape[1]``
    :return: the trained HMM
    :raises ValueError: if ``nStates`` is not positive
    """
    # Fail before any work is done: the old code only printed a message and
    # then went on to divide by nStates.
    if nStates <= 0:
        raise ValueError('Number of states must be > 0')
    if lData is None:
        lData = obsData.shape[1]
    # Mean sequence length divided by the number of states is the value
    # given to initLeftRightMC.
    D = np.mean(lData)
    D = D / nStates
    mc = self.initLeftRightMC(nStates, D)
    hmm = HMM(mc, pD)
    hmm = hmm.init(hmm, obsData, lData)
    # NOTE(review): 5 and log(1.01) are presumably the iteration count and the
    # minimum log-probability improvement -- confirm against HMM.train.
    hmm, logprobs = hmm.train(hmm, obsData, lData, 5, np.log(1.01))
    return hmm
def test_decode_weather(self):
    """
    Decode 'Dry', 'Damp', 'Soggy' on the weather model and check the state path.

    dataset source: https://www.cnblogs.com/Denise-hzf/p/6612212.html
    """
    pi = np.array([0.63, 0.17, 0.20])
    print(pi)
    # Every row of the transition matrix has to sum to 1; the middle row
    # previously ended in 0.652 (sum 1.027), a transposition of 0.625.
    A = np.array([[0.5, 0.375, 0.125],
                  [0.25, 0.125, 0.625],
                  [0.25, 0.375, 0.375]])
    B = np.array([[0.6, 0.2, 0.15, 0.05],
                  [0.25, 0.25, 0.25, 0.25],
                  [0.05, 0.10, 0.35, 0.5]])
    S = ['Sunny', 'Cloudy', 'Rainy']
    V = ["Dry", "Dryish", "Damp", "Soggy"]
    hmm = HMM(pi, A, B, S, V)
    observation = np.array(['Dry', 'Damp', 'Soggy'])
    res = hmm.decode(observation)
    print(res)
    # Hand-computed Viterbi path for these parameters (A[i][j] read as the
    # probability of moving from state i to state j).
    self.assertEqual(res, ['Sunny', 'Cloudy', 'Rainy'])
def test_3():
    """Run forward-backward along a loop through the 4x4 coloured maze with a noisy sensor.

    The evidence is the ground-truth colour sequence of the path. The path,
    the colours and the solution are printed, then the path is animated
    together with the solution.
    """
    print(
        '------------ Test 3: 4x4 Colored Maze with Noisy Sensor. ----------------'
    )
    path = [(0, 0), (0, 1), (0, 2), (0, 3),
            (1, 3), (2, 3), (3, 3),
            (3, 2), (3, 1), (3, 0),
            (2, 0), (1, 0)]
    problem = RobotProblem('maze1.maz', deterministic_sensor=False)
    ground_truth = problem.get_ground_truth(path)

    cells = ['(%s, %s)' % (cell[0], cell[1]) for cell in path]
    print('Path: ', ' -> '.join(cells))
    colours = [problem.color_map[idx] for idx in ground_truth]
    print('Ground Truth: ', ' -> '.join(colours))

    solution = HMM(problem).forward_backward(ground_truth)
    print(solution)
    print('-------- Path Animation ----------')
    problem.animate_path(path, solution)
def train(motionname, motion_obs_seq):
    """
    Train an HMM for one motion with EM and save it under the motion's name.

    :param motionname: [str] name of the motion; also passed to ``save``
    :param motion_obs_seq: list of all observation sequences under the choice of motion
    :return: None
    :raises ZeroDivisionError: if ``motion_obs_seq`` is empty
    """
    totlen = sum(len(obs) for obs in motion_obs_seq)
    avglen = totlen / len(motion_obs_seq)

    # initialize
    A, B, pi = init(avglen)
    obs_train = motion_obs_seq
    hmmmodel = HMM(A, B, pi, N, M)
    print("\n\nStart to train "+str(motionname)+" model!")

    # EM
    max_epoch = 200
    # TODO: early stopping on `tot - tot_prev >= tolerance` is disabled; the
    # two values are still tracked so it can be switched back on.
    tolerance = 0.000005
    epoch = 0
    tot = 0
    tot_prev = -math.inf
    # NOTE(review): `<=` runs max_epoch + 1 epochs (0..max_epoch) -- confirm intended.
    while epoch <= max_epoch:
        if epoch > 0:
            tot_prev = tot
        hmmmodel.update(obs_train)
        # Total log-likelihood over the whole training set. The loop used to
        # iterate over `obs`, the last single sequence left over from the
        # length computation, instead of the training sequences.
        tot = 0
        for obs in obs_train:
            tot = tot + hmmmodel.get_prob(obs)
        print('epoch' + str(epoch)+' total loglikelihood ' + str(tot))
        epoch = epoch + 1

    # save model
    hmmmodel.save(motionname)
def trainGestureModel():
    """Train one HMM per recorded observation sequence of every gesture and pickle them.

    For each gesture name the file ``<gesture>Obs.pickle`` is loaded. Each
    sequence in it gets its own ``HMM`` trained with ``baum_welch`` for at
    most 3 iterations. The models are stored in a ``(6, 7)`` object array
    indexed ``[gesture, sequence]``, written to ``HMMModels.pickle`` and
    returned.

    NOTE(review): the table is fixed at 7 sequences per gesture, so a gesture
    with more sequences would index out of bounds -- confirm the data.
    NOTE(review): all models are constructed from the same pi/A/B arrays;
    verify that ``HMM`` does not modify them in place.
    """
    n_states = 10  # number of hidden states N
    n_obs = 30     # number of observation types M

    # Uniform start distribution and row-normalised random A and B
    start_prob = np.full((n_states, 1), 1.0 / n_states)
    transition = np.random.rand(n_states, n_states)
    transition = transition / transition.sum(axis=1, keepdims=True)
    emission = np.random.rand(n_obs, n_states)
    emission = emission / emission.sum(axis=1, keepdims=True)

    gesture_names = ('beat3', 'beat4', 'circle', 'eight', 'inf', 'wave')
    HMMModels = np.empty((6, 7), dtype='object')

    for row, gesture_name in enumerate(gesture_names):
        # load the data for this type of gesture
        with open(gesture_name + "Obs.pickle", 'rb') as handle:
            sequences = pickle.load(handle)

        # one trained model per observation sequence
        for col in range(len(sequences)):
            model = HMM(n_states, n_obs, start_prob, transition, emission)
            model.baum_welch(sequences[col], max_iter=3)
            HMMModels[row, col] = model

    with open('HMMModels.pickle', 'wb') as handle:
        pickle.dump(HMMModels, handle, protocol=pickle.HIGHEST_PROTOCOL)
    return HMMModels
def ncsa_model():
    """
    Model described in hw5

    :return: the HMM, the emission sequence, the expected matrix and the
        expected Viterbi path, in that order
    """
    state_names = ['NA', 'AP', 'AC']
    transition = np.array([[0.25, 0.75, 0.00],
                           [0.00, 0.25, 0.75],
                           [0.00, 0.00, 1.00]])
    emission = np.array([[1.0, 0.0],
                         [0.0, 1.0],
                         [1.0, 0.0]])
    expected = np.array([[1.0, 0.0, 0.0],
                         [0.0, 1.0, 0.0],
                         [0.0, 0.0, 1.0]])
    model = HMM(transition, emission, states=state_names, emissions=['PS', 'SI'])
    return model, ['PS', 'SI', 'PS'], expected, ['NA', 'AP', 'AC']
def learn_hmm(self, seqlist):
    """Learn a position-specific two-letter HMM from a ranked list of sequences.

    The sequence at rank ``i`` contributes weight ``self.k - i`` to the
    per-position letter counts and letter-pair counts. The transition table
    for each position is the pair count over the letter count, both smoothed
    with ``self.smoothfac``. Emissions, start probabilities and the
    letter/feature maps are fixed.

    :param seqlist: sequences over the alphabet ``'ab'``; the first one
        determines the number of positions
    :return: the configured HMM
    """
    hmm = HMM()
    hmm.length = self.length
    hmm.dims = [(2, 1)] * hmm.length  # (latent, emit) dimspace
    hmm.emit = [[[1.0], [1.0]]] * hmm.length
    hmm.seqmap = [{'a': 0, 'b': 1}] * hmm.length
    hmm.seqmap2 = [{0: 'a', 1: 'b'}] * hmm.length
    hmm.featmap = [{'H': 0}] * hmm.length
    hmm.initprob = [0.5, 0.5]
    hmm.trained = True
    hmm.alphabet = 'ab'
    # Placeholder transitions; replaced by the estimated ones below.
    hmm.trans = [[[0.7, 0.3], [0.3, 0.7]]] * hmm.length

    n_positions = len(seqlist[0])
    letter_counts = [{} for _ in range(n_positions)]
    pair_counts = [{} for _ in range(n_positions)]

    # Weighted counts: earlier sequences in the list weigh more.
    for rank, seq in enumerate(seqlist):
        for pos, letter in enumerate(seq):
            letter_counts[pos][letter] = letter_counts[pos].get(letter, 0) + self.k - rank
    for rank, seq in enumerate(seqlist):
        for pos in range(len(seq[:-1])):
            pair = seq[pos:pos + 2]
            pair_counts[pos][pair] = pair_counts[pos].get(pair, 0) + self.k - rank

    # Smoothed conditional probability of the next letter given the current one.
    hmm.trans = [
        [
            [
                (pair_counts[pos].get(first + second, 0) + self.smoothfac) / (
                    letter_counts[pos].get(first, 0) + self.smoothfac * len(hmm.alphabet))
                for second in hmm.alphabet
            ]
            for first in hmm.alphabet
        ]
        for pos in range(n_positions - 1)
    ]
    return hmm