def train_model(dataset, threshold):
    """Build an initial grid HMM from ``dataset`` and retrain it repeatedly.

    The transition matrix starts as ``INITIAL_STAY_PROB`` on the diagonal with
    the remaining mass split evenly over each location's neighbours; the
    measurement matrix gives ``CAMERA_ACC_PROB`` to a cell's actual colour and
    ``TOTAL_CAMERA_ERR_PROB / 3`` to every other observation.

    :param dataset: object exposing ``read_file()``, ``xyToInt`` and ``obsToInt``.
    :param threshold: log-likelihood change below which an iteration counts
        as "converged"; training stops on the hit that follows five earlier hits.
    :return: ``(model, llikes)`` - the last trained model (``None`` when
        ``N_ITER`` is 0) and the log-likelihood recorded at every iteration.
    """
    ### Set up ###
    _, outputs = dataset.read_file()
    num_states = dataset.xyToInt.ravel().shape[0]
    num_outputs = len(dataset.obsToInt)
    measure_p = np.zeros((num_outputs, num_states))
    # NOTE(review): 1/16 is only a uniform prior when num_states == 16 - confirm.
    start_p = np.ones((num_states, 1)) * 1.0 / 16.0

    # make the matrix of transition probs
    trans_p = np.identity(num_states)
    trans_p *= INITIAL_STAY_PROB
    for each_loc in VALID_LOCATIONS:
        int_repres = int(dataset.xyToInt[each_loc[0] - 1, each_loc[1] - 1])
        # distribute probs to neighbours
        neighbours = NEIGHBOURS[each_loc[0]][each_loc[1]]
        num_neighbours = float(len(neighbours))
        # do measurement probs
        int_col_repres = int(
            dataset.obsToInt[ACTUAL_COLOURS[each_loc[0]][each_loc[1]]])
        measure_p[:, int_repres] = TOTAL_CAMERA_ERR_PROB / 3.0
        measure_p[int_col_repres, int_repres] = CAMERA_ACC_PROB
        for each_neighbour in neighbours:
            int_repres_neigh = int(
                dataset.xyToInt[each_neighbour[0] - 1, each_neighbour[1] - 1])
            trans_p[int_repres, int_repres_neigh] += \
                (1.0 - INITIAL_STAY_PROB) * (1.0 / num_neighbours)

    ### Model training ###
    llikes = []
    ll_old = 10e10
    print("\nTRANSITION P\n", trans_p)
    print("\nMEASURE P\n", measure_p)
    print("\nSTART P\n", start_p)
    asym_cnt = 0
    model = None  # stays None if N_ITER is 0 instead of raising at `return`
    for _ in range(N_ITER):
        model = HMM(num_states, num_outputs, outputs,
                    trans_p, measure_p, start_p)
        ll = model.train()
        print("Log Likelihood is ", ll)
        llikes.append(ll)
        # feed the re-estimated parameters into the next iteration
        trans_p = model.transition_p
        measure_p = model.measure_p
        start_p = model.start_p
        diff = abs(ll_old - ll)
        print("Difference is", diff)
        if diff < threshold:
            if asym_cnt >= 5:
                print("Threshold change reached 5 times, stopping")
                break
            asym_cnt += 1
        ll_old = ll
    return model, llikes
def test_2():
    """Run the straight-maze scenario of test_1 with the noisy sensor.

    (3,0) should still end up as the most probable location, but other
    cells are expected to keep some lower probability.
    """
    banner = '----------- Test 2: Noisy Sensor in Simple Robot Maze. -----------'
    print(banner)
    evidence = [0, 2, 1, 3]
    problem = RobotProblem('maze_straight.maz', deterministic_sensor=False)
    print(HMM(problem).reason(evidence))
def task3(input_file):
    """Run Baum-Welch ten times from random initialisations and print each model.

    :param input_file: path handed to ``read_file`` to obtain the episodes.
    """
    episodes = read_file(input_file)
    for run in range(1, 11):
        print('\nEM run number', run)
        hmm = HMM(rand_init=True)
        hmm.baum_welch(episodes)
        print(hmm)
def multi_dim_observation():
    """Sample 100 steps from a two-state HMM with 2-D Gaussian outputs.

    :return: the ``(X, S)`` pair produced by ``HMM.rand``.
    """
    chain = MarkovChain(
        np.matrix([[0.75], [0.25]]),
        np.matrix([[0.99, 0.01], [0.03, 0.97]]),
    )
    # Both output distributions use the same covariance values and differ
    # only in their mean vector.
    first = GaussD(mean=np.matrix([[0], [0]]),
                   cov=np.matrix([[2, 1], [1, 4]]))
    second = GaussD(mean=np.matrix([[3], [3]]),
                    cov=np.matrix([[2, 1], [1, 4]]))
    model = HMM(chain, np.matrix([[first], [second]]))
    observations, states = model.rand(model, 100)
    return (observations, states)
def test_0():
    """Run forward-backward on the umbrella world with two positive sightings."""
    print('---------- Test 0: Umbrella World -------------')
    model = HMM(UmbrellaProblem())
    evidence = list(map(int, (True, True)))
    result = model.forward_backward(evidence)
    print(result)
    print('Forward Updates:')
    print(result.updates)
    print('Backward Updates:')
    print(result.updates_smoothed)
def finite_duration():
    """Sample up to 100 steps from a two-state HMM with a 2x3 transition matrix.

    :return: the ``(X, S)`` pair produced by ``HMM.rand``.
    """
    start = np.matrix([[0.75], [0.25]])
    # NOTE(review): the third column presumably is the exit probability of a
    # finite-duration chain - confirm against MarkovChain.
    transitions = np.matrix([[0.4, 0.4, 0.2], [0.1, 0.6, 0.3]])
    chain = MarkovChain(start, transitions)
    outputs = np.matrix([
        [GaussD(mean=np.matrix([0]), stdev=np.matrix([1]))],
        [GaussD(mean=np.matrix([3]), stdev=np.matrix([2]))],
    ])
    model = HMM(chain, outputs)
    observations, states = model.rand(model, 100)
    return (observations, states)
def test_1():
    """Straight 4x1 maze with a deterministic sensor.

    Given the evidence RED, GREEN, BLUE, YELLOW the location is known exactly:
    no sequence yields that evidence other than starting at (0,0) and
    travelling east three times.
    """
    print('---------- Test 1: Deterministic Simple Robot Maze-------------')
    evidence = [0, 2, 1, 3]
    problem = RobotProblem('maze_straight.maz', deterministic_sensor=True)
    print(HMM(problem).reason(evidence))
def test_decode_assignment(self):
    """Decode a three-step observation sequence and check the state path."""
    start = np.array([0.3, 0.7])
    print(start)
    transition = np.array([[0.1, 0.9], [0.8, 0.2]])
    emission = np.array([[0.7, 0.1, 0.2], [0.3, 0.5, 0.2]])
    state_names = ['吃', '睡']
    symbols = ["哭", "没精神", "找妈妈"]
    model = HMM(start, transition, emission, state_names, symbols)
    decoded = model.decode(np.array(['哭', '没精神', '找妈妈']))
    print(decoded)
    self.assertEqual(decoded, ['吃', '睡', '吃'])
def test_evalution_assignment(self):
    """Check the probability the model assigns to a three-step observation."""
    start = np.array([0.3, 0.7])
    print(start)
    transition = np.array([[0.1, 0.9], [0.8, 0.2]])
    emission = np.array([[0.7, 0.1, 0.2], [0.3, 0.5, 0.2]])
    state_names = ['吃', '睡']
    symbols = ["哭", "没精神", "找妈妈"]
    model = HMM(start, transition, emission, state_names, symbols)
    likelihood = model.evaluation(np.array(['哭', '没精神', '找妈妈']))
    print(likelihood)
    self.assertAlmostEqual(likelihood, 0.026880000000000005)
def test_1(self):
    """Check the observation probability on a three-state, two-symbol model."""
    start = np.array([0.2, 0.4, 0.4])
    print(start)
    transition = np.array([[0.5, 0.2, 0.3],
                           [0.3, 0.5, 0.2],
                           [0.2, 0.3, 0.5]])
    emission = np.array([[0.5, 0.5], [0.4, 0.6], [0.7, 0.3]])
    state_names = ['1', '2', '3']
    symbols = ['1', '2']
    model = HMM(start, transition, emission, state_names, symbols)
    likelihood = model.evaluation(np.array(['1', '2', '1']))
    print(likelihood)
    self.assertAlmostEqual(likelihood, 0.130218)
def trainModel():
    """Fit an HMM on a prefix of the Brown corpus and cache its parameters.

    The prefix length is ``TRAIN_NUM``, scaled by ``TRAIN_NUM_RATE`` when
    ``ENABLE_RATE`` is set. The parameters returned by
    ``HMM.output_to_viterbi`` are pickled to ``MODEL_PATH``.

    :return: the parameters that were written to the cache.
    """
    limit = int(TRAIN_NUM * TRAIN_NUM_RATE) if ENABLE_RATE else TRAIN_NUM
    tagged = brown.tagged_words(tagset='universal')[:limit]
    # lower-case the word, keep the tag untouched
    samples = [[pair[0].lower(), pair[1]] for pair in tagged]
    paras = HMM(args=samples).output_to_viterbi()
    # cache model
    with open(MODEL_PATH, 'wb') as cache:
        pickle.dump(paras, cache)
    return paras
class ProseWriter:
    """Generate random text by sampling from the count tables of an HMM."""

    def __init__(self):
        self.hmm = HMM()

    def train(self, fileName):
        """Train the underlying HMM from ``fileName``."""
        self.hmm.train(fileName)

    def clear_unknowns(self):
        """Drop every entry keyed by ``UNKNOWN`` from the HMM's tables."""
        del self.hmm.sts[UNKNOWN]
        del self.hmm.obs[UNKNOWN]
        del self.hmm.starts[UNKNOWN]
        del self.hmm.transitions[UNKNOWN]
        for t in self.hmm.transitions:
            del self.hmm.transitions[t][UNKNOWN]
        del self.hmm.emissions[UNKNOWN]
        for e in self.hmm.emissions:
            del self.hmm.emissions[e][UNKNOWN]
        self.hmm.startCount -= 1

    def start_state(self):
        """Return a start state picked by a random index below ``startCount``.

        Returns ``None`` when the drawn index is beyond the number of start
        states; ``write`` retries in that case.
        """
        count = random.randint(0, self.hmm.startCount - 1)
        for s in self.hmm.starts:
            if count == 0:
                return s
            count -= 1
        return None

    def next_state(self, curState):
        """Sample a successor of ``curState`` weighted by transition counts."""
        transitions = self.hmm.transitions[curState]
        count = random.randint(0, self.hmm.sts[curState] - 1)
        for t in transitions:
            for _ in range(transitions[t]):
                if count == 0:
                    return t
                count -= 1
        return None

    def next_word(self, nextState):
        """Sample a word emitted by ``nextState`` weighted by emission counts."""
        emissions = self.hmm.emissions[nextState]
        count = random.randint(0, self.hmm.sts[nextState] - 1)
        for e in emissions:
            for _ in range(emissions[e]):
                if count == 0:
                    return e
                count -= 1
        return None

    def write(self, count):
        """Print ``count`` sampled words on one line, separated by spaces."""
        state = None
        for _ in range(count):
            # (re)start whenever the walk has no current state
            while state is None:
                state = self.start_state()
            print(self.next_word(state), end=" ")
            state = self.next_state(state)
        print("")
def task1(input_file):
    """Estimate HMM parameters by counting over fully observed episodes.

    Each episode is a sequence of ``(state, reward)`` timesteps. Initial,
    transition and emission tables are relative frequencies; columns of states
    that never occur are left at zero. The resulting model is printed.

    :param input_file: path handed to ``read_input_file``.
    """
    episodes, _ = read_input_file(input_file)
    hmm = HMM()
    E = len(episodes)
    N = hmm.hidden_states
    V = hmm.visible_states

    # Compute initial probabilities
    hmm.initial = [0 for _ in range(N)]
    for episode in episodes:
        hmm.initial[episode[0][0]] += 1.0 / E

    # Compute transition probabilities, indexed [nextState][state]
    hmm.transition = [[0 for _ in range(N)] for _ in range(N)]
    norm = [0 for _ in range(N)]
    for episode in episodes:
        for t in range(len(episode) - 1):
            state = episode[t][0]
            nextState = episode[t + 1][0]
            hmm.transition[nextState][state] += 1.0
            norm[state] += 1
    for nextState in range(N):
        for state in range(N):
            if norm[state]:  # skip states never left (avoids division by 0)
                hmm.transition[nextState][state] /= norm[state]

    # Compute emission probabilities, indexed [reward][state]
    hmm.emission = [[0 for _ in range(N)] for _ in range(V)]
    norm = [0 for _ in range(N)]
    for episode in episodes:
        for timestep in episode:
            state = timestep[0]
            reward = timestep[1]
            hmm.emission[reward][state] += 1.0
            norm[state] += 1
    for reward in range(V):
        for state in range(N):
            if norm[state]:  # skip states never visited
                hmm.emission[reward][state] /= norm[state]

    print(hmm)
def test_decode_ppt(self):
    """Decode a four-step sequence on a left-to-right three-state model."""
    start = np.array([1, 0, 0])
    print(start)
    transition = np.array([[0.4, 0.6, 0],
                           [0, 0.8, 0.2],
                           [0, 0, 1.0]])
    emission = np.array([[0.7, 0.3], [0.4, 0.6], [0.8, 0.2]])
    state_names = ['1', '2', '3']
    symbols = ["A", "B"]
    model = HMM(start, transition, emission, state_names, symbols)
    decoded = model.decode(np.array(['A', 'B', 'A', 'B']))
    print(decoded)
    self.assertEqual(decoded, ['1', '2', '2', '2'])
def makeLeftRightHMM(self, nStates, pD, obsData, lData=None):
    """Create, initialise and train a left-right HMM.

    :param nStates: number of states; must be positive.
    :param pD: output distribution(s) handed to ``HMM``.
    :param obsData: observation data; ``obsData.shape[1]`` is used as the
        sequence length when ``lData`` is not given.
    :param lData: sequence length(s); their mean divided by ``nStates`` is
        passed to ``initLeftRightMC`` as the per-state duration.
    :return: the trained HMM.
    :raises ValueError: if ``nStates`` is not positive (the original only
        printed the message and then went on to divide by ``nStates``).
    """
    if nStates <= 0:
        raise ValueError('Number of states must be > 0')
    if lData is None:
        lData = obsData.shape[1]
    D = np.mean(lData) / nStates
    mc = self.initLeftRightMC(nStates, D)
    hmm = HMM(mc, pD)
    hmm = hmm.init(hmm, obsData, lData)
    hmm, _logprobs = hmm.train(hmm, obsData, lData, 5, np.log(1.01))
    return hmm
def test_decode_weather(self):
    """Decode a weather example and print the result (no assertion is made).

    dataset source: https://www.cnblogs.com/Denise-hzf/p/6612212.html
    """
    start = np.array([0.63, 0.17, 0.20])
    print(start)
    # NOTE(review): the second row sums to 1.027; 0.652 may be a typo - confirm.
    transition = np.array([[0.5, 0.375, 0.125],
                           [0.25, 0.125, 0.652],
                           [0.25, 0.375, 0.375]])
    emission = np.array([[0.6, 0.2, 0.15, 0.05],
                         [0.25, 0.25, 0.25, 0.25],
                         [0.05, 0.10, 0.35, 0.5]])
    state_names = ['Sunny', 'Cloudy', 'Rainy']
    symbols = ["Dry", "Dryish", "Damp", "Soggy"]
    model = HMM(start, transition, emission, state_names, symbols)
    decoded = model.decode(np.array(['Dry', 'Damp', 'Soggy']))
    print(decoded)
def featspacelen(self):
    """Build tasks that vary the feature space and the sequence length.

    For each of ``self.ntimes`` tasks a fresh HMM/CMRF and feature set are
    generated, the frontier is optionally enumerated by brute force, and the
    Pareto frontier is computed. Tasks are collected in ``self.tasklist``.
    """
    self.tasklist = []
    n_feat = self.kwdargs['featspace']
    length = self.kwdargs['seqlen']
    seqspace = 20
    dims = [(seqspace, n_feat)] * length
    # Repeat for all the tasks described
    for taskid in range(self.ntimes):
        hmm = HMM()
        self._set_params_generic(hmm, length, dims)
        cmrf = CMRF(hmm)
        feats = self._gen_feats_generic(length, n_feat)
        label = '_'.join(['sim' + STUDY, self.name, str(length),
                          str(n_feat), str(taskid)])
        task = Task(label, cmrf, feats)
        # Run Brute force to enumerate the frontier
        if self.kwdargs['run_brute']:
            # NOTE(review): extent of the original `with` body is not
            # recoverable from the collapsed source - confirm.
            with benchmark(task.name + 'brute') as timer:
                seq, energies = self.bruteforce(cmrf, feats)
            task.all_seq = seq
            task.all_seq_energy = energies
            task.brute_time = timer.elapsed
        # Now run the toy simulation
        with benchmark(task.name + 'pareto') as timer:
            frontier, frontier_energy = pareto_frontier(cmrf, feats)
        task.frontier, task.frontier_energy = frontier, frontier_energy
        if self.plot_all:
            task.plot_frontier(frontier_only=True)
        task.pareto_time = timer.elapsed
        self.tasklist.append(task)
def toy(self):
    """Set up the toy simulation.

    One HMM/CMRF is shared by all ``self.ntimes`` tasks; each task gets a
    brute-force enumeration and a Pareto frontier, both timed.
    """
    self.tasklist = []
    feats = self.get_feats_standard()
    hmm = HMM()
    self._set_params_toy(hmm)
    cmrf = CMRF(hmm)
    for taskid in range(self.ntimes):
        label = '_'.join(['sim' + STUDY, self.name, str(taskid)])
        task = Task(label, cmrf, feats)
        # Run Brute force to enumerate the frontier
        # NOTE(review): extent of the original `with` bodies is not
        # recoverable from the collapsed source - confirm.
        with benchmark(task.name + 'brute') as timer:
            seq, energies = self.bruteforce(cmrf, feats)
        task.all_seq = seq
        task.all_seq_energy = energies
        task.brute_time = timer.elapsed
        # Now run the toy simulation
        with benchmark(task.name + 'pareto') as timer:
            frontier, frontier_energy = pareto_frontier(cmrf, feats)
        task.frontier, task.frontier_energy = frontier, frontier_energy
        if self.plot_all:
            task.plot_frontier()
        task.pareto_time = timer.elapsed
        self.tasklist.append(task)
def main():
    """Fit a GMM and an HMM on the EMGaussian data and report their fit.

    Reads ``EMGaussian.data`` / ``EMGaussian.test`` from
    ``classification_data_HWK2`` under the current directory, fits both models
    with ``K = 4`` clusters on the first two columns, saves cluster plots and
    prints the mean log-likelihood on each set.
    """
    pref_path = os.getcwd() + "/classification_data_HWK2/EMGaussian"

    def load(suffix):
        # close the handle once parsed (the original leaked both files)
        with open(pref_path + suffix, "rb") as handle:
            return np.loadtxt(handle, delimiter=" ")

    train_data = load(".data")
    test_data = load(".test")
    Xtrain = train_data[:, :2]
    Xtest = test_data[:, :2]
    models = {"GMM": GMM(isotropic=False), "HMM": HMM()}
    K = 4  # number of clusters
    for name in ["GMM", "HMM"]:
        print(name)
        model = models[name]
        model.fit(Xtrain, K, eps=pow(10, -2))
        # visualize clusters and frontiers
        model.plot_clusters(Xtrain, "figs/" + name + " on train", save=True)
        model.plot_clusters(Xtest, "figs/" + name + " on test", save=True)
        print("")
        lik = model.compute_log_likelihood(Xtrain)
        print("mean log-likelihood on training set : ", lik / Xtrain.shape[0])
        lik = model.compute_log_likelihood(Xtest)
        print("mean log-likelihood on test set : ", lik / Xtest.shape[0])
        print("\n------------------------\n")
def train(params: Dict):
    """Build the ASR model described by ``params``, train it, save it, free it.

    :param params: configuration dict; must contain ``model_type`` (one of
        ``SUPPORTED_MODEL``) and ``trainset_id``. ``set_model_name`` is used
        to name the model when present, otherwise ``structure_id`` is used.
    :return: the id under which the model was saved.
    """
    assert "model_type" in params, "model_type is not specified"
    kind = params["model_type"]
    assert kind in SUPPORTED_MODEL, \
        "model_type not supported: {}, try with {}".format(params["model_type"], str(SUPPORTED_MODEL))
    assert "trainset_id" in params, "trainset_id is not specified"

    trainset_path = join(TRAIN_PATH, params["trainset_id"])
    # an explicit name wins over the structure id when identifying the model
    name_key = "set_model_name" if "set_model_name" in params else "structure_id"
    model_id = get_new_model_id(params[name_key])

    if kind == "CNN":
        asrmodel = CNN(join(MODEL_PATH, model_id), input_param=params)
    elif kind == "HMM":
        asrmodel = HMM(join(MODEL_PATH, model_id))
    else:
        # should never go here
        raise AssertionError("model_type not recognised: {} check {}".format(params["model_type"], SUPPORTED_MODEL))

    asrmodel.train(trainset_path)
    asrmodel.save_model()
    del asrmodel  # free memory
    return model_id
def ziftied(self):
    """Set up the 'ziftied' simulation.

    One HMM/CMRF is shared by all ``self.ntimes`` tasks; each task samples the
    frontier and computes the Pareto frontier, both timed. Brute-force
    enumeration is disabled here.
    """
    self.tasklist = []
    feats = self.kwdargs['feats']
    weights = self.kwdargs['weights']  # read but not used below
    hmm = HMM()
    self._set_params_ziftied(hmm)
    cmrf = CMRF(hmm)
    for taskid in range(self.ntimes):
        label = '_'.join(['bio' + str(STUDY), self.name, str(taskid)])
        task = Task(label, cmrf, feats)
        # Sample the frontier
        # NOTE(review): extent of the original `with` bodies is not
        # recoverable from the collapsed source - confirm.
        with benchmark(task.name + 'sample') as timer:
            seq, energies = self.sample(cmrf, feats)
        task.sample_seq = seq
        task.sample_seq_energy = energies
        task.sample_time = timer.elapsed
        # Now run the toy simulation
        with benchmark(task.name + 'pareto') as timer:
            frontier, frontier_energy = pareto_frontier(cmrf, feats)
        task.frontier, task.frontier_energy = frontier, frontier_energy
        if self.plot_all:
            task.plot_frontier(frontier_only=True, plot_samples=True)
        task.pareto_time = timer.elapsed
        self.tasklist.append(task)
def randfeatsuntied(self):
    """Run many iterations of toy with random probs.

    Every task gets its own HMM/CMRF and randomly generated features, a
    brute-force enumeration and a Pareto frontier, both timed.
    """
    self.tasklist = []
    # Kept from the original: the value is replaced per task below.
    feats = self.get_feats_standard()
    # Repeat for all the tasks described
    for taskid in range(self.ntimes):
        hmm = HMM()
        self._set_params_randprobsuntied(hmm)
        cmrf = CMRF(hmm)
        feats = self._gen_feats_random()
        label = '_'.join(['sim' + STUDY, self.name, str(taskid)])
        task = Task(label, cmrf, feats)
        # Run Brute force to enumerate the frontier
        # NOTE(review): extent of the original `with` bodies is not
        # recoverable from the collapsed source - confirm.
        with benchmark(task.name + 'brute') as timer:
            seq, energies = self.bruteforce(cmrf, feats)
        task.all_seq = seq
        task.all_seq_energy = energies
        task.brute_time = timer.elapsed
        # Now run the toy simulation
        with benchmark(task.name + 'pareto') as timer:
            frontier, frontier_energy = pareto_frontier(cmrf, feats)
        task.frontier, task.frontier_energy = frontier, frontier_energy
        if self.plot_all:
            task.plot_frontier()
        task.pareto_time = timer.elapsed
        self.tasklist.append(task)
def test_3():
    """Walk the border of the 4x4 coloured maze with a noisy sensor.

    Prints the path and its ground-truth colours, runs forward-backward on
    that evidence and animates the result.
    """
    print(
        '------------ Test 3: 4x4 Colored Maze with Noisy Sensor. ----------------'
    )
    path = [(0, 0), (0, 1), (0, 2), (0, 3),
            (1, 3), (2, 3), (3, 3),
            (3, 2), (3, 1), (3, 0),
            (2, 0), (1, 0)]
    problem = RobotProblem('maze1.maz', deterministic_sensor=False)
    evidence = problem.get_ground_truth(path)

    cells = ['(%s, %s)' % (cell[0], cell[1]) for cell in path]
    print('Path: ', ' -> '.join(cells))
    colours = [problem.color_map[idx] for idx in evidence]
    print('Ground Truth: ', ' -> '.join(colours))

    result = HMM(problem).forward_backward(evidence)
    print(result)
    print('-------- Path Animation ----------')
    problem.animate_path(path, result)
def trainGestureModel():
    """Train one HMM per recorded sequence of each gesture and pickle them.

    For every gesture the observation sequences are loaded from
    ``<gesture>Obs.pickle``; each sequence trains its own model for at most
    three Baum-Welch iterations. The models are stored in a 6x7 object array,
    which is written to ``HMMModels.pickle`` and returned.
    """
    hidden = 10   # number of hidden states N
    symbols = 30  # number of observation types M

    start = (1.0 / hidden) * np.ones((hidden, 1))
    # Random A and B, each normalised so that its rows sum to one.
    # NOTE(review): B has shape (symbols, hidden) yet is normalised per row;
    # confirm this matches the convention HMM expects.
    trans = np.random.rand(hidden, hidden)
    trans = trans / trans.sum(axis=1)[:, None]
    emit = np.random.rand(symbols, hidden)
    emit = emit / emit.sum(axis=1)[:, None]

    gestureNames = np.array(
        ['beat3', 'beat4', 'circle', 'eight', 'inf', 'wave'], dtype='object')
    # NOTE(review): the fixed 6x7 shape allows at most 7 sequences per gesture.
    HMMModels = np.empty((6, 7), dtype='object')

    for row, gestureName in enumerate(gestureNames):
        # load the data for the type of gesture
        with open(gestureName + "Obs.pickle", 'rb') as handle:
            sequences = pickle.load(handle)
        # one trained model per observation sequence
        for col, sequence in enumerate(sequences):
            model = HMM(hidden, symbols, start, trans, emit)
            model.baum_welch(sequence, max_iter=3)
            HMMModels[row, col] = model

    # NOTE(review): placement of the dump (after all gestures) is assumed.
    with open('HMMModels.pickle', 'wb') as handle:
        pickle.dump(HMMModels, handle, protocol=pickle.HIGHEST_PROTOCOL)
    return HMMModels
class main_window():
    """Tk window that POS-tags the entered text with a freshly trained HMM."""

    def __init__(self):
        """Train the tagger, build the widgets and enter the Tk main loop."""
        X_train, Y_train, _, _ = get_train_test_data(split=0.01)
        self.hmm = HMM()
        self.hmm.train(X_train, Y_train)

        root = Tk()
        self.top = root
        root.title('POS Tagging')
        root.geometry('500x350')

        self.process_btn = Button(root, text='Process',
                                  command=self.process_btn_click)
        self.process_btn.place(x=400, y=300)

        self.input_label = Label(root, text='Input: ')
        self.input_label.place(x=20, y=10)
        self.input_text = Text(root, width=65, height=7)
        self.input_text.place(x=20, y=35)

        self.result_label = Label(root, text='Result: ')
        self.result_label.place(x=20, y=150)
        self.result_text = Text(root, width=65, height=8)
        self.result_text.place(x=20, y=170)
        # read-only until a result is written
        self.result_text.config(state=DISABLED)

        root.mainloop()

    def process_btn_click(self):
        """Tag the input text and show ``word/tag`` pairs in the result box."""

        def tag(sentence):
            labels = self.hmm.viterbi_bigram(sentence)
            return [word + '/' + label
                    for word, label in zip(sentence.split(' '), labels)]

        text = self.input_text.get('1.0', END)
        text = text.replace('\n', '').replace(',', ' ,')
        if '.' in text:
            # one sentence per '.', whatever follows the last '.' is dropped
            result = []
            for piece in text.split('.')[:-1]:
                result += tag(piece.strip() + ' .')
        else:
            result = tag(text)

        box = self.result_text
        box.config(state=NORMAL)
        box.delete('1.0', END)
        box.insert('1.0', result)
        box.config(state=DISABLED)
def main():
    """Plot the learning curve of the HMM on the character data set."""
    estimator = HMM()
    # estimator = NB.learning_model
    dataset_dir = 'I:\\eclipse_workspace\\CharacterRecognition\\teams_dataset'
    labeled_symbols, labeled_sequence, codes, labels = \
        estimator.get_data(dataset_dir)
    # data_x, data_y = NB.get_data()
    title = "Learning Curves (HMM)"
    # Cross validation with 100 iterations to get smoother mean test and train
    # score curves, each time with 20% data randomly selected as a validation set.
    # cv = cross_validation.ShuffleSplit(len(data_y), n_iter=50,
    #                                    test_size=0.2, random_state=0)
    data = (labeled_symbols, labeled_sequence, codes, labels)
    plot_learning_curve(estimator, title, X=data, custom='yes')
    # plot_learning_curve(estimator, title, data_x, data_y, cv=cv)
    plt.show()
def __init__(self, entry='train'):
    """Prepare data managers and the HMM for training or testing.

    :param entry: ``'train'`` builds and saves the data map and a fresh model;
        ``'test'`` builds the model and restores saved parameters. Any other
        value only sets the paths and loads the configuration.
    """
    self.data_map_path = os.path.join('models', 'HMM_data.pkl')
    self.model_config_path = os.path.join('models', 'HMM_config.yml')
    self.model_param_path = os.path.join('models', 'HMM_model_params.pkl')
    # original note: self.embedding_dim, self.hidden_dim, self.batch_size,
    # self.drop_out, self.tags
    self.load_config()

    if entry not in ('train', 'test'):
        return
    training = entry == 'train'

    self.train_manager = DataManager(data_type='train', tags=self.tags,
                                     model_name='HMM')
    if training:
        manager = self.train_manager
        self.save_data_map({
            # length of word_to_ix, used to initialise the HMM model
            "word_to_ix_size": manager.word_to_ix_size,
            # length of tag_to_ix, used to initialise the HMM model
            "tag_to_ix_size": manager.tag_to_ix_size,
            "word_to_ix": manager.word_to_ix,
            "tag_to_ix": manager.tag_to_ix,
            "ix_to_word": manager.ix_to_word,
            "ix_to_tag": manager.ix_to_tag,
        })
    self.dev_manager = DataManager(data_type='dev',
                                   data_map_path=self.data_map_path,
                                   model_name='HMM')
    self.model = HMM(
        hidden_state_num=self.train_manager.tag_to_ix_size,
        observable_state_num=self.train_manager.word_to_ix_size)
    if training:
        self.save_model()
        # self.restore_model()
    else:
        self.restore_model()
def test(self, sent_set):
    """Tag a set of sentences with a Hidden Markov Model and score the result.

    :param sent_set: tuple like (untagged sentences, gold standard sentences)
    :return: whatever ``self.evaluate`` reports for the tagged sentences.
    """
    untagged_sents, gold_tagged_sents = sent_set[0], sent_set[1]
    # the HMM is kept on the instance after the run
    self.hmm = HMM(
        untagged_sents,
        self.pos_tags,
        self.words_given_pos,
        self.words_given_pos_upper,
        self.pos2_given_pos1,
        Tagger.start_tag,
    )
    tagged = self.hmm.tag()
    return self.evaluate(tagged, gold_tagged_sents)
def _format_elapsed(seconds):
    """Render a duration as ``"<whole minutes>m <remaining seconds>s"``."""
    minutes, secs = divmod(int(seconds), 60)
    return str(minutes) + "m " + str(secs) + "s"


def _timed(label, action):
    """Call ``action()``, print its wall time under ``label``, return its result."""
    start = timeit.default_timer()
    result = action()
    elapsed = timeit.default_timer() - start
    print(label + " time: " + _format_elapsed(elapsed))
    return result


def program1(phones):
    """Train and test per-phone GMMs, then an HMM built on them, timing each step.

    :param phones: phone inventory handed to ``GMMs`` and ``HMM``.

    Minutes were previously computed with ``/``, which is true division on
    Python 3 and printed values such as ``1.5m``; they are now whole minutes.
    """
    path = r"C:\Users\Nicole Schwartz\Anaconda3\seniorProject\new\darpa-timit-acousticphonetic-continuous-speech\data\\"
    gmms = GMMs(phones, path)
    _timed("GMM training", gmms.train)
    accuracyGMM = _timed("GMM testing", gmms.test)
    print("Accuracy of GMMs alone= ", round(accuracyGMM * 100, 3))

    hmm = HMM(phones, gmms.models, path)
    _timed("HMM training", lambda: hmm.train(400))
    _timed("HMM testing", hmm.test)
def __init__(self):
    """Train the tagger, lay out the Tk widgets and enter the main loop."""
    X_train, Y_train, _, _ = get_train_test_data(split=0.01)
    tagger = HMM()
    self.hmm = tagger
    tagger.train(X_train, Y_train)

    window = Tk()
    self.top = window
    window.title('POS Tagging')
    window.geometry('500x350')

    self.process_btn = Button(window, text='Process',
                              command=self.process_btn_click)
    self.process_btn.place(x=400, y=300)

    self.input_label = Label(window, text='Input: ')
    self.input_label.place(x=20, y=10)
    self.input_text = Text(window, width=65, height=7)
    self.input_text.place(x=20, y=35)

    self.result_label = Label(window, text='Result: ')
    self.result_label.place(x=20, y=150)
    self.result_text = Text(window, width=65, height=8)
    self.result_text.place(x=20, y=170)
    # the result box starts read-only
    self.result_text.config(state=DISABLED)

    window.mainloop()
def initializeHMM(numStates=numStates, possibleObservations=numCats):
    """Create an HMM with random, row-normalised ``pi``, ``A`` and ``B``.

    :param numStates: number of hidden states.
    :param possibleObservations: number of observation categories; ``B`` is
        sized with this value (the original used the global ``numCats`` and
        ignored the argument).
    :return: the initialised HMM.
    """
    print("Initializing HMM...")
    hmm = HMM(numStates=numStates, numCats=possibleObservations)

    hmm.pi = np.random.rand(numStates)
    hmm.pi /= hmm.pi.sum()

    hmm.A = np.random.rand(numStates, numStates)
    hmm.A /= hmm.A.sum(axis=1)[:, np.newaxis]

    hmm.B = np.random.rand(numStates, possibleObservations)
    hmm.B /= hmm.B.sum(axis=1)[:, np.newaxis]

    print("Initial HMM stats")
    print("A: ", hmm.A)
    print("B: ", hmm.B)
    print("pi: ", hmm.pi)
    return hmm
def test(self, sent_set):
    """Run the HMM tagger over a sentence set and return its evaluation.

    :param sent_set: tuple like (untagged sentences, gold standard sentences)
    :return: the accuracy data produced by ``self.evaluate``.
    """
    untagged_sents = sent_set[0]
    gold_tagged_sents = sent_set[1]
    # everything the HMM needs, in the order its constructor takes it
    hmm_args = (
        untagged_sents,
        self.pos_tags,
        self.words_given_pos,
        self.words_given_pos_upper,
        self.pos2_given_pos1,
        Tagger.start_tag,
    )
    self.hmm = HMM(*hmm_args)
    return self.evaluate(self.hmm.tag(), gold_tagged_sents)
class Tagger:
    """POS tagger that runs Viterbi over an HMM and records test results.

    Results of each ``test`` call are stored in ``self.tests[fileName]``.
    Scores are combined by addition, so the HMM is presumably expected to
    return log-probabilities.
    """

    def __init__(self):
        self.hmm = HMM()
        self.tests = {}

    def train(self, fileName):
        """Train the underlying HMM from ``fileName``."""
        self.hmm.train(fileName)

    def test(self, fileName):
        """Tag the ``word_TAG`` tokens in ``fileName`` and store the results."""
        with open(fileName) as handle:
            data = handle.read().split()
        observations = []
        partsofspeech = []
        for token in data:
            tag = token.split("_")
            observations.append(tag[WORD].lower())
            partsofspeech.append(tag[POS])
        record = {}
        self.tests[fileName] = record
        record["text"] = observations
        record["pos_real"] = partsofspeech
        record["table"] = self.viterbi_table(observations)
        record["prob"], record["pos_guess"] = \
            self.optimal_tags(record["table"])
        record["accuracy"] = self.accuracy(fileName)

    def viterbi_table(self, observations):
        """Return one ``{state: {"prob", "prev"}}`` dict per observation."""
        vt = []
        states = self.hmm.states()
        for index in range(len(observations)):
            vt.append({})
            for st in states:
                emis_prob = self.hmm.emission_probability(
                    st, observations[index])
                if index == 0:
                    max_prob, best_state = self.hmm.start_probability(st), None
                else:
                    max_prob, best_state = \
                        self.get_best_prob_and_state(index, st, vt)
                vt[index][st] = {"prob": max_prob + emis_prob,
                                 "prev": best_state}
        return vt

    def get_best_prob_and_state(self, index, st, vt):
        """Return the best ``(score, predecessor)`` for reaching ``st``."""
        best_state = None
        max_prob = -float("inf")
        for prev_st in self.hmm.states():
            candidate = vt[index - 1][prev_st]["prob"] + \
                self.hmm.transition_probability(prev_st, st)
            if max_prob < candidate:
                max_prob = candidate
                best_state = prev_st
        return (max_prob, best_state)

    def print_table(self, fileName):
        """Print the Viterbi table of a previous test, one row per state."""
        vt = self.tests[fileName]["table"]
        obs = self.tests[fileName]["text"]
        # Print a table of steps from dictionary
        print(" ", end=" ")
        for i in range(len(vt)):
            print(str(obs[i].ljust(7)), end=" ")
        print("")
        for state in vt[0]:
            print("%s:" % state, end=" ")
            for v in vt:
                print("%.7s" % ("%f" % v[state]["prob"]), end=" ")
            print("")

    def optimal_tags(self, vt):
        """Backtrack through ``vt`` and return ``(best score, state path)``.

        An empty table yields ``(-inf, [])``.
        """
        max_prob = -float("inf")
        if not vt:
            return (max_prob, [])
        # The highest probability in the last column
        previous = None
        for st, value in vt[-1].items():
            if value["prob"] > max_prob:
                max_prob = value["prob"]
                previous = st
        opt = [previous]
        # Follow the back-pointers to the first observation. The pointer of
        # the state chosen at t + 1 names the state at t (the original read
        # the pointer from column t, which follows the wrong chain).
        for t in range(len(vt) - 2, -1, -1):
            previous = vt[t + 1][previous]["prev"]
            opt.insert(0, previous)
        return (max_prob, opt)

    def accuracy(self, fileName):
        """Return the confusion matrix and hit rate of a previous test.

        ``"errors"`` is kept at 0 as before; ``"value"`` is 0.0 for empty input.
        """
        acc = {"matrix": {}, "errors": 0}
        sts = list(self.hmm.sts.keys())  # dict views are not indexable
        obs = self.tests[fileName]["text"]
        real = self.tests[fileName]["pos_real"]
        guess = self.tests[fileName]["pos_guess"]
        acc["matrix"] = {row: {col: 0 for col in sts} for row in sts}
        matches = 0
        for i in range(len(obs)):
            acc["matrix"][real[i]][guess[i]] += 1
            if real[i] == guess[i]:
                matches += 1
        acc["value"] = matches / float(len(obs)) if obs else 0.0
        return acc

    def output(self, fileName):
        """Print the guessed tag sequence, its score and the accuracy."""
        opt = self.tests[fileName]["pos_guess"]
        max_prob = self.tests[fileName]["prob"]
        accuracy = self.tests[fileName]["accuracy"]["value"]
        print('The steps of states are ' + ' '.join(opt) +
              ' with highest probability of E%s' % max_prob)
        print(fileName + ' tagged with %s accuracy' % accuracy)

    def print_confusion_matrix(self, fileName):
        """Print the confusion matrix recorded for ``fileName``."""
        # was: tagger.tests[testFile] - module globals instead of self/fileName
        confusion_matrix = self.tests[fileName]["accuracy"]["matrix"]
        print(" ", end=" ")
        for key in confusion_matrix:
            print(str(key.rjust(5)), end=" ")
        print("")
        for pos in confusion_matrix:
            print(pos.ljust(5), end=" ")
            for pos2 in confusion_matrix[pos]:
                print(str(confusion_matrix[pos][pos2]).rjust(5), end=" ")
            print("")

    def export_confusion_matrix_to_csv(self, fileName):
        """Write the confusion matrix of ``fileName`` to ``result.csv``."""
        # was: tagger.tests[testFile] - module globals instead of self/fileName
        confusion_matrix = self.tests[fileName]["accuracy"]["matrix"]
        parts = [","]
        parts.extend(key + "," for key in confusion_matrix)
        parts.append("\n")
        for pos in confusion_matrix:
            parts.append(pos + ",")
            parts.extend(str(confusion_matrix[pos][pos2]) + ","
                         for pos2 in confusion_matrix[pos])
            parts.append("\n")
        with open("result.csv", "w") as f:
            f.write("".join(parts))
from HMM import HMM

# Small demo: a three-state, two-symbol model evaluated on one sequence.
# NOTE(review): going by the names, these are the initial distribution, the
# state-transition matrix and the emission matrix - confirm against HMM.
initProb=[0.2,0.4,0.4]
transMatrix=[[0.5,0.2,0.3],
             [0.3,0.5,0.2],
             [0.2,0.3,0.5]]
genMatrix=[[0.5,0.5],
           [0.4,0.6],
           [0.7,0.3]]
hmm=HMM(initProb,transMatrix,genMatrix)

# Observation sequence given as symbol indices.
observe=[0,1,0]
prob1=hmm.forward_calc_prob(observe)
prob2=hmm.backward_calc_prob(observe)
path=hmm.viterbi_decoding(observe)
# Both probability values are printed side by side, then the decoded path.
print(prob1,prob2)
print(path)
from HMM import HMM

# Two models that share A and B and differ only in the initial distribution.
pi = [0.5, 0.5]
pi_linha = [0.0, 1.0]
A = [[0.5, 0.5], [0.5, 0.5]]
B = [[0.8, 0.2], [0.3, 0.7]]
hmm = HMM(A, B, pi)
hmm_linha = HMM(A, B, pi_linha)

# Probability of the same observation sequence under each model.
observacoes = [0, 0]
print("O|M: ", hmm.probabilidade_observacoes(observacoes))
print("O|M': ", hmm_linha.probabilidade_observacoes(observacoes))

print("T = 1")
estados = [0]
observacoes = [0]
numerador = hmm.probabilidade_estados_observacoes(estados, observacoes)
def main():
    """Run the pygame robot simulation until the user quits, then log results.

    Sets up the robot, goal and obstacles, processes keyboard input every
    frame (W/A/S/D drive, SPACE and 1-4 select the operating mode, ESC quits),
    updates the POMDP belief whenever the goal is reached, and finally writes
    ``Distance``, ``totalUserInput`` and ``minObsDist`` to the file ``results``.

    NOTE(review): block nesting was reconstructed from a whitespace-collapsed
    source; the per-frame (rather than per-event) placement of the user-input
    accumulation in particular should be confirmed.
    """
    global leave_trace, list_traces, pomdp
    global user_inputF, user_inputR, totalUserInput, o, R, numTrials

    if r_visual_granularity > wall_thickness:
        print("PARAMETER ERROR: r_visual_granularity exceeds wall_thickness!")
        print("This can cause wall detection errors!")
    if r_init_x < wall_thickness or r_init_y < wall_thickness:
        print("PARAMETER ERROR: starting position overlaps wall!")
        print("Check r_init_x|y_topleft and wall_thickness")

    pygame.init()  # also calls display.init()
    startTime = time.time()
    caption = sim_version + " \tmode: teleoperation "
    pygame.display.set_caption(caption + str(startTime))
    r_sprite = load_image(r_image)
    g_sprite = load_image("goal.bmp")
    background = load_image(back_image)

    # prepare simulation objects
    clock = pygame.time.Clock()
    screen.blit(background, (0, 0))
    goal = Goal(g_sprite)
    r = Robot(
        r_sprite,
        r_init_x,
        r_init_y,
        r_init_azi,
        r_init_fwd_speed,
        r_init_spin_speed,
        r_visual_range,
        r_visual_angle,
        goal,
        pomdp,
    )
    robotSprite = pygame.sprite.Group(r)
    goalSprite = pygame.sprite.Group(goal)

    # display the environment once, right before event loop
    for count, ob in enumerate(list_obstacles):
        s = pygame.display.get_surface()
        s.fill(ob.color, list_rect_obstacles[count])
    r.draw_rays(screen)
    r.showGoalPath(screen)
    pygame.display.flip()

    # recording time
    going = True
    time_down = 0.0
    time_elapsed = 0.0
    T1 = -1
    T2 = -1

    # HMM initialise (only used by the disabled training code below)
    hmm = HMM()
    hmm.pi = np.array([0.5, 0.5])
    hmm.A = np.array([[0.5, 0.5], [0.5, 0.5]])
    hmm.B = np.array([[0.3, 0.7], [0.99, 0.01]])

    # key -> (which input channel it drives, direction label)
    drive_keys = {K_w: ("F", "w"), K_s: ("F", "s"),
                  K_d: ("R", "d"), K_a: ("R", "a")}
    # key -> (operating mode, caption suffix)
    mode_keys = {
        K_SPACE: (0, " \tmode: teleoperation "),
        K_1: (1, " \tmode: autonomous "),
        K_2: (2, " \tmode: assist "),
        K_3: (3, " \tmode: pomdp "),
        K_4: (4, " \tmode: hmm "),
    }

    while going:
        clock.tick(fps)  # at most that many fps
        time_elapsed = 0.0

        # Event loop################################
        for event in pygame.event.get():
            # was `event == QUIT`, which compares the Event object itself
            # with the constant and therefore never matched
            if event.type == QUIT:
                going = False
            elif event.type == KEYDOWN:
                if event.key == K_ESCAPE:
                    going = False
                elif event.key in drive_keys:
                    channel, label = drive_keys[event.key]
                    if channel == "F":
                        T1 = 1
                    else:
                        T2 = 1
                    r.direction = label
                    time_down = pygame.time.get_ticks()
                if event.key in mode_keys:
                    r.opmode, suffix = mode_keys[event.key]
                    caption = sim_version + suffix
            elif event.type == KEYUP:
                if event.key in drive_keys:
                    time_elapsed = pygame.time.get_ticks() - time_down
                    if drive_keys[event.key][0] == "F":
                        T1 = -1
                    else:
                        T2 = -1
                    r.direction = "N"

        totalUserInput += time_elapsed / 1000.0
        # ramp the input levels up while a key is held, down otherwise,
        # clamped to [0, 1]
        user_inputF = min(1.0, max(0.0, user_inputF + T1 * 0.1))
        user_inputR = min(1.0, max(0.0, user_inputR + T2 * 0.1))
        pygame.display.set_caption(caption)

        # slow down when stopped moving
        if r.speed > 0.0:
            r.speed -= 0.5
            if r.speed < 0.5:
                r.speed = 0.0
        elif r.speed < 0.0:
            r.speed += 0.5
            # NOTE(review): `> -0.5` may have been intended - confirm.
            if r.speed > 0.5:
                r.speed = 0.0

        # Find if goal reached
        if pygame.sprite.spritecollide(r, goalSprite, False) != []:
            print("You made it to the goal")
            print(r.numCollision)
            R = R + 1
            mapLevel = 3 if len(list_obstacles) > 30 else 0
            if r.numCollision > 5:
                observation = 0 + mapLevel
            elif r.numCollision > 0:
                observation = 1 + mapLevel
            else:
                observation = 2 + mapLevel
            r.pomdp.update_belief(r.action, observation)
            r.action = r.pomdp.get_best_action()[0]
            print("Most likely state: " +
                  r.pomdp.pomdpenv.states[np.argmax(r.pomdp.belief)])
            print("Observation: " + pomdp.pomdpenv.observations[observation])
            print("Action: " + pomdp.pomdpenv.actions[r.action])
            print(r.pomdp.belief)
            r.numCollision = 0
            # if R == numTrials:
            #     hmm.train(o,0.0001,graphics=False)
            #     print 'probabilities\n',hmm.pi
            #     print 'state transition probabililities\n',hmm.A
            #     print 'observation probabililities\n',hmm.B
            #     R = 0
            #     o = np.zeros(numTrials)
            goal.getNew()
            startTime = time.time()

        robotSprite.update()
        goalSprite.update()
        screen.blit(background, (0, 0))  # redraws the entire bkgrnd.
        for count, ob in enumerate(list_obstacles):
            s = pygame.display.get_surface()
            s.fill(ob.color, list_rect_obstacles[count])
            # reset the per-frame detection state of every obstacle
            ob.detected = False
            ob.dist = 1000.0
            ob.xCollide = 0.0
            ob.yCollide = 0.0
        r.draw_rays(screen)
        r.showGoalPath(screen)
        robotSprite.draw(screen)
        goalSprite.draw(screen)
        pygame.display.flip()  # all changes are drawn at once (double buffer)

    pygame.quit()  # also calls display.quit()
    # close the file so the results are flushed (the handle used to leak)
    with open("results", "w") as f:
        f.write(str(Distance) + ",")
        f.write(str(totalUserInput) + ",")
        f.write(str(minObsDist))
class Tagger:
    """
    A class for POS-tagging text and evaluating the result.

    Training builds NLTK conditional frequency distributions from a tagged
    corpus; testing hands those distributions to an HMM and compares the
    HMM's output against the gold-standard tags.
    """

    ######### CLASS VARIABLES #########

    # a fake START tag to add to the beginning of sentences to help with tagging
    start_tag = '^'

    # number of times for a POS tagging mistake to occur in order to show it to user
    mistake_threshold = 50

    # x-fold cross-validation
    # NOTE(review): not read anywhere in this class -- confirm whether it is
    # still needed by callers.
    test_cycles = 2

    def __init__(self, corpus_path, corpus_files, test_files):
        """
        Construct a Tagger object

        :param corpus_path: path to corpus files
        :param corpus_files: list of corpus files used for training
        :param test_files: list of corpus files used for testing
        """

        # object for working with training data
        self.training = Treebank(corpus_path, corpus_files)

        # object for working with testing data
        self.testing = Treebank(corpus_path, test_files)

        # will contain a list of tags in training corpus
        self.pos_tags = False

        # will be object for running the Hidden Markov Model for tagging
        self.hmm = False

        # use PennTags
        self.tags = PennTags

        # will hold conditional frequency distribution for P(Wi|Ck)
        # (words normalized to lowercase)
        self.words_given_pos = False

        # same distribution with the original capitalization; initialized
        # here so the attribute exists before train() has been called
        self.words_given_pos_upper = False

        # will hold conditional frequency distribution for P(Ci+1|Ci)
        self.pos2_given_pos1 = False

    ######### `PUBLIC' FUNCTIONS #########

    def run_test_cycles(self):
        """
        Train on the training corpus, tag the testing corpus, and report
        accuracy statistics.

        A single train/test pass is run; the per-test lists are kept so the
        summary code works unchanged if more passes are ever added.
        Finally the user is offered an interactive look at the bad tags.
        """

        total_time_start = time.time()  # keep track of time
        rights = []      # number of correctly-tagged words for each test
        wrongs = []      # number of incorrectly-tagged words for each test
        totals = []      # number of total words for each test
        all_missed = []  # incorrect tag information for each test
        sep = "-" * 50 + "\n"  # logging separator

        # train on the tagged training sentences
        training_sents = self.training.tagged_sents
        self.train(training_sents)

        # tag the untagged test sentences and compare with their gold tags
        testing_tagged_sents = self.testing.tagged_sents
        testing_untagged_sents = self.testing.sents
        testing_sents = (testing_untagged_sents, testing_tagged_sents)
        (right, wrong, missed) = self.test(testing_sents)

        # gather accuracy statistics for this test
        total = right + wrong
        rights.append(right)
        wrongs.append(wrong)
        totals.append(total)
        all_missed += missed

        msg("Total words: %d\n" % total)
        msg("Correct tags: %d (%0.2f%%)\n"
            % (right, self._percent(right, total)))
        msg("Incorrect tags: %d (%0.2f%%)\n"
            % (wrong, self._percent(wrong, total)))
        msg("%s%s" % (sep, sep))

        # calculate and output statistics for the entire test
        # (single-argument parenthesized prints behave identically as
        # Python 2 print statements)
        print("Total tests run: %d" % len(totals))
        print("Total time taken: %0.2f seconds"
              % (time.time() - total_time_start))
        print("Average correct tags: %0.2f%%"
              % self._percent(sum(rights), sum(totals)))
        print("Average incorrect tags: %0.2f%%"
              % self._percent(sum(wrongs), sum(totals)))
        print("")

        # give the option of inspecting incorrect tags
        if raw_input("Examine bad tags? ") in ['y', 'Y']:
            self.inspect(all_missed)

    def train(self, sents):
        """
        Train the tagger on a set of tagged sentences

        :param sents: list of tagged sentences, each a list of (word, tag)
        """

        # collect POS tags from our corpus
        self.pos_tags = self.training.pos_tags()

        # add start markers to help with bigram tagging
        msg("Adjusting POS tags...")
        sents = self._adjust_pos(sents)
        msg("done\n")

        # create 2 conditional frequency distributions (from the NLTK) that
        # store observed probabilities that a given word has a certain POS,
        # one for lowercase-normalized words and one for words as they appear
        # in the text
        msg("Training (Wi|Ck)...")

        # create a CFD for words normalized to lowercase
        self.words_given_pos = ConditionalFreqDist(
            (wp[1], wp[0].lower()) for sent in sents for wp in sent)

        # create a CFD for words left in their original capitalization
        self.words_given_pos_upper = ConditionalFreqDist(
            (wp[1], wp[0]) for sent in sents for wp in sent)
        msg("done\n")

        # create another CFD that stores observed probabilities that one POS
        # follows another POS
        msg("Training (Ci+1|Ci)...")
        self.pos2_given_pos1 = ConditionalFreqDist(
            (sent[i - 1][1], sent[i][1])
            for sent in sents for i in range(1, len(sent)))
        msg("done\n")

    def test(self, sent_set):
        """
        Use a Hidden Markov Model to tag a set of sentences, and evaluate
        accuracy.

        :param sent_set: tuple like (untagged sentences, gold standard sentences)
        :return: the (right, wrong, missed) tuple produced by evaluate()
        """

        untagged_sents = sent_set[0]     # recover untagged sentences
        gold_tagged_sents = sent_set[1]  # recover gold standard tagged sentences

        # initialize an HMM object with necessary parameters
        self.hmm = HMM(untagged_sents, self.pos_tags, self.words_given_pos,
                       self.words_given_pos_upper, self.pos2_given_pos1,
                       Tagger.start_tag)

        # get HMM-tagged sentences
        hmm_tagged_sents = self.hmm.tag()

        # evaluate against gold standard and return accuracy data
        return self.evaluate(hmm_tagged_sents, gold_tagged_sents)

    def evaluate(self, hmm_tagged_sents, gold_tagged_sents):
        """
        Evaluate one set of tagged sentences against another set

        :param hmm_tagged_sents: list of tagged sentences
        :param gold_tagged_sents: list of tagged sentences used as gold standard
        :return: tuple (right, wrong, missed) where ``missed`` is a list of
            (hmm_tagged_word, gold_tagged_word, hmm_sentence, gold_sentence)
        :raises ValueError: if the two sets differ in sentence count,
            sentence length, or in the words themselves
        """

        # ensure our sentence sets have the same length
        if len(hmm_tagged_sents) != len(gold_tagged_sents):
            raise ValueError("HMM-tagged sentence set did not match gold "
                             "standard sentence set!")

        right = 0    # counter of correct tags
        wrong = 0    # counter of incorrect tags
        missed = []  # tagged words we didn't get right

        # loop through sentence sets
        for hmm_sent, gold_sent in zip(hmm_tagged_sents, gold_tagged_sents):

            # ensure our sentences have the same length
            if len(hmm_sent) != len(gold_sent):
                raise ValueError("HMM-tagged sentence did not match gold "
                                 "standard sentence!")

            # loop through words in sentence
            for hmm_tagged_word, gold_tagged_word in zip(hmm_sent, gold_sent):

                # ensure the words are the same between the sets
                if gold_tagged_word[0] != hmm_tagged_word[0]:
                    raise ValueError("HMM-tagged word did not match gold "
                                     "standard word!")

                # increment counters based on tag correctness
                if gold_tagged_word[1] == hmm_tagged_word[1]:
                    right += 1
                else:
                    missed.append((hmm_tagged_word, gold_tagged_word,
                                   hmm_sent, gold_sent))
                    wrong += 1

        # return a tuple of correct vs incorrect tags
        return (right, wrong, missed)

    def inspect(self, missed):
        """
        Inspect a testing session, and print data about tag accuracy

        :param missed: list of tuples of missed tags like:
            (hmm_tagged_word, gold_tagged_word, hmm_context, gold_context)
        """

        # create a CFD so we can examine a matrix of incorrect vs correct tags
        # ms[1][1] = tag of a gold_tagged_word
        # ms[0][1] = tag of an hmm_tagged_word
        cfd = ConditionalFreqDist((ms[1][1], ms[0][1]) for ms in missed)

        # print a table showing mistake frequency
        cfd.tabulate()
        msg("\n")

        # group (hmm_tag, gold_tag) mistake types by how often they occurred
        mistakes = {}
        for g_tag in cfd.conditions():
            for hmm_tag in cfd[g_tag].keys():
                # how many times did we incorrectly say g_tag was hmm_tag?
                count = cfd[g_tag][hmm_tag]
                mistakes.setdefault(count, []).append((hmm_tag, g_tag))

        # frequencies that occurred over the threshold, worst first
        mistake_counts = sorted(
            (count for count in mistakes
             if count > Tagger.mistake_threshold),
            reverse=True)

        # list the high-frequency mistake types for the user
        mistakes_to_halt = []
        for count in mistake_counts:
            for mistake_tuple in mistakes[count]:
                mistakes_to_halt.append(mistake_tuple)
                msg("%d\t%s\twas really\t%s\n"
                    % (count, mistake_tuple[0], mistake_tuple[1]))
        msg("\n")

        # create separators used when outputting missed word contexts
        sep_big = "---------------------------------------------------\n"
        sep_small = "\n-----------------------------------------\n"

        # set lookup replaces a scan of mistakes_to_halt for every missed word
        halt_pairs = set(mistakes_to_halt)

        # show each individual mistake of a high-frequency type, together
        # with the sentence context of both taggings
        for missed_set in missed:
            (hmm_tagged_word, gold_tagged_word, hmm_tagged_sent,
             gold_tagged_sent) = missed_set

            if (hmm_tagged_word[1], gold_tagged_word[1]) not in halt_pairs:
                continue

            msg("%sTagged '%s' with %s when it should have been %s.%s"
                % (sep_big, hmm_tagged_word[0], hmm_tagged_word[1],
                   gold_tagged_word[1], sep_small))

            msg("Gold: " + ' '.join(
                [(w[0] + "/" + w[1]) for w in gold_tagged_sent]))
            msg(sep_small)
            msg("Mine: " + ' '.join(
                [(w[0] + "/" + w[1]) for w in hmm_tagged_sent]))

            # stop as soon as the user asks to quit instead of walking the
            # rest of the list doing nothing
            response = raw_input("\n\nEnter to continue, Q to quit: ")
            if response in ['q', 'Q']:
                break

    ######### `PRIVATE' FUNCTIONS #########

    @staticmethod
    def _percent(part, whole):
        """
        Return ``part`` as a percentage of ``whole`` using true division.

        Multiplying by 100.0 first avoids Python 2 integer truncation, and
        an empty ``whole`` yields 0.0 instead of a ZeroDivisionError.
        """
        if not whole:
            return 0.0
        return 100.0 * part / whole

    def _adjust_pos(self, sents):
        """
        Insert start markers (word and tag tuple) in each sentence of a list.
        Add any other tags that need adding

        :param sents: list of tagged sentences
        :return: new list of sentences, each prefixed with the start marker
        """

        # prefix every sentence with the (start_tag, start_tag) marker
        marker = (Tagger.start_tag, Tagger.start_tag)
        new_sents = [[marker] + sent for sent in sents]

        # make sure our start marker tag is in the POS list exactly once,
        # even if training runs more than once on the same list
        if Tagger.start_tag not in self.pos_tags:
            self.pos_tags.append(Tagger.start_tag)

        # also take the opportunity to add other tags to the list
        # which we may not have encountered in testing
        for tag in self.tags.rare_tags:
            if tag not in self.pos_tags:
                self.pos_tags.append(tag)

        return new_sents
#!/usr/bin/env python2.7
"""Train an HMM on the file named on the command line and write its
transition and emission matrices to text files."""

import sys

from HMM import HMM

# the training file is the first command-line argument
try:
    training_file = sys.argv[1]
except IndexError:
    # sys.exit is the supported way to terminate a program; the bare
    # exit() helper is injected by the site module for interactive use
    sys.exit("No file provided")

# initialize HMM object, write resulting
# probability matrices to proper files
model = HMM(training_file)

with open('hw3b.a-matrix.txt', 'w') as tp_out:
    tp_out.write(HMM.format_matrix(model.tmatrix))

with open('hw3b.emission.txt', 'w') as ep_out:
    ep_out.write(HMM.format_matrix(model.ematrix))
def __init__(self,genes):
    """Create the object and the HMM it wraps.

    :param genes: passed unchanged to the HMM constructor
        (NOTE(review): expected type is not visible here -- confirm
        against the HMM class)
    """
    self.hmm = HMM(genes)
class Tagger:
    """Tag sentences with a trigram HMM using the Viterbi algorithm.

    All probabilities come from the wrapped ``HMM`` object; this class only
    implements the dynamic programme and some small I/O helpers.
    """

    def __init__(self, genes):
        """Build the underlying HMM from ``genes`` (passed through as-is)."""
        self.hmm = HMM(genes)

    def e(self, x, u):
        """Return the emission probability of word ``x`` given tag ``u``."""
        return self.hmm.e(x, u)

    def q(self, w, u, v):
        """Return the trigram probability of tag ``w`` following ``u, v``."""
        return self.hmm.trigram_prob((u, v, w))

    def argmax(self, eminlist):
        """Return the (item, score) pair with the highest score.

        :param eminlist: iterable of (item, score) pairs; on ties the first
            maximal pair is returned
        """
        return max(eminlist, key=lambda pair: pair[1])

    def calc(self, text, tags):
        """Return ``[(tag, e(text, tag)), ...]`` for every tag in ``tags``."""
        return [(tag, self.e(text, tag)) for tag in tags]

    def k_val(self, k):
        """Return the tags allowed at position ``k``.

        Positions -1 and 0 lie before the sentence and only admit the
        start symbol ``'*'``; every other position admits all HMM tags.
        """
        if k in (-1, 0):
            return ['*']
        return self.hmm.getTags()

    def viterbi(self, sentence):
        """Find the most probable tag sequence for ``sentence``.

        :param sentence: list of words
        :return: tuple ``(tags, scores)``: ``tags`` holds one tag per word;
            ``scores`` holds the table value for positions 1..n-1 along the
            chosen path followed by the final score including the
            transition to ``"STOP"``
        """
        n = len(sentence)
        x = [""] + sentence  # 1-based words
        y = [""] * (n + 1)   # 1-based tags, filled in from the back
        pi = {(0, '*', '*'): 1}
        back_pointers = {}

        # pi[k, u, v]: best score of a tag sequence ending in (u, v) at k
        for k in range(1, n + 1):
            for u in self.k_val(k - 1):
                for v in self.k_val(k):
                    back_pointers[k, u, v], pi[k, u, v] = self.argmax(
                        [(tag,
                          pi[k - 1, tag, u] * self.q(v, tag, u)
                          * self.e(x[k], v))
                         for tag in self.k_val(k - 2)])

        # pick the best final tag pair, accounting for the STOP transition
        (y[n - 1], y[n]), score = self.argmax(
            [((u, v), pi[n, u, v] * self.q("STOP", u, v))
             for u in self.k_val(n - 1)
             for v in self.k_val(n)])

        # follow the back pointers to recover the earlier tags
        for k in range(n - 2, 0, -1):
            y[k] = back_pointers[k + 2, y[k + 1], y[k + 2]]
        y[0] = '*'

        tagScores = [pi[i, y[i - 1], y[i]] for i in range(1, n)]
        return y[1:n + 1], tagScores + [score]

    def read_sent(self, data):
        """Yield the sentences found in ``data``, one list of words each.

        :param data: iterable of lines, one word per line; a blank line
            ends the current sentence (consecutive blank lines therefore
            yield empty sentences)

        A final sentence that is not followed by a blank line is yielded
        too, rather than being silently dropped.
        """
        sentence = []
        for word in data:
            token = word.strip()
            if token:
                sentence.append(token)
            else:
                yield sentence
                sentence = []
        if sentence:
            yield sentence

    def print_tags(self, sentence, tags):
        """Print one ``word tag`` pair per line.

        :param sentence: list of words
        :param tags: sequence whose first element is the list of tags,
            i.e. the tuple returned by ``viterbi``
        """
        # single-argument parenthesized print is identical in Python 2
        print("\n".join([word + " " + tag
                         for word, tag in zip(sentence, tags[0])]))
def __init__(self):
    """Create the object with a default-constructed HMM in ``self.hmm``."""
    self.hmm = HMM()
for word in ls: new_word = word + '#' for letter in new_word: if letter not in letters: letters[letter] = 1 if new_word not in words: words[new_word] = 1 return sorted(letters.keys()), sorted(words.keys()) ### A - transition probabilities ### B - emission probailities ### Pi - initial state distribution if __name__ == '__main__': if len(sys.argv) < 4 or len(sys.argv) > 5: print 'usage: HMM.py <input_file> <out_text> <out_png> [-v]' quit() input_file_name = sys.argv[1] output_file_name = sys.argv[2] out_pdf_name = sys.argv[3] if len(sys.argv) == 5: if sys.argv[4] != '-v': print 'usage: python HMM.py <input_file> <out_text> <out_png> [-v]' quit() VERBOSE_FLAG = 1 letters, words = read_in(input_file_name) with open(output_file_name,'w') as out_text: myHMM = HMM(letters,words,STATES,out_text,out_pdf_name,VERBOSE_FLAG) myHMM.cycle(MAX_ITERS,MIN_CHANGE) myHMM.make_plot()
def __init__(self):
    """Create the object with a default-constructed HMM and no tests."""
    self.hmm = HMM()
    # starts empty; presumably filled in by other methods of the class
    # (not visible here -- confirm)
    self.tests = {}
def main(): global leave_trace, list_traces,pomdp if r_visual_granularity > wall_thickness: print 'PARAMETER ERROR: r_visual_granularity exceeds wall_thickness!' print 'This can cause wall detection errors!' if r_init_x<wall_thickness or r_init_y<wall_thickness: print 'PARAMETER ERROR: starting position overlaps wall!' print 'Check r_init_x|y_topleft and wall_thickness' pygame.init() #also calls display.init() startTime = time.time() caption = (sim_version + ' \tmode: teleoperation ' ) pygame.display.set_caption(caption+ str(startTime)) r_sprite = load_image(r_image) g_sprite = load_image('goal.bmp') background = load_image(back_image) #prepare simulation objects clock = pygame.time.Clock() screen.blit(background, (0, 0)) goal = Goal(g_sprite) r = Robot(r_sprite, r_init_x, r_init_y,r_init_azi, r_init_fwd_speed,\ r_init_spin_speed, r_visual_range, r_visual_angle,goal,pomdp) robotSprite = pygame.sprite.Group(r) goalSprite = pygame.sprite.Group(goal) #display the environment once, right before event loop count = -1 for ob in list_obstacles: count = count + 1 s = pygame.display.get_surface() s.fill(ob.color, list_rect_obstacles[count]) r.draw_rays(screen) r.showGoalPath(screen) pygame.display.flip() going = True time_down = 0.0 time_elapsed = 0.0 T = -1 #HMM initalize hmm = HMM() hmm.pi = np.array([0.5, 0.5]) hmm.A = np.array([[0.5, 0.5],[0.5, 0.5]]) hmm.B = np.array([[0.3, 0.7],[0.99, 0.01]]) while going: clock.tick(fps) #at most that many fps time_elapsed = 0.0 #Event loop################################ global user_input, totalUserInput, o,R,numTrials for event in pygame.event.get(): if event == QUIT: going = False elif event.type == KEYDOWN: if event.key == K_ESCAPE: going = False elif event.key == K_w: T = 1 r.direction = 'w' time_down = pygame.time.get_ticks() elif event.key == K_d: T = 1 r.direction = 'd' time_down = pygame.time.get_ticks() elif event.key == K_a: T = 1 r.direction = 'a' time_down = pygame.time.get_ticks() elif event.key == K_s: T = 1 r.direction 
= 's' time_down = pygame.time.get_ticks() if event.key == K_SPACE: r.opmode = 0 #teleop mode caption = sim_version + ' \tmode: teleoperation ' if event.key == K_1: r.opmode = 1 #autonomous navigation mode caption = (sim_version + ' \tmode: autonomous ') if event.key == K_2: r.opmode = 2 #autonomous navigation mode caption = (sim_version + ' \tmode: assist ') if event.key == K_3: r.opmode = 3 #autonomous navigation mode caption = (sim_version + ' \tmode: pomdp ') if event.key == K_4: r.opmode = 4 #autonomous navigation mode caption = (sim_version + ' \tmode: hmm ') if event.key == K_t: #toggles the tracing mode if leave_trace: leave_trace = 0 list_traces = list() print 'changing leave_trace from 1 to 0' else: leave_trace = 1 print 'changing leave_trace from 0 to 1' elif event.type == KEYUP: if event.key == K_w: time_elapsed = pygame.time.get_ticks() - time_down T = -1 r.direction = 'N' elif event.key == K_d: time_elapsed = pygame.time.get_ticks() - time_down T = -1 r.direction = 'N' elif event.key == K_a: time_elapsed = pygame.time.get_ticks() - time_down T = -1 r.direction = 'N' elif event.key == K_s: time_elapsed = pygame.time.get_ticks() - time_down T = -1 r.direction = 'N' totalUserInput += time_elapsed/1000.0 user_input += T*0.1 if user_input > 1.0: user_input = 1.0 elif user_input < 0.0: user_input = 0.0 pygame.display.set_caption(caption) if r.speed > 0.0: r.speed -= 0.5 elif r.speed < 0.0: r.speed += 0.5 # Find if goal reached if pygame.sprite.spritecollide(r, goalSprite, False) != []: print 'You made it to the goal' R=R+1 r.pomdp.update_belief(0,5) print o if R == numTrials: hmm.train(o,0.0001,graphics=False) print 'probabilities\n',hmm.pi print 'state transition probabililities\n',hmm.A print 'observation probabililities\n',hmm.B R = 0 o = np.zeros(numTrials) goal.getNew() startTime = time.time() robotSprite.update() goalSprite.update() screen.blit(background, (0, 0)) #redraws the entire bkgrnd. 
#screen.fill((255,255,255)) # white background #screen.blit(red_block, (100,100)) count = -1 for ob in list_obstacles: count = count + 1 s = pygame.display.get_surface() s.fill(ob.color, list_rect_obstacles[count]) r.draw_rays(screen) r.showGoalPath(screen) # draw_traces(screen) robotSprite.draw(screen) goalSprite.draw(screen) #pygame.display.update() pygame.display.flip() #all changes are drawn at once (double buffer) #pygame.time.delay(100) pygame.quit() #also calls display.quit() f = open('results','w') f.write(str(Distance)+',') f.write(str(totalUserInput)+',') f.write(str(minObsDist))
def learn_hmm(self, seqlist):
    """Learn a position-specific HMM over the alphabet 'ab' from seqlist.

    :param seqlist: non-empty list of equal-length strings over 'a'/'b'.
        The sequence at index ``i`` is weighted ``self.k - i``, so earlier
        sequences count more.
    :return: an ``HMM`` whose ``trans[p][x][y]`` is the smoothed weighted
        frequency of symbol ``y`` at position ``p + 1`` given symbol ``x``
        at position ``p``
    :raises IndexError: if ``seqlist`` is empty
    :raises ZeroDivisionError: if ``self.smoothfac`` is 0 and a symbol was
        never seen at some position
    """
    hmm = HMM()
    hmm.length = self.length
    hmm.dims = [(2, 1)] * hmm.length  # (latent,emit) dimspace

    # build a separate object for every position: ``[obj] * n`` would make
    # all positions share one list/dict, so editing one would edit them all
    hmm.emit = [[[1.0], [1.0]] for _ in range(hmm.length)]
    hmm.seqmap = [{'a': 0, 'b': 1} for _ in range(hmm.length)]
    hmm.seqmap2 = [{0: 'a', 1: 'b'} for _ in range(hmm.length)]
    hmm.featmap = [{'H': 0} for _ in range(hmm.length)]
    hmm.initprob = [0.5, 0.5]
    hmm.trained = True
    hmm.alphabet = 'ab'

    # weighted counts per position: single symbols and adjacent pairs
    num_positions = len(seqlist[0])
    counts = [{} for _ in range(num_positions)]
    counts2 = [{} for _ in range(num_positions)]
    for rank, seq in enumerate(seqlist):
        weight = self.k - rank
        for j, aa in enumerate(seq):
            counts[j][aa] = counts[j].get(aa, 0) + weight
        for j in range(len(seq) - 1):
            pair = seq[j:j + 2]
            counts2[j][pair] = counts2[j].get(pair, 0) + weight

    # Calculate HMM transition probabilities
    num_symbols = len(hmm.alphabet)
    hmm.trans = []
    for i in range(num_positions - 1):
        rows = []
        for aa1 in hmm.alphabet:
            denom = counts[i].get(aa1, 0) + self.smoothfac * num_symbols
            # float() forces true division; with integer counts and an
            # integer smoothing factor Python 2 would truncate the ratio
            rows.append([
                float(counts2[i].get(aa1 + aa2, 0) + self.smoothfac) / denom
                for aa2 in hmm.alphabet
            ])
        hmm.trans.append(rows)
    return hmm
from HMM import HMM pi = [0.5, 0.5] A = [[0.5, 0.5], [0.5, 0.5]] B = [[0.8, 0.2], [0.3, 0.7]] hmm = HMM(A, B, pi) observations_head = [0,0,0,1,1,0,0] print "Head: ", hmm.probabilidade_observacoes(observations_head) observations_tail = [0,0,0,1,1,0,1] print "Tail: ", hmm.probabilidade_observacoes(observations_tail)
from HMM import HMM

# Three-state model parameters.
pi = [0.34, 0.33, 0.33]
A = [
    [0.34, 0.33, 0.33],
    [1.0, 0.0, 0.0],
    [1.0, 0.0, 0.0],
]
B = [
    [0.34, 0.33, 0.33],
    [0.4, 0.6, 0.0],
    [0.0, 0.6, 0.4],
]

# Observation sequence to decode.
O = [1, 1, 2, 2, 1, 0, 1, 2, 2, 0]

hmm = HMM(A, B, pi)

# Print whatever the model's Viterbi decoding returns for O.
print(hmm.viterbi(O))
app_train_list = [] app_test_list1 = [] app_test_list2 = [] for channel in combined.columns: app_train_list.append(Appliance(channel,train_set[[channel]])) app_test_list1.append(Appliance(channel,test_set1[[channel]])) app_test_list2.append(Appliance(channel,test_set2[[channel]])) num_states_dict={} ModelDict = {} for i,app in enumerate(app_train_list): X_train = create_matrix(app,good_chunks = True) X_test = create_matrix(app_test_list1[i],good_chunks = False) hmm = HMM(X_train,X_test) print app.name hmm.fit_HMM(perc_std_expl) ModelDict[app.name] = hmm.model num_states_dict[app.name] = hmm.n_states fhmm = FHMM() fhmm.train(app_train_list,num_states_dict = num_states_dict) predictions = pd.DataFrame() predictions = fhmm.disaggregate(test_set2[['total']], predictions) total_power_predicted = predictions.sum() total_power_act = test_set2[predictions.columns].sum() print "Percent stand.dev.explained, 1 min:", perc_std_expl_full(predictions,test_set2) print "R2, 1 min:" , r2_full(predictions,test_set2)