示例#1
1
文件: trainHMM.py 项目: tamos/robots
def train_model(dataset, threshold):

    ### Set up ###
    states, outputs = dataset.read_file()
    num_states = dataset.xyToInt.ravel().shape[0]
    num_outputs = len(dataset.obsToInt.keys())

    measure_p = np.zeros((num_outputs, num_states))
    start_p = np.ones((num_states,1)) * 1.0/16.0
    # make the matrix of transition probs
    trans_p = np.identity(num_states)
    trans_p *= INITIAL_STAY_PROB
    for each_loc in VALID_LOCATIONS:
        int_repres = int(dataset.xyToInt[each_loc[0] - 1,each_loc[1] - 1])

        # distribute probs to neighbours
        neighbours = NEIGHBOURS[each_loc[0]][each_loc[1]]
        num_neighbours = float(len(neighbours))

        # do measurement probs
        int_col_repres = int(dataset.obsToInt[ACTUAL_COLOURS[each_loc[0]][each_loc[1]]])
        measure_p[:,int_repres] = TOTAL_CAMERA_ERR_PROB / 3.0
        measure_p[int_col_repres,int_repres] = CAMERA_ACC_PROB

        for each_neighbour in neighbours:
            int_repres_neigh = int(dataset.xyToInt[each_neighbour[0] - 1 ,each_neighbour[1] - 1])
            trans_p[int_repres, int_repres_neigh] += (1.0 - INITIAL_STAY_PROB) * (1.0/num_neighbours)

    ### Model training ###

    llikes = []
    ll_old = 10e10

    print "\nTRANSITION P\n", trans_p
    print "\nMEASURE P\n", measure_p
    print "\nSTART P\n", start_p


    asym_cnt = 0
    for _ in range(N_ITER):
        model = HMM(num_states, num_outputs, outputs, trans_p,  measure_p, start_p)
        ll = model.train()
        print "Log Likelihood is ", ll
        llikes.append(ll)

        trans_p = model.transition_p
        measure_p = model.measure_p
        start_p = model.start_p

        diff = abs(ll_old - ll)
        print "Difference is", diff
        if diff < threshold:
            if asym_cnt >= 5:
                print "Threshold change reached 5 times, stopping"
                break
            else:
                asym_cnt += 1
        ll_old = ll
    return model, llikes
示例#2
1
def test_2():
    """Run the straight-maze scenario of test_1 with the normal noisy sensor.

    The final belief should still peak at (3, 0), but the remaining cells are
    expected to keep some smaller probability as well.
    """
    print('----------- Test 2: Noisy Sensor in Simple Robot Maze. -----------')
    problem = RobotProblem('maze_straight.maz', deterministic_sensor=False)
    model = HMM(problem)
    evidence = [0, 2, 1, 3]
    print(model.reason(evidence))
示例#3
0
文件: task3.py 项目: kokoff/mlap
def task3(input_file):
    episodes = read_file(input_file)

    for i in range(10):
        print '\nEM run number', (i + 1)
        hmm = HMM(rand_init=True)
        hmm.baum_welch(episodes)
        print hmm
示例#4
0
def multi_dim_observation():
    """Draw 100 samples from a two-state HMM whose outputs are 2-D Gaussians.

    Returns:
        Tuple ``(X, S)`` exactly as produced by ``HMM.rand`` (presumably the
        observations and the state sequence -- confirm against ``HMM.rand``).
    """
    chain = MarkovChain(
        np.matrix([[0.75], [0.25]]),
        np.matrix([[0.99, 0.01], [0.03, 0.97]]),
    )
    # Both states share the same covariance values but differ in their mean.
    first = GaussD(mean=np.matrix([[0], [0]]), cov=np.matrix([[2, 1], [1, 4]]))
    second = GaussD(mean=np.matrix([[3], [3]]), cov=np.matrix([[2, 1], [1, 4]]))
    model = HMM(chain, np.matrix([[first], [second]]))
    X, S = model.rand(model, 100)
    return (X, S)
示例#5
0
def test_0():
    """Run forward-backward on the umbrella problem with two ``True`` observations."""
    print('---------- Test 0: Umbrella World -------------')
    model = HMM(UmbrellaProblem())
    evidence = [int(flag) for flag in (True, True)]
    solution = model.forward_backward(evidence)
    print(solution)
    # Print the filtered and then the smoothed updates, each under its heading.
    for heading, attribute in (('Forward Updates:', 'updates'),
                               ('Backward Updates:', 'updates_smoothed')):
        print(heading)
        print(getattr(solution, attribute))
示例#6
0
def finite_duration():
    """Draw up to 100 samples from a two-state HMM with scalar Gaussian outputs.

    The transition matrix has one more column than there are states.
    NOTE(review): presumably the extra column is an exit state, which would
    make the chain finite-duration -- confirm against ``MarkovChain``.

    Returns:
        Tuple ``(X, S)`` exactly as produced by ``HMM.rand``.
    """
    chain = MarkovChain(
        np.matrix([[0.75], [0.25]]),
        np.matrix([[0.4, 0.4, 0.2], [0.1, 0.6, 0.3]]),
    )
    first = GaussD(mean=np.matrix([0]), stdev=np.matrix([1]))
    second = GaussD(mean=np.matrix([3]), stdev=np.matrix([2]))
    model = HMM(chain, np.matrix([[first], [second]]))
    X, S = model.rand(model, 100)
    return (X, S)
示例#7
0
def test_1():
    """Straight 4x1 maze with a deterministic sensor.

    Given the evidence RED, GREEN, BLUE, YELLOW the position is known exactly:
    no sequence yields that evidence other than starting at (0, 0) and
    travelling east three times.
    """
    print('---------- Test 1: Deterministic Simple Robot Maze-------------')
    problem = RobotProblem('maze_straight.maz', deterministic_sensor=True)
    model = HMM(problem)
    evidence = [0, 2, 1, 3]
    print(model.reason(evidence))
示例#8
0
 def test_decode_assignment(self):
     """Decode a three-symbol observation and check the recovered state path."""
     start = np.array([0.3, 0.7])
     print(start)
     transition = np.array([[0.1, 0.9], [0.8, 0.2]])
     emission = np.array([[0.7, 0.1, 0.2], [0.3, 0.5, 0.2]])
     state_names = ['吃', '睡']
     symbols = ["哭", "没精神", "找妈妈"]
     model = HMM(start, transition, emission, state_names, symbols)
     decoded = model.decode(np.array(['哭', '没精神', '找妈妈']))
     print(decoded)
     self.assertEqual(decoded, ['吃', '睡', '吃'])
示例#9
0
 def test_evalution_assignment(self):
     """Evaluate a three-symbol observation and check the returned value."""
     start = np.array([0.3, 0.7])
     print(start)
     transition = np.array([[0.1, 0.9], [0.8, 0.2]])
     emission = np.array([[0.7, 0.1, 0.2], [0.3, 0.5, 0.2]])
     state_names = ['吃', '睡']
     symbols = ["哭", "没精神", "找妈妈"]
     model = HMM(start, transition, emission, state_names, symbols)
     likelihood = model.evaluation(np.array(['哭', '没精神', '找妈妈']))
     print(likelihood)
     self.assertAlmostEqual(likelihood, 0.026880000000000005)
示例#10
0
 def test_1(self):
     """Evaluate the observation 1, 2, 1 on a three-state, two-symbol model."""
     start = np.array([0.2, 0.4, 0.4])
     print(start)
     transition = np.array([[0.5, 0.2, 0.3], [0.3, 0.5, 0.2], [0.2, 0.3, 0.5]])
     emission = np.array([[0.5, 0.5], [0.4, 0.6], [0.7, 0.3]])
     state_names = ['1', '2', '3']
     symbols = ['1', '2']
     model = HMM(start, transition, emission, state_names, symbols)
     likelihood = model.evaluation(np.array(['1', '2', '1']))
     print(likelihood)
     self.assertAlmostEqual(likelihood, 0.130218)
示例#11
0
def trainModel():
    """Build an HMM from a prefix of the Brown corpus and cache its parameters.

    The prefix length is ``TRAIN_NUM``, scaled by ``TRAIN_NUM_RATE`` when
    ``ENABLE_RATE`` is set. Words are lower-cased before training.

    Returns:
        The value of ``HMM.output_to_viterbi()``, which is also pickled to
        ``MODEL_PATH``.
    """
    limit = int(TRAIN_NUM * TRAIN_NUM_RATE) if ENABLE_RATE else TRAIN_NUM
    tagged_words = brown.tagged_words(tagset='universal')[:limit]
    samples = [[word.lower(), tag] for word, tag in tagged_words]
    paras = HMM(args=samples).output_to_viterbi()
    # Cache the parameters on disk.
    with open(MODEL_PATH, 'wb') as cache_file:
        pickle.dump(paras, cache_file)
    return paras
示例#12
0
class ProseWriter:
    def __init__(self):
        self.hmm = HMM()

    def train(self, fileName):
        self.hmm.train(fileName)

    def clear_unknowns(self):
        del self.hmm.sts[UNKNOWN]
        del self.hmm.obs[UNKNOWN]
        del self.hmm.starts[UNKNOWN]
        del self.hmm.transitions[UNKNOWN]
        for t in self.hmm.transitions:
            del self.hmm.transitions[t][UNKNOWN]
        del self.hmm.emissions[UNKNOWN]
        for e in self.hmm.emissions:
            del self.hmm.emissions[e][UNKNOWN]
        self.hmm.startCount -= 1

    def start_state(self):
        starts = self.hmm.starts
        count = random.randint(0, self.hmm.startCount - 1)
        for s in starts:
            if count == 0: return s
            count -= 1

    def next_state(self, curState):
        transitions = self.hmm.transitions[curState]
        count = random.randint(0, self.hmm.sts[curState] - 1)
        for t in transitions:
            for c in range(transitions[t]):
                if count == 0: return t
                count -= 1

    def next_word(self, nextState):
        emissions = self.hmm.emissions[nextState]
        count = random.randint(0, self.hmm.sts[nextState] - 1)
        for e in emissions:
            for c in range(emissions[e]):
                if count == 0: return e
                count -= 1

    def write(self, count):
        state = None
        for i in range(count):
            while state == None:
                state = self.start_state()
            #if i != 0 and state != '.': print " ",
            print self.next_word(state),
            state = self.next_state(state)
        print ""
示例#13
0
文件: task1.py 项目: kokoff/mlap
def task1(input_file):
    episodes, state_visit_count = read_input_file(input_file)
    hmm = HMM()
    E = len(episodes)
    N = hmm.hidden_states
    V = hmm.visible_states

    # Compute initial probabilities
    hmm.initial = [0 for i in range(N)]
    for episode in episodes:
        hmm.initial[episode[0][0]] += 1.0 / E

    # Compute transition probabilities
    hmm.transition = [[0 for i in range(N)] for j in range(N)]

    norm = [0 for i in range(N)]
    for episode in episodes:

        for t in range(len(episode) - 1):
            state = episode[t][0]
            nextState = episode[t + 1][0]
            hmm.transition[nextState][state] += 1.0
            norm[state] += 1

    for nextState in range(N):
        for state in range(N):
            try:
                hmm.transition[nextState][state] /= norm[state]
            except ZeroDivisionError:
                continue

    # Compute emission probabilities
    hmm.emission = [[0 for i in range(N)] for j in range(V)]
    norm = [0 for i in range(N)]

    for episode in episodes:
        for timestep in episode:
            reward = timestep[1]
            state = timestep[0]
            hmm.emission[reward][state] += 1.0
            norm[state] += 1

    for reward in range(V):
        for state in range(N):
            try:
                hmm.emission[reward][state] /= norm[state]
            except ZeroDivisionError:
                continue

    print hmm
    return
示例#14
0
 def test_decode_ppt(self):
     """Decode A, B, A, B on a three-state model that always starts in state '1'."""
     start = np.array([1, 0, 0])
     print(start)
     transition = np.array([[0.4, 0.6, 0], [0, 0.8, 0.2], [0, 0, 1.0]])
     emission = np.array([[0.7, 0.3], [0.4, 0.6], [0.8, 0.2]])
     state_names = ['1', '2', '3']
     symbols = ["A", "B"]
     model = HMM(start, transition, emission, state_names, symbols)
     decoded = model.decode(np.array(['A', 'B', 'A', 'B']))
     print(decoded)
     self.assertEqual(decoded, ['1', '2', '2', '2'])
示例#15
0
class ProseWriter:
    def __init__(self):
        self.hmm = HMM()

    def train(self, fileName):
        self.hmm.train(fileName)
        
    def clear_unknowns(self):
        del self.hmm.sts[UNKNOWN]
        del self.hmm.obs[UNKNOWN]
        del self.hmm.starts[UNKNOWN]
        del self.hmm.transitions[UNKNOWN]
        for t in self.hmm.transitions:
            del self.hmm.transitions[t][UNKNOWN]
        del self.hmm.emissions[UNKNOWN]
        for e in self.hmm.emissions:
            del self.hmm.emissions[e][UNKNOWN]
        self.hmm.startCount -= 1

    def start_state(self):
        starts = self.hmm.starts
        count = random.randint(0, self.hmm.startCount - 1)
        for s in starts:
            if count == 0: return s
            count -= 1

    def next_state(self, curState):
        transitions = self.hmm.transitions[curState]
        count = random.randint(0, self.hmm.sts[curState] - 1)
        for t in transitions:
            for c in range(transitions[t]):
                if count == 0: return t
                count -= 1

    def next_word(self, nextState):
        emissions = self.hmm.emissions[nextState]
        count = random.randint(0, self.hmm.sts[nextState] - 1)
        for e in emissions:
            for c in range(emissions[e]):
                if count == 0: return e
                count -= 1

    def write(self, count):
        state = None
        for i in range(count):
            while state == None: state = self.start_state()
            #if i != 0 and state != '.': print " ",
            print self.next_word(state),
            state = self.next_state(state)
        print ""
示例#16
0
    def makeLeftRightHMM(self, nStates, pD, obsData, lData=None):
        """Create, initialise and train a left-right HMM on ``obsData``.

        Args:
            nStates: Number of states; must be positive.
            pD: Output distribution(s) handed to the ``HMM`` constructor.
            obsData: Observation data; ``obsData.shape[1]`` is used as the
                sequence length when ``lData`` is omitted.
            lData: Length(s) of the training sequences. Defaults to
                ``obsData.shape[1]``.

        Returns:
            The trained HMM.

        Raises:
            ValueError: If ``nStates`` is not positive. Previously this was
                only printed and execution carried on into the division by
                ``nStates`` below.
        """
        if nStates <= 0:
            raise ValueError('Number of states must be > 0')
        if lData is None:
            lData = obsData.shape[1]

        # Mean sequence length divided evenly over the states.
        D = np.mean(lData) / nStates
        mc = self.initLeftRightMC(nStates, D)
        hmm = HMM(mc, pD)
        hmm = hmm.init(hmm, obsData, lData)

        # The per-iteration log probabilities returned by train are not used.
        hmm, _logprobs = hmm.train(hmm, obsData, lData, 5, np.log(1.01))

        return hmm
示例#17
0
 def test_decode_weather(self):
     """
   Decode the observation Dry, Damp, Soggy on the three-state weather model
   and print the result (no assertion is made on the decoded path).

   dataset source: https://www.cnblogs.com/Denise-hzf/p/6612212.html
   """
     pi = np.array([0.63, 0.17, 0.20])
     print(pi)
     # Every row must sum to 1; the second row previously ended in 0.652
     # (a digit transposition of 0.625), which made it sum to 1.027.
     A = np.array([[0.5, 0.375, 0.125], [0.25, 0.125, 0.625],
                   [0.25, 0.375, 0.375]])
     B = np.array([[0.6, 0.2, 0.15, 0.05], [0.25, 0.25, 0.25, 0.25],
                   [0.05, 0.10, 0.35, 0.5]])
     S = ['Sunny', 'Cloudy', 'Rainy']
     V = ["Dry", "Dryish", "Damp", "Soggy"]
     hmm = HMM(pi, A, B, S, V)
     observation = np.array(['Dry', 'Damp', 'Soggy'])
     res = hmm.decode(observation)
     print(res)
示例#18
0
    def featspacelen(self):
        """Build ``ntimes`` tasks for the configured feature space and sequence length.

        Each task gets a fresh HMM and features, an optional brute-force
        enumeration (``kwdargs['run_brute']``) and a timed Pareto-frontier
        computation. The tasks are collected in ``self.tasklist``.
        """
        self.tasklist = []
        featspace = self.kwdargs['featspace']
        seqspace = 20
        seqlen = self.kwdargs['seqlen']
        dims = [(seqspace, featspace)] * seqlen

        for taskid in range(self.ntimes):
            hmm = HMM()
            self._set_params_generic(hmm, seqlen, dims)
            cmrf = CMRF(hmm)
            feats = self._gen_feats_generic(seqlen, featspace)
            task_name = '_'.join(
                ['sim' + STUDY, self.name, str(seqlen), str(featspace), str(taskid)])
            task = Task(task_name, cmrf, feats)

            # Optional brute-force enumeration of all sequences.
            if self.kwdargs['run_brute']:
                with benchmark(task.name + 'brute') as timer:
                    all_sequences, all_energies = self.bruteforce(cmrf, feats)
                task.all_seq = all_sequences
                task.all_seq_energy = all_energies
                task.brute_time = timer.elapsed

            # Pareto frontier, timed separately.
            with benchmark(task.name + 'pareto') as timer:
                task.frontier, task.frontier_energy = pareto_frontier(cmrf, feats)
            if self.plot_all:
                task.plot_frontier(frontier_only=True)
            task.pareto_time = timer.elapsed
            self.tasklist.append(task)
示例#19
0
    def toy(self):
        """Build ``ntimes`` tasks for the toy simulation.

        All tasks share one HMM-backed CMRF and the standard features. Each
        task records a timed brute-force enumeration and a timed
        Pareto-frontier computation, and is appended to ``self.tasklist``.
        """
        self.tasklist = []
        feats = self.get_feats_standard()
        hmm = HMM()
        self._set_params_toy(hmm)
        cmrf = CMRF(hmm)
        for taskid in range(self.ntimes):
            task_name = '_'.join(['sim' + STUDY, self.name, str(taskid)])
            task = Task(task_name, cmrf, feats)

            # Brute-force enumeration of all sequences.
            with benchmark(task.name + 'brute') as timer:
                all_sequences, all_energies = self.bruteforce(cmrf, feats)
            task.all_seq = all_sequences
            task.all_seq_energy = all_energies
            task.brute_time = timer.elapsed

            # Pareto frontier, timed separately.
            with benchmark(task.name + 'pareto') as timer:
                task.frontier, task.frontier_energy = pareto_frontier(cmrf, feats)
            if self.plot_all:
                task.plot_frontier()
            task.pareto_time = timer.elapsed
            self.tasklist.append(task)
示例#20
0
def main():
    """Fit a GMM and an HMM on the EMGaussian data and report their fit.

    For each model this plots the clusters on the train and test sets and
    prints the mean log-likelihood on both. The data files are read from
    ``classification_data_HWK2/EMGaussian.{data,test}`` under the current
    working directory; only the first two columns are used.
    """
    pref_path = os.getcwd() + "/classification_data_HWK2/EMGaussian"

    # Open the files in context managers so the handles are closed once
    # loaded (they were previously left open).
    with open(pref_path + ".data", "rb") as train_file:
        train_data = np.loadtxt(train_file, delimiter=" ")
    with open(pref_path + ".test", "rb") as test_file:
        test_data = np.loadtxt(test_file, delimiter=" ")

    Xtrain = train_data[:, :2]
    Xtest = test_data[:, :2]

    models = {"GMM": GMM(isotropic=False), "HMM": HMM()}
    K = 4  # number of clusters

    for name in ["GMM", "HMM"]:

        print(name)
        model = models[name]
        model.fit(Xtrain, K, eps=pow(10, -2))

        # visualize clusters and frontiers
        model.plot_clusters(Xtrain, "figs/" + name + " on train", save=True)
        model.plot_clusters(Xtest, "figs/" + name + " on test", save=True)

        print("")

        lik = model.compute_log_likelihood(Xtrain)
        print("mean log-likelihood on training set : ", lik / Xtrain.shape[0])

        lik = model.compute_log_likelihood(Xtest)
        print("mean log-likelihood on test set : ", lik / Xtest.shape[0])

        print("\n------------------------\n")
示例#21
0
def train(params: Dict):
    """Build the ASR model described by ``params``, train it, save it and free it.

    :param params: training configuration; must contain ``model_type`` (one of
        ``SUPPORTED_MODEL``) and ``trainset_id``. The model id is derived from
        ``set_model_name`` when present, otherwise from ``structure_id``.
    :return: the id of the newly trained model
    """
    assert "model_type" in params, "model_type is not specified"
    assert params["model_type"] in SUPPORTED_MODEL, \
        "model_type not supported: {}, try with {}".format(params["model_type"], str(SUPPORTED_MODEL))
    assert "trainset_id" in params, "trainset_id is not specified"

    trainset_path = join(TRAIN_PATH, params["trainset_id"])

    # An explicit model name takes precedence over the structure id.
    name_seed = params["set_model_name"] if "set_model_name" in params else params["structure_id"]
    model_id = get_new_model_id(name_seed)

    model_type = params["model_type"]
    if model_type == "CNN":
        asrmodel = CNN(join(MODEL_PATH, model_id), input_param=params)
    elif model_type == "HMM":
        asrmodel = HMM(join(MODEL_PATH, model_id))
    else:
        # Only reachable if SUPPORTED_MODEL lists a type not handled above.
        raise AssertionError("model_type not recognised: {} check {}".format(params["model_type"], SUPPORTED_MODEL))

    asrmodel.train(trainset_path)
    asrmodel.save_model()
    del asrmodel  # free memory
    return model_id
示例#22
0
	def ziftied(self):
		"""Build ``ntimes`` tasks for the 'ziftied' study.

		All tasks share one HMM-backed CMRF and the features given in
		``kwdargs['feats']``. Each task records a timed sampling run and a
		timed Pareto-frontier computation, and is appended to
		``self.tasklist``. Brute-force enumeration is not run here.
		"""
		self.tasklist = []
		feats = self.kwdargs['feats']
		# Looked up but not used afterwards; a missing key still raises KeyError.
		weights = self.kwdargs['weights']
		hmm = HMM()
		self._set_params_ziftied(hmm)
		cmrf = CMRF(hmm)
		for taskid in range(self.ntimes):
			task_name = '_'.join(['bio' + str(STUDY), self.name, str(taskid)])
			task = Task(task_name, cmrf, feats)

			# Sample the frontier.
			with benchmark(task.name + 'sample') as timer:
				sampled_sequences, sampled_energies = self.sample(cmrf, feats)
			task.sample_seq = sampled_sequences
			task.sample_seq_energy = sampled_energies
			task.sample_time = timer.elapsed

			# Pareto frontier, timed separately.
			with benchmark(task.name + 'pareto') as timer:
				task.frontier, task.frontier_energy = pareto_frontier(cmrf, feats)
			if self.plot_all:
				task.plot_frontier(frontier_only=True, plot_samples=True)
			task.pareto_time = timer.elapsed
			self.tasklist.append(task)
示例#23
0
    def randfeatsuntied(self):
        """Build ``ntimes`` toy tasks, each with its own random model and features.

        Every iteration creates a fresh HMM and random features, then records
        a timed brute-force enumeration and a timed Pareto-frontier
        computation. The tasks are collected in ``self.tasklist``.
        """
        self.tasklist = []
        # Replaced by random features inside the loop; the call is kept as is.
        feats = self.get_feats_standard()

        for taskid in range(self.ntimes):
            hmm = HMM()
            self._set_params_randprobsuntied(hmm)
            cmrf = CMRF(hmm)
            feats = self._gen_feats_random()
            task_name = '_'.join(['sim' + STUDY, self.name, str(taskid)])
            task = Task(task_name, cmrf, feats)

            # Brute-force enumeration of all sequences.
            with benchmark(task.name + 'brute') as timer:
                all_sequences, all_energies = self.bruteforce(cmrf, feats)
            task.all_seq = all_sequences
            task.all_seq_energy = all_energies
            task.brute_time = timer.elapsed

            # Pareto frontier, timed separately.
            with benchmark(task.name + 'pareto') as timer:
                task.frontier, task.frontier_energy = pareto_frontier(cmrf, feats)
            if self.plot_all:
                task.plot_frontier()
            task.pareto_time = timer.elapsed
            self.tasklist.append(task)
示例#24
0
def test_3():
    """Run forward-backward along a loop through the 4x4 maze and animate it.

    The observations come from ``RobotProblem.get_ground_truth`` for the
    path; the sensor is the noisy (non-deterministic) one.
    """
    print(
        '------------ Test 3: 4x4 Colored Maze with Noisy Sensor. ----------------'
    )
    path = [(0, 0), (0, 1), (0, 2), (0, 3), (1, 3), (2, 3), (3, 3), (3, 2),
            (3, 1), (3, 0), (2, 0), (1, 0)]
    problem = RobotProblem('maze1.maz', deterministic_sensor=False)
    ground_truth = problem.get_ground_truth(path)

    path_text = ' -> '.join('(%s, %s)' % (x, y) for x, y in path)
    print('Path: ', path_text)
    colour_text = ' -> '.join(problem.color_map[code] for code in ground_truth)
    print('Ground Truth: ', colour_text)

    solution = HMM(problem).forward_backward(ground_truth)
    print(solution)
    print('-------- Path Animation ----------')
    problem.animate_path(path, solution)
示例#25
0
def trainGestureModel():
    """Train one HMM per recorded observation sequence of every gesture.

    For each gesture name the sequences are loaded from
    ``<gesture>Obs.pickle``. Every sequence gets its own HMM, started from
    the same random ``A``/``B`` and uniform ``pi``, and trained with three
    Baum-Welch iterations. The models are stored in a 6x7 object array,
    pickled to ``HMMModels.pickle`` and returned.

    NOTE(review): the 6x7 array allows at most seven sequences per gesture;
    unused cells stay ``None``.
    """
    # Pre-processing, if wanted, would be done with preprocessTrainingData().

    n_states = 10  # number of hidden states N
    n_obs = 30     # number of observation types M

    # Uniform start distribution as a column vector.
    pi = np.full((n_states, 1), 1.0 / n_states)

    # Random A and B with each row scaled to sum to one (A is drawn first).
    A = np.random.rand(n_states, n_states)
    A = A / A.sum(axis=1, keepdims=True)

    B = np.random.rand(n_obs, n_states)
    B = B / B.sum(axis=1, keepdims=True)

    gesture_names = ('beat3', 'beat4', 'circle', 'eight', 'inf', 'wave')
    HMMModels = np.empty((6, 7), dtype='object')
    for gesture_index, gesture_name in enumerate(gesture_names):
        # Load the recorded sequences of this gesture.
        with open(gesture_name + "Obs.pickle", 'rb') as handle:
            sequences = pickle.load(handle)
        for sequence_index in range(len(sequences)):
            model = HMM(n_states, n_obs, pi, A, B)
            model.baum_welch(sequences[sequence_index], max_iter=3)
            HMMModels[gesture_index, sequence_index] = model

    with open('HMMModels.pickle', 'wb') as handle:
        pickle.dump(HMMModels, handle, protocol=pickle.HIGHEST_PROTOCOL)

    return HMMModels
示例#26
0
文件: UI.py 项目: thanhtcptit/POS-Tag
class main_window():
    """Tk window that tags the text typed by the user with a trained HMM."""

    def __init__(self):
        """Train the tagger, build the widgets and enter the Tk main loop."""
        X_train, Y_train, _, _ = get_train_test_data(split=0.01)
        self.hmm = HMM()
        self.hmm.train(X_train, Y_train)

        root = Tk()
        self.top = root
        root.title('POS Tagging')
        root.geometry('500x350')
        self.process_btn = Button(root, text='Process', command=self.process_btn_click)
        self.process_btn.place(x=400, y=300)

        # Input area
        self.input_label = Label(root, text='Input:  ')
        self.input_label.place(x=20, y=10)
        self.input_text = Text(root, width=65, height=7)
        self.input_text.place(x=20, y=35)

        # Result area; kept disabled except while it is being updated.
        self.result_label = Label(root, text='Result:  ')
        self.result_label.place(x=20, y=150)
        self.result_text = Text(root, width=65, height=8)
        self.result_text.place(x=20, y=170)
        self.result_text.config(state=DISABLED)

        root.mainloop()

    def process_btn_click(self):
        """Tag the input text and show the ``word/tag`` pairs in the result box.

        Text containing a period is split on periods and tagged sentence by
        sentence (whatever follows the last period is dropped); any other
        text is tagged as one piece.
        """
        text = self.input_text.get('1.0', END)
        text = text.replace('\n', '').replace(',', ' ,')
        if '.' in text:
            sentences = [part.strip() + ' .' for part in text.split('.')[:-1]]
        else:
            sentences = [text]

        result = []
        for sentence in sentences:
            tags = self.hmm.viterbi_bigram(sentence)
            result.extend(word + '/' + tag
                          for word, tag in zip(sentence.split(' '), tags))

        self.result_text.config(state=NORMAL)
        self.result_text.delete('1.0', END)
        self.result_text.insert('1.0', result)
        self.result_text.config(state=DISABLED)
def main():
    """Plot the learning curves of the HMM estimator on the teams dataset."""
    hmm_estimator = HMM()
    # A naive-Bayes estimator (NB.learning_model with NB.get_data and a
    # ShuffleSplit cross-validation) was the commented-out alternative.
    symbols, sequence, codes, labels = hmm_estimator.get_data(
        'I:\\eclipse_workspace\\CharacterRecognition\\teams_dataset')
    dataset = (symbols, sequence, codes, labels)

    plot_learning_curve(hmm_estimator, "Learning Curves (HMM)", X=dataset, custom='yes')

    plt.show()
示例#28
0
    def __init__(self, entry='train'):
        """Set up the data managers and the HMM for training or testing.

        Args:
            entry: ``'train'`` saves the vocabulary/tag maps of the training
                data, creates a new model and saves it; ``'test'`` creates
                the model and restores its saved state. Any other value only
                sets the file paths and loads the config.
        """
        self.data_map_path = os.path.join('models', 'HMM_data.pkl')
        self.model_config_path = os.path.join('models', 'HMM_config.yml')
        self.model_param_path = os.path.join('models', 'HMM_model_params.pkl')
        # NOTE(review): per the original comment, load_config sets
        # embedding_dim, hidden_dim, batch_size, drop_out and tags; only
        # self.tags is read below.
        self.load_config()

        if entry not in ('train', 'test'):
            return
        is_training = entry == 'train'

        self.train_manager = DataManager(data_type='train',
                                         tags=self.tags,
                                         model_name='HMM')
        if is_training:
            manager = self.train_manager
            # Saved before the dev manager is created, which is given
            # data_map_path.
            self.save_data_map({
                # size of word_to_ix, used to initialise the HMM model
                "word_to_ix_size": manager.word_to_ix_size,
                # size of tag_to_ix, used to initialise the HMM model
                "tag_to_ix_size": manager.tag_to_ix_size,
                "word_to_ix": manager.word_to_ix,
                "tag_to_ix": manager.tag_to_ix,
                "ix_to_word": manager.ix_to_word,
                "ix_to_tag": manager.ix_to_tag,
            })

        self.dev_manager = DataManager(data_type='dev',
                                       data_map_path=self.data_map_path,
                                       model_name='HMM')
        self.model = HMM(
            hidden_state_num=self.train_manager.tag_to_ix_size,
            observable_state_num=self.train_manager.word_to_ix_size)

        if is_training:
            self.save_model()
        else:
            self.restore_model()
示例#29
0
    def test(self, sent_set):
        """
        Tag a set of sentences with a Hidden Markov Model and evaluate accuracy.

        :param sent_set: tuple like (untagged sentences, gold standard sentences)
        :return: whatever ``self.evaluate`` returns for the HMM-tagged and
            gold standard sentences
        """
        untagged, gold = sent_set[0], sent_set[1]

        # The HMM is built from the tagger's probability tables and kept on
        # the instance.
        self.hmm = HMM(untagged,
                       self.pos_tags,
                       self.words_given_pos,
                       self.words_given_pos_upper,
                       self.pos2_given_pos1,
                       Tagger.start_tag)

        predicted = self.hmm.tag()
        return self.evaluate(predicted, gold)
示例#30
0
def _format_elapsed(seconds):
    """Render a duration in seconds as ``"<minutes>m  <seconds>s"`` in whole units."""
    minutes, secs = divmod(int(seconds), 60)
    return str(minutes) + "m  " + str(secs) + "s"


def program1(phones):
    """Train and test the GMMs, then the HMM built on them, printing timings.

    Args:
        phones: Phone inventory passed to both ``GMMs`` and ``HMM``.

    Minutes are computed with integer division. The previous
    ``int(elapsed) / 60`` is true division under Python 3 and printed
    fractional minutes (e.g. ``2.0833333333333335m  5s``).
    """
    path = r"C:\Users\Nicole Schwartz\Anaconda3\seniorProject\new\darpa-timit-acousticphonetic-continuous-speech\data\\"

    gmms = GMMs(phones, path)
    start = timeit.default_timer()
    gmms.train()
    elapsed = timeit.default_timer() - start
    print("GMM training time: " + _format_elapsed(elapsed))

    start = timeit.default_timer()
    accuracyGMM = gmms.test()
    elapsed = timeit.default_timer() - start
    print("GMM testing time: " + _format_elapsed(elapsed))
    print("Accuracy of GMMs alone= ", round(accuracyGMM * 100, 3))

    hmm = HMM(phones, gmms.models, path)
    start = timeit.default_timer()
    hmm.train(400)
    elapsed = timeit.default_timer() - start
    print("HMM training time: " + _format_elapsed(elapsed))

    start = timeit.default_timer()
    hmm.test()
    elapsed = timeit.default_timer() - start
    print("HMM testing time: " + _format_elapsed(elapsed))
示例#31
0
文件: UI.py 项目: thanhtcptit/POS-Tag
    def __init__(self):
        """Train the tagger, build the widgets and enter the Tk main loop."""
        X_train, Y_train, _, _ = get_train_test_data(split=0.01)
        self.hmm = HMM()
        self.hmm.train(X_train, Y_train)

        root = Tk()
        self.top = root
        root.title('POS Tagging')
        root.geometry('500x350')
        self.process_btn = Button(root, text='Process', command=self.process_btn_click)
        self.process_btn.place(x=400, y=300)

        # Input area
        self.input_label = Label(root, text='Input:  ')
        self.input_label.place(x=20, y=10)
        self.input_text = Text(root, width=65, height=7)
        self.input_text.place(x=20, y=35)

        # Result area; starts out disabled.
        self.result_label = Label(root, text='Result:  ')
        self.result_label.place(x=20, y=150)
        self.result_text = Text(root, width=65, height=8)
        self.result_text.place(x=20, y=170)
        self.result_text.config(state=DISABLED)

        root.mainloop()
def initializeHMM(numStates=numStates, possibleObservations=numCats):

  print "Initializing HMM..."
  hmm = HMM(numStates=numStates, numCats=possibleObservations)
  hmm.pi = np.random.rand(numStates)
  hmm.pi /= sum(hmm.pi)

  hmm.A = np.random.rand(numStates, numStates)
  A_row_sums = hmm.A.sum(axis=1)
  hmm.A /= A_row_sums[:, np.newaxis]

  hmm.B = np.random.rand(numStates, numCats)
  B_row_sums = hmm.B.sum(axis=1)
  hmm.B /= B_row_sums[:, np.newaxis]

  print "Initial HMM stats"
  print "A: ",
  print hmm.A
  print "B: ",
  print hmm.B
  print "pi: ",
  print hmm.pi

  return hmm
示例#33
0
 def test(self, sent_set):
     """
     Tag a set of sentences with a Hidden Markov Model and evaluate accuracy.

     :param sent_set: tuple like (untagged sentences, gold standard sentences)
     :return: whatever ``self.evaluate`` returns for the HMM-tagged and
         gold standard sentences
     """
     untagged, gold = sent_set[0], sent_set[1]

     # The HMM is built from the tagger's probability tables and kept on
     # the instance.
     self.hmm = HMM(untagged,
                    self.pos_tags,
                    self.words_given_pos,
                    self.words_given_pos_upper,
                    self.pos2_given_pos1,
                    Tagger.start_tag)

     predicted = self.hmm.tag()
     return self.evaluate(predicted, gold)
示例#34
0
文件: Tagger.py 项目: romrell4/470-AI
class Tagger:
    """Part-of-speech tagger that decodes an HMM with the Viterbi algorithm.

    Scores are combined by addition, so the HMM presumably returns
    log-probabilities -- NOTE(review): confirm against the HMM class.
    The results of every ``test`` run are stored in ``self.tests`` under the
    name of the tested file.
    """

    def __init__(self):
        self.hmm = HMM()
        self.tests = {}

    def train(self, fileName):
        """Train the underlying HMM on ``fileName``."""
        self.hmm.train(fileName)

    def test(self, fileName):
        """Tag the ``word_TAG`` tokens in ``fileName`` and record the results.

        Fills ``self.tests[fileName]`` with the keys "text", "pos_real",
        "table", "prob", "pos_guess" and "accuracy".
        """
        # close the file as soon as it has been read
        with open(fileName) as handle:
            data = handle.read().split()

        observations = []
        partsofspeech = []
        for token in data:
            tag = token.split("_")
            observations.append(tag[WORD].lower())
            partsofspeech.append(tag[POS])

        result = {"text": observations, "pos_real": partsofspeech}
        self.tests[fileName] = result
        result["table"] = self.viterbi_table(observations)
        (result["prob"], result["pos_guess"]) = \
            self.optimal_tags(result["table"])
        result["accuracy"] = self.accuracy(fileName)

    def viterbi_table(self, observations):
        """Build the Viterbi table for ``observations``.

        :return: one dict per observation, mapping each state to
            ``{"prob": best score so far, "prev": best previous state}``
        """
        vt = []
        states = self.hmm.states()
        for index, observation in enumerate(observations):
            vt.append({})
            for st in states:
                emis_prob = self.hmm.emission_probability(st, observation)
                if index == 0:
                    (max_prob, best_state) = (self.hmm.start_probability(st), None)
                else:
                    (max_prob, best_state) = self.get_best_prob_and_state(index, st, vt)

                vt[index][st] = {"prob": max_prob + emis_prob, "prev": best_state}
        return vt

    def get_best_prob_and_state(self, index, st, vt):
        """Return ``(score, state)`` of the best predecessor of ``st`` at ``index``."""
        best_state = None
        max_prob = -float("inf")
        for prev_st in self.hmm.states():
            candidate = vt[index - 1][prev_st]["prob"] + \
                self.hmm.transition_probability(prev_st, st)
            if max_prob < candidate:
                max_prob = candidate
                best_state = prev_st
        return (max_prob, best_state)

    def print_table(self, fileName):
        """Print the Viterbi table of a tested file, one row per state."""
        vt = self.tests[fileName]["table"]
        obs = self.tests[fileName]["text"]
        # each line is joined with single spaces and keeps the trailing space
        # the former comma-terminated print statements produced
        header = ["   "] + [obs[i].ljust(7) for i in range(len(vt))]
        print(" ".join(header) + " ")
        if not vt:
            return
        for state in vt[0]:
            cells = ["%s:" % state]
            cells.extend("%.7s" % ("%f" % v[state]["prob"]) for v in vt)
            print(" ".join(cells) + " ")

    def optimal_tags(self, vt):
        """Backtrack through ``vt`` and return ``(best score, tag sequence)``.

        An empty table yields ``(-inf, [])``.
        """
        max_prob = -float("inf")
        if not vt:
            return (max_prob, [])

        # pick the best final state
        previous = None
        for st, value in vt[-1].items():
            if value["prob"] > max_prob:
                max_prob = value["prob"]
                previous = st

        opt = [previous]

        # Follow the back-pointers to the first observation.  The pointer
        # stored for the state at step t + 1 names the state at step t, so it
        # has to be read from vt[t + 1] (reading it from vt[t] skipped a step).
        for t in range(len(vt) - 2, -1, -1):
            previous = vt[t + 1][previous]["prev"]
            opt.insert(0, previous)
        return (max_prob, opt)

    def accuracy(self, fileName):
        """Compare guessed and real tags of a tested file.

        :return: dict with "matrix" (confusion matrix indexed
            ``[real][guess]``), "errors" (number of wrong tags) and "value"
            (fraction of correct tags, 0.0 when the file had no tokens)
        """
        real = self.tests[fileName]["pos_real"]
        guess = self.tests[fileName]["pos_guess"]
        total = len(self.tests[fileName]["text"])

        # start from the tags the model knows and add any tag that only occurs
        # in the test data, so an unseen tag cannot raise KeyError
        tags = list(self.hmm.sts.keys())
        known = set(tags)
        for tag in list(real) + list(guess):
            if tag not in known:
                known.add(tag)
                tags.append(tag)

        matrix = {}
        for row in tags:
            matrix[row] = {}
            for col in tags:
                matrix[row][col] = 0

        matches = 0
        for i in range(total):
            matrix[real[i]][guess[i]] += 1
            if real[i] == guess[i]:
                matches += 1

        acc = {"matrix": matrix, "errors": total - matches}
        acc["value"] = matches / float(total) if total else 0.0
        return acc

    def output(self, fileName):
        """Print the guessed tag sequence, its score and the accuracy."""
        opt = self.tests[fileName]["pos_guess"]
        max_prob = self.tests[fileName]["prob"]
        accuracy = self.tests[fileName]["accuracy"]["value"]
        print('The steps of states are ' + ' '.join(opt) +
              ' with highest probability of E%s' % max_prob)
        print(fileName + ' tagged with %s accuracy' % accuracy)

    def print_confusion_matrix(self, fileName):
        """Print the confusion matrix recorded for ``fileName``."""
        confusion_matrix = self.tests[fileName]["accuracy"]["matrix"]
        # one key list drives both rows and columns so cells line up with
        # the header
        keys = list(confusion_matrix.keys())

        header = ["     "] + [key.rjust(5) for key in keys]
        print(" ".join(header) + " ")
        for pos in keys:
            row = [pos.ljust(5)]
            row.extend(str(confusion_matrix[pos][pos2]).rjust(5) for pos2 in keys)
            print(" ".join(row) + " ")

    def export_confusion_matrix_to_csv(self, fileName):
        """Write the confusion matrix recorded for ``fileName`` to result.csv."""
        confusion_matrix = self.tests[fileName]["accuracy"]["matrix"]
        keys = list(confusion_matrix.keys())

        # every cell, including the last of a line, is followed by a comma
        lines = ["," + "".join(key + "," for key in keys)]
        for pos in keys:
            cells = "".join(str(confusion_matrix[pos][pos2]) + "," for pos2 in keys)
            lines.append(pos + "," + cells)
        result = "".join(line + "\n" for line in lines)

        with open("result.csv", "w") as f:
            f.write(result)
示例#35
0
from HMM import HMM

# Parameters handed to the HMM constructor below, in the order
# (initProb, transMatrix, genMatrix).  Presumably the initial distribution,
# the transition matrix and the emission matrix -- confirm against HMM.
initProb = [0.2, 0.4, 0.4]
transMatrix = [
    [0.5, 0.2, 0.3],
    [0.3, 0.5, 0.2],
    [0.2, 0.3, 0.5],
]
genMatrix = [
    [0.5, 0.5],
    [0.4, 0.6],
    [0.7, 0.3],
]
hmm = HMM(initProb, transMatrix, genMatrix)

# Run the three queries on the same observation sequence and show the results.
observe = [0, 1, 0]
prob1 = hmm.forward_calc_prob(observe)
prob2 = hmm.backward_calc_prob(observe)
path = hmm.viterbi_decoding(observe)
print(prob1, prob2)
print(path)
示例#36
0
from HMM import HMM

pi = [0.5, 0.5]
pi_linha = [0.0, 1.0]
A = [[0.5, 0.5], [0.5, 0.5]]
B = [[0.8, 0.2], [0.3, 0.7]]

hmm = HMM(A, B, pi)
hmm_linha = HMM(A, B, pi_linha)

observacoes = [0,0]
print "O|M: ", hmm.probabilidade_observacoes(observacoes)
print "O|M': ", hmm_linha.probabilidade_observacoes(observacoes)

print "T = 1"
estados = [0]
observacoes = [0]
numerador = hmm.probabilidade_estados_observacoes(estados, observacoes)
def main():
    """Run the interactive robot simulation until the user quits.

    Sets up pygame, the robot and the goal, then loops once per frame:
    reads keyboard events, updates the user-input accumulators, updates the
    POMDP belief whenever the robot reaches the goal, and redraws the scene.
    After the loop it writes Distance, totalUserInput and minObsDist,
    comma-separated, to the file "results".
    """
    global leave_trace, list_traces, pomdp
    # a global statement applies to the whole function, so it is declared
    # once here rather than inside the frame loop
    global user_inputF, user_inputR, totalUserInput, o, R, numTrials
    if r_visual_granularity > wall_thickness:
        print "PARAMETER ERROR: r_visual_granularity exceeds wall_thickness!"
        print "This can cause wall detection errors!"
    if r_init_x < wall_thickness or r_init_y < wall_thickness:
        print "PARAMETER ERROR: starting position overlaps wall!"
        print "Check r_init_x|y_topleft and wall_thickness"
    pygame.init()  # also calls display.init()
    startTime = time.time()
    caption = sim_version + " \tmode: teleoperation  "
    pygame.display.set_caption(caption + str(startTime))
    r_sprite = load_image(r_image)
    g_sprite = load_image("goal.bmp")
    background = load_image(back_image)

    # prepare simulation objects
    clock = pygame.time.Clock()
    screen.blit(background, (0, 0))
    goal = Goal(g_sprite)
    r = Robot(
        r_sprite,
        r_init_x,
        r_init_y,
        r_init_azi,
        r_init_fwd_speed,
        r_init_spin_speed,
        r_visual_range,
        r_visual_angle,
        goal,
        pomdp,
    )

    robotSprite = pygame.sprite.Group(r)
    goalSprite = pygame.sprite.Group(goal)

    # display the environment once, right before event loop
    surface = pygame.display.get_surface()
    for index, ob in enumerate(list_obstacles):
        surface.fill(ob.color, list_rect_obstacles[index])
    r.draw_rays(screen)
    r.showGoalPath(screen)
    pygame.display.flip()

    # recording time
    going = True
    time_down = 0.0
    time_elapsed = 0.0
    # T1 / T2 are +1 while a forward/backward (w, s) or turning (a, d) key is
    # held and -1 otherwise; they drive user_inputF / user_inputR below
    T1 = -1
    T2 = -1

    # HMM initialize
    hmm = HMM()
    hmm.pi = np.array([0.5, 0.5])
    hmm.A = np.array([[0.5, 0.5], [0.5, 0.5]])
    hmm.B = np.array([[0.3, 0.7], [0.99, 0.01]])

    while going:
        clock.tick(fps)  # at most that many fps
        time_elapsed = 0.0
        # Event loop################################
        for event in pygame.event.get():
            # the event kind lives in event.type; comparing the Event object
            # itself with QUIT is never true, so closing the window was ignored
            if event.type == QUIT:
                going = False
            elif event.type == KEYDOWN:
                if event.key == K_ESCAPE:
                    going = False

                elif event.key == K_w:
                    T1 = 1
                    r.direction = "w"
                    time_down = pygame.time.get_ticks()
                elif event.key == K_d:
                    T2 = 1
                    r.direction = "d"
                    time_down = pygame.time.get_ticks()
                elif event.key == K_a:
                    T2 = 1
                    r.direction = "a"
                    time_down = pygame.time.get_ticks()
                elif event.key == K_s:
                    T1 = 1
                    r.direction = "s"
                    time_down = pygame.time.get_ticks()

                # mode selection: SPACE and 1-4 switch the robot's opmode
                if event.key == K_SPACE:
                    r.opmode = 0  # teleop mode
                    caption = sim_version + " \tmode: teleoperation  "
                if event.key == K_1:
                    r.opmode = 1  # autonomous navigation mode
                    caption = sim_version + " \tmode: autonomous  "
                if event.key == K_2:
                    r.opmode = 2  # assist mode
                    caption = sim_version + " \tmode: assist  "
                if event.key == K_3:
                    r.opmode = 3  # pomdp mode
                    caption = sim_version + " \tmode: pomdp  "
                if event.key == K_4:
                    r.opmode = 4  # hmm mode
                    caption = sim_version + " \tmode: hmm  "

            elif event.type == KEYUP:

                # releasing a movement key records how long it was held
                if event.key == K_w:
                    time_elapsed = pygame.time.get_ticks() - time_down
                    T1 = -1
                    r.direction = "N"
                elif event.key == K_d:
                    time_elapsed = pygame.time.get_ticks() - time_down
                    T2 = -1
                    r.direction = "N"
                elif event.key == K_a:
                    time_elapsed = pygame.time.get_ticks() - time_down
                    T2 = -1
                    r.direction = "N"
                elif event.key == K_s:
                    time_elapsed = pygame.time.get_ticks() - time_down
                    T1 = -1
                    r.direction = "N"

        # get_ticks() is in milliseconds; accumulate seconds
        totalUserInput += time_elapsed / 1000.0

        # ramp the forward input by 0.1 per frame, clamped to [0, 1]
        user_inputF += T1 * 0.1

        if user_inputF > 1.0:
            user_inputF = 1.0
        elif user_inputF < 0.0:
            user_inputF = 0.0

        # same ramp for the turning input
        user_inputR += T2 * 0.1

        if user_inputR > 1.0:
            user_inputR = 1.0
        elif user_inputR < 0.0:
            user_inputR = 0.0

        pygame.display.set_caption(caption)

        # slow down when stopped moving
        if r.speed > 0.0:
            r.speed -= 0.5
            if r.speed < 0.5:
                r.speed = 0.0
        elif r.speed < 0.0:
            r.speed += 0.5
            # NOTE(review): this test is not the mirror image of the positive
            # branch (that would be "> -0.5"); left unchanged -- confirm intent
            if r.speed > 0.5:
                r.speed = 0.0

        # Find if goal reached
        if pygame.sprite.spritecollide(r, goalSprite, False) != []:
            print "You made it to the goal"
            print r.numCollision
            R = R + 1

            # observation index = collision bucket (0: more than 5, 1: 1-5,
            # 2: none) plus an offset of 3 when there are over 30 obstacles
            if len(list_obstacles) > 30:
                mapLevel = 3
            else:
                mapLevel = 0

            if r.numCollision > 5:
                observation = 0 + mapLevel
            elif r.numCollision > 0:
                observation = 1 + mapLevel
            else:
                observation = 2 + mapLevel
            r.pomdp.update_belief(r.action, observation)
            r.action = r.pomdp.get_best_action()[0]
            print "Most likely state: " + r.pomdp.pomdpenv.states[np.argmax(r.pomdp.belief)]
            print "Observation: " + pomdp.pomdpenv.observations[observation]
            print "Action: " + pomdp.pomdpenv.actions[r.action]
            prevOb = observation
            print r.pomdp.belief

            r.numCollision = 0

            #            if R == numTrials:
            #                hmm.train(o,0.0001,graphics=False)
            #                print 'probabilities\n',hmm.pi
            #                print 'state transition probabililities\n',hmm.A
            #                print 'observation probabililities\n',hmm.B
            #                R = 0
            #                o = np.zeros(numTrials)
            goal.getNew()
            startTime = time.time()

        robotSprite.update()
        goalSprite.update()
        screen.blit(background, (0, 0))  # redraws the entire bkgrnd.
        # repaint every obstacle and reset its per-frame detection state
        surface = pygame.display.get_surface()
        for index, ob in enumerate(list_obstacles):
            surface.fill(ob.color, list_rect_obstacles[index])
            ob.detected = False
            ob.dist = 1000.0
            ob.xCollide = 0.0
            ob.yCollide = 0.0
        r.draw_rays(screen)
        r.showGoalPath(screen)

        robotSprite.draw(screen)
        goalSprite.draw(screen)

        pygame.display.flip()  # all changes are drawn at once (double buffer)
    pygame.quit()  # also calls display.quit()

    # the with-block guarantees the results are flushed and the file closed
    with open("results", "w") as f:
        f.write(str(Distance) + ",")
        f.write(str(totalUserInput) + ",")
        f.write(str(minObsDist))
示例#38
0
class Tagger:
    """
    A class for POS-tagging text and evaluating the result
    """

    ######### CLASS VARIABLES #########

    # a fake START tag to add to the beginning of sentences to help with tagging
    start_tag = '^'

    # number of times for a POS tagging mistake to occur in order to show it to user
    mistake_threshold = 50

    # x-fold cross-validation
    test_cycles = 2

    def __init__(self, corpus_path, corpus_files, test_files):
        """
        Construct a Tagger object

        :param corpus_path: path to corpus files
        :param corpus_files: list of corpus files used for training
        :param test_files: list of corpus files used for testing
        """

        # object for working with training data
        self.training = Treebank(corpus_path, corpus_files)

        # object for working with testing data
        self.testing = Treebank(corpus_path, test_files)

        # will contain a list of tags in training corpus
        self.pos_tags = False

        # will be object for running the Hidden Markov Model for tagging
        self.hmm = False

        # use PennTags
        self.tags = PennTags

        # will hold conditional frequency distribution for P(Wi|Ck)
        self.words_given_pos = False

        # will hold conditional frequency distribution for P(Ci+1|Ci)
        self.pos2_given_pos1 = False


    ######### `PUBLIC' FUNCTIONS #########

    def run_test_cycles(self):
        """
        Train on the training corpus, test on the testing corpus and report
        accuracy.  A single train/test pass is run; the statistics are kept
        in per-test lists so that further passes could be added.
        """

        total_time_start = time.time() # keep track of time
        rights = [] # array to hold number of correctly-tagged words for each test
        wrongs = [] # array to hold number of incorrectly-tagged words for each test
        totals = [] # array to hold number of total words for each test
        all_missed = [] # array to hold incorrect tag information for each test
        sep = "-" * 50 + "\n" # logging separator

        # tagged sentences to train on
        training_sents = self.training.tagged_sents

        self.train(training_sents)

        # gold standard and untagged versions of the testing sentences
        testing_tagged_sents = self.testing.tagged_sents
        testing_untagged_sents = self.testing.sents
        testing_sents = (testing_untagged_sents, testing_tagged_sents)

        (right, wrong, missed) = self.test(testing_sents)

        # gather accuracy statistics for this test
        total = right + wrong
        rights.append(right) # store the correct count for this test cycle
        wrongs.append(wrong) # store the incorrect count for this test cycle
        totals.append(total) # store the total words tested for this test cycle
        all_missed += missed # add incorrect tag information from this cycle

        # _percent() uses float arithmetic; plain int / int truncates under
        # Python 2 and reported 0.00% for anything short of a perfect score
        msg("Total words: %d\n" % total)
        msg("Correct tags: %d (%0.2f%%)\n" % (right, self._percent(right, total)))
        msg("Incorrect tags: %d (%0.2f%%)\n" % (wrong, self._percent(wrong, total)))

        msg("%s%s" % (sep,sep))

        # calculate and output statistics for the entire test
        print("Total tests run: %d" % len(totals))
        print("Total time taken: %0.2f seconds" % (time.time() - total_time_start))
        print("Average correct tags: %0.2f%%" % self._percent(sum(rights), sum(totals)))
        print("Average incorrect tags: %0.2f%%" % self._percent(sum(wrongs), sum(totals)))
        print("")

        # give the option of inspecting incorrect tags
        if raw_input("Examine bad tags? ") in ['y','Y']:
            self.inspect(all_missed)

    def train(self, sents):
        """
        Train the tagger on a set of tagged sentences

        :param sents: list of tagged sentences
        """

        # collect POS tags from our corpus
        self.pos_tags = self.training.pos_tags()

        # add start markers to help with bigram tagging
        msg("Adjusting POS tags...")
        sents = self._adjust_pos(sents)
        msg("done\n")

        # create 2 conditional frequency distributions (from the NLTK) that store
        # observed probabilities that a given word has a certain POS, one for
        # lowercase-normalized words and one for words as they appear in the text
        msg("Training (Wi|Ck)...")

        # create a CFD for words normalized to lowercase
        self.words_given_pos = ConditionalFreqDist((wp[1], wp[0].lower()) for \
            sent in sents for wp in sent)

        # create a CFD for words left in their original capitalization
        self.words_given_pos_upper = ConditionalFreqDist((wp[1], wp[0]) for \
            sent in sents for wp in sent)
        msg("done\n")

        # create another CFD that stores observed probabilities that one POS
        # follows another POS
        msg("Training (Ci+1|Ci)...")
        self.pos2_given_pos1 = ConditionalFreqDist((sent[i-1][1], sent[i][1]) for \
            sent in sents for i in range(1,len(sent)))

        msg("done\n")

    def test(self, sent_set):
        """
        Use a Hidden Markov Model to tag a set of sentences, and evaluate accuracy.

        :param sent_set: tuple like (untagged sentences, gold standard sentences)
        :return: tuple (right, wrong, missed) as produced by evaluate()
        """

        untagged_sents = sent_set[0] # recover untagged sentences
        gold_tagged_sents = sent_set[1] # recover gold standard tagged sentences

        # initialize an HMM object with necessary parameters
        self.hmm = HMM(untagged_sents, self.pos_tags, self.words_given_pos, \
            self.words_given_pos_upper, self.pos2_given_pos1, Tagger.start_tag)

        # get HMM-tagged sentences
        hmm_tagged_sents = self.hmm.tag()

        # evaluate against gold standard and return accuracy data
        return self.evaluate(hmm_tagged_sents, gold_tagged_sents)

    def evaluate(self, hmm_tagged_sents, gold_tagged_sents):
        """
        Evaluate one set of tagged sentences against another set

        :param hmm_tagged_sents: list of tagged sentences
        :param gold_tagged_sents: list of tagged sentences used as gold standard
        :return: tuple (right, wrong, missed); missed holds one tuple
            (hmm_tagged_word, gold_tagged_word, hmm_sentence, gold_sentence)
            per incorrectly tagged word
        :raises Exception: if the two sets differ in sentence count, sentence
            length or word text
        """

        # ensure our sentence sets have the same length
        if len(hmm_tagged_sents) != len(gold_tagged_sents):
            raise Exception("HMM-tagged sentence set did not match gold "
                "standard sentence set!")

        right = 0 # initialize counter of correct tags
        wrong = 0 # initialize counter of incorrect tags
        missed = [] # initialize array of tagged words we didn't get right

        # loop through sentence sets
        for hmm_sent, gold_sent in zip(hmm_tagged_sents, gold_tagged_sents):

            # ensure our sentences have the same length
            if len(hmm_sent) != len(gold_sent):
                raise Exception("HMM-tagged sentence did not match gold "
                    "standard sentence!")

            # loop through words in sentence
            for hmm_tagged_word, gold_tagged_word in zip(hmm_sent, gold_sent):

                # ensure the words are the same between the sets
                if gold_tagged_word[0] != hmm_tagged_word[0]:
                    raise Exception("HMM-tagged word did not match gold "
                        "standard word!")

                # increment counters based on tag correctness
                if gold_tagged_word[1] == hmm_tagged_word[1]:
                    right += 1
                else:
                    missed.append((hmm_tagged_word, gold_tagged_word, \
                        hmm_sent, gold_sent))
                    wrong += 1

        # return a tuple of correct vs incorrect tags
        return (right, wrong, missed)

    def inspect(self, missed):
        """
        Inspect a testing session, and print data about tag accuracy

        :param missed: list of tuples of missed tags like:
            (hmm_tagged_word, gold_tagged_word, hmm_context, gold_context)
        """

        # create a CFD so we can examine a matrix of incorrect vs correct tags
        # ms[1][1] = tag of a gold_tagged_word
        # ms[0][1] = tag of an hmm_tagged_word
        cfd = ConditionalFreqDist((ms[1][1], ms[0][1]) for ms in missed)

        # initialize a hash to store mistakes by frequency
        mistakes = {}

        # print a table showing mistake frequency
        cfd.tabulate()
        msg("\n")

        # loop through mistake frequencies by gold standard tag, i.e., if we are
        # examining gold-standard 'IN', count what we incorrectly tagged it as
        for g_tag in cfd.conditions():
            for hmm_tag in cfd[g_tag].keys():
                # how many times did we incorrectly say g_tag was hmm_tag?
                count = cfd[g_tag][hmm_tag]

                # group the mistake types by how often they occurred
                mistakes.setdefault(count, []).append((hmm_tag, g_tag))

        # get all mistake counts that occurred over a threshold, worst first
        mistake_counts = sorted((count for count in mistakes \
            if count > Tagger.mistake_threshold), reverse=True)

        # now create a list of mistake types to show the user, i.e., loop
        # through all types and if they are of a high-frequency type, add to list
        mistakes_to_halt = []
        for count in mistake_counts:
            for mistake_tuple in mistakes[count]:
                mistakes_to_halt.append(mistake_tuple)
                msg("%d\t%s\twas really\t%s\n" % (count, mistake_tuple[0], \
                    mistake_tuple[1]))
        msg("\n")

        # (hmm tag, gold tag) pairs we halt for, as a set for fast lookup
        halt_pairs = set(mistakes_to_halt)

        # create separators used when outputting missed word contexts
        sep_big = "---------------------------------------------------\n"
        sep_small = "\n-----------------------------------------\n"

        # loop through individual mistakes and, if they match the kind of error
        # we want to halt for, show the user the mistake as well as the sentence
        # context for both the gold-standard sentence and the hmm-tagged sentence
        for missed_set in missed:
            (hmm_tagged_word, gold_tagged_word, hmm_tagged_sent, \
                gold_tagged_sent) = missed_set

            # skip mistakes that are not of a type we want to halt for
            if (hmm_tagged_word[1], gold_tagged_word[1]) not in halt_pairs:
                continue

            msg("%sTagged '%s' with %s when it should have been %s.%s" %\
            (sep_big, hmm_tagged_word[0], hmm_tagged_word[1],\
                gold_tagged_word[1], sep_small))

            msg("Gold: " + (' '.join([(w[0] + "/" + w[1]) for w in \
                gold_tagged_sent])))
            msg(sep_small)
            msg("Mine: " + (' '.join([(w[0] + "/" + w[1]) for w in \
                hmm_tagged_sent])))

            # get user input to decide whether to keep going; stop right away
            # on quit instead of idling through the remaining mistakes
            response = raw_input("\n\nEnter to continue, Q to quit: ")
            if response in ['q','Q']:
                break

    ######### `PRIVATE' FUNCTIONS #########

    @staticmethod
    def _percent(part, whole):
        """
        Return part as a percentage of whole, computed with float division

        :param part: numerator count
        :param whole: denominator count
        :return: float percentage, or 0.0 when whole is 0
        """
        if not whole:
            return 0.0
        return 100.0 * part / whole

    def _adjust_pos(self, sents):
        """
        Insert start markers (word and tag tuple) in each sentence of a list.
        Add any other tags that need adding

        :param sents: list of tagged sentences
        :return: new list of sentences, each prefixed with the start marker
        """

        # build the start-marked copies without touching the input sentences
        start_marker = (Tagger.start_tag, Tagger.start_tag)
        new_sents = [[start_marker] + sent for sent in sents]

        # make sure our start marker tag is in the POS list exactly once, even
        # if the same list is adjusted more than once
        if Tagger.start_tag not in self.pos_tags:
            self.pos_tags.append(Tagger.start_tag)

        # also take the opportunity to add other tags to the list
        # which we may not have encountered in testing
        for tag in self.tags.rare_tags:
            if tag not in self.pos_tags:
                self.pos_tags.append(tag)

        return new_sents
示例#39
0
#!/usr/bin/env python2.7

from HMM import HMM
import sys

# initialize HMM object, write resulting
# probability matrices to proper files

# the training file is the first command-line argument
try:
    training_file = sys.argv[1]
except IndexError:
    # sys.exit is the programmatic form; the bare exit() helper is installed
    # by the site module for interactive use and may be absent
    sys.exit("No file provided")

model = HMM(training_file)

# write the model's tmatrix and ematrix, formatted by HMM.format_matrix
with open('hw3b.a-matrix.txt', 'w') as tp_out:
    tp_out.write(HMM.format_matrix(model.tmatrix))
with open('hw3b.emission.txt', 'w') as ep_out:
    ep_out.write(HMM.format_matrix(model.ematrix))
示例#40
0
	def __init__(self,genes):
		"""Create the tagger; ``genes`` is passed straight to the HMM constructor."""
		self.hmm = HMM(genes)
示例#41
0
class Tagger:
	"""Tagger that decodes a trigram HMM with the Viterbi algorithm."""

	def __init__(self,genes):
		"""Create the tagger; ``genes`` is passed straight to the HMM constructor."""
		self.hmm = HMM(genes)

	# Calculates the emission probability of a given word tag pair
	def e(self,x,u):
		# x is the word, u is the tag for the given word
		return self.hmm.e(x,u)

	# Trigram probability of tag w following the tags u, v
	def q(self,w,u,v):
		return self.hmm.trigram_prob((u,v,w))

	# Return the (label, score) pair with the highest score
	def argmax(self,eminlist):
		return max(eminlist,key=lambda x : x[1])

	# Pair every candidate tag with its emission probability for the word
	def calc(self,text,tags):
		return [(tag,self.e(text,tag)) for tag in tags]

	# Tags allowed at position k: only the '*' padding before the sentence
	def k_val(self,k):
		if k in (-1,0):
			return ['*']
		return self.hmm.getTags()

	# Dynamic programming for the Viterbi algorithm
	def viterbi(self, sentence):
		"""Return ``(tags, scores)`` for ``sentence`` (a list of words).

		``tags`` holds one tag per word.  ``scores`` holds the table value
		for every position but the last, followed by the final score that
		includes the STOP transition.
		"""
		n = len(sentence)

		# shift to 1-based positions; index 0 is padding
		x = [""]+sentence
		y = [""]*(n+1)

		# pi[k,u,v]: best score of a tag sequence ending in u, v at position k
		pi = {}
		pi[0,'*',"*"] = 1
		back_pointers = {}

		for k in range(1,n+1):
			for u in self.k_val(k-1):
				for v in self.k_val(k):
					back_pointers[k,u,v],pi[k,u,v] = self.argmax(
						[(tag,pi[k-1,tag,u] * self.q(v,tag,u)*self.e(x[k],v))
							for tag in self.k_val(k-2)])

		# best final tag pair, taking the STOP transition into account
		(y[n-1],y[n]),score = self.argmax(
			[((u,v), pi[n,u,v] * self.q("STOP",u,v))
				for u in self.k_val(n-1) for v in self.k_val(n)])

		# follow the back pointers towards the start of the sentence
		for k in range((n-2),0,-1):
			y[k] = back_pointers[k+2,y[k+1],y[k+2]]
		y[0] = '*'
		tagScores = [pi[i,y[i-1],y[i]] for i in range(1,n)]

		return y[1:n+1],tagScores+[score]

	# reads in the gene data and returns each sentence of the document.
	def read_sent(self,data):
		"""Yield each sentence of ``data`` as a list of stripped words.

		A blank entry ends the current sentence.  A final sentence that is
		not followed by a blank entry is yielded as well instead of being
		dropped.
		"""
		sentence = []

		for word in data:
			stripped = word.strip()
			if stripped:
				sentence.append(stripped)
			else:
				yield sentence
				sentence = []

		# input that does not end with a blank line still has a last sentence
		if sentence:
			yield sentence

	# Print one "word tag" line per word; tags is the pair viterbi() returns
	def print_tags(self,sentence,tags):
		print("\n".join([word+" "+tag for word,tag in zip(sentence,tags[0])]))
示例#42
0
 def __init__(self):
     """Create the tagger with a new, default-constructed HMM."""
     self.hmm = HMM()
示例#43
0
文件: main.py 项目: pombredanne/math
            for word in ls:
                new_word = word + '#'
                for letter in new_word:
                    if letter not in letters:
                        letters[letter] = 1
                if new_word not in words:
                    words[new_word] = 1
    return sorted(letters.keys()), sorted(words.keys())

### A  - transition probabilities
### B  - emission probabilities
### Pi - initial state distribution
if __name__ == '__main__':
    # One usage text for both error paths.  sys.exit(message) prints it to
    # stderr and exits with status 1, so a usage error is no longer reported
    # as success.
    # NOTE(review): the usage text says <out_png> but the value is stored in
    # out_pdf_name -- confirm which output format is intended.
    _USAGE = 'usage: python HMM.py <input_file> <out_text> <out_png> [-v]'
    if len(sys.argv) < 4 or len(sys.argv) > 5:
        sys.exit(_USAGE)
    input_file_name = sys.argv[1]
    output_file_name = sys.argv[2]
    out_pdf_name = sys.argv[3]
    # the only accepted optional fourth argument is -v
    if len(sys.argv) == 5:
        if sys.argv[4] != '-v':
            sys.exit(_USAGE)
        VERBOSE_FLAG = 1
    letters, words = read_in(input_file_name)
    # the text output stays open while the model runs and plots
    with open(output_file_name,'w') as out_text:
        myHMM = HMM(letters,words,STATES,out_text,out_pdf_name,VERBOSE_FLAG)
        myHMM.cycle(MAX_ITERS,MIN_CHANGE)
        myHMM.make_plot()

示例#44
0
文件: Tagger.py 项目: romrell4/470-AI
 def __init__(self):
     """Create the tagger with a new HMM and an empty results dict."""
     self.hmm = HMM()
     # starts empty; nothing in this fragment fills it
     self.tests = {}
示例#45
0
def main():
    """Run the pygame robot simulation loop, then write a summary to 'results'.

    Per frame the loop:
      * handles input: w/a/s/d set ``r.direction`` while held, SPACE and 1-4
        select ``r.opmode``, t toggles trace drawing, ESC or closing the
        window ends the loop;
      * moves ``user_input`` by ``T*0.1`` and clamps it to [0.0, 1.0];
      * decays ``r.speed`` by 0.5 towards zero;
      * on a robot/goal collision: increments ``R``, updates the POMDP
        belief, trains the local HMM on ``o`` once ``R`` reaches
        ``numTrials``, and asks the goal for a new position;
      * redraws background, obstacles, rays, goal path, traces and sprites.

    After the loop pygame is shut down and ``Distance``, ``totalUserInput``
    and ``minObsDist`` are written comma-separated to the file 'results'.

    Reads and rebinds several module-level globals (see the ``global``
    statements); every simulation parameter comes from module scope.
    """
    global leave_trace, list_traces, pomdp
    global user_input, totalUserInput, o, R, numTrials

    if r_visual_granularity > wall_thickness:
        print 'PARAMETER ERROR: r_visual_granularity exceeds wall_thickness!'
        print 'This can cause wall detection errors!'
    if r_init_x < wall_thickness or r_init_y < wall_thickness:
        print 'PARAMETER ERROR: starting position overlaps wall!'
        print 'Check r_init_x|y_topleft and wall_thickness'

    pygame.init()           # also calls display.init()
    startTime = time.time()
    caption = sim_version + ' \tmode: teleoperation  '
    pygame.display.set_caption(caption + str(startTime))
    r_sprite = load_image(r_image)
    g_sprite = load_image('goal.bmp')
    background = load_image(back_image)

    # prepare simulation objects
    clock = pygame.time.Clock()
    screen.blit(background, (0, 0))
    goal = Goal(g_sprite)
    r = Robot(r_sprite, r_init_x, r_init_y, r_init_azi, r_init_fwd_speed,
              r_init_spin_speed, r_visual_range, r_visual_angle, goal, pomdp)

    robotSprite = pygame.sprite.Group(r)
    goalSprite = pygame.sprite.Group(goal)

    def _fill_obstacles():
        # Paint every obstacle rectangle in its obstacle's colour; the two
        # module-level lists are index-aligned.
        surface = pygame.display.get_surface()
        for idx, ob in enumerate(list_obstacles):
            surface.fill(ob.color, list_rect_obstacles[idx])

    # Movement keys and the direction letter each one stores on the robot.
    move_keys = {K_w: 'w', K_d: 'd', K_a: 'a', K_s: 's'}
    # Mode keys -> (r.opmode value, caption suffix shown in the title bar).
    mode_keys = {
        K_SPACE: (0, ' \tmode: teleoperation  '),   # teleop mode
        K_1: (1, ' \tmode: autonomous  '),          # autonomous navigation mode
        K_2: (2, ' \tmode: assist  '),              # assist mode
        K_3: (3, ' \tmode: pomdp  '),               # pomdp mode
        K_4: (4, ' \tmode: hmm  '),                 # hmm mode
    }

    # display the environment once, right before event loop
    _fill_obstacles()
    r.draw_rays(screen)
    r.showGoalPath(screen)
    pygame.display.flip()

    going = True
    time_down = 0.0
    time_elapsed = 0.0
    T = -1      # +1 while a movement key is held, -1 otherwise

    # HMM initialize
    hmm = HMM()
    hmm.pi = np.array([0.5, 0.5])
    hmm.A = np.array([[0.5, 0.5],[0.5, 0.5]])
    hmm.B = np.array([[0.3, 0.7],[0.99, 0.01]])

    while going:
        clock.tick(fps)      # at most that many fps
        time_elapsed = 0.0

        # Event loop ################################
        for event in pygame.event.get():
            # Compare the event *type*: an Event object never equals the
            # QUIT constant, so testing `event == QUIT` ignored window close.
            if event.type == QUIT:
                going = False
            elif event.type == KEYDOWN:
                if event.key == K_ESCAPE:
                    going = False
                elif event.key in move_keys:
                    T = 1
                    r.direction = move_keys[event.key]
                    time_down = pygame.time.get_ticks()

                if event.key in mode_keys:
                    opmode, caption_suffix = mode_keys[event.key]
                    r.opmode = opmode
                    caption = sim_version + caption_suffix

                if event.key == K_t:        # toggles the tracing mode
                    if leave_trace:
                        leave_trace = 0
                        list_traces = list()
                        print 'changing leave_trace from 1 to 0'
                    else:
                        leave_trace = 1
                        print 'changing leave_trace from 0 to 1'

            elif event.type == KEYUP:
                if event.key in move_keys:
                    # Length of the key press in milliseconds.
                    time_elapsed = pygame.time.get_ticks() - time_down
                    T = -1
                    r.direction = 'N'

        # Only the last key release seen in this frame contributes.
        totalUserInput += time_elapsed/1000.0

        user_input += T*0.1
        if user_input > 1.0:
            user_input = 1.0
        elif user_input < 0.0:
            user_input = 0.0

        pygame.display.set_caption(caption)

        if r.speed > 0.0:
            r.speed -= 0.5
        elif r.speed < 0.0:
            r.speed += 0.5

        # Find if goal reached
        if pygame.sprite.spritecollide(r, goalSprite, False) != []:
            print 'You made it to the goal'
            R = R+1
            r.pomdp.update_belief(0,5)
            print o
            if R == numTrials:
                # A full batch of trials is available: retrain and report,
                # then start collecting a new batch.
                hmm.train(o,0.0001,graphics=False)
                print 'probabilities\n',hmm.pi
                print 'state transition probabililities\n',hmm.A
                print 'observation probabililities\n',hmm.B
                R = 0
                o = np.zeros(numTrials)
            goal.getNew()
            startTime = time.time()

        robotSprite.update()
        goalSprite.update()
        screen.blit(background, (0, 0))  # redraws the entire bkgrnd.
        _fill_obstacles()
        r.draw_rays(screen)
        r.showGoalPath(screen)

        draw_traces(screen)
        robotSprite.draw(screen)
        goalSprite.draw(screen)

        pygame.display.flip()   # all changes are drawn at once (double buffer)

    pygame.quit()               # also calls display.quit()

    # The context manager guarantees the summary is flushed and the handle
    # closed; the file was previously left open.
    with open('results','w') as f:
        f.write(str(Distance)+',')
        f.write(str(totalUserInput)+',')
        f.write(str(minObsDist))
示例#46
0
	def learn_hmm(self,seqlist) : 
		"""Build an HMM whose per-position transitions are estimated from seqlist.

		Args:
			seqlist: ranked sequences over the alphabet 'ab'.  They are sliced
				and concatenated like strings.  The sequence at index ``i``
				contributes a weight of ``self.k - i`` to every count, and
				the length of ``seqlist[0]`` fixes the number of positions.

		Returns:
			A new ``HMM`` with fixed emission/mapping tables sized by
			``self.length`` and with ``trans[p][a][b]`` set to the smoothed
			ratio ``(count(p, ab) + smoothfac) / (count(p, a) + smoothfac*2)``
			for every position ``p`` except the last.

		Raises:
			IndexError: if ``seqlist`` is empty, or a sequence is longer than
				``seqlist[0]``.
			ZeroDivisionError: if a denominator is zero (e.g. ``smoothfac``
				is 0 and a symbol was never seen at a position).
		"""
		hmm = HMM()
		hmm.length = self.length
		hmm.dims = [(2,1)]*hmm.length # (latent,emit) dimspace
		# Build one independent object per position: `[obj]*n` would make every
		# position share the same mutable list/dict.
		hmm.emit = [[[1.0],[1.0]] for _ in range(hmm.length)]
		hmm.seqmap = [{'a':0,'b':1} for _ in range(hmm.length)]
		hmm.seqmap2 = [{0:'a',1:'b'} for _ in range(hmm.length)]
		hmm.featmap = [{'H':0} for _ in range(hmm.length)]
		hmm.initprob = [0.5,0.5]
		hmm.trained = True
		hmm.alphabet = 'ab'

		# Weighted occurrence counts per position: single symbols (counts) and
		# adjacent symbol pairs starting at that position (counts2).
		npos = len(seqlist[0])
		counts = [{} for _ in range(npos)]
		counts2 = [{} for _ in range(npos)]

		for rank,seq in enumerate(seqlist) :
			for j,aa in enumerate(seq) :
				counts[j][aa] = counts[j].get(aa,0) + self.k - rank
			for j in range(len(seq)-1) :
				pair = seq[j:j+2]
				counts2[j][pair] = counts2[j].get(pair,0) + self.k - rank

		# Calculate HMM transition probabilities
		nsym = len(hmm.alphabet)
		hmm.trans = []
		for pos in range(npos-1) :
			rows = []
			for aa1 in hmm.alphabet :
				denom = counts[pos].get(aa1,0)+self.smoothfac*nsym
				# float() forces true division; with integer counts and an
				# integer smoothing factor Python 2 would floor the ratio.
				rows.append([
					float(counts2[pos].get(aa1+aa2,0)+self.smoothfac) / denom
					for aa2 in hmm.alphabet
				])
			hmm.trans.append(rows)
		return hmm
示例#47
0
from HMM import HMM

pi = [0.5, 0.5]
A = [[0.5, 0.5], [0.5, 0.5]]
B = [[0.8, 0.2], [0.3, 0.7]]

hmm = HMM(A, B, pi)

observations_head = [0,0,0,1,1,0,0]
print "Head: ", hmm.probabilidade_observacoes(observations_head)

observations_tail = [0,0,0,1,1,0,1]
print "Tail: ", hmm.probabilidade_observacoes(observations_tail)
示例#48
0
from HMM import HMM

# Three-element vector passed as HMM's third argument
# (presumably the initial state distribution; confirm against HMM's constructor).
pi = [0.34, 0.33, 0.33]

# 3x3 matrix passed as HMM's first argument; its second and third rows put all
# their mass on the first column.
A = [[0.34, 0.33, 0.33], [1.0, 0.0, 0.0], [1.0, 0.0, 0.0]]

# 3x3 matrix passed as HMM's second argument.
B = [[0.34, 0.33, 0.33], [0.4, 0.6, 0.0], [0.0, 0.6, 0.4]]

# Sequence of ten integer symbols (values 0-2) handed to viterbi below.
O = [1, 1, 2, 2, 1, 0, 1, 2, 2, 0]

hmm = HMM(A, B, pi)
# Print whatever hmm.viterbi returns for O
# (presumably the most likely state path; TODO confirm).
print hmm.viterbi(O)
# One Appliance per column of `combined`, built separately for the training
# set and the two test sets; the three lists stay index-aligned.
app_train_list = []
app_test_list1 = []
app_test_list2 = []

for channel in combined.columns:
    # Double brackets select the channel as a single-column frame.
    app_train_list.append(Appliance(channel,train_set[[channel]]))
    app_test_list1.append(Appliance(channel,test_set1[[channel]]))
    app_test_list2.append(Appliance(channel,test_set2[[channel]]))

# Per-appliance results keyed by appliance name: number of states and the
# fitted model object.
num_states_dict={}
ModelDict = {}

for i,app in enumerate(app_train_list):
    # Training matrix from the training appliance, test matrix from the
    # matching entry of the first test set.
    X_train = create_matrix(app,good_chunks = True)
    X_test = create_matrix(app_test_list1[i],good_chunks = False)
    # NOTE: rebinds the module-level name `hmm` on every iteration.
    hmm = HMM(X_train,X_test)
    print app.name
    hmm.fit_HMM(perc_std_expl)
    ModelDict[app.name] = hmm.model
    num_states_dict[app.name] = hmm.n_states

# Train the factorial model on the training appliances with the state counts
# collected above, then disaggregate the 'total' column of the second test set.
fhmm = FHMM()
fhmm.train(app_train_list,num_states_dict = num_states_dict)
# An empty frame is handed to disaggregate, whose return value replaces it.
predictions = pd.DataFrame()
predictions = fhmm.disaggregate(test_set2[['total']], predictions)

# Column-wise sums of the predictions and of the same columns in test_set2.
total_power_predicted = predictions.sum()
total_power_act = test_set2[predictions.columns].sum()

print "Percent stand.dev.explained, 1 min:", perc_std_expl_full(predictions,test_set2)
print "R2, 1 min:" , r2_full(predictions,test_set2)