Exemplo n.º 1
0
    def test_dishonest_casino_larger_transition_p(self):
        '''Dishonest casino with a 0.1 probability of switching dice.

        Checks the scaled forward log-probability, the Viterbi path and the
        per-step state posteriors of a two-state model against stored
        reference values for one fixed sequence of rolls.
        '''
        # Each state is kept with probability 0.9 and left with 0.1.
        transitions = np.array([[0.9, 0.1],
                                [0.1, 0.9]])
        # Emission weights before row scaling: state 0 treats every face
        # alike, state 1 weights face "6" five times as heavily.
        face_weights = np.array([[1.0, 1.0, 1.0, 1.0, 1.0, 1.0],
                                 [1.0, 1.0, 1.0, 1.0, 1.0, 5.0]])
        emissions = statutil.scale_row_sums(face_weights)
        # The six die faces are the observable symbols.
        faces = [1, 2, 3, 4, 5, 6]

        # Pi is not passed; per the original note it defaults to uniform.
        model = hmm.HMM(2, A=transitions, B=emissions, V=faces)

        rolls = [1, 2, 3, 4, 5, 2, 1, 6, 6, 6, 5, 6]
        log_likelihood, alpha, scale = hmm.forward(model, rolls, scaling=1)
        assert_almost_equal(log_likelihood, -20.124, decimal=3,
                            err_msg='Wrong observation probability')

        viterbi_path, _, _ = hmm.viterbi(model, rolls, scaling=1)
        expected_viterbi = [0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1]
        assert_equal(viterbi_path, expected_viterbi, err_msg='Wrong Viterbi path')

        # The backward pass reuses the third value returned by the forward pass.
        beta = hmm.backward(model, rolls, scale)
        gamma, posterior_path = hmm.individually_optimal_states(alpha, beta)
        expected_gamma = [
            [0.8189770516168013, 0.8482906260695058, 0.8525027084764197,
             0.8329611652077556, 0.7834127024175411, 0.6880018120129073,
             0.5161970090643716, 0.2130207566284025, 0.12024202874950358,
             0.10797060639721641, 0.15902649827833876, 0.14930464162738483],
            [0.18102294838319855, 0.15170937393049422, 0.14749729152358024,
             0.16703883479224435, 0.21658729758245884, 0.31199818798709256,
             0.4838029909356284, 0.7869792433715975, 0.8797579712504964,
             0.8920293936027837, 0.8409735017216613, 0.8506953583726152],
        ]
        assert_almost_equal(gamma, expected_gamma,
                            decimal=5, err_msg='Wrong state probabilities')
        expected_posterior_path = [0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1]
        assert_equal(posterior_path, expected_posterior_path,
                     'Wrong individually-optimal states')
Exemplo n.º 2
0
 def test_forward_backward(self):
     """Check that forward and backward scores agree on the 'SIL' phone HMM.

     Runs ``hmm.forward_evaluate`` over frames 0..49 and
     ``hmm.backward_evaluate`` over frames 50..1 (walked in reverse), keeps
     the per-frame results on ``self.alpha`` / ``self.beta``, and asserts
     that ``log(sum(alpha * beta))`` changes by less than 0.1 between
     consecutive time steps.
     """
     pid = self.acmod.mdef.phone_id('SIL')
     h1 = hmm.HMM(self.acmod.mdef.pid2sseq(pid),
                  self.acmod.tmat[self.acmod.mdef.pid2tmat(pid)])
     mfcc = s2mfc.open(os.path.join(self.testdir,
                                    'man.ah.111a.mfc')).getall()
     # Subtract the per-column mean before computing the features.
     mfcc -= mfcc.mean(0)
     feat = _1s_c_d_dd.compute(mfcc)

     # Forward pass: each call receives the previous result (None at first).
     alpha = None
     self.alpha = []
     for f in feat[0:50]:
         senscr = self.acmod.senone_compute(h1.iter_senones(), f)
         alpha = hmm.forward_evaluate(h1, senscr, alpha)
         self.alpha.append(alpha)

     # Backward pass.  Note that this is time-shifted by one from the
     # forward pass above.
     beta = None
     self.beta = []
     for f in feat[50:0:-1]:
         senscr = self.acmod.senone_compute(h1.iter_senones(), f)
         beta = hmm.backward_evaluate(h1, senscr, beta)
         self.beta.append(beta)
     # Put the backward results back into increasing time order.
     self.beta.reverse()

     ll = 0
     for a, b in zip(self.alpha, self.beta):
         newll = sum(a * b)
         # The first step has nothing to compare against.
         if ll != 0:
             # assertTrue replaces the deprecated alias assert_, which was
             # removed from unittest in Python 3.12.
             self.assertTrue(abs(log(ll) - log(newll)) < 0.1)
         ll = newll
Exemplo n.º 3
0
def main(args):
    """Train an HMM on compared sequences and write the results to disk.

    ``args`` must hold exactly two items: the sequence file (the usage text
    calls it ``sequences.fasta``) and the initial-parameter file.  The model
    built from the parsed parameters is re-estimated with
    ``algorithms.baum_welch_log`` and four files are written:
    ``estimated_parameters.txt``, ``likelihoods.txt``,
    ``decodings_initial.txt`` and ``decodings_estimated.txt``.

    Raises:
        SystemExit: with status 1 when ``args`` does not hold two items.
    """
    if len(args) != 2:
        print("Error. main.py needs two arguments")
        print("Example: python main.py sequences.fasta initial_parameters.txt")
        # A usage error must reach the calling shell as a failure; the bare
        # exit() used before reported success (status 0).
        raise SystemExit(1)

    s = [1, 2, 3, 4]
    pParser = parser.pparser()
    parameters = pParser.parse_Parameters(args[1])
    p = parameters[0]
    a = parameters[1]
    e = parameters[2]
    q = ['I', 'D']
    x = util.compareSequences(args[0])
    markovModel = hmm.HMM(False, s, q, a, e, p)

    # The training set is a one-element list holding x.  The trailing 10 is
    # presumably the number of Baum-Welch iterations -- TODO confirm.
    newModel = algorithms.baum_welch_log(markovModel, [x], 10)
    fileHandler.outputEstimatedParameters(newModel, 'estimated_parameters.txt')

    # Likelihood of x under the initial model, then under the trained one.
    likelihoods = [algorithms.forward_log(markovModel, x),
                   algorithms.forward_log(newModel, x)]
    fileHandler.outputLikelihoods(likelihoods, 'likelihoods.txt')

    # Each decoding works on its own copy of x.
    decodings_initial = algorithms.decodings(markovModel, x[:])
    fileHandler.outputDecodings(decodings_initial, 'decodings_initial.txt')

    decodings_estimated = algorithms.decodings(newModel, x[:])
    fileHandler.outputDecodings(decodings_estimated, 'decodings_estimated.txt')
Exemplo n.º 4
0
    def test_dishonest_casino(self):
        '''Dishonest casino with a 0.01 probability of switching dice.

        Checks the scaled forward log-probability, the Viterbi path and the
        per-step state posteriors of a two-state model against stored
        reference values for one fixed sequence of rolls.
        '''
        # Each state is kept with probability 0.99 and left with 0.01.
        transitions = np.array([[0.99, 0.01],
                                [0.01, 0.99]])
        # Emission weights before row scaling: state 0 treats every face
        # alike, state 1 weights face "6" five times as heavily.
        face_weights = np.array([[1.0, 1.0, 1.0, 1.0, 1.0, 1.0],
                                 [1.0, 1.0, 1.0, 1.0, 1.0, 5.0]])
        emissions = statutil.scale_row_sums(face_weights)
        # The six die faces are the observable symbols.
        faces = [1, 2, 3, 4, 5, 6]

        # Pi is not passed; per the original note it defaults to uniform.
        model = hmm.HMM(2, A=transitions, B=emissions, V=faces)

        rolls = [1, 2, 3, 4, 5, 2, 1, 6, 6, 6, 5, 6]
        log_likelihood, alpha, scale = hmm.forward(model, rolls, scaling=1)
        assert_almost_equal(log_likelihood, -20.9468006, decimal=5,
                            err_msg='Wrong observation probability')

        viterbi_path, _, _ = hmm.viterbi(model, rolls, scaling=1)
        expected_viterbi = [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]
        assert_equal(viterbi_path, expected_viterbi, 'Wrong Viterbi path')

        # The backward pass reuses the third value returned by the forward pass.
        beta = hmm.backward(model, rolls, scale)
        gamma, posterior_path = hmm.individually_optimal_states(alpha, beta)
        expected_gamma = [
            [0.63711364302936, 0.6348934929050587, 0.6271179131667495,
             0.6117100305977996, 0.5845543683193845, 0.5383975935172204,
             0.46091113744414974, 0.3313982095474306, 0.28864618346708165,
             0.27562909135388625, 0.27498372625848855, 0.26932891011973825],
            [0.36288635697064003, 0.3651065070949412, 0.3728820868332506,
             0.38828996940220045, 0.4154456316806155, 0.4616024064827796,
             0.5390888625558502, 0.6686017904525694, 0.7113538165329184,
             0.7243709086461138, 0.7250162737415115, 0.7306710898802617],
        ]
        assert_almost_equal(gamma, expected_gamma,
                            decimal=5, err_msg='Wrong state probabilities')
        expected_posterior_path = [0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1]
        assert_equal(posterior_path, expected_posterior_path,
                     'Wrong individually-optimal states')
Exemplo n.º 5
0
    def setUp(self):
        """Build the two-state, three-symbol example model used by the tests.

        Defines the label-keyed start, transition and emission tables, maps
        the labels to indices, converts the tables into ``self.A``,
        ``self.B`` and ``self.pi`` (printing each intermediate result) and
        stores the resulting ``hmm.HMM`` on ``self.hmm``.
        """
        # Hidden states
        self.states = ('健康', '感冒')
        # Observable symbols
        self.observations = ('正常', '发冷', '发烧')
        self.start_probability = {'健康': 0.6, '感冒': 0.4}
        self.transition_probability = {
            '健康': {'健康': 0.7, '感冒': 0.3},
            '感冒': {'健康': 0.4, '感冒': 0.6},
        }
        self.emission_probability = {
            '健康': {'正常': 0.5, '发冷': 0.4, '发烧': 0.1},
            '感冒': {'正常': 0.1, '发冷': 0.3, '发烧': 0.6},
        }

        # Label <-> index lookups for states and observations.
        state_maps = generate_index_map(self.states)
        self.states_label_index, self.states_index_label = state_maps
        observation_maps = generate_index_map(self.observations)
        (self.observations_label_index,
         self.observations_index_label) = observation_maps

        print("states_label_index", self.states_label_index)
        print("states_index_label", self.states_index_label)
        print("observations_label_index", self.observations_label_index)
        print("observations_index_label", self.observations_index_label)

        # Transition table: indexed by state on both axes.
        self.A = convert_map_to_matrix(
            self.transition_probability,
            self.states_label_index,
            self.states_label_index)
        print("A", self.A)

        # Emission table: indexed by state, then by observation.
        self.B = convert_map_to_matrix(
            self.emission_probability,
            self.states_label_index,
            self.observations_label_index)
        print("B", self.B)

        self.pi = convert_map_to_vector(
            self.start_probability, self.states_label_index)
        print("Pi", self.pi)

        self.hmm = hmm.HMM(self.A, self.B, self.pi)
Exemplo n.º 6
0
 def init_model(self):
     '''
     Create self.model as an hmm.HMM configured from self.n_markov_states,
     self.ini_markov_state, self.n_obs_states, self.ini_trans_matrix
     and self.ini_b.
     '''
     n_states = self.n_markov_states
     start = self.ini_markov_state
     # The symbol alphabet is the integers 0 .. n_obs_states - 1.
     symbols = np.arange(self.n_obs_states)
     self.model = hmm.HMM(n_states=n_states, Pi=start, V=symbols,
                          A=self.ini_trans_matrix, B=self.ini_b)
Exemplo n.º 7
0
def main():
    """Train a two-state, three-symbol HMM on a fixed observation sequence.

    Two grids are created first via ``createGrid``; nothing in this function
    reads them (the only consumer, ``shiftCube``, is commented out).
    """
    # The calls are kept in case createGrid has side effects.  The former
    # nested-list ``dcube`` was removed: it was never read.
    ocGrid = createGrid(100)
    dCube = createGrid(10)
    #shiftCube(ocGrid,dCube,5)
    pi = np.array([0.5, 0.5])  # initial distribution
    a = np.array([[0.5, 0.5], [0.5, 0.5]])  # State transition matrix
    b = np.array([[0.2, 0.4, 0.4], [0.7, 0.2, 0.1]])  # Observation matrix
    obs = np.array([0, 1, 0, 1, 0, 1, 2, 0, 1, 0, 1, 0, 1, 2, 0, 1, 0])
    hdmm = hmm.HMM(a, b, pi)
    # NOTE(review): the meaning of 0.1 is not visible here; presumably a
    # convergence criterion -- confirm against hmm.HMM.train.
    hdmm.train(obs, 0.1)
Exemplo n.º 8
0
def main():
    """Feed an HTK input file through a phoneme/dictionary HMM.

    Parses the command line, builds the model network from the dictionary
    and phoneme files, steps the model once per item read from the input
    and finally prints the result.
    """
    options = init_argparse().parse_args()
    lexicon = read_dictionary(options.dict)
    phoneme_set = read_phonemes(options.phonemes)

    recognizer = hmm.HMM(phoneme_set, lexicon)
    recognizer.build_network()

    for item in htk.readhtk(options.input):
        recognizer.step(item)
    recognizer.print_result(options.frames)
Exemplo n.º 9
0
 def __init__(self):
     """Create the counters, feature buffers, HMM and mel filterbank."""
     self.count = 0

     # Flat feature buffer of 50 * 36 values, where
     # 36 = MFCC(12) + Delta1(12) + Delta2(12).
     feature_len = 50 * 36
     self.buffer = np.zeros(feature_len)
     cepstral_shape = (5, 12)
     self.c_buffer = np.zeros(cepstral_shape)  # MFCC
     self.d_buffer = np.zeros(cepstral_shape)  # Delta1
     self.melbuffer = np.zeros((3, 160))

     self.HMM = hmm.HMM()
     self.filterbank = self.mel()

     self.prediction_buffer = []
     # Twenty slots, all initialised to -1.
     self.space_buffer = np.zeros(20) - 1
     self.command = 5  # Stop
Exemplo n.º 10
0
    def fit(self, text):
        """Populate ``self._model`` from the statistics supplied by ``text``.

        The tag set and tag index define the model; transitions, per-tag
        emissions and initial probabilities are then copied in, in that
        order.
        """
        tagset, tag_index = text.get_tagset()
        model = hmm.HMM(tagset, tag_index)
        self._model = model

        model.set_all_transitions(text.calculate_transition_matrix())

        # One emission table per tag.
        for tag in tagset:
            model.set_emission(tag, text.count_emission(tag))

        model.set_initial(text.calculate_initial_probability())
Exemplo n.º 11
0
    def test_create_model(self):
        '''Construct a two-state model and compare its stored matrices.

        Based on Mike's DC example.
        '''
        # Transition probabilities
        transitions = np.array([[.5, .5], [.5, .5]])
        # Emission probabilities; the second state halves the value for "6".
        first_row = [1.0, 1.0, 1.0, 1.0, 1.0, 1.0]
        second_row = [1.0, 1.0, 1.0, 1.0, 1.0, 1.0 / 2]
        emissions = np.array([first_row, second_row])
        # Symbols
        symbols = [1, 2, 3, 4, 5, 6]

        # Model
        model = hmm.HMM(2, A=transitions, B=emissions, V=symbols)
        expected = (transitions, emissions, [0.5, 0.5])
        TestHmm.assert_model_matrices_almost_equal(model, expected)
Exemplo n.º 12
0
def kfold_cross_validate(directory, k):
    """Run k-fold cross validation over eight HMM variants and two baselines.

    The training data is split with ``breakup_training``; each subset serves
    once as the validation set while all other subsets form the training
    set.

    Returns a list with one ``get_avg_results`` value per model, in this
    order: slots 0-3 are the HMMs without resampling and slots 4-7 the HMMs
    with resampling (each group ordered: smooth both, smooth emission only,
    smooth transition only, no smoothing); slots 8 and 9 are the baselines
    without and with resampling.
    """
    print('Beginning k-fold cross validation...')

    folds = breakup_training(directory, k)
    # (smooth_trans, smooth_emiss) for the four HMM variants of each group
    smoothing_variants = (
        (True, True),    # smooth both
        (False, True),   # smooth emission only
        (True, False),   # smooth transition only
        (False, False),  # no smoothing
    )
    # outer list = each model, inner list = results per iteration
    results = [[] for _ in xrange(10)]

    for fold_no in xrange(len(folds)):
        # the current subset validates, every other subset trains
        validation_set = set(folds[fold_no])
        train_set = set()
        for subset in folds[:fold_no] + folds[fold_no + 1:]:
            train_set.update(subset)

        # first group (slots 0-3) without resampling, then slots 4-7 with
        for first_slot, resample in ((0, False), (4, True)):
            # all four models of a group are built before any is validated
            models = [
                hmm.HMM(directory, train_set, smooth_trans=smooth_trans,
                        smooth_emiss=smooth_emiss, resample=resample)
                for smooth_trans, smooth_emiss in smoothing_variants
            ]
            for offset, model in enumerate(models):
                score = cross_validate_hmm(directory, model, validation_set)
                results[first_slot + offset].append(score)

        # baseline without and with resampling (slots 8 and 9)
        baselines = [
            baseline.Baseline(directory, train_set, resample=resample)
            for resample in (False, True)
        ]
        for offset, base_model in enumerate(baselines):
            score = cross_validate_baseline(directory, base_model,
                                            validation_set)
            results[8 + offset].append(score)

        # status update
        print(str((float(fold_no + 1) / k) * 100) + '% complete')

    # average each model's results across all k rounds
    return [get_avg_results(model_results, k) for model_results in results]
Exemplo n.º 13
0
def loadmodel(K, modelversion=2):
    """Load the pickled HMM parameters for ``K`` and build a model from them.

    Reads ``experiments/data/hmm_k_<K>.pkl``.  For ``modelversion`` 1 the
    transition matrix and start probabilities are passed through ``np.log``
    and handed to ``hmm1.HMM``; for ``modelversion`` 2 the stored values go
    to ``hmm2.HMM`` unchanged.  Any other ``modelversion`` returns ``None``.
    """
    pkl_path = 'experiments/data/hmm_k_{}.pkl'.format(K)
    # NOTE(review): pickle.load can execute arbitrary code; only point this
    # at trusted files.
    with open(pkl_path, 'rb') as handle:
        params = pickle.load(handle)

    if modelversion == 1:
        # If the model requires logprobs
        for key in ('transition_matrix', 'start_prob'):
            params[key] = np.log(params[key])
        model_cls = hmm1.HMM
    elif modelversion == 2:
        model_cls = hmm2.HMM
    else:
        return None

    return model_cls(params['num_states'], params['transition_matrix'],
                     params['start_prob'], params['means'], params['stds'])
Exemplo n.º 14
0
    def build_hmm(self, model, init, srange, Nrange, times, nop = 129):
        """Record the method name and create ``self.hmm`` for these settings.

        ``self.method_name`` becomes ``model + '-' + init``.  ``h``, ``u``
        and ``v`` are passed as the fixed one-element arrays ``[0.5]``,
        ``[0]`` and ``[0]``.
        """
        self.method_name = model + '-' + init

        settings = {
            'times': times,
            'model': model,
            'init': init,
            'h': np.array([0.5]),
            's': srange,
            'N': Nrange,
            'u': np.array([0]),
            'v': np.array([0]),
            'nop': nop,
        }
        self.hmm = hmm.HMM(**settings)
Exemplo n.º 15
0
    def testBaumWelchTrain(self):
        """Train a fresh model on simulated data and print its hit rate.

        100 steps are simulated from ``self.hmm``; a model with uniform
        transition and start probabilities is trained on the observations,
        and the fraction of decoded states equal to the simulated ones is
        printed.
        """
        # run a baum_welch_train
        observations_data, states_data = self.hmm.simulate(100)
        print('observations_data', observations_data)
        print('states_data', states_data)

        initial_A = np.array([[0.5, 0.5], [0.5, 0.5]])
        initial_B = np.array([[0.3, 0.3, 0.3], [0.3, 0.3, 0.3]])
        initial_pi = np.array([0.5, 0.5])
        guess = hmm.HMM(initial_A, initial_B, initial_pi)
        guess.baum_welch_train(observations_data)

        # Second element of state_path's result; consumed with next() below.
        decoded = guess.state_path(observations_data)[1]
        hits = 0.0
        for true_state in states_data:
            predicted = next(decoded)
            if predicted == true_state:
                hits += 1

        print(hits / len(states_data))
class PredictHMM:
    """Predict the next state of a sequence using HMM parameters on disk."""

    def __init__(self):
        """Nothing to initialise; parameters are loaded on every predict()."""

    def predict(self, seq):
        """Return ``dpf.predict_next_state`` for ``seq``.

        The transition, emission and prior arrays are loaded from
        ``resources/models/hmm_model/<len(seq)>_{a,b,pi}.npy``, so a
        parameter set must exist for the exact length of ``seq``.

        Args:
            seq: the observed sequence; it is copied into a list first.

        Returns:
            Whatever ``dpf.predict_next_state(model, list(seq), len(seq))``
            returns.
        """
        # Fixed model dimensions handed to hmm.HMM as its first two
        # arguments -- presumably state and symbol counts; TODO confirm.
        N = 25
        M = 19
        T = len(seq)
        temp = list(seq)

        model_dir = 'resources/models/hmm_model/'
        trms = np.load(model_dir + str(T) + '_a.npy')
        emis = np.load(model_dir + str(T) + '_b.npy')
        pri = np.load(model_dir + str(T) + '_pi.npy')
        model = hmm.HMM(N, M, T, transmission=trms, emission=emis, prior=pri)

        res = dpf.predict_next_state(model, temp, T)

        return res
Exemplo n.º 17
0
def ch3Ensemble(V0=-65,
                V1=20,
                tau01=2.,
                tau12=4.,
                Vchar01=1.,
                Vchar12=1.,
                Vhalf01=-20.,
                Vhalf12=-25,
                nchannels=5):
    """Build an HMM for an ensemble of ``nchannels`` ``ch3hmm`` channels.

    All voltage and time-constant arguments are forwarded unchanged to
    ``hmm.ch3hmm``; the result is wrapped in an ``Ensemble`` whose
    ``pstates``, ``output`` and ``Q`` define the returned ``hmm.HMM``.
    """
    channel_settings = dict(
        V0=V0,
        V1=V1,
        tau01=tau01,
        tau12=tau12,
        Vhalf01=Vhalf01,
        Vhalf12=Vhalf12,
        Vchar01=Vchar01,
        Vchar12=Vchar12,
    )
    single_channel = hmm.ch3hmm(**channel_settings)
    ensemble = Ensemble(single_channel, nchannels)
    return hmm.HMM(ensemble.pstates, ensemble.output, ensemble.Q)
Exemplo n.º 18
0
 def test_train_model(self):
     '''Dishonest Casino Example - EM algorithm.

     Runs 15 epochs of hmm.baum_welch on a single roll sequence and compares
     the re-estimated A, B and Pi against stored reference values.
     '''
     # Each state is kept with probability 0.99 and left with 0.01.
     transitions = np.array([[0.99, 0.01],
                             [0.01, 0.99]])
     # Emission weights before row scaling: state 0 treats every face alike,
     # state 1 weights face "6" five times as heavily.
     face_weights = np.array([[1.0, 1.0, 1.0, 1.0, 1.0, 1.0],
                              [1.0, 1.0, 1.0, 1.0, 1.0, 5.0]])
     emissions = statutil.scale_row_sums(face_weights)
     # The six die faces are the observable symbols.
     faces = [1, 2, 3, 4, 5, 6]

     # Pi is not passed; per the original note it defaults to uniform.
     model = hmm.HMM(2, A=transitions, B=emissions, V=faces)

     rolls = [1, 2, 3, 4, 5, 2, 1, 6, 6, 6, 5, 6]
     # The training set is a list holding this single sequence.
     training_set = [rolls]
     hmm.baum_welch(model, training_set, epochs=15, graph=False)

     expected_A = [[0.856658708052639, 0.14334129194736125],
                   [2.454940916925095e-16, 1.0]]
     expected_B = [
         [0.28329354031233306, 0.2866825838637413, 0.14334129194736112,
          0.14334129194736112, 0.14334129192821368, 9.896623857864685e-13],
         [0.004706380704415612, 4.3023359620169447e-11,
          3.2510873580469717e-111, 1.2201233032249015e-54,
          0.19905872387205914, 0.7962348953805019],
     ]
     expected_Pi = [1.0, 4.364785210913299e-122]
     TestHmm.assert_model_matrices_almost_equal(
         model, (expected_A, expected_B, expected_Pi))
Exemplo n.º 19
0
def simulate(membership, TM, TM0, TI0, Z, T1, s0, rho, _actions=None):
    """Simulate a warm-up phase and estimate transition/observation models.

    The "natural" transition model ``TMn`` is ``TM[0]``, ``TM[1]`` and
    ``TM[2]`` weighted by ``membership[0, 0..2]``; ``TIn`` is
    ``util.interaction_effect(TMn, rho)``.

    With ``T1 > 0`` each warm-up step records an action (taken from
    ``_actions`` when given, otherwise a Bernoulli(0.3) draw), samples the
    next state from ``TMn[s]`` and an observation from ``Z[s]``.  An
    ``hmm.HMM`` initialised with ``pi = [0.5, 0.3, 0.2]``, ``A = [TM0, TI0]``
    and a copy of ``Z`` is then trained on the observations and actions, and
    its ``A`` / ``B`` become ``T_hat`` / ``Z_hat``.  With ``T1 <= 0`` a
    single observation is drawn from ``Z[s0]`` and ``T_hat = [TMn, TIn]``,
    ``Z_hat = Z``.

    Returns:
        Tuple ``(TMn, TIn, actions, observations, s, b, T_hat, Z_hat)``
        where ``s`` is the final state and ``b`` is column ``Z[:, o]`` of
        the last observation, scaled to sum to one.
    """
    # natural (true) transition
    TMn = (membership[0, 0] * TM[0] + membership[0, 1] * TM[1]
           + membership[0, 2] * TM[2])
    TIn = util.interaction_effect(TMn, rho)

    # t=1...T1
    actions = []
    observations = []
    s = s0
    # warm up loop
    if T1 > 0:
        for t in range(T1):
            if _actions is not None:
                a = _actions[t]
            else:
                a = np.random.binomial(1, 0.3, 1)[0]
            actions.append(a)
            # NOTE(review): the action is recorded but the next state is
            # always drawn from TMn, whatever the action was.
            s = np.random.choice(3, 1, p=TMn[s])[0]  # assumes 3 states
            o = np.random.choice(3, 1, p=Z[s])[0]
            observations.append(o)
        # The instance must not be bound to the name ``hmm``: assigning to
        # ``hmm`` anywhere in this function makes it a local variable, so
        # ``hmm.HMM()`` would raise UnboundLocalError before the module is
        # ever reached.
        model = hmm.HMM()
        model.pi = np.array([0.5, 0.3, 0.2])  # ASSUMPTION
        model.A = np.array([TM0, TI0])
        model.B = np.copy(Z)
        model.train(observations, actions, 0.01)
        T_hat = model.A
        Z_hat = model.B
    else:
        o = np.random.choice(3, 1, p=Z[s])[0]
        T_hat = np.array([TMn, TIn])
        Z_hat = Z
    b = Z[:, o]
    b = b / b.sum()  # initialize belief
    # personalize T, Z
    return TMn, TIn, actions, observations, s, b, T_hat, Z_hat
Exemplo n.º 20
0
	def setUp(self):
		"""Create the 16-state, 2-symbol HMM fixture stored on ``self.d``."""
		# From https://github.com/phvu/misc/blob/master/viterbi/test1.py
		# this test is partly taken from cuHMM (https://code.google.com/p/chmm/)
		# Initial distribution: 16 values, transposed into a 16x1 column.
		pi = np.array([[0.04, 0.02, 0.06, 0.04, 0.11, 0.11, 0.01, 0.09, 0.03, 0.05, 0.06, 0.11, 0.05, 0.11, 0.03, 0.08]]).T
		# Transition matrix: 16 rows of 16 values.
		trans = np.array([ \
		    [0.08, 0.02, 0.10, 0.05, 0.07, 0.08, 0.07, 0.04, 0.08, 0.10, 0.07, 0.02, 0.01, 0.10, 0.09, 0.01], \
		    [0.06, 0.10, 0.11, 0.01, 0.04, 0.11, 0.04, 0.07, 0.08, 0.10, 0.08, 0.02, 0.09, 0.05, 0.02, 0.02], \
		    [0.08, 0.07, 0.08, 0.07, 0.01, 0.03, 0.10, 0.02, 0.07, 0.03, 0.06, 0.08, 0.03, 0.10, 0.10, 0.08], \
		    [0.08, 0.04, 0.04, 0.05, 0.07, 0.08, 0.01, 0.08, 0.10, 0.07, 0.11, 0.01, 0.05, 0.04, 0.11, 0.06], \
		    [0.03, 0.03, 0.08, 0.10, 0.11, 0.04, 0.06, 0.03, 0.03, 0.08, 0.03, 0.07, 0.10, 0.11, 0.07, 0.03], \
		    [0.02, 0.05, 0.01, 0.09, 0.05, 0.09, 0.05, 0.12, 0.09, 0.07, 0.01, 0.07, 0.05, 0.05, 0.11, 0.06], \
		    [0.11, 0.05, 0.10, 0.07, 0.01, 0.08, 0.05, 0.03, 0.03, 0.10, 0.01, 0.10, 0.08, 0.09, 0.07, 0.02], \
		    [0.03, 0.02, 0.16, 0.01, 0.05, 0.01, 0.14, 0.14, 0.02, 0.05, 0.01, 0.09, 0.07, 0.14, 0.03, 0.01], \
		    [0.01, 0.09, 0.13, 0.01, 0.02, 0.04, 0.05, 0.03, 0.10, 0.05, 0.06, 0.06, 0.11, 0.06, 0.03, 0.14], \
		    [0.09, 0.03, 0.04, 0.05, 0.04, 0.03, 0.12, 0.04, 0.07, 0.02, 0.07, 0.10, 0.11, 0.03, 0.06, 0.09], \
		    [0.09, 0.04, 0.06, 0.06, 0.05, 0.07, 0.05, 0.01, 0.05, 0.10, 0.04, 0.08, 0.05, 0.08, 0.08, 0.10], \
		    [0.07, 0.06, 0.01, 0.07, 0.06, 0.09, 0.01, 0.06, 0.07, 0.07, 0.08, 0.06, 0.01, 0.11, 0.09, 0.05], \
		    [0.03, 0.04, 0.06, 0.06, 0.06, 0.05, 0.02, 0.10, 0.11, 0.07, 0.09, 0.05, 0.05, 0.05, 0.11, 0.08], \
		    [0.04, 0.03, 0.04, 0.09, 0.10, 0.09, 0.08, 0.06, 0.04, 0.07, 0.09, 0.02, 0.05, 0.08, 0.04, 0.09], \
		    [0.05, 0.07, 0.02, 0.08, 0.06, 0.08, 0.05, 0.05, 0.07, 0.06, 0.10, 0.07, 0.03, 0.05, 0.06, 0.10], \
		    [0.11, 0.03, 0.02, 0.11, 0.11, 0.01, 0.02, 0.08, 0.05, 0.08, 0.11, 0.03, 0.02, 0.10, 0.01, 0.11]])
		# Observation matrix: 16 rows of 2 values.
		obs = np.array([[0.01,0.99], \
		                [0.58,0.42], \
		                [0.48,0.52], \
		                [0.58,0.42], \
		                [0.37,0.63], \
		                [0.33,0.67], \
		                [0.51,0.49], \
		                [0.28,0.72], \
		                [0.35,0.65], \
		                [0.61,0.39], \
		                [0.97,0.03], \
		                [0.87,0.13], \
		                [0.46,0.54], \
		                [0.55,0.45], \
		                [0.23,0.77], \
		                [0.76,0.24]])
		# Note the argument order used by this hmm module: pi, trans, obs.
		self.d = hmm.HMM(pi, trans, obs)
Exemplo n.º 21
0
    def build_hmm_from_feature_matrices(self,
                                        feature_matrices,
                                        nstates,
                                        max_iterations=200,
                                        convergence_threshold=0.001,
                                        show_plots=False):
        """Train an HMM on ``feature_matrices`` in a worker thread.

        The three work tables (``__a``, ``__b``, ``__g``) are reset to the
        log-zero value with shape ``(nstates, feature_matrices[0].shape[1])``
        and the iteration counter is cleared.  Training runs in a separate
        thread; with ``show_plots`` set, an animation is created beforehand
        and ``plt.show()`` is called while training runs.  After the thread
        has finished, the parameters it put on the result queue initialise
        the returned ``hmm.HMM``.
        """
        n_columns = feature_matrices[0].shape[1]
        table_shape = (nstates, n_columns)
        self.__a = np.full(table_shape, self.__log_zero)
        self.__b = np.full(table_shape, self.__log_zero)
        self.__g = np.full(table_shape, self.__log_zero)
        self.__iteration = 0

        if show_plots:
            self.__create_plots()
            # Redraw once per second (interval is in milliseconds).
            self.__animation = animation.FuncAnimation(
                self.__fig,
                self.__update_plots,
                interval=1000,
                blit=False,
                repeat=False)

        # The worker hands its result back through this queue.
        result_queue = Queue.Queue()
        worker_args = (feature_matrices, nstates, result_queue,
                       max_iterations, convergence_threshold)
        worker = Thread(target=self.__train_hmm, args=worker_args)
        worker.start()

        if show_plots:
            plt.show()

        worker.join()

        trained = hmm.HMM()
        trained.initialize_from_hmm_parameters(result_queue.get())
        return trained
def main():
    """Demonstrate likelihood and Viterbi queries on a two-state DNA model.

    Builds an HMM with states S1/S2 emitting a/c/g/t, prints the output
    probability of the sequence 'accgta', several state likelihoods and the
    most likely state sequence.
    """
    logging.basicConfig(stream=sys.stdout, level=logging.DEBUG if DEBUG else logging.INFO)

    #hmm.test_hmm()

    # Model parameters; rows of both matrices are indexed by state (S1, S2).
    transition_probs = [ [0.7, 0.3], [0.4, 0.6] ]
    emission_probs = [[0.4, 0.2, 0.3, 0.1], [0.2, 0.4, 0.1, 0.3]]
    initial_probs = [0.6, 0.4]
    state_labels = ['S1', 'S2']
    emission_labels = ['a', 'c', 'g', 't']
    model = hmm.HMM(initial_probs, transition_probs, emission_probs, state_labels, emission_labels)
    # The observed sequence as a list of single-character labels.
    emission_seq_labels = [c for c in 'accgta']
    emission_idx_list = model._get_emission_idx_seq_from_label_seq(emission_seq_labels)
    print("O/p prob", model.calc_prob_output_sequence(emission_seq_labels))
    print(model.get_likelihood(5, 'S1', emission_seq_labels))
    # Cross-check: the likelihood must equal alpha_t divided by the total
    # output probability of the sequence.
    assert hmm.isclose(model.get_likelihood(5, 'S1', emission_seq_labels), model.alpha_t_helper(5, 0, emission_idx_list)/model.calc_prob_output_sequence(emission_seq_labels))
    print(model.get_likelihood(5, 'S2', emission_seq_labels))
    print(model.get_likelihood(3, 'S1', emission_seq_labels))
    print(model.get_likelihood(3, 'S2', emission_seq_labels))
    #print(model.alpha_t_helper(5, 1, emission_idx_list))

    pretty_print_header("Viterbi algorith on ACCGTA to get most likely sequence of states:")
    print(model.get_most_likely_state_seq_from_labels(emission_seq_labels))
    """
Exemplo n.º 23
0
    # Label <-> index lookups for the observation symbols.
    observations_label_index, observations_index_label = generate_index_map(
        observations)
    # {'cold': 1, 'dizzy': 2, 'normal': 0}

    # Transition matrix, indexed by state on both axes.
    A = convert_map_to_matrix(transition_probability, states_label_index,
                              states_label_index)
    print A
    # Emission matrix, indexed by state and then by observation.
    B = convert_map_to_matrix(emission_probability, states_label_index,
                              observations_label_index)
    print B
    observations_index = convert_observations_to_index(
        observations, observations_label_index)
    Pi = convert_map_to_vector(start_probability, states_label_index)
    print Pi

    h = hmm.HMM(A, B, Pi)
    V, p = h.viterbi(observations_index)
    # Header row: one right-aligned column per observation label.
    print " " * 7, " ".join(
        ("%10s" % observations_index_label[i]) for i in observations_index)
    # One row of V per state; only the first two states are printed.
    for s in range(0, 2):
        print "%7s: " % states_index_label[s] + " ".join("%10s" % ("%f" % v)
                                                         for v in V[s])
    print '\nThe most possible states and probability are:'
    p, ss = h.state_path(observations_index)
    # The trailing comma keeps the state labels on a single line.
    for s in ss:
        print states_index_label[s],
    print p

    # run a baum_welch_train
    observations_data, states_data = h.simulate(100)
    # print observations_data
Exemplo n.º 24
0
import hmm as HiddenMarkov
import gc
import utils as utls
import sys

# Script: build an HMM from a dataset file, run Viterbi on the first
# testDataSize erroneous sentences and accumulate evaluation counts.

# These two defaults are immediately overridden by the command-line
# arguments read a few lines below.
datasetFile = "dataset.txt"
outFile = "out.txt"
testDataSize = 200

datasetFile = sys.argv[1]
outFile = sys.argv[2]

print("initializing hmm...")
hiddenMarkovModel = HiddenMarkov.HMM(datasetFile)

print("Correcting the sentences...")
results = list()
data = hiddenMarkovModel.errorFullDataSet[:testDataSize]
dataLength = len(data)
for i in range(dataLength):
    temp = hiddenMarkovModel.viterbi(data[i])
    results.append(temp)
    # Force a garbage collection every 100 sentences (including i == 0).
    if not (i % 100):
        gc.collect()

# evaluation: compare each input sentence with its Viterbi result
correctEstimatedWordCount = 0
wrongTypedWordCount = 0
for i in range(dataLength):
    counts = utls.evaluateSentence(data[i], results[i])
    correctEstimatedWordCount = correctEstimatedWordCount + counts[1]
Exemplo n.º 25
0
def main():
    """Run the HMM self-checks: load/dump, forward, Viterbi, backward, EM.

    Each stage loads a model from ``models/``, processes observations from
    ``data/`` and compares the output with hard-coded reference values
    and/or gold files under ``gold/``, printing a pass or error message.
    Only a failed load/dump round trip aborts the run (exit status -1).
    """

    model = hmm.HMM()

    print "-------------Preliminary setup----------------"
    if True:
        # Round trip: load a model, dump it again, and compare the files.
        existingFile = 'models/two_english'
        newFile = "two_english_test"
        model.load(existingFile)
        model.dump(newFile)
        eq = compareFiles(newFile + ".emit", existingFile + ".emit", True)
        # eq2 is only bound when eq is true; the `eq and eq2` test below
        # short-circuits, so the unbound name is never read.
        if eq:
            eq2 = compareFiles(newFile + ".trans", existingFile + ".trans",
                               True)
        if eq and eq2:
            print "HMM read/write works correctly"
        else:
            print "HMM read/write failed!"
            sys.exit(-1)

    print "-------------Forward Algorithm----------------"
    if True:
        model.load("models/partofspeech.browntags.trained")
        obsfilebase = "data/ambiguous_sents"
        corpus = observations.load_observations(obsfilebase + ".obs")
        outputfile = obsfilebase + '.forwardprob'
        # Collects the 'VERB' entry at index 2 of every forward result.
        o2 = []
        with open(outputfile, 'w') as o:
            for observation in corpus:
                res = model.forward(observation)
                if res is not None:
                    o2.append(res[2]['VERB'])
                o.write(str(model.forward_probability(observation)) + '\n')

        # Reference values for the collected 'VERB' entries.
        refo2 = [
            0.0, 0.0, 0.0, 3.653679756807993e-11, 0.0, 0.0,
            4.312565970191802e-12, 3.654779278846958e-11,
            1.6086166116798018e-07, 0.0, 0.0
        ]
        for i in range(len(refo2)):
            if len(o2) <= i:
                print "Error: Nothing returned from Forward Algorithm!"
            elif abs(o2[i] - refo2[i]) > 1e-14:
                print "Error in Forward Algorithm: Probability of Verb at t=2 should be " + str(
                    refo2[i]) + " not " + str(o2[i])

        eq = compareFiles(outputfile, "gold/ambiguous_sents.prob")
        if eq:
            print "Forward Algorithm passed basic sanity check"
        else:
            print "Error in Overall Forward Probability"

    print "-------------Viterbi Algorithm----------------"
    if True:
        model.load("models/partofspeech.browntags.trained")
        obsfilebase = "data/ambiguous_sents"
        corpus = observations.load_observations(obsfilebase + ".obs")
        outputfile = obsfilebase + '.tagged.obs'

        with codecs.open(outputfile, 'w', 'utf8') as o:
            for observation in corpus:
                stateseq = model.viterbi(observation)
                # Observations without a decoded sequence are not written.
                if stateseq is None:
                    continue
                observation.stateseq = stateseq  # adds most likely states as
                # 'tags' on observation
                o.write(str(observation))

        eq = compareFiles(outputfile, "gold/ambiguous_sents.tagged.obs")
        if eq:
            print "Viterbi Completed Successfully"
        else:
            print "Error in Viterbi"

    print "-------------Backwards Algorithm----------------"
    if True:
        model.load("models/partofspeech.browntags.trained")
        obsfilebase = "data/ambiguous_sents"
        corpus = observations.load_observations(obsfilebase + ".obs")
        outputfile = obsfilebase + '.backwardprob'
        # Collects the 'VERB' entry at index 2 of every backward result.
        o2 = []
        with open(outputfile, 'w') as o:
            for observation in corpus:
                res = model.backward(observation)
                if res is not None:
                    o2.append(res[2]['VERB'])
                o.write(str(model.backward_probability(observation)) + '\n')

        # Reference values for the collected 'VERB' entries.
        refo2 = [
            2.3589871535491068e-07, 1.8514313765823803e-13,
            2.140512612882977e-06, 2.0333825508441356e-06,
            4.339252852607301e-10, 1.4033802247403003e-09,
            1.4162117145319527e-08, 5.011761202650785e-06,
            2.0776974177243364e-09, 5.391970636677047e-07,
            2.147210857790581e-07
        ]
        for i in range(len(refo2)):
            if len(o2) <= i:
                print "Error: Nothing returned from Backward Algorithm!"
            elif abs(o2[i] - refo2[i]) > 1e-14:
                print "Error in Backward Algorithm: Probability of Verb at t=2 should be " + str(
                    refo2[i]) + " not " + str(o2[i])

        # The backward totals are checked against the same gold file as the
        # forward totals.
        eq = compareFiles(outputfile, "gold/ambiguous_sents.prob")
        if eq:
            print "Backward Algorithm passed basic sanity check"
        else:
            print "Error in Overall Backward Probability"

    print "------------------EM--------------------"
    if True:
        modelbase = "models/two_english"
        model.load(modelbase)
        obsfilename = "english_words"
        obsfilebase = "data/" + obsfilename
        corpus = observations.load_observations(obsfilebase + ".obs")
        log_likelihood = model.learn_unsupervised(corpus)
        #write the trained model
        ref_likelihood = -105954.94191  # -152860.669251 in base 2
        if log_likelihood is None or abs(log_likelihood -
                                         ref_likelihood) > 0.05:
            print "Error: likelihood should be " + str(ref_likelihood) + \
                  " but is " + str(log_likelihood)
        finalprefix = modelbase + '.' + obsfilename + '.trained'
        model.dump(finalprefix)
        goldprefix = "gold/two_english.english_words.trained"

        # Reload the dumped model and the gold model and compare them with
        # a tolerance of 1e-13.
        learnedModel = hmm.HMM()
        learnedModel.load(finalprefix)

        refModel = hmm.HMM()
        refModel.load(goldprefix)

        eq = learnedModel.isEqual(refModel, 1e-13)
        if eq:
            print "EM implemented correctly!"
        else:
            print "Error in EM"
Exemplo n.º 26
0
 def test_create(self):
     """Check that a hand-built HMM matches the one made by the factory.

     Builds the HMM for the phone ('OW_four', 'F_four', 'R_four') directly
     from the model definition and compares its first element with that of
     the HMM returned by ``self.factory.create`` for the same phone.
     """
     pid = self.acmod.mdef.phone_id('OW_four', 'F_four', 'R_four')
     h1 = hmm.HMM(self.acmod.mdef.pid2sseq(pid),
                  self.acmod.tmat[self.acmod.mdef.pid2tmat(pid)])
     h2 = self.factory.create('OW_four', 'F_four', 'R_four')
     # assertEqual replaces the deprecated alias assertEquals, which was
     # removed from unittest in Python 3.12.
     self.assertEqual(h1[0], h2[0])
Exemplo n.º 27
0
# -*- coding:utf-8 -*-
# Filename: test_weather.py
# Author:hankcs
# Date: 2016-08-06 PM6:04
import numpy as np

import hmm
import random

# Constant-filled starting parameters handed to hmm.HMM: a 2x2 matrix A,
# a 2x6 matrix B and a length-2 vector pi.
# NOTE(review): presumably A = transitions, B = emissions, pi = initial
# state distribution -- confirm against hmm.HMM's signature.
# NOTE(review): each row of B sums to 6 * 0.16 = 0.96, not 1.0 -- confirm
# whether that is intentional.
A = np.full((2, 2), 0.5)
B = np.full((2, 6), 0.16)
pi = np.full(2, 0.5)
h = hmm.HMM(A, B, pi)

# print observations_data
# print states_data
for i in range(100):
    size = 100
    observations_data = np.empty([size], dtype=int)
    for j in range(size):
        rand = random.randint(1, 100)
        if rand <= 10:
            observations_data[j] = 0
        elif rand <= 20:
            observations_data[j] = 1
        elif rand <= 30:
            observations_data[j] = 2
        elif rand <= 40:
            observations_data[j] = 3
        elif rand <= 50:
Exemplo n.º 28
0
 def test_create(self):
     """Construct an HMM directly from the acoustic model for ID 352."""
     # The same hard-coded ID selects both the senone sequence and the
     # transition matrix.
     pid = 352
     mdef = self.acmod.mdef
     sseq = mdef.pid2sseq(pid)
     tmat = self.acmod.tmat[mdef.pid2tmat(pid)]
     h1 = hmm.HMM(sseq, tmat)
Exemplo n.º 29
0
    return m


# Convert the label-keyed probability maps and the observation sequence
# into index-based structures, echoing each one as it is built.
A = convert_map_to_matrix(
    transition_probability, states_label_index, states_label_index)
print(A)
B = convert_map_to_matrix(
    emission_probability, states_label_index, observations_label_index)
print(B)
observations_index = convert_observations_to_index(
    observations, observations_label_index)
print(observations_index)
pi = convert_map_to_vector(start_probability, states_label_index)
print(pi)

h = hmm.HMM(A, B, pi)
V, p = h.viterbi(observations_index)

# Header row: one right-aligned, 10-wide observation label per column.
print(" " * 7,
      " ".join("%10s" % observations_index_label[i]
               for i in observations_index))
# One row per state (the first two state indices), values formatted "%f".
for s in range(2):
    print("%7s: " % states_index_label[s]
          + " ".join("%10s" % ("%f" % v) for v in V[s]))

print('\nThe most possible states and probability are:')
# NOTE: p is rebound here; the value returned by viterbi() is discarded.
p, ss = h.state_path(observations_index)
for s in ss:
    print(states_index_label[s])
print(p)

# run a baum_welch_train
observations_data, states_data = h.simulate(10)
Exemplo n.º 30
0
def main():
    """Run the HMM self-checks in order, printing a verdict for each.

    Sections, in order: model read/write round trip, forward algorithm,
    supervised learning, Viterbi decoding, backward algorithm and
    unsupervised (EM) learning.  Every section loads a model into the one
    shared ``hmm.HMM`` instance, writes its result to disk and compares it
    with hard-coded reference values and/or reference files under
    ``gold/``.

    Only a failed read/write round trip aborts the run (``sys.exit(-1)``);
    every other mismatch is just reported on stdout.
    """
    # NOTE(review): each section sits under ``if True:``, presumably so it
    # can be switched off by hand while debugging -- confirm before removing.
    model = hmm.HMM()

    print("-------------Preliminary setup----------------")
    if True:
        source_prefix = 'models/two_english'
        copy_prefix = "two_english_test"
        model.load(source_prefix)
        model.dump(copy_prefix)
        # The .trans files are only compared once the .emit files matched.
        round_trip_ok = (
            compareFiles(copy_prefix + ".emit", source_prefix + ".emit", True)
            and compareFiles(copy_prefix + ".trans", source_prefix + ".trans",
                             True))
        if not round_trip_ok:
            print("HMM read/write failed!")
            sys.exit(-1)
        else:
            print("HMM read/write works correctly")

    print("-------------Forward Algorithm----------------")
    if True:
        model.load("models/encoding.message.trained")
        obs_base = "data/message_short"
        corpus = observations.load_observations(obs_base + ".obs")
        forward_path = obs_base + '.forwardprob'
        observed = []
        with open(forward_path, 'w') as out:
            for sentence in corpus:
                trellis = model.forward(sentence)
                if trellis is not None:
                    # Entry checked against the references below
                    # (per the error message: state 'e' at t=1).
                    observed.append(trellis[1]['e'])
                out.write(str(model.forward_probability(sentence)) + '\n')
        #print(observed)
        expected_values = [
            2.281674056874541e-61, 8.258943021002516e-289,
            4.463852239881595e-71, 7.902572774713472e-90,
            2.8349675683293275e-292, 0.00021461328799181662,
            2.8704406377717645e-145, 5.979495606734988e-294,
            1.9000669411442752e-05, 2.02982042347432e-87,
            2.116898876762055e-70, 0.0, 0.002086363130808398,
            3.156654312658676e-293
        ]
        for idx, expected in enumerate(expected_values):
            if idx >= len(observed):
                print("Error: Nothing returned from Forward Algorithm!")
            elif abs(observed[idx] - expected) > 1e-12:
                print(
                    "Error in Forward Algorithm: Probability of e at t=1 should be "
                    + str(expected) + " not " + str(observed[idx]))

        if compareFiles(forward_path, "gold/message_short.forwardprob"):
            print("Forward Algorithm passed basic sanity check")
        else:
            print("Error in Overall Forward Probability")

    print("-------------Supervised Learning----------------")
    if True:
        model_prefix = "models/partofspeech"
        model.load(model_prefix)
        obs_base = "data/browntags"
        corpus = observations.load_observations(obs_base + ".obs")
        model.learn_supervised(corpus)
        trained_prefix = model_prefix + '.student.trained'
        model.dump(trained_prefix)
        gold_prefix = "gold/partofspeech.browntags.trained"

        # Reload both models from disk so the comparison covers the dump.
        learned = hmm.HMM()
        learned.load(trained_prefix)

        reference = hmm.HMM()
        reference.load(gold_prefix)

        if learned.isEqual(reference, 1e-8):
            print("Supervised learning implemented correctly!")
        else:
            print("Error in supervisedlearning")

    print("-------------Viterbi Algorithm----------------")
    if True:
        model.load("models/encoding.message.trained")
        obs_base = "data/message"
        corpus = observations.load_observations(obs_base + ".obs")
        tagged_path = obs_base + '.tagged.obs'

        with codecs.open(tagged_path, 'w', 'utf8') as out:
            for sentence in corpus:
                best_path = model.viterbi(sentence)
                if best_path is not None:
                    # Attach the most likely states as 'tags' on the
                    # observation before writing it out.
                    sentence.stateseq = best_path
                    out.write(str(sentence))

        if compareFiles(tagged_path, "gold/message.tagged.obs"):
            print("Viterbi Completed Successfully")
        else:
            print("Error in Viterbi")

    print("-------------Backwards Algorithm----------------")
    if True:
        model.load("models/encoding.message.trained")
        obs_base = "data/message_short"
        corpus = observations.load_observations(obs_base + ".obs")
        backward_path = obs_base + '.backwardprob'
        observed = []
        with open(backward_path, 'w') as out:
            for sentence in corpus:
                trellis = model.backward(sentence)
                if trellis is not None:
                    observed.append(trellis[1]['e'])
                out.write(str(model.backward_probability(sentence)) + '\n')
        expected_values = [
            1.316154528121009e-06, 1.0, 1.0, 1.2842129295716715e-05, 1.0, 1.0,
            0.0001629601877945561, 0.03761379913095424, 2.464196513136796e-06,
            4.079841777243271e-12, 1.0, 4.2966181326644535e-08, 1.0, 1.0
        ]
        for idx, expected in enumerate(expected_values):
            if idx >= len(observed):
                print("Error: Nothing returned from Backward Algorithm!")
            elif abs(observed[idx] - expected) > 1e-10:
                print(
                    "Error in Backward Algorithm: Probability of e at t=1 should be "
                    + str(expected) + " not " + str(observed[idx]))

        if compareFiles(backward_path, "gold/message_short.backwardprob"):
            print("Backward Algorithm passed basic sanity check")
        else:
            print("Error in Overall Backward Probability")

    print("------------------EM--------------------")
    if True:
        model_prefix = "models/two_english"
        model.load(model_prefix)
        obs_name = "english_words"
        obs_base = "data/" + obs_name
        corpus = observations.load_observations(obs_base + ".obs")
        log_likelihood = model.learn_unsupervised(corpus)
        ref_likelihood = -105954.94191  # -152860.669251 in base 2
        likelihood_off = (log_likelihood is None
                          or abs(log_likelihood - ref_likelihood) > 0.05)
        if likelihood_off:
            print("Error: likelihood should be " + str(ref_likelihood) +
                  " but is " + str(log_likelihood))
        # Write the trained model, then reload it for the comparison.
        trained_prefix = model_prefix + '.' + obs_name + '.trained'
        model.dump(trained_prefix)
        gold_prefix = "gold/two_english.english_words.trained"

        learned = hmm.HMM()
        learned.load(trained_prefix)

        reference = hmm.HMM()
        reference.load(gold_prefix)

        if learned.isEqual(reference, 1e-10):
            print("EM implemented correctly!")
        else:
            print("Error in EM")