Пример #1
0
    def predict(self, word):
        """Decode ``word`` into a label string with underscores removed.

        The input is vectorised with ``self.vec``, scored against
        ``self.coef_``, and the best label path is obtained from
        ``viterbi.decode`` using the transition, initial and final
        intercepts stored on the instance. Each decoded index is mapped
        through ``self.classes_`` and the labels are concatenated.

        NOTE(review): ``.toarray()`` suggests the transformed features are a
        sparse matrix -- confirm against the vectoriser in use.
        """
        features = self.vec.transform(word)
        emission_scores = features.dot(self.coef_.T).toarray()

        best_path = viterbi.decode(
            emission_scores,
            self.intercept_trans_,
            self.intercept_init_,
            self.intercept_final_,
        )
        labels = ''.join(self.classes_[idx] for idx in best_path)

        # Every underscore is dropped from the joined labels.
        return re.sub('_', '', labels)
Пример #2
0
    def predict(self, word):
        """Return the decoded labels for ``word`` as one string.

        Steps, in order: transform ``word`` with ``self.vec``; take the dot
        product with the transposed ``self.coef_`` and densify it; run
        ``viterbi.decode`` with the instance's transition/initial/final
        intercepts; look each decoded index up in ``self.classes_``; join
        the results and strip all underscores.
        """
        scores = self.vec.transform(word).dot(self.coef_.T).toarray()

        decode_args = (
            scores,
            self.intercept_trans_,
            self.intercept_init_,
            self.intercept_final_,
        )
        path = viterbi.decode(*decode_args)

        joined = ''.join([self.classes_[k] for k in path])
        return re.sub('_', '', joined)
Пример #3
0
def decode_sentence(s, model):
    """Decode sentence ``s`` with ``model``.

    Thin wrapper: forwards both arguments unchanged to ``decode`` and
    returns whatever it returns.
    """
    decoded = decode(s, model)
    return decoded
Пример #4
0
from corpus import get_data  #,get_lexicon
from hmm import get_HMM
from viterbi import decode
import random

# Smoke test: build a model from the first 200 corpus entries, decode one
# randomly chosen entry and print it token by token.
#train_set = [ random.choice(sents) for i in range(0,200) ]
train_set = get_data("corpus.xml")[:200]
model = get_HMM(train_set)

#s = sents[4023]
s = random.choice(train_set)
print("phrase: ", s["num"])
s["decoded"] = decode(s, model)

# NOTE(review): forget_words is not defined in the visible part of this
# script -- confirm it is provided elsewhere before running.
for i in range(s["len"]):
    # Tokens present in forget_words get a "*UNK*" marker in the first
    # column; all other rows get blank padding of the same width.
    marker = "*UNK*\t" if s["tokens"][i] in forget_words else "     \t"
    print(marker, s["tokens"][i], "\t", s["tags"][i], "\t", s["decoded"][i])
Пример #5
0
	for i in xrange(num_states):
		trans_probs.append([0]*num_states)
	for i in xrange(num_states):
		trans_probs[i] = normalise(dict_trans[i],num_states)
	return trans_probs

def get_obs_probs(dict_obsInState, num_states, num_obs):
	"""Build the per-state observation table.

	Args:
		dict_obsInState: indexable by state number ``0 .. num_states - 1``;
			each entry is handed to ``normalise``.
		num_states: number of states, i.e. number of rows produced.
		num_obs: passed through to ``normalise`` as its second argument
			(presumably the number of distinct observations -- confirm
			against ``normalise``).

	Returns:
		A list with ``num_states`` entries, where entry ``i`` is
		``normalise(dict_obsInState[i], num_obs)``.
	"""
	# Every row comes straight from normalise(), so no zero-filled
	# placeholder rows are allocated first. range() (not xrange()) keeps
	# this runnable on both Python 2 and Python 3.
	return [normalise(dict_obsInState[i], num_obs) for i in range(num_states)]


# Driver: load "train.txt", derive the initial/transition/observation tables
# from it, and run decode() on the first sequence.
l = get_data("train.txt")
list_seq = get_seq(l)
a = process_seq(list_seq)
# NOTE(review): the layout of `a` is not visible here. From the uses below,
# len(a[1]) is used as the state count and len(a[0]) as the observation
# count; a[2], a[-2] and a[-1] feed the transition, observation and initial
# tables respectively -- confirm against process_seq.
first_seq = list_seq[0]

init_probs = get_initial_probs(a[-1],len(a[1]))
trans_probs = get_trans_probs(a[2],len(a[1]))
obs_probs = get_obs_probs(a[-2],len(a[1]),len(a[0]))
num_states = len(a[1])
num_observs = len(a[0]) 
# Look up the first element of every item of the first sequence in a[0].
obs_seq = [a[0][i[0]] for i in first_seq]
time_steps = len(obs_seq)
# Note: this rebinds `l`, replacing the data loaded at the top of the script
# with the result of decode().
l = decode(num_observs,num_states,time_steps,init_probs,trans_probs,obs_probs,obs_seq)