示例#1
0
 def evaluate_EM(self, dataset):
     """Return the corpus accuracy of the Viterbi decoding after relabelling.

     The dataset is decoded with ``viterbi_decode_corpus``; a confusion
     matrix between ``dataset.seq_list`` and the decoded sequences is built
     and handed to ``cm.get_best_assignment``.  Every predicted state is then
     replaced by its entry in that assignment (on a ``copy_sequence()`` of
     the prediction) and the relabelled predictions are scored with
     ``evaluate_corpus``.
     """
     decoded = self.viterbi_decode_corpus(dataset)
     matrix = cm.build_confusion_matrix(
         dataset.seq_list,
         decoded,
         self.get_num_states(),
         self.get_num_states(),
     )
     assignment = cm.get_best_assignment(matrix)

     relabelled = []
     # One relabelled prediction per entry of dataset.seq_list; only the
     # index is needed, the gold sequence itself is not read here.
     for idx, _ in enumerate(dataset.seq_list):
         remapped = decoded[idx].copy_sequence()
         for pos, state in enumerate(remapped.y):
             remapped.y[pos] = assignment[state]
         relabelled.append(remapped)

     return self.evaluate_corpus(dataset, relabelled)
示例#2
0
 def evaluate_EM(self, dataset):
     """Return the corpus accuracy of the Viterbi decoding after relabelling.

     The dataset is decoded with ``viterbi_decode_corpus``; a confusion
     matrix between ``dataset.seq_list`` and the decoded sequences is built
     and handed to ``cm.get_best_assignment``.  Every predicted state is then
     replaced by its entry in that assignment (on a ``copy_sequence()`` of
     the prediction) and the relabelled predictions are scored with
     ``evaluate_corpus``.
     """
     decoded = self.viterbi_decode_corpus(dataset)
     matrix = cm.build_confusion_matrix(
         dataset.seq_list,
         decoded,
         self.get_num_states(),
         self.get_num_states(),
     )
     assignment = cm.get_best_assignment(matrix)

     relabelled = []
     # One relabelled prediction per entry of dataset.seq_list; only the
     # index is needed, the gold sequence itself is not read here.
     for idx, _ in enumerate(dataset.seq_list):
         remapped = decoded[idx].copy_sequence()
         for pos, state in enumerate(remapped.y):
             remapped.y[pos] = assignment[state]
         relabelled.append(remapped)

     return self.evaluate_corpus(dataset, relabelled)
示例#3
0
# Build the features before the mapper is handed to the CRF.
feature_mapper.build_features()

# Online CRF over the corpus word/tag dictionaries, trained for 20 epochs.
crf_online = crfo.CRFOnline(corpus.word_dict, corpus.tag_dict, feature_mapper)
crf_online.num_epochs = 20
crf_online.train_supervised(train_seq)

'''
You will receive feedback when each epoch is finished, note that running the 20 epochs might take a while.
After training is done, evaluate the learned model on the training, development and test sets.
'''

# Viterbi-decode each split, then score the predictions against that split.
pred_train = crf_online.viterbi_decode_corpus(train_seq)
pred_dev = crf_online.viterbi_decode_corpus(dev_seq)
pred_test = crf_online.viterbi_decode_corpus(test_seq)
eval_train = crf_online.evaluate_corpus(train_seq, pred_train)
eval_dev = crf_online.evaluate_corpus(dev_seq, pred_dev)
eval_test = crf_online.evaluate_corpus(test_seq, pred_test)

# Single-argument print(...) emits the same text on Python 2 and Python 3,
# whereas the bare print statement is a SyntaxError on Python 3.
print("CRF - ID Features Accuracy Train: %.3f Dev: %.3f Test: %.3f" % (eval_train, eval_dev, eval_test))

# Output noted in the original script:
# CRF -
# ID Features Accuracy Train: 0.949 Dev: 0.846 Test: 0.858

# Confusion_matrix calculation
import lxmls.sequences.confusion_matrix as cm
import matplotlib.pyplot as plt
confusion_matrix = cm.build_confusion_matrix(test_seq.seq_list, pred_test, len(corpus.tag_dict),
                                             crf_online.get_num_states())

# range (not xrange) keeps this call valid on Python 3 as well as Python 2.
cm.plot_confusion_bar_graph(confusion_matrix, corpus.tag_dict,
                            range(crf_online.get_num_states()), 'Confusion matrix')
示例#4
0
# Choose a smoothing value from the candidate list using the train and dev
# splits (selection criterion lives in pick_best_smoothing -- presumably dev
# accuracy; confirm against the HMM implementation).
best_smothing = hmm.pick_best_smoothing(train_seq, dev_seq, [10, 1, 0.1, 0])

# Retrain with the chosen smoothing and compare both decoders on the test set.
hmm.train_supervised(train_seq, smoothing=best_smothing)
viterbi_pred_test = hmm.viterbi_decode_corpus(test_seq)
posterior_pred_test = hmm.posterior_decode_corpus(test_seq)
eval_viterbi_test = hmm.evaluate_corpus(test_seq, viterbi_pred_test)
eval_posterior_test = hmm.evaluate_corpus(test_seq, posterior_pred_test)
# Single-argument print(...) emits the same text on Python 2 and Python 3,
# whereas the bare print statement is a SyntaxError on Python 3.
print(
    "Best Smoothing %f --  Test Set Accuracy: Posterior Decode %.3f, Viterbi Decode: %.3f"
    % (best_smothing, eval_posterior_test, eval_viterbi_test)
)

# Confusion matrix of the Viterbi predictions on the test set.
confusion_matrix = cm.build_confusion_matrix(
    test_seq.seq_list, viterbi_pred_test, len(corpus.tag_dict), hmm.get_num_states()
)

cm.plot_confusion_bar_graph(confusion_matrix, corpus.tag_dict, range(hmm.get_num_states()), "Confusion matrix")

print("------------")
print("Exercise 2.10")
print("------------")

# Train with EM.
# NOTE(review): positional arguments 0.1 and 20 are presumably the smoothing
# value and the number of EM iterations -- confirm against train_EM.
hmm.train_EM(train_seq, 0.1, 20, evaluate=True)
viterbi_pred_test = hmm.viterbi_decode_corpus(test_seq)
posterior_pred_test = hmm.posterior_decode_corpus(test_seq)
eval_viterbi_test = hmm.evaluate_corpus(test_seq, viterbi_pred_test)
eval_posterior_test = hmm.evaluate_corpus(test_seq, posterior_pred_test)
示例#5
0
# Score posterior and Viterbi decoding on the test set.
# viterbi_pred_test is not assigned in this snippet; it is expected to have
# been computed earlier in the script.
posterior_pred_test = hmm.posterior_decode_corpus(test_seq)
eval_viterbi_test = hmm.evaluate_corpus(test_seq, viterbi_pred_test)
eval_posterior_test = hmm.evaluate_corpus(test_seq, posterior_pred_test)
# Single-argument print(...) emits the same text on Python 2 and Python 3,
# whereas the bare print statement is a SyntaxError on Python 3.
print("Test Set Accuracy: Posterior Decode %.3f, Viterbi Decode: %.3f" % (eval_posterior_test, eval_viterbi_test))

# Choose a smoothing value from the candidate list using the train and dev
# splits (selection criterion lives in pick_best_smoothing -- presumably dev
# accuracy; confirm against the HMM implementation).
best_smothing = hmm.pick_best_smoothing(train_seq, dev_seq, [10, 1, 0.1, 0])


# Retrain with the chosen smoothing and compare both decoders on the test set.
hmm.train_supervised(train_seq, smoothing=best_smothing)
viterbi_pred_test = hmm.viterbi_decode_corpus(test_seq)
posterior_pred_test = hmm.posterior_decode_corpus(test_seq)
eval_viterbi_test = hmm.evaluate_corpus(test_seq, viterbi_pred_test)
eval_posterior_test = hmm.evaluate_corpus(test_seq, posterior_pred_test)
print(
    "Best Smoothing %f --  Test Set Accuracy: Posterior Decode %.3f, Viterbi Decode: %.3f"
    % (best_smothing, eval_posterior_test, eval_viterbi_test)
)

# Confusion matrix of the Viterbi predictions on the test set.
confusion_matrix = cm.build_confusion_matrix(test_seq.seq_list, viterbi_pred_test,
                                             len(corpus.tag_dict), hmm.get_num_states())

cm.plot_confusion_bar_graph(confusion_matrix, corpus.tag_dict,
                            range(hmm.get_num_states()), 'Confusion matrix')


print("------------")
print("Exercise 2.10")
print("------------")

# Train with EM.
# NOTE(review): positional arguments 0.1 and 20 are presumably the smoothing
# value and the number of EM iterations -- confirm against train_EM.
hmm.train_EM(train_seq, 0.1, 20, evaluate=True)
viterbi_pred_test = hmm.viterbi_decode_corpus(test_seq)
posterior_pred_test = hmm.posterior_decode_corpus(test_seq)
eval_viterbi_test = hmm.evaluate_corpus(test_seq, viterbi_pred_test)
eval_posterior_test = hmm.evaluate_corpus(test_seq, posterior_pred_test)