def evaluate_EM(self, dataset):
    """Return the corpus accuracy after relabelling the decoded states.

    The dataset is decoded with ``viterbi_decode_corpus``; a confusion
    matrix between ``dataset.seq_list`` and those predictions is built
    with ``cm.build_confusion_matrix`` and handed to
    ``cm.get_best_assignment``, whose result is indexed by predicted
    state to obtain the replacement label.  Copies of the predicted
    sequences are relabelled that way and scored by ``evaluate_corpus``.

    The original inline note describes this as evaluating accuracy at
    the initial iteration.
    """
    decoded = self.viterbi_decode_corpus(dataset)
    matrix = cm.build_confusion_matrix(
        dataset.seq_list,
        decoded,
        self.get_num_states(),
        self.get_num_states(),
    )
    relabel = cm.get_best_assignment(matrix)

    relabelled = []
    # Walk the reference sequences only for their index; the work is done
    # on copies so the decoded predictions themselves stay untouched.
    for idx, _ in enumerate(dataset.seq_list):
        remapped = decoded[idx].copy_sequence()
        for pos, state in enumerate(remapped.y):
            remapped.y[pos] = relabel[state]
        relabelled.append(remapped)

    return self.evaluate_corpus(dataset, relabelled)
# Build the features, then train an online CRF on the training sequences.
feature_mapper.build_features()
crf_online = crfo.CRFOnline(corpus.word_dict, corpus.tag_dict, feature_mapper)
crf_online.num_epochs = 20
crf_online.train_supervised(train_seq)

# You will receive feedback when each epoch is finished; note that running
# the 20 epochs might take a while. After training is done, evaluate the
# learned model on the training, development and test sets.
pred_train = crf_online.viterbi_decode_corpus(train_seq)
pred_dev = crf_online.viterbi_decode_corpus(dev_seq)
pred_test = crf_online.viterbi_decode_corpus(test_seq)

eval_train = crf_online.evaluate_corpus(train_seq, pred_train)
eval_dev = crf_online.evaluate_corpus(dev_seq, pred_dev)
eval_test = crf_online.evaluate_corpus(test_seq, pred_test)

# Single-argument parenthesised print: identical output on Python 2 and
# valid syntax on Python 3 (the bare print statement is not).
print(
    "CRF - ID Features Accuracy Train: %.3f Dev: %.3f Test: %.3f"
    % (eval_train, eval_dev, eval_test)
)
# Output recorded in the original script:
# CRF - ID Features Accuracy Train: 0.949 Dev: 0.846 Test: 0.858

# Confusion matrix calculation.
# NOTE(review): these imports sit mid-script in the original; they are left
# in place because the top of the file is not visible from here.
import lxmls.sequences.confusion_matrix as cm
import matplotlib.pyplot as plt

confusion_matrix = cm.build_confusion_matrix(
    test_seq.seq_list,
    pred_test,
    len(corpus.tag_dict),
    crf_online.get_num_states(),
)
# range() rather than the Python-2-only xrange(); this matches how the HMM
# scripts call the same plotting helper.
cm.plot_confusion_bar_graph(
    confusion_matrix,
    corpus.tag_dict,
    range(crf_online.get_num_states()),
    "Confusion matrix",
)
# Let the HMM choose a smoothing value from the candidates, given the train
# and dev sets, then retrain with it.
# NOTE(review): "best_smothing" is misspelled, but it is a module-level name
# that code outside this section may read, so the spelling is kept.
best_smothing = hmm.pick_best_smoothing(train_seq, dev_seq, [10, 1, 0.1, 0])
hmm.train_supervised(train_seq, smoothing=best_smothing)

# Decode the test set with both decoders and score each against the reference.
viterbi_pred_test = hmm.viterbi_decode_corpus(test_seq)
posterior_pred_test = hmm.posterior_decode_corpus(test_seq)
eval_viterbi_test = hmm.evaluate_corpus(test_seq, viterbi_pred_test)
eval_posterior_test = hmm.evaluate_corpus(test_seq, posterior_pred_test)

# Single-argument parenthesised print: identical output on Python 2 and
# valid syntax on Python 3 (the bare print statement is not).
print(
    "Best Smoothing %f -- Test Set Accuracy: Posterior Decode %.3f, Viterbi Decode: %.3f"
    % (best_smothing, eval_posterior_test, eval_viterbi_test)
)

# Confusion matrix of the Viterbi predictions on the test set.
confusion_matrix = cm.build_confusion_matrix(
    test_seq.seq_list,
    viterbi_pred_test,
    len(corpus.tag_dict),
    hmm.get_num_states(),
)
cm.plot_confusion_bar_graph(
    confusion_matrix,
    corpus.tag_dict,
    range(hmm.get_num_states()),
    "Confusion matrix",
)

print("------------")
print("Exercise 2.10")
print("------------")

# Train with EM.
# NOTE(review): 0.1 and 20 are presumably the smoothing value and the number
# of EM iterations -- confirm against train_EM's signature.
hmm.train_EM(train_seq, 0.1, 20, evaluate=True)

# Re-decode and re-score the test set with the EM-trained model.
viterbi_pred_test = hmm.viterbi_decode_corpus(test_seq)
posterior_pred_test = hmm.posterior_decode_corpus(test_seq)
eval_viterbi_test = hmm.evaluate_corpus(test_seq, viterbi_pred_test)
eval_posterior_test = hmm.evaluate_corpus(test_seq, posterior_pred_test)
# Score the current model on the test set. viterbi_pred_test is computed
# before this section begins.
posterior_pred_test = hmm.posterior_decode_corpus(test_seq)
eval_viterbi_test = hmm.evaluate_corpus(test_seq, viterbi_pred_test)
eval_posterior_test = hmm.evaluate_corpus(test_seq, posterior_pred_test)

# Single-argument parenthesised print: identical output on Python 2 and
# valid syntax on Python 3 (the bare print statement is not).
print(
    "Test Set Accuracy: Posterior Decode %.3f, Viterbi Decode: %.3f"
    % (eval_posterior_test, eval_viterbi_test)
)

# Let the HMM choose a smoothing value from the candidates, given the train
# and dev sets, then retrain with it.
# NOTE(review): "best_smothing" is misspelled, but it is a module-level name
# that code outside this section may read, so the spelling is kept.
best_smothing = hmm.pick_best_smoothing(train_seq, dev_seq, [10, 1, 0.1, 0])
hmm.train_supervised(train_seq, smoothing=best_smothing)

viterbi_pred_test = hmm.viterbi_decode_corpus(test_seq)
posterior_pred_test = hmm.posterior_decode_corpus(test_seq)
eval_viterbi_test = hmm.evaluate_corpus(test_seq, viterbi_pred_test)
eval_posterior_test = hmm.evaluate_corpus(test_seq, posterior_pred_test)
print(
    "Best Smoothing %f -- Test Set Accuracy: Posterior Decode %.3f, Viterbi Decode: %.3f"
    % (best_smothing, eval_posterior_test, eval_viterbi_test)
)

# Confusion matrix of the Viterbi predictions on the test set.
confusion_matrix = cm.build_confusion_matrix(
    test_seq.seq_list,
    viterbi_pred_test,
    len(corpus.tag_dict),
    hmm.get_num_states(),
)
cm.plot_confusion_bar_graph(
    confusion_matrix,
    corpus.tag_dict,
    range(hmm.get_num_states()),
    "Confusion matrix",
)

print("------------")
print("Exercise 2.10")
print("------------")

# Train with EM.
# NOTE(review): 0.1 and 20 are presumably the smoothing value and the number
# of EM iterations -- confirm against train_EM's signature.
hmm.train_EM(train_seq, 0.1, 20, evaluate=True)

# Re-decode and re-score the test set with the EM-trained model.
viterbi_pred_test = hmm.viterbi_decode_corpus(test_seq)
posterior_pred_test = hmm.posterior_decode_corpus(test_seq)
eval_viterbi_test = hmm.evaluate_corpus(test_seq, viterbi_pred_test)
eval_posterior_test = hmm.evaluate_corpus(test_seq, posterior_pred_test)