def test_svm_online(sd, scr):
    """Check SVM accuracy on ``sd`` and ``scr`` against fixed reference values.

    A single SVM instance (regularizer set to 1.0) is trained first on ``sd``
    and then on ``scr``. After each training run the accuracy on that
    dataset's train and test splits is compared with a hard-coded expected
    value through ``allclose``.

    Args:
        sd: Dataset object exposing ``train_X``, ``train_y``, ``test_X`` and
            ``test_y`` (presumably the simple 2-D dataset fixture -- confirm).
        scr: Second dataset object with the same four attributes (presumably
            the sentiment corpus fixture -- confirm).
    """
    classifier = svmc.SVM()
    classifier.regularizer = 1.0  # This is lambda

    # --- first dataset: compared using the externally defined ``tolerance``
    weights_sd = classifier.train(sd.train_X, sd.train_y)

    train_accuracy = classifier.evaluate(
        sd.train_y, classifier.test(sd.train_X, weights_sd)
    )
    assert allclose(train_accuracy, 0.940000, tolerance)

    test_accuracy = classifier.evaluate(
        sd.test_y, classifier.test(sd.test_X, weights_sd)
    )
    assert allclose(test_accuracy, 0.960000, tolerance)

    # --- second dataset: compared using an explicit 0.01 instead
    weights_sc = classifier.train(scr.train_X, scr.train_y)

    train_accuracy = classifier.evaluate(
        scr.train_y, classifier.test(scr.train_X, weights_sc)
    )
    assert allclose(train_accuracy, 0.87875, 0.01)

    test_accuracy = classifier.evaluate(
        scr.test_y, classifier.test(scr.test_X, weights_sc)
    )
    # TODO: py2 gives 0.805, check the reason for the different value
    assert allclose(test_accuracy, 0.810000, 0.01)
def _run_classifier(dataset, fig, axis, title, make_classifier, label, color):
    """Train one classifier on ``dataset``, print its results and plot its line.

    The same classifier instance is used for training, prediction and
    evaluation on both the train and the test split.

    Args:
        dataset: Object exposing ``train_X``, ``train_y``, ``test_X``,
            ``test_y`` and an ``add_line`` method.
        fig: Figure handle passed through to ``dataset.add_line``.
        axis: Axis handle passed through to ``dataset.add_line``.
        title: Heading printed before the classifier is built.
        make_classifier: Zero-argument callable returning the classifier.
        label: Legend label handed to ``dataset.add_line``.
        color: Line colour handed to ``dataset.add_line``.

    Returns:
        Whatever ``dataset.add_line`` returns (unpacked by the caller as a
        ``(fig, axis)`` pair).
    """
    print(title)
    classifier = make_classifier()
    params = classifier.train(dataset.train_X, dataset.train_y)
    print(params.reshape(-1))

    train_pred = classifier.test(dataset.train_X, params)
    train_acc = classifier.evaluate(train_pred, dataset.train_y)
    test_pred = classifier.test(dataset.test_X, params)
    test_acc = classifier.evaluate(test_pred, dataset.test_y)

    # Single-argument parenthesised print behaves the same on Python 2 and 3.
    print("Accuracy train: %f test: %f" % (train_acc, test_acc))
    return dataset.add_line(fig, axis, params, label, color)


def run_all_classifiers(dataset):
    """Train every available classifier on ``dataset`` and plot each result.

    For each classifier in turn (Naive Bayes, Perceptron, batch MaxEnt,
    online MaxEnt, MIRA, SVM) this prints a heading, the flattened learned
    parameters and the train/test accuracy, then adds the classifier's line
    to the figure obtained from ``dataset.plot_data()``.

    Args:
        dataset: Object exposing ``train_X``, ``train_y``, ``test_X``,
            ``test_y``, ``plot_data()`` and ``add_line(...)``.
    """
    fig, axis = dataset.plot_data()

    # (printed heading, classifier factory, plot label, plot colour)
    runs = (
        ("Naive Bayes", nbc.NaiveBayes, "Naive Bayes", "red"),
        ("Perceptron", percc.Perceptron, "Perceptron", "blue"),
        ("MaxEnt LBFGS", mec_batch.MaxEnt_batch, "ME-LBFGS", "green"),
        # Online MaxEnt is scored with its own instance on both splits.
        ("MaxEnt Online", mec_online.MaxEnt_online, "ME-Online", "pink"),
        ("MIRA", mirac.Mira, "Mira", "orange"),
        ("SVM", svmc.SVM, "SVM", "brown"),
    )
    for title, make_classifier, label, color in runs:
        fig, axis = _run_classifier(
            dataset, fig, axis, title, make_classifier, label, color
        )
print("")

# Same as above, but for the Maximum Entropy classifier, online version
me_sgd = meoc.MaxEntOnline()
params_meo_sd = me_sgd.train(sd.train_X, sd.train_y)

# Accuracy on the training split, then on the test split.
y_pred_train = me_sgd.test(sd.train_X, params_meo_sd)
acc_train = me_sgd.evaluate(sd.train_y, y_pred_train)
y_pred_test = me_sgd.test(sd.test_X, params_meo_sd)
acc_test = me_sgd.evaluate(sd.test_y, y_pred_test)

# Add this classifier's line to the existing figure, then report.
fig, axis = sd.add_line(
    fig, axis, params_meo_sd, "Max-Ent-Online", "magenta"
)
print(
    "Max-Ent Online Simple Dataset Accuracy train: %f test: %f"
    % (acc_train, acc_test)
)
print("")

# Same as above, but for the SVM classifier
svm = svmc.SVM()
params_svm_sd = svm.train(sd.train_X, sd.train_y)

# Accuracy on the training split, then on the test split.
y_pred_train = svm.test(sd.train_X, params_svm_sd)
acc_train = svm.evaluate(sd.train_y, y_pred_train)
y_pred_test = svm.test(sd.test_X, params_svm_sd)
acc_test = svm.evaluate(sd.test_y, y_pred_test)

# Add this classifier's line to the existing figure, then report.
fig, axis = sd.add_line(fig, axis, params_svm_sd, "SVM", "yellow")
print(
    "SVM Online Simple Dataset Accuracy train: %f test: %f"
    % (acc_train, acc_test)
)
print("")

# End of exercise 3.1 #########

# Exercise 3.2: implement Naive Bayes for multinomial data ########

# Read the book review data