def main():
    """Run the decision-tree experiments on the Tennis and course data sets.

    Trains a depth-5 tree on TennisData and prints it, runs
    runClassifier.trainTestSet for several depths on TennisData and
    CFTookCG, then plots a maxDepth hyperparameter curve on CFTookAI.
    Relies on the module-level names dt, datasets and runClassifier.
    """
    h = dt.DT({'maxDepth': 5})
    h.train(datasets.TennisData.X, datasets.TennisData.Y)
    # print(x) with a single argument and print("") behave the same on
    # Python 2 and Python 3; the bare `print` statement is Python 2 only.
    print(h)
    print("")

    for depth in (1, 2, 3, 5):
        runClassifier.trainTestSet(dt.DT({'maxDepth': depth}), datasets.TennisData)
    print("")

    for depth in (1, 3, 5):
        runClassifier.trainTestSet(dt.DT({'maxDepth': depth}), datasets.CFTookCG)
    print("")

    # curve = runClassifier.learningCurveSet(dt.DT({'maxDepth': 5}), datasets.CFTookAI)
    # runClassifier.plotCurve('DT on AI', curve)
    curve = runClassifier.hyperparamCurveSet(
        dt.DT({'maxDepth': 5}),
        'maxDepth',
        [1, 2, 3, 4, 5, 6, 7, 8, 9, 10],
        datasets.CFTookAI,
    )
    runClassifier.plotCurve('DT on AI (hyperparameter)', curve)

    # Disabled WU4 exploration (was a bare triple-quoted string, i.e. never
    # executed); kept verbatim for reference.
    # print "WU4:"
    # h = dt.DT({'maxDepth': 3})
    # h.train( datasets.CFTookCG.X, datasets.CFTookCG.Y )
    # print h
    # print "0", datasets.CFDataRatings.courseNames[6], datasets.CFDataRatings.courseIds[6]
    # print "1-left", datasets.CFDataRatings.courseNames[34], datasets.CFDataRatings.courseIds[34]
    # print "1-left-2-left", datasets.CFDataRatings.courseNames[48], datasets.CFDataRatings.courseIds[48]
    # print "1-left-2-right", datasets.CFDataRatings.courseNames[27], datasets.CFDataRatings.courseIds[27]
    # print "1-right", datasets.CFDataRatings.courseNames[54], datasets.CFDataRatings.courseIds[54]
    # print "1-right-2-left", datasets.CFDataRatings.courseNames[32], datasets.CFDataRatings.courseIds[32]
    # print "1-right-2-left", datasets.CFDataRatings.courseNames[53], datasets.CFDataRatings.courseIds[53]
# KNN experiments on DigitData: one learning curve and two hyperparameter
# sweeps (over 'K' with isKNN=True and over 'eps' with isKNN=False).
import dumbClassifiers as du
import datasets as data
import runClassifier as run
import numpy
import knn

# 9: learning curve with K fixed at 5
curve = run.learningCurveSet(
    knn.KNN({'isKNN': True, 'K': 5}),
    data.DigitData,
)
run.plotCurve('K-Nearest Neighbor on 5-NN; DIgitsData', curve)

# 11: sweep K over 1..10
curve = run.hyperparamCurveSet(
    knn.KNN({'isKNN': True}),
    'K',
    [1, 2, 3, 4, 5, 6, 7, 8, 9, 10],
    data.DigitData,
)
run.plotCurve('Hyperparameter Curve on DigitsData', curve)

# 12: sweep eps over 1, 1.5, 2.0, ..., 19.5 (steps of one half, below 20)
arr = []
counter = 1
while counter < 20:
    arr.append(counter)
    counter += 0.5

curve = run.hyperparamCurveSet(
    knn.KNN({'isKNN': False}),
    'eps',
    arr,
    data.DigitData,
)
run.plotCurve('Hyperparameter Curve on DigitsData', curve)
# Learning curve for a 10-epoch perceptron.
import datasets as data
import runClassifier as run
import numpy
import perceptron

# NOTE(review): the plot title says "Sentiment Data" but the curve is computed
# on data.TwoDDiagonal -- confirm which of the two is intended.
curve = run.learningCurveSet(
    perceptron.Perceptron({'numEpoch': 10}),
    data.TwoDDiagonal,
)
run.plotCurve("Perceptron Learning Curve on Sentiment Data", curve)
# Perceptron experiments on SentimentData: a learning curve at a fixed epoch
# count, then a sweep over the 'numEpoch' hyperparameter.
import perceptron
import datasets
import runClassifier

# Learning curve with numEpoch = 5
curve = runClassifier.learningCurveSet(
    perceptron.Perceptron({'numEpoch': 5}),
    datasets.SentimentData,
)
runClassifier.plotCurve('Perceptron on Sentiment Data', curve)

# Sweep numEpoch over 1..10
curve = runClassifier.hyperparamCurveSet(
    perceptron.Perceptron({}),
    'numEpoch',
    [1, 2, 3, 4, 5, 6, 7, 8, 9, 10],
    datasets.SentimentData,
)
runClassifier.plotCurve('Perceptron on Sentiment Data (hyperparameter)', curve)
# Produces KNN learning-curve figures on DigitData and saves them as PNG files
# (ylim, savefig and close come from the pylab star import).
from numpy import *
from pylab import *
import util
import binary
import datasets
import knn
import runClassifier

# Learning curves for K = 1, 2, 10, 20 -- one saved figure per K
for i in [1, 2, 10, 20]:
    (dataSizes, trainAcc, testAcc) = runClassifier.learningCurveSet(
        knn.KNN({'isKNN': True, 'K': i}),
        datasets.DigitData,
    )
    runClassifier.plotCurve(
        "Learning Curve for knn, K=%d" % (i),
        [dataSizes, trainAcc, testAcc],
    )
    ylim([.2, 1.1])
    savefig("LC_k%d.png" % (i))
    close()

# Learning curves for epsilon = 5.0, 10.0, 15.0, 20.0 -- one saved figure each
for i in [5.0, 10.0, 15.0, 20.0]:
    (dataSizes, trainAcc, testAcc) = runClassifier.learningCurveSet(
        knn.KNN({'isKNN': False, 'eps': i}),
        datasets.DigitData,
    )
    runClassifier.plotCurve(
        "Learning Curve for knn, eps=%f" % (i),
        [dataSizes, trainAcc, testAcc],
    )
    ylim([.45, 1.1])
    savefig("LC_eps%f.png" % (i))
    close()

# Learning curve for K = 5 (saved, but the figure is left open)
(dataSizes, trainAcc, testAcc) = runClassifier.learningCurveSet(
    knn.KNN({'isKNN': True, 'K': 5}),
    datasets.DigitData,
)
runClassifier.plotCurve(
    "Learning Curve for knn, K=5",
    [dataSizes, trainAcc, testAcc],
)
savefig("LC_k5.png")
# Perceptron write-up experiments 13-15.
import perceptron
import datasets as data
import runClassifier as run
import pylab
from numpy import *

# 13: learning curve of a 10-epoch perceptron on SentimentData
curve = run.learningCurveSet(
    perceptron.Perceptron({'numEpoch': 10}),
    data.SentimentData,
)
run.plotCurve("Perceptron Learning Curve", curve)

# 14: train on 2-D data, then plot the points and the learned separator
# NOTE(review): X comes from TwoDDiagonal while Y comes from TwoDAxisAligned,
# both for training and for plotting -- confirm this pairing is intentional.
h = perceptron.Perceptron({'numEpoch': 10})
h.train(data.TwoDDiagonal.X, data.TwoDAxisAligned.Y)
run.plotData(data.TwoDDiagonal.X, data.TwoDAxisAligned.Y)
run.plotClassifier(h.weights, h.bias)
pylab.show()


# 15: tiny hand-made data set; the test split simply reuses the training data
class DummyData:
    X = array([
        [-1, 2],
        [1, 2],
        [-1, -2],
        [1, -2],
        [-2, 1],
        [-2, -1],
        [2, 1],
        [2, -1],
    ])
    Y = array([1, 1, 1, 1, -1, -1, -1, -1])
    Xte = X
    Yte = Y


h = perceptron.Perceptron({'numEpoch': 10})
h.train(DummyData.X, DummyData.Y)
# Decision-tree experiments; expects dt, datasets and runClassifier to be
# in scope already.

# Tennis data: print the trained tree at depth 2 and at depth 5
h = dt.DT({'maxDepth': 2})
h.train(datasets.TennisData.X, datasets.TennisData.Y)
print(h)

h = dt.DT({'maxDepth': 5})
h.train(datasets.TennisData.X, datasets.TennisData.Y)
print(h)

# Sentiment data
h = dt.DT({'maxDepth': 2})
h.train(datasets.SentimentData.X, datasets.SentimentData.Y)
print(h)

# Look up three vocabulary entries by index
for word_index in (2428, 3842, 3892):
    print(datasets.SentimentData.words[word_index])

for depth in (1, 3, 5):
    runClassifier.trainTestSet(dt.DT({'maxDepth': depth}), datasets.SentimentData)

curve = runClassifier.learningCurveSet(
    dt.DT({'maxDepth': 9}),
    datasets.SentimentData,
)
runClassifier.plotCurve('DT on Sentiment Data', curve)

# W2: sweep maxDepth
curve = runClassifier.hyperparamCurveSet(
    dt.DT({}),
    'maxDepth',
    [1, 2, 4, 6, 8, 12, 16],
    datasets.SentimentData,
)
runClassifier.plotCurve('DT on Sentiment Data (hyperparameter)', curve)
# Learning curve for a depth-9 decision tree on SentimentData.
import util
import datasets
import binary
import dumbClassifiers
import runClassifier
import dt
import knn
import perceptron
from numpy import *
from pylab import *

# Disabled single train/test runs:
# runClassifier.trainTestSet(dt.DT({'maxDepth': 1}), datasets.SentimentData)
# runClassifier.trainTestSet(dt.DT({'maxDepth': 3}), datasets.SentimentData)
# runClassifier.trainTestSet(dt.DT({'maxDepth': 5}), datasets.SentimentData)

curve = runClassifier.learningCurveSet(
    dt.DT({'maxDepth': 9}),
    datasets.SentimentData,
)
runClassifier.plotCurve('DT on Sentiment Data', curve)
# KNN experiments on DigitData: sweeps over K and over eps, then a learning
# curve with K fixed at 5.
import knn
import datasets
import runClassifier

# Sweep K over 1..10
curve = runClassifier.hyperparamCurveSet(
    knn.KNN({'isKNN': True}),
    'K',
    [1, 2, 3, 4, 5, 6, 7, 8, 9, 10],
    datasets.DigitData,
)
runClassifier.plotCurve('KNN on Digit Data (hyperparameter K)', curve)

# Sweep eps over 1.0..10.0 (isKNN switched off)
curve = runClassifier.hyperparamCurveSet(
    knn.KNN({'isKNN': False}),
    'eps',
    [1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0],
    datasets.DigitData,
)
runClassifier.plotCurve('KNN on Digit Data (hyperparameter epsilon)', curve)

# Learning curve for K = 5
curve = runClassifier.learningCurveSet(
    knn.KNN({'isKNN': True, 'K': 5}),
    datasets.DigitData,
)
runClassifier.plotCurve('KNN on Digit Data', curve)
}), datasets.DigitData) runClassifier.trainTestSet(knn.KNN({ 'isKNN': False, 'eps': 10.0 }), datasets.DigitData) runClassifier.trainTestSet(knn.KNN({ 'isKNN': True, 'K': 1 }), datasets.DigitData) runClassifier.trainTestSet(knn.KNN({ 'isKNN': True, 'K': 3 }), datasets.DigitData) runClassifier.trainTestSet(knn.KNN({ 'isKNN': True, 'K': 5 }), datasets.DigitData) #knnCurve = runClassifier.hyperparamCurveSet(knn.KNN({'isKNN':True, 'K':1}), 'K', range(0, 21), datasets.DigitData) #runClassifier.plotCurve('KNN vs. train/test Accuracy', knnCurve) #epsCurve = runClassifier.hyperparamCurveSet(knn.KNN({'isKNN':False, 'eps':0.5}), 'eps', arange(0,14.5,0.5), datasets.DigitData) #runClassifier.plotCurve('Epsilon Ball vs. train/test accuracy', epsCurve) learning = runClassifier.learningCurveSet(knn.KNN({ 'isKNN': True, 'K': 5 }), datasets.DigitData) runClassifier.plotCurve('Training examples vs. train/test accuracy', learning)
# Decision-tree exploration on the CFTookAI data.
from pylab import *
import util
import datasets
import runClassifier
import binary
import dt
import dumbClassifiers

X = datasets.TennisData.X
Y = datasets.TennisData.Y
data = datasets.TennisData

# ----- for dt -----
maxD = 1
reload(dt)  # re-import dt (reload is a builtin only on Python 2)
h = dt.DT({'maxDepth': maxD})

curve = runClassifier.learningCurveSet(
    dt.DT({'maxDepth': 5}),
    datasets.CFTookAI,
)
curveh = runClassifier.hyperparamCurveSet(
    dt.DT({'maxDepth': 5}),
    'maxDepth',
    [1, 2, 3, 4, 5, 6, 7, 8, 9, 10],
    datasets.CFTookAI,
)
runClassifier.plotCurve('', curveh)

h = dt.DT({'maxDepth': 10})
h.train(datasets.CFTookAI.X, datasets.CFTookAI.Y)
# # the maxDepth that does the best on test set is 5, we reach 1 on the training data with maxDepth=10

# # with maxDepth=5, the features are:
with5 = dt.DT({'maxDepth': 5})
with5.train(datasets.CFTookAI.X, datasets.CFTookAI.Y)
# # CFTookAI.X is (400, 55), the 55 features.

# # the top 5 features are (indices):
# NOTE(review): the list below holds six indices although the comment above
# says five -- confirm which entry is extra.
top5With5 = [1, 44, 37, 54, 52, 48]

# # which are:
bestWith5_courses = datasets.CFTookAI.courseNames[top5With5]
# # result: array(['introduction to information technology',
# #        'database management systems', 'complexity theory',
# #        'computational linguistics ii', 'advanced computer graphics'],
# Decision-tree experiments on SentimentData: a learning curve at maxDepth 6
# and a sweep over the 'maxDepth' hyperparameter.
import dumbClassifiers as du
import datasets as data
import runClassifier as run
import numpy
import dt

# Test the Decision Tree
curve = run.learningCurveSet(dt.DT({'maxDepth': 6}), data.SentimentData)
# Titles previously read "Sediment Data"; the data set plotted is SentimentData.
run.plotCurve('Decision Tree Learning Curve on Sentiment Data', curve)

curve = run.hyperparamCurveSet(
    dt.DT({}),
    'maxDepth',
    [1, 2, 4, 6, 8, 12, 16],
    data.SentimentData,
)
run.plotCurve('Decision Tree Hyperparameter Curve on Sentiment Data', curve)