示例#1
0
def main():
	h = dt.DT({'maxDepth': 5})
	h.train( datasets.TennisData.X, datasets.TennisData.Y )
	print h
	print
	
	runClassifier.trainTestSet(dt.DT({'maxDepth': 1}), datasets.TennisData)
	runClassifier.trainTestSet(dt.DT({'maxDepth': 2}), datasets.TennisData)
	runClassifier.trainTestSet(dt.DT({'maxDepth': 3}), datasets.TennisData)
	runClassifier.trainTestSet(dt.DT({'maxDepth': 5}), datasets.TennisData)	
	print
	
	runClassifier.trainTestSet(dt.DT({'maxDepth': 1}), datasets.CFTookCG)
	runClassifier.trainTestSet(dt.DT({'maxDepth': 3}), datasets.CFTookCG)
	runClassifier.trainTestSet(dt.DT({'maxDepth': 5}), datasets.CFTookCG)
	print 
	
	#curve = runClassifier.learningCurveSet(dt.DT({'maxDepth': 5}), datasets.CFTookAI)
	#runClassifier.plotCurve('DT on AI', curve)
	
	curve = runClassifier.hyperparamCurveSet(dt.DT({'maxDepth': 5}), 'maxDepth', [1,2,3,4,5,6,7,8,9,10], datasets.CFTookAI)
	runClassifier.plotCurve( 'DT on AI (hyperparameter)', curve )
	"""
	print "WU4:"
	h = dt.DT({'maxDepth': 3})
	h.train( datasets.CFTookCG.X, datasets.CFTookCG.Y )
	print h

	print "0", datasets.CFDataRatings.courseNames[6], datasets.CFDataRatings.courseIds[6]
	print "1-left", datasets.CFDataRatings.courseNames[34], datasets.CFDataRatings.courseIds[34]
	print "1-left-2-left", datasets.CFDataRatings.courseNames[48], datasets.CFDataRatings.courseIds[48]
	print "1-left-2-right", datasets.CFDataRatings.courseNames[27], datasets.CFDataRatings.courseIds[27]
	print "1-right", datasets.CFDataRatings.courseNames[54], datasets.CFDataRatings.courseIds[54]
	print "1-right-2-left", datasets.CFDataRatings.courseNames[32], datasets.CFDataRatings.courseIds[32]
	print "1-right-2-left", datasets.CFDataRatings.courseNames[53], datasets.CFDataRatings.courseIds[53]
	"""
	return
示例#2
0
import dumbClassifiers as du
import datasets as data
import runClassifier as run
import numpy
import knn

# 9: learning curve for a 5-nearest-neighbour classifier on the digit data.
# NOTE(review): the title below spells the dataset "DIgitsData" — left as-is.
five_nn = knn.KNN({'isKNN': True, 'K': 5})
curve = run.learningCurveSet(five_nn, data.DigitData)
run.plotCurve('K-Nearest Neighbor on 5-NN; DIgitsData', curve)

# 11: sweep the hyperparameter K over 1..10.
k_grid = list(range(1, 11))
curve = run.hyperparamCurveSet(knn.KNN({'isKNN': True}), 'K', k_grid,
                               data.DigitData)
run.plotCurve('Hyperparameter Curve on DigitsData', curve)

# 12: sweep eps over 1, 1.5, 2.0, ..., 19.5 (step 0.5, strictly below 20).
# The first entry stays the int 1; every later entry is a float.
arr = [1]
arr.extend(half_steps / 2.0 for half_steps in range(3, 40))

eps_ball = knn.KNN({'isKNN': False})
curve = run.hyperparamCurveSet(eps_ball, 'eps', arr, data.DigitData)
run.plotCurve('Hyperparameter Curve on DigitsData', curve)
示例#3
0
import datasets as data
import runClassifier as run
import numpy
import perceptron

# Learning curve for a 10-epoch perceptron, computed on data.TwoDDiagonal.
# NOTE(review): the plot title says "Sentiment Data" although the dataset
# passed in is TwoDDiagonal — confirm which one is intended.
ten_epoch_perceptron = perceptron.Perceptron({'numEpoch': 10})
curve = run.learningCurveSet(ten_epoch_perceptron, data.TwoDDiagonal)
run.plotCurve("Perceptron Learning Curve on Sentiment Data", curve)
import perceptron
import datasets
import runClassifier

sentiment = datasets.SentimentData

# Learning curve with the epoch count fixed at 5.
five_epoch = perceptron.Perceptron({'numEpoch': 5})
curve = runClassifier.learningCurveSet(five_epoch, sentiment)
runClassifier.plotCurve('Perceptron on Sentiment Data', curve)

# Hyperparameter curve: numEpoch swept over 1..10.
epoch_grid = list(range(1, 11))
curve = runClassifier.hyperparamCurveSet(
    perceptron.Perceptron({}), 'numEpoch', epoch_grid, sentiment)
runClassifier.plotCurve('Perceptron on Sentiment Data (hyperparameter)', curve)
示例#5
0
from numpy import *
from pylab import *

import util
import binary
import datasets
import knn
import runClassifier

# kNN learning curves on the digit data for K = 1, 2, 10, 20.
# Each curve is plotted, clamped to a fixed y-range, saved as a PNG and closed.
for i in (1, 2, 10, 20):
	k_classifier = knn.KNN({'isKNN': True, 'K': i})
	dataSizes, trainAcc, testAcc = runClassifier.learningCurveSet(k_classifier, datasets.DigitData)
	title = "Learning Curve for knn, K=%d" % i
	runClassifier.plotCurve(title, [dataSizes, trainAcc, testAcc])
	ylim([.2, 1.1])
	savefig("LC_k%d.png" % i)
	close()

# Same procedure for the epsilon-ball variant, eps = 5.0, 10.0, 15.0, 20.0.
for i in (5.0, 10.0, 15.0, 20.0):
	eps_classifier = knn.KNN({'isKNN': False, 'eps': i})
	dataSizes, trainAcc, testAcc = runClassifier.learningCurveSet(eps_classifier, datasets.DigitData)
	title = "Learning Curve for knn, eps=%f" % i
	runClassifier.plotCurve(title, [dataSizes, trainAcc, testAcc])
	ylim([.45, 1.1])
	savefig("LC_eps%f.png" % i)
	close()

# Single learning curve for K = 5; no y-limits are set and the figure is
# saved without being closed afterwards.
five_nn = knn.KNN({'isKNN': True, 'K': 5})
dataSizes, trainAcc, testAcc = runClassifier.learningCurveSet(five_nn, datasets.DigitData)
runClassifier.plotCurve("Learning Curve for knn, K=5", [dataSizes, trainAcc, testAcc])
savefig("LC_k5.png")
示例#6
0
import perceptron
import datasets as data
import runClassifier as run
import pylab
from numpy import *

#13: learning curve for a 10-epoch perceptron on the sentiment data
curve = run.learningCurveSet(
    perceptron.Perceptron({'numEpoch': 10}), data.SentimentData)
run.plotCurve("Perceptron Learning Curve", curve)

#14: train a 10-epoch perceptron, then plot the data and the classifier
# NOTE(review): the inputs come from TwoDDiagonal while the labels come from
# TwoDAxisAligned — confirm this pairing is intentional.
h = perceptron.Perceptron({'numEpoch': 10})
train_x = data.TwoDDiagonal.X
train_y = data.TwoDAxisAligned.Y
h.train(train_x, train_y)
run.plotData(train_x, train_y)
run.plotClassifier(h.weights, h.bias)
pylab.show()


#15
class DummyData:
    """Tiny hand-written 2-D dataset: eight points with +1/-1 labels.

    The first four rows of X are labelled +1 and the last four -1.
    Xte/Yte are aliases of X/Y (the same array objects, not copies).
    """

    X = array([
        [-1, 2],
        [1, 2],
        [-1, -2],
        [1, -2],
        [-2, 1],
        [-2, -1],
        [2, 1],
        [2, -1],
    ])
    Y = array([1] * 4 + [-1] * 4)

    # The "test" split simply reuses the training arrays.
    Xte, Yte = X, Y


# Fit a 10-epoch perceptron to the DummyData points.
h = perceptron.Perceptron({'numEpoch': 10})
dummy_x, dummy_y = DummyData.X, DummyData.Y
h.train(dummy_x, dummy_y)
示例#7
0
# Train and print trees of depth 2 and depth 5 on the tennis data.
for tennis_depth in (2, 5):
    h = dt.DT({'maxDepth': tennis_depth})
    h.train(datasets.TennisData.X, datasets.TennisData.Y)
    print(h)

# Sentiment data
sentiment = datasets.SentimentData
h = dt.DT({'maxDepth': 2})
h.train(sentiment.X, sentiment.Y)
print(h)

# Print the vocabulary entries stored at three specific indices
# (presumably the features the printed tree splits on — verify).
for word_index in (2428, 3842, 3892):
    print(sentiment.words[word_index])

# Train/test accuracy at depth limits 1, 3 and 5.
for sentiment_depth in (1, 3, 5):
    runClassifier.trainTestSet(dt.DT({'maxDepth': sentiment_depth}), sentiment)

# Learning curve for a depth-9 tree.
depth9_tree = dt.DT({'maxDepth': 9})
curve = runClassifier.learningCurveSet(depth9_tree, sentiment)
runClassifier.plotCurve('DT on Sentiment Data', curve)

# W2: hyperparameter curve over maxDepth.
depth_grid = [1, 2, 4, 6, 8, 12, 16]
curve = runClassifier.hyperparamCurveSet(dt.DT({}), 'maxDepth', depth_grid,
                                         sentiment)
runClassifier.plotCurve('DT on Sentiment Data (hyperparameter)', curve)
示例#8
0
import util
import datasets
import binary
import dumbClassifiers
import runClassifier
import dt
import knn
import perceptron
from numpy import *
from pylab import *

# runClassifier.trainTestSet(dt.DT({'maxDepth': 1}), datasets.SentimentData)
# runClassifier.trainTestSet(dt.DT({'maxDepth': 3}), datasets.SentimentData)
# runClassifier.trainTestSet(dt.DT({'maxDepth': 5}), datasets.SentimentData)
# Learning curve for a depth-9 decision tree on the sentiment data.
depth9_tree = dt.DT({'maxDepth': 9})
curve = runClassifier.learningCurveSet(depth9_tree, datasets.SentimentData)
runClassifier.plotCurve('DT on Sentiment Data', curve)
示例#9
0
import knn
import datasets
import runClassifier

digits = datasets.DigitData

# Hyperparameter curve over K = 1..10 (k-nearest-neighbour mode).
k_grid = list(range(1, 11))
curve = runClassifier.hyperparamCurveSet(
    knn.KNN({'isKNN': True}), 'K', k_grid, digits)
runClassifier.plotCurve('KNN on Digit Data (hyperparameter K)', curve)

# Hyperparameter curve over eps = 1.0..10.0 (epsilon-ball mode).
eps_grid = [float(step) for step in range(1, 11)]
curve = runClassifier.hyperparamCurveSet(
    knn.KNN({'isKNN': False}), 'eps', eps_grid, digits)
runClassifier.plotCurve('KNN on Digit Data (hyperparameter epsilon)', curve)

# Learning curve with K fixed at 5.
five_nn = knn.KNN({'isKNN': True, 'K': 5})
curve = runClassifier.learningCurveSet(five_nn, digits)
runClassifier.plotCurve('KNN on Digit Data', curve)
示例#10
0
}), datasets.DigitData)
# Epsilon-ball classifier with eps = 10.0 on the digit data.
eps_ball = knn.KNN({'isKNN': False, 'eps': 10.0})
runClassifier.trainTestSet(eps_ball, datasets.DigitData)

# k-nearest-neighbour classifiers for K = 1, 3, 5.
for neighbours in (1, 3, 5):
    runClassifier.trainTestSet(
        knn.KNN({'isKNN': True, 'K': neighbours}), datasets.DigitData)

#knnCurve = runClassifier.hyperparamCurveSet(knn.KNN({'isKNN':True, 'K':1}), 'K', range(0, 21), datasets.DigitData)
#runClassifier.plotCurve('KNN vs. train/test Accuracy', knnCurve)

#epsCurve = runClassifier.hyperparamCurveSet(knn.KNN({'isKNN':False, 'eps':0.5}), 'eps', arange(0,14.5,0.5), datasets.DigitData)
#runClassifier.plotCurve('Epsilon Ball vs. train/test accuracy', epsCurve)

# Learning curve for K = 5.
five_nn = knn.KNN({'isKNN': True, 'K': 5})
learning = runClassifier.learningCurveSet(five_nn, datasets.DigitData)
runClassifier.plotCurve('Training examples vs. train/test accuracy', learning)
示例#11
0
from pylab import *
import util, datasets, runClassifier, binary
import dt
import dumbClassifiers

# Decision-tree experiments on the CFTookAI data (Python 2 script: uses the
# builtin reload()).  The original had four top-level statements indented by
# one stray space, which made the whole script fail to parse with
# "IndentationError: unexpected indent"; they are now at column 0.
X = datasets.TennisData.X
Y = datasets.TennisData.Y
data = datasets.TennisData

# ----- for dt -----
maxD = 1
reload(dt)  # re-import dt so that edits to the module are picked up
h = dt.DT({'maxDepth': maxD})
curve = runClassifier.learningCurveSet(dt.DT({'maxDepth': 5}), datasets.CFTookAI)
curveh = runClassifier.hyperparamCurveSet(dt.DT({'maxDepth': 5}), 'maxDepth', [1,2,3,4,5,6,7,8,9,10], datasets.CFTookAI)
runClassifier.plotCurve('', curveh)

h = dt.DT({'maxDepth': 10})
h.train(datasets.CFTookAI.X, datasets.CFTookAI.Y)
# Author's notes: the maxDepth that does best on the test set is 5; training
# accuracy reaches 1 with maxDepth=10.
# With maxDepth=5, the features are:
with5 = dt.DT({'maxDepth': 5})
with5.train(datasets.CFTookAI.X, datasets.CFTookAI.Y)
# CFTookAI.X is (400, 55), the 55 features (per the author's note).
# The top 5 features are (indices):
# NOTE(review): six indices are listed although the name says "top 5" — confirm.
top5With5 = [1, 44, 37, 54, 52, 48]
# which are:
bestWith5_courses = datasets.CFTookAI.courseNames[top5With5]
# #result: array(['introduction to information technology',
#       #  'database management systems', 'complexity theory',
#       #  'computational linguistics ii', 'advanced computer graphics'], 
示例#12
0
import dumbClassifiers as du
import datasets as data
import runClassifier as run
import numpy
import dt

# Decision-tree experiments on data.SentimentData.
# NOTE(review): both plot titles spell the dataset "Sediment Data" — likely a
# typo for "Sentiment"; strings left unchanged here.
sentiment = data.SentimentData

# Learning curve for a tree limited to depth 6.
depth6_tree = dt.DT({'maxDepth': 6})
curve = run.learningCurveSet(depth6_tree, sentiment)
run.plotCurve('Decision Tree Learning Curve on Sediment Data', curve)

# Hyperparameter curve over maxDepth.
depth_grid = [1, 2, 4, 6, 8, 12, 16]
curve = run.hyperparamCurveSet(dt.DT({}), 'maxDepth', depth_grid, sentiment)
run.plotCurve('Decision Tree Hyperparameter Curve on Sediment Data', curve)