Exemplo n.º 1
0
def main():
	"""Run the decision-tree experiments on the tennis and course datasets.

	Trains a ``maxDepth`` 5 tree on ``datasets.TennisData`` and prints it,
	calls ``runClassifier.trainTestSet`` for several depths on ``TennisData``
	and ``CFTookCG``, then plots the curve that
	``runClassifier.hyperparamCurveSet`` returns for ``maxDepth`` 1..10 on
	``CFTookAI``.
	"""
	h = dt.DT({'maxDepth': 5})
	h.train(datasets.TennisData.X, datasets.TennisData.Y)
	# Single-argument print(...) behaves the same on Python 2 and Python 3,
	# whereas the bare ``print h`` statement is a syntax error on Python 3.
	print(h)
	# print("") emits one blank line on both versions; print() would show
	# "()" on Python 2.
	print("")

	for depth in (1, 2, 3, 5):
		runClassifier.trainTestSet(dt.DT({'maxDepth': depth}), datasets.TennisData)
	print("")

	for depth in (1, 3, 5):
		runClassifier.trainTestSet(dt.DT({'maxDepth': depth}), datasets.CFTookCG)
	print("")

	#curve = runClassifier.learningCurveSet(dt.DT({'maxDepth': 5}), datasets.CFTookAI)
	#runClassifier.plotCurve('DT on AI', curve)

	curve = runClassifier.hyperparamCurveSet(dt.DT({'maxDepth': 5}), 'maxDepth', [1,2,3,4,5,6,7,8,9,10], datasets.CFTookAI)
	runClassifier.plotCurve('DT on AI (hyperparameter)', curve)

	# Disabled "WU4" exploration, kept verbatim as an inert string literal.
	# It is never executed; its contents still use Python 2 print syntax.
	"""
	print "WU4:"
	h = dt.DT({'maxDepth': 3})
	h.train( datasets.CFTookCG.X, datasets.CFTookCG.Y )
	print h

	print "0", datasets.CFDataRatings.courseNames[6], datasets.CFDataRatings.courseIds[6]
	print "1-left", datasets.CFDataRatings.courseNames[34], datasets.CFDataRatings.courseIds[34]
	print "1-left-2-left", datasets.CFDataRatings.courseNames[48], datasets.CFDataRatings.courseIds[48]
	print "1-left-2-right", datasets.CFDataRatings.courseNames[27], datasets.CFDataRatings.courseIds[27]
	print "1-right", datasets.CFDataRatings.courseNames[54], datasets.CFDataRatings.courseIds[54]
	print "1-right-2-left", datasets.CFDataRatings.courseNames[32], datasets.CFDataRatings.courseIds[32]
	print "1-right-2-left", datasets.CFDataRatings.courseNames[53], datasets.CFDataRatings.courseIds[53]
	"""
Exemplo n.º 2
0
import dumbClassifiers as du
import datasets as data
import runClassifier as run
import numpy
import knn

# 9: learning curve for the K=5 nearest-neighbour classifier on DigitData
five_nn = knn.KNN({'isKNN': True, 'K': 5})
curve = run.learningCurveSet(five_nn, data.DigitData)
run.plotCurve('K-Nearest Neighbor on 5-NN; DIgitsData', curve)

# 11: sweep the 'K' hyperparameter over 1..10
curve = run.hyperparamCurveSet(
    knn.KNN({'isKNN': True}), 'K', list(range(1, 11)), data.DigitData)
run.plotCurve('Hyperparameter Curve on DigitsData', curve)

# 12: sweep the 'eps' hyperparameter over 1, 1.5, 2.0, ..., 19.5
arr, counter = [], 1
while counter < 20:
    arr += [counter]
    counter = counter + .5

curve = run.hyperparamCurveSet(
    knn.KNN({'isKNN': False}), 'eps', arr, data.DigitData)
run.plotCurve('Hyperparameter Curve on DigitsData', curve)
Exemplo n.º 3
0
import perceptron
import datasets
import runClassifier

# Learning curve with numEpoch fixed at 5.
five_epoch = perceptron.Perceptron({'numEpoch': 5})
curve = runClassifier.learningCurveSet(five_epoch, datasets.SentimentData)
runClassifier.plotCurve('Perceptron on Sentiment Data', curve)

# Sweep numEpoch over 1..10.
curve = runClassifier.hyperparamCurveSet(
    perceptron.Perceptron({}),
    'numEpoch',
    list(range(1, 11)),
    datasets.SentimentData,
)
runClassifier.plotCurve('Perceptron on Sentiment Data (hyperparameter)', curve)
Exemplo n.º 4
0
# Tennis data: fit and print a tree at maxDepth 2, then at maxDepth 5.
for depth in (2, 5):
    h = dt.DT({'maxDepth': depth})
    h.train(datasets.TennisData.X, datasets.TennisData.Y)
    print(h)

# Sentiment data: fit and print a maxDepth-2 tree.
h = dt.DT({'maxDepth': 2})
h.train(datasets.SentimentData.X, datasets.SentimentData.Y)
print(h)

# Look up the vocabulary entries at three word indices.
for word_index in (2428, 3842, 3892):
    print(datasets.SentimentData.words[word_index])

# Train/test runs at maxDepth 1, 3 and 5.
for depth in (1, 3, 5):
    runClassifier.trainTestSet(dt.DT({'maxDepth': depth}),
                               datasets.SentimentData)

# Learning curve for a maxDepth-9 tree.
deep_tree = dt.DT({'maxDepth': 9})
curve = runClassifier.learningCurveSet(deep_tree, datasets.SentimentData)
runClassifier.plotCurve('DT on Sentiment Data', curve)

# W2: sweep maxDepth over a fixed grid.
depth_grid = [1, 2, 4, 6, 8, 12, 16]
curve = runClassifier.hyperparamCurveSet(dt.DT({}), 'maxDepth', depth_grid,
                                         datasets.SentimentData)
runClassifier.plotCurve('DT on Sentiment Data (hyperparameter)', curve)
Exemplo n.º 5
0
class DummyData:
    """Eight 2-D points with +1/-1 labels; test arrays alias the training arrays."""

    # First four rows have |x| = 1, |y| = 2; last four have |x| = 2, |y| = 1.
    X = array([
        [-1, 2], [1, 2], [-1, -2], [1, -2],
        [-2, 1], [-2, -1], [2, 1], [2, -1],
    ])
    # First four rows are labelled +1, last four -1.
    Y = array([1] * 4 + [-1] * 4)

    # The test split is the very same arrays (not copies).
    Xte, Yte = X, Y


# Train a perceptron (numEpoch = 10) on the dummy points, then plot the data
# together with the learned weights and bias.
train_x, train_y = DummyData.X, DummyData.Y
h = perceptron.Perceptron({'numEpoch': 10})
h.train(train_x, train_y)
run.plotData(train_x, train_y)
run.plotClassifier(h.weights, h.bias)
pylab.show()

# Sweep numEpoch over 1..10, reusing the same classifier instance.
curve = run.hyperparamCurveSet(h, 'numEpoch', list(range(1, 11)), DummyData)
run.plotCurve("Perceptron Learning Curve", curve)

# Learning curve, again with the same classifier instance.
curve = run.learningCurveSet(h, DummyData)
run.plotCurve("Learning Curve for NOn-converge", curve)

#16


class bsX:
    """Twelve 2-D points labelled by their first coordinate.

    Rows whose first coordinate is 10 carry label +1; rows whose first
    coordinate is 1 carry label -1.
    """

    # Features: six rows with first coordinate 10, then six with first
    # coordinate 1; the second coordinate ranges over [-1, 1].
    X = array([[10, -1], [10, -0.5], [10, 0], [10, 0.5], [10, 0.9], [10, 1],
               [1, -1], [1, -0.05], [1, 0], [1, 0.80], [1, 0.89], [1, 1]])
    # Labels, in the same row order as X.
    Y = array([1, 1, 1, 1, 1, 1, -1, -1, -1, -1, -1, -1])

    # Test features alias the training features (same object, not a copy).
    # NOTE(review): no Yte is defined in the visible part of this class;
    # confirm whether the definition continues beyond this excerpt.
    Xte = X
Exemplo n.º 6
0
import knn
import datasets
import runClassifier

# Sweep K over the integers 1..10.
k_values = list(range(1, 11))
curve = runClassifier.hyperparamCurveSet(
    knn.KNN({'isKNN': True}), 'K', k_values, datasets.DigitData)
runClassifier.plotCurve('KNN on Digit Data (hyperparameter K)', curve)

# Sweep epsilon over the floats 1.0..10.0.
eps_values = [float(k) for k in k_values]
curve = runClassifier.hyperparamCurveSet(
    knn.KNN({'isKNN': False}), 'eps', eps_values, datasets.DigitData)
runClassifier.plotCurve('KNN on Digit Data (hyperparameter epsilon)', curve)

# Learning curve with K fixed at 5.
five_nn = knn.KNN({'isKNN': True, 'K': 5})
curve = runClassifier.learningCurveSet(five_nn, datasets.DigitData)
runClassifier.plotCurve('KNN on Digit Data', curve)
Exemplo n.º 7
0
from numpy import *
from pylab import *
import util, datasets, runClassifier, binary
import dt
import dumbClassifiers

# Convenience aliases for the tennis dataset.
X = datasets.TennisData.X
Y = datasets.TennisData.Y
data = datasets.TennisData

# ----- for dt -----
maxD = 1

# ``reload`` is a builtin only on Python 2; on Python 3 it lives in importlib.
try:
    reload
except NameError:
    from importlib import reload
reload(dt)

h = dt.DT({'maxDepth': maxD})
curve = runClassifier.learningCurveSet(dt.DT({'maxDepth': 5}), datasets.CFTookAI)
curveh = runClassifier.hyperparamCurveSet(dt.DT({'maxDepth': 5}), 'maxDepth', [1, 2, 3, 4, 5, 6, 7, 8, 9, 10], datasets.CFTookAI)
runClassifier.plotCurve('', curveh)

h = dt.DT({'maxDepth': 10})
h.train(datasets.CFTookAI.X, datasets.CFTookAI.Y)
# Author's note: the maxDepth that does best on the test set is 5; with
# maxDepth=10 we reach 1 on the training data.
# With maxDepth=5, the features are:
with5 = dt.DT({'maxDepth': 5})
with5.train(datasets.CFTookAI.X, datasets.CFTookAI.Y)
# Author's note: CFTookAI.X is (400, 55), i.e. 55 features.
# The top features are (indices):
# NOTE(review): the name says "top 5" but six indices are listed; confirm
# which entry, if any, is extra.
top5With5 = [1, 44, 37, 54, 52, 48]
# which are:
bestWith5_courses = datasets.CFTookAI.courseNames[top5With5]
# #result: array(['introduction to information technology',
#       #  'database management systems', 'complexity theory',
Exemplo n.º 8
0
import dumbClassifiers as du
import datasets as data
import runClassifier as run
import numpy
import dt

# Decision tree on the sentiment data: learning curve at maxDepth 6.
# The plot titles previously read "Sediment Data"; the dataset being plotted
# is SentimentData, so the labels now say "Sentiment Data".
curve = run.learningCurveSet(dt.DT({'maxDepth': 6}), data.SentimentData)
run.plotCurve('Decision Tree Learning Curve on Sentiment Data', curve)

# Sweep maxDepth over a fixed grid and plot the resulting curve.
curve = run.hyperparamCurveSet(dt.DT({}), 'maxDepth', [1, 2, 4, 6, 8, 12, 16],
                               data.SentimentData)
run.plotCurve('Decision Tree Hyperparameter Curve on Sentiment Data', curve)