Example #1
def BuildModel(self, data, labels):
    # Create and train the classifier.
    options = {}
    if self.stumps:
        options["stumps"] = self.stumps
    if self.minsize:
        options["minsize"] = self.minsize
    DC = mlpy.ClassTree(**options)
    DC.learn(data, labels)
    return DC
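
This snippet comes from a benchmarking harness where `self.stumps` and `self.minsize` are set elsewhere. A minimal self-contained sketch of the same pattern, with made-up toy data and option values:

import numpy as np
import mlpy

# Hypothetical toy data: ten 2-D points in two well-separated classes.
data = np.vstack([np.random.rand(5, 2), np.random.rand(5, 2) + 2])
labels = np.array([1] * 5 + [2] * 5)

options = {"stumps": 0, "minsize": 2}  # assumed values, for illustration
tree = mlpy.ClassTree(**options)
tree.learn(data, labels)
print(tree.pred(data))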
Example #2
def fitDecisionTree(data):
    '''
        Build a decision tree classifier
    '''
    # create the classifier object
    tree = ml.ClassTree(minsize=1000)

    # fit the data
    tree.learn(data[0], data[1])

    # return the classifier
    return tree
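
Here `data` is assumed to be a pair with features in `data[0]` and labels in `data[1]`. A usage sketch with hypothetical synthetic inputs; note that with `minsize=1000`, nodes holding fewer than 1000 samples are never split, so on a tiny dataset the tree predicts the majority class everywhere:

import numpy as np

# Hypothetical inputs, for illustration only.
x = np.random.rand(20, 3)
y = np.array([1] * 10 + [2] * 10)

tree = fitDecisionTree((x, y))
print(tree.pred(x))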
Example #3
    def metric(self):
        totalTimer = Timer()
        with totalTimer:
            model = mlpy.ClassTree(**self.build_opts)
            model.learn(self.data_split[0], self.data_split[1])

            if len(self.data) >= 2:
                predictions = model.pred(self.data[1])

        metric = {}
        metric["runtime"] = totalTimer.ElapsedTime()

        if len(self.data) == 3:
            confusionMatrix = Metrics.ConfusionMatrix(self.data[2],
                                                      predictions)
            metric['ACC'] = Metrics.AverageAccuracy(confusionMatrix)
            metric['MCC'] = Metrics.MCCMultiClass(confusionMatrix)
            metric['Precision'] = Metrics.AvgPrecision(confusionMatrix)
            metric['Recall'] = Metrics.AvgRecall(confusionMatrix)
            metric['MSE'] = Metrics.SimpleMeanSquaredError(
                self.data[2], predictions)

        return metric
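
`Timer` and `Metrics` belong to the surrounding benchmark framework, not to mlpy. A minimal stand-in for `Timer` with the same interface, assuming wall-clock timing is all that is needed:

import time

class Timer(object):
    # Context manager that records elapsed wall-clock time.
    def __enter__(self):
        self.start = time.time()
        return self

    def __exit__(self, *exc):
        self.elapsed = time.time() - self.start

    def ElapsedTime(self):
        return self.elapsed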
Example #4
import mlpy

# parseDataPoints() and packageData() are helpers defined elsewhere
# in this project.
directory = raw_input("What directory are the XML files located in?\n")
regexParse = raw_input("How would you like to parse the words? Leave blank to parse by whitespace:\n")
if regexParse == "":
    regexParse = None
vocab, indexToWord, fullDataPoints = parseDataPoints(directory, regexParse)
X, Y = packageData(fullDataPoints, regexParse, vocab, indexToWord)

testModel(mlpy.Perceptron(alpha=0.1, thr=0.05, maxiters=1000), X, Y, "Perceptron")
testModel(mlpy.ElasticNetC(lmb=0.01, eps=0.001), X, Y, "ElasticNet")
testModel(mlpy.LibLinear(solver_type='l2r_l2loss_svc_dual', C=1), X, Y, "LibLinear")
testModel(mlpy.DLDA(delta=0.1), X, Y, "DLDA")
testModel(mlpy.Golub(), X, Y, "Golub")
testModel(mlpy.Parzen(), X, Y, "Parzen")
testModel(mlpy.KNN(2), X, Y, "KNN")
testModel(mlpy.ClassTree(), X, Y, "Classification Tree")
testModel(mlpy.MaximumLikelihoodC(), X, Y, "Maximum Likelihood Classifier")
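
`testModel` is a project helper that is not shown. A plausible sketch, assuming it reports holdout accuracy (the 80/20 split and the metric are guesses, not the original implementation):

import numpy as np

def testModel(model, X, Y, name):
    # Hypothetical: train on the first 80% of the data and
    # report accuracy on the remaining 20%.
    X, Y = np.asarray(X), np.asarray(Y)
    cut = int(0.8 * len(Y))
    model.learn(X[:cut], Y[:cut])
    yhat = np.asarray(model.pred(X[cut:]))
    print("%s: %.1f%% accuracy" % (name, 100.0 * np.mean(yhat == Y[cut:])))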
Example #5
import mlpy

BEST = {
    'knn': mlpy.KNN(1),
    'tree': mlpy.ClassTree(stumps=0, minsize=0),
    'svm': mlpy.LibSvm(svm_type='c_svc',
                       kernel=mlpy.KernelGaussian(10),
                       C=10000)
}
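
A sketch of how a dictionary like `BEST` might be driven, assuming `xtrain`, `ytrain`, `xtest`, and `ytest` arrays already exist:

import numpy as np

for name, clf in BEST.items():
    clf.learn(xtrain, ytrain)
    acc = 100.0 * np.mean(np.asarray(clf.pred(xtest)) == ytest)
    print("%s: %.1f%% accuracy" % (name, acc))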
Example #6
def BuildModel(self, data, labels):
    # Create and train the classifier.
    DC = mlpy.ClassTree(stumps=self.stumps, minsize=self.minsize)
    DC.learn(data, labels)
    return DC
Example #7
import numpy as np
import mlpy

# shuffle_in_unison_inplace() and printStats() are helpers defined
# elsewhere in this project.
def main(xfile, yfile, algorithm=""):
    x = np.loadtxt(open(xfile, "rb"), delimiter=" ")
    y = np.loadtxt(open(yfile, "rb"), delimiter=",")

    x, y = shuffle_in_unison_inplace(x, y)

    tr_size = 6000
    te_size = 4000

    xtrain = x[0:tr_size]
    xtest = x[tr_size:(tr_size + te_size)]

    ytrain = y[0:tr_size]
    ytest = y[tr_size:(tr_size + te_size)]

    algorithms = ['l1r_l2loss_svc', 'l1r_lr']
    for algorithm in algorithms:
        print algorithm
        ftest = open(str(algorithm) + '_Test.csv', 'w')
        ftrain = open(str(algorithm) + '_Train.csv', 'w')
        ftest.write("Weight beta Accuracy_on_winning_bids Accuracy_on_nonwinning_bids\n")
        ftrain.write("Weight beta Accuracy_on_winning_bids Accuracy_on_nonwinning_bids\n")
        for i in range(1, 10):
            for b in range(1, 20):
                beta = .2 + .1 * b
                w = {0: 1, 1: i * .5}
                solver = mlpy.LibLinear(solver_type=algorithm, C=beta, eps=0.01, weight=w)
                solver.learn(xtrain, ytrain)

                yhat = solver.pred(xtrain)
                printStats(ytrain, yhat, algorithm, .0 + i * .2, beta, "train errors", ftrain)

                yhat = solver.pred(xtest)
                printStats(ytest, yhat, algorithm, .0 + i * .2, beta, "test errors", ftest)
        ftest.close()
        ftrain.close()

    # Note: the output files say "Kmeans", but mlpy.KNN is k-nearest
    # neighbours, not k-means clustering.
    print "kmeans"
    ftest = open("Kmeans" + '_Test.csv', 'w')
    ftrain = open("Kmeans" + '_Train.csv', 'w')
    ftest.write("Weight beta Accuracy_on_winning_bids Accuracy_on_nonwinning_bids\n")
    ftrain.write("Weight beta Accuracy_on_winning_bids Accuracy_on_nonwinning_bids\n")
    solver = mlpy.KNN(2)
    solver.learn(xtrain, ytrain)
    yhat = solver.pred(xtrain)
    printStats(ytrain, yhat, "Kmeans", "none", "none", "train errors", ftrain)
    yhat = solver.pred(xtest)
    printStats(ytest, yhat, "Kmeans", "none", "none", "test errors", ftest)
    ftest.close()
    ftrain.close()

    print "Class"
    ftest = open("Classification" + '_Test.csv', 'w')
    ftrain = open("Classification" + '_Train.csv', 'w')
    ftest.write("Weight beta Accuracy_on_winning_bids Accuracy_on_nonwinning_bids\n")
    ftrain.write("Weight beta Accuracy_on_winning_bids Accuracy_on_nonwinning_bids\n")
    solver = mlpy.ClassTree()
    solver.learn(xtrain, ytrain)
    yhat = solver.pred(xtrain)
    printStats(ytrain, yhat, "Classification Tree", "none", "none", "train errors", ftrain)
    yhat = solver.pred(xtest)
    printStats(ytest, yhat, "Classification Tree", "none", "none", "test errors", ftest)
    ftest.close()
    ftrain.close()
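
`printStats` is not shown. Given the CSV header the script writes, a plausible implementation computes per-class accuracy for winning (label 1) and non-winning (label 0) bids; the label encoding and output format here are assumptions:

import numpy as np

def printStats(ytrue, yhat, algorithm, weight, beta, label, fd):
    # Hypothetical helper matching the header
    # "Weight beta Accuracy_on_winning_bids Accuracy_on_nonwinning_bids".
    ytrue, yhat = np.asarray(ytrue), np.asarray(yhat)
    acc_win = np.mean(yhat[ytrue == 1] == 1)
    acc_non = np.mean(yhat[ytrue == 0] == 0)
    fd.write("%s %s %.3f %.3f\n" % (weight, beta, acc_win, acc_non))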
Example #8
    golub = mlpy.Golub()
    golub.learn(x, y)
    test = golub.pred(xcontrol)  # test points
    # Use 100.0 so the division is floating-point under Python 2;
    # otherwise %.1f always prints a whole number.
    print 'Golub: %.1f percent predicted' % (
        100.0 * len(test[test == ycontrol]) / len(test))
    dic['golub'].append(100.0 * len(test[test == ycontrol]) / len(test))

    knn = mlpy.KNN(k=7)
    knn.learn(x, y)
    test = knn.pred(xcontrol)  # test points
    print 'KNN: %.1f percent predicted' % (
        100.0 * len(test[test == ycontrol]) / len(test))
    dic['knn'].append(100.0 * len(test[test == ycontrol]) / len(test))

    tree = mlpy.ClassTree(stumps=0, minsize=100)
    tree.learn(x, y)
    test = tree.pred(xcontrol)  # test points
    print 'ClassTree: %.1f percent predicted' % (
        100.0 * len(test[test == ycontrol]) / len(test))
    dic['tree'].append(100.0 * len(test[test == ycontrol]) / len(test))

    # 'ld' is a classifier defined earlier in the full script (elided here).
    rank = mlpy.rfe_w2(x, y, p=0, classifier=ld)
    print ''
    print fnames
    print rank

new = {}
for k in dic.keys():
    new[k] = {
        'avg': np.round(np.average(dic[k]), 2),
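
The repeated accuracy expression above can be written more compactly with numpy; an equivalent one-liner (not part of the original):

accuracy = 100.0 * np.mean(test == ycontrol)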
Example #9
                        basic.testing_data[i])):
                    classified += 1
            fd.write("%s,%s,%d,%d,%d\n" %
                     (datasets[d][0], "LDAC", k, size, classified))

            knn = mlpy.KNN(k=3)
            knn.learn(basic.training_data, basic.training_label)
            classified = 0
            for i in range(len(basic.testing_label)):
                if int(basic.testing_label[i]) == int(knn.pred(
                        basic.testing_data[i])):
                    classified += 1
            fd.write("%s,%s,%d,%d,%d\n" %
                     (datasets[d][0], "KNN", k, size, classified))

            tree = mlpy.ClassTree(minsize=10)
            tree.learn(basic.training_data, basic.training_label)
            classified = 0
            for i in range(len(basic.testing_label)):
                if int(basic.testing_label[i]) == int(tree.pred(
                        basic.testing_data[i])):
                    classified += 1
            fd.write("%s,%s,%d,%d,%d\n" %
                     (datasets[d][0], "GINI", k, size, classified))

            ml = mlpy.MaximumLikelihoodC()
            ml.learn(basic.training_data, basic.training_label)
            classified = 0
            for i in range(len(basic.testing_label)):
                if int(basic.testing_label[i]) == int(ml.pred(
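
The per-sample prediction loops in this example can be vectorized, since mlpy predictors also accept a 2-D array of test points. A sketch of the equivalent count, reusing the variables above:

import numpy as np

# Predict all test points at once and count the matches.
pred = np.asarray(tree.pred(basic.testing_data), dtype=int)
truth = np.asarray(basic.testing_label, dtype=int)
classified = int((pred == truth).sum())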