def BuildModel(self, data, labels):
    """Build and train an mlpy classification-tree model.

    Only hyper-parameters that were explicitly configured (non-falsy
    self.stumps / self.minsize) are forwarded to the constructor, so
    mlpy's own defaults apply otherwise.
    """
    params = {
        key: value
        for key, value in (("stumps", self.stumps), ("minsize", self.minsize))
        if value
    }
    classifier = mlpy.ClassTree(**params)
    classifier.learn(data, labels)
    return classifier
def fitDecisionTree(data):
    '''Build a decision tree classifier'''
    # data is a (features, labels) pair; nodes with fewer than 1000
    # samples are not split further.
    features, labels = data[0], data[1]
    classifier = ml.ClassTree(minsize=1000)
    classifier.learn(features, labels)
    return classifier
def metric(self):
    """Time the training of an mlpy ClassTree and compute quality metrics.

    Returns a dict that always contains 'runtime'; when ground-truth
    test labels are present (len(self.data) == 3) classification metrics
    and MSE are added as well.
    """
    total_timer = Timer()
    with total_timer:
        model = mlpy.ClassTree(**self.build_opts)
        model.learn(self.data_split[0], self.data_split[1])
        # Predict on the test features when a test set was supplied.
        if len(self.data) >= 2:
            predictions = model.pred(self.data[1])

    results = {}
    results["runtime"] = total_timer.ElapsedTime()
    if len(self.data) == 3:
        # Ground-truth labels available: score the predictions.
        confusion = Metrics.ConfusionMatrix(self.data[2], predictions)
        results['ACC'] = Metrics.AverageAccuracy(confusion)
        results['MCC'] = Metrics.MCCMultiClass(confusion)
        results['Precision'] = Metrics.AvgPrecision(confusion)
        results['Recall'] = Metrics.AvgRecall(confusion)
        results['MSE'] = Metrics.SimpleMeanSquaredError(
            self.data[2], predictions)
    return results
# Interactive driver (Python 2): parse an XML corpus into a feature
# matrix, then benchmark a battery of mlpy classifiers on it.
directory = raw_input("What directory are the XML files located:\n")
regexParse = raw_input("How would you like to parse the words, leave it blank if you would like to parse by whitespace:\n")
# An empty answer means "split on whitespace" downstream.
if regexParse == "":
    regexParse = None
# Build the vocabulary and the labelled data points from the corpus.
vocab, indexToWord, fullDataPoints = parseDataPoints(directory, regexParse)
X, Y = packageData(fullDataPoints, regexParse, vocab, indexToWord)
# Run every classifier against the same (X, Y) data set.
for model, label in (
    (mlpy.Perceptron(alpha=0.1, thr=0.05, maxiters=1000), "Perceptron"),
    (mlpy.ElasticNetC(lmb=0.01, eps=0.001), "ElasticNet"),
    (mlpy.LibLinear(solver_type='l2r_l2loss_svc_dual', C=1), "LibLinear"),
    (mlpy.DLDA(delta=0.1), "DLDA"),
    (mlpy.Golub(), "Golub"),
    (mlpy.Parzen(), "Parzen"),
    (mlpy.KNN(2), "KNN"),
    (mlpy.ClassTree(), "Classification Tree"),
    (mlpy.MaximumLikelihoodC(), "Maximum Likelihood Classifer"),  # (sic)
):
    testModel(model, X, Y, label)
import mlpy

# Pre-built classifier instances keyed by short algorithm name —
# presumably the best configurations found during earlier tuning
# (TODO confirm provenance of these hyper-parameters).
BEST = {
    # 1-nearest-neighbour classifier.
    'knn': mlpy.KNN(1),
    # Full decision tree: no stumps, no minimum node size.
    'tree': mlpy.ClassTree(stumps=0, minsize=0),
    # C-SVM with a Gaussian kernel (sigma=10) and a very large C.
    'svm': mlpy.LibSvm(svm_type='c_svc', kernel=mlpy.KernelGaussian(10), C=10000)
}
def BuildModel(self, data, labels):
    """Create an mlpy classification tree from the configured
    hyper-parameters (self.stumps, self.minsize) and train it on the
    given data/labels."""
    classifier = mlpy.ClassTree(stumps=self.stumps, minsize=self.minsize)
    classifier.learn(data, labels)
    return classifier
def main(xfile,yfile,algorithm=""):
    # Python 2 benchmark driver: trains several mlpy classifiers on a
    # 6000/4000 train/test split and logs per-class accuracy to CSV files.
    #
    # NOTE(review): the `algorithm` parameter is never read — it is
    # immediately shadowed by the `for algorithm in algorithms` loop below.
    x = np.loadtxt(open(xfile,"rb"),delimiter=" ")
    y = np.loadtxt(open(yfile,"rb"),delimiter=",")
    # Shuffle features and labels together so the split below is random.
    x,y = shuffle_in_unison_inplace(x,y)
    tr_size = 6000
    te_size = 4000
    xtrain = x[0:tr_size]
    xtest = x[tr_size:(tr_size+te_size)]
    ytrain = y[0:tr_size]
    ytest = y[tr_size:(tr_size+te_size)]
    algorithms = ['l1r_l2loss_svc','l1r_lr']
    for algorithm in algorithms:
        print algorithm
        # One pair of CSV logs per LibLinear solver type.
        ftest = open(str(algorithm) +'_Test.csv','w')
        ftrain = open(str(algorithm) +'_Train.csv','w')
        ftest.write("Weight beta Accuracy_on_winning_bids Accuracy_on_nonwinning_bids\n")
        ftrain.write("Weight beta Accuracy_on_winning_bids Accuracy_on_nonwinning_bids\n")
        # Grid search over the class-1 weight (i) and regularisation C (beta).
        for i in range(1,10):
            for b in range(1,20):
                beta = .2 + .1*b
                w={0:1, 1:(+i*.5)}
                # NOTE(review): the weight actually passed to the solver is
                # i*.5, but the weight reported to printStats is .0+i*.2 —
                # confirm which one is intended.
                solver = mlpy.LibLinear(solver_type=algorithm, C=beta, eps=0.01, weight=w)
                solver.learn(xtrain, ytrain)
                yhat = solver.pred(xtrain)
                printStats(ytrain,yhat,algorithm,.0+i*.2,beta,"train errors",ftrain)
                yhat = solver.pred(xtest)
                printStats(ytest,yhat,algorithm,.0+i*.2,beta,"test errors", ftest)
        ftest.close()
        ftrain.close()
    # KNN baseline — NOTE(review): labelled "Kmeans" in the output files
    # but it is mlpy.KNN(2), not k-means clustering.
    print "kmeans"
    ftest = open("Kmeans"+'_Test.csv','w')
    ftrain = open("Kmeans" +'_Train.csv','w')
    ftest.write("Weight beta Accuracy_on_winning_bids Accuracy_on_nonwinning_bids\n")
    ftrain.write("Weight beta Accuracy_on_winning_bids Accuracy_on_nonwinning_bids\n")
    solver = mlpy.KNN(2)
    solver.learn(xtrain, ytrain)
    yhat = solver.pred(xtrain)
    printStats(ytrain,yhat,"Kmeans","none","none","train errors", ftrain)
    yhat = solver.pred(xtest)
    printStats(ytest,yhat,"Kmeans","none","none","test errors", ftest)
    ftest.close()
    ftrain.close()
    # Decision-tree baseline with default ClassTree settings.
    ftest = open("Classification" +'_Test.csv','w')
    print "Class"
    ftrain = open("Classification"+'_Train.csv','w')
    ftest.write("Weight beta Accuracy_on_winning_bids Accuracy_on_nonwinning_bids\n")
    ftrain.write("Weight beta Accuracy_on_winning_bids Accuracy_on_nonwinning_bids\n")
    solver = mlpy.ClassTree()
    solver.learn(xtrain, ytrain)
    yhat = solver.pred(xtrain)
    printStats(ytrain,yhat,"Classification Tree","none","none","train errors", ftrain)
    yhat = solver.pred(xtest)
    printStats(ytest,yhat,"Classification Tree","none","none","test errors", ftest)
    ftest.close()
    ftrain.close()
golub = mlpy.Golub() golub.learn(x, y) test = golub.pred(xcontrol) # test points print 'Golub: %.1f percent predicted' % ( 100 * len(test[test == ycontrol]) / len(test)) dic['golub'].append(100 * len(test[test == ycontrol]) / len(test)) knn = mlpy.KNN(k=7) knn.learn(x, y) test = knn.pred(xcontrol) # test points print 'KNN: %.1f percent predicted' % (100 * len(test[test == ycontrol]) / len(test)) dic['knn'].append(100 * len(test[test == ycontrol]) / len(test)) tree = mlpy.ClassTree(stumps=0, minsize=100) tree.learn(x, y) test = tree.pred(xcontrol) # test points print 'ClassTree: %.1f percent predicted' % ( 100 * len(test[test == ycontrol]) / len(test)) dic['tree'].append(100 * len(test[test == ycontrol]) / len(test)) rank = mlpy.rfe_w2(x, y, p=0, classifier=ld) print '' print fnames print rank new = {} for k in dic.keys(): new[k] = { 'avg': np.round(np.average(dic[k]), 2),
basic.testing_data[i])): classified += 1 fd.write("%s,%s,%d,%d,%d\n" % (datasets[d][0], "LDAC", k, size, classified)) knn = mlpy.KNN(k=3) knn.learn(basic.training_data, basic.training_label) classified = 0 for i in range(len(basic.testing_label)): if (int)(basic.testing_label[i]) == (int)(knn.pred( basic.testing_data[i])): classified += 1 fd.write("%s,%s,%d,%d,%d\n" % (datasets[d][0], "KNN", k, size, classified)) tree = mlpy.ClassTree(minsize=10) tree.learn(basic.training_data, basic.training_label) classified = 0 for i in range(len(basic.testing_label)): if (int)(basic.testing_label[i]) == (int)(tree.pred( basic.testing_data[i])): classified += 1 fd.write("%s,%s,%d,%d,%d\n" % (datasets[d][0], "GINI", k, size, classified)) ml = mlpy.MaximumLikelihoodC() ml.learn(basic.training_data, basic.training_label) classified = 0 for i in range(len(basic.testing_label)): if (int)(basic.testing_label[i]) == (int)(ml.pred( basic.testing_data[i])):