def testSetM(self):
    """Check that changing M via setM alters the cross-validation error."""
    tree = DecisionTree()
    tree.setMinSplit(5)
    numFolds = 3

    baselineError, baselineVar = tree.evaluateCv(self.X, self.y, numFolds)

    tree.setM(100)
    changedError, changedVar = tree.evaluateCv(self.X, self.y, numFolds)

    # If M has any effect on the learnt trees the CV errors should differ.
    self.assertNotEqual(baselineError, changedError)
def testPredict2(self):
    """Tune DecisionTree parameters to maximise AUC on the IGF1_0-Haar data.

    Loads the dataset, derives a class weight from the label frequencies,
    runs 3-fold cross-validation and logs the mean/variance of the AUC.
    """
    # NOTE(review): a second method named testPredict2 is defined later in
    # this class and shadows this one, so unittest never runs this version —
    # confirm and rename one of them.
    dataDir = PathDefaults.getDataDir()
    fileName = dataDir + "IGF1_0-Haar.npy"
    XY = numpy.load(fileName)
    # Last column holds the labels; the rest are features.
    X = XY[:, 0:XY.shape[1]-1]
    y = XY[:, XY.shape[1]-1].ravel()

    # Fraction of examples in class 0, used as the positive-class weight.
    # numpy.int was deprecated in NumPy 1.20 and removed in 1.24; the
    # builtin int is the documented replacement.
    weight = numpy.bincount(numpy.array(y, int))[0]/float(y.shape[0])
    #weight = 0.5
    #weight = 0.9
    folds = 3
    decisionTree = DecisionTree()
    decisionTree.setWeight(weight)
    decisionTree.setMaxDepth(50)
    #decisionTree.setMinSplit(100)
    decisionTree.setM(50)
    mean, var = decisionTree.evaluateCv(X, y, folds, Evaluator.auc)
    logging.debug("AUC = " + str(mean))
    logging.debug("Var = " + str(var))
def testPredict2(self):
    """Tune DecisionTree parameters to maximise AUC on the IGF1_0-Haar data.

    Loads the dataset, derives a class weight from the label frequencies,
    runs 3-fold cross-validation and logs the mean/variance of the AUC.
    """
    dataDir = PathDefaults.getDataDir()
    fileName = dataDir + "IGF1_0-Haar.npy"
    XY = numpy.load(fileName)
    # Last column holds the labels; the rest are features.
    X = XY[:, 0:XY.shape[1] - 1]
    y = XY[:, XY.shape[1] - 1].ravel()

    # Fraction of examples in class 0, used as the positive-class weight.
    # numpy.int was deprecated in NumPy 1.20 and removed in 1.24; the
    # builtin int is the documented replacement.
    weight = numpy.bincount(numpy.array(y, int))[0] / float(y.shape[0])
    #weight = 0.5
    #weight = 0.9
    folds = 3
    decisionTree = DecisionTree()
    decisionTree.setWeight(weight)
    decisionTree.setMaxDepth(50)
    #decisionTree.setMinSplit(100)
    decisionTree.setM(50)
    mean, var = decisionTree.evaluateCv(X, y, folds, Evaluator.auc)
    logging.debug("AUC = " + str(mean))
    logging.debug("Var = " + str(var))