def testCvPrune(self):
    """Check cvPrune/repPrune on a decision tree fitted to synthetic regression data.

    The test asserts three things:

    * after ``setGamma(1000)`` and ``cvPrune`` the tree has the same number
      of vertices as the unpruned tree;
    * after ``setGamma(100)`` and ``cvPrune`` every vertex id of the tree
      is also a vertex id of the unpruned tree;
    * after re-learning and ``repPrune`` on the validation split, the test
      RMSE is no larger than the RMSE of the unpruned tree.
    """
    numExamples = 500
    X, y = data.make_regression(numExamples)
    y = Standardiser().standardiseArray(y)

    # numpy.round returns a float; slice bounds must be integers, so convert
    # explicitly (recent NumPy raises on non-integer indices).
    numTrain = int(numpy.round(numExamples * 0.33))
    numValid = int(numpy.round(numExamples * 0.33))
    validEnd = numTrain + numValid

    trainX = X[0:numTrain, :]
    trainY = y[0:numTrain]
    validX = X[numTrain:validEnd, :]
    validY = y[numTrain:validEnd]
    testX = X[validEnd:, :]
    testY = y[validEnd:]

    learner = DecisionTreeLearner()
    learner.learnModel(trainX, trainY)
    error1 = Evaluator.rootMeanSqError(learner.predict(testX), testY)

    # Keep a copy so the pruned trees can be compared against the original.
    unprunedTree = learner.tree.copy()

    learner.setGamma(1000)
    learner.cvPrune(trainX, trainY)
    self.assertEqual(unprunedTree.getNumVertices(), learner.tree.getNumVertices())

    learner.setGamma(100)
    learner.cvPrune(trainX, trainY)

    # Test if pruned tree is subtree of current: fetch the reference ids
    # once rather than on every iteration.
    unprunedVertexIds = unprunedTree.getAllVertexIds()
    for vertexId in learner.tree.getAllVertexIds():
        self.assertIn(vertexId, unprunedVertexIds)

    # The error should be better after pruning
    learner.learnModel(trainX, trainY)
    learner.repPrune(validX, validY)
    error2 = Evaluator.rootMeanSqError(learner.predict(testX), testY)

    self.assertGreaterEqual(error1, error2)
trainX = X[0:numTrainExamples, :] trainY = y[0:numTrainExamples] validX = X[numTrainExamples:numTrainExamples+numValidExamples, :] validY = y[numTrainExamples:numTrainExamples+numValidExamples] testX = X[numTrainExamples+numValidExamples:, :] testY = y[numTrainExamples+numValidExamples:] learner = DecisionTreeLearner(minSplit=1, maxDepth=50) learner.learnModel(trainX, trainY) #Seem to be optimal alphaThreshold = 100.0 learner.setAlphaThreshold(alphaThreshold) learner.repPrune(validX, validY) #learner.tree = learner.tree.cut(3) predY = learner.predict(testX) plt.figure(0) plt.scatter(testX[:, 0], testX[:, 1], c=testY, s=50, vmin=0, vmax=1) plt.colorbar() plt.figure(1) plt.scatter(testX[:, 0], testX[:, 1], c=predY, s=50, vmin=0, vmax=1) plt.colorbar() colormap = matplotlib.cm.get_cmap() def displayTree(learner, vertexId, minX0, maxX0, minX1, maxX1, colormap):