Example 1
    def testBinaryError(self):
        testY = numpy.array([1, 1, -1, 1])
        predY = numpy.array([-1, 1, -1, 1])
        predY2 = numpy.array([-1, -1, -1, 1])
        predY3 = numpy.array([-1, -1, 1, -1])

        self.assertTrue(Evaluator.binaryError(testY, predY) == 0.25)
        self.assertTrue(Evaluator.binaryError(testY, testY) == 0.0)
        self.assertTrue(Evaluator.binaryError(predY, predY) == 0.0)

        self.assertTrue(Evaluator.binaryError(testY, predY2) == 0.5)
        self.assertTrue(Evaluator.binaryError(testY, predY3) == 1.0)
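
For reference, a minimal sketch of what Evaluator.binaryError presumably computes, namely the fraction of positions where prediction and truth disagree; the name and signature mirror the tests above, but the body here is an assumption:

import numpy

def binaryError(testY, predY):
    # Assumed behaviour: fraction of labels where the prediction disagrees
    # with the true label, matching the assertions above.
    return numpy.sum(testY != predY) / float(testY.shape[0])

# One mismatch out of four labels gives 0.25, as in the first assertion.
print(binaryError(numpy.array([1, 1, -1, 1]), numpy.array([-1, 1, -1, 1])))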
Example 2
    def testLearnModel2(self): 
        #We want to make sure the learnt tree with gamma = 0 minimises the 
        #empirical risk 
        minSplit = 20
        maxDepth = 3
        gamma = 0.01
        learner = PenaltyDecisionTree(minSplit=minSplit, maxDepth=maxDepth, gamma=gamma, pruning=False) 
        
        #Vary sampleSize
        numpy.random.seed(21)
        learner.setSampleSize(1)           
        learner.learnModel(self.X, self.y)        
        error1 = learner.treeObjective(self.X, self.y)

        numpy.random.seed(21)
        learner.setSampleSize(5)        
        learner.learnModel(self.X, self.y)
        error2 = learner.treeObjective(self.X, self.y)

        numpy.random.seed(21)                
        learner.setSampleSize(10)       
        learner.learnModel(self.X, self.y)
        error3 = learner.treeObjective(self.X, self.y)
        
        self.assertTrue(error1 >= error2)
        self.assertTrue(error2 >= error3)
        
        #Now vary max depth 
        learner.gamma = 0         
        
        numpy.random.seed(21)
        learner.setSampleSize(1) 
        learner.minSplit = 1
        learner.maxDepth = 3 
        learner.learnModel(self.X, self.y)
        predY = learner.predict(self.X)
        error1 = Evaluator.binaryError(self.y, predY)
        
        numpy.random.seed(21)
        learner.maxDepth = 5 
        learner.learnModel(self.X, self.y)
        predY = learner.predict(self.X)
        error2 = Evaluator.binaryError(self.y, predY)
        
        numpy.random.seed(21)
        learner.maxDepth = 10 
        learner.learnModel(self.X, self.y)
        predY = learner.predict(self.X)
        error3 = Evaluator.binaryError(self.y, predY)        
        
        self.assertTrue(error1 >= error2)
        self.assertTrue(error2 >= error3)
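
For intuition, a hypothetical sketch of the kind of penalised objective treeObjective exercises: empirical risk plus a gamma-weighted complexity term, so that gamma = 0 reduces to the empirical risk alone. The exact formula inside PenaltyDecisionTree is not shown in this snippet, so the weighting below is an assumption:

import numpy

def penalisedObjective(predY, y, numVertices, gamma):
    # Assumed form: (1 - gamma) * empirical risk + gamma * complexity.
    # With gamma = 0 this is just the training error, which is what
    # testLearnModel2 relies on when it varies maxDepth.
    empiricalRisk = numpy.sum(predY != y) / float(y.shape[0])
    return (1 - gamma) * empiricalRisk + gamma * numpy.sqrt(numVertices)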
Example 4
    def testBayesError(self):
        dataDir = PathDefaults.getDataDir() + "modelPenalisation/toy/"
        data = numpy.load(dataDir + "toyData.npz")
        gridPoints, X, y, pdfX, pdfY1X, pdfYminus1X = data["arr_0"], data["arr_1"], data["arr_2"], data["arr_3"], data["arr_4"], data["arr_5"]

        sampleSize = 100
        trainX, trainY = X[0:sampleSize, :], y[0:sampleSize]
        testX, testY = X[sampleSize:, :], y[sampleSize:]

        #We form a test set from the grid points
        gridX = numpy.zeros((gridPoints.shape[0]**2, 2))
        for m in range(gridPoints.shape[0]):
            gridX[m*gridPoints.shape[0]:(m+1)*gridPoints.shape[0], 0] = gridPoints
            gridX[m*gridPoints.shape[0]:(m+1)*gridPoints.shape[0], 1] = gridPoints[m]

        Cs = 2**numpy.arange(-5, 5, dtype=numpy.float64)
        gammas = 2**numpy.arange(-5, 5, dtype=numpy.float64)

        bestError = 1 

        for C in Cs:
            for gamma in gammas:
                svm = LibSVM(kernel="gaussian", C=C, kernelParam=gamma)
                svm.learnModel(trainX, trainY)
                predY, decisionsY = svm.predict(gridX, True)
                decisionGrid = numpy.reshape(decisionsY, (gridPoints.shape[0], gridPoints.shape[0]), order="F")
                error = ModelSelectUtils.bayesError(gridPoints, decisionGrid, pdfX, pdfY1X, pdfYminus1X)

                predY, decisionsY = svm.predict(testX, True)
                error2 = Evaluator.binaryError(testY, predY)
                print(error, error2)

                if error < bestError:
                    bestError = error
                    bestC = C
                    bestGamma = gamma

        svm = LibSVM(kernel="gaussian", C=bestC, kernelParam=bestGamma)
        svm.learnModel(trainX, trainY)
        predY, decisionsY = svm.predict(gridX, True)
        decisionGrid = numpy.reshape(decisionsY, (gridPoints.shape[0], gridPoints.shape[0]), order="F")

        plt.figure(0)
        plt.contourf(gridPoints, gridPoints, decisionGrid, 100)
        plt.colorbar()

        plt.figure(1)
        plt.scatter(X[y==1, 0], X[y==1, 1], c='r', label="+1")
        plt.scatter(X[y==-1, 0], X[y==-1, 1], c='b', label="-1")
        plt.legend()
        plt.show()
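
As an aside, the hand-rolled grid construction in this example is equivalent to numpy.meshgrid; a minimal sketch with a stand-in gridPoints array:

import numpy

gridPoints = numpy.linspace(-1, 1, 5)  # stand-in for the array loaded from toyData.npz

# Block m of gridX pairs every x value with the fixed y value gridPoints[m],
# exactly as the loop over m does above.
xx, yy = numpy.meshgrid(gridPoints, gridPoints)
gridX = numpy.column_stack([xx.ravel(), yy.ravel()])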
Example 5
def computeIdealPenalty(args):
    """
    Find the complete penalty.
    """
    (X, y, fullX, C, gamma, gridPoints, pdfX, pdfY1X, pdfYminus1X) = args

    svm = LibSVM('gaussian', gamma, C)
    svm.learnModel(X, y)
    predY = svm.predict(X)
    predFullY, decisionsY = svm.predict(fullX, True)
    decisionGrid = numpy.reshape(decisionsY, (gridPoints.shape[0], gridPoints.shape[0]), order="F")
    trueError = ModelSelectUtils.bayesError(gridPoints, decisionGrid, pdfX, pdfY1X, pdfYminus1X)
    idealPenalty = trueError - Evaluator.binaryError(predY, y)

    return idealPenalty
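
A hypothetical driver for computeIdealPenalty, mapping it over a (C, gamma) grid; the data variables reuse the names from Example 4 and are assumptions here:

paramList = []
for C in 2.0**numpy.arange(-5, 5):
    for gamma in 2.0**numpy.arange(-5, 5):
        # Argument order matches computeIdealPenalty:
        # (X, y, fullX, C, gamma, gridPoints, pdfX, pdfY1X, pdfYminus1X)
        paramList.append((trainX, trainY, gridX, C, gamma, gridPoints, pdfX, pdfY1X, pdfYminus1X))

# One ideal penalty per (C, gamma) pair
idealPenalties = [computeIdealPenalty(args) for args in paramList]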
Example 6
    def testGrowTree(self):
        startId = (0, )
        minSplit = 20
        maxDepth = 3
        gamma = 0.01
        learner = PenaltyDecisionTree(minSplit=minSplit, maxDepth=maxDepth, gamma=gamma, pruning=False)

        trainX = self.X[100:, :]
        trainY = self.y[100:]
        testX = self.X[0:100, :]
        testY = self.y[0:100]

        argsortX = numpy.zeros(trainX.shape, int)
        for i in range(trainX.shape[1]):
            argsortX[:, i] = numpy.argsort(trainX[:, i])
            argsortX[:, i] = numpy.argsort(argsortX[:, i])

        learner.tree = DictTree()
        rootNode = DecisionNode(numpy.arange(trainX.shape[0]), Util.mode(trainY))
        learner.tree.setVertex(startId, rootNode)

        #Note that this matches with the case where we create a new tree each time
        numpy.random.seed(21)
        bestError = float("inf")

        for i in range(20):
            learner.tree.pruneVertex(startId)
            learner.growTree(trainX, trainY, argsortX, startId)

            predTestY = learner.predict(testX)
            error = Evaluator.binaryError(predTestY, testY)
            #print(Evaluator.binaryError(predTestY, testY), learner.tree.getNumVertices())

            if error < bestError:
                bestError = error
                bestTree = learner.tree.copy()

            self.assertTrue(learner.tree.depth() <= maxDepth)

            for vertexId in learner.tree.nonLeaves():
                self.assertTrue(learner.tree.getVertex(vertexId).getTrainInds().shape[0] >= minSplit)

        bestError1 = bestError
        learner.tree = bestTree

        #Now we test growing a tree from a non-root vertex
        numpy.random.seed(21)
        for i in range(20):
            learner.tree.pruneVertex((0, 1))
            learner.growTree(trainX, trainY, argsortX, (0, 1))

            self.assertTrue(learner.tree.getVertex((0,)) == bestTree.getVertex((0,)))
            self.assertTrue(learner.tree.getVertex((0,0)) == bestTree.getVertex((0,0)))

            predTestY = learner.predict(testX)
            error = Evaluator.binaryError(predTestY, testY)

            if error < bestError:
                bestError = error
                bestTree = learner.tree.copy()
            #print(Evaluator.binaryError(predTestY, testY), learner.tree.getNumVertices())
        self.assertTrue(bestError1 >= bestError)
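
The double argsort applied to each column of trainX in this test is the standard rank trick: the second argsort converts sort indices into the rank of each element within its column. A minimal illustration:

import numpy

x = numpy.array([0.3, 0.1, 0.7])
ranks = numpy.argsort(numpy.argsort(x))
print(ranks)  # [1 0 2]: the rank of each entry in sorted order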
Example 7
print(numpy.sum(y==2), numpy.sum(y==0)) 

trainSplit = 0.3
numTrainExamples = int(numExamples*trainSplit)

trainX = X[0:numTrainExamples, :]
trainY = y[0:numTrainExamples]
testX = X[numTrainExamples:, :]
testY = y[numTrainExamples:]

learner = PenaltyDecisionTree(minSplit=1, maxDepth=50, pruning=False)
learner.learnModel(trainX, trainY)

predY = learner.predict(trainX)
print(Evaluator.binaryError(predY, trainY))
print(learner.getTree())


plt.figure(0)
plt.scatter(testX[:, 0], testX[:, 1], c=testY, s=50, vmin=0, vmax=2)
plt.title("Test set")
plt.colorbar()

plt.figure(1)
plt.scatter(trainX[:, 0], trainX[:, 1], c=trainY, s=50, vmin=0, vmax=2)
plt.title("Training set")
plt.colorbar()

colormap = matplotlib.cm.get_cmap()