示例#1
0
    def learnModel(self, X, Y):
        """
        Learn a model for a set of examples given as the rows of the matrix X,
        with corresponding labels given in the elements of 1D array Y.

        :param X: A matrix with examples as rows
        :type X: :class:`ndarray`

        :param Y: A vector of binary labels as a 1D array
        :type Y: :class:`ndarray`
        """
        Parameter.checkClass(X, numpy.ndarray)
        Parameter.checkClass(Y, numpy.ndarray)
        Parameter.checkArray(X)
        Parameter.checkArray(Y)
        labels = numpy.unique(Y)
        if labels.shape[0] != 2:
            raise ValueError("Can only accept binary labelled data: " + str(labels))
        if (labels != numpy.array([-1, 1])).any(): 
            raise ValueError("Labels must be -1/+1: " + str(labels))
        if self.featureSize == None: 
            featureSize = numpy.sqrt(X.shape[1])/float(X.shape[1])
        else: 
            featureSize = self.featureSize

        tree = DictTree()
        trainInds = numpy.arange(Y.shape[0])
        featureInds = numpy.sort(numpy.random.permutation(X.shape[1])[0:int(numpy.round(X.shape[1]*featureSize))]) 

        #Seed the tree
        node = RankNode(trainInds, featureInds)
        tree.setVertex((0, 0), node)

        for d in range(self.maxDepth):
            for k in range(2**d):
                if tree.vertexExists((d, k)):
                    node = tree.getVertex((d, k))

                    if not node.isPure() and not node.isLeafNode():
                        self.splitNode(tree, X, Y, d, k)

        self.tree = tree 
示例#2
0
    def testSplitNode(self):
        d = 0
        k = 0
        maxDepth = 1
        inds = numpy.arange(self.y.shape[0])
        treeRank = TreeRank(self.leafRanklearner)
        treeRank.setMaxDepth(maxDepth)

        node = RankNode(inds, numpy.arange(self.X.shape[1]))

        tree = DictTree()
        tree.setVertex((0, 0), node)
        tree = treeRank.splitNode(tree, self.X, self.y, d, k)

        self.assertEquals(tree.getNumVertices(), 3)
        self.assertEquals(tree.getNumEdges(), 2)
        self.assertEquals(tree.getRootId(), (0, 0))
        self.assertTrue(not tree.getVertex((0, 0)).isLeafNode())
        self.assertTrue(tree.getVertex((1, 0)).isLeafNode())
        self.assertTrue(tree.getVertex((1, 1)).isLeafNode())

        self.assertTrue(tree.depth() <= maxDepth)
示例#3
0
    def splitNode(self, tree, X, Y, d, k):
        """
        Take a node in a tree and classify in order to split it into 2 
        """
        if self.featureSize == None: 
            featureSize = numpy.sqrt(X.shape[1])/float(X.shape[1])
        else: 
            featureSize = self.featureSize       
        
        node = tree.getVertex((d, k))
        inds = node.getTrainInds()
        featureInds = node.getFeatureInds()
        alpha =  numpy.sum(Y[inds]==self.bestResponse)/float(inds.shape[0])

        #Now classify

        #We have the following condition if we need to do cross validation within the node
        if Util.histogram(Y[inds])[0].min() > self.minLabelCount:
            self.leafRanklearner.setWeight(1-alpha)
            leafRank = self.leafRanklearner.generateLearner(X, Y)
        else:
            leafRank = MajorityPredictor()

        node.setLeafRank(leafRank)
        leafRank.learnModel(X[inds, :][:, featureInds], Y[inds])
        predY = leafRank.predict(X[inds, :][:, featureInds])
        
        if numpy.unique(predY).shape[0] == 2 and inds.shape[0] >= self.minSplit:
            leftInds = inds[predY == self.bestResponse]
            featureInds = numpy.sort(numpy.random.permutation(X.shape[1])[0:int(numpy.round(X.shape[1]*featureSize))])
            leftNode = RankNode(leftInds, featureInds)
            leftNode.setPure(numpy.unique(Y[leftInds]).shape[0] <= 1)
            leftNode.setIsLeafNode(d==self.maxDepth-1 or leftNode.isPure())
            leftNode.setScore((1 - float(2*k)/2**(d+1))*2**self.maxDepth)
            tree.addEdge((d, k), (d+1, 2*k))
            tree.setVertex((d+1, 2*k), leftNode)

            rightInds = inds[predY != self.bestResponse]
            featureInds = numpy.sort(numpy.random.permutation(X.shape[1])[0:int(numpy.round(X.shape[1]*featureSize))])
            rightNode = RankNode(rightInds, featureInds)
            rightNode.setPure(numpy.unique(Y[rightInds]).shape[0] <= 1)
            rightNode.setIsLeafNode(d==self.maxDepth-1 or rightNode.isPure())
            rightNode.setScore((1 - float(2*k+1)/2**(d+1))*2**self.maxDepth)
            tree.addEdge((d, k), (d+1, 2*k+1))
            tree.setVertex((d+1, 2*k+1), rightNode)
        else:
            node.setIsLeafNode(True)
            node.setScore((1 - float(k)/2**d)*2**self.maxDepth)
            
        return tree