示例#1
0
    def testFeaturesSize(self):
        """
        Check that, with a feature size of 0.5, every vertex of the learned
        tree stores the expected number of feature indices and that those
        indices contain no duplicates.
        """
        treeRank = TreeRank(self.leafRanklearner)
        featureSize = 0.5
        treeRank.setFeatureSize(featureSize)
        treeRank.learnModel(self.X, self.y)
        # The scores are not inspected; the call only checks that prediction
        # runs on the learned model.
        treeRank.predict(self.X)

        tree = treeRank.getTree()
        # Loop-invariant: expected number of features kept at each vertex.
        expectedNumFeatures = numpy.round(featureSize * self.X.shape[1])

        for vertexId in tree.getAllVertexIds():
            featureInds = tree.getVertex(vertexId).getFeatureInds()

            # assertEqual replaces the assertEquals alias, which was
            # deprecated and then removed in Python 3.12.
            self.assertEqual(featureInds.shape[0], expectedNumFeatures)
            self.assertEqual(numpy.unique(featureInds).shape[0], featureInds.shape[0])
示例#2
0
    def learnModel(self, X, y):
        """
        Fit ``self.numTrees`` TreeRank models, each on a random subsample of
        the rows of X together with the matching entries of y.

        The subsample holds ``round(self.sampleSize * X.shape[0])`` rows, drawn
        with replacement when ``self.sampleReplace`` is true and without
        replacement otherwise. On success the fitted trees are stored in
        ``self.forestList`` and the row indices used for each tree in
        ``self.indList``.

        :param X: A matrix with examples as rows
        :type X: :class:`ndarray`

        :param y: A vector of labels, which must take exactly the values -1 and +1
        :type y: :class:`ndarray`

        :raises ValueError: if y does not contain exactly the two labels -1 and +1
        """
        # Validate the types of both inputs before validating their contents.
        for arr in (X, y):
            Parameter.checkClass(arr, numpy.ndarray)
        for arr in (X, y):
            Parameter.checkArray(arr)

        labels = numpy.unique(y)
        if labels.shape[0] != 2:
            raise ValueError("Can only accept binary labelled data")
        labelMismatch = labels != numpy.array([-1, 1])
        if labelMismatch.any():
            raise ValueError("Labels must be -1/+1: " + str(labels))

        numExamples = X.shape[0]
        numSampledExamples = int(numpy.round(self.sampleSize * numExamples))
        trees = []
        sampledInds = []

        for treeIndex in range(self.numTrees):
            Util.printConciseIteration(treeIndex, 1, self.numTrees, "Tree: ")

            if not self.sampleReplace:
                # Without replacement: take a prefix of a random permutation.
                inds = numpy.random.permutation(numExamples)[0:numSampledExamples]
            else:
                # With replacement: draw indices independently.
                inds = numpy.random.randint(0, numExamples, numSampledExamples)

            learner = TreeRank(self.leafRanklearner)
            learner.setMaxDepth(self.maxDepth)
            learner.setMinSplit(self.minSplit)
            learner.setFeatureSize(self.featureSize)
            learner.setBestResponse(self.bestResponse)
            learner.learnModel(X[inds, :], y[inds])

            trees.append(learner)
            sampledInds.append(inds)

        # Only publish the results once every tree has been learned.
        self.forestList, self.indList = trees, sampledInds
示例#3
0
    def learnModel(self, X, y):
        """
        Train the forest: build ``self.numTrees`` TreeRank learners, fitting
        each one to a randomly chosen subset of the rows of X and the
        corresponding elements of y.

        Each subset contains ``round(self.sampleSize * X.shape[0])`` indices,
        sampled with replacement if ``self.sampleReplace`` is true, otherwise
        without. After all trees are learned, ``self.forestList`` holds the
        learners and ``self.indList`` the index arrays they were trained on.

        :param X: A matrix with examples as rows
        :type X: :class:`ndarray`

        :param y: A vector of labels; the only accepted label values are -1 and +1
        :type y: :class:`ndarray`

        :raises ValueError: if the distinct labels in y are not exactly -1 and +1
        """
        Parameter.checkClass(X, numpy.ndarray)
        Parameter.checkClass(y, numpy.ndarray)
        Parameter.checkArray(X)
        Parameter.checkArray(y)

        distinctLabels = numpy.unique(y)
        if distinctLabels.shape[0] != 2:
            raise ValueError("Can only accept binary labelled data")
        differs = distinctLabels != numpy.array([-1, 1])
        if differs.any():
            raise ValueError("Labels must be -1/+1: " + str(distinctLabels))

        rowCount = X.shape[0]
        subsetSize = int(numpy.round(self.sampleSize*rowCount))
        learnedTrees, usedInds = [], []

        for t in range(self.numTrees):
            Util.printConciseIteration(t, 1, self.numTrees, "Tree: ")

            # Choose the training rows for this tree.
            if self.sampleReplace:
                rowInds = numpy.random.randint(0, rowCount, subsetSize)
            else:
                shuffled = numpy.random.permutation(rowCount)
                rowInds = shuffled[0:subsetSize]

            # Configure a fresh learner from the forest's settings and fit it.
            tree = TreeRank(self.leafRanklearner)
            tree.setMaxDepth(self.maxDepth)
            tree.setMinSplit(self.minSplit)
            tree.setFeatureSize(self.featureSize)
            tree.setBestResponse(self.bestResponse)
            tree.learnModel(X[rowInds, :], y[rowInds])

            learnedTrees.append(tree)
            usedInds.append(rowInds)

        # Results are stored only after the whole loop has completed.
        self.forestList = learnedTrees
        self.indList = usedInds
示例#4
0
    def testFeaturesSize(self):
        """
        Learn a TreeRank model with a feature size of 0.5 and verify, for each
        vertex of the resulting tree, that the number of stored feature
        indices matches the rounded fraction of columns of X and that the
        indices are all distinct.
        """
        treeRank = TreeRank(self.leafRanklearner)
        featureSize = 0.5
        treeRank.setFeatureSize(featureSize)
        treeRank.learnModel(self.X, self.y)
        # Run prediction to make sure it works on the learned model; the
        # returned scores are not checked here.
        treeRank.predict(self.X)

        tree = treeRank.getTree()
        # Computed once, since it does not change between vertices.
        expectedNumFeatures = numpy.round(featureSize * self.X.shape[1])

        for vertexId in tree.getAllVertexIds():
            node = tree.getVertex(vertexId)
            featureInds = node.getFeatureInds()

            # Use assertEqual: the assertEquals alias was deprecated and
            # removed in Python 3.12.
            self.assertEqual(featureInds.shape[0], expectedNumFeatures)
            self.assertEqual(
                numpy.unique(featureInds).shape[0],
                featureInds.shape[0])