def testFeaturesSize(self):
    """
    Check that every node of a learned tree uses a feature subset of the
    configured size, and that the subset contains no repeated indices.

    The expected subset size is ``round(featureSize * number of columns of
    self.X)``.
    """
    featureSize = 0.5

    treeRank = TreeRank(self.leafRanklearner)
    treeRank.setFeatureSize(featureSize)
    treeRank.learnModel(self.X, self.y)
    # Prediction is run only to confirm it works on the learned model; the
    # scores themselves are not inspected by this test.
    treeRank.predict(self.X)

    tree = treeRank.getTree()
    # Loop-invariant, so compute once rather than per vertex.
    expectedNumFeatures = numpy.round(featureSize * self.X.shape[1])

    for vertexId in tree.getAllVertexIds():
        featureInds = tree.getVertex(vertexId).getFeatureInds()

        # assertEqual replaces the deprecated assertEquals alias, which no
        # longer exists on unittest.TestCase in Python 3.12+.
        self.assertEqual(featureInds.shape[0], expectedNumFeatures)
        # Same count before and after de-duplication => no repeated indices.
        self.assertEqual(numpy.unique(featureInds).shape[0], featureInds.shape[0])
def learnModel(self, X, y):
    """
    Train ``self.numTrees`` TreeRank models, each on a random subsample of
    the rows of X together with the matching entries of y, and store the
    resulting trees and the row indices each one was trained on.

    :param X: A matrix with examples as rows
    :type X: :class:`ndarray`

    :param y: A vector of labels; must contain exactly the values -1 and +1
    :type y: :class:`ndarray`

    :raises ValueError: if y does not hold exactly two distinct labels, or
        if those labels are not -1 and +1
    """
    # Type checks on both inputs first, then array checks, in that order.
    for data in (X, y):
        Parameter.checkClass(data, numpy.ndarray)
    for data in (X, y):
        Parameter.checkArray(data)

    distinctLabels = numpy.unique(y)
    if distinctLabels.shape[0] != 2:
        raise ValueError("Can only accept binary labelled data")
    expectedLabels = numpy.array([-1, 1])
    if (distinctLabels != expectedLabels).any():
        raise ValueError("Labels must be -1/+1: " + str(distinctLabels))

    numExamples = X.shape[0]
    numSampled = int(numpy.round(self.sampleSize * numExamples))

    # Collected locally and only published on self once every tree is built.
    trees = []
    sampledRows = []

    for treeIndex in range(self.numTrees):
        Util.printConciseIteration(treeIndex, 1, self.numTrees, "Tree: ")

        if self.sampleReplace:
            # Sampling with replacement: row indices may repeat.
            rowInds = numpy.random.randint(0, numExamples, numSampled)
        else:
            # Sampling without replacement: prefix of a random permutation.
            rowInds = numpy.random.permutation(numExamples)[:numSampled]

        ranker = TreeRank(self.leafRanklearner)
        ranker.setMaxDepth(self.maxDepth)
        ranker.setMinSplit(self.minSplit)
        ranker.setFeatureSize(self.featureSize)
        ranker.setBestResponse(self.bestResponse)
        ranker.learnModel(X[rowInds, :], y[rowInds])

        trees.append(ranker)
        sampledRows.append(rowInds)

    self.forestList = trees
    self.indList = sampledRows
def learnModel(self, X, y):
    """
    Build the forest from labelled examples. Each of the ``self.numTrees``
    trees is a TreeRank model learned on a random subset of the examples;
    the trees and the example indices used for each are kept on the object.

    :param X: A matrix with examples as rows
    :type X: :class:`ndarray`

    :param y: A vector of labels, restricted to the two values -1 and +1
    :type y: :class:`ndarray`

    :raises ValueError: if the labels in y are not binary, or are not
        exactly -1 and +1
    """
    Parameter.checkClass(X, numpy.ndarray)
    Parameter.checkClass(y, numpy.ndarray)
    Parameter.checkArray(X)
    Parameter.checkArray(y)

    uniqueLabels = numpy.unique(y)
    numLabels = uniqueLabels.shape[0]
    if numLabels != 2:
        raise ValueError("Can only accept binary labelled data")
    labelMismatch = uniqueLabels != numpy.array([-1, 1])
    if labelMismatch.any():
        raise ValueError("Labels must be -1/+1: " + str(uniqueLabels))

    totalRows = X.shape[0]
    subsampleRows = int(numpy.round(self.sampleSize * totalRows))

    learnedTrees, usedInds = [], []
    for t in range(self.numTrees):
        Util.printConciseIteration(t, 1, self.numTrees, "Tree: ")

        # With replacement: independent uniform draws. Without replacement:
        # the leading entries of a shuffled index range.
        chosen = (
            numpy.random.randint(0, totalRows, subsampleRows)
            if self.sampleReplace
            else numpy.random.permutation(totalRows)[0:subsampleRows]
        )

        model = TreeRank(self.leafRanklearner)
        model.setMaxDepth(self.maxDepth)
        model.setMinSplit(self.minSplit)
        model.setFeatureSize(self.featureSize)
        model.setBestResponse(self.bestResponse)
        model.learnModel(X[chosen, :], y[chosen])

        learnedTrees.append(model)
        usedInds.append(chosen)

    # Stored only after all trees have been learned successfully.
    self.forestList, self.indList = learnedTrees, usedInds
def testFeaturesSize(self):
    """
    Learn a TreeRank model with a feature size of 0.5 and verify, for every
    vertex of the resulting tree, that the number of feature indices equals
    ``round(featureSize * self.X.shape[1])`` and that the indices are all
    distinct.
    """
    treeRank = TreeRank(self.leafRanklearner)
    featureSize = 0.5
    treeRank.setFeatureSize(featureSize)
    treeRank.learnModel(self.X, self.y)
    # Call predict as a smoke check only; its return value is not asserted on.
    treeRank.predict(self.X)

    tree = treeRank.getTree()
    vertexIds = tree.getAllVertexIds()
    # Does not depend on the vertex, so it is computed once up front.
    expectedSize = numpy.round(featureSize * self.X.shape[1])

    for vertexId in vertexIds:
        node = tree.getVertex(vertexId)
        featureInds = node.getFeatureInds()

        # Use assertEqual: the assertEquals alias is deprecated and was
        # removed from unittest.TestCase in Python 3.12.
        self.assertEqual(featureInds.shape[0], expectedSize)
        # Uniqueness: de-duplicating must not shrink the index array.
        self.assertEqual(
            numpy.unique(featureInds).shape[0],
            featureInds.shape[0],
        )