def evaluateLearn(X, y, idx, learnModel, predict, metricMethod, progress=True):
    """
    Evaluate a learning algorithm over a collection of training/test splits.

    For every (train, test) pair in idx the model is fitted by calling
    learnModel on the training rows, predict is applied to the test rows and
    metricMethod(predictedY, realY) scores the resulting predictions.

    :param X: A matrix with examples as rows
    :type X: :class:`ndarray`

    :param y: A vector of labels; must be one-dimensional
    :type y: :class:`ndarray`

    :param idx: A sized collection of (trainIndices, testIndices) pairs
    :type idx: :class:`list`

    :param learnModel: A function such that learnModel(X, y) finds a mapping from X to y; its return value is ignored
    :type learnModel: :class:`function`

    :param predict: A function such that predict(X) makes predictions for X
    :type predict: :class:`function`

    :param metricMethod: A function such that metricMethod(predY, testY) returns the quality of predicted labels predY
    :type metricMethod: :class:`function`

    :param progress: If True, report progress via Util.printConciseIteration before each split
    :type progress: :class:`bool`

    :return: An array holding one metric value per split, in the order of idx (no averaging is performed here)
    :rtype: :class:`ndarray`

    :raises ValueError: If y is not one-dimensional
    """
    # idx is not class-checked: it only has to support len() and iteration
    # over (train, test) index pairs.
    Parameter.checkClass(X, numpy.ndarray)
    Parameter.checkArray(X, softCheck=True)
    Parameter.checkInt(X.shape[0], 1, float('inf'))
    Parameter.checkClass(y, numpy.ndarray)
    Parameter.checkArray(y, softCheck=True)

    if y.ndim != 1:
        raise ValueError("Dimention of y must be 1")

    numSplits = len(idx)
    metrics = numpy.zeros(numSplits)
    logging.debug("EvaluateLearn: Using %s splits on %s examples", numSplits, X.shape[0])

    for i, (idxtr, idxts) in enumerate(idx):
        if progress:
            Util.printConciseIteration(i, 1, numSplits)

        trainX, testX = X[idxtr, :], X[idxts, :]
        trainY, testY = y[idxtr], y[idxts]

        learnModel(trainX, trainY)
        predY = predict(testX)
        # Force a garbage collection pass after fit/predict, before scoring
        gc.collect()

        metrics[i] = metricMethod(predY, testY)

    return metrics
def learnModel(self, X, y):
    """
    Fit a forest of TreeRank models to the examples given as the rows of X,
    whose labels are the elements of the 1D array y.

    One tree is trained per iteration, self.numTrees in total, each on a
    random sample of round(self.sampleSize * X.shape[0]) rows. Rows are drawn
    with replacement when self.sampleReplace is true, otherwise they are the
    leading entries of a random permutation of the row indices. The trained
    trees are stored in self.forestList and the row indices used for each
    tree in self.indList.

    :param X: A matrix with examples as rows
    :type X: :class:`ndarray`

    :param y: A vector of labels whose distinct values are exactly -1 and +1
    :type y: :class:`ndarray`

    :raises ValueError: If y does not hold exactly two distinct labels, or they are not -1 and +1
    """
    Parameter.checkClass(X, numpy.ndarray)
    Parameter.checkClass(y, numpy.ndarray)
    Parameter.checkArray(X)
    Parameter.checkArray(y)

    distinctLabels = numpy.unique(y)
    if distinctLabels.shape[0] != 2:
        raise ValueError("Can only accept binary labelled data")

    # numpy.unique returns sorted values, so the only valid outcome is [-1, 1]
    labelMismatch = distinctLabels != numpy.array([-1, 1])
    if labelMismatch.any():
        raise ValueError("Labels must be -1/+1: " + str(distinctLabels))

    numExamples = X.shape[0]
    sampleCount = int(numpy.round(self.sampleSize * numExamples))

    trees = []
    sampledRows = []

    for treeIndex in range(self.numTrees):
        Util.printConciseIteration(treeIndex, 1, self.numTrees, "Tree: ")

        if not self.sampleReplace:
            rows = numpy.random.permutation(numExamples)[:sampleCount]
        else:
            rows = numpy.random.randint(0, numExamples, sampleCount)

        tree = TreeRank(self.leafRanklearner)
        tree.setMaxDepth(self.maxDepth)
        tree.setMinSplit(self.minSplit)
        tree.setFeatureSize(self.featureSize)
        tree.setBestResponse(self.bestResponse)
        tree.learnModel(X[rows, :], y[rows])

        trees.append(tree)
        sampledRows.append(rows)

    # Publish the results only once every tree has been trained
    self.forestList = trees
    self.indList = sampledRows
def learnModel(self, X, y):
    """
    Train self.numTrees TreeRank models on random samples of the rows of X,
    using the matching elements of the 1D array y as labels.

    Every sample contains round(self.sampleSize * X.shape[0]) row indices,
    drawn with replacement if self.sampleReplace is set and otherwise taken
    from the front of a random permutation of all row indices. On completion
    self.forestList holds the trained trees and self.indList the index array
    each tree was trained on.

    :param X: A matrix with examples as rows
    :type X: :class:`ndarray`

    :param y: A vector of labels whose distinct values are exactly -1 and +1
    :type y: :class:`ndarray`

    :raises ValueError: If y does not hold exactly two distinct labels, or they are not -1 and +1
    """
    Parameter.checkClass(X, numpy.ndarray)
    Parameter.checkClass(y, numpy.ndarray)
    Parameter.checkArray(X)
    Parameter.checkArray(y)

    uniqueLabels = numpy.unique(y)
    if uniqueLabels.shape[0] != 2:
        raise ValueError("Can only accept binary labelled data")
    expectedLabels = numpy.array([-1, 1])
    if (uniqueLabels != expectedLabels).any():
        raise ValueError("Labels must be -1/+1: " + str(uniqueLabels))

    totalRows = X.shape[0]
    rowsPerTree = int(numpy.round(self.sampleSize * totalRows))

    def drawRows():
        # Pick the row indices for one tree, with or without replacement
        if self.sampleReplace:
            return numpy.random.randint(0, totalRows, rowsPerTree)
        shuffled = numpy.random.permutation(totalRows)
        return shuffled[0:rowsPerTree]

    builtTrees = []
    usedRows = []

    for treeNumber in range(self.numTrees):
        Util.printConciseIteration(treeNumber, 1, self.numTrees, "Tree: ")
        chosen = drawRows()

        ranker = TreeRank(self.leafRanklearner)
        ranker.setMaxDepth(self.maxDepth)
        ranker.setMinSplit(self.minSplit)
        ranker.setFeatureSize(self.featureSize)
        ranker.setBestResponse(self.bestResponse)
        ranker.learnModel(X[chosen, :], y[chosen])

        builtTrees.append(ranker)
        usedRows.append(chosen)

    # Attributes are assigned only after the whole forest has been built
    self.forestList = builtTrees
    self.indList = usedRows