Example No. 1
    def testPredict2(self):
        #Test on Gauss2D dataset
        dataDir = PathDefaults.getDataDir()

        fileName = dataDir + "Gauss2D_learn.csv"
        XY = numpy.loadtxt(fileName, skiprows=1, usecols=(1,2,3), delimiter=",")
        X = XY[:, 0:2]
        y = XY[:, 2]

        fileName = dataDir + "Gauss2D_test.csv"
        testXY = numpy.loadtxt(fileName, skiprows=1, usecols=(1,2,3), delimiter=",")
        testX = testXY[:, 0:2]
        testY = testXY[:, 2]

        X = Standardiser().standardiseArray(X)
        testX = Standardiser().standardiseArray(testX)

        maxDepths = range(3, 10)
        trainAucs = numpy.array([0.7194734, 0.7284824, 0.7332185, 0.7348198, 0.7366152, 0.7367508, 0.7367508, 0.7367508])
        testAucs = numpy.array([0.6789078, 0.6844632, 0.6867918, 0.6873420, 0.6874820, 0.6874400, 0.6874400, 0.6874400])
        i = 0
        
        #The results are approximately the same, but not exactly 
        for maxDepth in maxDepths:
            treeRank = TreeRank(self.leafRanklearner)
            treeRank.setMaxDepth(maxDepth)
            treeRank.learnModel(X, y)
            trainScores = treeRank.predict(X)
            testScores = treeRank.predict(testX)

            self.assertAlmostEquals(Evaluator.auc(trainScores, y), trainAucs[i], 2)
            self.assertAlmostEquals(Evaluator.auc(testScores, testY), testAucs[i], 1)
            i+=1 
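A note on argument order: in these examples Evaluator.auc takes the predicted scores first and the true labels second. As a rough cross-check (not the library's own implementation), a comparable value can be obtained with scikit-learn's roc_auc_score, which takes the arguments the other way round; a minimal sketch with made-up scores and -1/+1 labels:

import numpy
from sklearn.metrics import roc_auc_score

#Hypothetical scores and -1/+1 labels, analogous to those used in the tests above
labels = numpy.array([-1, -1, 1, 1, 1, -1])
scores = numpy.array([0.1, 0.4, 0.35, 0.8, 0.7, 0.2])

#roc_auc_score takes (y_true, y_score); Evaluator.auc is called as (scores, labels)
print(roc_auc_score(labels, scores))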
Example No. 2
    def testLocalAuc(self):
        testY = numpy.array([-1, -1, 1, 1, 1, 1, 1, -1, -1, 1])
        predY = numpy.array([0.987,  0.868,  0.512,  0.114,  0.755,  0.976,  0.05,  0.371, 0.629,  0.819])

        self.assertEquals(Evaluator.localAuc(testY, predY, 1.0), Evaluator.auc(predY, testY))
        self.assertEquals(Evaluator.localAuc(testY, predY, 0.0), 0)

        self.assertEquals(Evaluator.localAuc(testY, testY, 0.2), 1.0)
Example No. 3
 def testAveragePrecisionFromLists(self): 
     predList  = [4, 2, 10]
     testList = [4, 2, 15, 16]
     
     self.assertEquals(Evaluator.averagePrecisionFromLists(testList, predList), 0.5)
     
     predList = [0,1,2,3,4,5]
     testList = [0, 3, 4, 5]
     self.assertAlmostEquals(Evaluator.averagePrecisionFromLists(testList, predList), 0.691666666666)
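The expected values above match the usual definition of average precision over a ranked list: at every rank where the predicted list hits an item from the test list, take the precision at that rank, sum these, and divide by the number of items in the test list. A minimal independent sketch of that computation (an assumption about the definition, not the library source):

def averagePrecision(testList, predList):
    #Sum the precision at each rank where a relevant item is retrieved,
    #then divide by the total number of relevant items
    relevant = set(testList)
    hits = 0
    total = 0.0
    for k, item in enumerate(predList):
        if item in relevant:
            hits += 1
            total += hits / float(k + 1)
    return total / len(testList)

print(averagePrecision([4, 2, 15, 16], [4, 2, 10]))        #0.5
print(averagePrecision([0, 3, 4, 5], [0, 1, 2, 3, 4, 5]))  #approximately 0.6917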
Example No. 4
    def testLocalAuc(self):
        testY = numpy.array([-1, -1, 1, 1, 1, 1, 1, -1, -1, 1])
        predY = numpy.array([
            0.987, 0.868, 0.512, 0.114, 0.755, 0.976, 0.05, 0.371, 0.629, 0.819
        ])

        self.assertEquals(Evaluator.localAuc(testY, predY, 1.0),
                          Evaluator.auc(predY, testY))
        self.assertEquals(Evaluator.localAuc(testY, predY, 0.0), 0)

        self.assertEquals(Evaluator.localAuc(testY, testY, 0.2), 1.0)
Example No. 5
    def testBinaryError(self):
        testY = numpy.array([1, 1, -1, 1])
        predY = numpy.array([-1, 1, -1, 1])
        predY2 = numpy.array([-1, -1, -1, 1])
        predY3 = numpy.array([-1, -1, 1, -1])

        self.assertTrue(Evaluator.binaryError(testY, predY) == 0.25)
        self.assertTrue(Evaluator.binaryError(testY, testY) == 0.0)
        self.assertTrue(Evaluator.binaryError(predY, predY) == 0.0)

        self.assertTrue(Evaluator.binaryError(testY, predY2) == 0.5)
        self.assertTrue(Evaluator.binaryError(testY, predY3) == 1.0)
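These assertions are consistent with binaryError being the fraction of positions where the two label vectors disagree. A one-line equivalent (an assumption, not the library source):

import numpy

def binaryError(testY, predY):
    #Fraction of examples whose predicted label differs from the true label
    return numpy.mean(testY != predY)

testY = numpy.array([1, 1, -1, 1])
predY = numpy.array([-1, 1, -1, 1])
print(binaryError(testY, predY))  #0.25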
Example No. 6
    def testMeanAbsError(self):
        testY = numpy.array([1, 2, 1.5])
        predY = numpy.array([2, 1, 0.5])

        self.assertEquals(Evaluator.meanAbsError(testY, predY), 1.0)
        self.assertEquals(Evaluator.meanAbsError(testY, testY), 0.0)

        testY = numpy.random.rand(10)
        predY = numpy.random.rand(10)

        error = numpy.abs(testY - predY).mean()
        self.assertEquals(error, Evaluator.meanAbsError(testY, predY))
Example No. 7
    def testBinaryError(self):
        testY = numpy.array([1, 1, -1, 1])
        predY = numpy.array([-1, 1, -1, 1])
        predY2 = numpy.array([-1, -1, -1, 1])
        predY3 = numpy.array([-1, -1, 1, -1])

        self.assertTrue(Evaluator.binaryError(testY, predY) == 0.25)
        self.assertTrue(Evaluator.binaryError(testY, testY) == 0.0)
        self.assertTrue(Evaluator.binaryError(predY, predY) == 0.0)

        self.assertTrue(Evaluator.binaryError(testY, predY2) == 0.5)
        self.assertTrue(Evaluator.binaryError(testY, predY3) == 1.0)
Example No. 8
    def testWeightedRootMeanSqError(self):

        y = numpy.array([0.1, 0.2, 0.3])
        predY = numpy.array([0.1, 0.2, 0.3])

        self.assertEquals(Evaluator.weightedRootMeanSqError(y, predY), 0.0)

        #Errors on larger ys are weighted more 
        predY = numpy.array([0.0, 0.2, 0.3])
        predY2 = numpy.array([0.1, 0.2, 0.4])

        self.assertTrue(Evaluator.weightedRootMeanSqError(y, predY) < Evaluator.weightedRootMeanSqError(y, predY2))
Example No. 9
 def testMeanAbsError(self): 
     testY = numpy.array([1, 2, 1.5])
     predY = numpy.array([2, 1, 0.5]) 
     
     self.assertEquals(Evaluator.meanAbsError(testY, predY), 1.0)
     self.assertEquals(Evaluator.meanAbsError(testY, testY), 0.0)
     
     testY = numpy.random.rand(10)
     predY = numpy.random.rand(10)
     
     error = numpy.abs(testY - predY).mean()
     self.assertEquals(error, Evaluator.meanAbsError(testY, predY))
Example No. 10
    def testAveragePrecisionFromLists(self):
        predList = [4, 2, 10]
        testList = [4, 2, 15, 16]

        self.assertEquals(
            Evaluator.averagePrecisionFromLists(testList, predList), 0.5)

        predList = [0, 1, 2, 3, 4, 5]
        testList = [0, 3, 4, 5]
        self.assertAlmostEquals(
            Evaluator.averagePrecisionFromLists(testList, predList),
            0.691666666666)
Example No. 11
    def testLearnModel2(self): 
        #We want to make sure the learnt tree with gamma = 0 minimises the
        #empirical risk
        minSplit = 20
        maxDepth = 3
        gamma = 0.01
        learner = PenaltyDecisionTree(minSplit=minSplit, maxDepth=maxDepth, gamma=gamma, pruning=False) 
        
        #Vary sampleSize
        numpy.random.seed(21)
        learner.setSampleSize(1)           
        learner.learnModel(self.X, self.y)        
        error1 = learner.treeObjective(self.X, self.y)

        numpy.random.seed(21)
        learner.setSampleSize(5)        
        learner.learnModel(self.X, self.y)
        error2 = learner.treeObjective(self.X, self.y)

        numpy.random.seed(21)                
        learner.setSampleSize(10)       
        learner.learnModel(self.X, self.y)
        error3 = learner.treeObjective(self.X, self.y)
        
        self.assertTrue(error1 >= error2)
        self.assertTrue(error2 >= error3)
        
        #Now vary max depth 
        learner.gamma = 0         
        
        numpy.random.seed(21)
        learner.setSampleSize(1) 
        learner.minSplit = 1
        learner.maxDepth = 3 
        learner.learnModel(self.X, self.y)
        predY = learner.predict(self.X)
        error1 = Evaluator.binaryError(self.y, predY)
        
        numpy.random.seed(21)
        learner.maxDepth = 5 
        learner.learnModel(self.X, self.y)
        predY = learner.predict(self.X)
        error2 = Evaluator.binaryError(self.y, predY)
        
        numpy.random.seed(21)
        learner.maxDepth = 10 
        learner.learnModel(self.X, self.y)
        predY = learner.predict(self.X)
        error3 = Evaluator.binaryError(self.y, predY)        
        
        self.assertTrue(error1 >= error2)
        self.assertTrue(error2 >= error3)
Example No. 12
    def testSetC(self):
        rankSVM = RankSVM()
        rankSVM.setC(100.0)
        rankSVM.learnModel(self.X, self.y)
        predY = rankSVM.predict(self.X)
        auc1 = Evaluator.auc(predY, self.y)

        rankSVM.setC(0.1)
        rankSVM.learnModel(self.X, self.y)
        predY = rankSVM.predict(self.X)
        auc2 = Evaluator.auc(predY, self.y)

        self.assertTrue(auc1 != auc2)
Example No. 13
    def testRootMeanSqError(self):
        y = numpy.array([1,2,3])
        predY = numpy.array([1,2,3])

        self.assertEquals(Evaluator.rootMeanSqError(y, predY), 0.0)

        y = numpy.array([1,2,3])
        predY = numpy.array([1,2,2])

        self.assertEquals(Evaluator.rootMeanSqError(y, predY), float(1)/numpy.sqrt(3))

        predY = numpy.array([1,2])
        self.assertRaises(ValueError, Evaluator.rootMeanSqError, y, predY)
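The second expected value follows from the definition of root mean squared error: only one of the three predictions is off, by 1, so the error is sqrt(1/3). A minimal equivalent computation for reference (an assumption, not the library source):

import numpy

def rootMeanSqError(y, predY):
    #Square root of the mean squared difference
    return numpy.sqrt(numpy.mean((y - predY)**2))

y = numpy.array([1, 2, 3])
predY = numpy.array([1, 2, 2])
print(rootMeanSqError(y, predY))   #0.5773... == 1/sqrt(3)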
Example No. 14
    def testRootMeanSqError(self):
        y = numpy.array([1, 2, 3])
        predY = numpy.array([1, 2, 3])

        self.assertEquals(Evaluator.rootMeanSqError(y, predY), 0.0)

        y = numpy.array([1, 2, 3])
        predY = numpy.array([1, 2, 2])

        self.assertEquals(Evaluator.rootMeanSqError(y, predY),
                          float(1) / numpy.sqrt(3))

        predY = numpy.array([1, 2])
        self.assertRaises(ValueError, Evaluator.rootMeanSqError, y, predY)
Example No. 15
    def testWeightedRootMeanSqError(self):

        y = numpy.array([0.1, 0.2, 0.3])
        predY = numpy.array([0.1, 0.2, 0.3])

        self.assertEquals(Evaluator.weightedRootMeanSqError(y, predY), 0.0)

        #Errors on larger ys are weighted more
        predY = numpy.array([0.0, 0.2, 0.3])
        predY2 = numpy.array([0.1, 0.2, 0.4])

        self.assertTrue(
            Evaluator.weightedRootMeanSqError(y, predY) <
            Evaluator.weightedRootMeanSqError(y, predY2))
Example No. 16
    def testBinaryBootstrapError(self):

        testY = numpy.array([-1, -1, 1, 1, 1])
        predY = 1 - testY

        trainY = numpy.array([-1, -1, 1, 1, 1])
        predTrainY = 1 - trainY

        self.assertEquals(Evaluator.binaryBootstrapError(testY, testY, trainY, trainY, 0.5), 0.0)

        self.assertEquals(Evaluator.binaryBootstrapError(testY, testY, trainY, predTrainY, 0.5), 0.5)
        self.assertEquals(Evaluator.binaryBootstrapError(testY, testY, trainY, predTrainY, 0.1), 0.9)

        self.assertEquals(Evaluator.binaryBootstrapError(testY, predY, trainY, trainY, 0.1), 0.1)
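The expected values above are consistent with a bootstrap-style estimate that mixes the test error and the training error, with the final argument weighting the test part (the conventional 0.632 weight appears in Example No. 24 below). A minimal sketch of that combination, inferred from these assertions rather than taken from the library source:

import numpy

def binaryError(y, predY):
    return numpy.mean(y != predY)

def binaryBootstrapError(testY, predTestY, trainY, predTrainY, weight):
    #weight * test error + (1 - weight) * training error
    return weight * binaryError(testY, predTestY) + (1 - weight) * binaryError(trainY, predTrainY)

testY = numpy.array([-1, -1, 1, 1, 1])
trainY = numpy.array([-1, -1, 1, 1, 1])
predTrainY = 1 - trainY
print(binaryBootstrapError(testY, testY, trainY, predTrainY, 0.5))  #0.5
print(binaryBootstrapError(testY, testY, trainY, predTrainY, 0.1))  #0.9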
Example No. 17
    def evaluateCvOuter(self, X, Y, folds, leafRank):
        """
        Run cross validation and output some ROC curves. In this case Y is a 1D array.
        """
        Parameter.checkClass(X, numpy.ndarray)
        Parameter.checkClass(Y, numpy.ndarray)
        Parameter.checkInt(folds, 2, float('inf'))
        if Y.ndim != 1:
            raise ValueError("Expecting Y to be 1D")

        indexList = cross_val.StratifiedKFold(Y, folds)
        self.setLeafRank(leafRank)

        bestParams = []
        bestTrainAUCs = numpy.zeros(folds)
        bestTrainROCs = []
        bestTestAUCs = numpy.zeros(folds)
        bestTestROCs = []
        bestMetaDicts = []
        i = 0

        for trainInds, testInds in indexList:
            Util.printIteration(i, 1, folds)
            trainX, trainY = X[trainInds, :], Y[trainInds]
            testX, testY = X[testInds, :], Y[testInds]

            logging.debug("Distribution of labels in train: " + str(numpy.bincount(trainY)))
            logging.debug("Distribution of labels in test: " + str(numpy.bincount(testY)))

            self.learnModel(trainX, trainY)
            predTrainY = self.predict(trainX)
            predTestY = self.predict(testX)
            bestTrainAUCs[i] = Evaluator.auc(predTrainY, trainY)
            bestTestAUCs[i] = Evaluator.auc(predTestY, testY)

            #Store the parameters and ROC curves
            bestTrainROCs.append(Evaluator.roc(trainY, predTrainY))
            bestTestROCs.append(Evaluator.roc(testY, predTestY))

            metaDict = {}
            bestMetaDicts.append(metaDict)

            i += 1

        logging.debug("Mean test AUC = " + str(numpy.mean(bestTestAUCs)))
        logging.debug("Std test AUC = " + str(numpy.std(bestTestAUCs)))
        allMetrics = [bestTrainAUCs, bestTrainROCs, bestTestAUCs, bestTestROCs]

        return (bestParams, allMetrics, bestMetaDicts)
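The cross_val.StratifiedKFold(Y, folds) iterator above comes from an early scikit-learn API. In current scikit-learn the equivalent split lives in sklearn.model_selection; a minimal sketch of the modern form, assuming a small synthetic X and Y:

import numpy
from sklearn.model_selection import StratifiedKFold

X = numpy.random.rand(20, 3)
Y = numpy.array([0, 1] * 10)

#split yields (trainInds, testInds) index arrays with label proportions preserved
for trainInds, testInds in StratifiedKFold(n_splits=4).split(X, Y):
    trainX, trainY = X[trainInds, :], Y[trainInds]
    testX, testY = X[testInds, :], Y[testInds]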
Example No. 18
    def testAuc(self):
        testY = numpy.array([-1, -1, 1, 1])
        predY = numpy.array([-1, 0, 1, 1])
        predY2 = numpy.array([0.1, 0.2, 0.3, 0.4])

        self.assertEquals(Evaluator.auc(predY, testY), 1.0)
        self.assertEquals(Evaluator.auc(predY2, testY), 1.0)
        self.assertEquals(Evaluator.auc(-predY, testY), 0.0)

        numExamples = 1000
        testY = numpy.array(numpy.random.rand(numExamples)>0.5, numpy.int)
        predY = numpy.random.rand(numExamples)>0.5

        #For a random score the AUC is approximately 0.5 
        self.assertAlmostEquals(Evaluator.auc(predY, testY), 0.5, 1)
Example No. 19
    def testAuc(self):
        testY = numpy.array([-1, -1, 1, 1])
        predY = numpy.array([-1, 0, 1, 1])
        predY2 = numpy.array([0.1, 0.2, 0.3, 0.4])

        self.assertEquals(Evaluator.auc(predY, testY), 1.0)
        self.assertEquals(Evaluator.auc(predY2, testY), 1.0)
        self.assertEquals(Evaluator.auc(-predY, testY), 0.0)

        numExamples = 1000
        testY = numpy.array(numpy.random.rand(numExamples) > 0.5, numpy.int)
        predY = numpy.random.rand(numExamples) > 0.5

        #For a random score the AUC is approximately 0.5
        self.assertAlmostEquals(Evaluator.auc(predY, testY), 0.5, 1)
Example No. 20
    def computeRankMetrics(self, X, Y, indexList, bestLearners, standardiserY, labelIndex):
        #Some code to do ranking using the learner predictors
        i = 0
        rankMetrics = numpy.zeros((len(indexList), self.boundsList[labelIndex].shape[0]-1))
        for idxtr, idxts in indexList:
            logging.info("Iteration " + str(i))

            trainX, testX = X[idxtr, :], X[idxts, :]
            trainY, testY = Y[idxtr], Y[idxts]

            bestLearners[i].learnModel(trainX, trainY)
            predY = bestLearners[i].predict(testX)
            gc.collect()

            #Now output 3 sets of ranked scores
            predY = standardiserY.unstandardiseArray(predY)
            testY = standardiserY.unstandardiseArray(testY)

            YScores = MetabolomicsUtils.scoreLabels(predY, self.boundsList[labelIndex])
            YIndList = MetabolomicsUtils.createIndicatorLabel(testY, self.boundsList[labelIndex])

            for j in range(self.boundsList[labelIndex].shape[0]-1):
                rankMetrics[i, j] = Evaluator.auc(YScores[:, j], YIndList[j])
            i += 1

        logging.debug(rankMetrics)

        return rankMetrics
Example No. 21
    def testAuc(self):
        self.treeRankForest.learnModel(self.X, self.Y)
        scores = self.treeRankForest.predictScores(self.X)

        auc1 = Evaluator.auc(scores, self.Y.ravel())
        auc2 = self.treeRankForest.aucFromROC(self.treeRankForest.predictROC(self.X, self.Y))

        self.assertAlmostEquals(auc1, auc2, places=4)
Example No. 22
    def testPredict2(self):
        #Test on Gauss2D dataset
        dataDir = PathDefaults.getDataDir()

        fileName = dataDir + "Gauss2D_learn.csv"
        XY = numpy.loadtxt(fileName, skiprows=1, usecols=(1,2,3), delimiter=",")
        X = XY[:, 0:2]
        y = XY[:, 2]
        
        y = y*2 - 1 

        fileName = dataDir + "Gauss2D_test.csv"
        testXY = numpy.loadtxt(fileName, skiprows=1, usecols=(1,2,3), delimiter=",")
        testX = testXY[:, 0:2]
        testY = testXY[:, 2]
        
        testY = testY*2-1

        X = Standardiser().standardiseArray(X)
        testX = Standardiser().standardiseArray(testX)

        numTrees = 5
        minSplit = 50 
        maxDepths = range(3, 10)
        trainAucs = numpy.array([0.7252582, 0.7323278, 0.7350289, 0.7372529, 0.7399985, 0.7382176, 0.7395104, 0.7386347])
        testAucs = numpy.array([0.6806122, 0.6851614, 0.6886183, 0.6904147, 0.6897266, 0.6874600, 0.6875980, 0.6878801])

        i = 0
        
        #The results are approximately the same, but not exactly 
        for maxDepth in maxDepths:
            treeRankForest = TreeRankForest(self.leafRanklearner)
            treeRankForest.setMaxDepth(maxDepth)
            treeRankForest.setMinSplit(minSplit)
            treeRankForest.setNumTrees(numTrees)
            treeRankForest.learnModel(X, y)
            trainScores = treeRankForest.predict(X)
            testScores = treeRankForest.predict(testX)

            print(Evaluator.auc(trainScores, y), Evaluator.auc(testScores, testY))

            self.assertAlmostEquals(Evaluator.auc(trainScores, y), trainAucs[i], 1)
            self.assertAlmostEquals(Evaluator.auc(testScores, testY), testAucs[i], 1)
            i+=1
Example No. 23
    def testPrecisionFromIndLists(self): 
        predList  = [4, 2, 10]
        testList = [4, 2]

        self.assertEquals(Evaluator.precisionFromIndLists(testList, predList), 2.0/3)  
        
        testList = [4, 2, 10]
        self.assertEquals(Evaluator.precisionFromIndLists(testList, predList), 1) 
        
        predList  = [10, 2, 4]
        self.assertEquals(Evaluator.precisionFromIndLists(testList, predList), 1)
        
        testList = [1, 9, 11]
        self.assertEquals(Evaluator.precisionFromIndLists(testList, predList), 0)
        
        predList = [1, 2, 3, 4, 5]
        testList = [1, 9, 11]
        
        self.assertEquals(Evaluator.precisionFromIndLists(testList, predList), 1.0/5)
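These assertions are consistent with the precision being the fraction of predicted items that also appear in the test list, regardless of order. A minimal equivalent computation (an assumption, not the library source):

def precisionFromIndLists(testList, predList):
    #Fraction of predicted items that are present in the test list
    return len(set(predList) & set(testList)) / float(len(predList))

print(precisionFromIndLists([4, 2], [4, 2, 10]))           #0.666...
print(precisionFromIndLists([1, 9, 11], [1, 2, 3, 4, 5]))  #0.2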
Example No. 24
def computeBootstrapError(args):
    """
    Used in conjunction with the parallel model selection. Trains and then tests
    on a separate test set and evaluates the bootstrap error.
    """
    (trainX, trainY, testX, testY, learner) = args
    learner.learnModel(trainX, trainY)
    predTestY = learner.predict(testX)
    predTrainY = learner.predict(trainX)
    weight = 0.632
    return Evaluator.binaryBootstrapError(predTestY, testY, predTrainY, trainY, weight)
Example No. 25
 def testCvPrune(self): 
     numExamples = 500
     X, y = data.make_regression(numExamples)  
     
     y = Standardiser().standardiseArray(y)
     
     numTrain = int(numpy.round(numExamples * 0.33))
     numValid = int(numpy.round(numExamples * 0.33))
     
     trainX = X[0:numTrain, :]
     trainY = y[0:numTrain]
     validX = X[numTrain:numTrain+numValid, :]
     validY = y[numTrain:numTrain+numValid]
     testX = X[numTrain+numValid:, :]
     testY = y[numTrain+numValid:]
     
     learner = DecisionTreeLearner()
     learner.learnModel(trainX, trainY)
     error1 = Evaluator.rootMeanSqError(learner.predict(testX), testY)
     
     #print(learner.getTree())
     unprunedTree = learner.tree.copy() 
     learner.setGamma(1000)
     learner.cvPrune(trainX, trainY)
     
     self.assertEquals(unprunedTree.getNumVertices(), learner.tree.getNumVertices())
     learner.setGamma(100)
     learner.cvPrune(trainX, trainY)
     
     #Test if pruned tree is subtree of current: 
     for vertexId in learner.tree.getAllVertexIds(): 
         self.assertTrue(vertexId in unprunedTree.getAllVertexIds())
         
     #The error should be better after pruning 
     learner.learnModel(trainX, trainY)
     #learner.cvPrune(validX, validY, 0.0, 5)
     learner.repPrune(validX, validY)
   
     error2 = Evaluator.rootMeanSqError(learner.predict(testX), testY)
     
     self.assertTrue(error1 >= error2)
Example No. 26
    def meanAUC(self, predY, testY, labelIndex, standardiserY):
        predY = standardiserY.unstandardiseArray(predY)
        testY = standardiserY.unstandardiseArray(testY)

        YScores = MetabolomicsUtils.scoreLabels(predY, self.boundsList[labelIndex])
        YIndList = MetabolomicsUtils.createIndicatorLabel(testY, self.boundsList[labelIndex])

        rankMetrics = numpy.zeros(self.boundsList[labelIndex].shape[0]-1)

        for j in range(rankMetrics.shape[0]):
            rankMetrics[j] = Evaluator.auc(YScores[:, j], YIndList[j])

        return numpy.mean(rankMetrics)
Example No. 27
    def testBinaryBootstrapError(self):

        testY = numpy.array([-1, -1, 1, 1, 1])
        predY = 1 - testY

        trainY = numpy.array([-1, -1, 1, 1, 1])
        predTrainY = 1 - trainY

        self.assertEquals(
            Evaluator.binaryBootstrapError(testY, testY, trainY, trainY, 0.5),
            0.0)

        self.assertEquals(
            Evaluator.binaryBootstrapError(testY, testY, trainY, predTrainY,
                                           0.5), 0.5)
        self.assertEquals(
            Evaluator.binaryBootstrapError(testY, testY, trainY, predTrainY,
                                           0.1), 0.9)

        self.assertEquals(
            Evaluator.binaryBootstrapError(testY, predY, trainY, trainY, 0.1),
            0.1)
Example No. 28
    def testBayesError(self):
        dataDir = PathDefaults.getDataDir() + "modelPenalisation/toy/"
        data = numpy.load(dataDir + "toyData.npz")
        gridPoints, X, y, pdfX, pdfY1X, pdfYminus1X = data["arr_0"], data["arr_1"], data["arr_2"], data["arr_3"], data["arr_4"], data["arr_5"]

        sampleSize = 100
        trainX, trainY = X[0:sampleSize, :], y[0:sampleSize]
        testX, testY = X[sampleSize:, :], y[sampleSize:]

        #We form a test set from the grid points
        gridX = numpy.zeros((gridPoints.shape[0]**2, 2))
        for m in range(gridPoints.shape[0]):
            gridX[m*gridPoints.shape[0]:(m+1)*gridPoints.shape[0], 0] = gridPoints
            gridX[m*gridPoints.shape[0]:(m+1)*gridPoints.shape[0], 1] = gridPoints[m]

        Cs = 2**numpy.arange(-5, 5, dtype=numpy.float)
        gammas = 2**numpy.arange(-5, 5, dtype=numpy.float)

        bestError = 1 

        for C in Cs:
            for gamma in gammas:
                svm = LibSVM(kernel="gaussian", C=C, kernelParam=gamma)
                svm.learnModel(trainX, trainY)
                predY, decisionsY = svm.predict(gridX, True)
                decisionGrid = numpy.reshape(decisionsY, (gridPoints.shape[0], gridPoints.shape[0]), order="F")
                error = ModelSelectUtils.bayesError(gridPoints, decisionGrid, pdfX, pdfY1X, pdfYminus1X)

                predY, decisionsY = svm.predict(testX, True)
                error2 = Evaluator.binaryError(testY, predY)
                print(error, error2)

                if error < bestError:
                    bestError = error
                    bestC = C
                    bestGamma = gamma

        svm = LibSVM(kernel="gaussian", C=bestC, kernelParam=bestGamma)
        svm.learnModel(trainX, trainY)
        predY, decisionsY = svm.predict(gridX, True)
        #Recompute the decision grid for the selected parameters before plotting
        decisionGrid = numpy.reshape(decisionsY, (gridPoints.shape[0], gridPoints.shape[0]), order="F")

        plt.figure(0)
        plt.contourf(gridPoints, gridPoints, decisionGrid, 100)
        plt.colorbar()

        plt.figure(1)
        plt.scatter(X[y==1, 0], X[y==1, 1], c='r', label="+1")
        plt.scatter(X[y==-1, 0], X[y==-1, 1], c='b', label="-1")
        plt.legend()
        plt.show()
Example No. 29
def computeIdealPenalty(args):
    """
    Find the complete penalty.
    """
    (X, y, fullX, C, gamma, gridPoints, pdfX, pdfY1X, pdfYminus1X) = args

    svm = LibSVM('gaussian', gamma, C)
    svm.learnModel(X, y)
    predY = svm.predict(X)
    predFullY, decisionsY = svm.predict(fullX, True)
    decisionGrid = numpy.reshape(decisionsY, (gridPoints.shape[0], gridPoints.shape[0]), order="F")
    trueError = ModelSelectUtils.bayesError(gridPoints, decisionGrid, pdfX, pdfY1X, pdfYminus1X)
    idealPenalty = trueError - Evaluator.binaryError(predY, y)

    return idealPenalty
Example No. 30
    def testPrecisionFromIndLists(self):
        predList = [4, 2, 10]
        testList = [4, 2]

        self.assertEquals(Evaluator.precisionFromIndLists(testList, predList),
                          2.0 / 3)

        testList = [4, 2, 10]
        self.assertEquals(Evaluator.precisionFromIndLists(testList, predList),
                          1)

        predList = [10, 2, 4]
        self.assertEquals(Evaluator.precisionFromIndLists(testList, predList),
                          1)

        testList = [1, 9, 11]
        self.assertEquals(Evaluator.precisionFromIndLists(testList, predList),
                          0)

        predList = [1, 2, 3, 4, 5]
        testList = [1, 9, 11]

        self.assertEquals(Evaluator.precisionFromIndLists(testList, predList),
                          1.0 / 5)
Example No. 31
    def saveResult(self, X, Y, learner, paramDict, fileName):
        """
        Save a single result to file, checking if the results have already been computed
        """
        filelock = FileLock(fileName)
        gc.collect()

        if not filelock.isLocked() and not filelock.fileExists(): 
            filelock.lock()
            try: 
                logging.debug("Computing file " + fileName)
                logging.debug("Shape of examples: " + str(X.shape) + ", number of +1: " + str(numpy.sum(Y==1)) + ", -1: " + str(numpy.sum(Y==-1)))
                
                #idxFull = Sampling.crossValidation(self.outerFolds, X.shape[0])
                idxFull = StratifiedKFold(Y, self.outerFolds)
                errors = numpy.zeros(self.outerFolds)
                
                for i, (trainInds, testInds) in enumerate(idxFull): 
                    logging.debug("Outer fold: " + str(i))
                    
                    trainX, trainY = X[trainInds, :], Y[trainInds]
                    testX, testY = X[testInds, :], Y[testInds]
                    #idx = Sampling.crossValidation(self.innerFolds, trainX.shape[0])
                    idx = StratifiedKFold(trainY, self.innerFolds)
                    logging.debug("Initial learner is " + str(learner))
                    bestLearner, cvGrid = learner.parallelModelSelect(trainX, trainY, idx, paramDict)

                    bestLearner = learner.getBestLearner(cvGrid, paramDict, trainX, trainY, idx, best="max")
                    logging.debug("Best learner is " + str(bestLearner))
                    
                    bestLearner.learnModel(trainX, trainY)
                    predY = bestLearner.predict(testX)
                    errors[i] = Evaluator.auc(predY, testY)
                
                logging.debug("Mean auc: " + str(numpy.mean(errors)))
                numpy.save(fileName, errors)
                logging.debug("Saved results as : " + fileName)
            finally: 
                filelock.unlock()
        else:
            logging.debug("File exists, or is locked: " + fileName)
Example No. 32
    def learnModelCut(self, X, Y, folds=4):
        """
        Perform model learning with tree cutting in order to choose a maximal
        depth. The best tree is chosen using cross validation and depths are
        selected from 0 to maxDepth. The best depth corresponds to the maximal
        AUC obtained using cross validation. 

        :param X: A matrix with examples as rows
        :type X: :class:`ndarray`

        :param Y: A vector of binary labels as a 1D array
        :type Y: :class:`ndarray`

        :param folds: The number of cross validation folds.
        :type folds: :class:`int`
        """

        indexList = cross_val.StratifiedKFold(Y, folds)
        depths = numpy.arange(1, self.maxDepth)
        meanAUCs = numpy.zeros(depths.shape[0])

        for trainInds, testInds in indexList:
            trainX, trainY = X[trainInds, :], Y[trainInds]
            testX, testY = X[testInds, :], Y[testInds]

            self.learnModel(trainX, trainY)
            fullTree = self.tree

            for i in range(fullTree.depth()):
                d = depths[i]
                self.tree = TreeRank.cut(fullTree, d)
                predTestY = self.predict(testX)

                meanAUCs[i] += Evaluator.auc(predTestY, testY)/float(folds)

        bestDepth = depths[numpy.argmax(meanAUCs)]
        self.learnModel(X, Y)
        self.tree = TreeRank.cut(self.tree, bestDepth)
Example No. 33
    def greedyMC2(lists, itemList, trainList, n): 
        """
        A method to greedily select a subset of the input lists such that
        the average precision is maximised
        """
        currentListsInds = list(range(len(lists)))
        newListsInds = []
        currentAvPrecision = 0 
        lastAvPrecision = -0.1
        
        while currentAvPrecision - lastAvPrecision > 0: 
            lastAvPrecision = currentAvPrecision 
            averagePrecisions = numpy.zeros(len(currentListsInds))      
            
            for i, j in enumerate(currentListsInds):
                newListsInds.append(j)

                newLists = []                
                for k in newListsInds: 
                    newLists.append(lists[k])
                
                rankAggregate, scores = RankAggregator.MC2(newLists, itemList)
                averagePrecisions[i] = Evaluator.averagePrecisionFromLists(trainList, rankAggregate[0:n], n)
                newListsInds.remove(j)

            j = numpy.argmax(averagePrecisions)
            currentAvPrecision = averagePrecisions[j]
            
            if currentAvPrecision > lastAvPrecision: 
                newListsInds.append(currentListsInds.pop(j))
            
        return newListsInds
Example No. 34
#Figure out why the penalty is increasing 
X = trainX 
y = trainY 

for i in range(foldsSet.shape[0]): 
    folds = foldsSet[i]
    idx = Sampling.crossValidation(folds, validX.shape[0])
    
    penalty = 0
    fullError = 0 
    trainError = 0     
    
    learner.learnModel(validX, validY)
    predY = learner.predict(X)
    predValidY = learner.predict(validX)
    idealPenalty = Evaluator.rootMeanSqError(predY, y) - Evaluator.rootMeanSqError(predValidY, validY)
    
    for trainInds, testInds in idx:
        trainX = validX[trainInds, :]
        trainY = validY[trainInds]
    
        #learner.setGamma(gamma)
        #learner.setC(C)
        learner.learnModel(trainX, trainY)
        predY = learner.predict(validX)
        predTrainY = learner.predict(trainX)
        fullError += Evaluator.rootMeanSqError(predY, validY)
        trainError += Evaluator.rootMeanSqError(predTrainY, trainY)
        penalty += Evaluator.rootMeanSqError(predY, validY) - Evaluator.rootMeanSqError(predTrainY, trainY)
        
    print((folds-1)*fullError/folds, (folds-1)*trainError/folds, (folds-1)*penalty/folds)
Example No. 35

tau = 1.0
lmbda = 0.1
linearKernel = LinearKernel()
permutationKernel = PermutationGraphKernel(tau, linearKernel)
randomWalkKernel = RandWalkGraphKernel(lmbda)

K1 = numpy.zeros((numGraphs, numGraphs))
K2 = numpy.zeros((numGraphs, numGraphs))

for i in range(0, numGraphs):
    print(("i="+str(i)))
    for j in range(0, numGraphs):
        print(("j="+str(j)))
        K1[i, j] = permutationKernel.evaluate(graphs[i], graphs[j])
        K2[i, j] = randomWalkKernel.evaluate(graphs[i], graphs[j])

D1 = KernelUtils.computeDistanceMatrix(K1)
D2 = KernelUtils.computeDistanceMatrix(K2)

numPairs = numGraphs // 2
windowSize = 3
pairIndices = numpy.array([list(range(numPairs)),  list(range(numPairs))]).T
pairIndices[:, 1] = numPairs + pairIndices[:, 1]

error1 = Evaluator.evaluateWindowError(D1, windowSize, pairIndices)
error2 = Evaluator.evaluateWindowError(D2, windowSize, pairIndices)

print(("Error 1: " + str(error1)))
print(("Error 2: " + str(error2)))
Example No. 36
    def testPredict(self):
        rankBoost = RankBoost()
        rankBoost.learnModel(self.X, self.y)
        predY = rankBoost.predict(self.X)

        self.assertTrue(Evaluator.auc(predY, self.y) <= 1.0 and Evaluator.auc(predY, self.y) >= 0.0)
Example No. 37
 def testGrowTree(self):
     startId = (0, )
     minSplit = 20
     maxDepth = 3
     gamma = 0.01
     learner = PenaltyDecisionTree(minSplit=minSplit, maxDepth=maxDepth, gamma=gamma, pruning=False) 
     
     trainX = self.X[100:, :]
     trainY = self.y[100:]
     testX = self.X[0:100, :]
     testY = self.y[0:100]    
     
     argsortX = numpy.zeros(trainX.shape, numpy.int)
     for i in range(trainX.shape[1]): 
         argsortX[:, i] = numpy.argsort(trainX[:, i])
         argsortX[:, i] = numpy.argsort(argsortX[:, i])
     
     learner.tree = DictTree()
     rootNode = DecisionNode(numpy.arange(trainX.shape[0]), Util.mode(trainY))
     learner.tree.setVertex(startId, rootNode)        
     
     #Note that this matches with the case where we create a new tree each time 
     numpy.random.seed(21)
     bestError = float("inf")        
     
     for i in range(20): 
         learner.tree.pruneVertex(startId)
         learner.growTree(trainX, trainY, argsortX, startId)
         
         predTestY = learner.predict(testX)
         error = Evaluator.binaryError(predTestY, testY)
         #print(Evaluator.binaryError(predTestY, testY), learner.tree.getNumVertices())
         
         if error < bestError: 
             bestError = error 
             bestTree = learner.tree.copy() 
         
         self.assertTrue(learner.tree.depth() <= maxDepth)
         
         for vertexId in learner.tree.nonLeaves(): 
             self.assertTrue(learner.tree.getVertex(vertexId).getTrainInds().shape[0] >= minSplit)
     
     bestError1 = bestError               
     learner.tree = bestTree    
     
     #Now we test growing a tree from a non-root vertex 
     numpy.random.seed(21)
     for i in range(20): 
         learner.tree.pruneVertex((0, 1)) 
         learner.growTree(trainX, trainY, argsortX, (0, 1))
         
         self.assertTrue(learner.tree.getVertex((0,)) == bestTree.getVertex((0,)))
         self.assertTrue(learner.tree.getVertex((0,0)) == bestTree.getVertex((0,0)))
         
         
         predTestY = learner.predict(testX)
         error = Evaluator.binaryError(predTestY, testY)
         
         if error < bestError: 
             bestError = error 
             bestTree = learner.tree.copy() 
         #print(Evaluator.binaryError(predTestY, testY), learner.tree.getNumVertices())
     self.assertTrue(bestError1 >= bestError )
Example No. 38
        minAlpha = alpha 
    if alpha > maxAlpha: 
        maxAlpha = alpha 
        
numAlphas = 100
alphas = numpy.linspace(maxAlpha+0.1, minAlpha, numAlphas)
errors = numpy.zeros(numAlphas)

for i in range(alphas.shape[0]): 
    #learner.learnModel(trainX, trainY)
    learner.setAlphaThreshold(alphas[i])
    learner.cvPrune(trainX, trainY)
    #learner.cvPrune(validX, validY, alphas[numpy.argmin(errors)])
    #learner.prune(validX, validY, alphas[i])
    predY = learner.predict(testX)
    errors[i] = Evaluator.rootMeanSqError(predY, testY)
    
plt.figure(3)
plt.scatter(alphas, errors)

#Now plot best tree 
plt.figure(4)
learner.learnModel(trainX, trainY)
#learner.cvPrune(validX, validY, alphas[numpy.argmin(errors)])
learner.setAlphaThreshold(alphas[numpy.argmin(errors)])
learner.cvPrune(trainX, trainY)
rootId = learner.tree.getRootId()
displayTree(learner, rootId, 0, 1, 0, 1, colormap)

plt.show()
    
Example No. 39
    def testModelSelect(self): 
        
        """
        We test the results on some data and compare to SVR. 
        """
        numExamples = 200
        X, y = data.make_regression(numExamples, noise=0.5)  
        
        X = Standardiser().standardiseArray(X)
        y = Standardiser().standardiseArray(y)
        
        trainX = X[0:100, :]
        trainY = y[0:100]
        testX = X[100:, :]
        testY = y[100:]
        
        learner = DecisionTreeLearner(maxDepth=20, minSplit=10, pruneType="REP-CV")
        learner.setPruneCV(8)
        
        paramDict = {} 
        paramDict["setGamma"] = numpy.linspace(0.0, 1.0, 10) 
        paramDict["setPruneCV"] = numpy.arange(6, 11, 2, numpy.int)
        
        folds = 5
        idx = Sampling.crossValidation(folds, trainX.shape[0])
        bestTree, cvGrid = learner.parallelModelSelect(trainX, trainY, idx, paramDict)


        predY = bestTree.predict(testX)
        error = Evaluator.rootMeanSqError(testY, predY)
        print(error)
        
        
        learner = DecisionTreeLearner(maxDepth=20, minSplit=5, pruneType="CART")
        
        paramDict = {} 
        paramDict["setGamma"] = numpy.linspace(0.0, 1.0, 50) 
        
        folds = 5
        idx = Sampling.crossValidation(folds, trainX.shape[0])
        bestTree, cvGrid = learner.parallelModelSelect(trainX, trainY, idx, paramDict)


        predY = bestTree.predict(testX)
        error = Evaluator.rootMeanSqError(testY, predY)
        print(error)
              
        return 
        #Let's compare to the SVM 
        learner2 = LibSVM(kernel='gaussian', type="Epsilon_SVR") 
        
        paramDict = {} 
        paramDict["setC"] = 2.0**numpy.arange(-10, 14, 2, dtype=numpy.float)
        paramDict["setGamma"] = 2.0**numpy.arange(-10, 4, 2, dtype=numpy.float)
        paramDict["setEpsilon"] = learner2.getEpsilons()
        
        idx = Sampling.crossValidation(folds, trainX.shape[0])
        bestSVM, cvGrid = learner2.parallelModelSelect(trainX, trainY, idx, paramDict)

        predY = bestSVM.predict(testX)
        error = Evaluator.rootMeanSqError(testY, predY)
        print(error)
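parallelModelSelect above searches the grid defined by paramDict over the cross validation folds in idx and returns the best learner. As a rough modern analogue (a different library, not the same API), scikit-learn's GridSearchCV performs the same kind of grid search over a decision tree; a minimal sketch:

import numpy
from sklearn.datasets import make_regression
from sklearn.model_selection import GridSearchCV
from sklearn.tree import DecisionTreeRegressor

X, y = make_regression(n_samples=200, noise=0.5)
trainX, trainY = X[0:100, :], y[0:100]
testX, testY = X[100:, :], y[100:]

#Exhaustive search over the parameter grid using 5-fold cross validation
paramGrid = {"max_depth": [5, 10, 20], "min_samples_split": [5, 10]}
search = GridSearchCV(DecisionTreeRegressor(), paramGrid, cv=5)
search.fit(trainX, trainY)
predY = search.best_estimator_.predict(testX)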