# Assumed imports for this test method: numpy, logging, numpy.testing as nptst,
# sklearn's linear_model, plus the library's DecisionTreeLearner, Standardiser,
# Sampling and a data module providing make_regression (e.g. sklearn.datasets).
def testLearningRate(self):
     numExamples = 100
     trainX, trainY = data.make_regression(numExamples) 
     trainX = Standardiser().normaliseArray(trainX)
     trainY = Standardiser().normaliseArray(trainY)
     learner = DecisionTreeLearner(pruneType="CART", maxDepth=20, minSplit=1)
     
     
     foldsSet = numpy.arange(2, 7, 2)
     
     gammas = numpy.array(numpy.round(2**numpy.arange(1, 8, 1)-1), dtype=int)  # 1, 3, 7, ..., 127
     paramDict = {} 
     paramDict["setGamma"] = gammas
     
     betaGrid = learner.learningRate(trainX, trainY, foldsSet, paramDict)
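     # betaGrid is expected to hold one learning-rate estimate (beta) per gamma value;
     # the same quantities are recomputed directly below and compared at the end.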
     
     #Compute beta more directly 
     numParams = gammas.shape[0]
     sampleSize = trainX.shape[0]
     sampleMethod = Sampling.crossValidation
     Cvs = numpy.array([1])
     penalties = numpy.zeros((foldsSet.shape[0], numParams))
     betas = numpy.zeros(gammas.shape[0])
     
     for k in range(foldsSet.shape[0]): 
         folds = foldsSet[k]
         logging.debug("Folds " + str(folds))
         
         idx = sampleMethod(folds, trainX.shape[0])   
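          # idx: train/test index sets for V-fold cross validation over the training examples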
         
         #Now try penalisation
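          # parallelPen evaluates the gamma grid on these CV splits; each result entry is
          # (best learner, training errors, penalty estimates), one entry per Cv value.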
         resultsList = learner.parallelPen(trainX, trainY, idx, paramDict, Cvs)
         bestLearner, trainErrors, currentPenalties = resultsList[0]
         penalties[k, :] = currentPenalties
     
     for i in range(gammas.shape[0]): 
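          # Regress log(penalty * V) on log((V-1)n/V) across the fold counts;
          # the negated slope is the learning-rate estimate beta for this gamma.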
         inds = numpy.logical_and(numpy.isfinite(penalties[:, i]), penalties[:, i]>0)
         tempPenalties = penalties[:, i][inds]
          tempfoldsSet = numpy.array(foldsSet, float)[inds]
         
         if tempPenalties.shape[0] > 1: 
             x = numpy.log((tempfoldsSet-1)/tempfoldsSet*sampleSize)
             y = numpy.log(tempPenalties)+numpy.log(tempfoldsSet)   
         
             clf = linear_model.LinearRegression()
             clf.fit(numpy.array([x]).T, y)
             betas[i] = clf.coef_[0]    
             
     betas = -betas   
     
     nptst.assert_array_equal(betaGrid, betas)
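
The regression above fits log(penalty * V) against log((V-1)n/V) and negates the slope, so the test asserts that learner.learningRate returns exactly this power-law exponent for each gamma. A minimal, self-contained sketch of the same slope recovery, using only numpy and scikit-learn on synthetic penalties (trueBeta and the decay model are illustrative assumptions, not taken from the library):

import numpy
from sklearn import linear_model

# Synthetic penalties decaying as ((V-1)/V * n)**(-trueBeta), the model fitted above
n = 100
trueBeta = 0.7
foldsSet = numpy.arange(2, 7, 2)               # V = 2, 4, 6
effSize = (foldsSet - 1) / foldsSet * n        # effective training-set sizes
penalties = effSize**(-trueBeta) / foldsSet    # so penalty*V follows the power law

x = numpy.log(effSize)
y = numpy.log(penalties) + numpy.log(foldsSet)

clf = linear_model.LinearRegression()
clf.fit(x.reshape(-1, 1), y)
print(-clf.coef_[0])                           # recovers trueBeta (approx. 0.7)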
Example #2
    logging.debug("Dataset " + datasetName)
    learner = DecisionTreeLearner(criterion="mse", maxDepth=100, minSplit=1, pruneType="CART", processes=numProcesses)
    learner.setChunkSize(3)   
    
    outfileName = outputDir + datasetName + "Beta"

    for m in range(sampleSizes.shape[0]): 
        sampleSize = sampleSizes[m]
        logging.debug("Sample size " + str(sampleSize))
    
        penalties = numpy.zeros((foldsSet.shape[0], numParams))
        betas = numpy.zeros((gammas.shape[0], sampleSizes.shape[0]))
        
        for j in range(numRealisations):      
            logging.debug("Realisation: " + str(j))
            
            trainX, trainY, testX, testY = loadMethod(dataDir, datasetName, j)
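            # loadMethod returns the j-th train/test realisation of the current dataset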
            
            numpy.random.seed(21)
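            # Same seed each realisation, so the same subsample indices are drawn every time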
            trainInds = numpy.random.permutation(trainX.shape[0])[0:sampleSize]
            validX = trainX[trainInds,:]
            validY = trainY[trainInds]
                           
            betas = learner.learningRate(validX, validY, foldsSet, paramDict)       
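            # One learning-rate estimate per gamma (this overwrites the array preallocated above)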
            print(betas) 
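            # Plot the estimated learning rates against the gamma grid for this realisation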
            
            plt.plot(gammas, betas)
            plt.show()
        
    break  # only the first dataset of the enclosing loop is processed