Example #1
def getSetup(learnerName, dataDir, outputDir, numProcesses): 
    """
    Return the learner, dataset loading function, data/output directories and
    model-selection parameter grid for the given learner name.
    """
    if learnerName=="SVM":
        learner = LibSVM(kernel='gaussian', type="C_SVC", processes=numProcesses) 
        loadMethod = ModelSelectUtils.loadRatschDataset
        dataDir += "benchmark/"
        outputDir += "classification/" + learnerName + "/"
        
        paramDict = {} 
        paramDict["setC"] = learner.getCs()
        paramDict["setGamma"] = learner.getGammas()  
    elif learnerName=="SVR":
        learner = LibSVM(kernel='gaussian', type="Epsilon_SVR", processes=numProcesses) 
        learner.normModelSelect = True
        loadMethod = ModelSelectUtils.loadRegressDataset
        dataDir += "regression/"
        outputDir += "regression/" + learnerName + "/"

        paramDict = {} 
        paramDict["setC"] = 2.0**numpy.arange(-10, 14, 2, dtype=numpy.float)
        paramDict["setGamma"] = 2.0**numpy.arange(-10, 4, 2, dtype=numpy.float)
        paramDict["setEpsilon"] = learner.getEpsilons()
    elif learnerName=="CART": 
        learner = DecisionTreeLearner(criterion="mse", maxDepth=30, minSplit=1, pruneType="CART", processes=numProcesses)
        learner.setChunkSize(2)
        loadMethod = ModelSelectUtils.loadRegressDataset
        dataDir += "regression/"
        outputDir += "regression/" + learnerName + "/"

        paramDict = {} 
        paramDict["setGamma"] =  numpy.array(numpy.round(2**numpy.arange(1, 7.5, 0.5)-1), dtype=numpy.int)
    else: 
        raise ValueError("Unknown learnerName: " + learnerName)
                
    return learner, loadMethod, dataDir, outputDir, paramDict 
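
A minimal usage sketch, assuming getSetup and PathDefaults are importable from the surrounding project (the base directory name is taken from Example #2 below; nothing in this sketch is part of the original snippet):

import multiprocessing

learner, loadMethod, dataDir, outputDir, paramDict = getSetup(
    "SVR",
    PathDefaults.getDataDir() + "modelPenalisation/",
    PathDefaults.getOutputDir() + "modelPenalisation/",
    multiprocessing.cpu_count())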
Example #2
# Directories for the SVR model-penalisation regression experiments.
dataDir = PathDefaults.getDataDir() 
dataDir += "modelPenalisation/regression/"
outputDir = PathDefaults.getOutputDir() + "modelPenalisation/regression/SVR/"

figInd = 0 

loadMethod = ModelSelectUtils.loadRegressDataset
datasets = ModelSelectUtils.getRegressionDatasets(True)

numProcesses = multiprocessing.cpu_count()
learner = LibSVM(kernel="rbf", processes=numProcesses, type="Epsilon_SVR")
learner.setChunkSize(3)

Cs = 2.0**numpy.arange(-10, 14, 2, dtype=float)
gammas = 2.0**numpy.arange(-10, 4, 2, dtype=float)
epsilons = learner.getEpsilons()

gammaInd = 3 
gamma = gammas[gammaInd]
learner.setGamma(gamma)

epsilonInd = 0 
epsilon = epsilons[epsilonInd]
learner.setEpsilon(epsilon)
learner.normModelSelect = True

# Model selection is over C only; gamma and epsilon are fixed above.
paramDict = {} 
paramDict["setC"] = Cs 
numParams = Cs.shape[0]

#datasets = [datasets[1]]
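
A hedged sketch of how the objects above are typically combined: load one dataset with loadMethod, build cross-validation indices, and select C with parallelModelSelect. The call signatures are taken from the other examples on this page; the flat-list structure of datasets, the fold count and the use of Evaluator are assumptions, not part of the original snippet.

datasetName = datasets[0]
trainX, trainY, testX, testY = loadMethod(dataDir, datasetName, 0)
idx = Sampling.crossValidation(5, trainX.shape[0])
bestSVR, cvGrid = learner.parallelModelSelect(trainX, trainY, idx, paramDict)
predY = bestSVR.predict(testX)
print(Evaluator.rootMeanSqError(testY, predY))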
Example #3
numProcesses = 8
dataDir = PathDefaults.getDataDir() + "modelPenalisation/regression/"
# "datasets" is not defined in this snippet; it is assumed to come from
# ModelSelectUtils.getRegressionDatasets(), as in the previous example.
datasetName = datasets[9]
print(datasetName)

j = 0 
trainX, trainY, testX, testY = ModelSelectUtils.loadRegressDataset(dataDir, datasetName, j)

learner = LibSVM(kernel='gaussian', type="Epsilon_SVR", processes=numProcesses) 


paramDict = {} 
paramDict["setC"] = 2.0**numpy.arange(-10, 14, 2, dtype=numpy.float)
paramDict["setGamma"] = 2.0**numpy.arange(-10, 4, 2, dtype=numpy.float)
paramDict["setEpsilon"] = learner.getEpsilons()

foldsSet = numpy.arange(2, 31, 2)
Cvs = numpy.array([1.0])
sampleMethod = Sampling.crossValidation

# Subsample the training set to form a smaller validation set.
sampleSize = 100
trainInds = numpy.random.permutation(trainX.shape[0])[0:sampleSize]
validX = trainX[trainInds,:]
validY = trainY[trainInds]

"""
for i in range(foldsSet.shape[0]): 
    folds = foldsSet[i]
    
    Cvs = numpy.array([folds-1.0])
    def testModelSelect(self): 
        
        """
        We test the results on some data and compare to SVR. 
        """
        numExamples = 200
        # "data" is assumed to be a datasets module providing make_regression
        # (e.g. sklearn.datasets); its import is not shown in this snippet.
        X, y = data.make_regression(numExamples, noise=0.5)  
        
        X = Standardiser().standardiseArray(X)
        y = Standardiser().standardiseArray(y)
        
        trainX = X[0:100, :]
        trainY = y[0:100]
        testX = X[100:, :]
        testY = y[100:]
        
        learner = DecisionTreeLearner(maxDepth=20, minSplit=10, pruneType="REP-CV")
        learner.setPruneCV(8)
        
        paramDict = {} 
        paramDict["setGamma"] = numpy.linspace(0.0, 1.0, 10) 
        paramDict["setPruneCV"] = numpy.arange(6, 11, 2, numpy.int)
        
        folds = 5
        idx = Sampling.crossValidation(folds, trainX.shape[0])
        bestTree, cvGrid = learner.parallelModelSelect(trainX, trainY, idx, paramDict)


        predY = bestTree.predict(testX)
        error = Evaluator.rootMeanSqError(testY, predY)
        print(error)
        
        
        learner = DecisionTreeLearner(maxDepth=20, minSplit=5, pruneType="CART")
        
        paramDict = {} 
        paramDict["setGamma"] = numpy.linspace(0.0, 1.0, 50) 
        
        folds = 5
        idx = Sampling.crossValidation(folds, trainX.shape[0])
        bestTree, cvGrid = learner.parallelModelSelect(trainX, trainY, idx, paramDict)


        predY = bestTree.predict(testX)
        error = Evaluator.rootMeanSqError(testY, predY)
        print(error)
              
        return 
        # Let's compare to the SVM (unreachable: the early return above skips this block).
        learner2 = LibSVM(kernel='gaussian', type="Epsilon_SVR") 
        
        paramDict = {} 
        paramDict["setC"] = 2.0**numpy.arange(-10, 14, 2, dtype=numpy.float)
        paramDict["setGamma"] = 2.0**numpy.arange(-10, 4, 2, dtype=numpy.float)
        paramDict["setEpsilon"] = learner2.getEpsilons()
        
        idx = Sampling.crossValidation(folds, trainX.shape[0])
        bestSVM, cvGrid = learner2.parallelModelSelect(trainX, trainY, idx, paramDict)

        predY = bestSVM.predict(testX)
        error = Evaluator.rootMeanSqError(testY, predY)
        print(error)
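
For readers without the project-specific DecisionTreeLearner/LibSVM wrappers, here is a rough scikit-learn sketch of the same model-selection pattern over the C/gamma grid used above; it is a stand-in under that assumption, not the original API.

import numpy
from sklearn.datasets import make_regression
from sklearn.model_selection import GridSearchCV, KFold
from sklearn.svm import SVR

# Generate and split a small regression problem, as in the test above.
X, y = make_regression(n_samples=200, noise=0.5)
trainX, trainY, testX, testY = X[:100], y[:100], X[100:], y[100:]

# Grid over C and gamma mirroring the setC/setGamma ranges used above.
paramGrid = {"C": 2.0**numpy.arange(-10, 14, 2, dtype=float),
             "gamma": 2.0**numpy.arange(-10, 4, 2, dtype=float)}
search = GridSearchCV(SVR(kernel="rbf"), paramGrid, cv=KFold(n_splits=5),
                      scoring="neg_root_mean_squared_error")
search.fit(trainX, trainY)

predY = search.best_estimator_.predict(testX)
print(((predY - testY)**2).mean()**0.5)  # root mean squared test error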