Example #1
    def testUnlock(self):
        fileLock = FileLock(self.fileName)
        fileLock.lock()

        self.assertTrue(fileLock.isLocked())
        fileLock.unlock()
        self.assertFalse(fileLock.isLocked())
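The unit tests above and the functions below all rely on the same FileLock interface: lock(), unlock(), isLocked() and fileExists(). A minimal sketch of the guard idiom they share, assuming a FileLock class with exactly those four methods (the helper name computeIfAbsent is hypothetical):

import logging

def computeIfAbsent(fileName, computeAndSave):
    """Run computeAndSave(fileName) only if the result file is absent and nobody holds the lock."""
    fileLock = FileLock(fileName)  # FileLock imported from the project's utility package, as in the examples

    if not fileLock.isLocked() and not fileLock.fileExists():
        fileLock.lock()
        try:
            computeAndSave(fileName)
        finally:
            # Always release the lock, even if the computation raises
            fileLock.unlock()
    else:
        logging.debug("File exists, or is locked: " + fileName)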
Example #2
    def saveWeightVectorResults(self, X, Y, learner, paramDict, fileName): 
        """
        Save the results of the variable importance computation.
        """
        filelock = FileLock(fileName)
        gc.collect()

        if not filelock.isLocked() and not filelock.fileExists(): 
            filelock.lock()
            try: 
                logging.debug("Computing weights file " + fileName)
                logging.debug("Shape of examples: " + str(X.shape) + ", number of +1: " + str(numpy.sum(Y==1)) + ", -1: " + str(numpy.sum(Y==-1)))
                                
                tempLearner = learner.copy()
                logging.debug("Initial learner is " + str(tempLearner))
                idx = StratifiedKFold(Y, self.innerFolds)
                tempLearner.processes = self.numProcesses
                bestLearner, cvGrid = tempLearner.parallelModelSelect(X, Y, idx, paramDict)

                bestLearner = tempLearner.getBestLearner(cvGrid, paramDict, X, Y, idx, best="max")
                logging.debug("Best learner is " + str(bestLearner))
                
                bestLearner.learnModel(X, Y)
                weightVector = bestLearner.variableImportance(X, Y)   
                numpy.save(fileName, weightVector)
                logging.debug("Saved results as : " + fileName)
            finally: 
                filelock.unlock()
        else:
            logging.debug("File exists, or is locked: " + fileName)
Example #3
def saveStats(args):    
    i, theta = args 
    
    resultsFileName = outputDir + "SimStats" + str(i) + ".pkl"
    lock = FileLock(resultsFileName)
    
    if not lock.fileExists() and not lock.isLocked():    
        lock.lock()
        
        try:
            model = HIVModelUtils.createModel(targetGraph, startDate, endDate, recordStep, M, matchAlpha, breakSize, matchAlg, theta=thetaArray[i])
            times, infectedIndices, removedIndices, graph, compTimes, graphMetrics = HIVModelUtils.simulate(model)
            times = numpy.arange(startDate, endDate+1, recordStep)
            vertexArray, infectedIndices, removedIndices, contactGraphStats, removedGraphStats, finalRemovedDegrees = HIVModelUtils.generateStatistics(graph, times)
            stats = times, vertexArray, infectedIndices, removedGraphStats, finalRemovedDegrees, graphMetrics.objectives, compTimes
            
            Util.savePickle(stats, resultsFileName)
        finally:
            # Release the lock even if the simulation fails
            lock.unlock()
    else: 
        logging.debug("Results already computed: " + str(resultsFileName))
Example #4
    def saveResult(self, X, Y, learner, paramDict, fileName):
        """
        Save a single result to file, checking if the results have already been computed
        """
        filelock = FileLock(fileName)
        gc.collect()

        if not filelock.isLocked() and not filelock.fileExists(): 
            filelock.lock()
            try: 
                logging.debug("Computing file " + fileName)
                logging.debug("Shape of examples: " + str(X.shape) + ", number of +1: " + str(numpy.sum(Y==1)) + ", -1: " + str(numpy.sum(Y==-1)))
                
                #idxFull = Sampling.crossValidation(self.outerFolds, X.shape[0])
                idxFull = StratifiedKFold(Y, self.outerFolds)
                errors = numpy.zeros(self.outerFolds)
                
                for i, (trainInds, testInds) in enumerate(idxFull): 
                    logging.debug("Outer fold: " + str(i))
                    
                    trainX, trainY = X[trainInds, :], Y[trainInds]
                    testX, testY = X[testInds, :], Y[testInds]
                    #idx = Sampling.crossValidation(self.innerFolds, trainX.shape[0])
                    idx = StratifiedKFold(trainY, self.innerFolds)
                    logging.debug("Initial learner is " + str(learner))
                    bestLearner, cvGrid = learner.parallelModelSelect(trainX, trainY, idx, paramDict)

                    bestLearner = learner.getBestLearner(cvGrid, paramDict, trainX, trainY, idx, best="max")
                    logging.debug("Best learner is " + str(bestLearner))
                    
                    bestLearner.learnModel(trainX, trainY)
                    predY = bestLearner.predict(testX)
                    errors[i] = Evaluator.auc(predY, testY)
                
                logging.debug("Mean auc: " + str(numpy.mean(errors)))
                numpy.save(fileName, errors)
                logging.debug("Saved results as : " + fileName)
            finally: 
                filelock.unlock()
        else:
            logging.debug("File exists, or is locked: " + fileName)
Example #5
def computeLearningRates(datasetNames, numProcesses, fileNameSuffix, learnerName, sampleSizes, foldsSet): 
    dataDir = PathDefaults.getDataDir() + "modelPenalisation/"
    outputDir = PathDefaults.getOutputDir() + "modelPenalisation/"

    learner, loadMethod, dataDir, outputDir, paramDict = getSetup(learnerName, dataDir, outputDir, numProcesses)
    
    for i in range(len(datasetNames)):
        logging.debug("Learning using dataset " + datasetNames[i][0])
        outfileName = outputDir + datasetNames[i][0] + fileNameSuffix

        fileLock = FileLock(outfileName + ".npz")
        if not fileLock.isLocked() and not fileLock.fileExists():
            fileLock.lock()
            
            numRealisations = datasetNames[i][1]  
            gridShape = [numRealisations, sampleSizes.shape[0]]
            gridShape.extend(list(learner.gridShape(paramDict)))   
            gridShape = tuple(gridShape)            
            
            betaGrids = numpy.zeros(gridShape) 
            
            for k in range(sampleSizes.shape[0]):
                sampleSize = sampleSizes[k]
                
                logging.debug("Using sample size " + str(sampleSize))
                for j in range(numRealisations):
                    Util.printIteration(j, 1, numRealisations, "Realisation: ")
                    trainX, trainY, testX, testY = loadMethod(dataDir, datasetNames[i][0], j)
                    
                    numpy.random.seed(21)
                    trainInds = numpy.random.permutation(trainX.shape[0])[0:sampleSize]
                    validX = trainX[trainInds,:]
                    validY = trainY[trainInds]
                    
                    betaGrids[j, k, :] = learner.learningRate(validX, validY, foldsSet, paramDict)
            
            numpy.savez(outfileName, betaGrids)
            logging.debug("Saved results as file " + outfileName + ".npz")
            fileLock.unlock()
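The numpy.savez(outfileName, betaGrids) call above stores the array under the default name arr_0 and appends .npz to the file name, which is why Example #8 below reads the learning rates back with numpy.load(betafileName)["arr_0"]. A small round-trip sketch (the /tmp path is only illustrative):

import numpy

betaGrids = numpy.zeros((2, 3))
numpy.savez("/tmp/betas", betaGrids)                # written as /tmp/betas.npz, array stored as arr_0
loaded = numpy.load("/tmp/betas.npz")["arr_0"]      # the same array back
assert loaded.shape == (2, 3)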
Example #6
    def testLock(self):
        fileLock = FileLock(self.fileName)
        fileLock.lock()
Example #7
def runToyExp(datasetNames, sampleSizes, foldsSet, cvScalings, sampleMethods, numProcesses, fileNameSuffix):
    dataDir = PathDefaults.getDataDir() + "modelPenalisation/toy/"
    outputDir = PathDefaults.getOutputDir() + "modelPenalisation/"

    svm = LibSVM()
    numCs = svm.getCs().shape[0]
    numGammas = svm.getGammas().shape[0]
    numMethods = 1 + (1 + cvScalings.shape[0])
    numParams = 2

    runIdeal = True
    runCv = True
    runVfpen = True

    for i in range(len(datasetNames)):
        datasetName = datasetNames[i][0]
        numRealisations = datasetNames[i][1]
        logging.debug("Learning using dataset " + datasetName)

        for s in range(len(sampleMethods)):
            sampleMethod = sampleMethods[s][1]
            outfileName = outputDir + datasetName + sampleMethods[s][0] + fileNameSuffix

            fileLock = FileLock(outfileName + ".npz")
            if not fileLock.isLocked() and not fileLock.fileExists():
                fileLock.lock()
                errors = numpy.zeros((numRealisations, len(sampleSizes), foldsSet.shape[0], numMethods))
                params = numpy.zeros((numRealisations, len(sampleSizes), foldsSet.shape[0], numMethods, numParams))
                errorGrids = numpy.zeros(
                    (numRealisations, len(sampleSizes), foldsSet.shape[0], numMethods, numCs, numGammas)
                )
                approxGrids = numpy.zeros(
                    (numRealisations, len(sampleSizes), foldsSet.shape[0], numMethods, numCs, numGammas)
                )
                idealGrids = numpy.zeros((numRealisations, len(sampleSizes), foldsSet.shape[0], numCs, numGammas))

                data = numpy.load(dataDir + datasetName + ".npz")
                gridPoints, trainX, trainY, pdfX, pdfY1X, pdfYminus1X = (
                    data["arr_0"],
                    data["arr_1"],
                    data["arr_2"],
                    data["arr_3"],
                    data["arr_4"],
                    data["arr_5"],
                )

                # We form a test set from the grid points
                testX = numpy.zeros((gridPoints.shape[0] ** 2, 2))
                for m in range(gridPoints.shape[0]):
                    testX[m * gridPoints.shape[0] : (m + 1) * gridPoints.shape[0], 0] = gridPoints
                    testX[m * gridPoints.shape[0] : (m + 1) * gridPoints.shape[0], 1] = gridPoints[m]

                for j in range(numRealisations):
                    Util.printIteration(j, 1, numRealisations, "Realisation: ")

                    for k in range(sampleSizes.shape[0]):
                        sampleSize = sampleSizes[k]
                        for m in range(foldsSet.shape[0]):
                            folds = foldsSet[m]
                            logging.debug("Using sample size " + str(sampleSize) + " and " + str(folds) + " folds")
                            perm = numpy.random.permutation(trainX.shape[0])
                            trainInds = perm[0:sampleSize]
                            validX = trainX[trainInds, :]
                            validY = trainY[trainInds]

                            svm = LibSVM(processes=numProcesses)
                            # Find ideal penalties
                            if runIdeal:
                                logging.debug("Finding ideal grid of penalties")
                                idealGrids[j, k, m, :, :] = parallelPenaltyGridRbf(
                                    svm, validX, validY, testX, gridPoints, pdfX, pdfY1X, pdfYminus1X
                                )

                            # Cross validation
                            if runCv:
                                logging.debug("Running V-fold cross validation")
                                methodInd = 0
                                idx = sampleMethod(folds, validY.shape[0])
                                if sampleMethod == Sampling.bootstrap:
                                    bootstrap = True
                                else:
                                    bootstrap = False

                                bestSVM, cvGrid = svm.parallelVfcvRbf(validX, validY, idx, True, bootstrap)
                                predY, decisionsY = bestSVM.predict(testX, True)
                                decisionGrid = numpy.reshape(
                                    decisionsY, (gridPoints.shape[0], gridPoints.shape[0]), order="F"
                                )
                                errors[j, k, m, methodInd] = ModelSelectUtils.bayesError(
                                    gridPoints, decisionGrid, pdfX, pdfY1X, pdfYminus1X
                                )
                                params[j, k, m, methodInd, :] = numpy.array([bestSVM.getC(), bestSVM.getKernelParams()])
                                errorGrids[j, k, m, methodInd, :, :] = cvGrid

                            # v fold penalisation
                            if runVfpen:
                                logging.debug("Running penalisation")
                                # BIC penalisation
                                Cv = float((folds - 1) * numpy.log(validX.shape[0]) / 2)
                                tempCvScalings = cvScalings * (folds - 1)
                                tempCvScalings = numpy.insert(tempCvScalings, 0, Cv)

                                # Use cross validation
                                idx = sampleMethod(folds, validY.shape[0])
                                svmGridResults = svm.parallelVfPenRbf(validX, validY, idx, tempCvScalings)

                                for n in range(len(tempCvScalings)):
                                    bestSVM, trainErrors, approxGrid = svmGridResults[n]
                                    methodInd = n + 1
                                    predY, decisionsY = bestSVM.predict(testX, True)
                                    decisionGrid = numpy.reshape(
                                        decisionsY, (gridPoints.shape[0], gridPoints.shape[0]), order="F"
                                    )
                                    errors[j, k, m, methodInd] = ModelSelectUtils.bayesError(
                                        gridPoints, decisionGrid, pdfX, pdfY1X, pdfYminus1X
                                    )
                                    params[j, k, m, methodInd, :] = numpy.array(
                                        [bestSVM.getC(), bestSVM.getKernelParams()]
                                    )
                                    errorGrids[j, k, m, methodInd, :, :] = trainErrors + approxGrid
                                    approxGrids[j, k, m, methodInd, :, :] = approxGrid

                meanErrors = numpy.mean(errors, 0)
                print(meanErrors)

                meanParams = numpy.mean(params, 0)
                print(meanParams)

                meanErrorGrids = numpy.mean(errorGrids, 0)
                stdErrorGrids = numpy.std(errorGrids, 0)

                meanIdealGrids = numpy.mean(idealGrids, 0)
                stdIdealGrids = numpy.std(idealGrids, 0)

                meanApproxGrids = numpy.mean(approxGrids, 0)
                stdApproxGrids = numpy.std(approxGrids, 0)

                numpy.savez(
                    outfileName,
                    errors,
                    params,
                    meanErrorGrids,
                    stdErrorGrids,
                    meanIdealGrids,
                    stdIdealGrids,
                    meanApproxGrids,
                    stdApproxGrids,
                )
                logging.debug("Saved results as file " + outfileName + ".npz")
                fileLock.unlock()
            else:
                logging.debug("Results already computed")

    logging.debug("All done!")
Example #8
def runBenchmarkExp(datasetNames, sampleSizes, foldsSet, cvScalings, sampleMethods, numProcesses, fileNameSuffix, learnerName, betaNameSuffix):
    dataDir = PathDefaults.getDataDir() + "modelPenalisation/"
    outputDir = PathDefaults.getOutputDir() + "modelPenalisation/"

    learner, loadMethod, dataDir, outputDir, paramDict = getSetup(learnerName, dataDir, outputDir, numProcesses)
    numParams = len(paramDict.keys())
    numMethods = 1 + (cvScalings.shape[0] + 1)
    
    runCv = True
    runVfpen = True

    for i in range(len(datasetNames)):
        datasetName = datasetNames[i][0]
        numRealisations = datasetNames[i][1]
        logging.debug("Learning using dataset " + datasetName)
        
        #Load learning rates for penalisation 
        betafileName = outputDir + datasetNames[i][0] + betaNameSuffix + ".npz"
        betaGrids = numpy.load(betafileName)["arr_0"]
        betaGrids = numpy.clip(betaGrids, 0, 1)    

        for s in range(len(sampleMethods)):
            sampleMethod = sampleMethods[s][1]
            outfileName = outputDir + datasetName + sampleMethods[s][0] + fileNameSuffix

            fileLock = FileLock(outfileName + ".npz")
            if not fileLock.isLocked() and not fileLock.fileExists():
                fileLock.lock()
                errors = numpy.zeros((numRealisations, len(sampleSizes), foldsSet.shape[0], numMethods))
                params = numpy.zeros((numRealisations, len(sampleSizes), foldsSet.shape[0], numMethods, numParams))
                
                errorShape = [numRealisations, len(sampleSizes), foldsSet.shape[0], numMethods]
                errorShape.extend(list(learner.gridShape(paramDict))) 
                errorShape = tuple(errorShape)
                
                gridShape = [numRealisations, len(sampleSizes), foldsSet.shape[0], numMethods]
                gridShape.extend(list(learner.gridShape(paramDict)))   
                gridShape = tuple(gridShape)
                
                idealErrorShape = [numRealisations, len(sampleSizes)]
                idealErrorShape.extend(list(learner.gridShape(paramDict)))   
                idealErrorShape = tuple(idealErrorShape)
                
                errorGrids = numpy.zeros(errorShape)
                approxGrids = numpy.zeros(errorShape)

                for j in range(numRealisations):
                    Util.printIteration(j, 1, numRealisations, "Realisation: ")
                    trainX, trainY, testX, testY = loadMethod(dataDir, datasetName, j)
                  
                    for k in range(sampleSizes.shape[0]):
                        sampleSize = sampleSizes[k]
                        for m in range(foldsSet.shape[0]):
                            if foldsSet[m] < sampleSize: 
                                folds = foldsSet[m]
                            else: 
                                folds = sampleSize 
                            logging.debug("Using sample size " + str(sampleSize) + " and " + str(folds) + " folds")
                            numpy.random.seed(21)
                            trainInds = numpy.random.permutation(trainX.shape[0])[0:sampleSize]
                            validX = trainX[trainInds,:]
                            validY = trainY[trainInds]

                            #Cross validation
                            if runCv:
                                logging.debug("Running simple sampling using " + str(sampleMethod))
                                methodInd = 0
                                idx = sampleMethod(folds, validY.shape[0])
                                bestLearner, cvGrid = learner.parallelModelSelect(validX, validY, idx, paramDict)
                                predY = bestLearner.predict(testX)
                                errors[j, k, m, methodInd] = bestLearner.getMetricMethod()(testY, predY)
                                params[j, k, m, methodInd, :] = bestLearner.getParamsArray(paramDict)
                                errorGrids[j, k, m, methodInd, :] = cvGrid

                            #v fold penalisation
                            if runVfpen:
                                logging.debug("Running penalisation using " + str(sampleMethod))
                                #Corrected penalisation given by the learning rate 

                                tempCvScalings = list(cvScalings * (folds-1))
                                tempCvScalings.insert(0, betaGrids[j, k, :]) 
                                
                                idx = sampleMethod(folds, validY.shape[0])
                                learnerGridResults = learner.parallelPen(validX, validY, idx, paramDict, tempCvScalings)

                                for n in range(len(tempCvScalings)):
                                    bestLearner, trainErrors, approxGrid = learnerGridResults[n]
                                    predY = bestLearner.predict(testX)
                                    methodInd = n + 1
                                    errors[j, k, m, methodInd] = bestLearner.getMetricMethod()(testY, predY)
                                    params[j, k, m, methodInd, :] = bestLearner.getParamsArray(paramDict)
                                    errorGrids[j, k, m, methodInd, :] = trainErrors + approxGrid
                                    approxGrids[j, k, m, methodInd, :] = approxGrid
                        
                meanErrors = numpy.mean(errors, 0)
                print(meanErrors)

                meanParams = numpy.mean(params, 0)
                print(meanParams)

                #When using CART trees the penalty can be inf in which case std is undefined
                #In this case we set to zero any infinite values 
                meanErrorGrids = numpy.mean(errorGrids, 0)
                try: 
                    stdErrorGrids = numpy.std(errorGrids, 0)
                except FloatingPointError:
                    errorGrids[numpy.isinf(errorGrids)] = 0 
                    stdErrorGrids = numpy.std(errorGrids, 0)
            
                meanApproxGrids = numpy.mean(approxGrids, 0) 
                try: 
                    stdApproxGrids = numpy.std(approxGrids, 0)
                except FloatingPointError:
                    approxGrids[numpy.isinf(approxGrids)] = 0 
                    stdApproxGrids = numpy.std(approxGrids, 0)

                numpy.savez(outfileName, errors, params, meanErrorGrids, stdErrorGrids, meanApproxGrids, stdApproxGrids)
                logging.debug("Saved results as file " + outfileName + ".npz")
                fileLock.unlock()
            else:
                logging.debug("Results already computed")
            
    logging.debug("All done!")
Example #9
def findErrorGrid(datasetNames, numProcesses, fileNameSuffix, learnerName, sampleSizes): 
    dataDir = PathDefaults.getDataDir() + "modelPenalisation/"
    outputDir = PathDefaults.getOutputDir() + "modelPenalisation/"

    learner, loadMethod, dataDir, outputDir, paramDict = getSetup(learnerName, dataDir, outputDir, numProcesses)
    
    numParams = len(paramDict.keys())    
    
    runIdeal = True 
    runTest = True 
    
    for i in range(len(datasetNames)):
        logging.debug("Learning using dataset " + datasetNames[i][0])
        outfileName = outputDir + datasetNames[i][0] + fileNameSuffix
    
        fileLock = FileLock(outfileName + ".npz")
        if not fileLock.isLocked() and not fileLock.fileExists():
            fileLock.lock()
            
            numRealisations = datasetNames[i][1]            
            
            gridShape = [numRealisations, sampleSizes.shape[0]]
            gridShape.extend(list(learner.gridShape(paramDict)))   
            gridShape = tuple(gridShape)   
            
            idealPenGrids = numpy.zeros(gridShape)
            idealErrorGrids = numpy.zeros(gridShape)
            idealErrors = numpy.zeros((numRealisations, sampleSizes.shape[0]))
                
            params = numpy.zeros((numRealisations, len(sampleSizes), numParams))    
            
            for k in range(sampleSizes.shape[0]):
                sampleSize = sampleSizes[k]
                
                logging.debug("Using sample size " + str(sampleSize))
                for j in range(numRealisations):
                    Util.printIteration(j, 1, numRealisations, "Realisation: ")
                    trainX, trainY, testX, testY = loadMethod(dataDir, datasetNames[i][0], j)
                    
                    numpy.random.seed(21)
                    trainInds = numpy.random.permutation(trainX.shape[0])[0:sampleSize]
                    validX = trainX[trainInds,:]
                    validY = trainY[trainInds]
                    
                    #Find ideal penalties
                    if runIdeal:
                        logging.debug("Finding ideal grid of penalties")
                        idealPenGrids[j, k, :] = learner.parallelPenaltyGrid(validX, validY, testX, testY, paramDict)
                    
                    #Find ideal model using the test set
                    if runTest:
                        logging.debug("Running test set sampling")
                        cvGrid = learner.parallelSplitGrid(validX, validY, testX, testY, paramDict)
                        bestLearner = learner.getBestLearner(cvGrid, paramDict, validX, validY)
                        predY = bestLearner.predict(testX)
                        idealErrors[j, k] = bestLearner.getMetricMethod()(testY, predY)
                        params[j, k, :] = bestLearner.getParamsArray(paramDict)
                        idealErrorGrids[j, k, :] = cvGrid
            
            meanIdealPenGrids = idealPenGrids.mean(0)
            stdIdealPenGrids = idealPenGrids.std(0)
            
            meanIdealErrorGrids = idealErrorGrids.mean(0)
            stdIdealErrorGrids = idealErrorGrids.std(0)
            
            numpy.savez(outfileName, idealErrors, params, meanIdealErrorGrids, stdIdealErrorGrids, meanIdealPenGrids, stdIdealPenGrids)
            logging.debug("Saved results as file " + outfileName + ".npz")
            fileLock.unlock()
        else:
            logging.debug("Results already computed")
Example #10
    def runExperiment(self):
        """
        Run the selected experiments and save the results.
        """
        if self.algoArgs.runSoftImpute:
            logging.debug("Running soft impute")
            
            for svdAlg in self.algoArgs.svdAlgs: 
                if svdAlg == "rsvd" or svdAlg == "rsvdUpdate" or svdAlg == "rsvdUpdate2": 
                    resultsFileName = self.resultsDir + "ResultsSoftImpute_alg=" + svdAlg + "_p=" + str(self.algoArgs.p)+ "_q=" + str(self.algoArgs.q) + "_updateAlg=" + self.algoArgs.updateAlg + ".npz"
                else: 
                    resultsFileName = self.resultsDir + "ResultsSoftImpute_alg=" + svdAlg  + "_updateAlg=" + self.algoArgs.updateAlg + ".npz"
                    
                fileLock = FileLock(resultsFileName)  
                
                if not fileLock.isLocked() and not fileLock.fileExists(): 
                    fileLock.lock()
                    
                    try: 
                        learner = IterativeSoftImpute(svdAlg=svdAlg, logStep=self.logStep, kmax=self.algoArgs.kmax, postProcess=self.algoArgs.postProcess, weighted=self.algoArgs.weighted, p=self.algoArgs.p, q=self.algoArgs.q, verbose=self.algoArgs.verbose, updateAlg=self.algoArgs.updateAlg)
                        
                        if self.algoArgs.modelSelect: 
                            trainIterator = self.getTrainIterator()
                            #Let's find the optimal lambda using the first matrix 
                            X = trainIterator.next() 
                            
                            logging.debug("Performing model selection, taking subsample of entries of size " + str(self.sampleSize))
                            X = SparseUtils.submatrix(X, self.sampleSize)
                            
                            cvInds = Sampling.randCrossValidation(self.algoArgs.folds, X.nnz)
                            meanErrors, stdErrors = learner.modelSelect(X, self.algoArgs.rhos, self.algoArgs.ks, cvInds)
                            
                            logging.debug("Mean errors = " + str(meanErrors))
                            logging.debug("Std errors = " + str(stdErrors))
                            
                            modelSelectFileName = resultsFileName.replace("Results", "ModelSelect") 
                            numpy.savez(modelSelectFileName, meanErrors, stdErrors)
                            logging.debug("Saved model selection grid as " + modelSelectFileName)                            
                            
                            rho = self.algoArgs.rhos[numpy.unravel_index(numpy.argmin(meanErrors), meanErrors.shape)[0]]
                            k = self.algoArgs.ks[numpy.unravel_index(numpy.argmin(meanErrors), meanErrors.shape)[1]]
                        else: 
                            rho = self.algoArgs.rhos[0]
                            k = self.algoArgs.ks[0]
                            
                        learner.setK(k)  
                        learner.setRho(rho)   
                        logging.debug(learner)                
                        trainIterator = self.getTrainIterator()
                        ZIter = learner.learnModel(trainIterator)
                        
                        self.recordResults(ZIter, learner, resultsFileName)
                    finally: 
                        fileLock.unlock()
                else: 
                    logging.debug("File is locked or already computed: " + resultsFileName)
                
                
        if self.algoArgs.runSgdMf:
            logging.debug("Running SGD MF")
            
            resultsFileName = self.resultsDir + "ResultsSgdMf.npz"
            fileLock = FileLock(resultsFileName)  
            
            if not fileLock.isLocked() and not fileLock.fileExists(): 
                fileLock.lock()
                
                try: 
                    learner = IterativeSGDNorm2Reg(k=self.algoArgs.ks[0], lmbda=self.algoArgs.lmbdas[0], gamma=self.algoArgs.gammas[0], eps=self.algoArgs.eps)               

                    if self.algoArgs.modelSelect:
                        # Let's find optimal parameters using the first matrix 
                        learner.modelSelect(self.getTrainIterator().next(), self.algoArgs.ks, self.algoArgs.lmbdas, self.algoArgs.gammas, self.algoArgs.folds)
                        trainIterator = self.getTrainIterator()

                    trainIterator = self.getTrainIterator()
                    ZIter = learner.learnModel(trainIterator)
                    
                    self.recordResults(ZIter, learner, resultsFileName)
                finally: 
                    fileLock.unlock()
            else: 
                logging.debug("File is locked or already computed: " + resultsFileName)            
            
        logging.info("All done: see you around!")