Example 1
    def testPostProcess(self): 
        lmbda = 0.0 
        eps = 0.1 
        k = 20
        
        matrixIterator = iter(self.matrixList)
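        #With lmbda=0 and post-processing, the learned factors should reconstruct each matrix almost exactly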
        iterativeSoftImpute = IterativeSoftImpute(lmbda, k=k, eps=eps, svdAlg="rsvd", postProcess=True)
        ZList = iterativeSoftImpute.learnModel(matrixIterator)
        
        for i, Z in enumerate(ZList):
            U, s, V = Z
            Xhat = (U*s).dot(V.T)
            
            nptst.assert_array_almost_equal(Xhat, numpy.array(self.matrixList[i].todense()))
        
        #Try case with iterativeSoftImpute.postProcessSamples < X.nnz 
        matrixIterator = iter(self.matrixList)
        iterativeSoftImpute.postProcessSamples = int(self.matrixList[0].nnz/2)
        
        ZList = iterativeSoftImpute.learnModel(matrixIterator)
        for i, Z in enumerate(ZList):
            U, s, V = Z
            Xhat = (U*s).dot(V.T)
            
            nptst.assert_array_almost_equal(Xhat, self.matrixList[i].todense(), 2)

        #Try a larger regularisation parameter 
        matrixIterator = iter(self.matrixList)
        iterativeSoftImpute.setRho(0.2)
        ZList = iterativeSoftImpute.learnModel(matrixIterator)
        for i, Z in enumerate(ZList):
            U, s, V = Z
            Xhat = (U*s).dot(V.T)
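The checks above rebuild each matrix from the returned factors via Xhat = (U*s).dot(V.T). Below is a minimal NumPy sketch of that reconstruction step, separate from the library code, assuming the factors follow the usual thin-SVD convention (V holding the right singular vectors as columns):

import numpy

#Illustrative only: rebuild a matrix from its thin SVD factors, mirroring
#the Xhat = (U*s).dot(V.T) step in the test above
X = numpy.random.rand(10, 8)
U, s, Vt = numpy.linalg.svd(X, full_matrices=False)
V = Vt.T

#Broadcasting U*s scales each column of U by the matching singular value,
#so (U*s).dot(V.T) equals U.dot(numpy.diag(s)).dot(V.T)
Xhat = (U * s).dot(V.T)
assert numpy.allclose(Xhat, X)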
Example 2
    def testModelSelect2(self): 
        rho = 0.1
        shape = (20, 20) 
        r = 20 
        numInds = 100
        noise = 0.2
        X = ExpSU.SparseUtils.generateSparseLowRank(shape, r, numInds, noise)
        X = X.tocsc()
        
        U, s, V = numpy.linalg.svd(X.todense())

        k = 15

        iterativeSoftImpute = IterativeSoftImpute(rho, k=None, svdAlg="propack", updateAlg="initial")
        rhos = numpy.linspace(0.5, 0.001, 5)
        ks = numpy.array([5, 10, 15], int)
        folds = 3
        
        cvInds = [] 
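        #Every fold uses the full set of observed entries as both train and test indices, 
        #so the standard deviation of the test errors should be close to zero 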
        for i in range(folds): 
            cvInds.append((numpy.arange(X.nnz), numpy.arange(X.nnz)))
        
        meanTestErrors, stdTestErrors = iterativeSoftImpute.modelSelect(X, rhos, ks, cvInds)
       
        self.assertAlmostEqual(numpy.linalg.norm(stdTestErrors), 0, 3)
        
        meanTestErrors2 = numpy.zeros((rhos.shape[0], ks.shape[0]))        
        
        #Now compute errors manually 
        for j, k in enumerate(ks): 
            iterativeSoftImpute.setK(k)
            for i, rho in enumerate(rhos): 
                iterativeSoftImpute.setRho(rho)
                ZIter = iterativeSoftImpute.learnModel(iter([X]))
                indList = [X.nonzero()]
                outIterator = iterativeSoftImpute.predict(ZIter, indList)
                Xhat = next(outIterator)
    
                meanTestErrors2[i, j] = MCEvaluator.rootMeanSqError(X, Xhat)

        nptst.assert_array_almost_equal(meanTestErrors, meanTestErrors2, 2)
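The manual loop scores each (rho, k) pair with MCEvaluator.rootMeanSqError between X and the prediction Xhat. The sketch below shows a root mean squared error restricted to the observed (nonzero) entries, which is only an assumption about what that evaluator measures:

import numpy
import scipy.sparse

#Illustrative only (assumes MCEvaluator.rootMeanSqError compares observed entries):
#RMSE between a sparse matrix X and a dense estimate Xhat over the nonzero entries of X
def rmseOnObserved(X, Xhat):
    rows, cols = X.nonzero()
    residuals = numpy.asarray(X[rows, cols]).ravel() - Xhat[rows, cols]
    return numpy.sqrt(numpy.mean(residuals**2))

X = scipy.sparse.random(20, 20, density=0.25, format="csc")
Xhat = numpy.zeros((20, 20))
print(rmseOnObserved(X, Xhat))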
Example 3
    def runExperiment(self):
        """
        Run the selected clustering experiments and save results
        """
        if self.algoArgs.runSoftImpute:
            logging.debug("Running soft impute")
            
            for svdAlg in self.algoArgs.svdAlgs: 
                if svdAlg in ("rsvd", "rsvdUpdate", "rsvdUpdate2"): 
                    resultsFileName = self.resultsDir + "ResultsSoftImpute_alg=" + svdAlg + "_p=" + str(self.algoArgs.p)+ "_q=" + str(self.algoArgs.q) + "_updateAlg=" + self.algoArgs.updateAlg + ".npz"
                else: 
                    resultsFileName = self.resultsDir + "ResultsSoftImpute_alg=" + svdAlg  + "_updateAlg=" + self.algoArgs.updateAlg + ".npz"
                    
                fileLock = FileLock(resultsFileName)  
                
                if not fileLock.isLocked() and not fileLock.fileExists(): 
                    fileLock.lock()
                    
                    try: 
                        learner = IterativeSoftImpute(svdAlg=svdAlg, logStep=self.logStep, kmax=self.algoArgs.kmax, postProcess=self.algoArgs.postProcess, weighted=self.algoArgs.weighted, p=self.algoArgs.p, q=self.algoArgs.q, verbose=self.algoArgs.verbose, updateAlg=self.algoArgs.updateAlg)
                        
                        if self.algoArgs.modelSelect: 
                            trainIterator = self.getTrainIterator()
                            #Let's find the optimal lambda using the first matrix 
                            X = next(trainIterator)
                            
                            logging.debug("Performing model selection, taking subsample of entries of size " + str(self.sampleSize))
                            X = SparseUtils.submatrix(X, self.sampleSize)
                            
                            cvInds = Sampling.randCrossValidation(self.algoArgs.folds, X.nnz)
                            meanErrors, stdErrors = learner.modelSelect(X, self.algoArgs.rhos, self.algoArgs.ks, cvInds)
                            
                            logging.debug("Mean errors = " + str(meanErrors))
                            logging.debug("Std errors = " + str(stdErrors))
                            
                            modelSelectFileName = resultsFileName.replace("Results", "ModelSelect") 
                            numpy.savez(modelSelectFileName, meanErrors, stdErrors)
                            logging.debug("Saved model selection grid as " + modelSelectFileName)                            
                            
                            rho = self.algoArgs.rhos[numpy.unravel_index(numpy.argmin(meanErrors), meanErrors.shape)[0]]
                            k = self.algoArgs.ks[numpy.unravel_index(numpy.argmin(meanErrors), meanErrors.shape)[1]]
                        else: 
                            rho = self.algoArgs.rhos[0]
                            k = self.algoArgs.ks[0]
                            
                        learner.setK(k)  
                        learner.setRho(rho)   
                        logging.debug(learner)                
                        trainIterator = self.getTrainIterator()
                        ZIter = learner.learnModel(trainIterator)
                        
                        self.recordResults(ZIter, learner, resultsFileName)
                    finally: 
                        fileLock.unlock()
                else: 
                    logging.debug("File is locked or already computed: " + resultsFileName)
                
                
        if self.algoArgs.runSgdMf:
            logging.debug("Running SGD MF")
            
            resultsFileName = self.resultsDir + "ResultsSgdMf.npz"
            fileLock = FileLock(resultsFileName)  
            
            if not fileLock.isLocked() and not fileLock.fileExists(): 
                fileLock.lock()
                
                try: 
                    learner = IterativeSGDNorm2Reg(k=self.algoArgs.ks[0], lmbda=self.algoArgs.lmbdas[0], gamma=self.algoArgs.gammas[0], eps=self.algoArgs.eps)               

                    if self.algoArgs.modelSelect:
                        # Let's find optimal parameters using the first matrix 
                        learner.modelSelect(next(self.getTrainIterator()), self.algoArgs.ks, self.algoArgs.lmbdas, self.algoArgs.gammas, self.algoArgs.folds)

                    trainIterator = self.getTrainIterator()
                    ZIter = learner.learnModel(trainIterator)
                    
                    self.recordResults(ZIter, learner, resultsFileName)
                finally: 
                    fileLock.unlock()
            else: 
                logging.debug("File is locked or already computed: " + resultsFileName)            
            
        logging.info("All done: see you around!")
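runExperiment only computes a results file when it is neither locked nor already present, and releases the lock in a finally block. Below is a minimal standard-library stand-in for that guard pattern; the SimpleFileLock name and its details are illustrative assumptions, not the project's FileLock API:

import os

#Illustrative stand-in for the FileLock guard used above; the project's
#FileLock class may differ in detail
class SimpleFileLock(object):
    def __init__(self, resultsFileName):
        self.resultsFileName = resultsFileName
        self.lockFileName = resultsFileName + ".lock"

    def isLocked(self):
        return os.path.exists(self.lockFileName)

    def fileExists(self):
        return os.path.exists(self.resultsFileName)

    def lock(self):
        #O_CREAT | O_EXCL makes creation atomic: it raises if the lock file already exists
        os.close(os.open(self.lockFileName, os.O_CREAT | os.O_EXCL))

    def unlock(self):
        os.remove(self.lockFileName)

fileLock = SimpleFileLock("ResultsSoftImpute.npz")
if not fileLock.isLocked() and not fileLock.fileExists():
    fileLock.lock()
    try:
        pass  #run the experiment and save results here
    finally:
        fileLock.unlock()
else:
    print("File is locked or already computed: " + fileLock.resultsFileName)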