Exemplo n.º 1
0
    def testCentreRows(self):
        """centerRows should subtract the mean of the nonzeros in each row."""
        shape = (50, 10)
        rank = 5
        numInds = 100

        X, U, s, V = SparseUtils.generateSparseLowRank(shape, rank, numInds, verbose=True)
        rowInds, colInds = X.nonzero()

        # Each indexed nonzero entry should match the flattened nonzero values.
        flatVals = numpy.array(X[X.nonzero()]).ravel()
        for idx in range(rowInds.shape[0]):
            self.assertEquals(X[rowInds[idx], colInds[idx]], flatVals[idx])

        # Expected row means: row sums divided by nonzero counts per row.
        rowSums = numpy.array(X.sum(1)).ravel()
        nnzPerRow = numpy.zeros(X.shape[0])
        for row in range(X.shape[0]):
            for col in range(X.shape[1]):
                if X[row, col] != 0:
                    nnzPerRow[row] += 1

        rowSums /= nnzPerRow
        rowSums[nnzPerRow == 0] = 0

        X, mu = SparseUtils.centerRows(X)
        nptst.assert_array_almost_equal(numpy.array(X.mean(1)).ravel(), numpy.zeros(X.shape[0]))
        nptst.assert_array_almost_equal(mu, rowSums)
Exemplo n.º 2
0
    def testSvdSoft(self):
        """Check svdSoft against a soft-thresholded SVD computed with numpy.

        svdSoft(A, lmbda) should keep the singular vectors of A and shrink
        the singular values by lmbda, discarding those at or below lmbda.
        """
        A = scipy.sparse.rand(10, 10, 0.2)
        A = A.tocsc()

        lmbda = 0.2
        U, s, V = SparseUtils.svdSoft(A, lmbda)
        ATilde = U.dot(numpy.diag(s)).dot(V.T)

        #Now compute the same matrix using numpy
        A = A.todense()

        U2, s2, V2 = numpy.linalg.svd(A)
        inds = numpy.flipud(numpy.argsort(s2))
        inds = inds[s2[inds] > lmbda]
        U2, s2, V2 = Util.indSvd(U2, s2, V2, inds)

        #Soft-threshold: shrink by lmbda and clip negatives at zero.
        s2 = s2 - lmbda
        s2 = numpy.clip(s2, 0, numpy.max(s2))  # was clipping s, not s2 (bug)

        ATilde2 = U2.dot(numpy.diag(s2)).dot(V2.T)

        nptst.assert_array_almost_equal(s, s2)  # was comparing s to itself (bug)
        nptst.assert_array_almost_equal(ATilde, ATilde2)

        #Now run svdSoft with a numpy array
        U3, s3, V3 = SparseUtils.svdSoft(A, lmbda)
        ATilde3 = U3.dot(numpy.diag(s3)).dot(V3.T)  # was rebuilt from U, s, V (bug)

        nptst.assert_array_almost_equal(s, s3)
        nptst.assert_array_almost_equal(ATilde3, ATilde2)
Exemplo n.º 3
0
    def testMatrixApprox(self):
        """Nystrom approximation should improve as more indices are sampled."""
        tol = 10**-6
        A = numpy.random.rand(10, 10)
        A = A.dot(A.T)

        # Approximate using a random subset of 5 indices.
        sampleSize = 5
        inds = numpy.sort(numpy.random.permutation(A.shape[0])[0:sampleSize])
        AHat = Nystrom.matrixApprox(A, inds)

        # Using all 10 indices must be (near) exact and beat the subset.
        AHat2 = Nystrom.matrixApprox(A, 10)
        self.assertTrue(
            numpy.linalg.norm(A - AHat2) < numpy.linalg.norm(A - AHat))
        self.assertTrue(numpy.linalg.norm(A - AHat2) < tol)

        # Repeat the check on a sparse matrix.
        As = scipy.sparse.csr_matrix(A)
        inds = numpy.sort(numpy.random.permutation(A.shape[0])[0:sampleSize])
        AHat = Nystrom.matrixApprox(As, inds)

        AHat2 = Nystrom.matrixApprox(As, 10)
        self.assertTrue(
            SparseUtils.norm(As - AHat2) < SparseUtils.norm(As - AHat))
        self.assertTrue(SparseUtils.norm(As - AHat2) < tol)

        # Dense and sparse solutions should coincide for every sample size.
        for sampleSize in range(1, 9):
            inds = numpy.sort(numpy.random.permutation(A.shape[0])[0:sampleSize])
            AHats = Nystrom.matrixApprox(As, inds)
            AHat = Nystrom.matrixApprox(A, inds)

            self.assertTrue(
                numpy.linalg.norm(AHat - numpy.array(AHats.todense())) < tol)
Exemplo n.º 4
0
    def testSampleUsers(self):
        """sampleUsers should pick at most k users and keep their rows intact."""
        m = 10
        n = 15
        r = 5
        u = 0.3
        w = 1-u
        X, U, s, V, wv = SparseUtils.generateSparseBinaryMatrix((m,n), r, w, csarray=True, verbose=True, indsPerRow=200)

        k = 50
        X2, userInds = Sampling.sampleUsers(X, k)

        #k >= m, so every user is kept and X is unchanged.
        nptst.assert_array_equal(X.toarray(), X2.toarray())

        numRuns = 50
        for i in range(numRuns):
            m = numpy.random.randint(10, 100)
            n = numpy.random.randint(10, 100)
            k = numpy.random.randint(10, 100)

            X, U, s, V, wv = SparseUtils.generateSparseBinaryMatrix((m,n), r, w, csarray=True, verbose=True, indsPerRow=200)

            X2, userInds = Sampling.sampleUsers(X, k)

            self.assertEquals(X2.shape[0], min(k, m))
            #The co-occurrence matrix should have some nonzero entry. The old
            #check applied .all() to numpy.zeros(...) by mistake, so it never
            #tested X.dot(X.T) at all.
            Xdense = X.toarray()
            self.assertTrue((Xdense.dot(Xdense.T) != numpy.zeros((m, m))).any())
            self.assertTrue((X2.toarray() == X.toarray()[userInds, :]).all())
            self.assertEquals(X.toarray()[userInds, :].nonzero()[0].shape[0], X2.nnz)
Exemplo n.º 5
0
    def testLocalAucApprox(self):
        """The sampled local AUC should be close to the exact value."""
        m = 100
        n = 200
        k = 2
        X, U, s, V, wv = SparseUtils.generateSparseBinaryMatrix((m, n), k, csarray=True, verbose=True)

        w = 1.0
        exactAuc = MCEvaluator.localAUC(X, U, V, w)

        # Increasing sample sizes; each approximation should be close.
        for numAucSamples in numpy.arange(150, 200, 10):
            approxAuc = MCEvaluator.localAUCApprox(SparseUtils.getOmegaListPtr(X), U, V, w, numAucSamples)
            self.assertAlmostEqual(approxAuc, exactAuc, 1)

        # Repeat with a smaller threshold w.
        w = 0.5
        exactAuc = MCEvaluator.localAUC(X, U, V, w)

        for numAucSamples in numpy.arange(50, 200, 10):
            approxAuc = MCEvaluator.localAUCApprox(SparseUtils.getOmegaListPtr(X), U, V, w, numAucSamples)
            self.assertAlmostEqual(approxAuc, exactAuc, 1)
Exemplo n.º 6
0
    def testMatrixApprox(self):
        """More Nystrom samples should give a strictly better approximation."""
        tol = 10**-6
        C = numpy.random.rand(10, 10)
        C = C.dot(C.T)

        subset = numpy.sort(numpy.random.permutation(C.shape[0])[0:5])
        approxPartial = Nystrom.matrixApprox(C, subset)

        # With all 10 indices the approximation is essentially exact.
        approxFull = Nystrom.matrixApprox(C, 10)
        self.assertTrue(numpy.linalg.norm(C - approxFull) < numpy.linalg.norm(C - approxPartial))
        self.assertTrue(numpy.linalg.norm(C - approxFull) < tol)

        #Test on a sparse matrix
        Cs = scipy.sparse.csr_matrix(C)
        subset = numpy.sort(numpy.random.permutation(C.shape[0])[0:5])
        approxPartial = Nystrom.matrixApprox(Cs, subset)

        approxFull = Nystrom.matrixApprox(Cs, 10)
        self.assertTrue(SparseUtils.norm(Cs - approxFull) < SparseUtils.norm(Cs - approxPartial))
        self.assertTrue(SparseUtils.norm(Cs - approxFull) < tol)

        #Compare dense and sparse solutions
        for sampleCount in range(1, 9):
            subset = numpy.sort(numpy.random.permutation(C.shape[0])[0:sampleCount])
            approxSparse = Nystrom.matrixApprox(Cs, subset)
            approxDense = Nystrom.matrixApprox(C, subset)

            self.assertTrue(numpy.linalg.norm(approxDense - numpy.array(approxSparse.todense())) < tol)
Exemplo n.º 7
0
    def testSvdSoft(self):
        """Check svdSoft against a soft-thresholded SVD computed with numpy.

        svdSoft(A, lmbda) should keep the singular vectors of A and shrink
        the singular values by lmbda, discarding those at or below lmbda.
        """
        A = scipy.sparse.rand(10, 10, 0.2)
        A = A.tocsc()

        lmbda = 0.2
        U, s, V = SparseUtils.svdSoft(A, lmbda)
        ATilde = U.dot(numpy.diag(s)).dot(V.T)

        #Now compute the same matrix using numpy
        A = A.todense()

        U2, s2, V2 = numpy.linalg.svd(A)
        inds = numpy.flipud(numpy.argsort(s2))
        inds = inds[s2[inds] > lmbda]
        U2, s2, V2 = Util.indSvd(U2, s2, V2, inds)

        #Soft-threshold: shrink by lmbda and clip negatives at zero.
        s2 = s2 - lmbda
        s2 = numpy.clip(s2, 0, numpy.max(s2))  # was clipping s, not s2 (bug)

        ATilde2 = U2.dot(numpy.diag(s2)).dot(V2.T)

        nptst.assert_array_almost_equal(s, s2)  # was comparing s to itself (bug)
        nptst.assert_array_almost_equal(ATilde, ATilde2)

        #Now run svdSoft with a numpy array
        U3, s3, V3 = SparseUtils.svdSoft(A, lmbda)
        ATilde3 = U3.dot(numpy.diag(s3)).dot(V3.T)  # was rebuilt from U, s, V (bug)

        nptst.assert_array_almost_equal(s, s3)
        nptst.assert_array_almost_equal(ATilde3, ATilde2)
Exemplo n.º 8
0
 def testSplitNnz(self):
     """Splitting the nonzeros into two parts should exactly partition X."""
     numRuns = 100
     import sppy

     # scipy sparse input
     for run in range(numRuns):
         numRows = numpy.random.randint(5, 50)
         numCols = numpy.random.randint(5, 50)
         X = scipy.sparse.rand(numRows, numCols, 0.5).tocsc()

         frac = numpy.random.rand()
         X1, X2 = SparseUtils.splitNnz(X, frac)

         nptst.assert_array_almost_equal((X1 + X2).todense(), X.todense())

     # sppy csarray input
     for run in range(numRuns):
         numRows = numpy.random.randint(5, 50)
         numCols = numpy.random.randint(5, 50)
         X = sppy.csarray(scipy.sparse.rand(numRows, numCols, 0.5).tocsc())

         frac = numpy.random.rand()
         X1, X2 = SparseUtils.splitNnz(X, frac)

         nptst.assert_array_almost_equal((X1 + X2).toarray(), X.toarray())
Exemplo n.º 9
0
    def testSplitNnz(self):
        """splitNnz must partition the nonzeros so that X1 + X2 == X."""
        numRuns = 100
        import sppy

        for run in range(numRuns):
            rows = numpy.random.randint(5, 50)
            cols = numpy.random.randint(5, 50)
            X = scipy.sparse.rand(rows, cols, 0.5).tocsc()

            X1, X2 = SparseUtils.splitNnz(X, numpy.random.rand())

            # The two halves must sum back to the original matrix.
            nptst.assert_array_almost_equal((X1 + X2).todense(), X.todense())

        for run in range(numRuns):
            rows = numpy.random.randint(5, 50)
            cols = numpy.random.randint(5, 50)
            X = sppy.csarray(scipy.sparse.rand(rows, cols, 0.5).tocsc())

            X1, X2 = SparseUtils.splitNnz(X, numpy.random.rand())

            nptst.assert_array_almost_equal((X1 + X2).toarray(), X.toarray())
Exemplo n.º 10
0
    def testSparseMatrix(self):
        """sparseMatrix should honour the requested backend and storage type."""
        m = 10
        n = 15

        A = numpy.random.rand(m, n)
        rowInds, colInds = A.nonzero()
        vals = A[rowInds, colInds]

        # (backend, storagetype, expected concrete type) for each combination.
        cases = [("scipy", "col", scipy.sparse.csc_matrix),
                 ("scipy", "row", scipy.sparse.csr_matrix),
                 ("csarray", "col", sppy.csarray),
                 ("csarray", "row", sppy.csarray)]

        for backend, storage, expectedType in cases:
            X = SparseUtils.sparseMatrix(vals, rowInds, colInds, A.shape, backend, storagetype=storage)
            self.assertTrue(X.dtype == A.dtype)
            self.assertTrue(X.shape == A.shape)
            self.assertTrue(type(X) == expectedType)
            if backend == "csarray":
                self.assertTrue(X.storagetype == storage)
            nptst.assert_array_equal(X.toarray(), A)
Exemplo n.º 11
0
    def testGetOmegaListPtr(self):
        """getOmegaListPtr should give CSR-style pointers to nonzero columns."""
        import sppy
        m = 10
        n = 5
        X = scipy.sparse.rand(m, n, 0.1)
        X = X.tocsr()

        dense = X.toarray()

        indPtr, colInds = SparseUtils.getOmegaListPtr(X)
        for row in range(m):
            nptst.assert_array_almost_equal(colInds[indPtr[row]:indPtr[row + 1]], dense[row, :].nonzero()[0])

        Xsppy = sppy.csarray(X)
        indPtr, colInds = SparseUtils.getOmegaListPtr(Xsppy)
        for row in range(m):
            nptst.assert_array_almost_equal(colInds[indPtr[row]:indPtr[row + 1]], dense[row, :].nonzero()[0])

        #Test a zero array (scipy doesn't work in this case)
        X = sppy.csarray((m, n))
        indPtr, colInds = SparseUtils.getOmegaListPtr(X)
        for row in range(m):
            # Just index each (empty) slice to check the pointers are valid.
            colInds[indPtr[row]:indPtr[row + 1]]
Exemplo n.º 12
0
    def testCentreCols(self):
        """centerCols should subtract the mean of the nonzeros in each column."""
        shape = (50, 10)
        r = 5
        k = 100

        X, U, s, V = SparseUtils.generateSparseLowRank(shape, r, k, verbose=True)
        rowInds, colInds = X.nonzero()

        # Expected column means: column sums divided by nonzero counts.
        colSums = numpy.array(X.sum(0)).ravel()
        nnzPerCol = numpy.zeros(X.shape[1])
        for row in range(X.shape[0]):
            for col in range(X.shape[1]):
                if X[row, col] != 0:
                    nnzPerCol[col] += 1

        colSums /= nnzPerCol
        colSums[nnzPerCol == 0] = 0

        X, mu = SparseUtils.centerCols(X)
        nptst.assert_array_almost_equal(numpy.array(X.mean(0)).ravel(), numpy.zeros(X.shape[1]))
        nptst.assert_array_almost_equal(mu, colSums)
Exemplo n.º 13
0
    def testLocalAucApprox(self):
        """Approximated local AUC should agree with the exact computation."""
        m = 100
        n = 200
        k = 2
        X, U, s, V, wv = SparseUtils.generateSparseBinaryMatrix(
            (m, n), k, csarray=True, verbose=True)

        w = 1.0
        trueAuc = MCEvaluator.localAUC(X, U, V, w)

        sampleSizes = numpy.arange(150, 200, 10)
        for numAucSamples in sampleSizes:
            estimate = MCEvaluator.localAUCApprox(
                SparseUtils.getOmegaListPtr(X), U, V, w, numAucSamples)
            self.assertAlmostEqual(estimate, trueAuc, 1)

        #Try smaller w
        w = 0.5
        trueAuc = MCEvaluator.localAUC(X, U, V, w)

        sampleSizes = numpy.arange(50, 200, 10)
        for numAucSamples in sampleSizes:
            estimate = MCEvaluator.localAUCApprox(
                SparseUtils.getOmegaListPtr(X), U, V, w, numAucSamples)
            self.assertAlmostEqual(estimate, trueAuc, 1)
Exemplo n.º 14
0
    def testGetOmegaListPtr(self):
        """Pointer/index arrays should enumerate each row's nonzero columns."""
        import sppy
        m = 10
        n = 5
        X = scipy.sparse.rand(m, n, 0.1)
        X = X.tocsr()

        expected = X.toarray()

        indPtr, colInds = SparseUtils.getOmegaListPtr(X)
        for i in range(m):
            start, end = indPtr[i], indPtr[i + 1]
            nptst.assert_array_almost_equal(colInds[start:end],
                                            expected[i, :].nonzero()[0])

        Xsppy = sppy.csarray(X)
        indPtr, colInds = SparseUtils.getOmegaListPtr(Xsppy)
        for i in range(m):
            start, end = indPtr[i], indPtr[i + 1]
            nptst.assert_array_almost_equal(colInds[start:end],
                                            expected[i, :].nonzero()[0])

        #Test a zero array (scipy doesn't work in this case)
        X = sppy.csarray((m, n))
        indPtr, colInds = SparseUtils.getOmegaListPtr(X)
        for i in range(m):
            # Slices should all be empty but must still be valid.
            colInds[indPtr[i]:indPtr[i + 1]]
Exemplo n.º 15
0
    def testScale(self):
        """
        Print the average norms of the unnormalised U and V gradients.
        """
        m = 100
        n = 400
        k = 3
        X = SparseUtils.generateSparseBinaryMatrix((m, n), k, csarray=True)

        w = 0.1
        eps = 0.001
        learner = MaxAUCTanh(k, w)
        learner.normalise = False
        learner.lmbdaU = 1.0
        learner.lmbdaV = 1.0
        learner.rho = 1.0
        learner.numAucSamples = 100

        indPtr, colInds = SparseUtils.getOmegaListPtr(X)
        r = numpy.random.rand(m)

        U = numpy.random.rand(X.shape[0], k)
        V = numpy.random.rand(X.shape[1], k)

        # Random weight vectors, each normalised to sum to one.
        gi = numpy.random.rand(m)
        gi /= gi.sum()
        gp = numpy.random.rand(n)
        gp /= gp.sum()
        gq = numpy.random.rand(n)
        gq /= gq.sum()

        permutedRowInds = numpy.array(numpy.random.permutation(m), numpy.uint32)
        permutedColInds = numpy.array(numpy.random.permutation(n), numpy.uint32)

        maxLocalAuc = MaxLocalAUC(k, w)
        normGp, normGq = maxLocalAuc.computeNormGpq(indPtr, colInds, gp, gq, m)

        # Mean gradient norm over the rows of U.
        totalDu = 0
        for row in range(m):
            totalDu += numpy.linalg.norm(learner.derivativeUi(indPtr, colInds, U, V, r, gi, gp, gq, row))

        totalDu /= float(m)
        print(totalDu)

        # Mean gradient norm over the rows of V.
        totalDv = 0

        for row in range(n):
            totalDv += numpy.linalg.norm(learner.derivativeVi(indPtr, colInds, U, V, r, gi, gp, gq, row))

        totalDv /= float(n)
        print(totalDv)
Exemplo n.º 16
0
 def testReconstructLowRank(self):
     """Reconstructing entry (0, 0) should match the explicit low rank product."""
     shape = (5000, 1000)
     rank = 5

     U, s, V = SparseUtils.generateLowRank(shape, rank)

     inds = numpy.array([0])
     X = SparseUtils.reconstructLowRank(U, s, V, inds)

     self.assertAlmostEquals(X[0, 0], (U[0, :] * s).dot(V[0, :]))
Exemplo n.º 17
0
    def testReconstructLowRank(self):
        """Entry (0, 0) of the reconstruction equals u_0 * diag(s) * v_0."""
        shape = (5000, 1000)
        r = 5

        U, s, V = SparseUtils.generateLowRank(shape, r)

        X = SparseUtils.reconstructLowRank(U, s, V, numpy.array([0]))

        expected = (U[0, :] * s).dot(V[0, :])
        self.assertAlmostEquals(X[0, 0], expected)
Exemplo n.º 18
0
    def modelSelect(self, X):
        """
        Perform model selection on X and return the best parameters. 

        Runs randomised cross validation over self.folds folds, computing
        local AUC for each rank in self.ks (the lambdas are swept inside the
        localAucsLmbdas worker), then stores the best (k, lmbda) on self.

        :param X: the sparse data matrix to select parameters for.
        :return: (meanLocalAucs, stdLocalAucs) arrays of shape
            (len(self.ks), len(self.lmbdas)).
        """
        m, n = X.shape
        cvInds = Sampling.randCrossValidation(self.folds, X.nnz)
        localAucs = numpy.zeros(
            (self.ks.shape[0], self.lmbdas.shape[0], len(cvInds)))

        logging.debug("Performing model selection")
        paramList = []

        for icv, (trainInds, testInds) in enumerate(cvInds):
            Util.printIteration(icv, 1, self.folds, "Fold: ")

            trainX = SparseUtils.submatrix(X, trainInds)
            testX = SparseUtils.submatrix(X, testInds)

            testOmegaList = SparseUtils.getOmegaList(testX)

            for i, k in enumerate(self.ks):
                maxLocalAuc = self.copy()
                maxLocalAuc.k = k
                paramList.append((trainX, testX, testOmegaList, maxLocalAuc))

        pool = multiprocessing.Pool(processes=self.numProcesses,
                                    maxtasksperchild=100)
        resultsIterator = pool.imap(localAucsLmbdas, paramList, self.chunkSize)
        #import itertools
        #resultsIterator = itertools.imap(localAucsLmbdas, paramList)

        for icv, (trainInds, testInds) in enumerate(cvInds):
            for i, k in enumerate(self.ks):
                #Use the next() builtin: iterator.next() is Python 2 only.
                tempAucs = next(resultsIterator)
                localAucs[i, :, icv] = tempAucs

        pool.terminate()

        meanLocalAucs = numpy.mean(localAucs, 2)
        stdLocalAucs = numpy.std(localAucs, 2)

        logging.debug(meanLocalAucs)

        #Compute the argmax once and use it to index both parameter arrays.
        bestKInd, bestLmbdaInd = numpy.unravel_index(
            numpy.argmax(meanLocalAucs), meanLocalAucs.shape)
        k = self.ks[bestKInd]
        lmbda = self.lmbdas[bestLmbdaInd]

        logging.debug("Model parameters: k=" + str(k) + " lmbda=" + str(lmbda))

        self.k = k
        self.lmbda = lmbda

        return meanLocalAucs, stdLocalAucs
Exemplo n.º 19
0
    def testScale(self):
        """
        Report the average gradient norms of the unnormalised learner.
        """
        m = 100
        n = 400
        k = 3
        X = SparseUtils.generateSparseBinaryMatrix((m, n), k, csarray=True)

        w = 0.1
        eps = 0.001
        learner = MaxAUCTanh(k, w)
        learner.normalise = False
        learner.lmbdaU = 1.0
        learner.lmbdaV = 1.0
        learner.rho = 1.0
        learner.numAucSamples = 100

        indPtr, colInds = SparseUtils.getOmegaListPtr(X)
        r = numpy.random.rand(m)

        U = numpy.random.rand(X.shape[0], k)
        V = numpy.random.rand(X.shape[1], k)

        # Weight vectors, each rescaled to sum to one.
        gi = numpy.random.rand(m)
        gi /= gi.sum()
        gp = numpy.random.rand(n)
        gp /= gp.sum()
        gq = numpy.random.rand(n)
        gq /= gq.sum()

        permutedRowInds = numpy.array(numpy.random.permutation(m), numpy.uint32)
        permutedColInds = numpy.array(numpy.random.permutation(n), numpy.uint32)

        maxLocalAuc = MaxLocalAUC(k, w)
        normGp, normGq = maxLocalAuc.computeNormGpq(indPtr, colInds, gp, gq, m)

        # Accumulate the norm of dU for every user, then average.
        uNormSum = 0
        for userInd in range(m):
            du = learner.derivativeUi(indPtr, colInds, U, V, r, gi, gp, gq, userInd)
            uNormSum += numpy.linalg.norm(du)

        uNormSum /= float(m)
        print(uNormSum)

        # Accumulate the norm of dV for every item, then average.
        vNormSum = 0

        for itemInd in range(n):
            dv = learner.derivativeVi(indPtr, colInds, U, V, r, gi, gp, gq, itemInd)
            vNormSum += numpy.linalg.norm(dv)

        vNormSum /= float(n)
        print(vNormSum)
Exemplo n.º 20
0
    def testSparseMatrix(self):
        """Each backend/storagetype combination should round-trip A exactly."""
        m = 10
        n = 15

        A = numpy.random.rand(m, n)
        rowInds, colInds = A.nonzero()
        vals = A[rowInds, colInds]

        def buildAndCheck(backend, storage, expectedType):
            # Build the sparse matrix and verify it reproduces A.
            X = SparseUtils.sparseMatrix(vals, rowInds, colInds, A.shape,
                                         backend, storagetype=storage)
            self.assertTrue(X.dtype == A.dtype)
            self.assertTrue(X.shape == A.shape)
            self.assertTrue(type(X) == expectedType)
            if backend == "csarray":
                self.assertTrue(X.storagetype == storage)
            nptst.assert_array_equal(X.toarray(), A)

        buildAndCheck("scipy", "col", scipy.sparse.csc_matrix)
        buildAndCheck("scipy", "row", scipy.sparse.csr_matrix)
        buildAndCheck("csarray", "col", sppy.csarray)
        buildAndCheck("csarray", "row", sppy.csarray)
Exemplo n.º 21
0
 def learnModel2(self, X):
     """
     Learn the matrix completion using a sparse matrix X. This is the simple 
     version of the soft impute algorithm in which we store the entire 
     matrices, newZ and oldZ. 

     For each regularisation value rho in self.rhos the update
     Z <- svdSoft(observed entries of X + unobserved entries of Z, rho)
     is iterated until the relative change gamma drops to self.eps or below.

     :param X: sparse matrix of observed entries.
     :return: a list of solution matrices (one per rho), or the single
         solution when self.rhos contains exactly one value.
     """
     #if not scipy.sparse.isspmatrix_lil(X):
     #    raise ValueError("Input matrix must be lil_matrix")
         
     oldZ = scipy.sparse.lil_matrix(X.shape)
     #Indices of the observed entries of X. 
     omega = X.nonzero()
     tol = 10**-6
      
     ZList = []
     
     for rho in self.rhos:
         #Start above the threshold so the loop runs at least once. 
         gamma = self.eps + 1
         i = 0
         while gamma > self.eps:
             #Y agrees with X on observed entries and with oldZ elsewhere: 
             #zero out oldZ at omega, then add X. 
             Y = oldZ.copy()
             Y[omega] = 0
             Y = X + Y
             Y = Y.tocsc()
             U, s, V = ExpSU.SparseUtils.svdSoft(Y, rho)
             #Get an "invalid value encountered in sqrt" warning sometimes
             newZ = scipy.sparse.lil_matrix((U*s).dot(V.T))
             
             oldZ = oldZ.tocsr()
             normOldZ = SparseUtils.norm(oldZ)**2
             normNewZmOldZ = SparseUtils.norm(newZ - oldZ)**2               
             
             #We can get newZ == oldZ in which case we break
             if normNewZmOldZ < tol: 
                 gamma = 0
             elif abs(normOldZ) < tol:
                 #Previous iterate is (numerically) zero: force another pass. 
                 gamma = self.eps + 1 
             else: 
                 #Relative squared change between successive iterates. 
                 gamma = normNewZmOldZ/normOldZ
             
             oldZ = newZ.copy()
             
             logging.debug("Iteration " + str(i) + " gamma="+str(gamma)) 
             i += 1
         
         logging.debug("Number of iterations for lambda="+str(rho) + ": " + str(i))
         ZList.append(newZ)
     
     if self.rhos.shape[0] != 1:
         return ZList
     else:
         return ZList[0]
Exemplo n.º 22
0
    def modelSelect(self, X): 
        """
        Perform model selection on X and return the best parameters. 

        Cross validates over self.folds random folds of the nonzeros of X,
        computing local AUC for every k in self.ks (lambdas are swept inside
        the localAucsLmbdas worker), then records the best (k, lmbda) on self.

        :param X: the sparse data matrix to select parameters for.
        :return: (meanLocalAucs, stdLocalAucs), each of shape (len(ks), len(lmbdas)).
        """
        m, n = X.shape
        cvInds = Sampling.randCrossValidation(self.folds, X.nnz)
        localAucs = numpy.zeros((self.ks.shape[0], self.lmbdas.shape[0], len(cvInds)))
        
        logging.debug("Performing model selection")
        paramList = []        
        
        for icv, (trainInds, testInds) in enumerate(cvInds):
            Util.printIteration(icv, 1, self.folds, "Fold: ")

            trainX = SparseUtils.submatrix(X, trainInds)
            testX = SparseUtils.submatrix(X, testInds)
            
            testOmegaList = SparseUtils.getOmegaList(testX)
            
            for i, k in enumerate(self.ks): 
                maxLocalAuc = self.copy()
                maxLocalAuc.k = k
                paramList.append((trainX, testX, testOmegaList, maxLocalAuc))
                    
        pool = multiprocessing.Pool(processes=self.numProcesses, maxtasksperchild=100)
        resultsIterator = pool.imap(localAucsLmbdas, paramList, self.chunkSize)
        #import itertools
        #resultsIterator = itertools.imap(localAucsLmbdas, paramList)
        
        for icv, (trainInds, testInds) in enumerate(cvInds):        
            for i, k in enumerate(self.ks): 
                #Use the next() builtin: iterator.next() is Python 2 only. 
                tempAucs = next(resultsIterator)
                localAucs[i, :, icv] = tempAucs
        
        pool.terminate()
        
        meanLocalAucs = numpy.mean(localAucs, 2)
        stdLocalAucs = numpy.std(localAucs, 2)
        
        logging.debug(meanLocalAucs)
        
        #Compute the argmax once and use it to index both parameter arrays. 
        bestKInd, bestLmbdaInd = numpy.unravel_index(numpy.argmax(meanLocalAucs), meanLocalAucs.shape)
        k = self.ks[bestKInd]
        lmbda = self.lmbdas[bestLmbdaInd]
        
        logging.debug("Model parameters: k=" + str(k) + " lmbda=" + str(lmbda))
        
        self.k = k 
        self.lmbda = lmbda 
        
        return meanLocalAucs, stdLocalAucs
Exemplo n.º 23
0
 def syntheticDataset1(m=500, n=200, k=8, u=0.1, sd=0, noise=5):
     """
     Create a simple synthetic dataset 
     """
     w = 1 - u
     X, U, s, V, wv = SparseUtils.generateSparseBinaryMatrix((m, n), k, w, sd=sd, csarray=True, verbose=True, indsPerRow=200)
     # Sprinkle in random noisy observations, then re-binarise and compact.
     noiseMatrix = sppy.rand((m, n), noise / float(n), storagetype="row")
     X = X + noiseMatrix
     X[X.nonzero()] = 1
     X.prune()
     # Discard rows with fewer than 10 nonzero entries.
     X = SparseUtils.pruneMatrixRows(X, minNnzRows=10)
     logging.debug("Non zero elements: " + str(X.nnz) + " shape: " + str(X.shape))

     return X, U * s, V
Exemplo n.º 24
0
def learnPredictRanking(args):
    """
    A function to train on a training set and test on a test set, for a number
    of values of rho.

    :param args: A tuple (learner, trainX, testX, rhos) where rhos is a numpy
        array of regularisation/threshold values.
    :return: A numpy array of metric values, one per rho.
    :raise ValueError: If learner.metric is not "mrr" or "f1".
    """
    learner, trainX, testX, rhos = args
    logging.debug("k=" + str(learner.getK()))
    logging.debug(learner)

    # The learner consumes an iterator of training matrices, one per rho.
    trainXIter = iter([trainX for rho in rhos])
    ZIter = learner.learnModel(trainXIter, iter(rhos))

    metrics = numpy.zeros(rhos.shape[0])

    for j, Z in enumerate(ZIter):
        U, s, V = Z
        # Scale the user factors by the singular values; the Cython code
        # requires C-contiguous arrays.
        U = numpy.ascontiguousarray(U * s)
        V = numpy.ascontiguousarray(V)

        testOrderedItems = MCEvaluatorCython.recommendAtk(
            U, V, learner.recommendSize, trainX)

        if learner.metric == "mrr":
            metrics[j] = MCEvaluator.mrrAtK(SparseUtils.getOmegaListPtr(testX),
                                            testOrderedItems,
                                            learner.recommendSize)
            logging.debug("MRR@" + str(learner.recommendSize) + ": " +
                          str('%.4f' % metrics[j]) + " " + str(learner))
        elif learner.metric == "f1":
            # Bug fix: this branch previously called mrrAtK even though the
            # requested metric (and the log message) is F1@k.
            metrics[j] = MCEvaluator.f1AtK(SparseUtils.getOmegaListPtr(testX),
                                           testOrderedItems,
                                           learner.recommendSize)
            logging.debug("F1@" + str(learner.recommendSize) + ": " +
                          str('%.4f' % metrics[j]) + " " + str(learner))
        else:
            raise ValueError("Unknown metric " + learner.metric)

        # Free the factor matrices of this iteration before the next one.
        gc.collect()

    return metrics
Exemplo n.º 25
0
    def modelSelect(self, X): 
        """
        Perform model selection on X and return the best parameters. 

        Cross-validates over the candidate ranks in self.ks, picks the k with
        the highest mean precision and stores it on self.

        :param X: The sparse (user, item) matrix to select parameters for.
        :return: (meanPrecisions, stdPrecisions) over the folds, indexed by k.
        """
        cvInds = Sampling.randCrossValidation(self.folds, X.nnz)
        precisions = numpy.zeros((self.ks.shape[0], len(cvInds)))
        
        logging.debug("Performing model selection")
        paramList = []        
        
        for icv, (trainInds, testInds) in enumerate(cvInds):
            Util.printIteration(icv, 1, self.folds, "Fold: ")

            trainX = SparseUtils.submatrix(X, trainInds)
            testX = SparseUtils.submatrix(X, testInds)
            
            testOmegaList = SparseUtils.getOmegaList(testX)
            
            # One task per (fold, k) pair, each with its own learner copy.
            for i, k in enumerate(self.ks): 
                learner = self.copy()
                learner.k = k
                paramList.append((trainX, testX, testOmegaList, learner))
                    
        #pool = multiprocessing.Pool(processes=self.numProcesses, maxtasksperchild=100)
        #resultsIterator = pool.imap(computePrecision, paramList, self.chunkSize)
        # Portability fix: itertools.imap and iterator.next() are Python 2
        # only; the builtin map/next work on both Python 2 and 3.
        resultsIterator = iter(map(computePrecision, paramList))
        
        # Results arrive in the same (fold, k) order the tasks were submitted.
        for icv, (trainInds, testInds) in enumerate(cvInds):        
            for i, k in enumerate(self.ks): 
                precisions[i, icv] = next(resultsIterator)
        
        #pool.terminate()
        
        meanPrecisions = numpy.mean(precisions, 1)
        stdPrecisions = numpy.std(precisions, 1)
        
        logging.debug(meanPrecisions)
        
        k = self.ks[numpy.argmax(meanPrecisions)]
        
        logging.debug("Model parameters: k=" + str(k)) 
        
        self.k = k 
        
        return meanPrecisions, stdPrecisions
Exemplo n.º 26
0
    def _addSparseRSVD(U, s, V, X, k=10, kX=None, kRand=None, q=None):
        """
        Perform a randomised SVD of the matrix X + U diag(s) V.T. We use the
        current V, the leading right singular vectors of X, and random
        directions as the sampling matrix, followed by q power iterations.

        :param k: Rank of the returned decomposition.
        :param kX: Number of singular vectors of X used in the sampler.
        :param kRand: Number of extra random sampling directions.
        :param q: Number of power iterations.
        :return: (U, s, V) of rank k approximating X + U diag(s) V.T.
        """
        # Idiom fix: compare with "is None" rather than "== None".
        if kX is None:
            kX = k
        if kRand is None:
            kRand = k
        if q is None:
            q = 1

        m, n = X.shape
        Us = U*s

        kX = numpy.min([m, n, kX])
        UX, sX, VX = SparseUtils.svdPropack(X, kX)
        # Sampling matrix for the range finder.
        omega = numpy.c_[V, VX, numpy.random.randn(n, kRand)]
        
        def rMultA(x):
            # (X + U diag(s) V^T) x without forming the dense matrix.
            return Us.dot(V.T.dot(x)) + X.dot(x)
        def rMultAT(x):
            # (X + U diag(s) V^T)^T x.
            return V.dot(Us.T.dot(x)) + X.T.dot(x)
        
        # Power iterations sharpen the range approximation.
        Y = rMultA(omega)
        for i in range(q): 
            Y = rMultAT(Y)
            Y = rMultA(Y)
        
        Q, R = numpy.linalg.qr(Y)
        B = rMultAT(Q).T   
        U, s, VT = numpy.linalg.svd(B, full_matrices=False)
        U, s, V = Util.indSvd(U, s, VT, numpy.flipud(numpy.argsort(s))[:k])
        U = Q.dot(U)
        
        return U, s, V 
Exemplo n.º 27
0
 def testOverfit(self): 
     """
     See if we can get a zero objective on the hinge loss 

     Trains an unregularised, higher-rank model on a small matrix and checks
     the final recorded training measure is (almost) zero.
     """
     m = 10 
     n = 20 
     k = 5 
     
     u = 0.5
     w = 1-u
     X = SparseUtils.generateSparseBinaryMatrix((m, n), k, w, csarray=True)
     
     # Rank-10 model, no regularisation, constant learning rate.
     eps = 0.001
     k = 10
     maxLocalAuc = MaxLocalAUC(k, u, eps=eps, stochastic=True)
     maxLocalAuc.rate = "constant"
     maxLocalAuc.maxIterations = 500
     maxLocalAuc.numProcesses = 1
     maxLocalAuc.loss = "hinge"
     maxLocalAuc.validationUsers = 0
     maxLocalAuc.lmbda = 0        
     
     print("Overfit example")
     U, V, trainMeasures, testMeasures, iterations, time = maxLocalAuc.learnModel(X, verbose=True)
     
     # First column of the last trainMeasures row should hit ~0 (the hinge
     # objective, per the docstring).
     self.assertAlmostEquals(trainMeasures[-1, 0], 0, 3)
Exemplo n.º 28
0
    def profileDerivativeUiApprox(self):
        """
        Profile the approximate derivative of the objective w.r.t. a single
        user factor row, computed for every user, repeated several times.
        """
        k = 10
        U = numpy.random.rand(self.m, k)
        V = numpy.random.rand(self.n, k)

        indPtr, colInds = SparseUtils.getOmegaListPtr(self.X)

        # Item weight distributions, normalised to sum to 1.
        gp = numpy.random.rand(self.n)
        gp /= gp.sum()
        gq = numpy.random.rand(self.n)
        gq /= gq.sum()

        # NOTE(review): j, numRowSamples and numAucSamples are assigned but
        # never used below - presumably leftovers from an earlier version.
        j = 3
        numRowSamples = 100
        numAucSamples = 10

        permutedRowInds = numpy.array(numpy.random.permutation(self.m), numpy.uint32)
        permutedColInds = numpy.array(numpy.random.permutation(self.n), numpy.uint32)

        maxLocalAuc = MaxLocalAUC(k, w=0.9)
        normGp, normGq = maxLocalAuc.computeNormGpq(indPtr, colInds, gp, gq, self.m)

        # NOTE(review): lmbda and normalise are also unused here.
        lmbda = 0.001
        normalise = True

        learner = MaxLocalAUCCython()

        def run():
            # Derivative for every user, numRuns passes over the data.
            numRuns = 10
            for j in range(numRuns):
                for i in range(self.m):
                    learner.derivativeUiApprox(indPtr, colInds, U, V, gp, gq, permutedColInds, i)

        ProfileUtils.profile("run()", globals(), locals())
Exemplo n.º 29
0
    def profileObjective(self):
        """
        Profile the approximate objective computation on self.X.
        """
        k = 10
        U = numpy.random.rand(self.m, k)
        V = numpy.random.rand(self.n, k)

        indPtr, colInds = SparseUtils.getOmegaListPtr(self.X)
        colIndsProbabilities = numpy.ones(colInds.shape[0])

        # Build a per-row cumulative distribution over each user's items.
        for i in range(self.m):
            colIndsProbabilities[indPtr[i] : indPtr[i + 1]] /= colIndsProbabilities[indPtr[i] : indPtr[i + 1]].sum()
            colIndsProbabilities[indPtr[i] : indPtr[i + 1]] = numpy.cumsum(
                colIndsProbabilities[indPtr[i] : indPtr[i + 1]]
            )

        r = numpy.zeros(self.m)
        lmbda = 0.001
        rho = 1.0
        numAucSamples = 100

        def run():
            # Evaluate the approximate objective repeatedly for profiling.
            numRuns = 10
            for i in range(numRuns):
                objectiveApprox(indPtr, colInds, indPtr, colInds, U, V, r, numAucSamples, lmbda, rho, False)

        ProfileUtils.profile("run()", globals(), locals())
Exemplo n.º 30
0
    def testParallelLearnModel(self): 
        """Smoke test parallelLearnModel on a small synthetic dataset."""
        numpy.random.seed(21)
        m = 500 
        n = 200 
        k = 5 
        # NOTE(review): this X is immediately overwritten below, so this call
        # only advances the random state - confirm whether it can be removed.
        X = SparseUtils.generateSparseBinaryMatrix((m, n), k, csarray=True)
        
        from wallhack.rankingexp.DatasetUtils import DatasetUtils
        X, U, V = DatasetUtils.syntheticDataset1()

        
        u = 0.1
        w = 1-u
        eps = 0.05
        maxLocalAuc = MaxLocalAUC(k, w, alpha=1.0, eps=eps, stochastic=True)
        maxLocalAuc.maxIterations = 3
        maxLocalAuc.recordStep = 1
        maxLocalAuc.rate = "optimal"
        maxLocalAuc.t0 = 2.0
        maxLocalAuc.validationUsers = 0.0
        maxLocalAuc.numProcesses = 4
        
        # Allow the process to run on all CPUs (resets any CPU affinity).
        os.system('taskset -p 0xffffffff %d' % os.getpid())
        print(X.nnz/maxLocalAuc.numAucSamples)
        U, V = maxLocalAuc.parallelLearnModel(X)
Exemplo n.º 31
0
    def testLocalAUC(self):
        """Check MCEvaluator.localAUC against sklearn's per-user ROC AUC."""
        m, n, k = 10, 20, 2
        X, U, s, V, wv = SparseUtils.generateSparseBinaryMatrix((m, n), k, 0.5, verbose=True, csarray=True)

        scores = U.dot(V.T)

        # Reference value: mean per-user AUC via sklearn.
        aucs = numpy.zeros(m)
        for row in range(m):
            aucs[row] = sklearn.metrics.roc_auc_score(numpy.ravel(X[row, :].toarray()), scores[row, :])
        expected = aucs.mean()

        # With u = 0 every item counts, so localAUC equals the plain mean AUC.
        self.assertEquals(expected, MCEvaluator.localAUC(X, U, V, 0.0))

        # Now try a large r: with w = 1 the threshold excludes everything.
        self.assertEquals(MCEvaluator.localAUC(X, U, V, 1.0), 0)
Exemplo n.º 32
0
    def testOverfit(self):
        """
        See if we can get a zero objective on the hinge loss 
        """
        numUsers, numItems, genRank = 10, 20, 5
        quantile = 0.5
        X = SparseUtils.generateSparseBinaryMatrix(
            (numUsers, numItems), genRank, 1 - quantile, csarray=True)

        # A higher-rank model with no regularisation should drive the hinge
        # objective to zero on the training data.
        learner = MaxLocalAUC(10, quantile, eps=0.001, stochastic=True)
        learner.rate = "constant"
        learner.maxIterations = 500
        learner.numProcesses = 1
        learner.loss = "hinge"
        learner.validationUsers = 0
        learner.lmbda = 0

        print("Overfit example")
        U, V, trainMeasures, testMeasures, iterations, time = learner.learnModel(
            X, verbose=True)

        self.assertAlmostEquals(trainMeasures[-1, 0], 0, 3)
Exemplo n.º 33
0
 def f1AtK(positiveArray, orderedItems, k, verbose=False):
     """
     Return the F1@k measure for each row of the predicted matrix UV.T
     using real values in positiveArray. positiveArray is a tuple
     (indPtr, colInds).

     :param orderedItems: The ordered items for each user (users are rows, items are cols)

     :param verbose: If true return F1 per row and the first k recommendations, otherwise just the mean F1
     """
     # Accept either a sparse matrix or a pre-built (indPtr, colInds) pair.
     if type(positiveArray) != tuple:
         positiveArray = SparseUtils.getOmegaListPtr(positiveArray)
     indPtr, colInds = positiveArray

     topK = orderedItems[:, 0:k]

     precisions = MCEvaluatorCython.precisionAtk(indPtr, colInds, topK)
     recalls = MCEvaluatorCython.recallAtk(indPtr, colInds, topK)

     # Guard against 0/0: where precision + recall is zero, bump the
     # denominator to 1 so F1 is 0 instead of NaN.
     denom = precisions + recalls
     denom += denom == 0
     f1s = (2 * precisions * recalls) / denom

     return (f1s, topK) if verbose else f1s.mean()
Exemplo n.º 34
0
    def testAverageRocCurve(self):
        """Smoke test averageRocCurve with and without a training matrix."""
        m, n, k = 50, 20, 8
        w = 1 - 20.0 / m
        X, U, s, V, wv = SparseUtils.generateSparseBinaryMatrix(
            (m, n), k, w, csarray=True, verbose=True, indsPerRow=200
        )

        fpr, tpr = MCEvaluator.averageRocCurve(X, U, V)

        import matplotlib

        matplotlib.use("GTK3Agg")
        import matplotlib.pyplot as plt

        # plt.plot(fpr, tpr)
        # plt.show()

        # Now try case where we have a training set
        trainTestXs = Sampling.shuffleSplitRows(X, 1, 5)
        trainX, testX = trainTestXs[0]

        fpr, tpr = MCEvaluator.averageRocCurve(testX, U, V, trainX=trainX)
Exemplo n.º 35
0
    def localAUCApprox(positiveArray,
                       U,
                       V,
                       w,
                       numAucSamples=50,
                       r=None,
                       allArray=None):
        """
        Compute the estimated local AUC for the score functions UV^T relative to X with 
        quantile w. The AUC is computed using positiveArray which is a tuple (indPtr, colInds)
        assuming allArray is None. If allArray is not None then positive items are chosen 
        from positiveArray and negative ones are chosen to complement allArray.
        """
        # Accept either a sparse matrix or a pre-built (indPtr, colInds) pair.
        if type(positiveArray) != tuple:
            positiveArray = SparseUtils.getOmegaListPtr(positiveArray)
        indPtr, colInds = positiveArray

        # The Cython kernel requires C-contiguous factor matrices.
        U = numpy.ascontiguousarray(U)
        V = numpy.ascontiguousarray(V)

        # Estimate the quantile threshold vector when not supplied.
        if r is None:
            r = SparseUtilsCython.computeR(U, V, w, numAucSamples)

        # Negatives complement allArray when given, otherwise positiveArray.
        if allArray is None:
            allIndPtr, allColInd = indPtr, colInds
        else:
            allIndPtr, allColInd = allArray
        return MCEvaluatorCython.localAUCApprox(indPtr, colInds, allIndPtr,
                                                allColInd, U, V,
                                                numAucSamples, r)
Exemplo n.º 36
0
 def testParallelSparseLowRankOp(self): 
     """The operator for A + U diag(s) V^T must agree with the dense matrix."""
     for trial in range(10):
         m = numpy.random.randint(10, 100)
         n = numpy.random.randint(10, 100)
         density = numpy.random.rand()
         A = scipy.sparse.rand(m, n, density).tocsc()

         rank = numpy.random.randint(10, 100)
         U, s, V = SparseUtils.generateLowRank((m, n), rank)

         L = LinOperatorUtils.parallelSparseLowRankOp(A, U, s, V)

         u = numpy.random.rand(m)
         v = numpy.random.rand(n)
         W = numpy.random.rand(m, 10)
         X = numpy.random.rand(n, 10)

         # Dense reference matrix B = A + U diag(s) V^T.
         B = numpy.array(A + (U*s).dot(V.T))

         nptst.assert_array_almost_equal(L.matvec(v), B.dot(v))
         nptst.assert_array_almost_equal(L.rmatvec(u), B.T.dot(u))
         nptst.assert_array_almost_equal(L.matmat(X), B.dot(X))
         nptst.assert_array_almost_equal(L.rmatmat(W), B.T.dot(W))
Exemplo n.º 37
0
    def testLocalAUC(self):
        """Check MCEvaluator.localAUC against sklearn's per-user ROC AUC."""
        m = 10
        n = 20
        k = 2
        X, U, s, V, wv = SparseUtils.generateSparseBinaryMatrix((m, n),
                                                                k,
                                                                0.5,
                                                                verbose=True,
                                                                csarray=True)

        Z = U.dot(V.T)

        localAuc = numpy.zeros(m)

        # Reference: per-user AUC computed with sklearn, then averaged.
        for i in range(m):
            localAuc[i] = sklearn.metrics.roc_auc_score(
                numpy.ravel(X[i, :].toarray()), Z[i, :])

        localAuc = localAuc.mean()

        # With u = 0 every item counts, so localAUC equals the plain mean AUC.
        u = 0.0
        localAuc2 = MCEvaluator.localAUC(X, U, V, u)

        self.assertEquals(localAuc, localAuc2)

        #Now try a large r
        # With w = 1 the quantile threshold excludes everything, giving 0.
        w = 1.0

        localAuc2 = MCEvaluator.localAUC(X, U, V, w)
        self.assertEquals(localAuc2, 0)
Exemplo n.º 38
0
    def stratifiedRecallAtK(positiveArray,
                            orderedItems,
                            k,
                            itemCounts,
                            beta=0.5,
                            verbose=False):
        """
        Compute the average recall@k score for each row of the predicted matrix UV.T 
        using real values in positiveArray. positiveArray is a tuple (indPtr, colInds)
        
        :param orderedItems: The ordered items for each user (users are rows, items are cols)  
        
        :param verbose: If true return recall and first k recommendation for each row, otherwise just precisions
        """
        # Accept either a sparse matrix or a pre-built (indPtr, colInds) pair.
        if type(positiveArray) != tuple:
            positiveArray = SparseUtils.getOmegaListPtr(positiveArray)
        indPtr, colInds = positiveArray

        topK = orderedItems[:, 0:k]
        recalls, denominators = MCEvaluatorCython.stratifiedRecallAtk(
            indPtr, colInds, topK, itemCounts, beta)

        if verbose:
            return recalls, topK
        # Weight each user's recall by its stratified denominator.
        return numpy.average(recalls, weights=denominators)
Exemplo n.º 39
0
    def testAverageRocCurve(self):
        """Smoke test averageRocCurve with and without a training matrix."""
        m = 50
        n = 20
        k = 8
        u = 20.0 / m
        w = 1 - u
        X, U, s, V, wv = SparseUtils.generateSparseBinaryMatrix((m, n),
                                                                k,
                                                                w,
                                                                csarray=True,
                                                                verbose=True,
                                                                indsPerRow=200)

        fpr, tpr = MCEvaluator.averageRocCurve(X, U, V)

        # Select a GTK backend before pyplot is imported.
        import matplotlib
        matplotlib.use("GTK3Agg")
        import matplotlib.pyplot as plt
        #plt.plot(fpr, tpr)
        #plt.show()

        #Now try case where we have a training set
        folds = 1
        testSize = 5
        trainTestXs = Sampling.shuffleSplitRows(X, folds, testSize)
        trainX, testX = trainTestXs[0]

        fpr, tpr = MCEvaluator.averageRocCurve(testX, U, V, trainX=trainX)
Exemplo n.º 40
0
    def f1AtK(positiveArray, orderedItems, k, verbose=False):
        """
        Return the F1@k measure for each row of the predicted matrix UV.T 
        using real values in positiveArray. positiveArray is a tuple (indPtr, colInds)
        
        :param orderedItems: The ordered items for each user (users are rows, items are cols)  
        
        :param verbose: If true return recall and first k recommendation for each row, otherwise just precisions
        """
        # Accept either a sparse matrix or a pre-built (indPtr, colInds) pair.
        if type(positiveArray) != tuple:
            positiveArray = SparseUtils.getOmegaListPtr(positiveArray)

        orderedItems = orderedItems[:, 0:k]
        indPtr, colInds = positiveArray

        precisions = MCEvaluatorCython.precisionAtk(indPtr, colInds,
                                                    orderedItems)
        recalls = MCEvaluatorCython.recallAtk(indPtr, colInds, orderedItems)

        # Guard against 0/0: where precision + recall is zero, bump the
        # denominator to 1 so F1 is 0 instead of NaN.
        denominator = precisions + recalls
        denominator += denominator == 0

        f1s = 2 * precisions * recalls / denominator

        if verbose:
            return f1s, orderedItems
        else:
            return f1s.mean()
Exemplo n.º 41
0
    def testModelSelect(self):
        """Smoke test BprRecommender.modelSelect on a small random matrix."""
        m = 50
        n = 50
        k = 5
        u = 0.5
        w = 1 - u
        X = SparseUtils.generateSparseBinaryMatrix((m, n), k, w)

        # Allow the process to run on all CPUs (resets any CPU affinity).
        os.system('taskset -p 0xffffffff %d' % os.getpid())

        u = 0.2
        lmbda = 0.1
        gamma = 0.01
        learner = BprRecommender(k, lmbda, gamma)
        learner.maxIterations = 2
        # Small parameter grids keep the test fast.
        learner.ks = 2**numpy.arange(3, 5)
        learner.lmbdaUsers = 2.0**-numpy.arange(1, 3)
        learner.lmbdaPoses = 2.0**-numpy.arange(1, 3)
        learner.lmbdaNegs = 2.0**-numpy.arange(1, 3)
        learner.gammas = 2.0**-numpy.arange(1, 3)
        learner.folds = 2
        learner.numProcesses = 1

        # NOTE(review): X.sum(1) gives per-row (user) sums, yet the result is
        # named colProbs - confirm which axis modelSelect actually expects.
        colProbs = numpy.array(X.sum(1)).ravel()
        colProbs /= colProbs.sum()
        print(colProbs, colProbs.shape)

        learner.modelSelect(X, colProbs=colProbs)
Exemplo n.º 42
0
    def profileObjective(self):
        """
        Profile the approximate objective computation on self.X.
        """
        k = 10
        U = numpy.random.rand(self.m, k)
        V = numpy.random.rand(self.n, k)

        indPtr, colInds = SparseUtils.getOmegaListPtr(self.X)
        colIndsProbabilities = numpy.ones(colInds.shape[0])

        # Build a per-row cumulative distribution over each user's items.
        for i in range(self.m):
            colIndsProbabilities[indPtr[i]:indPtr[
                i + 1]] /= colIndsProbabilities[indPtr[i]:indPtr[i + 1]].sum()
            colIndsProbabilities[indPtr[i]:indPtr[i + 1]] = numpy.cumsum(
                colIndsProbabilities[indPtr[i]:indPtr[i + 1]])

        r = numpy.zeros(self.m)
        lmbda = 0.001
        rho = 1.0
        numAucSamples = 100

        def run():
            # Evaluate the approximate objective repeatedly for profiling.
            numRuns = 10
            for i in range(numRuns):
                objectiveApprox(indPtr, colInds, indPtr, colInds, U, V, r,
                                numAucSamples, lmbda, rho, False)

        ProfileUtils.profile('run()', globals(), locals())
Exemplo n.º 43
0
 def uncenter(self, X): 
     """
     Uncenter a training or test matrix. 

     Adds back the per-row means stored in self.muRows by the earlier
     centering step and returns the resulting matrix.
     """
     #logging.debug("Uncentering matrix of size: " + str(X.shape))
     return SparseUtils.uncenterRows(X, self.muRows)
     
Exemplo n.º 44
0
 def testModelSelect(self): 
     """Smoke test BprRecommender.modelSelect on a small random matrix."""
     m = 50 
     n = 50 
     k = 5
     u = 0.5 
     w = 1-u
     X = SparseUtils.generateSparseBinaryMatrix((m, n), k, w)
     
     # Allow the process to run on all CPUs (resets any CPU affinity).
     os.system('taskset -p 0xffffffff %d' % os.getpid())
     
     u = 0.2
     lmbda = 0.1 
     gamma = 0.01
     learner = BprRecommender(k, lmbda, gamma)
     learner.maxIterations = 2        
     # Small parameter grids keep the test fast.
     learner.ks = 2**numpy.arange(3, 5)
     learner.lmbdaUsers = 2.0**-numpy.arange(1, 3)
     learner.lmbdaPoses = 2.0**-numpy.arange(1, 3)
     learner.lmbdaNegs = 2.0**-numpy.arange(1, 3)
     learner.gammas = 2.0**-numpy.arange(1, 3)
     learner.folds = 2
     learner.numProcesses = 1 
     
     # NOTE(review): X.sum(1) gives per-row (user) sums, yet the result is
     # named colProbs - confirm which axis modelSelect actually expects.
     colProbs = numpy.array(X.sum(1)).ravel()
     colProbs /= colProbs.sum()
     print(colProbs, colProbs.shape)
     
     learner.modelSelect(X, colProbs=colProbs)
Exemplo n.º 45
0
    def testSvdArpack(self): 
        """Compare the ARPACK truncated SVD against numpy's full SVD."""
        shape = (500, 100)
        X, U, s, V = SparseUtils.generateSparseLowRank(shape, 5, 1000, verbose=True)

        k2 = 10
        U, s, V = SparseUtils.svdArpack(X, k2)

        # Full dense SVD as reference; numpy returns V transposed.
        U2, s2, V2 = numpy.linalg.svd(X.todense())
        V2 = V2.T

        nptst.assert_array_almost_equal(s, s2[0:k2])
        # Singular vectors are only defined up to sign, so compare magnitudes.
        nptst.assert_array_almost_equal(numpy.abs(U), numpy.abs(U2[:, 0:k2]), 3)
        nptst.assert_array_almost_equal(numpy.abs(V), numpy.abs(V2[:, 0:k2]), 3)
Exemplo n.º 46
0
    def testParallelSparseLowRankOp(self):
        """The operator for A + U diag(s) V^T must agree with the dense matrix."""
        numRuns = 10

        for i in range(numRuns):
            m = numpy.random.randint(10, 100)
            n = numpy.random.randint(10, 100)
            density = numpy.random.rand()
            A = scipy.sparse.rand(m, n, density)
            A = A.tocsc()

            r = numpy.random.randint(10, 100)
            U, s, V = SparseUtils.generateLowRank((m, n), r)

            L = LinOperatorUtils.parallelSparseLowRankOp(A, U, s, V)

            u = numpy.random.rand(m)
            v = numpy.random.rand(n)

            r = 10
            W = numpy.random.rand(m, r)
            X = numpy.random.rand(n, r)

            # Dense reference matrix B = A + U diag(s) V^T.
            B = numpy.array(A + (U * s).dot(V.T))

            nptst.assert_array_almost_equal(L.matvec(v), B.dot(v))
            nptst.assert_array_almost_equal(L.rmatvec(u), B.T.dot(u))
            nptst.assert_array_almost_equal(L.matmat(X), B.dot(X))
            nptst.assert_array_almost_equal(L.rmatmat(W), B.T.dot(W))
Exemplo n.º 47
0
 def flixster(minNnzRows=10, minNnzCols=2, quantile=90): 
     """
     Load the Flixster ratings file and return a binarised (rating > 3)
     sparse user x movie matrix, pruned so rows/cols have at least
     minNnzRows/minNnzCols non-zeros.
     """
     matrixFileName = PathDefaults.getDataDir() + "flixster/Ratings.timed.txt" 
     userIndexer = IdIndexer("i")
     movieIndexer = IdIndexer("i")
     
     ratings = array.array("f")
     logging.debug("Loading ratings from " + matrixFileName)
     
     # Use a context manager so the handle is always closed (the original
     # leaked the open file). The first line is a header and is skipped.
     with open(matrixFileName) as matrixFile:
         matrixFile.readline()
         
         for i, line in enumerate(matrixFile):
             if i % 1000000 == 0: 
                 logging.debug("Iteration: " + str(i))
             vals = line.split()
             
             userIndexer.append(vals[0])
             movieIndexer.append(vals[1])
             ratings.append(float(vals[2]))
     
     rowInds = userIndexer.getArray()
     colInds = movieIndexer.getArray()
     ratings = numpy.array(ratings)
     
     # numpy.int was removed in NumPy 1.24; it was simply an alias for the
     # builtin int, which is used here instead.
     X = sppy.csarray((len(userIndexer.getIdDict()), len(movieIndexer.getIdDict())), storagetype="row", dtype=int)
     X.put(numpy.array(ratings>3, int), numpy.array(rowInds, numpy.int32), numpy.array(colInds, numpy.int32), init=True)
     X.prune()
     
     X = SparseUtils.pruneMatrixRowAndCols(X, minNnzRows, minNnzCols)
     
     logging.debug("Read file: " + matrixFileName)
     logging.debug("Non zero elements: " + str(X.nnz) + " shape: " + str(X.shape))
     
     #X = Sampling.sampleUsers(X, 1000)
     
     return X 
Exemplo n.º 48
0
    def epinions(minNnzRows=10, minNnzCols=3, quantile=90): 
        """
        Load the Epinions rating matrix and return a binarised (rating > 3)
        sparse user x item matrix, pruned so rows/cols have at least
        minNnzRows/minNnzCols non-zeros.
        """
        matrixFileName = PathDefaults.getDataDir() + "epinions/rating.mat" 
        A = scipy.io.loadmat(matrixFileName)["rating"]
        
        userIndexer = IdIndexer("i")
        itemIndexer = IdIndexer("i")        
        
        # Map raw user/item ids to contiguous indices.
        for i in range(A.shape[0]): 
            userIndexer.append(A[i, 0])
            itemIndexer.append(A[i, 1])


        rowInds = userIndexer.getArray()
        colInds = itemIndexer.getArray()
        ratings = A[:, 3]        
        
        # numpy.int was removed in NumPy 1.24; it was simply an alias for the
        # builtin int, which is used here instead.
        X = sppy.csarray((len(userIndexer.getIdDict()), len(itemIndexer.getIdDict())), storagetype="row", dtype=int)
        X.put(numpy.array(ratings>3, int), numpy.array(rowInds, numpy.int32), numpy.array(colInds, numpy.int32), init=True)
        X.prune()
        
        X = SparseUtils.pruneMatrixRowAndCols(X, minNnzRows, minNnzCols)
        
        logging.debug("Read file: " + matrixFileName)
        logging.debug("Non zero elements: " + str(X.nnz) + " shape: " + str(X.shape))

        return X 
Exemplo n.º 49
0
    def testF1Atk(self):
        """Check f1AtK against F1 computed from precisionAtK and recallAtK."""
        m = 10
        n = 5
        r = 3
        X, U, s, V, wv = SparseUtils.generateSparseBinaryMatrix((m, n), r, 0.5, verbose=True)

        import sppy

        X = sppy.csarray(X)
        orderedItems = MCEvaluator.recommendAtk(U * s, V, n)

        # With k = n the recommendations cover all items, so the expected F1
        # follows in closed form from precision r/n and recall 1.
        self.assertAlmostEquals(
            MCEvaluator.f1AtK(X, orderedItems, n, verbose=False), 2 * r / float(n) / (1 + r / float(n))
        )

        m = 20
        n = 50
        X, U, s, V, wv = SparseUtils.generateSparseBinaryMatrix((m, n), r, 0.5, verbose=True)
        k = 5

        # Reference: combine per-user precision and recall manually.
        orderedItems = MCEvaluator.recommendAtk(U * s, V, k)
        precision, scoreInds = MCEvaluator.precisionAtK(X, orderedItems, k, verbose=True)
        recall, scoreInds = MCEvaluator.recallAtK(X, orderedItems, k, verbose=True)
        f1s = numpy.zeros(m)

        for i in range(m):
            f1s[i] = 2 * precision[i] * recall[i] / (precision[i] + recall[i])

        orderedItems = MCEvaluator.recommendAtk(U * s, V, n)
        f1s2, scoreInds = MCEvaluator.f1AtK(X, orderedItems, k, verbose=True)

        nptst.assert_array_equal(f1s, f1s2)

        # Test case where we get a zero precision or recall
        orderedItems[5, :] = -1
        precision, scoreInds = MCEvaluator.precisionAtK(X, orderedItems, k, verbose=True)
        recall, scoreInds = MCEvaluator.recallAtK(X, orderedItems, k, verbose=True)

        f1s = numpy.zeros(m)

        for i in range(m):
            if precision[i] + recall[i] != 0:
                f1s[i] = 2 * precision[i] * recall[i] / (precision[i] + recall[i])

        f1s2, scoreInds = MCEvaluator.f1AtK(X, orderedItems, k, verbose=True)

        nptst.assert_array_equal(f1s, f1s2)
Exemplo n.º 50
0
 def testGetOmegaList(self):
     """getOmegaList must return the nonzero column indices of each row."""
     import sppy
     m, n = 10, 5
     X = scipy.sparse.rand(m, n, 0.1).tocsr()

     # scipy.sparse input.
     omegaList = SparseUtils.getOmegaList(X)
     for row in range(m):
         nptst.assert_array_almost_equal(omegaList[row], X.toarray()[row, :].nonzero()[0])

     # sppy input must give the same answer.
     omegaList = SparseUtils.getOmegaList(sppy.csarray(X))
     for row in range(m):
         nptst.assert_array_almost_equal(omegaList[row], X.toarray()[row, :].nonzero()[0])
Exemplo n.º 51
0
    def testDiag(self):
        """diag must extract the main diagonal of a sparse matrix."""
        size = 10
        A = scipy.sparse.rand(size, size, 0.5, "csr")

        d = SparseUtils.diag(A)

        # Compare element-wise against direct indexing.
        for idx in range(size):
            self.assertEquals(d[idx], A[idx, idx])
Exemplo n.º 52
0
    def testGetOmegaList(self):
        """getOmegaList must return the nonzero column indices of each row."""
        import sppy
        m = 10
        n = 5
        X = scipy.sparse.rand(m, n, 0.1)
        X = X.tocsr()

        # scipy.sparse input.
        omegaList = SparseUtils.getOmegaList(X)
        for i in range(m):
            nptst.assert_array_almost_equal(omegaList[i],
                                            X.toarray()[i, :].nonzero()[0])

        # sppy input must give the same answer.
        Xsppy = sppy.csarray(X)
        omegaList = SparseUtils.getOmegaList(Xsppy)

        for i in range(m):
            nptst.assert_array_almost_equal(omegaList[i],
                                            X.toarray()[i, :].nonzero()[0])
Exemplo n.º 53
0
    def testPruneMatrixCols(self):
        """Check pruneMatrixCols honours minNnz/maxNnz column bounds."""
        m = 30
        n = 20
        density = 0.5
        X = sppy.rand((m, n), density)
        X[X.nonzero()] = 1

        # NOTE(review): the second return value is named rowInds but, per the
        # assertions below, it holds the indices of the *columns* kept.
        newX, rowInds = SparseUtils.pruneMatrixCols(X, maxNnz=10, verbose=True)

        # Every column with <= 10 non-zeros must be kept.
        nnzCols = numpy.zeros(n)
        for i in range(n):
            nnzCols[i] = X.toarray()[:, i].nonzero()[0].shape[0]

            if nnzCols[i] <= 10:
                self.assertTrue(i in rowInds)

        self.assertTrue((newX.sum(0) <= 10).all())

        newX, rowInds = SparseUtils.pruneMatrixCols(X, minNnz=10, verbose=True)

        # Every column with >= 10 non-zeros must be kept.
        nnzCols = numpy.zeros(n)
        for i in range(n):
            nnzCols[i] = X.toarray()[:, i].nonzero()[0].shape[0]

            if nnzCols[i] >= 10:
                self.assertTrue(i in rowInds)

        self.assertTrue((newX.sum(0) >= 10).all())

        # Both bounds together.
        newX, rowInds = SparseUtils.pruneMatrixCols(X,
                                                    minNnz=10,
                                                    maxNnz=15,
                                                    verbose=True)

        nnzCols = numpy.zeros(n)
        for i in range(n):
            nnzCols[i] = X.toarray()[:, i].nonzero()[0].shape[0]

            if nnzCols[i] >= 10 and nnzCols[i] <= 15:
                self.assertTrue(i in rowInds)

        self.assertTrue(
            numpy.logical_and(newX.sum(0) >= 10,
                              newX.sum(0) <= 15).all())
Exemplo n.º 54
0
    def testSampleUsers2(self): 
        """Check sampleUsers2 keeps all data when k exceeds nnz, and prunes correctly."""
        m = 10
        n = 15
        r = 5 
        u = 0.3
        w = 1-u
        X, U, s, V, wv = SparseUtils.generateSparseBinaryMatrix((m,n), r, w, csarray=True, verbose=True, indsPerRow=200)

        # Asking for more entries than exist should return the whole matrix.
        k = X.nnz+100
        X2, userInds = Sampling.sampleUsers2(X, k)

        nptst.assert_array_equal(X.toarray(), X2.toarray())
        
        #Test pruning of cols 
        k = 500
        m = 100
        n = 500
        u = 0.1
        w = 1 - u
        X, U, s, V, wv = SparseUtils.generateSparseBinaryMatrix((m,n), r, w, csarray=True, verbose=True, indsPerRow=200)
        numpy.random.seed(21)
        X2, userInds = Sampling.sampleUsers2(X, k, prune=True)
        nnz1 = X2.nnz
        # With prune=True no column may be left empty.
        self.assertTrue((X2.sum(0)!=0).all())

        # Same seed without pruning must keep the same number of non-zeros.
        numpy.random.seed(21)
        X2, userInds = Sampling.sampleUsers2(X, k, prune=False)
        nnz2 = X2.nnz
        self.assertEquals(nnz1, nnz2)

        # Randomised checks that the sample is a row subset of X.
        numRuns = 50
        for i in range(numRuns): 
            m = numpy.random.randint(10, 100)
            n = numpy.random.randint(10, 100)
            k = 500

            X, U, s, V, wv = SparseUtils.generateSparseBinaryMatrix((m,n), r, w, csarray=True, verbose=True, indsPerRow=200)

            X2, userInds = Sampling.sampleUsers2(X, k)
            

            # NOTE(review): the parenthesis placement looks wrong here -
            # .all() is applied to numpy.zeros((m, m)), not to the comparison,
            # so this asserts X.dot(X.T) != False. Confirm intended check.
            self.assertTrue((X.dot(X.T)!=numpy.zeros((m, m)).all()))
            self.assertTrue((X2.toarray() == X.toarray()[userInds, :]).all())
            self.assertEquals(X.toarray()[userInds, :].nonzero()[0].shape[0], X2.nnz)
Exemplo n.º 55
0
    def __init__(self):
        """Create a fixed random low-rank binary matrix for the profile runs."""
        numpy.random.seed(21)

        #Create a low rank matrix
        self.m = 1000   # number of rows
        self.n = 5000   # number of columns
        self.k = 10     # rank used to generate the matrix
        self.X = SparseUtils.generateSparseBinaryMatrix((self.m, self.n),
                                                        self.k,
                                                        csarray=True)
Exemplo n.º 56
0
    def testResize(self):
        """Resizing must preserve all overlapping entries, shrinking or growing."""
        A = scipy.sparse.rand(10, 10, 0.1, "csr")

        # Shrink: the top-left 5x5 corner must be preserved.
        B = SparseUtils.resize(A, (5, 5))
        self.assertEquals(B.shape, (5, 5))
        for row in range(5):
            for col in range(5):
                self.assertEquals(B[row, col], A[row, col])

        # Grow: every original entry preserved and no new ones added.
        B = SparseUtils.resize(A, (15, 15))
        self.assertEquals(B.shape, (15, 15))
        self.assertEquals(B.nnz, A.nnz)
        for row in range(10):
            for col in range(10):
                self.assertEquals(B[row, col], A[row, col])
Exemplo n.º 57
0
    def profileGetOmegaList(self):
        """Profile getOmegaList on a large random low-rank sparse matrix."""
        shape = (20000, 15000)
        r = 50            # rank of the generated matrix
        k = 1000000       # number of sampled non-zero entries

        X = SparseUtils.generateSparseLowRank(shape, r, k)
        import sppy
        X = sppy.csarray(X)

        ProfileUtils.profile('SparseUtils.getOmegaList(X)', globals(),
                             locals())