Example #1
 def testComputeR(self): 
     U = numpy.random.rand(10, 5)
     V = numpy.random.rand(15, 5)
     
     Z = U.dot(V.T)
     
     u = 1.0
     r = SparseUtilsCython.computeR(U, V, u, indsPerRow=1000)
            
     tol = 0.1
     self.assertTrue(numpy.linalg.norm(Z.max(1) - r)/numpy.linalg.norm(Z.max(1)) < tol)
     
     u = 0.0
     r = SparseUtilsCython.computeR(U, V, u, indsPerRow=1000)
     self.assertTrue(numpy.linalg.norm(Z.min(1) - r)/numpy.linalg.norm(Z.min(1)) < tol)
     
     u = 0.3
     r = SparseUtilsCython.computeR(U, V, u, indsPerRow=1000) 
     r2 = numpy.percentile(Z, u*100.0, 1)
     #nptst.assert_array_almost_equal(r, r2, 2)
     self.assertTrue(numpy.linalg.norm(r - r2)/numpy.linalg.norm(r) < tol)
     
     #Try a larger matrix 
     U = numpy.random.rand(100, 5)
     V = numpy.random.rand(105, 5)
     
     Z = U.dot(V.T)
     
     r = SparseUtilsCython.computeR(U, V, u) 
     r2 = numpy.percentile(Z, u*100.0, 1)
     
     self.assertTrue(numpy.linalg.norm(r-r2) < 0.5)
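For reference, a minimal pure-NumPy sketch of the exact per-row quantile that SparseUtilsCython.computeR approximates by sampling (exactR is an illustrative helper, not part of the library):

import numpy

def exactR(U, V, u):
    # Exact u-quantile of each row of Z = U V^T; computeR estimates this from
    # indsPerRow sampled columns per row instead of forming the full product.
    Z = U.dot(V.T)
    return numpy.percentile(Z, u * 100.0, axis=1)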
Example #2
 def testPartialOuterProduct(self):
     m = 15        
     n = 10
     
     
     u = numpy.random.rand(m)
     v = numpy.random.rand(n)
     Y = numpy.outer(u, v)
     
     inds = numpy.nonzero(Y)
     rowInds = numpy.array(inds[0], numpy.int32)
     colInds = numpy.array(inds[1], numpy.int32)
     vals = SparseUtilsCython.partialOuterProduct(rowInds, colInds, u, v)
     X = numpy.reshape(vals, Y.shape)
     
     nptst.assert_almost_equal(X, Y)
     
     #Try just some indices 
     density = 0.2
     A = scipy.sparse.rand(n, n, density)
     inds = A.nonzero()
     rowInds = numpy.array(inds[0], numpy.int32)
     colInds = numpy.array(inds[1], numpy.int32)
     
     vals = SparseUtilsCython.partialOuterProduct(rowInds, colInds, u, v)
     
     for i in range(inds[0].shape[0]): 
         j = inds[0][i]
         k = inds[1][i]
         
         self.assertAlmostEqual(vals[i], Y[j, k])


     self.assertEqual(A.nnz, inds[0].shape[0])
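A dense NumPy sketch of the values the test expects from partialOuterProduct: entry k is the (rowInds[k], colInds[k]) element of the outer product of u and v (the helper name is hypothetical, for illustration only):

import numpy

def partialOuterProductDense(rowInds, colInds, u, v):
    # Same values as numpy.outer(u, v)[rowInds, colInds], computed directly.
    return u[rowInds] * v[colInds]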
Example #3
    def centerRows(X, mu=None, inds=None):
        """
        Simply subtract the mean value of a row from each non-zero element.
        """
        if inds is None:
            rowInds, colInds = X.nonzero()
        else:
            rowInds, colInds = inds

        rowInds = numpy.array(rowInds, numpy.int32)
        colInds = numpy.array(colInds, numpy.int32)

        if mu is None:
            #This is the mean of the nonzero values in each row
            nonZeroCounts = numpy.bincount(rowInds, minlength=X.shape[0])
            inds = nonZeroCounts==0
            nonZeroCounts += inds
            #This is required because when we do X.sum(1) for centering it uses the same
            #dtype as X to store the sum, and this can result in overflow for e.g. uint8
            if X.dtype == numpy.uint8:
                sumCol = SparseUtilsCython.sumCols(rowInds, numpy.array(X[rowInds, colInds]).flatten(), X.shape[0])
            else:
                sumCol = numpy.array(X.sum(1)).flatten()
            mu = sumCol/nonZeroCounts
            mu[inds] = 0

        vals = SparseUtilsCython.partialOuterProduct(rowInds, colInds, numpy.array(mu, float), numpy.ones(X.shape[1]))
        X[X.nonzero()] = numpy.array(X[X.nonzero()] - vals, float)

        return X, mu
Example #4
    def centerRows(X, mu=None, inds=None):
        """
        Simply subtract the mean value of a row from each non-zero element.
        """
        if inds is None:
            rowInds, colInds = X.nonzero()
        else:
            rowInds, colInds = inds

        rowInds = numpy.array(rowInds, numpy.int32)
        colInds = numpy.array(colInds, numpy.int32)

        if mu is None:
            #This is the mean of the nonzero values in each row
            nonZeroCounts = numpy.bincount(rowInds, minlength=X.shape[0])
            inds = nonZeroCounts == 0
            nonZeroCounts += inds
            #This is required because when we do X.sum(1) for centering it uses the same
            #dtype as X to store the sum, and this can result in overflow for e.g. uint8
            if X.dtype == numpy.uint8:
                sumCol = SparseUtilsCython.sumCols(
                    rowInds,
                    numpy.array(X[rowInds, colInds]).flatten(), X.shape[0])
            else:
                sumCol = numpy.array(X.sum(1)).flatten()
            mu = sumCol / nonZeroCounts
            mu[inds] = 0

        vals = SparseUtilsCython.partialOuterProduct(
            rowInds, colInds, numpy.array(mu, float),
            numpy.ones(X.shape[1]))
        X[X.nonzero()] = numpy.array(X[X.nonzero()] - vals, float)

        return X, mu
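A minimal usage sketch, assuming the method above is exposed as SparseUtils.centerRows and that X is a scipy.sparse matrix in a format that supports fancy assignment (e.g. csr or lil):

import numpy
import scipy.sparse

X = scipy.sparse.rand(10, 15, density=0.3, format="csr")
# centerRows modifies its argument in place and returns it with the row means.
Xc, mu = SparseUtils.centerRows(X.copy())
# After centering, the non-zero entries of each row sum to roughly zero.
print(numpy.abs(numpy.array(Xc.sum(1)).flatten()).max())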
Example #5
 def testPartialReconstructValsPQ(self):
     n = 10
     Y = numpy.random.rand(n, n)
     
     U, s, V = numpy.linalg.svd(Y)
     V = V.T 
     
     V = numpy.ascontiguousarray(V)
     
     rowInds, colInds = numpy.nonzero(Y)  
     rowInds = numpy.array(rowInds, numpy.int32)
     colInds = numpy.array(colInds, numpy.int32)
     vals = SparseUtilsCython.partialReconstructValsPQ(rowInds, colInds, numpy.ascontiguousarray(U*s), V)
     X = numpy.reshape(vals, Y.shape)
     
     nptst.assert_almost_equal(X, Y)
     
     #Try just some indices 
     density = 0.2
     A = scipy.sparse.rand(n, n, density)
     inds = A.nonzero()
     rowInds = numpy.array(inds[0], numpy.int32)
     colInds = numpy.array(inds[1], numpy.int32)
     
     vals = SparseUtilsCython.partialReconstructValsPQ(rowInds, colInds, numpy.ascontiguousarray(U*s), V)
     
     for i in range(inds[0].shape[0]): 
         j = inds[0][i]
         k = inds[1][i]
         
         self.assertAlmostEqual(vals[i], Y[j, k])


     self.assertEqual(A.nnz, inds[0].shape[0])
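Again for reference, a dense sketch of what partialReconstructValsPQ returns: entry k is the (rowInds[k], colInds[k]) element of P Q^T, here with P = U*s and Q = V (illustrative helper, not the library call):

import numpy

def partialReconstructDense(rowInds, colInds, P, Q):
    # Element k is sum_f P[rowInds[k], f] * Q[colInds[k], f].
    return numpy.sum(P[rowInds, :] * Q[colInds, :], axis=1)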
Example #6
    def testGenerateSparseBinaryMatrix(self):
        m = 5 
        n = 10 
        k = 3
        quantile = 0.7
        numpy.random.seed(21)
        X = SparseUtils.generateSparseBinaryMatrix((m,n), k, quantile)
        Xscipy = numpy.array(X.todense()) 
        
        nptst.assert_array_equal(numpy.array(X.sum(1)).flatten(), numpy.ones(m)*3)
        
        quantile = 0.0 
        X = SparseUtils.generateSparseBinaryMatrix((m,n), k, quantile)
        self.assertTrue(numpy.linalg.norm(X - numpy.ones((m,n))) < 1.1)
        #nptst.assert_array_almost_equal(X.todense(), numpy.ones((m,n)))
        
        quantile = 0.7
        numpy.random.seed(21)
        X = SparseUtils.generateSparseBinaryMatrix((m,n), k, quantile, csarray=True)
        Xcsarray = X.toarray()
        
        nptst.assert_array_equal(numpy.array(X.sum(1)).flatten(), numpy.ones(m)*3)
        
        quantile = 0.0 
        X = SparseUtils.generateSparseBinaryMatrix((m,n), k, quantile, csarray=True)
        self.assertTrue(numpy.linalg.norm(X.toarray() - numpy.ones((m,n))) < 1.1)
        #nptst.assert_array_almost_equal(X.toarray(), numpy.ones((m,n)))
        
        nptst.assert_array_equal(Xcsarray, Xscipy)
        
        #Test variation in the quantiles 
        w = 0.7
        X, U, s, V, wv = SparseUtils.generateSparseBinaryMatrix((m,n), k, w, sd=0.1, csarray=True, verbose=True)
        
        Z = (U*s).dot(V.T)
        X2 = numpy.zeros((m, n))
        r2 = numpy.zeros(m)
        for i in range(m): 
            r2[i] = numpy.percentile(numpy.sort(Z[i, :]), wv[i]*100)
            X2[i, Z[i, :]>r2[i]] = 1 
        r = SparseUtilsCython.computeR2(U*s, V, wv)

        nptst.assert_array_almost_equal(X.toarray(), X2)
        nptst.assert_array_almost_equal(r, r2)
        
        #Test a larger standard deviation
        w = 0.7
        X, U, s, V, wv = SparseUtils.generateSparseBinaryMatrix((m,n), k, w, sd=0.5, csarray=True, verbose=True)
        
        Z = (U*s).dot(V.T)
        X2 = numpy.zeros((m, n))
        r2 = numpy.zeros(m)
        for i in range(m): 
            r2[i] = numpy.percentile(numpy.sort(Z[i, :]), wv[i]*100)
            X2[i, Z[i, :]>=r2[i]] = 1 
        r = SparseUtilsCython.computeR2(U*s, V, wv)

        nptst.assert_array_almost_equal(X.toarray(), X2)
        nptst.assert_array_almost_equal(r, r2)
Example #7
    def uncenter(X, mu1, mu2):
        """
        Uncenter a matrix with mu1 and mu2, the row and columns means of the original
        matrix. X is the centered matrix.
        """
        rowInds, colInds = X.nonzero()
        rowInds = numpy.array(rowInds, numpy.int32)
        colInds = numpy.array(colInds, numpy.int32)

        vals1 = SparseUtilsCython.partialOuterProduct(rowInds, colInds, numpy.array(mu1, float), numpy.ones(X.shape[1]))
        vals2 = SparseUtilsCython.partialOuterProduct(rowInds, colInds, numpy.ones(X.shape[0]), numpy.array(mu2, float))
        X[rowInds, colInds] = X[rowInds, colInds] + vals1 + vals2

        return X
Example #8
 def testCenterRowsCsarray(self):
     
     numRuns = 10        
     
     for i in range(numRuns): 
         density = numpy.random.rand()
         m = numpy.random.randint(10, 100) 
         n = numpy.random.randint(10, 100) 
         X = sppy.rand((m,n), density)
         
 
         SparseUtilsCython.centerRowsCsarray(X)
 
         
         nptst.assert_array_almost_equal(X.sum(1), numpy.zeros(m))
Example #9
    def unshrink(self, X, U, V):
        """
        Perform post-processing on a factorisation of a matrix X using factor
        vectors U and V.
        """
        logging.debug("Post processing singular values")

        #Fix for versions of numpy < 1.7
        inds = numpy.unique(
            numpy.random.randint(
                0, X.data.shape[0],
                numpy.min([self.postProcessSamples, X.data.shape[0]])))
        a = numpy.array(X[X.nonzero()]).ravel()[inds]

        B = numpy.zeros((a.shape[0], U.shape[1]))

        rowInds, colInds = X.nonzero()
        rowInds = numpy.array(rowInds[inds], numpy.int32)
        colInds = numpy.array(colInds[inds], numpy.int32)

        #Populate B
        for i in range(U.shape[1]):
            B[:, i] = SparseUtilsCython.partialOuterProduct(
                rowInds, colInds, U[:, i], V[:, i])

        s = numpy.linalg.pinv(B.T.dot(B)).dot(B.T).dot(a)

        return s
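The pseudo-inverse expression above is an ordinary least-squares fit of new singular values to the sampled entries; a sketch of the same fit via numpy.linalg.lstsq (hypothetical helper, illustration only):

import numpy

def refitSingularValues(B, a):
    # Minimise ||B s - a||_2, i.e. the same s as pinv(B^T B) B^T a above.
    s, _, _, _ = numpy.linalg.lstsq(B, a, rcond=None)
    return s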
Example #10
    def reconstructLowRank(U, s, V, k):
        """
        Take the SVD of a low rank matrix and partially reconstruct it with at most
        k values. If k is an array of indices in [0, U.shape[0]*V.shape[0]) then these
        indices are used for reconstruction.
        """
        (m, n) = (U.shape[0], V.shape[0])

        if type(k) == numpy.ndarray:
            inds = k
            inds = numpy.unique(inds)
            rowInds, colInds = numpy.unravel_index(inds, (m, n))
        elif type(k) == tuple:
            rowInds, colInds = k
        else:
            inds = numpy.random.randint(0, n * m, k)
            inds = numpy.unique(inds)
            rowInds, colInds = numpy.unravel_index(inds, (m, n))

        U = numpy.ascontiguousarray(U)
        V = numpy.ascontiguousarray(V)
        X = SparseUtilsCython.partialReconstructPQ((rowInds, colInds), U * s,
                                                   V)

        return X
Example #11
    def localAUCApprox(positiveArray,
                       U,
                       V,
                       w,
                       numAucSamples=50,
                       r=None,
                       allArray=None):
        """
        Compute the estimated local AUC for the score functions UV^T relative to X with
        quantile w. The AUC is computed using positiveArray, which is a tuple (indPtr, colInds),
        assuming allArray is None. If allArray is not None then positive items are chosen
        from positiveArray and negative ones are chosen from the complement of allArray.
        """

        if type(positiveArray) != tuple:
            positiveArray = SparseUtils.getOmegaListPtr(positiveArray)

        indPtr, colInds = positiveArray
        U = numpy.ascontiguousarray(U)
        V = numpy.ascontiguousarray(V)

        if r is None:
            r = SparseUtilsCython.computeR(U, V, w, numAucSamples)

        if allArray is None:
            return MCEvaluatorCython.localAUCApprox(indPtr, colInds, indPtr,
                                                    colInds, U, V,
                                                    numAucSamples, r)
        else:
            allIndPtr, allColInd = allArray
            return MCEvaluatorCython.localAUCApprox(indPtr, colInds, allIndPtr,
                                                    allColInd, U, V,
                                                    numAucSamples, r)
Example #12
 def generateSparseBinaryMatrix(shape, p, w=0.9, sd=0, csarray=False, verbose=False, indsPerRow=50):
     """
     Create an underlying matrix Z = U s V.T of rank p and then threshold each row
     at its w-quantile, so that roughly a proportion 1-w of its entries are kept. The
     final matrix is a 0/1 matrix: entries of a row at or above its threshold become 1
     and the rest become 0. In other words w=0 keeps all entries and w=1.0 keeps none.
     """
     m, n = shape
     U, s, V = SparseUtils.generateLowRank(shape, p)
     
     X = (U*s).dot(V.T)
     
     wv = numpy.random.randn(m)*sd + w
     wv = numpy.clip(wv, 0, 1)
     r = SparseUtilsCython.computeR2((U*s), V, wv, indsPerRow=indsPerRow)
     
     for i in range(m):
         X[i, X[i, :] >= r[i]] = 1
         X[i, X[i, :] < r[i]] = 0
     
     if csarray:
         import sppy
         X = sppy.csarray(X, storagetype="row")
     else:
         X = scipy.sparse.csr_matrix(X)
         
     if verbose: 
         return X, U, s, V, wv 
     else: 
         return X
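The per-row thresholding step in isolation, as a small pure-NumPy sketch that uses exact quantiles in place of SparseUtilsCython.computeR2 (illustrative only):

import numpy

def thresholdRows(Z, wv):
    # For each row i, mark with 1 the entries at or above the wv[i]-quantile.
    X = numpy.zeros_like(Z)
    for i in range(Z.shape[0]):
        r = numpy.percentile(Z[i, :], wv[i] * 100.0)
        X[i, Z[i, :] >= r] = 1
    return X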
Example #13
    def centerCols(X, mu=None, inds=None):
        """
        Simply subtract the mean value of a column from each non-zero element.
        """
        if inds is None:
            rowInds, colInds = X.nonzero()
        else:
            rowInds, colInds = inds
        rowInds = numpy.array(rowInds, numpy.int32)
        colInds = numpy.array(colInds, numpy.int32)

        if mu is None:
            #This is the mean of the nonzero values in each col
            nonZeroCounts = numpy.bincount(colInds, minlength=X.shape[1])
            inds = nonZeroCounts == 0
            nonZeroCounts += inds
            mu = numpy.array(X.sum(0), float).ravel() / nonZeroCounts
            mu[inds] = 0

        vals = SparseUtilsCython.partialOuterProduct(
            rowInds, colInds, numpy.ones(X.shape[0]),
            numpy.array(mu, float))
        X[X.nonzero()] = numpy.array(X[X.nonzero()] - vals, float)

        return X, mu
Example #14
    def uncenter(X, mu1, mu2):
        """
        Uncenter a matrix with mu1 and mu2, the row and columns means of the original
        matrix. X is the centered matrix.
        """
        rowInds, colInds = X.nonzero()
        rowInds = numpy.array(rowInds, numpy.int32)
        colInds = numpy.array(colInds, numpy.int32)

        vals1 = SparseUtilsCython.partialOuterProduct(
            rowInds, colInds, numpy.array(mu1, float),
            numpy.ones(X.shape[1]))
        vals2 = SparseUtilsCython.partialOuterProduct(
            rowInds, colInds, numpy.ones(X.shape[0]),
            numpy.array(mu2, float))
        X[rowInds, colInds] = X[rowInds, colInds] + vals1 + vals2

        return X
Example #15
 def testSumCols(self): 
     A = scipy.sparse.rand(10, 15, 0.5)*10
     A = scipy.sparse.csc_matrix(A, dtype=numpy.uint8)
     
     rowInds, colInds = A.nonzero()  
     rowInds = numpy.array(rowInds, numpy.int32)
     colInds = numpy.array(colInds, numpy.int32)
     
     sumCol = SparseUtilsCython.sumCols(rowInds, numpy.array(A[rowInds, colInds]).flatten(), A.shape[0])
     nptst.assert_array_equal(numpy.array(A.sum(1)).flatten(), sumCol) 
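A pure-NumPy sketch of the per-row sums that sumCols computes; accumulating with bincount and float weights avoids the overflow that a plain A.sum(1) would suffer for uint8 input (the helper name is illustrative):

import numpy

def sumRowsSafe(rowInds, vals, numRows):
    # Sum the non-zero values of each row, accumulating in float64.
    return numpy.bincount(rowInds, weights=vals.astype(numpy.float64),
                          minlength=numRows)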
Example #16
    def testStratifiedRecallAtk(self):
        m = 20
        n = 50
        r = 3
        alpha = 1

        X, U, V = SparseUtilsCython.generateSparseBinaryMatrixPL((m, n),
                                                                 r,
                                                                 density=0.2,
                                                                 alpha=alpha,
                                                                 csarray=True)

        itemCounts = numpy.array(X.sum(0) + 1, numpy.int32)

        (indPtr, colInds) = X.nonzeroRowsPtr()

        indPtr = numpy.array(indPtr, numpy.uint32)
        colInds = numpy.array(colInds, numpy.uint32)

        k = 5
        orderedItems = numpy.random.randint(0, n, m * k)
        orderedItems = numpy.reshape(orderedItems, (m, k))
        orderedItems = numpy.array(orderedItems, numpy.int32)
        beta = 0.5

        recalls, denominators = MCEvaluatorCython.stratifiedRecallAtk(
            indPtr, colInds, orderedItems, itemCounts, beta)

        recalls2 = numpy.zeros(m)

        #Now compute recalls from scratch
        for i in range(m):
            omegai = colInds[indPtr[i]:indPtr[i + 1]]

            numerator = 0
            for j in range(k):
                if orderedItems[i, j] in omegai:
                    numerator += 1 / itemCounts[orderedItems[i, j]]**beta

            denominator = 0

            for j in omegai:
                denominator += 1 / itemCounts[j]**beta

            recalls2[i] = numerator / denominator

        nptst.assert_array_equal(recalls, recalls2)

        #Now try to match with normal recall
        itemCounts = numpy.ones(n, numpy.int32)
        recalls, denominators = MCEvaluatorCython.stratifiedRecallAtk(
            indPtr, colInds, orderedItems, itemCounts, beta)
        recalls2 = MCEvaluatorCython.recallAtk(indPtr, colInds, orderedItems)

        nptst.assert_array_equal(recalls, recalls2)
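Written out, the loop above computes for each row i: recalls2[i] = (sum of itemCounts[j]**-beta over the top-k items j that lie in omegai) / (sum of itemCounts[j]**-beta over all j in omegai). With itemCounts set to all ones this reduces to the ordinary recall@k, which the final assertion checks.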
Example #17
    def testStratifiedRecallAtk(self): 
        m = 20 
        n = 50 
        r = 3     
        alpha = 1
        
        X, U, V = SparseUtilsCython.generateSparseBinaryMatrixPL((m,n), r, density=0.2, alpha=alpha, csarray=True)
        
        itemCounts = numpy.array(X.sum(0)+1, numpy.int32) 
        
        (indPtr, colInds) = X.nonzeroRowsPtr()
        
        indPtr = numpy.array(indPtr, numpy.uint32)
        colInds = numpy.array(colInds, numpy.uint32)
        
        k = 5
        orderedItems = numpy.random.randint(0, n, m*k)
        orderedItems = numpy.reshape(orderedItems, (m, k))
        orderedItems = numpy.array(orderedItems, numpy.int32)        
        beta = 0.5
        
        recalls, denominators = MCEvaluatorCython.stratifiedRecallAtk(indPtr, colInds, orderedItems, itemCounts, beta)
        
        
        recalls2 = numpy.zeros(m)        
            
        #Now compute recalls from scratch 
        for i in range(m):
            omegai = colInds[indPtr[i]:indPtr[i+1]]            
            
            numerator = 0 
            for j in range(k):
                if orderedItems[i, j] in omegai: 
                    numerator += 1/itemCounts[orderedItems[i, j]]**beta
            
            denominator = 0

            for j in omegai: 
                denominator += 1/itemCounts[j]**beta
                
            recalls2[i] = numerator/denominator
            
        nptst.assert_array_equal(recalls, recalls2)
                                
                
        #Now try to match with normal recall 
        itemCounts = numpy.ones(n, numpy.int32)
        recalls, denominators = MCEvaluatorCython.stratifiedRecallAtk(indPtr, colInds, orderedItems, itemCounts, beta)
        recalls2 = MCEvaluatorCython.recallAtk(indPtr, colInds, orderedItems)
        
        nptst.assert_array_equal(recalls, recalls2)
Example #18
    def testGenerateSparseBinaryMatrixPL(self):
        m = 200 
        n = 100 
        k = 3
        density = 0.1
        numpy.random.seed(21)
        X, U, V = SparseUtilsCython.generateSparseBinaryMatrixPL((m,n), k, density=density, csarray=True)       

        #Just check that the distributions are roughly power law
        print(numpy.histogram(X.sum(0)))        
        print(numpy.histogram(X.sum(1)))  
        
        self.assertAlmostEqual(X.nnz/float(m*n), density, 2)
        self.assertEqual(X.shape, (m, n))
Example #19
    def uncenterRows(X, mu):
        """
        Take a matrix with rows centered using mu, and return them to their original
        state. Note that one should call X.eliminate_zeros() beforehand.
        """
        if X.shape[0] != mu.shape[0]:
            raise ValueError("Invalid number of rows")

        rowInds, colInds = X.nonzero()
        rowInds = numpy.array(rowInds, numpy.int32)
        colInds = numpy.array(colInds, numpy.int32)

        vals = SparseUtilsCython.partialOuterProduct(rowInds, colInds, numpy.array(mu, float), numpy.ones(X.shape[1]))
        X[rowInds, colInds] = numpy.array(X[rowInds, colInds] + vals, float)

        return X
Example #20
    def reconstructLowRankPQ(P, Q, inds):
        """
        Given an array of unique indices inds in [0, P.shape[0]*Q.shape[0]-1],
        partially reconstruct $P*Q^T$. The returned matrix is a scipy csc_matrix.
        """
        (m, n) = (P.shape[0], Q.shape[0])

        if type(inds) == tuple:
            rowInds, colInds = inds
            rowInds = numpy.array(rowInds, int)
            colInds = numpy.array(colInds, int)
        else:
            rowInds, colInds = numpy.unravel_index(inds, (m, n))

        X = SparseUtilsCython.partialReconstructPQ((rowInds, colInds), P, Q)

        return X
Example #21
def localAucsLmbdas(args): 
    trainX, testX, testOmegaList, learner  = args 
    
    (m, n) = trainX.shape
                        
    localAucs = numpy.zeros(learner.lmbdas.shape[0])

    for j, lmbda in enumerate(learner.lmbdas): 
        learner.lmbda = lmbda 
        
        U, V = learner.learnModel(trainX)
        
        r = SparseUtilsCython.computeR(U, V, 1-learner.u, learner.numAucSamples)
        localAucs[j] = MCEvaluator.localAUCApprox(testX, U, V, testOmegaList, learner.numAucSamples, r) 
        logging.debug("Local AUC: " + str(localAucs[j]) + " with k = " + str(learner.k) + " and lmbda= " + str(learner.lmbda))
        
    return localAucs
Example #22
    def reconstructLowRankPQ(P, Q, inds):
        """
        Given an array of unique indices inds in [0, P.shape[0]*Q.shape[0]-1],
        partially reconstruct $P*Q^T$. The returned matrix is a scipy csc_matrix.
        """
        (m, n) = (P.shape[0], Q.shape[0])

        if type(inds) == tuple:
            rowInds, colInds = inds
            rowInds = numpy.array(rowInds, int)
            colInds = numpy.array(colInds, int)
        else:
            rowInds, colInds = numpy.unravel_index(inds, (m, n))

        X = SparseUtilsCython.partialReconstructPQ((rowInds, colInds), P, Q)

        return X
Example #23
    def testComputeR2(self): 
        m = 10 
        n = 15
        U = numpy.random.rand(m, 5)
        V = numpy.random.rand(n, 5)
        
        Z = U.dot(V.T)
        
        w = numpy.ones(m)*1.0
        r = SparseUtilsCython.computeR2(U, V, w, indsPerRow=1000)
               
        tol = 0.1
        self.assertTrue(numpy.linalg.norm(Z.max(1) - r)/numpy.linalg.norm(Z.max(1)) < tol)
        
        w =  numpy.zeros(m)
        r = SparseUtilsCython.computeR2(U, V, w, indsPerRow=1000)
        self.assertTrue(numpy.linalg.norm(Z.min(1) - r)/numpy.linalg.norm(Z.min(1)) < tol)
        
        w = numpy.zeros(m)
        w[5:10] = 1
        r = SparseUtilsCython.computeR2(U, V, w, indsPerRow=1000)
        self.assertTrue(numpy.linalg.norm(Z[0:5, :].min(1) - r[0:5])/numpy.linalg.norm(Z[0:5, :].min(1)) < tol)
        self.assertTrue(numpy.linalg.norm(Z[5:, :].max(1) - r[5:])/numpy.linalg.norm(Z[5:, :].max(1)) < tol)
        
        w =  numpy.ones(m)*0.3
        r = SparseUtilsCython.computeR2(U, V, w, indsPerRow=1000) 
        r2 = numpy.zeros(m)
        for i in range(m): 
            r2[i] = numpy.percentile(Z[i, :], w[i]*100.0)
        self.assertTrue(numpy.linalg.norm(r2 - r)/numpy.linalg.norm(r2) < tol)
        
        w =  numpy.random.rand(m)
        r = SparseUtilsCython.computeR2(U, V, w) 
        r2 = numpy.zeros(m)

        for i in range(m): 
            r2[i] = numpy.percentile(Z[i, :], w[i]*100.0)
        self.assertTrue(numpy.linalg.norm(r2 - r)/numpy.linalg.norm(r2) < tol)       
        
        #Try a larger matrix 
        m = 100 
        n = 105
        U = numpy.random.rand(m, 5)
        V = numpy.random.rand(n, 5)
        
        Z = U.dot(V.T)
        w =  numpy.random.rand(m)
        r = SparseUtilsCython.computeR2(U, V, w, indsPerRow=10000) 
        r2 = numpy.zeros(m) 
        for i in range(m): 
            r2[i] = numpy.percentile(Z[i, :], w[i]*100.0)
        
        self.assertTrue(numpy.linalg.norm(r-r2) < 0.4)
Example #24
    def uncenterRows(X, mu):
        """
        Take a matrix with rows centered using mu, and return them to their original
        state. Note that one should call X.eliminate_zeros() beforehand.
        """
        if X.shape[0] != mu.shape[0]:
            raise ValueError("Invalid number of rows")

        rowInds, colInds = X.nonzero()
        rowInds = numpy.array(rowInds, numpy.int32)
        colInds = numpy.array(colInds, numpy.int32)

        vals = SparseUtilsCython.partialOuterProduct(
            rowInds, colInds, numpy.array(mu, float),
            numpy.ones(X.shape[1]))
        X[rowInds, colInds] = numpy.array(X[rowInds, colInds] + vals,
                                          float)

        return X
Example #25
def localAucsLmbdas(args):
    trainX, testX, testOmegaList, learner = args

    (m, n) = trainX.shape

    localAucs = numpy.zeros(learner.lmbdas.shape[0])

    for j, lmbda in enumerate(learner.lmbdas):
        learner.lmbda = lmbda

        U, V = learner.learnModel(trainX)

        r = SparseUtilsCython.computeR(U, V, 1 - learner.u,
                                       learner.numAucSamples)
        localAucs[j] = MCEvaluator.localAUCApprox(testX, U, V, testOmegaList,
                                                  learner.numAucSamples, r)
        logging.debug("Local AUC: " + str(localAucs[j]) + " with k = " +
                      str(learner.k) + " and lmbda= " + str(learner.lmbda))

    return localAucs
Example #26
 def testPartialReconstructValsPQ2(self): 
     numRuns = 10         
     
     for i in range(numRuns): 
         m = numpy.random.randint(5, 50)
         n = numpy.random.randint(5, 50)
         Y = numpy.random.rand(m, n)
         
         U, s, V = numpy.linalg.svd(Y,  full_matrices=0)
         V = V.T 
         
         V = numpy.ascontiguousarray(V)
         
         rowInds, colInds = numpy.nonzero(Y)  
         rowInds = numpy.array(rowInds, numpy.int32)
         colInds = numpy.array(colInds, numpy.int32)
         #print(U.shape, V.shape)
         vals = SparseUtilsCython.partialReconstructValsPQ(rowInds, colInds, numpy.ascontiguousarray(U*s), V)
         X = numpy.reshape(vals, Y.shape)
         
         nptst.assert_almost_equal(X, Y)
Example #27
    def localAUCApprox2(X, U, V, w, numAucSamples=50, omegaList=None):
        """
        Compute the estimated local AUC for the score functions UV^T relative to X with 
        quantile w. 
        """
        #For now let's compute the full matrix
        Z = U.dot(V.T)

        localAuc = numpy.zeros(X.shape[0])
        allInds = numpy.arange(X.shape[1])

        U = numpy.ascontiguousarray(U)
        V = numpy.ascontiguousarray(V)

        r = SparseUtilsCython.computeR(U, V, w, numAucSamples)

        if omegaList is None:
            omegaList = SparseUtils.getOmegaList(X)

        for i in range(X.shape[0]):
            omegai = omegaList[i]
            omegaBari = numpy.setdiff1d(allInds, omegai, assume_unique=True)

            if omegai.shape[0] * omegaBari.shape[0] != 0:
                partialAuc = 0

                for j in range(numAucSamples):
                    ind = numpy.random.randint(omegai.shape[0] *
                                               omegaBari.shape[0])
                    p = omegai[int(ind / omegaBari.shape[0])]
                    q = omegaBari[ind % omegaBari.shape[0]]

                    if Z[i, p] > Z[i, q] and Z[i, p] > r[i]:
                        partialAuc += 1

                localAuc[i] = partialAuc / float(numAucSamples)

        localAuc = localAuc.mean()

        return localAuc
Example #28
    def localAUCApprox2(X, U, V, w, numAucSamples=50, omegaList=None): 
        """
        Compute the estimated local AUC for the score functions UV^T relative to X with 
        quantile w. 
        """
        #For now let's compute the full matrix 
        Z = U.dot(V.T)
        
        localAuc = numpy.zeros(X.shape[0]) 
        allInds = numpy.arange(X.shape[1])
        
        U = numpy.ascontiguousarray(U)
        V = numpy.ascontiguousarray(V)
        
        r = SparseUtilsCython.computeR(U, V, w, numAucSamples)
        
        if omegaList is None:
            omegaList = SparseUtils.getOmegaList(X)

        for i in range(X.shape[0]): 
            omegai = omegaList[i]
            omegaBari = numpy.setdiff1d(allInds, omegai, assume_unique=True)
            
            if omegai.shape[0] * omegaBari.shape[0] != 0: 
                partialAuc = 0 

                for j in range(numAucSamples):
                    ind = numpy.random.randint(omegai.shape[0]*omegaBari.shape[0])
                    p = omegai[int(ind/omegaBari.shape[0])] 
                    q = omegaBari[ind % omegaBari.shape[0]]   
                    
                    if Z[i, p] > Z[i, q] and Z[i, p] > r[i]: 
                        partialAuc += 1 
                            
                localAuc[i] = partialAuc/float(numAucSamples)
          
        localAuc = localAuc.mean()        
        
        return localAuc        
Example #29
    def localAUC(positiveArray, U, V, w, numRowInds=None):
        """
        Compute the local AUC for the score functions UV^T relative to X with 
        quantile w. 
        """
        if numRowInds is None:
            numRowInds = V.shape[0]

        if type(positiveArray) != tuple:
            positiveArray = SparseUtils.getOmegaListPtr(positiveArray)

        #For now let's compute the full matrix
        Z = U.dot(V.T)

        r = SparseUtilsCython.computeR(U, V, w, numRowInds)

        localAuc = numpy.zeros(U.shape[0])
        allInds = numpy.arange(V.shape[0])
        indPtr, colInds = positiveArray

        for i in range(U.shape[0]):
            omegai = colInds[indPtr[i]:indPtr[i + 1]]
            omegaBari = numpy.setdiff1d(allInds, omegai, assume_unique=True)

            if omegai.shape[0] * omegaBari.shape[0] != 0:
                partialAuc = 0

                for p in omegai:
                    for q in omegaBari:
                        if Z[i, p] > Z[i, q] and Z[i, p] > r[i]:
                            partialAuc += 1

                localAuc[i] = partialAuc / float(
                    omegai.shape[0] * omegaBari.shape[0])

        localAuc = localAuc.mean()

        return localAuc
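In other words, the exact local AUC of row i is the fraction of (positive, negative) pairs (p, q), with p in omegai and q outside it, for which Z[i, p] > Z[i, q] and Z[i, p] exceeds the quantile threshold r[i]; the approximate versions elsewhere in this listing estimate the same fraction from numAucSamples random pairs per row.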
Example #30
 def localAUCApprox(positiveArray, U, V, w, numAucSamples=50, r=None, allArray=None): 
     """
     Compute the estimated local AUC for the score functions UV^T relative to X with 
     quantile w. The AUC is computed using positiveArray which is a tuple (indPtr, colInds)
     assuming allArray is None. If allArray is not None then positive items are chosen
     from positiveArray and negative ones are chosen from the complement of allArray.
     """
     
     if type(positiveArray) != tuple: 
         positiveArray = SparseUtils.getOmegaListPtr(positiveArray)          
     
     indPtr, colInds = positiveArray
     U = numpy.ascontiguousarray(U)
     V = numpy.ascontiguousarray(V)        
     
     if r is None: 
         r = SparseUtilsCython.computeR(U, V, w, numAucSamples)
     
     if allArray is None: 
         return MCEvaluatorCython.localAUCApprox(indPtr, colInds, indPtr, colInds, U, V, numAucSamples, r)
     else:
         allIndPtr, allColInd = allArray
         return MCEvaluatorCython.localAUCApprox(indPtr, colInds, allIndPtr, allColInd, U, V, numAucSamples, r)
Example #31
    def centerCols(X, mu=None, inds=None):
        """
        Simply subtract the mean value of a column from each non-zero element.
        """
        if inds is None:
            rowInds, colInds = X.nonzero()
        else:
            rowInds, colInds = inds
        rowInds = numpy.array(rowInds, numpy.int32)
        colInds = numpy.array(colInds, numpy.int32)

        if mu is None:
            #This is the mean of the nonzero values in each col
            nonZeroCounts = numpy.bincount(colInds, minlength=X.shape[1])
            inds = nonZeroCounts==0
            nonZeroCounts += inds
            mu = numpy.array(X.sum(0), float).ravel()/nonZeroCounts
            mu[inds] = 0

        vals = SparseUtilsCython.partialOuterProduct(rowInds, colInds, numpy.ones(X.shape[0]), numpy.array(mu, float))
        X[X.nonzero()] = numpy.array(X[X.nonzero()] - vals, float)

        return X, mu
Example #32
 def localAUC(positiveArray, U, V, w, numRowInds=None): 
     """
     Compute the local AUC for the score functions UV^T relative to X with 
     quantile w. 
     """
     if numRowInds is None:
         numRowInds = V.shape[0]
         
     if type(positiveArray) != tuple: 
         positiveArray = SparseUtils.getOmegaListPtr(positiveArray)  
     
     #For now let's compute the full matrix 
     Z = U.dot(V.T)
     
     r = SparseUtilsCython.computeR(U, V, w, numRowInds)
     
     localAuc = numpy.zeros(U.shape[0]) 
     allInds = numpy.arange(V.shape[0])
     indPtr, colInds = positiveArray
     
     for i in range(U.shape[0]): 
         omegai = colInds[indPtr[i]:indPtr[i+1]]
         omegaBari = numpy.setdiff1d(allInds, omegai, assume_unique=True)
         
         if omegai.shape[0] * omegaBari.shape[0] != 0: 
             partialAuc = 0                
             
             for p in omegai: 
                 for q in omegaBari: 
                     if Z[i, p] > Z[i, q] and Z[i, p] > r[i]: 
                         partialAuc += 1 
                         
             localAuc[i] = partialAuc/float(omegai.shape[0] * omegaBari.shape[0])
     
     localAuc = localAuc.mean()        
     
     return localAuc
Example #33
    def generateSparseBinaryMatrix(shape,
                                   p,
                                   w=0.9,
                                   sd=0,
                                   csarray=False,
                                   verbose=False,
                                   indsPerRow=50):
        """
        Create an underlying matrix Z = U s V.T of rank p and then threshold each row
        at its w-quantile, so that roughly a proportion 1-w of its entries are kept. The
        final matrix is a 0/1 matrix: entries of a row at or above its threshold become 1
        and the rest become 0. In other words w=0 keeps all entries and w=1.0 keeps none.
        """
        m, n = shape
        U, s, V = SparseUtils.generateLowRank(shape, p)

        X = (U * s).dot(V.T)

        wv = numpy.random.randn(m) * sd + w
        wv = numpy.clip(wv, 0, 1)
        r = SparseUtilsCython.computeR2((U * s), V, wv, indsPerRow=indsPerRow)

        for i in range(m):
            X[i, X[i, :] >= r[i]] = 1
            X[i, X[i, :] < r[i]] = 0

        if csarray:
            import sppy
            X = sppy.csarray(X, storagetype="row")
        else:
            X = scipy.sparse.csr_matrix(X)

        if verbose:
            return X, U, s, V, wv
        else:
            return X
Example #34
    def profileLocalAucApprox(self):
        m = 500
        n = 1000
        k = 10
        X, U, s, V = SparseUtils.generateSparseBinaryMatrix((m, n),
                                                            k,
                                                            csarray=True,
                                                            verbose=True)

        u = 0.1
        w = 1 - u
        numAucSamples = 200

        omegaList = SparseUtils.getOmegaList(X)
        r = SparseUtilsCython.computeR(U, V, w, numAucSamples)

        numRuns = 10

        def run():
            for i in range(numRuns):
                MCEvaluator.localAUCApprox(X, U, V, omegaList, numAucSamples,
                                           r)

        ProfileUtils.profile('run()', globals(), locals())
Example #35
    def reconstructLowRank(U, s, V, k):
        """
        Take the SVD of a low rank matrix and partially reconstruct it with at most
        k values. If k is an array of indices in [0, U.shape[0]*V.shape[0]) then these
        indices are used for reconstruction.
        """
        (m, n) = (U.shape[0], V.shape[0])

        if type(k) == numpy.ndarray:
            inds = k
            inds = numpy.unique(inds)
            rowInds, colInds = numpy.unravel_index(inds, (m, n))
        elif type(k) == tuple:
            rowInds, colInds = k
        else:
            inds = numpy.random.randint(0, n*m, k)
            inds = numpy.unique(inds)
            rowInds, colInds = numpy.unravel_index(inds, (m, n))

        U = numpy.ascontiguousarray(U)
        V = numpy.ascontiguousarray(V)
        X = SparseUtilsCython.partialReconstructPQ((rowInds, colInds), U*s, V)

        return X
Example #36
 def unshrink(self, X, U, V): 
     """
     Perform post-processing on a factorisation of a matrix X using factor
     vectors U and V.
     """
     logging.debug("Post processing singular values")
            
     #Fix for versions of numpy < 1.7 
     inds = numpy.unique(numpy.random.randint(0, X.data.shape[0], numpy.min([self.postProcessSamples, X.data.shape[0]]))) 
     a = numpy.array(X[X.nonzero()]).ravel()[inds]
         
     B = numpy.zeros((a.shape[0], U.shape[1])) 
         
     rowInds, colInds = X.nonzero() 
     rowInds = numpy.array(rowInds[inds], numpy.int32)
     colInds = numpy.array(colInds[inds], numpy.int32)  
     
     #Populate B 
     for i in range(U.shape[1]): 
         B[:, i] = SparseUtilsCython.partialOuterProduct(rowInds, colInds, U[:, i], V[:, i])
     
     s = numpy.linalg.pinv(B.T.dot(B)).dot(B.T).dot(a)
     
     return s 
Example #37
    def learnModel(self, X, fullMatrices=True):
        """
        Learn the matrix completion using a sparse matrix X. This is the simple 
        version of the soft impute algorithm in which we store the entire 
        matrices, newZ and oldZ. 
        """
        if not scipy.sparse.isspmatrix_csc(X):
            raise ValueError("Input matrix must be csc_matrix")
            
        (n, m) = X.shape
        oldU = numpy.zeros((n, 1))
        oldS = numpy.zeros(1)
        oldV = numpy.zeros((m, 1))
        omega = X.nonzero()
        tol = 10**-6
        
        rowInds = numpy.array(omega[0], int)
        colInds = numpy.array(omega[1], int)
         
        ZList = []
        
        for rho in self.rhos:
            gamma = self.eps + 1
            i = 0
            
            Y = scipy.sparse.csc_matrix(X, dtype=float)
            U, s, V = ExpSU.SparseUtils.svdArpack(Y, 1, kmax=20)
            lmbda = rho*numpy.max(s)
            
            while gamma > self.eps:
                ZOmega = SparseUtilsCython.partialReconstructPQ((rowInds, colInds), oldU*oldS, oldV)
                Y = X - ZOmega
                Y = Y.tocsc()

                newU, newS, newV = ExpSU.SparseUtils.svdSparseLowRank(Y, oldU, oldS, oldV)
        
                #Soft threshold 
                newS = newS - lmbda
                newS = numpy.clip(newS, 0, numpy.max(newS))
                
                
                normOldZ = (oldS**2).sum()
                normNewZmOldZ = (oldS**2).sum() + (newS**2).sum() - 2*numpy.trace((oldV.T.dot(newV*newS)).dot(newU.T.dot(oldU*oldS)))
                
                #We can get newZ == oldZ in which case we break
                if normNewZmOldZ < tol: 
                    gamma = 0
                elif abs(normOldZ) < tol:
                    gamma = self.eps + 1 
                else: 
                    gamma = normNewZmOldZ/normOldZ
                
                oldU = newU.copy() 
                oldS = newS.copy() 
                oldV = newV.copy() 
                
                logging.debug("Iteration " + str(i) + " gamma="+str(gamma)) 
                i += 1 
                
            logging.debug("Number of iterations for lambda="+str(rho) + ": " + str(i))
            
            if fullMatrices: 
                newZ = scipy.sparse.lil_matrix((newU*newS).dot(newV.T))
                ZList.append(newZ)
            else: 
                ZList.append((newU,newS,newV))
        
        if self.rhos.shape[0] != 1:
            return ZList
        else:
            return ZList[0]
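The key step inside the loop is singular-value soft-thresholding; a standalone dense sketch of that operator (illustrative only, assuming the matrix fits in memory):

import numpy

def softThresholdSVD(Y, lmbda):
    # Shrink the singular values of Y by lmbda and clip the result at zero.
    U, s, Vt = numpy.linalg.svd(Y, full_matrices=False)
    s = numpy.clip(s - lmbda, 0, None)
    return U, s, Vt.T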
Example #38
    def generateMatrices(self):
        """
        This function returns a list of 20 train/test matrices for incremental 
        collaborative filtering. Each item in the list is (trainX, testX).
        """    
        numpy.random.seed(21)    
        r = 50 
        
        U, s, V = SparseUtils.generateLowRank((self.endM, self.endN), r, normalise=False)
        
        #Cast to int so the counts can be used as array sizes below
        self.startNumInds = int(self.pnz*self.startM*self.startN)
        self.endNumInds = int(self.pnz*self.endM*self.endN)
        
        if not self.nonUniform: 
            inds = numpy.random.randint(0, self.endM*self.endN-1, self.endNumInds)
        else:
            logging.debug("Using non uniform dataset")
            inds = numpy.array(numpy.random.randn(self.endNumInds)*(self.endM*self.endN-1)/4 + (self.endM*self.endN-1)/2, int)
            inds = numpy.clip(inds, 0, (self.endM*self.endN-1))
            
        inds = numpy.unique(inds)
        numpy.random.shuffle(inds)
        self.endNumInds = inds.shape[0]
        
        rowInds, colInds = numpy.unravel_index(inds, (self.endM, self.endN))
        rowInds = numpy.array(rowInds, numpy.int32)
        colInds = numpy.array(colInds, numpy.int32)
        vals = SparseUtilsCython.partialReconstructValsPQ(rowInds, colInds, U*s, V)
        vals /= vals.std()
        vals +=  numpy.random.randn(vals.shape[0])*self.noise
        
        
        isTrainInd = numpy.array(numpy.random.rand(inds.shape[0]) <= self.trainSplit, bool)
        
        assert (self.trainSplit - isTrainInd.sum()/float(isTrainInd.shape[0]))
        
        XMaskTrain = scipy.sparse.csc_matrix((isTrainInd, (rowInds, colInds)), dtype=bool, shape=(self.endM, self.endN))
        XMaskTest = scipy.sparse.csc_matrix((numpy.logical_not(isTrainInd), (rowInds, colInds)), dtype=bool, shape=(self.endM, self.endN))

        #In the first phase, the matrices stay the same size but there are more nonzero 
        #entries   
        numMatrices = 10 
        stepList = numpy.linspace(self.startNumInds, self.endNumInds, numMatrices) 
        trainXList = []
        testXList = []    
        
        for i in range(numMatrices):  
            currentVals = vals[0:int(stepList[i])]
            currentRowInds = rowInds[0:int(stepList[i])]
            currentColInds = colInds[0:int(stepList[i])]

            X = scipy.sparse.csc_matrix((currentVals, (currentRowInds, currentColInds)), dtype=float, shape=(self.endM, self.endN))
            #print("pnz=" + str(X.nnz/float(X.shape[0]*X.shape[1])))
            
            trainX = X.multiply(XMaskTrain)[0:self.startM, 0:self.startN]
            trainX.eliminate_zeros()
            trainX.prune() 
            
            testX = X.multiply(XMaskTest)[0:self.startM, 0:self.startN]
            testX.eliminate_zeros()
            testX.prune() 
            
            trainXList.append(trainX)
            testXList.append(testX)
            
        #Now we increase the size of matrix 
        numMatrices = 10 
        mStepList = numpy.linspace(self.startM, self.endM, numMatrices)
        nStepList = numpy.linspace(self.startN, self.endN, numMatrices)
    
        X = scipy.sparse.csc_matrix((vals, (rowInds, colInds)), dtype=float, shape=(self.endM, self.endN))
    
        for i in range(numMatrices): 
            trainX = X.multiply(XMaskTrain)[0:int(mStepList[i]), :][:, 0:int(nStepList[i])]
            trainX.eliminate_zeros()
            trainX.prune()

            testX = X.multiply(XMaskTest)[0:int(mStepList[i]), :][:, 0:int(nStepList[i])]
            testX.eliminate_zeros()
            testX.prune() 
            
            trainXList.append(trainX)
            testXList.append(testX)
                    
        return trainXList, testXList
Example #39
logging.debug("Starting training")
logging.debug(maxLocalAuc)

#modelSelectX = trainX[0:100, :]
#maxLocalAuc.learningRateSelect(trainX)
#maxLocalAuc.modelSelect(trainX)
#ProfileUtils.profile('U, V, trainObjs, trainAucs, testObjs, testAucs, iterations, time = maxLocalAuc.learnModel(trainX, testX=testX, verbose=True)', globals(), locals())

U, V, trainMeasures, testMeasures, iterations, time = maxLocalAuc.learnModel(trainX, verbose=True)

p = 10

trainOrderedItems = MCEvaluator.recommendAtk(U, V, p)
testOrderedItems = MCEvaluatorCython.recommendAtk(U, V, p, trainX)

r = SparseUtilsCython.computeR(U, V, maxLocalAuc.w, maxLocalAuc.numRecordAucSamples)
trainObjVec = maxLocalAuc.objectiveApprox(trainOmegaPtr, U, V, r, maxLocalAuc.gi, maxLocalAuc.gp, maxLocalAuc.gq, full=True)
testObjVec = maxLocalAuc.objectiveApprox(testOmegaPtr, U, V, r, maxLocalAuc.gi, maxLocalAuc.gp, maxLocalAuc.gq, allArray=allOmegaPtr, full=True)

itemCounts = numpy.array(X.sum(0)+1, numpy.int32)
beta = 0.5

for p in [1, 3, 5, 10]:
    trainPrecision = MCEvaluator.precisionAtK(trainOmegaPtr, trainOrderedItems, p)
    testPrecision = MCEvaluator.precisionAtK(testOmegaPtr, testOrderedItems, p)
    logging.debug("Train/test precision@" + str(p) + "=" + str(trainPrecision) + "/" + str(testPrecision)) 
    
for p in [1, 3, 5, 10]:
    trainRecall = MCEvaluator.stratifiedRecallAtK(trainOmegaPtr, trainOrderedItems, p, itemCounts, beta)
    testRecall = MCEvaluator.stratifiedRecallAtK(testOmegaPtr, testOrderedItems, p, itemCounts, beta)    
    logging.debug("Train/test stratified recall@" + str(p) + "=" + str(trainRecall) + "/" + str(testRecall))
Example #40
        rReal = numpy.mean(Z, 1)
        errors[0, i, j] = numpy.linalg.norm(rReal - r)
        
        r = computeR(U, V, aucSamples, numpy.median)
        rReal = numpy.median(Z, 1)
        errors[1, i, j] = numpy.linalg.norm(rReal - r)
        
        r = computeR(U, V, aucSamples, numpy.min, 1)
        rReal = numpy.min(Z, 1)
        errors[2, i, j] = numpy.linalg.norm(rReal - r)

        r = computeR(U, V, aucSamples, numpy.max, 1)
        rReal = numpy.max(Z, 1)
        errors[3, i, j] = numpy.linalg.norm(rReal - r)        
        
        r = SparseUtilsCython.computeR(U, V, w, aucSamples)
        rReal = numpy.percentile(Z, w*100.0, 1)
        errors[4, i, j] = numpy.linalg.norm(rReal - r)
            
meanErrors = numpy.mean(errors, 2)
print(meanErrors)


plt.plot(numAucSamples, meanErrors[0, :], label="mean")
plt.plot(numAucSamples, meanErrors[1, :], label="median")
plt.plot(numAucSamples, meanErrors[2, :], label="min")
plt.plot(numAucSamples, meanErrors[3, :], label="max")
plt.plot(numAucSamples, meanErrors[4, :], label="u=0.1")

plt.legend()
plt.show()
Example #41
 def run(): 
     for i in range(numRuns): 
         SparseUtilsCython.computeR(U, V, w, indsPerRow)
Example #42
    def testGenerateSparseBinaryMatrix(self):
        m = 5
        n = 10
        k = 3
        quantile = 0.7
        numpy.random.seed(21)
        X = SparseUtils.generateSparseBinaryMatrix((m, n), k, quantile)
        Xscipy = numpy.array(X.todense())

        nptst.assert_array_equal(
            numpy.array(X.sum(1)).flatten(),
            numpy.ones(m) * 3)

        quantile = 0.0
        X = SparseUtils.generateSparseBinaryMatrix((m, n), k, quantile)
        self.assertTrue(numpy.linalg.norm(X - numpy.ones((m, n))) < 1.1)
        #nptst.assert_array_almost_equal(X.todense(), numpy.ones((m,n)))

        quantile = 0.7
        numpy.random.seed(21)
        X = SparseUtils.generateSparseBinaryMatrix((m, n),
                                                   k,
                                                   quantile,
                                                   csarray=True)
        Xcsarray = X.toarray()

        nptst.assert_array_equal(
            numpy.array(X.sum(1)).flatten(),
            numpy.ones(m) * 3)

        quantile = 0.0
        X = SparseUtils.generateSparseBinaryMatrix((m, n),
                                                   k,
                                                   quantile,
                                                   csarray=True)
        self.assertTrue(
            numpy.linalg.norm(X.toarray() - numpy.ones((m, n))) < 1.1)
        #nptst.assert_array_almost_equal(X.toarray(), numpy.ones((m,n)))

        nptst.assert_array_equal(Xcsarray, Xscipy)

        #Test variation in the quantiles
        w = 0.7
        X, U, s, V, wv = SparseUtils.generateSparseBinaryMatrix((m, n),
                                                                k,
                                                                w,
                                                                sd=0.1,
                                                                csarray=True,
                                                                verbose=True)

        Z = (U * s).dot(V.T)
        X2 = numpy.zeros((m, n))
        r2 = numpy.zeros(m)
        for i in range(m):
            r2[i] = numpy.percentile(numpy.sort(Z[i, :]), wv[i] * 100)
            X2[i, Z[i, :] > r2[i]] = 1
        r = SparseUtilsCython.computeR2(U * s, V, wv)

        nptst.assert_array_almost_equal(X.toarray(), X2)
        nptst.assert_array_almost_equal(r, r2)

        #Test a larger standard deviation
        w = 0.7
        X, U, s, V, wv = SparseUtils.generateSparseBinaryMatrix((m, n),
                                                                k,
                                                                w,
                                                                sd=0.5,
                                                                csarray=True,
                                                                verbose=True)

        Z = (U * s).dot(V.T)
        X2 = numpy.zeros((m, n))
        r2 = numpy.zeros(m)
        for i in range(m):
            r2[i] = numpy.percentile(numpy.sort(Z[i, :]), wv[i] * 100)
            X2[i, Z[i, :] >= r2[i]] = 1
        r = SparseUtilsCython.computeR2(U * s, V, wv)

        nptst.assert_array_almost_equal(X.toarray(), X2)
        nptst.assert_array_almost_equal(r, r2)
Example #43
            def next(self):
                X = self.XIterator.next()
                logging.debug("Learning on matrix with shape: " + str(X.shape) + " and " + str(X.nnz) + " non-zeros")    
                
                if self.iterativeSoftImpute.weighted: 
                    #Compute row and col probabilities 
                    up, vp = SparseUtils.nonzeroRowColsProbs(X)
                    nzuInds = up==0
                    nzvInds = vp==0
                    u = numpy.sqrt(1/(up + numpy.array(nzuInds, int)))
                    v = numpy.sqrt(1/(vp + numpy.array(nzvInds, int)))
                    u[nzuInds] = 0 
                    v[nzvInds] = 0 
                
                if self.rhos is not None:
                    self.iterativeSoftImpute.setRho(self.rhos.next())

                if not scipy.sparse.isspmatrix_csc(X):
                    raise ValueError("X must be a csc_matrix not " + str(type(X)))
                    
                #Figure out what lambda should be 
                #PROPACK has problems with convergence 
                Y = scipy.sparse.csc_matrix(X, dtype=float)
                U, s, V = ExpSU.SparseUtils.svdArpack(Y, 1, kmax=20)
                del Y
                #U, s, V = SparseUtils.svdPropack(X, 1, kmax=20)
                maxS = s[0]
                logging.debug("Largest singular value : " + str(maxS))

                (n, m) = X.shape

                if self.j == 0:
                    self.oldU = numpy.zeros((n, 1))
                    self.oldS = numpy.zeros(1)
                    self.oldV = numpy.zeros((m, 1))
                else:
                    oldN = self.oldU.shape[0]
                    oldM = self.oldV.shape[0]

                    if self.iterativeSoftImpute.updateAlg == "initial":
                        if n > oldN:
                            self.oldU = Util.extendArray(self.oldU, (n, self.oldU.shape[1]))
                        elif n < oldN:
                            self.oldU = self.oldU[0:n, :]

                        if m > oldM:
                            self.oldV = Util.extendArray(self.oldV, (m, self.oldV.shape[1]))
                        elif m < oldM:
                            self.oldV = self.oldV[0:m, :]
                    elif self.iterativeSoftImpute.updateAlg == "zero":
                        self.oldU = numpy.zeros((n, 1))
                        self.oldS = numpy.zeros(1)
                        self.oldV = numpy.zeros((m, 1))
                    else:
                        raise ValueError("Unknown SVD update algorithm: " + self.updateAlg)

                rowInds, colInds = X.nonzero()

                gamma = self.iterativeSoftImpute.eps + 1
                i = 0

                self.iterativeSoftImpute.measures = numpy.zeros((self.iterativeSoftImpute.maxIterations, 4))

                while gamma > self.iterativeSoftImpute.eps:
                    if i == self.iterativeSoftImpute.maxIterations: 
                        logging.debug("Maximum number of iterations reached")
                        break 
                    
                    ZOmega = SparseUtilsCython.partialReconstructPQ((rowInds, colInds), self.oldU*self.oldS, self.oldV)
                    Y = X - ZOmega
                    #Y = Y.tocsc()
                    #del ZOmega
                    Y = csarray(Y, storagetype="row")
                    gc.collect()
                    
                    #os.system('taskset -p 0xffffffff %d' % os.getpid())

                    if self.iterativeSoftImpute.svdAlg=="propack":
                        L = LinOperatorUtils.sparseLowRankOp(Y, self.oldU, self.oldS, self.oldV, parallel=False)                        
                        newU, newS, newV = SparseUtils.svdPropack(L, k=self.iterativeSoftImpute.k, kmax=self.iterativeSoftImpute.kmax)
                    elif self.iterativeSoftImpute.svdAlg=="arpack":
                        L = LinOperatorUtils.sparseLowRankOp(Y, self.oldU, self.oldS, self.oldV, parallel=False)                        
                        newU, newS, newV = SparseUtils.svdArpack(L, k=self.iterativeSoftImpute.k, kmax=self.iterativeSoftImpute.kmax)
                    elif self.iterativeSoftImpute.svdAlg=="svdUpdate":
                        newU, newS, newV = SVDUpdate.addSparseProjected(self.oldU, self.oldS, self.oldV, Y, self.iterativeSoftImpute.k)
                    elif self.iterativeSoftImpute.svdAlg=="rsvd":
                        L = LinOperatorUtils.sparseLowRankOp(Y, self.oldU, self.oldS, self.oldV, parallel=True)
                        newU, newS, newV = RandomisedSVD.svd(L, self.iterativeSoftImpute.k, p=self.iterativeSoftImpute.p, q=self.iterativeSoftImpute.q)
                    elif self.iterativeSoftImpute.svdAlg=="rsvdUpdate": 
                        L = LinOperatorUtils.sparseLowRankOp(Y, self.oldU, self.oldS, self.oldV, parallel=True)
                        if self.j == 0: 
                            newU, newS, newV = RandomisedSVD.svd(L, self.iterativeSoftImpute.k, p=self.iterativeSoftImpute.p, q=self.iterativeSoftImpute.q)
                        else: 
                            newU, newS, newV = RandomisedSVD.svd(L, self.iterativeSoftImpute.k, p=self.iterativeSoftImpute.p, q=self.iterativeSoftImpute.qu, omega=self.oldV)
                    elif self.iterativeSoftImpute.svdAlg=="rsvdUpdate2":
                        
                        if self.j == 0: 
                            L = LinOperatorUtils.sparseLowRankOp(Y, self.oldU, self.oldS, self.oldV, parallel=True)
                            newU, newS, newV = RandomisedSVD.svd(L, self.iterativeSoftImpute.k, p=self.iterativeSoftImpute.p, q=self.iterativeSoftImpute.q)
                        else: 
                            #Need linear operator which is U s V 
                            L = LinOperatorUtils.lowRankOp(self.oldU, self.oldS, self.oldV)
                            Y = GeneralLinearOperator.asLinearOperator(Y, parallel=True)
                            newU, newS, newV = RandomisedSVD.updateSvd(L, self.oldU, self.oldS, self.oldV, Y, self.iterativeSoftImpute.k, p=self.iterativeSoftImpute.p)
                    else:
                        raise ValueError("Unknown SVD algorithm: " + self.iterativeSoftImpute.svdAlg)

                    if self.iterativeSoftImpute.weighted and i==0: 
                        delta = numpy.diag((u*newU.T).dot(newU))
                        pi = numpy.diag((v*newV.T).dot(newV))
                        lmbda = (maxS/numpy.max(delta*pi))*self.iterativeSoftImpute.rho
                        lmbdav = lmbda*delta*pi
                    elif not self.iterativeSoftImpute.weighted: 
                        lmbda = maxS*self.iterativeSoftImpute.rho
                        if i==0: 
                            logging.debug("lambda: " + str(lmbda))
                        lmbdav = lmbda
                        
                    newS = newS - lmbdav                    
                    #Soft threshold
                    newS = numpy.clip(newS, 0, numpy.max(newS))
                    

                    normOldZ = (self.oldS**2).sum()
                    normNewZmOldZ = (self.oldS**2).sum() + (newS**2).sum() - 2*numpy.trace((self.oldV.T.dot(newV*newS)).dot(newU.T.dot(self.oldU*self.oldS)))

                    #We can get newZ == oldZ in which case we break
                    if normNewZmOldZ < self.tol:
                        gamma = 0
                    elif abs(normOldZ) < self.tol:
                        gamma = self.iterativeSoftImpute.eps + 1
                    else:
                        gamma = normNewZmOldZ/normOldZ
                        
                    if self.iterativeSoftImpute.verbose: 
                        theta1 = (self.iterativeSoftImpute.k - numpy.linalg.norm(self.oldU.T.dot(newU), 'fro')**2)/self.iterativeSoftImpute.k
                        theta2 = (self.iterativeSoftImpute.k - numpy.linalg.norm(self.oldV.T.dot(newV), 'fro')**2)/self.iterativeSoftImpute.k
                        thetaS = numpy.linalg.norm(newS - self.oldS)**2/numpy.linalg.norm(newS)**2
                        self.iterativeSoftImpute.measures[i, :] = numpy.array([gamma, theta1, theta2, thetaS])

                    self.oldU = newU.copy()
                    self.oldS = newS.copy()
                    self.oldV = newV.copy()

                    logging.debug("Iteration " + str(i) + " gamma="+str(gamma))
                    i += 1

                if self.iterativeSoftImpute.postProcess: 
                    #Add the mean vectors 
                    previousS = newS
                    newU = numpy.c_[newU, numpy.array(X.mean(1)).ravel()]
                    newV = numpy.c_[newV, numpy.array(X.mean(0)).ravel()]
                    newS = self.iterativeSoftImpute.unshrink(X, newU, newV)  
                    
                    #Note that this increases the rank of U and V by 1 
                    #print("Difference in s after postprocessing: " + str(numpy.linalg.norm(previousS - newS[0:-1]))) 
                    logging.debug("Difference in s after postprocessing: " + str(numpy.linalg.norm(previousS - newS[0:-1]))) 

                logging.debug("Number of iterations for rho="+str(self.iterativeSoftImpute.rho) + ": " + str(i))
                self.j += 1
                return (newU, newS, newV)
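
The shrinkage applied to newS above (subtract lmbdav, then clip at zero) is the soft-thresholding of the singular values that gives soft impute its name: it is the proximal operator of the nuclear norm. Below is a minimal, self-contained sketch of that operator on a small dense matrix; the function name softThresholdSvd and the threshold 0.3 are illustrative and not part of the library code above.

import numpy

def softThresholdSvd(Z, lmbda):
    #Dense SVD, shrink every singular value by lmbda and floor at zero,
    #then rebuild the matrix from the shrunk spectrum
    U, s, Vt = numpy.linalg.svd(Z, full_matrices=False)
    sShrunk = numpy.clip(s - lmbda, 0, None)
    return (U * sShrunk).dot(Vt), sShrunk

numpy.random.seed(21)
Z = numpy.random.rand(6, 4)
Zhat, sShrunk = softThresholdSvd(Z, 0.3)
print(sShrunk)  #some values may be exactly zero, reducing the effective rank
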
Example #44
0
    def recordResults(self, muU, muV, trainMeasures, testMeasures, loopInd,
                      rowSamples, indPtr, colInds, testIndPtr, testColInds,
                      allIndPtr, allColInds, gi, gp, gq, trainX, startTime):

        sigmaU = self.getSigma(loopInd, self.alpha, muU.shape[0])
        sigmaV = self.getSigma(loopInd, self.alpha, muU.shape[0])
        r = SparseUtilsCython.computeR(muU, muV, self.w,
                                       self.numRecordAucSamples)
        objArr = self.objectiveApprox((indPtr, colInds),
                                      muU,
                                      muV,
                                      r,
                                      gi,
                                      gp,
                                      gq,
                                      full=True)
        if trainMeasures is None:
            trainMeasures = []
        trainMeasures.append([
            objArr.sum(),
            MCEvaluator.localAUCApprox((indPtr, colInds), muU, muV, self.w,
                                       self.numRecordAucSamples, r),
            time.time() - startTime, loopInd
        ])

        printStr = "iter " + str(loopInd) + ":"
        printStr += " sigmaU=" + str('%.4f' % sigmaU)
        printStr += " sigmaV=" + str('%.4f' % sigmaV)
        printStr += " train: obj~" + str('%.4f' % trainMeasures[-1][0])
        printStr += " LAUC~" + str('%.4f' % trainMeasures[-1][1])

        if testIndPtr is not None:
            testMeasuresRow = []
            testMeasuresRow.append(
                self.objectiveApprox((testIndPtr, testColInds),
                                     muU,
                                     muV,
                                     r,
                                     gi,
                                     gp,
                                     gq,
                                     allArray=(allIndPtr, allColInds)))
            testMeasuresRow.append(
                MCEvaluator.localAUCApprox((testIndPtr, testColInds),
                                           muU,
                                           muV,
                                           self.w,
                                           self.numRecordAucSamples,
                                           r,
                                           allArray=(allIndPtr, allColInds)))
            testOrderedItems = MCEvaluatorCython.recommendAtk(
                muU, muV, numpy.max(self.recommendSize), trainX)

            printStr += " validation: obj~" + str('%.4f' % testMeasuresRow[0])
            printStr += " LAUC~" + str('%.4f' % testMeasuresRow[1])

            try:
                for p in self.recommendSize:
                    f1Array, orderedItems = MCEvaluator.f1AtK(
                        (testIndPtr, testColInds),
                        testOrderedItems,
                        p,
                        verbose=True)
                    testMeasuresRow.append(f1Array[rowSamples].mean())
            except TypeError: #recommendSize is a single value, not a sequence of sizes
                f1Array, orderedItems = MCEvaluator.f1AtK(
                    (testIndPtr, testColInds),
                    testOrderedItems,
                    self.recommendSize,
                    verbose=True)
                testMeasuresRow.append(f1Array[rowSamples].mean())

            printStr += " f1@" + str(self.recommendSize) + "=" + str(
                '%.4f' % testMeasuresRow[-1])

            try:
                for p in self.recommendSize:
                    mrr, orderedItems = MCEvaluator.mrrAtK(
                        (testIndPtr, testColInds),
                        testOrderedItems,
                        p,
                        verbose=True)
                    testMeasuresRow.append(mrr[rowSamples].mean())
            except TypeError: #recommendSize is a single value, not a sequence of sizes
                mrr, orderedItems = MCEvaluator.mrrAtK(
                    (testIndPtr, testColInds),
                    testOrderedItems,
                    self.recommendSize,
                    verbose=True)
                testMeasuresRow.append(mrr[rowSamples].mean())

            printStr += " mrr@" + str(self.recommendSize) + "=" + str(
                '%.4f' % testMeasuresRow[-1])
            testMeasures.append(testMeasuresRow)

        printStr += " ||U||=" + str('%.3f' % numpy.linalg.norm(muU))
        printStr += " ||V||=" + str('%.3f' % numpy.linalg.norm(muV))

        if self.bound:
            trainObj = objArr.sum()

            expectationBound = self.computeBound(trainX, muU, muV, trainObj,
                                                 self.delta)
            printStr += " bound=" + str('%.3f' % expectationBound)
            trainMeasures[-1].append(expectationBound)

        return printStr
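
The vector r passed to objectiveApprox and localAUCApprox above holds one score threshold per row of muU. As a rough sketch of what such a per-row threshold looks like (assuming r corresponds to the w-quantile of each row of the dense score matrix muU.dot(muV.T); the shapes and seed below are illustrative only):

import numpy

numpy.random.seed(21)
muU = numpy.random.rand(8, 3)
muV = numpy.random.rand(12, 3)
w = 0.9

scores = muU.dot(muV.T)                           #dense user-item score matrix
r = numpy.percentile(scores, w * 100.0, axis=1)   #one threshold per row/user
print(r.shape)                                    #(8,)
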
Example #46
0
            def next(self):
                X = self.XIterator.next()
                logging.debug("Learning on matrix with shape: " +
                              str(X.shape) + " and " + str(X.nnz) +
                              " non-zeros")

                if self.iterativeSoftImpute.weighted:
                    #Compute row and col probabilities
                    up, vp = SparseUtils.nonzeroRowColsProbs(X)
                    nzuInds = up == 0
                    nzvInds = vp == 0
                    u = numpy.sqrt(1 / (up + numpy.array(nzuInds, numpy.int64)))
                    v = numpy.sqrt(1 / (vp + numpy.array(nzvInds, numpy.int64)))
                    u[nzuInds] = 0
                    v[nzvInds] = 0

                if self.rhos is not None:
                    self.iterativeSoftImpute.setRho(self.rhos.next())

                if not scipy.sparse.isspmatrix_csc(X):
                    raise ValueError("X must be a csc_matrix not " +
                                     str(type(X)))

                #Figure out what lambda should be
                #PROPACK has problems with convergence
                Y = scipy.sparse.csc_matrix(X, dtype=numpy.float64)
                U, s, V = ExpSU.SparseUtils.svdArpack(Y, 1, kmax=20)
                del Y
                #U, s, V = SparseUtils.svdPropack(X, 1, kmax=20)
                maxS = s[0]
                logging.debug("Largest singular value : " + str(maxS))

                (n, m) = X.shape

                if self.j == 0:
                    self.oldU = numpy.zeros((n, 1))
                    self.oldS = numpy.zeros(1)
                    self.oldV = numpy.zeros((m, 1))
                else:
                    oldN = self.oldU.shape[0]
                    oldM = self.oldV.shape[0]

                    if self.iterativeSoftImpute.updateAlg == "initial":
                        if n > oldN:
                            self.oldU = Util.extendArray(
                                self.oldU, (n, self.oldU.shape[1]))
                        elif n < oldN:
                            self.oldU = self.oldU[0:n, :]

                        if m > oldM:
                            self.oldV = Util.extendArray(
                                self.oldV, (m, self.oldV.shape[1]))
                        elif m < oldM:
                            self.oldV = self.oldV[0:m, :]
                    elif self.iterativeSoftImpute.updateAlg == "zero":
                        self.oldU = numpy.zeros((n, 1))
                        self.oldS = numpy.zeros(1)
                        self.oldV = numpy.zeros((m, 1))
                    else:
                        raise ValueError("Unknown SVD update algorithm: " +
                                         self.updateAlg)

                rowInds, colInds = X.nonzero()

                gamma = self.iterativeSoftImpute.eps + 1
                i = 0

                self.iterativeSoftImpute.measures = numpy.zeros(
                    (self.iterativeSoftImpute.maxIterations, 4))

                while gamma > self.iterativeSoftImpute.eps:
                    if i == self.iterativeSoftImpute.maxIterations:
                        logging.debug("Maximum number of iterations reached")
                        break

                    ZOmega = SparseUtilsCython.partialReconstructPQ(
                        (rowInds, colInds), self.oldU * self.oldS, self.oldV)
                    Y = X - ZOmega
                    #Y = Y.tocsc()
                    #del ZOmega
                    Y = csarray(Y, storagetype="row")
                    gc.collect()

                    #os.system('taskset -p 0xffffffff %d' % os.getpid())

                    if self.iterativeSoftImpute.svdAlg == "propack":
                        L = LinOperatorUtils.sparseLowRankOp(Y,
                                                             self.oldU,
                                                             self.oldS,
                                                             self.oldV,
                                                             parallel=False)
                        newU, newS, newV = SparseUtils.svdPropack(
                            L,
                            k=self.iterativeSoftImpute.k,
                            kmax=self.iterativeSoftImpute.kmax)
                    elif self.iterativeSoftImpute.svdAlg == "arpack":
                        L = LinOperatorUtils.sparseLowRankOp(Y,
                                                             self.oldU,
                                                             self.oldS,
                                                             self.oldV,
                                                             parallel=False)
                        newU, newS, newV = SparseUtils.svdArpack(
                            L,
                            k=self.iterativeSoftImpute.k,
                            kmax=self.iterativeSoftImpute.kmax)
                    elif self.iterativeSoftImpute.svdAlg == "svdUpdate":
                        newU, newS, newV = SVDUpdate.addSparseProjected(
                            self.oldU, self.oldS, self.oldV, Y,
                            self.iterativeSoftImpute.k)
                    elif self.iterativeSoftImpute.svdAlg == "rsvd":
                        L = LinOperatorUtils.sparseLowRankOp(Y,
                                                             self.oldU,
                                                             self.oldS,
                                                             self.oldV,
                                                             parallel=True)
                        newU, newS, newV = RandomisedSVD.svd(
                            L,
                            self.iterativeSoftImpute.k,
                            p=self.iterativeSoftImpute.p,
                            q=self.iterativeSoftImpute.q)
                    elif self.iterativeSoftImpute.svdAlg == "rsvdUpdate":
                        L = LinOperatorUtils.sparseLowRankOp(Y,
                                                             self.oldU,
                                                             self.oldS,
                                                             self.oldV,
                                                             parallel=True)
                        if self.j == 0:
                            newU, newS, newV = RandomisedSVD.svd(
                                L,
                                self.iterativeSoftImpute.k,
                                p=self.iterativeSoftImpute.p,
                                q=self.iterativeSoftImpute.q)
                        else:
                            newU, newS, newV = RandomisedSVD.svd(
                                L,
                                self.iterativeSoftImpute.k,
                                p=self.iterativeSoftImpute.p,
                                q=self.iterativeSoftImpute.qu,
                                omega=self.oldV)
                    elif self.iterativeSoftImpute.svdAlg == "rsvdUpdate2":

                        if self.j == 0:
                            L = LinOperatorUtils.sparseLowRankOp(Y,
                                                                 self.oldU,
                                                                 self.oldS,
                                                                 self.oldV,
                                                                 parallel=True)
                            newU, newS, newV = RandomisedSVD.svd(
                                L,
                                self.iterativeSoftImpute.k,
                                p=self.iterativeSoftImpute.p,
                                q=self.iterativeSoftImpute.q)
                        else:
                            #Need linear operator which is U s V
                            L = LinOperatorUtils.lowRankOp(
                                self.oldU, self.oldS, self.oldV)
                            Y = GeneralLinearOperator.asLinearOperator(
                                Y, parallel=True)
                            newU, newS, newV = RandomisedSVD.updateSvd(
                                L,
                                self.oldU,
                                self.oldS,
                                self.oldV,
                                Y,
                                self.iterativeSoftImpute.k,
                                p=self.iterativeSoftImpute.p)
                    else:
                        raise ValueError("Unknown SVD algorithm: " +
                                         self.iterativeSoftImpute.svdAlg)

                    if self.iterativeSoftImpute.weighted and i == 0:
                        delta = numpy.diag((u * newU.T).dot(newU))
                        pi = numpy.diag((v * newV.T).dot(newV))
                        lmbda = (maxS / numpy.max(
                            delta * pi)) * self.iterativeSoftImpute.rho
                        lmbdav = lmbda * delta * pi
                    elif not self.iterativeSoftImpute.weighted:
                        lmbda = maxS * self.iterativeSoftImpute.rho
                        if i == 0:
                            logging.debug("lambda: " + str(lmbda))
                        lmbdav = lmbda

                    newS = newS - lmbdav
                    #Soft threshold
                    newS = numpy.clip(newS, 0, numpy.max(newS))

                    normOldZ = (self.oldS**2).sum()
                    normNewZmOldZ = (self.oldS**2).sum() + (
                        newS**2).sum() - 2 * numpy.trace(
                            (self.oldV.T.dot(newV * newS)).dot(
                                newU.T.dot(self.oldU * self.oldS)))

                    #We can get newZ == oldZ in which case we break
                    if normNewZmOldZ < self.tol:
                        gamma = 0
                    elif abs(normOldZ) < self.tol:
                        gamma = self.iterativeSoftImpute.eps + 1
                    else:
                        gamma = normNewZmOldZ / normOldZ

                    if self.iterativeSoftImpute.verbose:
                        theta1 = (
                            self.iterativeSoftImpute.k -
                            numpy.linalg.norm(self.oldU.T.dot(newU), 'fro')**
                            2) / self.iterativeSoftImpute.k
                        theta2 = (
                            self.iterativeSoftImpute.k -
                            numpy.linalg.norm(self.oldV.T.dot(newV), 'fro')**
                            2) / self.iterativeSoftImpute.k
                        thetaS = numpy.linalg.norm(
                            newS - self.oldS)**2 / numpy.linalg.norm(newS)**2
                        self.iterativeSoftImpute.measures[i, :] = numpy.array(
                            [gamma, theta1, theta2, thetaS])

                    self.oldU = newU.copy()
                    self.oldS = newS.copy()
                    self.oldV = newV.copy()

                    logging.debug("Iteration " + str(i) + " gamma=" +
                                  str(gamma))
                    i += 1

                if self.iterativeSoftImpute.postProcess:
                    #Add the mean vectors
                    previousS = newS
                    newU = numpy.c_[newU, numpy.array(X.mean(1)).ravel()]
                    newV = numpy.c_[newV, numpy.array(X.mean(0)).ravel()]
                    newS = self.iterativeSoftImpute.unshrink(X, newU, newV)

                    #Note that this increases the rank of U and V by 1
                    #print("Difference in s after postprocessing: " + str(numpy.linalg.norm(previousS - newS[0:-1])))
                    logging.debug("Difference in s after postprocessing: " +
                                  str(numpy.linalg.norm(previousS -
                                                        newS[0:-1])))

                logging.debug("Number of iterations for rho=" +
                              str(self.iterativeSoftImpute.rho) + ": " +
                              str(i))
                self.j += 1
                return (newU, newS, newV)
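
The convergence quantity normNewZmOldZ above evaluates ||Z_new - Z_old||_F^2 using only the factor matrices, never forming the dense reconstructions. A small numpy check of that identity (a sketch; it assumes the factors have orthonormal columns, as SVD output does, and uses made-up sizes):

import numpy

numpy.random.seed(21)
m, n, k = 20, 15, 4
U0, _ = numpy.linalg.qr(numpy.random.randn(m, k)); s0 = numpy.random.rand(k)
U1, _ = numpy.linalg.qr(numpy.random.randn(m, k)); s1 = numpy.random.rand(k)
V0, _ = numpy.linalg.qr(numpy.random.randn(n, k))
V1, _ = numpy.linalg.qr(numpy.random.randn(n, k))

#Direct computation with dense matrices
direct = numpy.linalg.norm(U1.dot(numpy.diag(s1)).dot(V1.T) - U0.dot(numpy.diag(s0)).dot(V0.T), 'fro')**2
#Factored computation, matching the expression used in the loop above
factored = (s0**2).sum() + (s1**2).sum() - 2*numpy.trace((V0.T.dot(V1*s1)).dot(U1.T.dot(U0*s0)))
print(abs(direct - factored))  #agrees up to floating-point rounding
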
Example #47
0
import os
import sys 
import sppy.io
import numpy 
import logging
from sandbox.util.SparseUtilsCython import SparseUtilsCython
from sandbox.util.SparseUtils import SparseUtils
from sandbox.util.PathDefaults import PathDefaults 

logging.basicConfig(stream=sys.stdout, level=logging.DEBUG)
numpy.random.seed(21)
m = 600 
n = 300 
k = 8
density = 0.1

X, U, V = SparseUtilsCython.generateSparseBinaryMatrixPL((m,n), k, density=density, alpha=1, csarray=True)
X = SparseUtils.pruneMatrixRows(X, minNnzRows=10)

resultsDir = PathDefaults.getDataDir() + "syntheticRanking/"

if not os.path.exists(resultsDir): 
    os.mkdir(resultsDir)

matrixFileName = resultsDir + "dataset1.mtx" 

sppy.io.mmwrite(matrixFileName, X)
logging.debug("Non-zero elements: " + str(X.nnz) + " shape: " + str(X.shape))
logging.debug("Saved file: " + matrixFileName)