Пример #1
0
 def testPartialOuterProduct(self):
     """
     Check SparseUtilsCython.partialOuterProduct against numpy.outer, first
     on every index of the outer product and then on the nonzero indices of
     a random sparse matrix.
     """
     m = 15
     n = 10

     u = numpy.random.rand(m)
     v = numpy.random.rand(n)
     Y = numpy.outer(u, v)

     #Use all indices: the result reshaped to Y.shape should equal Y
     inds = numpy.nonzero(Y)
     rowInds = numpy.array(inds[0], numpy.int32)
     colInds = numpy.array(inds[1], numpy.int32)
     vals = SparseUtilsCython.partialOuterProduct(rowInds, colInds, u, v)
     X = numpy.reshape(vals, Y.shape)

     nptst.assert_almost_equal(X, Y)

     #Try just some indices
     density = 0.2
     A = scipy.sparse.rand(n, n, density)
     inds = A.nonzero()
     rowInds = numpy.array(inds[0], numpy.int32)
     colInds = numpy.array(inds[1], numpy.int32)

     vals = SparseUtilsCython.partialOuterProduct(rowInds, colInds, u, v)

     #assertAlmostEqual/assertEqual replace the deprecated *Equals aliases,
     #which no longer exist in Python 3.12
     for i, (j, k) in enumerate(zip(inds[0], inds[1])):
         self.assertAlmostEqual(vals[i], Y[j, k])

     self.assertEqual(A.nnz, inds[0].shape[0])
Пример #2
0
    def uncenter(X, mu1, mu2):
        """
        Uncenter a matrix with mu1 and mu2, the row and column means of the
        original matrix. X is the centered matrix and is modified in place.

        :param X: the centered matrix (presumably a scipy sparse matrix).
        :param mu1: values added back per row (indexed by row).
        :param mu2: values added back per column (indexed by column).
        :return: X with mu1[row] + mu2[col] added to every non-zero element.
        """
        rowInds, colInds = X.nonzero()
        rowInds = numpy.array(rowInds, numpy.int32)
        colInds = numpy.array(colInds, numpy.int32)

        #The builtin float is used since the numpy.float alias was removed
        #in NumPy 1.24
        rowMeans = numpy.array(mu1, float)
        colMeans = numpy.array(mu2, float)

        #Outer product with a vector of ones picks mu1[row] (resp. mu2[col])
        #for each non-zero position
        vals1 = SparseUtilsCython.partialOuterProduct(rowInds, colInds, rowMeans, numpy.ones(X.shape[1]))
        vals2 = SparseUtilsCython.partialOuterProduct(rowInds, colInds, numpy.ones(X.shape[0]), colMeans)
        X[rowInds, colInds] = X[rowInds, colInds] + vals1 + vals2

        return X
Пример #3
0
    def centerRows(X, mu=None, inds=None):
        """
        Subtract the mean value of a row from each non-zero element of that row.
        X is modified in place.

        :param X: the matrix to center (presumably a scipy sparse matrix).
        :param mu: optional per-row values to subtract. When None, the sum of
            each row divided by its number of non-zero elements is used
            (0 for rows with no non-zero elements).
        :param inds: optional (rowInds, colInds) pair used in place of
            X.nonzero() -- presumably the same indices precomputed by the caller.
        :return: the tuple (X, mu).
        """
        nonzeroInds = X.nonzero()
        if inds is None:
            rowInds, colInds = nonzeroInds
        else:
            rowInds, colInds = inds

        rowInds = numpy.array(rowInds, numpy.int32)
        colInds = numpy.array(colInds, numpy.int32)

        #"is None" is required: "mu == None" is element-wise for an array mu
        #and cannot be used as an if condition
        if mu is None:
            #This is the mean of the nonzero values in each row
            nonZeroCounts = numpy.bincount(rowInds, minlength=X.shape[0])
            emptyRows = nonZeroCounts == 0
            #Avoid division by zero for empty rows; their mean is set to 0 below
            nonZeroCounts += emptyRows
            #This is required because when we do X.sum(1) for centering it uses the same
            #dtype as X to store the sum, and this can result in overflow for e.g. uint8
            if X.dtype == numpy.uint8:
                sumCol = SparseUtilsCython.sumCols(rowInds, numpy.array(X[rowInds, colInds]).flatten(), X.shape[0])
            else:
                sumCol = numpy.array(X.sum(1)).flatten()
            mu = sumCol/nonZeroCounts
            mu[emptyRows] = 0

        #The builtin float replaces the numpy.float alias removed in NumPy 1.24
        vals = SparseUtilsCython.partialOuterProduct(rowInds, colInds, numpy.array(mu, float), numpy.ones(X.shape[1]))
        X[nonzeroInds] = numpy.array(X[nonzeroInds] - vals, float)

        return X, mu
Пример #4
0
    def centerCols(X, mu=None, inds=None):
        """
        Subtract the mean value of a column from each non-zero element of that
        column. X is modified in place.

        :param X: the matrix to center (presumably a scipy sparse matrix).
        :param mu: optional per-column values to subtract. When None, the sum
            of each column divided by its number of non-zero elements is used
            (0 for columns with no non-zero elements).
        :param inds: optional (rowInds, colInds) pair used in place of
            X.nonzero() -- presumably the same indices precomputed by the caller.
        :return: the tuple (X, mu).
        """
        nonzeroInds = X.nonzero()
        if inds is None:
            rowInds, colInds = nonzeroInds
        else:
            rowInds, colInds = inds
        rowInds = numpy.array(rowInds, numpy.int32)
        colInds = numpy.array(colInds, numpy.int32)

        #"is None" is required: "mu == None" is element-wise for an array mu
        #and cannot be used as an if condition
        if mu is None:
            #This is the mean of the nonzero values in each col
            nonZeroCounts = numpy.bincount(colInds, minlength=X.shape[1])
            emptyCols = nonZeroCounts == 0
            #Avoid division by zero for empty columns; their mean is set to 0 below
            nonZeroCounts += emptyCols
            mu = numpy.array(X.sum(0), float).ravel() / nonZeroCounts
            mu[emptyCols] = 0

        #The builtin float replaces the numpy.float alias removed in NumPy 1.24
        vals = SparseUtilsCython.partialOuterProduct(
            rowInds, colInds, numpy.ones(X.shape[0]),
            numpy.array(mu, float))
        X[nonzeroInds] = numpy.array(X[nonzeroInds] - vals, float)

        return X, mu
Пример #5
0
    def centerRows(X, mu=None, inds=None):
        """
        Subtract the mean value of a row from each non-zero element of that row.
        X is modified in place.

        :param X: the matrix to center (presumably a scipy sparse matrix).
        :param mu: optional per-row values to subtract. When None, the sum of
            each row divided by its number of non-zero elements is used
            (0 for rows with no non-zero elements).
        :param inds: optional (rowInds, colInds) pair used in place of
            X.nonzero() -- presumably the same indices precomputed by the caller.
        :return: the tuple (X, mu).
        """
        nonzeroInds = X.nonzero()
        if inds is None:
            rowInds, colInds = nonzeroInds
        else:
            rowInds, colInds = inds

        rowInds = numpy.array(rowInds, numpy.int32)
        colInds = numpy.array(colInds, numpy.int32)

        #Identity test on purpose: "mu == None" would compare an array mu
        #element by element and fail inside the if statement
        if mu is None:
            #This is the mean of the nonzero values in each row
            nonZeroCounts = numpy.bincount(rowInds, minlength=X.shape[0])
            emptyRows = nonZeroCounts == 0
            #Rows without non-zero elements get a count of 1 so the division
            #is defined; their mean is overwritten with 0 below
            nonZeroCounts += emptyRows
            #This is required because when we do X.sum(1) for centering it uses the same
            #dtype as X to store the sum, and this can result in overflow for e.g. uint8
            if X.dtype == numpy.uint8:
                sumCol = SparseUtilsCython.sumCols(
                    rowInds,
                    numpy.array(X[rowInds, colInds]).flatten(), X.shape[0])
            else:
                sumCol = numpy.array(X.sum(1)).flatten()
            mu = sumCol / nonZeroCounts
            mu[emptyRows] = 0

        #numpy.float was removed in NumPy 1.24; it was an alias of float
        vals = SparseUtilsCython.partialOuterProduct(
            rowInds, colInds, numpy.array(mu, float),
            numpy.ones(X.shape[1]))
        X[nonzeroInds] = numpy.array(X[nonzeroInds] - vals, float)

        return X, mu
Пример #6
0
    def unshrink(self, X, U, V):
        """
        Perform post-processing on a factorisation of a matrix X using factor
        vectors U and V.

        A random sample of at most self.postProcessSamples non-zero elements
        of X is taken, and the weights s are fitted by least squares so that
        the weighted sum over i of s[i] * U[:, i] * V[:, i] approximates the
        sampled elements.

        :param X: the factorised matrix (presumably a scipy sparse matrix).
        :param U: left factor vectors, one per column.
        :param V: right factor vectors, one per column.
        :return: the array s of fitted weights, one per column of U.
        """
        logging.debug("Post processing singular values")

        rowInds, colInds = X.nonzero()
        #Sample positions within the nonzero index arrays. Their length is
        #used as the range (rather than X.data.shape[0]) because explicitly
        #stored zeros are counted in X.data but not returned by X.nonzero()
        numNonzero = rowInds.shape[0]

        #Fix for versions of numpy < 1.7
        inds = numpy.unique(
            numpy.random.randint(
                0, numNonzero,
                numpy.min([self.postProcessSamples, numNonzero])))
        a = numpy.array(X[rowInds, colInds]).ravel()[inds]

        B = numpy.zeros((a.shape[0], U.shape[1]))

        rowInds = numpy.array(rowInds[inds], numpy.int32)
        colInds = numpy.array(colInds[inds], numpy.int32)

        #Populate B
        for i in range(U.shape[1]):
            B[:, i] = SparseUtilsCython.partialOuterProduct(
                rowInds, colInds, U[:, i], V[:, i])

        #Least squares solution of B s = a via the normal equations
        s = numpy.linalg.pinv(B.T.dot(B)).dot(B.T).dot(a)

        return s
Пример #7
0
    def uncenter(X, mu1, mu2):
        """
        Uncenter a matrix with mu1 and mu2, the row and column means of the
        original matrix. X is the centered matrix and is modified in place.

        :param X: the centered matrix (presumably a scipy sparse matrix).
        :param mu1: values added back per row (indexed by row).
        :param mu2: values added back per column (indexed by column).
        :return: X with mu1[row] + mu2[col] added to every non-zero element.
        """
        rowInds, colInds = X.nonzero()
        rowInds = numpy.array(rowInds, numpy.int32)
        colInds = numpy.array(colInds, numpy.int32)

        #numpy.float was removed in NumPy 1.24; the builtin float it aliased
        #is used instead
        vals1 = SparseUtilsCython.partialOuterProduct(
            rowInds, colInds, numpy.array(mu1, float),
            numpy.ones(X.shape[1]))
        vals2 = SparseUtilsCython.partialOuterProduct(
            rowInds, colInds, numpy.ones(X.shape[0]),
            numpy.array(mu2, float))
        X[rowInds, colInds] = X[rowInds, colInds] + vals1 + vals2

        return X
Пример #8
0
    def uncenterRows(X, mu):
        """
        Take a matrix with rows centered using mu, and return them to their original
        state. Note that one should call X.eliminate_zeros() beforehand.
        X is modified in place.

        :param X: the row-centered matrix (presumably a scipy sparse matrix).
        :param mu: array of per-row values to add back; its length must equal
            the number of rows of X.
        :return: X with mu[row] added to every non-zero element.
        :raises ValueError: if X.shape[0] differs from mu.shape[0].
        """
        if X.shape[0] != mu.shape[0]:
            raise ValueError("Invalid number of rows")

        rowInds, colInds = X.nonzero()
        rowInds = numpy.array(rowInds, numpy.int32)
        colInds = numpy.array(colInds, numpy.int32)

        #The builtin float replaces the numpy.float alias removed in NumPy 1.24
        rowMeans = numpy.array(mu, float)
        vals = SparseUtilsCython.partialOuterProduct(rowInds, colInds, rowMeans, numpy.ones(X.shape[1]))
        X[rowInds, colInds] = numpy.array(X[rowInds, colInds] + vals, float)

        return X
Пример #9
0
    def uncenterRows(X, mu):
        """
        Take a matrix with rows centered using mu, and return them to their original
        state. Note that one should call X.eliminate_zeros() beforehand.
        X is modified in place.

        :param X: the row-centered matrix (presumably a scipy sparse matrix).
        :param mu: array of per-row values to add back; its length must equal
            the number of rows of X.
        :return: X with mu[row] added to every non-zero element.
        :raises ValueError: if X.shape[0] differs from mu.shape[0].
        """
        if X.shape[0] != mu.shape[0]:
            raise ValueError("Invalid number of rows")

        rowInds, colInds = X.nonzero()
        rowInds = numpy.array(rowInds, numpy.int32)
        colInds = numpy.array(colInds, numpy.int32)

        #numpy.float was removed in NumPy 1.24; it was an alias of float
        vals = SparseUtilsCython.partialOuterProduct(
            rowInds, colInds, numpy.array(mu, float),
            numpy.ones(X.shape[1]))
        X[rowInds, colInds] = numpy.array(X[rowInds, colInds] + vals,
                                          float)

        return X
Пример #10
0
    def centerCols(X, mu=None, inds=None):
        """
        Subtract the mean value of a column from each non-zero element of that
        column. X is modified in place.

        :param X: the matrix to center (presumably a scipy sparse matrix).
        :param mu: optional per-column values to subtract. When None, the sum
            of each column divided by its number of non-zero elements is used
            (0 for columns with no non-zero elements).
        :param inds: optional (rowInds, colInds) pair used in place of
            X.nonzero() -- presumably the same indices precomputed by the caller.
        :return: the tuple (X, mu).
        """
        nonzeroInds = X.nonzero()
        if inds is None:
            rowInds, colInds = nonzeroInds
        else:
            rowInds, colInds = inds
        rowInds = numpy.array(rowInds, numpy.int32)
        colInds = numpy.array(colInds, numpy.int32)

        #Identity test on purpose: "mu == None" would compare an array mu
        #element by element and fail inside the if statement
        if mu is None:
            #This is the mean of the nonzero values in each col
            nonZeroCounts = numpy.bincount(colInds, minlength=X.shape[1])
            emptyCols = nonZeroCounts == 0
            #Columns without non-zero elements get a count of 1 so the
            #division is defined; their mean is overwritten with 0 below
            nonZeroCounts += emptyCols
            mu = numpy.array(X.sum(0), float).ravel()/nonZeroCounts
            mu[emptyCols] = 0

        #numpy.float was removed in NumPy 1.24; it was an alias of float
        vals = SparseUtilsCython.partialOuterProduct(rowInds, colInds, numpy.ones(X.shape[0]), numpy.array(mu, float))
        X[nonzeroInds] = numpy.array(X[nonzeroInds] - vals, float)

        return X, mu
Пример #11
0
 def unshrink(self, X, U, V):
     """
     Perform post-processing on a factorisation of a matrix X using factor
     vectors U and V.

     A random sample of at most self.postProcessSamples non-zero elements
     of X is taken, and the weights s are fitted by least squares so that
     the weighted sum over i of s[i] * U[:, i] * V[:, i] approximates the
     sampled elements.

     :param X: the factorised matrix (presumably a scipy sparse matrix).
     :param U: left factor vectors, one per column.
     :param V: right factor vectors, one per column.
     :return: the array s of fitted weights, one per column of U.
     """
     logging.debug("Post processing singular values")

     rowInds, colInds = X.nonzero()
     #The sample range is the number of entries returned by X.nonzero(),
     #not X.data.shape[0]: explicitly stored zeros appear in X.data but
     #not in the nonzero index arrays that the sample is applied to
     numNonzero = rowInds.shape[0]

     #Fix for versions of numpy < 1.7
     inds = numpy.unique(numpy.random.randint(0, numNonzero, numpy.min([self.postProcessSamples, numNonzero])))
     a = numpy.array(X[rowInds, colInds]).ravel()[inds]

     B = numpy.zeros((a.shape[0], U.shape[1]))

     rowInds = numpy.array(rowInds[inds], numpy.int32)
     colInds = numpy.array(colInds[inds], numpy.int32)

     #Populate B
     for i in range(U.shape[1]):
         B[:, i] = SparseUtilsCython.partialOuterProduct(rowInds, colInds, U[:, i], V[:, i])

     #Least squares solution of B s = a via the normal equations
     s = numpy.linalg.pinv(B.T.dot(B)).dot(B.T).dot(a)

     return s