Exemplo n.º 1
0
 def testUpdateSvd(self): 
     """
     Let's see if the update to the SVD works. 
     """
     numRuns = 10        
     
     for i in range(numRuns): 
         m, n = numpy.random.randint(10, 100), numpy.random.randint(10, 100) 
         k = 3
         
         X = numpy.random.rand(m, n)
         
         U, s, V = RandomisedSVD.svd(X, k)
         
         E = numpy.random.randn(m, n) * 0.2 
         
         U2, s2, V2 = RandomisedSVD.svd(X + E, k)
         U3, s3, V3 = RandomisedSVD.updateSvd(X, U, s, V, E, k)
         
         XE = X + E
         error1 = numpy.linalg.norm(XE - (U*s).dot(V.T))
         error2 = numpy.linalg.norm(XE - (U2*s2).dot(V2.T))
         error3 = numpy.linalg.norm(XE - (U3*s3).dot(V3.T))
 
         self.assertTrue(error1 >= error3)
         #print(error1, error2, error3)
         
         #Test use of linear opertors 
         X = GeneralLinearOperator.asLinearOperator(X)
         E = GeneralLinearOperator.asLinearOperator(E) 
         
         U3, s3, V3 = RandomisedSVD.updateSvd(X, U, s, V, E, k)
         error4 = numpy.linalg.norm(XE - (U2*s2).dot(V2.T))
         self.assertEquals(error4, error2)
Exemplo n.º 2
0
 def testSvd2(self): 
     """
     We test the situation in which one gives an initial omega matrix 
     for the random projections. 
     """
     numRuns = 10 
     
     for i in range(numRuns): 
         m, n = numpy.random.randint(10, 100), numpy.random.randint(10, 100) 
         X = numpy.random.rand(m, n)
         
         k = numpy.random.randint(5, min(m, n)) 
         U, s, V = RandomisedSVD.svd(X, k)
 
         D = numpy.random.rand(m, n)*0.1
 
         Y = X + D 
         U2, s2, V2 = RandomisedSVD.svd(Y, k, p=0, q=0)
 
         U3, s3, V3 = RandomisedSVD.svd(Y, k, p=0, q=0, omega=V)
         
         error1 = numpy.linalg.norm(Y - (U2*s2).dot(V2.T)) 
         error2 = numpy.linalg.norm(Y - (U3*s3).dot(V3.T))
         
         self.assertTrue(error1 >= error2)
Exemplo n.º 3
0
    def initUV(self, X):
        m = X.shape[0]
        n = X.shape[1]

        if self.initialAlg == "rand":
            U = numpy.random.randn(m, self.k) * 0.1
            V = numpy.random.randn(n, self.k) * 0.1
        elif self.initialAlg == "svd":
            logging.debug("Initialising with Randomised SVD")
            U, s, V = RandomisedSVD.svd(X, self.k, self.p, self.q)
            U = U * s
        elif self.initialAlg == "softimpute":
            logging.debug("Initialising with softimpute")
            trainIterator = iter([X.toScipyCsc()])
            rho = 0.01
            learner = IterativeSoftImpute(rho, k=self.k, svdAlg="propack", postProcess=True)
            ZList = learner.learnModel(trainIterator)
            U, s, V = ZList.next()
            U = U * s
        elif self.initialAlg == "wrmf":
            logging.debug("Initialising with wrmf")
            learner = WeightedMf(self.k, w=self.w)
            U, V = learner.learnModel(X.toScipyCsr())
        else:
            raise ValueError("Unknown initialisation: " + str(self.initialAlg))

        U = numpy.ascontiguousarray(U)
        V = numpy.ascontiguousarray(V)

        return U, V
Exemplo n.º 4
0
    def initUV(self, X):
        m = X.shape[0]
        n = X.shape[1]

        if self.initialAlg == "rand":
            U = numpy.random.randn(m, self.k) * 0.1
            V = numpy.random.randn(n, self.k) * 0.1
        elif self.initialAlg == "svd":
            logging.debug("Initialising with Randomised SVD")
            U, s, V = RandomisedSVD.svd(X, self.k, self.p, self.q)
            U = U * s
        elif self.initialAlg == "softimpute":
            logging.debug("Initialising with softimpute")
            trainIterator = iter([X.toScipyCsc()])
            rho = 0.01
            learner = IterativeSoftImpute(rho,
                                          k=self.k,
                                          svdAlg="propack",
                                          postProcess=True)
            ZList = learner.learnModel(trainIterator)
            U, s, V = ZList.next()
            U = U * s
        elif self.initialAlg == "wrmf":
            logging.debug("Initialising with wrmf")
            learner = WeightedMf(self.k, w=self.w)
            U, V = learner.learnModel(X.toScipyCsr())
        else:
            raise ValueError("Unknown initialisation: " + str(self.initialAlg))

        U = numpy.ascontiguousarray(U)
        V = numpy.ascontiguousarray(V)

        return U, V
Exemplo n.º 5
0
 def testSvd(self): 
     n = 100 
     m = 80
     A = scipy.sparse.rand(m, n, 0.1)
     
     ks = [10, 20, 30, 40] 
     q = 2 
     
     lastError = numpy.linalg.norm(A.todense())        
     
     for k in ks: 
         U, s, V = RandomisedSVD.svd(A, k, q)
         
         nptst.assert_array_almost_equal(U.T.dot(U), numpy.eye(k))
         nptst.assert_array_almost_equal(V.T.dot(V), numpy.eye(k))
         A2 = (U*s).dot(V.T)
         
         error = numpy.linalg.norm(A - A2)
         self.assertTrue(error <= lastError)
         lastError = error 
         
         #Compare versus exact svd 
         U, s, V = numpy.linalg.svd(numpy.array(A.todense()))
         inds = numpy.flipud(numpy.argsort(s))[0:k*2]
         U, s, V = Util.indSvd(U, s, V, inds)
         
         Ak = (U*s).dot(V.T)
         
         error2 = numpy.linalg.norm(A - Ak)
         self.assertTrue(error2 <= error)
    def clusterFromIterator(self, graphListIterator, verbose=False):
        """
        Find a set of clusters for the graphs given by the iterator. If verbose 
        is true the each iteration is timed and bounded the results are returned 
        as lists.
        
        The difference between a weight matrix and the previous one should be
        positive.
        """
        clustersList = []
        decompositionTimeList = []
        kMeansTimeList = []
        boundList = []
        sinThetaList = []
        i = 0

        for subW in graphListIterator:
            if __debug__:
                Parameter.checkSymmetric(subW)

            if self.logStep and i % self.logStep == 0:
                logging.debug("Graph index: " + str(i))
            logging.debug("Clustering graph of size " + str(subW.shape))
            if self.alg != "efficientNystrom":
                ABBA = GraphUtils.shiftLaplacian(subW)

            # --- Eigen value decomposition ---
            startTime = time.time()
            if self.alg == "IASC":
                if i % self.T != 0:
                    omega, Q = self.approxUpdateEig(subW, ABBA, omega, Q)

                    if self.computeBound:
                        inds = numpy.flipud(numpy.argsort(omega))
                        Q = Q[:, inds]
                        omega = omega[inds]
                        bounds = self.pertBound(omega, Q, omegaKbot, AKbot,
                                                self.k2)
                        #boundList.append([i, bounds[0], bounds[1]])

                        #Now use accurate values of norm of R and delta
                        rank = Util.rank(ABBA.todense())
                        gamma, U = scipy.sparse.linalg.eigsh(ABBA,
                                                             rank - 1,
                                                             which="LM",
                                                             ncv=ABBA.shape[0])
                        #logging.debug("gamma=" + str(gamma))
                        bounds2 = self.realBound(omega, Q, gamma, AKbot,
                                                 self.k2)
                        boundList.append(
                            [bounds[0], bounds[1], bounds2[0], bounds2[1]])
                else:
                    logging.debug("Computing exact eigenvectors")
                    self.storeInformation(subW, ABBA)

                    if self.computeBound:
                        #omega, Q = scipy.sparse.linalg.eigsh(ABBA, min(self.k2*2, ABBA.shape[0]-1), which="LM", ncv = min(10*self.k2, ABBA.shape[0]))
                        rank = Util.rank(ABBA.todense())
                        omega, Q = scipy.sparse.linalg.eigsh(ABBA,
                                                             rank - 1,
                                                             which="LM",
                                                             ncv=ABBA.shape[0])
                        inds = numpy.flipud(numpy.argsort(omega))
                        omegaKbot = omega[inds[self.k2:]]
                        QKbot = Q[:, inds[self.k2:]]
                        AKbot = (QKbot * omegaKbot).dot(QKbot.T)

                        omegaSort = numpy.flipud(numpy.sort(omega))
                        boundList.append([0] * 4)
                    else:
                        omega, Q = scipy.sparse.linalg.eigsh(
                            ABBA,
                            min(self.k2, ABBA.shape[0] - 1),
                            which="LM",
                            ncv=min(10 * self.k2, ABBA.shape[0]))

            elif self.alg == "nystrom":
                omega, Q = Nystrom.eigpsd(ABBA, self.k3)
            elif self.alg == "exact":
                omega, Q = scipy.sparse.linalg.eigsh(
                    ABBA,
                    min(self.k1, ABBA.shape[0] - 1),
                    which="LM",
                    ncv=min(15 * self.k1, ABBA.shape[0]))
            elif self.alg == "efficientNystrom":
                omega, Q = EfficientNystrom.eigWeight(subW, self.k2, self.k1)
            elif self.alg == "randomisedSvd":
                Q, omega, R = RandomisedSVD.svd(ABBA, self.k4)
            else:
                raise ValueError("Invalid Algorithm: " + str(self.alg))

            if self.computeSinTheta:
                omegaExact, QExact = scipy.linalg.eigh(ABBA.todense())
                inds = numpy.flipud(numpy.argsort(omegaExact))
                QExactKbot = QExact[:, inds[self.k1:]]
                inds = numpy.flipud(numpy.argsort(omega))
                QApproxK = Q[:, inds[:self.k1]]
                sinThetaList.append(
                    scipy.linalg.norm(QExactKbot.T.dot(QApproxK)))

            decompositionTimeList.append(time.time() - startTime)

            if self.alg == "IASC":
                self.storeInformation(subW, ABBA)

            # --- Kmeans ---
            startTime = time.time()
            inds = numpy.flipud(numpy.argsort(omega))

            standardiser = Standardiser()
            #For some very strange reason we get an overflow when computing the
            #norm of the rows of Q even though its elements are bounded by 1.
            #We'll ignore it for now
            try:
                V = standardiser.normaliseArray(Q[:, inds[0:self.k1]].real.T).T
            except FloatingPointError as e:
                logging.warn("FloatingPointError: " + str(e))
            V = VqUtils.whiten(V)
            if i == 0:
                centroids, distortion = vq.kmeans(V,
                                                  self.k1,
                                                  iter=self.nb_iter_kmeans)
            else:
                centroids = self.findCentroids(V, clusters[:subW.shape[0]])
                if centroids.shape[0] < self.k1:
                    nb_missing_centroids = self.k1 - centroids.shape[0]
                    random_centroids = V[numpy.random.randint(
                        0, V.shape[0], nb_missing_centroids), :]
                    centroids = numpy.vstack((centroids, random_centroids))
                centroids, distortion = vq.kmeans(
                    V, centroids)  #iter can only be 1
            clusters, distortion = vq.vq(V, centroids)
            kMeansTimeList.append(time.time() - startTime)

            clustersList.append(clusters)

            #logging.debug("subW.shape: " + str(subW.shape))
            #logging.debug("len(clusters): " + str(len(clusters)))
            #from sandbox.util.ProfileUtils import ProfileUtils
            #logging.debug("Total memory usage: " + str(ProfileUtils.memory()/10**6) + "MB")
            if ProfileUtils.memory() > 10**9:
                ProfileUtils.memDisplay(locals())

            i += 1

        if verbose:
            eigenQuality = {
                "boundList": boundList,
                "sinThetaList": sinThetaList
            }
            return clustersList, numpy.array(
                (decompositionTimeList, kMeansTimeList)).T, eigenQuality
        else:
            return clustersList
Exemplo n.º 7
0
            def next(self):
                X = self.XIterator.next()
                logging.debug("Learning on matrix with shape: " +
                              str(X.shape) + " and " + str(X.nnz) +
                              " non-zeros")

                if self.iterativeSoftImpute.weighted:
                    #Compute row and col probabilities
                    up, vp = SparseUtils.nonzeroRowColsProbs(X)
                    nzuInds = up == 0
                    nzvInds = vp == 0
                    u = numpy.sqrt(1 / (up + numpy.array(nzuInds, numpy.int)))
                    v = numpy.sqrt(1 / (vp + numpy.array(nzvInds, numpy.int)))
                    u[nzuInds] = 0
                    v[nzvInds] = 0

                if self.rhos != None:
                    self.iterativeSoftImpute.setRho(self.rhos.next())

                if not scipy.sparse.isspmatrix_csc(X):
                    raise ValueError("X must be a csc_matrix not " +
                                     str(type(X)))

                #Figure out what lambda should be
                #PROPACK has problems with convergence
                Y = scipy.sparse.csc_matrix(X, dtype=numpy.float)
                U, s, V = ExpSU.SparseUtils.svdArpack(Y, 1, kmax=20)
                del Y
                #U, s, V = SparseUtils.svdPropack(X, 1, kmax=20)
                maxS = s[0]
                logging.debug("Largest singular value : " + str(maxS))

                (n, m) = X.shape

                if self.j == 0:
                    self.oldU = numpy.zeros((n, 1))
                    self.oldS = numpy.zeros(1)
                    self.oldV = numpy.zeros((m, 1))
                else:
                    oldN = self.oldU.shape[0]
                    oldM = self.oldV.shape[0]

                    if self.iterativeSoftImpute.updateAlg == "initial":
                        if n > oldN:
                            self.oldU = Util.extendArray(
                                self.oldU, (n, self.oldU.shape[1]))
                        elif n < oldN:
                            self.oldU = self.oldU[0:n, :]

                        if m > oldM:
                            self.oldV = Util.extendArray(
                                self.oldV, (m, self.oldV.shape[1]))
                        elif m < oldN:
                            self.oldV = self.oldV[0:m, :]
                    elif self.iterativeSoftImpute.updateAlg == "zero":
                        self.oldU = numpy.zeros((n, 1))
                        self.oldS = numpy.zeros(1)
                        self.oldV = numpy.zeros((m, 1))
                    else:
                        raise ValueError("Unknown SVD update algorithm: " +
                                         self.updateAlg)

                rowInds, colInds = X.nonzero()

                gamma = self.iterativeSoftImpute.eps + 1
                i = 0

                self.iterativeSoftImpute.measures = numpy.zeros(
                    (self.iterativeSoftImpute.maxIterations, 4))

                while gamma > self.iterativeSoftImpute.eps:
                    if i == self.iterativeSoftImpute.maxIterations:
                        logging.debug("Maximum number of iterations reached")
                        break

                    ZOmega = SparseUtilsCython.partialReconstructPQ(
                        (rowInds, colInds), self.oldU * self.oldS, self.oldV)
                    Y = X - ZOmega
                    #Y = Y.tocsc()
                    #del ZOmega
                    Y = csarray(Y, storagetype="row")
                    gc.collect()

                    #os.system('taskset -p 0xffffffff %d' % os.getpid())

                    if self.iterativeSoftImpute.svdAlg == "propack":
                        L = LinOperatorUtils.sparseLowRankOp(Y,
                                                             self.oldU,
                                                             self.oldS,
                                                             self.oldV,
                                                             parallel=False)
                        newU, newS, newV = SparseUtils.svdPropack(
                            L,
                            k=self.iterativeSoftImpute.k,
                            kmax=self.iterativeSoftImpute.kmax)
                    elif self.iterativeSoftImpute.svdAlg == "arpack":
                        L = LinOperatorUtils.sparseLowRankOp(Y,
                                                             self.oldU,
                                                             self.oldS,
                                                             self.oldV,
                                                             parallel=False)
                        newU, newS, newV = SparseUtils.svdArpack(
                            L,
                            k=self.iterativeSoftImpute.k,
                            kmax=self.iterativeSoftImpute.kmax)
                    elif self.iterativeSoftImpute.svdAlg == "svdUpdate":
                        newU, newS, newV = SVDUpdate.addSparseProjected(
                            self.oldU, self.oldS, self.oldV, Y,
                            self.iterativeSoftImpute.k)
                    elif self.iterativeSoftImpute.svdAlg == "rsvd":
                        L = LinOperatorUtils.sparseLowRankOp(Y,
                                                             self.oldU,
                                                             self.oldS,
                                                             self.oldV,
                                                             parallel=True)
                        newU, newS, newV = RandomisedSVD.svd(
                            L,
                            self.iterativeSoftImpute.k,
                            p=self.iterativeSoftImpute.p,
                            q=self.iterativeSoftImpute.q)
                    elif self.iterativeSoftImpute.svdAlg == "rsvdUpdate":
                        L = LinOperatorUtils.sparseLowRankOp(Y,
                                                             self.oldU,
                                                             self.oldS,
                                                             self.oldV,
                                                             parallel=True)
                        if self.j == 0:
                            newU, newS, newV = RandomisedSVD.svd(
                                L,
                                self.iterativeSoftImpute.k,
                                p=self.iterativeSoftImpute.p,
                                q=self.iterativeSoftImpute.q)
                        else:
                            newU, newS, newV = RandomisedSVD.svd(
                                L,
                                self.iterativeSoftImpute.k,
                                p=self.iterativeSoftImpute.p,
                                q=self.iterativeSoftImpute.qu,
                                omega=self.oldV)
                    elif self.iterativeSoftImpute.svdAlg == "rsvdUpdate2":

                        if self.j == 0:
                            L = LinOperatorUtils.sparseLowRankOp(Y,
                                                                 self.oldU,
                                                                 self.oldS,
                                                                 self.oldV,
                                                                 parallel=True)
                            newU, newS, newV = RandomisedSVD.svd(
                                L,
                                self.iterativeSoftImpute.k,
                                p=self.iterativeSoftImpute.p,
                                q=self.iterativeSoftImpute.q)
                        else:
                            #Need linear operator which is U s V
                            L = LinOperatorUtils.lowRankOp(
                                self.oldU, self.oldS, self.oldV)
                            Y = GeneralLinearOperator.asLinearOperator(
                                Y, parallel=True)
                            newU, newS, newV = RandomisedSVD.updateSvd(
                                L,
                                self.oldU,
                                self.oldS,
                                self.oldV,
                                Y,
                                self.iterativeSoftImpute.k,
                                p=self.iterativeSoftImpute.p)
                    else:
                        raise ValueError("Unknown SVD algorithm: " +
                                         self.iterativeSoftImpute.svdAlg)

                    if self.iterativeSoftImpute.weighted and i == 0:
                        delta = numpy.diag((u * newU.T).dot(newU))
                        pi = numpy.diag((v * newV.T).dot(newV))
                        lmbda = (maxS / numpy.max(
                            delta * pi)) * self.iterativeSoftImpute.rho
                        lmbdav = lmbda * delta * pi
                    elif not self.iterativeSoftImpute.weighted:
                        lmbda = maxS * self.iterativeSoftImpute.rho
                        if i == 0:
                            logging.debug("lambda: " + str(lmbda))
                        lmbdav = lmbda

                    newS = newS - lmbdav
                    #Soft threshold
                    newS = numpy.clip(newS, 0, numpy.max(newS))

                    normOldZ = (self.oldS**2).sum()
                    normNewZmOldZ = (self.oldS**2).sum() + (
                        newS**2).sum() - 2 * numpy.trace(
                            (self.oldV.T.dot(newV * newS)).dot(
                                newU.T.dot(self.oldU * self.oldS)))

                    #We can get newZ == oldZ in which case we break
                    if normNewZmOldZ < self.tol:
                        gamma = 0
                    elif abs(normOldZ) < self.tol:
                        gamma = self.iterativeSoftImpute.eps + 1
                    else:
                        gamma = normNewZmOldZ / normOldZ

                    if self.iterativeSoftImpute.verbose:
                        theta1 = (
                            self.iterativeSoftImpute.k -
                            numpy.linalg.norm(self.oldU.T.dot(newU), 'fro')**
                            2) / self.iterativeSoftImpute.k
                        theta2 = (
                            self.iterativeSoftImpute.k -
                            numpy.linalg.norm(self.oldV.T.dot(newV), 'fro')**
                            2) / self.iterativeSoftImpute.k
                        thetaS = numpy.linalg.norm(
                            newS - self.oldS)**2 / numpy.linalg.norm(newS)**2
                        self.iterativeSoftImpute.measures[i, :] = numpy.array(
                            [gamma, theta1, theta2, thetaS])

                    self.oldU = newU.copy()
                    self.oldS = newS.copy()
                    self.oldV = newV.copy()

                    logging.debug("Iteration " + str(i) + " gamma=" +
                                  str(gamma))
                    i += 1

                if self.iterativeSoftImpute.postProcess:
                    #Add the mean vectors
                    previousS = newS
                    newU = numpy.c_[newU, numpy.array(X.mean(1)).ravel()]
                    newV = numpy.c_[newV, numpy.array(X.mean(0)).ravel()]
                    newS = self.iterativeSoftImpute.unshrink(X, newU, newV)

                    #Note that this increases the rank of U and V by 1
                    #print("Difference in s after postprocessing: " + str(numpy.linalg.norm(previousS - newS[0:-1])))
                    logging.debug("Difference in s after postprocessing: " +
                                  str(numpy.linalg.norm(previousS -
                                                        newS[0:-1])))

                logging.debug("Number of iterations for rho=" +
                              str(self.iterativeSoftImpute.rho) + ": " +
                              str(i))
                self.j += 1
                return (newU, newS, newV)
Exemplo n.º 8
0
            def next(self):
                X = self.XIterator.next()
                logging.debug("Learning on matrix with shape: " + str(X.shape) + " and " + str(X.nnz) + " non-zeros")    
                
                if self.iterativeSoftImpute.weighted: 
                    #Compute row and col probabilities 
                    up, vp = SparseUtils.nonzeroRowColsProbs(X)
                    nzuInds = up==0
                    nzvInds = vp==0
                    u = numpy.sqrt(1/(up + numpy.array(nzuInds, numpy.int))) 
                    v = numpy.sqrt(1/(vp + numpy.array(nzvInds, numpy.int)))
                    u[nzuInds] = 0 
                    v[nzvInds] = 0 
                
                if self.rhos != None: 
                    self.iterativeSoftImpute.setRho(self.rhos.next())

                if not scipy.sparse.isspmatrix_csc(X):
                    raise ValueError("X must be a csc_matrix not " + str(type(X)))
                    
                #Figure out what lambda should be 
                #PROPACK has problems with convergence 
                Y = scipy.sparse.csc_matrix(X, dtype=numpy.float)
                U, s, V = ExpSU.SparseUtils.svdArpack(Y, 1, kmax=20)
                del Y
                #U, s, V = SparseUtils.svdPropack(X, 1, kmax=20)
                maxS = s[0]
                logging.debug("Largest singular value : " + str(maxS))

                (n, m) = X.shape

                if self.j == 0:
                    self.oldU = numpy.zeros((n, 1))
                    self.oldS = numpy.zeros(1)
                    self.oldV = numpy.zeros((m, 1))
                else:
                    oldN = self.oldU.shape[0]
                    oldM = self.oldV.shape[0]

                    if self.iterativeSoftImpute.updateAlg == "initial":
                        if n > oldN:
                            self.oldU = Util.extendArray(self.oldU, (n, self.oldU.shape[1]))
                        elif n < oldN:
                            self.oldU = self.oldU[0:n, :]

                        if m > oldM:
                            self.oldV = Util.extendArray(self.oldV, (m, self.oldV.shape[1]))
                        elif m < oldN:
                            self.oldV = self.oldV[0:m, :]
                    elif self.iterativeSoftImpute.updateAlg == "zero":
                        self.oldU = numpy.zeros((n, 1))
                        self.oldS = numpy.zeros(1)
                        self.oldV = numpy.zeros((m, 1))
                    else:
                        raise ValueError("Unknown SVD update algorithm: " + self.updateAlg)

                rowInds, colInds = X.nonzero()

                gamma = self.iterativeSoftImpute.eps + 1
                i = 0

                self.iterativeSoftImpute.measures = numpy.zeros((self.iterativeSoftImpute.maxIterations, 4))

                while gamma > self.iterativeSoftImpute.eps:
                    if i == self.iterativeSoftImpute.maxIterations: 
                        logging.debug("Maximum number of iterations reached")
                        break 
                    
                    ZOmega = SparseUtilsCython.partialReconstructPQ((rowInds, colInds), self.oldU*self.oldS, self.oldV)
                    Y = X - ZOmega
                    #Y = Y.tocsc()
                    #del ZOmega
                    Y = csarray(Y, storagetype="row")
                    gc.collect()
                    
                    #os.system('taskset -p 0xffffffff %d' % os.getpid())

                    if self.iterativeSoftImpute.svdAlg=="propack":
                        L = LinOperatorUtils.sparseLowRankOp(Y, self.oldU, self.oldS, self.oldV, parallel=False)                        
                        newU, newS, newV = SparseUtils.svdPropack(L, k=self.iterativeSoftImpute.k, kmax=self.iterativeSoftImpute.kmax)
                    elif self.iterativeSoftImpute.svdAlg=="arpack":
                        L = LinOperatorUtils.sparseLowRankOp(Y, self.oldU, self.oldS, self.oldV, parallel=False)                        
                        newU, newS, newV = SparseUtils.svdArpack(L, k=self.iterativeSoftImpute.k, kmax=self.iterativeSoftImpute.kmax)
                    elif self.iterativeSoftImpute.svdAlg=="svdUpdate":
                        newU, newS, newV = SVDUpdate.addSparseProjected(self.oldU, self.oldS, self.oldV, Y, self.iterativeSoftImpute.k)
                    elif self.iterativeSoftImpute.svdAlg=="rsvd":
                        L = LinOperatorUtils.sparseLowRankOp(Y, self.oldU, self.oldS, self.oldV, parallel=True)
                        newU, newS, newV = RandomisedSVD.svd(L, self.iterativeSoftImpute.k, p=self.iterativeSoftImpute.p, q=self.iterativeSoftImpute.q)
                    elif self.iterativeSoftImpute.svdAlg=="rsvdUpdate": 
                        L = LinOperatorUtils.sparseLowRankOp(Y, self.oldU, self.oldS, self.oldV, parallel=True)
                        if self.j == 0: 
                            newU, newS, newV = RandomisedSVD.svd(L, self.iterativeSoftImpute.k, p=self.iterativeSoftImpute.p, q=self.iterativeSoftImpute.q)
                        else: 
                            newU, newS, newV = RandomisedSVD.svd(L, self.iterativeSoftImpute.k, p=self.iterativeSoftImpute.p, q=self.iterativeSoftImpute.qu, omega=self.oldV)
                    elif self.iterativeSoftImpute.svdAlg=="rsvdUpdate2":
                        
                        if self.j == 0: 
                            L = LinOperatorUtils.sparseLowRankOp(Y, self.oldU, self.oldS, self.oldV, parallel=True)
                            newU, newS, newV = RandomisedSVD.svd(L, self.iterativeSoftImpute.k, p=self.iterativeSoftImpute.p, q=self.iterativeSoftImpute.q)
                        else: 
                            #Need linear operator which is U s V 
                            L = LinOperatorUtils.lowRankOp(self.oldU, self.oldS, self.oldV)
                            Y = GeneralLinearOperator.asLinearOperator(Y, parallel=True)
                            newU, newS, newV = RandomisedSVD.updateSvd(L, self.oldU, self.oldS, self.oldV, Y, self.iterativeSoftImpute.k, p=self.iterativeSoftImpute.p)
                    else:
                        raise ValueError("Unknown SVD algorithm: " + self.iterativeSoftImpute.svdAlg)

                    if self.iterativeSoftImpute.weighted and i==0: 
                        delta = numpy.diag((u*newU.T).dot(newU))
                        pi = numpy.diag((v*newV.T).dot(newV))
                        lmbda = (maxS/numpy.max(delta*pi))*self.iterativeSoftImpute.rho
                        lmbdav = lmbda*delta*pi
                    elif not self.iterativeSoftImpute.weighted: 
                        lmbda = maxS*self.iterativeSoftImpute.rho
                        if i==0: 
                            logging.debug("lambda: " + str(lmbda))
                        lmbdav = lmbda
                        
                    newS = newS - lmbdav                    
                    #Soft threshold
                    newS = numpy.clip(newS, 0, numpy.max(newS))
                    

                    normOldZ = (self.oldS**2).sum()
                    normNewZmOldZ = (self.oldS**2).sum() + (newS**2).sum() - 2*numpy.trace((self.oldV.T.dot(newV*newS)).dot(newU.T.dot(self.oldU*self.oldS)))

                    #We can get newZ == oldZ in which case we break
                    if normNewZmOldZ < self.tol:
                        gamma = 0
                    elif abs(normOldZ) < self.tol:
                        gamma = self.iterativeSoftImpute.eps + 1
                    else:
                        gamma = normNewZmOldZ/normOldZ
                        
                    if self.iterativeSoftImpute.verbose: 
                        theta1 = (self.iterativeSoftImpute.k - numpy.linalg.norm(self.oldU.T.dot(newU), 'fro')**2)/self.iterativeSoftImpute.k
                        theta2 = (self.iterativeSoftImpute.k - numpy.linalg.norm(self.oldV.T.dot(newV), 'fro')**2)/self.iterativeSoftImpute.k
                        thetaS = numpy.linalg.norm(newS - self.oldS)**2/numpy.linalg.norm(newS)**2
                        self.iterativeSoftImpute.measures[i, :] = numpy.array([gamma, theta1, theta2, thetaS])

                    self.oldU = newU.copy()
                    self.oldS = newS.copy()
                    self.oldV = newV.copy()

                    logging.debug("Iteration " + str(i) + " gamma="+str(gamma))
                    i += 1

                if self.iterativeSoftImpute.postProcess: 
                    #Add the mean vectors 
                    previousS = newS
                    newU = numpy.c_[newU, numpy.array(X.mean(1)).ravel()]
                    newV = numpy.c_[newV, numpy.array(X.mean(0)).ravel()]
                    newS = self.iterativeSoftImpute.unshrink(X, newU, newV)  
                    
                    #Note that this increases the rank of U and V by 1 
                    #print("Difference in s after postprocessing: " + str(numpy.linalg.norm(previousS - newS[0:-1]))) 
                    logging.debug("Difference in s after postprocessing: " + str(numpy.linalg.norm(previousS - newS[0:-1]))) 

                logging.debug("Number of iterations for rho="+str(self.iterativeSoftImpute.rho) + ": " + str(i))
                self.j += 1
                return (newU, newS, newV)
Exemplo n.º 9
0
    print(i)
    if i == 10: 
        break 
    
    tempTimes = []
    tempErrors = []
    
    startTime = time.time()
    U, s, V = SparseUtils.svdPropack(X, k)
    tempTimes.append(time.time()-startTime)
    tempErrors.append(numpy.linalg.norm(numpy.array(X.todense()) - (U*s).dot(V.T))/numpy.linalg.norm(X.todense()))
    
    for p in ps: 
        for q in qs: 
            startTime = time.time()
            U2, s2, V2 = RandomisedSVD.svd(X, k, p, q)
            tempTimes.append(time.time()-startTime)
            tempErrors.append(numpy.linalg.norm(numpy.array(X.todense()) - (U2*s2).dot(V2.T))/numpy.linalg.norm(X.todense()) )
            
            startTime = time.time()
            if i == 0: 
                U3, s3, V3 = RandomisedSVD.svd(X, k, p, q)
            else: 
                U3, s3, V3 = RandomisedSVD.svd(X, k, p, q, omega=lastV)    
            tempTimes.append(time.time()-startTime)
            tempErrors.append(numpy.linalg.norm(numpy.array(X.todense()) - (U3*s3).dot(V3.T))/numpy.linalg.norm(X.todense()) )

    lastU = U2 
    lastS = s2 
    lastV = V2 
    def clusterFromIterator(self, graphListIterator, verbose=False):
        """
        Find a set of clusters for the graphs given by the iterator. If verbose 
        is true the each iteration is timed and bounded the results are returned 
        as lists.
        
        The difference between a weight matrix and the previous one should be
        positive.
        """
        clustersList = []
        decompositionTimeList = [] 
        kMeansTimeList = [] 
        boundList = []
        sinThetaList = []
        i = 0

        for subW in graphListIterator:
            if __debug__:
                Parameter.checkSymmetric(subW)

            if self.logStep and i % self.logStep == 0:
                logging.debug("Graph index: " + str(i))
            logging.debug("Clustering graph of size " + str(subW.shape))
            if self.alg!="efficientNystrom": 
                ABBA = GraphUtils.shiftLaplacian(subW)

            # --- Eigen value decomposition ---
            startTime = time.time()
            if self.alg=="IASC": 
                if i % self.T != 0:
                    omega, Q = self.approxUpdateEig(subW, ABBA, omega, Q)   
                    
                    if self.computeBound:
                        inds = numpy.flipud(numpy.argsort(omega))
                        Q = Q[:, inds]
                        omega = omega[inds]
                        bounds = self.pertBound(omega, Q, omegaKbot, AKbot, self.k2)
                        #boundList.append([i, bounds[0], bounds[1]])
                        
                        #Now use accurate values of norm of R and delta   
                        rank = Util.rank(ABBA.todense())
                        gamma, U = scipy.sparse.linalg.eigsh(ABBA, rank-1, which="LM", ncv = ABBA.shape[0])
                        #logging.debug("gamma=" + str(gamma))
                        bounds2 = self.realBound(omega, Q, gamma, AKbot, self.k2)                  
                        boundList.append([bounds[0], bounds[1], bounds2[0], bounds2[1]])      
                else: 
                    logging.debug("Computing exact eigenvectors")
                    self.storeInformation(subW, ABBA)

                    if self.computeBound: 
                        #omega, Q = scipy.sparse.linalg.eigsh(ABBA, min(self.k2*2, ABBA.shape[0]-1), which="LM", ncv = min(10*self.k2, ABBA.shape[0]))
                        rank = Util.rank(ABBA.todense())
                        omega, Q = scipy.sparse.linalg.eigsh(ABBA, rank-1, which="LM", ncv = ABBA.shape[0])
                        inds = numpy.flipud(numpy.argsort(omega))
                        omegaKbot = omega[inds[self.k2:]]  
                        QKbot = Q[:, inds[self.k2:]] 
                        AKbot = (QKbot*omegaKbot).dot(QKbot.T)
                        
                        omegaSort = numpy.flipud(numpy.sort(omega))
                        boundList.append([0]*4)      
                    else: 
                        omega, Q = scipy.sparse.linalg.eigsh(ABBA, min(self.k2, ABBA.shape[0]-1), which="LM", ncv = min(10*self.k2, ABBA.shape[0]))
                            
            elif self.alg == "nystrom":
                omega, Q = Nystrom.eigpsd(ABBA, self.k3)
            elif self.alg == "exact": 
                omega, Q = scipy.sparse.linalg.eigsh(ABBA, min(self.k1, ABBA.shape[0]-1), which="LM", ncv = min(15*self.k1, ABBA.shape[0]))
            elif self.alg == "efficientNystrom":
                omega, Q = EfficientNystrom.eigWeight(subW, self.k2, self.k1)
            elif self.alg == "randomisedSvd": 
                Q, omega, R = RandomisedSVD.svd(ABBA, self.k4)
            else:
                raise ValueError("Invalid Algorithm: " + str(self.alg))

            if self.computeSinTheta:
                omegaExact, QExact = scipy.linalg.eigh(ABBA.todense())
                inds = numpy.flipud(numpy.argsort(omegaExact))
                QExactKbot = QExact[:, inds[self.k1:]]
                inds = numpy.flipud(numpy.argsort(omega))
                QApproxK = Q[:,inds[:self.k1]]
                sinThetaList.append(scipy.linalg.norm(QExactKbot.T.dot(QApproxK)))
          
            decompositionTimeList.append(time.time()-startTime)                  
                  
            if self.alg=="IASC":
                self.storeInformation(subW, ABBA)
            
            # --- Kmeans ---
            startTime = time.time()
            inds = numpy.flipud(numpy.argsort(omega))

            standardiser = Standardiser()
            #For some very strange reason we get an overflow when computing the
            #norm of the rows of Q even though its elements are bounded by 1.
            #We'll ignore it for now
            try:
                V = standardiser.normaliseArray(Q[:, inds[0:self.k1]].real.T).T
            except FloatingPointError as e:
                logging.warn("FloatingPointError: " + str(e))
            V = VqUtils.whiten(V)
            if i == 0:
                centroids, distortion = vq.kmeans(V, self.k1, iter=self.nb_iter_kmeans)
            else:
                centroids = self.findCentroids(V, clusters[:subW.shape[0]])
                if centroids.shape[0] < self.k1:
                    nb_missing_centroids = self.k1 - centroids.shape[0]
                    random_centroids = V[numpy.random.randint(0, V.shape[0], nb_missing_centroids),:]
                    centroids = numpy.vstack((centroids, random_centroids))
                centroids, distortion = vq.kmeans(V, centroids) #iter can only be 1
            clusters, distortion = vq.vq(V, centroids)
            kMeansTimeList.append(time.time()-startTime)

            clustersList.append(clusters)

            #logging.debug("subW.shape: " + str(subW.shape))
            #logging.debug("len(clusters): " + str(len(clusters)))
            #from sandbox.util.ProfileUtils import ProfileUtils
            #logging.debug("Total memory usage: " + str(ProfileUtils.memory()/10**6) + "MB")
            if ProfileUtils.memory() > 10**9:
                ProfileUtils.memDisplay(locals())

            i += 1

        if verbose:
            eigenQuality = {"boundList" : boundList, "sinThetaList" : sinThetaList}
            return clustersList, numpy.array((decompositionTimeList, kMeansTimeList)).T, eigenQuality
        else:
            return clustersList
Exemplo n.º 11
0
            #Nystrom method 
            print("Running Nystrom")
            for j, nystromN in enumerate(nystromNs):  
                omega2, Q2 = Nystrom.eigpsd(L, nystromN)
                inds = numpy.flipud(numpy.argsort(omega2))
                omega2, Q2 = omega2[inds], Q2[:, inds]
                omega2k, Q2k = omega2[0:k], Q2[:, 0:k]
                
#                errors[i, j] += computeBound(L, omega, Q, omega2k, Q2k, k)
                errors[i, j] += computeSinTheta(Qkbot, Q2k)
            

            #Randomised SVD method 
            print("Running Random SVD")
            for j, r in enumerate(randSVDVecs):  
                Q4, omega4, R4 = RandomisedSVD.svd(L, r)
                inds = numpy.flipud(numpy.argsort(omega4))
                omega4, Q4 = omega4[inds], Q4[:, inds]
                omega4k, Q4k = omega4[0:k], Q4[:, 0:k]
                
#                errors[i, j+len(nystromNs)] += computeBound(L, omega, Q, omega4k, Q4k, k)
                errors[i, j+len(nystromNs)] += computeSinTheta(Qkbot, Q4k)
            
            
            #Incremental updates 
            print("Running Eigen-update")
            for j, l in enumerate(IASCL):  
                omega3, Q3 = eigenUpdate(lastL, L, lastOmegas[j], lastQs[j], l)
                inds = numpy.flipud(numpy.argsort(omega3)) 
                omega3, Q3 = omega3[inds], Q3[:, inds]
                omega3k, Q3k = omega3[0:k], Q3[:, 0:k]