Exemplo n.º 1
0
def qr_destroy(la):
    """
    Return QR decomposition of `la[0]`. Content of `la` gets destroyed in the process.

    Using this function should be less memory intense than calling `scipy.linalg.qr(la[0])`,
    because the memory used in `la[0]` is reclaimed earlier.
    """
    a = numpy.asfortranarray(la[0])
    del la[0], la # now `a` is the only reference to the input matrix
    m, n = a.shape
    # perform q, r = QR(a); code hacked out of scipy.linalg.qr
    logger.debug("computing QR of %s dense matrix" % str(a.shape))
    geqrf, = get_lapack_funcs(('geqrf',), (a,))
    qr, tau, work, info = geqrf(a, lwork=-1, overwrite_a=True)
    qr, tau, work, info = geqrf(a, lwork=work[0], overwrite_a=True)
    del a # free up mem
    assert info >= 0
    r = triu(qr[:n, :n])
    if m < n: # rare case, #features < #topics
        qr = qr[:, :m] # retains fortran order
    gorgqr, = get_lapack_funcs(('orgqr',), (qr,))
    q, work, info = gorgqr(qr, tau, lwork=-1, overwrite_a=True)
    q, work, info = gorgqr(qr, tau, lwork=work[0], overwrite_a=True)
    assert info >= 0, "qr failed"
    assert q.flags.f_contiguous
    return q, r
def qr_destroy(la):
    a = numpy.asfortranarray(la[0])
    del la[0], la # now `a` is the only reference to the input matrix
    m, n = a.shape
    # perform q, r = QR(component); code hacked out of scipy.linalg.qr
    logger.debug("computing QR of %s dense matrix" % str(a.shape))
    geqrf, = get_lapack_funcs(('geqrf',), (a,))
    qr, tau, work, info = geqrf(a, lwork = -1, overwrite_a = True)
    qr, tau, work, info = geqrf(a, lwork = work[0], overwrite_a = True)
    del a # free up mem
    assert info >= 0
    r = triu(qr[:n, :n])
    if m < n: # rare case, #features < #topics
        qr = qr[:, :m] # retains fortran order
    gorgqr, = get_lapack_funcs(('orgqr',), (qr,))
    q, work, info = gorgqr(qr, tau, lwork = -1, overwrite_a = True)
    q, work, info = gorgqr(qr, tau, lwork = work[0], overwrite_a = True)
    assert info >= 0, "qr failed"
    assert q.flags.f_contiguous
    return q, r
def qr_destroy(la):
    a = numpy.asfortranarray(la[0])
    del la[0], la  # now `a` is the only reference to the input matrix
    m, n = a.shape
    # perform q, r = QR(component); code hacked out of scipy.linalg.qr
    logger.debug("computing QR of %s dense matrix" % str(a.shape))
    geqrf, = get_lapack_funcs(('geqrf', ), (a, ))
    qr, tau, work, info = geqrf(a, lwork=-1, overwrite_a=True)
    qr, tau, work, info = geqrf(a, lwork=work[0], overwrite_a=True)
    del a  # free up mem
    assert info >= 0
    r = triu(qr[:n, :n])
    if m < n:  # rare case, #features < #topics
        qr = qr[:, :m]  # retains fortran order
    gorgqr, = get_lapack_funcs(('orgqr', ), (qr, ))
    q, work, info = gorgqr(qr, tau, lwork=-1, overwrite_a=True)
    q, work, info = gorgqr(qr, tau, lwork=work[0], overwrite_a=True)
    assert info >= 0, "qr failed"
    assert q.flags.f_contiguous
    return q, r
Exemplo n.º 4
0
def qr_decomposition(la):
    """Reduced QR decomposition."""
    print "+" * 100, "Performing reduced QR decomposition ..."
    a = numpy.asfortranarray(la[0])
    del la[0], la
    m, n = a.shape
    geqrf, = get_lapack_funcs(('geqrf', ), (a, ))
    qr, tau, work, info = geqrf(a, lwork=-1, overwrite_a=True)
    qr, tau, work, info = geqrf(a, lwork=work[0], overwrite_a=True)
    del a
    assert info >= 0
    r = triu(qr[:n, :n])
    if m < n:
        qr = qr[:, :m]
    gorgqr, = get_lapack_funcs(('orgqr', ), (qr, ))
    q, work, info = gorgqr(qr, tau, lwork=-1, overwrite_a=True)
    q, work, info = gorgqr(qr, tau, lwork=work[0], overwrite_a=True)
    assert info >= 0, "qr failed"
    assert q.flags.f_contiguous
    return q, r
Exemplo n.º 5
0
def qr_decomposition(la):
    """Reduced QR decomposition."""
    print "+"*100, "Performing reduced QR decomposition ..."
    a = numpy.asfortranarray(la[0])
    del la[0], la
    m, n = a.shape
    geqrf, = get_lapack_funcs(('geqrf',), (a,))
    qr, tau, work, info = geqrf(a, lwork=-1, overwrite_a=True)
    qr, tau, work, info = geqrf(a, lwork=work[0], overwrite_a=True)
    del a
    assert info >= 0
    r = triu(qr[:n, :n])
    if m < n:
        qr = qr[:, :m]
    gorgqr, = get_lapack_funcs(('orgqr',), (qr,))
    q, work, info = gorgqr(qr, tau, lwork=-1, overwrite_a=True)
    q, work, info = gorgqr(qr, tau, lwork=work[0], overwrite_a=True)
    assert info >= 0, "qr failed"
    assert q.flags.f_contiguous
    return q, r
Exemplo n.º 6
0
 def triu_indices(n, k=0):
     m = numpy.ones((n, n), int)
     a = triu(m, k)
     return numpy.where(a != 0)
    def merge(self, other, decay = 1.0):
        """
        Merge this Projection with another. 
        
        Content of `other` is destroyed in the process, so pass this function a 
        copy if you need it further.
        
        This is the optimized merge described in algorithm 5.
        """
        if other.u is None:
            # the other projection is empty => do nothing
            return
        if self.u is None:
            # we are empty => result of merge is the other projection, whatever it is
            self.u = other.u.copy()
            self.s = other.s.copy()
            return
        if self.m != other.m:
            raise ValueError("vector space mismatch: update has %s features, expected %s" %
                             (other.m, self.m))
        logger.info("merging projections: %s + %s" % (str(self.u.shape), str(other.u.shape)))
#        diff = numpy.dot(self.u.T, self.u) - numpy.eye(self.u.shape[1])
#        logger.info('orth error after=%f' % numpy.sum(diff * diff))
        m, n1, n2 = self.u.shape[0], self.u.shape[1], other.u.shape[1]
        # TODO Maybe keep the bases as elementary reflectors, without 
        # forming explicit matrices with gorgqr.
        # The only operation we ever need is basis^T*basis ond basis*component.
        # But how to do that in numpy? And is it fast(er)?
        
        # find component of u2 orthogonal to u1
        # IMPORTANT: keep matrices in suitable order for matrix products; failing to do so gives 8x lower performance :(
        self.u = numpy.asfortranarray(self.u) # does nothing if input already fortran-order array
        other.u = numpy.asfortranarray(other.u)
        gemm, = get_blas_funcs(('gemm',), (self.u,))
        logger.debug("constructing orthogonal component")
        c = gemm(1.0, self.u, other.u, trans_a = True)
        gemm(-1.0, self.u, c, beta = 1.0, c = other.u, overwrite_c = True)
        
        # perform q, r = QR(component); code hacked out of scipy.linalg.qr
        logger.debug("computing QR of %s dense matrix" % str(other.u.shape))
        geqrf, = get_lapack_funcs(('geqrf',), (other.u,))
        qr, tau, work, info = geqrf(other.u, lwork = -1, overwrite_a = True) # sometimes segfaults with overwrite_a=True...
        qr, tau, work, info = geqrf(other.u, lwork = work[0], overwrite_a = True) # sometimes segfaults with overwrite_a=True...
        del other.u
        assert info >= 0
        r = triu(qr[:n2, :n2])
        if m < n2: # rare case...
            qr = qr[:,:m] # retains fortran order
        gorgqr, = get_lapack_funcs(('orgqr',), (qr,))
        q, work, info = gorgqr(qr, tau, lwork = -1, overwrite_a = True)
        q, work, info = gorgqr(qr, tau, lwork = work[0], overwrite_a = True)
        assert info >= 0, "qr failed"
        assert q.flags.f_contiguous
        
        # find rotation that diagonalizes r
        k = numpy.bmat([[numpy.diag(decay * self.s), c * other.s], [matutils.pad(numpy.matrix([]).reshape(0, 0), n2, n1), r * other.s]])
        logger.debug("computing SVD of %s dense matrix" % str(k.shape))
        u_k, s_k, _ = numpy.linalg.svd(k, full_matrices = False) # TODO *ugly overkill*!! only need first self.k SVD factors... but there is no LAPACK wrapper for partial svd/eigendecomp in numpy :(
        
        k = clipSpectrum(s_k, self.k)
        u_k, s_k = u_k[:, :k], s_k[:k]
        
        # update & rotate current basis U
        logger.debug("updating orthonormal basis U")
        self.u = gemm(1.0, self.u, u_k[:n1]) # TODO temporarily creates an extra (m,k) dense array in memory. find a way to avoid this!
        gemm(1.0, q, u_k[n1:], beta = 1.0, c = self.u, overwrite_c = True) # u = [u,u']*u_k
        self.s = s_k
Exemplo n.º 8
0
    def merge(self, other, decay=1.0):
        """
        Merge this Projection with another. 
        
        Content of `other` is destroyed in the process, so pass this function a 
        copy if you need it further.
        
        This is the optimized merge described in algorithm 5.
        """
        if other.u is None:
            # the other projection is empty => do nothing
            return
        if self.u is None:
            # we are empty => result of merge is the other projection, whatever it is
            if other.s is None:
                # other.u contains a direct document chunk, not svd => perform svd
                docs = other.u
                assert scipy.sparse.issparse(docs)
                if self.m * self.k < 10000:
                    # SVDLIBC gives spurious results for small matrices.. run full
                    # LAPACK on them instead
                    logger.info("computing dense SVD of %s matrix" %
                                str(docs.shape))
                    u, s, vt = numpy.linalg.svd(docs.todense(),
                                                full_matrices=False)
                else:
                    try:
                        import sparsesvd
                    except ImportError:
                        raise ImportError(
                            "for LSA, the `sparsesvd` module is needed but not found; run `easy_install sparsesvd`"
                        )
                    logger.info("computing sparse SVD of %s matrix" %
                                str(docs.shape))
                    ut, s, vt = sparsesvd.sparsesvd(
                        docs, self.k + 30
                    )  # ask for a few extra factors, because for some reason SVDLIBC sometimes returns fewer factors than requested
                    u = ut.T
                    del ut
                del vt
                k = clipSpectrum(s, self.k)
                self.u = u[:, :k].copy('F')
                self.s = s[:k]
            else:
                self.u = other.u.copy('F')
                self.s = other.s.copy()
            return
        if self.m != other.m:
            raise ValueError(
                "vector space mismatch: update has %s features, expected %s" %
                (other.m, self.m))
        logger.info("merging projections: %s + %s" %
                    (str(self.u.shape), str(other.u.shape)))
        m, n1, n2 = self.u.shape[0], self.u.shape[1], other.u.shape[1]
        if other.s is None:
            other.u = other.u.todense()
            other.s = 1.0  # broadcasting will promote this to eye(n2) where needed
        # TODO Maybe keep the bases as elementary reflectors, without
        # forming explicit matrices with gorgqr.
        # The only operation we ever need is basis^T*basis ond basis*component.
        # But how to do that in numpy? And is it fast(er)?

        # find component of u2 orthogonal to u1
        # IMPORTANT: keep matrices in suitable order for matrix products; failing to do so gives 8x lower performance :(
        self.u = numpy.asfortranarray(
            self.u)  # does nothing if input already fortran-order array
        other.u = numpy.asfortranarray(other.u)
        gemm, = get_blas_funcs(('gemm', ), (self.u, ))
        logger.debug("constructing orthogonal component")
        c = gemm(1.0, self.u, other.u, trans_a=True)
        gemm(-1.0, self.u, c, beta=1.0, c=other.u, overwrite_c=True)

        # perform q, r = QR(component); code hacked out of scipy.linalg.qr
        logger.debug("computing QR of %s dense matrix" % str(other.u.shape))
        geqrf, = get_lapack_funcs(('geqrf', ), (other.u, ))
        qr, tau, work, info = geqrf(
            other.u, lwork=-1,
            overwrite_a=True)  # sometimes segfaults with overwrite_a=True...?
        qr, tau, work, info = geqrf(
            other.u, lwork=work[0],
            overwrite_a=True)  # sometimes segfaults with overwrite_a=True...?
        del other.u
        assert info >= 0
        r = triu(qr[:n2, :n2])
        if m < n2:  # rare case, #features < #topics
            qr = qr[:, :m]  # retains fortran order
        gorgqr, = get_lapack_funcs(('orgqr', ), (qr, ))
        q, work, info = gorgqr(qr, tau, lwork=-1, overwrite_a=True)
        q, work, info = gorgqr(qr, tau, lwork=work[0], overwrite_a=True)
        assert info >= 0, "qr failed"
        assert q.flags.f_contiguous

        # find rotation that diagonalizes r
        k = numpy.bmat([[numpy.diag(decay * self.s), c * other.s],
                        [
                            matutils.pad(
                                numpy.matrix([]).reshape(0, 0), min(m, n2),
                                n1), r * other.s
                        ]])
        logger.debug("computing SVD of %s dense matrix" % str(k.shape))
        u_k, s_k, _ = numpy.linalg.svd(
            k, full_matrices=False
        )  # TODO *ugly overkill*!! only need first self.k SVD factors... but there is no LAPACK wrapper for partial svd/eigendecomp in numpy :(

        k = clipSpectrum(s_k, self.k)
        u_k, s_k = u_k[:, :k], s_k[:k]

        # update & rotate current basis U
        logger.debug("updating orthonormal basis U")
        self.u = gemm(
            1.0, self.u, u_k[:n1]
        )  # TODO temporarily creates an extra (m,k) dense array in memory. find a way to avoid this!
        gemm(1.0, q, u_k[n1:], beta=1.0, c=self.u,
             overwrite_c=True)  # u = [u,u']*u_k
        self.s = s_k
    def merge(self, other, decay=1.0):
        """
        Merge this Projection with another. 
        
        Content of `other` is destroyed in the process, so pass this function a 
        copy if you need it further.
        
        This is the optimized merge described in algorithm 5.
        """
        if other.u is None:
            # the other projection is empty => do nothing
            return
        if self.u is None:
            # we are empty => result of merge is the other projection, whatever it is
            self.u = other.u.copy()
            self.s = other.s.copy()
            return
        if self.m != other.m:
            raise ValueError(
                "vector space mismatch: update has %s features, expected %s" %
                (other.m, self.m))
        logger.info("merging projections: %s + %s" %
                    (str(self.u.shape), str(other.u.shape)))
        #        diff = numpy.dot(self.u.T, self.u) - numpy.eye(self.u.shape[1])
        #        logger.info('orth error after=%f' % numpy.sum(diff * diff))
        m, n1, n2 = self.u.shape[0], self.u.shape[1], other.u.shape[1]
        # TODO Maybe keep the bases as elementary reflectors, without
        # forming explicit matrices with gorgqr.
        # The only operation we ever need is basis^T*basis ond basis*component.
        # But how to do that in numpy? And is it fast(er)?

        # find component of u2 orthogonal to u1
        # IMPORTANT: keep matrices in suitable order for matrix products; failing to do so gives 8x lower performance :(
        self.u = numpy.asfortranarray(
            self.u)  # does nothing if input already fortran-order array
        other.u = numpy.asfortranarray(other.u)
        gemm, = get_blas_funcs(('gemm', ), (self.u, ))
        logger.debug("constructing orthogonal component")
        c = gemm(1.0, self.u, other.u, trans_a=True)
        gemm(-1.0, self.u, c, beta=1.0, c=other.u, overwrite_c=True)

        # perform q, r = QR(component); code hacked out of scipy.linalg.qr
        logger.debug("computing QR of %s dense matrix" % str(other.u.shape))
        geqrf, = get_lapack_funcs(('geqrf', ), (other.u, ))
        qr, tau, work, info = geqrf(
            other.u, lwork=-1,
            overwrite_a=True)  # sometimes segfaults with overwrite_a=True...
        qr, tau, work, info = geqrf(
            other.u, lwork=work[0],
            overwrite_a=True)  # sometimes segfaults with overwrite_a=True...
        del other.u
        assert info >= 0
        r = triu(qr[:n2, :n2])
        if m < n2:  # rare case...
            qr = qr[:, :m]  # retains fortran order
        gorgqr, = get_lapack_funcs(('orgqr', ), (qr, ))
        q, work, info = gorgqr(qr, tau, lwork=-1, overwrite_a=True)
        q, work, info = gorgqr(qr, tau, lwork=work[0], overwrite_a=True)
        assert info >= 0, "qr failed"
        assert q.flags.f_contiguous

        # find rotation that diagonalizes r
        k = numpy.bmat([[
            numpy.diag(decay * self.s), c * other.s
        ], [matutils.pad(numpy.matrix([]).reshape(0, 0), n2, n1),
            r * other.s]])
        logger.debug("computing SVD of %s dense matrix" % str(k.shape))
        u_k, s_k, _ = numpy.linalg.svd(
            k, full_matrices=False
        )  # TODO *ugly overkill*!! only need first self.k SVD factors... but there is no LAPACK wrapper for partial svd/eigendecomp in numpy :(

        k = clipSpectrum(s_k, self.k)
        u_k, s_k = u_k[:, :k], s_k[:k]

        # update & rotate current basis U
        logger.debug("updating orthonormal basis U")
        self.u = gemm(
            1.0, self.u, u_k[:n1]
        )  # TODO temporarily creates an extra (m,k) dense array in memory. find a way to avoid this!
        gemm(1.0, q, u_k[n1:], beta=1.0, c=self.u,
             overwrite_c=True)  # u = [u,u']*u_k
        self.s = s_k
 def merge(self, other, decay = 1.0):
     """
     Merge this Projection with another. 
     
     Content of `other` is destroyed in the process, so pass this function a 
     copy if you need it further.
     
     This is the optimized merge described in algorithm 5.
     """
     if other.u is None:
         # the other projection is empty => do nothing
         return
     if self.u is None:
         # we are empty => result of merge is the other projection, whatever it is
         if other.s is None:
             # other.u contains a direct document chunk, not svd => perform svd
             docs = other.u
             assert scipy.sparse.issparse(docs)
             if self.m * self.k < 10000:
                 # SVDLIBC gives spurious results for small matrices.. run full
                 # LAPACK on them instead
                 logger.info("computing dense SVD of %s matrix" % str(docs.shape))
                 u, s, vt = numpy.linalg.svd(docs.todense(), full_matrices = False)
             else:
                 try:
                     import sparsesvd
                 except ImportError:
                     raise ImportError("for LSA, the `sparsesvd` module is needed but not found; run `easy_install sparsesvd`")
                 logger.info("computing sparse SVD of %s matrix" % str(docs.shape))
                 ut, s, vt = sparsesvd.sparsesvd(docs, self.k + 30) # ask for a few extra factors, because for some reason SVDLIBC sometimes returns fewer factors than requested
                 u = ut.T
                 del ut
             del vt
             k = clipSpectrum(s, self.k)
             self.u = u[:, :k].copy('F')
             self.s = s[:k]
         else:
             self.u = other.u.copy('F')
             self.s = other.s.copy()
         return
     if self.m != other.m:
         raise ValueError("vector space mismatch: update has %s features, expected %s" %
                          (other.m, self.m))
     logger.info("merging projections: %s + %s" % (str(self.u.shape), str(other.u.shape)))
     m, n1, n2 = self.u.shape[0], self.u.shape[1], other.u.shape[1]
     if other.s is None:
         other.u = other.u.todense()
         other.s = 1.0 # broadcasting will promote this to eye(n2) where needed
     # TODO Maybe keep the bases as elementary reflectors, without 
     # forming explicit matrices with gorgqr.
     # The only operation we ever need is basis^T*basis ond basis*component.
     # But how to do that in numpy? And is it fast(er)?
     
     # find component of u2 orthogonal to u1
     # IMPORTANT: keep matrices in suitable order for matrix products; failing to do so gives 8x lower performance :(
     self.u = numpy.asfortranarray(self.u) # does nothing if input already fortran-order array
     other.u = numpy.asfortranarray(other.u)
     gemm, = get_blas_funcs(('gemm',), (self.u,))
     logger.debug("constructing orthogonal component")
     c = gemm(1.0, self.u, other.u, trans_a = True)
     gemm(-1.0, self.u, c, beta = 1.0, c = other.u, overwrite_c = True)
     
     # perform q, r = QR(component); code hacked out of scipy.linalg.qr
     logger.debug("computing QR of %s dense matrix" % str(other.u.shape))
     geqrf, = get_lapack_funcs(('geqrf',), (other.u,))
     qr, tau, work, info = geqrf(other.u, lwork = -1, overwrite_a = True) # sometimes segfaults with overwrite_a=True...?
     qr, tau, work, info = geqrf(other.u, lwork = work[0], overwrite_a = True) # sometimes segfaults with overwrite_a=True...?
     del other.u
     assert info >= 0
     r = triu(qr[:n2, :n2])
     if m < n2: # rare case, #features < #topics
         qr = qr[:, :m] # retains fortran order
     gorgqr, = get_lapack_funcs(('orgqr',), (qr,))
     q, work, info = gorgqr(qr, tau, lwork = -1, overwrite_a = True)
     q, work, info = gorgqr(qr, tau, lwork = work[0], overwrite_a = True)
     assert info >= 0, "qr failed"
     assert q.flags.f_contiguous
     
     # find rotation that diagonalizes r
     k = numpy.bmat([[numpy.diag(decay * self.s), c * other.s], [matutils.pad(numpy.matrix([]).reshape(0, 0), min(m, n2), n1), r * other.s]])
     logger.debug("computing SVD of %s dense matrix" % str(k.shape))
     try:
         # in numpy < 1.1.0, running SVD sometimes results in "LinAlgError: SVD did not converge'.
         # for these early versions of numpy, catch the error and try to compute
         # SVD again, but over k*k^T.
         # see http://www.mail-archive.com/[email protected]/msg07224.html and
         # bug ticket http://projects.scipy.org/numpy/ticket/706
         u_k, s_k, _ = numpy.linalg.svd(k, full_matrices = False) # TODO *ugly overkill*!! only need first self.k SVD factors... but there is no LAPACK wrapper for partial svd/eigendecomp in numpy :(
     except numpy.linalg.LinAlgError:
         logging.error("SVD(A) failed; trying SVD(A * A^T)")
         u_k, s_k, _ = numpy.linalg.svd(numpy.dot(k, k.T), full_matrices = False) # if this fails too, give up
         s_k = numpy.sqrt(s_k)
     
     k = clipSpectrum(s_k, self.k)
     u_k, s_k = u_k[:, :k], s_k[:k]
     
     # update & rotate current basis U
     logger.debug("updating orthonormal basis U")
     self.u = gemm(1.0, self.u, u_k[:n1]) # TODO temporarily creates an extra (m,k) dense array in memory. find a way to avoid this!
     gemm(1.0, q, u_k[n1:], beta = 1.0, c = self.u, overwrite_c = True) # u = [u,u']*u_k
     self.s = s_k