Exemplo n.º 1
0
def __cngrnc(r, x, alpha = 1.0, trans = 'N', offsetx = 0):
    """
    In-place congruence transformation

        x := alpha * r * x * r' if trans = 'N'.

        x := alpha * r' * x * r if trans = 'T'.

    r is a square n x n-matrix. 
    x is a square n x n-matrix or n^2-vector; the n x n block that is
    transformed starts at position offsetx of x.

    The transformation is built from tril(x) only (see the identities
    below), so x is treated as symmetric.

    Exactly n diagonal entries are rescaled, so anything stored in x
    after the n x n block (for instance a second block of a stacked
    vector) is left untouched.
    """

    n = r.size[0]

    # Scale the n diagonal entries of the block by 1/2.  Passing n keeps
    # the stride-(n+1) scaling from running on past the end of the block.
    blas.scal(0.5, x, n = n, inc = n+1, offset = offsetx)

    # a := r*tril(x) if trans is 'N',  a := tril(x)*r if trans is 'T'.
    a = +r
    side = 'R' if trans == 'N' else 'L'
    blas.trmm(x, a, side = side, m = n, n = n, ldA = n, ldB = n,
        offsetA = offsetx)

    # x := alpha * (a * r' + r * a')  
    #    = alpha * r * (tril(x) + tril(x)') * r' if trans is 'N'.
    # x := alpha * (a' * r  + r' * a)  
    #    = alpha * r' * (tril(x)' + tril(x)) * r if trans = 'T'.
    blas.syr2k(r, a, x, trans = trans, alpha = alpha, n = n, k = n, 
        ldB = n, ldC = n, offsetC = offsetx)
Exemplo n.º 2
0
def llt(L):
    """
    Supernodal multifrontal Cholesky product:

    .. math::
         X = LL^T

    where :math:`L` is lower-triangular. On exit, the argument `L`
    contains the product :math:`X` and is no longer flagged as a factor.

    The supernodes are visited in the order given by `L.symb.snpost`.
    Each supernode pops one update matrix per child from a stack, so
    that order must list every child before its parent.

    :param L:    :py:class:`cspmatrix` (factor)
    """

    assert isinstance(L, cspmatrix) and L.is_factor is True, "L must be a cspmatrix factor"

    symb = L.symb
    snpost = symb.snpost
    snptr = symb.snptr
    chptr = symb.chptr

    relptr = symb.relptr
    relidx = symb.relidx
    blkptr = symb.blkptr
    blkval = L.blkval

    # update matrices of already processed supernodes, paired with their index
    stack = []

    for k in snpost:

        nn = snptr[k+1]-snptr[k]       # |Nk|
        na = relptr[k+1]-relptr[k]     # |Ak|
        nj = na + nn
        offset = blkptr[k]             # start of supernode k in blkval

        # compute [I; L_{Ak,Nk}]*D_k*[I;L_{Ak,Nk}]'; store in F
        blas.trmm(blkval, blkval, side = "R", m = na, n = nn, ldA = nj,
                  ldB = nj, offsetA = offset, offsetB = offset+nn)
        F = matrix(0.0, (nj, nj))
        blas.syrk(blkval, F, n = nj, k = nn, offsetA = offset, ldA = nj)

        # if supernode k has any children, add their update matrices to F
        for _ in range(chptr[k], chptr[k+1]):
            Ui, i = stack.pop()
            frontal_add_update(F, Ui, relidx, relptr, i)

        # if supernode k is not a root node, push update matrix onto stack
        if na > 0:
            Uk = matrix(0.0, (na, na))
            lapack.lacpy(F, Uk, m = na, n = na, uplo = 'L', offsetA = nn*nj+nn, ldA = nj)
            stack.append((Uk, k))

        # copy leading Nk columns of F to blkval
        lapack.lacpy(F, blkval, m = nj, n = nn, ldA = nj, uplo = 'L',
                     ldB = nj, offsetB = offset)

    # blkval now holds the product, not a Cholesky factor
    L._is_factor = False
Exemplo n.º 3
0
    def cngrnc(r, x, alpha = 1.0):
        """
        Congruence transformation, carried out in place on x:

            x := alpha * r'*x*r.

        r and x are square matrices.  x is reshaped to n x n, where n is
        taken from the enclosing scope.
        """

        # Halve the diagonal of x, so that for symmetric x
        # tril(x) + tril(x)' equals the original x.
        x[::n+1] *= 0.5

        # lower_r := tril(x)*r
        xmat = matrix(x, (n,n))
        lower_r = +r
        blas.trmm(xmat, lower_r, side='L')

        # xmat := alpha*(r'*lower_r + lower_r'*r)
        blas.syr2k(r, lower_r, xmat, trans = 'T', alpha = alpha)

        # write the result back into the caller's x
        x[:] = xmat[:]
Exemplo n.º 4
0
    def cngrnc(r, x, alpha = 1.0):
        """
        Congruence transformation, carried out in place on x:

            x := alpha * r'*x*r.

        r and x are square matrices.  x is reshaped to n x n, where n is
        taken from the enclosing scope.
        """

        # Halve the diagonal of x, so that for symmetric x
        # tril(x) + tril(x)' equals the original x.
        x[::n+1] *= 0.5

        # lower_r := tril(x)*r
        xmat = matrix(x, (n,n))
        lower_r = +r
        blas.trmm(xmat, lower_r, side = 'L')

        # xmat := alpha*(r'*lower_r + lower_r'*r)
        blas.syr2k(r, lower_r, xmat, trans = 'T', alpha = alpha)

        # write the result back into the caller's x
        x[:] = xmat[:]
Exemplo n.º 5
0
def __cngrnc(r, x, alpha=1.0, trans='N', offsetx=0):
    """
    In-place congruence transformation

        x := alpha * r * x * r' if trans = 'N'.

        x := alpha * r' * x * r if trans = 'T'.

    r is a square n x n-matrix. 
    x is a square n x n-matrix or n^2-vector; the n x n block that is
    transformed starts at position offsetx of x.

    The transformation is built from tril(x) only (see the identities
    below), so x is treated as symmetric.

    Exactly n diagonal entries are rescaled, so anything stored in x
    after the n x n block (for instance a second block of a stacked
    vector) is left untouched.
    """

    n = r.size[0]

    # Scale the n diagonal entries of the block by 1/2.  Passing n keeps
    # the stride-(n+1) scaling from running on past the end of the block.
    blas.scal(0.5, x, n=n, inc=n + 1, offset=offsetx)

    # a := r*tril(x) if trans is 'N',  a := tril(x)*r if trans is 'T'.
    a = +r
    side = 'R' if trans == 'N' else 'L'
    blas.trmm(x, a, side=side, m=n, n=n, ldA=n, ldB=n, offsetA=offsetx)

    # x := alpha * (a * r' + r * a')
    #    = alpha * r * (tril(x) + tril(x)') * r' if trans is 'N'.
    # x := alpha * (a' * r  + r' * a)
    #    = alpha * r' * (tril(x)' + tril(x)) * r if trans = 'T'.
    blas.syr2k(r,
               a,
               x,
               trans=trans,
               alpha=alpha,
               n=n,
               k=n,
               ldB=n,
               ldC=n,
               offsetC=offsetx)
Exemplo n.º 6
0
    def fsolve(x, y, z):
        """
        Solves the system of equations

            [ 0  G'*W^{-1} ] [ ux ] = [ bx ]
            [ G  -W'       ] [ uz ]   [ bz ]

        x and z hold the right-hand sides on entry and are overwritten
        in place.  y is not referenced.  N, rr, q, H, r and W are taken
        from the enclosing scope.
        """
        #  Compute bx := bx + G'*W^{-1}*W^{-T}*bz
        work = matrix(0., (N, 1))
        for col in range(N):
            start = N * col
            # work := Z * rr[start:start+N], Z being the N x N symmetric
            # block stored in z from position 1 on
            blas.symv(z, rr, work, ldA=N, offsetA=1, n=N, offsetx=start)
            x[col] += blas.dot(rr, work, n=N, offsetx=start)
        blas.symv(z, q, work, ldA=N, offsetA=1, n=N)
        x[N] += blas.dot(q, work) + z[0] * W['di'][0]**2

        #  Solve G'*W^{-1}*W^{-T}*G*ux = bx
        lapack.potrs(H, x)

        # Compute bz := -W^{-T}*(bz-G*ux)
        # z -= G*x
        z[1::N + 1] -= x[:-1]
        z -= x[-1]

        # Apply scaling: first the scalar entry z[0] ...
        z[0] *= -W['di'][0]

        # ... then the N x N block: halve its diagonal, form
        # scaled := tril(block)*r, and combine both halves with syr2k.
        blas.scal(0.5, z, n=N, offset=1, inc=N + 1)
        scaled = +r
        blas.trmm(z, scaled, ldA=N, offsetA=1, n=N, m=N)
        blas.syr2k(r, scaled, z, trans='T', offsetC=1, ldC=N, n=N, k=N,
                   alpha=-1.0)
Exemplo n.º 7
0
def cngrnc(r, x, n, xstart=0, alpha=1.0):
    """
    Congruence transformation of one n x n block stored inside x

    X := alpha * r'*X*r,  where X = x[xstart:xstart + n*n] as n x n.

    r is a square matrix, and the block X is a symmetric matrix.
    The result is written back into the same slice of x.
    """

    # Dense reference formula for the result:
    #     alpha * r.T * matrix(X, (n, n)) * r

    stop = xstart + n * n

    # Halve the diagonal of the block, so that tril(X) + tril(X)'
    # equals X.
    x[xstart:stop:n + 1] *= 0.5

    # lower_r := tril(X)*r
    block = matrix(x[xstart:stop], (n, n))
    lower_r = +r
    blas.trmm(block, lower_r, side='L')

    # block := alpha*(r'*lower_r + lower_r'*r)
    blas.syr2k(r, lower_r, block, trans='T', alpha=alpha)

    # store the transformed block in its original position
    x[xstart:stop] = block[:]
Exemplo n.º 8
0
def __scale(L, Y, U, adj=False, inv=False, factored_updates=True):
    """
    Apply a supernodal scaling, in place, to every matrix in U.

    The supernodes of `L.symb` are visited in reverse `snpost` order.
    For each supernode k a dense frontal matrix F is assembled from the
    matching block of `Y.blkval` and, for non-root supernodes, from a
    block Vk popped from an internal stack.  The blocks for the children
    of k are extracted from F and pushed onto that stack.  Each `Ut` in
    `U` is then updated through its `blkval` array:

    1. the (Nk, Nk) block is symmetrized from its lower triangle;
    2. the supernode block is combined with the triangular block of
       `L.blkval`, once from the right and once from the left;
    3. the strict upper triangle of the (Nk, Nk) block is zeroed;
    4. for non-root supernodes, the (Ak, Nk) block is combined with the
       trailing triangular block of F.

    NOTE(review): presumably the scaling kernel behind a Hessian-type
    operator; confirm against the callers.

    :param L:  object providing `symb` and `blkval` (read only here)
    :param Y:  object providing `blkval` (read only here)
    :param U:  iterable of objects whose `blkval` is modified in place
    :param adj:  True or False; selects the transposition flags.  Any
                 other value leaves the flags unset and the first use
                 fails with a NameError.
    :param inv:  if `inv is False`, steps 2 use `blas.trsm` and step 4
                 uses `blas.trmm`; otherwise the two are swapped
    :param factored_updates:  if true, child blocks come from
                 `frontal_get_update_factor`; otherwise they come from
                 `frontal_get_update` and the trailing block of F is
                 factorized with `lapack.potrf` before use
    """

    symb = L.symb
    snpost = symb.snpost
    snptr = symb.snptr
    chptr = symb.chptr
    chidx = symb.chidx

    relptr = symb.relptr
    relidx = symb.relidx
    blkptr = symb.blkptr

    # The transposition flags depend only on the arguments, so they are
    # chosen once.  With factored updates Vk = Lk'*Lk, otherwise
    # Vk = Lk*Lk', which flips the flag used for the trailing block.
    if adj is False:
        tr = ['T', 'N']
        trns = 'N' if factored_updates else 'T'
    elif adj is True:
        tr = ['N', 'T']
        trns = 'T' if factored_updates else 'N'

    # The forward and inverse scalings differ only in which triangular
    # kernel is applied at each step.
    if inv is False:
        diag_kernel, update_kernel = blas.trsm, blas.trmm
    else:
        diag_kernel, update_kernel = blas.trmm, blas.trsm

    stack = []

    for k in reversed(list(snpost)):

        nn = snptr[k + 1] - snptr[k]  # |Nk|
        na = relptr[k + 1] - relptr[k]  # |Ak|
        nj = na + nn
        off = blkptr[k]  # start of supernode k in the blkval arrays

        # leading Nk columns of F come from Y
        F = matrix(0.0, (nj, nj))
        lapack.lacpy(Y.blkval, F, m=nj, n=nn, ldA=nj, offsetA=off, uplo='L')

        # if supernode k is not a root node:
        if na > 0:
            # copy Vk to 2,2 block of F
            Vk = stack.pop()
            lapack.lacpy(Vk, F, ldB=nj, offsetB=nn * (nj + 1), m=na, n=na,
                         uplo='L')

        # if supernode k has any children:
        for ii in range(chptr[k], chptr[k + 1]):
            i = chidx[ii]
            if factored_updates:
                r = relidx[relptr[i]:relptr[i + 1]]
                stack.append(frontal_get_update_factor(F, r, nn, na))
            else:
                stack.append(frontal_get_update(F, relidx, relptr, i))

        # without factored updates, Vk must be factorized before it is
        # used as a triangular operand (only after the children have read F)
        if na > 0 and not factored_updates:
            lapack.potrf(F, offsetA=nj * nn + nn, n=na, ldA=nj)

        for Ut in U:
            # symmetrize (1,1) block of Ut_{k} and scale
            U11 = matrix(0.0, (nn, nn))
            lapack.lacpy(Ut.blkval, U11, offsetA=off, m=nn, n=nn, ldA=nj,
                         uplo='L')
            U11 += U11.T
            U11[::nn + 1] *= 0.5
            lapack.lacpy(U11, Ut.blkval, offsetB=off, m=nn, n=nn, ldB=nj,
                         uplo='N')

            # apply the triangular block of L from the right to the whole
            # supernode block, then from the left to the (Nk, Nk) block
            diag_kernel(L.blkval, Ut.blkval, side='R', transA=tr[0],
                        m=nj, n=nn, offsetA=off, ldA=nj,
                        offsetB=off, ldB=nj)
            diag_kernel(L.blkval, Ut.blkval, m=nn, n=nn, transA=tr[1],
                        offsetA=off, offsetB=off,
                        ldA=nj, ldB=nj)

            # zero-out strict upper triangular part of {Nj,Nj} block
            for i in range(1, nn):
                blas.scal(0.0, Ut.blkval, offset=off + nj * i, n=i)

            # apply the trailing triangular block of F to the (Ak, Nk) block
            if na > 0:
                update_kernel(F, Ut.blkval, m=na, n=nn, transA=trns,
                              offsetA=nj * nn + nn, ldA=nj,
                              offsetB=off + nn, ldB=nj)

    return
Exemplo n.º 9
0
    def F(W):
        """
        Generate a solver for

                                             A'(uz0) = bx[0]
                                          -uz0 - uz1 = bx[1] 
            A(ux[0]) - ux[1] - r0*r0' * uz0 * r0*r0' = bz0 
                     - ux[1] - r1*r1' * uz1 * r1*r1' = bz1.

        uz0, uz1, bz0, bz1 are symmetric m x m-matrices.
        ux[0], bx[0] are n-vectors.
        ux[1], bx[1] are symmetric m x m-matrices.

        r0 and r1 are W['r'][0] and W['r'][1].  The work arrays L, U, s,
        Uti, Us, S, Asc, x1, tmp, the operator A and the dimensions m, n,
        msq, mpckd are taken from the enclosing scope.  The function
        returns the nested function solve(x, y, z).

        We first calculate a congruence that diagonalizes r0*r0' and r1*r1':
 
            U' * r0 * r0' * U = I,  U' * r1 * r1' * U = S.

        We then make a change of variables

            usx[0] = ux[0],  
            usx[1] = U' * ux[1] * U  
              usz0 = U^-1 * uz0 * U^-T  
              usz1 = U^-1 * uz1 * U^-T 

        and define 

              As() = U' * A() * U  
            bsx[1] = U^-1 * bx[1] * U^-T
              bsz0 = U' * bz0 * U  
              bsz1 = U' * bz1 * U.  

        This gives

                             As'(usz0) = bx[0]
                          -usz0 - usz1 = bsx[1] 
            As(usx[0]) - usx[1] - usz0 = bsz0 
                -usx[1] - S * usz1 * S = bsz1.


        1. Eliminate usz0, usz1 using equations 3 and 4,

               usz0 = As(usx[0]) - usx[1] - bsz0
               usz1 = -S^-1 * (usx[1] + bsz1) * S^-1.

           This gives two equations in usx[0] and usx[1].

               As'(As(usx[0]) - usx[1]) = bx[0] + As'(bsz0)

               -As(usx[0]) + usx[1] + S^-1 * usx[1] * S^-1
                   = bsx[1] - bsz0 - S^-1 * bsz1 * S^-1.


        2. Eliminate usx[1] using equation 2:

               usx[1] + S * usx[1] * S 
                   = S * ( As(usx[0]) + bsx[1] - bsz0 ) * S - bsz1

           i.e., with Gamma[i,j] = 1.0 + S[i,i] * S[j,j],
 
               usx[1] = ( S * As(usx[0]) * S ) ./ Gamma 
                        + ( S * ( bsx[1] - bsz0 ) * S - bsz1 ) ./ Gamma.

           This gives an equation in usx[0].

               As'( As(usx[0]) ./ Gamma ) 
                   = bx0 + As'(bsz0) + 
                     As'( (S * ( bsx[1] - bsz0 ) * S - bsz1) ./ Gamma )
                   = bx0 + As'( ( bsz0 - bsz1 + S * bsx[1] * S ) ./ Gamma ).

        """

        # Calculate U s.t.
        #
        #     U' * r0*r0' * U = I,   U' * r1*r1' * U = diag(s).

        # Cholesky factorization r0 * r0' = L * L'
        blas.syrk(W['r'][0], L)
        lapack.potrf(L)

        # SVD L^-1 * r1 = U * diag(s) * V'
        blas.copy(W['r'][1], U)
        blas.trsm(L, U)
        lapack.gesvd(U, s, jobu='O')

        # s := s**2
        s[:] = s**2

        # Uti := U
        blas.copy(U, Uti)

        # U := L^-T * U
        blas.trsm(L, U, transA='T')

        # Uti := L * Uti = U^-T
        blas.trmm(L, Uti)

        # Us := U * diag(s)^-1
        # (one strided solve per row of Us: k = 0 makes s act as a
        # diagonal matrix)
        blas.copy(U, Us)
        for i in range(m):
            blas.tbsv(s, Us, n=m, k=0, ldA=1, incx=m, offsetx=i)

        # S is m x m with lower triangular entries s[i] * s[j]
        # sqrtG is m x m with lower triangular entries sqrt(1.0 + s[i]*s[j])
        # Upper triangular entries are undefined but nonzero.

        blas.scal(0.0, S)
        blas.syrk(s, S)
        Gamma = 1.0 + S
        sqrtG = sqrt(Gamma)

        # Asc[i] = (U' * Ai * U ) ./  sqrtG,  for i = 1, ..., n
        #        = Asi ./ sqrt(Gamma)
        blas.copy(A, Asc)
        misc.scale(
            Asc,  # only 'r' part of the dictionary is used   
            {
                'dnl': matrix(0.0, (0, 1)),
                'dnli': matrix(0.0, (0, 1)),
                'd': matrix(0.0, (0, 1)),
                'di': matrix(0.0, (0, 1)),
                'v': [],
                'beta': [],
                'r': [U],
                'rti': [U]
            })
        # elementwise division of each column of Asc by sqrtG
        for i in range(n):
            blas.tbsv(sqrtG, Asc, n=msq, k=0, ldA=1, offsetx=i * msq)

        # Convert columns of Asc to packed storage
        misc.pack2(Asc, {'l': 0, 'q': [], 's': [m]})

        # Cholesky factorization of Asc' * Asc.
        H = matrix(0.0, (n, n))
        blas.syrk(Asc, H, trans='T', k=mpckd)
        lapack.potrf(H)

        def solve(x, y, z):
            """
            Solve the system described in the docstring of F, in place.

            x[0], x[1] and z are overwritten; y is not referenced.  z
            stores two consecutive blocks of msq entries each (z0 in
            z[:msq], z1 in z[msq:]).

            1. Solve for usx[0]:

               Asc'(Asc(usx[0]))
                   = bx0 + Asc'( ( bsz0 - bsz1 + S * bsx[1] * S ) ./ sqrtG)
                   = bx0 + Asc'( ( bsz0 + S * ( bsx[1] - bssz1) S ) 
                     ./ sqrtG)

               where bsx[1] = U^-1 * bx[1] * U^-T, bsz0 = U' * bz0 * U, 
               bsz1 = U' * bz1 * U, bssz1 = S^-1 * bsz1 * S^-1 

            2. Solve for usx[1]:

               usx[1] + S * usx[1] * S 
                   = S * ( As(usx[0]) + bsx[1] - bsz0 ) * S - bsz1 

               usx[1] 
                   = ( S * (As(usx[0]) + bsx[1] - bsz0) * S - bsz1) ./ Gamma
                   = -bsz0 + (S * As(usx[0]) * S) ./ Gamma
                     + (bsz0 - bsz1 + S * bsx[1] * S ) ./ Gamma
                   = -bsz0 + (S * As(usx[0]) * S) ./ Gamma
                     + (bsz0 + S * ( bsx[1] - bssz1 ) * S ) ./ Gamma

               Unscale ux[1] = Uti * usx[1] * Uti'

            3. Compute usz0, usz1

               r0' * uz0 * r0 = r0^-1 * ( A(ux[0]) - ux[1] - bz0 ) * r0^-T
               r1' * uz1 * r1 = r1^-1 * ( -ux[1] - bz1 ) * r1^-T

            """

            # z0 := U' * z0 * U
            #     = bsz0
            __cngrnc(U, z, trans='T')

            # z1 := Us' * bz1 * Us
            #     = S^-1 * U' * bz1 * U * S^-1
            #     = S^-1 * bsz1 * S^-1
            __cngrnc(Us, z, trans='T', offsetx=msq)

            # x[1] := Uti' * x[1] * Uti
            #       = bsx[1]
            __cngrnc(Uti, x[1], trans='T')

            # x[1] := x[1] - z[msq:]
            #       = bsx[1] - S^-1 * bsz1 * S^-1
            blas.axpy(z, x[1], alpha=-1.0, offsetx=msq)

            # x1 = (S * x[1] * S + z[:msq] ) ./ sqrtG
            #    = (S * ( bsx[1] - S^-1 * bsz1 * S^-1) * S + bsz0 ) ./ sqrtG
            #    = (S * bsx[1] * S - bsz1 + bsz0 ) ./ sqrtG
            # in packed storage
            blas.copy(x[1], x1)
            blas.tbmv(S, x1, n=msq, k=0, ldA=1)
            blas.axpy(z, x1, n=msq)
            blas.tbsv(sqrtG, x1, n=msq, k=0, ldA=1)
            misc.pack2(x1, {'l': 0, 'q': [], 's': [m]})

            # x[0] := x[0] + Asc'*x1
            #       = bx0 + Asc'( ( bsz0 - bsz1 + S * bsx[1] * S ) ./ sqrtG)
            #       = bx0 + As'( ( bz0 - bz1 + S * bx[1] * S ) ./ Gamma )
            blas.gemv(Asc, x1, x[0], m=mpckd, trans='T', beta=1.0)

            # x[0] := H^-1 * x[0]
            #       = ux[0]
            lapack.potrs(H, x[0])

            # x1 = Asc(x[0]) .* sqrtG  (unpacked)
            #    = As(x[0])
            blas.gemv(Asc, x[0], tmp, m=mpckd)
            misc.unpack(tmp, x1, {'l': 0, 'q': [], 's': [m]})
            blas.tbmv(sqrtG, x1, n=msq, k=0, ldA=1)

            # usx[1] = (x1 + (x[1] - z[:msq])) ./ sqrtG**2
            #        = (As(ux[0]) + bsx[1] - bsz0 - S^-1 * bsz1 * S^-1)
            #           ./ Gamma

            # x[1] := x[1] - z[:msq]
            #       = bsx[1] - bsz0 - S^-1 * bsz1 * S^-1
            blas.axpy(z, x[1], -1.0, n=msq)

            # x[1] := x[1] + x1
            #       = As(ux) + bsx[1] - bsz0 - S^-1 * bsz1 * S^-1
            blas.axpy(x1, x[1])

            # x[1] := x[1] / Gamma
            #       = (As(ux) + bsx[1] - bsz0 - S^-1 * bsz1 * S^-1 ) / Gamma
            #       = S^-1 * usx[1] * S^-1
            blas.tbsv(Gamma, x[1], n=msq, k=0, ldA=1)

            # z[msq:] := r1' * U * (-z[msq:] - x[1]) * U' * r1
            #         := -r1' * U * S^-1 * (bsz1 + usx[1]) * S^-1 * U' * r1
            #         := r1' * uz1 * r1
            blas.axpy(x[1], z, n=msq, offsety=msq)
            blas.scal(-1.0, z, offset=msq)
            __cngrnc(U, z, offsetx=msq)
            __cngrnc(W['r'][1], z, trans='T', offsetx=msq)

            # x[1] :=  S * x[1] * S
            #       =  usx1
            blas.tbmv(S, x[1], n=msq, k=0, ldA=1)

            # z[:msq] = r0' * U * ( x1 - x[1] - z[:msq] ) * U' * r0
            #         = r0' * U * ( As(ux) - usx1 - bsz0 ) * U' * r0
            #         = r0' * U *  usz0 * U' * r0
            #         = r0' * uz0 * r0
            blas.axpy(x1, z, -1.0, n=msq)
            blas.scal(-1.0, z, n=msq)
            blas.axpy(x[1], z, -1.0, n=msq)
            __cngrnc(U, z)
            __cngrnc(W['r'][0], z, trans='T')

            # x[1] := Uti * x[1] * Uti'
            #       = ux[1]
            __cngrnc(Uti, x[1])

        return solve
Exemplo n.º 10
0
        def f(x, y, z):
            """
            Solve the system below in place; x and z are overwritten.

            y is not referenced.  The matrices and work arrays bz11, bz21,
            bz22, a, tmp, r, V1, V2, P, D, DV, Gs, As, H, the scaling
            dictionary W and the dimensions m, p, q are taken from the
            enclosing scope.

            Solve 

                              C * ux + G' * uzl - 2*A'(uzs21) = bx
                                                       -uzs11 = bX1
                                                       -uzs22 = bX2
                                           G * ux - D^2 * uzl = bzl
                [ -uX1   -A(ux)' ]       [ uzs11 uzs21' ]     
                [                ] - T * [              ] * T = bzs.
                [ -A(ux) -uX2    ]       [ uzs21 uzs22  ]

            On entry, x = (bx, bX1, bX2) and z = [ bzl; bzs[:] ].
            On exit, x = (ux, uX1, uX2) and z = [ D*uzl; (r'*uzs*r)[:] ].

            Define X = uzs21, Z = T * uzs * T:   
 
                      C * ux + G' * uzl - 2*A'(X) = bx
                                [ 0  X' ]               [ bX1 0   ]
                            T * [       ] * T - Z = T * [         ] * T
                                [ X  0  ]               [ 0   bX2 ]
                               G * ux - D^2 * uzl = bzl
                [ -uX1   -A(ux)' ]   [ Z11 Z21' ]     
                [                ] - [          ] = bzs
                [ -A(ux) -uX2    ]   [ Z21 Z22  ]

            Return x = (ux, uX1, uX2), z = [ D*uzl; (rti'*Z*rti)[:] ].

            We use the congruence transformation 

                [ V1   0   ] [ T11  T21' ] [ V1'  0  ]   [ I  S' ]
                [          ] [           ] [         ] = [       ]
                [ 0    V2' ] [ T21  T22  ] [ 0    V2 ]   [ S  I  ]

            and the factorization 

                X + S * X' * S = L( L'(X) ) 

            to write this as

                                  C * ux + G' * uzl - 2*A'(X) = bx
                L'(V2^-1 * X * V1^-1) - L^-1(V2' * Z21 * V1') = bX
                                           G * ux - D^2 * uzl = bzl
                            [ -uX1   -A(ux)' ]   [ Z11 Z21' ]     
                            [                ] - [          ] = bzs,
                            [ -A(ux) -uX2    ]   [ Z21 Z22  ]

            or

                C * ux + Gs' * uuzl - 2*As'(XX) = bx
                                      XX - ZZ21 = bX
                                 Gs * ux - uuzl = D^-1 * bzl
                                 -As(ux) - ZZ21 = bbzs_21
                                     -uX1 - Z11 = bzs_11
                                     -uX2 - Z22 = bzs_22

            if we introduce scaled variables

                uuzl = D * uzl
                  XX = L'(V2^-1 * X * V1^-1) 
                     = L'(V2^-1 * uzs21 * V1^-1)
                ZZ21 = L^-1(V2' * Z21 * V1') 

            and define

                bbzs_21 = L^-1(V2' * bzs_21 * V1')
                                           [ bX1  0   ]
                     bX = L^-1( V2' * (T * [          ] * T)_21 * V1').
                                           [ 0    bX2 ]           
 
            Eliminating Z21 gives 

                C * ux + Gs' * uuzl - 2*As'(XX) = bx
                                 Gs * ux - uuzl = D^-1 * bzl
                                   -As(ux) - XX = bbzs_21 - bX
                                     -uX1 - Z11 = bzs_11
                                     -uX2 - Z22 = bzs_22 

            and eliminating uuzl and XX gives

                        H * ux = bx + Gs' * D^-1 * bzl + 2*As'(bX - bbzs_21)
                Gs * ux - uuzl = D^-1 * bzl
                  -As(ux) - XX = bbzs_21 - bX
                    -uX1 - Z11 = bzs_11
                    -uX2 - Z22 = bzs_22.


            In summary, we can use the following algorithm: 

            1. bXX := bX - bbzs21
                                        [ bX1 0   ]
                    = L^-1( V2' * ((T * [         ] * T)_21 - bzs_21) * V1')
                                        [ 0   bX2 ]

            2. Solve H * ux = bx + Gs' * D^-1 * bzl + 2*As'(bXX).

            3. From ux, compute 

                   uuzl = Gs*ux - D^-1 * bzl and 
                      X = V2 * L^-T(-As(ux) + bXX) * V1.

            4. Return ux, uuzl, 

                   rti' * Z * rti = r' * [ -bX1, X'; X, -bX2 ] * r
 
               and uX1 = -Z11 - bzs_11,  uX2 = -Z22 - bzs_22.

            """

            # Save bzs_11, bzs_22, bzs_21.
            # (zs is the (p+q) x (p+q) block stored in z from offset m on.)
            lapack.lacpy(z, bz11, uplo = 'L', m = q, n = q, ldA = p+q,
                offsetA = m)
            lapack.lacpy(z, bz21, m = p, n = q, ldA = p+q, offsetA = m+q)
            lapack.lacpy(z, bz22, uplo = 'L', m = p, n = p, ldA = p+q,
                offsetA = m + (p+q+1)*q)


            # zl := D^-1 * zl
            #     = D^-1 * bzl
            blas.tbmv(W['di'], z, n = m, k = 0, ldA = 1)


            # zs := r' * [ bX1, 0; 0, bX2 ] * r.

            # zs := [ bX1, 0; 0, bX2 ]
            blas.scal(0.0, z, offset = m)
            lapack.lacpy(x[1], z, uplo = 'L', m = q, n = q, ldB = p+q,
                offsetB = m)
            lapack.lacpy(x[2], z, uplo = 'L', m = p, n = p, ldB = p+q,
                offsetB = m + (p+q+1)*q)

            # scale diagonal of zs by 1/2
            blas.scal(0.5, z, inc = p+q+1, offset = m)

            # a := tril(zs)*r  
            blas.copy(r, a)
            blas.trmm(z, a, side = 'L', m = p+q, n = p+q, ldA = p+q, ldB = 
                p+q, offsetA = m)

            # zs := a'*r + r'*a 
            blas.syr2k(r, a, z, trans = 'T', n = p+q, k = p+q, ldB = p+q,
                ldC = p+q, offsetC = m)



            # bz21 := L^-1( V2' * ((r * zs * r')_21 - bz21) * V1')
            #
            #                           [ bX1 0   ]
            #       = L^-1( V2' * ((T * [         ] * T)_21 - bz21) * V1').
            #                           [ 0   bX2 ]

            # a = [ r21 r22 ] * z
            #   = [ r21 r22 ] * r' * [ bX1, 0; 0, bX2 ] * r
            #   = [ T21  T22 ] * [ bX1, 0; 0, bX2 ] * r
            # NOTE(review): no offsetA is passed here, although every other
            # access to zs in this function starts at offset m of z.
            # Confirm that this is intended (it is equivalent only if m = 0).
            blas.symm(z, r, a, side = 'R', m = p, n = p+q, ldA = p+q, 
                ldC = p+q, offsetB = q)
    
            # bz21 := -bz21 + a * [ r11, r12 ]'
            #       = -bz21 + (T * [ bX1, 0; 0, bX2 ] * T)_21
            blas.gemm(a, r, bz21, transB = 'T', m = p, n = q, k = p+q, 
                beta = -1.0, ldA = p+q, ldC = p)

            # bz21 := V2' * bz21 * V1'
            #       = V2' * (-bz21 + (T*[bX1, 0; 0, bX2]*T)_21) * V1'
            blas.gemm(V2, bz21, tmp, transA = 'T', m = p, n = q, k = p, 
                ldB = p)
            blas.gemm(tmp, V1, bz21, transB = 'T', m = p, n = q, k = q, 
                ldC = p)

            # bz21[:] := D * (I-P) * bz21[:] 
            #       = L^-1 * bz21[:]
            #       = bXX[:]
            blas.copy(bz21, tmp)
            base.gemv(P, bz21, tmp, alpha = -1.0, beta = 1.0)
            base.gemv(D, tmp, bz21)


            # Solve H * ux = bx + Gs' * D^-1 * bzl + 2*As'(bXX).

            # x[0] := x[0] + Gs'*zl + 2*As'(bz21) 
            #       = bx + Gs' * D^-1 * bzl + 2 * As'(bXX)
            blas.gemv(Gs, z, x[0], trans = 'T', alpha = 1.0, beta = 1.0)
            blas.gemv(As, bz21, x[0], trans = 'T', alpha = 2.0, beta = 1.0) 

            # x[0] := H \ x[0] 
            #      = ux
            lapack.potrs(H, x[0])


            # uuzl = Gs*ux - D^-1 * bzl
            blas.gemv(Gs, x[0], z, alpha = 1.0, beta = -1.0)

            
            # bz21 := V2 * L^-T(-As(ux) + bz21) * V1
            #       = X
            blas.gemv(As, x[0], bz21, alpha = -1.0, beta = 1.0)
            blas.tbsv(DV, bz21, n = p*q, k = 0, ldA = 1)
            blas.copy(bz21, tmp)
            base.gemv(P, tmp, bz21, alpha = -1.0, beta = 1.0, trans = 'T')
            blas.gemm(V2, bz21, tmp)
            blas.gemm(tmp, V1, bz21)


            # zs := -zs + r' * [ 0, X'; X, 0 ] * r
            #     = r' * [ -bX1, X'; X, -bX2 ] * r.

            # a := bz21 * [ r11, r12 ]
            #   =  X * [ r11, r12 ]
            blas.gemm(bz21, r, a, m = p, n = p+q, k = q, ldA = p, ldC = p+q)
            
            # z := -z + [ r21, r22 ]' * a + a' * [ r21, r22 ]
            #    = rti' * uzs * rti
            blas.syr2k(r, a, z, trans = 'T', beta = -1.0, n = p+q, k = p,
                offsetA = q, offsetC = m, ldB = p+q, ldC = p+q)  



            # uX1 = -Z11 - bzs_11 
            #     = -(r*zs*r')_11 - bzs_11
            # uX2 = -Z22 - bzs_22 
            #     = -(r*zs*r')_22 - bzs_22


            blas.copy(bz11, x[1])
            blas.copy(bz22, x[2])

            # scale diagonal of zs by 1/2
            # (temporarily; it is restored by the final scal below)
            blas.scal(0.5, z, inc = p+q+1, offset = m)

            # a := r*tril(zs)  
            blas.copy(r, a)
            blas.trmm(z, a, side = 'R', m = p+q, n = p+q, ldA = p+q, ldB = 
                p+q, offsetA = m)

            # x[1] := -x[1] - a[:q,:] * r[:q, :]' - r[:q,:] * a[:q,:]'
            #       = -bzs_11 - (r*zs*r')_11
            blas.syr2k(a, r, x[1], n = q, alpha = -1.0, beta = -1.0) 

            # x[2] := -x[2] - a[q:,:] * r[q:, :]' - r[q:,:] * a[q:,:]'
            #       = -bzs_22 - (r*zs*r')_22
            blas.syr2k(a, r, x[2], n = p, alpha = -1.0, beta = -1.0, 
                offsetA = q, offsetB = q)

            # restore the diagonal of zs (undo the earlier scaling by 1/2)
            blas.scal(2.0, z, inc = p+q+1, offset = m)
Exemplo n.º 11
0
def trmm(L, B, alpha = 1.0, trans = 'N', nrhs = None, offsetB = 0, ldB = None):
    r"""
    Multiplication with sparse triangular matrix. Computes

    .. math::

       B &:= \alpha L B    \text{ if trans is 'N'} \\
       B &:= \alpha L^T B  \text{ if trans is 'T'}

    where :math:`L` is a :py:class:`cspmatrix` factor. :math:`B` is
    overwritten in place and nothing is returned.

    :param L:  :py:class:`cspmatrix` factor
    :param B:  matrix
    :param alpha:  float (default: 1.0)
    :param trans:  'N' or 'T' (default: 'N')   
    :param nrhs:   number of right-hand sides (default: number of columns in :math:`B`)
    :param offsetB: integer (default: 0)
    :param ldB:   leading dimension of :math:`B` (default: number of rows in :math:`B`)
    :raises AssertionError: if `L` is not a :py:class:`cspmatrix` factor,
        `B` is not a matrix, or `trans` is not 'N' or 'T'
    """
    
    assert isinstance(L, cspmatrix) and L.is_factor is True, "L must be a cspmatrix factor"
    assert isinstance(B, matrix), "B must be a matrix"

    if ldB is None: ldB = B.size[0]
    if nrhs is None: nrhs = B.size[1]
    assert trans in ['N', 'T']

    n = L.symb.n
    snpost = L.symb.snpost
    snptr = L.symb.snptr
    snode = L.symb.snode
    chptr = L.symb.chptr
    chidx = L.symb.chidx

    relptr = L.symb.relptr
    relidx = L.symb.relidx
    blkptr = L.symb.blkptr
    blkval = L.blkval

    # without a permutation the supernode indices address B directly
    p = L.symb.p
    if p is None: p = range(n)
     
    stack = []

    # Compare by value: identity ('is') on a string only works by accident
    # of interning and fails for equal strings built at run time.
    if trans == 'N':

        # visit the supernodes in the order given by snpost, so that the
        # contributions of a node's children are on the stack when needed
        for k in snpost:

            nn = snptr[k+1]-snptr[k]       # |Nk|
            na = relptr[k+1]-relptr[k]     # |Ak|
            nj = na + nn

            # rows of B that belong to supernode k (same for every column)
            rows = [p[ir] for ir in snode[snptr[k]:snptr[k+1]]]

            # extract and scale block from rhs
            Uk = matrix(0.0,(nj,nrhs))
            for j in range(nrhs):
                col = offsetB + j*ldB
                for i,row in enumerate(rows):
                    Uk[i,j] = alpha*B[col + row]
            blas.trmm(blkval, Uk, m = nn, n = nrhs, offsetA = blkptr[k], ldA = nj)

            if na > 0:
                # compute new contribution (to be stacked)
                blas.gemm(blkval, Uk, Uk, m = na, n = nrhs, k = nn, alpha = 1.0,
                          offsetA = blkptr[k]+nn, ldA = nj, offsetC = nn)

            # add contributions from children
            for _ in range(chptr[k],chptr[k+1]):
                Ui, i = stack.pop()
                r = relidx[relptr[i]:relptr[i+1]]
                Uk[r,:] += Ui

            # if k is not a root node
            if na > 0: stack.append((Uk[nn:,:],k))
            
            # copy block to rhs
            for j in range(nrhs):
                col = offsetB + j*ldB
                for i,row in enumerate(rows):
                    B[col + row] = Uk[i,j]
                
    else: # trans == 'T'

        # reverse order: a node pushes the rows its children need before
        # the children are visited
        for k in reversed(list(snpost)):
            
            nn = snptr[k+1]-snptr[k]       # |Nk|
            na = relptr[k+1]-relptr[k]     # |Ak|
            nj = na + nn

            # rows of B that belong to supernode k (same for every column)
            rows = [p[ir] for ir in snode[snptr[k]:snptr[k+1]]]

            # extract and scale block from rhs
            Uk = matrix(0.0,(nj,nrhs))
            for j in range(nrhs):
                col = offsetB + j*ldB
                for i,row in enumerate(rows):
                    Uk[i,j] = alpha*B[col + row]
            
            # if k is not a root node
            if na > 0:
                Uk[nn:,:] = stack.pop()

            # stack contributions for children
            for ii in range(chptr[k],chptr[k+1]):
                i = chidx[ii]
                stack.append(Uk[relidx[relptr[i]:relptr[i+1]],:])

            if na > 0:
                blas.gemm(blkval, Uk, Uk, alpha = 1.0, beta = 1.0, m = nn, n = nrhs, k = na,
                          transA = 'T', offsetA = blkptr[k]+nn, ldA = nj, offsetB = nn)

            # scale and copy block to rhs
            blas.trmm(blkval, Uk, transA = 'T', m = nn, n = nrhs, offsetA = blkptr[k], ldA = nj)
            for j in range(nrhs):
                col = offsetB + j*ldB
                for i,row in enumerate(rows):
                    B[col + row] = Uk[i,j]

    return
Exemplo n.º 12
0
        def f(x, y, z):
            """
            Solve the linear system below in place, overwriting x and z.

            The argument y is not referenced in the body.  All work arrays
            and factors used here (bz11, bz21, bz22, tmp, a, r, W, V1, V2,
            P, D, DV, Gs, As, H) and the dimensions m, p, q are taken from
            the enclosing scope, which is not shown in this block.

            Solve 

                              C * ux + G' * uzl - 2*A'(uzs21) = bx
                                                       -uzs11 = bX1
                                                       -uzs22 = bX2
                                           G * ux - D^2 * uzl = bzl
                [ -uX1   -A(ux)' ]       [ uzs11 uzs21' ]     
                [                ] - T * [              ] * T = bzs.
                [ -A(ux) -uX2    ]       [ uzs21 uzs22  ]

            On entry, x = (bx, bX1, bX2) and z = [ bzl; bzs[:] ].
            On exit, x = (ux, uX1, uX2) and z = [ D*uzl; (r'*uzs*r)[:] ].

            Define X = uzs21, Z = T * uzs * T:   
 
                      C * ux + G' * uzl - 2*A'(X) = bx
                                [ 0  X' ]               [ bX1 0   ]
                            T * [       ] * T - Z = T * [         ] * T
                                [ X  0  ]               [ 0   bX2 ]
                               G * ux - D^2 * uzl = bzl
                [ -uX1   -A(ux)' ]   [ Z11 Z21' ]     
                [                ] - [          ] = bzs
                [ -A(ux) -uX2    ]   [ Z21 Z22  ]

            Return x = (ux, uX1, uX2), z = [ D*uzl; (rti'*Z*rti)[:] ].

            We use the congruence transformation 

                [ V1   0   ] [ T11  T21' ] [ V1'  0  ]   [ I  S' ]
                [          ] [           ] [         ] = [       ]
                [ 0    V2' ] [ T21  T22  ] [ 0    V2 ]   [ S  I  ]

            and the factorization 

                X + S * X' * S = L( L'(X) ) 

            to write this as

                                  C * ux + G' * uzl - 2*A'(X) = bx
                L'(V2^-1 * X * V1^-1) - L^-1(V2' * Z21 * V1') = bX
                                           G * ux - D^2 * uzl = bzl
                            [ -uX1   -A(ux)' ]   [ Z11 Z21' ]     
                            [                ] - [          ] = bzs,
                            [ -A(ux) -uX2    ]   [ Z21 Z22  ]

            or

                C * ux + Gs' * uuzl - 2*As'(XX) = bx
                                      XX - ZZ21 = bX
                                 Gs * ux - uuzl = D^-1 * bzl
                                 -As(ux) - ZZ21 = bbzs_21
                                     -uX1 - Z11 = bzs_11
                                     -uX2 - Z22 = bzs_22

            if we introduce scaled variables

                uuzl = D * uzl
                  XX = L'(V2^-1 * X * V1^-1) 
                     = L'(V2^-1 * uzs21 * V1^-1)
                ZZ21 = L^-1(V2' * Z21 * V1') 

            and define

                bbzs_21 = L^-1(V2' * bzs_21 * V1')
                                           [ bX1  0   ]
                     bX = L^-1( V2' * (T * [          ] * T)_21 * V1').
                                           [ 0    bX2 ]           
 
            Eliminating Z21 gives 

                C * ux + Gs' * uuzl - 2*As'(XX) = bx
                                 Gs * ux - uuzl = D^-1 * bzl
                                   -As(ux) - XX = bbzs_21 - bX
                                     -uX1 - Z11 = bzs_11
                                     -uX2 - Z22 = bzs_22 

            and eliminating uuzl and XX gives

                        H * ux = bx + Gs' * D^-1 * bzl + 2*As'(bX - bbzs_21)
                Gs * ux - uuzl = D^-1 * bzl
                  -As(ux) - XX = bbzs_21 - bX
                    -uX1 - Z11 = bzs_11
                    -uX2 - Z22 = bzs_22.


            In summary, we can use the following algorithm: 

            1. bXX := bX - bbzs21
                                        [ bX1 0   ]
                    = L^-1( V2' * ((T * [         ] * T)_21 - bzs_21) * V1')
                                        [ 0   bX2 ]

            2. Solve H * ux = bx + Gs' * D^-1 * bzl + 2*As'(bXX).

            3. From ux, compute 

                   uuzl = Gs*ux - D^-1 * bzl and 
                      X = V2 * L^-T(-As(ux) + bXX) * V1.

            4. Return ux, uuzl, 

                   rti' * Z * rti = r' * [ -bX1, X'; X, -bX2 ] * r
 
               and uX1 = -Z11 - bzs_11,  uX2 = -Z22 - bzs_22.

            """

            # Save bzs_11, bzs_22, bzs_21.
            # (they are needed at the end; zs is overwritten below)
            lapack.lacpy(z, bz11, uplo='L', m=q, n=q, ldA=p + q, offsetA=m)
            lapack.lacpy(z, bz21, m=p, n=q, ldA=p + q, offsetA=m + q)
            lapack.lacpy(z,
                         bz22,
                         uplo='L',
                         m=p,
                         n=p,
                         ldA=p + q,
                         offsetA=m + (p + q + 1) * q)

            # zl := D^-1 * zl
            #     = D^-1 * bzl
            blas.tbmv(W['di'], z, n=m, k=0, ldA=1)

            # zs := r' * [ bX1, 0; 0, bX2 ] * r.

            # zs := [ bX1, 0; 0, bX2 ]
            blas.scal(0.0, z, offset=m)
            lapack.lacpy(x[1], z, uplo='L', m=q, n=q, ldB=p + q, offsetB=m)
            lapack.lacpy(x[2],
                         z,
                         uplo='L',
                         m=p,
                         n=p,
                         ldB=p + q,
                         offsetB=m + (p + q + 1) * q)

            # scale diagonal of zs by 1/2
            # (so that tril(zs) + tril(zs)' reproduces the symmetric zs)
            blas.scal(0.5, z, inc=p + q + 1, offset=m)

            # a := tril(zs)*r
            blas.copy(r, a)
            blas.trmm(z,
                      a,
                      side='L',
                      m=p + q,
                      n=p + q,
                      ldA=p + q,
                      ldB=p + q,
                      offsetA=m)

            # zs := a'*r + r'*a
            blas.syr2k(r,
                       a,
                       z,
                       trans='T',
                       n=p + q,
                       k=p + q,
                       ldB=p + q,
                       ldC=p + q,
                       offsetC=m)

            # bz21 := L^-1( V2' * ((r * zs * r')_21 - bz21) * V1')
            #
            #                           [ bX1 0   ]
            #       = L^-1( V2' * ((T * [         ] * T)_21 - bz21) * V1').
            #                           [ 0   bX2 ]

            # a = [ r21 r22 ] * z
            #   = [ r21 r22 ] * r' * [ bX1, 0; 0, bX2 ] * r
            #   = [ T21  T22 ] * [ bX1, 0; 0, bX2 ] * r
            blas.symm(z,
                      r,
                      a,
                      side='R',
                      m=p,
                      n=p + q,
                      ldA=p + q,
                      ldC=p + q,
                      offsetB=q)

            # bz21 := -bz21 + a * [ r11, r12 ]'
            #       = -bz21 + (T * [ bX1, 0; 0, bX2 ] * T)_21
            blas.gemm(a,
                      r,
                      bz21,
                      transB='T',
                      m=p,
                      n=q,
                      k=p + q,
                      beta=-1.0,
                      ldA=p + q,
                      ldC=p)

            # bz21 := V2' * bz21 * V1'
            #       = V2' * (-bz21 + (T*[bX1, 0; 0, bX2]*T)_21) * V1'
            blas.gemm(V2, bz21, tmp, transA='T', m=p, n=q, k=p, ldB=p)
            blas.gemm(tmp, V1, bz21, transB='T', m=p, n=q, k=q, ldC=p)

            # bz21[:] := D * (I-P) * bz21[:]
            #       = L^-1 * bz21[:]
            #       = bXX[:]
            blas.copy(bz21, tmp)
            base.gemv(P, bz21, tmp, alpha=-1.0, beta=1.0)
            base.gemv(D, tmp, bz21)

            # Solve H * ux = bx + Gs' * D^-1 * bzl + 2*As'(bXX).

            # x[0] := x[0] + Gs'*zl + 2*As'(bz21)
            #       = bx + Gs' * D^-1 * bzl + 2 * As'(bXX)
            blas.gemv(Gs, z, x[0], trans='T', alpha=1.0, beta=1.0)
            blas.gemv(As, bz21, x[0], trans='T', alpha=2.0, beta=1.0)

            # x[0] := H \ x[0]
            #      = ux
            lapack.potrs(H, x[0])

            # uuzl = Gs*ux - D^-1 * bzl
            blas.gemv(Gs, x[0], z, alpha=1.0, beta=-1.0)

            # bz21 := V2 * L^-T(-As(ux) + bz21) * V1
            #       = X
            blas.gemv(As, x[0], bz21, alpha=-1.0, beta=1.0)
            blas.tbsv(DV, bz21, n=p * q, k=0, ldA=1)
            blas.copy(bz21, tmp)
            base.gemv(P, tmp, bz21, alpha=-1.0, beta=1.0, trans='T')
            blas.gemm(V2, bz21, tmp)
            blas.gemm(tmp, V1, bz21)

            # zs := -zs + r' * [ 0, X'; X, 0 ] * r
            #     = r' * [ -bX1, X'; X, -bX2 ] * r.

            # a := bz21 * [ r11, r12 ]
            #   =  X * [ r11, r12 ]
            blas.gemm(bz21, r, a, m=p, n=p + q, k=q, ldA=p, ldC=p + q)

            # z := -z + [ r21, r22 ]' * a + a' * [ r21, r22 ]
            #    = rti' * uzs * rti
            blas.syr2k(r,
                       a,
                       z,
                       trans='T',
                       beta=-1.0,
                       n=p + q,
                       k=p,
                       offsetA=q,
                       offsetC=m,
                       ldB=p + q,
                       ldC=p + q)

            # uX1 = -Z11 - bzs_11
            #     = -(r*zs*r')_11 - bzs_11
            # uX2 = -Z22 - bzs_22
            #     = -(r*zs*r')_22 - bzs_22

            # start from the saved right-hand sides bzs_11 and bzs_22
            blas.copy(bz11, x[1])
            blas.copy(bz22, x[2])

            # scale diagonal of zs by 1/2
            blas.scal(0.5, z, inc=p + q + 1, offset=m)

            # a := r*tril(zs)
            blas.copy(r, a)
            blas.trmm(z,
                      a,
                      side='R',
                      m=p + q,
                      n=p + q,
                      ldA=p + q,
                      ldB=p + q,
                      offsetA=m)

            # x[1] := -x[1] - a[:q,:] * r[:q, :]' - r[:q,:] * a[:q,:]'
            #       = -bzs_11 - (r*zs*r')_11
            blas.syr2k(a, r, x[1], n=q, alpha=-1.0, beta=-1.0)

            # x[2] := -x[2] - a[q:,:] * r[q:, :]' - r[q:,:] * a[q:,:]'
            #       = -bzs_22 - (r*zs*r')_22
            blas.syr2k(a,
                       r,
                       x[2],
                       n=p,
                       alpha=-1.0,
                       beta=-1.0,
                       offsetA=q,
                       offsetB=q)

            # restore the diagonal of zs (undo the 1/2 scaling applied above)
            blas.scal(2.0, z, inc=p + q + 1, offset=m)
Exemplo n.º 13
0
    def spmatrix(self, reordered = True, symmetric = False):
        """
        Return the :py:class:`cspmatrix` :math:`A` as a sparse matrix.

        With `reordered` set to `True` (default) the result is left in the
        reordered form; otherwise the inverse permutation is applied. If the
        :py:class:`cspmatrix` represents a Cholesky factor, only the default
        options are accepted.

        :param reordered:  boolean (default: True)
        :param symmetric:  boolean (default: False)
        :raises ValueError: if the object is a Cholesky factor and
            `symmetric` is true or `reordered` is false
        """
        symb = self.symb
        n = symb.n
        snptr, snode = symb.snptr, symb.snode
        relptr = symb.relptr
        snrowidx, sncolptr = symb.snrowidx, symb.sncolptr
        blkptr = symb.blkptr
        blkval = self.blkval

        if self.is_factor:
            if symmetric:
                raise ValueError("'symmetric = True' not implemented for Cholesky factors")
            if not reordered:
                raise ValueError("'reordered = False' not implemented for Cholesky factors")
            postorder = symb.snpost
            # rebind to +blkval so the in-place scaling below does not act
            # on self.blkval (presumably unary + returns a copy -- confirm)
            blkval = +blkval
            for k in postorder:
                width = snptr[k+1] - snptr[k]        # |Nk|
                below = relptr[k+1] - relptr[k]      # |Ak|
                if below == 0:
                    continue
                start = blkptr[k]
                height = below + width
                if width == 1:
                    # single column: scale the sub-diagonal part by the diagonal entry
                    blas.scal(blkval[start], blkval, offset = start + 1, n = below)
                else:
                    # multiply the sub-diagonal block from the right by the
                    # lower-triangular diagonal block
                    blas.trmm(blkval, blkval, side = "R", uplo = "L",
                              transA = "N", diag = "N",
                              m = below, n = width, ldA = height, ldB = height,
                              offsetA = start, offsetB = start + width)

        supernodes = range(symb.Nsn)

        # number of stored entries in each column
        colcount = matrix(0, (n, 1))
        for k in supernodes:
            first = snptr[k]
            width = snptr[k+1] - first
            height = width + (relptr[k+1] - relptr[k])
            for i in range(width):
                colcount[snode[first + i]] = height - i

        # column pointers: running sum of the column counts
        colptr = [0]
        for i in range(n):
            colptr.append(colptr[-1] + colcount[i])
        colptr = matrix(colptr)

        # gather values and row indices column by column
        nnz = colptr[-1]
        values = matrix(0.0, (nnz, 1))
        rowidx = matrix(0, (nnz, 1))
        for k in supernodes:
            first = snptr[k]
            width = snptr[k+1] - first
            height = width + (relptr[k+1] - relptr[k])
            rows_end = sncolptr[k+1]
            for i in range(width):
                j = snode[first + i]
                # column i of the block, starting at its diagonal entry
                blas.copy(blkval, values, offsetx = blkptr[k] + (height + 1)*i,
                          offsety = colptr[j], n = height - i)
                rowidx[colptr[j]:colptr[j+1]] = snrowidx[sncolptr[k] + i:rows_end]

        # expand the column pointers into explicit (row, column) triplet lists
        I, J = [], []
        for i in range(n):
            lo, hi = colptr[i], colptr[i+1]
            I.extend(rowidx[lo:hi])
            J.extend([i] * (hi - lo))

        lower = spmatrix(values, I, J, (n, n))  # reordered and lower triangular

        if reordered or symb.p is None:
            # reordered matrix (do not apply inverse permutation)
            return symmetrize(lower) if symmetric else lower

        # apply inverse permutation
        full = perm(symmetrize(lower), symb.ip)
        return full if symmetric else tril(full)
Exemplo n.º 14
0
def trmm(L, B, alpha=1.0, trans='N', nrhs=None, offsetB=0, ldB=None):
    r"""
    Multiplication with sparse triangular matrix. Computes

    .. math::

       B &:= \alpha L B    \text{ if trans is 'N'} \\
       B &:= \alpha L^T B  \text{ if trans is 'T'}

    where :math:`L` is a :py:class:`cspmatrix` factor. :math:`B` is
    overwritten in place and nothing is returned.

    :param L:  :py:class:`cspmatrix` factor
    :param B:  matrix
    :param alpha:  float (default: 1.0)
    :param trans:  'N' or 'T' (default: 'N')   
    :param nrhs:   number of right-hand sides (default: number of columns in :math:`B`)
    :param offsetB: integer (default: 0)
    :param ldB:   leading dimension of :math:`B` (default: number of rows in :math:`B`)
    :raises AssertionError: if `L` is not a :py:class:`cspmatrix` factor,
        `B` is not a matrix, or `trans` is not 'N' or 'T'
    """

    assert isinstance(
        L, cspmatrix) and L.is_factor is True, "L must be a cspmatrix factor"
    assert isinstance(B, matrix), "B must be a matrix"

    if ldB is None: ldB = B.size[0]
    if nrhs is None: nrhs = B.size[1]
    assert trans in ['N', 'T']

    n = L.symb.n
    snpost = L.symb.snpost
    snptr = L.symb.snptr
    snode = L.symb.snode
    chptr = L.symb.chptr
    chidx = L.symb.chidx

    relptr = L.symb.relptr
    relidx = L.symb.relidx
    blkptr = L.symb.blkptr
    blkval = L.blkval

    # without a permutation the supernode indices address B directly
    p = L.symb.p
    if p is None: p = range(n)

    stack = []

    # Compare by value: identity ('is') on a string only works by accident
    # of interning and fails for equal strings built at run time.
    if trans == 'N':

        # visit the supernodes in the order given by snpost, so that the
        # contributions of a node's children are on the stack when needed
        for k in snpost:

            nn = snptr[k + 1] - snptr[k]  # |Nk|
            na = relptr[k + 1] - relptr[k]  # |Ak|
            nj = na + nn

            # rows of B that belong to supernode k (same for every column)
            rows = [p[ir] for ir in snode[snptr[k]:snptr[k + 1]]]

            # extract and scale block from rhs
            Uk = matrix(0.0, (nj, nrhs))
            for j in range(nrhs):
                col = offsetB + j * ldB
                for i, row in enumerate(rows):
                    Uk[i, j] = alpha * B[col + row]
            blas.trmm(blkval, Uk, m=nn, n=nrhs, offsetA=blkptr[k], ldA=nj)

            if na > 0:
                # compute new contribution (to be stacked)
                blas.gemm(blkval,
                          Uk,
                          Uk,
                          m=na,
                          n=nrhs,
                          k=nn,
                          alpha=1.0,
                          offsetA=blkptr[k] + nn,
                          ldA=nj,
                          offsetC=nn)

            # add contributions from children
            for _ in range(chptr[k], chptr[k + 1]):
                Ui, i = stack.pop()
                r = relidx[relptr[i]:relptr[i + 1]]
                Uk[r, :] += Ui

            # if k is not a root node
            if na > 0: stack.append((Uk[nn:, :], k))

            # copy block to rhs
            for j in range(nrhs):
                col = offsetB + j * ldB
                for i, row in enumerate(rows):
                    B[col + row] = Uk[i, j]

    else:  # trans == 'T'

        # reverse order: a node pushes the rows its children need before
        # the children are visited
        for k in reversed(list(snpost)):

            nn = snptr[k + 1] - snptr[k]  # |Nk|
            na = relptr[k + 1] - relptr[k]  # |Ak|
            nj = na + nn

            # rows of B that belong to supernode k (same for every column)
            rows = [p[ir] for ir in snode[snptr[k]:snptr[k + 1]]]

            # extract and scale block from rhs
            Uk = matrix(0.0, (nj, nrhs))
            for j in range(nrhs):
                col = offsetB + j * ldB
                for i, row in enumerate(rows):
                    Uk[i, j] = alpha * B[col + row]

            # if k is not a root node
            if na > 0:
                Uk[nn:, :] = stack.pop()

            # stack contributions for children
            for ii in range(chptr[k], chptr[k + 1]):
                i = chidx[ii]
                stack.append(Uk[relidx[relptr[i]:relptr[i + 1]], :])

            if na > 0:
                blas.gemm(blkval,
                          Uk,
                          Uk,
                          alpha=1.0,
                          beta=1.0,
                          m=nn,
                          n=nrhs,
                          k=na,
                          transA='T',
                          offsetA=blkptr[k] + nn,
                          ldA=nj,
                          offsetB=nn)

            # scale and copy block to rhs
            blas.trmm(blkval,
                      Uk,
                      transA='T',
                      m=nn,
                      n=nrhs,
                      offsetA=blkptr[k],
                      ldA=nj)
            for j in range(nrhs):
                col = offsetB + j * ldB
                for i, row in enumerate(rows):
                    B[col + row] = Uk[i, j]

    return
Exemplo n.º 15
0
    def F(W):
        """
        Factor the system for the scaling W and return a function
        solve(x, y, z) that solves it in place.

        The work arrays L, U, s, Uti, Us, S, Asc, x1, tmp, the matrix A and
        the dimensions m, n, msq, mpckd are taken from the enclosing scope,
        which is not shown in this block.

        Generate a solver for

                                             A'(uz0) = bx[0]
                                          -uz0 - uz1 = bx[1] 
            A(ux[0]) - ux[1] - r0*r0' * uz0 * r0*r0' = bz0 
                     - ux[1] - r1*r1' * uz1 * r1*r1' = bz1.

        uz0, uz1, bz0, bz1 are symmetric m x m-matrices.
        ux[0], bx[0] are n-vectors.
        ux[1], bx[1] are symmetric m x m-matrices.

        We first calculate a congruence that diagonalizes r0*r0' and r1*r1':
 
            U' * r0 * r0' * U = I,  U' * r1 * r1' * U = S.

        We then make a change of variables

            usx[0] = ux[0],  
            usx[1] = U' * ux[1] * U  
              usz0 = U^-1 * uz0 * U^-T  
              usz1 = U^-1 * uz1 * U^-T 

        and define 

              As() = U' * A() * U  
            bsx[1] = U^-1 * bx[1] * U^-T
              bsz0 = U' * bz0 * U  
              bsz1 = U' * bz1 * U.  

        This gives

                             As'(usz0) = bx[0]
                          -usz0 - usz1 = bsx[1] 
            As(usx[0]) - usx[1] - usz0 = bsz0 
                -usx[1] - S * usz1 * S = bsz1.


        1. Eliminate usz0, usz1 using equations 3 and 4,

               usz0 = As(usx[0]) - usx[1] - bsz0
               usz1 = -S^-1 * (usx[1] + bsz1) * S^-1.

           This gives two equations in usx[0] and usx[1].

               As'(As(usx[0]) - usx[1]) = bx[0] + As'(bsz0)

               -As(usx[0]) + usx[1] + S^-1 * usx[1] * S^-1
                   = bsx[1] - bsz0 - S^-1 * bsz1 * S^-1.


        2. Eliminate usx[1] using equation 2:

               usx[1] + S * usx[1] * S 
                   = S * ( As(usx[0]) + bsx[1] - bsz0 ) * S - bsz1

           i.e., with Gamma[i,j] = 1.0 + S[i,i] * S[j,j],
 
               usx[1] = ( S * As(usx[0]) * S ) ./ Gamma 
                        + ( S * ( bsx[1] - bsz0 ) * S - bsz1 ) ./ Gamma.

           This gives an equation in usx[0].

               As'( As(usx[0]) ./ Gamma ) 
                   = bx0 + As'(bsz0) + 
                     As'( (S * ( bsx[1] - bsz0 ) * S - bsz1) ./ Gamma )
                   = bx0 + As'( ( bsz0 - bsz1 + S * bsx[1] * S ) ./ Gamma ).

        """

        # Calculate U s.t. 
        # 
        #     U' * r0*r0' * U = I,   U' * r1*r1' * U = diag(s).
 
        # Cholesky factorization r0 * r0' = L * L'
        blas.syrk(W['r'][0], L)
        lapack.potrf(L)

        # SVD L^-1 * r1 = U * diag(s) * V'  
        blas.copy(W['r'][1], U)
        blas.trsm(L, U) 
        lapack.gesvd(U, s, jobu = 'O')

        # s := s**2
        s[:] = s**2

        # Uti := U
        blas.copy(U, Uti)

        # U := L^-T * U
        blas.trsm(L, U, transA = 'T')

        # Uti := L * Uti = U^-T 
        blas.trmm(L, Uti)

        # Us := U * diag(s)^-1
        # (one diagonal solve per row of Us: stride m, starting at entry i)
        blas.copy(U, Us)
        for i in range(m):
            blas.tbsv(s, Us, n = m, k = 0, ldA = 1, incx = m, offsetx = i)

        # S is m x m with lower triangular entries s[i] * s[j] 
        # sqrtG is m x m with lower triangular entries sqrt(1.0 + s[i]*s[j])
        # Upper triangular entries are undefined but nonzero.

        blas.scal(0.0, S)
        blas.syrk(s, S)
        Gamma = 1.0 + S
        # NOTE(review): sqrt is presumably an elementwise matrix sqrt
        # imported at file level -- confirm
        sqrtG = sqrt(Gamma)


        # Asc[i] = (U' * Ai * U ) ./  sqrtG,  for i = 1, ..., n
        #        = Asi ./ sqrt(Gamma)
        blas.copy(A, Asc)
        misc.scale(Asc,   # only 'r' part of the dictionary is used   
            {'dnl': matrix(0.0, (0, 1)), 'dnli': matrix(0.0, (0, 1)),
             'd': matrix(0.0, (0, 1)), 'di': matrix(0.0, (0, 1)),
             'v': [], 'beta': [], 'r': [ U ], 'rti': [ U ]}) 
        for i in range(n):
            blas.tbsv(sqrtG, Asc, n = msq, k = 0, ldA = 1, offsetx = i*msq)

        # Convert columns of Asc to packed storage
        misc.pack2(Asc, {'l': 0, 'q': [], 's': [ m ]})

        # Cholesky factorization of Asc' * Asc.
        H = matrix(0.0, (n, n))
        blas.syrk(Asc, H, trans = 'T', k = mpckd)
        lapack.potrf(H)


        def solve(x, y, z):
            """
            Solve the system for one right-hand side, overwriting x and z
            in place.  The argument y is not referenced in the body.

            1. Solve for usx[0]:

               Asc'(Asc(usx[0]))
                   = bx0 + Asc'( ( bsz0 - bsz1 + S * bsx[1] * S ) ./ sqrtG)
                   = bx0 + Asc'( ( bsz0 + S * ( bsx[1] - bssz1) S ) 
                     ./ sqrtG)

               where bsx[1] = U^-1 * bx[1] * U^-T, bsz0 = U' * bz0 * U, 
               bsz1 = U' * bz1 * U, bssz1 = S^-1 * bsz1 * S^-1 

            2. Solve for usx[1]:

               usx[1] + S * usx[1] * S 
                   = S * ( As(usx[0]) + bsx[1] - bsz0 ) * S - bsz1 

               usx[1] 
                   = ( S * (As(usx[0]) + bsx[1] - bsz0) * S - bsz1) ./ Gamma
                   = -bsz0 + (S * As(usx[0]) * S) ./ Gamma
                     + (bsz0 - bsz1 + S * bsx[1] * S ) ./ Gamma
                   = -bsz0 + (S * As(usx[0]) * S) ./ Gamma
                     + (bsz0 + S * ( bsx[1] - bssz1 ) * S ) ./ Gamma

               Unscale ux[1] = Uti * usx[1] * Uti'

            3. Compute usz0, usz1

               r0' * uz0 * r0 = r0^-1 * ( A(ux[0]) - ux[1] - bz0 ) * r0^-T
               r1' * uz1 * r1 = r1^-1 * ( -ux[1] - bz1 ) * r1^-T

            """

            # z0 := U' * z0 * U 
            #     = bsz0
            __cngrnc(U, z, trans = 'T')

            # z1 := Us' * bz1 * Us 
            #     = S^-1 * U' * bz1 * U * S^-1
            #     = S^-1 * bsz1 * S^-1
            __cngrnc(Us, z, trans = 'T', offsetx = msq)

            # x[1] := Uti' * x[1] * Uti 
            #       = bsx[1]
            __cngrnc(Uti, x[1], trans = 'T')
        
            # x[1] := x[1] - z[msq:] 
            #       = bsx[1] - S^-1 * bsz1 * S^-1
            blas.axpy(z, x[1], alpha = -1.0, offsetx = msq)


            # x1 = (S * x[1] * S + z[:msq] ) ./ sqrtG
            #    = (S * ( bsx[1] - S^-1 * bsz1 * S^-1) * S + bsz0 ) ./ sqrtG
            #    = (S * bsx[1] * S - bsz1 + bsz0 ) ./ sqrtG
            # in packed storage
            blas.copy(x[1], x1)
            blas.tbmv(S, x1, n = msq, k = 0, ldA = 1)
            blas.axpy(z, x1, n = msq)
            blas.tbsv(sqrtG, x1, n = msq, k = 0, ldA = 1)
            misc.pack2(x1, {'l': 0, 'q': [], 's': [m]})

            # x[0] := x[0] + Asc'*x1 
            #       = bx0 + Asc'( ( bsz0 - bsz1 + S * bsx[1] * S ) ./ sqrtG)
            #       = bx0 + As'( ( bz0 - bz1 + S * bx[1] * S ) ./ Gamma )
            blas.gemv(Asc, x1, x[0], m = mpckd, trans = 'T', beta = 1.0)

            # x[0] := H^-1 * x[0]
            #       = ux[0]
            lapack.potrs(H, x[0])


            # x1 = Asc(x[0]) .* sqrtG  (unpacked)
            #    = As(x[0])  
            blas.gemv(Asc, x[0], tmp, m = mpckd)
            misc.unpack(tmp, x1, {'l': 0, 'q': [], 's': [m]})
            blas.tbmv(sqrtG, x1, n = msq, k = 0, ldA = 1)


            # usx[1] = (x1 + (x[1] - z[:msq])) ./ sqrtG**2 
            #        = (As(ux[0]) + bsx[1] - bsz0 - S^-1 * bsz1 * S^-1) 
            #           ./ Gamma

            # x[1] := x[1] - z[:msq] 
            #       = bsx[1] - bsz0 - S^-1 * bsz1 * S^-1
            blas.axpy(z, x[1], -1.0, n = msq)

            # x[1] := x[1] + x1
            #       = As(ux) + bsx[1] - bsz0 - S^-1 * bsz1 * S^-1 
            blas.axpy(x1, x[1])

            # x[1] := x[1] / Gamma
            #       = (As(ux) + bsx[1] - bsz0 - S^-1 * bsz1 * S^-1 ) / Gamma
            #       = S^-1 * usx[1] * S^-1
            blas.tbsv(Gamma, x[1], n = msq, k = 0, ldA = 1)
            

            # z[msq:] := r1' * U * (-z[msq:] - x[1]) * U' * r1
            #         := -r1' * U * S^-1 * (bsz1 + ux[1]) * S^-1 *  U' * r1
            #         := -r1' * uz1 * r1
            blas.axpy(x[1], z, n = msq, offsety = msq)
            blas.scal(-1.0, z, offset = msq)
            __cngrnc(U, z, offsetx = msq)
            __cngrnc(W['r'][1], z, trans = 'T', offsetx = msq)

            # x[1] :=  S * x[1] * S
            #       =  usx1 
            blas.tbmv(S, x[1], n = msq, k = 0, ldA = 1)

            # z[:msq] = r0' * U * ( x1 - x[1] - z[:msq] ) * U' * r0
            #         = r0' * U * ( As(ux) - usx1 - bsz0 ) * U' * r0
            #         = r0' * U *  usz0 * U' * r0
            #         = r0' * uz0 * r0
            blas.axpy(x1, z, -1.0, n = msq)
            blas.scal(-1.0, z, n = msq)
            blas.axpy(x[1], z, -1.0, n = msq)
            __cngrnc(U, z)
            __cngrnc(W['r'][0], z, trans = 'T')

            # x[1] := Uti * x[1] * Uti'
            #       = ux[1]
            __cngrnc(Uti, x[1])


        return solve
Exemplo n.º 16
0
    def spmatrix(self, reordered=True, symmetric=False):
        """
        Return the :py:class:`cspmatrix` :math:`A` as a sparse matrix.

        With `reordered` set to `True` (default) the result is left in the
        reordered form; otherwise the inverse permutation is applied. If the
        :py:class:`cspmatrix` represents a Cholesky factor, only the default
        options are accepted.

        :param reordered:  boolean (default: True)
        :param symmetric:  boolean (default: False)
        :raises ValueError: if the object is a Cholesky factor and
            `symmetric` is true or `reordered` is false
        """
        symb = self.symb
        n = symb.n
        snptr, snode = symb.snptr, symb.snode
        relptr = symb.relptr
        snrowidx, sncolptr = symb.snrowidx, symb.sncolptr
        blkptr = symb.blkptr
        blkval = self.blkval

        if self.is_factor:
            if symmetric:
                raise ValueError(
                    "'symmetric = True' not implemented for Cholesky factors")
            if not reordered:
                raise ValueError(
                    "'reordered = False' not implemented for Cholesky factors")
            postorder = symb.snpost
            # rebind to +blkval so the in-place scaling below does not act
            # on self.blkval (presumably unary + returns a copy -- confirm)
            blkval = +blkval
            for k in postorder:
                width = snptr[k + 1] - snptr[k]  # |Nk|
                below = relptr[k + 1] - relptr[k]  # |Ak|
                if below == 0:
                    continue
                start = blkptr[k]
                height = below + width
                if width == 1:
                    # single column: scale the sub-diagonal part by the
                    # diagonal entry
                    blas.scal(blkval[start], blkval, offset=start + 1, n=below)
                else:
                    # multiply the sub-diagonal block from the right by the
                    # lower-triangular diagonal block
                    blas.trmm(blkval,
                              blkval,
                              side="R",
                              uplo="L",
                              transA="N",
                              diag="N",
                              m=below,
                              n=width,
                              ldA=height,
                              ldB=height,
                              offsetA=start,
                              offsetB=start + width)

        supernodes = range(symb.Nsn)

        # number of stored entries in each column
        colcount = matrix(0, (n, 1))
        for k in supernodes:
            first = snptr[k]
            width = snptr[k + 1] - first
            height = width + (relptr[k + 1] - relptr[k])
            for i in range(width):
                colcount[snode[first + i]] = height - i

        # column pointers: running sum of the column counts
        colptr = [0]
        for i in range(n):
            colptr.append(colptr[-1] + colcount[i])
        colptr = matrix(colptr)

        # gather values and row indices column by column
        nnz = colptr[-1]
        values = matrix(0.0, (nnz, 1))
        rowidx = matrix(0, (nnz, 1))
        for k in supernodes:
            first = snptr[k]
            width = snptr[k + 1] - first
            height = width + (relptr[k + 1] - relptr[k])
            rows_end = sncolptr[k + 1]
            for i in range(width):
                j = snode[first + i]
                # column i of the block, starting at its diagonal entry
                blas.copy(blkval,
                          values,
                          offsetx=blkptr[k] + (height + 1) * i,
                          offsety=colptr[j],
                          n=height - i)
                rowidx[colptr[j]:colptr[j + 1]] = snrowidx[sncolptr[k] +
                                                           i:rows_end]

        # expand the column pointers into explicit (row, column) triplet lists
        I, J = [], []
        for i in range(n):
            lo, hi = colptr[i], colptr[i + 1]
            I.extend(rowidx[lo:hi])
            J.extend([i] * (hi - lo))

        lower = spmatrix(values, I, J, (n, n))  # reordered, lower triangular

        if reordered or symb.p is None:
            # reordered matrix (do not apply inverse permutation)
            return symmetrize(lower) if symmetric else lower

        # apply inverse permutation
        full = perm(symmetrize(lower), symb.ip)
        return full if symmetric else tril(full)
Exemplo n.º 17
0
def scale(x, W, trans='N', inverse='N'):
    """
    Apply a Nesterov-Todd scaling, or its inverse, to x in place.

    On return x has been overwritten with

         W*x        if trans = 'N' and inverse = 'N'
         W^T*x      if trans = 'T' and inverse = 'N'
         W^{-1}*x   if trans = 'N' and inverse = 'I'
         W^{-T}*x   if trans = 'T' and inverse = 'I'.

    x is a dense 'd' matrix.  Every column of x is scaled separately and
    is laid out as the stacked 'dnl' (optional), 'l', 'q' and 's'
    components, in that order.

    W is a dictionary holding the scaling:

    - W['dnl']:  positive vector (optional)
    - W['dnli']: componentwise inverse of W['dnl'] (optional)
    - W['d']:    positive vector
    - W['di']:   componentwise inverse of W['d']
    - W['v']:    list of second-order cone vectors with unit hyperbolic
      norm
    - W['beta']: list of positive numbers, one per entry of W['v']
    - W['r']:    list of square matrices
    - W['rti']:  list of square matrices; rti[k] is the inverse
      transpose of r[k].

    'dnl' and 'dnli' are only present when the caller is the nonlinear
    solver.  Nothing is returned; x is modified in place.
    """
    from cvxopt import blas

    nrows = x.size[0]
    ncols = x.size[1]

    def diag_scale(d, first):
        # Multiply rows first, ..., first + len(d) - 1 of every column of
        # x by the entries of d (tbmv with bandwidth 0 is a diagonal
        # product) and return the row index following the scaled part.
        for col in range(ncols):
            blas.tbmv(d, x, n=d.size[0], k=0, ldA=1,
                      offsetx=col * nrows + first)
        return first + d.size[0]

    start = 0

    # Nonlinear component: xk := dnl .* xk, or xk := dnli .* xk for the
    # inverse scaling.
    if 'dnl' in W:
        start = diag_scale(W['dnl'] if inverse == 'N' else W['dnli'], start)

    # Linear 'l' component: xk := d .* xk, or xk := di .* xk for the
    # inverse scaling.
    start = diag_scale(W['d'] if inverse == 'N' else W['di'], start)

    # Second-order cone 'q' components.  With J = [1, 0; 0, -I],
    #
    #     forward:  xk := beta * (2*v*v' - J) * xk
    #     inverse:  xk := 1/beta * (2*J*v*v'*J - J) * xk
    #                   = 1/beta * (-J) * (2*v*((-J*xk)'*v)' + xk).
    #
    # Negating the leading row of the block multiplies it by -J, which is
    # why the inverse wraps the shared gemv/scal/ger sequence in two
    # extra sign flips.
    dots = matrix(0.0, (ncols, 1))
    undo = inverse == 'I'
    for idx, v in enumerate(W['v']):
        dim = v.size[0]
        if undo:
            blas.scal(-1.0, x, offset=start, inc=nrows)
        # dots[j] := v' * (block of column j)
        blas.gemv(x, v, dots, trans='T', m=dim, n=ncols, offsetA=start,
                  ldA=nrows)
        blas.scal(-1.0, x, offset=start, inc=nrows)
        # block := block + 2 * v * dots'
        blas.ger(v, dots, x, alpha=2.0, m=dim, n=ncols, ldA=nrows,
                 offsetA=start)
        if undo:
            blas.scal(-1.0, x, offset=start, inc=nrows)
            factor = 1.0 / W['beta'][idx]
        else:
            factor = W['beta'][idx]
        for col in range(ncols):
            blas.scal(factor, x, n=dim, offset=start + col * nrows)
        start += dim

    # Semidefinite 's' components.
    #
    #     forward:  xk := vec( r' * mat(xk) * r )      if trans = 'N'
    #               xk := vec( r * mat(xk) * r' )      if trans = 'T'
    #     inverse:  xk := vec( rti * mat(xk) * rti' )  if trans = 'N'
    #               xk := vec( rti' * mat(xk) * rti )  if trans = 'T'
    #
    # The forward scaling therefore runs the congruence with the opposite
    # transpose flag, while the inverse uses trans unchanged.
    largest = max([0] + [blk.size[0] for blk in W['r']])
    work = matrix(0.0, (largest, largest))

    if inverse == 'N':
        key = 'r'
        op = 'T' if trans == 'N' else 'N'
    else:
        key = 'rti'
        op = trans
    side = 'R' if op == 'N' else 'L'

    for idx in range(len(W['r'])):
        blk = W[key][idx]
        order = blk.size[0]
        for col in range(ncols):
            pos = start + col * nrows

            # Halve the diagonal of mat(xk) so that tril(xk) + tril(xk)'
            # reproduces the full symmetric matrix.
            blas.scal(0.5, x, offset=pos, inc=order + 1, n=order)

            # work := blk * tril(xk) (op is 'N') or tril(xk) * blk
            # (op is 'T').
            blas.copy(blk, work)
            blas.trmm(x, work, side=side, m=order, n=order, ldA=order,
                      ldB=order, offsetA=pos)

            # xk := blk*work' + work*blk'  if op is 'N'
            # xk := blk'*work + work'*blk  if op is 'T'
            blas.syr2k(blk, work, x, trans=op, n=order, k=order,
                       ldB=order, ldC=order, offsetC=pos)

        start += order ** 2
    pylab.figure(1, facecolor='w', figsize=(6,6)) 
    pylab.plot(V[0,:], V[1,:],'ow', mec='k')
    pylab.plot([0], [0], 'k+')
    I = [ k for k in range(n) if xd[k] > 1e-5 ]
    pylab.plot(V[0,I], V[1,I],'or')

# Enclosing ellipse is  {x | x' * (V*diag(xd)*V')^-1 * x = sqrt(2)}
# NOTE(review): the curve drawn below is sqrt(2) * L * u with |u| = 1 and
# L*L' = V*diag(xd)*V' (assuming the default lower-triangular potrf/trmm
# conventions), so the quadratic form evaluates to 2 on it -- confirm the
# intended right-hand side.

# Sample nopts equally spaced angles in [0, 2*pi) and store the matching
# unit-circle points as the columns of a 2 x nopts matrix.
nopts = 1000
angles = matrix( [ a*2.0*pi/nopts for a in range(nopts) ], (1,nopts) )
circle = matrix(0.0, (2,nopts))
circle[0,:], circle[1,:] = cos(angles), sin(angles)

# Build V*diag(xd)*V' and factor it in place; W is then used as the
# triangular factor that maps the scaled circle onto the ellipse
# (ellipse is overwritten by trmm).
W = V * spdiag(xd) * V.T
lapack.potrf(W)
ellipse = sqrt(2.0) * circle
blas.trmm(W, ellipse)
# Plotting is optional: only draw when pylab was importable.
if pylab_installed:
    pylab.plot(ellipse[0,:].T, ellipse[1,:].T, 'k--')
    pylab.axis([-5, 5, -5, 5])
    pylab.title('D-optimal design (fig. 7.9)')
    pylab.axis('off')


# E-design.
#
# maximize    w
# subject to  w*I <= V*diag(x)*V' 
#             x >= 0
#             sum(x) = 1

novars = n+1