def llt(L): """ Supernodal multifrontal Cholesky product: .. math:: X = LL^T where :math:`L` is lower-triangular. On exit, the argument `L` contains the product :math:`X`. :param L: :py:class:`cspmatrix` (factor) """ assert isinstance(L, cspmatrix) and L.is_factor is True, "L must be a cspmatrix factor" n = L.symb.n snpost = L.symb.snpost snptr = L.symb.snptr chptr = L.symb.chptr chidx = L.symb.chidx relptr = L.symb.relptr relidx = L.symb.relidx blkptr = L.symb.blkptr blkval = L.blkval stack = [] for k in snpost: nn = snptr[k+1]-snptr[k] # |Nk| na = relptr[k+1]-relptr[k] # |Ak| nj = na + nn # compute [I; L_{Ak,Nk}]*D_k*[I;L_{Ak,Nk}]'; store in F blas.trmm(blkval, blkval, side = "R", m = na, n = nn, ldA = nj,\ ldB = nj, offsetA = blkptr[k], offsetB = blkptr[k]+nn) F = matrix(0.0, (nj, nj)) blas.syrk(blkval, F, n = nj, k = nn, offsetA = blkptr[k], ldA = nj) # if supernode k has any children, subtract update matrices for _ in range(chptr[k],chptr[k+1]): Ui, i = stack.pop() frontal_add_update(F, Ui, relidx, relptr, i) # if supernode k is not a root node, push update matrix onto stack if na > 0: Uk = matrix(0.0,(na,na)) lapack.lacpy(F, Uk, m = na, n = na, uplo = 'L', offsetA = nn*nj+nn, ldA = nj) stack.append((Uk,k)) # copy leading Nk columns of F to blkval lapack.lacpy(F, blkval, m = nj, n = nn, ldA = nj, uplo = 'L',\ ldB = nj, offsetB = blkptr[k]) L._is_factor = False return
def cholesky(X): """ Supernodal multifrontal Cholesky factorization: .. math:: X = LL^T where :math:`L` is lower-triangular. On exit, the argument :math:`X` contains the Cholesky factor :math:`L`. :param X: :py:class:`cspmatrix` """ assert isinstance(X, cspmatrix) and X.is_factor is False, "X must be a cspmatrix" n = X.symb.n snpost = X.symb.snpost snptr = X.symb.snptr chptr = X.symb.chptr chidx = X.symb.chidx relptr = X.symb.relptr relidx = X.symb.relidx blkptr = X.symb.blkptr blkval = X.blkval stack = [] for k in snpost: nn = snptr[k+1]-snptr[k] # |Nk| na = relptr[k+1]-relptr[k] # |Ak| nj = na + nn # build frontal matrix F = matrix(0.0, (nj, nj)) lapack.lacpy(blkval, F, offsetA = blkptr[k], m = nj, n = nn, ldA = nj, uplo = 'L') # add update matrices from children to frontal matrix for i in range(chptr[k+1]-1,chptr[k]-1,-1): Ui = stack.pop() frontal_add_update(F, Ui, relidx, relptr, chidx[i]) # factor L_{Nk,Nk} lapack.potrf(F, n = nn, ldA = nj) # if supernode k is not a root node, compute and push update matrix onto stack if na > 0: # compute L_{Ak,Nk} := A_{Ak,Nk}*inv(L_{Nk,Nk}') blas.trsm(F, F, m = na, n = nn, ldA = nj, ldB = nj, offsetB = nn, transA = 'T', side = 'R') # compute Uk = Uk - L_{Ak,Nk}*inv(D_{Nk,Nk})*L_{Ak,Nk}' if nn == 1: blas.syr(F, F, n = na, offsetx = nn, \ offsetA = nn*nj+nn, ldA = nj, alpha = -1.0) else: blas.syrk(F, F, k = nn, n = na, offsetA = nn, ldA = nj, offsetC = nn*nj+nn, ldC = nj, alpha = -1.0, beta = 1.0) # compute L_{Ak,Nk} := L_{Ak,Nk}*inv(L_{Nk,Nk}) blas.trsm(F, F, m = na, n = nn,\ ldA = nj, ldB = nj, offsetB = nn, side = 'R') # add Uk to stack Uk = matrix(0.0,(na,na)) lapack.lacpy(F, Uk, m = na, n = na, uplo = 'L', offsetA = nn*nj+nn, ldA = nj) stack.append(Uk) # copy the leading Nk columns of frontal matrix to blkval lapack.lacpy(F, blkval, uplo = "L", offsetB = blkptr[k], m = nj, n = nn, ldB = nj) X.is_factor = True return
def f(x, y, z): """ Solve C * ux + G' * uzl - 2*A'(uzs21) = bx -uzs11 = bX1 -uzs22 = bX2 G * ux - D^2 * uzl = bzl [ -uX1 -A(ux)' ] [ uzs11 uzs21' ] [ ] - T * [ ] * T = bzs. [ -A(ux) -uX2 ] [ uzs21 uzs22 ] On entry, x = (bx, bX1, bX2) and z = [ bzl; bzs[:] ]. On exit, x = (ux, uX1, uX2) and z = [ D*uzl; (r'*uzs*r)[:] ]. Define X = uzs21, Z = T * uzs * T: C * ux + G' * uzl - 2*A'(X) = bx [ 0 X' ] [ bX1 0 ] T * [ ] * T - Z = T * [ ] * T [ X 0 ] [ 0 bX2 ] G * ux - D^2 * uzl = bzl [ -uX1 -A(ux)' ] [ Z11 Z21' ] [ ] - [ ] = bzs [ -A(ux) -uX2 ] [ Z21 Z22 ] Return x = (ux, uX1, uX2), z = [ D*uzl; (rti'*Z*rti)[:] ]. We use the congruence transformation [ V1 0 ] [ T11 T21' ] [ V1' 0 ] [ I S' ] [ ] [ ] [ ] = [ ] [ 0 V2' ] [ T21 T22 ] [ 0 V2 ] [ S I ] and the factorization X + S * X' * S = L( L'(X) ) to write this as C * ux + G' * uzl - 2*A'(X) = bx L'(V2^-1 * X * V1^-1) - L^-1(V2' * Z21 * V1') = bX G * ux - D^2 * uzl = bzl [ -uX1 -A(ux)' ] [ Z11 Z21' ] [ ] - [ ] = bzs, [ -A(ux) -uX2 ] [ Z21 Z22 ] or C * ux + Gs' * uuzl - 2*As'(XX) = bx XX - ZZ21 = bX Gs * ux - uuzl = D^-1 * bzl -As(ux) - ZZ21 = bbzs_21 -uX1 - Z11 = bzs_11 -uX2 - Z22 = bzs_22 if we introduce scaled variables uuzl = D * uzl XX = L'(V2^-1 * X * V1^-1) = L'(V2^-1 * uzs21 * V1^-1) ZZ21 = L^-1(V2' * Z21 * V1') and define bbzs_21 = L^-1(V2' * bzs_21 * V1') [ bX1 0 ] bX = L^-1( V2' * (T * [ ] * T)_21 * V1'). [ 0 bX2 ] Eliminating Z21 gives C * ux + Gs' * uuzl - 2*As'(XX) = bx Gs * ux - uuzl = D^-1 * bzl -As(ux) - XX = bbzs_21 - bX -uX1 - Z11 = bzs_11 -uX2 - Z22 = bzs_22 and eliminating uuzl and XX gives H * ux = bx + Gs' * D^-1 * bzl + 2*As'(bX - bbzs_21) Gs * ux - uuzl = D^-1 * bzl -As(ux) - XX = bbzs_21 - bX -uX1 - Z11 = bzs_11 -uX2 - Z22 = bzs_22. In summary, we can use the following algorithm: 1. bXX := bX - bbzs21 [ bX1 0 ] = L^-1( V2' * ((T * [ ] * T)_21 - bzs_21) * V1') [ 0 bX2 ] 2. Solve H * ux = bx + Gs' * D^-1 * bzl + 2*As'(bXX). 3. From ux, compute uuzl = Gs*ux - D^-1 * bzl and X = V2 * L^-T(-As(ux) + bXX) * V1. 4. Return ux, uuzl, rti' * Z * rti = r' * [ -bX1, X'; X, -bX2 ] * r and uX1 = -Z11 - bzs_11, uX2 = -Z22 - bzs_22. """ # Save bzs_11, bzs_22, bzs_21. lapack.lacpy(z, bz11, uplo = 'L', m = q, n = q, ldA = p+q, offsetA = m) lapack.lacpy(z, bz21, m = p, n = q, ldA = p+q, offsetA = m+q) lapack.lacpy(z, bz22, uplo = 'L', m = p, n = p, ldA = p+q, offsetA = m + (p+q+1)*q) # zl := D^-1 * zl # = D^-1 * bzl blas.tbmv(W['di'], z, n = m, k = 0, ldA = 1) # zs := r' * [ bX1, 0; 0, bX2 ] * r. # zs := [ bX1, 0; 0, bX2 ] blas.scal(0.0, z, offset = m) lapack.lacpy(x[1], z, uplo = 'L', m = q, n = q, ldB = p+q, offsetB = m) lapack.lacpy(x[2], z, uplo = 'L', m = p, n = p, ldB = p+q, offsetB = m + (p+q+1)*q) # scale diagonal of zs by 1/2 blas.scal(0.5, z, inc = p+q+1, offset = m) # a := tril(zs)*r blas.copy(r, a) blas.trmm(z, a, side = 'L', m = p+q, n = p+q, ldA = p+q, ldB = p+q, offsetA = m) # zs := a'*r + r'*a blas.syr2k(r, a, z, trans = 'T', n = p+q, k = p+q, ldB = p+q, ldC = p+q, offsetC = m) # bz21 := L^-1( V2' * ((r * zs * r')_21 - bz21) * V1') # # [ bX1 0 ] # = L^-1( V2' * ((T * [ ] * T)_21 - bz21) * V1'). # [ 0 bX2 ] # a = [ r21 r22 ] * z # = [ r21 r22 ] * r' * [ bX1, 0; 0, bX2 ] * r # = [ T21 T22 ] * [ bX1, 0; 0, bX2 ] * r blas.symm(z, r, a, side = 'R', m = p, n = p+q, ldA = p+q, ldC = p+q, offsetB = q) # bz21 := -bz21 + a * [ r11, r12 ]' # = -bz21 + (T * [ bX1, 0; 0, bX2 ] * T)_21 blas.gemm(a, r, bz21, transB = 'T', m = p, n = q, k = p+q, beta = -1.0, ldA = p+q, ldC = p) # bz21 := V2' * bz21 * V1' # = V2' * (-bz21 + (T*[bX1, 0; 0, bX2]*T)_21) * V1' blas.gemm(V2, bz21, tmp, transA = 'T', m = p, n = q, k = p, ldB = p) blas.gemm(tmp, V1, bz21, transB = 'T', m = p, n = q, k = q, ldC = p) # bz21[:] := D * (I-P) * bz21[:] # = L^-1 * bz21[:] # = bXX[:] blas.copy(bz21, tmp) base.gemv(P, bz21, tmp, alpha = -1.0, beta = 1.0) base.gemv(D, tmp, bz21) # Solve H * ux = bx + Gs' * D^-1 * bzl + 2*As'(bXX). # x[0] := x[0] + Gs'*zl + 2*As'(bz21) # = bx + G' * D^-1 * bzl + 2 * As'(bXX) blas.gemv(Gs, z, x[0], trans = 'T', alpha = 1.0, beta = 1.0) blas.gemv(As, bz21, x[0], trans = 'T', alpha = 2.0, beta = 1.0) # x[0] := H \ x[0] # = ux lapack.potrs(H, x[0]) # uuzl = Gs*ux - D^-1 * bzl blas.gemv(Gs, x[0], z, alpha = 1.0, beta = -1.0) # bz21 := V2 * L^-T(-As(ux) + bz21) * V1 # = X blas.gemv(As, x[0], bz21, alpha = -1.0, beta = 1.0) blas.tbsv(DV, bz21, n = p*q, k = 0, ldA = 1) blas.copy(bz21, tmp) base.gemv(P, tmp, bz21, alpha = -1.0, beta = 1.0, trans = 'T') blas.gemm(V2, bz21, tmp) blas.gemm(tmp, V1, bz21) # zs := -zs + r' * [ 0, X'; X, 0 ] * r # = r' * [ -bX1, X'; X, -bX2 ] * r. # a := bz21 * [ r11, r12 ] # = X * [ r11, r12 ] blas.gemm(bz21, r, a, m = p, n = p+q, k = q, ldA = p, ldC = p+q) # z := -z + [ r21, r22 ]' * a + a' * [ r21, r22 ] # = rti' * uzs * rti blas.syr2k(r, a, z, trans = 'T', beta = -1.0, n = p+q, k = p, offsetA = q, offsetC = m, ldB = p+q, ldC = p+q) # uX1 = -Z11 - bzs_11 # = -(r*zs*r')_11 - bzs_11 # uX2 = -Z22 - bzs_22 # = -(r*zs*r')_22 - bzs_22 blas.copy(bz11, x[1]) blas.copy(bz22, x[2]) # scale diagonal of zs by 1/2 blas.scal(0.5, z, inc = p+q+1, offset = m) # a := r*tril(zs) blas.copy(r, a) blas.trmm(z, a, side = 'R', m = p+q, n = p+q, ldA = p+q, ldB = p+q, offsetA = m) # x[1] := -x[1] - a[:q,:] * r[:q, :]' - r[:q,:] * a[:q,:]' # = -bzs_11 - (r*zs*r')_11 blas.syr2k(a, r, x[1], n = q, alpha = -1.0, beta = -1.0) # x[2] := -x[2] - a[q:,:] * r[q:, :]' - r[q:,:] * a[q:,:]' # = -bzs_22 - (r*zs*r')_22 blas.syr2k(a, r, x[2], n = p, alpha = -1.0, beta = -1.0, offsetA = q, offsetB = q) # scale diagonal of zs by 1/2 blas.scal(2.0, z, inc = p+q+1, offset = m)
def F(W): """ Create a solver for the linear equations C * ux + G' * uzl - 2*A'(uzs21) = bx -uzs11 = bX1 -uzs22 = bX2 G * ux - Dl^2 * uzl = bzl [ -uX1 -A(ux)' ] [ uzs11 uzs21' ] [ ] - r*r' * [ ] * r*r' = bzs [ -A(ux) -uX2 ] [ uzs21 uzs22 ] where Dl = diag(W['l']), r = W['r'][0]. On entry, x = (bx, bX1, bX2) and z = [ bzl; bzs[:] ]. On exit, x = (ux, uX1, uX2) and z = [ Dl*uzl; (r'*uzs*r)[:] ]. 1. Compute matrices V1, V2 such that (with T = r*r') [ V1 0 ] [ T11 T21' ] [ V1' 0 ] [ I S' ] [ ] [ ] [ ] = [ ] [ 0 V2' ] [ T21 T22 ] [ 0 V2 ] [ S I ] and S = [ diag(s); 0 ], s a positive q-vector. 2. Factor the mapping X -> X + S * X' * S: X + S * X' * S = L( L'( X )). 3. Compute scaled mappings: a matrix As with as its columns the coefficients of the scaled mapping L^-1( V2' * A() * V1' ) and the matrix Gs = Dl^-1 * G. 4. Cholesky factorization of H = C + Gs'*Gs + 2*As'*As. """ # 1. Compute V1, V2, s. r = W['r'][0] # LQ factorization R[:q, :] = L1 * Q1. lapack.lacpy(r, Q1, m = q) lapack.gelqf(Q1, tau1) lapack.lacpy(Q1, L1, n = q, uplo = 'L') lapack.orglq(Q1, tau1) # LQ factorization R[q:, :] = L2 * Q2. lapack.lacpy(r, Q2, m = p, offsetA = q) lapack.gelqf(Q2, tau2) lapack.lacpy(Q2, L2, n = p, uplo = 'L') lapack.orglq(Q2, tau2) # V2, V1, s are computed from an SVD: if # # Q2 * Q1' = U * diag(s) * V', # # then V1 = V' * L1^-1 and V2 = L2^-T * U. # T21 = Q2 * Q1.T blas.gemm(Q2, Q1, T21, transB = 'T') # SVD T21 = U * diag(s) * V'. Store U in V2 and V' in V1. lapack.gesvd(T21, s, jobu = 'A', jobvt = 'A', U = V2, Vt = V1) # # Q2 := Q2 * Q1' without extracting Q1; store T21 in Q2 # this will requires lapack.ormlq or lapack.unmlq # V2 = L2^-T * U blas.trsm(L2, V2, transA = 'T') # V1 = V' * L1^-1 blas.trsm(L1, V1, side = 'R') # 2. Factorization X + S * X' * S = L( L'( X )). # # The factor L is stored as a diagonal matrix D and a sparse lower # triangular matrix P, such that # # L(X)[:] = D**-1 * (I + P) * X[:] # L^-1(X)[:] = D * (I - P) * X[:]. # SS is q x q with SS[i,j] = si*sj. blas.scal(0.0, SS) blas.syr(s, SS) # For a p x q matrix X, P*X[:] is Y[:] where # # Yij = si * sj * Xji if i < j # = 0 otherwise. # P.V = SS[Itril2] # For a p x q matrix X, D*X[:] is Y[:] where # # Yij = Xij / sqrt( 1 - si^2 * sj^2 ) if i < j # = Xii / sqrt( 1 + si^2 ) if i = j # = Xij otherwise. # DV[Idiag] = sqrt(1.0 + SS[::q+1]) DV[Itriu] = sqrt(1.0 - SS[Itril3]**2) D.V = DV**-1 # 3. Scaled linear mappings # Ask := V2' * Ask * V1' blas.scal(0.0, As) base.axpy(A, As) for i in xrange(n): # tmp := V2' * As[i, :] blas.gemm(V2, As, tmp, transA = 'T', m = p, n = q, k = p, ldB = p, offsetB = i*p*q) # As[:,i] := tmp * V1' blas.gemm(tmp, V1, As, transB = 'T', m = p, n = q, k = q, ldC = p, offsetC = i*p*q) # As := D * (I - P) * As # = L^-1 * As. blas.copy(As, As2) base.gemm(P, As, As2, alpha = -1.0, beta = 1.0) base.gemm(D, As2, As) # Gs := Dl^-1 * G blas.scal(0.0, Gs) base.axpy(G, Gs) for k in xrange(n): blas.tbmv(W['di'], Gs, n = m, k = 0, ldA = 1, offsetx = k*m) # 4. Cholesky factorization of H = C + Gs' * Gs + 2 * As' * As. blas.syrk(As, H, trans = 'T', alpha = 2.0) blas.syrk(Gs, H, trans = 'T', beta = 1.0) base.axpy(C, H) lapack.potrf(H) def f(x, y, z): """ Solve C * ux + G' * uzl - 2*A'(uzs21) = bx -uzs11 = bX1 -uzs22 = bX2 G * ux - D^2 * uzl = bzl [ -uX1 -A(ux)' ] [ uzs11 uzs21' ] [ ] - T * [ ] * T = bzs. [ -A(ux) -uX2 ] [ uzs21 uzs22 ] On entry, x = (bx, bX1, bX2) and z = [ bzl; bzs[:] ]. On exit, x = (ux, uX1, uX2) and z = [ D*uzl; (r'*uzs*r)[:] ]. Define X = uzs21, Z = T * uzs * T: C * ux + G' * uzl - 2*A'(X) = bx [ 0 X' ] [ bX1 0 ] T * [ ] * T - Z = T * [ ] * T [ X 0 ] [ 0 bX2 ] G * ux - D^2 * uzl = bzl [ -uX1 -A(ux)' ] [ Z11 Z21' ] [ ] - [ ] = bzs [ -A(ux) -uX2 ] [ Z21 Z22 ] Return x = (ux, uX1, uX2), z = [ D*uzl; (rti'*Z*rti)[:] ]. We use the congruence transformation [ V1 0 ] [ T11 T21' ] [ V1' 0 ] [ I S' ] [ ] [ ] [ ] = [ ] [ 0 V2' ] [ T21 T22 ] [ 0 V2 ] [ S I ] and the factorization X + S * X' * S = L( L'(X) ) to write this as C * ux + G' * uzl - 2*A'(X) = bx L'(V2^-1 * X * V1^-1) - L^-1(V2' * Z21 * V1') = bX G * ux - D^2 * uzl = bzl [ -uX1 -A(ux)' ] [ Z11 Z21' ] [ ] - [ ] = bzs, [ -A(ux) -uX2 ] [ Z21 Z22 ] or C * ux + Gs' * uuzl - 2*As'(XX) = bx XX - ZZ21 = bX Gs * ux - uuzl = D^-1 * bzl -As(ux) - ZZ21 = bbzs_21 -uX1 - Z11 = bzs_11 -uX2 - Z22 = bzs_22 if we introduce scaled variables uuzl = D * uzl XX = L'(V2^-1 * X * V1^-1) = L'(V2^-1 * uzs21 * V1^-1) ZZ21 = L^-1(V2' * Z21 * V1') and define bbzs_21 = L^-1(V2' * bzs_21 * V1') [ bX1 0 ] bX = L^-1( V2' * (T * [ ] * T)_21 * V1'). [ 0 bX2 ] Eliminating Z21 gives C * ux + Gs' * uuzl - 2*As'(XX) = bx Gs * ux - uuzl = D^-1 * bzl -As(ux) - XX = bbzs_21 - bX -uX1 - Z11 = bzs_11 -uX2 - Z22 = bzs_22 and eliminating uuzl and XX gives H * ux = bx + Gs' * D^-1 * bzl + 2*As'(bX - bbzs_21) Gs * ux - uuzl = D^-1 * bzl -As(ux) - XX = bbzs_21 - bX -uX1 - Z11 = bzs_11 -uX2 - Z22 = bzs_22. In summary, we can use the following algorithm: 1. bXX := bX - bbzs21 [ bX1 0 ] = L^-1( V2' * ((T * [ ] * T)_21 - bzs_21) * V1') [ 0 bX2 ] 2. Solve H * ux = bx + Gs' * D^-1 * bzl + 2*As'(bXX). 3. From ux, compute uuzl = Gs*ux - D^-1 * bzl and X = V2 * L^-T(-As(ux) + bXX) * V1. 4. Return ux, uuzl, rti' * Z * rti = r' * [ -bX1, X'; X, -bX2 ] * r and uX1 = -Z11 - bzs_11, uX2 = -Z22 - bzs_22. """ # Save bzs_11, bzs_22, bzs_21. lapack.lacpy(z, bz11, uplo = 'L', m = q, n = q, ldA = p+q, offsetA = m) lapack.lacpy(z, bz21, m = p, n = q, ldA = p+q, offsetA = m+q) lapack.lacpy(z, bz22, uplo = 'L', m = p, n = p, ldA = p+q, offsetA = m + (p+q+1)*q) # zl := D^-1 * zl # = D^-1 * bzl blas.tbmv(W['di'], z, n = m, k = 0, ldA = 1) # zs := r' * [ bX1, 0; 0, bX2 ] * r. # zs := [ bX1, 0; 0, bX2 ] blas.scal(0.0, z, offset = m) lapack.lacpy(x[1], z, uplo = 'L', m = q, n = q, ldB = p+q, offsetB = m) lapack.lacpy(x[2], z, uplo = 'L', m = p, n = p, ldB = p+q, offsetB = m + (p+q+1)*q) # scale diagonal of zs by 1/2 blas.scal(0.5, z, inc = p+q+1, offset = m) # a := tril(zs)*r blas.copy(r, a) blas.trmm(z, a, side = 'L', m = p+q, n = p+q, ldA = p+q, ldB = p+q, offsetA = m) # zs := a'*r + r'*a blas.syr2k(r, a, z, trans = 'T', n = p+q, k = p+q, ldB = p+q, ldC = p+q, offsetC = m) # bz21 := L^-1( V2' * ((r * zs * r')_21 - bz21) * V1') # # [ bX1 0 ] # = L^-1( V2' * ((T * [ ] * T)_21 - bz21) * V1'). # [ 0 bX2 ] # a = [ r21 r22 ] * z # = [ r21 r22 ] * r' * [ bX1, 0; 0, bX2 ] * r # = [ T21 T22 ] * [ bX1, 0; 0, bX2 ] * r blas.symm(z, r, a, side = 'R', m = p, n = p+q, ldA = p+q, ldC = p+q, offsetB = q) # bz21 := -bz21 + a * [ r11, r12 ]' # = -bz21 + (T * [ bX1, 0; 0, bX2 ] * T)_21 blas.gemm(a, r, bz21, transB = 'T', m = p, n = q, k = p+q, beta = -1.0, ldA = p+q, ldC = p) # bz21 := V2' * bz21 * V1' # = V2' * (-bz21 + (T*[bX1, 0; 0, bX2]*T)_21) * V1' blas.gemm(V2, bz21, tmp, transA = 'T', m = p, n = q, k = p, ldB = p) blas.gemm(tmp, V1, bz21, transB = 'T', m = p, n = q, k = q, ldC = p) # bz21[:] := D * (I-P) * bz21[:] # = L^-1 * bz21[:] # = bXX[:] blas.copy(bz21, tmp) base.gemv(P, bz21, tmp, alpha = -1.0, beta = 1.0) base.gemv(D, tmp, bz21) # Solve H * ux = bx + Gs' * D^-1 * bzl + 2*As'(bXX). # x[0] := x[0] + Gs'*zl + 2*As'(bz21) # = bx + G' * D^-1 * bzl + 2 * As'(bXX) blas.gemv(Gs, z, x[0], trans = 'T', alpha = 1.0, beta = 1.0) blas.gemv(As, bz21, x[0], trans = 'T', alpha = 2.0, beta = 1.0) # x[0] := H \ x[0] # = ux lapack.potrs(H, x[0]) # uuzl = Gs*ux - D^-1 * bzl blas.gemv(Gs, x[0], z, alpha = 1.0, beta = -1.0) # bz21 := V2 * L^-T(-As(ux) + bz21) * V1 # = X blas.gemv(As, x[0], bz21, alpha = -1.0, beta = 1.0) blas.tbsv(DV, bz21, n = p*q, k = 0, ldA = 1) blas.copy(bz21, tmp) base.gemv(P, tmp, bz21, alpha = -1.0, beta = 1.0, trans = 'T') blas.gemm(V2, bz21, tmp) blas.gemm(tmp, V1, bz21) # zs := -zs + r' * [ 0, X'; X, 0 ] * r # = r' * [ -bX1, X'; X, -bX2 ] * r. # a := bz21 * [ r11, r12 ] # = X * [ r11, r12 ] blas.gemm(bz21, r, a, m = p, n = p+q, k = q, ldA = p, ldC = p+q) # z := -z + [ r21, r22 ]' * a + a' * [ r21, r22 ] # = rti' * uzs * rti blas.syr2k(r, a, z, trans = 'T', beta = -1.0, n = p+q, k = p, offsetA = q, offsetC = m, ldB = p+q, ldC = p+q) # uX1 = -Z11 - bzs_11 # = -(r*zs*r')_11 - bzs_11 # uX2 = -Z22 - bzs_22 # = -(r*zs*r')_22 - bzs_22 blas.copy(bz11, x[1]) blas.copy(bz22, x[2]) # scale diagonal of zs by 1/2 blas.scal(0.5, z, inc = p+q+1, offset = m) # a := r*tril(zs) blas.copy(r, a) blas.trmm(z, a, side = 'R', m = p+q, n = p+q, ldA = p+q, ldB = p+q, offsetA = m) # x[1] := -x[1] - a[:q,:] * r[:q, :]' - r[:q,:] * a[:q,:]' # = -bzs_11 - (r*zs*r')_11 blas.syr2k(a, r, x[1], n = q, alpha = -1.0, beta = -1.0) # x[2] := -x[2] - a[q:,:] * r[q:, :]' - r[q:,:] * a[q:,:]' # = -bzs_22 - (r*zs*r')_22 blas.syr2k(a, r, x[2], n = p, alpha = -1.0, beta = -1.0, offsetA = q, offsetB = q) # scale diagonal of zs by 1/2 blas.scal(2.0, z, inc = p+q+1, offset = m) return f
def f(x, y, z): """ Solve C * ux + G' * uzl - 2*A'(uzs21) = bx -uzs11 = bX1 -uzs22 = bX2 G * ux - D^2 * uzl = bzl [ -uX1 -A(ux)' ] [ uzs11 uzs21' ] [ ] - T * [ ] * T = bzs. [ -A(ux) -uX2 ] [ uzs21 uzs22 ] On entry, x = (bx, bX1, bX2) and z = [ bzl; bzs[:] ]. On exit, x = (ux, uX1, uX2) and z = [ D*uzl; (r'*uzs*r)[:] ]. Define X = uzs21, Z = T * uzs * T: C * ux + G' * uzl - 2*A'(X) = bx [ 0 X' ] [ bX1 0 ] T * [ ] * T - Z = T * [ ] * T [ X 0 ] [ 0 bX2 ] G * ux - D^2 * uzl = bzl [ -uX1 -A(ux)' ] [ Z11 Z21' ] [ ] - [ ] = bzs [ -A(ux) -uX2 ] [ Z21 Z22 ] Return x = (ux, uX1, uX2), z = [ D*uzl; (rti'*Z*rti)[:] ]. We use the congruence transformation [ V1 0 ] [ T11 T21' ] [ V1' 0 ] [ I S' ] [ ] [ ] [ ] = [ ] [ 0 V2' ] [ T21 T22 ] [ 0 V2 ] [ S I ] and the factorization X + S * X' * S = L( L'(X) ) to write this as C * ux + G' * uzl - 2*A'(X) = bx L'(V2^-1 * X * V1^-1) - L^-1(V2' * Z21 * V1') = bX G * ux - D^2 * uzl = bzl [ -uX1 -A(ux)' ] [ Z11 Z21' ] [ ] - [ ] = bzs, [ -A(ux) -uX2 ] [ Z21 Z22 ] or C * ux + Gs' * uuzl - 2*As'(XX) = bx XX - ZZ21 = bX Gs * ux - uuzl = D^-1 * bzl -As(ux) - ZZ21 = bbzs_21 -uX1 - Z11 = bzs_11 -uX2 - Z22 = bzs_22 if we introduce scaled variables uuzl = D * uzl XX = L'(V2^-1 * X * V1^-1) = L'(V2^-1 * uzs21 * V1^-1) ZZ21 = L^-1(V2' * Z21 * V1') and define bbzs_21 = L^-1(V2' * bzs_21 * V1') [ bX1 0 ] bX = L^-1( V2' * (T * [ ] * T)_21 * V1'). [ 0 bX2 ] Eliminating Z21 gives C * ux + Gs' * uuzl - 2*As'(XX) = bx Gs * ux - uuzl = D^-1 * bzl -As(ux) - XX = bbzs_21 - bX -uX1 - Z11 = bzs_11 -uX2 - Z22 = bzs_22 and eliminating uuzl and XX gives H * ux = bx + Gs' * D^-1 * bzl + 2*As'(bX - bbzs_21) Gs * ux - uuzl = D^-1 * bzl -As(ux) - XX = bbzs_21 - bX -uX1 - Z11 = bzs_11 -uX2 - Z22 = bzs_22. In summary, we can use the following algorithm: 1. bXX := bX - bbzs21 [ bX1 0 ] = L^-1( V2' * ((T * [ ] * T)_21 - bzs_21) * V1') [ 0 bX2 ] 2. Solve H * ux = bx + Gs' * D^-1 * bzl + 2*As'(bXX). 3. From ux, compute uuzl = Gs*ux - D^-1 * bzl and X = V2 * L^-T(-As(ux) + bXX) * V1. 4. Return ux, uuzl, rti' * Z * rti = r' * [ -bX1, X'; X, -bX2 ] * r and uX1 = -Z11 - bzs_11, uX2 = -Z22 - bzs_22. """ # Save bzs_11, bzs_22, bzs_21. lapack.lacpy(z, bz11, uplo='L', m=q, n=q, ldA=p + q, offsetA=m) lapack.lacpy(z, bz21, m=p, n=q, ldA=p + q, offsetA=m + q) lapack.lacpy(z, bz22, uplo='L', m=p, n=p, ldA=p + q, offsetA=m + (p + q + 1) * q) # zl := D^-1 * zl # = D^-1 * bzl blas.tbmv(W['di'], z, n=m, k=0, ldA=1) # zs := r' * [ bX1, 0; 0, bX2 ] * r. # zs := [ bX1, 0; 0, bX2 ] blas.scal(0.0, z, offset=m) lapack.lacpy(x[1], z, uplo='L', m=q, n=q, ldB=p + q, offsetB=m) lapack.lacpy(x[2], z, uplo='L', m=p, n=p, ldB=p + q, offsetB=m + (p + q + 1) * q) # scale diagonal of zs by 1/2 blas.scal(0.5, z, inc=p + q + 1, offset=m) # a := tril(zs)*r blas.copy(r, a) blas.trmm(z, a, side='L', m=p + q, n=p + q, ldA=p + q, ldB=p + q, offsetA=m) # zs := a'*r + r'*a blas.syr2k(r, a, z, trans='T', n=p + q, k=p + q, ldB=p + q, ldC=p + q, offsetC=m) # bz21 := L^-1( V2' * ((r * zs * r')_21 - bz21) * V1') # # [ bX1 0 ] # = L^-1( V2' * ((T * [ ] * T)_21 - bz21) * V1'). # [ 0 bX2 ] # a = [ r21 r22 ] * z # = [ r21 r22 ] * r' * [ bX1, 0; 0, bX2 ] * r # = [ T21 T22 ] * [ bX1, 0; 0, bX2 ] * r blas.symm(z, r, a, side='R', m=p, n=p + q, ldA=p + q, ldC=p + q, offsetB=q) # bz21 := -bz21 + a * [ r11, r12 ]' # = -bz21 + (T * [ bX1, 0; 0, bX2 ] * T)_21 blas.gemm(a, r, bz21, transB='T', m=p, n=q, k=p + q, beta=-1.0, ldA=p + q, ldC=p) # bz21 := V2' * bz21 * V1' # = V2' * (-bz21 + (T*[bX1, 0; 0, bX2]*T)_21) * V1' blas.gemm(V2, bz21, tmp, transA='T', m=p, n=q, k=p, ldB=p) blas.gemm(tmp, V1, bz21, transB='T', m=p, n=q, k=q, ldC=p) # bz21[:] := D * (I-P) * bz21[:] # = L^-1 * bz21[:] # = bXX[:] blas.copy(bz21, tmp) base.gemv(P, bz21, tmp, alpha=-1.0, beta=1.0) base.gemv(D, tmp, bz21) # Solve H * ux = bx + Gs' * D^-1 * bzl + 2*As'(bXX). # x[0] := x[0] + Gs'*zl + 2*As'(bz21) # = bx + G' * D^-1 * bzl + 2 * As'(bXX) blas.gemv(Gs, z, x[0], trans='T', alpha=1.0, beta=1.0) blas.gemv(As, bz21, x[0], trans='T', alpha=2.0, beta=1.0) # x[0] := H \ x[0] # = ux lapack.potrs(H, x[0]) # uuzl = Gs*ux - D^-1 * bzl blas.gemv(Gs, x[0], z, alpha=1.0, beta=-1.0) # bz21 := V2 * L^-T(-As(ux) + bz21) * V1 # = X blas.gemv(As, x[0], bz21, alpha=-1.0, beta=1.0) blas.tbsv(DV, bz21, n=p * q, k=0, ldA=1) blas.copy(bz21, tmp) base.gemv(P, tmp, bz21, alpha=-1.0, beta=1.0, trans='T') blas.gemm(V2, bz21, tmp) blas.gemm(tmp, V1, bz21) # zs := -zs + r' * [ 0, X'; X, 0 ] * r # = r' * [ -bX1, X'; X, -bX2 ] * r. # a := bz21 * [ r11, r12 ] # = X * [ r11, r12 ] blas.gemm(bz21, r, a, m=p, n=p + q, k=q, ldA=p, ldC=p + q) # z := -z + [ r21, r22 ]' * a + a' * [ r21, r22 ] # = rti' * uzs * rti blas.syr2k(r, a, z, trans='T', beta=-1.0, n=p + q, k=p, offsetA=q, offsetC=m, ldB=p + q, ldC=p + q) # uX1 = -Z11 - bzs_11 # = -(r*zs*r')_11 - bzs_11 # uX2 = -Z22 - bzs_22 # = -(r*zs*r')_22 - bzs_22 blas.copy(bz11, x[1]) blas.copy(bz22, x[2]) # scale diagonal of zs by 1/2 blas.scal(0.5, z, inc=p + q + 1, offset=m) # a := r*tril(zs) blas.copy(r, a) blas.trmm(z, a, side='R', m=p + q, n=p + q, ldA=p + q, ldB=p + q, offsetA=m) # x[1] := -x[1] - a[:q,:] * r[:q, :]' - r[:q,:] * a[:q,:]' # = -bzs_11 - (r*zs*r')_11 blas.syr2k(a, r, x[1], n=q, alpha=-1.0, beta=-1.0) # x[2] := -x[2] - a[q:,:] * r[q:, :]' - r[q:,:] * a[q:,:]' # = -bzs_22 - (r*zs*r')_22 blas.syr2k(a, r, x[2], n=p, alpha=-1.0, beta=-1.0, offsetA=q, offsetB=q) # scale diagonal of zs by 1/2 blas.scal(2.0, z, inc=p + q + 1, offset=m)
def F(W): """ Create a solver for the linear equations C * ux + G' * uzl - 2*A'(uzs21) = bx -uzs11 = bX1 -uzs22 = bX2 G * ux - Dl^2 * uzl = bzl [ -uX1 -A(ux)' ] [ uzs11 uzs21' ] [ ] - r*r' * [ ] * r*r' = bzs [ -A(ux) -uX2 ] [ uzs21 uzs22 ] where Dl = diag(W['l']), r = W['r'][0]. On entry, x = (bx, bX1, bX2) and z = [ bzl; bzs[:] ]. On exit, x = (ux, uX1, uX2) and z = [ Dl*uzl; (r'*uzs*r)[:] ]. 1. Compute matrices V1, V2 such that (with T = r*r') [ V1 0 ] [ T11 T21' ] [ V1' 0 ] [ I S' ] [ ] [ ] [ ] = [ ] [ 0 V2' ] [ T21 T22 ] [ 0 V2 ] [ S I ] and S = [ diag(s); 0 ], s a positive q-vector. 2. Factor the mapping X -> X + S * X' * S: X + S * X' * S = L( L'( X )). 3. Compute scaled mappings: a matrix As with as its columns the coefficients of the scaled mapping L^-1( V2' * A() * V1' ) and the matrix Gs = Dl^-1 * G. 4. Cholesky factorization of H = C + Gs'*Gs + 2*As'*As. """ # 1. Compute V1, V2, s. r = W['r'][0] # LQ factorization R[:q, :] = L1 * Q1. lapack.lacpy(r, Q1, m=q) lapack.gelqf(Q1, tau1) lapack.lacpy(Q1, L1, n=q, uplo='L') lapack.orglq(Q1, tau1) # LQ factorization R[q:, :] = L2 * Q2. lapack.lacpy(r, Q2, m=p, offsetA=q) lapack.gelqf(Q2, tau2) lapack.lacpy(Q2, L2, n=p, uplo='L') lapack.orglq(Q2, tau2) # V2, V1, s are computed from an SVD: if # # Q2 * Q1' = U * diag(s) * V', # # then V1 = V' * L1^-1 and V2 = L2^-T * U. # T21 = Q2 * Q1.T blas.gemm(Q2, Q1, T21, transB='T') # SVD T21 = U * diag(s) * V'. Store U in V2 and V' in V1. lapack.gesvd(T21, s, jobu='A', jobvt='A', U=V2, Vt=V1) # # Q2 := Q2 * Q1' without extracting Q1; store T21 in Q2 # this will requires lapack.ormlq or lapack.unmlq # V2 = L2^-T * U blas.trsm(L2, V2, transA='T') # V1 = V' * L1^-1 blas.trsm(L1, V1, side='R') # 2. Factorization X + S * X' * S = L( L'( X )). # # The factor L is stored as a diagonal matrix D and a sparse lower # triangular matrix P, such that # # L(X)[:] = D**-1 * (I + P) * X[:] # L^-1(X)[:] = D * (I - P) * X[:]. # SS is q x q with SS[i,j] = si*sj. blas.scal(0.0, SS) blas.syr(s, SS) # For a p x q matrix X, P*X[:] is Y[:] where # # Yij = si * sj * Xji if i < j # = 0 otherwise. # P.V = SS[Itril2] # For a p x q matrix X, D*X[:] is Y[:] where # # Yij = Xij / sqrt( 1 - si^2 * sj^2 ) if i < j # = Xii / sqrt( 1 + si^2 ) if i = j # = Xij otherwise. # DV[Idiag] = sqrt(1.0 + SS[::q + 1]) DV[Itriu] = sqrt(1.0 - SS[Itril3]**2) D.V = DV**-1 # 3. Scaled linear mappings # Ask := V2' * Ask * V1' blas.scal(0.0, As) base.axpy(A, As) for i in xrange(n): # tmp := V2' * As[i, :] blas.gemm(V2, As, tmp, transA='T', m=p, n=q, k=p, ldB=p, offsetB=i * p * q) # As[:,i] := tmp * V1' blas.gemm(tmp, V1, As, transB='T', m=p, n=q, k=q, ldC=p, offsetC=i * p * q) # As := D * (I - P) * As # = L^-1 * As. blas.copy(As, As2) base.gemm(P, As, As2, alpha=-1.0, beta=1.0) base.gemm(D, As2, As) # Gs := Dl^-1 * G blas.scal(0.0, Gs) base.axpy(G, Gs) for k in xrange(n): blas.tbmv(W['di'], Gs, n=m, k=0, ldA=1, offsetx=k * m) # 4. Cholesky factorization of H = C + Gs' * Gs + 2 * As' * As. blas.syrk(As, H, trans='T', alpha=2.0) blas.syrk(Gs, H, trans='T', beta=1.0) base.axpy(C, H) lapack.potrf(H) def f(x, y, z): """ Solve C * ux + G' * uzl - 2*A'(uzs21) = bx -uzs11 = bX1 -uzs22 = bX2 G * ux - D^2 * uzl = bzl [ -uX1 -A(ux)' ] [ uzs11 uzs21' ] [ ] - T * [ ] * T = bzs. [ -A(ux) -uX2 ] [ uzs21 uzs22 ] On entry, x = (bx, bX1, bX2) and z = [ bzl; bzs[:] ]. On exit, x = (ux, uX1, uX2) and z = [ D*uzl; (r'*uzs*r)[:] ]. Define X = uzs21, Z = T * uzs * T: C * ux + G' * uzl - 2*A'(X) = bx [ 0 X' ] [ bX1 0 ] T * [ ] * T - Z = T * [ ] * T [ X 0 ] [ 0 bX2 ] G * ux - D^2 * uzl = bzl [ -uX1 -A(ux)' ] [ Z11 Z21' ] [ ] - [ ] = bzs [ -A(ux) -uX2 ] [ Z21 Z22 ] Return x = (ux, uX1, uX2), z = [ D*uzl; (rti'*Z*rti)[:] ]. We use the congruence transformation [ V1 0 ] [ T11 T21' ] [ V1' 0 ] [ I S' ] [ ] [ ] [ ] = [ ] [ 0 V2' ] [ T21 T22 ] [ 0 V2 ] [ S I ] and the factorization X + S * X' * S = L( L'(X) ) to write this as C * ux + G' * uzl - 2*A'(X) = bx L'(V2^-1 * X * V1^-1) - L^-1(V2' * Z21 * V1') = bX G * ux - D^2 * uzl = bzl [ -uX1 -A(ux)' ] [ Z11 Z21' ] [ ] - [ ] = bzs, [ -A(ux) -uX2 ] [ Z21 Z22 ] or C * ux + Gs' * uuzl - 2*As'(XX) = bx XX - ZZ21 = bX Gs * ux - uuzl = D^-1 * bzl -As(ux) - ZZ21 = bbzs_21 -uX1 - Z11 = bzs_11 -uX2 - Z22 = bzs_22 if we introduce scaled variables uuzl = D * uzl XX = L'(V2^-1 * X * V1^-1) = L'(V2^-1 * uzs21 * V1^-1) ZZ21 = L^-1(V2' * Z21 * V1') and define bbzs_21 = L^-1(V2' * bzs_21 * V1') [ bX1 0 ] bX = L^-1( V2' * (T * [ ] * T)_21 * V1'). [ 0 bX2 ] Eliminating Z21 gives C * ux + Gs' * uuzl - 2*As'(XX) = bx Gs * ux - uuzl = D^-1 * bzl -As(ux) - XX = bbzs_21 - bX -uX1 - Z11 = bzs_11 -uX2 - Z22 = bzs_22 and eliminating uuzl and XX gives H * ux = bx + Gs' * D^-1 * bzl + 2*As'(bX - bbzs_21) Gs * ux - uuzl = D^-1 * bzl -As(ux) - XX = bbzs_21 - bX -uX1 - Z11 = bzs_11 -uX2 - Z22 = bzs_22. In summary, we can use the following algorithm: 1. bXX := bX - bbzs21 [ bX1 0 ] = L^-1( V2' * ((T * [ ] * T)_21 - bzs_21) * V1') [ 0 bX2 ] 2. Solve H * ux = bx + Gs' * D^-1 * bzl + 2*As'(bXX). 3. From ux, compute uuzl = Gs*ux - D^-1 * bzl and X = V2 * L^-T(-As(ux) + bXX) * V1. 4. Return ux, uuzl, rti' * Z * rti = r' * [ -bX1, X'; X, -bX2 ] * r and uX1 = -Z11 - bzs_11, uX2 = -Z22 - bzs_22. """ # Save bzs_11, bzs_22, bzs_21. lapack.lacpy(z, bz11, uplo='L', m=q, n=q, ldA=p + q, offsetA=m) lapack.lacpy(z, bz21, m=p, n=q, ldA=p + q, offsetA=m + q) lapack.lacpy(z, bz22, uplo='L', m=p, n=p, ldA=p + q, offsetA=m + (p + q + 1) * q) # zl := D^-1 * zl # = D^-1 * bzl blas.tbmv(W['di'], z, n=m, k=0, ldA=1) # zs := r' * [ bX1, 0; 0, bX2 ] * r. # zs := [ bX1, 0; 0, bX2 ] blas.scal(0.0, z, offset=m) lapack.lacpy(x[1], z, uplo='L', m=q, n=q, ldB=p + q, offsetB=m) lapack.lacpy(x[2], z, uplo='L', m=p, n=p, ldB=p + q, offsetB=m + (p + q + 1) * q) # scale diagonal of zs by 1/2 blas.scal(0.5, z, inc=p + q + 1, offset=m) # a := tril(zs)*r blas.copy(r, a) blas.trmm(z, a, side='L', m=p + q, n=p + q, ldA=p + q, ldB=p + q, offsetA=m) # zs := a'*r + r'*a blas.syr2k(r, a, z, trans='T', n=p + q, k=p + q, ldB=p + q, ldC=p + q, offsetC=m) # bz21 := L^-1( V2' * ((r * zs * r')_21 - bz21) * V1') # # [ bX1 0 ] # = L^-1( V2' * ((T * [ ] * T)_21 - bz21) * V1'). # [ 0 bX2 ] # a = [ r21 r22 ] * z # = [ r21 r22 ] * r' * [ bX1, 0; 0, bX2 ] * r # = [ T21 T22 ] * [ bX1, 0; 0, bX2 ] * r blas.symm(z, r, a, side='R', m=p, n=p + q, ldA=p + q, ldC=p + q, offsetB=q) # bz21 := -bz21 + a * [ r11, r12 ]' # = -bz21 + (T * [ bX1, 0; 0, bX2 ] * T)_21 blas.gemm(a, r, bz21, transB='T', m=p, n=q, k=p + q, beta=-1.0, ldA=p + q, ldC=p) # bz21 := V2' * bz21 * V1' # = V2' * (-bz21 + (T*[bX1, 0; 0, bX2]*T)_21) * V1' blas.gemm(V2, bz21, tmp, transA='T', m=p, n=q, k=p, ldB=p) blas.gemm(tmp, V1, bz21, transB='T', m=p, n=q, k=q, ldC=p) # bz21[:] := D * (I-P) * bz21[:] # = L^-1 * bz21[:] # = bXX[:] blas.copy(bz21, tmp) base.gemv(P, bz21, tmp, alpha=-1.0, beta=1.0) base.gemv(D, tmp, bz21) # Solve H * ux = bx + Gs' * D^-1 * bzl + 2*As'(bXX). # x[0] := x[0] + Gs'*zl + 2*As'(bz21) # = bx + G' * D^-1 * bzl + 2 * As'(bXX) blas.gemv(Gs, z, x[0], trans='T', alpha=1.0, beta=1.0) blas.gemv(As, bz21, x[0], trans='T', alpha=2.0, beta=1.0) # x[0] := H \ x[0] # = ux lapack.potrs(H, x[0]) # uuzl = Gs*ux - D^-1 * bzl blas.gemv(Gs, x[0], z, alpha=1.0, beta=-1.0) # bz21 := V2 * L^-T(-As(ux) + bz21) * V1 # = X blas.gemv(As, x[0], bz21, alpha=-1.0, beta=1.0) blas.tbsv(DV, bz21, n=p * q, k=0, ldA=1) blas.copy(bz21, tmp) base.gemv(P, tmp, bz21, alpha=-1.0, beta=1.0, trans='T') blas.gemm(V2, bz21, tmp) blas.gemm(tmp, V1, bz21) # zs := -zs + r' * [ 0, X'; X, 0 ] * r # = r' * [ -bX1, X'; X, -bX2 ] * r. # a := bz21 * [ r11, r12 ] # = X * [ r11, r12 ] blas.gemm(bz21, r, a, m=p, n=p + q, k=q, ldA=p, ldC=p + q) # z := -z + [ r21, r22 ]' * a + a' * [ r21, r22 ] # = rti' * uzs * rti blas.syr2k(r, a, z, trans='T', beta=-1.0, n=p + q, k=p, offsetA=q, offsetC=m, ldB=p + q, ldC=p + q) # uX1 = -Z11 - bzs_11 # = -(r*zs*r')_11 - bzs_11 # uX2 = -Z22 - bzs_22 # = -(r*zs*r')_22 - bzs_22 blas.copy(bz11, x[1]) blas.copy(bz22, x[2]) # scale diagonal of zs by 1/2 blas.scal(0.5, z, inc=p + q + 1, offset=m) # a := r*tril(zs) blas.copy(r, a) blas.trmm(z, a, side='R', m=p + q, n=p + q, ldA=p + q, ldB=p + q, offsetA=m) # x[1] := -x[1] - a[:q,:] * r[:q, :]' - r[:q,:] * a[:q,:]' # = -bzs_11 - (r*zs*r')_11 blas.syr2k(a, r, x[1], n=q, alpha=-1.0, beta=-1.0) # x[2] := -x[2] - a[q:,:] * r[q:, :]' - r[q:,:] * a[q:,:]' # = -bzs_22 - (r*zs*r')_22 blas.syr2k(a, r, x[2], n=p, alpha=-1.0, beta=-1.0, offsetA=q, offsetB=q) # scale diagonal of zs by 1/2 blas.scal(2.0, z, inc=p + q + 1, offset=m) return f
def __M2T(L, U, inv=False): n = L.symb.n snpost = L.symb.snpost snptr = L.symb.snptr chptr = L.symb.chptr chidx = L.symb.chidx relptr = L.symb.relptr relidx = L.symb.relidx blkptr = L.symb.blkptr stack = [] alpha = 1.0 if inv: alpha = -1.0 for Ut in U: for k in reversed(list(snpost)): nn = snptr[k + 1] - snptr[k] # |Nk| na = relptr[k + 1] - relptr[k] # |Ak| nj = na + nn # allocate F and copy Ut_{Jk,Nk} to leading columns of F F = matrix(0.0, (nj, nj)) lapack.lacpy(Ut.blkval, F, offsetA=blkptr[k], ldA=nj, m=nj, n=nn, uplo='L') # if supernode k is not a root node: if na > 0: # copy Vk to 2,2 block of F Vk = stack.pop() lapack.lacpy(Vk, F, offsetB=nn * (nj + 1), m=na, n=na, uplo='L') ## compute T_{Jk,Nk} (stored in leading columns of F) if inv: # if supernode k has any children: for ii in range(chptr[k], chptr[k + 1]): stack.append( frontal_get_update(F, relidx, relptr, chidx[ii])) # if supernode k is not a root node: if na > 0: # F_{Nk,Nk} := F_{Nk,Nk} - alpha*F_{Ak,Nk}'*L_{Ak,Nk} blas.gemm(F, L.blkval, F, beta = 1.0, alpha = -alpha, m = nn, n = nn, k = na,\ transA = 'T', ldA = nj, ldB = nj, ldC = nj,\ offsetA = nn, offsetB = blkptr[k]+nn, offsetC = 0) # F_{Ak,Nk} := F_{Ak,Nk} - alpha*F_{Ak,Ak}*L_{Ak,Nk} blas.symm(F, L.blkval, F, side = 'L', beta = 1.0, alpha = -alpha,\ m = na, n = nn, ldA = nj, ldB = nj, ldC = nj,\ offsetA = (nj+1)*nn, offsetB = blkptr[k]+nn, offsetC = nn) # F_{Nk,Nk} := F_{Nk,Nk} - alpha*L_{Ak,Nk}'*F_{Ak,Nk} blas.gemm(L.blkval, F, F, beta = 1.0, alpha = -alpha, m = nn, n = nn, k = na,\ transA = 'T', ldA = nj, ldB = nj, ldC = nj,\ offsetA = blkptr[k]+nn, offsetB = nn, offsetC = 0) # copy the leading Nk columns of frontal matrix to Ut lapack.lacpy(F, Ut.blkval, offsetB=blkptr[k], ldB=nj, m=nj, n=nn, uplo='L') if not inv: # if supernode k has any children: for ii in range(chptr[k], chptr[k + 1]): stack.append( frontal_get_update(F, relidx, relptr, chidx[ii])) return
def __Y2K(L, U, inv=False): n = L.symb.n snpost = L.symb.snpost snptr = L.symb.snptr chptr = L.symb.chptr chidx = L.symb.chidx relptr = L.symb.relptr relidx = L.symb.relidx blkptr = L.symb.blkptr stack = [] alpha = 1.0 if inv: alpha = -1.0 for Ut in U: for k in snpost: nn = snptr[k + 1] - snptr[k] # |Nk| na = relptr[k + 1] - relptr[k] # |Ak| nj = na + nn # allocate F and copy Ut_{Jk,Nk} to leading columns of F F = matrix(0.0, (nj, nj)) lapack.lacpy(Ut.blkval, F, offsetA=blkptr[k], ldA=nj, m=nj, n=nn, uplo='L') if not inv: # add update matrices from children to frontal matrix for i in range(chptr[k + 1] - 1, chptr[k] - 1, -1): Ui = stack.pop() frontal_add_update(F, Ui, relidx, relptr, chidx[i]) if na > 0: # F_{Ak,Ak} := F_{Ak,Ak} - alpha*L_{Ak,Nk}*F_{Ak,Nk}' blas.gemm(L.blkval, F, F, beta = 1.0, alpha = -alpha, m = na, n = na, k = nn,\ ldA = nj, ldB = nj, ldC = nj, transB = 'T',\ offsetA = blkptr[k]+nn, offsetB = nn, offsetC = nn*(nj+1)) # F_{Ak,Nk} := F_{Ak,Nk} - alpha*L_{Ak,Nk}*F_{Nk,Nk} blas.symm(F, L.blkval, F, side = 'R', beta = 1.0, alpha = -alpha,\ m = na, n = nn, ldA = nj, ldB = nj, ldC = nj,\ offsetA = 0, offsetB = blkptr[k]+nn, offsetC = nn) # F_{Ak,Ak} := F_{Ak,Ak} - alpha*F_{Ak,Nk}*L_{Ak,Nk}' blas.gemm(F, L.blkval, F, beta = 1.0, alpha = -alpha, m = na, n = na, k = nn,\ ldA = nj, ldB = nj, ldC = nj, transB = 'T',\ offsetA = nn, offsetB = blkptr[k]+nn, offsetC = nn*(nj+1)) if inv: # add update matrices from children to frontal matrix for i in range(chptr[k + 1] - 1, chptr[k] - 1, -1): Ui = stack.pop() frontal_add_update(F, Ui, relidx, relptr, chidx[i]) if na > 0: # add Uk' to stack Uk = matrix(0.0, (na, na)) lapack.lacpy(F, Uk, m=na, n=na, offsetA=nn * (nj + 1), ldA=nj, uplo='L') stack.append(Uk) # copy the leading Nk columns of frontal matrix to blkval lapack.lacpy(F, Ut.blkval, uplo='L', offsetB=blkptr[k], m=nj, n=nn, ldB=nj) return
def __scale(L, Y, U, adj=False, inv=False, factored_updates=True): n = L.symb.n snpost = L.symb.snpost snptr = L.symb.snptr chptr = L.symb.chptr chidx = L.symb.chidx relptr = L.symb.relptr relidx = L.symb.relidx blkptr = L.symb.blkptr stack = [] for k in reversed(list(snpost)): nn = snptr[k + 1] - snptr[k] # |Nk| na = relptr[k + 1] - relptr[k] # |Ak| nj = na + nn F = matrix(0.0, (nj, nj)) lapack.lacpy(Y.blkval, F, m=nj, n=nn, ldA=nj, offsetA=blkptr[k], uplo='L') # if supernode k is not a root node: if na > 0: # copy Vk to 2,2 block of F Vk = stack.pop() lapack.lacpy(Vk, F, ldB=nj, offsetB=nn * (nj + 1), m=na, n=na, uplo='L') # if supernode k has any children: for ii in range(chptr[k], chptr[k + 1]): i = chidx[ii] if factored_updates: r = relidx[relptr[i]:relptr[i + 1]] stack.append(frontal_get_update_factor(F, r, nn, na)) else: stack.append(frontal_get_update(F, relidx, relptr, i)) # if supernode k is not a root node: if na > 0: if factored_updates: # In this case we have Vk = Lk'*Lk if adj is False: trns = 'N' elif adj is True: trns = 'T' else: # factorize Vk lapack.potrf(F, offsetA=nj * nn + nn, n=na, ldA=nj) # In this case we have Vk = Lk*Lk' if adj is False: trns = 'T' elif adj is True: trns = 'N' if adj is False: tr = ['T', 'N'] elif adj is True: tr = ['N', 'T'] if inv is False: for Ut in U: # symmetrize (1,1) block of Ut_{k} and scale U11 = matrix(0.0, (nn, nn)) lapack.lacpy(Ut.blkval, U11, offsetA=blkptr[k], m=nn, n=nn, ldA=nj, uplo='L') U11 += U11.T U11[::nn + 1] *= 0.5 lapack.lacpy(U11, Ut.blkval, offsetB=blkptr[k], m=nn, n=nn, ldB=nj, uplo='N') blas.trsm(L.blkval, Ut.blkval, side = 'R', transA = tr[0],\ m = nj, n = nn, offsetA = blkptr[k], ldA = nj,\ offsetB = blkptr[k], ldB = nj) blas.trsm(L.blkval, Ut.blkval, m = nn, n = nn, transA = tr[1],\ offsetA = blkptr[k], offsetB = blkptr[k],\ ldA = nj, ldB = nj) # zero-out strict upper triangular part of {Nj,Nj} block for i in range(1, nn): blas.scal(0.0, Ut.blkval, offset=blkptr[k] + nj * i, n=i) if na > 0: blas.trmm(F, Ut.blkval, m = na, n = nn, transA = trns,\ offsetA = nj*nn+nn, ldA = nj,\ offsetB = blkptr[k]+nn, ldB = nj) else: # inv is True for Ut in U: # symmetrize (1,1) block of Ut_{k} and scale U11 = matrix(0.0, (nn, nn)) lapack.lacpy(Ut.blkval, U11, offsetA=blkptr[k], m=nn, n=nn, ldA=nj, uplo='L') U11 += U11.T U11[::nn + 1] *= 0.5 lapack.lacpy(U11, Ut.blkval, offsetB=blkptr[k], m=nn, n=nn, ldB=nj, uplo='N') blas.trmm(L.blkval, Ut.blkval, side = 'R', transA = tr[0],\ m = nj, n = nn, offsetA = blkptr[k], ldA = nj,\ offsetB = blkptr[k], ldB = nj) blas.trmm(L.blkval, Ut.blkval, m = nn, n = nn, transA = tr[1],\ offsetA = blkptr[k], offsetB = blkptr[k],\ ldA = nj, ldB = nj) # zero-out strict upper triangular part of {Nj,Nj} block for i in range(1, nn): blas.scal(0.0, Ut.blkval, offset=blkptr[k] + nj * i, n=i) if na > 0: blas.trsm(F, Ut.blkval, m = na, n = nn, transA = trns,\ offsetA = nj*nn+nn, ldA = nj,\ offsetB = blkptr[k]+nn, ldB = nj) return
def completion(X, factored_updates = True): """ Supernodal multifrontal maximum determinant positive definite matrix completion. The routine computes the Cholesky factor :math:`L` of the inverse of the maximum determinant positive definite matrix completion of :math:`X`:, i.e., .. math:: P( S^{-1} ) = X where :math:`S = LL^T`. On exit, the argument `X` contains the lower-triangular Cholesky factor :math:`L`. The optional argument `factored_updates` can be used to enable (if True) or disable (if False) updating of intermediate factorizations. :param X: :py:class:`cspmatrix` :param factored_updates: boolean """ assert isinstance(X, cspmatrix) and X.is_factor is False, "X must be a cspmatrix" n = X.symb.n snpost = X.symb.snpost snptr = X.symb.snptr chptr = X.symb.chptr chidx = X.symb.chidx relptr = X.symb.relptr relidx = X.symb.relidx blkptr = X.symb.blkptr blkval = X.blkval stack = [] for k in reversed(list(snpost)): nn = snptr[k+1]-snptr[k] # |Nk| na = relptr[k+1]-relptr[k] # |Ak| nj = na + nn # allocate F and copy X_{Jk,Nk} to leading columns of F F = matrix(0.0, (nj,nj)) lapack.lacpy(blkval, F, offsetA = blkptr[k], ldA = nj, m = nj, n = nn, uplo = 'L') # if supernode k is not a root node: if na > 0: # copy Vk to 2,2 block of F Vk = stack.pop() lapack.lacpy(Vk, F, offsetB = nn*nj+nn, m = na, n = na, uplo = 'L') # if supernode k has any children: for ii in range(chptr[k],chptr[k+1]): i = chidx[ii] if factored_updates: r = relidx[relptr[i]:relptr[i+1]] stack.append(frontal_get_update_factor(F,r,nn,na)) else: stack.append(frontal_get_update(F,relidx,relptr,i)) # if supernode k is not a root node: if na > 0: if factored_updates: # In this case we have Vk = Lk'*Lk trL1 = 'T' trL2 = 'N' else: # factorize Vk lapack.potrf(F, offsetA = nj*nn+nn, n = na, ldA = nj) # In this case we have Vk = Lk*Lk' trL1 = 'N' trL2 = 'T' # compute L_{Ak,Nk} and inv(D_{Nk,Nk}) = S_{Nk,Nk} - S_{Ak,Nk}'*L_{Ak,Nk} lapack.trtrs(F, blkval, offsetA = nj*nn+nn, trans = trL1,\ offsetB = blkptr[k]+nn, ldB = nj, n = na, nrhs = nn) blas.syrk(blkval, blkval, n = nn, k = na, trans= 'T', alpha = -1.0, beta = 1.0, offsetA = blkptr[k]+nn, offsetC = blkptr[k], ldA = nj, ldC = nj) lapack.trtrs(F, blkval, offsetA = nj*nn+nn, trans = trL2,\ offsetB = blkptr[k]+nn, ldB = nj, n = na, nrhs = nn) for i in range(nn): blas.scal(-1.0, blkval, n = na, offset = blkptr[k] + i*nj + nn) # factorize inv(D_{Nk,Nk}) as R*R' so that D_{Nk,Nk} = L*L' with L = inv(R)' lapack.lacpy(blkval, F, offsetA = blkptr[k], ldA = nj,\ ldB = nj, m = nn, n = nn, uplo = 'L') # copy -- FIX! F[:nn,:nn] = matrix(F[:nn,:nn][::-1],(nn,nn)) # reverse -- FIX! lapack.potrf(F, ldA = nj, n = nn, uplo = 'U') # factorize F[:nn,:nn] = matrix(F[:nn,:nn][::-1],(nn,nn)) # reverse -- FIX! lapack.lacpy(F, blkval, offsetB = blkptr[k], ldA = nj,\ ldB = nj, m = nn, n = nn, uplo = 'L') # copy -- FIX! # compute L = inv(R') lapack.trtri(blkval, offsetA = blkptr[k], ldA = nj, n = nn) X._is_factor = True return
def mrcompletion(A, reordered=True): """ Minimum rank positive semidefinite completion. The routine takes a positive semidefinite cspmatrix :math:`A` and returns a dense matrix :math:`Y` with :math:`r` columns that satisfies .. math:: P( YY^T ) = A where .. math:: r = \max_{i} |\gamma_i| is the clique number (the size of the largest clique). :param A: :py:class:`cspmatrix` :param reordered: boolean """ assert isinstance(A, cspmatrix) and A.is_factor is False, "A must be a cspmatrix" symb = A.symb n = symb.n snpost = symb.snpost snptr = symb.snptr chptr = symb.chptr chidx = symb.chidx relptr = symb.relptr relidx = symb.relidx blkptr = symb.blkptr blkval = A.blkval stack = [] r = 0 maxr = symb.clique_number Y = matrix(0.0,(n,maxr)) # storage for factorization Z = matrix(0.0,(maxr,maxr)) # storage for EVD of cliques w = matrix(0.0,(maxr,1)) # storage for EVD of cliques P = matrix(0.0,(maxr,maxr)) # storage for left singular vectors Q1t = matrix(0.0,(maxr,maxr)) # storage for right singular vectors (1) Q2t = matrix(0.0,(maxr,maxr)) # storage for right singular vectors (2) S = matrix(0.0,(maxr,1)) # storage for singular values V = matrix(0.0,(maxr,maxr)) Ya = matrix(0.0,(maxr,maxr)) # visit supernodes in reverse topological order for k in range(symb.Nsn-1,-1,-1): nn = snptr[k+1]-snptr[k] # |Nk| na = relptr[k+1]-relptr[k] # |Ak| nj = na + nn # allocate F and copy X_{Jk,Nk} to leading columns of F F = matrix(0.0, (nj,nj)) lapack.lacpy(blkval, F, offsetA = blkptr[k], ldA = nj, m = nj, n = nn, uplo = 'L') # if supernode k is not a root node: if na > 0: # copy Vk to 2,2 block of F Vk = stack.pop() lapack.lacpy(Vk, F, offsetB = nn*nj+nn, m = na, n = na, uplo = 'L') # if supernode k has any children: for ii in range(chptr[k],chptr[k+1]): stack.append(frontal_get_update(F,relidx,relptr,chidx[ii])) # Compute factorization of F lapack.syevr(F, w, jobz='V', range='A', uplo='L', Z=Z, n=nj,ldZ=maxr) rk = sum([1 for wi in w[:nj] if wi > 1e-14*w[nj-1]]) # determine rank of clique k r = max(rk,r) # update rank # Scale last rk cols of Z and copy parts to Yn for j in range(nj-rk,nj): Z[:nj,j] *= sqrt(w[j]) In = symb.snrowidx[symb.sncolptr[k]:symb.sncolptr[k]+nn] Y[In,:rk] = Z[:nn,nj-rk:nj] # if supernode k is not a root node: if na > 0: # Extract data Ia = symb.snrowidx[symb.sncolptr[k]+nn:symb.sncolptr[k+1]] Ya[:na,:r] = Y[Ia,:r] V[:na,:rk] = Z[nn:nj,nj-rk:nj] V[:na,rk:r] *= 0.0 # Compute SVDs: V = P*S*Q1t and Ya = P*S*Q2t lapack.gesvd(V,S,jobu='A',jobvt='A',U=P,Vt=Q1t,ldU=maxr,ldVt=maxr,m=na,n=r,ldA=maxr) lapack.gesvd(Ya,S,jobu='N',jobvt='A',Vt=Q2t,ldVt=maxr,m=na,n=r,ldA=maxr) # Scale Q2t for i in range(min(na,rk)): if S[i] > 1e-14*S[0]: Q2t[i,:r] = P[:na,i].T*Y[Ia,:r]/S[i] # Scale Yn Y[In,:r] = Y[In,:r]*Q1t[:r,:r].T*Q2t[:r,:r] if reordered: return Y[:,:r] else: return Y[symb.ip,:r]
def projected_inverse(L): """ Supernodal multifrontal projected inverse. The routine computes the projected inverse .. math:: Y = P(L^{-T}L^{-1}) where :math:`L` is a Cholesky factor. On exit, the argument :math:`L` contains the projected inverse :math:`Y`. :param L: :py:class:`cspmatrix` (factor) """ assert isinstance(L, cspmatrix) and L.is_factor is True, "L must be a cspmatrix factor" n = L.symb.n snpost = L.symb.snpost snptr = L.symb.snptr chptr = L.symb.chptr chidx = L.symb.chidx relptr = L.symb.relptr relidx = L.symb.relidx blkptr = L.symb.blkptr blkval = L.blkval stack = [] for k in reversed(list(snpost)): nn = snptr[k+1]-snptr[k] # |Nk| na = relptr[k+1]-relptr[k] # |Ak| nj = na + nn # invert factor of D_{Nk,Nk} lapack.trtri(blkval, offsetA = blkptr[k], ldA = nj, n = nn) # zero-out strict upper triangular part of {Nj,Nj} block (just in case!) for i in range(1,nn): blas.scal(0.0, blkval, offset = blkptr[k] + nj*i, n = i) # compute inv(D_{Nk,Nk}) (store in 1,1 block of F) F = matrix(0.0, (nj,nj)) blas.syrk(blkval, F, trans = 'T', offsetA = blkptr[k], ldA = nj, n = nn, k = nn) # if supernode k is not a root node: if na > 0: # copy "update matrix" to 2,2 block of F Vk = stack.pop() lapack.lacpy(Vk, F, ldB = nj, offsetB = nn*nj+nn, m = na, n = na, uplo = 'L') # compute S_{Ak,Nk} = -Vk*L_{Ak,Nk}; store in 2,1 block of F blas.symm(Vk, blkval, F, m = na, n = nn, offsetB = blkptr[k]+nn,\ ldB = nj, offsetC = nn, ldC = nj, alpha = -1.0) # compute S_nn = inv(D_{Nk,Nk}) - S_{Ak,Nk}'*L_{Ak,Nk}; store in 1,1 block of F blas.gemm(F, blkval, F, transA = 'T', m = nn, n = nn, k = na,\ offsetA = nn, alpha = -1.0, beta = 1.0,\ offsetB = blkptr[k]+nn, ldB = nj) # extract update matrices if supernode k has any children for ii in range(chptr[k],chptr[k+1]): i = chidx[ii] stack.append(frontal_get_update(F, relidx, relptr, i)) # copy S_{Jk,Nk} (i.e., 1,1 and 2,1 blocks of F) to blkval lapack.lacpy(F, blkval, m = nj, n = nn, offsetB = blkptr[k], ldB = nj, uplo = 'L') L._is_factor = False return