def makefig1():
    """Draw figure 1, titled 'Geometrical interpretation of Chebyshev bound'.

    Plots the polyline stored in the columns of ``V``, a marker, label and
    dotted unit circle for every centre in ``C``, a ray starting at each
    vertex ``V[:, k-1]``, the ellipse obtained from ``L`` and ``xc``, and the
    points in ``X``.  All data (``V``, ``C``, ``L``, ``xc``, ``X``) is read
    from the enclosing module; nothing is returned.
    """
    pylab.figure(1, facecolor='w', figsize=(6, 6))
    pylab.plot(V[0, :].T, V[1, :].T, 'b-')

    # Unit circle sampled at nopts equally spaced angles.
    nopts = 1000
    angles = matrix([a * 2.0 * pi / nopts for a in range(nopts)], (1, nopts))
    circle = matrix(0.0, (2, nopts))
    circle[0, :] = cos(angles)
    circle[1, :] = sin(angles)

    for k in range(len(C)):
        center = C[k]
        pylab.plot([center[0]], [center[1]], 'og')
        pylab.text(center[0], center[1], "s%d" % k)
        pylab.plot(center[0] + circle[0, :].T, center[1] + circle[1, :].T,
                   'g:')
        if k < 1:
            continue

        # Ray from vertex k-1 towards the midpoint of two centres; for
        # k == 1 the partner centre is the last one, otherwise centre k-1.
        vertex = V[:, k - 1]
        partner = C[-1] if k == 1 else C[k - 1]
        ray = 0.5 * (C[k] + partner) - vertex
        pylab.plot([vertex[0], vertex[0] + 5 * ray[0]],
                   [vertex[1], vertex[1] + 5 * ray[1]], 'b-')

    # ellipse = L^-T * circle, shifted by xc when plotted.
    ellipse = +circle
    blas.trsm(L, ellipse, transA='T')
    pylab.plot(xc[0] + ellipse[0, :].T, xc[1] + ellipse[1, :].T, 'r-')

    for point in X:
        pylab.plot([point[0]], [point[1]], 'ro')

    pylab.axis([-5, 5, -5, 5])
    pylab.title('Geometrical interpretation of Chebyshev bound (fig. 7.7)')
    pylab.axis('off')
def makefig1():
    """Draw figure 1, titled 'Geometrical interpretation of Chebyshev bound'.

    Plots the polyline stored in the columns of ``V``, a white marker, label
    and dotted unit circle for every centre in ``C``, a ray starting at each
    vertex ``V[:, k-1]``, the ellipse obtained from ``L`` and ``xc``, and the
    points in ``X``.  All data (``V``, ``C``, ``L``, ``xc``, ``X``) is read
    from the enclosing module; nothing is returned.
    """
    pylab.figure(1, facecolor='w', figsize=(6, 6))
    pylab.plot(V[0, :].T, V[1, :].T, 'b-')

    # Unit circle sampled at nopts equally spaced angles.
    nopts = 1000
    angles = matrix([a * 2.0 * pi / nopts for a in range(nopts)], (1, nopts))
    circle = matrix(0.0, (2, nopts))
    circle[0, :] = cos(angles)
    circle[1, :] = sin(angles)

    for k in range(len(C)):
        center = C[k]
        pylab.plot([center[0]], [center[1]], 'ow')
        pylab.text(center[0], center[1], "s%d" % k)
        pylab.plot(center[0] + circle[0, :].T, center[1] + circle[1, :].T,
                   'g:')
        if k < 1:
            continue

        # Ray from vertex k-1 towards the midpoint of two centres; for
        # k == 1 the partner centre is the last one, otherwise centre k-1.
        vertex = V[:, k - 1]
        partner = C[-1] if k == 1 else C[k - 1]
        ray = 0.5 * (C[k] + partner) - vertex
        pylab.plot([vertex[0], vertex[0] + 5 * ray[0]],
                   [vertex[1], vertex[1] + 5 * ray[1]], 'b-')

    # ellipse = L^-T * circle, shifted by xc when plotted.
    ellipse = +circle
    blas.trsm(L, ellipse, transA='T')
    pylab.plot(xc[0] + ellipse[0, :].T, xc[1] + ellipse[1, :].T, 'r-')

    for point in X:
        pylab.plot([point[0]], [point[1]], 'ro')

    pylab.axis([-5, 5, -5, 5])
    pylab.title('Geometrical interpretation of Chebyshev bound (fig. 7.7)')
    pylab.axis('off')
def F(x=None, z=None):
    """Evaluate the objective, its gradient and (optionally) its Hessian.

    Called with no arguments, returns ``(0, x0)`` where ``x0`` is an n-vector
    of ones.  Called with ``x``, returns ``(f, gradf)``, or ``None`` when the
    Cholesky factorization of ``V * diag(x) * V'`` fails.  Called with ``x``
    and ``z``, additionally returns ``z[0]`` times the Hessian.

    ``n`` and ``V`` are read from the enclosing module.
    """
    if x is None:
        return 0, matrix(1.0, (n, 1))

    # Factor a copy of X = V * diag(x) * V'; potrf overwrites its argument.
    gram = V * spdiag(x) * V.T
    chol = +gram
    try:
        lapack.potrf(chol)
    except ArithmeticError:
        # X is not positive definite.
        return None

    # f = -2 * sum(log(diag(chol))) over the two diagonal entries
    # (for a 2 x 2 X this is -log det X).
    value = -2.0 * (log(chol[0, 0]) + log(chol[1, 1]))

    # scaled := chol^-1 * V; the gradient is minus the squared column norms.
    scaled = +V
    blas.trsm(chol, scaled)
    grad = matrix(-1.0, (1, 2)) * scaled**2
    if z is None:
        return value, grad

    # hess := scaled' * scaled, then squared elementwise and weighted by z[0].
    hess = matrix(0.0, (n, n))
    blas.syrk(scaled, hess, trans='T')
    return value, grad, z[0] * hess**2
def test_trsm(self):
    """Compare cp.trsm with a dense blas.trsm, plain and transposed."""
    chol = cp.cspmatrix(self.symb) + self.A
    cp.cholesky(chol)

    # Case 1: solve with the factor itself.
    rhs = cp.eye(self.symb.n)
    rhs_dense = matrix(rhs)[self.symb.p, :]
    chol_dense = matrix(chol.spmatrix(reordered=True, symmetric=False))
    cp.trsm(chol, rhs)
    blas.trsm(chol_dense, rhs_dense)
    # Undo the permutation on the dense reference before comparing.
    residual = list((rhs - rhs_dense[self.symb.ip, :])[:])
    self.assertAlmostEqualLists(residual, len(residual) * [0.0])

    # Case 2: solve with the transposed factor.
    rhs = cp.eye(self.symb.n)
    rhs_dense = matrix(rhs)[self.symb.p, :]
    chol_dense = matrix(chol.spmatrix(reordered=True, symmetric=False))
    cp.trsm(chol, rhs, trans='T')
    blas.trsm(chol_dense, rhs_dense, transA='T')
    residual = list(rhs - rhs_dense[self.symb.ip, :])[:]
    self.assertAlmostEqualLists(residual, len(residual) * [0.0])
def test_trsm(self):
    """Compare cp.trsm with a dense blas.trsm, plain and transposed."""
    chol = cp.cspmatrix(self.symb) + self.A
    cp.cholesky(chol)

    # Case 1: solve with the factor itself.
    rhs = cp.eye(self.symb.n)
    rhs_dense = matrix(rhs)[self.symb.p, :]
    chol_dense = matrix(chol.spmatrix(reordered=True, symmetric=False))
    cp.trsm(chol, rhs)
    blas.trsm(chol_dense, rhs_dense)
    # Undo the permutation on the dense reference before comparing.
    residual = list((rhs - rhs_dense[self.symb.ip, :])[:])
    self.assertAlmostEqualLists(residual, len(residual) * [0.0])

    # Case 2: solve with the transposed factor.
    rhs = cp.eye(self.symb.n)
    rhs_dense = matrix(rhs)[self.symb.p, :]
    chol_dense = matrix(chol.spmatrix(reordered=True, symmetric=False))
    cp.trsm(chol, rhs, trans='T')
    blas.trsm(chol_dense, rhs_dense, transA='T')
    residual = list(rhs - rhs_dense[self.symb.ip, :])[:]
    self.assertAlmostEqualLists(residual, len(residual) * [0.0])
def factor(W, H=None, Df=None):
    """Factor the reduced KKT matrices for scaling W and return a solver.

    Forms S = Gs'*Gs (+ Dfs'*Dfs if mnl) (+ H if given) from the scaled
    matrices Gs = Wl^{-1}*G and Dfs = Wnl^{-1}*Df, factors it (dense
    lapack.potrf or sparse CHOLMOD, depending on the type of F['S']), and
    then factors K = Asct'*Asct.  If the first factorization of S raises
    ArithmeticError, A'*A is added to S and F['singular'] is set.

    State is kept in the dictionary F between calls; G, A, n, p, ml, mnl,
    F and scale come from the enclosing scope (not visible in this block).

    Returns the closure solve(x, y, z), which overwrites x, y, z in place.
    """
    if F['firstcall']:
        # Allocate storage with the same type (dense/sparse) as the inputs.
        if type(G) is matrix:
            F['Gs'] = matrix(0.0, G.size)
        else:
            F['Gs'] = spmatrix(0.0, G.I, G.J, G.size)
        if mnl:
            if type(Df) is matrix:
                F['Dfs'] = matrix(0.0, Df.size)
            else:
                F['Dfs'] = spmatrix(0.0, Df.I, Df.J, Df.size)
        if (mnl and type(Df) is matrix) or type(G) is matrix or \
                type(H) is matrix:
            F['S'] = matrix(0.0, (n, n))
            F['K'] = matrix(0.0, (p, p))
        else:
            F['S'] = spmatrix([], [], [], (n, n), 'd')
            F['Sf'] = None
            if type(A) is matrix:
                F['K'] = matrix(0.0, (p, p))
            else:
                F['K'] = spmatrix([], [], [], (p, p), 'd')

    # Dfs = Wnl^{-1} * Df
    if mnl:
        base.gemm(spmatrix(W['dnli'], list(range(mnl)), list(range(mnl))),
                  Df, F['Dfs'], partial=True)

    # Gs = Wl^{-1} * G.
    base.gemm(spmatrix(W['di'], list(range(ml)), list(range(ml))), G,
              F['Gs'], partial=True)

    if F['firstcall']:
        base.syrk(F['Gs'], F['S'], trans='T')
        if mnl:
            base.syrk(F['Dfs'], F['S'], trans='T', beta=1.0)
        if H is not None:
            F['S'] += H
        try:
            if type(F['S']) is matrix:
                lapack.potrf(F['S'])
            else:
                F['Sf'] = cholmod.symbolic(F['S'])
                cholmod.numeric(F['S'], F['Sf'])
        except ArithmeticError:
            # S is not positive definite: rebuild it with A'*A added and
            # remember this for later calls and for solve().
            F['singular'] = True
            if type(A) is matrix and type(F['S']) is spmatrix:
                F['S'] = matrix(0.0, (n, n))
            base.syrk(F['Gs'], F['S'], trans='T')
            if mnl:
                base.syrk(F['Dfs'], F['S'], trans='T', beta=1.0)
            base.syrk(A, F['S'], trans='T', beta=1.0)
            if H is not None:
                F['S'] += H
            if type(F['S']) is matrix:
                lapack.potrf(F['S'])
            else:
                F['Sf'] = cholmod.symbolic(F['S'])
                cholmod.numeric(F['S'], F['Sf'])
        F['firstcall'] = False

    else:
        # Later calls reuse the storage (and, for sparse S, the symbolic
        # factorization F['Sf']) set up on the first call.
        base.syrk(F['Gs'], F['S'], trans='T', partial=True)
        if mnl:
            base.syrk(F['Dfs'], F['S'], trans='T', beta=1.0, partial=True)
        if H is not None:
            F['S'] += H
        if F['singular']:
            base.syrk(A, F['S'], trans='T', beta=1.0, partial=True)
        if type(F['S']) is matrix:
            lapack.potrf(F['S'])
        else:
            cholmod.numeric(F['S'], F['Sf'])

    if type(F['S']) is matrix:
        # Asct := L^{-1}*A'.  Factor K = Asct'*Asct.
        if type(A) is matrix:
            Asct = A.T
        else:
            Asct = matrix(A.T)
        blas.trsm(F['S'], Asct)
        blas.syrk(Asct, F['K'], trans='T')
        lapack.potrf(F['K'])

    else:
        # Asct := L^{-1}*P*A'.  Factor K = Asct'*Asct.
        if type(A) is matrix:
            Asct = A.T
            cholmod.solve(F['Sf'], Asct, sys=7)
            cholmod.solve(F['Sf'], Asct, sys=4)
            blas.syrk(Asct, F['K'], trans='T')
            lapack.potrf(F['K'])
        else:
            Asct = cholmod.spsolve(F['Sf'], A.T, sys=7)
            Asct = cholmod.spsolve(F['Sf'], Asct, sys=4)
            base.syrk(Asct, F['K'], trans='T')
            Kf = cholmod.symbolic(F['K'])
            cholmod.numeric(F['K'], Kf)

    def solve(x, y, z):
        """Solve the KKT system in place using the factors computed above.

        On entry x, y, z hold bx, by, bz; on exit they hold ux, uy, W*uz.
        """

        # Solve
        #
        #     [ H          A'  GG'*W^{-1} ]   [ ux   ]   [ bx        ]
        #     [ A          0   0          ] * [ uy   ] = [ by        ]
        #     [ W^{-T}*GG  0   -I         ]   [ W*uz ]   [ W^{-T}*bz ]
        #
        # and return ux, uy, W*uz.
        #
        # If not F['singular']:
        #
        #     K*uy = A * S^{-1} * ( bx + GG'*W^{-1}*W^{-T}*bz ) - by
        #     S*ux = bx + GG'*W^{-1}*W^{-T}*bz - A'*uy
        #     W*uz = W^{-T} * ( GG*ux - bz ).
        #
        # If F['singular']:
        #
        #     K*uy = A * S^{-1} * ( bx + GG'*W^{-1}*W^{-T}*bz + A'*by )
        #            - by
        #     S*ux = bx + GG'*W^{-1}*W^{-T}*bz + A'*by - A'*y.
        #     W*uz = W^{-T} * ( GG*ux - bz ).

        # z := W^{-1} * z = W^{-1} * bz
        scale(z, W, trans='T', inverse='I')

        # If not F['singular']:
        # x := L^{-1} * P * (x + GGs'*z)
        #    = L^{-1} * P * (x + GG'*W^{-1}*W^{-T}*bz)
        #
        # If F['singular']:
        # x := L^{-1} * P * (x + GGs'*z + A'*y))
        #    = L^{-1} * P * (x + GG'*W^{-1}*W^{-T}*bz + A'*y)
        if mnl:
            base.gemv(F['Dfs'], z, x, trans='T', beta=1.0)
        base.gemv(F['Gs'], z, x, offsetx=mnl, trans='T', beta=1.0)
        if F['singular']:
            base.gemv(A, y, x, trans='T', beta=1.0)
        if type(F['S']) is matrix:
            blas.trsv(F['S'], x)
        else:
            cholmod.solve(F['Sf'], x, sys=7)
            cholmod.solve(F['Sf'], x, sys=4)

        # y := K^{-1} * (Asc*x - y)
        #    = K^{-1} * (A * S^{-1} * (bx + GG'*W^{-1}*W^{-T}*bz) - by)
        #      (if not F['singular'])
        #    = K^{-1} * (A * S^{-1} * (bx + GG'*W^{-1}*W^{-T}*bz +
        #      A'*by) - by)
        #      (if F['singular']).
        base.gemv(Asct, x, y, trans='T', beta=-1.0)
        if type(F['K']) is matrix:
            lapack.potrs(F['K'], y)
        else:
            cholmod.solve(Kf, y)

        # x := P' * L^{-T} * (x - Asc'*y)
        #    = S^{-1} * (bx + GG'*W^{-1}*W^{-T}*bz - A'*y)
        #      (if not F['singular'])
        #    = S^{-1} * (bx + GG'*W^{-1}*W^{-T}*bz + A'*by - A'*y)
        #      (if F['singular'])
        base.gemv(Asct, y, x, alpha=-1.0, beta=1.0)
        if type(F['S']) is matrix:
            blas.trsv(F['S'], x, trans='T')
        else:
            cholmod.solve(F['Sf'], x, sys=5)
            cholmod.solve(F['Sf'], x, sys=8)

        # W*z := GGs*x - z = W^{-T} * (GG*x - bz)
        if mnl:
            base.gemv(F['Dfs'], x, z, beta=-1.0)
        base.gemv(F['Gs'], x, z, beta=-1.0, offsety=mnl)

    return solve
pylab.plot(X[:, 0], X[:, 1], 'ko', X[:, 0], X[:, 1], '-k') # Ellipsoid in the form { x | || L' * (x-c) ||_2 <= 1 } L = +A lapack.potrf(L) c = +b lapack.potrs(L, c) # 1000 points on the unit circle nopts = 1000 angles = matrix([a * 2.0 * pi / nopts for a in range(nopts)], (1, nopts)) circle = matrix(0.0, (2, nopts)) circle[0, :], circle[1, :] = cos(angles), sin(angles) # ellipse = L^-T * circle + c blas.trsm(L, circle, transA='T') ellipse = circle + c[:, nopts * [0]] ellipse2 = 0.5 * circle + c[:, nopts * [0]] pylab.plot(ellipse[0, :].T, ellipse[1, :].T, 'k-') pylab.fill(ellipse2[0, :].T, ellipse2[1, :].T, facecolor='#F0F0F0') pylab.title('Loewner-John ellipsoid (fig 8.3)') pylab.axis('equal') pylab.axis('off') # Maximum volume enclosed ellipsoid # # minimize -log det B # subject to ||B * gk||_2 + gk'*c <= hk, k=1,...,m # # with variables B and c.
def F(W):
    """
    Create a solver for the linear equations

        C * ux + G' * uzl - 2*A'(uzs21) = bx
                                 -uzs11 = bX1
                                 -uzs22 = bX2
                    G * ux - Dl^2 * uzl = bzl
        [ -uX1   -A(ux)' ]          [ uzs11 uzs21' ]
        [                ] - r*r' * [              ] * r*r' = bzs
        [ -A(ux) -uX2    ]          [ uzs21 uzs22  ]

    where Dl = diag(W['l']), r = W['r'][0].

    On entry, x = (bx, bX1, bX2) and z = [ bzl; bzs[:] ].
    On exit, x = (ux, uX1, uX2) and z = [ Dl*uzl; (r'*uzs*r)[:] ].

    1. Compute matrices V1, V2 such that (with T = r*r')

           [ V1   0   ] [ T11  T21' ] [ V1'  0  ]   [ I  S' ]
           [          ] [           ] [         ] = [       ]
           [ 0    V2' ] [ T21  T22  ] [ 0    V2 ]   [ S  I  ]

       and S = [ diag(s); 0 ], s a positive q-vector.

    2. Factor the mapping X -> X + S * X' * S:

           X + S * X' * S = L( L'( X )).

    3. Compute scaled mappings: a matrix As with as its columns the
       coefficients of the scaled mapping

           L^-1( V2' * A() * V1' )

       and the matrix Gs = Dl^-1 * G.

    4. Cholesky factorization of H = C + Gs'*Gs + 2*As'*As.

    All work arrays (Q1, Q2, L1, L2, tau1, tau2, T21, s, V1, V2, SS, P, D,
    DV, As, As2, Gs, H, tmp, a, bz11, bz21, bz22), the index sets (Itril2,
    Itril3, Itriu, Idiag), the data (A, G, C) and the dimensions (m, n, p,
    q) come from the enclosing scope and are overwritten in place.

    Returns the function f(x, y, z) that solves the system in place.
    """

    # 1. Compute V1, V2, s.

    r = W['r'][0]

    # LQ factorization R[:q, :] = L1 * Q1.
    lapack.lacpy(r, Q1, m = q)
    lapack.gelqf(Q1, tau1)
    lapack.lacpy(Q1, L1, n = q, uplo = 'L')
    lapack.orglq(Q1, tau1)

    # LQ factorization R[q:, :] = L2 * Q2.
    lapack.lacpy(r, Q2, m = p, offsetA = q)
    lapack.gelqf(Q2, tau2)
    lapack.lacpy(Q2, L2, n = p, uplo = 'L')
    lapack.orglq(Q2, tau2)

    # V2, V1, s are computed from an SVD: if
    #
    #     Q2 * Q1' = U * diag(s) * V',
    #
    # then V1 = V' * L1^-1 and V2 = L2^-T * U.

    # T21 = Q2 * Q1.T
    blas.gemm(Q2, Q1, T21, transB = 'T')

    # SVD T21 = U * diag(s) * V'.  Store U in V2 and V' in V1.
    lapack.gesvd(T21, s, jobu = 'A', jobvt = 'A', U = V2, Vt = V1)

    # Possible improvement noted by the original author:
    # Q2 := Q2 * Q1' without extracting Q1; store T21 in Q2.
    # This will require lapack.ormlq or lapack.unmlq.

    # V2 = L2^-T * U
    blas.trsm(L2, V2, transA = 'T')

    # V1 = V' * L1^-1
    blas.trsm(L1, V1, side = 'R')

    # 2. Factorization X + S * X' * S = L( L'( X )).
    #
    # The factor L is stored as a diagonal matrix D and a sparse lower
    # triangular matrix P, such that
    #
    #     L(X)[:] = D**-1 * (I + P) * X[:]
    #     L^-1(X)[:] = D * (I - P) * X[:].

    # SS is q x q with SS[i,j] = si*sj.
    blas.scal(0.0, SS)
    blas.syr(s, SS)

    # For a p x q matrix X, P*X[:] is Y[:] where
    #
    #     Yij = si * sj * Xji  if i < j
    #         = 0              otherwise.
    #
    P.V = SS[Itril2]

    # For a p x q matrix X, D*X[:] is Y[:] where
    #
    #     Yij = Xij / sqrt( 1 - si^2 * sj^2 )  if i < j
    #         = Xii / sqrt( 1 + si^2 )         if i = j
    #         = Xij                            otherwise.
    #
    DV[Idiag] = sqrt(1.0 + SS[::q+1])
    DV[Itriu] = sqrt(1.0 - SS[Itril3]**2)
    D.V = DV**-1

    # 3. Scaled linear mappings

    # Ask := V2' * Ask * V1'
    blas.scal(0.0, As)
    base.axpy(A, As)
    # range (not the Python-2-only xrange) keeps this runnable on Python 3.
    for i in range(n):
        # tmp := V2' * As[i, :]
        blas.gemm(V2, As, tmp, transA = 'T', m = p, n = q, k = p,
            ldB = p, offsetB = i*p*q)
        # As[:,i] := tmp * V1'
        blas.gemm(tmp, V1, As, transB = 'T', m = p, n = q, k = q,
            ldC = p, offsetC = i*p*q)

    # As := D * (I - P) * As
    #     = L^-1 * As.
    blas.copy(As, As2)
    base.gemm(P, As, As2, alpha = -1.0, beta = 1.0)
    base.gemm(D, As2, As)

    # Gs := Dl^-1 * G
    blas.scal(0.0, Gs)
    base.axpy(G, Gs)
    for k in range(n):
        blas.tbmv(W['di'], Gs, n = m, k = 0, ldA = 1, offsetx = k*m)

    # 4. Cholesky factorization of H = C + Gs' * Gs + 2 * As' * As.
    blas.syrk(As, H, trans = 'T', alpha = 2.0)
    blas.syrk(Gs, H, trans = 'T', beta = 1.0)
    base.axpy(C, H)
    lapack.potrf(H)

    def f(x, y, z):
        """
        Solve

            C * ux + G' * uzl - 2*A'(uzs21) = bx
                                     -uzs11 = bX1
                                     -uzs22 = bX2
                        G * ux - D^2 * uzl  = bzl
            [ -uX1   -A(ux)' ]       [ uzs11 uzs21' ]
            [                ] - T * [              ] * T = bzs.
            [ -A(ux) -uX2    ]       [ uzs21 uzs22  ]

        On entry, x = (bx, bX1, bX2) and z = [ bzl; bzs[:] ].
        On exit, x = (ux, uX1, uX2) and z = [ D*uzl; (r'*uzs*r)[:] ].
        The argument y is not used.

        Define X = uzs21 and Z = T * uzs * T.  Using the congruence
        transformation

            [ V1   0   ] [ T11  T21' ] [ V1'  0  ]   [ I  S' ]
            [          ] [           ] [         ] = [       ]
            [ 0    V2' ] [ T21  T22  ] [ 0    V2 ]   [ S  I  ]

        and the factorization X + S * X' * S = L( L'(X) ), introduce the
        scaled variables

            uuzl = D * uzl
            XX   = L'(V2^-1 * X * V1^-1)
            ZZ21 = L^-1(V2' * Z21 * V1')

        and the right-hand sides

            bbzs_21 = L^-1(V2' * bzs_21 * V1')
                                       [ bX1  0   ]
            bX      = L^-1( V2' * (T * [          ] * T)_21 * V1').
                                       [ 0    bX2 ]

        Eliminating Z21, uuzl and XX gives

            H * ux = bx + Gs' * D^-1 * bzl + 2*As'(bX - bbzs_21)
            Gs * ux - uuzl = D^-1 * bzl
            -As(ux) - XX   = bbzs_21 - bX
            -uX1 - Z11     = bzs_11
            -uX2 - Z22     = bzs_22.

        In summary, we can use the following algorithm:

        1. bXX := bX - bbzs21
                                        [ bX1 0   ]
                = L^-1( V2' * ((T * [         ] * T)_21 - bzs_21) * V1')
                                        [ 0   bX2 ]

        2. Solve H * ux = bx + Gs' * D^-1 * bzl + 2*As'(bXX).

        3. From ux, compute

               uuzl = Gs*ux - D^-1 * bzl and
               X = V2 * L^-T(-As(ux) + bXX) * V1.

        4. Return ux, uuzl,

               rti' * Z * rti = r' * [ -bX1, X'; X, -bX2 ] * r

           and uX1 = -Z11 - bzs_11, uX2 = -Z22 - bzs_22.
        """

        # Save bzs_11, bzs_22, bzs_21.
        lapack.lacpy(z, bz11, uplo = 'L', m = q, n = q, ldA = p+q,
            offsetA = m)
        lapack.lacpy(z, bz21, m = p, n = q, ldA = p+q, offsetA = m+q)
        lapack.lacpy(z, bz22, uplo = 'L', m = p, n = p, ldA = p+q,
            offsetA = m + (p+q+1)*q)

        # zl := D^-1 * zl
        #     = D^-1 * bzl
        blas.tbmv(W['di'], z, n = m, k = 0, ldA = 1)

        # zs := r' * [ bX1, 0; 0, bX2 ] * r.

        # zs := [ bX1, 0; 0, bX2 ]
        blas.scal(0.0, z, offset = m)
        lapack.lacpy(x[1], z, uplo = 'L', m = q, n = q, ldB = p+q,
            offsetB = m)
        lapack.lacpy(x[2], z, uplo = 'L', m = p, n = p, ldB = p+q,
            offsetB = m + (p+q+1)*q)

        # scale diagonal of zs by 1/2
        blas.scal(0.5, z, inc = p+q+1, offset = m)

        # a := tril(zs)*r
        blas.copy(r, a)
        blas.trmm(z, a, side = 'L', m = p+q, n = p+q, ldA = p+q,
            ldB = p+q, offsetA = m)

        # zs := a'*r + r'*a
        blas.syr2k(r, a, z, trans = 'T', n = p+q, k = p+q, ldB = p+q,
            ldC = p+q, offsetC = m)

        # bz21 := L^-1( V2' * ((r * zs * r')_21 - bz21) * V1')
        #
        #                           [ bX1 0   ]
        #       = L^-1( V2' * ((T * [         ] * T)_21 - bz21) * V1').
        #                           [ 0   bX2 ]

        # a = [ r21 r22 ] * z
        #   = [ r21 r22 ] * r' * [ bX1, 0; 0, bX2 ] * r
        #   = [ T21  T22 ] * [ bX1, 0; 0, bX2 ] * r
        blas.symm(z, r, a, side = 'R', m = p, n = p+q, ldA = p+q,
            ldC = p+q, offsetB = q)

        # bz21 := -bz21 + a * [ r11, r12 ]'
        #       = -bz21 + (T * [ bX1, 0; 0, bX2 ] * T)_21
        blas.gemm(a, r, bz21, transB = 'T', m = p, n = q, k = p+q,
            beta = -1.0, ldA = p+q, ldC = p)

        # bz21 := V2' * bz21 * V1'
        #       = V2' * (-bz21 + (T*[bX1, 0; 0, bX2]*T)_21) * V1'
        blas.gemm(V2, bz21, tmp, transA = 'T', m = p, n = q, k = p,
            ldB = p)
        blas.gemm(tmp, V1, bz21, transB = 'T', m = p, n = q, k = q,
            ldC = p)

        # bz21[:] := D * (I-P) * bz21[:]
        #          = L^-1 * bz21[:]
        #          = bXX[:]
        blas.copy(bz21, tmp)
        base.gemv(P, bz21, tmp, alpha = -1.0, beta = 1.0)
        base.gemv(D, tmp, bz21)

        # Solve H * ux = bx + Gs' * D^-1 * bzl + 2*As'(bXX).

        # x[0] := x[0] + Gs'*zl + 2*As'(bz21)
        #       = bx + G' * D^-1 * bzl + 2 * As'(bXX)
        blas.gemv(Gs, z, x[0], trans = 'T', alpha = 1.0, beta = 1.0)
        blas.gemv(As, bz21, x[0], trans = 'T', alpha = 2.0, beta = 1.0)

        # x[0] := H \ x[0]
        #      = ux
        lapack.potrs(H, x[0])

        # uuzl = Gs*ux - D^-1 * bzl
        blas.gemv(Gs, x[0], z, alpha = 1.0, beta = -1.0)

        # bz21 := V2 * L^-T(-As(ux) + bz21) * V1
        #       = X
        blas.gemv(As, x[0], bz21, alpha = -1.0, beta = 1.0)
        blas.tbsv(DV, bz21, n = p*q, k = 0, ldA = 1)
        blas.copy(bz21, tmp)
        base.gemv(P, tmp, bz21, alpha = -1.0, beta = 1.0, trans = 'T')
        blas.gemm(V2, bz21, tmp)
        blas.gemm(tmp, V1, bz21)

        # zs := -zs + r' * [ 0, X'; X, 0 ] * r
        #     = r' * [ -bX1, X'; X, -bX2 ] * r.

        # a := bz21 * [ r11, r12 ]
        #    = X * [ r11, r12 ]
        blas.gemm(bz21, r, a, m = p, n = p+q, k = q, ldA = p, ldC = p+q)

        # z := -z + [ r21, r22 ]' * a + a' * [ r21, r22 ]
        #    = rti' * uzs * rti
        blas.syr2k(r, a, z, trans = 'T', beta = -1.0, n = p+q, k = p,
            offsetA = q, offsetC = m, ldB = p+q, ldC = p+q)

        # uX1 = -Z11 - bzs_11
        #     = -(r*zs*r')_11 - bzs_11
        # uX2 = -Z22 - bzs_22
        #     = -(r*zs*r')_22 - bzs_22

        blas.copy(bz11, x[1])
        blas.copy(bz22, x[2])

        # scale diagonal of zs by 1/2
        blas.scal(0.5, z, inc = p+q+1, offset = m)

        # a := r*tril(zs)
        blas.copy(r, a)
        blas.trmm(z, a, side = 'R', m = p+q, n = p+q, ldA = p+q,
            ldB = p+q, offsetA = m)

        # x[1] := -x[1] - a[:q,:] * r[:q, :]' - r[:q,:] * a[:q,:]'
        #       = -bzs_11 - (r*zs*r')_11
        blas.syr2k(a, r, x[1], n = q, alpha = -1.0, beta = -1.0)

        # x[2] := -x[2] - a[q:,:] * r[q:, :]' - r[q:,:] * a[q:,:]'
        #       = -bzs_22 - (r*zs*r')_22
        blas.syr2k(a, r, x[2], n = p, alpha = -1.0, beta = -1.0,
            offsetA = q, offsetB = q)

        # restore the diagonal of zs (undo the earlier scaling by 1/2)
        blas.scal(2.0, z, inc = p+q+1, offset = m)

    return f
def F(W):
    """
    Create a solver for the linear equations

        C * ux + G' * uzl - 2*A'(uzs21) = bx
                                 -uzs11 = bX1
                                 -uzs22 = bX2
                    G * ux - Dl^2 * uzl = bzl
        [ -uX1   -A(ux)' ]          [ uzs11 uzs21' ]
        [                ] - r*r' * [              ] * r*r' = bzs
        [ -A(ux) -uX2    ]          [ uzs21 uzs22  ]

    where Dl = diag(W['l']), r = W['r'][0].

    On entry, x = (bx, bX1, bX2) and z = [ bzl; bzs[:] ].
    On exit, x = (ux, uX1, uX2) and z = [ Dl*uzl; (r'*uzs*r)[:] ].

    1. Compute matrices V1, V2 such that (with T = r*r')

           [ V1   0   ] [ T11  T21' ] [ V1'  0  ]   [ I  S' ]
           [          ] [           ] [         ] = [       ]
           [ 0    V2' ] [ T21  T22  ] [ 0    V2 ]   [ S  I  ]

       and S = [ diag(s); 0 ], s a positive q-vector.

    2. Factor the mapping X -> X + S * X' * S:

           X + S * X' * S = L( L'( X )).

    3. Compute scaled mappings: a matrix As with as its columns the
       coefficients of the scaled mapping

           L^-1( V2' * A() * V1' )

       and the matrix Gs = Dl^-1 * G.

    4. Cholesky factorization of H = C + Gs'*Gs + 2*As'*As.

    All work arrays (Q1, Q2, L1, L2, tau1, tau2, T21, s, V1, V2, SS, P, D,
    DV, As, As2, Gs, H, tmp, a, bz11, bz21, bz22), the index sets (Itril2,
    Itril3, Itriu, Idiag), the data (A, G, C) and the dimensions (m, n, p,
    q) come from the enclosing scope and are overwritten in place.

    Returns the function f(x, y, z) that solves the system in place.
    """

    # 1. Compute V1, V2, s.

    r = W['r'][0]

    # LQ factorization R[:q, :] = L1 * Q1.
    lapack.lacpy(r, Q1, m=q)
    lapack.gelqf(Q1, tau1)
    lapack.lacpy(Q1, L1, n=q, uplo='L')
    lapack.orglq(Q1, tau1)

    # LQ factorization R[q:, :] = L2 * Q2.
    lapack.lacpy(r, Q2, m=p, offsetA=q)
    lapack.gelqf(Q2, tau2)
    lapack.lacpy(Q2, L2, n=p, uplo='L')
    lapack.orglq(Q2, tau2)

    # V2, V1, s are computed from an SVD: if
    #
    #     Q2 * Q1' = U * diag(s) * V',
    #
    # then V1 = V' * L1^-1 and V2 = L2^-T * U.

    # T21 = Q2 * Q1.T
    blas.gemm(Q2, Q1, T21, transB='T')

    # SVD T21 = U * diag(s) * V'.  Store U in V2 and V' in V1.
    lapack.gesvd(T21, s, jobu='A', jobvt='A', U=V2, Vt=V1)

    # Possible improvement noted by the original author:
    # Q2 := Q2 * Q1' without extracting Q1; store T21 in Q2.
    # This will require lapack.ormlq or lapack.unmlq.

    # V2 = L2^-T * U
    blas.trsm(L2, V2, transA='T')

    # V1 = V' * L1^-1
    blas.trsm(L1, V1, side='R')

    # 2. Factorization X + S * X' * S = L( L'( X )).
    #
    # The factor L is stored as a diagonal matrix D and a sparse lower
    # triangular matrix P, such that
    #
    #     L(X)[:] = D**-1 * (I + P) * X[:]
    #     L^-1(X)[:] = D * (I - P) * X[:].

    # SS is q x q with SS[i,j] = si*sj.
    blas.scal(0.0, SS)
    blas.syr(s, SS)

    # For a p x q matrix X, P*X[:] is Y[:] where
    #
    #     Yij = si * sj * Xji  if i < j
    #         = 0              otherwise.
    #
    P.V = SS[Itril2]

    # For a p x q matrix X, D*X[:] is Y[:] where
    #
    #     Yij = Xij / sqrt( 1 - si^2 * sj^2 )  if i < j
    #         = Xii / sqrt( 1 + si^2 )         if i = j
    #         = Xij                            otherwise.
    #
    DV[Idiag] = sqrt(1.0 + SS[::q + 1])
    DV[Itriu] = sqrt(1.0 - SS[Itril3]**2)
    D.V = DV**-1

    # 3. Scaled linear mappings

    # Ask := V2' * Ask * V1'
    blas.scal(0.0, As)
    base.axpy(A, As)
    # range (not the Python-2-only xrange) keeps this runnable on Python 3.
    for i in range(n):
        # tmp := V2' * As[i, :]
        blas.gemm(V2, As, tmp, transA='T', m=p, n=q, k=p, ldB=p,
                  offsetB=i * p * q)
        # As[:,i] := tmp * V1'
        blas.gemm(tmp, V1, As, transB='T', m=p, n=q, k=q, ldC=p,
                  offsetC=i * p * q)

    # As := D * (I - P) * As
    #     = L^-1 * As.
    blas.copy(As, As2)
    base.gemm(P, As, As2, alpha=-1.0, beta=1.0)
    base.gemm(D, As2, As)

    # Gs := Dl^-1 * G
    blas.scal(0.0, Gs)
    base.axpy(G, Gs)
    for k in range(n):
        blas.tbmv(W['di'], Gs, n=m, k=0, ldA=1, offsetx=k * m)

    # 4. Cholesky factorization of H = C + Gs' * Gs + 2 * As' * As.
    blas.syrk(As, H, trans='T', alpha=2.0)
    blas.syrk(Gs, H, trans='T', beta=1.0)
    base.axpy(C, H)
    lapack.potrf(H)

    def f(x, y, z):
        """
        Solve

            C * ux + G' * uzl - 2*A'(uzs21) = bx
                                     -uzs11 = bX1
                                     -uzs22 = bX2
                        G * ux - D^2 * uzl  = bzl
            [ -uX1   -A(ux)' ]       [ uzs11 uzs21' ]
            [                ] - T * [              ] * T = bzs.
            [ -A(ux) -uX2    ]       [ uzs21 uzs22  ]

        On entry, x = (bx, bX1, bX2) and z = [ bzl; bzs[:] ].
        On exit, x = (ux, uX1, uX2) and z = [ D*uzl; (r'*uzs*r)[:] ].
        The argument y is not used.

        Define X = uzs21 and Z = T * uzs * T.  Using the congruence
        transformation

            [ V1   0   ] [ T11  T21' ] [ V1'  0  ]   [ I  S' ]
            [          ] [           ] [         ] = [       ]
            [ 0    V2' ] [ T21  T22  ] [ 0    V2 ]   [ S  I  ]

        and the factorization X + S * X' * S = L( L'(X) ), introduce the
        scaled variables

            uuzl = D * uzl
            XX   = L'(V2^-1 * X * V1^-1)
            ZZ21 = L^-1(V2' * Z21 * V1')

        and the right-hand sides

            bbzs_21 = L^-1(V2' * bzs_21 * V1')
                                       [ bX1  0   ]
            bX      = L^-1( V2' * (T * [          ] * T)_21 * V1').
                                       [ 0    bX2 ]

        Eliminating Z21, uuzl and XX gives

            H * ux = bx + Gs' * D^-1 * bzl + 2*As'(bX - bbzs_21)
            Gs * ux - uuzl = D^-1 * bzl
            -As(ux) - XX   = bbzs_21 - bX
            -uX1 - Z11     = bzs_11
            -uX2 - Z22     = bzs_22.

        In summary, we can use the following algorithm:

        1. bXX := bX - bbzs21
                                        [ bX1 0   ]
                = L^-1( V2' * ((T * [         ] * T)_21 - bzs_21) * V1')
                                        [ 0   bX2 ]

        2. Solve H * ux = bx + Gs' * D^-1 * bzl + 2*As'(bXX).

        3. From ux, compute

               uuzl = Gs*ux - D^-1 * bzl and
               X = V2 * L^-T(-As(ux) + bXX) * V1.

        4. Return ux, uuzl,

               rti' * Z * rti = r' * [ -bX1, X'; X, -bX2 ] * r

           and uX1 = -Z11 - bzs_11, uX2 = -Z22 - bzs_22.
        """

        # Save bzs_11, bzs_22, bzs_21.
        lapack.lacpy(z, bz11, uplo='L', m=q, n=q, ldA=p + q, offsetA=m)
        lapack.lacpy(z, bz21, m=p, n=q, ldA=p + q, offsetA=m + q)
        lapack.lacpy(z, bz22, uplo='L', m=p, n=p, ldA=p + q,
                     offsetA=m + (p + q + 1) * q)

        # zl := D^-1 * zl
        #     = D^-1 * bzl
        blas.tbmv(W['di'], z, n=m, k=0, ldA=1)

        # zs := r' * [ bX1, 0; 0, bX2 ] * r.

        # zs := [ bX1, 0; 0, bX2 ]
        blas.scal(0.0, z, offset=m)
        lapack.lacpy(x[1], z, uplo='L', m=q, n=q, ldB=p + q, offsetB=m)
        lapack.lacpy(x[2], z, uplo='L', m=p, n=p, ldB=p + q,
                     offsetB=m + (p + q + 1) * q)

        # scale diagonal of zs by 1/2
        blas.scal(0.5, z, inc=p + q + 1, offset=m)

        # a := tril(zs)*r
        blas.copy(r, a)
        blas.trmm(z, a, side='L', m=p + q, n=p + q, ldA=p + q, ldB=p + q,
                  offsetA=m)

        # zs := a'*r + r'*a
        blas.syr2k(r, a, z, trans='T', n=p + q, k=p + q, ldB=p + q,
                   ldC=p + q, offsetC=m)

        # bz21 := L^-1( V2' * ((r * zs * r')_21 - bz21) * V1')
        #
        #                           [ bX1 0   ]
        #       = L^-1( V2' * ((T * [         ] * T)_21 - bz21) * V1').
        #                           [ 0   bX2 ]

        # a = [ r21 r22 ] * z
        #   = [ r21 r22 ] * r' * [ bX1, 0; 0, bX2 ] * r
        #   = [ T21  T22 ] * [ bX1, 0; 0, bX2 ] * r
        blas.symm(z, r, a, side='R', m=p, n=p + q, ldA=p + q, ldC=p + q,
                  offsetB=q)

        # bz21 := -bz21 + a * [ r11, r12 ]'
        #       = -bz21 + (T * [ bX1, 0; 0, bX2 ] * T)_21
        blas.gemm(a, r, bz21, transB='T', m=p, n=q, k=p + q, beta=-1.0,
                  ldA=p + q, ldC=p)

        # bz21 := V2' * bz21 * V1'
        #       = V2' * (-bz21 + (T*[bX1, 0; 0, bX2]*T)_21) * V1'
        blas.gemm(V2, bz21, tmp, transA='T', m=p, n=q, k=p, ldB=p)
        blas.gemm(tmp, V1, bz21, transB='T', m=p, n=q, k=q, ldC=p)

        # bz21[:] := D * (I-P) * bz21[:]
        #          = L^-1 * bz21[:]
        #          = bXX[:]
        blas.copy(bz21, tmp)
        base.gemv(P, bz21, tmp, alpha=-1.0, beta=1.0)
        base.gemv(D, tmp, bz21)

        # Solve H * ux = bx + Gs' * D^-1 * bzl + 2*As'(bXX).

        # x[0] := x[0] + Gs'*zl + 2*As'(bz21)
        #       = bx + G' * D^-1 * bzl + 2 * As'(bXX)
        blas.gemv(Gs, z, x[0], trans='T', alpha=1.0, beta=1.0)
        blas.gemv(As, bz21, x[0], trans='T', alpha=2.0, beta=1.0)

        # x[0] := H \ x[0]
        #      = ux
        lapack.potrs(H, x[0])

        # uuzl = Gs*ux - D^-1 * bzl
        blas.gemv(Gs, x[0], z, alpha=1.0, beta=-1.0)

        # bz21 := V2 * L^-T(-As(ux) + bz21) * V1
        #       = X
        blas.gemv(As, x[0], bz21, alpha=-1.0, beta=1.0)
        blas.tbsv(DV, bz21, n=p * q, k=0, ldA=1)
        blas.copy(bz21, tmp)
        base.gemv(P, tmp, bz21, alpha=-1.0, beta=1.0, trans='T')
        blas.gemm(V2, bz21, tmp)
        blas.gemm(tmp, V1, bz21)

        # zs := -zs + r' * [ 0, X'; X, 0 ] * r
        #     = r' * [ -bX1, X'; X, -bX2 ] * r.

        # a := bz21 * [ r11, r12 ]
        #    = X * [ r11, r12 ]
        blas.gemm(bz21, r, a, m=p, n=p + q, k=q, ldA=p, ldC=p + q)

        # z := -z + [ r21, r22 ]' * a + a' * [ r21, r22 ]
        #    = rti' * uzs * rti
        blas.syr2k(r, a, z, trans='T', beta=-1.0, n=p + q, k=p, offsetA=q,
                   offsetC=m, ldB=p + q, ldC=p + q)

        # uX1 = -Z11 - bzs_11
        #     = -(r*zs*r')_11 - bzs_11
        # uX2 = -Z22 - bzs_22
        #     = -(r*zs*r')_22 - bzs_22

        blas.copy(bz11, x[1])
        blas.copy(bz22, x[2])

        # scale diagonal of zs by 1/2
        blas.scal(0.5, z, inc=p + q + 1, offset=m)

        # a := r*tril(zs)
        blas.copy(r, a)
        blas.trmm(z, a, side='R', m=p + q, n=p + q, ldA=p + q, ldB=p + q,
                  offsetA=m)

        # x[1] := -x[1] - a[:q,:] * r[:q, :]' - r[:q,:] * a[:q,:]'
        #       = -bzs_11 - (r*zs*r')_11
        blas.syr2k(a, r, x[1], n=q, alpha=-1.0, beta=-1.0)

        # x[2] := -x[2] - a[q:,:] * r[q:, :]' - r[q:,:] * a[q:,:]'
        #       = -bzs_22 - (r*zs*r')_22
        blas.syr2k(a, r, x[2], n=p, alpha=-1.0, beta=-1.0, offsetA=q,
                   offsetB=q)

        # restore the diagonal of zs (undo the earlier scaling by 1/2)
        blas.scal(2.0, z, inc=p + q + 1, offset=m)

    return f
# Weighted Gram matrix at the analytic center xac, used below to draw the
# ellipse { x | (x-xac)' * Hac * (x-xac) <= 1 }.
# NOTE(review): the Hessian of -sum(log(h - G*x)) is G' * diag(y**-2) * G
# with y = h - G*x, whereas this uses the power -1 -- confirm that -1 is
# intended.
Hac = G.T * spdiag((h-G*xac)**-1) * G

if pylab_installed:
    pylab.figure(3, facecolor='w')

    # polyhedron
    # Each edge from X[k,:] to X[k+1,:] is extended by 10% of its length at
    # both ends before plotting.
    for k in range(m):
        edge = X[[k,k+1],:] + 0.1 * matrix([1., 0., 0., -1.], (2,2)) * \
            (X[2*[k],:] - X[2*[k+1],:])
        pylab.plot(edge[:,0], edge[:,1], 'k')

    # 1000 points on the unit circle
    nopts = 1000
    angles = matrix( [ a*2.0*pi/nopts for a in range(nopts) ], (1,nopts) )
    circle = matrix(0.0, (2,nopts))
    circle[0,:], circle[1,:] = cos(angles), sin(angles)

    # ellipse = L^-T * circle + xc  where Hac = L*L'
    # potrf overwrites Hac with its Cholesky factor.
    lapack.potrf(Hac)
    ellipse = +circle
    blas.trsm(Hac, ellipse, transA='T')
    ellipse += xac[:, nopts*[0]]
    pylab.fill(ellipse[0,:].T, ellipse[1,:].T, facecolor = '#F0F0F0')
    pylab.plot([xac[0]], [xac[1]], 'ko')

    pylab.title('Analytic center (fig 8.7)')
    pylab.axis('equal')
    pylab.axis('off')

    pylab.show()
def F(W):
    """
    Returns a function f(x, y, z) that solves the KKT conditions

    The scaling W['d'] is split into two blocks of length n*r whose squares
    (U and V) are added to the diagonal of BBT to build the block Cholesky
    factors L22, L32, L33, L42, L43 and L44.  n, r, B, BBT, cholK, id_n,
    zero_n, P, g, DEBUG, cholesky and chol2 come from the enclosing scope
    (not visible in this block).

    NOTE(review): the indentation of this block was lost in the source; the
    nesting below (in particular of the DEBUG section) is a reconstruction
    and should be confirmed against the original file.
    """
    U = +W['d'][0:n*r]
    U = U ** 2
    V = +W['d'][n*r:2*n*r]
    V = V ** 2
    Wd = +W['d']

    if DEBUG > 0:
        print "# Computing L22"
    L22 = []
    for i in range(r):
        # Cholesky factor of BBT + diag(U_i).
        BBTi = BBT + spmatrix(U[i*n:(i+1)*n] ,range(n),range(n))
        cholesky(BBTi)
        L22.append(BBTi)

    if DEBUG > 0:
        print "# Computing L32"
    L32 = []
    for i in range(r):
        # Solves L32 . L22 = - B . B^\top
        C = -BBT
        blas.trsm(L22[i], C, side='R', transA='T')
        L32.append(C)

    if DEBUG > 0:
        print "# Computing L33"
    L33 = []
    for i in range(r):
        # Cholesky factor of diag(V_i) + BBT - L32_i * L32_i'.
        A = spmatrix(V[i*n:(i+1)*n] ,range(n),range(n)) + BBT - L32[i] * L32[i].T
        cholesky(A)
        L33.append(A)

    if DEBUG > 0:
        print "# Computing L42"
    L42 = []
    for i in range(r):
        # Inverse of the (upper triangular) transpose of L22_i.
        A = +L22[i].T
        lapack.trtri(A, uplo='U')
        L42.append(A)

    if DEBUG > 0:
        print "# Computing L43"
    L43 = []
    for i in range(r):
        A = id_n - L42[i] * L32[i].T
        blas.trsm(L33[i], A, side='R', transA='T')
        L43.append(A)

    if DEBUG > 0:
        print "# Computing L44 and D4"

    # The indices for the diagonal of a dense matrix
    L44 = matrix(0, (n,n))
    for i in range(r):
        L44 = L44 + L43[i] * L43[i].T + L42[i] * L42[i].T
    cholesky(L44)

    # WARNING: y, z and t have been permuted (LD33 and LD44piv)

    if DEBUG > 0:
        print "## PRE-COMPUTATION DONE ##"

    # Checking the decomposition
    if DEBUG > 1:
        if DEBUG > 2:
            # Assemble the full block factor mA and the signature matrix mD
            # so that mA * mD * mA' can be printed for inspection.
            mA = []
            mB = []
            mD = []
            for i in range(3*r+1):
                m = []
                for j in range(3*r+1):
                    m.append(zero_n)
                mA.append(m)

            for i in range(r):
                mA[i][i] = cholK
                mA[i+r][i+r] = L22[i]
                mA[i][i+r] = -B
                mA[i][i+2*r] = B
                mA[i+r][i+2*r] = L32[i]
                mA[i+2*r][i+2*r] = L33[i]
                mA[i+r][3*r] = L42[i]
                mA[i+2*r][3*r] = L43[i]
                mD.append(id_n)
            mA[3*r][3*r] = L44
            for i in range(2*r):
                mD.append(-id_n)
            mD.append(id_n)

            printing.options['width'] = 30
            mA = sparse(mA)
            mD = spdiag(mD)

            print "LL^T =\n%s" % (mA * mD * mA.T),
            print "P =\n%s" % P,
        print g
        print "W = %s" % W["d"].T,
        print "U^2 = %s" % U.T,
        print "V^2 = %s" % V.T,

    # NOTE(review): f() calls solve() whenever DEBUG is truthy, so the
    # reference solver is built unconditionally here; the original may have
    # guarded these two statements with a DEBUG test -- confirm.
    print "### Pre-compute for check"
    solve = chol2(W,P)

    def f(x, y, z):
        """
        On entry bx, bz are stored in x, z.  On exit x, z contain the
        solution, with z scaled: z./di is returned instead of z.
        """

        # Maps to our variables x,y,z and t
        if DEBUG > 0:
            print "... Computing ..."
            print "bx = %sbz = %s" % (x.T, z.T),
        a = []
        b = x[n*r:n*r + n]
        c = []
        d = []
        for i in range(r):
            a.append(x[i*n:(i+1)*n])
            c.append(z[i*n:(i+1)*n])
            d.append(z[(i+r)*n:(i+r+1)*n])

        if DEBUG:
            # Now solves using cvxopt
            xp = +x
            zp = +z
            solve(xp,y,zp)

        # First phase
        # (forward substitution through the block factors)
        for i in range(r):
            blas.trsm(cholK, a[i])

            Bai = B * a[i]

            c[i] = - Bai - c[i]
            blas.trsm(L22[i], c[i])

            d[i] = Bai - L32[i] * c[i] - d[i]
            blas.trsm(L33[i], d[i])

            b = b + L42[i] * c[i] + L43[i] * d[i]

        blas.trsm(L44, b)

        # Second phase
        # (back substitution with the transposed factors)
        blas.trsm(L44, b, transA='T')

        for i in range(r):
            d[i] = d[i] - L43[i].T * b
            blas.trsm(L33[i], d[i], transA='T')

            c[i] = c[i] - L32[i].T * d[i] - L42[i].T * b
            blas.trsm(L22[i], c[i], transA='T')

            a[i] = a[i] + B.T * (c[i] - d[i])
            blas.trsm(cholK, a[i], transA='T')

        # Store in vectors and scale
        x[n*r:n*r + n] = b
        for i in range(r):
            x[i*n:(i+1)*n] = a[i]
            z[i*n:(i+1)*n] = c[i]
            z[(i+r)*n:(i+r+1)*n] = d[i]

        z[:] = mul( Wd, z)

        if DEBUG:
            # Compare against the reference solution computed by solve().
            print "x  = %s" % x.T,
            print "z  = %s" % z.T,
            print "Delta(x) = %s" % (x - xp).T,
            print "Delta(z) = %s" % (z - zp).T,
            delta= blas.nrm2(x-xp) + blas.nrm2(z-zp)
            if (delta > 1e-8):
                print "--- DELTA TOO HIGH = %.3e ---" % delta

    return f
def __scale(L, Y, U, adj=False, inv=False, factored_updates=True):
    """Apply a supernodal scaling (or its inverse) in place to each matrix in U.

    The supernodes listed in L.symb.snpost are visited in reverse order.
    For each supernode k a work matrix F is assembled from Y.blkval and the
    update matrix popped from the stack, update matrices for the children
    of k are pushed, and the block of every Ut in U belonging to k is
    symmetrized and then transformed with L.blkval and F: triangular solves
    followed by a triangular multiply when ``inv`` is False, the reverse
    when ``inv`` is True.

    :param L: object with ``symb`` (symbolic factorization) and ``blkval``.
    :param Y: object whose ``blkval`` supplies the columns of F.
    :param U: iterable of objects whose ``blkval`` is overwritten.
    :param adj: selects which transpose flags are used (see ``tr``/``trns``).
        NOTE(review): only the literal values True and False are handled;
        any other value leaves ``tr`` undefined.
    :param inv: False to use trsm/trsm/trmm, True to use trmm/trmm/trsm.
    :param factored_updates: if True, child updates come from
        ``frontal_get_update_factor``; otherwise from ``frontal_get_update``
        and the trailing block of F is factored here with potrf.
    :returns: None.
    """
    n = L.symb.n
    snpost = L.symb.snpost
    snptr = L.symb.snptr
    chptr = L.symb.chptr
    chidx = L.symb.chidx

    relptr = L.symb.relptr
    relidx = L.symb.relidx
    blkptr = L.symb.blkptr

    stack = []

    for k in reversed(list(snpost)):

        nn = snptr[k + 1] - snptr[k]  # |Nk|
        na = relptr[k + 1] - relptr[k]  # |Ak|
        nj = na + nn

        # Work matrix: first nn columns copied from the block of Y.
        F = matrix(0.0, (nj, nj))
        lapack.lacpy(Y.blkval, F, m=nj, n=nn, ldA=nj, offsetA=blkptr[k],
                     uplo='L')

        # if supernode k is not a root node:
        if na > 0:
            # copy Vk to 2,2 block of F
            Vk = stack.pop()
            lapack.lacpy(Vk, F, ldB=nj, offsetB=nn * (nj + 1), m=na, n=na,
                         uplo='L')

        # if supernode k has any children:
        for ii in range(chptr[k], chptr[k + 1]):
            i = chidx[ii]
            if factored_updates:
                r = relidx[relptr[i]:relptr[i + 1]]
                stack.append(frontal_get_update_factor(F, r, nn, na))
            else:
                stack.append(frontal_get_update(F, relidx, relptr, i))

        # if supernode k is not a root node:
        if na > 0:
            if factored_updates:
                # In this case we have Vk = Lk'*Lk
                if adj is False:
                    trns = 'N'
                elif adj is True:
                    trns = 'T'
            else:
                # factorize Vk
                lapack.potrf(F, offsetA=nj * nn + nn, n=na, ldA=nj)
                # In this case we have Vk = Lk*Lk'
                if adj is False:
                    trns = 'T'
                elif adj is True:
                    trns = 'N'

        # Transpose flags for the two operations with L.blkval below.
        if adj is False:
            tr = ['T', 'N']
        elif adj is True:
            tr = ['N', 'T']

        if inv is False:
            for Ut in U:
                # symmetrize (1,1) block of Ut_{k} and scale
                U11 = matrix(0.0, (nn, nn))
                lapack.lacpy(Ut.blkval, U11, offsetA=blkptr[k], m=nn, n=nn,
                             ldA=nj, uplo='L')
                U11 += U11.T
                # The diagonal was counted twice by the symmetrization.
                U11[::nn + 1] *= 0.5
                lapack.lacpy(U11, Ut.blkval, offsetB=blkptr[k], m=nn, n=nn,
                             ldB=nj, uplo='N')

                blas.trsm(L.blkval, Ut.blkval, side = 'R', transA = tr[0],\
                          m = nj, n = nn, offsetA = blkptr[k], ldA = nj,\
                          offsetB = blkptr[k], ldB = nj)
                blas.trsm(L.blkval, Ut.blkval, m = nn, n = nn, transA = tr[1],\
                          offsetA = blkptr[k], offsetB = blkptr[k],\
                          ldA = nj, ldB = nj)

                # zero-out strict upper triangular part of {Nj,Nj} block
                for i in range(1, nn):
                    blas.scal(0.0, Ut.blkval, offset=blkptr[k] + nj * i, n=i)

                if na > 0:
                    blas.trmm(F, Ut.blkval, m = na, n = nn, transA = trns,\
                              offsetA = nj*nn+nn, ldA = nj,\
                              offsetB = blkptr[k]+nn, ldB = nj)

        else:  # inv is True
            for Ut in U:
                # symmetrize (1,1) block of Ut_{k} and scale
                U11 = matrix(0.0, (nn, nn))
                lapack.lacpy(Ut.blkval, U11, offsetA=blkptr[k], m=nn, n=nn,
                             ldA=nj, uplo='L')
                U11 += U11.T
                # The diagonal was counted twice by the symmetrization.
                U11[::nn + 1] *= 0.5
                lapack.lacpy(U11, Ut.blkval, offsetB=blkptr[k], m=nn, n=nn,
                             ldB=nj, uplo='N')

                blas.trmm(L.blkval, Ut.blkval, side = 'R', transA = tr[0],\
                          m = nj, n = nn, offsetA = blkptr[k], ldA = nj,\
                          offsetB = blkptr[k], ldB = nj)
                blas.trmm(L.blkval, Ut.blkval, m = nn, n = nn, transA = tr[1],\
                          offsetA = blkptr[k], offsetB = blkptr[k],\
                          ldA = nj, ldB = nj)

                # zero-out strict upper triangular part of {Nj,Nj} block
                for i in range(1, nn):
                    blas.scal(0.0, Ut.blkval, offset=blkptr[k] + nj * i, n=i)

                if na > 0:
                    blas.trsm(F, Ut.blkval, m = na, n = nn, transA = trns,\
                              offsetA = nj*nn+nn, ldA = nj,\
                              offsetB = blkptr[k]+nn, ldB = nj)

    return
def F(W):
    """
    Build and return a function solve(x, y, z) for the linear system
    below, using the scaling matrices r0 = W['r'][0] and r1 = W['r'][1].

    The work arrays L, U, s, Uti, Us, S, Asc, x1, tmp, the data A and the
    sizes m, n, msq, mpckd are not defined in this block; they are read
    from the enclosing scope and the arrays are overwritten.

    Generate a solver for

                                         A'(uz0) = bx[0]
                                      -uz0 - uz1 = bx[1]
        A(ux[0]) - ux[1] - r0*r0' * uz0 * r0*r0' = bz0
                 - ux[1] - r1*r1' * uz1 * r1*r1' = bz1.

    uz0, uz1, bz0, bz1 are symmetric m x m-matrices.
    ux[0], bx[0] are n-vectors.
    ux[1], bx[1] are symmetric m x m-matrices.

    We first calculate a congruence that diagonalizes r0*r0' and r1*r1':

        U' * r0 * r0' * U = I,  U' * r1 * r1' * U = S.

    We then make a change of variables

        usx[0] = ux[0],
        usx[1] = U' * ux[1] * U
          usz0 = U^-1 * uz0 * U^-T
          usz1 = U^-1 * uz1 * U^-T

    and define

          As() = U' * A() * U
        bsx[1] = U^-1 * bx[1] * U^-T
          bsz0 = U' * bz0 * U
          bsz1 = U' * bz1 * U.

    This gives

                          As'(usz0) = bx[0]
                       -usz0 - usz1 = bsx[1]
        As(usx[0]) - usx[1] - usz0  = bsz0
              -usx[1] - S * usz1 * S = bsz1.

    1. Eliminate usz0, usz1 using equations 3 and 4,

           usz0 = As(usx[0]) - usx[1] - bsz0
           usz1 = -S^-1 * (usx[1] + bsz1) * S^-1.

       This gives two equations in usx[0] and usx[1].

           As'(As(usx[0]) - usx[1]) = bx[0] + As'(bsz0)

           -As(usx[0]) + usx[1] + S^-1 * usx[1] * S^-1
               = bsx[1] - bsz0 - S^-1 * bsz1 * S^-1.

    2. Eliminate usx[1] using equation 2:

           usx[1] + S * usx[1] * S
               = S * ( As(usx[0]) + bsx[1] - bsz0 ) * S - bsz1

       i.e., with Gamma[i,j] = 1.0 + S[i,i] * S[j,j],

           usx[1] = ( S * As(usx[0]) * S ) ./ Gamma
                    + ( S * ( bsx[1] - bsz0 ) * S - bsz1 ) ./ Gamma.

       This gives an equation in usx[0].

           As'( As(usx[0]) ./ Gamma )
               = bx0 + As'(bsz0) +
                 As'( (S * ( bsx[1] - bsz0 ) * S - bsz1) ./ Gamma )
               = bx0 + As'( ( bsz0 - bsz1 + S * bsx[1] * S ) ./ Gamma ).
    """

    # Calculate U s.t.
    #
    #     U' * r0*r0' * U = I,   U' * r1*r1' * U = diag(s).

    # Cholesky factorization r0 * r0' = L * L'
    blas.syrk(W['r'][0], L)
    lapack.potrf(L)

    # SVD L^-1 * r1 = U * diag(s) * V'
    blas.copy(W['r'][1], U)
    blas.trsm(L, U)
    lapack.gesvd(U, s, jobu='O')

    # s := s**2
    s[:] = s**2

    # Uti := U
    blas.copy(U, Uti)

    # U := L^-T * U
    blas.trsm(L, U, transA='T')

    # Uti := L * Uti = U^-T
    blas.trmm(L, Uti)

    # Us := U * diag(s)^-1
    # (each tbsv call divides one row of Us, accessed with stride m, by s)
    blas.copy(U, Us)
    for i in range(m):
        blas.tbsv(s, Us, n=m, k=0, ldA=1, incx=m, offsetx=i)

    # S is m x m with lower triangular entries s[i] * s[j]
    # sqrtG is m x m with lower triangular entries sqrt(1.0 + s[i]*s[j])
    # Upper triangular entries are undefined but nonzero.

    blas.scal(0.0, S)
    blas.syrk(s, S)
    Gamma = 1.0 + S
    sqrtG = sqrt(Gamma)

    # Asc[i] = (U' * Ai * U ) ./ sqrtG,  for i = 1, ..., n
    #        = Asi ./ sqrt(Gamma)
    blas.copy(A, Asc)
    misc.scale(
        Asc,
        # only 'r' part of the dictionary is used
        {
            'dnl': matrix(0.0, (0, 1)),
            'dnli': matrix(0.0, (0, 1)),
            'd': matrix(0.0, (0, 1)),
            'di': matrix(0.0, (0, 1)),
            'v': [],
            'beta': [],
            'r': [U],
            'rti': [U]
        })
    # Elementwise division of every column of Asc (length msq) by sqrtG.
    for i in range(n):
        blas.tbsv(sqrtG, Asc, n=msq, k=0, ldA=1, offsetx=i * msq)

    # Convert columns of Asc to packed storage
    misc.pack2(Asc, {'l': 0, 'q': [], 's': [m]})

    # Cholesky factorization of Asc' * Asc.
    H = matrix(0.0, (n, n))
    blas.syrk(Asc, H, trans='T', k=mpckd)
    lapack.potrf(H)

    def solve(x, y, z):
        """
        Solve the system described in F, overwriting x and z in place.
        The argument y is not referenced.

        1. Solve for usx[0]:

           Asc'(Asc(usx[0]))
               = bx0 + Asc'( ( bsz0 - bsz1 + S * bsx[1] * S ) ./ sqrtG)
               = bx0 + Asc'( ( bsz0 + S * ( bsx[1] - bssz1) S ) ./ sqrtG)

           where bsx[1] = U^-1 * bx[1] * U^-T, bsz0 = U' * bz0 * U,
           bsz1 = U' * bz1 * U, bssz1 = S^-1 * bsz1 * S^-1

        2. Solve for usx[1]:

           usx[1] + S * usx[1] * S
               = S * ( As(usx[0]) + bsx[1] - bsz0 ) * S - bsz1

           usx[1]
               = ( S * (As(usx[0]) + bsx[1] - bsz0) * S - bsz1) ./ Gamma
               = -bsz0 + (S * As(usx[0]) * S) ./ Gamma
                 + (bsz0 - bsz1 + S * bsx[1] * S ) ./ Gamma
               = -bsz0 + (S * As(usx[0]) * S) ./ Gamma
                 + (bsz0 + S * ( bsx[1] - bssz1 ) * S ) ./ Gamma

           Unscale ux[1] = Uti * usx[1] * Uti'

        3. Compute usz0, usz1

           r0' * uz0 * r0 = r0^-1 * ( A(ux[0]) - ux[1] - bz0 ) * r0^-T
           r1' * uz1 * r1 = r1^-1 * ( -ux[1] - bz1 )  * r1^-T
        """

        # z0 := U' * z0 * U
        #     = bsz0
        __cngrnc(U, z, trans='T')

        # z1 := Us' * bz1 * Us
        #     = S^-1 * U' * bz1 * U * S^-1
        #     = S^-1 * bsz1 * S^-1
        __cngrnc(Us, z, trans='T', offsetx=msq)

        # x[1] := Uti' * x[1] * Uti
        #       = bsx[1]
        __cngrnc(Uti, x[1], trans='T')

        # x[1] := x[1] - z[msq:]
        #       = bsx[1] - S^-1 * bsz1 * S^-1
        blas.axpy(z, x[1], alpha=-1.0, offsetx=msq)

        # x1 = (S * x[1] * S + z[:msq] ) ./ sqrtG
        #    = (S * ( bsx[1] - S^-1 * bsz1 * S^-1) * S + bsz0 ) ./ sqrtG
        #    = (S * bsx[1] * S - bsz1 + bsz0 ) ./ sqrtG
        # in packed storage
        blas.copy(x[1], x1)
        blas.tbmv(S, x1, n=msq, k=0, ldA=1)
        blas.axpy(z, x1, n=msq)
        blas.tbsv(sqrtG, x1, n=msq, k=0, ldA=1)
        misc.pack2(x1, {'l': 0, 'q': [], 's': [m]})

        # x[0] := x[0] + Asc'*x1
        #       = bx0 + Asc'( ( bsz0 - bsz1 + S * bsx[1] * S ) ./ sqrtG)
        #       = bx0 + As'( ( bsz0 - bsz1 + S * bsx[1] * S ) ./ Gamma )
        blas.gemv(Asc, x1, x[0], m=mpckd, trans='T', beta=1.0)

        # x[0] := H^-1 * x[0]
        #       = ux[0]
        lapack.potrs(H, x[0])

        # x1 = Asc(x[0]) .* sqrtG  (unpacked)
        #    = As(x[0])
        blas.gemv(Asc, x[0], tmp, m=mpckd)
        misc.unpack(tmp, x1, {'l': 0, 'q': [], 's': [m]})
        blas.tbmv(sqrtG, x1, n=msq, k=0, ldA=1)

        # usx[1] = (x1 + (x[1] - z[:msq])) ./ sqrtG**2
        #        = (As(ux[0]) + bsx[1] - bsz0 - S^-1 * bsz1 * S^-1)
        #           ./ Gamma

        # x[1] := x[1] - z[:msq]
        #       = bsx[1] - bsz0 - S^-1 * bsz1 * S^-1
        blas.axpy(z, x[1], -1.0, n=msq)

        # x[1] := x[1] + x1
        #       = As(ux) + bsx[1] - bsz0 - S^-1 * bsz1 * S^-1
        blas.axpy(x1, x[1])

        # x[1] := x[1] / Gamma
        #       = (As(ux) + bsx[1] - bsz0 - S^-1 * bsz1 * S^-1 ) / Gamma
        #       = S^-1 * usx[1] * S^-1
        blas.tbsv(Gamma, x[1], n=msq, k=0, ldA=1)

        # z[msq:] := r1' * U * (-z[msq:] - x[1]) * U * r1
        #         := -r1' * U * S^-1 * (bsz1 + ux[1]) * S^-1 *  U * r1
        #         := -r1' * uz1 * r1
        blas.axpy(x[1], z, n=msq, offsety=msq)
        blas.scal(-1.0, z, offset=msq)
        __cngrnc(U, z, offsetx=msq)
        __cngrnc(W['r'][1], z, trans='T', offsetx=msq)

        # x[1] := S * x[1] * S
        #       = usx1
        blas.tbmv(S, x[1], n=msq, k=0, ldA=1)

        # z[:msq] = r0' * U' * ( x1 - x[1] - z[:msq] ) * U * r0
        #         = r0' * U' * ( As(ux) - usx1 - bsz0 ) * U * r0
        #         = r0' * U' *  usz0 * U * r0
        #         = r0' * uz0 * r0
        blas.axpy(x1, z, -1.0, n=msq)
        blas.scal(-1.0, z, n=msq)
        blas.axpy(x[1], z, -1.0, n=msq)
        __cngrnc(U, z)
        __cngrnc(W['r'][0], z, trans='T')

        # x[1] := Uti * x[1] * Uti'
        #       = ux[1]
        __cngrnc(Uti, x[1])

    return solve
def cholesky(X):
    """
    Supernodal multifrontal Cholesky factorization:

    .. math::
         X = LL^T

    with :math:`L` lower-triangular. The factorization is done in place:
    on exit :math:`X` holds the Cholesky factor :math:`L` and
    ``X.is_factor`` is set to ``True``.

    :param X:    :py:class:`cspmatrix` that has not been factored yet
    """
    assert isinstance(X, cspmatrix) and X.is_factor is False, "X must be a cspmatrix"

    symb = X.symb
    postorder = symb.snpost
    col_ptr = symb.snptr
    child_ptr = symb.chptr
    child_idx = symb.chidx
    rel_ptr = symb.relptr
    rel_idx = symb.relidx
    blk_ptr = symb.blkptr
    values = X.blkval

    # update matrices of processed supernodes, waiting for their parent
    pending = []

    for k in postorder:

        ncols = col_ptr[k + 1] - col_ptr[k]      # |Nk|
        nbelow = rel_ptr[k + 1] - rel_ptr[k]     # |Ak|
        order = nbelow + ncols
        corner = ncols * order + ncols           # offset of the 2,2 block

        # assemble the frontal matrix from the stored block ...
        front = matrix(0.0, (order, order))
        lapack.lacpy(values, front, offsetA=blk_ptr[k], m=order, n=ncols,
                     ldA=order, uplo='L')

        # ... and from the children's update matrices, last child first
        for pos in reversed(range(child_ptr[k], child_ptr[k + 1])):
            update = pending.pop()
            frontal_add_update(front, update, rel_idx, rel_ptr, child_idx[pos])

        # factor L_{Nk,Nk}
        lapack.potrf(front, n=ncols, ldA=order)

        # a non-root supernode passes an update matrix on to its parent
        if nbelow > 0:

            # L_{Ak,Nk} := A_{Ak,Nk}*inv(L_{Nk,Nk}')
            blas.trsm(front, front, m=nbelow, n=ncols, ldA=order, ldB=order,
                      offsetB=ncols, transA='T', side='R')

            # Uk := Uk - L_{Ak,Nk}*L_{Ak,Nk}' (rank-one form for a single column)
            if ncols == 1:
                blas.syr(front, front, n=nbelow, offsetx=ncols,
                         offsetA=corner, ldA=order, alpha=-1.0)
            else:
                blas.syrk(front, front, k=ncols, n=nbelow, offsetA=ncols,
                          ldA=order, offsetC=corner, ldC=order,
                          alpha=-1.0, beta=1.0)

            # L_{Ak,Nk} := L_{Ak,Nk}*inv(L_{Nk,Nk})
            blas.trsm(front, front, m=nbelow, n=ncols, ldA=order, ldB=order,
                      offsetB=ncols, side='R')

            # push a copy of Uk
            update = matrix(0.0, (nbelow, nbelow))
            lapack.lacpy(front, update, m=nbelow, n=nbelow, uplo='L',
                         offsetA=corner, ldA=order)
            pending.append(update)

        # write the leading Nk columns of the frontal matrix back
        lapack.lacpy(front, values, uplo="L", offsetB=blk_ptr[k], m=order,
                     n=ncols, ldB=order)

    X.is_factor = True
def F(W):
    """
    Build and return a function solve(x, y, z) for the linear system
    below, using the scaling matrices r0 = W['r'][0] and r1 = W['r'][1].

    The work arrays L, U, s, Uti, Us, S, Asc, x1, tmp, the data A and the
    sizes m, n, msq, mpckd are not defined in this block; they are read
    from the enclosing scope and the arrays are overwritten.

    Generate a solver for

                                         A'(uz0) = bx[0]
                                      -uz0 - uz1 = bx[1]
        A(ux[0]) - ux[1] - r0*r0' * uz0 * r0*r0' = bz0
                 - ux[1] - r1*r1' * uz1 * r1*r1' = bz1.

    uz0, uz1, bz0, bz1 are symmetric m x m-matrices.
    ux[0], bx[0] are n-vectors.
    ux[1], bx[1] are symmetric m x m-matrices.

    We first calculate a congruence that diagonalizes r0*r0' and r1*r1':

        U' * r0 * r0' * U = I,  U' * r1 * r1' * U = S.

    We then make a change of variables

        usx[0] = ux[0],
        usx[1] = U' * ux[1] * U
          usz0 = U^-1 * uz0 * U^-T
          usz1 = U^-1 * uz1 * U^-T

    and define

          As() = U' * A() * U
        bsx[1] = U^-1 * bx[1] * U^-T
          bsz0 = U' * bz0 * U
          bsz1 = U' * bz1 * U.

    This gives

                          As'(usz0) = bx[0]
                       -usz0 - usz1 = bsx[1]
        As(usx[0]) - usx[1] - usz0  = bsz0
              -usx[1] - S * usz1 * S = bsz1.

    1. Eliminate usz0, usz1 using equations 3 and 4,

           usz0 = As(usx[0]) - usx[1] - bsz0
           usz1 = -S^-1 * (usx[1] + bsz1) * S^-1.

       This gives two equations in usx[0] and usx[1].

           As'(As(usx[0]) - usx[1]) = bx[0] + As'(bsz0)

           -As(usx[0]) + usx[1] + S^-1 * usx[1] * S^-1
               = bsx[1] - bsz0 - S^-1 * bsz1 * S^-1.

    2. Eliminate usx[1] using equation 2:

           usx[1] + S * usx[1] * S
               = S * ( As(usx[0]) + bsx[1] - bsz0 ) * S - bsz1

       i.e., with Gamma[i,j] = 1.0 + S[i,i] * S[j,j],

           usx[1] = ( S * As(usx[0]) * S ) ./ Gamma
                    + ( S * ( bsx[1] - bsz0 ) * S - bsz1 ) ./ Gamma.

       This gives an equation in usx[0].

           As'( As(usx[0]) ./ Gamma )
               = bx0 + As'(bsz0) +
                 As'( (S * ( bsx[1] - bsz0 ) * S - bsz1) ./ Gamma )
               = bx0 + As'( ( bsz0 - bsz1 + S * bsx[1] * S ) ./ Gamma ).
    """

    # Calculate U s.t.
    #
    #     U' * r0*r0' * U = I,   U' * r1*r1' * U = diag(s).

    # Cholesky factorization r0 * r0' = L * L'
    blas.syrk(W['r'][0], L)
    lapack.potrf(L)

    # SVD L^-1 * r1 = U * diag(s) * V'
    blas.copy(W['r'][1], U)
    blas.trsm(L, U)
    lapack.gesvd(U, s, jobu = 'O')

    # s := s**2
    s[:] = s**2

    # Uti := U
    blas.copy(U, Uti)

    # U := L^-T * U
    blas.trsm(L, U, transA = 'T')

    # Uti := L * Uti = U^-T
    blas.trmm(L, Uti)

    # Us := U * diag(s)^-1
    # (each tbsv call divides one row of Us, accessed with stride m, by s)
    blas.copy(U, Us)
    for i in range(m):
        blas.tbsv(s, Us, n = m, k = 0, ldA = 1, incx = m, offsetx = i)

    # S is m x m with lower triangular entries s[i] * s[j]
    # sqrtG is m x m with lower triangular entries sqrt(1.0 + s[i]*s[j])
    # Upper triangular entries are undefined but nonzero.

    blas.scal(0.0, S)
    blas.syrk(s, S)
    Gamma = 1.0 + S
    sqrtG = sqrt(Gamma)

    # Asc[i] = (U' * Ai * U ) ./ sqrtG,  for i = 1, ..., n
    #        = Asi ./ sqrt(Gamma)
    blas.copy(A, Asc)
    misc.scale(Asc,  # only 'r' part of the dictionary is used
        {'dnl': matrix(0.0, (0, 1)), 'dnli': matrix(0.0, (0, 1)),
         'd': matrix(0.0, (0, 1)), 'di': matrix(0.0, (0, 1)),
         'v': [], 'beta': [], 'r': [ U ], 'rti': [ U ]})
    # Elementwise division of every column of Asc (length msq) by sqrtG.
    for i in range(n):
        blas.tbsv(sqrtG, Asc, n = msq, k = 0, ldA = 1, offsetx = i*msq)

    # Convert columns of Asc to packed storage
    misc.pack2(Asc, {'l': 0, 'q': [], 's': [ m ]})

    # Cholesky factorization of Asc' * Asc.
    H = matrix(0.0, (n, n))
    blas.syrk(Asc, H, trans = 'T', k = mpckd)
    lapack.potrf(H)

    def solve(x, y, z):
        """
        Solve the system described in F, overwriting x and z in place.
        The argument y is not referenced.

        1. Solve for usx[0]:

           Asc'(Asc(usx[0]))
               = bx0 + Asc'( ( bsz0 - bsz1 + S * bsx[1] * S ) ./ sqrtG)
               = bx0 + Asc'( ( bsz0 + S * ( bsx[1] - bssz1) S ) ./ sqrtG)

           where bsx[1] = U^-1 * bx[1] * U^-T, bsz0 = U' * bz0 * U,
           bsz1 = U' * bz1 * U, bssz1 = S^-1 * bsz1 * S^-1

        2. Solve for usx[1]:

           usx[1] + S * usx[1] * S
               = S * ( As(usx[0]) + bsx[1] - bsz0 ) * S - bsz1

           usx[1]
               = ( S * (As(usx[0]) + bsx[1] - bsz0) * S - bsz1) ./ Gamma
               = -bsz0 + (S * As(usx[0]) * S) ./ Gamma
                 + (bsz0 - bsz1 + S * bsx[1] * S ) ./ Gamma
               = -bsz0 + (S * As(usx[0]) * S) ./ Gamma
                 + (bsz0 + S * ( bsx[1] - bssz1 ) * S ) ./ Gamma

           Unscale ux[1] = Uti * usx[1] * Uti'

        3. Compute usz0, usz1

           r0' * uz0 * r0 = r0^-1 * ( A(ux[0]) - ux[1] - bz0 ) * r0^-T
           r1' * uz1 * r1 = r1^-1 * ( -ux[1] - bz1 )  * r1^-T
        """

        # z0 := U' * z0 * U
        #     = bsz0
        __cngrnc(U, z, trans = 'T')

        # z1 := Us' * bz1 * Us
        #     = S^-1 * U' * bz1 * U * S^-1
        #     = S^-1 * bsz1 * S^-1
        __cngrnc(Us, z, trans = 'T', offsetx = msq)

        # x[1] := Uti' * x[1] * Uti
        #       = bsx[1]
        __cngrnc(Uti, x[1], trans = 'T')

        # x[1] := x[1] - z[msq:]
        #       = bsx[1] - S^-1 * bsz1 * S^-1
        blas.axpy(z, x[1], alpha = -1.0, offsetx = msq)

        # x1 = (S * x[1] * S + z[:msq] ) ./ sqrtG
        #    = (S * ( bsx[1] - S^-1 * bsz1 * S^-1) * S + bsz0 ) ./ sqrtG
        #    = (S * bsx[1] * S - bsz1 + bsz0 ) ./ sqrtG
        # in packed storage
        blas.copy(x[1], x1)
        blas.tbmv(S, x1, n = msq, k = 0, ldA = 1)
        blas.axpy(z, x1, n = msq)
        blas.tbsv(sqrtG, x1, n = msq, k = 0, ldA = 1)
        misc.pack2(x1, {'l': 0, 'q': [], 's': [m]})

        # x[0] := x[0] + Asc'*x1
        #       = bx0 + Asc'( ( bsz0 - bsz1 + S * bsx[1] * S ) ./ sqrtG)
        #       = bx0 + As'( ( bsz0 - bsz1 + S * bsx[1] * S ) ./ Gamma )
        blas.gemv(Asc, x1, x[0], m = mpckd, trans = 'T', beta = 1.0)

        # x[0] := H^-1 * x[0]
        #       = ux[0]
        lapack.potrs(H, x[0])

        # x1 = Asc(x[0]) .* sqrtG  (unpacked)
        #    = As(x[0])
        blas.gemv(Asc, x[0], tmp, m = mpckd)
        misc.unpack(tmp, x1, {'l': 0, 'q': [], 's': [m]})
        blas.tbmv(sqrtG, x1, n = msq, k = 0, ldA = 1)

        # usx[1] = (x1 + (x[1] - z[:msq])) ./ sqrtG**2
        #        = (As(ux[0]) + bsx[1] - bsz0 - S^-1 * bsz1 * S^-1)
        #           ./ Gamma

        # x[1] := x[1] - z[:msq]
        #       = bsx[1] - bsz0 - S^-1 * bsz1 * S^-1
        blas.axpy(z, x[1], -1.0, n = msq)

        # x[1] := x[1] + x1
        #       = As(ux) + bsx[1] - bsz0 - S^-1 * bsz1 * S^-1
        blas.axpy(x1, x[1])

        # x[1] := x[1] / Gamma
        #       = (As(ux) + bsx[1] - bsz0 - S^-1 * bsz1 * S^-1 ) / Gamma
        #       = S^-1 * usx[1] * S^-1
        blas.tbsv(Gamma, x[1], n = msq, k = 0, ldA = 1)

        # z[msq:] := r1' * U * (-z[msq:] - x[1]) * U * r1
        #         := -r1' * U * S^-1 * (bsz1 + ux[1]) * S^-1 *  U * r1
        #         := -r1' * uz1 * r1
        blas.axpy(x[1], z, n = msq, offsety = msq)
        blas.scal(-1.0, z, offset = msq)
        __cngrnc(U, z, offsetx = msq)
        __cngrnc(W['r'][1], z, trans = 'T', offsetx = msq)

        # x[1] := S * x[1] * S
        #       = usx1
        blas.tbmv(S, x[1], n = msq, k = 0, ldA = 1)

        # z[:msq] = r0' * U' * ( x1 - x[1] - z[:msq] ) * U * r0
        #         = r0' * U' * ( As(ux) - usx1 - bsz0 ) * U * r0
        #         = r0' * U' *  usz0 * U * r0
        #         = r0' * uz0 * r0
        blas.axpy(x1, z, -1.0, n = msq)
        blas.scal(-1.0, z, n = msq)
        blas.axpy(x[1], z, -1.0, n = msq)
        __cngrnc(U, z)
        __cngrnc(W['r'][0], z, trans = 'T')

        # x[1] := Uti * x[1] * Uti'
        #       = ux[1]
        __cngrnc(Uti, x[1])

    return solve
# Figure 2: all design points (white markers), the origin, and in red
# the points whose weight xe[k] exceeds the 1e-5 threshold.
if pylab_installed:
    pylab.figure(2, facecolor='w', figsize=(6,6))
    pylab.plot(V[0,:], V[1,:],'ow', mec = 'k')
    pylab.plot([0], [0], 'k+')
    I = [ k for k in range(n) if xe[k] > 1e-5 ]
    pylab.plot(V[0,I], V[1,I],'or')

# Enclosing ellipse follows from the solution of the dual problem:
#
#     minimize    mu
#     subject to  diag(V'*Z*V) <= mu*1
#                 Z >= 0

# Z is overwritten with its Cholesky factor; the scaled unit circle is
# then mapped through the inverse transpose of that factor. This part
# runs whether or not pylab is available.
lapack.potrf(Z)
ellipse = sqrt(mu) * circle
blas.trsm(Z, ellipse, transA='T')
if pylab_installed:
    pylab.plot(ellipse[0,:].T, ellipse[1,:].T, 'k--')
    pylab.axis([-5, 5, -5, 5])
    pylab.title('E-optimal design (fig. 7.10)')
    pylab.axis('off')

# A-design.
#
# minimize    tr (V*diag(x)*V')^{-1}
# subject to  x >= 0
#             sum(x) = 1
#
# minimize    tr Y
# subject to  [ V*diag(x)*V', I ]
def kkt(W):
    """
    Factor the KKT system for the scaling W['d'] and return a function
    f(x, y, z) that solves it in place. Timings of the individual
    stages are printed.

    The data X, the work arrays Xs, H, D, S, ones, wN, wnm, the sizes
    N, m, n and the timer cputime are not defined in this block; they
    are read from the enclosing scope and the arrays are overwritten.

    KKT solver for

        X*X' * ux  + uy * 1_m' + mat(uz) = bx
                               ux * 1_m  = by
                    ux - d.^2 .* mat(uz) = mat(bz).

    ux and bx are N x m matrices.
    uy and by are N-vectors.
    uz and bz are N*m-vectors.  mat(uz) is the N x m matrix that
        satisfies mat(uz)[:] = uz.
    d = mat(W['d']) a positive N x m matrix.

    If we eliminate uz from the last equation using

        mat(uz) = (ux - mat(bz)) ./ d.^2

    we get two equations in ux, uy:

        X*X' * ux + ux ./ d.^2 + uy * 1_m' = bx + mat(bz) ./ d.^2
                                  ux * 1_m = by.

    From the 1st equation,

        uxk = (X*X' + Dk^-2)^-1 * (-uy + bxk + Dk^-2 * bzk)
            = Dk * (I + Xk*Xk')^-1 * Dk * (-uy + bxk + Dk^-2 * bzk)

    for k = 1, ..., m, where Dk = diag(d[:,k]), Xk = Dk * X,
    uxk is column k of ux, and bzk is column k of mat(bz).

    We use the matrix inversion lemma

        ( I + Xk * Xk' )^-1 = I - Xk * (I + Xk' * Xk)^-1 * Xk'
                            = I - Xk * Hk^-1 * Xk'
                            = I - Xk * Lk^-T * Lk^-1 *  Xk'

    where Hk = I + Xk' * Xk = Lk * Lk' to write this as

        uxk = Dk * (I - Xk * Hk^-1 * Xk') * Dk *
              (-uy + bxk + Dk^-2 * bzk)
            = (Dk^2 - Dk^2 * X * Hk^-1 * X' * Dk^2) *
              (-uy + bxk + Dk^-2 * bzk).

    Substituting this in the second equation gives an equation for uy:

        sum_k (Dk^2 - Dk^2 * X * Hk^-1 * X' * Dk^2 ) * uy
            = -by + sum_k (Dk^2 - Dk^2 * X * Hk^-1 * X' * Dk^2) *
              ( bxk + Dk^-2 * bzk ),

    i.e., with D = (sum_k Dk^2)^1/2,  Yk = D^-1 * Dk^2 * X * Lk^-T,

        D * ( I - sum_k Yk * Yk' ) * D * uy
            = -by + sum_k (Dk^2 - Dk^2 * X * Hk^-1 * X' * Dk^2) *
              ( bxk + Dk^-2 *bzk ).

    Another application of the matrix inversion lemma gives

        uy = D^-1 * (I + Y * S^-1 * Y') * D^-1 *
             ( -by + sum_k ( Dk^2 - Dk^2 * X * Hk^-1 * X' * Dk^2 ) *
             ( bxk + Dk^-2 *bzk ) )

    with S = I - Y' * Y,  Y = [ Y1 ... Ym ].

    Summary:

    1. Compute

           uy = D^-1 * (I + Y * S^-1 * Y') * D^-1 *
                ( -by + sum_k (Dk^2 - Dk^2 * X * Hk^-1 * X' * Dk^2)
                * ( bxk + Dk^-2 *bzk ) )

    2. For k = 1, ..., m:

           uxk = (Dk^2 - Dk^2 * X * Hk^-1 * X' * Dk^2) *
                 (-uy + bxk + Dk^-2 * bzk)

    3. Solve for uz

           d .* uz = ( ux - mat(bz) ) ./ d.

    Return ux, uy, d .* uz.
    """

    ###
    utime0, stime0 = cputime()
    ###

    d = matrix(W['d'], (N, m))
    dsq = matrix(W['d']**2, (N, m))

    # Factor the matrices
    #
    #     H[k] = I + Xk' * Xk
    #          = I + X' * Dk^2 * X.
    #
    # Dk = diag(d[:,k]).

    for k in range(m):

        # H[k] = I
        blas.scal(0.0, H[k])
        H[k][::n + 1] = 1.0

        # Xs = Dk * X
        #    = diag(d[:,k]) * X
        # (tbmv with k=0 scales one column of Xs elementwise by d[:,k])
        blas.copy(X, Xs)
        for j in range(n):
            blas.tbmv(d, Xs, n=N, k=0, ldA=1, offsetA=k * N, offsetx=j * N)

        # H[k] := H[k] + Xs' * Xs
        #       = I + Xk' * Xk
        blas.syrk(Xs, H[k], trans='T', beta=1.0)

        # Factorization H[k] = Lk * Lk'
        lapack.potrf(H[k])

    ###
    utime, stime = cputime()
    print("Factor Hk's: utime = %.2f, stime = %.2f" \
        %(utime-utime0, stime-stime0))
    utime0, stime0 = cputime()
    ###

    # diag(D) = ( sum_k d[:,k]**2 ) ** 1/2
    #         = ( sum_k Dk^2) ** 1/2.

    blas.gemv(dsq, ones, D)
    D[:] = sqrt(D)

    ###
    # utime, stime = cputime()
    # print("Compute D: utime = %.2f, stime = %.2f" \
    #     %(utime-utime0, stime-stime0))
    utime0, stime0 = cputime()
    ###

    # S = I - Y'* Y is an m x m block matrix.
    # The i,j block of Y' * Y is
    #
    #     Yi' * Yj = Li^-1 * X' * Di^2 * D^-2 * Dj^2 * X * Lj^-T.
    #
    # We compute only the lower triangular blocks in Y'*Y.

    blas.scal(0.0, S)
    for i in range(m):
        for j in range(i + 1):

            # Xs = Di * Dj * D^-1 * X
            # (wN holds the elementwise product d[:,i] .* d[:,j] ./ diag(D))
            blas.copy(X, Xs)
            blas.copy(d, wN, n=N, offsetx=i * N)
            blas.tbmv(d, wN, n=N, k=0, ldA=1, offsetA=j * N)
            blas.tbsv(D, wN, n=N, k=0, ldA=1)
            for k in range(n):
                blas.tbmv(wN, Xs, n=N, k=0, ldA=1, offsetx=k * N)

            # block i, j of S is Xs' * Xs (as nonsymmetric matrix so we
            # get the correct multiple after scaling with Li, Lj)
            blas.gemm(Xs, Xs, S, transA='T', ldC=m * n,
                      offsetC=(j * n) * m * n + i * n)

    ###
    utime, stime = cputime()
    print("Form S: utime = %.2f, stime = %.2f" \
        %(utime-utime0, stime-stime0))
    utime0, stime0 = cputime()
    ###

    for i in range(m):

        # multiply block row i of S on the left with Li^-1
        blas.trsm(H[i], S, m=n, n=(i + 1) * n, ldB=m * n, offsetB=i * n)

        # multiply block column i of S on the right with Li^-T
        blas.trsm(H[i], S, side='R', transA='T', m=(m - i) * n, n=n,
                  ldB=m * n, offsetB=i * n * (m * n + 1))

    # S := I - S
    blas.scal(-1.0, S)
    S[::(m * n + 1)] += 1.0

    ###
    utime, stime = cputime()
    print("Form S (2): utime = %.2f, stime = %.2f" \
        %(utime-utime0, stime-stime0))
    utime0, stime0 = cputime()
    ###

    # S = L*L'
    lapack.potrf(S)

    ###
    utime, stime = cputime()
    print("Factor S: utime = %.2f, stime = %.2f" \
        %(utime-utime0, stime-stime0))
    utime0, stime0 = cputime()
    ###

    def f(x, y, z):
        """
        Solve the KKT system, overwriting x, y, z in place, and print
        the time taken.

        1. Compute

               uy = D^-1 * (I + Y * S^-1 * Y') * D^-1 *
                    ( -by + sum_k (Dk^2 - Dk^2 * X * Hk^-1 * X' * Dk^2)
                    * ( bxk + Dk^-2 *bzk ) )

        2. For k = 1, ..., m:

               uxk = (Dk^2 - Dk^2 * X * Hk^-1 * X' * Dk^2) *
                     (-uy + bxk + Dk^-2 * bzk)

        3. Solve for uz

               d .* uz = ( ux - mat(bz) ) ./ d.

        Return ux, uy, d .* uz.
        """

        ###
        utime0, stime0 = cputime()
        ###

        # xk := Dk^2 * xk + zk
        #     = Dk^2 * bxk + bzk
        blas.tbmv(dsq, x, n=N * m, k=0, ldA=1)
        blas.axpy(z, x)

        # y := -y + sum_k ( I - Dk^2 * X * Hk^-1 * X' ) * xk
        #    = -y + x*ones - sum_k Dk^2 * X * Hk^-1 * X' * xk

        # y := -y + x*ones
        blas.gemv(x, ones, y, alpha=1.0, beta=-1.0)

        # wnm = X' * x  (wnm interpreted as an n x m matrix)
        blas.gemm(X, x, wnm, m=n, k=N, n=m, transA='T', ldB=N, ldC=n)

        # wnm[:,k] = Hk \ wnm[:,k] (for wnm as an n x m matrix)
        for k in range(m):
            lapack.potrs(H[k], wnm, offsetB=k * n)

        for k in range(m):

            # wN = X * wnm[:,k]
            blas.gemv(X, wnm, wN, offsetx=n * k)

            # wN = Dk^2 * wN
            blas.tbmv(dsq[:, k], wN, n=N, k=0, ldA=1)

            # y := y - wN
            blas.axpy(wN, y, -1.0)

        # y = D^-1 * (I + Y * S^-1 * Y') * D^-1 * y
        #
        # Y = [Y1 ... Ym ], Yk = D^-1 * Dk^2 * X * Lk^-T.

        # y := D^-1 * y
        blas.tbsv(D, y, n=N, k=0, ldA=1)

        # wnm =  Y' * y  (interpreted as an mn-vector)
        #     = [ L1^-1 * X' * D1^2 * D^-1 * y;
        #         L2^-1 * X' * D2^2 * D^-1 * y;
        #         ...
        #         Lm^-1 * X' * Dm^2 * D^-1 * y ]

        for k in range(m):

            # wN = D^-1 * Dk^2 * y
            blas.copy(y, wN)
            blas.tbmv(dsq, wN, n=N, k=0, ldA=1, offsetA=k * N)
            blas.tbsv(D, wN, n=N, k=0, ldA=1)

            # wnm[:,k] = X' * wN
            blas.gemv(X, wN, wnm, trans='T', offsety=k * n)

            # wnm[:,k] = Lk^-1 * wnm[:,k]
            blas.trsv(H[k], wnm, offsetx=k * n)

        # wnm := S^-1 * wnm  (an mn-vector)
        lapack.potrs(S, wnm)

        # y := y + Y * wnm
        #    = y + D^-1 * [ D1^2 * X * L1^-T ... Dm^2 * X * Lm^-T]
        #      * wnm

        for k in range(m):

            # wnm[:,k] = Lk^-T * wnm[:,k]
            blas.trsv(H[k], wnm, trans='T', offsetx=k * n)

            # wN = X * wnm[:,k]
            blas.gemv(X, wnm, wN, offsetx=k * n)

            # wN = D^-1 * Dk^2 * wN
            blas.tbmv(dsq, wN, n=N, k=0, ldA=1, offsetA=k * N)
            blas.tbsv(D, wN, n=N, k=0, ldA=1)

            # y += wN
            blas.axpy(wN, y)

        # y := D^-1 * y
        blas.tbsv(D, y, n=N, k=0, ldA=1)

        # For k = 1, ..., m:
        #
        # xk = (I - Dk^2 * X * Hk^-1 * X') * (-Dk^2 * y + xk)

        # x = x - [ D1^2 * y ... Dm^2 * y] (as an N x m matrix)
        for k in range(m):
            blas.copy(y, wN)
            blas.tbmv(dsq, wN, n=N, k=0, ldA=1, offsetA=k * N)
            blas.axpy(wN, x, -1.0, offsety=k * N)

        # wnm  = X' * x (as an n x m matrix)
        blas.gemm(X, x, wnm, transA='T', m=n, n=m, k=N, ldB=N, ldC=n)

        # wnm[:,k] = Hk^-1 * wnm[:,k]
        for k in range(m):
            lapack.potrs(H[k], wnm, offsetB=n * k)

        for k in range(m):

            # wN = X * wnm[:,k]
            blas.gemv(X, wnm, wN, offsetx=k * n)

            # wN = Dk^2 * wN
            blas.tbmv(dsq, wN, n=N, k=0, ldA=1, offsetA=k * N)

            # x[:,k] := x[:,k] - wN
            blas.axpy(wN, x, -1.0, n=N, offsety=k * N)

        # z := ( x - z ) ./ d
        # (computed as z := z - x, then negated, then divided by d)
        blas.axpy(x, z, -1.0)
        blas.scal(-1.0, z)
        blas.tbsv(d, z, n=N * m, k=0, ldA=1)

        ###
        utime, stime = cputime()
        print("Solve: utime = %.2f, stime = %.2f" \
            %(utime-utime0, stime-stime0))
        ###

    return f
def trsm(L, B, alpha=1.0, trans='N', nrhs=None, offsetB=0, ldB=None):
    r"""
    Solves a triangular system of equations with multiple right-hand
    sides. Computes

    .. math::

        B &:= \alpha L^{-1} B   \text{ if trans is 'N'} \\
        B &:= \alpha L^{-T} B   \text{ if trans is 'T'}

    where :math:`L` is a :py:class:`cspmatrix` factor. :math:`B` is
    overwritten with the result.

    :param L:   :py:class:`cspmatrix` factor
    :param B:   matrix
    :param alpha:   float (default: 1.0)
    :param trans:   'N' or 'T' (default: 'N')
    :param nrhs:    number of right-hand sides (default: number of
                    columns in :math:`B`)
    :param offsetB: integer (default: 0)
    :param ldB:     leading dimension of :math:`B` (default: number of
                    rows in :math:`B`)
    """
    assert isinstance(
        L, cspmatrix) and L.is_factor is True, "L must be a cspmatrix factor"
    assert isinstance(B, matrix), "B must be a matrix"
    if ldB is None:
        ldB = B.size[0]
    if nrhs is None:
        nrhs = B.size[1]
    assert trans in ['N', 'T']

    n = L.symb.n
    snpost = L.symb.snpost
    snptr = L.symb.snptr
    snode = L.symb.snode
    chptr = L.symb.chptr
    chidx = L.symb.chidx

    relptr = L.symb.relptr
    relidx = L.symb.relidx
    blkptr = L.symb.blkptr
    blkval = L.blkval

    p = L.symb.p
    if p is None:
        p = range(n)

    stack = []

    # Compare by value: an identity test against a string literal only
    # works when the interpreter happens to share the string object.
    if trans == 'N':

        for k in snpost:

            nn = snptr[k + 1] - snptr[k]       # |Nk|
            na = relptr[k + 1] - relptr[k]     # |Ak|
            nj = na + nn

            # positions in B of the rows of supernode k; the same for
            # every right-hand side
            rows = [p[ir] for ir in snode[snptr[k]:snptr[k + 1]]]

            # extract block from rhs
            Uk = matrix(0.0, (nj, nrhs))
            for j in range(nrhs):
                for i, row in enumerate(rows):
                    Uk[i, j] = alpha * B[offsetB + j * ldB + row]

            # add contributions from children
            for _ in range(chptr[k], chptr[k + 1]):
                Ui, i = stack.pop()
                r = relidx[relptr[i]:relptr[i + 1]]
                Uk[r, :] += Ui

            # if k is not a root node
            if na > 0:
                blas.gemm(blkval, Uk, Uk, alpha=-1.0, beta=1.0,
                          m=na, n=nrhs, k=nn,
                          offsetA=blkptr[k] + nn, ldA=nj, offsetC=nn)
                stack.append((Uk[nn:, :], k))

            # scale and copy block to rhs
            blas.trsm(blkval, Uk, m=nn, n=nrhs, offsetA=blkptr[k], ldA=nj)
            for j in range(nrhs):
                for i, row in enumerate(rows):
                    B[offsetB + j * ldB + row] = Uk[i, j]

    else:  # trans == 'T'

        for k in reversed(list(snpost)):

            nn = snptr[k + 1] - snptr[k]       # |Nk|
            na = relptr[k + 1] - relptr[k]     # |Ak|
            nj = na + nn

            # positions in B of the rows of supernode k; the same for
            # every right-hand side
            rows = [p[ir] for ir in snode[snptr[k]:snptr[k + 1]]]

            # extract block from rhs and scale
            Uk = matrix(0.0, (nj, nrhs))
            for j in range(nrhs):
                for i, row in enumerate(rows):
                    Uk[i, j] = alpha * B[offsetB + j * ldB + row]
            blas.trsm(blkval, Uk, transA='T', m=nn, n=nrhs,
                      offsetA=blkptr[k], ldA=nj)

            # if k is not a root node
            if na > 0:
                Uk[nn:, :] = stack.pop()
                blas.gemm(blkval, Uk, Uk, alpha=-1.0, beta=1.0,
                          m=nn, n=nrhs, k=na, transA='T',
                          offsetA=blkptr[k] + nn, ldA=nj, offsetB=nn)

            # stack contributions for children
            for ii in range(chptr[k], chptr[k + 1]):
                i = chidx[ii]
                stack.append(Uk[relidx[relptr[i]:relptr[i + 1]], :])

            # copy block to rhs
            for j in range(nrhs):
                for i, row in enumerate(rows):
                    B[offsetB + j * ldB + row] = Uk[i, j]

    return
def f(x, y, z):
    """
    Solve the KKT system in place by a forward and a backward block
    substitution.

    On entry bx, bz are stored in x, z.
    On exit x, z contain the solution, with z scaled: z./di is returned
    instead of z (the last step overwrites z with mul(Wd, z)).

    x is read as r blocks of length n followed by one more block of
    length n; z is read as 2*r blocks of length n. The factors cholK, B,
    L22, L32, L33, L42, L43, L44, the vector Wd, the sizes n and r and
    the DEBUG level are not defined here; they are read from the
    enclosing scope.

    When DEBUG is truthy the system is also solved with the reference
    solver ``solve`` (the only place y is used) and the differences
    between the two solutions are printed.
    """
    # Debug output goes through sys.stdout.write / print(...) so that
    # the block parses on both Python 2 and Python 3; write() keeps the
    # "no trailing newline" behaviour of the old `print ...,` form.
    import sys
    emit = sys.stdout.write

    # Maps to our variables x,y,z and t
    if DEBUG > 0:
        print("... Computing ...")
        emit("bx = %sbz = %s" % (x.T, z.T))
    a = []
    b = x[n * r:n * r + n]
    c = []
    d = []
    for i in range(r):
        a.append(x[i * n:(i + 1) * n])
        c.append(z[i * n:(i + 1) * n])
        d.append(z[(i + r) * n:(i + r + 1) * n])

    if DEBUG:
        # Now solves using cvxopt
        xp = +x
        zp = +z
        solve(xp, y, zp)

    # First phase
    for i in range(r):
        blas.trsm(cholK, a[i])

        Bai = B * a[i]

        c[i] = -Bai - c[i]
        blas.trsm(L22[i], c[i])

        d[i] = Bai - L32[i] * c[i] - d[i]
        blas.trsm(L33[i], d[i])

        b = b + L42[i] * c[i] + L43[i] * d[i]

    blas.trsm(L44, b)

    # Second phase
    blas.trsm(L44, b, transA='T')

    for i in range(r):
        d[i] = d[i] - L43[i].T * b
        blas.trsm(L33[i], d[i], transA='T')

        c[i] = c[i] - L32[i].T * d[i] - L42[i].T * b
        blas.trsm(L22[i], c[i], transA='T')

        a[i] = a[i] + B.T * (c[i] - d[i])
        blas.trsm(cholK, a[i], transA='T')

    # Store in vectors and scale
    x[n * r:n * r + n] = b
    for i in range(r):
        x[i * n:(i + 1) * n] = a[i]
        z[i * n:(i + 1) * n] = c[i]
        z[(i + r) * n:(i + r + 1) * n] = d[i]

    z[:] = mul(Wd, z)

    if DEBUG:
        emit("x = %s" % x.T)
        emit("z = %s" % z.T)
        emit("Delta(x) = %s" % (x - xp).T)
        emit("Delta(z) = %s" % (z - zp).T)
        delta = blas.nrm2(x - xp) + blas.nrm2(z - zp)
        if delta > 1e-8:
            print("--- DELTA TOO HIGH = %.3e ---" % delta)
def f(x, y, z):
    """
    Solve the KKT system in place by a forward and a backward block
    substitution.

    On entry bx, bz are stored in x, z.
    On exit x, z contain the solution, with z scaled: z./di is returned
    instead of z (the last step overwrites z with mul(Wd, z)).

    x is read as r blocks of length n followed by one more block of
    length n; z is read as 2*r blocks of length n. The factors cholK, B,
    L22, L32, L33, L42, L43, L44, the vector Wd, the sizes n and r and
    the DEBUG level are not defined here; they are read from the
    enclosing scope.

    When DEBUG is truthy the system is also solved with the reference
    solver ``solve`` (the only place y is used) and the differences
    between the two solutions are printed.
    """
    # Debug output goes through sys.stdout.write / print(...) so that
    # the block parses on both Python 2 and Python 3; write() keeps the
    # "no trailing newline" behaviour of the old `print ...,` form.
    import sys
    emit = sys.stdout.write

    # Maps to our variables x,y,z and t
    if DEBUG > 0:
        print("... Computing ...")
        emit("bx = %sbz = %s" % (x.T, z.T))
    a = []
    b = x[n*r:n*r + n]
    c = []
    d = []
    for i in range(r):
        a.append(x[i*n:(i+1)*n])
        c.append(z[i*n:(i+1)*n])
        d.append(z[(i+r)*n:(i+r+1)*n])

    if DEBUG:
        # Now solves using cvxopt
        xp = +x
        zp = +z
        solve(xp,y,zp)

    # First phase
    for i in range(r):
        blas.trsm(cholK, a[i])

        Bai = B * a[i]

        c[i] = - Bai - c[i]
        blas.trsm(L22[i], c[i])

        d[i] = Bai - L32[i] * c[i] - d[i]
        blas.trsm(L33[i], d[i])

        b = b + L42[i] * c[i] + L43[i] * d[i]

    blas.trsm(L44, b)

    # Second phase
    blas.trsm(L44, b, transA='T')

    for i in range(r):
        d[i] = d[i] - L43[i].T * b
        blas.trsm(L33[i], d[i], transA='T')

        c[i] = c[i] - L32[i].T * d[i] - L42[i].T * b
        blas.trsm(L22[i], c[i], transA='T')

        a[i] = a[i] + B.T * (c[i] - d[i])
        blas.trsm(cholK, a[i], transA='T')

    # Store in vectors and scale
    x[n*r:n*r + n] = b
    for i in range(r):
        x[i*n:(i+1)*n] = a[i]
        z[i*n:(i+1)*n] = c[i]
        z[(i+r)*n:(i+r+1)*n] = d[i]

    z[:] = mul( Wd, z)

    if DEBUG:
        emit("x = %s" % x.T)
        emit("z = %s" % z.T)
        emit("Delta(x) = %s" % (x - xp).T)
        emit("Delta(z) = %s" % (z - zp).T)
        delta = blas.nrm2(x-xp) + blas.nrm2(z-zp)
        if delta > 1e-8:
            print("--- DELTA TOO HIGH = %.3e ---" % delta)
def F(W):
    """
    Returns a function f(x, y, z) that solves the KKT conditions.

    The squared halves of the scaling vector W['d'] are combined with
    BBT into the block factors L22, L32, L33, L42, L43 and L44, which
    the returned function f uses for a forward and a backward block
    substitution.

    BBT, B, cholK, id_n, zero_n, P, g, chol2, the sizes n and r and the
    DEBUG level are not defined here; they are read from the enclosing
    scope. Progress messages are printed when DEBUG > 0 and the
    factorization is dumped when DEBUG > 1.
    """
    # Debug output goes through sys.stdout.write / print(...) so that
    # the block parses on both Python 2 and Python 3; write() keeps the
    # "no trailing newline" behaviour of the old `print ...,` form.
    import sys
    emit = sys.stdout.write

    U = +W['d'][0:n * r]
    U = U**2
    V = +W['d'][n * r:2 * n * r]
    V = V**2
    Wd = +W['d']

    if DEBUG > 0:
        print("# Computing L22")
    L22 = []
    for i in range(r):
        BBTi = BBT + spmatrix(U[i * n:(i + 1) * n], range(n), range(n))
        cholesky(BBTi)
        L22.append(BBTi)

    if DEBUG > 0:
        print("# Computing L32")
    L32 = []
    for i in range(r):
        # Solves L32 . L22 = - B . B^\top
        C = -BBT
        blas.trsm(L22[i], C, side='R', transA='T')
        L32.append(C)

    if DEBUG > 0:
        print("# Computing L33")
    L33 = []
    for i in range(r):
        A = spmatrix(V[i * n:(i + 1) * n], range(n),
                     range(n)) + BBT - L32[i] * L32[i].T
        cholesky(A)
        L33.append(A)

    if DEBUG > 0:
        print("# Computing L42")
    L42 = []
    for i in range(r):
        A = +L22[i].T
        lapack.trtri(A, uplo='U')
        L42.append(A)

    if DEBUG > 0:
        print("# Computing L43")
    L43 = []
    for i in range(r):
        A = id_n - L42[i] * L32[i].T
        blas.trsm(L33[i], A, side='R', transA='T')
        L43.append(A)

    if DEBUG > 0:
        print("# Computing L44 and D4")

    L44 = matrix(0, (n, n))
    for i in range(r):
        L44 = L44 + L43[i] * L43[i].T + L42[i] * L42[i].T

    cholesky(L44)

    # WARNING: y, z and t have been permuted (LD33 and LD44piv)

    if DEBUG > 0:
        print("## PRE-COMPUTATION DONE ##")

    # Checking the decomposition
    if DEBUG > 1:
        if DEBUG > 2:
            # Assemble the block factor mA and the block sign matrix mD
            # on a (3r+1) x (3r+1) grid and print mA * mD * mA'.
            size = 3 * r + 1
            mA = [[zero_n] * size for _ in range(size)]
            mD = []

            for i in range(r):
                mA[i][i] = cholK
                mA[i + r][i + r] = L22[i]
                mA[i][i + r] = -B
                mA[i][i + 2 * r] = B
                mA[i + r][i + 2 * r] = L32[i]
                mA[i + 2 * r][i + 2 * r] = L33[i]
                mA[i + r][3 * r] = L42[i]
                mA[i + 2 * r][3 * r] = L43[i]
                mD.append(id_n)
            mA[3 * r][3 * r] = L44
            for i in range(2 * r):
                mD.append(-id_n)
            mD.append(id_n)

            printing.options['width'] = 30
            mA = sparse(mA)
            mD = spdiag(mD)

            emit("LL^T =\n%s" % (mA * mD * mA.T))

        emit("P =\n%s" % P)
        print(g)
        emit("W = %s" % W["d"].T)
        emit("U^2 = %s" % U.T)
        emit("V^2 = %s" % V.T)

    # f consults the reference solver whenever DEBUG is truthy, so it is
    # built under exactly that condition; otherwise f would hit an
    # undefined name at the lowest debug level.
    if DEBUG:
        print("### Pre-compute for check")
        solve = chol2(W, P)

    def f(x, y, z):
        """
        Solve the KKT system in place.

        On entry bx, bz are stored in x, z.
        On exit x, z contain the solution, with z scaled: z./di is
        returned instead of z (the last step overwrites z with
        mul(Wd, z)).

        x is read as r blocks of length n followed by one more block of
        length n; z is read as 2*r blocks of length n. y is only passed
        to the reference solver when DEBUG is truthy.
        """

        # Maps to our variables x,y,z and t
        if DEBUG > 0:
            print("... Computing ...")
            emit("bx = %sbz = %s" % (x.T, z.T))
        a = []
        b = x[n * r:n * r + n]
        c = []
        d = []
        for i in range(r):
            a.append(x[i * n:(i + 1) * n])
            c.append(z[i * n:(i + 1) * n])
            d.append(z[(i + r) * n:(i + r + 1) * n])

        if DEBUG:
            # Now solves using cvxopt
            xp = +x
            zp = +z
            solve(xp, y, zp)

        # First phase
        for i in range(r):
            blas.trsm(cholK, a[i])

            Bai = B * a[i]

            c[i] = -Bai - c[i]
            blas.trsm(L22[i], c[i])

            d[i] = Bai - L32[i] * c[i] - d[i]
            blas.trsm(L33[i], d[i])

            b = b + L42[i] * c[i] + L43[i] * d[i]

        blas.trsm(L44, b)

        # Second phase
        blas.trsm(L44, b, transA='T')

        for i in range(r):
            d[i] = d[i] - L43[i].T * b
            blas.trsm(L33[i], d[i], transA='T')

            c[i] = c[i] - L32[i].T * d[i] - L42[i].T * b
            blas.trsm(L22[i], c[i], transA='T')

            a[i] = a[i] + B.T * (c[i] - d[i])
            blas.trsm(cholK, a[i], transA='T')

        # Store in vectors and scale
        x[n * r:n * r + n] = b
        for i in range(r):
            x[i * n:(i + 1) * n] = a[i]
            z[i * n:(i + 1) * n] = c[i]
            z[(i + r) * n:(i + r + 1) * n] = d[i]

        z[:] = mul(Wd, z)

        if DEBUG:
            emit("x = %s" % x.T)
            emit("z = %s" % z.T)
            emit("Delta(x) = %s" % (x - xp).T)
            emit("Delta(z) = %s" % (z - zp).T)
            delta = blas.nrm2(x - xp) + blas.nrm2(z - zp)
            if delta > 1e-8:
                print("--- DELTA TOO HIGH = %.3e ---" % delta)

    return f
pylab.plot(X[:,0], X[:,1], 'ko', X[:,0], X[:,1], '-k') # Ellipsoid in the form { x | || L' * (x-c) ||_2 <= 1 } L = +A lapack.potrf(L) c = +b lapack.potrs(L, c) # 1000 points on the unit circle nopts = 1000 angles = matrix( [ a*2.0*pi/nopts for a in range(nopts) ], (1,nopts) ) circle = matrix(0.0, (2,nopts)) circle[0,:], circle[1,:] = cos(angles), sin(angles) # ellipse = L^-T * circle + c blas.trsm(L, circle, transA='T') ellipse = circle + c[:, nopts*[0]] ellipse2 = 0.5 * circle + c[:, nopts*[0]] pylab.plot(ellipse[0,:].T, ellipse[1,:].T, 'k-') pylab.fill(ellipse2[0,:].T, ellipse2[1,:].T, facecolor = '#F0F0F0') pylab.title('Loewner-John ellipsoid (fig 8.3)') pylab.axis('equal') pylab.axis('off') # Maximum volume enclosed ellipsoid # # minimize -log det B # subject to ||B * gk||_2 + gk'*c <= hk, k=1,...,m #
def trsm(L, B, alpha = 1.0, trans = 'N', nrhs = None, offsetB = 0, ldB = None):
    r"""
    Solves a triangular system of equations with multiple right-hand sides.
    Computes

    .. math::

       B &:= \alpha L^{-1} B \text{ if trans is 'N'} \\
       B &:= \alpha L^{-T} B \text{ if trans is 'T'}

    where :math:`L` is a :py:class:`cspmatrix` factor.

    The right-hand sides are overwritten in place: entry ``i`` of
    right-hand side ``j`` is read from and written back to the linear
    index ``offsetB + j*ldB + p[i]`` of :math:`B`, where ``p`` is the
    permutation stored in ``L.symb.p`` (the identity when it is ``None``).
    The function returns ``None``.

    :param L:        :py:class:`cspmatrix` factor
    :param B:        matrix
    :param alpha:    float (default: 1.0)
    :param trans:    'N' or 'T' (default: 'N')
    :param nrhs:     number of right-hand sides (default: number of columns in :math:`B`)
    :param offsetB:  integer (default: 0)
    :param ldB:      leading dimension of :math:`B` (default: number of rows in :math:`B`)
    :raises AssertionError: if `L` is not a :py:class:`cspmatrix` factor,
                     `B` is not a matrix, or `trans` is neither 'N' nor 'T'
    """

    assert isinstance(L, cspmatrix) and L.is_factor is True, "L must be a cspmatrix factor"
    assert isinstance(B, matrix), "B must be a matrix"

    if ldB is None: ldB = B.size[0]
    if nrhs is None: nrhs = B.size[1]
    assert trans in ['N', 'T']

    n = L.symb.n
    snpost = L.symb.snpost
    snptr = L.symb.snptr
    snode = L.symb.snode
    chptr = L.symb.chptr
    chidx = L.symb.chidx

    relptr = L.symb.relptr
    relidx = L.symb.relidx
    blkptr = L.symb.blkptr
    blkval = L.blkval

    p = L.symb.p
    if p is None: p = range(n)

    stack = []

    # Compare by value: `trans is 'N'` relied on string interning and
    # triggers a SyntaxWarning on recent Python versions.
    if trans == 'N':

        # Visit supernodes in the order given by snpost; each non-root
        # supernode pushes its update block, which is popped again by the
        # supernode that consumes it.
        for k in snpost:

            nn = snptr[k+1]-snptr[k]       # |Nk|
            na = relptr[k+1]-relptr[k]     # |Ak|
            nj = na + nn

            # rows of supernode k (loop invariant, so sliced only once)
            cols = snode[snptr[k]:snptr[k+1]]

            # extract block from rhs
            Uk = matrix(0.0,(nj,nrhs))
            for j in range(nrhs):
                for i,ir in enumerate(cols):
                    Uk[i,j] = alpha*B[offsetB + j*ldB + p[ir]]

            # add contributions from children
            for _ in range(chptr[k],chptr[k+1]):
                Ui, i = stack.pop()
                r = relidx[relptr[i]:relptr[i+1]]
                Uk[r,:] += Ui

            # if k is not a root node
            if na > 0:
                blas.gemm(blkval, Uk, Uk, alpha = -1.0, beta = 1.0, m = na, n = nrhs, k = nn,
                          offsetA = blkptr[k]+nn, ldA = nj, offsetC = nn)
                stack.append((Uk[nn:,:],k))

            # scale and copy block to rhs
            blas.trsm(blkval, Uk, m = nn, n = nrhs, offsetA = blkptr[k], ldA = nj)
            for j in range(nrhs):
                for i,ir in enumerate(cols):
                    B[offsetB + j*ldB + p[ir]] = Uk[i,j]

    else: # trans == 'T'

        # Visit supernodes in the reverse of the snpost order; each
        # supernode pushes one block per child for the child to pop.
        for k in reversed(list(snpost)):

            nn = snptr[k+1]-snptr[k]       # |Nk|
            na = relptr[k+1]-relptr[k]     # |Ak|
            nj = na + nn

            # rows of supernode k (loop invariant, so sliced only once)
            cols = snode[snptr[k]:snptr[k+1]]

            # extract block from rhs and scale
            Uk = matrix(0.0,(nj,nrhs))
            for j in range(nrhs):
                for i,ir in enumerate(cols):
                    Uk[i,j] = alpha*B[offsetB + j*ldB + p[ir]]
            blas.trsm(blkval, Uk, transA = 'T', m = nn, n = nrhs, offsetA = blkptr[k], ldA = nj)

            # if k is not a root node
            if na > 0:
                Uk[nn:,:] = stack.pop()
                blas.gemm(blkval, Uk, Uk, alpha = -1.0, beta = 1.0, m = nn, n = nrhs, k = na,
                          transA = 'T', offsetA = blkptr[k]+nn, ldA = nj, offsetB = nn)

            # stack contributions for children
            for ii in range(chptr[k],chptr[k+1]):
                i = chidx[ii]
                stack.append(Uk[relidx[relptr[i]:relptr[i+1]],:])

            # copy block to rhs
            for j in range(nrhs):
                for i,ir in enumerate(cols):
                    B[offsetB + j*ldB + p[ir]] = Uk[i,j]

    return