def Af(u, v, alpha=1.0, beta=0.0, trans="N"):
    """Apply the linear mapping A, or its adjoint, and accumulate into v.

    v := alpha * A(u) + beta * v     if trans is 'N'
    v := alpha * A'(u) + beta * v    if trans is 'T'

    For any other value of trans, v is only rescaled by beta.
    """
    # Scale v up front; sgemv then accumulates with beta = 1.
    blas.scal(beta, v)

    # The forward mapping is evaluated with sgemv's 'T' flag and the
    # adjoint with its 'N' flag.
    if trans == "N":
        sgemv_flag = "T"
    elif trans == "T":
        sgemv_flag = "N"
    else:
        return
    sgemv(A, u, v, n=ns, m=ms, alpha=alpha, beta=1.0, trans=sgemv_flag,
          offsetx=0)
def __cngrnc(r, x, alpha = 1.0, trans = 'N', offsetx = 0):
    """
    Congruence transformation, performed in place on x:

        x := alpha * r * x * r'   if trans = 'N',
        x := alpha * r' * x * r   if trans = 'T'.

    r is a square n x n-matrix.
    x is a square n x n-matrix or n^2-vector; its n x n block starts at
    position offsetx.
    """
    order = r.size[0]

    # Halve the diagonal of x so that tril(x) + tril(x)' reproduces x.
    blas.scal(0.5, x, inc = order + 1, offset = offsetx)

    # work := r*tril(x) if trans is 'N', work := tril(x)*r otherwise.
    work = +r
    side = 'R' if trans == 'N' else 'L'
    blas.trmm(x, work, side = side, m = order, n = order, ldA = order,
        ldB = order, offsetA = offsetx)

    # x := alpha * (work * r' + r * work')
    #    = alpha * r * (tril(x) + tril(x)') * r'   if trans is 'N'.
    # x := alpha * (work' * r + r' * work)
    #    = alpha * r' * (tril(x)' + tril(x)) * r   if trans = 'T'.
    blas.syr2k(r, work, x, trans = trans, alpha = alpha, n = order,
        k = order, ldB = order, ldC = order, offsetC = offsetx)
def Af(u, v, alpha=1.0, beta=0.0, trans="N"):
    """Apply the linear mapping A, or its adjoint, and accumulate into v.

    v := alpha * A(u) + beta * v     if trans is 'N'
    v := alpha * A'(u) + beta * v    if trans is 'T'

    The first nl entries are handled with two axpy calls of length
    nl // 2 (one with +alpha, one with -alpha); the remainder is handled
    by sgemv starting at offset nl.  For any other value of trans, v is
    only rescaled by beta.
    """
    # Integer half-length: BLAS length/offset arguments must be ints, and
    # `nl / 2` would be a float under Python 3 true division.
    half = nl // 2

    blas.scal(beta, v)
    if trans == "N":
        # v[:half] += alpha * (u[:half] - u[half:nl])
        blas.axpy(u, v, alpha=alpha, n=half)
        blas.axpy(u, v, alpha=-alpha, offsetx=half, n=half)
        sgemv(A, u, v, n=ns, m=ms, alpha=alpha, beta=1.0, trans="T",
              offsetx=nl)
    elif trans == "T":
        # v[:half] += alpha * u[:half];  v[half:nl] -= alpha * u[:half]
        blas.axpy(u, v, alpha=alpha, n=half)
        blas.axpy(u, v, alpha=-alpha, offsety=half, n=half)
        sgemv(A, u, v, n=ns, m=ms, alpha=alpha, beta=1.0, trans="N",
              offsety=nl)
def Gf(u, v, alpha=1.0, beta=0.0, trans='N'):
    """Compute v := -alpha * u + beta * v.

    u and v are vectors representing N symmetric matrices in the cvxopt
    format.  The trans argument is accepted but not used.
    """
    negated_alpha = -alpha
    blas.scal(beta, v)
    blas.axpy(u, v, alpha=negated_alpha)
def classifier2(Y, soft=False):
    """Evaluate the classifier on the rows of Y.

    Returns the raw decision values (an M x 1 matrix, M = Y.size[0]) when
    soft is true, and otherwise a matrix of labels 2*(value > 0) - 1.
    """
    num_pts = Y.size[0]

    # kern := X[:width, :] * Y' / sigma
    kern = matrix(0.0, (width, num_pts))
    blas.gemm(X, Y, kern, transB='T', alpha=1.0 / sigma, m=width)

    # sq_norms[i] = ||Yi||^2 / sigma
    e = matrix(1.0, (max(width, n, num_pts), 1))
    sq_norms = Y**2 * e[:n]
    blas.scal(1.0 / sigma, sq_norms)

    # kern[j,i] := kern[j,i] - 0.5 * (sq_norms[i] + a[j])
    #            = -|| yi - xj ||^2 / (2*sigma)
    blas.ger(e[:width], sq_norms, kern, alpha=-0.5)
    blas.ger(a[:width], e[:num_pts], kern, alpha=-0.5)

    # kern[j,i] := exp(kern[j,i])
    kern = exp(kern)

    # Complete the kernel: solve with L11, pad with zero rows up to N
    # rows, then apply the two chompack triangular solves with Lc.
    lapack.potrs(L11, kern)
    kern = matrix([kern, matrix(0., (N - width, num_pts))], (N, num_pts))
    chompack.trsm(Lc, kern, trans='N')
    chompack.trsm(Lc, kern, trans='T')

    # scores := kern' * z + b
    scores = matrix(b, (num_pts, 1))
    blas.gemv(kern, z, scores, trans='T', beta=1.0)

    if soft:
        return scores
    return matrix([2 * (val > 0.0) - 1 for val in scores])
def G(u, v, alpha = 1.0, beta = 0.0, trans = 'N'):
    """
    Evaluate the mapping G or its adjoint.

    If trans is 'N':

        v[:msq] := alpha * (A*u[0] - u[1][:]) + beta * v[:msq]
        v[msq:] := -alpha * u[1][:] + beta * v[msq:].

    Otherwise (trans is 'T'):

        v[0] := alpha * A' * u[:msq] + beta * v[0]
        v[1][:] := alpha * (-u[:msq] - u[msq:]) + beta * v[1][:].
    """
    if trans != 'N':
        # Adjoint: v[0] receives A' applied to the leading block of u.
        misc.sgemv(A, u, v[0], dims = {'l': 0, 'q': [], 's': [m]},
            alpha = alpha, beta = beta, trans = 'T')
        # v[1] collects minus both msq-blocks of u.
        blas.scal(beta, v[1])
        blas.axpy(u, v[1], alpha = -alpha, n = msq)
        blas.axpy(u, v[1], alpha = -alpha, n = msq, offsetx = msq)
        return

    # Leading block: alpha * (A*u[0] - u[1][:]) + beta * v[:msq].
    blas.gemv(A, u[0], v, alpha = alpha, beta = beta)
    blas.axpy(u[1], v, alpha = -alpha)
    # Trailing block: -alpha * u[1][:] + beta * v[msq:].
    blas.scal(beta, v, offset = msq)
    blas.axpy(u[1], v, alpha = -alpha, offsety = msq)
def __imul__(self, a):
    """In-place multiplication of the stored values by a real scalar.

    Floats are passed to blas.scal directly; integers (including the
    Python 2 ``long`` type) are converted to float first.  Returns self.

    Raises NotImplementedError for any other operand type.
    """
    # numbers.Integral covers int on Python 3 and int/long on Python 2,
    # so no reference to the Python-2-only name `long` is needed.
    from numbers import Integral

    if isinstance(a, float):
        blas.scal(a, self.blkval)
    elif isinstance(a, Integral):
        blas.scal(float(a), self.blkval)
    else:
        raise NotImplementedError("only scalar multiplication has been implemented")
    return self
def __imul__(self, a):
    """Scale the stored values in place by a real scalar and return self.

    A float operand is used as is; an integer operand (int, or long on
    Python 2) is converted to float before calling blas.scal.

    Raises NotImplementedError when `a` is not a float or an integer.
    """
    # Use the abstract Integral type instead of the Python-2-only `long`,
    # which raises NameError on Python 3 for non-numeric operands.
    from numbers import Integral

    if isinstance(a, float):
        blas.scal(a, self.blkval)
    elif isinstance(a, Integral):
        blas.scal(float(a), self.blkval)
    else:
        raise NotImplementedError(
            "only scalar multiplication has been implemented")
    return self
def syr2(X, u, v, alpha=1.0, beta=1.0, reordered=False):
    r"""
    Computes the projected rank 2 update of a cspmatrix X

    .. math::
         X := \alpha*P(u v^T + v u^T) + \beta X.

    X must not be a factor object (``X.is_factor`` must be False).  Unless
    ``reordered`` is true, u and v are first permuted with ``X.symb.p``
    (when a permutation is present).  X is updated in place; nothing is
    returned.
    """
    assert X.is_factor is False, "cspmatrix factor object"
    symb = X.symb
    snptr = symb.snptr
    blkval = X.blkval
    blkptr = symb.blkptr
    relptr = symb.relptr
    snrowidx = symb.snrowidx
    sncolptr = symb.sncolptr

    if symb.p is not None and reordered is False:
        up = u[symb.p]
        vp = v[symb.p]
    else:
        up = u
        vp = v

    for k in range(symb.Nsn):
        nn = snptr[k + 1] - snptr[k]
        na = relptr[k + 1] - relptr[k]
        nj = na + nn

        # Scale the lower-trapezoidal part of block k by beta, one column
        # at a time (column i starts on its diagonal entry).
        for i in range(nn):
            blas.scal(beta, blkval, n=nj - i, offset=blkptr[k] + (nj + 1) * i)

        # Row indices of block k; computed once and used for both vectors.
        rows = snrowidx[sncolptr[k]:sncolptr[k + 1]]
        uk = up[rows]
        vk = vp[rows]

        # Leading nn x nn block: symmetric rank-2 update.
        blas.syr2(uk, vk, blkval, n=nn, offsetA=blkptr[k], ldA=nj,
                  alpha=alpha)
        # Trailing na x nn block: the two outer products that make up the
        # off-diagonal part of u*v' + v*u'.
        blas.ger(uk, vk, blkval, m=na, n=nn, offsetx=nn,
                 offsetA=blkptr[k] + nn, ldA=nj, alpha=alpha)
        blas.ger(vk, uk, blkval, m=na, n=nn, offsetx=nn,
                 offsetA=blkptr[k] + nn, ldA=nj, alpha=alpha)

    return
def G(u, vvV, alpha=1.0, beta=0.0, trans="N"):
    """
    Evaluate

        vvV := alpha*[I, -I; -I, -I] * u + beta * vvV

    (the same for trans = 'N' and trans = 'T').  Each block has iC rows.
    """
    blas.scal(beta, vvV)

    # One axpy per block of the 2 x 2 block matrix:
    # (coefficient, offset into u, offset into vvV).
    block_terms = (
        (alpha, 0, 0),
        (-alpha, iC, 0),
        (-alpha, 0, iC),
        (-alpha, iC, iC),
    )
    for coef, src, dst in block_terms:
        blas.axpy(u, vvV, n=iC, alpha=coef, offsetx=src, offsety=dst)
def G(u, vvV, alpha=1.0, beta=0.0, trans='N'):
    """
    Compute vvV := alpha*[I, -I; -I, -I] * u + beta * vvV.

    The mapping is symmetric, so trans = 'N' and trans = 'T' give the
    same result.  Each block has iC rows.
    """
    blas.scal(beta, vvV)

    # Walk the 2 x 2 block structure row by row; only the (0, 0) block
    # carries +alpha, the other three carry -alpha.
    for row, dst in enumerate((0, iC)):
        for col, src in enumerate((0, iC)):
            coef = alpha if (row == 0 and col == 0) else -alpha
            blas.axpy(u, vvV, n=iC, alpha=coef, offsetx=src, offsety=dst)
def G(u, v, alpha=1.0, beta=0.0, trans='N'):
    """
    Compute v := alpha*[I, -I; -I, -I] * u + beta * v.

    The result does not depend on trans ('N' or 'T').  Each block has n
    rows.
    """
    minus_alpha = -alpha
    top, bottom = 0, n

    blas.scal(beta, v)
    # Top half of v:  +alpha*u[:n] - alpha*u[n:].
    blas.axpy(u, v, n=n, alpha=alpha, offsetx=top, offsety=top)
    blas.axpy(u, v, n=n, alpha=minus_alpha, offsetx=bottom, offsety=top)
    # Bottom half of v:  -alpha*u[:n] - alpha*u[n:].
    blas.axpy(u, v, n=n, alpha=minus_alpha, offsetx=top, offsety=bottom)
    blas.axpy(u, v, n=n, alpha=minus_alpha, offsetx=bottom, offsety=bottom)
def G(u, v, alpha = 1.0, beta = 0.0, trans = 'N'):
    """
    Evaluate

        v := alpha*[I, -I; -I, -I] * u + beta * v

    for trans = 'N' or 'T' (the block matrix is symmetric).  Each block
    has n rows.
    """
    blas.scal(beta, v)

    # (coefficient, offset into u, offset into v), one entry per block.
    updates = [(alpha, 0, 0), (-alpha, n, 0), (-alpha, 0, n), (-alpha, n, n)]
    for weight, off_u, off_v in updates:
        blas.axpy(u, v, n = n, alpha = weight, offsetx = off_u,
            offsety = off_v)
def factor(W, H = None, Df = None):
    # Build the KKT matrix K for the scaling W, factor a regularized copy
    # (Ktilde) with lapack.sytrf, and return a solve(x, y, z) routine that
    # uses the factorization together with iterative refinement on K.
    #
    # H, when given, is copied into the leading n x n block.  Df is only
    # read when mnl is nonzero.
    blas.scal(0.0, K)
    if H is not None: K[:n, :n] = H
    K[n:n+p, :n] = A
    for k in range(n):
        # Column k of [Df; G], scaled by W^{-T} and packed into row block
        # n+p.. of column k of K.
        if mnl: g[:mnl] = Df[:,k]
        g[mnl:] = G[:,k]
        scale(g, W, trans = 'T', inverse = 'I')
        pack(g, K, dims, mnl, offsety = k*ldK + n + p)
    # Diagonal entries from position p+n onwards are set to -1.
    K[(ldK+1)*(p+n) :: ldK+1]  = -1.0

    # Add positive regularization in 1x1 block and negative in 2x2 block.
    blas.copy(K, Ktilde)
    Ktilde[0 : (ldK+1)*n : ldK+1]  += EPSILON
    Ktilde[(ldK+1)*n :: ldK+1]  += -EPSILON
    lapack.sytrf(Ktilde, ipiv)

    def solve(x, y, z):

        # Solve
        #
        #     [ H          A'   GG'*W^{-1} ]   [ ux   ]   [ bx        ]
        #     [ A          0    0          ] * [ uy   ] = [ by        ]
        #     [ W^{-T}*GG  0   -I          ]   [ W*uz ]   [ W^{-T}*bz ]
        #
        # and return ux, uy, W*uz.
        #
        # On entry, x, y, z contain bx, by, bz.  On exit, they contain
        # the solution ux, uy, W*uz.
        blas.scal(0.0, sltn)
        blas.copy(x, u)
        blas.copy(y, u, offsety = n)
        scale(z, W, trans = 'T', inverse = 'I')
        pack(z, u, dims, mnl, offsety = n + p)
        blas.copy(u, r)

        # Iterative refinement algorithm:
        # Init: sltn = 0, r_0 = [bx; by; W^{-T}*bz]
        # 1. u_k = Ktilde^-1 * r_k
        # 2. sltn += u_k
        # 3. r_k+1 = r - K*sltn
        # Repeat until exceed MAX_ITER iterations or ||r|| <= ERROR_BOUND
        iteration = 0
        resid_norm = 1
        while iteration <= MAX_ITER and resid_norm > ERROR_BOUND:
            lapack.sytrs(Ktilde, ipiv, u)
            blas.axpy(u, sltn, alpha = 1.0)

            # u := r - K*sltn, the residual with respect to the
            # unregularized matrix K.
            blas.copy(r, u)
            blas.symv(K, sltn, u, alpha = -1.0, beta = 1.0)
            resid_norm = math.sqrt(blas.dot(u, u))

            iteration += 1

        # Split the accumulated solution back into x, y and z.
        blas.copy(sltn, x, n = n)
        blas.copy(sltn, y, offsetx = n, n = p)
        unpack(sltn, z, dims, mnl, offsetx = n + p)

    return solve
def Fs(x, y, alpha = 1.0, beta = 0.0, trans = 'N'):
    """
    Evaluate y := alpha*(-diag(x)) + beta*y.

    If trans is 'N', x is a vector and y is a matrix (the update is
    written to entries of y spaced n+1 apart).  Otherwise x is a matrix
    and y is a vector (entries of x spaced n+1 apart are read).
    """
    diag_stride = n + 1

    # The beta scaling is common to both directions.
    blas.scal(beta, y)
    if trans == 'N':
        blas.axpy(x, y, alpha = -alpha, incy = diag_stride)
    else:
        blas.axpy(x, y, alpha = -alpha, incx = diag_stride)
def Fs(x, y, alpha=1.0, beta=0.0, trans='N'):
    """
    Compute y := alpha*(-diag(x)) + beta*y.

    trans == 'N': x is a vector, y is a matrix; y is stepped through with
    stride n + 1.
    otherwise:    x is a matrix, y is a vector; x is stepped through with
    stride n + 1.
    """
    # Only the strided operand differs between the two cases.
    if trans == 'N':
        stride_arg = {'incy': n + 1}
    else:
        stride_arg = {'incx': n + 1}

    blas.scal(beta, y)
    blas.axpy(x, y, alpha=-alpha, **stride_arg)
def Fgp(x = None, z = None):
    # Evaluates log-sum-exp functions fi(x) = log sum exp(Fi*x + gi), one
    # per (i, start, stop) entry of `ind`.
    #
    # Fgp()      returns (mnl, zero n-vector).
    # Fgp(x)     returns (f, Df): values and gradients (row i of Df).
    # Fgp(x, z)  additionally returns H = sum_i z[i] * Hessian(fi).

    if x is None: return mnl, matrix(0.0, (n,1))

    f = matrix(0.0, (mnl+1,1))
    Df = matrix(0.0, (mnl+1,n))

    # y = F*x+g
    blas.copy(g, y)
    base.gemv(F, x, y, beta=1.0)

    if z is not None: H = matrix(0.0, (n,n))

    for i, start, stop in ind:

        # yi := exp(yi) = exp(Fi*x+gi)
        # (shifted by ymax before exponentiating; the shift is added
        # back to f[i] below)
        ymax = max(y[start:stop])
        y[start:stop] = base.exp(y[start:stop] - ymax)

        # fi = log sum yi = log sum exp(Fi*x+gi)
        ysum = blas.asum(y, n=stop-start, offset=start)
        f[i] = ymax + math.log(ysum)

        # yi := yi / sum(yi) = exp(Fi*x+gi) / sum(exp(Fi*x+gi))
        blas.scal(1.0/ysum, y, n=stop-start, offset=start)

        # gradfi := Fi' * yi
        #        = Fi' * exp(Fi*x+gi) / sum(exp(Fi*x+gi))
        # (written into row i of Df via incy = mnl+1, offsety = i)
        base.gemv(F, y, Df, trans='T', m=stop-start, incy=mnl+1,
            offsetA=start, offsetx=start, offsety=i)

        if z is not None:

            # Hi = Fi' * (diag(yi) - yi*yi') * Fi
            #    = Fisc' * Fisc
            # where
            # Fisc = diag(yi)^1/2 * (I - 1*yi') * Fi
            #      = diag(yi)^1/2 * (Fi - 1*gradfi')

            Fsc[:K[i], :] = F[start:stop, :]
            for k in range(start,stop):
                # Row k-start of Fsc: subtract gradfi', then scale by
                # sqrt(y[k]).
                blas.axpy(Df, Fsc, n=n, alpha=-1.0, incx=mnl+1,
                    incy=Fsc.size[0], offsetx=i, offsety=k-start)
                blas.scal(math.sqrt(y[k]), Fsc, inc=Fsc.size[0],
                    offset=k-start)

            # H += z[i]*Hi = z[i] * Fisc' * Fisc
            blas.syrk(Fsc, H, trans='T', k=stop-start, alpha=z[i],
                beta=1.0)

    if z is None: return f, Df
    return f, Df, H
def G(x, y, alpha=1.0, beta=0.0, trans='N'):
    """
    Scaled copy between matrix and vector storage.

    If trans is 'N', x is an N x m matrix, and y is an N*m-vector.

        y := alpha * x[:] + beta * y.

    If trans is 'T', x is an N*m vector, and y is an N x m matrix.

        y[:] := alpha * x + beta * y[:].

    Both cases are the same elementwise operation, so trans is not
    inspected.
    """
    blas.scal(beta, y)
    blas.axpy(x, y, alpha=alpha)
def A(x, y, alpha=1.0, beta=0.0, trans='N'):
    """
    Row-sum mapping and its adjoint.

    If trans is 'N', x is an N x m matrix and y an N-vector.

        y := alpha * x * 1_m + beta y.

    If trans is 'T', x is an N vector and y an N x m matrix.

        y := alpha * x * 1_m' + beta y.
    """
    if trans != 'N':
        # Adjoint: rank-one update of the matrix y with x * ones'.
        blas.scal(beta, y)
        blas.ger(x, ones, y, alpha=alpha)
        return

    # Forward: matrix-vector product with the vector of ones.
    blas.gemv(x, ones, y, alpha=alpha, beta=beta)
def g(x, y, z):
    # KKT solver step: overwrites x and z in place with the solution
    # (y is not touched by this routine).  x holds N entries ux_y, one
    # entry ux_b at index N, and N trailing entries ux_v; z holds two
    # blocks of N entries.

    # Solve
    #
    #     [ K    d3    ] [ ux_y ]
    #     [            ] [      ] =
    #     [ d3'  1'*d3 ] [ ux_b ]
    #
    #         [ bx_y ]   [ D  ]
    #         [      ] - [    ] * D3 * (D2 * bx_v + bx_z - bx_w).
    #         [ bx_b ]   [ d' ]

    x[:N] -= mul(d, mul(d3, mul(d2, x[-N:]) + z[:N] - z[-N:]))
    x[N] -= blas.dot(d, mul(d3, mul(d2, x[-N:]) + z[:N] - z[-N:]))

    # Solve dy1 := K^-1 * x[:N]
    # (two chompack triangular solves with the factor L)
    blas.copy(x, dy1, n=N)
    chompack.trsm(L, dy1, trans='N')
    chompack.trsm(L, dy1, trans='T')

    # Find ux_y = dy1 - ux_b * dy2 s.t
    #
    #     d3' * ( dy1 - ux_b * dy2 + ux_b ) = x[N]
    #
    # i.e.  x[N] := ( x[N] - d3'* dy1 ) / ( d3'* ( 1 - dy2 ) ).
    # NOTE(review): blas.asum(d3) equals 1'*d3 only if d3 is nonnegative
    # -- confirm against where d3 is built.

    x[N] = ( x[N] - blas.dot(d3, dy1) ) / \
        ( blas.asum(d3) - blas.dot(d3, dy2) )
    x[:N] = dy1 - x[N] * dy2

    # ux_v = D4 * ( bx_v -  D1^-1 (bz_z + D * (ux_y + ux_b))
    #     - D2^-1 * bz_w )

    x[-N:] = mul( d4, x[-N:] - div(z[:N] + mul(d, x[:N] + x[N]), d1)
        - div(z[N:], d2))

    # uz_z = - D1^-1 * ( bx_z - D * ( ux_y + ux_b ) - ux_v )
    # uz_w = - D2^-1 * ( bx_w - uz_w )
    z[:N] += base.mul(d, x[:N] + x[N]) + x[-N:]
    z[-N:] += x[-N:]
    blas.scal(-1.0, z)

    # Return W['di'] * uz
    blas.tbmv(W['di'], z, n=2 * N, k=0, ldA=1)
def f(x, y, z):
    # KKT solver step: overwrites x, y, z in place with ux, uy and the
    # scaled uz.  z is temporarily reshaped to N x m and restored to an
    # N*m-vector before returning.

    # z := mat(z)
    #    = mat(bz)
    z.size = N, m

    # x := x + D .* z
    #    = bx + mat(bz) ./ d.^2
    x += mul(D, z)

    # y := y - sum_k (Q + Dk)^-1 * X[:,k]
    #    = by - sum_k (Q + Dk)^-1 * (bxk + Dk * bzk)
    for k in range(m):
        blas.symv(H[k], x[:, k], y, alpha=-1.0, beta=1.0)

    # y := H^-1 * y
    #    = -uy
    # (solved with the factor S)
    lapack.potrs(S, y)

    # x[:,k] := H[k] * (x[:,k] + y)
    #         = (Q + Dk)^-1 * (bxk + bzk ./ d.^2 + y)
    #         = ux[:,k]
    w = matrix(0.0, (N, 1))
    for k in range(m):

        # x[:,k] := x[:,k] + y
        blas.axpy(y, x, offsety=N * k, n=N)

        # w := H[k] * x[:,k]
        #    = (Q + Dk)^-1 * (bxk + bzk ./ d.^2 + y)
        blas.symv(H[k], x, w, offsetx=N * k)

        # x[:,k] := w
        #         = ux[:,k]
        blas.copy(w, x, offsety=N * k)

    # y := -y
    #    = uy
    blas.scal(-1.0, y)

    # z := (x - z) ./ d
    # (computed as z := -(z - x), then a diagonal solve with W['d'])
    blas.axpy(x, z, -1.0)
    blas.tbsv(W['d'], z, n=m * N, k=0, ldA=1)
    blas.scal(-1.0, z)
    z.size = N * m, 1
def classifier(Y, soft=False):
    """Evaluate the classifier on the rows of Y.

    Returns the raw decision values exp(K) * zr + b when soft is true,
    and otherwise a matrix of labels 2*(value > 0) - 1.
    """
    num_pts = Y.size[0]

    # kern := Y * Xr' / sigma
    kern = matrix(0.0, (num_pts, Nr))
    blas.gemm(Y, Xr, kern, transB='T', alpha=1.0 / sigma)

    # sq_norms[i] = ||Yi||^2 / sigma
    e = matrix(1.0, (max(num_pts, Nr, n), 1))
    sq_norms = Y**2 * e[:n]
    blas.scal(1.0 / sigma, sq_norms)

    # kern[i,j] := kern[i,j] - 0.5 * (sq_norms[i] + a[sv][j])
    #            = -|| yi - xj ||^2 / (2*sigma)
    blas.ger(sq_norms, e, kern, alpha=-0.5)
    blas.ger(e, a[sv], kern, alpha=-0.5)

    scores = exp(kern) * zr + b
    if soft:
        return scores
    return matrix([2 * (val > 0.0) - 1 for val in scores])
def Af(u, v, alpha=1.0, beta=0.0, trans='N'):
    """Apply a block-structured linear mapping A, or its adjoint.

    v := alpha * A(u) + beta * v     if trans is 'N'
    v := alpha * A'(u) + beta * v    if trans is 'T' (any value other
                                     than 'N' takes this branch)

    A is an M x N grid of blocks.  Block (i, j) is applied with sgemv
    only when it is a dense or sparse matrix; other entries are skipped
    but still advance the offsets.  Block row i covers ms[i] entries and
    block column j covers ns[j]**2 entries.
    """
    blas.scal(beta, v)
    if trans == 'N':
        offseti = 0
        for i in range(M):
            offsetj = 0
            for j in range(N):
                if isinstance(A[i][j], (matrix, spmatrix)):
                    sgemv(A[i][j], u, v, n=ns[j], m=ms[i], trans='T',
                          alpha=alpha, beta=1.0, offsetx=offsetj,
                          offsety=offseti)
                offsetj += ns[j]**2
            offseti += ms[i]
    else:
        offsetj = 0
        for j in range(N):
            offseti = 0
            for i in range(M):
                if isinstance(A[i][j], (matrix, spmatrix)):
                    sgemv(A[i][j], u, v, n=ns[j], m=ms[i], trans='N',
                          alpha=alpha, beta=1.0, offsetx=offseti,
                          offsety=offsetj)
                offseti += ms[i]
            offsetj += ns[j]**2
def Gf(u, v, alpha = 1.0, beta = 0.0, trans = 'N'):
    """
    Evaluate the inequality mapping or its adjoint.

    If trans is 'N', u is a list (u[0], u[1], u[2]) and v a vector:

        v[:m] := alpha * G * u[0] + beta * v[:m]
        v[m:] := alpha * [-u[1], -A(u[0])'; -A(u[0]), -u[2]] + beta * v[m:]

    Otherwise u is a vector and v a list (v[0], v[1], v[2]):

        v[0] := alpha * (G.T * u[:m] - 2.0 * A.T * u[m + I21]) + beta * v[0]
        v[1] := -alpha * u[m + I11] + beta * v[1]
        v[2] := -alpha * u[m + I22] + beta * v[2]
    """
    if trans != 'N':
        # v[0] := alpha * ( G.T * u[:m] - 2.0 * A.T * u[m + I21] )
        #         + beta * v[0]
        base.gemv(G, u, v[0], trans = 'T', alpha = alpha, beta = beta)
        base.gemv(A, u[m + I21], v[0], trans = 'T', alpha = -2.0*alpha,
            beta = 1.0)

        # v[1] := -alpha * u[m + I11] + beta * v[1]
        # v[2] := -alpha * u[m + I22] + beta * v[2]
        for target, block in ((v[1], I11), (v[2], I22)):
            blas.scal(beta, target)
            blas.axpy(u[m + block], target, alpha = -alpha)
        return

    # v[:m] := alpha * G * u[0] + beta * v[:m]
    base.gemv(G, u[0], v, alpha = alpha, beta = beta)

    # v[m:] := alpha * [-u[1],  -A(u[0])';  -A(u[0]), -u[2]]
    #          + beta * v[m:]
    blas.scal(beta, v, offset = m)
    v[m + I11] -= alpha * u[1][:]
    v[m + I21] -= alpha * A * u[0]
    v[m + I22] -= alpha * u[2][:]
def Af(u, v, alpha = 1.0, beta = 0.0, trans = 'N'):
    """
    Empty equality-constraint mapping.

    If trans is 'N' nothing is done.  Otherwise v[0], v[1] and v[2] are
    each scaled by beta (the adjoint contributes nothing else).
    """
    if trans != 'N':
        for idx in range(3):
            blas.scal(beta, v[idx])
def syr2(X, u, v, alpha = 1.0, beta = 1.0, reordered=False):
    r"""
    Computes the projected rank 2 update of a cspmatrix X

    .. math::
         X := \alpha*P(u v^T + v u^T) + \beta X.

    X must not be a factor object (``X.is_factor`` must be False).  Unless
    ``reordered`` is true, u and v are first permuted with ``X.symb.p``
    (when a permutation is present).  X is updated in place; nothing is
    returned.
    """
    assert X.is_factor is False, "cspmatrix factor object"
    symb = X.symb
    snptr = symb.snptr
    blkval = X.blkval
    blkptr = symb.blkptr
    relptr = symb.relptr
    snrowidx = symb.snrowidx
    sncolptr = symb.sncolptr

    if symb.p is not None and reordered is False:
        up = u[symb.p]
        vp = v[symb.p]
    else:
        up = u
        vp = v

    for k in range(symb.Nsn):
        nn = snptr[k+1]-snptr[k]
        na = relptr[k+1]-relptr[k]
        nj = na + nn

        # Scale the lower-trapezoidal part of block k by beta, column by
        # column (column i starts on its diagonal entry).
        for i in range(nn):
            blas.scal(beta, blkval, n = nj-i, offset = blkptr[k]+(nj+1)*i)

        # Row indices of block k; computed once and shared by uk and vk.
        rows = snrowidx[sncolptr[k]:sncolptr[k+1]]
        uk = up[rows]
        vk = vp[rows]

        # Leading nn x nn block: symmetric rank-2 update.
        blas.syr2(uk, vk, blkval, n = nn, offsetA = blkptr[k], ldA = nj,
            alpha = alpha)
        # Trailing na x nn block: the two outer products that make up the
        # off-diagonal part of u*v' + v*u'.
        blas.ger(uk, vk, blkval, m = na, n = nn, offsetx = nn,
            offsetA = blkptr[k]+nn, ldA = nj, alpha = alpha)
        blas.ger(vk, uk, blkval, m = na, n = nn, offsetx = nn,
            offsetA = blkptr[k]+nn, ldA = nj, alpha = alpha)

    return
def Gf(u, v, alpha=1.0, beta=0.0, trans='N'):
    """
    Inequality mapping (trans == 'N') or its adjoint (otherwise).

    Forward, with u = (u[0], u[1], u[2]) and v a vector:

        v[:m] := alpha * G * u[0] + beta * v[:m]
        v[m:] := alpha * [-u[1], -A(u[0])'; -A(u[0]), -u[2]] + beta * v[m:]

    Adjoint, with u a vector and v = (v[0], v[1], v[2]):

        v[0] := alpha * (G.T * u[:m] - 2.0 * A.T * u[m + I21]) + beta * v[0]
        v[1] := -alpha * u[m + I11] + beta * v[1]
        v[2] := -alpha * u[m + I22] + beta * v[2]
    """
    forward = (trans == 'N')
    minus_alpha = -alpha

    if forward:
        # Linear part, then the three pieces of the matrix block.
        base.gemv(G, u[0], v, alpha=alpha, beta=beta)
        blas.scal(beta, v, offset=m)
        v[m + I11] -= alpha * u[1][:]
        v[m + I21] -= alpha * A * u[0]
        v[m + I22] -= alpha * u[2][:]
    else:
        # v[0] gathers the G' term and twice the A' term of the (2,1)
        # block.
        base.gemv(G, u, v[0], trans='T', alpha=alpha, beta=beta)
        base.gemv(A, u[m + I21], v[0], trans='T', alpha=-2.0 * alpha,
                  beta=1.0)

        # Diagonal blocks map straight to v[1] and v[2] with a sign flip.
        blas.scal(beta, v[1])
        blas.axpy(u[m + I11], v[1], alpha=minus_alpha)
        blas.scal(beta, v[2])
        blas.axpy(u[m + I22], v[2], alpha=minus_alpha)
def Af(u, v, alpha=1.0, beta=0.0, trans='N'):
    """
    Empty equality-constraint mapping.

    Does nothing when trans is 'N'.  For any other trans, the three
    components v[0], v[1], v[2] are scaled by beta.
    """
    if trans == 'N':
        return

    first, second, third = v[0], v[1], v[2]
    blas.scal(beta, first)
    blas.scal(beta, second)
    blas.scal(beta, third)
def balanced_cut_conelp(A, N, delta=1.0, cut='min'):
    """
    Generates problem data for semidefinite relaxation of the following
    balance-constrained min/max cut problem

        min./max.   -Tr(AX)
        subject to  X{ii} == 1,  i = 1,...,n
                    sum(X) + x == delta^2
                    x >= 0,  X p.s.d.

    Returns the tuple (c, G, h, dims).  Raises ValueError when cut is
    neither 'min' nor 'max'.
    """
    nsq = N**2
    rows = nsq + 1

    c = matrix([matrix(-1.0, (N, 1)), -delta**2])
    h = matrix([0.0, matrix(A.toarray(), (nsq, 1))], (rows, 1))

    # h is negated for the 'min' problem and left as is for 'max'.
    if cut == 'min':
        blas.scal(-1.0, h)
    elif cut != 'max':
        raise ValueError('cut must be "min" or "max"')

    # Column k of the first block has a single 1 at row 1 + k*(N+1); the
    # second block is a column of ones.
    selector_rows = [1 + k * (N + 1) for k in range(N)]
    selector = spmatrix(1.0, selector_rows, range(N), (rows, N))
    ones_column = matrix(1.0, (rows, 1))
    G = sparse([[selector], [ones_column]])

    dims = {'l': 1, 'q': [], 's': [N]}
    return c, G, h, dims
def factor(W, H=None, Df=None):
    # Build the KKT matrix K for the scaling W, add a small diagonal
    # regularization, factor it in place with lapack.sytrf, and return a
    # solve(x, y, z) routine that uses the factorization.
    #
    # H, when given, is copied into the leading n x n block.  Df is only
    # read when mnl is nonzero.
    blas.scal(0.0, K)
    if H is not None: K[:n, :n] = H
    K[n:n+p, :n] = A
    for k in range(n):
        # Column k of [Df; G], scaled by W^{-T} and packed into row block
        # n+p.. of column k of K.
        if mnl: g[:mnl] = Df[:, k]
        g[mnl:] = G[:, k]
        scale(g, W, trans='T', inverse='I')
        pack(g, K, dims, mnl, offsety=k*ldK + n + p)
    # Diagonal entries from position p+n onwards are set to -1.
    K[(ldK+1)*(p+n):: ldK+1] = -1.0

    # Add positive regularization in 1x1 block and negative in 2x2 block.
    K[0: (ldK+1)*n: ldK+1] += REG_EPS
    K[(ldK+1)*n:: ldK+1] += -REG_EPS
    lapack.sytrf(K, ipiv)

    def solve(x, y, z):

        # Solve
        #
        #     [ H          A'   GG'*W^{-1} ]   [ ux   ]   [ bx        ]
        #     [ A          0    0          ] * [ uy   ] = [ by        ]
        #     [ W^{-T}*GG  0   -I          ]   [ W*uz ]   [ W^{-T}*bz ]
        #
        # and return ux, uy, W*uz.
        #
        # On entry, x, y, z contain bx, by, bz.  On exit, they contain
        # the solution ux, uy, W*uz.
        #
        # The factored matrix is the regularized K, so the result is the
        # solution of the regularized system.
        blas.copy(x, u)
        blas.copy(y, u, offsety=n)
        scale(z, W, trans='T', inverse='I')
        pack(z, u, dims, mnl, offsety=n + p)
        lapack.sytrs(K, ipiv, u)
        blas.copy(u, x, n=n)
        blas.copy(u, y, offsetx=n, n=p)
        unpack(u, z, dims, mnl, offsetx=n + p)

    return solve
def __cngrnc(r, x, alpha=1.0, trans='N', offsetx=0):
    """
    In-place congruence transformation

        x := alpha * r * x * r'   if trans = 'N',
        x := alpha * r' * x * r   if trans = 'T'.

    r is a square n x n-matrix.
    x is a square n x n-matrix or n^2-vector; the n x n block that is
    transformed starts at position offsetx.
    """
    dim = r.size[0]
    # Keyword arguments shared by the triangular multiply below.
    shape = dict(m=dim, n=dim, ldA=dim, ldB=dim, offsetA=offsetx)

    # Scale diagonal of x by 1/2.
    blas.scal(0.5, x, inc=dim + 1, offset=offsetx)

    # prod := r*tril(x) if trans is 'N', prod := tril(x)*r if trans is 'T'.
    prod = +r
    if trans == 'N':
        blas.trmm(x, prod, side='R', **shape)
    else:
        blas.trmm(x, prod, side='L', **shape)

    # x := alpha * (prod * r' + r * prod')
    #    = alpha * r * (tril(x) + tril(x)') * r'   if trans is 'N'.
    # x := alpha * (prod' * r + r' * prod)
    #    = alpha * r' * (tril(x)' + tril(x)) * r   if trans = 'T'.
    blas.syr2k(r, prod, x, trans=trans, alpha=alpha, n=dim, k=dim,
               ldB=dim, ldC=dim, offsetC=offsetx)
def G(u, v, alpha=1.0, beta=0.0, trans='N'):
    """
    Evaluate the mapping G (trans == 'N') or its adjoint (otherwise).

    If trans is 'N':

        v[:msq] := alpha * (A*u[0] - u[1][:]) + beta * v[:msq]
        v[msq:] := -alpha * u[1][:] + beta * v[msq:].

    If trans is 'T':

        v[0] := alpha * A' * u[:msq] + beta * v[0]
        v[1][:] := alpha * (-u[:msq] - u[msq:]) + beta * v[1][:].
    """
    minus_alpha = -alpha

    if trans == 'N':
        x_part, mat_part = u[0], u[1]
        blas.gemv(A, x_part, v, alpha=alpha, beta=beta)
        blas.axpy(mat_part, v, alpha=minus_alpha)
        blas.scal(beta, v, offset=msq)
        blas.axpy(mat_part, v, alpha=minus_alpha, offsety=msq)
    else:
        cone_dims = {'l': 0, 'q': [], 's': [m]}
        misc.sgemv(A, u, v[0], dims=cone_dims, alpha=alpha, beta=beta,
                   trans='T')
        blas.scal(beta, v[1])
        # Subtract both msq-blocks of u from v[1].
        for start in (0, msq):
            blas.axpy(u, v[1], alpha=minus_alpha, n=msq, offsetx=start)
def fsolve(x, y, z): """ Solves the system of equations [ 0 G'*W^{-1} ] [ ux ] = [ bx ] [ G -W' ] [ uz ] [ bz ] """ # Compute bx := bx + G'*W^{-1}*W^{-T}*bz v = matrix(0., (N, 1)) for i in range(N): blas.symv(z, rr, v, ldA=N, offsetA=1, n=N, offsetx=N * i) x[i] += blas.dot(rr, v, n=N, offsetx=N * i) blas.symv(z, q, v, ldA=N, offsetA=1, n=N) x[N] += blas.dot(q, v) + z[0] * W['di'][0]**2 # Solve G'*W^{-1}*W^{-T}*G*ux = bx lapack.potrs(H, x) # Compute bz := -W^{-T}*(bz-G*ux) # z -= G*x z[1::N + 1] -= x[:-1] z -= x[-1] # Apply scaling z[0] *= -W['di'][0] blas.scal(0.5, z, n=N, offset=1, inc=N + 1) tmp = +r blas.trmm(z, tmp, ldA=N, offsetA=1, n=N, m=N) blas.syr2k(r, tmp, z, trans='T', offsetC=1, ldC=N, n=N, k=N, alpha=-1.0)
def solve(x, y, z):

    # Solve
    #
    #     [ H          A'   GG'*W^{-1} ]   [ ux   ]   [ bx        ]
    #     [ A          0    0          ] * [ uy   ] = [ by        ]
    #     [ W^{-T}*GG  0   -I          ]   [ W*uz ]   [ W^{-T}*bz ]
    #
    # and return ux, uy, W*uz.
    #
    # On entry, x, y, z contain bx, by, bz.  On exit, they contain
    # the solution ux, uy, W*uz.
    #
    # The factorization in (Ktilde, ipiv) is used as the solver and the
    # matrix K to measure the residual.
    blas.scal(0.0, sltn)
    blas.copy(x, u)
    blas.copy(y, u, offsety = n)
    scale(z, W, trans = 'T', inverse = 'I')
    pack(z, u, dims, mnl, offsety = n + p)
    blas.copy(u, r)

    # Iterative refinement algorithm:
    # Init: sltn = 0, r_0 = [bx; by; W^{-T}*bz]
    # 1. u_k = Ktilde^-1 * r_k
    # 2. sltn += u_k
    # 3. r_k+1 = r - K*sltn
    # Repeat until exceed MAX_ITER iterations or ||r|| <= ERROR_BOUND
    iteration = 0
    resid_norm = 1
    while iteration <= MAX_ITER and resid_norm > ERROR_BOUND:
        lapack.sytrs(Ktilde, ipiv, u)
        blas.axpy(u, sltn, alpha = 1.0)

        # u := r - K*sltn, the current residual.
        blas.copy(r, u)
        blas.symv(K, sltn, u, alpha = -1.0, beta = 1.0)
        resid_norm = math.sqrt(blas.dot(u, u))

        iteration += 1

    # Split the accumulated solution back into x, y and z.
    blas.copy(sltn, x, n = n)
    blas.copy(sltn, y, offsetx = n, n = p)
    unpack(sltn, z, dims, mnl, offsetx = n + p)
def kernel_matrix(X, kernel, sigma=1.0, theta=1.0, degree=1, V=None, width=None):
    """
    Computes the kernel matrix or a partial kernel matrix.

    Input arguments.

        X is an N x n matrix.

        kernel is a string with values 'linear', 'rbf', 'poly', or
        'tanh'.
        'linear': k(u,v) = u'*v/sigma.
        'rbf':    k(u,v) = exp(-||u - v||^2 / (2*sigma)).
        'poly':   k(u,v) = (u'*v/sigma)**degree.
        'tanh':   k(u,v) = tanh(u'*v/sigma - theta).

        sigma and theta are positive numbers.

        degree is a positive integer.

        V is an N x N sparse matrix, or the string 'band' (default is
        None).

        width is a positive integer (default is None).

    Output.

        Q, an N x N matrix or sparse matrix.
        If V is a sparse matrix, a partial kernel matrix with the sparsity
        pattern V is returned.
        If width is specified and V = 'band', a partial kernel matrix
        with band sparsity is returned (width is the half-bandwidth).

        a, an N x 1 matrix with the products <xi,xi>/sigma.

    Raises ValueError for an unknown kernel name and TypeError when V is
    none of the supported forms.
    """
    N, n = X.size

    #### dense (full) kernel matrix
    if V is None:
        if verbose: print("building kernel matrix ..")

        # Qij = xi'*xj / sigma
        Q = matrix(0.0, (N, N))
        blas.syrk(X, Q, alpha=1.0 / sigma)
        a = Q[::N + 1]  # ai = ||xi||**2 / sigma

        if kernel == 'linear':
            pass

        elif kernel == 'rbf':
            # Qij := Qij - 0.5 * ( ai + aj )
            #      = -||xi - xj||^2 / (2*sigma)
            ones = matrix(1.0, (N, 1))
            blas.syr2(a, ones, Q, alpha=-0.5)

            Q = exp(Q)

        elif kernel == 'tanh':
            # tanh(t) evaluated as (e^t - e^-t) / (e^t + e^-t)
            Q = exp(Q - theta)
            Q = div(Q - Q**-1, Q + Q**-1)

        elif kernel == 'poly':
            Q = Q**degree

        else:
            raise ValueError('invalid kernel type')

    #### general sparse partial kernel matrix
    elif type(V) is cvxopt.base.spmatrix:

        if verbose: print("building projected kernel matrix ...")
        Q = +V
        base.syrk(X, Q, partial=True, alpha=1.0 / sigma)

        # ai = ||xi||**2 / sigma
        a = matrix(Q[::N + 1], (N, 1))

        if kernel == 'linear':
            pass

        elif kernel == 'rbf':

            ones = matrix(1.0, (N, 1))

            # Qij := Qij - 0.5 * ( ai + aj )
            #      = -||xi - xj||^2 / (2*sigma)
            # (done on a chompack cspmatrix built from Q, then converted
            # back to a sparse matrix)
            p = chompack.maxcardsearch(V)
            symb = chompack.symbolic(Q, p)
            Qc = chompack.cspmatrix(symb) + Q
            chompack.syr2(Qc, a, ones, alpha=-0.5)
            Q = Qc.spmatrix(reordered=False)
            Q.V = exp(Q.V)

        elif kernel == 'tanh':

            # tanh(t) evaluated as (e^t - e^-t) / (e^t + e^-t)
            v = +Q.V
            v = exp(v - theta)
            v = div(v - v**-1, v + v**-1)
            Q.V = v

        elif kernel == 'poly':

            Q.V = Q.V**degree

        else:
            raise ValueError('invalid kernel type')

    #### banded partial kernel matrix
    elif V == 'band' and width is not None:

        # Lower triangular part of band matrix with bandwidth 2*w+1.
        if verbose: print("building projected kernel matrix ...")
        I = [i for k in range(N) for i in range(k, min(width + k + 1, N))]
        J = [k for k in range(N) for i in range(min(width + 1, N - k))]
        V = matrix(0.0, (len(I), 1))
        oy = 0
        for k in range(N):  # V[:,k] = Xtrain[k:k+w, :] * Xtrain[k,:].T
            m = min(width + 1, N - k)
            blas.gemv(X, X, V, m=m, ldA=N, incx=N, offsetA=k, offsetx=k, offsety=oy)
            oy += m
        blas.scal(1.0 / sigma, V)

        # ai = ||xi||**2 / sigma
        # (the entries of V that sit on the diagonal, i.e. I[i] == J[i])
        a = matrix(V[[i for i in range(len(I)) if I[i] == J[i]]], (N, 1))

        if kernel == 'linear':

            Q = spmatrix(V, I, J, (N, N))

        elif kernel == 'rbf':

            Q = spmatrix(V, I, J, (N, N))

            ones = matrix(1.0, (N, 1))

            # Qij := Qij - 0.5 * ( ai + aj )
            #      = -||xi - xj||^2 / (2*sigma)
            symb = chompack.symbolic(Q)
            Qc = chompack.cspmatrix(symb) + Q
            chompack.syr2(Qc, a, ones, alpha=-0.5)
            Q = Qc.spmatrix(reordered=False)
            Q.V = exp(Q.V)

        elif kernel == 'tanh':

            # tanh(t) evaluated as (e^t - e^-t) / (e^t + e^-t)
            V = exp(V - theta)
            V = div(V - V**-1, V + V**-1)
            Q = spmatrix(V, I, J, (N, N))

        elif kernel == 'poly':

            Q = spmatrix(V**degree, I, J, (N, N))

        else:
            raise ValueError('invalid kernel type')

    else:
        raise TypeError('invalid type V')

    return Q, a
def f(x, y, z): """ Solve C * ux + G' * uzl - 2*A'(uzs21) = bx -uzs11 = bX1 -uzs22 = bX2 G * ux - D^2 * uzl = bzl [ -uX1 -A(ux)' ] [ uzs11 uzs21' ] [ ] - T * [ ] * T = bzs. [ -A(ux) -uX2 ] [ uzs21 uzs22 ] On entry, x = (bx, bX1, bX2) and z = [ bzl; bzs[:] ]. On exit, x = (ux, uX1, uX2) and z = [ D*uzl; (r'*uzs*r)[:] ]. Define X = uzs21, Z = T * uzs * T: C * ux + G' * uzl - 2*A'(X) = bx [ 0 X' ] [ bX1 0 ] T * [ ] * T - Z = T * [ ] * T [ X 0 ] [ 0 bX2 ] G * ux - D^2 * uzl = bzl [ -uX1 -A(ux)' ] [ Z11 Z21' ] [ ] - [ ] = bzs [ -A(ux) -uX2 ] [ Z21 Z22 ] Return x = (ux, uX1, uX2), z = [ D*uzl; (rti'*Z*rti)[:] ]. We use the congruence transformation [ V1 0 ] [ T11 T21' ] [ V1' 0 ] [ I S' ] [ ] [ ] [ ] = [ ] [ 0 V2' ] [ T21 T22 ] [ 0 V2 ] [ S I ] and the factorization X + S * X' * S = L( L'(X) ) to write this as C * ux + G' * uzl - 2*A'(X) = bx L'(V2^-1 * X * V1^-1) - L^-1(V2' * Z21 * V1') = bX G * ux - D^2 * uzl = bzl [ -uX1 -A(ux)' ] [ Z11 Z21' ] [ ] - [ ] = bzs, [ -A(ux) -uX2 ] [ Z21 Z22 ] or C * ux + Gs' * uuzl - 2*As'(XX) = bx XX - ZZ21 = bX Gs * ux - uuzl = D^-1 * bzl -As(ux) - ZZ21 = bbzs_21 -uX1 - Z11 = bzs_11 -uX2 - Z22 = bzs_22 if we introduce scaled variables uuzl = D * uzl XX = L'(V2^-1 * X * V1^-1) = L'(V2^-1 * uzs21 * V1^-1) ZZ21 = L^-1(V2' * Z21 * V1') and define bbzs_21 = L^-1(V2' * bzs_21 * V1') [ bX1 0 ] bX = L^-1( V2' * (T * [ ] * T)_21 * V1'). [ 0 bX2 ] Eliminating Z21 gives C * ux + Gs' * uuzl - 2*As'(XX) = bx Gs * ux - uuzl = D^-1 * bzl -As(ux) - XX = bbzs_21 - bX -uX1 - Z11 = bzs_11 -uX2 - Z22 = bzs_22 and eliminating uuzl and XX gives H * ux = bx + Gs' * D^-1 * bzl + 2*As'(bX - bbzs_21) Gs * ux - uuzl = D^-1 * bzl -As(ux) - XX = bbzs_21 - bX -uX1 - Z11 = bzs_11 -uX2 - Z22 = bzs_22. In summary, we can use the following algorithm: 1. bXX := bX - bbzs21 [ bX1 0 ] = L^-1( V2' * ((T * [ ] * T)_21 - bzs_21) * V1') [ 0 bX2 ] 2. Solve H * ux = bx + Gs' * D^-1 * bzl + 2*As'(bXX). 3. 
From ux, compute uuzl = Gs*ux - D^-1 * bzl and X = V2 * L^-T(-As(ux) + bXX) * V1. 4. Return ux, uuzl, rti' * Z * rti = r' * [ -bX1, X'; X, -bX2 ] * r and uX1 = -Z11 - bzs_11, uX2 = -Z22 - bzs_22. """ # Save bzs_11, bzs_22, bzs_21. lapack.lacpy(z, bz11, uplo = 'L', m = q, n = q, ldA = p+q, offsetA = m) lapack.lacpy(z, bz21, m = p, n = q, ldA = p+q, offsetA = m+q) lapack.lacpy(z, bz22, uplo = 'L', m = p, n = p, ldA = p+q, offsetA = m + (p+q+1)*q) # zl := D^-1 * zl # = D^-1 * bzl blas.tbmv(W['di'], z, n = m, k = 0, ldA = 1) # zs := r' * [ bX1, 0; 0, bX2 ] * r. # zs := [ bX1, 0; 0, bX2 ] blas.scal(0.0, z, offset = m) lapack.lacpy(x[1], z, uplo = 'L', m = q, n = q, ldB = p+q, offsetB = m) lapack.lacpy(x[2], z, uplo = 'L', m = p, n = p, ldB = p+q, offsetB = m + (p+q+1)*q) # scale diagonal of zs by 1/2 blas.scal(0.5, z, inc = p+q+1, offset = m) # a := tril(zs)*r blas.copy(r, a) blas.trmm(z, a, side = 'L', m = p+q, n = p+q, ldA = p+q, ldB = p+q, offsetA = m) # zs := a'*r + r'*a blas.syr2k(r, a, z, trans = 'T', n = p+q, k = p+q, ldB = p+q, ldC = p+q, offsetC = m) # bz21 := L^-1( V2' * ((r * zs * r')_21 - bz21) * V1') # # [ bX1 0 ] # = L^-1( V2' * ((T * [ ] * T)_21 - bz21) * V1'). # [ 0 bX2 ] # a = [ r21 r22 ] * z # = [ r21 r22 ] * r' * [ bX1, 0; 0, bX2 ] * r # = [ T21 T22 ] * [ bX1, 0; 0, bX2 ] * r blas.symm(z, r, a, side = 'R', m = p, n = p+q, ldA = p+q, ldC = p+q, offsetB = q) # bz21 := -bz21 + a * [ r11, r12 ]' # = -bz21 + (T * [ bX1, 0; 0, bX2 ] * T)_21 blas.gemm(a, r, bz21, transB = 'T', m = p, n = q, k = p+q, beta = -1.0, ldA = p+q, ldC = p) # bz21 := V2' * bz21 * V1' # = V2' * (-bz21 + (T*[bX1, 0; 0, bX2]*T)_21) * V1' blas.gemm(V2, bz21, tmp, transA = 'T', m = p, n = q, k = p, ldB = p) blas.gemm(tmp, V1, bz21, transB = 'T', m = p, n = q, k = q, ldC = p) # bz21[:] := D * (I-P) * bz21[:] # = L^-1 * bz21[:] # = bXX[:] blas.copy(bz21, tmp) base.gemv(P, bz21, tmp, alpha = -1.0, beta = 1.0) base.gemv(D, tmp, bz21) # Solve H * ux = bx + Gs' * D^-1 * bzl + 2*As'(bXX). 
# x[0] := x[0] + Gs'*zl + 2*As'(bz21) # = bx + G' * D^-1 * bzl + 2 * As'(bXX) blas.gemv(Gs, z, x[0], trans = 'T', alpha = 1.0, beta = 1.0) blas.gemv(As, bz21, x[0], trans = 'T', alpha = 2.0, beta = 1.0) # x[0] := H \ x[0] # = ux lapack.potrs(H, x[0]) # uuzl = Gs*ux - D^-1 * bzl blas.gemv(Gs, x[0], z, alpha = 1.0, beta = -1.0) # bz21 := V2 * L^-T(-As(ux) + bz21) * V1 # = X blas.gemv(As, x[0], bz21, alpha = -1.0, beta = 1.0) blas.tbsv(DV, bz21, n = p*q, k = 0, ldA = 1) blas.copy(bz21, tmp) base.gemv(P, tmp, bz21, alpha = -1.0, beta = 1.0, trans = 'T') blas.gemm(V2, bz21, tmp) blas.gemm(tmp, V1, bz21) # zs := -zs + r' * [ 0, X'; X, 0 ] * r # = r' * [ -bX1, X'; X, -bX2 ] * r. # a := bz21 * [ r11, r12 ] # = X * [ r11, r12 ] blas.gemm(bz21, r, a, m = p, n = p+q, k = q, ldA = p, ldC = p+q) # z := -z + [ r21, r22 ]' * a + a' * [ r21, r22 ] # = rti' * uzs * rti blas.syr2k(r, a, z, trans = 'T', beta = -1.0, n = p+q, k = p, offsetA = q, offsetC = m, ldB = p+q, ldC = p+q) # uX1 = -Z11 - bzs_11 # = -(r*zs*r')_11 - bzs_11 # uX2 = -Z22 - bzs_22 # = -(r*zs*r')_22 - bzs_22 blas.copy(bz11, x[1]) blas.copy(bz22, x[2]) # scale diagonal of zs by 1/2 blas.scal(0.5, z, inc = p+q+1, offset = m) # a := r*tril(zs) blas.copy(r, a) blas.trmm(z, a, side = 'R', m = p+q, n = p+q, ldA = p+q, ldB = p+q, offsetA = m) # x[1] := -x[1] - a[:q,:] * r[:q, :]' - r[:q,:] * a[:q,:]' # = -bzs_11 - (r*zs*r')_11 blas.syr2k(a, r, x[1], n = q, alpha = -1.0, beta = -1.0) # x[2] := -x[2] - a[q:,:] * r[q:, :]' - r[q:,:] * a[q:,:]' # = -bzs_22 - (r*zs*r')_22 blas.syr2k(a, r, x[2], n = p, alpha = -1.0, beta = -1.0, offsetA = q, offsetB = q) # scale diagonal of zs by 1/2 blas.scal(2.0, z, inc = p+q+1, offset = m)
def F(W):
    """
    Create a solver for the linear equations

                      C * ux + G' * uzl - 2*A'(uzs21) = bx
                                               -uzs11 = bX1
                                               -uzs22 = bX2
                                  G * ux - Dl^2 * uzl = bzl
        [ -uX1   -A(ux)' ]          [ uzs11 uzs21' ]
        [                ] - r*r' * [              ] * r*r' = bzs
        [ -A(ux) -uX2    ]          [ uzs21 uzs22  ]

    where Dl = diag(W['l']), r = W['r'][0].

    On entry, x = (bx, bX1, bX2) and z = [ bzl; bzs[:] ].
    On exit, x = (ux, uX1, uX2) and z = [ Dl*uzl; (r'*uzs*r)[:] ].

    1. Compute matrices V1, V2 such that (with T = r*r')

           [ V1   0   ] [ T11  T21' ] [ V1'  0  ]   [ I  S' ]
           [          ] [           ] [         ] = [       ]
           [ 0    V2' ] [ T21  T22  ] [ 0    V2 ]   [ S  I  ]

       and S = [ diag(s); 0 ], s a positive q-vector.

    2. Factor the mapping X -> X + S * X' * S:

           X + S * X' * S = L( L'( X )).

    3. Compute scaled mappings: a matrix As with as its columns the
       coefficients of the scaled mapping

           L^-1( V2' * A() * V1' )

       and the matrix Gs = Dl^-1 * G.

    4. Cholesky factorization of H = C + Gs'*Gs + 2*As'*As.

    Returns the nested function f(x, y, z), which solves the system in
    place using the factorization computed here.

    NOTE(review): the dimensions (m, n, p, q), the problem data (A, G, C),
    the index sets (Itril2, Itril3, Idiag, Itriu) and all workspace
    matrices (Q1, Q2, L1, L2, tau1, tau2, T21, s, V1, V2, SS, P, D, DV,
    As, As2, Gs, H, tmp, a, bz11, bz21, bz22) are not defined in this
    block; presumably they come from the enclosing scope -- confirm.
    """

    # 1. Compute V1, V2, s.

    r = W['r'][0]

    # LQ factorization R[:q, :] = L1 * Q1.
    lapack.lacpy(r, Q1, m = q)
    lapack.gelqf(Q1, tau1)
    lapack.lacpy(Q1, L1, n = q, uplo = 'L')
    lapack.orglq(Q1, tau1)

    # LQ factorization R[q:, :] = L2 * Q2.
    lapack.lacpy(r, Q2, m = p, offsetA = q)
    lapack.gelqf(Q2, tau2)
    lapack.lacpy(Q2, L2, n = p, uplo = 'L')
    lapack.orglq(Q2, tau2)

    # V2, V1, s are computed from an SVD: if
    #
    #     Q2 * Q1' = U * diag(s) * V',
    #
    # then V1 = V' * L1^-1 and V2 = L2^-T * U.

    # T21 = Q2 * Q1.T
    blas.gemm(Q2, Q1, T21, transB = 'T')

    # SVD T21 = U * diag(s) * V'.  Store U in V2 and V' in V1.
    lapack.gesvd(T21, s, jobu = 'A', jobvt = 'A', U = V2, Vt = V1)

    # Possible alternative (not implemented): Q2 := Q2 * Q1' without
    # extracting Q1, storing T21 in Q2.  This would require lapack.ormlq
    # or lapack.unmlq.

    # V2 = L2^-T * U
    blas.trsm(L2, V2, transA = 'T')

    # V1 = V' * L1^-1
    blas.trsm(L1, V1, side = 'R')

    # 2. Factorization X + S * X' * S = L( L'( X )).
    #
    # The factor L is stored as a diagonal matrix D and a sparse lower
    # triangular matrix P, such that
    #
    #     L(X)[:] = D**-1 * (I + P) * X[:]
    #     L^-1(X)[:] = D * (I - P) * X[:].

    # SS is q x q with SS[i,j] = si*sj.
    blas.scal(0.0, SS)
    blas.syr(s, SS)

    # For a p x q matrix X, P*X[:] is Y[:] where
    #
    #     Yij = si * sj * Xji  if i < j
    #         = 0  otherwise.
    #
    P.V = SS[Itril2]

    # For a p x q matrix X, D*X[:] is Y[:] where
    #
    #     Yij = Xij / sqrt( 1 - si^2 * sj^2 )  if i < j
    #         = Xii / sqrt( 1 + si^2 )  if i = j
    #         = Xij  otherwise.
    #
    # DV holds the reciprocals of the diagonal of D; it is reused by the
    # nested solver for the L^-T step.
    DV[Idiag] = sqrt(1.0 + SS[::q+1])
    DV[Itriu] = sqrt(1.0 - SS[Itril3]**2)
    D.V = DV**-1

    # 3. Scaled linear mappings

    # Ask := V2' * Ask * V1'
    blas.scal(0.0, As)
    base.axpy(A, As)
    for i in xrange(n):
        # tmp := V2' * As[i, :]
        blas.gemm(V2, As, tmp, transA = 'T', m = p, n = q, k = p,
            ldB = p, offsetB = i*p*q)
        # As[:,i] := tmp * V1'
        blas.gemm(tmp, V1, As, transB = 'T', m = p, n = q, k = q,
            ldC = p, offsetC = i*p*q)

    # As := D * (I - P) * As
    #     = L^-1 * As.
    blas.copy(As, As2)
    base.gemm(P, As, As2, alpha = -1.0, beta = 1.0)
    base.gemm(D, As2, As)

    # Gs := Dl^-1 * G
    blas.scal(0.0, Gs)
    base.axpy(G, Gs)
    for k in xrange(n):
        blas.tbmv(W['di'], Gs, n = m, k = 0, ldA = 1, offsetx = k*m)

    # 4. Cholesky factorization of H = C + Gs' * Gs + 2 * As' * As.

    blas.syrk(As, H, trans = 'T', alpha = 2.0)
    blas.syrk(Gs, H, trans = 'T', beta = 1.0)
    base.axpy(C, H)
    lapack.potrf(H)

    def f(x, y, z):
        """
        Solve

                      C * ux + G' * uzl - 2*A'(uzs21) = bx
                                               -uzs11 = bX1
                                               -uzs22 = bX2
                                   G * ux - D^2 * uzl = bzl
            [ -uX1   -A(ux)' ]       [ uzs11 uzs21' ]
            [                ] - T * [              ] * T = bzs.
            [ -A(ux) -uX2    ]       [ uzs21 uzs22  ]

        On entry, x = (bx, bX1, bX2) and z = [ bzl; bzs[:] ].
        On exit, x = (ux, uX1, uX2) and z = [ D*uzl; (r'*uzs*r)[:] ].
        x and z are overwritten in place; y is not used and nothing is
        returned.

        Define X = uzs21, Z = T * uzs * T:

                  C * ux + G' * uzl - 2*A'(X) = bx
                            [ 0  X' ]               [ bX1 0   ]
                        T * [       ] * T - Z = T * [         ] * T
                            [ X  0  ]               [ 0   bX2 ]
                           G * ux - D^2 * uzl = bzl
            [ -uX1   -A(ux)' ]   [ Z11 Z21' ]
            [                ] - [          ] = bzs
            [ -A(ux) -uX2    ]   [ Z21 Z22  ]

        Return x = (ux, uX1, uX2), z = [ D*uzl; (rti'*Z*rti)[:] ].

        We use the congruence transformation

            [ V1   0   ] [ T11  T21' ] [ V1'  0  ]   [ I  S' ]
            [          ] [           ] [         ] = [       ]
            [ 0    V2' ] [ T21  T22  ] [ 0    V2 ]   [ S  I  ]

        and the factorization

            X + S * X' * S = L( L'(X) )

        to write this as

                              C * ux + G' * uzl - 2*A'(X) = bx
            L'(V2^-1 * X * V1^-1) - L^-1(V2' * Z21 * V1') = bX
                                       G * ux - D^2 * uzl = bzl
                        [ -uX1   -A(ux)' ]   [ Z11 Z21' ]
                        [                ] - [          ] = bzs,
                        [ -A(ux) -uX2    ]   [ Z21 Z22  ]

        or

            C * ux + Gs' * uuzl - 2*As'(XX) = bx
                                  XX - ZZ21 = bX
                             Gs * ux - uuzl = D^-1 * bzl
                             -As(ux) - ZZ21 = bbzs_21
                                 -uX1 - Z11 = bzs_11
                                 -uX2 - Z22 = bzs_22

        if we introduce scaled variables

            uuzl = D * uzl
              XX = L'(V2^-1 * X * V1^-1)
                 = L'(V2^-1 * uzs21 * V1^-1)
            ZZ21 = L^-1(V2' * Z21 * V1')

        and define

            bbzs_21 = L^-1(V2' * bzs_21 * V1')
                                       [ bX1  0   ]
                 bX = L^-1( V2' * (T * [          ] * T)_21 * V1').
                                       [ 0    bX2 ]

        Eliminating Z21 gives

            C * ux + Gs' * uuzl - 2*As'(XX) = bx
                             Gs * ux - uuzl = D^-1 * bzl
                               -As(ux) - XX = bbzs_21 - bX
                                 -uX1 - Z11 = bzs_11
                                 -uX2 - Z22 = bzs_22

        and eliminating uuzl and XX gives

                    H * ux = bx + Gs' * D^-1 * bzl + 2*As'(bX - bbzs_21)
            Gs * ux - uuzl = D^-1 * bzl
              -As(ux) - XX = bbzs_21 - bX
                -uX1 - Z11 = bzs_11
                -uX2 - Z22 = bzs_22.

        In summary, we can use the following algorithm:

        1. bXX := bX - bbzs21
                                    [ bX1 0   ]
                = L^-1( V2' * ((T * [         ] * T)_21 - bzs_21) * V1')
                                    [ 0   bX2 ]

        2. Solve H * ux = bx + Gs' * D^-1 * bzl + 2*As'(bXX).

        3. From ux, compute

               uuzl = Gs*ux - D^-1 * bzl and
                  X = V2 * L^-T(-As(ux) + bXX) * V1.

        4. Return ux, uuzl,

               rti' * Z * rti = r' * [ -bX1, X'; X, -bX2 ] * r

           and uX1 = -Z11 - bzs_11, uX2 = -Z22 - bzs_22.
        """

        # Save bzs_11, bzs_22, bzs_21.
        lapack.lacpy(z, bz11, uplo = 'L', m = q, n = q, ldA = p+q,
            offsetA = m)
        lapack.lacpy(z, bz21, m = p, n = q, ldA = p+q, offsetA = m+q)
        lapack.lacpy(z, bz22, uplo = 'L', m = p, n = p, ldA = p+q,
            offsetA = m + (p+q+1)*q)

        # zl := D^-1 * zl
        #     = D^-1 * bzl
        blas.tbmv(W['di'], z, n = m, k = 0, ldA = 1)

        # zs := r' * [ bX1, 0; 0, bX2 ] * r.

        # zs := [ bX1, 0; 0, bX2 ]
        blas.scal(0.0, z, offset = m)
        lapack.lacpy(x[1], z, uplo = 'L', m = q, n = q, ldB = p+q,
            offsetB = m)
        lapack.lacpy(x[2], z, uplo = 'L', m = p, n = p, ldB = p+q,
            offsetB = m + (p+q+1)*q)

        # scale diagonal of zs by 1/2
        blas.scal(0.5, z, inc = p+q+1, offset = m)

        # a := tril(zs)*r
        blas.copy(r, a)
        blas.trmm(z, a, side = 'L', m = p+q, n = p+q, ldA = p+q,
            ldB = p+q, offsetA = m)

        # zs := a'*r + r'*a
        blas.syr2k(r, a, z, trans = 'T', n = p+q, k = p+q, ldB = p+q,
            ldC = p+q, offsetC = m)

        # bz21 := L^-1( V2' * ((r * zs * r')_21 - bz21) * V1')
        #
        #                           [ bX1 0   ]
        #       = L^-1( V2' * ((T * [         ] * T)_21 - bz21) * V1').
        #                           [ 0   bX2 ]

        # a = [ r21 r22 ] * z
        #   = [ r21 r22 ] * r' * [ bX1, 0; 0, bX2 ] * r
        #   = [ T21 T22 ] * [ bX1, 0; 0, bX2 ] * r
        blas.symm(z, r, a, side = 'R', m = p, n = p+q, ldA = p+q,
            ldC = p+q, offsetB = q)

        # bz21 := -bz21 + a * [ r11, r12 ]'
        #       = -bz21 + (T * [ bX1, 0; 0, bX2 ] * T)_21
        blas.gemm(a, r, bz21, transB = 'T', m = p, n = q, k = p+q,
            beta = -1.0, ldA = p+q, ldC = p)

        # bz21 := V2' * bz21 * V1'
        #       = V2' * (-bz21 + (T*[bX1, 0; 0, bX2]*T)_21) * V1'
        blas.gemm(V2, bz21, tmp, transA = 'T', m = p, n = q, k = p,
            ldB = p)
        blas.gemm(tmp, V1, bz21, transB = 'T', m = p, n = q, k = q,
            ldC = p)

        # bz21[:] := D * (I-P) * bz21[:]
        #          = L^-1 * bz21[:]
        #          = bXX[:]
        blas.copy(bz21, tmp)
        base.gemv(P, bz21, tmp, alpha = -1.0, beta = 1.0)
        base.gemv(D, tmp, bz21)

        # Solve H * ux = bx + Gs' * D^-1 * bzl + 2*As'(bXX).

        # x[0] := x[0] + Gs'*zl + 2*As'(bz21)
        #       = bx + Gs' * D^-1 * bzl + 2 * As'(bXX)
        blas.gemv(Gs, z, x[0], trans = 'T', alpha = 1.0, beta = 1.0)
        blas.gemv(As, bz21, x[0], trans = 'T', alpha = 2.0, beta = 1.0)

        # x[0] := H \ x[0]
        #       = ux
        lapack.potrs(H, x[0])

        # uuzl = Gs*ux - D^-1 * bzl
        blas.gemv(Gs, x[0], z, alpha = 1.0, beta = -1.0)

        # bz21 := V2 * L^-T(-As(ux) + bz21) * V1
        #       = X
        blas.gemv(As, x[0], bz21, alpha = -1.0, beta = 1.0)
        blas.tbsv(DV, bz21, n = p*q, k = 0, ldA = 1)
        blas.copy(bz21, tmp)
        base.gemv(P, tmp, bz21, alpha = -1.0, beta = 1.0, trans = 'T')
        blas.gemm(V2, bz21, tmp)
        blas.gemm(tmp, V1, bz21)

        # zs := -zs + r' * [ 0, X'; X, 0 ] * r
        #     = r' * [ -bX1, X'; X, -bX2 ] * r.

        # a := bz21 * [ r11, r12 ]
        #    = X * [ r11, r12 ]
        blas.gemm(bz21, r, a, m = p, n = p+q, k = q, ldA = p, ldC = p+q)

        # z := -z + [ r21, r22 ]' * a + a' * [ r21, r22 ]
        #    = rti' * uzs * rti
        blas.syr2k(r, a, z, trans = 'T', beta = -1.0, n = p+q, k = p,
            offsetA = q, offsetC = m, ldB = p+q, ldC = p+q)

        # uX1 = -Z11 - bzs_11
        #     = -(r*zs*r')_11 - bzs_11
        # uX2 = -Z22 - bzs_22
        #     = -(r*zs*r')_22 - bzs_22

        blas.copy(bz11, x[1])
        blas.copy(bz22, x[2])

        # scale diagonal of zs by 1/2
        blas.scal(0.5, z, inc = p+q+1, offset = m)

        # a := r*tril(zs)
        blas.copy(r, a)
        blas.trmm(z, a, side = 'R', m = p+q, n = p+q, ldA = p+q,
            ldB = p+q, offsetA = m)

        # x[1] := -x[1] - a[:q,:] * r[:q, :]' - r[:q,:] * a[:q,:]'
        #       = -bzs_11 - (r*zs*r')_11
        blas.syr2k(a, r, x[1], n = q, alpha = -1.0, beta = -1.0)

        # x[2] := -x[2] - a[q:,:] * r[q:, :]' - r[q:,:] * a[q:,:]'
        #       = -bzs_22 - (r*zs*r')_22
        blas.syr2k(a, r, x[2], n = p, alpha = -1.0, beta = -1.0,
            offsetA = q, offsetB = q)

        # restore the diagonal of zs (undo the scaling by 1/2 above)
        blas.scal(2.0, z, inc = p+q+1, offset = m)

    return f
def F(W):
    """
    Generate a solver for

                                     A'(uz0) = bx[0]
                                  -uz0 - uz1 = bx[1]
        A(ux[0]) - ux[1] - r0*r0' * uz0 * r0*r0' = bz0
                 - ux[1] - r1*r1' * uz1 * r1*r1' = bz1.

    uz0, uz1, bz0, bz1 are symmetric m x m-matrices.
    ux[0], bx[0] are n-vectors.
    ux[1], bx[1] are symmetric m x m-matrices.

    We first calculate a congruence that diagonalizes r0*r0' and r1*r1':

        U' * r0 * r0' * U = I,  U' * r1 * r1' * U = S.

    We then make a change of variables

        usx[0] = ux[0],
        usx[1] = U' * ux[1] * U
          usz0 = U^-1 * uz0 * U^-T
          usz1 = U^-1 * uz1 * U^-T

    and define

          As() = U' * A() * U
        bsx[1] = U^-1 * bx[1] * U^-T
          bsz0 = U' * bz0 * U
          bsz1 = U' * bz1 * U.

    This gives

                         As'(usz0) = bx[0]
                      -usz0 - usz1 = bsx[1]
        As(usx[0]) - usx[1] - usz0 = bsz0
            -usx[1] - S * usz1 * S = bsz1.

    1. Eliminate usz0, usz1 using equations 3 and 4,

           usz0 = As(usx[0]) - usx[1] - bsz0
           usz1 = -S^-1 * (usx[1] + bsz1) * S^-1.

       This gives two equations in usx[0] and usx[1].

           As'(As(usx[0]) - usx[1]) = bx[0] + As'(bsz0)

           -As(usx[0]) + usx[1] + S^-1 * usx[1] * S^-1
               = bsx[1] - bsz0 - S^-1 * bsz1 * S^-1.

    2. Eliminate usx[1] using equation 2:

           usx[1] + S * usx[1] * S
               = S * ( As(usx[0]) + bsx[1] - bsz0 ) * S - bsz1

       i.e., with Gamma[i,j] = 1.0 + S[i,i] * S[j,j],

           usx[1] = ( S * As(usx[0]) * S ) ./ Gamma
                    + ( S * ( bsx[1] - bsz0 ) * S - bsz1 ) ./ Gamma.

       This gives an equation in usx[0].

           As'( As(usx[0]) ./ Gamma )
               = bx0 + As'(bsz0) +
                 As'( (S * ( bsx[1] - bsz0 ) * S - bsz1) ./ Gamma )
               = bx0 + As'( ( bsz0 - bsz1 + S * bsx[1] * S ) ./ Gamma ).

    Returns the nested function solve(x, y, z), which overwrites x and z
    in place with the solution.

    NOTE(review): m, n, msq, mpckd, A and the workspace matrices (L, U,
    Uti, Us, s, S, Asc, tmp, x1) are not defined in this block; presumably
    they come from the enclosing scope -- confirm.
    """

    # Calculate U s.t.
    #
    #     U' * r0*r0' * U = I,   U' * r1*r1' * U = diag(s).

    # Cholesky factorization r0 * r0' = L * L'
    blas.syrk(W['r'][0], L)
    lapack.potrf(L)

    # SVD L^-1 * r1 = U * diag(s) * V'
    blas.copy(W['r'][1], U)
    blas.trsm(L, U)
    lapack.gesvd(U, s, jobu='O')

    # s := s**2
    s[:] = s**2

    # Uti := U
    blas.copy(U, Uti)

    # U := L^-T * U
    blas.trsm(L, U, transA='T')

    # Uti := L * Uti = U^-T
    blas.trmm(L, Uti)

    # Us := U * diag(s)^-1
    blas.copy(U, Us)
    for i in range(m):
        blas.tbsv(s, Us, n=m, k=0, ldA=1, incx=m, offsetx=i)

    # S is m x m with lower triangular entries s[i] * s[j]
    # sqrtG is m x m with lower triangular entries sqrt(1.0 + s[i]*s[j])
    # Upper triangular entries are undefined but nonzero.

    blas.scal(0.0, S)
    blas.syrk(s, S)
    Gamma = 1.0 + S
    sqrtG = sqrt(Gamma)

    # Asc[i] = (U' * Ai * U ) ./ sqrtG,  for i = 1, ..., n
    #        = Asi ./ sqrt(Gamma)
    blas.copy(A, Asc)
    misc.scale(
        Asc,
        # only 'r' part of the dictionary is used
        {
            'dnl': matrix(0.0, (0, 1)),
            'dnli': matrix(0.0, (0, 1)),
            'd': matrix(0.0, (0, 1)),
            'di': matrix(0.0, (0, 1)),
            'v': [],
            'beta': [],
            'r': [U],
            'rti': [U]
        })
    for i in range(n):
        blas.tbsv(sqrtG, Asc, n=msq, k=0, ldA=1, offsetx=i * msq)

    # Convert columns of Asc to packed storage
    misc.pack2(Asc, {'l': 0, 'q': [], 's': [m]})

    # Cholesky factorization of Asc' * Asc.
    H = matrix(0.0, (n, n))
    blas.syrk(Asc, H, trans='T', k=mpckd)
    lapack.potrf(H)

    def solve(x, y, z):
        """
        Solve the system described in F in place; y is not used.

        1. Solve for usx[0]:

           Asc'(Asc(usx[0]))
               = bx0 + Asc'( ( bsz0 - bsz1 + S * bsx[1] * S ) ./ sqrtG)
               = bx0 + Asc'( ( bsz0 + S * ( bsx[1] - bssz1) S ) ./ sqrtG)

           where bsx[1] = U^-1 * bx[1] * U^-T, bsz0 = U' * bz0 * U,
           bsz1 = U' * bz1 * U, bssz1 = S^-1 * bsz1 * S^-1

        2. Solve for usx[1]:

           usx[1] + S * usx[1] * S
               = S * ( As(usx[0]) + bsx[1] - bsz0 ) * S - bsz1

           usx[1]
               = ( S * (As(usx[0]) + bsx[1] - bsz0) * S - bsz1) ./ Gamma
               = -bsz0 + (S * As(usx[0]) * S) ./ Gamma
                 + (bsz0 - bsz1 + S * bsx[1] * S ) ./ Gamma
               = -bsz0 + (S * As(usx[0]) * S) ./ Gamma
                 + (bsz0 + S * ( bsx[1] - bssz1 ) * S ) ./ Gamma

           Unscale ux[1] = Uti * usx[1] * Uti'

        3. Compute usz0, usz1

           r0' * uz0 * r0 = r0^-1 * ( A(ux[0]) - ux[1] - bz0 ) * r0^-T
           r1' * uz1 * r1 = r1^-1 * ( -ux[1] - bz1 ) * r1^-T
        """

        # z0 := U' * z0 * U
        #     = bsz0
        __cngrnc(U, z, trans='T')

        # z1 := Us' * bz1 * Us
        #     = S^-1 * U' * bz1 * U * S^-1
        #     = S^-1 * bsz1 * S^-1
        __cngrnc(Us, z, trans='T', offsetx=msq)

        # x[1] := Uti' * x[1] * Uti
        #       = bsx[1]
        __cngrnc(Uti, x[1], trans='T')

        # x[1] := x[1] - z[msq:]
        #       = bsx[1] - S^-1 * bsz1 * S^-1
        blas.axpy(z, x[1], alpha=-1.0, offsetx=msq)

        # x1 = (S * x[1] * S + z[:msq] ) ./ sqrtG
        #    = (S * ( bsx[1] - S^-1 * bsz1 * S^-1) * S + bsz0 ) ./ sqrtG
        #    = (S * bsx[1] * S - bsz1 + bsz0 ) ./ sqrtG
        # in packed storage
        blas.copy(x[1], x1)
        blas.tbmv(S, x1, n=msq, k=0, ldA=1)
        blas.axpy(z, x1, n=msq)
        blas.tbsv(sqrtG, x1, n=msq, k=0, ldA=1)
        misc.pack2(x1, {'l': 0, 'q': [], 's': [m]})

        # x[0] := x[0] + Asc'*x1
        #       = bx0 + Asc'( ( bsz0 - bsz1 + S * bsx[1] * S ) ./ sqrtG)
        #       = bx0 + As'( ( bz0 - bz1 + S * bx[1] * S ) ./ Gamma )
        blas.gemv(Asc, x1, x[0], m=mpckd, trans='T', beta=1.0)

        # x[0] := H^-1 * x[0]
        #       = ux[0]
        lapack.potrs(H, x[0])

        # x1 = Asc(x[0]) .* sqrtG  (unpacked)
        #    = As(x[0])
        blas.gemv(Asc, x[0], tmp, m=mpckd)
        misc.unpack(tmp, x1, {'l': 0, 'q': [], 's': [m]})
        blas.tbmv(sqrtG, x1, n=msq, k=0, ldA=1)

        # usx[1] = (x1 + (x[1] - z[:msq])) ./ sqrtG**2
        #        = (As(ux[0]) + bsx[1] - bsz0 - S^-1 * bsz1 * S^-1)
        #          ./ Gamma

        # x[1] := x[1] - z[:msq]
        #       = bsx[1] - bsz0 - S^-1 * bsz1 * S^-1
        blas.axpy(z, x[1], -1.0, n=msq)

        # x[1] := x[1] + x1
        #       = As(ux) + bsx[1] - bsz0 - S^-1 * bsz1 * S^-1
        blas.axpy(x1, x[1])

        # x[1] := x[1] / Gamma
        #       = (As(ux) + bsx[1] - bsz0 - S^-1 * bsz1 * S^-1 ) / Gamma
        #       = S^-1 * usx[1] * S^-1
        blas.tbsv(Gamma, x[1], n=msq, k=0, ldA=1)

        # z[msq:] := r1' * U * (-z[msq:] - x[1]) * U * r1
        #         := -r1' * U * S^-1 * (bsz1 + ux[1]) * S^-1 * U * r1
        #         := -r1' * uz1 * r1
        blas.axpy(x[1], z, n=msq, offsety=msq)
        blas.scal(-1.0, z, offset=msq)
        __cngrnc(U, z, offsetx=msq)
        __cngrnc(W['r'][1], z, trans='T', offsetx=msq)

        # x[1] := S * x[1] * S
        #       = usx1
        blas.tbmv(S, x[1], n=msq, k=0, ldA=1)

        # z[:msq] = r0' * U' * ( x1 - x[1] - z[:msq] ) * U * r0
        #         = r0' * U' * ( As(ux) - usx1 - bsz0 ) * U * r0
        #         = r0' * U' * usz0 * U * r0
        #         = r0' * uz0 * r0
        blas.axpy(x1, z, -1.0, n=msq)
        blas.scal(-1.0, z, n=msq)
        blas.axpy(x[1], z, -1.0, n=msq)
        __cngrnc(U, z)
        __cngrnc(W['r'][0], z, trans='T')

        # x[1] := Uti * x[1] * Uti'
        #       = ux[1]
        __cngrnc(Uti, x[1])

    return solve
def xscal(alpha, u):
    """Scale the three components u[0], u[1], u[2] in place by alpha."""
    for index in range(3):
        blas.scal(alpha, u[index])
def f(x, y, z): """ Solve C * ux + G' * uzl - 2*A'(uzs21) = bx -uzs11 = bX1 -uzs22 = bX2 G * ux - D^2 * uzl = bzl [ -uX1 -A(ux)' ] [ uzs11 uzs21' ] [ ] - T * [ ] * T = bzs. [ -A(ux) -uX2 ] [ uzs21 uzs22 ] On entry, x = (bx, bX1, bX2) and z = [ bzl; bzs[:] ]. On exit, x = (ux, uX1, uX2) and z = [ D*uzl; (r'*uzs*r)[:] ]. Define X = uzs21, Z = T * uzs * T: C * ux + G' * uzl - 2*A'(X) = bx [ 0 X' ] [ bX1 0 ] T * [ ] * T - Z = T * [ ] * T [ X 0 ] [ 0 bX2 ] G * ux - D^2 * uzl = bzl [ -uX1 -A(ux)' ] [ Z11 Z21' ] [ ] - [ ] = bzs [ -A(ux) -uX2 ] [ Z21 Z22 ] Return x = (ux, uX1, uX2), z = [ D*uzl; (rti'*Z*rti)[:] ]. We use the congruence transformation [ V1 0 ] [ T11 T21' ] [ V1' 0 ] [ I S' ] [ ] [ ] [ ] = [ ] [ 0 V2' ] [ T21 T22 ] [ 0 V2 ] [ S I ] and the factorization X + S * X' * S = L( L'(X) ) to write this as C * ux + G' * uzl - 2*A'(X) = bx L'(V2^-1 * X * V1^-1) - L^-1(V2' * Z21 * V1') = bX G * ux - D^2 * uzl = bzl [ -uX1 -A(ux)' ] [ Z11 Z21' ] [ ] - [ ] = bzs, [ -A(ux) -uX2 ] [ Z21 Z22 ] or C * ux + Gs' * uuzl - 2*As'(XX) = bx XX - ZZ21 = bX Gs * ux - uuzl = D^-1 * bzl -As(ux) - ZZ21 = bbzs_21 -uX1 - Z11 = bzs_11 -uX2 - Z22 = bzs_22 if we introduce scaled variables uuzl = D * uzl XX = L'(V2^-1 * X * V1^-1) = L'(V2^-1 * uzs21 * V1^-1) ZZ21 = L^-1(V2' * Z21 * V1') and define bbzs_21 = L^-1(V2' * bzs_21 * V1') [ bX1 0 ] bX = L^-1( V2' * (T * [ ] * T)_21 * V1'). [ 0 bX2 ] Eliminating Z21 gives C * ux + Gs' * uuzl - 2*As'(XX) = bx Gs * ux - uuzl = D^-1 * bzl -As(ux) - XX = bbzs_21 - bX -uX1 - Z11 = bzs_11 -uX2 - Z22 = bzs_22 and eliminating uuzl and XX gives H * ux = bx + Gs' * D^-1 * bzl + 2*As'(bX - bbzs_21) Gs * ux - uuzl = D^-1 * bzl -As(ux) - XX = bbzs_21 - bX -uX1 - Z11 = bzs_11 -uX2 - Z22 = bzs_22. In summary, we can use the following algorithm: 1. bXX := bX - bbzs21 [ bX1 0 ] = L^-1( V2' * ((T * [ ] * T)_21 - bzs_21) * V1') [ 0 bX2 ] 2. Solve H * ux = bx + Gs' * D^-1 * bzl + 2*As'(bXX). 3. 
From ux, compute uuzl = Gs*ux - D^-1 * bzl and X = V2 * L^-T(-As(ux) + bXX) * V1. 4. Return ux, uuzl, rti' * Z * rti = r' * [ -bX1, X'; X, -bX2 ] * r and uX1 = -Z11 - bzs_11, uX2 = -Z22 - bzs_22. """ # Save bzs_11, bzs_22, bzs_21. lapack.lacpy(z, bz11, uplo='L', m=q, n=q, ldA=p + q, offsetA=m) lapack.lacpy(z, bz21, m=p, n=q, ldA=p + q, offsetA=m + q) lapack.lacpy(z, bz22, uplo='L', m=p, n=p, ldA=p + q, offsetA=m + (p + q + 1) * q) # zl := D^-1 * zl # = D^-1 * bzl blas.tbmv(W['di'], z, n=m, k=0, ldA=1) # zs := r' * [ bX1, 0; 0, bX2 ] * r. # zs := [ bX1, 0; 0, bX2 ] blas.scal(0.0, z, offset=m) lapack.lacpy(x[1], z, uplo='L', m=q, n=q, ldB=p + q, offsetB=m) lapack.lacpy(x[2], z, uplo='L', m=p, n=p, ldB=p + q, offsetB=m + (p + q + 1) * q) # scale diagonal of zs by 1/2 blas.scal(0.5, z, inc=p + q + 1, offset=m) # a := tril(zs)*r blas.copy(r, a) blas.trmm(z, a, side='L', m=p + q, n=p + q, ldA=p + q, ldB=p + q, offsetA=m) # zs := a'*r + r'*a blas.syr2k(r, a, z, trans='T', n=p + q, k=p + q, ldB=p + q, ldC=p + q, offsetC=m) # bz21 := L^-1( V2' * ((r * zs * r')_21 - bz21) * V1') # # [ bX1 0 ] # = L^-1( V2' * ((T * [ ] * T)_21 - bz21) * V1'). # [ 0 bX2 ] # a = [ r21 r22 ] * z # = [ r21 r22 ] * r' * [ bX1, 0; 0, bX2 ] * r # = [ T21 T22 ] * [ bX1, 0; 0, bX2 ] * r blas.symm(z, r, a, side='R', m=p, n=p + q, ldA=p + q, ldC=p + q, offsetB=q) # bz21 := -bz21 + a * [ r11, r12 ]' # = -bz21 + (T * [ bX1, 0; 0, bX2 ] * T)_21 blas.gemm(a, r, bz21, transB='T', m=p, n=q, k=p + q, beta=-1.0, ldA=p + q, ldC=p) # bz21 := V2' * bz21 * V1' # = V2' * (-bz21 + (T*[bX1, 0; 0, bX2]*T)_21) * V1' blas.gemm(V2, bz21, tmp, transA='T', m=p, n=q, k=p, ldB=p) blas.gemm(tmp, V1, bz21, transB='T', m=p, n=q, k=q, ldC=p) # bz21[:] := D * (I-P) * bz21[:] # = L^-1 * bz21[:] # = bXX[:] blas.copy(bz21, tmp) base.gemv(P, bz21, tmp, alpha=-1.0, beta=1.0) base.gemv(D, tmp, bz21) # Solve H * ux = bx + Gs' * D^-1 * bzl + 2*As'(bXX). 
# x[0] := x[0] + Gs'*zl + 2*As'(bz21) # = bx + G' * D^-1 * bzl + 2 * As'(bXX) blas.gemv(Gs, z, x[0], trans='T', alpha=1.0, beta=1.0) blas.gemv(As, bz21, x[0], trans='T', alpha=2.0, beta=1.0) # x[0] := H \ x[0] # = ux lapack.potrs(H, x[0]) # uuzl = Gs*ux - D^-1 * bzl blas.gemv(Gs, x[0], z, alpha=1.0, beta=-1.0) # bz21 := V2 * L^-T(-As(ux) + bz21) * V1 # = X blas.gemv(As, x[0], bz21, alpha=-1.0, beta=1.0) blas.tbsv(DV, bz21, n=p * q, k=0, ldA=1) blas.copy(bz21, tmp) base.gemv(P, tmp, bz21, alpha=-1.0, beta=1.0, trans='T') blas.gemm(V2, bz21, tmp) blas.gemm(tmp, V1, bz21) # zs := -zs + r' * [ 0, X'; X, 0 ] * r # = r' * [ -bX1, X'; X, -bX2 ] * r. # a := bz21 * [ r11, r12 ] # = X * [ r11, r12 ] blas.gemm(bz21, r, a, m=p, n=p + q, k=q, ldA=p, ldC=p + q) # z := -z + [ r21, r22 ]' * a + a' * [ r21, r22 ] # = rti' * uzs * rti blas.syr2k(r, a, z, trans='T', beta=-1.0, n=p + q, k=p, offsetA=q, offsetC=m, ldB=p + q, ldC=p + q) # uX1 = -Z11 - bzs_11 # = -(r*zs*r')_11 - bzs_11 # uX2 = -Z22 - bzs_22 # = -(r*zs*r')_22 - bzs_22 blas.copy(bz11, x[1]) blas.copy(bz22, x[2]) # scale diagonal of zs by 1/2 blas.scal(0.5, z, inc=p + q + 1, offset=m) # a := r*tril(zs) blas.copy(r, a) blas.trmm(z, a, side='R', m=p + q, n=p + q, ldA=p + q, ldB=p + q, offsetA=m) # x[1] := -x[1] - a[:q,:] * r[:q, :]' - r[:q,:] * a[:q,:]' # = -bzs_11 - (r*zs*r')_11 blas.syr2k(a, r, x[1], n=q, alpha=-1.0, beta=-1.0) # x[2] := -x[2] - a[q:,:] * r[q:, :]' - r[q:,:] * a[q:,:]' # = -bzs_22 - (r*zs*r')_22 blas.syr2k(a, r, x[2], n=p, alpha=-1.0, beta=-1.0, offsetA=q, offsetB=q) # scale diagonal of zs by 1/2 blas.scal(2.0, z, inc=p + q + 1, offset=m)
def F(W):
    """
    Generate a solver for

                                     A'(uz0) = bx[0]
                                  -uz0 - uz1 = bx[1]
        A(ux[0]) - ux[1] - r0*r0' * uz0 * r0*r0' = bz0
                 - ux[1] - r1*r1' * uz1 * r1*r1' = bz1.

    uz0, uz1, bz0, bz1 are symmetric m x m-matrices.
    ux[0], bx[0] are n-vectors.
    ux[1], bx[1] are symmetric m x m-matrices.

    We first calculate a congruence that diagonalizes r0*r0' and r1*r1':

        U' * r0 * r0' * U = I,  U' * r1 * r1' * U = S.

    We then make a change of variables

        usx[0] = ux[0],
        usx[1] = U' * ux[1] * U
          usz0 = U^-1 * uz0 * U^-T
          usz1 = U^-1 * uz1 * U^-T

    and define

          As() = U' * A() * U
        bsx[1] = U^-1 * bx[1] * U^-T
          bsz0 = U' * bz0 * U
          bsz1 = U' * bz1 * U.

    This gives

                         As'(usz0) = bx[0]
                      -usz0 - usz1 = bsx[1]
        As(usx[0]) - usx[1] - usz0 = bsz0
            -usx[1] - S * usz1 * S = bsz1.

    1. Eliminate usz0, usz1 using equations 3 and 4,

           usz0 = As(usx[0]) - usx[1] - bsz0
           usz1 = -S^-1 * (usx[1] + bsz1) * S^-1.

       This gives two equations in usx[0] and usx[1].

           As'(As(usx[0]) - usx[1]) = bx[0] + As'(bsz0)

           -As(usx[0]) + usx[1] + S^-1 * usx[1] * S^-1
               = bsx[1] - bsz0 - S^-1 * bsz1 * S^-1.

    2. Eliminate usx[1] using equation 2:

           usx[1] + S * usx[1] * S
               = S * ( As(usx[0]) + bsx[1] - bsz0 ) * S - bsz1

       i.e., with Gamma[i,j] = 1.0 + S[i,i] * S[j,j],

           usx[1] = ( S * As(usx[0]) * S ) ./ Gamma
                    + ( S * ( bsx[1] - bsz0 ) * S - bsz1 ) ./ Gamma.

       This gives an equation in usx[0].

           As'( As(usx[0]) ./ Gamma )
               = bx0 + As'(bsz0) +
                 As'( (S * ( bsx[1] - bsz0 ) * S - bsz1) ./ Gamma )
               = bx0 + As'( ( bsz0 - bsz1 + S * bsx[1] * S ) ./ Gamma ).

    Returns the nested function solve(x, y, z), which overwrites x and z
    in place with the solution.

    NOTE(review): m, n, msq, mpckd, A and the workspace matrices (L, U,
    Uti, Us, s, S, Asc, tmp, x1) are not defined in this block; presumably
    they come from the enclosing scope -- confirm.
    """

    # Calculate U s.t.
    #
    #     U' * r0*r0' * U = I,   U' * r1*r1' * U = diag(s).

    # Cholesky factorization r0 * r0' = L * L'
    blas.syrk(W['r'][0], L)
    lapack.potrf(L)

    # SVD L^-1 * r1 = U * diag(s) * V'
    blas.copy(W['r'][1], U)
    blas.trsm(L, U)
    lapack.gesvd(U, s, jobu = 'O')

    # s := s**2
    s[:] = s**2

    # Uti := U
    blas.copy(U, Uti)

    # U := L^-T * U
    blas.trsm(L, U, transA = 'T')

    # Uti := L * Uti = U^-T
    blas.trmm(L, Uti)

    # Us := U * diag(s)^-1
    blas.copy(U, Us)
    for i in range(m):
        blas.tbsv(s, Us, n = m, k = 0, ldA = 1, incx = m, offsetx = i)

    # S is m x m with lower triangular entries s[i] * s[j]
    # sqrtG is m x m with lower triangular entries sqrt(1.0 + s[i]*s[j])
    # Upper triangular entries are undefined but nonzero.

    blas.scal(0.0, S)
    blas.syrk(s, S)
    Gamma = 1.0 + S
    sqrtG = sqrt(Gamma)

    # Asc[i] = (U' * Ai * U ) ./ sqrtG,  for i = 1, ..., n
    #        = Asi ./ sqrt(Gamma)
    blas.copy(A, Asc)
    misc.scale(Asc,
        # only 'r' part of the dictionary is used
        {'dnl': matrix(0.0, (0, 1)),
         'dnli': matrix(0.0, (0, 1)),
         'd': matrix(0.0, (0, 1)),
         'di': matrix(0.0, (0, 1)),
         'v': [],
         'beta': [],
         'r': [ U ],
         'rti': [ U ]})
    for i in range(n):
        blas.tbsv(sqrtG, Asc, n = msq, k = 0, ldA = 1, offsetx = i*msq)

    # Convert columns of Asc to packed storage
    misc.pack2(Asc, {'l': 0, 'q': [], 's': [ m ]})

    # Cholesky factorization of Asc' * Asc.
    H = matrix(0.0, (n, n))
    blas.syrk(Asc, H, trans = 'T', k = mpckd)
    lapack.potrf(H)

    def solve(x, y, z):
        """
        Solve the system described in F in place; y is not used.

        1. Solve for usx[0]:

           Asc'(Asc(usx[0]))
               = bx0 + Asc'( ( bsz0 - bsz1 + S * bsx[1] * S ) ./ sqrtG)
               = bx0 + Asc'( ( bsz0 + S * ( bsx[1] - bssz1) S ) ./ sqrtG)

           where bsx[1] = U^-1 * bx[1] * U^-T, bsz0 = U' * bz0 * U,
           bsz1 = U' * bz1 * U, bssz1 = S^-1 * bsz1 * S^-1

        2. Solve for usx[1]:

           usx[1] + S * usx[1] * S
               = S * ( As(usx[0]) + bsx[1] - bsz0 ) * S - bsz1

           usx[1]
               = ( S * (As(usx[0]) + bsx[1] - bsz0) * S - bsz1) ./ Gamma
               = -bsz0 + (S * As(usx[0]) * S) ./ Gamma
                 + (bsz0 - bsz1 + S * bsx[1] * S ) ./ Gamma
               = -bsz0 + (S * As(usx[0]) * S) ./ Gamma
                 + (bsz0 + S * ( bsx[1] - bssz1 ) * S ) ./ Gamma

           Unscale ux[1] = Uti * usx[1] * Uti'

        3. Compute usz0, usz1

           r0' * uz0 * r0 = r0^-1 * ( A(ux[0]) - ux[1] - bz0 ) * r0^-T
           r1' * uz1 * r1 = r1^-1 * ( -ux[1] - bz1 ) * r1^-T
        """

        # z0 := U' * z0 * U
        #     = bsz0
        __cngrnc(U, z, trans = 'T')

        # z1 := Us' * bz1 * Us
        #     = S^-1 * U' * bz1 * U * S^-1
        #     = S^-1 * bsz1 * S^-1
        __cngrnc(Us, z, trans = 'T', offsetx = msq)

        # x[1] := Uti' * x[1] * Uti
        #       = bsx[1]
        __cngrnc(Uti, x[1], trans = 'T')

        # x[1] := x[1] - z[msq:]
        #       = bsx[1] - S^-1 * bsz1 * S^-1
        blas.axpy(z, x[1], alpha = -1.0, offsetx = msq)

        # x1 = (S * x[1] * S + z[:msq] ) ./ sqrtG
        #    = (S * ( bsx[1] - S^-1 * bsz1 * S^-1) * S + bsz0 ) ./ sqrtG
        #    = (S * bsx[1] * S - bsz1 + bsz0 ) ./ sqrtG
        # in packed storage
        blas.copy(x[1], x1)
        blas.tbmv(S, x1, n = msq, k = 0, ldA = 1)
        blas.axpy(z, x1, n = msq)
        blas.tbsv(sqrtG, x1, n = msq, k = 0, ldA = 1)
        misc.pack2(x1, {'l': 0, 'q': [], 's': [m]})

        # x[0] := x[0] + Asc'*x1
        #       = bx0 + Asc'( ( bsz0 - bsz1 + S * bsx[1] * S ) ./ sqrtG)
        #       = bx0 + As'( ( bz0 - bz1 + S * bx[1] * S ) ./ Gamma )
        blas.gemv(Asc, x1, x[0], m = mpckd, trans = 'T', beta = 1.0)

        # x[0] := H^-1 * x[0]
        #       = ux[0]
        lapack.potrs(H, x[0])

        # x1 = Asc(x[0]) .* sqrtG  (unpacked)
        #    = As(x[0])
        blas.gemv(Asc, x[0], tmp, m = mpckd)
        misc.unpack(tmp, x1, {'l': 0, 'q': [], 's': [m]})
        blas.tbmv(sqrtG, x1, n = msq, k = 0, ldA = 1)

        # usx[1] = (x1 + (x[1] - z[:msq])) ./ sqrtG**2
        #        = (As(ux[0]) + bsx[1] - bsz0 - S^-1 * bsz1 * S^-1)
        #          ./ Gamma

        # x[1] := x[1] - z[:msq]
        #       = bsx[1] - bsz0 - S^-1 * bsz1 * S^-1
        blas.axpy(z, x[1], -1.0, n = msq)

        # x[1] := x[1] + x1
        #       = As(ux) + bsx[1] - bsz0 - S^-1 * bsz1 * S^-1
        blas.axpy(x1, x[1])

        # x[1] := x[1] / Gamma
        #       = (As(ux) + bsx[1] - bsz0 - S^-1 * bsz1 * S^-1 ) / Gamma
        #       = S^-1 * usx[1] * S^-1
        blas.tbsv(Gamma, x[1], n = msq, k = 0, ldA = 1)

        # z[msq:] := r1' * U * (-z[msq:] - x[1]) * U * r1
        #         := -r1' * U * S^-1 * (bsz1 + ux[1]) * S^-1 * U * r1
        #         := -r1' * uz1 * r1
        blas.axpy(x[1], z, n = msq, offsety = msq)
        blas.scal(-1.0, z, offset = msq)
        __cngrnc(U, z, offsetx = msq)
        __cngrnc(W['r'][1], z, trans = 'T', offsetx = msq)

        # x[1] := S * x[1] * S
        #       = usx1
        blas.tbmv(S, x[1], n = msq, k = 0, ldA = 1)

        # z[:msq] = r0' * U' * ( x1 - x[1] - z[:msq] ) * U * r0
        #         = r0' * U' * ( As(ux) - usx1 - bsz0 ) * U * r0
        #         = r0' * U' * usz0 * U * r0
        #         = r0' * uz0 * r0
        blas.axpy(x1, z, -1.0, n = msq)
        blas.scal(-1.0, z, n = msq)
        blas.axpy(x[1], z, -1.0, n = msq)
        __cngrnc(U, z)
        __cngrnc(W['r'][0], z, trans = 'T')

        # x[1] := Uti * x[1] * Uti'
        #       = ux[1]
        __cngrnc(Uti, x[1])

    return solve
def Pf(u, v, alpha=1.0, beta=0.0):
    """
    Update the three components of v in place.

    v[0] is updated by base.symv with the matrix C and u[0], using the
    given alpha and beta; v[1] and v[2] are only scaled by beta.
    """
    base.symv(C, u[0], v[0], alpha=alpha, beta=beta)
    for component in (v[1], v[2]):
        blas.scal(beta, component)
def sysid(y, u, vsig, svth = None):
    """
    System identification using the subspace method and nuclear norm
    optimization.  Estimate a linear time-invariant state-space model given
    inputs and outputs.  The algorithm is described in [1].

    INPUT
    y       'd' matrix of size (p, N).  y are the measured outputs, p is
            the number of outputs, and N is the number of data points
            measured.

    u       'd' matrix of size (m, N).  u are the inputs, m is the number
            of inputs, and N is the number of data points.

    vsig    a weighting parameter in the nuclear norm optimization, its
            value is approximately the 1-sigma output noise level

    svth    an optional parameter, if specified, the model order is
            determined as the number of singular values greater than svth
            times the maximum singular value.  The default value is 1E-3.
            The model order is capped at 10.

    OUTPUT
    sol     a dictionary with the following words
            -- 'A', 'B', 'C', 'D' are the state-space matrices
            -- 'svN', the original singular values of the Hankel matrix
            -- 'sv', the optimized singular values of the Hankel matrix
            -- 'x0', the initial state x(0)
            -- 'n', the model order
            -- 'Aerr', the 2-norm of the least-squares residual of the
               estimate of A
            -- 'xxerr', the 2-norm of the least-squares residual of the
               estimate of [x0; vec(D); vec(B)]

    RAISES
    ValueError if y and u do not have the same number of columns.

    [1] Zhang Liu and Lieven Vandenberghe. "Interior-point method for
        nuclear norm approximation with application to system
        identification."
    """

    m, N, p = u.size[0], u.size[1], y.size[0]
    if y.size[1] != N:
        # Call syntax is valid on both Python 2 and Python 3.
        raise ValueError("y and u must have the same length")

    # Y = G*X + H*U + V, Y has size a x b, U has size c x b, Un has b x d
    r = min(int(30/p),int((N+1.0)/(p+m+1)+1.0))
    a = r*p
    c = r*m
    b = N-r+1
    d = b-c

    # construct Hankel matrix Y
    Y = Hankel(y,r,b,p=p,q=1)

    # construct Hankel matrix U
    U = Hankel(u,r,b,p=m,q=1)

    # compute Un = null(U) and YUn = Y*Un
    Vt = matrix(0.0,(b,b))
    Stemp = matrix(0.0,(c,1))
    Un = matrix(0.0,(b,d))
    YUn = matrix(0.0,(a,d))
    lapack.gesvd(U,Stemp,jobvt='A',Vt=Vt)
    Un[:,:] = Vt.T[:,c:]
    blas.gemm(Y,Un,YUn)

    # compute original singular values
    svN = matrix(0.0,(min(a,d),1))
    lapack.gesvd(YUn,svN)

    # variable, [y(1);...;y(N)]
    # form the coefficient matrices for the nuclear norm optimization
    # minimize | Yh * Un |_* + alpha * | y - yh |_F
    AA = Hankel_basis(r,b,p=p,q=1)
    A = matrix(0.0,(a*d,p*N))
    temp = spmatrix([],[],[],(a,b),'d')
    temp2 = matrix(0.0,(a,d))
    for ii in range(p*N):
        temp[:] = AA[:,ii]
        base.gemm(temp,Un,temp2)
        A[:,ii] = temp2[:]
    B = matrix(0.0,(a,d))

    # flip the matrix if columns is more than rows
    if a < d:
        Itrans = [i+j*a for i in range(a) for j in range(d)]
        B[:] = B[Itrans]
        B.size = (d,a)
        for ii in range(p*N):
            A[:,ii] = A[Itrans,ii]

    # regularized term
    x0 = y[:]
    Qd = matrix(2.0*svN[0]/p/N/(vsig**2),(p*N,1))

    # solve the nuclear norm optimization
    sol = nrmapp(A, B, C = base.spdiag(Qd), d = -base.mul(x0, Qd))
    x = sol['x']

    # construct YhUn and take the svd
    YhUn = matrix(B)
    blas.gemv(A,x,YhUn,beta=1.0)
    if a < d:
        YhUn = YhUn.T
    Uh = matrix(0.0,(a,d))
    sv = matrix(0.0,(d,1))
    lapack.gesvd(YhUn,sv,jobu='S',U=Uh)

    # determine model order; the bound is tested before sv[n] is read so
    # that the loop never indexes past the end of sv
    if svth is None:
        svth = 1E-3
    svthn = sv[0]*svth
    max_order = min(10, len(sv))
    n = 1
    while n < max_order and sv[n] >= svthn:
        n = n+1

    # estimate A, C
    Uhn = Uh[:,:n]
    for ii in range(n):
        blas.scal(sv[ii],Uhn,n=a,offset=ii*a)
    syseC = Uhn[:p,:]
    Als = Uhn[:-p,:]
    Bls = Uhn[p:,:]
    lapack.gels(Als,Bls)
    syseA = Bls[:n,:]
    # gels overwrote Als and Bls; reload them to compute the residual
    Als[:,:] = Uhn[:-p,:]
    Bls[:,:] = Uhn[p:,:]
    blas.gemm(Als,syseA,Bls,beta=-1.0)
    Aerr = blas.nrm2(Bls)

    # stabilize A
    Sc = matrix(0.0,(n,n),'z')
    w = matrix(0.0, (n,1), 'z')
    Vs = matrix(0.0, (n,n), 'z')

    def inside_unit_disk(eigenvalue):
        # select function for gees: picks eigenvalues with modulus < 1
        return (abs(eigenvalue) < 1.0)

    Sc[:,:] = syseA
    ns = lapack.gees(Sc, w, Vs, select = inside_unit_disk)
    while ns < n:
        # invert the eigenvalues that were not selected and rebuild A
        w[ns:] = w[ns:]**-1
        Sc[::n+1] = w
        Sc = Vs*Sc*Vs.H
        syseA[:,:] = Sc.real()
        Sc[:,:] = syseA
        ns = lapack.gees(Sc, w, Vs, select = inside_unit_disk)

    # estimate B,D,x0 stored in vector [x0; vec(D); vec(B)]
    F1 = matrix(0.0,(p*N,n))
    F1[:p,:] = syseC
    for ii in range(1,N):
        F1[ii*p:(ii+1)*p,:] = F1[(ii-1)*p:ii*p,:]*syseA
    F2 = matrix(0.0,(p*N,p*m))
    ut = u.T
    for ii in range(p):
        F2[ii::p,ii::p] = ut
    F3 = matrix(0.0,(p*N,n*m))
    F3t = matrix(0.0,(p*(N-1),n*m))
    for ii in range(1,N):
        for jj in range(p):
            for kk in range(n):
                F3t[jj:jj+(N-ii)*p:p,kk::n] = ut[:N-ii,:]*F1[(ii-1)*p+jj,kk]
        F3[ii*p:,:] = F3[ii*p:,:] + F3t[:(N-ii)*p,:]

    F = matrix([[F1],[F2],[F3]])
    yls = y[:]
    Sls = matrix(0.0,(F.size[1],1))
    Uls = matrix(0.0,(F.size[0],F.size[1]))
    Vtls = matrix(0.0,(F.size[1],F.size[1]))
    lapack.gesvd(F, Sls, jobu='S', jobvt='S', U=Uls, Vt=Vtls)
    # numerical rank: number of singular values that are at least 1E-6
    Frank = len([ii for ii in range(Sls.size[0]) if Sls[ii] >= 1E-6])
    xx = matrix(0.0,(F.size[1],1))
    xx[:Frank] = Uls.T[:Frank,:] * yls
    xx[:Frank] = base.mul(xx[:Frank],Sls[:Frank]**-1)
    xx[:] = Vtls.T[:,:Frank]*xx[:Frank]
    blas.gemv(F,xx,yls,beta=-1.0)
    xxerr = blas.nrm2(yls)

    x0 = xx[:n]
    syseD = xx[n:n+p*m]
    syseD.size = (p,m)
    syseB = xx[n+p*m:]
    syseB.size = (n,m)

    return {'A': syseA, 'B': syseB, 'C': syseC, 'D': syseD, 'svN': svN,
            'sv': sv, 'x0': x0, 'n': n, 'Aerr': Aerr, 'xxerr': xxerr}
def Pf(u, v, alpha = 1.0, beta = 0.0):
    """
    Update the three components of v in place.

    v[0] is updated by base.symv with the matrix C and u[0], using the
    given alpha and beta; v[1] and v[2] are only scaled by beta.
    """
    base.symv(C, u[0], v[0], alpha = alpha, beta = beta)
    for index in (1, 2):
        blas.scal(beta, v[index])
def completion(X, factored_updates = True):
    """
    Supernodal multifrontal maximum determinant positive definite
    matrix completion. The routine computes the Cholesky factor
    :math:`L` of the inverse of the maximum determinant positive
    definite matrix completion of :math:`X`:, i.e.,

    .. math::
         P( S^{-1} ) = X

    where :math:`S = LL^T`. On exit, the argument `X` contains the
    lower-triangular Cholesky factor :math:`L`.

    The optional argument `factored_updates` can be used to enable (if
    True) or disable (if False) updating of intermediate
    factorizations.

    The supernodes are visited in reverse post-order. Update matrices are
    exchanged between a supernode and its children through a LIFO stack.
    The function works in place on ``X.blkval``, sets ``X._is_factor`` to
    True, and returns None.

    :param X:                 :py:class:`cspmatrix`
    :param factored_updates:  boolean
    """
    # NOTE: an `assert` is skipped when Python runs with -O, so this
    # check is not a hard guarantee.
    assert isinstance(X, cspmatrix) and X.is_factor is False, "X must be a cspmatrix"

    # Local aliases for the symbolic factorization data.
    n = X.symb.n            # not referenced below
    snpost = X.symb.snpost
    snptr = X.symb.snptr
    chptr = X.symb.chptr
    chidx = X.symb.chidx

    relptr = X.symb.relptr
    relidx = X.symb.relidx
    blkptr = X.symb.blkptr
    blkval = X.blkval

    # Update matrices (or their factors) pushed while processing a
    # supernode and popped by its children.
    stack = []

    for k in reversed(list(snpost)):

        nn = snptr[k+1]-snptr[k]       # |Nk|
        na = relptr[k+1]-relptr[k]     # |Ak|
        nj = na + nn

        # allocate F and copy X_{Jk,Nk} to leading columns of F
        F = matrix(0.0, (nj,nj))
        lapack.lacpy(blkval, F, offsetA = blkptr[k], ldA = nj, m = nj, n = nn, uplo = 'L')

        # if supernode k is not a root node:
        if na > 0:
            # copy Vk to 2,2 block of F
            Vk = stack.pop()
            lapack.lacpy(Vk, F, offsetB = nn*nj+nn, m = na, n = na, uplo = 'L')

        # if supernode k has any children:
        for ii in range(chptr[k],chptr[k+1]):
            i = chidx[ii]
            if factored_updates:
                r = relidx[relptr[i]:relptr[i+1]]
                stack.append(frontal_get_update_factor(F,r,nn,na))
            else:
                stack.append(frontal_get_update(F,relidx,relptr,i))

        # if supernode k is not a root node:
        if na > 0:
            if factored_updates:
                # In this case we have Vk = Lk'*Lk
                trL1 = 'T'
                trL2 = 'N'
            else:
                # factorize Vk
                lapack.potrf(F, offsetA = nj*nn+nn, n = na, ldA = nj)
                # In this case we have Vk = Lk*Lk'
                trL1 = 'N'
                trL2 = 'T'

            # compute L_{Ak,Nk} and inv(D_{Nk,Nk}) = S_{Nk,Nk} - S_{Ak,Nk}'*L_{Ak,Nk}
            # (two triangular solves with the factor of Vk; the syrk in
            # between updates the diagonal block of blkval in place)
            lapack.trtrs(F, blkval, offsetA = nj*nn+nn, trans = trL1,\
                         offsetB = blkptr[k]+nn, ldB = nj, n = na, nrhs = nn)
            blas.syrk(blkval, blkval, n = nn, k = na, trans= 'T', alpha = -1.0, beta = 1.0,
                      offsetA = blkptr[k]+nn, offsetC = blkptr[k], ldA = nj, ldC = nj)
            lapack.trtrs(F, blkval, offsetA = nj*nn+nn, trans = trL2,\
                         offsetB = blkptr[k]+nn, ldB = nj, n = na, nrhs = nn)
            # negate the off-diagonal block, one column at a time
            for i in range(nn):
                blas.scal(-1.0, blkval, n = na, offset = blkptr[k] + i*nj + nn)

        # factorize inv(D_{Nk,Nk}) as R*R' so that D_{Nk,Nk} = L*L' with L = inv(R)'
        # The leading nn x nn block is copied to F, its elements are
        # reversed in linear (column-major) order, factored with
        # uplo = 'U', reversed back and copied to blkval. The "FIX!"
        # markers are from the original author.
        lapack.lacpy(blkval, F, offsetA = blkptr[k], ldA = nj,\
                     ldB = nj, m = nn, n = nn, uplo = 'L') # copy    -- FIX!
        F[:nn,:nn] = matrix(F[:nn,:nn][::-1],(nn,nn))     # reverse -- FIX!
        lapack.potrf(F, ldA = nj, n = nn, uplo = 'U')     # factorize
        F[:nn,:nn] = matrix(F[:nn,:nn][::-1],(nn,nn))     # reverse -- FIX!
        lapack.lacpy(F, blkval, offsetB = blkptr[k], ldA = nj,\
                     ldB = nj, m = nn, n = nn, uplo = 'L') # copy    -- FIX!

        # compute L = inv(R')
        lapack.trtri(blkval, offsetA = blkptr[k], ldA = nj, n = nn)

    # Mark X as holding a Cholesky factor from now on.
    X._is_factor = True

    return
def spmatrix(self, reordered = True, symmetric = False):
    """
    Build a sparse-matrix copy of the :py:class:`cspmatrix` :math:`A`.

    With `reordered` set to `True` (the default) the matrix is returned
    in the reordered numbering; otherwise the inverse permutation is
    applied. With `symmetric` set to `True` the symmetrized matrix is
    returned, otherwise only its lower triangle.

    If :math:`A` represents a Cholesky factor only the default options
    are accepted; any other combination raises `ValueError`.

    :param reordered:  boolean (default: True)
    :param symmetric:  boolean (default: False)
    """
    symb = self.symb
    n = symb.n
    snptr, snode, relptr = symb.snptr, symb.snode, symb.relptr
    snrowidx, sncolptr, blkptr = symb.snrowidx, symb.sncolptr, symb.blkptr
    blkval = self.blkval

    if self.is_factor:
        if symmetric:
            raise ValueError("'symmetric = True' not implemented for Cholesky factors")
        if not reordered:
            raise ValueError("'reordered = False' not implemented for Cholesky factors")

        # Work on a copy so that the stored factor is left untouched.
        blkval = +blkval
        for sn in symb.snpost:
            width = snptr[sn+1] - snptr[sn]       # |Nk|
            below = relptr[sn+1] - relptr[sn]     # |Ak|
            if below == 0:
                continue
            height = below + width
            start = blkptr[sn]
            if width == 1:
                # single column: scale the subdiagonal part by the diagonal entry
                blas.scal(blkval[start], blkval, offset = start + 1, n = below)
            else:
                # multiply the off-diagonal block from the right by the
                # lower-triangular diagonal block
                blas.trmm(blkval, blkval, transA = "N", diag = "N", side = "R", uplo = "L",
                          m = below, n = width, ldA = height, ldB = height,
                          offsetA = start, offsetB = start + width)

    def _columns():
        # Yield (supernode, local column, global column, block height)
        # for every column of every supernode, in storage order.
        for sn in range(symb.Nsn):
            width = snptr[sn+1] - snptr[sn]
            height = width + (relptr[sn+1] - relptr[sn])
            for loc in range(width):
                yield sn, loc, snode[snptr[sn] + loc], height

    # number of stored nonzeros in each column
    counts = matrix(0, (n, 1))
    for sn, loc, col, height in _columns():
        counts[col] = height - loc

    # column pointers (running sum of the counts)
    running = 0
    colptr = [0]
    for col in range(n):
        running = running + counts[col]
        colptr.append(running)
    colptr = matrix(colptr)

    # copy numerical values and row indices column by column
    nnz = colptr[-1]
    val = matrix(0.0, (nnz, 1))
    rowidx = matrix(0, (nnz, 1))
    for sn, loc, col, height in _columns():
        blas.copy(blkval, val, offsetx = blkptr[sn] + height*loc + loc,
                  offsety = colptr[col], n = height - loc)
        rowidx[colptr[col]:colptr[col+1]] = snrowidx[sncolptr[sn] + loc:sncolptr[sn+1]]

    # triplet description of the (reordered, lower-triangular) matrix
    rows = [ri for col in range(n) for ri in rowidx[colptr[col]:colptr[col+1]]]
    cols = [col for col in range(n) for _ in range(colptr[col+1] - colptr[col])]
    lower = spmatrix(val, rows, cols, (n, n))

    if reordered or symb.p is None:
        # reordered matrix (do not apply inverse permutation)
        return symmetrize(lower) if symmetric else lower

    # apply inverse permutation
    permuted = perm(symmetrize(lower), symb.ip)
    return permuted if symmetric else tril(permuted)
def solve(x, y, z):
    """
    Solve the scaled KKT system in place: on entry `x` and `z` hold the
    right-hand sides, on exit they hold the solution. `y` is not
    referenced. U, Us, Uti, S, sqrtG, Gamma, Asc, H, W, x1, tmp, m, msq
    and mpckd are not defined here and must come from the enclosing scope.

    1. Solve for usx[0]:

       Asc'(Asc(usx[0]))
           = bx0 + Asc'( ( bsz0 - bsz1 + S * bsx[1] * S ) ./ sqrtG)
           = bx0 + Asc'( ( bsz0 + S * ( bsx[1] - bssz1) S ) ./ sqrtG)

       where bsx[1] = U^-1 * bx[1] * U^-T, bsz0 = U' * bz0 * U,
       bsz1 = U' * bz1 * U, bssz1 = S^-1 * bsz1 * S^-1

    2. Solve for usx[1]:

       usx[1] + S * usx[1] * S
           = S * ( As(usx[0]) + bsx[1] - bsz0 ) * S - bsz1

       usx[1]
           = ( S * (As(usx[0]) + bsx[1] - bsz0) * S - bsz1) ./ Gamma
           = -bsz0 + (S * As(usx[0]) * S) ./ Gamma
             + (bsz0 - bsz1 + S * bsx[1] * S ) ./ Gamma
           = -bsz0 + (S * As(usx[0]) * S) ./ Gamma
             + (bsz0 + S * ( bsx[1] - bssz1 ) * S ) ./ Gamma

       Unscale ux[1] = Uti * usx[1] * Uti'

    3. Compute usz0, usz1

       r0' * uz0 * r0 = r0^-1 * ( A(ux[0]) - ux[1] - bz0 ) * r0^-T
       r1' * uz1 * r1 = r1^-1 * ( -ux[1] - bz1 ) * r1^-T
    """

    # z0 := U' * z0 * U
    #     = bsz0
    __cngrnc(U, z, trans='T')

    # z1 := Us' * bz1 * Us
    #     = S^-1 * U' * bz1 * U * S^-1
    #     = S^-1 * bsz1 * S^-1
    __cngrnc(Us, z, trans='T', offsetx=msq)

    # x[1] := Uti' * x[1] * Uti
    #       = bsx[1]
    __cngrnc(Uti, x[1], trans='T')

    # x[1] := x[1] - z[msq:]
    #       = bsx[1] - S^-1 * bsz1 * S^-1
    blas.axpy(z, x[1], alpha=-1.0, offsetx=msq)

    # x1 = (S * x[1] * S + z[:msq] ) ./ sqrtG
    #    = (S * ( bsx[1] - S^-1 * bsz1 * S^-1) * S + bsz0 ) ./ sqrtG
    #    = (S * bsx[1] * S - bsz1 + bsz0 ) ./ sqrtG
    # in packed storage
    # (the tbmv/tbsv calls with k=0, ldA=1 act as elementwise
    # multiplication/division by the stored vector)
    blas.copy(x[1], x1)
    blas.tbmv(S, x1, n=msq, k=0, ldA=1)
    blas.axpy(z, x1, n=msq)
    blas.tbsv(sqrtG, x1, n=msq, k=0, ldA=1)
    misc.pack2(x1, {'l': 0, 'q': [], 's': [m]})

    # x[0] := x[0] + Asc'*x1
    #       = bx0 + Asc'( ( bsz0 - bsz1 + S * bsx[1] * S ) ./ sqrtG)
    #       = bx0 + As'( ( bz0 - bz1 + S * bx[1] * S ) ./ Gamma )
    blas.gemv(Asc, x1, x[0], m=mpckd, trans='T', beta=1.0)

    # x[0] := H^-1 * x[0]
    #       = ux[0]
    lapack.potrs(H, x[0])

    # x1 = Asc(x[0]) .* sqrtG  (unpacked)
    #    = As(x[0])
    blas.gemv(Asc, x[0], tmp, m=mpckd)
    misc.unpack(tmp, x1, {'l': 0, 'q': [], 's': [m]})
    blas.tbmv(sqrtG, x1, n=msq, k=0, ldA=1)

    # usx[1] = (x1 + (x[1] - z[:msq])) ./ sqrtG**2
    #        = (As(ux[0]) + bsx[1] - bsz0 - S^-1 * bsz1 * S^-1)
    #           ./ Gamma

    # x[1] := x[1] - z[:msq]
    #       = bsx[1] - bsz0 - S^-1 * bsz1 * S^-1
    blas.axpy(z, x[1], -1.0, n=msq)

    # x[1] := x[1] + x1
    #       = As(ux) + bsx[1] - bsz0 - S^-1 * bsz1 * S^-1
    blas.axpy(x1, x[1])

    # x[1] := x[1] / Gamma
    #       = (As(ux) + bsx[1] - bsz0 - S^-1 * bsz1 * S^-1 ) / Gamma
    #       = S^-1 * usx[1] * S^-1
    blas.tbsv(Gamma, x[1], n=msq, k=0, ldA=1)

    # z[msq:] := r1' * U * (-z[msq:] - x[1]) * U * r1
    #         := -r1' * U * S^-1 * (bsz1 + ux[1]) * S^-1 *  U * r1
    #         := -r1' * uz1 * r1
    blas.axpy(x[1], z, n=msq, offsety=msq)
    blas.scal(-1.0, z, offset=msq)
    __cngrnc(U, z, offsetx=msq)
    __cngrnc(W['r'][1], z, trans='T', offsetx=msq)

    # x[1] := S * x[1] * S
    #       = usx1
    blas.tbmv(S, x[1], n=msq, k=0, ldA=1)

    # z[:msq] = r0' * U' * ( x1 - x[1] - z[:msq] ) * U * r0
    #         = r0' * U' * ( As(ux) - usx1 - bsz0 ) * U * r0
    #         = r0' * U' *  usz0 * U * r0
    #         = r0' * uz0 * r0
    blas.axpy(x1, z, -1.0, n=msq)
    blas.scal(-1.0, z, n=msq)
    blas.axpy(x[1], z, -1.0, n=msq)
    __cngrnc(U, z)
    __cngrnc(W['r'][0], z, trans='T')

    # x[1] := Uti * x[1] * Uti'
    #       = ux[1]
    __cngrnc(Uti, x[1])
def loop(P, q, G, h, A, b, round):
    """
    Scratch re-implementation of the start-up phase of CVXOPT's ``coneqp``
    (the branch that solves the equality-constrained problem directly),
    followed by printing one iteration line via ``print_round``.

    Shapes seen by the original author while debugging (not enforced):

        P = np.outer(y, y) * K                         (373x373)
        q = vector of -1                               (373x1)
        G = vstack(-I, I)                              (746x373)
        h = vstack(zeros(n_samples), c * ones(...))    (746x1)
        A = y
        b = 0.0

    :param P:      dense symmetric matrix of the quadratic term
    :param q:      linear cost vector
    :param G:      inequality constraint matrix
    :param h:      inequality right-hand side
    :param A:      dense equality constraint matrix
    :param b:      equality right-hand side
    :param round:  iteration counter forwarded to ``print_round``
                   (shadows the builtin; kept for interface compatibility)
    """

    def fP(x, y, alpha=1.0, beta=0.0):
        # y := alpha * P * x + beta * y, in place (dense symmetric P).
        # The previous version took P as a first positional argument and
        # returned a value, while every call site passes (x, y) and
        # relies on y being updated in place.
        blas.symv(P, x, y, alpha=alpha, beta=beta)

    def fA(x, y, trans="N", alpha=1.0, beta=0.0):
        # y := alpha * A * x + beta * y   (trans == "N")
        # y := alpha * A' * x + beta * y  (trans == "T"), in place.
        blas.gemv(A, x, y, trans=trans, alpha=alpha, beta=beta)

    # NOTE(review): coneqp defaults dims to
    # {'l': G.size[0], 'q': [], 's': []}; passing None to the misc.kkt_*
    # factories and to misc.snrm2 looks unfinished -- confirm.
    dims = None

    solver_name = "chol2"
    if solver_name == "chol":
        factor = misc.kkt_chol(G, dims, A)
    else:
        factor = misc.kkt_chol2(G, dims, A)

    def kktsolver(W):
        return factor(W, P)

    # Empty scaling: only the [P A'; A 0] part of the KKT system is solved.
    f3 = kktsolver({"d": matrix(0.0, (0, 1)), "di": matrix(0.0, (0, 1)),
                    "beta": [], "v": [], "r": [], "rti": []})

    resx0 = max(1.0, math.sqrt(blas.dot(q, q)))  # 19.13
    resy0 = max(1.0, math.sqrt(blas.dot(b, b)))  # 1.0
    resz0 = max(1.0, misc.snrm2(h, dims))  # 193.13 (unused below)

    # Solve
    #     [ P  A' ] [ x ]   [ -q ]
    #     [ A  0  ] [ y ] = [  b ]
    x = matrix(q)
    blas.scal(-1.0, x)
    y = matrix(b)
    f3(x, y, matrix(0.0, (0, 1)))

    # dres = || P*x + q + A'*y || / resx0
    rx = matrix(q)
    fP(x, rx, beta=1.0)
    pcost = 0.5 * (blas.dot(x, rx) + blas.dot(x, q))
    fA(y, rx, beta=1.0, trans="T")
    dres = math.sqrt(blas.dot(rx, rx)) / resx0

    # pres = || A*x - b || / resy0
    ry = matrix(b)
    fA(x, ry, alpha=1.0, beta=-1.0)
    pres = math.sqrt(blas.dot(ry, ry)) / resy0

    if pcost == 0.0:
        relgap = None
    else:
        relgap = 0.0

    # Start-up values for the main iteration (not used any further here).
    cdim = G.size[0]
    x, y = matrix(q), matrix(b)
    s, z = matrix(0.0, (cdim, 1)), matrix(0.0, (cdim, 1))

    # NOTE(review): the residuals computed above are overwritten by
    # hard-coded reference values before printing -- presumably debugging
    # scaffolding; confirm before relying on the printed numbers.
    pcost = -6.3339e03
    dcost = -5.5410e05
    gap = 2e06
    pres = 2e00
    dres = 2e-14
    print_round(round, pcost, dcost, gap, pres, dres)
def xscal(alpha, u):
    """
    Scale the two-block vector `u` in place: u[0] and u[1] are each
    multiplied by `alpha` through ``blas.scal``.
    """
    for idx in (0, 1):
        blas.scal(alpha, u[idx])
def sysid(y, u, vsig, svth=None):
    """
    System identification using the subspace method and nuclear norm
    optimization.  Estimate a linear time-invariant state-space model
    given inputs and outputs.  The algorithm is described in [1].

    INPUT
    y       'd' matrix of size (p, N).  y are the measured outputs, p is
            the number of outputs, and N is the number of data points
            measured.

    u       'd' matrix of size (m, N).  u are the inputs, m is the number
            of inputs, and N is the number of data points.

    vsig    a weighting parameter in the nuclear norm optimization, its
            value is approximately the 1-sigma output noise level

    svth    an optional parameter, if specified, the model order is
            determined as the number of singular values greater than svth
            times the maximum singular value.  The default value is 1E-3.
            The model order is capped at 10.

    OUTPUT
    sol     a dictionary with the following words
            -- 'A', 'B', 'C', 'D' are the state-space matrices
            -- 'svN', the original singular values of the Hankel matrix
            -- 'sv', the optimized singular values of the Hankel matrix
            -- 'x0', the initial state x(0)
            -- 'n', the model order
            -- 'Aerr', the 2-norm of the residual of the least-squares
               fit used to estimate A
            -- 'xxerr', the 2-norm of the residual of the least-squares
               fit used to estimate B, D and x0

    RAISES
    ValueError  if y and u do not have the same number of columns.

    [1] Zhang Liu and Lieven Vandenberghe. "Interior-point method for
        nuclear norm approximation with application to system
        identification."
    """

    m, N, p = u.size[0], u.size[1], y.size[0]
    if y.size[1] != N:
        # Call form of raise: valid in both Python 2 and Python 3.
        raise ValueError("y and u must have the same length")

    # Y = G*X + H*U + V, Y has size a x b, U has size c x b, Un has b x d
    r = min(int(30 / p), int((N + 1.0) / (p + m + 1) + 1.0))
    a = r * p
    c = r * m
    b = N - r + 1
    d = b - c

    # construct Hankel matrix Y
    Y = Hankel(y, r, b, p=p, q=1)

    # construct Hankel matrix U
    U = Hankel(u, r, b, p=m, q=1)

    # compute Un = null(U) and YUn = Y*Un
    Vt = matrix(0.0, (b, b))
    Stemp = matrix(0.0, (c, 1))
    Un = matrix(0.0, (b, d))
    YUn = matrix(0.0, (a, d))
    lapack.gesvd(U, Stemp, jobvt='A', Vt=Vt)
    Un[:, :] = Vt.T[:, c:]
    blas.gemm(Y, Un, YUn)

    # compute original singular values
    svN = matrix(0.0, (min(a, d), 1))
    lapack.gesvd(YUn, svN)

    # variable, [y(1);...;y(N)]
    # form the coefficient matrices for the nuclear norm optimization
    # minimize | Yh * Un |_* + alpha * | y - yh |_F
    AA = Hankel_basis(r, b, p=p, q=1)
    A = matrix(0.0, (a * d, p * N))
    temp = spmatrix([], [], [], (a, b), 'd')
    temp2 = matrix(0.0, (a, d))
    for ii in range(p * N):
        temp[:] = AA[:, ii]
        base.gemm(temp, Un, temp2)
        A[:, ii] = temp2[:]
    B = matrix(0.0, (a, d))

    # flip the matrix if columns is more than rows
    if a < d:
        Itrans = [i + j * a for i in range(a) for j in range(d)]
        B[:] = B[Itrans]
        B.size = (d, a)
        for ii in range(p * N):
            A[:, ii] = A[Itrans, ii]

    # regularized term
    x0 = y[:]
    Qd = matrix(2.0 * svN[0] / p / N / (vsig**2), (p * N, 1))

    # solve the nuclear norm optimization
    sol = nrmapp(A, B, C=base.spdiag(Qd), d=-base.mul(x0, Qd))
    status = sol['status']  # kept for debugging; not used below
    x = sol['x']

    # construct YhUn and take the svd
    YhUn = matrix(B)
    blas.gemv(A, x, YhUn, beta=1.0)
    if a < d:
        YhUn = YhUn.T
    Uh = matrix(0.0, (a, d))
    sv = matrix(0.0, (d, 1))
    lapack.gesvd(YhUn, sv, jobu='S', U=Uh)

    # determine model order
    if svth is None:
        svth = 1E-3
    svthn = sv[0] * svth
    # Test the bound before indexing so that sv is never read past its
    # end (the previous order evaluated sv[n] first).
    max_order = min(10, len(sv))
    n = 1
    while n < max_order and sv[n] >= svthn:
        n = n + 1

    # estimate A, C
    Uhn = Uh[:, :n]
    for ii in range(n):
        blas.scal(sv[ii], Uhn, n=a, offset=ii * a)
    syseC = Uhn[:p, :]
    Als = Uhn[:-p, :]
    Bls = Uhn[p:, :]
    lapack.gels(Als, Bls)
    syseA = Bls[:n, :]
    # gels overwrote Als and Bls; restore them to compute the residual
    Als[:, :] = Uhn[:-p, :]
    Bls[:, :] = Uhn[p:, :]
    blas.gemm(Als, syseA, Bls, beta=-1.0)
    Aerr = blas.nrm2(Bls)

    # stabilize A
    Sc = matrix(0.0, (n, n), 'z')
    w = matrix(0.0, (n, 1), 'z')
    Vs = matrix(0.0, (n, n), 'z')

    def inside_unit_circle(ev):
        # eigenvalue selector for gees
        return (abs(ev) < 1.0)

    Sc[:, :] = syseA
    ns = lapack.gees(Sc, w, Vs, select=inside_unit_circle)
    while ns < n:
        # invert the eigenvalues that were not selected and rebuild A
        w[ns:] = w[ns:]**-1
        Sc[::n + 1] = w
        Sc = Vs * Sc * Vs.H
        syseA[:, :] = Sc.real()
        Sc[:, :] = syseA
        ns = lapack.gees(Sc, w, Vs, select=inside_unit_circle)

    # estimate B,D,x0 stored in vector [x0; vec(D); vec(B)]
    F1 = matrix(0.0, (p * N, n))
    F1[:p, :] = syseC
    for ii in range(1, N):
        F1[ii * p:(ii + 1) * p, :] = F1[(ii - 1) * p:ii * p, :] * syseA
    F2 = matrix(0.0, (p * N, p * m))
    ut = u.T
    for ii in range(p):
        F2[ii::p, ii::p] = ut
    F3 = matrix(0.0, (p * N, n * m))
    F3t = matrix(0.0, (p * (N - 1), n * m))
    for ii in range(1, N):
        for jj in range(p):
            for kk in range(n):
                F3t[jj:jj + (N - ii) * p:p, kk::n] = \
                    ut[:N - ii, :] * F1[(ii - 1) * p + jj, kk]
        F3[ii * p:, :] = F3[ii * p:, :] + F3t[:(N - ii) * p, :]

    # least-squares solve via a truncated SVD of F = [F1 F2 F3]
    F = matrix([[F1], [F2], [F3]])
    yls = y[:]
    Sls = matrix(0.0, (F.size[1], 1))
    Uls = matrix(0.0, (F.size[0], F.size[1]))
    Vtls = matrix(0.0, (F.size[1], F.size[1]))
    lapack.gesvd(F, Sls, jobu='S', jobvt='S', U=Uls, Vt=Vtls)
    Frank = len([ii for ii in range(Sls.size[0]) if Sls[ii] >= 1E-6])
    #print 'Rank deficiency = ', F.size[1] - Frank
    xx = matrix(0.0, (F.size[1], 1))
    xx[:Frank] = Uls.T[:Frank, :] * yls
    xx[:Frank] = base.mul(xx[:Frank], Sls[:Frank]**-1)
    xx[:] = Vtls.T[:, :Frank] * xx[:Frank]
    blas.gemv(F, xx, yls, beta=-1.0)
    xxerr = blas.nrm2(yls)
    x0 = xx[:n]
    syseD = xx[n:n + p * m]
    syseD.size = (p, m)
    syseB = xx[n + p * m:]
    syseB.size = (n, m)

    return {'A': syseA, 'B': syseB, 'C': syseC, 'D': syseD, 'svN': svN,
            'sv': sv, 'x0': x0, 'n': n, 'Aerr': Aerr, 'xxerr': xxerr}
def F(W):
    """
    Create a solver for the linear equations

                            C * ux + G' * uzl - 2*A'(uzs21) = bx
                                                     -uzs11 = bX1
                                                     -uzs22 = bX2
                                        G * ux - Dl^2 * uzl = bzl
        [ -uX1   -A(ux)' ]          [ uzs11 uzs21' ]
        [                ] - r*r' * [              ] * r*r' = bzs
        [ -A(ux) -uX2    ]          [ uzs21 uzs22  ]

    where Dl = diag(W['l']), r = W['r'][0].

    On entry, x = (bx, bX1, bX2) and z = [ bzl; bzs[:] ].
    On exit, x = (ux, uX1, uX2) and z = [ Dl*uzl; (r'*uzs*r)[:] ].

    The function factors the system for the scaling W and returns the
    solve routine f(x, y, z). The work arrays (Q1, Q2, L1, L2, tau1, tau2,
    T21, s, V1, V2, SS, P, D, DV, As, As2, Gs, H, tmp, a, bz11, bz21,
    bz22), the index sets (Itril2, Itril3, Itriu, Idiag), the problem data
    (A, G, C) and the dimensions (m, n, p, q) are not defined here and
    must come from the enclosing scope.

    1. Compute matrices V1, V2 such that (with T = r*r')

           [ V1   0   ] [ T11  T21' ] [ V1'  0  ]   [ I  S' ]
           [          ] [           ] [         ] = [       ]
           [ 0    V2' ] [ T21  T22  ] [ 0    V2 ]   [ S  I  ]

       and S = [ diag(s); 0 ], s a positive q-vector.

    2. Factor the mapping X -> X + S * X' * S:

           X + S * X' * S = L( L'( X )).

    3. Compute scaled mappings: a matrix As with as its columns the
       coefficients of the scaled mapping

           L^-1( V2' * A() * V1' )

       and the matrix Gs = Dl^-1 * G.

    4. Cholesky factorization of H = C + Gs'*Gs + 2*As'*As.
    """

    # 1. Compute V1, V2, s.

    r = W['r'][0]

    # LQ factorization R[:q, :] = L1 * Q1.
    lapack.lacpy(r, Q1, m=q)
    lapack.gelqf(Q1, tau1)
    lapack.lacpy(Q1, L1, n=q, uplo='L')
    lapack.orglq(Q1, tau1)

    # LQ factorization R[q:, :] = L2 * Q2.
    lapack.lacpy(r, Q2, m=p, offsetA=q)
    lapack.gelqf(Q2, tau2)
    lapack.lacpy(Q2, L2, n=p, uplo='L')
    lapack.orglq(Q2, tau2)

    # V2, V1, s are computed from an SVD: if
    #
    #     Q2 * Q1' = U * diag(s) * V',
    #
    # then V1 = V' * L1^-1 and V2 = L2^-T * U.

    # T21 = Q2 * Q1.T
    blas.gemm(Q2, Q1, T21, transB='T')

    # SVD T21 = U * diag(s) * V'.  Store U in V2 and V' in V1.
    lapack.gesvd(T21, s, jobu='A', jobvt='A', U=V2, Vt=V1)

    # Possible improvement (not implemented):
    # Q2 := Q2 * Q1' without extracting Q1; store T21 in Q2.
    # This would require lapack.ormlq or lapack.unmlq.

    # V2 = L2^-T * U
    blas.trsm(L2, V2, transA='T')

    # V1 = V' * L1^-1
    blas.trsm(L1, V1, side='R')

    # 2. Factorization X + S * X' * S = L( L'( X )).
    #
    # The factor L is stored as a diagonal matrix D and a sparse lower
    # triangular matrix P, such that
    #
    #     L(X)[:] = D**-1 * (I + P) * X[:]
    #     L^-1(X)[:] = D * (I - P) * X[:].

    # SS is q x q with SS[i,j] = si*sj.
    blas.scal(0.0, SS)
    blas.syr(s, SS)

    # For a p x q matrix X, P*X[:] is Y[:] where
    #
    #     Yij = si * sj * Xji  if i < j
    #         = 0              otherwise.
    #
    P.V = SS[Itril2]

    # For a p x q matrix X, D*X[:] is Y[:] where
    #
    #     Yij = Xij / sqrt( 1 - si^2 * sj^2 )  if i < j
    #         = Xii / sqrt( 1 + si^2 )         if i = j
    #         = Xij                            otherwise.
    #
    DV[Idiag] = sqrt(1.0 + SS[::q + 1])
    DV[Itriu] = sqrt(1.0 - SS[Itril3]**2)
    D.V = DV**-1

    # 3. Scaled linear mappings

    # Ask := V2' * Ask * V1'
    blas.scal(0.0, As)
    base.axpy(A, As)
    for i in xrange(n):
        # tmp := V2' * As[i, :]
        blas.gemm(V2, As, tmp, transA='T', m=p, n=q, k=p, ldB=p, offsetB=i * p * q)
        # As[:,i] := tmp * V1'
        blas.gemm(tmp, V1, As, transB='T', m=p, n=q, k=q, ldC=p, offsetC=i * p * q)

    # As := D * (I - P) * As
    #     = L^-1 * As.
    blas.copy(As, As2)
    base.gemm(P, As, As2, alpha=-1.0, beta=1.0)
    base.gemm(D, As2, As)

    # Gs := Dl^-1 * G
    # (column by column: tbmv with k=0, ldA=1 multiplies elementwise by
    # the vector W['di'])
    blas.scal(0.0, Gs)
    base.axpy(G, Gs)
    for k in xrange(n):
        blas.tbmv(W['di'], Gs, n=m, k=0, ldA=1, offsetx=k * m)

    # 4. Cholesky factorization of H = C + Gs' * Gs + 2 * As' * As.
    blas.syrk(As, H, trans='T', alpha=2.0)
    blas.syrk(Gs, H, trans='T', beta=1.0)
    base.axpy(C, H)
    lapack.potrf(H)

    def f(x, y, z):
        """
        Solve

                           C * ux + G' * uzl - 2*A'(uzs21) = bx
                                                    -uzs11 = bX1
                                                    -uzs22 = bX2
                                        G * ux - D^2 * uzl = bzl
            [ -uX1   -A(ux)' ]       [ uzs11 uzs21' ]
            [                ] - T * [              ] * T = bzs.
            [ -A(ux) -uX2    ]       [ uzs21 uzs22  ]

        On entry, x = (bx, bX1, bX2) and z = [ bzl; bzs[:] ].
        On exit, x = (ux, uX1, uX2) and z = [ D*uzl; (r'*uzs*r)[:] ].

        x and z are overwritten in place; y is not referenced.

        Define X = uzs21, Z = T * uzs * T:

                  C * ux + G' * uzl - 2*A'(X) = bx
                        [ 0  X' ]               [ bX1 0   ]
                    T * [       ] * T - Z = T * [         ] * T
                        [ X  0  ]               [ 0   bX2 ]
                           G * ux - D^2 * uzl = bzl
            [ -uX1   -A(ux)' ]   [ Z11  Z21' ]
            [                ] - [           ] = bzs
            [ -A(ux) -uX2    ]   [ Z21  Z22  ]

        Return x = (ux, uX1, uX2), z = [ D*uzl; (rti'*Z*rti)[:] ].

        We use the congruence transformation

            [ V1   0   ] [ T11  T21' ] [ V1'  0  ]   [ I  S' ]
            [          ] [           ] [         ] = [       ]
            [ 0    V2' ] [ T21  T22  ] [ 0    V2 ]   [ S  I  ]

        and the factorization

            X + S * X' * S = L( L'(X) )

        to write this as

                              C * ux + G' * uzl - 2*A'(X) = bx
            L'(V2^-1 * X * V1^-1) - L^-1(V2' * Z21 * V1') = bX
                                       G * ux - D^2 * uzl = bzl
                        [ -uX1   -A(ux)' ]   [ Z11  Z21' ]
                        [                ] - [           ] = bzs,
                        [ -A(ux) -uX2    ]   [ Z21  Z22  ]

        or

            C * ux + Gs' * uuzl - 2*As'(XX) = bx
                                  XX - ZZ21 = bX
                             Gs * ux - uuzl = D^-1 * bzl
                            -As(ux) - ZZ21 = bbzs_21
                                -uX1 - Z11 = bzs_11
                                -uX2 - Z22 = bzs_22

        if we introduce scaled variables

            uuzl = D * uzl
              XX = L'(V2^-1 * X * V1^-1)
                 = L'(V2^-1 * uzs21 * V1^-1)
            ZZ21 = L^-1(V2' * Z21 * V1')

        and define

            bbzs_21 = L^-1(V2' * bzs_21 * V1')
                                       [ bX1  0   ]
                 bX = L^-1( V2' * (T * [          ] * T)_21 * V1').
                                       [ 0    bX2 ]

        Eliminating Z21 gives

            C * ux + Gs' * uuzl - 2*As'(XX) = bx
                             Gs * ux - uuzl = D^-1 * bzl
                               -As(ux) - XX = bbzs_21 - bX
                                 -uX1 - Z11 = bzs_11
                                 -uX2 - Z22 = bzs_22

        and eliminating uuzl and XX gives

                    H * ux = bx + Gs' * D^-1 * bzl + 2*As'(bX - bbzs_21)
            Gs * ux - uuzl = D^-1 * bzl
              -As(ux) - XX = bbzs_21 - bX
                -uX1 - Z11 = bzs_11
                -uX2 - Z22 = bzs_22.

        In summary, we can use the following algorithm:

        1. bXX := bX - bbzs21
                                        [ bX1 0   ]
                = L^-1( V2' * ((T * [         ] * T)_21 - bzs_21) * V1')
                                        [ 0   bX2 ]

        2. Solve H * ux = bx + Gs' * D^-1 * bzl + 2*As'(bXX).

        3. From ux, compute

               uuzl = Gs*ux - D^-1 * bzl and
               X = V2 * L^-T(-As(ux) + bXX) * V1.

        4. Return ux, uuzl,

               rti' * Z * rti = r' * [ -bX1, X'; X, -bX2 ] * r

           and uX1 = -Z11 - bzs_11,  uX2 = -Z22 - bzs_22.
        """

        # Save bzs_11, bzs_22, bzs_21.
        lapack.lacpy(z, bz11, uplo='L', m=q, n=q, ldA=p + q, offsetA=m)
        lapack.lacpy(z, bz21, m=p, n=q, ldA=p + q, offsetA=m + q)
        lapack.lacpy(z, bz22, uplo='L', m=p, n=p, ldA=p + q, offsetA=m + (p + q + 1) * q)

        # zl := D^-1 * zl
        #     = D^-1 * bzl
        blas.tbmv(W['di'], z, n=m, k=0, ldA=1)

        # zs := r' * [ bX1, 0; 0, bX2 ] * r.

        # zs := [ bX1, 0; 0, bX2 ]
        blas.scal(0.0, z, offset=m)
        lapack.lacpy(x[1], z, uplo='L', m=q, n=q, ldB=p + q, offsetB=m)
        lapack.lacpy(x[2], z, uplo='L', m=p, n=p, ldB=p + q, offsetB=m + (p + q + 1) * q)

        # scale diagonal of zs by 1/2
        blas.scal(0.5, z, inc=p + q + 1, offset=m)

        # a := tril(zs)*r
        blas.copy(r, a)
        blas.trmm(z, a, side='L', m=p + q, n=p + q, ldA=p + q, ldB=p + q, offsetA=m)

        # zs := a'*r + r'*a
        blas.syr2k(r, a, z, trans='T', n=p + q, k=p + q, ldB=p + q, ldC=p + q, offsetC=m)

        # bz21 := L^-1( V2' * ((r * zs * r')_21 - bz21) * V1')
        #
        #                           [ bX1 0   ]
        #       = L^-1( V2' * ((T * [         ] * T)_21 - bz21) * V1').
        #                           [ 0   bX2 ]

        # a = [ r21 r22 ] * z
        #   = [ r21 r22 ] * r' * [ bX1, 0; 0, bX2 ] * r
        #   = [ T21  T22 ] * [ bX1, 0; 0, bX2 ] * r
        # NOTE(review): every other call that reads zs from z passes an
        # offset of m; this symm call passes no offsetA -- confirm this
        # is intended when m > 0.
        blas.symm(z, r, a, side='R', m=p, n=p + q, ldA=p + q, ldC=p + q, offsetB=q)

        # bz21 := -bz21 + a * [ r11, r12 ]'
        #       = -bz21 + (T * [ bX1, 0; 0, bX2 ] * T)_21
        blas.gemm(a, r, bz21, transB='T', m=p, n=q, k=p + q, beta=-1.0, ldA=p + q, ldC=p)

        # bz21 := V2' * bz21 * V1'
        #       = V2' * (-bz21 + (T*[bX1, 0; 0, bX2]*T)_21) * V1'
        blas.gemm(V2, bz21, tmp, transA='T', m=p, n=q, k=p, ldB=p)
        blas.gemm(tmp, V1, bz21, transB='T', m=p, n=q, k=q, ldC=p)

        # bz21[:] := D * (I-P) * bz21[:]
        #          = L^-1 * bz21[:]
        #          = bXX[:]
        blas.copy(bz21, tmp)
        base.gemv(P, bz21, tmp, alpha=-1.0, beta=1.0)
        base.gemv(D, tmp, bz21)

        # Solve H * ux = bx + Gs' * D^-1 * bzl + 2*As'(bXX).

        # x[0] := x[0] + Gs'*zl + 2*As'(bz21)
        #       = bx + G' * D^-1 * bzl + 2 * As'(bXX)
        blas.gemv(Gs, z, x[0], trans='T', alpha=1.0, beta=1.0)
        blas.gemv(As, bz21, x[0], trans='T', alpha=2.0, beta=1.0)

        # x[0] := H \ x[0]
        #       = ux
        lapack.potrs(H, x[0])

        # uuzl = Gs*ux - D^-1 * bzl
        blas.gemv(Gs, x[0], z, alpha=1.0, beta=-1.0)

        # bz21 := V2 * L^-T(-As(ux) + bz21) * V1
        #       = X
        blas.gemv(As, x[0], bz21, alpha=-1.0, beta=1.0)
        blas.tbsv(DV, bz21, n=p * q, k=0, ldA=1)
        blas.copy(bz21, tmp)
        base.gemv(P, tmp, bz21, alpha=-1.0, beta=1.0, trans='T')
        blas.gemm(V2, bz21, tmp)
        blas.gemm(tmp, V1, bz21)

        # zs := -zs + r' * [ 0, X'; X, 0 ] * r
        #     = r' * [ -bX1, X'; X, -bX2 ] * r.

        # a := bz21 * [ r11, r12 ]
        #    = X * [ r11, r12 ]
        blas.gemm(bz21, r, a, m=p, n=p + q, k=q, ldA=p, ldC=p + q)

        # z := -z + [ r21, r22 ]' * a + a' * [ r21, r22 ]
        #    = rti' * uzs * rti
        blas.syr2k(r, a, z, trans='T', beta=-1.0, n=p + q, k=p, offsetA=q, offsetC=m, ldB=p + q, ldC=p + q)

        # uX1 = -Z11 - bzs_11
        #     = -(r*zs*r')_11 - bzs_11
        # uX2 = -Z22 - bzs_22
        #     = -(r*zs*r')_22 - bzs_22

        blas.copy(bz11, x[1])
        blas.copy(bz22, x[2])

        # scale diagonal of zs by 1/2
        blas.scal(0.5, z, inc=p + q + 1, offset=m)

        # a := r*tril(zs)
        blas.copy(r, a)
        blas.trmm(z, a, side='R', m=p + q, n=p + q, ldA=p + q, ldB=p + q, offsetA=m)

        # x[1] := -x[1] - a[:q,:] * r[:q, :]' - r[:q,:] * a[:q,:]'
        #       = -bzs_11 - (r*zs*r')_11
        blas.syr2k(a, r, x[1], n=q, alpha=-1.0, beta=-1.0)

        # x[2] := -x[2] - a[q:,:] * r[q:, :]' - r[q:,:] * a[q:,:]'
        #       = -bzs_22 - (r*zs*r')_22
        blas.syr2k(a, r, x[2], n=p, alpha=-1.0, beta=-1.0, offsetA=q, offsetB=q)

        # undo the 1/2 scaling above: scale diagonal of zs by 2
        blas.scal(2.0, z, inc=p + q + 1, offset=m)

    return f
def solve(x, y, z):
    """
    Solve the scaled KKT system in place: on entry `x` and `z` hold the
    right-hand sides, on exit they hold the solution. `y` is not
    referenced. U, Us, Uti, S, sqrtG, Gamma, Asc, H, W, x1, tmp, m, msq
    and mpckd are not defined here and must come from the enclosing scope.

    1. Solve for usx[0]:

       Asc'(Asc(usx[0]))
           = bx0 + Asc'( ( bsz0 - bsz1 + S * bsx[1] * S ) ./ sqrtG)
           = bx0 + Asc'( ( bsz0 + S * ( bsx[1] - bssz1) S ) ./ sqrtG)

       where bsx[1] = U^-1 * bx[1] * U^-T, bsz0 = U' * bz0 * U,
       bsz1 = U' * bz1 * U, bssz1 = S^-1 * bsz1 * S^-1

    2. Solve for usx[1]:

       usx[1] + S * usx[1] * S
           = S * ( As(usx[0]) + bsx[1] - bsz0 ) * S - bsz1

       usx[1]
           = ( S * (As(usx[0]) + bsx[1] - bsz0) * S - bsz1) ./ Gamma
           = -bsz0 + (S * As(usx[0]) * S) ./ Gamma
             + (bsz0 - bsz1 + S * bsx[1] * S ) ./ Gamma
           = -bsz0 + (S * As(usx[0]) * S) ./ Gamma
             + (bsz0 + S * ( bsx[1] - bssz1 ) * S ) ./ Gamma

       Unscale ux[1] = Uti * usx[1] * Uti'

    3. Compute usz0, usz1

       r0' * uz0 * r0 = r0^-1 * ( A(ux[0]) - ux[1] - bz0 ) * r0^-T
       r1' * uz1 * r1 = r1^-1 * ( -ux[1] - bz1 ) * r1^-T
    """

    # z0 := U' * z0 * U
    #     = bsz0
    __cngrnc(U, z, trans = 'T')

    # z1 := Us' * bz1 * Us
    #     = S^-1 * U' * bz1 * U * S^-1
    #     = S^-1 * bsz1 * S^-1
    __cngrnc(Us, z, trans = 'T', offsetx = msq)

    # x[1] := Uti' * x[1] * Uti
    #       = bsx[1]
    __cngrnc(Uti, x[1], trans = 'T')

    # x[1] := x[1] - z[msq:]
    #       = bsx[1] - S^-1 * bsz1 * S^-1
    blas.axpy(z, x[1], alpha = -1.0, offsetx = msq)

    # x1 = (S * x[1] * S + z[:msq] ) ./ sqrtG
    #    = (S * ( bsx[1] - S^-1 * bsz1 * S^-1) * S + bsz0 ) ./ sqrtG
    #    = (S * bsx[1] * S - bsz1 + bsz0 ) ./ sqrtG
    # in packed storage
    # (the tbmv/tbsv calls with k = 0, ldA = 1 act as elementwise
    # multiplication/division by the stored vector)
    blas.copy(x[1], x1)
    blas.tbmv(S, x1, n = msq, k = 0, ldA = 1)
    blas.axpy(z, x1, n = msq)
    blas.tbsv(sqrtG, x1, n = msq, k = 0, ldA = 1)
    misc.pack2(x1, {'l': 0, 'q': [], 's': [m]})

    # x[0] := x[0] + Asc'*x1
    #       = bx0 + Asc'( ( bsz0 - bsz1 + S * bsx[1] * S ) ./ sqrtG)
    #       = bx0 + As'( ( bz0 - bz1 + S * bx[1] * S ) ./ Gamma )
    blas.gemv(Asc, x1, x[0], m = mpckd, trans = 'T', beta = 1.0)

    # x[0] := H^-1 * x[0]
    #       = ux[0]
    lapack.potrs(H, x[0])

    # x1 = Asc(x[0]) .* sqrtG  (unpacked)
    #    = As(x[0])
    blas.gemv(Asc, x[0], tmp, m = mpckd)
    misc.unpack(tmp, x1, {'l': 0, 'q': [], 's': [m]})
    blas.tbmv(sqrtG, x1, n = msq, k = 0, ldA = 1)

    # usx[1] = (x1 + (x[1] - z[:msq])) ./ sqrtG**2
    #        = (As(ux[0]) + bsx[1] - bsz0 - S^-1 * bsz1 * S^-1)
    #           ./ Gamma

    # x[1] := x[1] - z[:msq]
    #       = bsx[1] - bsz0 - S^-1 * bsz1 * S^-1
    blas.axpy(z, x[1], -1.0, n = msq)

    # x[1] := x[1] + x1
    #       = As(ux) + bsx[1] - bsz0 - S^-1 * bsz1 * S^-1
    blas.axpy(x1, x[1])

    # x[1] := x[1] / Gamma
    #       = (As(ux) + bsx[1] - bsz0 - S^-1 * bsz1 * S^-1 ) / Gamma
    #       = S^-1 * usx[1] * S^-1
    blas.tbsv(Gamma, x[1], n = msq, k = 0, ldA = 1)

    # z[msq:] := r1' * U * (-z[msq:] - x[1]) * U * r1
    #         := -r1' * U * S^-1 * (bsz1 + ux[1]) * S^-1 *  U * r1
    #         := -r1' * uz1 * r1
    blas.axpy(x[1], z, n = msq, offsety = msq)
    blas.scal(-1.0, z, offset = msq)
    __cngrnc(U, z, offsetx = msq)
    __cngrnc(W['r'][1], z, trans = 'T', offsetx = msq)

    # x[1] := S * x[1] * S
    #       = usx1
    blas.tbmv(S, x[1], n = msq, k = 0, ldA = 1)

    # z[:msq] = r0' * U' * ( x1 - x[1] - z[:msq] ) * U * r0
    #         = r0' * U' * ( As(ux) - usx1 - bsz0 ) * U * r0
    #         = r0' * U' *  usz0 * U * r0
    #         = r0' * uz0 * r0
    blas.axpy(x1, z, -1.0, n = msq)
    blas.scal(-1.0, z, n = msq)
    blas.axpy(x[1], z, -1.0, n = msq)
    __cngrnc(U, z)
    __cngrnc(W['r'][0], z, trans = 'T')

    # x[1] := Uti * x[1] * Uti'
    #       = ux[1]
    __cngrnc(Uti, x[1])