def symeig(mtxA, mtxB=None, select=None):
    """Solve a symmetric/Hermitian (generalized) eigenvalue problem.

    Solves ``mtxA v = w v`` or, when ``mtxB`` is given,
    ``mtxA v = w mtxB v``.

    Parameters
    ----------
    mtxA : array_like
        Real symmetric or complex Hermitian matrix.
    mtxB : array_like, optional
        Right-hand-side matrix of the generalized problem.
    select : (lo, hi), optional
        Selection notation is like ``scipy.linalg.eigh``: inclusive indices
        of the eigenvalues (ascending order) to compute.  ``None`` computes
        all of them.

    Returns
    -------
    (w, v)
        Eigenvalues in ascending order and the matching eigenvectors stored
        as columns of ``v``.

    Notes
    -----
    Everything is delegated to ``scipy.linalg.eigh``.  Calling the raw
    ``syev``/``sygv``/``heev``/``hegv`` wrappers and re-ordering their
    output by position is fragile: the order in which those wrappers return
    eigenvalues and eigenvectors is not the same in every SciPy release, and
    their ``info`` status was being thrown away.  ``eigh`` has a stable
    ``(w, v)`` contract and raises ``LinAlgError`` on failure.
    """
    import scipy.linalg as sla

    if select is None:
        return sla.eigh(mtxA, mtxB)

    try:
        return sla.eigh(mtxA, mtxB, subset_by_index=select)
    except TypeError:
        # Older SciPy releases only know the previous keyword name.
        return sla.eigh(mtxA, mtxB, eigvals=select)
def test_for_simetric_indefinite_matrix(self):
    """Check ``singular_leading_submatrix`` on a symmetric indefinite matrix."""
    # Test matrix; the original note says its leading 5x5 submatrix
    # (i.e. the matrix itself) is indefinite.
    rows = [[1, 2, 3, 7, 8],
            [2, 5, 5, 9, 0],
            [3, 5, 11, 1, 2],
            [7, 9, 1, 7, 5],
            [8, 0, 2, 5, 8]]
    A = np.asarray(rows)

    # Use the raw LAPACK routine so that the partial factor and the
    # second return value ``k`` are both available.
    (potrf,) = get_lapack_funcs(('potrf',), (A,))
    factor, k = potrf(A, lower=False, overwrite_a=False, clean=True)

    # Shift the k-th diagonal entry by the returned ``delta``.
    shift, direction = singular_leading_submatrix(A, factor, k)
    last = k - 1
    A[last, last] += shift

    # The leading k-by-k submatrix must now be singular.
    leading_det = det(A[:k, :k])
    assert_array_almost_equal(leading_det, 0)

    # ``v`` must satisfy v^T A v = 0 for the shifted matrix.
    A_times_v = np.dot(A, direction)
    assert_array_almost_equal(np.dot(direction, A_times_v), 0)
def _expm_product_helper(A, mu, iteration_stash, t, B): # Estimate expm(t*M).dot(B). # A = M - mu*I # mu = mean(trace(M)) # The iteration stash helps to compute numbers of iterations to use. # t is a scaling factor. # B is the input matrix for the linear operator. # Compute some input-dependent constants. tol = np.ldexp(1, -53) n0 = B.shape[1] m, s = iteration_stash.fragment_3_1(n0, t) #print('1-norm:', A.one_norm(), 't:', t, 'mu:', mu, 'n0:', n0, 'm:', m, 's:', s) # Get the lapack function for computing matrix norms. lange, = get_lapack_funcs(('lange',), (B,)) F = B eta = np.exp(t*mu / float(s)) for i in range(s): c1 = lange('i', B) for j in range(m): coeff = t / float(s*(j+1)) B = coeff * A.dot(B) c2 = lange('i', B) F = F + B if c1 + c2 <= tol * lange('i', F): break c1 = c2 F = eta * F B = F return F
def py_maxvol(A, tol = 1.05, max_iters = 100):
    """Python implementation of 1-volume maximization.

    For information see :py:func:`maxvol` function

    Parameters
    ----------
    A : ndarray, shape (N, r)
        Input matrix.
    tol : float
        Swapping stops once no coefficient exceeds ``tol`` in modulus.
        Values below 1 are replaced by 1.0.
    max_iters : int
        Maximum number of row swaps.

    Returns
    -------
    index : ndarray of int32, shape (r,)
        Indices of the selected rows.
    C : ndarray, shape (N, r)
        Coefficient matrix (returned as the transpose of the internal
        ``(r, N)`` working array).  When ``N <= r`` the function returns
        ``arange(N)`` and the ``N x N`` identity instead.
    """
    if tol < 1:
        tol = 1.0
    N, r = A.shape
    if N <= r:
        return np.arange(N, dtype = np.int32), np.eye(N, dtype = A.dtype)
    # DGETRF
    B = np.copy(A, order = 'F')
    C = np.copy(B.T, order = 'F')
    H, ipiv, info = get_lapack_funcs('getrf', [B])(B, overwrite_a = 1)
    # computing pivots from ipiv
    index = np.arange(N, dtype = np.int32)
    # ``range`` instead of ``xrange``: works on Python 2 and Python 3.
    for i in range(r):
        p = ipiv[i]
        index[i], index[p] = index[p], index[i]
    # solve A = CH, H is in LU format
    B = H[:r]
    # It will be much faster to use dtrsm instead of dtrtrs
    trtrs = get_lapack_funcs('trtrs', [B])
    # Return values are ignored on purpose: C is overwritten in place.
    trtrs(B, C, trans = 1, lower = 0, unitdiag = 0, overwrite_b = 1)
    trtrs(B, C, trans = 1, lower = 1, unitdiag = 1, overwrite_b = 1)
    # C has shape (r, N) -- it is stored transposed
    # find max value in C
    i, j = divmod(abs(C).argmax(), N)
    # set cgeru or zgeru for complex numbers and dger or sger for float numbers
    try:
        ger = get_blas_funcs('geru', [C])
    except ValueError:
        # ``geru`` does not exist for real types; anything else is a real
        # error and must not be swallowed.
        ger = get_blas_funcs('ger', [C])
    # set iters to 0
    iters = 0
    # check if need to swap rows
    while abs(C[i,j]) > tol and iters < max_iters:
        # add j to index and recompute C by SVM-formula
        index[i] = j
        tmp_row = C[i].copy()
        tmp_column = C[:,j].copy()
        tmp_column[i] -= 1.
        alpha = -1./C[i,j]
        # rank-one update applied to C in place
        ger(alpha, tmp_column, tmp_row, a = C, overwrite_a = 1)
        iters += 1
        i, j = divmod(abs(C).argmax(), N)
    return index[:r].copy(), C.T
def symeig(mtxA, mtxB=None, eigenvectors=True, select=None):
    """Solve a symmetric/Hermitian (generalized) eigenvalue problem.

    Parameters
    ----------
    mtxA : array_like
        Real symmetric or complex Hermitian matrix.
    mtxB : array_like, optional
        Right-hand-side matrix of the generalized problem
        ``mtxA v = w mtxB v``.
    eigenvectors : bool
        Only honoured when ``select`` is given; without ``select`` the
        eigenvectors are always returned (as before).
    select : tuple, optional
        Arguments for ``slice``; the slice is applied to the eigenvalues
        sorted in ascending order, so ``(0, 2)`` keeps the two smallest.

    Returns
    -------
    (w, v) or (w,)
        Selected eigenvalues in ascending order and, unless
        ``eigenvectors`` is false while ``select`` is given, the matching
        eigenvectors as columns of ``v``.

    Notes
    -----
    The full problem is delegated to ``scipy.linalg.eigh`` rather than to
    the raw LAPACK wrappers, whose positional output order is not the same
    in every SciPy release.  In the ``select`` branch the eigenvalues are
    now sorted with the same permutation as the eigenvectors (previously
    the eigenvalues were sliced unsorted), and ``eigenvectors=False`` no
    longer indexes the bare eigenvalue array as if it were a tuple.
    """
    import scipy.linalg as sla

    if select is None:
        return sla.eigh(mtxA, mtxB)

    window = slice(*select)
    if eigenvectors:
        w, v = sla.eig(mtxA, mtxB, right=True)
    else:
        # With right=False ``eig`` returns the eigenvalue array on its own.
        w = sla.eig(mtxA, mtxB, right=False)
        v = None

    order = np.argsort(w)
    w = w[order][window]
    if v is None:
        return (w,)
    return w, v[:, order][:, window]
def __init__(self, x, fun, jac, hess, hessp=None,
             k_easy=0.1, k_hard=0.2):
    """Set up the iterative trust-region subproblem solver.

    Parameters
    ----------
    x, fun, jac, hess
        Forwarded unchanged to the base-class constructor.
    hessp : optional
        Accepted for interface compatibility; it is not forwarded to the
        base class and not used here.
    k_easy, k_hard : float
        Parameters of the stopping criteria of the iterative subproblem
        solver.  Take a look at pp. 194-197 from reference _[1] for a more
        detailed description.
    """
    super(IterativeSubproblem, self).__init__(x, fun, jac, hess)

    # When the trust-region shrinks in two consecutive
    # calculations (``tr_radius < previous_tr_radius``)
    # the lower bound ``lambda_lb`` may be reused,
    # facilitating the convergence. To indicate no
    # previous value is known at first ``previous_tr_radius``
    # is set to -1 and ``lambda_lb`` to None.
    self.previous_tr_radius = -1
    self.lambda_lb = None

    self.niter = 0

    self.k_easy = k_easy
    self.k_hard = k_hard

    # Get Lapack function for cholesky decomposition.
    # The implemented Scipy wrapper does not return
    # the incomplete factorization needed by the method.
    # NOTE(review): ``self.hess`` is presumably provided by the base
    # class -- confirm.
    self.cholesky, = get_lapack_funcs(('potrf',), (self.hess,))

    # Get info about Hessian
    self.dimension = len(self.hess)
    self.hess_gershgorin_lb,\
        self.hess_gershgorin_ub = gershgorin_bounds(self.hess)
    # ``np.inf`` instead of the alias ``np.Inf``, which NumPy 2.0 removed.
    self.hess_inf = norm(self.hess, np.inf)
    self.hess_fro = norm(self.hess, 'fro')

    # A constant such that for vectors smaller than that
    # backward substitution is not reliable. It was established
    # based on Golub, G. H., Van Loan, C. F. (2013).
    # "Matrix computations". Fourth Edition. JHU press., p.165.
    # ``self.EPS`` is expected to be defined on the class.
    self.CLOSE_TO_ZERO = self.dimension * self.EPS * self.hess_inf
def test_for_first_element_equal_to_zero(self):
    """Check ``singular_leading_submatrix`` when ``A[0, 0]`` is zero."""
    # Test matrix; the original note says its leading 2x2 submatrix
    # is singular.
    rows = [[0, 3, 11],
            [3, 12, 5],
            [11, 5, 6]]
    A = np.array(rows)

    # Use the raw LAPACK routine so that the partial factor and the
    # second return value ``k`` are both available.
    (potrf,) = get_lapack_funcs(('potrf',), (A,))
    factor, k = potrf(A, lower=False, overwrite_a=False, clean=True)

    # Shift the k-th diagonal entry by the returned ``delta``.
    shift, direction = singular_leading_submatrix(A, factor, k)
    last = k - 1
    A[last, last] += shift

    # The leading k-by-k submatrix must now be singular.
    leading_det = det(A[:k, :k])
    assert_array_almost_equal(leading_det, 0)

    # ``v`` must satisfy v^T A v = 0 for the shifted matrix.
    A_times_v = np.dot(A, direction)
    assert_array_almost_equal(np.dot(direction, A_times_v), 0)
def gmres_mgs(A, b, x0=None, tol=1e-5, restrt=None, maxiter=None, xtype=None,
              M=None, callback=None, residuals=None, reorth=False):
    """Generalized Minimum Residual Method (GMRES) based on MGS.

    GMRES iteratively refines the initial solution guess to the system
    Ax = b
    Modified Gram-Schmidt version

    Parameters
    ----------
    A : array, matrix, sparse matrix, LinearOperator
        n x n, linear system to solve
    b : array, matrix
        right hand side, shape is (n,) or (n,1)
    x0 : array, matrix
        initial guess, default is a vector of zeros
    tol : float
        relative convergence tolerance, i.e. tol is scaled by the norm
        of the initial preconditioned residual
    restrt : None, int
        - if int, restrt is max number of inner iterations
          and maxiter is the max number of outer iterations
        - if None, do not restart GMRES, and max number of inner iterations
          is maxiter
    maxiter : None, int
        - if restrt is None, maxiter is the max number of inner iterations
          and GMRES does not restart; if maxiter is also None,
          min(n, 40) inner iterations are used
        - if restrt is int, maxiter is the max number of outer iterations,
          and restrt is the max number of inner iterations
    xtype : type
        dtype for the solution.  The passed value is ignored: the dtype is
        always determined by automatic type detection.
    M : array, matrix, sparse matrix, LinearOperator
        n x n, inverted preconditioner, i.e. solve M A x = M b.
    callback : function
        User-supplied function is called after each iteration as
        callback(xk), where xk is the current solution vector
    residuals : list
        residuals contains the preconditioned residual norm history,
        including the initial residual.  The history is only recorded
        when an empty list is passed.
    reorth : boolean
        If True, then a check is made whether to re-orthogonalize the Krylov
        space each GMRES iteration

    Returns
    -------
    (xNew, info)
    xNew : an updated guess to the solution of Ax = b
    info : halting status of gmres

            ==  =============================================
            0   successful exit
            >0  convergence to tolerance not achieved,
                return iteration count instead.  This value
                is precisely the order of the Krylov space.
            <0  numerical breakdown, or illegal input
            ==  =============================================

    Notes
    -----
        - The LinearOperator class is in scipy.sparse.linalg.interface.
          Use this class if you prefer to define A or M as a mat-vec routine
          as opposed to explicitly constructing the matrix.  A.psolve(..) is
          still supported as a legacy.
        - For robustness, modified Gram-Schmidt is used to orthogonalize the
          Krylov Space Givens Rotations are used to provide the residual norm
          each iteration

    Examples
    --------
    >>> from pyamg.krylov import gmres
    >>> from pyamg.util.linalg import norm
    >>> import numpy as np
    >>> from pyamg.gallery import poisson
    >>> A = poisson((10,10))
    >>> b = np.ones((A.shape[0],))
    >>> (x,flag) = gmres(A,b, maxiter=2, tol=1e-8, orthog='mgs')
    >>> print(norm(b - A*x))
    6.5428213057

    References
    ----------
    .. [1] Yousef Saad, "Iterative Methods for Sparse Linear Systems,
       Second Edition", SIAM, pp. 151-172, pp. 272-275, 2003
       http://www-users.cs.umn.edu/~saad/books.html
    .. [2] C. T. Kelley, http://www4.ncsu.edu/~ctk/matlab_roots.html

    """
    # Convert inputs to linear system, with error checking
    A, M, x, b, postprocess = make_system(A, M, x0, b)
    dimen = A.shape[0]

    # Ensure that warnings are always reissued from this function
    import warnings
    warnings.filterwarnings('always', module='pyamg.krylov._gmres_mgs')

    # Choose type
    if not hasattr(A, 'dtype'):
        Atype = upcast(x.dtype, b.dtype)
    else:
        Atype = A.dtype
    if not hasattr(M, 'dtype'):
        Mtype = upcast(x.dtype, b.dtype)
    else:
        Mtype = M.dtype
    xtype = upcast(Atype, x.dtype, b.dtype, Mtype)

    if restrt is not None:
        restrt = int(restrt)
    if maxiter is not None:
        maxiter = int(maxiter)

    # Get fast access to underlying BLAS routines
    # dotc is the conjugate dot, dotu does no conjugation
    [lartg] = get_lapack_funcs(['lartg'], [x])
    if np.iscomplexobj(np.zeros((1, ), dtype=xtype)):
        [axpy, dotu, dotc, scal] =\
            get_blas_funcs(['axpy', 'dotu', 'dotc', 'scal'], [x])
    else:
        # real type
        [axpy, dotu, dotc, scal] =\
            get_blas_funcs(['axpy', 'dot', 'dot', 'scal'], [x])

    # Make full use of direct access to BLAS by defining own norm
    def norm(z):
        return np.sqrt(np.real(dotc(z, z)))

    # Should norm(r) be kept
    if residuals == []:
        keep_r = True
    else:
        keep_r = False

    # Set number of outer and inner iterations
    if restrt:
        if maxiter:
            max_outer = maxiter
        else:
            max_outer = 1
        if restrt > dimen:
            warn('Setting number of inner iterations (restrt) to maximum '
                 'allowed, which is A.shape[0] ')
            restrt = dimen
        max_inner = restrt
    else:
        max_outer = 1
        # Test for None first: ``None > dimen`` raises TypeError on
        # Python 3, which made the default call (no restrt, no maxiter)
        # fail.
        if maxiter is None:
            maxiter = min(dimen, 40)
        elif maxiter > dimen:
            warn('Setting number of inner iterations (maxiter) to maximum '
                 'allowed, which is A.shape[0] ')
            maxiter = dimen
        max_inner = maxiter

    # Is this a one dimensional matrix?
    if dimen == 1:
        entry = np.ravel(A * np.array([1.0], dtype=xtype))
        return (postprocess(b / entry), 0)

    # Prep for method
    r = b - np.ravel(A * x)

    # Apply preconditioner
    r = np.ravel(M * r)
    normr = norm(r)
    if keep_r:
        residuals.append(normr)

    # Check initial guess ( scaling by b, if b != 0,
    #   must account for case when norm(b) is very small)
    normb = norm(b)
    if normb == 0.0:
        normb = 1.0
    if normr < tol * normb:
        return (postprocess(x), 0)

    # Scale tol by ||r_0||_2, we use the preconditioned residual
    # because this is left preconditioned GMRES.
    if normr != 0.0:
        tol = tol * normr

    # Use separate variable to track iterations.  If convergence fails, we
    # cannot simply report niter = (outer-1)*max_outer + inner.  Numerical
    # error could cause the inner loop to halt while the actual ||r|| > tol.
    niter = 0

    # Begin GMRES
    for outer in range(max_outer):

        # Preallocate for Givens Rotations, Hessenberg matrix and Krylov Space
        # Space required is O(dimen*max_inner).
        # NOTE:  We are dealing with row-major matrices, so we traverse in a
        #        row-major fashion,
        #        i.e., H and V's transpose is what we store.
        Q = []  # Givens Rotations
        # Upper Hessenberg matrix, which is then
        #   converted to upper tri with Givens Rots
        H = np.zeros((max_inner + 1, max_inner + 1), dtype=xtype)
        V = np.zeros((max_inner + 1, dimen), dtype=xtype)  # Krylov Space
        # vs store the pointers to each column of V.
        #   This saves a considerable amount of time.
        vs = []
        # v = r/normr
        V[0, :] = scal(1.0 / normr, r)
        vs.append(V[0, :])

        # This is the RHS vector for the problem in the Krylov Space
        g = np.zeros((dimen, ), dtype=xtype)
        g[0] = normr

        for inner in range(max_inner):

            # New Search Direction
            v = V[inner + 1, :]
            v[:] = np.ravel(M * (A * vs[-1]))
            vs.append(v)
            normv_old = norm(v)

            # Modified Gram Schmidt
            for k in range(inner + 1):
                vk = vs[k]
                alpha = dotc(vk, v)
                H[inner, k] = alpha
                v[:] = axpy(vk, v, dimen, -alpha)

            normv = norm(v)
            H[inner, inner + 1] = normv

            # Re-orthogonalize
            if (reorth is True) and (normv_old == normv_old + 0.001 * normv):
                for k in range(inner + 1):
                    vk = vs[k]
                    alpha = dotc(vk, v)
                    H[inner, k] = H[inner, k] + alpha
                    v[:] = axpy(vk, v, dimen, -alpha)

            # Check for breakdown
            if H[inner, inner + 1] != 0.0:
                v[:] = scal(1.0 / H[inner, inner + 1], v)

            # Apply previous Givens rotations to H
            if inner > 0:
                apply_givens(Q, H[inner, :], inner)

            # Calculate and apply next complex-valued Givens Rotation
            # ==> Note that if max_inner = dimen, then this is unnecessary
            # for the last inner
            #     iteration, when inner = dimen-1.
            if inner != dimen - 1:
                if H[inner, inner + 1] != 0:
                    # The third output of lartg is not needed; it is
                    # discarded so it cannot clobber the residual ``r``.
                    [c, s, _] = lartg(H[inner, inner], H[inner, inner + 1])
                    Qblock = np.array([[c, s], [-np.conjugate(s), c]],
                                      dtype=xtype)
                    Q.append(Qblock)

                    # Apply Givens Rotation to g,
                    #   the RHS for the linear system in the Krylov Subspace.
                    g[inner:inner + 2] = np.dot(Qblock, g[inner:inner + 2])

                    # Apply effect of Givens Rotation to H
                    H[inner, inner] = dotu(Qblock[0, :],
                                           H[inner, inner:inner + 2])
                    H[inner, inner + 1] = 0.0

            niter += 1

            # Don't update normr if last inner iteration, because
            # normr is calculated directly after this loop ends.
            if inner < max_inner - 1:
                normr = np.abs(g[inner + 1])
                if normr < tol:
                    break

                # Allow user access to the iterates
                if callback is not None:
                    callback(x)
                if keep_r:
                    residuals.append(normr)

        # end inner loop, back to outer loop

        # Find best update to x in Krylov Space V.  Solve inner x inner system.
        y = sp.linalg.solve(H[0:inner + 1, 0:inner + 1].T, g[0:inner + 1])
        update = np.ravel(V[:inner + 1, :].T.dot(y.reshape(-1, 1)))
        x = x + update
        r = b - np.ravel(A * x)

        # Apply preconditioner
        r = np.ravel(M * r)
        normr = norm(r)

        # Allow user access to the iterates
        if callback is not None:
            callback(x)

        if keep_r:
            residuals.append(normr)

        # Has GMRES stagnated?
        indices = (x != 0)
        if indices.any():
            change = np.max(np.abs(update[indices] / x[indices]))
            if change < 1e-12:
                # No change, halt
                return (postprocess(x), -1)

        # test for convergence
        if normr < tol:
            return (postprocess(x), 0)

    # end outer loop

    return (postprocess(x), niter)
def gmres_mgs(T,S,nc,X,b,Rp,Xp,Yp,mu, R, max_iter = 1000, tol = 1.0E-8):
    """
    Restarted GMRES solver (modified Gram-Schmidt version).

    The matrix is never formed: every matrix-vector product is obtained
    from ``vector_Ax_p2p(T, S, nc, v, Rp, Xp, Yp, mu)``.

    Arguments
    ----------
    T, S, nc, Rp, Xp, Yp, mu :
               forwarded unchanged to ``vector_Ax_p2p``; their meaning is
               defined by that routine.
    X        : array, initial guess.
    b        : array, right hand side.
    R        : int, number of inner iterations before a restart; it is
               capped at ``len(X)``.
    max_iter : int, maximum total number of inner iterations.
    tol      : float, tolerance on the residual relative to ``norm(b)``
               (1 is used when ``norm(b)`` is zero).

    Returns
    --------
    X         : array, an updated guess to the solution.
    iteration : int, number of inner iterations performed (0 when the
                initial guess already satisfies the tolerance).

    References
    ----------
    .. [1] Yousef Saad, "Iterative Methods for Sparse Linear Systems,
       Second Edition", SIAM, pp. 151-172, pp. 272-275, 2003
       http://www-users.cs.umn.edu/~saad/books.html
    .. [2] C. T. Kelley, http://www4.ncsu.edu/~ctk/matlab_roots.html
    """

    # Defining xtype as dtype of the problem, to decide which BLAS functions
    # import.
    xtype = upcast(X.dtype, b.dtype)

    # Get fast access to underlying BLAS routines
    # dotc is the conjugate dot, dotu does no conjugation
    [lartg] = get_lapack_funcs(['lartg'], [X] )
    if numpy.iscomplexobj(numpy.zeros((1,), dtype=xtype)):
        [axpy, dotu, dotc, scal] =\
            get_blas_funcs(['axpy', 'dotu', 'dotc', 'scal'], [X])
    else:
        # real type
        [axpy, dotu, dotc, scal] =\
            get_blas_funcs(['axpy', 'dot', 'dot', 'scal'], [X])

    # Make full use of direct access to BLAS by defining own norm
    def norm(z):
        return numpy.sqrt(numpy.real(dotc(z, z)))

    # Defining dimension
    dimen = len(X)

    # Set number of outer and inner iterations
    if R > dimen:
        warn('Setting number of inner iterations (restrt) to maximum '
             'allowed, which is A.shape[0] ')
        R = dimen

    max_inner = R

    # max_outer should be max_iter/max_inner but this might not be an integer
    # so we get the ceil of the division.  The explicit float keeps Python 2
    # from flooring the quotient before ``ceil`` sees it.
    # In the inner loop there is a if statement to break in case max_iter is
    # reached.
    max_outer = int(numpy.ceil(float(max_iter) / max_inner))

    # Prep for method
    aux = vector_Ax_p2p(T,S,nc,X,Rp,Xp,Yp,mu)
    r = b - aux

    normr = norm(r)

    # Check initial guess ( scaling by b, if b != 0, must account for
    # case when norm(b) is very small)
    normb = norm(b)
    if normb == 0.0:
        normb = 1.0

    iteration = 0

    if normr < tol*normb:
        # Same (X, iteration) shape as every other exit of this function.
        return X, iteration

    # Reference value for the relative residual.
    res_0 = normb
    # Defined up front so the progress messages below never hit an unbound
    # name (the inner loop does not assign it when max_inner == 1).
    rel_resid = normr/res_0

    # Here start the GMRES
    for outer in range(max_outer):

        # Preallocate for Givens Rotations, Hessenberg matrix and Krylov Space
        # Space required is O(dimen*max_inner).
        # NOTE:  We are dealing with row-major matrices, so we traverse in a
        #        row-major fashion,
        #        i.e., H and V's transpose is what we store.

        Q = []  # Initializing Givens Rotations
        # Upper Hessenberg matrix, which is then
        # converted to upper triangular with Givens Rotations

        H = numpy.zeros((max_inner+1, max_inner+1), dtype=xtype)
        V = numpy.zeros((max_inner+1, dimen), dtype=xtype)  # Krylov space

        # vs store the pointers to each column of V.
        # This saves a considerable amount of time.
        vs = []

        # v = r/normr
        V[0, :] = scal(1.0/normr, r)  # scal wrapper of dscal --> x = a*x
        vs.append(V[0, :])

        # RHS vector in the Krylov space
        g = numpy.zeros((dimen, ), dtype=xtype)
        g[0] = normr

        for inner in range(max_inner):
            # New search direction
            v = V[inner+1, :]
            v[:] = vector_Ax_p2p(T,S,nc,vs[-1],Rp,Xp,Yp,mu)
            vs.append(v)

            # Modified Gram Schmidt
            for k in range(inner+1):
                vk = vs[k]
                alpha = dotc(vk, v)
                H[inner, k] = alpha
                v[:] = axpy(vk, v, dimen, -alpha)  # y := a*x + y
                # axpy is a wrapper for daxpy (blas function)

            normv = norm(v)
            H[inner, inner+1] = normv

            # Check for breakdown
            if H[inner, inner+1] != 0.0:
                v[:] = scal(1.0/H[inner, inner+1], v)

            # Apply for Givens rotations to H
            if inner > 0:
                apply_givens(Q, H[inner, :], inner)

            # Calculate and apply next complex-valued Givens rotations

            # If max_inner = dimen, we don't need to calculate, this
            # is unnecessary for the last inner iteration when inner = dimen -1

            if inner != dimen - 1:
                if H[inner, inner+1] != 0:
                    # lartg is a lapack function that computes the parameters
                    # for a Givens rotation
                    [c, s, _] = lartg(H[inner, inner], H[inner, inner+1])
                    Qblock = numpy.array([[c, s], [-numpy.conjugate(s),c]],
                                         dtype=xtype)
                    Q.append(Qblock)

                    # Apply Givens Rotations to RHS for the linear system in
                    # the krylov space.  ``numpy.dot`` replaces the removed
                    # ``scipy.dot`` alias.
                    g[inner:inner+2] = numpy.dot(Qblock, g[inner:inner+2])

                    # Apply Givens rotations to H
                    H[inner, inner] = dotu(Qblock[0,:],
                                           H[inner, inner:inner+2])
                    H[inner, inner+1] = 0.0

            iteration += 1

            if inner < max_inner-1:
                normr = abs(g[inner+1])
                rel_resid = normr/res_0

                if rel_resid < tol:
                    break

            print('Iteration: {}, relative residual: {}'.format(iteration,rel_resid))

            if (inner + 1 == R):
                print('Residual: {}. Restart...'.format(rel_resid))
            if iteration==max_iter:
                print('Warning!!!!'
                      'You have reached the maximum number of iterations : {}.'.format(iteration))
                print('The run will stop. Check the residual behaviour you might have a bug.'
                      'For future runs you might consider changing the tolerance or'
                      ' increasing the number of max_iter.')
                break

        # end inner loop, back to outer loop

        # Find best update to X in Krylov Space V.  Solve inner X inner system.
        y = scipy.linalg.solve (H[0:inner+1, 0:inner+1].T, g[0:inner+1])
        # Plain ndarray product; replaces the removed ``scipy.mat`` alias.
        update = numpy.ravel(numpy.dot(V[:inner+1, :].T, y.reshape(-1,1)))
        X = X + update
        aux = vector_Ax_p2p(T,S,nc,X,Rp,Xp,Yp,mu)
        r = b - aux

        normr = norm(r)
        rel_resid = normr/res_0

        # test for convergence
        if rel_resid < tol:
            print('GMRES solve')
            print('Converged after {} iterations to a residual of {}'.format(iteration,rel_resid))
            return X, iteration

    # end outer loop

    return X, iteration
def admm(self, z, y, gamma):
    """Alternating direction method of multipliers.

    Parameters
    ----------
    z : ndarray
        Starting value of the thresholded variable.
    y : ndarray
        Starting value of the Lagrange multiplier.
    gamma : float
        Sparsity weight; the soft-threshold level is ``gamma / self.rho``.

    Returns
    -------
    z : ndarray
        The value of ``z`` held when the stopping criteria were met (the
        iterate from before the final update, as in the matlab source), or
        the last iterate after ``self.max_admm_iter`` steps.

    Notes
    -----
    Reads ``self.rho``, ``self.dmd.q``, ``self.Prho``, ``self.n``,
    ``self.eps_abs``, ``self.eps_rel`` and ``self.max_admm_iter``.
    """
    # Optimization notes (kept from the original author):
    # A custom BLAS norm for 1d vectors and direct access to the LAPACK
    # solver make this ~3x faster than a naive translation of the matlab
    # version.  The convergence test is now the dominant cost (~3x the
    # solver); testing only every n iterations (n~10) would be ~2x faster
    # but breaks output comparison with the matlab code.  Further avenues:
    # cython, or a second core watching for convergence.

    a = (gamma / self.rho)
    q = self.dmd.q

    # precompute cholesky decomposition
    C = linalg.cholesky(self.Prho, lower=False)
    # link directly to LAPACK fortran solver for positive
    # definite symmetric system with precomputed cholesky decomp:
    potrs, = linalg.get_lapack_funcs(('potrs', ), arrays=(C, q))

    # simple norm of a 1d vector, called directly from BLAS
    norm, = linalg.get_blas_funcs(('nrm2', ), arrays=(q, ))

    # square root outside of the loop
    root_n = np.sqrt(self.n)

    for _ in range(self.max_admm_iter):
        # ## x-minimization step (alpha minimisation)
        u = z - (1. / self.rho) * y
        qs = q + (self.rho / 2.) * u
        # Solve P x = qs, using fact that P is hermitian and
        # positive definite and assuming P is well behaved (no
        # inf or nan).
        xnew = potrs(C, qs, lower=False, overwrite_b=False)[0]

        # ## z-minimization step (beta minimisation)
        # ``1.`` keeps this a true division even for an integer rho on
        # Python 2, matching the x-minimization step above.
        v = xnew + (1. / self.rho) * y

        # Soft-thresholding of v: zero where |v| <= a, otherwise v shrunk
        # towards zero by ``a`` in modulus (v may be complex, so np.sign
        # is not used).  The division is restricted to the entries that
        # survive the threshold: dividing first and masking afterwards
        # turned an exact zero in v into NaN (-inf * 0).
        abs_v = np.abs(v)
        keep = abs_v > a
        znew = np.zeros_like(v)
        znew[keep] = (1 - a / abs_v[keep]) * v[keep]

        # ## Lagrange multiplier update step
        y = y + self.rho * (xnew - znew)

        # ## Test convergence of admm
        # Primal and dual residuals
        res_prim = norm(xnew - znew)
        res_dual = self.rho * norm(znew - z)

        # Stopping criteria
        eps_prim = root_n * self.eps_abs \
                    + self.eps_rel * max(norm(xnew), norm(znew))
        eps_dual = root_n * self.eps_abs + self.eps_rel * norm(y)

        if (res_prim < eps_prim) and (res_dual < eps_dual):
            return z
        z = znew

    return z
def gmres_mgs(A, b, x0=None, tol=1e-5, restrt=None, maxiter=None, xtype=None,
              M=None, callback=None, residuals=None, reorth=False):
    '''
    Generalized Minimum Residual Method (GMRES)
        GMRES iteratively refines the initial solution guess to the system
        Ax = b
        Modified Gram-Schmidt version

    Parameters
    ----------
    A : {array, matrix, sparse matrix, LinearOperator}
        n x n, linear system to solve
    b : {array, matrix}
        right hand side, shape is (n,) or (n,1)
    x0 : {array, matrix}
        initial guess, default is a vector of zeros
    tol : float
        relative convergence tolerance, i.e. tol is scaled by the norm
        of the initial preconditioned residual
    restrt : {None, int}
        - if int, restrt is max number of inner iterations
          and maxiter is the max number of outer iterations
        - if None, do not restart GMRES, and max number of inner iterations
          is maxiter
    maxiter : {None, int}
        - if restrt is None, maxiter is the max number of inner iterations
          and GMRES does not restart; if maxiter is also None,
          min(n, 40) inner iterations are used
        - if restrt is int, maxiter is the max number of outer iterations,
          and restrt is the max number of inner iterations
    xtype : type
        dtype for the solution.  The passed value is ignored: the dtype is
        always determined by automatic type detection.
    M : {array, matrix, sparse matrix, LinearOperator}
        n x n, inverted preconditioner, i.e. solve M A x = M b.
    callback : function
        User-supplied function is called after each iteration as
        callback( ||rk||_2 ), where rk is the current preconditioned residual
        vector
    residuals : list
        residuals contains the preconditioned residual norm history,
        including the initial residual.  The history is only recorded
        when an empty list is passed.
    reorth : boolean
        If True, then a check is made whether to re-orthogonalize the Krylov
        space each GMRES iteration

    Returns
    -------
    (xNew, info)
    xNew : an updated guess to the solution of Ax = b
    info : halting status of gmres

            ==  =============================================
            0   successful exit
            >0  convergence to tolerance not achieved,
                return iteration count instead.  This value
                is precisely the order of the Krylov space.
            <0  numerical breakdown, or illegal input
            ==  =============================================

    Notes
    -----
        - The LinearOperator class is in scipy.sparse.linalg.interface.
          Use this class if you prefer to define A or M as a mat-vec routine
          as opposed to explicitly constructing the matrix.  A.psolve(..) is
          still supported as a legacy.
        - For robustness, modified Gram-Schmidt is used to orthogonalize the
          Krylov Space Givens Rotations are used to provide the residual norm
          each iteration

    Examples
    --------
    >>> from pyamg.krylov import gmres
    >>> from pyamg.util.linalg import norm
    >>> import numpy as np
    >>> from pyamg.gallery import poisson
    >>> A = poisson((10,10))
    >>> b = np.ones((A.shape[0],))
    >>> (x,flag) = gmres(A,b, maxiter=2, tol=1e-8, orthog='mgs')
    >>> print(norm(b - A*x))
    6.5428213057

    References
    ----------
    .. [1] Yousef Saad, "Iterative Methods for Sparse Linear Systems,
       Second Edition", SIAM, pp. 151-172, pp. 272-275, 2003
       http://www-users.cs.umn.edu/~saad/books.html
    .. [2] C. T. Kelley, http://www4.ncsu.edu/~ctk/matlab_roots.html

    '''
    # Convert inputs to linear system, with error checking
    A, M, x, b, postprocess = make_system(A, M, x0, b)
    dimen = A.shape[0]

    # Ensure that warnings are always reissued from this function
    import warnings
    # Raw string: ``\.`` in a normal string is an invalid escape sequence.
    warnings.filterwarnings('always', module=r'pyamg\.krylov\._gmres_mgs')

    # Choose type
    if not hasattr(A, 'dtype'):
        Atype = upcast(x.dtype, b.dtype)
    else:
        Atype = A.dtype
    if not hasattr(M, 'dtype'):
        Mtype = upcast(x.dtype, b.dtype)
    else:
        Mtype = M.dtype
    xtype = upcast(Atype, x.dtype, b.dtype, Mtype)

    if restrt is not None:
        restrt = int(restrt)
    if maxiter is not None:
        maxiter = int(maxiter)

    # Get fast access to underlying BLAS routines
    # dotc is the conjugate dot, dotu does no conjugation
    [lartg] = get_lapack_funcs(['lartg'], [x])
    if iscomplexobj(zeros((1,), dtype=xtype)):
        [axpy, dotu, dotc, scal] =\
            get_blas_funcs(['axpy', 'dotu', 'dotc', 'scal'], [x])
    else:
        # real type
        [axpy, dotu, dotc, scal] =\
            get_blas_funcs(['axpy', 'dot', 'dot', 'scal'], [x])

    # Make full use of direct access to BLAS by defining own norm
    def norm(z):
        return sqrt(real(dotc(z, z)))

    # Should norm(r) be kept
    if residuals == []:
        keep_r = True
    else:
        keep_r = False

    # Set number of outer and inner iterations
    if restrt:
        if maxiter:
            max_outer = maxiter
        else:
            max_outer = 1
        if restrt > dimen:
            warn('Setting number of inner iterations (restrt) to maximum '
                 'allowed, which is A.shape[0] ')
            restrt = dimen
        max_inner = restrt
    else:
        max_outer = 1
        # Test for None first: ``None > dimen`` raises TypeError on
        # Python 3, which made the default call (no restrt, no maxiter)
        # fail.
        if maxiter is None:
            maxiter = min(dimen, 40)
        elif maxiter > dimen:
            warn('Setting number of inner iterations (maxiter) to maximum '
                 'allowed, which is A.shape[0] ')
            maxiter = dimen
        max_inner = maxiter

    # Is this a one dimensional matrix?
    if dimen == 1:
        entry = ravel(A*array([1.0], dtype=xtype))
        return (postprocess(b/entry), 0)

    # Prep for method
    r = b - ravel(A*x)

    # Apply preconditioner
    r = ravel(M*r)
    normr = norm(r)
    if keep_r:
        residuals.append(normr)

    # Check initial guess ( scaling by b, if b != 0,
    #   must account for case when norm(b) is very small)
    normb = norm(b)
    if normb == 0.0:
        normb = 1.0
    if normr < tol*normb:
        if callback is not None:
            callback(norm(r))
        return (postprocess(x), 0)

    # Scale tol by ||r_0||_2, we use the preconditioned residual
    # because this is left preconditioned GMRES.
    if normr != 0.0:
        tol = tol*normr

    # Use separate variable to track iterations.  If convergence fails, we
    # cannot simply report niter = (outer-1)*max_outer + inner.  Numerical
    # error could cause the inner loop to halt while the actual ||r|| > tol.
    niter = 0

    # Begin GMRES
    for outer in range(max_outer):

        # Preallocate for Givens Rotations, Hessenberg matrix and Krylov Space
        # Space required is O(dimen*max_inner).
        # NOTE:  We are dealing with row-major matrices, so we traverse in a
        #        row-major fashion,
        #        i.e., H and V's transpose is what we store.
        Q = []  # Givens Rotations
        # Upper Hessenberg matrix, which is then
        #   converted to upper tri with Givens Rots
        H = zeros((max_inner+1, max_inner+1), dtype=xtype)
        V = zeros((max_inner+1, dimen), dtype=xtype)  # Krylov Space
        # vs store the pointers to each column of V.
        #   This saves a considerable amount of time.
        vs = []
        # v = r/normr
        V[0, :] = scal(1.0/normr, r)
        vs.append(V[0, :])

        # This is the RHS vector for the problem in the Krylov Space
        g = zeros((dimen,), dtype=xtype)
        g[0] = normr

        for inner in range(max_inner):

            # New Search Direction
            v = V[inner+1, :]
            v[:] = ravel(M*(A*vs[-1]))
            vs.append(v)
            normv_old = norm(v)

            # Modified Gram Schmidt
            for k in range(inner+1):
                vk = vs[k]
                alpha = dotc(vk, v)
                H[inner, k] = alpha
                v[:] = axpy(vk, v, dimen, -alpha)

            normv = norm(v)
            H[inner, inner+1] = normv

            # Re-orthogonalize
            if (reorth is True) and (normv_old == normv_old + 0.001*normv):
                for k in range(inner+1):
                    vk = vs[k]
                    alpha = dotc(vk, v)
                    H[inner, k] = H[inner, k] + alpha
                    v[:] = axpy(vk, v, dimen, -alpha)

            # Check for breakdown
            if H[inner, inner+1] != 0.0:
                v[:] = scal(1.0/H[inner, inner+1], v)

            # Apply previous Givens rotations to H
            if inner > 0:
                apply_givens(Q, H[inner, :], inner)

            # Calculate and apply next complex-valued Givens Rotation
            # ==> Note that if max_inner = dimen, then this is unnecessary
            # for the last inner
            #     iteration, when inner = dimen-1.
            if inner != dimen-1:
                if H[inner, inner+1] != 0:
                    # The third output of lartg is not needed; it is
                    # discarded so it cannot clobber the residual ``r``.
                    [c, s, _] = lartg(H[inner, inner], H[inner, inner+1])
                    Qblock = array([[c, s], [-conjugate(s), c]], dtype=xtype)
                    Q.append(Qblock)

                    # Apply Givens Rotation to g,
                    #   the RHS for the linear system in the Krylov Subspace.
                    # ndarray.dot replaces the removed ``sp.dot`` alias.
                    g[inner:inner+2] = Qblock.dot(g[inner:inner+2])

                    # Apply effect of Givens Rotation to H
                    H[inner, inner] = dotu(Qblock[0, :],
                                           H[inner, inner:inner+2])
                    H[inner, inner+1] = 0.0

            niter += 1

            # Don't update normr if last inner iteration, because
            # normr is calculated directly after this loop ends.
            if inner < max_inner-1:
                normr = abs(g[inner+1])
                if normr < tol:
                    break

                # Allow user access to residual
                if callback is not None:
                    callback(normr)
                if keep_r:
                    residuals.append(normr)

        # end inner loop, back to outer loop

        # Find best update to x in Krylov Space V.  Solve inner x inner system.
        y = sp.linalg.solve(H[0:inner+1, 0:inner+1].T, g[0:inner+1])
        # Plain ndarray product; replaces the removed ``sp.mat`` alias.
        update = ravel(V[:inner+1, :].T.dot(y.reshape(-1, 1)))
        x = x + update
        r = b - ravel(A*x)

        # Apply preconditioner
        r = ravel(M*r)
        normr = norm(r)

        # Allow user access to residual
        if callback is not None:
            callback(normr)

        if keep_r:
            residuals.append(normr)

        # Has GMRES stagnated?
        indices = (x != 0)
        if indices.any():
            change = max(abs(update[indices] / x[indices]))
            if change < 1e-12:
                # No change, halt
                return (postprocess(x), -1)

        # test for convergence
        if normr < tol:
            return (postprocess(x), 0)

    # end outer loop

    return (postprocess(x), niter)
import numpy as np import scipy.sparse as ss import warnings from scipy.linalg import cho_factor, get_lapack_funcs from sklearn import linear_model from sklearn.metrics import pairwise_distances from graphs import Graph from ..mini_six import range from .neighbors import nearest_neighbors __all__ = ['sparse_regularized_graph', 'smce_graph'] # For quickly running cho_solve without lots of checking potrs = get_lapack_funcs('potrs') # TODO: implement NNLRS next # http://www.cis.pku.edu.cn/faculty/vision/zlin/Publications/2012-CVPR-NNLRS.pdf def smce_graph(X, metric='l2', sparsity_param=10, kmax=None, keep_ratio=0.95): '''Sparse graph construction from the SMCE paper. X : 2-dimensional array-like metric : str, optional sparsity_param : float, optional kmax : int, optional keep_ratio : float, optional When <1, keep edges up to (keep_ratio * total weight)
# -*- coding: utf-8 -*- import numpy as np from scipy.stats import f #fisher from . import dv, zero_finding import lmfit LinAlgError = np.linalg.LinAlgError from .base_functions import (_fold_exp, _coh_gaussian, _fold_exp_and_coh) import scipy.linalg as linalg posv = linalg.get_lapack_funcs(('posv')) def direct_solve(a, b): c, x, info = posv(a, b, lower=False, overwrite_a=True, overwrite_b=False) return x alpha = 0.001 def solve_mat(A, b_mat, method='ridge'): """ Returns the solution for the least squares problem |Ax - b_i|^2. """ if method == 'fast': #return linalg.solve(A.T.dot(A), A.T.dot(b_mat), sym_pos=True) return direct_solve(A.T.dot(A), A.T.dot(b_mat)) elif method == 'ridge':
from scipy import linalg from scipy.linalg import LinAlgError from scipy._lib._util import _asarray_validated _d = np.empty(0, np.float64) _z = np.empty(0, np.complex128) dgemm = linalg.get_blas_funcs('gemm', (_d,)) zgemm = linalg.get_blas_funcs('gemm', (_z,)) dgemv = linalg.get_blas_funcs('gemv', (_d,)) ddot = linalg.get_blas_funcs('dot', (_d,)) _I = np.cast['F'](1j) ############################################################################### # linalg.svd and linalg.pinv2 dgesdd, dgesdd_lwork = linalg.get_lapack_funcs(('gesdd', 'gesdd_lwork'), (_d,)) zgesdd, zgesdd_lwork = linalg.get_lapack_funcs(('gesdd', 'gesdd_lwork'), (_z,)) dgesvd, dgesvd_lwork = linalg.get_lapack_funcs(('gesvd', 'gesvd_lwork'), (_d,)) zgesvd, zgesvd_lwork = linalg.get_lapack_funcs(('gesvd', 'gesvd_lwork'), (_z,)) def _svd_lwork(shape, dtype=np.float64): """Set up SVD calculations on identical-shape float64/complex128 arrays.""" if dtype == np.float64: gesdd_lwork, gesvd_lwork = dgesdd_lwork, dgesvd_lwork else: assert dtype == np.complex128 gesdd_lwork, gesvd_lwork = zgesdd_lwork, zgesvd_lwork sdd_lwork = linalg.decomp_svd._compute_lwork( gesdd_lwork, *shape, compute_uv=True, full_matrices=False) svd_lwork = linalg.decomp_svd._compute_lwork(
def gmres_mgs(surf_array, field_array, X, b, param, ind0, timing, kernel):
    """
    Restarted GMRES solver using modified Gram-Schmidt orthogonalisation.

    The matrix-vector product is never formed explicitly; it is evaluated
    through ``gmres_dot`` on every Krylov step.

    Arguments
    ----------
    surf_array : array, contains the surface classes of each region on the
                        surface.
    field_array: array, contains the Field classes of each region on the
                        surface.
    X          : array, initial guess.
    b          : array, right hand side.
    param      : class, parameters related to the surface. This routine
                        reads ``param.max_iter``, ``param.restart`` and
                        ``param.tol``.
    ind0       : class, it contains the indices related to the treecode
                        computation.
    timing     : class, it contains timing information for different parts
                        of the code.
    kernel     : pycuda source module.

    Returns
    --------
    X          : array, an updated guess to the solution.
    iteration  : int, total number of inner (Krylov) iterations performed.
                      It is 0 when the initial guess already satisfies the
                      tolerance.

    References
    ----------
    .. [1] Yousef Saad, "Iterative Methods for Sparse Linear Systems,
       Second Edition", SIAM, pp. 151-172, pp. 272-275, 2003
       http://www-users.cs.umn.edu/~saad/books.html
    .. [2] C. T. Kelley, http://www4.ncsu.edu/~ctk/matlab_roots.html
    """

    # Defining xtype as dtype of the problem, to decide which BLAS functions
    # import.
    xtype = upcast(X.dtype, b.dtype)

    # Get fast access to underlying BLAS routines
    # dotc is the conjugate dot, dotu does no conjugation
    [lartg] = get_lapack_funcs(['lartg'], [X])
    if numpy.iscomplexobj(numpy.zeros((1,), dtype=xtype)):
        [axpy, dotu, dotc, scal] = \
            get_blas_funcs(['axpy', 'dotu', 'dotc', 'scal'], [X])
    else:
        # real type
        [axpy, dotu, dotc, scal] = \
            get_blas_funcs(['axpy', 'dot', 'dot', 'scal'], [X])

    # Make full use of direct access to BLAS by defining own norm
    def norm(z):
        return numpy.sqrt(numpy.real(dotc(z, z)))

    # Defining dimension
    dimen = len(X)

    max_iter = param.max_iter
    R = param.restart
    tol = param.tol

    # Set number of outer and inner iterations
    if R > dimen:
        warn('Setting number of inner iterations (restrt) to maximum '
             'allowed, which is A.shape[0] ')
        R = dimen

    max_inner = R

    # max_outer should be max_iter/max_inner but this might not be an integer
    # so we get the ceil of the division (explicit float division so the
    # ceil is meaningful under integer-division semantics as well).
    # In the inner loop there is an if statement to break in case max_iter is
    # reached.
    max_outer = int(numpy.ceil(float(max_iter) / max_inner))

    # Prep for method
    aux = gmres_dot(X, surf_array, field_array, ind0, param, timing, kernel)
    r = b - aux

    normr = norm(r)

    # Check initial guess ( scaling by b, if b != 0, must account for
    # case when norm(b) is very small)
    normb = norm(b)
    if normb == 0.0:
        normb = 1.0
    if normr < tol*normb:
        # Same (solution, iteration count) shape as every other exit path.
        return X, 0

    iteration = 0

    # Reference norm for the relative residual, and an initial value for the
    # relative residual so the progress messages below never see an unbound
    # name (this happens with restart == 1, where the in-loop update of
    # rel_resid is skipped).
    res_0 = normb
    rel_resid = normr/res_0

    # Here start the GMRES
    for outer in range(max_outer):

        # Preallocate for Givens Rotations, Hessenberg matrix and Krylov Space
        # Space required is O(dimen*max_inner).
        # NOTE: We are dealing with row-major matrices, so we traverse in a
        #       row-major fashion,
        #       i.e., H and V's transpose is what we store.

        Q = []  # Initializing Givens Rotations
        # Upper Hessenberg matrix, which is then
        # converted to upper triangular with Givens Rotations
        H = numpy.zeros((max_inner+1, max_inner+1), dtype=xtype)
        V = numpy.zeros((max_inner+1, dimen), dtype=xtype)  # Krylov space

        # vs store the pointers to each column of V.
        # This saves a considerable amount of time.
        vs = []

        # v = r/normr
        V[0, :] = scal(1.0/normr, r)  # scal wrapper of dscal --> x = a*x
        vs.append(V[0, :])

        # RHS vector in the Krylov space
        g = numpy.zeros((dimen, ), dtype=xtype)
        g[0] = normr

        for inner in range(max_inner):
            # New search direction
            v = V[inner+1, :]
            v[:] = gmres_dot(vs[-1], surf_array, field_array, ind0, param,
                             timing, kernel)
            vs.append(v)

            # Modified Gram Schmidt
            for k in range(inner+1):
                vk = vs[k]
                alpha = dotc(vk, v)
                H[inner, k] = alpha
                # y := a*x + y
                # axpy is a wrapper for daxpy (blas function)
                v[:] = axpy(vk, v, dimen, -alpha)

            normv = norm(v)
            H[inner, inner+1] = normv

            # Check for breakdown
            if H[inner, inner+1] != 0.0:
                v[:] = scal(1.0/H[inner, inner+1], v)

            # Apply previous Givens rotations to H
            if inner > 0:
                apply_givens(Q, H[inner, :], inner)

            # Calculate and apply next complex-valued Givens rotations
            # If max_inner = dimen, we don't need to calculate, this
            # is unnecessary for the last inner iteration when
            # inner = dimen - 1
            if inner != dimen - 1:
                if H[inner, inner+1] != 0:
                    # lartg is a lapack function that computes the parameters
                    # for a Givens rotation
                    [c, s, _] = lartg(H[inner, inner], H[inner, inner+1])
                    Qblock = numpy.array([[c, s], [-numpy.conjugate(s), c]],
                                         dtype=xtype)
                    Q.append(Qblock)

                    # Apply Givens Rotations to RHS for the linear system in
                    # the krylov space.
                    g[inner:inner+2] = numpy.dot(Qblock, g[inner:inner+2])

                    # Apply Givens rotations to H
                    H[inner, inner] = dotu(Qblock[0, :],
                                           H[inner, inner:inner+2])
                    H[inner, inner+1] = 0.0

            iteration += 1

            # Don't update the residual on the last inner iteration; the
            # true residual is recomputed right after this loop ends.
            if inner < max_inner-1:
                normr = abs(g[inner+1])
                rel_resid = normr/res_0

                if rel_resid < tol:
                    break

            if iteration % 1 == 0:
                print('Iteration: {}, relative residual: {}'.format(
                    iteration, rel_resid))

            if (inner + 1 == R):
                print('Residual: {}. Restart...'.format(rel_resid))
            if iteration == max_iter:
                print('Warning!!!!'
                      'You have reached the maximum number of iterations : {}.'.format(iteration))
                print('The run will stop. Check the residual behaviour you might have a bug.'
                      'For future runs you might consider changing the tolerance or'
                      ' increasing the number of max_iter.')
                break

        # end inner loop, back to outer loop

        # Find best update to X in Krylov Space V. Solve inner X inner system.
        y = scipy.linalg.solve(H[0:inner+1, 0:inner+1].T, g[0:inner+1])
        # V stores the Krylov vectors as rows, so the update is V^T y
        # (plain transpose, no conjugation).
        update = numpy.dot(V[:inner+1, :].T, y)
        X = X + update
        aux = gmres_dot(X, surf_array, field_array, ind0, param, timing,
                        kernel)
        r = b - aux

        normr = norm(r)
        rel_resid = normr/res_0

        # test for convergence
        if rel_resid < tol:
            print('GMRES solve')
            print('Converged after {} iterations to a residual of {}'.format(
                iteration, rel_resid))
            print('Time weight vector: {}'.format(timing.time_mass))
            print('Time sort : {}'.format(timing.time_sort))
            print('Time data transfer: {}'.format(timing.time_trans))
            print('Time P2M : {}'.format(timing.time_P2M))
            print('Time M2M : {}'.format(timing.time_M2M))
            print('Time M2P : {}'.format(timing.time_M2P))
            print('Time P2P : {}'.format(timing.time_P2P))
            print('\tTime analy: {}'.format(timing.time_an))

            return X, iteration

        # The inner ``break`` on max_iter only leaves the inner loop; stop
        # the restart cycles too so the iteration budget is actually honoured.
        if iteration >= max_iter:
            break

    # end outer loop

    return X, iteration
def lgmres(A, b, x0=None, tol=1e-5, maxiter=1000, M=None, callback=None,
           inner_m=30, outer_k=3, outer_v=None, store_outer_Av=True,
           prepend_outer_v=False):
    """
    Solve a matrix equation using the LGMRES algorithm.

    The LGMRES algorithm [1]_ [2]_ is designed to avoid some problems
    in the convergence in restarted GMRES, and often converges in fewer
    iterations.

    Parameters
    ----------
    A : {sparse matrix, dense matrix, LinearOperator}
        The real or complex N-by-N matrix of the linear system.
    b : {array, matrix}
        Right hand side of the linear system. Has shape (N,) or (N,1).
    x0 : {array, matrix}
        Starting guess for the solution.
    tol : float, optional
        Tolerance to achieve. The algorithm terminates when either the
        relative or the absolute residual is below `tol`.
    maxiter : int, optional
        Maximum number of iterations. Iteration will stop after maxiter
        steps even if the specified tolerance has not been achieved.
    M : {sparse matrix, dense matrix, LinearOperator}, optional
        Preconditioner for A. The preconditioner should approximate the
        inverse of A. Effective preconditioning dramatically improves the
        rate of convergence, which implies that fewer iterations are needed
        to reach a given error tolerance.
    callback : function, optional
        User-supplied function to call at the start of each outer
        iteration. It is called as callback(xk), where xk is the current
        solution vector.
    inner_m : int, optional
        Number of inner GMRES iterations per each outer iteration.
    outer_k : int, optional
        Number of vectors to carry between inner GMRES iterations.
        According to [1]_, good values are in the range of 1...3.
        However, note that if you want to use the additional vectors to
        accelerate solving multiple similar problems, larger values may
        be beneficial.
    outer_v : list of tuples, optional
        List containing tuples ``(v, Av)`` of vectors and corresponding
        matrix-vector products, used to augment the Krylov subspace, and
        carried between inner GMRES iterations. The element ``Av`` can
        be `None` if the matrix-vector product should be re-evaluated.
        This parameter is modified in-place by `lgmres`, and can be used
        to pass "guess" vectors in and out of the algorithm when solving
        similar problems.
    store_outer_Av : bool, optional
        Whether LGMRES should store also A*v in addition to vectors `v`
        in the `outer_v` list. Default is True.
    prepend_outer_v : bool, optional
        Whether to put outer_v augmentation vectors before Krylov iterates.
        In standard LGMRES, prepend_outer_v=False.

    Returns
    -------
    x : array or matrix
        The converged solution.
    info : int
        Provides convergence information:

            - 0  : successful exit
            - >0 : convergence to tolerance not achieved. The value is
              `maxiter` when the iteration budget ran out, or the number
              of outer iterations started when a non-finite inner
              solution forced an early exit.

    Raises
    ------
    ValueError
        If `b` contains non-finite entries.
    RuntimeError
        If the preconditioner maps a non-zero residual to the zero vector.

    Notes
    -----
    The LGMRES algorithm [1]_ [2]_ is designed to avoid the
    slowing of convergence in restarted GMRES, due to alternating
    residual vectors. Typically, it often outperforms GMRES(m) of
    comparable memory requirements by some measure, or at least is not
    much worse.

    Another advantage in this algorithm is that you can supply it with
    'guess' vectors in the `outer_v` argument that augment the Krylov
    subspace. If the solution lies close to the span of these vectors,
    the algorithm converges faster. This can be useful if several very
    similar matrices need to be inverted one after another, such as in
    Newton-Krylov iteration where the Jacobian matrix often changes
    little in the nonlinear steps.

    References
    ----------
    .. [1] A.H. Baker and E.R. Jessup and T. Manteuffel, "A Technique for
             Accelerating the Convergence of Restarted GMRES", SIAM J. Matrix
             Anal. Appl. 26, 962 (2005).
    .. [2] A.H. Baker, "On Improving the Performance of the Linear Solver
             restarted GMRES", PhD thesis, University of Colorado (2003).

    Examples
    --------
    >>> from scipy.sparse import csc_matrix
    >>> from scipy.sparse.linalg import lgmres
    >>> A = csc_matrix([[3, 2, 0], [1, -1, 0], [0, 5, 1]], dtype=float)
    >>> b = np.array([2, 4, -1], dtype=float)
    >>> x, exitCode = lgmres(A, b)
    >>> print(exitCode)            # 0 indicates successful convergence
    0
    >>> np.allclose(A.dot(x), b)
    True
    """
    A, M, x, b, postprocess = make_system(A, M, x0, b)

    if not np.isfinite(b).all():
        raise ValueError("RHS must contain only finite numbers")

    matvec = A.matvec
    psolve = M.matvec

    if outer_v is None:
        outer_v = []

    # The type-specific BLAS handles are looked up lazily, once the dtype of
    # the first residual is known.
    axpy, scal = None, None
    nrm2 = get_blas_funcs('nrm2', [b])

    b_norm = nrm2(b)
    if b_norm == 0:
        b_norm = 1

    for k_outer in range(maxiter):
        r_outer = matvec(x) - b

        # -- callback
        if callback is not None:
            callback(x)

        # -- determine input type routines
        if axpy is None:
            if np.iscomplexobj(r_outer) and not np.iscomplexobj(x):
                x = x.astype(r_outer.dtype)
            axpy, scal, nrm2 = get_blas_funcs(['axpy', 'scal', 'nrm2'],
                                              (x, r_outer))

        # -- check stopping condition
        r_norm = nrm2(r_outer)
        if r_norm <= tol * b_norm or r_norm <= tol:
            break

        # -- inner LGMRES iteration
        v0 = -psolve(r_outer)
        inner_res_0 = nrm2(v0)

        if inner_res_0 == 0:
            raise RuntimeError("Preconditioner returned a zero vector; "
                               "|v| ~ %.1g, |M v| = 0" % r_norm)

        v0 = scal(1.0 / inner_res_0, v0)

        try:
            Q, R, B, vs, zs, y = _fgmres(matvec,
                                         v0,
                                         inner_m,
                                         lpsolve=psolve,
                                         atol=tol * b_norm / r_norm,
                                         outer_v=outer_v,
                                         prepend_outer_v=prepend_outer_v)
            y *= inner_res_0
            if not np.isfinite(y).all():
                # Overflow etc. in computation. There's no way to
                # recover from this, so we have to bail out.
                raise LinAlgError()
        except LinAlgError:
            # Floating point over/underflow, non-finite result from
            # matmul etc. -- report failure.
            return postprocess(x), k_outer + 1

        # -- GMRES terminated: eval solution
        dx = zs[0] * y[0]
        for w, yc in zip(zs[1:], y[1:]):
            dx = axpy(w, dx, dx.shape[0], yc)  # dx += w*yc

        # -- Store LGMRES augmentation vectors
        nx = nrm2(dx)
        if nx > 0:
            if store_outer_Av:
                q = Q.dot(R.dot(y))
                ax = vs[0] * q[0]
                for v, qc in zip(vs[1:], q[1:]):
                    ax = axpy(v, ax, ax.shape[0], qc)
                outer_v.append((dx / nx, ax / nx))
            else:
                outer_v.append((dx / nx, None))

        # -- Retain only a finite number of augmentation vectors
        while len(outer_v) > outer_k:
            del outer_v[0]

        # -- Apply step
        x += dx
    else:
        # didn't converge ...
        return postprocess(x), maxiter

    return postprocess(x), 0
def _cholesky(a, lower=False, overwrite_a=False, clean=True,
              check_finite=True, full_pivot=False, pivot_tol=-1):
    """Common code for cholesky() and cho_factor().

    Parameters
    ----------
    a : array_like
        Square matrix to factorize. Inputs with fewer than two dimensions
        are promoted with ``atleast_2d``.
    lower : bool, optional
        Passed to the LAPACK routine to select the lower (True) or upper
        (False) triangular factor.
    overwrite_a : bool, optional
        Whether the LAPACK routine may overwrite the data of `a`. It is
        forced on when the conversion to an array already made a copy.
    clean : bool, optional
        Forwarded to ``potrf`` only; it is not used on the pivoted path.
    check_finite : bool, optional
        When True the input is converted with ``asarray_chkfinite``,
        otherwise with ``asarray``.
    full_pivot : bool, optional
        When True use the pivoted factorization ``pstrf`` and additionally
        return the computed rank and the pivot array.
    pivot_tol : float, optional
        Tolerance forwarded to ``pstrf`` as ``tol``.

    Returns
    -------
    (c, lower) when `full_pivot` is False, otherwise
    (c, lower, rank, pivot).

    Raises
    ------
    ValueError
        If the input is not 2-dimensional or not square, or if LAPACK
        reports an illegal argument.
    LinAlgError
        If the factorization fails (``info > 0`` from LAPACK).
    """
    a1 = asarray_chkfinite(a) if check_finite else asarray(a)
    a1 = atleast_2d(a1)

    # Dimension check
    if a1.ndim != 2:
        raise ValueError('Input array needs to be 2 dimensional but received '
                         'a {}d-array.'.format(a1.ndim))
    # Squareness check
    if a1.shape[0] != a1.shape[1]:
        raise ValueError('Input array is expected to be square but has '
                         'the shape: {}.'.format(a1.shape))

    # Quick return for square empty array. Keep the tuple shape consistent
    # with the non-empty result of the selected mode so callers can always
    # unpack the same number of values.
    if a1.size == 0:
        if full_pivot:
            return a1.copy(), lower, 0, asarray([], dtype='i')
        return a1.copy(), lower

    overwrite_a = overwrite_a or _datacopied(a1, a)

    if not full_pivot:
        # Plain (unpivoted) Cholesky factorization.
        potrf, = get_lapack_funcs(('potrf', ), (a1, ))
        c, info = potrf(a1, lower=lower, overwrite_a=overwrite_a, clean=clean)
        if info > 0:
            raise LinAlgError(
                "%d-th leading minor of the array is not positive "
                "definite" % info)
        if info < 0:
            raise ValueError(
                'LAPACK reported an illegal value in {}-th argument '
                'on entry to "POTRF".'.format(-info))
        return c, lower

    # Pivoted factorization: also yields the computed rank and the pivots.
    pstrf, = get_lapack_funcs(('pstrf', ), (a1, ))
    c, pivot, rank, info = pstrf(a1, lower=lower, overwrite_a=overwrite_a,
                                 tol=pivot_tol)
    if info > 0:
        if rank == 0:
            raise LinAlgError(
                "%d-th leading minor of the array is not positive "
                "semidefinite" % info)
        # Report the rank computed by LAPACK, not the status code.
        raise LinAlgError("The array is rank deficient with "
                          "computed rank %d" % rank)
    if info < 0:
        raise ValueError(
            'LAPACK reported an illegal value in {}-th argument '
            'on entry to "PSTRF".'.format(-info))
    return c, lower, rank, pivot
def lgmres(A, b, x0=None, tol=1e-5, maxiter=1000, M=None, callback=None,
           inner_m=30, outer_k=3, outer_v=None, store_outer_Av=True,
           prepend_outer_v=False, res="both"):
    """
    Solve a matrix equation using the LGMRES algorithm.

    The LGMRES algorithm [1]_ [2]_ is designed to avoid some problems
    in the convergence in restarted GMRES, and often converges in fewer
    iterations.

    Parameters
    ----------
    A : {sparse matrix, dense matrix, LinearOperator}
        The real or complex N-by-N matrix of the linear system.
    b : {array, matrix}
        Right hand side of the linear system. Has shape (N,) or (N,1).
    x0 : {array, matrix}
        Starting guess for the solution.
    tol : float, optional
        Tolerance to achieve. Which residual is compared against `tol`
        is selected by `res`.
    res : string, optional
        Stopping criterion, one of:

            - ``"relative"`` : stop when ``|r| <= tol * |b|``
            - ``"absolute"`` : stop when ``|r| <= tol``
            - ``"both"`` (scipy default) : stop when either holds
    maxiter : int, optional
        Maximum number of iterations. Iteration will stop after maxiter
        steps even if the specified tolerance has not been achieved.
    M : {sparse matrix, dense matrix, LinearOperator}, optional
        Preconditioner for A. The preconditioner should approximate the
        inverse of A. Effective preconditioning dramatically improves the
        rate of convergence, which implies that fewer iterations are needed
        to reach a given error tolerance.
    callback : function, optional
        User-supplied function to call at the start of each outer
        iteration. It is called as callback(xk), where xk is the current
        solution vector.
    inner_m : int, optional
        Number of inner GMRES iterations per each outer iteration.
    outer_k : int, optional
        Number of vectors to carry between inner GMRES iterations.
        According to [1]_, good values are in the range of 1...3.
        However, note that if you want to use the additional vectors to
        accelerate solving multiple similar problems, larger values may
        be beneficial.
    outer_v : list of tuples, optional
        List containing tuples ``(v, Av)`` of vectors and corresponding
        matrix-vector products, used to augment the Krylov subspace, and
        carried between inner GMRES iterations. The element ``Av`` can
        be `None` if the matrix-vector product should be re-evaluated.
        This parameter is modified in-place by `lgmres`, and can be used
        to pass "guess" vectors in and out of the algorithm when solving
        similar problems.
    store_outer_Av : bool, optional
        Whether LGMRES should store also A*v in addition to vectors `v`
        in the `outer_v` list. Default is True.
    prepend_outer_v : bool, optional
        Whether to put outer_v augmentation vectors before Krylov iterates.
        In standard LGMRES, prepend_outer_v=False.

    Returns
    -------
    x : array or matrix
        The converged solution.
    info : int
        Provides convergence information:

            - 0  : successful exit
            - >0 : convergence to tolerance not achieved. The value is
              `maxiter` when the iteration budget ran out, or the number
              of outer iterations started when a non-finite inner
              solution forced an early exit.

    Raises
    ------
    ValueError
        If `b` contains non-finite entries or `res` is not one of the
        recognised criteria.
    RuntimeError
        If the preconditioner maps a non-zero residual to the zero vector.

    Notes
    -----
    The LGMRES algorithm [1]_ [2]_ is designed to avoid the
    slowing of convergence in restarted GMRES, due to alternating
    residual vectors. Typically, it often outperforms GMRES(m) of
    comparable memory requirements by some measure, or at least is not
    much worse.

    Another advantage in this algorithm is that you can supply it with
    'guess' vectors in the `outer_v` argument that augment the Krylov
    subspace. If the solution lies close to the span of these vectors,
    the algorithm converges faster. This can be useful if several very
    similar matrices need to be inverted one after another, such as in
    Newton-Krylov iteration where the Jacobian matrix often changes
    little in the nonlinear steps.

    References
    ----------
    .. [1] A.H. Baker and E.R. Jessup and T. Manteuffel, "A Technique for
             Accelerating the Convergence of Restarted GMRES", SIAM J. Matrix
             Anal. Appl. 26, 962 (2005).
    .. [2] A.H. Baker, "On Improving the Performance of the Linear Solver
             restarted GMRES", PhD thesis, University of Colorado (2003).

    Examples
    --------
    >>> from scipy.sparse import csc_matrix
    >>> from scipy.sparse.linalg import lgmres
    >>> A = csc_matrix([[3, 2, 0], [1, -1, 0], [0, 5, 1]], dtype=float)
    >>> b = np.array([2, 4, -1], dtype=float)
    >>> x, exitCode = lgmres(A, b)
    >>> print(exitCode)            # 0 indicates successful convergence
    0
    >>> np.allclose(A.dot(x), b)
    True
    """
    A, M, x, b, postprocess = make_system(A, M, x0, b)

    if not np.isfinite(b).all():
        raise ValueError("RHS must contain only finite numbers")

    # Validate the stopping criterion once, up front, instead of on every
    # outer iteration (and never at all when maxiter == 0).
    if res not in ("both", "absolute", "relative"):
        raise ValueError("res must be absolute, relative or both")

    matvec = A.matvec
    psolve = M.matvec

    if outer_v is None:
        outer_v = []

    # The type-specific BLAS handles are looked up lazily, once the dtype of
    # the first residual is known.
    axpy, scal = None, None
    nrm2 = get_blas_funcs('nrm2', [b])

    b_norm = nrm2(b)
    if b_norm == 0:
        b_norm = 1

    for k_outer in range(maxiter):
        r_outer = matvec(x) - b

        # -- callback
        if callback is not None:
            callback(x)

        # -- determine input type routines
        if axpy is None:
            if np.iscomplexobj(r_outer) and not np.iscomplexobj(x):
                x = x.astype(r_outer.dtype)
            axpy, scal, nrm2 = get_blas_funcs(['axpy', 'scal', 'nrm2'],
                                              (x, r_outer))

        # -- check stopping condition
        r_norm = nrm2(r_outer)
        if res == "absolute":
            converged = r_norm <= tol
        elif res == "relative":
            converged = r_norm <= tol * b_norm
        else:
            converged = r_norm <= tol * b_norm or r_norm <= tol
        if converged:
            break

        # -- inner LGMRES iteration
        v0 = -psolve(r_outer)
        inner_res_0 = nrm2(v0)

        if inner_res_0 == 0:
            raise RuntimeError("Preconditioner returned a zero vector; "
                               "|v| ~ %.1g, |M v| = 0" % r_norm)

        v0 = scal(1.0/inner_res_0, v0)

        try:
            Q, R, B, vs, zs, y = _fgmres(matvec,
                                         v0,
                                         inner_m,
                                         lpsolve=psolve,
                                         atol=tol*b_norm/r_norm,
                                         outer_v=outer_v,
                                         prepend_outer_v=prepend_outer_v)
            y *= inner_res_0
            if not np.isfinite(y).all():
                # Overflow etc. in computation. There's no way to
                # recover from this, so we have to bail out.
                raise LinAlgError()
        except LinAlgError:
            # Floating point over/underflow, non-finite result from
            # matmul etc. -- report failure.
            return postprocess(x), k_outer + 1

        # -- GMRES terminated: eval solution
        dx = zs[0]*y[0]
        for w, yc in zip(zs[1:], y[1:]):
            dx = axpy(w, dx, dx.shape[0], yc)  # dx += w*yc

        # -- Store LGMRES augmentation vectors
        nx = nrm2(dx)
        if nx > 0:
            if store_outer_Av:
                q = Q.dot(R.dot(y))
                ax = vs[0]*q[0]
                for v, qc in zip(vs[1:], q[1:]):
                    ax = axpy(v, ax, ax.shape[0], qc)
                outer_v.append((dx/nx, ax/nx))
            else:
                outer_v.append((dx/nx, None))

        # -- Retain only a finite number of augmentation vectors
        while len(outer_v) > outer_k:
            del outer_v[0]

        # -- Apply step
        x += dx
    else:
        # didn't converge ...
        return postprocess(x), maxiter

    return postprocess(x), 0
# Licensed under the 3-clause BSD license. # http://opensource.org/licenses/BSD-3-Clause # # Copyright (C) 2014 Tuomas Sivula # All rights reserved. from __future__ import division import sys from timeit import default_timer as timer import numpy as np from scipy import linalg from sklearn.covariance import GraphLassoCV # LAPACK qr routine dgeqrf_routine = linalg.get_lapack_funcs('geqrf') from util import (invert_normal_params, olse, get_last_fit_sample, suppress_stdout, load_stan, copy_fit_samples) class Worker(object): """Worker responsible of calculations for each site. Parameters ---------- index : integer The index of this site stan_model : StanModel The StanModel instance responsible for the MCMC sampling.
def lgmres(A, b, x0=None, tol=1e-5, maxiter=1000, M=None, callback=None,
           inner_m=30, outer_k=3, outer_v=None, store_outer_Av=True):
    """
    Solve a matrix equation using the LGMRES algorithm.

    The LGMRES algorithm [1]_ [2]_ is designed to avoid some problems
    in the convergence in restarted GMRES, and often converges in fewer
    iterations.

    Parameters
    ----------
    A : {sparse matrix, dense matrix, LinearOperator}
        The real or complex N-by-N matrix of the linear system.
    b : {array, matrix}
        Right hand side of the linear system. Has shape (N,) or (N,1).
    x0 : {array, matrix}
        Starting guess for the solution.
    tol : float, optional
        Tolerance to achieve. The algorithm terminates when either the
        relative or the absolute residual is below `tol`.
    maxiter : int, optional
        Maximum number of iterations. Iteration will stop after maxiter
        steps even if the specified tolerance has not been achieved.
    M : {sparse matrix, dense matrix, LinearOperator}, optional
        Preconditioner for A. The preconditioner should approximate the
        inverse of A. Effective preconditioning dramatically improves the
        rate of convergence, which implies that fewer iterations are needed
        to reach a given error tolerance.
    callback : function, optional
        User-supplied function to call at the start of each outer
        iteration. It is called as callback(xk), where xk is the current
        solution vector.
    inner_m : int, optional
        Number of inner GMRES iterations per each outer iteration.
    outer_k : int, optional
        Number of vectors to carry between inner GMRES iterations.
        According to [1]_, good values are in the range of 1...3.
        However, note that if you want to use the additional vectors to
        accelerate solving multiple similar problems, larger values may
        be beneficial.
    outer_v : list of tuples, optional
        List containing tuples ``(v, Av)`` of vectors and corresponding
        matrix-vector products, used to augment the Krylov subspace, and
        carried between inner GMRES iterations. The element ``Av`` can
        be `None` if the matrix-vector product should be re-evaluated.
        This parameter is modified in-place by `lgmres`, and can be used
        to pass "guess" vectors in and out of the algorithm when solving
        similar problems.
    store_outer_Av : bool, optional
        Whether LGMRES should store also A*v in addition to vectors `v`
        in the `outer_v` list. Default is True.

    Returns
    -------
    x : array or matrix
        The converged solution.
    info : int
        Provides convergence information:

            - 0  : successful exit
            - >0 : convergence to tolerance not achieved. The value is
              `maxiter` when the iteration budget ran out, or the number
              of outer iterations started when non-finite values forced
              an early exit.

    Raises
    ------
    ValueError
        If `b` contains non-finite entries.
    RuntimeError
        If the preconditioner maps a non-zero residual to the zero vector.

    Notes
    -----
    The LGMRES algorithm [1]_ [2]_ is designed to avoid the
    slowing of convergence in restarted GMRES, due to alternating
    residual vectors. Typically, it often outperforms GMRES(m) of
    comparable memory requirements by some measure, or at least is not
    much worse.

    Another advantage in this algorithm is that you can supply it with
    'guess' vectors in the `outer_v` argument that augment the Krylov
    subspace. If the solution lies close to the span of these vectors,
    the algorithm converges faster. This can be useful if several very
    similar matrices need to be inverted one after another, such as in
    Newton-Krylov iteration where the Jacobian matrix often changes
    little in the nonlinear steps.

    References
    ----------
    .. [1] A.H. Baker and E.R. Jessup and T. Manteuffel,
             SIAM J. Matrix Anal. Appl. 26, 962 (2005).
    .. [2] A.H. Baker, PhD thesis, University of Colorado (2003).
             http://amath.colorado.edu/activities/thesis/allisonb/Thesis.ps

    """
    A, M, x, b, postprocess = make_system(A, M, x0, b)

    if not np.isfinite(b).all():
        raise ValueError("RHS must contain only finite numbers")

    matvec = A.matvec
    psolve = M.matvec

    if outer_v is None:
        outer_v = []

    # The type-specific BLAS handles are looked up lazily, once the dtype of
    # the first residual is known.
    axpy, dot, scal = None, None, None
    nrm2 = get_blas_funcs('nrm2', [b])

    b_norm = nrm2(b)
    if b_norm == 0:
        b_norm = 1

    for k_outer in range(maxiter):
        r_outer = matvec(x) - b

        # -- callback
        if callback is not None:
            callback(x)

        # -- determine input type routines
        if axpy is None:
            if np.iscomplexobj(r_outer) and not np.iscomplexobj(x):
                x = x.astype(r_outer.dtype)
            axpy, dot, scal, nrm2 = get_blas_funcs(
                ['axpy', 'dot', 'scal', 'nrm2'], (x, r_outer))

        # -- check stopping condition
        r_norm = nrm2(r_outer)
        if r_norm <= tol * b_norm or r_norm <= tol:
            break

        # -- inner LGMRES iteration
        vs0 = -psolve(r_outer)
        inner_res_0 = nrm2(vs0)

        if inner_res_0 == 0:
            raise RuntimeError("Preconditioner returned a zero vector; "
                               "|v| ~ %.1g, |M v| = 0" % r_norm)

        vs0 = scal(1.0 / inner_res_0, vs0)
        vs = [vs0]
        ws = []

        # H is stored in QR factorized form
        Q = np.ones((1, 1), dtype=vs0.dtype)
        R = np.zeros((1, 0), dtype=vs0.dtype)

        eps = np.finfo(vs0.dtype).eps

        breakdown = False

        for j in range(1, 1 + inner_m + len(outer_v)):
            # -- Arnoldi process:
            #
            #    Build an orthonormal basis V and matrices W and H such that
            #        A W = V H
            #    Columns of W, V, and H are stored in `ws`, `vs` and `hs`.
            #
            #    The first column of V is always the residual vector, `vs0`;
            #    V has *one more column* than the other of the three matrices.
            #
            #    The other columns in V are built by feeding in, one
            #    by one, some vectors `z` and orthonormalizing them
            #    against the basis so far. The trick here is to
            #    feed in first some augmentation vectors, before
            #    starting to construct the Krylov basis on `v0`.
            #
            #    It was shown in [BJM]_ that a good choice (the LGMRES choice)
            #    for these augmentation vectors are the `dx` vectors obtained
            #    from a couple of the previous restart cycles.
            #
            #    Note especially that while `vs0` is always the first
            #    column in V, there is no reason why it should also be
            #    the first column in W. (In fact, below `vs0` comes in
            #    W only after the augmentation vectors.)
            #
            #    The rest of the algorithm then goes as in GMRES, one
            #    solves a minimization problem in the smaller subspace
            #    spanned by W (range) and V (image).
            #

            #     ++ evaluate
            v_new = None
            if j < len(outer_v) + 1:
                z, v_new = outer_v[j - 1]
            elif j == len(outer_v) + 1:
                z = vs0
            else:
                z = vs[-1]

            if v_new is None:
                v_new = psolve(matvec(z))
            else:
                # Note: v_new is modified in-place below. Must make a
                # copy to ensure that the outer_v vectors are not
                # clobbered.
                v_new = v_new.copy()

            #     ++ orthogonalize
            v_new_norm = nrm2(v_new)

            hcur = np.zeros(j + 1, dtype=Q.dtype)
            for i, v in enumerate(vs):
                alpha = dot(v, v_new)
                hcur[i] = alpha
                v_new = axpy(v, v_new, v.shape[0], -alpha)  # v_new -= alpha*v
            hcur[-1] = nrm2(v_new)

            with np.errstate(over='ignore', divide='ignore'):
                # Careful with denormals
                alpha = 1 / hcur[-1]

            if np.isfinite(alpha):
                v_new = scal(alpha, v_new)

            if not (hcur[-1] > eps * v_new_norm):
                # v_new essentially in the span of previous vectors,
                # or we have nans. Bail out after updating the QR
                # solution.
                breakdown = True

            vs.append(v_new)
            ws.append(z)

            # -- GMRES optimization problem

            # Add new column to H=Q*R, padding other columns with zeros
            Q2 = np.zeros((j + 1, j + 1), dtype=Q.dtype, order='F')
            Q2[:j, :j] = Q
            Q2[j, j] = 1

            R2 = np.zeros((j + 1, j - 1), dtype=R.dtype, order='F')
            R2[:j, :] = R

            Q, R = qr_insert(Q2, R2, hcur, j - 1, which='col',
                             overwrite_qru=True, check_finite=False)

            # Transformed least squares problem
            # || Q R y - inner_res_0 * e_1 ||_2 = min!
            # Since R = [R'; 0], solution is
            # y = inner_res_0 (R')^{-1} (Q^H)[:j,0]

            # Residual is immediately known
            inner_res = abs(Q[0, -1]) * inner_res_0

            # -- check for termination
            if inner_res <= tol * inner_res_0 or breakdown:
                break

        if not np.isfinite(R[j - 1, j - 1]):
            # nans encountered, bail out
            return postprocess(x), k_outer + 1

        # -- Get the LSQ problem solution
        #
        # The problem is triangular, but the condition number may be
        # bad (or in case of breakdown the last diagonal entry may be
        # zero), so use lstsq instead of trtrs.
        y, _, _, _, = lstsq(R[:j, :j], Q[0, :j].conj())
        y *= inner_res_0

        if not np.isfinite(y).all():
            # Floating point over/underflow, non-finite result from
            # matmul etc. -- report failure.
            return postprocess(x), k_outer + 1

        # -- GMRES terminated: eval solution
        dx = ws[0] * y[0]
        for w, yc in zip(ws[1:], y[1:]):
            dx = axpy(w, dx, dx.shape[0], yc)  # dx += w*yc

        # -- Store LGMRES augmentation vectors
        nx = nrm2(dx)
        if nx > 0:
            if store_outer_Av:
                q = Q.dot(R.dot(y))
                ax = vs[0] * q[0]
                for v, qc in zip(vs[1:], q[1:]):
                    ax = axpy(v, ax, ax.shape[0], qc)
                outer_v.append((dx / nx, ax / nx))
            else:
                outer_v.append((dx / nx, None))

        # -- Retain only a finite number of augmentation vectors
        while len(outer_v) > outer_k:
            del outer_v[0]

        # -- Apply step
        x += dx
    else:
        # didn't converge ...
        return postprocess(x), maxiter

    return postprocess(x), 0
def make_interp_spline(x, y, k=3, t=None, bc_type=None, axis=0,
                       check_finite=True):
    """Compute the (coefficients of) interpolating B-spline.

    Parameters
    ----------
    x : array_like, shape (n,)
        Abscissas.
    y : array_like, shape (n, ...)
        Ordinates.
    k : int, optional
        B-spline degree. Default is cubic, k=3.
    t : array_like, shape (nt + k + 1,), optional.
        Knots.
        The number of knots needs to agree with the number of datapoints and
        the number of derivatives at the edges. Specifically, ``nt - n`` must
        equal ``len(deriv_l) + len(deriv_r)``.
    bc_type : 2-tuple or None
        Boundary conditions.
        Default is None, which means choosing the boundary conditions
        automatically. Otherwise, it must be a length-two tuple where the first
        element sets the boundary conditions at ``x[0]`` and the second
        element sets the boundary conditions at ``x[-1]``. Each of these must
        be an iterable of pairs ``(order, value)`` which gives the values of
        derivatives of specified orders at the given edge of the interpolation
        interval.
        Alternatively, the following string aliases are recognized:

        * ``"clamped"``: The first derivatives at the ends are zero. This is
           equivalent to ``bc_type=([(1, 0.0)], [(1, 0.0)])``.
        * ``"natural"``: The second derivatives at ends are zero. This is
          equivalent to ``bc_type=([(2, 0.0)], [(2, 0.0)])``.
        * ``"not-a-knot"`` (default): The first and second segments are the
          same polynomial. This is equivalent to having ``bc_type=None``.

    axis : int, optional
        Interpolation axis. Default is 0.
    check_finite : bool, optional
        Whether to check that the input arrays contain only finite numbers.
        Disabling may give a performance gain, but may result in problems
        (crashes, non-termination) if the inputs do contain infinities or NaNs.
        Default is True.

    Returns
    -------
    b : a BSpline object of the degree ``k`` and with knots ``t``.

    Raises
    ------
    ValueError
        If `bc_type` cannot be unpacked into a left/right pair, if extra
        information is supplied for ``k=0`` or ``k=1``, if `x` or `t` are
        not 1-D and sorted, if `x` has duplicates, if the shapes of `x`, `y`
        and `t` are inconsistent, or if the number of boundary derivatives
        does not match the number of knots.
    LinAlgError
        If the banded solver reports a singular collocation matrix.

    Examples
    --------

    Use cubic interpolation on Chebyshev nodes:

    >>> def cheb_nodes(N):
    ...     jj = 2.*np.arange(N) + 1
    ...     x = np.cos(np.pi * jj / 2 / N)[::-1]
    ...     return x

    >>> x = cheb_nodes(20)
    >>> y = np.sqrt(1 - x**2)

    >>> from scipy.interpolate import BSpline, make_interp_spline
    >>> b = make_interp_spline(x, y)
    >>> np.allclose(b(x), y)
    True

    Note that the default is a cubic spline with a not-a-knot boundary condition

    >>> b.k
    3

    Here we use a 'natural' spline, with zero 2nd derivatives at edges:

    >>> l, r = [(2, 0.0)], [(2, 0.0)]
    >>> b_n = make_interp_spline(x, y, bc_type=(l, r))  # or, bc_type="natural"
    >>> np.allclose(b_n(x), y)
    True
    >>> x0, x1 = x[0], x[-1]
    >>> np.allclose([b_n(x0, 2), b_n(x1, 2)], [0, 0])
    True

    Interpolation of parametric curves is also supported. As an example, we
    compute a discretization of a snail curve in polar coordinates

    >>> phi = np.linspace(0, 2.*np.pi, 40)
    >>> r = 0.3 + np.cos(phi)
    >>> x, y = r*np.cos(phi), r*np.sin(phi)  # convert to Cartesian coordinates

    Build an interpolating curve, parameterizing it by the angle

    >>> from scipy.interpolate import make_interp_spline
    >>> spl = make_interp_spline(phi, np.c_[x, y])

    Evaluate the interpolant on a finer grid (note that we transpose the result
    to unpack it into a pair of x- and y-arrays)

    >>> phi_new = np.linspace(0, 2.*np.pi, 100)
    >>> x_new, y_new = spl(phi_new).T

    Plot the result

    >>> import matplotlib.pyplot as plt
    >>> plt.plot(x, y, 'o')
    >>> plt.plot(x_new, y_new, '-')
    >>> plt.show()

    See Also
    --------
    BSpline : base class representing the B-spline objects
    CubicSpline : a cubic spline in the polynomial basis
    make_lsq_spline : a similar factory function for spline fitting
    UnivariateSpline : a wrapper over FITPACK spline fitting routines
    splrep : a wrapper over FITPACK spline fitting routines

    """
    # convert string aliases for the boundary conditions
    if bc_type is None or bc_type == 'not-a-knot':
        deriv_l, deriv_r = None, None
    elif isinstance(bc_type, str):
        deriv_l, deriv_r = bc_type, bc_type
    else:
        try:
            deriv_l, deriv_r = bc_type
        except TypeError as e:
            # keep the original unpacking failure attached as the cause
            raise ValueError("Unknown boundary condition: %s" % bc_type) from e

    y = np.asarray(y)

    axis = normalize_axis_index(axis, y.ndim)

    # special-case k=0 right away
    if k == 0:
        if any(_ is not None for _ in (t, deriv_l, deriv_r)):
            raise ValueError("Too much info for k=0: t and bc_type can only "
                             "be None.")
        x = _as_float_array(x, check_finite)
        t = np.r_[x, x[-1]]
        c = np.asarray(y)
        c = np.rollaxis(c, axis)
        c = np.ascontiguousarray(c, dtype=_get_dtype(c.dtype))
        return BSpline.construct_fast(t, c, k, axis=axis)

    # special-case k=1 (e.g., Lyche and Morken, Eq.(2.16))
    if k == 1 and t is None:
        if not (deriv_l is None and deriv_r is None):
            raise ValueError(
                "Too much info for k=1: bc_type can only be None.")
        x = _as_float_array(x, check_finite)
        t = np.r_[x[0], x, x[-1]]
        c = np.asarray(y)
        c = np.rollaxis(c, axis)
        c = np.ascontiguousarray(c, dtype=_get_dtype(c.dtype))
        return BSpline.construct_fast(t, c, k, axis=axis)

    x = _as_float_array(x, check_finite)
    y = _as_float_array(y, check_finite)
    k = operator.index(k)

    # come up with a sensible knot vector, if needed
    if t is None:
        if deriv_l is None and deriv_r is None:
            if k == 2:
                # OK, it's a bit ad hoc: Greville sites + omit
                # 2nd and 2nd-to-last points, a la not-a-knot
                t = (x[1:] + x[:-1]) / 2.
                t = np.r_[(x[0], ) * (k + 1),
                          t[1:-1],
                          (x[-1], ) * (k + 1)]
            else:
                t = _not_a_knot(x, k)
        else:
            t = _augknt(x, k)

    t = _as_float_array(t, check_finite)

    y = np.rollaxis(y, axis)    # now internally interp axis is zero

    if x.ndim != 1 or np.any(x[1:] < x[:-1]):
        raise ValueError("Expect x to be a 1-D sorted array_like.")
    if np.any(x[1:] == x[:-1]):
        raise ValueError("Expect x to not have duplicates")
    if k < 0:
        raise ValueError("Expect non-negative k.")
    if t.ndim != 1 or np.any(t[1:] < t[:-1]):
        raise ValueError("Expect t to be a 1-D sorted array_like.")
    if x.size != y.shape[0]:
        raise ValueError('Shapes of x {} and y {} are incompatible'.format(
            x.shape, y.shape))
    if t.size < x.size + k + 1:
        raise ValueError('Got %d knots, need at least %d.' %
                         (t.size, x.size + k + 1))
    if (x[0] < t[k]) or (x[-1] > t[-k]):
        raise ValueError('Out of bounds w/ x = %s.' % x)

    # Here : deriv_l, r = [(nu, value), ...]
    deriv_l = _convert_string_aliases(deriv_l, y.shape[1:])
    deriv_l_ords, deriv_l_vals = _process_deriv_spec(deriv_l)
    nleft = deriv_l_ords.shape[0]

    deriv_r = _convert_string_aliases(deriv_r, y.shape[1:])
    deriv_r_ords, deriv_r_vals = _process_deriv_spec(deriv_r)
    nright = deriv_r_ords.shape[0]

    # have `n` conditions for `nt` coefficients; need nt-n derivatives
    n = x.size
    nt = t.size - k - 1

    if nt - n != nleft + nright:
        raise ValueError("The number of derivatives at boundaries does not "
                         "match: expected %s, got %s+%s" %
                         (nt - n, nleft, nright))

    # set up the LHS: the collocation matrix + derivatives at boundaries
    kl = ku = k
    # np.float64 rather than the np.float_ alias, which NumPy 2.0 removed
    ab = np.zeros((2 * kl + ku + 1, nt), dtype=np.float64, order='F')
    _bspl._colloc(x, t, k, ab, offset=nleft)
    if nleft > 0:
        _bspl._handle_lhs_derivatives(t, k, x[0], ab, kl, ku, deriv_l_ords)
    if nright > 0:
        _bspl._handle_lhs_derivatives(t, k, x[-1], ab, kl, ku, deriv_r_ords,
                                      offset=nt - nright)

    # set up the RHS: values to interpolate (+ derivative values, if any)
    extradim = prod(y.shape[1:])
    rhs = np.empty((nt, extradim), dtype=y.dtype)
    if nleft > 0:
        rhs[:nleft] = deriv_l_vals.reshape(-1, extradim)
    rhs[nleft:nt - nright] = y.reshape(-1, extradim)
    if nright > 0:
        rhs[nt - nright:] = deriv_r_vals.reshape(-1, extradim)

    # solve Ab @ x = rhs; this is the relevant part of linalg.solve_banded
    if check_finite:
        ab, rhs = map(np.asarray_chkfinite, (ab, rhs))
    gbsv, = get_lapack_funcs(('gbsv', ), (ab, rhs))
    lu, piv, c, info = gbsv(kl, ku, ab, rhs,
                            overwrite_ab=True, overwrite_b=True)

    if info > 0:
        raise LinAlgError("Collocation matrix is singular.")
    elif info < 0:
        raise ValueError('illegal value in %d-th argument of internal gbsv' %
                         -info)

    c = np.ascontiguousarray(c.reshape((nt, ) + y.shape[1:]))
    return BSpline.construct_fast(t, c, k, axis=axis)
def gmres_householder(A, b, x0=None, tol=1e-5, restrt=None, maxiter=None,
                      xtype=None, M=None, callback=None, residuals=None):
    """Generalized Minimum Residual Method (GMRES).

    GMRES iteratively refines the initial solution guess to the
    system Ax = b.  Householder reflections are used for
    orthogonalization.

    Parameters
    ----------
    A : {array, matrix, sparse matrix, LinearOperator}
        n x n, linear system to solve
    b : {array, matrix}
        right hand side, shape is (n,) or (n, 1)
    x0 : {array, matrix}
        initial guess, default is a vector of zeros
    tol : float
        relative convergence tolerance, i.e. tol is scaled by the norm
        of the initial preconditioned residual
    restrt : {None, int}
        - if int, restrt is max number of inner iterations
          and maxiter is the max number of outer iterations
        - if None, do not restart GMRES, and max number of inner iterations
          is maxiter
    maxiter : {None, int}
        - if restrt is None, maxiter is the max number of inner iterations
          and GMRES does not restart
        - if restrt is int, maxiter is the max number of outer iterations,
          and restrt is the max number of inner iterations
        - if both are None, the number of inner iterations defaults to
          ``min(A.shape[0], 40)``
    xtype : type
        dtype for the solution, default is automatic type detection
    M : {array, matrix, sparse matrix, LinearOperator}
        n x n, inverted preconditioner, i.e. solve M A x = M b.
    callback : function
        User-supplied function is called after each iteration as
        callback(xk), where xk is the current solution vector
    residuals : list
        residuals contains the preconditioned residual norm history,
        including the initial residual.  The history is only recorded
        when an empty list is passed in.

    Returns
    -------
    (xNew, info)
    xNew : an updated guess to the solution of Ax = b
    info : halting status of gmres

            ==  =============================================
            0   successful exit
            >0  convergence to tolerance not achieved,
                return iteration count instead.  This value
                is precisely the order of the Krylov space.
            <0  numerical breakdown, or illegal input
            ==  =============================================

    Notes
    -----
        - The LinearOperator class is in scipy.sparse.linalg.interface.
          Use this class if you prefer to define A or M as a mat-vec routine
          as opposed to explicitly constructing the matrix.  A.psolve(..) is
          still supported as a legacy.
        - For robustness, Householder reflections are used to orthonormalize
          the Krylov Space
          Givens Rotations are used to provide the residual norm each iteration

    Examples
    --------
    >>> from pyamg.krylov import gmres
    >>> from pyamg.util.linalg import norm
    >>> import numpy as np
    >>> from pyamg.gallery import poisson
    >>> A = poisson((10, 10))
    >>> b = np.ones((A.shape[0],))
    >>> (x, flag) = gmres(A, b, maxiter=2, tol=1e-8, orthog='householder')
    >>> print('%.10f' % norm(b - A*x))
    6.5428213057

    References
    ----------
    .. [1] Yousef Saad, "Iterative Methods for Sparse Linear Systems,
       Second Edition", SIAM, pp. 151-172, pp. 272-275, 2003
       http://www-users.cs.umn.edu/~saad/books.html

    """
    # Convert inputs to linear system, with error checking
    A, M, x, b, postprocess = make_system(A, M, x0, b, xtype)
    dimen = A.shape[0]

    # Ensure that warnings are always reissued from this function
    # (raw string: the module filter is a regular expression)
    import warnings
    warnings.filterwarnings('always',
                            module=r'pyamg\.krylov\._gmres_householder')

    # Choose type
    if not hasattr(A, 'dtype'):
        Atype = upcast(x.dtype, b.dtype)
    else:
        Atype = A.dtype
    if not hasattr(M, 'dtype'):
        Mtype = upcast(x.dtype, b.dtype)
    else:
        Mtype = M.dtype
    xtype = upcast(Atype, x.dtype, b.dtype, Mtype)

    if restrt is not None:
        restrt = int(restrt)
    if maxiter is not None:
        maxiter = int(maxiter)

    # Should norm(r) be kept
    if residuals == []:
        keep_r = True
    else:
        keep_r = False

    # Set number of outer and inner iterations
    if restrt:
        if maxiter:
            max_outer = maxiter
        else:
            max_outer = 1
        if restrt > dimen:
            warn('Setting number of inner iterations (restrt) to maximum '
                 'allowed, which is A.shape[0] ')
            restrt = dimen
        max_inner = restrt
    else:
        max_outer = 1
        # Test for None first: comparing None with an int raises TypeError
        # on Python 3.
        if maxiter is None:
            maxiter = min(dimen, 40)
        elif maxiter > dimen:
            warn('Setting number of inner iterations (maxiter) to maximum '
                 'allowed, which is A.shape[0] ')
            maxiter = dimen
        max_inner = maxiter

    # Get fast access to underlying LAPACK routine
    [lartg] = get_lapack_funcs(['lartg'], [x])

    # Is this a one dimensional matrix?
    if dimen == 1:
        entry = np.ravel(A*np.array([1.0], dtype=xtype))
        return (postprocess(b/entry), 0)

    # Prep for method
    r = b - np.ravel(A*x)

    # Apply preconditioner
    r = np.ravel(M*r)
    normr = norm(r)
    if keep_r:
        residuals.append(normr)
    # Check for nan, inf
    # if isnan(r).any() or isinf(r).any():
    #    warn('inf or nan after application of preconditioner')
    #    return(postprocess(x), -1)

    # Check initial guess ( scaling by b, if b != 0,
    #   must account for case when norm(b) is very small)
    normb = norm(b)
    if normb == 0.0:
        normb = 1.0
    if normr < tol*normb:
        return (postprocess(x), 0)

    # Scale tol by ||r_0||_2, we use the preconditioned residual
    # because this is left preconditioned GMRES.
    if normr != 0.0:
        tol = tol*normr

    # Use separate variable to track iterations.  If convergence fails, we
    # cannot simply report niter = (outer-1)*max_outer + inner.  Numerical
    # error could cause the inner loop to halt while the actual ||r|| > tol.
    niter = 0

    # Begin GMRES
    for outer in range(max_outer):

        # Calculate vector w, which defines the Householder reflector
        #    Take shortcut in calculating,
        #    w = r + sign(r[1])*||r||_2*e_1
        # Note: w aliases r and is modified in place; r is recomputed from
        # scratch after the inner loop.
        w = r
        beta = mysign(w[0])*normr
        w[0] = w[0] + beta
        w[:] = w / norm(w)

        # Preallocate for Krylov vectors, Householder reflectors and
        # Hessenberg matrix
        # Space required is O(dimen*max_inner)
        # Givens Rotations
        Q = np.zeros((4*max_inner,), dtype=xtype)
        # upper Hessenberg matrix (made upper tri with Givens Rotations)
        H = np.zeros((max_inner, max_inner), dtype=xtype)
        # Householder reflectors
        W = np.zeros((max_inner+1, dimen), dtype=xtype)
        W[0, :] = w

        # Multiply r with (I - 2*w*w.T), i.e. apply the Householder reflector
        # This is the RHS vector for the problem in the Krylov Space
        g = np.zeros((dimen,), dtype=xtype)
        g[0] = -beta

        for inner in range(max_inner):
            # Calculate Krylov vector in two steps
            # (1) Calculate v = P_j*e_j = (I - 2*w*w.T)*e_j, where j = inner
            v = -2.0*np.conjugate(w[inner])*w
            v[inner] = v[inner] + 1.0
            # (2) Calculate the rest, v = P_1*P_2*P_3...P_{j-1}*ej.
            # for j in range(inner-1,-1,-1):
            #    v -= 2.0*dot(conjugate(W[j,:]), v)*W[j,:]
            amg_core.apply_householders(v, np.ravel(W), dimen, inner-1, -1, -1)

            # Calculate new search direction
            v = np.ravel(A*v)

            # Apply preconditioner
            v = np.ravel(M*v)
            # Check for nan, inf
            # if isnan(v).any() or isinf(v).any():
            #    warn('inf or nan after application of preconditioner')
            #    return(postprocess(x), -1)

            # Factor in all Householder orthogonal reflections on new search
            # direction
            # for j in range(inner+1):
            #    v -= 2.0*dot(conjugate(W[j,:]), v)*W[j,:]
            amg_core.apply_householders(v, np.ravel(W), dimen, 0, inner+1, 1)

            # Calculate next Householder reflector, w
            #  w = v[inner+1:] + sign(v[inner+1])*||v[inner+1:]||_2*e_{inner+1)
            #  Note that if max_inner = dimen, then this is unnecessary for the
            #  last inner iteration, when inner = dimen-1.  Here we do not need
            #  to calculate a Householder reflector or Givens rotation because
            #  nnz(v) is already the desired length, i.e. we do not need to
            #  zero anything out.
            if inner != dimen-1:
                if inner < (max_inner-1):
                    w = W[inner+1, :]
                vslice = v[inner+1:]
                alpha = norm(vslice)
                if alpha != 0:
                    alpha = mysign(vslice[0])*alpha
                    # do not need the final reflector for future calculations
                    if inner < (max_inner-1):
                        w[inner+1:] = vslice
                        w[inner+1] += alpha
                        w[:] = w / norm(w)

                    # Apply new reflector to v
                    #  v = v - 2.0*w*(w.T*v)
                    v[inner+1] = -alpha
                    v[inner+2:] = 0.0

            if inner > 0:
                # Apply all previous Givens Rotations to v
                amg_core.apply_givens(Q, v, dimen, inner)

            # Calculate the next Givens rotation, where j = inner Note that if
            # max_inner = dimen, then this is unnecessary for the last inner
            # iteration, when inner = dimen-1.  Here we do not need to
            # calculate a Householder reflector or Givens rotation because
            # nnz(v) is already the desired length, i.e. we do not need to zero
            # anything out.
            if inner != dimen-1:
                if v[inner+1] != 0:
                    # The third lartg output is not needed; it gets its own
                    # name so it does not shadow the residual vector r.
                    [c, s, _rot] = lartg(v[inner], v[inner+1])
                    Qblock = np.array([[c, s], [-np.conjugate(s), c]],
                                      dtype=xtype)
                    Q[(inner*4): ((inner+1)*4)] = np.ravel(Qblock).copy()

                    # Apply Givens Rotation to g, the RHS for the linear system
                    # in the Krylov Subspace.  Note that this dot does a matrix
                    # multiply, not an actual dot product where a conjugate
                    # transpose is taken
                    g[inner:inner+2] = np.dot(Qblock, g[inner:inner+2])

                    # Apply effect of Givens Rotation to v
                    v[inner] = np.dot(Qblock[0, :], v[inner:inner+2])
                    v[inner+1] = 0.0

            # Write to upper Hessenberg Matrix,
            #   the LHS for the linear system in the Krylov Subspace
            H[:, inner] = v[0:max_inner]

            niter += 1

            # Don't update normr if last inner iteration, because
            # normr is calculated directly after this loop ends.
            if inner < max_inner-1:
                normr = np.abs(g[inner+1])
                if normr < tol:
                    break

                # Allow user access to the iterates
                if callback is not None:
                    callback(x)
                if keep_r:
                    residuals.append(normr)

        # end inner loop, back to outer loop

        # Find best update to x in Krylov Space, V.  Solve inner+1 x inner+1
        # system.  Apparently this is the best way to solve a triangular system
        # in the magical world of scipy
        # piv = arange(inner+1)
        # y = lu_solve((H[0:(inner+1), 0:(inner+1)], piv), g[0:(inner+1)],
        #             trans=0)
        y = sp.linalg.solve(H[0:(inner+1), 0:(inner+1)], g[0:(inner+1)])

        # Use Horner like Scheme to map solution, y, back to original space.
        # Note that we do not use the last reflector.
        update = np.zeros(x.shape, dtype=xtype)
        # for j in range(inner,-1,-1):
        #    update[j] += y[j]
        #    # Apply j-th reflector, (I - 2.0*w_j*w_j.T)*update
        #    update -= 2.0*dot(conjugate(W[j,:]), update)*W[j,:]
        amg_core.householder_hornerscheme(update, np.ravel(W), np.ravel(y),
                                          dimen, inner, -1, -1)

        x[:] = x + update
        r = b - np.ravel(A*x)

        # Apply preconditioner
        r = np.ravel(M*r)
        normr = norm(r)
        # Check for nan, inf
        # if isnan(r).any() or isinf(r).any():
        #    warn('inf or nan after application of preconditioner')
        #    return(postprocess(x), -1)

        # Allow user access to the iterates
        if callback is not None:
            callback(x)
        if keep_r:
            residuals.append(normr)

        # Has GMRES stagnated?
        indices = (x != 0)
        if indices.any():
            change = np.max(np.abs(update[indices] / x[indices]))
            if change < 1e-12:
                # No change, halt
                return (postprocess(x), -1)

        # test for convergence
        if normr < tol:
            return (postprocess(x), 0)

    # end outer loop

    return (postprocess(x), niter)
from scipy import linalg
from scipy.linalg import LinAlgError
from scipy._lib._util import _asarray_validated

from ..fixes import _check_info

# Zero-length float64 / complex128 arrays.  They are only passed as the
# ``arrays`` argument of get_blas_funcs / get_lapack_funcs below, so that the
# double-precision real ("d") and complex ("z") routines are returned.
_d = np.empty(0, np.float64)
_z = np.empty(0, np.complex128)

# BLAS routines bound once at import time.
dgemm = linalg.get_blas_funcs('gemm', (_d, ))
zgemm = linalg.get_blas_funcs('gemm', (_z, ))
dgemv = linalg.get_blas_funcs('gemv', (_d, ))
ddot = linalg.get_blas_funcs('dot', (_d, ))
# Imaginary unit cast with the 'F' type code.
# NOTE(review): `np.cast` is not available in NumPy 2.0 -- confirm the
# supported NumPy range before relying on this line.
_I = np.cast['F'](1j)

###############################################################################
# linalg.svd and linalg.pinv2

# SVD drivers (divide-and-conquer gesdd, standard gesvd) together with their
# workspace-size query routines, for real and complex double precision.
dgesdd, dgesdd_lwork = linalg.get_lapack_funcs(('gesdd', 'gesdd_lwork'),
                                               (_d, ))
zgesdd, zgesdd_lwork = linalg.get_lapack_funcs(('gesdd', 'gesdd_lwork'),
                                               (_z, ))
dgesvd, dgesvd_lwork = linalg.get_lapack_funcs(('gesvd', 'gesvd_lwork'),
                                               (_d, ))
zgesvd, zgesvd_lwork = linalg.get_lapack_funcs(('gesvd', 'gesvd_lwork'),
                                               (_z, ))


def _svd_lwork(shape, dtype=np.float64):
    """Set up SVD calculations on identical-shape float64/complex128 arrays."""
    # Pick the workspace-query routines that match the requested dtype; any
    # dtype other than float64 must be complex128.
    # NOTE(review): as visible here the function only binds these two names
    # and neither uses `shape` nor returns anything -- the remainder of the
    # body appears to be missing; confirm against the upstream source.
    if dtype == np.float64:
        gesdd_lwork, gesvd_lwork = dgesdd_lwork, dgesvd_lwork
    else:
        assert dtype == np.complex128
        gesdd_lwork, gesvd_lwork = zgesdd_lwork, zgesvd_lwork
def make_interp_spline(x, y, k=3, t=None, bc_type=None, axis=0,
                       check_finite=True):
    """Compute the (coefficients of) interpolating B-spline.

    Parameters
    ----------
    x : array_like, shape (n,)
        Abscissas.
    y : array_like, shape (n, ...)
        Ordinates.
    k : int, optional
        B-spline degree. Default is cubic, k=3.
    t : array_like, shape (nt + k + 1,), optional.
        Knots.
        The number of knots needs to agree with the number of datapoints and
        the number of derivatives at the edges. Specifically, ``nt - n`` must
        equal ``len(deriv_l) + len(deriv_r)``.
    bc_type : 2-tuple or None
        Boundary conditions.
        Default is None, which means choosing the boundary conditions
        automatically. Otherwise, it must be a length-two tuple where the first
        element sets the boundary conditions at ``x[0]`` and the second
        element sets the boundary conditions at ``x[-1]``. Each of these must
        be an iterable of pairs ``(order, value)`` which gives the values of
        derivatives of specified orders at the given edge of the interpolation
        interval.
        Alternatively, the following string aliases are recognized:

        * ``"clamped"``: The first derivatives at the ends are zero. This is
           equivalent to ``bc_type=([(1, 0.0)], [(1, 0.0)])``.
        * ``"natural"``: The second derivatives at ends are zero. This is
          equivalent to ``bc_type=([(2, 0.0)], [(2, 0.0)])``.
        * ``"not-a-knot"`` (default): The first and second segments are the
          same polynomial. This is equivalent to having ``bc_type=None``.

    axis : int, optional
        Interpolation axis. Default is 0.
    check_finite : bool, optional
        Whether to check that the input arrays contain only finite numbers.
        Disabling may give a performance gain, but may result in problems
        (crashes, non-termination) if the inputs do contain infinities or NaNs.
        Default is True.

    Returns
    -------
    b : a BSpline object of the degree ``k`` and with knots ``t``.

    Raises
    ------
    ValueError
        If `bc_type` cannot be unpacked into a left/right pair, if `axis` is
        out of bounds, if extra information is supplied for ``k=0`` or
        ``k=1``, if `x` is not 1-D and strictly increasing, if `t` is not
        1-D and sorted, if the shapes of `x`, `y` and `t` are inconsistent,
        or if the number of boundary derivatives does not match the number
        of knots.
    LinAlgError
        If the banded solver reports a singular collocation matrix.

    Examples
    --------

    Use cubic interpolation on Chebyshev nodes:

    >>> def cheb_nodes(N):
    ...     jj = 2.*np.arange(N) + 1
    ...     x = np.cos(np.pi * jj / 2 / N)[::-1]
    ...     return x

    >>> x = cheb_nodes(20)
    >>> y = np.sqrt(1 - x**2)

    >>> from scipy.interpolate import BSpline, make_interp_spline
    >>> b = make_interp_spline(x, y)
    >>> np.allclose(b(x), y)
    True

    Note that the default is a cubic spline with a not-a-knot boundary condition

    >>> b.k
    3

    Here we use a 'natural' spline, with zero 2nd derivatives at edges:

    >>> l, r = [(2, 0.0)], [(2, 0.0)]
    >>> b_n = make_interp_spline(x, y, bc_type=(l, r))  # or, bc_type="natural"
    >>> np.allclose(b_n(x), y)
    True
    >>> x0, x1 = x[0], x[-1]
    >>> np.allclose([b_n(x0, 2), b_n(x1, 2)], [0, 0])
    True

    Interpolation of parametric curves is also supported. As an example, we
    compute a discretization of a snail curve in polar coordinates

    >>> phi = np.linspace(0, 2.*np.pi, 40)
    >>> r = 0.3 + np.cos(phi)
    >>> x, y = r*np.cos(phi), r*np.sin(phi)  # convert to Cartesian coordinates

    Build an interpolating curve, parameterizing it by the angle

    >>> from scipy.interpolate import make_interp_spline
    >>> spl = make_interp_spline(phi, np.c_[x, y])

    Evaluate the interpolant on a finer grid (note that we transpose the result
    to unpack it into a pair of x- and y-arrays)

    >>> phi_new = np.linspace(0, 2.*np.pi, 100)
    >>> x_new, y_new = spl(phi_new).T

    Plot the result

    >>> import matplotlib.pyplot as plt
    >>> plt.plot(x, y, 'o')
    >>> plt.plot(x_new, y_new, '-')
    >>> plt.show()

    See Also
    --------
    BSpline : base class representing the B-spline objects
    CubicSpline : a cubic spline in the polynomial basis
    make_lsq_spline : a similar factory function for spline fitting
    UnivariateSpline : a wrapper over FITPACK spline fitting routines
    splrep : a wrapper over FITPACK spline fitting routines

    """
    # convert string aliases for the boundary conditions
    if bc_type is None or bc_type == 'not-a-knot':
        deriv_l, deriv_r = None, None
    elif isinstance(bc_type, string_types):
        deriv_l, deriv_r = bc_type, bc_type
    else:
        try:
            deriv_l, deriv_r = bc_type
        except TypeError:
            raise ValueError("Unknown boundary condition: %s" % bc_type)

    y = np.asarray(y)

    # normalize a possibly negative axis into the range [0, y.ndim)
    if not -y.ndim <= axis < y.ndim:
        raise ValueError("axis {} is out of bounds".format(axis))
    if axis < 0:
        axis += y.ndim

    # special-case k=0 right away
    if k == 0:
        if any(_ is not None for _ in (t, deriv_l, deriv_r)):
            raise ValueError("Too much info for k=0: t and bc_type can only "
                             "be None.")
        x = _as_float_array(x, check_finite)
        t = np.r_[x, x[-1]]
        c = np.asarray(y)
        c = np.rollaxis(c, axis)
        c = np.ascontiguousarray(c, dtype=_get_dtype(c.dtype))
        return BSpline.construct_fast(t, c, k, axis=axis)

    # special-case k=1 (e.g., Lyche and Morken, Eq.(2.16))
    if k == 1 and t is None:
        if not (deriv_l is None and deriv_r is None):
            raise ValueError("Too much info for k=1: bc_type can only be None.")
        x = _as_float_array(x, check_finite)
        t = np.r_[x[0], x, x[-1]]
        c = np.asarray(y)
        c = np.rollaxis(c, axis)
        c = np.ascontiguousarray(c, dtype=_get_dtype(c.dtype))
        return BSpline.construct_fast(t, c, k, axis=axis)

    x = _as_float_array(x, check_finite)
    y = _as_float_array(y, check_finite)
    k = operator.index(k)

    # come up with a sensible knot vector, if needed
    if t is None:
        if deriv_l is None and deriv_r is None:
            if k == 2:
                # OK, it's a bit ad hoc: Greville sites + omit
                # 2nd and 2nd-to-last points, a la not-a-knot
                t = (x[1:] + x[:-1]) / 2.
                t = np.r_[(x[0],)*(k+1),
                          t[1:-1],
                          (x[-1],)*(k+1)]
            else:
                t = _not_a_knot(x, k)
        else:
            t = _augknt(x, k)

    t = _as_float_array(t, check_finite)

    y = np.rollaxis(y, axis)    # now internally interp axis is zero

    if x.ndim != 1 or np.any(x[1:] <= x[:-1]):
        raise ValueError("Expect x to be a 1-D sorted array_like.")
    if k < 0:
        raise ValueError("Expect non-negative k.")
    if t.ndim != 1 or np.any(t[1:] < t[:-1]):
        raise ValueError("Expect t to be a 1-D sorted array_like.")
    if x.size != y.shape[0]:
        raise ValueError('x and y are incompatible.')
    if t.size < x.size + k + 1:
        raise ValueError('Got %d knots, need at least %d.' %
                         (t.size, x.size + k + 1))
    if (x[0] < t[k]) or (x[-1] > t[-k]):
        raise ValueError('Out of bounds w/ x = %s.' % x)

    # Here : deriv_l, r = [(nu, value), ...]
    deriv_l = _convert_string_aliases(deriv_l, y.shape[1:])
    deriv_l_ords, deriv_l_vals = _process_deriv_spec(deriv_l)
    nleft = deriv_l_ords.shape[0]

    deriv_r = _convert_string_aliases(deriv_r, y.shape[1:])
    deriv_r_ords, deriv_r_vals = _process_deriv_spec(deriv_r)
    nright = deriv_r_ords.shape[0]

    # have `n` conditions for `nt` coefficients; need nt-n derivatives
    n = x.size
    nt = t.size - k - 1

    if nt - n != nleft + nright:
        raise ValueError("The number of derivatives at boundaries does not "
                         "match: expected %s, got %s+%s" %
                         (nt-n, nleft, nright))

    # set up the LHS: the collocation matrix + derivatives at boundaries
    kl = ku = k
    # np.float64 rather than the np.float_ alias, which NumPy 2.0 removed
    ab = np.zeros((2*kl + ku + 1, nt), dtype=np.float64, order='F')
    _bspl._colloc(x, t, k, ab, offset=nleft)
    if nleft > 0:
        _bspl._handle_lhs_derivatives(t, k, x[0], ab, kl, ku, deriv_l_ords)
    if nright > 0:
        _bspl._handle_lhs_derivatives(t, k, x[-1], ab, kl, ku, deriv_r_ords,
                                      offset=nt-nright)

    # set up the RHS: values to interpolate (+ derivative values, if any)
    extradim = prod(y.shape[1:])
    rhs = np.empty((nt, extradim), dtype=y.dtype)
    if nleft > 0:
        rhs[:nleft] = deriv_l_vals.reshape(-1, extradim)
    rhs[nleft:nt - nright] = y.reshape(-1, extradim)
    if nright > 0:
        rhs[nt - nright:] = deriv_r_vals.reshape(-1, extradim)

    # solve Ab @ x = rhs; this is the relevant part of linalg.solve_banded
    if check_finite:
        ab, rhs = map(np.asarray_chkfinite, (ab, rhs))
    gbsv, = get_lapack_funcs(('gbsv',), (ab, rhs))
    lu, piv, c, info = gbsv(kl, ku, ab, rhs,
                            overwrite_ab=True, overwrite_b=True)

    if info > 0:
        raise LinAlgError("Collocation matrix is singular.")
    elif info < 0:
        raise ValueError('illegal value in %d-th argument of internal gbsv' %
                         -info)

    c = np.ascontiguousarray(c.reshape((nt,) + y.shape[1:]))
    return BSpline.construct_fast(t, c, k, axis=axis)
def make_interp_spline(x, y, degrees=3):
    """Construct an interpolating B-spline.

    The spline is built one axis at a time: for every axis a 1-D knot vector
    and banded collocation matrix are set up, and the collocation system is
    solved along every line of the coefficient array parallel to that axis.

    Parameters
    ----------
    x : array_like broadcastable to (n_0, n_1, ..., n_(xdim-1), xdim) or
        arguments to ``numpy.meshgrid`` to construct same.
        Abscissas.
    y : array_like, shape (n_0, n_1, ..., n_(xdim-1),) + yshape
        Ordinates.
    degrees : ndarray, shape=(xdim,), dtype=np.intc
        Degree of interpolant for each axis (or broadcastable). Optional,
        default is 3.

    Returns
    -------
    b : NDSpline object
        An interpolating `NDSpline`.

    Raises
    ------
    ValueError
        If `x` cannot be interpreted, if the leading shape of `y` does not
        match `x`, or if the knots, sites and boundary conditions of an axis
        are inconsistent.
    LinAlgError
        If the banded solver reports a singular collocation matrix.

    See Also
    --------
    make_lsq_spline, make_interp_spline_from_tidy

    """
    if isinstance(x, np.ndarray):  # mesh
        if x.ndim == 1:
            x = x[..., None]
        xdim = x.shape[-1]
    elif not isinstance(x, str) and len(x):  # vectors
        xdim = len(x)
        x = np.stack(np.meshgrid(*x, indexing='ij'), axis=-1)
    else:
        raise ValueError("Don't know how to interpret x")

    if not np.all(y.shape[:xdim] == x.shape[:xdim]):
        raise ValueError("Expected y.shape to start with %s, got %s." %
                         (repr(x.shape[:xdim]), repr(y.shape[:xdim])))

    yshape = y.shape[xdim:]
    ydim = prod(yshape)

    # generally, x.shape = (n_0, n_1, ..., n_(xdim-1), xdim)
    # and y.shape = (n_0, n_1, ..., n_(xdim-1), ydim)

    degrees = np.broadcast_to(degrees, (xdim, ))
    # broadcasting does not play nicely with xdim as last axis for some reason
    # Boundary conditions are fixed to all zeros here, so every side of every
    # axis is treated as not-a-knot below.
    bcs = np.broadcast_to(0, (xdim, 2, 2))
    # Builtin int/bool: the np.int / np.bool aliases were removed from NumPy.
    deriv_specs = np.asarray((bcs[:, :, 0] > 0), dtype=int)
    nak_spec = np.asarray((bcs[:, :, 0] <= 0), dtype=bool)

    knots = []
    coefficients = np.pad(y.reshape(x.shape[:-1] + (ydim, )),
                          np.r_[deriv_specs, np.c_[0, 0]], 'constant')

    check_finite = True

    for i in np.arange(xdim):
        all_other_ax_shape = np.asarray(
            np.r_[coefficients.shape[:i], y.shape[i + 1:xdim]], dtype=int)
        # take the abscissas along axis i from the first line of the mesh
        x_line_sel = ((0, ) * (i) + (slice(None, None), ) +
                      (0, ) * (xdim - i - 1) + (i, ))
        x_slice = x[x_line_sel]
        k = degrees[i]

        left_nak, right_nak = nak_spec[i, :]
        both_nak = left_nak and right_nak

        # Here : deriv_l, r = [(nu, value), ...]
        deriv_l_ords, deriv_r_ords = bcs[i, :, 0].astype(np.int_)

        x_slice = _as_float_array(x_slice, check_finite)
        # should there be a general check for k <= deriv_spec ?

        if k == 0:
            # all derivatives are fully defined, can only set 0-th derivative,
            # special case for nearest-neighbor, causal/anti-causal zero-order
            # hold
            if not both_nak:
                raise ValueError(
                    "Too much info for k=0: t and bc_type can only "
                    "be notaknot.")

            left_zero, right_zero = (bcs[i, :, 1] == 0)

            if left_zero and right_zero:
                t = np.r_[x_slice[0],
                          (x_slice[:-1] + x_slice[1:]) / 2.,
                          x_slice[-1]]
            elif not left_zero and right_zero:
                t = np.r_[x_slice, x_slice[-1]]
            elif left_zero and not right_zero:
                t = np.r_[x_slice[0], x_slice]
            else:
                raise ValueError(
                    "Set deriv_spec = 0, with up to one side = -1 for k=0")

        # special-case k=1 (e.g., Lyche and Morken, Eq.(2.16))
        if k == 1:
            # all derivatives are fully defined, can only set 0-th derivative,
            # aka not-a-knot boundary conditions to both sides
            if not both_nak:
                raise ValueError(
                    "Too much info for k=1: bc_type can only be notaknot.")

        if k == 2:
            # it's possible this may be the best behavior for all even k > 0
            if both_nak:
                # special, ad-hoc case using greville sites
                t = (x_slice[1:] + x_slice[:-1]) / 2.
                t = np.r_[(x_slice[0], ) * (k + 1),
                          t[1:-1],
                          (x_slice[-1], ) * (k + 1)]
            elif left_nak or right_nak:
                raise ValueError(
                    "For k=2, can set both sides or neither side to notaknot")
            else:
                t = x_slice
        elif k != 0:
            t = _not_a_knot(x_slice, k, left_nak, right_nak)

        t = _as_float_array(t, check_finite)

        # NOTE(review): the two `else` branches below index `bcs` with
        # `i - 1` while the rest of this loop uses `i`; they are not reached
        # while `bcs` is all zeros -- confirm before enabling boundary
        # conditions.
        if left_nak:
            deriv_l_ords = np.array([])
            deriv_l_vals = np.array([])
            nleft = 0
        else:
            deriv_l_ords = np.array([bcs[i - 1, 0, 0]], dtype=np.int_)
            deriv_l_vals = np.broadcast_to(bcs[i - 1, 0, 1], ydim)
            nleft = 1

        if right_nak:
            deriv_r_ords = np.array([])
            deriv_r_vals = np.array([])
            nright = 0
        else:
            deriv_r_ords = np.array([bcs[i - 1, 1, 0]], dtype=np.int_)
            deriv_r_vals = np.broadcast_to(bcs[i - 1, 1, 1], ydim)
            nright = 1

        # have `n` conditions for `nt` coefficients; need nt-n derivatives
        n = x_slice.size
        nt = t.size - k - 1

        # this also catches if deriv_spec > k-1, possibly?
        if np.clip(nt - n, 0, np.inf).astype(int) != nleft + nright:
            raise ValueError(
                "The number of derivatives at boundaries does not "
                "match: expected %s, got %s+%s" % (nt - n, nleft, nright))

        # set up the LHS: the collocation matrix + derivatives at boundaries
        kl = ku = k
        # np.float64 rather than the np.float_ alias, which NumPy 2.0 removed
        ab = np.zeros((2 * kl + ku + 1, nt), dtype=np.float64, order='F')
        _sci_bspl._colloc(x_slice, t, k, ab, offset=nleft)
        if nleft > 0:
            _sci_bspl._handle_lhs_derivatives(t, k, x_slice[0], ab, kl, ku,
                                              deriv_l_ords)
        if nright > 0:
            _sci_bspl._handle_lhs_derivatives(t, k, x_slice[-1], ab, kl, ku,
                                              deriv_r_ords,
                                              offset=nt - nright)

        knots.append(t)

        # consistency checks of sites and knots, applied for k >= 2 only
        if k >= 2:
            if x_slice.ndim != 1 or np.any(x_slice[1:] <= x_slice[:-1]):
                raise ValueError(
                    "Expect x_slice to be a 1-D sorted array_like.")
            if k < 0:
                raise ValueError("Expect non-negative k.")
            if t.ndim != 1 or np.any(t[1:] < t[:-1]):
                raise ValueError("Expect t to be a 1-D sorted array_like.")
            if t.size < x_slice.size + k + 1:
                raise ValueError('Got %d knots, need at least %d.' %
                                 (t.size, x_slice.size + k + 1))
            if (x_slice[0] < t[k]) or (x_slice[-1] > t[-k]):
                raise ValueError('Out of bounds w/ x_slice = %s.' % x_slice)

        # solve along every line of the coefficient array parallel to axis i
        for idx in np.ndindex(*all_other_ax_shape):
            offset_axes_remaining_sel = (
                tuple(idx[i:] + deriv_specs[i + 1:, 0]))
            y_line_sel = (
                idx[:i] +
                (slice(deriv_specs[i, 0], -deriv_specs[i, 1] or None), ) +
                offset_axes_remaining_sel + (Ellipsis, ))
            coeff_line_sel = (idx[:i] + (slice(None, None), ) +
                              offset_axes_remaining_sel + (Ellipsis, ))
            y_slice = coefficients[y_line_sel]

            # special-case k=0 right away
            if k == 0:
                c = np.asarray(y_slice)

            # special-case k=1 (e.g., Lyche and Morken, Eq.(2.16))
            elif k == 1:
                c = np.asarray(y_slice)

            else:
                y_slice = _as_float_array(y_slice, check_finite)
                k = operator.index(k)

                if x_slice.size != y_slice.shape[0]:
                    raise ValueError('x_slice and y_slice are incompatible.')

                # set up the RHS: values to interpolate (+ derivative values,
                # if any)
                rhs = np.empty((nt, ydim), dtype=y_slice.dtype)
                if nleft > 0:
                    rhs[:nleft] = deriv_l_vals.reshape(-1, ydim)
                rhs[nleft:nt - nright] = y_slice.reshape(-1, ydim)
                if nright > 0:
                    rhs[nt - nright:] = deriv_r_vals.reshape(-1, ydim)

                # solve Ab @ x_slice = rhs; this is the relevant part of
                # linalg.solve_banded.  `ab` is reused for every line, hence
                # overwrite_ab=False.
                if check_finite:
                    ab, rhs = map(np.asarray_chkfinite, (ab, rhs))
                gbsv, = get_lapack_funcs(('gbsv', ), (ab, rhs))
                lu, piv, c, info = gbsv(kl, ku, ab, rhs,
                                        overwrite_ab=False, overwrite_b=True)

                if info > 0:
                    raise LinAlgError("Collocation matrix is singular.")
                elif info < 0:
                    raise ValueError(
                        'illegal value in %d-th argument of internal gbsv' %
                        -info)

                c = np.ascontiguousarray(
                    c.reshape((nt, ) + y_slice.shape[1:]))

            coefficients[coeff_line_sel] = c

    coefficients = coefficients.reshape(coefficients.shape[:xdim] + yshape)
    return NDSpline(
        knots,
        coefficients,
        degrees,
    )
def admm(self, z, y, gamma):
    """Alternating direction method of multipliers.

    Runs at most ``self.max_admm_iter`` ADMM iterations, alternating an
    x-minimization (a linear solve against the Cholesky factor of
    ``self.Prho``), a z-minimization (soft-thresholding with threshold
    ``gamma / self.rho``) and a Lagrange multiplier update.

    Parameters
    ----------
    z : ndarray
        Starting value of the thresholded variable.
    y : ndarray
        Starting value of the Lagrange multiplier.
    gamma : float
        Sparsity weight; the soft-threshold level is ``gamma / self.rho``.

    Returns
    -------
    z : ndarray
        The thresholded iterate.  As in the matlab source, when the
        stopping criteria are met the iterate from *before* the final
        update is returned.

    Notes
    -----
    Optimization: this has been reasonably optimized already and performs
    ~3x faster than a naive translation of the matlab version.  Two major
    changes are a BLAS routine for the norm of a 1d vector and accessing
    the LAPACK solver directly.  It still isn't as fast as matlab (~1/3rd
    the speed).

    There are two complexity sources:

    1. the matrix solver (tested with Intel MKL on Canopy);
    2. the test for convergence, which is the dominant cost (~3x the
       time of the solver).

    One simple speedup (~2x faster) is to only test convergence every n
    iterations (n~10).  However this breaks output comparison with the
    matlab code.

    Further avenues for optimization:

    - write in cython and import as a compiled module, e.g.
      http://docs.cython.org/src/userguide/numpy_tutorial.html
    - use two cores, with one core performing the admm and the other
      watching for convergence.
    """
    a = gamma / self.rho
    q = self.dmd.q

    # precompute cholesky decomposition
    C = linalg.cholesky(self.Prho, lower=False)
    # link directly to LAPACK fortran solver for positive
    # definite symmetric system with precomputed cholesky decomp:
    potrs, = linalg.get_lapack_funcs(('potrs',), arrays=(C, q))

    # simple norm of a 1d vector, called directly from BLAS
    norm, = linalg.get_blas_funcs(('nrm2',), arrays=(q,))

    # square root outside of the loop
    root_n = np.sqrt(self.n)

    # `range` (not `xrange`) so the loop runs on Python 3 as well.
    for _step in range(self.max_admm_iter):
        # ## x-minimization step (alpha minimisation)
        u = z - (1. / self.rho) * y
        qs = q + (self.rho / 2.) * u
        # Solve P x = qs, using fact that P is hermitian and
        # positive definite and assuming P is well behaved (no
        # inf or nan).
        xnew = potrs(C, qs, lower=False, overwrite_b=False)[0]

        # ## z-minimization step (beta minimisation)
        # `1.` keeps this a true division even for integer rho on
        # Python 2, matching the x-minimization step above.
        v = xnew + (1. / self.rho) * y

        # Soft-thresholding of v:
        #   zero                 for |v| <= a
        #   (1 - a / |v|) * v    for |v| >  a
        # v is complex, so the magnitude is shrunk and the phase kept
        # (np.sign cannot be used).  This matches the matlab source.
        # The shrink factor is only evaluated where |v| > a (and
        # non-zero), so entries with v == 0 give 0 instead of the NaN
        # that ``(1 - a / 0) * 0`` would produce.
        abs_v = np.abs(v)
        keep = (abs_v > a) & (abs_v > 0)
        shrink = np.zeros_like(abs_v)
        shrink[keep] = 1 - a / abs_v[keep]
        znew = shrink * v

        # ## Lagrange multiplier update step
        y = y + self.rho * (xnew - znew)

        # ## Test convergence of admm
        # Primal and dual residuals
        res_prim = norm(xnew - znew)
        res_dual = self.rho * norm(znew - z)

        # Stopping criteria
        eps_prim = root_n * self.eps_abs \
            + self.eps_rel * max(norm(xnew), norm(znew))
        eps_dual = root_n * self.eps_abs + self.eps_rel * norm(y)

        if res_prim < eps_prim and res_dual < eps_dual:
            return z
        z = znew

    return z
def fgmres(A, b, x0=None, tol=1e-5,
           restrt=None, maxiter=None,
           M=None, callback=None, residuals=None):
    """Flexible Generalized Minimum Residual Method (fGMRES).

    fGMRES iteratively refines the initial solution guess to the
    system Ax = b.  fGMRES is flexible in the sense that the right
    preconditioner (M) can vary from iteration to iteration.

    Parameters
    ----------
    A : array, matrix, sparse matrix, LinearOperator
        n x n, linear system to solve
    b : array, matrix
        right hand side, shape is (n,) or (n,1)
    x0 : array, matrix
        initial guess, default is a vector of zeros
    tol : float
        Tolerance for stopping criteria, let r=r_k
        ||r|| < tol ||b||
        if ||b||=0, then set ||b||=1 for these tests.
    restrt : None, int
        - if int, restrt is max number of inner iterations
          and maxiter is the max number of outer iterations
        - if None, do not restart GMRES, and max number of inner iterations
          is maxiter
    maxiter : None, int
        - if restrt is None, maxiter is the max number of inner iterations
          and GMRES does not restart
        - if restrt is int, maxiter is the max number of outer iterations,
          and restrt is the max number of inner iterations
        - defaults to min(n,40) if restrt is None
    M : array, matrix, sparse matrix, LinearOperator
        n x n, inverted preconditioner, i.e. solve M A x = M b.
        M need not be stationary for fgmres
    callback : function
        User-supplied function is called after each iteration as
        callback(xk), where xk is the current solution vector
    residuals : list
        residual history in the 2-norm, including the initial residual

    Returns
    -------
    xk, info
    xk : an updated guess after k iterations to the solution of Ax = b
    info : halting status

            ==  =======================================
            0   successful exit
            >0  convergence to tolerance not achieved,
                return iteration count instead.
            <0  numerical breakdown, or illegal input
            ==  =======================================

    Notes
    -----
    The LinearOperator class is in scipy.sparse.linalg.interface.
    Use this class if you prefer to define A or M as a mat-vec routine
    as opposed to explicitly constructing the matrix.

    fGMRES allows for non-stationary preconditioners, as opposed to GMRES

    For robustness, Householder reflections are used to orthonormalize
    the Krylov Space
    Givens Rotations are used to provide the residual norm each iteration
    Flexibility implies that the right preconditioner, M, can
    vary from iteration to iteration

    Examples
    --------
    >>> from pyamg.krylov import fgmres
    >>> from pyamg.util.linalg import norm
    >>> import numpy as np
    >>> from pyamg.gallery import poisson
    >>> A = poisson((10,10))
    >>> b = np.ones((A.shape[0],))
    >>> (x,flag) = fgmres(A,b, maxiter=2, tol=1e-8)
    >>> print(f'{norm(b - A*x):.6}')
    6.54282

    References
    ----------
    .. [1] Yousef Saad, "Iterative Methods for Sparse Linear Systems,
       Second Edition", SIAM, pp. 151-172, pp. 272-275, 2003
       http://www-users.cs.umn.edu/~saad/books.html

    """
    # Convert inputs to linear system, with error checking
    A, M, x, b, postprocess = make_system(A, M, x0, b)
    n = A.shape[0]

    # Ensure that warnings are always reissued from this function
    warnings.filterwarnings('always', module='pyamg.krylov._fgmres')

    # Get fast access to underlying LAPACK routine
    [lartg] = get_lapack_funcs(['lartg'], [x])

    # Set number of outer and inner iterations
    # If no restarts,
    #     then set max_inner=maxiter and max_outer=n
    # If restarts are set,
    #     then set max_inner=restart and max_outer=maxiter
    if restrt:
        if maxiter:
            max_outer = maxiter
        else:
            max_outer = 1
        if restrt > n:
            warn('Setting restrt to maximum allowed, n.')
            restrt = n
        max_inner = restrt
    else:
        max_outer = 1
        # Test for None first: `None > n` raises TypeError on Python 3,
        # which made the default call fgmres(A, b) fail.
        if maxiter is None:
            maxiter = min(n, 40)
        elif maxiter > n:
            warn('Setting maxiter to maximum allowed, n.')
            maxiter = n
        max_inner = maxiter

    # Is this a one dimensional matrix?
    if n == 1:
        entry = np.ravel(A @ np.array([1.0], dtype=x.dtype))
        return (postprocess(b / entry), 0)

    # Prep for method
    r = b - A @ x
    normr = norm(r)
    if residuals is not None:
        residuals[:] = [normr]  # initial residual

    # Check initial guess if b != 0,
    normb = norm(b)
    if normb == 0.0:
        normb = 1.0  # reset so that tol is unscaled

    if normr < tol * normb:
        return (postprocess(x), 0)

    # Use separate variable to track iterations.  If convergence fails,
    # we cannot simply report niter = (outer-1)*max_outer + inner.  Numerical
    # error could cause the inner loop to halt while the actual ||r|| > tol.
    niter = 0

    # Begin fGMRES
    for _outer in range(max_outer):

        # Calculate vector w, which defines the Householder reflector
        #    Take shortcut in calculating,
        #    w = r + sign(r[1])*||r||_2*e_1
        # Note: w aliases r, so r is overwritten here; r is recomputed
        # from scratch at the end of every outer iteration.
        w = r
        beta = _mysign(w[0]) * normr
        w[0] += beta
        w /= norm(w)

        # Preallocate for Krylov vectors, Householder reflectors and Hessenberg
        # matrix
        # Space required is O(n*max_inner)
        # Givens Rotations
        Q = np.zeros((4 * max_inner, ), dtype=x.dtype)
        # upper Hessenberg matrix (made upper tri with Givens Rotations)
        H = np.zeros((max_inner, max_inner), dtype=x.dtype)
        W = np.zeros((max_inner, n), dtype=x.dtype)  # Householder reflectors
        # For fGMRES, preconditioned vectors must be stored
        # No Horner-like scheme exists that allow us to avoid this
        Z = np.zeros((n, max_inner), dtype=x.dtype)
        W[0, :] = w

        # Multiply r with (I - 2*w*w.T), i.e. apply the Householder reflector
        # This is the RHS vector for the problem in the Krylov Space
        g = np.zeros((n, ), dtype=x.dtype)
        g[0] = -beta

        for inner in range(max_inner):
            # Calculate Krylov vector in two steps
            # (1) Calculate v = P_j = (I - 2*w*w.T)v, where k = inner
            v = -2.0 * np.conjugate(w[inner]) * w
            v[inner] += 1.0
            # (2) Calculate the rest, v = P_1*P_2*P_3...P_{j-1}*ej.
            # for j in range(inner-1,-1,-1):
            #    v = v - 2.0*dot(conjugate(W[j,:]), v)*W[j,:]
            amg_core.apply_householders(v, np.ravel(W), n, inner - 1, -1, -1)

            # Apply preconditioner
            v = M @ v
            # Check for nan, inf
            # if isnan(v).any() or isinf(v).any():
            #    warn('inf or nan after application of preconditioner')
            #    return(postprocess(x), -1)
            Z[:, inner] = v

            # Calculate new search direction
            v = A @ v

            # Factor in all Householder orthogonal reflections on new search
            # direction
            # for j in range(inner+1):
            #    v = v - 2.0*dot(conjugate(W[j,:]), v)*W[j,:]
            amg_core.apply_householders(v, np.ravel(W), n, 0, inner + 1, 1)

            # Calculate next Householder reflector, w
            #  w = v[inner+1:] + sign(v[inner+1])*||v[inner+1:]||_2*e_{inner+1)
            #  Note that if max_inner = n, then this is unnecessary for
            #  the last inner iteration, when inner = n-1.  Here we do
            #  not need to calculate a Householder reflector or Givens
            #  rotation because nnz(v) is already the desired length,
            #  i.e. we do not need to zero anything out.
            if inner != n - 1:
                if inner < (max_inner - 1):
                    w = W[inner + 1, :]
                vslice = v[inner + 1:]
                alpha = norm(vslice)
                if alpha != 0:
                    alpha = _mysign(vslice[0]) * alpha
                    # do not need the final reflector for future calculations
                    if inner < (max_inner - 1):
                        w[inner + 1:] = vslice
                        w[inner + 1] += alpha
                        w /= norm(w)

                    # Apply new reflector to v
                    #  v = v - 2.0*w*(w.T*v)
                    v[inner + 1] = -alpha
                    v[inner + 2:] = 0.0

            if inner > 0:
                # Apply all previous Givens Rotations to v
                amg_core.apply_givens(Q, v, n, inner)

            # Calculate the next Givens rotation, where j = inner Note that if
            # max_inner = n, then this is unnecessary for the last inner
            # iteration, when inner = n-1.  Here we do not need to
            # calculate a Householder reflector or Givens rotation because
            # nnz(v) is already the desired length, i.e. we do not need to zero
            # anything out.
            if inner != n - 1:
                if v[inner + 1] != 0:
                    # NOTE: this rebinds `r`; the residual vector is
                    # recomputed after the inner loop before it is used again.
                    [c, s, r] = lartg(v[inner], v[inner + 1])
                    Qblock = np.array([[c, s], [-np.conjugate(s), c]],
                                      dtype=x.dtype)
                    Q[(inner * 4):((inner + 1) * 4)] = np.ravel(Qblock).copy()

                    # Apply Givens Rotation to g, the RHS for the linear system
                    # in the Krylov Subspace.  Note that this dot does a matrix
                    # multiply, not an actual dot product where a conjugate
                    # transpose is taken
                    g[inner:inner + 2] = np.dot(Qblock, g[inner:inner + 2])

                    # Apply effect of Givens Rotation to v
                    v[inner] = np.dot(Qblock[0, :], v[inner:inner + 2])
                    v[inner + 1] = 0.0

            # Write to upper Hessenberg Matrix,
            #   the LHS for the linear system in the Krylov Subspace
            H[:, inner] = v[0:max_inner]

            # Don't update normr if last inner iteration, because
            # normr is calculated directly after this loop ends.
            if inner < max_inner - 1:
                normr = np.abs(g[inner + 1])
                if normr < tol * normb:
                    break
                if residuals is not None:
                    residuals.append(normr)

                if callback is not None:
                    y = sp.linalg.solve(H[0:(inner + 1), 0:(inner + 1)],
                                        g[0:(inner + 1)])
                    update = np.dot(Z[:, 0:inner + 1], y)
                    callback(x + update)

            niter += 1

        # end inner loop, back to outer loop

        # Find best update to x in Krylov Space, V.  Solve inner+1 x inner+1
        # system.  Apparently this is the best way to solve a triangular system
        # in the magical world of scipy
        # piv = arange(inner+1)
        # y = lu_solve((H[0:(inner+1),0:(inner+1)], piv),
        #               g[0:(inner+1)], trans=0)
        y = sp.linalg.solve(H[0:(inner + 1), 0:(inner + 1)], g[0:(inner + 1)])

        # No Horner like scheme exists because the preconditioner can change
        # each iteration
        # Hence, we must store each preconditioned vector
        update = np.dot(Z[:, 0:inner + 1], y)
        x = x + update
        r = b - A @ x

        # Allow user access to the iterates
        if callback is not None:
            callback(x)

        normr = norm(r)
        if residuals is not None:
            residuals.append(normr)

        # Has fGMRES stagnated?
        indices = (x != 0)
        if indices.any():
            change = np.max(np.abs(update[indices] / x[indices]))
            if change < 1e-12:
                # No change, halt
                return (postprocess(x), -1)

        # test for convergence
        if normr < tol * normb:
            return (postprocess(x), 0)

    # end outer loop

    return (postprocess(x), niter)
def gmres_householder(A, b, x0=None, tol=1e-5, restrt=None, maxiter=None,
                      xtype=None, M=None, callback=None, residuals=None):
    '''
    Generalized Minimum Residual Method (GMRES)
        GMRES iteratively refines the initial solution guess to the
        system Ax = b
        Householder reflections are used for orthogonalization

    Parameters
    ----------
    A : {array, matrix, sparse matrix, LinearOperator}
        n x n, linear system to solve
    b : {array, matrix}
        right hand side, shape is (n,) or (n, 1)
    x0 : {array, matrix}
        initial guess, default is a vector of zeros
    tol : float
        relative convergence tolerance, i.e. tol is scaled by the norm
        of the initial preconditioned residual
    restrt : {None, int}
        - if int, restrt is max number of inner iterations
          and maxiter is the max number of outer iterations
        - if None, do not restart GMRES, and max number of inner iterations is
          maxiter
    maxiter : {None, int}
        - if restrt is None, maxiter is the max number of inner iterations
          and GMRES does not restart; defaults to min(n, 40)
        - if restrt is int, maxiter is the max number of outer iterations,
          and restrt is the max number of inner iterations
    xtype : type
        dtype for the solution, default is automatic type detection
    M : {array, matrix, sparse matrix, LinearOperator}
        n x n, inverted preconditioner, i.e. solve M A x = M b.
    callback : function
        User-supplied function is called after each iteration as
        callback( ||rk||_2 ), where rk is the current preconditioned residual
        vector
    residuals : list
        residuals contains the preconditioned residual norm history,
        including the initial residual.  The history is only recorded
        when an empty list is passed in.

    Returns
    -------
    (xNew, info)
    xNew : an updated guess to the solution of Ax = b
    info : halting status of gmres

            ==  =============================================
            0   successful exit
            >0  convergence to tolerance not achieved,
                return iteration count instead.  This value
                is precisely the order of the Krylov space.
            <0  numerical breakdown, or illegal input
            ==  =============================================

    Notes
    -----
        - The LinearOperator class is in scipy.sparse.linalg.interface.
          Use this class if you prefer to define A or M as a mat-vec routine
          as opposed to explicitly constructing the matrix.  A.psolve(..) is
          still supported as a legacy.
        - For robustness, Householder reflections are used to orthonormalize
          the Krylov Space
          Givens Rotations are used to provide the residual norm each iteration

    Examples
    --------
    >>> from pyamg.krylov import gmres
    >>> from pyamg.util.linalg import norm
    >>> import numpy as np
    >>> from pyamg.gallery import poisson
    >>> A = poisson((10, 10))
    >>> b = np.ones((A.shape[0],))
    >>> (x, flag) = gmres(A, b, maxiter=2, tol=1e-8, orthog='householder')
    >>> print norm(b - A*x)
    6.5428213057

    References
    ----------
    .. [1] Yousef Saad, "Iterative Methods for Sparse Linear Systems,
       Second Edition", SIAM, pp. 151-172, pp. 272-275, 2003
       http://www-users.cs.umn.edu/~saad/books.html

    '''
    # Convert inputs to linear system, with error checking
    A, M, x, b, postprocess = make_system(A, M, x0, b, xtype)
    dimen = A.shape[0]

    # Ensure that warnings are always reissued from this function
    import warnings
    # Raw string: the pattern is a regex, and '\.' in a plain string is an
    # invalid escape sequence.
    warnings.filterwarnings('always',
                            module=r'pyamg\.krylov\._gmres_householder')

    # Choose type
    if not hasattr(A, 'dtype'):
        Atype = upcast(x.dtype, b.dtype)
    else:
        Atype = A.dtype
    if not hasattr(M, 'dtype'):
        Mtype = upcast(x.dtype, b.dtype)
    else:
        Mtype = M.dtype
    xtype = upcast(Atype, x.dtype, b.dtype, Mtype)

    if restrt is not None:
        restrt = int(restrt)
    if maxiter is not None:
        maxiter = int(maxiter)

    # Should norm(r) be kept
    if residuals == []:
        keep_r = True
    else:
        keep_r = False

    # Set number of outer and inner iterations
    if restrt:
        if maxiter:
            max_outer = maxiter
        else:
            max_outer = 1
        if restrt > dimen:
            # Adjacent literals instead of a backslash continuation, which
            # embedded the source indentation in the message.
            warn('Setting number of inner iterations (restrt) to maximum '
                 'allowed, which is A.shape[0] ')
            restrt = dimen
        max_inner = restrt
    else:
        max_outer = 1
        # Test for None first: `None > dimen` raises TypeError on Python 3,
        # which made the default call (restrt=None, maxiter=None) fail.
        if maxiter is None:
            maxiter = min(dimen, 40)
        elif maxiter > dimen:
            warn('Setting number of inner iterations (maxiter) to maximum '
                 'allowed, which is A.shape[0] ')
            maxiter = dimen
        max_inner = maxiter

    # Get fast access to underlying LAPACK routine
    [lartg] = get_lapack_funcs(['lartg'], [x])

    # Is this a one dimensional matrix?
    if dimen == 1:
        entry = ravel(A * array([1.0], dtype=xtype))
        return (postprocess(b / entry), 0)

    # Prep for method
    r = b - ravel(A * x)

    # Apply preconditioner
    r = ravel(M * r)
    normr = norm(r)
    if keep_r:
        residuals.append(normr)
    # Check for nan, inf
    # if isnan(r).any() or isinf(r).any():
    #    warn('inf or nan after application of preconditioner')
    #    return(postprocess(x), -1)

    # Check initial guess ( scaling by b, if b != 0,
    #   must account for case when norm(b) is very small)
    normb = norm(b)
    if normb == 0.0:
        normb = 1.0
    if normr < tol * normb:
        if callback is not None:
            callback(norm(r))
        return (postprocess(x), 0)

    # Scale tol by ||r_0||_2, we use the preconditioned residual
    # because this is left preconditioned GMRES.
    if normr != 0.0:
        tol = tol * normr

    # Use separate variable to track iterations.  If convergence fails, we
    # cannot simply report niter = (outer-1)*max_outer + inner.  Numerical
    # error could cause the inner loop to halt while the actual ||r|| > tol.
    niter = 0

    # Begin GMRES
    for outer in range(max_outer):

        # Calculate vector w, which defines the Householder reflector
        #    Take shortcut in calculating,
        #    w = r + sign(r[1])*||r||_2*e_1
        # Note: w aliases r, so r is overwritten here; r is recomputed
        # from scratch at the end of every outer iteration.
        w = r
        beta = mysign(w[0]) * normr
        w[0] = w[0] + beta
        w[:] = w / norm(w)

        # Preallocate for Krylov vectors, Householder reflectors and
        # Hessenberg matrix
        # Space required is O(dimen*max_inner)
        # Givens Rotations
        Q = zeros((4 * max_inner, ), dtype=xtype)
        # upper Hessenberg matrix (made upper tri with Givens Rotations)
        H = zeros((max_inner, max_inner), dtype=xtype)
        # Householder reflectors
        W = zeros((max_inner + 1, dimen), dtype=xtype)
        W[0, :] = w

        # Multiply r with (I - 2*w*w.T), i.e. apply the Householder reflector
        # This is the RHS vector for the problem in the Krylov Space
        g = zeros((dimen, ), dtype=xtype)
        g[0] = -beta

        for inner in range(max_inner):
            # Calculate Krylov vector in two steps
            # (1) Calculate v = P_j = (I - 2*w*w.T)v, where k = inner
            v = -2.0 * conjugate(w[inner]) * w
            v[inner] = v[inner] + 1.0
            # (2) Calculate the rest, v = P_1*P_2*P_3...P_{j-1}*ej.
            # for j in range(inner-1,-1,-1):
            #    v -= 2.0*dot(conjugate(W[j,:]), v)*W[j,:]
            amg_core.apply_householders(v, ravel(W), dimen, inner - 1, -1, -1)

            # Calculate new search direction
            v = ravel(A * v)

            # Apply preconditioner
            v = ravel(M * v)
            # Check for nan, inf
            # if isnan(v).any() or isinf(v).any():
            #    warn('inf or nan after application of preconditioner')
            #    return(postprocess(x), -1)

            # Factor in all Householder orthogonal reflections on new search
            # direction
            # for j in range(inner+1):
            #    v -= 2.0*dot(conjugate(W[j,:]), v)*W[j,:]
            amg_core.apply_householders(v, ravel(W), dimen, 0, inner + 1, 1)

            # Calculate next Householder reflector, w
            #  w = v[inner+1:] + sign(v[inner+1])*||v[inner+1:]||_2*e_{inner+1)
            #  Note that if max_inner = dimen, then this is unnecessary for the
            #  last inner iteration, when inner = dimen-1.  Here we do not need
            #  to calculate a Householder reflector or Givens rotation because
            #  nnz(v) is already the desired length, i.e. we do not need to
            #  zero anything out.
            if inner != dimen - 1:
                if inner < (max_inner - 1):
                    w = W[inner + 1, :]
                vslice = v[inner + 1:]
                alpha = norm(vslice)
                if alpha != 0:
                    alpha = mysign(vslice[0]) * alpha
                    # do not need the final reflector for future calculations
                    if inner < (max_inner - 1):
                        w[inner + 1:] = vslice
                        w[inner + 1] += alpha
                        w[:] = w / norm(w)

                    # Apply new reflector to v
                    #  v = v - 2.0*w*(w.T*v)
                    v[inner + 1] = -alpha
                    v[inner + 2:] = 0.0

            if inner > 0:
                # Apply all previous Givens Rotations to v
                amg_core.apply_givens(Q, v, dimen, inner)

            # Calculate the next Givens rotation, where j = inner Note that if
            # max_inner = dimen, then this is unnecessary for the last inner
            # iteration, when inner = dimen-1.  Here we do not need to
            # calculate a Householder reflector or Givens rotation because
            # nnz(v) is already the desired length, i.e. we do not need to zero
            # anything out.
            if inner != dimen - 1:
                if v[inner + 1] != 0:
                    # NOTE: this rebinds `r`; the residual vector is
                    # recomputed after the inner loop before it is used again.
                    [c, s, r] = lartg(v[inner], v[inner + 1])
                    Qblock = array([[c, s], [-conjugate(s), c]], dtype=xtype)
                    Q[(inner * 4):((inner + 1) * 4)] = ravel(Qblock).copy()

                    # Apply Givens Rotation to g, the RHS for the linear system
                    # in the Krylov Subspace.  Note that this dot does a matrix
                    # multiply, not an actual dot product where a conjugate
                    # transpose is taken
                    g[inner:inner + 2] = dot(Qblock, g[inner:inner + 2])

                    # Apply effect of Givens Rotation to v
                    v[inner] = dot(Qblock[0, :], v[inner:inner + 2])
                    v[inner + 1] = 0.0

            # Write to upper Hessenberg Matrix,
            #   the LHS for the linear system in the Krylov Subspace
            H[:, inner] = v[0:max_inner]

            niter += 1

            # Don't update normr if last inner iteration, because
            # normr is calculated directly after this loop ends.
            if inner < max_inner - 1:
                normr = abs(g[inner + 1])
                if normr < tol:
                    break

                # Allow user access to residual
                if callback is not None:
                    callback(normr)
                if keep_r:
                    residuals.append(normr)

        # end inner loop, back to outer loop

        # Find best update to x in Krylov Space, V.  Solve inner+1 x inner+1
        # system.  Apparently this is the best way to solve a triangular system
        # in the magical world of scipy
        # piv = arange(inner+1)
        # y = lu_solve((H[0:(inner+1), 0:(inner+1)], piv), g[0:(inner+1)],
        #              trans=0)
        y = sp.linalg.solve(H[0:(inner + 1), 0:(inner + 1)], g[0:(inner + 1)])

        # Use Horner like Scheme to map solution, y, back to original space.
        # Note that we do not use the last reflector.
        update = zeros(x.shape, dtype=xtype)
        # for j in range(inner,-1,-1):
        #    update[j] += y[j]
        #    # Apply j-th reflector, (I - 2.0*w_j*w_j.T)*update
        #    update -= 2.0*dot(conjugate(W[j,:]), update)*W[j,:]
        amg_core.householder_hornerscheme(update, ravel(W), ravel(y),
                                          dimen, inner, -1, -1)

        x[:] = x + update
        r = b - ravel(A * x)

        # Apply preconditioner
        r = ravel(M * r)
        normr = norm(r)
        # Check for nan, inf
        # if isnan(r).any() or isinf(r).any():
        #    warn('inf or nan after application of preconditioner')
        #    return(postprocess(x), -1)

        # Allow user access to residual
        if callback is not None:
            callback(normr)
        if keep_r:
            residuals.append(normr)

        # Has GMRES stagnated?
        indices = (x != 0)
        if indices.any():
            change = max(abs(update[indices] / x[indices]))
            if change < 1e-12:
                # No change, halt
                return (postprocess(x), -1)

        # test for convergence
        if normr < tol:
            return (postprocess(x), 0)

    # end outer loop

    return (postprocess(x), niter)
def _get_lapack_funcs(dtype, names):
    """Look up LAPACK routines `names` for double precision `dtype`.

    A zero-length array of `dtype` is handed to
    ``scipy.linalg.get_lapack_funcs`` as the prototype that selects the
    routine flavour.  Only ``np.float64`` and ``np.complex128`` are
    accepted; anything else trips the assertion.
    """
    from scipy.linalg import get_lapack_funcs

    supported_dtypes = (np.float64, np.complex128)
    assert dtype in supported_dtypes

    prototype = np.empty(0, dtype)
    return get_lapack_funcs(names, (prototype,))
def lgmres(A, b, x0=None, tol=1e-5, maxiter=1000, M=None, callback=None,
           inner_m=30, outer_k=3, outer_v=None, store_outer_Av=True):
    """
    Solve a matrix equation using the LGMRES algorithm.

    The LGMRES algorithm [1]_ [2]_ is designed to avoid some problems
    in the convergence in restarted GMRES, and often converges in fewer
    iterations.

    Parameters
    ----------
    A : {sparse matrix, dense matrix, LinearOperator}
        The real or complex N-by-N matrix of the linear system.
    b : {array, matrix}
        Right hand side of the linear system. Has shape (N,) or (N,1).
    x0 : {array, matrix}
        Starting guess for the solution.
    tol : float, optional
        Tolerance to achieve. The algorithm terminates when either the relative
        or the absolute residual is below `tol`.
    maxiter : int, optional
        Maximum number of iterations.  Iteration will stop after maxiter
        steps even if the specified tolerance has not been achieved.
    M : {sparse matrix, dense matrix, LinearOperator}, optional
        Preconditioner for A.  The preconditioner should approximate the
        inverse of A.  Effective preconditioning dramatically improves the
        rate of convergence, which implies that fewer iterations are needed
        to reach a given error tolerance.
    callback : function, optional
        User-supplied function to call after each iteration.  It is called
        as callback(xk), where xk is the current solution vector.
    inner_m : int, optional
        Number of inner GMRES iterations per each outer iteration.
    outer_k : int, optional
        Number of vectors to carry between inner GMRES iterations.
        According to [1]_, good values are in the range of 1...3.
        However, note that if you want to use the additional vectors to
        accelerate solving multiple similar problems, larger values may
        be beneficial.
    outer_v : list of tuples, optional
        List containing tuples ``(v, Av)`` of vectors and corresponding
        matrix-vector products, used to augment the Krylov subspace, and
        carried between inner GMRES iterations. The element ``Av`` can
        be `None` if the matrix-vector product should be re-evaluated.
        This parameter is modified in-place by `lgmres`, and can be used
        to pass "guess" vectors in and out of the algorithm when solving
        similar problems.
    store_outer_Av : bool, optional
        Whether LGMRES should store also A*v in addition to vectors `v`
        in the `outer_v` list. Default is True.

    Returns
    -------
    x : array or matrix
        The converged solution.
    info : int
        Provides convergence information:

            - 0  : successful exit
            - >0 : convergence to tolerance not achieved, number of iterations
            - <0 : illegal input or breakdown

    Raises
    ------
    ValueError
        If `b` contains non-finite entries.
    RuntimeError
        If the preconditioner returns a zero vector, or the triangular
        solve of the QR-factorized least-squares problem fails.

    Notes
    -----
    The LGMRES algorithm [1]_ [2]_ is designed to
    avoid the slowing of convergence in restarted GMRES, due to
    alternating residual vectors. Typically, it often outperforms
    GMRES(m) of comparable memory requirements by some measure, or at
    least is not much worse.

    Another advantage in this algorithm is that you can supply it with
    'guess' vectors in the `outer_v` argument that augment the Krylov
    subspace. If the solution lies close to the span of these vectors,
    the algorithm converges faster. This can be useful if several very
    similar matrices need to be inverted one after another, such as in
    Newton-Krylov iteration where the Jacobian matrix often changes
    little in the nonlinear steps.

    References
    ----------
    .. [1] A.H. Baker and E.R. Jessup and T. Manteuffel,
             SIAM J. Matrix Anal. Appl. 26, 962 (2005).
    .. [2] A.H. Baker, PhD thesis, University of Colorado (2003).
             http://amath.colorado.edu/activities/thesis/allisonb/Thesis.ps

    """
    A,M,x,b,postprocess = make_system(A,M,x0,b)

    if not np.isfinite(b).all():
        raise ValueError("RHS must contain only finite numbers")

    matvec = A.matvec
    psolve = M.matvec

    if outer_v is None:
        outer_v = []

    # The typed BLAS/LAPACK routines are looked up lazily, once the dtype
    # of the first residual is known.
    axpy, dot, scal = None, None, None
    nrm2 = get_blas_funcs('nrm2', [b])

    b_norm = nrm2(b)
    if b_norm == 0:
        b_norm = 1

    # `range` (not `xrange`) so the loops run on Python 3 without a
    # compatibility shim.
    for k_outer in range(maxiter):
        r_outer = matvec(x) - b

        # -- callback
        if callback is not None:
            callback(x)

        # -- determine input type routines
        if axpy is None:
            if np.iscomplexobj(r_outer) and not np.iscomplexobj(x):
                x = x.astype(r_outer.dtype)
            axpy, dot, scal, nrm2 = get_blas_funcs(['axpy', 'dot', 'scal', 'nrm2'],
                                                   (x, r_outer))
            trtrs = get_lapack_funcs('trtrs', (x, r_outer))

        # -- check stopping condition
        r_norm = nrm2(r_outer)
        if r_norm <= tol * b_norm or r_norm <= tol:
            break

        # -- inner LGMRES iteration
        vs0 = -psolve(r_outer)
        inner_res_0 = nrm2(vs0)

        if inner_res_0 == 0:
            rnorm = nrm2(r_outer)
            raise RuntimeError("Preconditioner returned a zero vector; "
                               "|v| ~ %.1g, |M v| = 0" % rnorm)

        vs0 = scal(1.0/inner_res_0, vs0)
        vs = [vs0]
        ws = []
        y = None

        # H is stored in QR factorized form
        Q = np.ones((1, 1), dtype=vs0.dtype)
        R = np.zeros((1, 0), dtype=vs0.dtype)

        for j in range(1, 1 + inner_m + len(outer_v)):
            # -- Arnoldi process:
            #
            #    Build an orthonormal basis V and matrices W and H such that
            #        A W = V H
            #    Columns of W and V are stored in `ws` and `vs`; H is kept
            #    in QR-factorized form in `Q` and `R`.
            #
            #    The first column of V is always the residual vector, `vs0`;
            #    V has *one more column* than the other of the three matrices.
            #
            #    The other columns in V are built by feeding in, one
            #    by one, some vectors `z` and orthonormalizing them
            #    against the basis so far. The trick here is to
            #    feed in first some augmentation vectors, before
            #    starting to construct the Krylov basis on `v0`.
            #
            #    It was shown in [BJM]_ that a good choice (the LGMRES choice)
            #    for these augmentation vectors are the `dx` vectors obtained
            #    from a couple of the previous restart cycles.
            #
            #    Note especially that while `vs0` is always the first
            #    column in V, there is no reason why it should also be
            #    the first column in W. (In fact, below `vs0` comes in
            #    W only after the augmentation vectors.)
            #
            #    The rest of the algorithm then goes as in GMRES, one
            #    solves a minimization problem in the smaller subspace
            #    spanned by W (range) and V (image).
            #

            #     ++ evaluate
            v_new = None
            if j < len(outer_v) + 1:
                z, v_new = outer_v[j-1]
            elif j == len(outer_v) + 1:
                z = vs0
            else:
                z = vs[-1]

            if v_new is None:
                v_new = psolve(matvec(z))
            else:
                # Note: v_new is modified in-place below. Must make a
                # copy to ensure that the outer_v vectors are not
                # clobbered.
                v_new = v_new.copy()

            #     ++ orthogonalize
            hcur = np.zeros(j+1, dtype=Q.dtype)
            for i, v in enumerate(vs):
                alpha = dot(v, v_new)
                hcur[i] = alpha
                v_new = axpy(v, v_new, v.shape[0], -alpha)  # v_new -= alpha*v
            hcur[-1] = nrm2(v_new)

            with np.errstate(over='ignore', divide='ignore'):
                # Careful with denormals
                alpha = 1/hcur[-1]

            if np.isfinite(alpha):
                v_new = scal(alpha, v_new)
            else:
                # v_new either zero (solution in span of previous
                # vectors) or we have nans.  If we already have
                # previous vectors in R, we can discard the current
                # vector and bail out.
                if j > 1:
                    j -= 1
                    break

            vs.append(v_new)
            ws.append(z)

            # -- GMRES optimization problem

            # Add new column to H=Q*R, padding other columns with zeros
            Q2 = np.zeros((j+1, j+1), dtype=Q.dtype, order='F')
            Q2[:j,:j] = Q
            Q2[j,j] = 1

            R2 = np.zeros((j+1, j-1), dtype=R.dtype, order='F')
            R2[:j,:] = R

            Q, R = qr_insert(Q2, R2, hcur, j-1, which='col',
                             overwrite_qru=True, check_finite=False)

            # Transformed least squares problem
            # || Q R y - inner_res_0 * e_1 ||_2 = min!
            # Since R = [R'; 0], solution is y = inner_res_0 (R')^{-1} (Q^H)[:j,0]

            # Residual is immediately known
            inner_res = abs(Q[0,-1]) * inner_res_0

            # -- check for termination
            if inner_res <= tol * inner_res_0:
                break

        # -- Get the LSQ problem solution
        y, info = trtrs(R[:j,:j], Q[0,:j].conj())
        if info != 0:
            # Zero diagonal -> exact solution, but we handled that above
            raise RuntimeError("QR solution failed")
        y *= inner_res_0

        if not np.isfinite(y).all():
            # Floating point over/underflow, non-finite result from
            # matmul etc. -- report failure.
            return postprocess(x), k_outer + 1

        # -- GMRES terminated: eval solution
        dx = ws[0]*y[0]
        for w, yc in zip(ws[1:], y[1:]):
            dx = axpy(w, dx, dx.shape[0], yc)  # dx += w*yc

        # -- Store LGMRES augmentation vectors
        nx = nrm2(dx)
        if nx > 0:
            if store_outer_Av:
                q = Q.dot(R.dot(y))
                ax = vs[0]*q[0]
                for v, qc in zip(vs[1:], q[1:]):
                    ax = axpy(v, ax, ax.shape[0], qc)
                outer_v.append((dx/nx, ax/nx))
            else:
                outer_v.append((dx/nx, None))

        # -- Retain only a finite number of augmentation vectors
        while len(outer_v) > outer_k:
            del outer_v[0]

        # -- Apply step
        x += dx
    else:
        # didn't converge ...
        return postprocess(x), maxiter

    return postprocess(x), 0
import numpy as np from scipy import linalg from pystan import StanModel from .cython_util import ( copy_triu_to_tril, auto_outer, ravel_triu, unravel_triu, fro_norm_squared ) # LAPACK positive definite inverse routine dpotri_routine = linalg.get_lapack_funcs('potri') # Precalculated constant _LOG_2PI = np.log(2*np.pi) def invert_normal_params(A, b=None, out_A=None, out_b=None, cho_form=False): """Invert moment parameters into natural parameters or vice versa. Switch between moment parameters (S,m) and natural parameters (Q,r) of a multivariate normal distribution. Providing (S,m) yields (Q,r) and vice versa. Parameters ---------- A : ndarray