def _lanczos_asis(a, V, u, alpha, beta, i_start, i_end):
    """Run the plain Lanczos steps ``i_start`` .. ``i_end - 1`` in place.

    On each step ``j`` the work vector ``u`` is overwritten with
    ``a @ V[j]``, ``alpha[j]`` receives ``cublas.dotc(V[j], u)``, the
    components of ``u`` along the rows ``V[:j + 1]`` are subtracted, and
    ``beta[j]`` receives the 2-norm of what is left.  On every step but the
    last one, ``V[j + 1]`` is set to ``u / beta[j]``.

    Nothing is returned: ``V``, ``u``, ``alpha`` and ``beta`` are all
    updated in place.
    """
    last = i_end - 1
    for j in range(i_start, i_end):
        u[...] = a @ V[j]
        cublas.dotc(V[j], u, out=alpha[j])
        basis = V[:j + 1]
        u -= u.T @ basis.conj().T @ basis
        cublas.nrm2(u, out=beta[j])
        # The final step only produces alpha/beta; there is no next row.
        if j < last:
            V[j + 1] = u / beta[j]
def _update_asis(self, i_start, i_end):
    """Run the plain Lanczos steps ``i_start`` .. ``i_end - 1``.

    On each step ``j`` a fresh vector ``u = self.A @ self.V[j]`` is built,
    ``self.alpha[j]`` receives ``cublas.dotc(self.V[j], u)``, the components
    of ``u`` along the rows ``self.V[:j + 1]`` are subtracted, and
    ``self.beta[j]`` receives the 2-norm of the remainder.  On every step
    but the last one, ``self.V[j + 1]`` is set to ``u / self.beta[j]``.

    Returns:
        The (un-normalized) vector ``u`` left over from the last step.
    """
    last = i_end - 1
    for j in range(i_start, i_end):
        u = self.A @ self.V[j]
        cublas.dotc(self.V[j], u, out=self.alpha[j])
        basis = self.V[:j + 1]
        u -= u.T @ basis.conj().T @ basis
        cublas.nrm2(u, out=self.beta[j])
        # The final step only produces alpha/beta; there is no next row.
        if j < last:
            self.V[j + 1] = u / self.beta[j]
    return u
def test_nrm2(self):
    """Check ``cublas.nrm2`` against ``cupy.linalg.norm`` on a random vector.

    The result must land in the requested ``out`` target (as verified by
    ``self._check_pointer``) and agree with the reference norm within
    ``self.tol`` for both the relative and the absolute tolerance.
    """
    vec = self._make_random_vector()
    expected = cupy.linalg.norm(vec)
    # The output is created with the lower-cased dtype character.
    target = self._make_out(self.dtype.char.lower())
    actual = cublas.nrm2(vec, out=target)
    self._check_pointer(actual, target)
    cupy.testing.assert_allclose(
        actual, expected, rtol=self.tol, atol=self.tol)
def _lanczos_asis(a, V, u, alpha, beta, i_start, i_end):
    """Run Lanczos steps ``i_start`` .. ``i_end - 1`` with a breakdown guard.

    On each step ``j`` the work vector ``u`` is overwritten with
    ``a @ V[j]``, ``alpha[j]`` receives ``cublas.dotc(V[j], u)``, the
    components of ``u`` along the rows ``V[:j + 1]`` are subtracted, and
    ``beta[j]`` receives the 2-norm of the remainder.

    If, before the last step, ``beta[j]`` falls below the current threshold,
    the remaining rows ``V[j + 1:i_end]`` and ``u`` are zeroed and the
    iteration stops early.  The threshold starts at
    ``inversion_eps(a.dtype)`` and is multiplied by ``beta[i_start]`` once
    the first step has passed the check.

    Nothing is returned: ``V``, ``u``, ``alpha`` and ``beta`` are all
    updated in place.
    """
    threshold = inversion_eps(a.dtype)
    last = i_end - 1
    for j in range(i_start, i_end):
        u[...] = a @ V[j]
        cublas.dotc(V[j], u, out=alpha[j])
        basis = V[:j + 1]
        u -= u.T @ basis.conj().T @ basis
        cublas.nrm2(u, out=beta[j])

        if j >= last:
            # Final step: alpha/beta are filled in, no further row to build.
            break

        norm_u = beta[j]
        if norm_u < threshold:
            # Remainder is too small to normalize: zero out what is left.
            V[j + 1:i_end, :] = 0
            u[...] = 0
            break

        if j == i_start:
            # Make the threshold relative to the first step's beta.
            threshold *= norm_u
        V[j + 1] = u / norm_u
def eigsh(a, k=6, *, which='LM', ncv=None, maxiter=None, tol=0,
          return_eigenvectors=True):
    """Finds ``k`` eigenvalues and eigenvectors of the real symmetric matrix.

    Solves ``Ax = wx``, the standard eigenvalue problem for ``w`` eigenvalues
    with corresponding eigenvectors ``x``.

    Args:
        a (ndarray, spmatrix or LinearOperator): A symmetric square matrix
            with dimension ``(n, n)``. ``a`` must be :class:`cupy.ndarray`,
            :class:`cupyx.scipy.sparse.spmatrix` or
            :class:`cupyx.scipy.sparse.linalg.LinearOperator`.
        k (int): The number of eigenvalues and eigenvectors to compute. Must
            be ``1 <= k < n``.
        which (str): 'LM' or 'LA'. 'LM': finds ``k`` largest (in magnitude)
            eigenvalues. 'LA': finds ``k`` largest (algebraic) eigenvalues.
        ncv (int): The number of Lanczos vectors generated. Must be
            ``k + 1 < ncv < n``. If ``None``, default value is used.
        maxiter (int): Maximum number of Lanczos update iterations.
            If ``None``, default value is used.
        tol (float): Tolerance for residuals ``||Ax - wx||``. If ``0``, machine
            precision is used.
        return_eigenvectors (bool): If ``True``, returns eigenvectors in
            addition to eigenvalues.

    Returns:
        tuple:
            If ``return_eigenvectors is True``, it returns ``w`` and ``x``
            where ``w`` is eigenvalues and ``x`` is eigenvectors. Otherwise,
            it returns only ``w``.

    Raises:
        ValueError: If ``a`` is not a square 2-D matrix, if ``k`` is not in
            ``1 <= k < n``, or if ``which`` is not 'LM' or 'LA'.
        TypeError: If the dtype of ``a`` is not one of 'f', 'd', 'F', 'D'.

    .. seealso:: :func:`scipy.sparse.linalg.eigsh`

    .. note::
        This function uses the thick-restart Lanczos methods
        (https://sdm.lbl.gov/~kewu/ps/trlan.html).

    """
    # Validate the shape before indexing into it, so that an input with
    # fewer than two dimensions is reported as a ValueError.
    if a.ndim != 2 or a.shape[0] != a.shape[1]:
        raise ValueError('expected square matrix (shape: {})'.format(a.shape))
    if a.dtype.char not in 'fdFD':
        raise TypeError('unsupprted dtype (actual: {})'.format(a.dtype))
    n = a.shape[0]
    if k <= 0:
        raise ValueError('k must be greater than 0 (actual: {})'.format(k))
    if k >= n:
        raise ValueError('k must be smaller than n (actual: {})'.format(k))
    if which not in ('LM', 'LA'):
        raise ValueError('which must be \'LM\' or \'LA\' (actual: {})'
                         ''.format(which))
    # Clamp the number of Lanczos vectors to at most n - 1.
    if ncv is None:
        ncv = min(max(2 * k, k + 32), n - 1)
    else:
        ncv = min(max(ncv, k + 2), n - 1)
    if maxiter is None:
        maxiter = 10 * n
    if tol == 0:
        tol = numpy.finfo(a.dtype).eps

    alpha = cupy.zeros((ncv, ), dtype=a.dtype)
    # beta holds norms, so it uses the real counterpart of a's dtype.
    beta = cupy.zeros((ncv, ), dtype=a.dtype.char.lower())
    V = cupy.empty((ncv, n), dtype=a.dtype)

    # Set initial vector
    u = cupy.random.random((n, )).astype(a.dtype)
    V[0] = u / cublas.nrm2(u)

    # Choose Lanczos implementation, unconditionally use 'fast' for now
    update_impl = 'fast'
    if update_impl == 'fast':
        lanczos = _lanczos_fast(a, n, ncv)
    else:
        lanczos = _lanczos_asis

    # Lanczos iteration
    lanczos(a, V, u, alpha, beta, 0, ncv)

    n_iter = ncv
    w, s = _eigsh_solve_ritz(alpha, beta, None, k, which)
    x = V.T @ s

    # Compute residual
    beta_k = beta[-1] * s[-1, :]
    res = cublas.nrm2(beta_k)

    while res > tol and n_iter < maxiter:
        # Setup for thick-restart
        beta[:k] = 0
        alpha[:k] = w
        V[:k] = x.T

        # Remove the components of u along the k kept vectors, then use
        # the normalized remainder as row k.
        u -= u.T @ V[:k].conj().T @ V[:k]
        V[k] = u / cublas.nrm2(u)

        u[...] = a @ V[k]
        cublas.dotc(V[k], u, out=alpha[k])
        u -= alpha[k] * V[k]
        u -= V[:k].T @ beta_k
        cublas.nrm2(u, out=beta[k])
        V[k + 1] = u / beta[k]

        # Lanczos iteration
        lanczos(a, V, u, alpha, beta, k + 1, ncv)

        n_iter += ncv - k
        w, s = _eigsh_solve_ritz(alpha, beta, beta_k, k, which)
        x = V.T @ s

        # Compute residual
        beta_k = beta[-1] * s[-1, :]
        res = cublas.nrm2(beta_k)

    if return_eigenvectors:
        idx = cupy.argsort(w)
        return w[idx], x[:, idx]
    else:
        return cupy.sort(w)
def lsmr(A, b, x0=None, damp=0.0, atol=1e-6, btol=1e-6, conlim=1e8,
         maxiter=None):
    """Iterative solver for least-squares problems.

    lsmr solves the system of linear equations ``Ax = b``. If the system
    is inconsistent, it solves the least-squares problem ``min ||b - Ax||_2``.
    A is a rectangular matrix of dimension m-by-n, where all cases are
    allowed: m = n, m > n, or m < n. b is a vector of length m.
    The matrix A may be dense or sparse (usually sparse).

    Args:
        A (ndarray, spmatrix or LinearOperator): The real or complex
            matrix of the linear system. ``A`` must be
            :class:`cupy.ndarray`, :class:`cupyx.scipy.sparse.spmatrix` or
            :class:`cupyx.scipy.sparse.linalg.LinearOperator`.
        b (cupy.ndarray): Right hand side of the linear system with shape
            ``(m,)`` or ``(m, 1)``.
        x0 (cupy.ndarray): Starting guess for the solution. If None zeros are
            used.
        damp (float): Damping factor for regularized least-squares.
            `lsmr` solves the regularized least-squares problem
            ::

                min ||(b) - (  A   )x||
                    ||(0)   (damp*I) ||_2

            where damp is a scalar. If damp is None or 0, the system
            is solved without regularization.
        atol, btol (float):
            Stopping tolerances. `lsmr` continues iterations until a
            certain backward error estimate is smaller than some quantity
            depending on atol and btol.
        conlim (float): `lsmr` terminates if an estimate of ``cond(A)`` i.e.
            condition number of matrix exceeds `conlim`. If `conlim` is None,
            the default value is 1e+8.
        maxiter (int): Maximum number of iterations. If None,
            ``5 * min(m, n)`` is used.

    Returns:
        tuple:
            - `x` (ndarray): Least-square solution returned.
            - `istop` (int): istop gives the reason for stopping::

                    0 means x=0 is a solution.

                    1 means x is an approximate solution to A*x = B,
                    according to atol and btol.

                    2 means x approximately solves the least-squares problem
                    according to atol.

                    3 means COND(A) seems to be greater than CONLIM.

                    4 is the same as 1 with atol = btol = eps (machine
                    precision)

                    5 is the same as 2 with atol = eps.

                    6 is the same as 3 with CONLIM = 1/eps.

                    7 means ITN reached maxiter before the other stopping
                    conditions were satisfied.

            - `itn` (int): Number of iterations used.
            - `normr` (float): ``norm(b-Ax)``
            - `normar` (float): ``norm(A^T (b - Ax))``
            - `norma` (float): ``norm(A)``
            - `conda` (float): Condition number of A.
            - `normx` (float): ``norm(x)``

    Raises:
        ValueError: If ``x0`` is given and its shape is neither ``(n,)`` nor
            ``(n, 1)``.

    .. seealso:: :func:`scipy.sparse.linalg.lsmr`

    References:
        D. C.-L. Fong and M. A. Saunders, "LSMR: An iterative algorithm for
        sparse least-squares problems", SIAM J. Sci. Comput.,
        vol. 33, pp. 2950-2971, 2011.
    """
    # The docstring allows None for damp and conlim; map it to the
    # documented meaning instead of failing later on arithmetic with None.
    if damp is None:
        damp = 0.0
    if conlim is None:
        conlim = 1e8

    A = _interface.aslinearoperator(A)
    b = b.squeeze()
    matvec = A.matvec
    rmatvec = A.rmatvec
    m, n = A.shape
    minDim = min([m, n])

    if maxiter is None:
        maxiter = minDim * 5

    u = b.copy()
    normb = cublas.nrm2(b)
    # With no starting guess the initial residual is b itself, so beta
    # starts out as a copy of norm(b).
    beta = normb.copy()
    normb = normb.get().item()
    if x0 is None:
        x = cupy.zeros((n,), dtype=A.dtype)
    else:
        if not (x0.shape == (n,) or x0.shape == (n, 1)):
            raise ValueError('x0 has incompatible dimensions')
        x = x0.astype(A.dtype).ravel()
        u -= matvec(x)
        beta = cublas.nrm2(u)
    # The *_cpu variables are host-side copies of the device scalars, used
    # for the scalar recurrences and the branching below.
    beta_cpu = beta.get().item()

    v = cupy.zeros(n)
    alpha = cupy.zeros((), dtype=beta.dtype)
    alpha_cpu = 0

    if beta_cpu > 0:
        u /= beta
        v = rmatvec(u)
        alpha = cublas.nrm2(v)
        alpha_cpu = alpha.get().item()
    if alpha_cpu > 0:
        v /= alpha

    # Initialize variables for 1st iteration.
    itn = 0
    zetabar = alpha_cpu * beta_cpu
    alphabar = alpha_cpu
    rho = 1
    rhobar = 1
    cbar = 1
    sbar = 0

    h = v.copy()
    hbar = cupy.zeros(n)
    # x = cupy.zeros(n)

    # Initialize variables for estimation of ||r||.
    betadd = beta_cpu
    betad = 0
    rhodold = 1
    tautildeold = 0
    thetatilde = 0
    zeta = 0
    d = 0

    # Initialize variables for estimation of ||A|| and cond(A)
    normA2 = alpha_cpu * alpha_cpu
    maxrbar = 0
    minrbar = 1e+100
    normA = alpha_cpu
    condA = 1
    normx = 0

    # Items for use in stopping rules.
    istop = 0
    ctol = 0
    if conlim > 0:
        ctol = 1 / conlim
    normr = beta_cpu

    # Golub-Kahan process terminates when either alpha or beta is zero.
    # Reverse the order here from the original matlab code because
    # there was an error on return when arnorm==0
    normar = alpha_cpu * beta_cpu
    if normar == 0:
        return x, istop, itn, normr, normar, normA, condA, normx

    # Main iteration loop.
    while itn < maxiter:
        itn = itn + 1

        # Perform the next step of the bidiagonalization to obtain the
        # next beta, u, alpha, v.  These satisfy the relations
        #     beta*u  =  a*v  -  alpha*u,
        #     alpha*v  =  A'*u  -  beta*v.

        u *= -alpha
        u += matvec(v)
        beta = cublas.nrm2(u)  # norm(u)
        beta_cpu = beta.get().item()

        if beta_cpu > 0:
            u /= beta
            v *= -beta
            v += rmatvec(u)
            alpha = cublas.nrm2(v)  # norm(v)
            alpha_cpu = alpha.get().item()
            if alpha_cpu > 0:
                v /= alpha

        # At this point, beta = beta_{k+1}, alpha = alpha_{k+1}.

        # Construct rotation Qhat_{k,2k+1}.

        chat, shat, alphahat = _symOrtho(alphabar, damp)

        # Use a plane rotation (Q_i) to turn B_i to R_i

        rhoold = rho
        c, s, rho = _symOrtho(alphahat, beta_cpu)
        thetanew = s * alpha_cpu
        alphabar = c * alpha_cpu

        # Use a plane rotation (Qbar_i) to turn R_i^T to R_i^bar

        rhobarold = rhobar
        zetaold = zeta
        thetabar = sbar * rho
        rhotemp = cbar * rho
        cbar, sbar, rhobar = _symOrtho(cbar * rho, thetanew)
        zeta = cbar * zetabar
        zetabar = - sbar * zetabar

        # Update h, h_hat, x.

        # hbar = h - (thetabar * rho / (rhoold * rhobarold)) * hbar
        hbar *= -(thetabar * rho / (rhoold * rhobarold))
        hbar += h
        x += (zeta / (rho * rhobar)) * hbar
        # h = v - (thetanew / rho) * h
        h *= -(thetanew / rho)
        h += v

        # Estimate of ||r||.

        # Apply rotation Qhat_{k,2k+1}.
        betaacute = chat * betadd
        betacheck = -shat * betadd

        # Apply rotation Q_{k,k+1}.
        betahat = c * betaacute
        betadd = -s * betaacute

        # Apply rotation Qtilde_{k-1}.
        # betad = betad_{k-1} here.

        thetatildeold = thetatilde
        ctildeold, stildeold, rhotildeold = _symOrtho(rhodold, thetabar)
        thetatilde = stildeold * rhobar
        rhodold = ctildeold * rhobar
        betad = - stildeold * betad + ctildeold * betahat

        # betad   = betad_k here.
        # rhodold = rhod_k  here.

        tautildeold = (zetaold - thetatildeold * tautildeold) / rhotildeold
        taud = (zeta - thetatilde * tautildeold) / rhodold
        d = d + betacheck * betacheck
        normr = numpy.sqrt(d + (betad - taud)**2 + betadd * betadd)

        # Estimate ||A||.
        normA2 = normA2 + beta_cpu * beta_cpu
        normA = numpy.sqrt(normA2)
        normA2 = normA2 + alpha_cpu * alpha_cpu

        # Estimate cond(A).
        maxrbar = max(maxrbar, rhobarold)
        if itn > 1:
            minrbar = min(minrbar, rhobarold)
        condA = max(maxrbar, rhotemp) / min(minrbar, rhotemp)

        # Test for convergence.

        # Compute norms for convergence testing.
        normar = abs(zetabar)
        normx = cublas.nrm2(x)
        normx = normx.get().item()

        # Now use these norms to estimate certain other quantities,
        # some of which will be small near a solution.

        test1 = normr / normb
        if (normA * normr) != 0:
            test2 = normar / (normA * normr)
        else:
            # numpy.infty was removed in NumPy 2.0; numpy.inf is the
            # supported spelling on every NumPy version.
            test2 = numpy.inf
        test3 = 1 / condA
        t1 = test1 / (1 + normA*normx/normb)
        rtol = btol + atol*normA*normx/normb

        # The following tests guard against extremely small values of
        # atol, btol or ctol.  (The user may have set any or all of
        # the parameters atol, btol, conlim  to 0.)
        # The effect is equivalent to the normal tests using
        # atol = eps,  btol = eps,  conlim = 1/eps.

        # Later assignments override earlier ones, so the lowest applicable
        # istop code is the one reported.
        if itn >= maxiter:
            istop = 7
        if 1 + test3 <= 1:
            istop = 6
        if 1 + test2 <= 1:
            istop = 5
        if 1 + t1 <= 1:
            istop = 4

        # Allow for tolerances set by the user.

        if test3 <= ctol:
            istop = 3
        if test2 <= atol:
            istop = 2
        if test1 <= rtol:
            istop = 1

        if istop > 0:
            break

    # The return type of SciPy is always float64. Therefore, x must be casted.
    x = x.astype(numpy.float64)

    return x, istop, itn, normr, normar, normA, condA, normx
def gmres(A, b, x0=None, tol=1e-5, restart=None, maxiter=None, M=None,
          callback=None, atol=None, callback_type=None):
    """Uses Generalized Minimal RESidual iteration to solve ``Ax = b``.

    Args:
        A (ndarray, spmatrix or LinearOperator): The real or complex
            matrix of the linear system with shape ``(n, n)``. ``A`` must be
            :class:`cupy.ndarray`, :class:`cupyx.scipy.sparse.spmatrix` or
            :class:`cupyx.scipy.sparse.linalg.LinearOperator`.
        b (cupy.ndarray): Right hand side of the linear system with shape
            ``(n,)`` or ``(n, 1)``.
        x0 (cupy.ndarray): Starting guess for the solution.
        tol (float): Tolerance for convergence.
        restart (int): Number of iterations between restarts. Larger values
            increase iteration cost, but may be necessary for convergence.
            If ``None``, 20 is used; the value is capped at ``n``.
        maxiter (int): Maximum number of iterations. If ``None``,
            ``10 * n`` is used.
        M (ndarray, spmatrix or LinearOperator): Preconditioner for ``A``.
            The preconditioner should approximate the inverse of ``A``.
            ``M`` must be :class:`cupy.ndarray`,
            :class:`cupyx.scipy.sparse.spmatrix` or
            :class:`cupyx.scipy.sparse.linalg.LinearOperator`.
        callback (function): User-specified function to call on every restart.
            It is called as ``callback(arg)``, where ``arg`` is selected by
            ``callback_type``.
        callback_type (str): 'x' or 'pr_norm'. If 'x', the current solution
            vector is used as an argument of callback function. If 'pr_norm',
            relative (preconditioned) residual norm is used as an argument.
        atol (float): Tolerance for convergence.

    Returns:
        tuple:
            It returns ``x`` (cupy.ndarray) and ``info`` (int) where ``x`` is
            the converged solution and ``info`` provides convergence
            information. ``info`` is ``0`` when the residual norm reached
            the tolerance; otherwise it is the number of iterations that
            were performed before giving up.

    Raises:
        ValueError: If ``callback_type`` is neither 'x' nor 'pr_norm'.

    Reference:
        M. Wang, H. Klie, M. Parashar and H. Sudan, "Solving Sparse Linear
        Systems on NVIDIA Tesla GPUs", ICCS 2009 (2009).

    .. seealso:: :func:`scipy.sparse.linalg.gmres`
    """
    A, M, x, b = _make_system(A, M, x0, b)
    matvec = A.matvec
    psolve = M.matvec

    n = A.shape[0]
    if n == 0:
        return cupy.empty_like(b), 0
    b_norm = cupy.linalg.norm(b)
    if b_norm == 0:
        return b, 0
    # The effective absolute tolerance is never below tol * ||b||.
    if atol is None:
        atol = tol * float(b_norm)
    else:
        atol = max(float(atol), tol * float(b_norm))
    if maxiter is None:
        maxiter = n * 10
    if restart is None:
        restart = 20
    restart = min(restart, n)
    if callback_type is None:
        callback_type = 'pr_norm'
    if callback_type not in ('x', 'pr_norm'):
        raise ValueError('Unknown callback_type: {}'.format(callback_type))
    # Without a callback there is nothing to dispatch on inside the loop.
    if callback is None:
        callback_type = None

    V = cupy.empty((n, restart), dtype=A.dtype, order='F')
    H = cupy.zeros((restart + 1, restart), dtype=A.dtype, order='F')
    e = numpy.zeros((restart + 1, ), dtype=A.dtype)

    compute_hu = _make_compute_hu(V)

    iters = 0
    while True:
        mx = psolve(x)
        r = b - matvec(mx)
        r_norm = cublas.nrm2(r)
        if callback_type == 'x':
            callback(mx)
        elif callback_type == 'pr_norm' and iters > 0:
            callback(r_norm / b_norm)
        if r_norm <= atol or iters >= maxiter:
            break
        v = r / r_norm
        V[:, 0] = v
        e[0] = r_norm

        # Arnoldi iteration
        for j in range(restart):
            z = psolve(v)
            u = matvec(z)
            H[:j + 1, j], u = compute_hu(u, j)
            cublas.nrm2(u, out=H[j + 1, j])
            if j + 1 < restart:
                v = u / H[j + 1, j]
                V[:, j + 1] = v

        # Note: The least-square solution to equation Hy = e is computed on CPU
        # because it is faster if the matrix size is small.
        ret = numpy.linalg.lstsq(cupy.asnumpy(H), e)
        y = cupy.array(ret[0])
        x += V @ y
        iters += restart

    # iters advances in steps of `restart`, so it can step past maxiter
    # without ever being equal to it; compare with >= so that a run that
    # hit the iteration limit without converging is always reported.
    info = 0
    if iters >= maxiter and not (r_norm <= atol):
        info = iters
    return mx, info
def cg(A, b, x0=None, tol=1e-5, maxiter=None, M=None, callback=None,
       atol=None):
    """Solve ``Ax = b`` with the Conjugate Gradient iteration.

    Args:
        A (ndarray, spmatrix or LinearOperator): Real or complex matrix of
            the linear system, of shape ``(n, n)``. It must be hermitian and
            positive definite, given as :class:`cupy.ndarray`,
            :class:`cupyx.scipy.sparse.spmatrix` or
            :class:`cupyx.scipy.sparse.linalg.LinearOperator`.
        b (cupy.ndarray): Right hand side, of shape ``(n,)`` or ``(n, 1)``.
        x0 (cupy.ndarray): Initial guess for the solution.
        tol (float): Convergence tolerance, scaled by the norm of ``b``.
        maxiter (int): Upper bound on the number of iterations. ``10 * n``
            is used when ``None``.
        M (ndarray, spmatrix or LinearOperator): Preconditioner for ``A``;
            it should approximate the inverse of ``A``. Same accepted types
            as ``A``.
        callback (function): Called as ``callback(xk)`` after every
            iteration, with ``xk`` the current solution vector.
        atol (float): Convergence tolerance. The effective threshold is the
            larger of ``atol`` and ``tol * norm(b)``.

    Returns:
        tuple:
            ``(x, info)``: ``x`` (cupy.ndarray) is the computed solution.
            ``info`` (int) is ``0`` unless ``maxiter`` iterations were
            performed without the residual norm dropping to the threshold,
            in which case it is the iteration count.

    .. seealso:: :func:`scipy.sparse.linalg.cg`
    """
    A, M, x, b = _make_system(A, M, x0, b)
    apply_a = A.matvec
    apply_m = M.matvec

    n = A.shape[0]
    if maxiter is None:
        maxiter = n * 10

    # Trivial systems are answered without iterating.
    if n == 0:
        return cupy.empty_like(b), 0
    b_norm = cupy.linalg.norm(b)
    if b_norm == 0:
        return b, 0

    if atol is not None:
        atol = max(float(atol), tol * float(b_norm))
    else:
        atol = tol * float(b_norm)

    residual = b - apply_a(x)
    iters = 0
    rho = 0
    while iters < maxiter:
        z = apply_m(residual)
        rho_prev, rho = rho, cublas.dotc(residual, z)
        # The first search direction is the preconditioned residual itself.
        p = z if iters == 0 else z + (rho / rho_prev) * p
        q = apply_a(p)
        step = rho / cublas.dotc(p, q)
        x = x + step * p
        residual = residual - step * q
        iters += 1
        if callback is not None:
            callback(x)
        resid_norm = cublas.nrm2(residual)
        if resid_norm <= atol:
            break

    exhausted = iters == maxiter and not (resid_norm <= atol)
    return x, (iters if exhausted else 0)
def cg(A, b, x0=None, tol=1e-5, maxiter=None, M=None, callback=None,
       atol=None):
    """Solve ``Ax = b`` with the Conjugate Gradient iteration.

    Args:
        A (cupy.ndarray or cupyx.scipy.sparse.spmatrix): Real or complex
            matrix of the linear system, of shape ``(n, n)``. It must be
            hermitian and positive definite.
        b (cupy.ndarray): Right hand side, of shape ``(n,)`` or ``(n, 1)``.
        x0 (cupy.ndarray): Initial guess for the solution. Zeros are used
            when ``None``.
        tol (float): Convergence tolerance, scaled by the norm of ``b``.
        maxiter (int): Upper bound on the number of iterations. ``10 * n``
            is used when ``None``.
        M (cupy.ndarray or cupyx.scipy.sparse.spmatrix): Preconditioner for
            ``A``; it should approximate the inverse of ``A``.
        callback (function): Called as ``callback(xk)`` after every
            iteration, with ``xk`` the current solution vector.
        atol (float): Convergence tolerance. The effective threshold is the
            larger of ``atol`` and ``tol * norm(b)``.

    Returns:
        tuple:
            ``(x, info)``: ``x`` (cupy.ndarray) is the computed solution.
            ``info`` (int) is ``0`` unless ``maxiter`` iterations were
            performed without the residual norm dropping to the threshold,
            in which case it is the iteration count.

    Raises:
        ValueError: If ``A`` is not a square 2-D matrix, or if ``b`` or
            ``x0`` has a shape other than ``(n,)`` or ``(n, 1)``.
        TypeError: If the dtype of ``A`` is not one of 'f', 'd', 'F', 'D'.

    .. seealso:: :func:`scipy.sparse.linalg.cg`
    """
    if A.ndim != 2 or A.shape[0] != A.shape[1]:
        raise ValueError('expected square matrix (shape: {})'.format(A.shape))
    if A.dtype.char not in 'fdFD':
        raise TypeError('unsupprted dtype (actual: {})'.format(A.dtype))

    n = A.shape[0]
    if b.shape != (n, ) and b.shape != (n, 1):
        raise ValueError('b has incompatible dimensins')
    b = b.astype(A.dtype).ravel()

    # Trivial systems are answered without iterating.
    if n == 0:
        return cupy.empty_like(b), 0
    b_norm = cupy.linalg.norm(b)
    if b_norm == 0:
        return b, 0

    if atol is not None:
        atol = max(float(atol), tol * float(b_norm))
    else:
        atol = tol * float(b_norm)

    if x0 is not None:
        if x0.shape != (n, ) and x0.shape != (n, 1):
            raise ValueError('x0 has incompatible dimensins')
        x = x0.astype(A.dtype).ravel()
    else:
        x = cupy.zeros((n, ), dtype=A.dtype)

    if maxiter is None:
        maxiter = n * 10

    apply_a, apply_m = _make_funcs(A, M)

    residual = b - apply_a(x)
    iters = 0
    rho = 0
    while iters < maxiter:
        z = apply_m(residual)
        rho_prev, rho = rho, cublas.dotc(residual, z)
        # The first search direction is the preconditioned residual itself.
        p = z if iters == 0 else z + (rho / rho_prev) * p
        q = apply_a(p)
        step = rho / cublas.dotc(p, q)
        x = x + step * p
        residual = residual - step * q
        iters += 1
        if callback is not None:
            callback(x)
        resid_norm = cublas.nrm2(residual)
        if resid_norm <= atol:
            break

    exhausted = iters == maxiter and not (resid_norm <= atol)
    return x, (iters if exhausted else 0)