def test_spmv_hpmv(self): seed(1234) for ind, dtype in enumerate(DTYPES+COMPLEX_DTYPES): n = 3 A = rand(n, n).astype(dtype) if ind > 1: A += rand(n, n)*1j A = A.astype(dtype) A = A + A.T if ind < 4 else A + A.conj().T c, r = tril_indices(n) Ap = A[r, c] x = rand(n).astype(dtype) y = rand(n).astype(dtype) xlong = arange(2*n).astype(dtype) ylong = ones(2*n).astype(dtype) alpha, beta = dtype(1.25), dtype(2) if ind > 3: func, = get_blas_funcs(('hpmv',), dtype=dtype) else: func, = get_blas_funcs(('spmv',), dtype=dtype) y1 = func(n=n, alpha=alpha, ap=Ap, x=x, y=y, beta=beta) y2 = alpha * A.dot(x) + beta * y assert_array_almost_equal(y1, y2) # Test inc and offsets y1 = func(n=n-1, alpha=alpha, beta=beta, x=xlong, y=ylong, ap=Ap, incx=2, incy=2, offx=n, offy=n) y2 = (alpha * A[:-1, :-1]).dot(xlong[3::2]) + beta * ylong[3::2] assert_array_almost_equal(y1[3::2], y2) assert_almost_equal(y1[4], ylong[4])
def test_sbmv_hbmv(self): seed(1234) for ind, dtype in enumerate(DTYPES): n = 6 k = 2 A = zeros((n, n), dtype=dtype) Ab = zeros((k+1, n), dtype=dtype) # Form the array and its packed banded storage A[arange(n), arange(n)] = rand(n) for ind2 in range(1, k+1): temp = rand(n-ind2) A[arange(n-ind2), arange(ind2, n)] = temp Ab[-1-ind2, ind2:] = temp A = A.astype(dtype) A = A + A.T if ind < 2 else A + A.conj().T Ab[-1, :] = diag(A) x = rand(n).astype(dtype) y = rand(n).astype(dtype) alpha, beta = dtype(1.25), dtype(3) if ind > 1: func, = get_blas_funcs(('hbmv',), dtype=dtype) else: func, = get_blas_funcs(('sbmv',), dtype=dtype) y1 = func(k=k, alpha=alpha, a=Ab, x=x, y=y, beta=beta) y2 = alpha * A.dot(x) + beta * y assert_array_almost_equal(y1, y2)
def test_spr_hpr(self): seed(1234) for ind, dtype in enumerate(DTYPES+COMPLEX_DTYPES): n = 3 A = rand(n, n).astype(dtype) if ind > 1: A += rand(n, n)*1j A = A.astype(dtype) A = A + A.T if ind < 4 else A + A.conj().T c, r = tril_indices(n) Ap = A[r, c] x = rand(n).astype(dtype) alpha = (DTYPES+COMPLEX_DTYPES)[mod(ind, 4)](2.5) if ind > 3: func, = get_blas_funcs(('hpr',), dtype=dtype) y2 = alpha * x[:, None].dot(x[None, :].conj()) + A else: func, = get_blas_funcs(('spr',), dtype=dtype) y2 = alpha * x[:, None].dot(x[None, :]) + A y1 = func(n=n, alpha=alpha, ap=Ap, x=x) y1f = zeros((3, 3), dtype=dtype) y1f[r, c] = y1 y1f[c, r] = y1.conj() if ind > 3 else y1 assert_array_almost_equal(y1f, y2)
def test_spr2_hpr2(self): seed(1234) for ind, dtype in enumerate(DTYPES): n = 3 A = rand(n, n).astype(dtype) if ind > 1: A += rand(n, n)*1j A = A.astype(dtype) A = A + A.T if ind < 2 else A + A.conj().T c, r = tril_indices(n) Ap = A[r, c] x = rand(n).astype(dtype) y = rand(n).astype(dtype) alpha = dtype(2) if ind > 1: func, = get_blas_funcs(('hpr2',), dtype=dtype) else: func, = get_blas_funcs(('spr2',), dtype=dtype) u = alpha.conj() * x[:, None].dot(y[None, :].conj()) y2 = A + u + u.conj().T y1 = func(n=n, alpha=alpha, x=x, y=y, ap=Ap) y1f = zeros((3, 3), dtype=dtype) y1f[r, c] = y1 y1f[[1, 2, 2], [0, 0, 1]] = y1[[1, 3, 4]].conj() assert_array_almost_equal(y1f, y2)
def _have_blas_gemm():
    try:
        linalg.get_blas_funcs(["gemm"])
        return True
    except (AttributeError, ValueError):
        warnings.warn("Could not import BLAS, falling back to np.dot")
        return False
def test_get_blas_funcs(): # check that it returns Fortran code for arrays that are # fortran-ordered f1, f2, f3 = get_blas_funcs( ('axpy', 'axpy', 'axpy'), (np.empty((2,2), dtype=np.complex64, order='F'), np.empty((2,2), dtype=np.complex128, order='C')) ) # get_blas_funcs will choose libraries depending on most generic # array assert_equal(f1.typecode, 'z') assert_equal(f1.module_name, 'cblas') assert_equal(f2.typecode, 'z') assert_equal(f2.module_name, 'cblas') # check defaults. f1 = get_blas_funcs('rotg') assert_equal(f1.typecode, 'd') # check also dtype interface f1 = get_blas_funcs('gemm', dtype=np.complex64) assert_equal(f1.typecode, 'c') f1 = get_blas_funcs('gemm', dtype='F') assert_equal(f1.typecode, 'c')
def test_get_blas_funcs_alias(): # check alias for get_blas_funcs f, g = get_blas_funcs(('nrm2', 'dot'), dtype=np.complex64) assert f.typecode == 'c' assert g.typecode == 'c' f, g, h = get_blas_funcs(('dot', 'dotc', 'dotu'), dtype=np.float64) assert f is g assert f is h
def py_rect_maxvol(A, tol=1., maxK=None, min_add_K=None, minK=None,
                   start_maxvol_iters=10, identity_submatrix=True):
    """Python implementation of rectangular 2-volume maximization.
    For information see :py:func:`rect_maxvol` function"""
    # tol2 - square of parameter tol
    tol2 = tol**2
    # N - number of rows, r - number of columns of matrix A
    N, r = A.shape
    if N <= r:
        return np.arange(N, dtype=np.int32), np.eye(N, dtype=A.dtype)
    if maxK is None or maxK > N:
        maxK = N
    if maxK < r:
        maxK = r
    if minK is None or minK < r:
        minK = r
    if minK > N:
        minK = N
    if min_add_K is not None:
        minK = max(minK, r + min_add_K)
    if minK > maxK:
        minK = maxK
        # raise ValueError('minK value cannot be greater than maxK value')
    index = np.zeros(N, dtype=np.int32)
    chosen = np.ones(N)
    tmp_index, C = py_maxvol(A, tol=1, max_iters=start_maxvol_iters)
    index[:r] = tmp_index
    chosen[tmp_index] = 0
    C = np.asfortranarray(C)
    # compute square 2-norms of each row in matrix C
    row_norm_sqr = np.array([chosen[i]*np.linalg.norm(C[i], 2)**2
                             for i in range(N)])
    # find maximum value in row_norm_sqr
    i = np.argmax(row_norm_sqr)
    K = r
    # set cgeru or zgeru for complex numbers and dger or sger for float numbers
    try:
        ger = get_blas_funcs('geru', [C])
    except ValueError:
        ger = get_blas_funcs('ger', [C])
    while (row_norm_sqr[i] > tol2 and K < maxK) or K < minK:
        # add i to index and recompute C and the square norms of each row
        # via the Sherman-Morrison formula
        index[K] = i
        chosen[i] = 0
        c = C[i].copy()
        v = C.dot(c.conj())
        l = 1.0/(1+v[i])
        ger(-l, v, c, a=C, overwrite_a=1)
        C = np.hstack([C, l*v.reshape(-1, 1)])
        row_norm_sqr -= (l*v*v.conj()).real
        row_norm_sqr *= chosen
        # find maximum value in row_norm_sqr
        i = row_norm_sqr.argmax()
        K += 1
    if identity_submatrix:
        C[index[:K]] = np.eye(K, dtype=C.dtype)
    return index[:K].copy(), C
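# A hedged usage sketch for py_rect_maxvol (ours, not part of the original
# source). It relies on the documented maxvol property that the returned
# coefficients satisfy A ~= C.dot(A[index]) and, with identity_submatrix=True,
# that the chosen rows of C form an identity block.
import numpy as np

rng = np.random.RandomState(0)
A_demo = rng.randn(100, 5)
index_demo, C_demo = py_rect_maxvol(A_demo, tol=1.1)
assert np.allclose(A_demo, C_demo.dot(A_demo[index_demo]))
assert np.allclose(C_demo[index_demo], np.eye(len(index_demo)))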
def test_fast_dot():
    """Check the fast_dot BLAS wrapper function"""
    rng = np.random.RandomState(42)
    A = rng.random_sample([2, 10])
    B = rng.random_sample([2, 10])
    try:
        linalg.get_blas_funcs('gemm')
        has_blas = True
    except (AttributeError, ValueError):
        has_blas = False
def test_inplace_swap_column(): X = np.array([[0, 3, 0], [2, 4, 0], [0, 0, 0], [9, 8, 7], [4, 0, 5]], dtype=np.float64) X_csr = sp.csr_matrix(X) X_csc = sp.csc_matrix(X) swap = linalg.get_blas_funcs(('swap',), (X,)) swap = swap[0] X[:, 0], X[:, -1] = swap(X[:, 0], X[:, -1]) inplace_swap_column(X_csr, 0, -1) inplace_swap_column(X_csc, 0, -1) assert_array_equal(X_csr.toarray(), X_csc.toarray()) assert_array_equal(X, X_csc.toarray()) assert_array_equal(X, X_csr.toarray()) X[:, 0], X[:, 1] = swap(X[:, 0], X[:, 1]) inplace_swap_column(X_csr, 0, 1) inplace_swap_column(X_csc, 0, 1) assert_array_equal(X_csr.toarray(), X_csc.toarray()) assert_array_equal(X, X_csc.toarray()) assert_array_equal(X, X_csr.toarray()) assert_raises(TypeError, inplace_swap_column, X_csr.tolil()) X = np.array([[0, 3, 0], [2, 4, 0], [0, 0, 0], [9, 8, 7], [4, 0, 5]], dtype=np.float32) X_csr = sp.csr_matrix(X) X_csc = sp.csc_matrix(X) swap = linalg.get_blas_funcs(('swap',), (X,)) swap = swap[0] X[:, 0], X[:, -1] = swap(X[:, 0], X[:, -1]) inplace_swap_column(X_csr, 0, -1) inplace_swap_column(X_csc, 0, -1) assert_array_equal(X_csr.toarray(), X_csc.toarray()) assert_array_equal(X, X_csc.toarray()) assert_array_equal(X, X_csr.toarray()) X[:, 0], X[:, 1] = swap(X[:, 0], X[:, 1]) inplace_swap_column(X_csr, 0, 1) inplace_swap_column(X_csc, 0, 1) assert_array_equal(X_csr.toarray(), X_csc.toarray()) assert_array_equal(X, X_csc.toarray()) assert_array_equal(X, X_csr.toarray()) assert_raises(TypeError, inplace_swap_column, X_csr.tolil())
def test_gbmv(self): seed(1234) for ind, dtype in enumerate(DTYPES): n = 7 m = 5 kl = 1 ku = 2 # fake a banded matrix via toeplitz A = toeplitz(append(rand(kl+1), zeros(m-kl-1)), append(rand(ku+1), zeros(n-ku-1))) A = A.astype(dtype) Ab = zeros((kl+ku+1, n), dtype=dtype) # Form the banded storage Ab[2, :5] = A[0, 0] # diag Ab[1, 1:6] = A[0, 1] # sup1 Ab[0, 2:7] = A[0, 2] # sup2 Ab[3, :4] = A[1, 0] # sub1 x = rand(n).astype(dtype) y = rand(m).astype(dtype) alpha, beta = dtype(3), dtype(-5) func, = get_blas_funcs(('gbmv',), dtype=dtype) y1 = func(m=m, n=n, ku=ku, kl=kl, alpha=alpha, a=Ab, x=x, y=y, beta=beta) y2 = alpha * A.dot(x) + beta * y assert_array_almost_equal(y1, y2)
def _update_dict_slow(self, subset, D_range): """Update dictionary from statistic Parameters ---------- subset: ndarray (len_subset), Mask used on X """ D_subset = self.D_[:, subset] if self.projection == "full": norm = enet_norm(self.D_, self.l1_ratio) else: norm = enet_norm(D_subset, self.l1_ratio) R = self.B_[:, subset] - np.dot(D_subset.T, self.A_).T ger, = linalg.get_blas_funcs(("ger",), (self.A_, D_subset)) for k in D_range: ger(1.0, self.A_[k], D_subset[k], a=R, overwrite_a=True) # R += np.dot(stat.A[:, j].reshape(n_components, 1), D_subset[k] = R[k] / (self.A_[k, k]) if self.projection == "full": self.D_[k][subset] = D_subset[k] self.D_[k] = enet_projection(self.D_[k], norm[k], self.l1_ratio) D_subset[k] = self.D_[k][subset] else: D_subset[k] = enet_projection(D_subset[k], norm[k], self.l1_ratio) ger(-1.0, self.A_[k], D_subset[k], a=R, overwrite_a=True) # R -= np.dot(stat.A[:, j].reshape(n_components, 1), if self.projection == "partial": self.D_[:, subset] = D_subset
def axpy(x, y, a=1.0):
    """
    Quick level-1 call to BLAS: y = a*x + y

    Parameters
    ----------
    x : array_like
        n x 1 real or complex vector
    y : array_like
        n x 1 real or complex vector
    a : float
        real or complex scalar

    Returns
    -------
    y : array_like
        Input variable y is rewritten

    Notes
    -----
    The call to get_blas_funcs automatically determines the prefix for the
    BLAS call.
    """
    from scipy.linalg import get_blas_funcs
    fn = get_blas_funcs(['axpy'], [x, y])[0]
    # Pass ``a`` by keyword: in the BLAS axpy signature the positional
    # argument after y is the vector length n, not the scalar a.
    fn(x, y, a=a)
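# A minimal sketch of calling the axpy wrapper above (demo values are ours;
# with 1-d float64 arrays the underlying BLAS routine resolves to daxpy and
# y is updated in place).
import numpy as np

x_demo = np.array([1.0, 2.0, 3.0])
y_demo = np.array([4.0, 5.0, 6.0])
axpy(x_demo, y_demo, a=2.0)        # y_demo becomes 2*x_demo + y_demo
assert np.allclose(y_demo, [6.0, 9.0, 12.0])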
def cool_syrk(fact, X):
    # R = fact * X.dot(X.T), computed by BLAS syrk, which writes only one
    # triangle of the symmetric result
    syrk = get_blas_funcs("syrk", [X])
    R = syrk(fact, X)
    d = np.diag(R).copy()
    size = mat_to_upper_F(R)
    R.resize([size, ])
    return R, d
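# A quick check (ours, not part of the original code) of the syrk behavior
# the helper above depends on: with default arguments, scipy's syrk fills
# only the upper triangle of fact * X.dot(X.T), leaving the rest zero, which
# is why the diagonal is saved and the upper triangle is packed.
import numpy as np
from scipy.linalg import get_blas_funcs

X_demo = np.arange(6.0).reshape(2, 3)
syrk_demo = get_blas_funcs('syrk', [X_demo])
R_demo = syrk_demo(1.0, X_demo)
assert np.allclose(np.triu(R_demo), np.triu(X_demo.dot(X_demo.T)))
assert np.allclose(np.tril(R_demo, -1), 0.0)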
def _update_dict_slow(X, A, B, G, Q, Q_idx, idx, fit_intercept, components_range, norm, impute=True): Q_idx = Q[:, idx] if impute: old_sub_G = Q_idx.dot(Q_idx.T) ger, = linalg.get_blas_funcs(('ger',), (A, Q_idx)) R = B[:, idx] - np.dot(Q_idx.T, A).T # norm = np.sqrt(np.sum(Q_idx ** 2, axis=1)) norm = np.sqrt(np.sum(Q ** 2, axis=1)) # Intercept on first column for j in components_range: ger(1.0, A[j], Q_idx[j], a=R, overwrite_a=True) Q_idx[j] = R[j] / A[j, j] # new_norm = np.sqrt(np.sum(Q_idx[j] ** 2)) # if new_norm > norm[j]: # Q_idx[j] /= new_norm / norm[j] Q[j, idx] = Q_idx[j] new_norm = np.sqrt(np.sum(Q[j] ** 2)) if new_norm > 1: Q_idx[j] /= new_norm Q[j] /= new_norm ger(-1.0, A[j], Q_idx[j], a=R, overwrite_a=True) Q[:, idx] = Q_idx if impute: G += Q_idx.dot(Q_idx.T) - old_sub_G
def dot(A, B, out=None):
    """
    A drop-in replacement for numpy.dot.
    Computes A.B optimized using fblas calls.
    """
    import scipy.linalg as sp
    gemm = sp.get_blas_funcs('gemm', arrays=(A, B))

    if out is None:
        lda, x, y, ldb = A.shape + B.shape
        if x != y:
            raise ValueError("matrices are not aligned")
        dtype = np.promote_types(A.dtype, B.dtype)
        out = np.empty((lda, ldb), dtype, order='F')

    # The branches must be exclusive: a 1 x n array is both C- and
    # F-contiguous, and gemm must be called exactly once.
    if A.flags.c_contiguous and B.flags.c_contiguous:
        gemm(alpha=1., a=A.T, b=B.T, trans_a=True, trans_b=True,
             c=out, overwrite_c=True)
    elif A.flags.c_contiguous and B.flags.f_contiguous:
        gemm(alpha=1., a=A.T, b=B, trans_a=True, c=out, overwrite_c=True)
    elif A.flags.f_contiguous and B.flags.c_contiguous:
        gemm(alpha=1., a=A, b=B.T, trans_b=True, c=out, overwrite_c=True)
    else:
        gemm(alpha=1., a=A, b=B, c=out, overwrite_c=True)
    return out
def norm2(q):
    """
    Compute the Euclidean norm of an array ``q`` by calling the BLAS routine
    """
    q = np.asarray(q)
    nrm2 = get_blas_funcs('nrm2', dtype=q.dtype)
    return nrm2(q)
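# Quick sanity check for norm2 above (demo values are ours): BLAS nrm2
# agrees with np.linalg.norm for a 1-d vector.
import numpy as np

q_demo = np.array([3.0, 4.0])
assert np.isclose(norm2(q_demo), 5.0)
assert np.isclose(norm2(q_demo), np.linalg.norm(q_demo))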
def test_trsv(self): seed(1234) for ind, dtype in enumerate(DTYPES): n = 15 A = (rand(n, n)+eye(n)).astype(dtype) x = rand(n).astype(dtype) func, = get_blas_funcs(('trsv',), dtype=dtype) y1 = func(a=A, x=x) y2 = solve(triu(A), x) assert_array_almost_equal(y1, y2) y1 = func(a=A, x=x, lower=1) y2 = solve(tril(A), x) assert_array_almost_equal(y1, y2) y1 = func(a=A, x=x, diag=1) A[arange(n), arange(n)] = dtype(1) y2 = solve(triu(A), x) assert_array_almost_equal(y1, y2) y1 = func(a=A, x=x, diag=1, trans=1) y2 = solve(triu(A).T, x) assert_array_almost_equal(y1, y2) y1 = func(a=A, x=x, diag=1, trans=2) y2 = solve(triu(A).conj().T, x) assert_array_almost_equal(y1, y2)
def test_tpsv(self): seed(1234) for ind, dtype in enumerate(DTYPES): n = 10 x = rand(n).astype(dtype) # Upper triangular array A = triu(rand(n, n)) if ind < 2 else triu(rand(n, n)+rand(n, n)*1j) A += eye(n) # Form the packed storage c, r = tril_indices(n) Ap = A[r, c] func, = get_blas_funcs(('tpsv',), dtype=dtype) y1 = func(n=n, ap=Ap, x=x) y2 = solve(A, x) assert_array_almost_equal(y1, y2) y1 = func(n=n, ap=Ap, x=x, diag=1) A[arange(n), arange(n)] = dtype(1) y2 = solve(A, x) assert_array_almost_equal(y1, y2) y1 = func(n=n, ap=Ap, x=x, diag=1, trans=1) y2 = solve(A.T, x) assert_array_almost_equal(y1, y2) y1 = func(n=n, ap=Ap, x=x, diag=1, trans=2) y2 = solve(A.conj().T, x) assert_array_almost_equal(y1, y2)
def _solve(v, alpha, cs, ds): """Evaluate w = M^-1 v""" if len(cs) == 0: return v/alpha # (B + C D^H)^-1 = B^-1 - B^-1 C (I + D^H B^-1 C)^-1 D^H B^-1 axpy, dotc = get_blas_funcs(['axpy', 'dotc'], cs[:1] + [v]) c0 = cs[0] A = alpha * np.identity(len(cs), dtype=c0.dtype) for i, d in enumerate(ds): for j, c in enumerate(cs): A[i,j] += dotc(d, c) q = np.zeros(len(cs), dtype=c0.dtype) for j, d in enumerate(ds): q[j] = dotc(d, v) q /= alpha q = solve(A, q) w = v/alpha for c, qc in zip(cs, q): w = axpy(c, w, w.size, -qc) return w
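# A small numeric check (ours, with hypothetical sizes) of the low-rank
# inverse identity quoted in the comment of _solve above. With B = alpha*I,
#   (alpha*I + C D^H)^-1 = (1/alpha) * (I - C (alpha*I + D^H C)^-1 D^H),
# which is exactly what the function evaluates column by column.
import numpy as np

rng = np.random.RandomState(0)
n_demo, k_demo = 6, 2
alpha_demo = 2.0
C_demo = rng.randn(n_demo, k_demo)
D_demo = rng.randn(n_demo, k_demo)
M_demo = alpha_demo * np.eye(n_demo) + C_demo.dot(D_demo.T)
inner = alpha_demo * np.eye(k_demo) + D_demo.T.dot(C_demo)
M_inv = (np.eye(n_demo) - C_demo.dot(np.linalg.solve(inner, D_demo.T))) / alpha_demo
assert np.allclose(M_inv, np.linalg.inv(M_demo))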
def _mixed_norm_solver_bcd(M, G, alpha, lipschitz_constant, maxit=200, tol=1e-8, verbose=None, init=None, n_orient=1, dgap_freq=10): """Solve L21 inverse problem with block coordinate descent.""" n_sensors, n_times = M.shape n_sensors, n_sources = G.shape n_positions = n_sources // n_orient if init is None: X = np.zeros((n_sources, n_times)) R = M.copy() else: X = init R = M - np.dot(G, X) E = [] # track primal objective function highest_d_obj = - np.inf active_set = np.zeros(n_sources, dtype=np.bool) # start with full AS alpha_lc = alpha / lipschitz_constant # First make G fortran for faster access to blocks of columns G = np.asfortranarray(G) # It is better to call gemm here # so it is called only once gemm = linalg.get_blas_funcs("gemm", [R.T, G[:, 0:n_orient]]) one_ovr_lc = 1. / lipschitz_constant # assert that all the multiplied matrices are fortran contiguous assert X.T.flags.f_contiguous assert R.T.flags.f_contiguous assert G.flags.f_contiguous # storing list of contiguous arrays list_G_j_c = [] for j in range(n_positions): idx = slice(j * n_orient, (j + 1) * n_orient) list_G_j_c.append(np.ascontiguousarray(G[:, idx])) for i in range(maxit): _bcd(G, X, R, active_set, one_ovr_lc, n_orient, n_positions, alpha_lc, gemm, list_G_j_c) if (i + 1) % dgap_freq == 0: _, p_obj, d_obj, _ = dgap_l21(M, G, X[active_set], active_set, alpha, n_orient) highest_d_obj = max(d_obj, highest_d_obj) gap = p_obj - highest_d_obj E.append(p_obj) logger.debug("Iteration %d :: p_obj %f :: dgap %f :: n_active %d" % (i + 1, p_obj, gap, np.sum(active_set) / n_orient)) if gap < tol: logger.debug('Convergence reached ! (gap: %s < %s)' % (gap, tol)) break X = X[active_set] return X, active_set, E
def _matvec(v, alpha, cs, ds): axpy, scal, dotc = get_blas_funcs(['axpy', 'scal', 'dotc'], cs[:1] + [v]) w = alpha * v for c, d in zip(cs, ds): a = dotc(d, v) w = axpy(c, w, w.size, a) return w
def _calc_lr(self, x, tmp, calc_l=False, A1=None, A2=None, rescale=True,
             max_itr=1000, rtol=1E-14, atol=1E-14):
    """Power iteration to obtain eigenvector corresponding to largest
    eigenvalue.

    x is modified in place.
    """
    if A1 is None:
        A1 = self.A
    if A2 is None:
        A2 = self.A

    try:
        norm = la.get_blas_funcs("nrm2", [x])
    except (ValueError, AttributeError):
        norm = np.linalg.norm

    # try:
    #     allclose = ac.allclose_mat
    # except:
    #     allclose = np.allclose
    #     print "Falling back to numpy allclose()!"

    n = x.size  # we will scale x so that stuff doesn't get too small
    x *= n / norm(x.ravel())
    tmp[:] = x
    for i in range(max_itr):
        x[:] = tmp
        if calc_l:
            tm.eps_l_noop_inplace(x, A1, A2, tmp)
        else:
            tm.eps_r_noop_inplace(x, A1, A2, tmp)
        ev_mag = norm(tmp.ravel()) / n
        ev = (tmp.mean() / x.mean()).real
        tmp *= (1 / ev_mag)
        if norm((tmp - x).ravel()) < atol + rtol * n:
            # if allclose(tmp, x, rtol, atol):
            # print (i, ev, ev_mag, norm((tmp - x).ravel())/n, atol, rtol)
            x[:] = tmp
            break
        # else:
        #     print (i, ev, ev_mag, norm((tmp - x).ravel())/norm(x.ravel()),
        #            atol, rtol)

    if rescale and not abs(ev - 1) < atol:
        A1 *= 1 / sp.sqrt(ev)
        if self.sanity_checks:
            if A1 is not A2:
                log.warning("Sanity check failed: Re-scaling with A1 <> A2!")
            if calc_l:
                tm.eps_l_noop_inplace(x, A1, A2, tmp)
            else:
                tm.eps_r_noop_inplace(x, A1, A2, tmp)
            ev = tmp.mean() / x.mean()
            if not abs(ev - 1) < atol:
                log.warning("Sanity check failed: Largest ev after re-scale = %s", ev)

    return x, i < max_itr - 1, i
def py_maxvol(A, tol=1.05, max_iters=100):
    """Python implementation of 1-volume maximization.
    For information see :py:func:`maxvol` function"""
    if tol < 1:
        tol = 1.0
    N, r = A.shape
    if N <= r:
        return np.arange(N, dtype=np.int32), np.eye(N, dtype=A.dtype)
    # DGETRF
    B = np.copy(A, order='F')
    C = np.copy(B.T, order='F')
    H, ipiv, info = get_lapack_funcs('getrf', [B])(B, overwrite_a=1)
    # computing pivots from ipiv
    index = np.arange(N, dtype=np.int32)
    for i in range(r):
        tmp = index[i]
        index[i] = index[ipiv[i]]
        index[ipiv[i]] = tmp
    # solve A = CH, H is in LU format
    B = H[:r]
    # It will be much faster to use dtrsm instead of dtrtrs
    trtrs = get_lapack_funcs('trtrs', [B])
    trtrs(B, C, trans=1, lower=0, unitdiag=0, overwrite_b=1)
    trtrs(B, C, trans=1, lower=1, unitdiag=1, overwrite_b=1)
    # C has shape (r, N) -- it is stored transposed
    # find max value in C
    i, j = divmod(abs(C).argmax(), N)
    # set cgeru or zgeru for complex numbers and dger or sger for float numbers
    try:
        ger = get_blas_funcs('geru', [C])
    except ValueError:
        ger = get_blas_funcs('ger', [C])
    # set iters to 0
    iters = 0
    # check if need to swap rows
    while abs(C[i, j]) > tol and iters < max_iters:
        # add j to index and recompute C via the Sherman-Morrison formula
        index[i] = j
        tmp_row = C[i].copy()
        tmp_column = C[:, j].copy()
        tmp_column[i] -= 1.
        alpha = -1. / C[i, j]
        ger(alpha, tmp_column, tmp_row, a=C, overwrite_a=1)
        iters += 1
        i, j = divmod(abs(C).argmax(), N)
    return index[:r].copy(), C.T
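# Hedged usage sketch for py_maxvol above (demo data is ours). At convergence
# the chosen rows form a dominant r x r submatrix: every coefficient in the
# returned C stays below tol in modulus, and A is recovered from those rows.
import numpy as np

rng = np.random.RandomState(0)
A_demo = rng.randn(50, 4)
index_demo, C_demo = py_maxvol(A_demo, tol=1.05)
assert np.allclose(A_demo, C_demo.dot(A_demo[index_demo]))
assert abs(C_demo).max() <= 1.05 + 1e-10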
def fast_dot(A, B):
    # only try to use fast_dot for older numpy versions.
    # the related issue has been tackled meanwhile. Also, depending on the
    # build, the current numpy master's dot can be about 3 times faster.
    if LooseVersion(np.__version__) < '1.7.2':  # backported
        try:
            linalg.get_blas_funcs(['gemm'])
        except (AttributeError, ValueError):
            warnings.warn('Could not import BLAS, falling back to np.dot')
            return np.dot(A, B)
        try:
            return _fast_dot(A, B)
        except ValueError:
            # Maltyped or malformed data.
            return np.dot(A, B)
    else:
        return np.dot(A, B)
def norm(x): """Compute the Euclidean or Frobenius norm of x. Returns the Euclidean norm when x is a vector, the Frobenius norm when x is a matrix (2-d array). """ x = np.asarray(x) nrm2, = linalg.get_blas_funcs(['nrm2'], [x]) return nrm2(x)
def get_copula_function(self): if self.copula_eval is not None: return self.copula_eval #1/sqrt(det R) * exp(-.5 (phi-1(us))T . (R^-1 - I) . (phi-1(us))) coef = 1.0 / np.sqrt(np.abs(linalg.det(self.R))) variance = self.R.getI() - np.identity(self.R.shape[0]) blas_symv = get_blas_funcs("symv", [variance]) blas_dot = get_blas_funcs("dot", [variance]) def eval(*us): invphis = ndtri(us) #Faster than norm.ppf(us) #sandwich = (invphis * variance).dot(invphis) sandwich = blas_dot(invphis, blas_symv(1, variance, invphis)) return coef * np.exp(-.5 * sandwich) self.copula_eval = eval return eval
def norm(x): """Compute the Euclidean or Frobenius norm of x. Returns the Euclidean norm when x is a vector, the Frobenius norm when x is a matrix (2-d array). More precise than sqrt(squared_norm(x)). """ x = np.asarray(x) nrm2, = linalg.get_blas_funcs(["nrm2"], [x]) return nrm2(x)
def dot(A, B):
    """
    Uses BLAS libraries directly to perform dot product
    """
    A, trans_a = _force_forder(A)
    B, trans_b = _force_forder(B)
    gemm_dot = linalg.get_blas_funcs("gemm", arrays=(A, B))

    # gemm is implemented to compute: C = alpha*AB + beta*C
    return gemm_dot(alpha=1.0, a=A, b=B, trans_a=trans_a, trans_b=trans_b)
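# The _force_forder helper used above is not shown in this snippet; here is
# a plausible sketch of it (an assumption, not the original implementation).
# A C-contiguous array is handed to gemm as its transpose, which is a free
# Fortran-ordered view, together with a flag telling gemm to transpose back.
def _force_forder(x):
    if x.flags.c_contiguous:
        return x.T, True     # x.T is an F-contiguous view, no copy made
    return x, False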
def _fast_dot(A, B): if B.shape[0] != A.shape[A.ndim - 1]: # check adopted from '_dotblas.c' raise ValueError if A.dtype != B.dtype or any(x.dtype not in (np.float32, np.float64) for x in [A, B]): warnings.warn('Falling back to np.dot. ' 'Data must be of same type of either ' '32 or 64 bit float for the BLAS function, gemm, to be ' 'used for an efficient dot operation. ', NonBLASDotWarning) raise ValueError if min(A.shape) == 1 or min(B.shape) == 1 or A.ndim != 2 or B.ndim != 2: raise ValueError # scipy 0.9 compliant API dot = linalg.get_blas_funcs(['gemm'], (A, B))[0] A, trans_a = _impose_f_order(A) B, trans_b = _impose_f_order(B) return dot(alpha=1.0, a=A, b=B, trans_a=trans_a, trans_b=trans_b)
def gemm(a, b, alpha=1., **kwargs):
    """
    Wrapper for gemm in scipy.linalg. Detects which precision to use,
    and alpha (result multiplier) is default 1.0.

    GEMM performs a matrix-matrix multiplication (or matrix-vector)

        C = alpha*op(A)*op(B) + beta*C

    A, B, C are matrices, alpha and beta are scalars
    op(X) is either X or X', depending on whether trans_a or trans_b are 1
    beta and C are optional

    op(A) must be m by k
    op(B) must be k by n
    C, if supplied, must be m by n

    set overwrite_c to 1 to use C's memory for output
    """
    from scipy.linalg import get_blas_funcs
    _gemm, = get_blas_funcs(('gemm',), (a, b))
    return _gemm(alpha, a, b, **kwargs)
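# A minimal usage sketch for the gemm wrapper above (demo inputs are ours):
# with the default alpha=1.0 and no C supplied, it reduces to a plain
# matrix product.
import numpy as np

a_demo = np.arange(6.0).reshape(2, 3)
b_demo = np.arange(12.0).reshape(3, 4)
assert np.allclose(gemm(a_demo, b_demo), a_demo.dot(b_demo))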
def fast_dot(A, B):
    """Compute fast dot products directly calling BLAS.

    This function calls BLAS directly while ensuring Fortran contiguity.
    This avoids the extra copies `np.dot` would otherwise create.
    For details see section `Linear Algebra on large Arrays`:
    http://wiki.scipy.org/PerformanceTips

    Parameters
    ----------
    A, B : instance of np.ndarray
        Input matrices.
    """
    if A.dtype != B.dtype:
        raise ValueError('A and B must be of the same type.')
    if A.dtype not in (np.float32, np.float64):
        raise ValueError('Data must be single or double precision float.')

    dot = linalg.get_blas_funcs('gemm', (A, B))
    A, trans_a = _impose_f_order(A)
    B, trans_b = _impose_f_order(B)
    return dot(alpha=1.0, a=A, b=B, trans_a=trans_a, trans_b=trans_b)
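# Hedged usage sketch for fast_dot (ours; it assumes the module-level
# _impose_f_order helper referenced above is importable alongside it).
import numpy as np

A_demo = np.random.rand(4, 3)
B_demo = np.random.rand(3, 2)
assert np.allclose(fast_dot(A_demo, B_demo), np.dot(A_demo, B_demo))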
def test_tbsv(self): seed(1234) for ind, dtype in enumerate(DTYPES): n = 6 k = 3 x = rand(n).astype(dtype) A = zeros((n, n), dtype=dtype) # Banded upper triangular array for sup in range(k + 1): A[arange(n - sup), arange(sup, n)] = rand(n - sup) # Add complex parts for c,z if ind > 1: A[nonzero(A)] += 1j * rand((k + 1) * n - (k * (k + 1) // 2)).astype(dtype) # Form the banded storage Ab = zeros((k + 1, n), dtype=dtype) for row in range(k + 1): Ab[-row - 1, row:] = diag(A, k=row) func, = get_blas_funcs(('tbsv', ), dtype=dtype) y1 = func(k=k, a=Ab, x=x) y2 = solve(A, x) assert_array_almost_equal(y1, y2) y1 = func(k=k, a=Ab, x=x, diag=1) A[arange(n), arange(n)] = dtype(1) y2 = solve(A, x) assert_array_almost_equal(y1, y2) y1 = func(k=k, a=Ab, x=x, diag=1, trans=1) y2 = solve(A.T, x) assert_array_almost_equal(y1, y2) y1 = func(k=k, a=Ab, x=x, diag=1, trans=2) y2 = solve(A.conj().T, x) assert_array_almost_equal(y1, y2)
def test_trmv(self): seed(1234) for ind, dtype in enumerate(DTYPES): n = 3 A = (rand(n, n) + eye(n)).astype(dtype) x = rand(3).astype(dtype) func, = get_blas_funcs(('trmv', ), dtype=dtype) y1 = func(a=A, x=x) y2 = triu(A).dot(x) assert_array_almost_equal(y1, y2) y1 = func(a=A, x=x, diag=1) A[arange(n), arange(n)] = dtype(1) y2 = triu(A).dot(x) assert_array_almost_equal(y1, y2) y1 = func(a=A, x=x, diag=1, trans=1) y2 = triu(A).T.dot(x) assert_array_almost_equal(y1, y2) y1 = func(a=A, x=x, diag=1, trans=2) y2 = triu(A).conj().T.dot(x) assert_array_almost_equal(y1, y2)
def test_get_blas_funcs(): # check that it returns Fortran code for arrays that are # fortran-ordered f1, f2, f3 = get_blas_funcs( ('axpy', 'axpy', 'axpy'), (np.empty((2,2), dtype=np.complex64, order='F'), np.empty((2,2), dtype=np.complex128, order='C')) ) # get_blas_funcs will choose libraries depending on most generic # array assert_equal(f1.typecode, 'z') assert_equal(f2.typecode, 'z') if cblas is not None: assert_equal(f1.module_name, 'cblas') assert_equal(f2.module_name, 'cblas') # check defaults. f1 = get_blas_funcs('rotg') assert_equal(f1.typecode, 'd') # check also dtype interface f1 = get_blas_funcs('gemm', dtype=np.complex64) assert_equal(f1.typecode, 'c') f1 = get_blas_funcs('gemm', dtype='F') assert_equal(f1.typecode, 'c') # extended precision complex f1 = get_blas_funcs('gemm', dtype=np.longcomplex) assert_equal(f1.typecode, 'z') # check safe complex upcasting f1 = get_blas_funcs('axpy', (np.empty((2,2), dtype=np.float64), np.empty((2,2), dtype=np.complex64)) ) assert_equal(f1.typecode, 'z')
def _cholesky_omp(X, y, n_nonzero_coefs, tol=None, copy_X=True, return_path=False): """Orthogonal Matching Pursuit step using the Cholesky decomposition. Parameters ---------- X : array, shape (n_samples, n_features) Input dictionary. Columns are assumed to have unit norm. y : array, shape (n_samples,) Input targets n_nonzero_coefs : int Targeted number of non-zero elements tol : float Targeted squared error, if not None overrides n_nonzero_coefs. copy_X : bool, optional Whether the design matrix X must be copied by the algorithm. A false value is only helpful if X is already Fortran-ordered, otherwise a copy is made anyway. return_path : bool, optional. Default: False Whether to return every value of the nonzero coefficients along the forward path. Useful for cross-validation. Returns ------- gamma : array, shape (n_nonzero_coefs,) Non-zero elements of the solution idx : array, shape (n_nonzero_coefs,) Indices of the positions of the elements in gamma within the solution vector coef : array, shape (n_features, n_nonzero_coefs) The first k values of column k correspond to the coefficient value for the active features at that step. The lower left triangle contains garbage. Only returned if ``return_path=True``. n_active : int Number of active features at convergence. """ if copy_X: X = X.copy('F') else: # even if we are allowed to overwrite, still copy it if bad order X = np.asfortranarray(X) min_float = np.finfo(X.dtype).eps nrm2, swap = linalg.get_blas_funcs(('nrm2', 'swap'), (X, )) potrs, = get_lapack_funcs(('potrs', ), (X, )) alpha = np.dot(X.T, y) residual = y gamma = np.empty(0) n_active = 0 indices = np.arange(X.shape[1]) # keeping track of swapping max_features = X.shape[1] if tol is not None else n_nonzero_coefs L = np.empty((max_features, max_features), dtype=X.dtype) if return_path: coefs = np.empty_like(L) while True: lam = np.argmax(np.abs(np.dot(X.T, residual))) if lam < n_active or alpha[lam]**2 < min_float: # atom already selected or inner product too small warnings.warn(premature, RuntimeWarning, stacklevel=2) break if n_active > 0: # Updates the Cholesky decomposition of X' X L[n_active, :n_active] = np.dot(X[:, :n_active].T, X[:, lam]) linalg.solve_triangular(L[:n_active, :n_active], L[n_active, :n_active], trans=0, lower=1, overwrite_b=True, check_finite=False) v = nrm2(L[n_active, :n_active])**2 Lkk = linalg.norm(X[:, lam])**2 - v if Lkk <= min_float: # selected atoms are dependent warnings.warn(premature, RuntimeWarning, stacklevel=2) break L[n_active, n_active] = sqrt(Lkk) else: L[0, 0] = linalg.norm(X[:, lam]) X.T[n_active], X.T[lam] = swap(X.T[n_active], X.T[lam]) alpha[n_active], alpha[lam] = alpha[lam], alpha[n_active] indices[n_active], indices[lam] = indices[lam], indices[n_active] n_active += 1 # solves LL'x = X'y as a composition of two triangular systems gamma, _ = potrs(L[:n_active, :n_active], alpha[:n_active], lower=True, overwrite_b=False) if return_path: coefs[:n_active, n_active - 1] = gamma residual = y - np.dot(X[:, :n_active], gamma) if tol is not None and nrm2(residual)**2 <= tol: break elif n_active == max_features: break if return_path: return gamma, indices[:n_active], coefs[:, :n_active], n_active else: return gamma, indices[:n_active], n_active
def _cholesky_omp(X, y, n_nonzero_coefs, tol=None, copy_X=True):
    """Orthogonal Matching Pursuit step using the Cholesky decomposition.

    Parameters
    ----------
    X : array, shape = (n_samples, n_features)
        Input dictionary. Columns are assumed to have unit norm.

    y : array, shape = (n_samples,)
        Input targets

    n_nonzero_coefs : int
        Targeted number of non-zero elements

    tol : float
        Targeted squared error, if not None overrides n_nonzero_coefs.

    copy_X : bool, optional
        Whether the design matrix X must be copied by the algorithm. A false
        value is only helpful if X is already Fortran-ordered, otherwise a
        copy is made anyway.

    Returns
    -------
    gamma : array, shape = (n_nonzero_coefs,)
        Non-zero elements of the solution

    idx : array, shape = (n_nonzero_coefs,)
        Indices of the positions of the elements in gamma within the solution
        vector
    """
    if copy_X:
        X = X.copy('F')
    else:  # even if we are allowed to overwrite, still copy it if bad order
        X = np.asfortranarray(X)

    min_float = np.finfo(X.dtype).eps
    nrm2, swap = linalg.get_blas_funcs(('nrm2', 'swap'), (X,))
    potrs, = get_lapack_funcs(('potrs',), (X,))

    alpha = np.dot(X.T, y)
    residual = y
    gamma = np.empty(0)
    n_active = 0
    # a mutable list, so the swaps below are legal item assignments
    indices = list(range(X.shape[1]))  # keeping track of swapping

    max_features = X.shape[1] if tol is not None else n_nonzero_coefs
    L = np.empty((max_features, max_features), dtype=X.dtype)
    L[0, 0] = 1.

    while True:
        lam = np.argmax(np.abs(np.dot(X.T, residual)))
        if lam < n_active or alpha[lam]**2 < min_float:
            # atom already selected or inner product too small
            warn(premature)
            break
        if n_active > 0:
            # Updates the Cholesky decomposition of X' X
            L[n_active, :n_active] = np.dot(X[:, :n_active].T, X[:, lam])
            # L is lower triangular, so solve against its lower triangle
            solve_triangular(L[:n_active, :n_active],
                             L[n_active, :n_active], lower=True)
            v = nrm2(L[n_active, :n_active])**2
            if 1 - v <= min_float:  # selected atoms are dependent
                warn(premature)
                break
            L[n_active, n_active] = np.sqrt(1 - v)
        X.T[n_active], X.T[lam] = swap(X.T[n_active], X.T[lam])
        alpha[n_active], alpha[lam] = alpha[lam], alpha[n_active]
        indices[n_active], indices[lam] = indices[lam], indices[n_active]
        n_active += 1
        # solves LL'x = y as a composition of two triangular systems
        gamma, _ = potrs(L[:n_active, :n_active], alpha[:n_active],
                         lower=True, overwrite_b=False)

        residual = y - np.dot(X[:, :n_active], gamma)
        if tol is not None and nrm2(residual)**2 <= tol:
            break
        elif n_active == max_features:
            break

    return gamma, indices[:n_active]
__author__ = "Nikolay Bryskin" # https://github.com/nikicat import numpy as np import sys from collections import deque from scipy.linalg import get_blas_funcs gemm = get_blas_funcs('gemm') #np.dot = lambda a, b: gemm(1, a, b) OFFSET = 3000 SKIP_SIZE = 27 N_CHANNELS = 21 try: profile(lambda: 1) except NameError: def profile(func): return func class DelayedRLSPredictor: def __init__(self, n_channels, target_channel, M=3, lambda_=0.999, delta=100, delay=0,
class FullHessianUpdateStrategy(HessianUpdateStrategy): """Hessian update strategy with full dimensional internal representation. """ _syr = get_blas_funcs('syr', dtype='d') # Symmetric rank 1 update _syr2 = get_blas_funcs('syr2', dtype='d') # Symmetric rank 2 update # Symmetric matrix-vector product _symv = get_blas_funcs('symv', dtype='d') def __init__(self, init_scale='auto'): self.init_scale = init_scale # Until initialize is called we can't really use the class, # so it makes sense to set everything to None. self.first_iteration = None self.approx_type = None self.B = None self.H = None def initialize(self, n, approx_type): """Initialize internal matrix. Allocate internal memory for storing and updating the Hessian or its inverse. Parameters ---------- n : int Problem dimension. approx_type : {'hess', 'inv_hess'} Selects either the Hessian or the inverse Hessian. When set to 'hess' the Hessian will be stored and updated. When set to 'inv_hess' its inverse will be used instead. """ self.first_iteration = True self.n = n self.approx_type = approx_type if approx_type not in ('hess', 'inv_hess'): raise ValueError("`approx_type` must be 'hess' or 'inv_hess'.") # Create matrix if self.approx_type == 'hess': self.B = np.eye(n, dtype=float) else: self.H = np.eye(n, dtype=float) def _auto_scale(self, delta_x, delta_grad): # Heuristic to scale matrix at first iteration. # Described in Nocedal and Wright "Numerical Optimization" # p.143 formula (6.20). s_norm2 = np.dot(delta_x, delta_x) y_norm2 = np.dot(delta_grad, delta_grad) ys = np.abs(np.dot(delta_grad, delta_x)) if ys == 0.0 or y_norm2 == 0 or s_norm2 == 0: return 1 if self.approx_type == 'hess': return y_norm2 / ys else: return ys / y_norm2 def _update_implementation(self, delta_x, delta_grad): raise NotImplementedError("The method ``_update_implementation``" " is not implemented.") def update(self, delta_x, delta_grad): """Update internal matrix. Update Hessian matrix or its inverse (depending on how 'approx_type' is defined) using information about the last evaluated points. Parameters ---------- delta_x : ndarray The difference between two points the gradient function have been evaluated at: ``delta_x = x2 - x1``. delta_grad : ndarray The difference between the gradients: ``delta_grad = grad(x2) - grad(x1)``. """ if np.all(delta_x == 0.0): return if np.all(delta_grad == 0.0): warn('delta_grad == 0.0. Check if the approximated ' 'function is linear. If the function is linear ' 'better results can be obtained by defining the ' 'Hessian as zero instead of using quasi-Newton ' 'approximations.', UserWarning) return if self.first_iteration: # Get user specific scale if self.init_scale == "auto": scale = self._auto_scale(delta_x, delta_grad) else: scale = float(self.init_scale) # Scale initial matrix with ``scale * np.eye(n)`` if self.approx_type == 'hess': self.B *= scale else: self.H *= scale self.first_iteration = False self._update_implementation(delta_x, delta_grad) def dot(self, p): """Compute the product of the internal matrix with the given vector. Parameters ---------- p : array_like 1-d array representing a vector. Returns ------- Hp : array 1-d represents the result of multiplying the approximation matrix by vector p. """ if self.approx_type == 'hess': return self._symv(1, self.B, p) else: return self._symv(1, self.H, p) def get_matrix(self): """Return the current internal matrix. Returns ------- M : ndarray, shape (n, n) Dense matrix containing either the Hessian or its inverse (depending on how `approx_type` was defined). 
""" if self.approx_type == 'hess': M = np.copy(self.B) else: M = np.copy(self.H) li = np.tril_indices_from(M, k=-1) M[li] = M.T[li] return M
from __future__ import division import numpy as np import math import scipy.linalg as linalg from Cholesky import Cholesky import random trm = linalg.get_blas_funcs('trmm') from scipy.special import gammaln as gamlog class Gauss_Wishart_model: '''Probability distributions for a Bayesian Normal Wishart probability model.''' def __init__(self, Gaussian_component): self.GaussComp = Gaussian_component self.GaussComp._Gaussian_component__update_params(scale=1, prec=1, XX_T=1) self.d = self.GaussComp.d self.n = self.GaussComp.n self.prec_mu_norm_Z = None #normalizing constant of the prior probability self.prec_norm_Z = None #normalizing constant of the prior probability of the precision matrix self.mu_norm_Z = None #normalizing constant of the prior probability of the mean self.prior_lp = None #log prior probability of the mean and precision matrix self.prior_lp_prec = None #log prior probability of the precision matrix self.prior_lp_mu = None #log prior probability of the mean self.data_lp = None #log probability of the data self.joint_lp = None #Joint log probability (data, mean, precision)
def gmres_householder(A, b, x0=None, tol=1e-5, restrt=None, maxiter=None, xtype=None, M=None, callback=None, residuals=None): ''' Generalized Minimum Residual Method (GMRES) GMRES iteratively refines the initial solution guess to the system Ax = b Householder reflections are used for orthogonalization Parameters ---------- A : {array, matrix, sparse matrix, LinearOperator} n x n, linear system to solve b : {array, matrix} right hand side, shape is (n,) or (n, 1) x0 : {array, matrix} initial guess, default is a vector of zeros tol : float relative convergence tolerance, i.e. tol is scaled by the norm of the initial preconditioned residual restrt : {None, int} - if int, restrt is max number of inner iterations and maxiter is the max number of outer iterations - if None, do not restart GMRES, and max number of inner iterations is maxiter maxiter : {None, int} - if restrt is None, maxiter is the max number of inner iterations and GMRES does not restart - if restrt is int, maxiter is the max number of outer iterations, and restrt is the max number of inner iterations xtype : type dtype for the solution, default is automatic type detection M : {array, matrix, sparse matrix, LinearOperator} n x n, inverted preconditioner, i.e. solve M A x = M b. callback : function User-supplied function is called after each iteration as callback( ||rk||_2 ), where rk is the current preconditioned residual vector residuals : list residuals contains the preconditioned residual norm history, including the initial residual. Returns ------- (xNew, info) xNew : an updated guess to the solution of Ax = b info : halting status of gmres == ============================================= 0 successful exit >0 convergence to tolerance not achieved, return iteration count instead. This value is precisely the order of the Krylov space. <0 numerical breakdown, or illegal input == ============================================= Notes ----- - The LinearOperator class is in scipy.sparse.linalg.interface. Use this class if you prefer to define A or M as a mat-vec routine as opposed to explicitly constructing the matrix. A.psolve(..) is still supported as a legacy. - For robustness, Householder reflections are used to orthonormalize the Krylov Space Givens Rotations are used to provide the residual norm each iteration Examples -------- >>> from pyamg.krylov import gmres >>> from pyamg.util.linalg import norm >>> import numpy >>> from pyamg.gallery import poisson >>> A = poisson((10, 10)) >>> b = numpy.ones((A.shape[0],)) >>> (x, flag) = gmres(A, b, maxiter=2, tol=1e-8, orthog='householder') >>> print norm(b - A*x) 6.5428213057 References ---------- .. [1] Yousef Saad, "Iterative Methods for Sparse Linear Systems, Second Edition", SIAM, pp. 151-172, pp. 
272-275, 2003 http://www-users.cs.umn.edu/~saad/books.html ''' # Convert inputs to linear system, with error checking A, M, x, b, postprocess = make_system(A, M, x0, b, xtype) dimen = A.shape[0] ## # Ensure that warnings are always reissued from this function import warnings warnings.filterwarnings('always', module='pyamg\.krylov\._gmres_householder') # Choose type if not hasattr(A, 'dtype'): Atype = upcast(x.dtype, b.dtype) else: Atype = A.dtype if not hasattr(M, 'dtype'): Mtype = upcast(x.dtype, b.dtype) else: Mtype = M.dtype xtype = upcast(Atype, x.dtype, b.dtype, Mtype) # known bug vvvvvv remove when fixed if Atype == complex: raise ValueError('[Known Bug] Housholder fails with complex matrices; \ use MGS') if restrt is not None: restrt = int(restrt) if maxiter is not None: maxiter = int(maxiter) # Should norm(r) be kept if residuals == []: keep_r = True else: keep_r = False # Set number of outer and inner iterations if restrt: if maxiter: max_outer = maxiter else: max_outer = 1 if restrt > dimen: warn('Setting number of inner iterations (restrt) to maximum \ allowed, which is A.shape[0] ') restrt = dimen max_inner = restrt else: max_outer = 1 if maxiter > dimen: warn('Setting number of inner iterations (maxiter) to maximum \ allowed, which is A.shape[0] ') maxiter = dimen elif maxiter is None: maxiter = min(dimen, 40) max_inner = maxiter # Get fast access to underlying BLAS routines [rotg] = get_blas_funcs(['rotg'], [x]) # Is this a one dimensional matrix? if dimen == 1: entry = ravel(A * array([1.0], dtype=xtype)) return (postprocess(b / entry), 0) # Prep for method r = b - ravel(A * x) #Apply preconditioner r = ravel(M * r) normr = norm(r) if keep_r: residuals.append(normr) ## Check for nan, inf #if isnan(r).any() or isinf(r).any(): # warn('inf or nan after application of preconditioner') # return(postprocess(x), -1) # Check initial guess ( scaling by b, if b != 0, # must account for case when norm(b) is very small) normb = norm(b) if normb == 0.0: normb = 1.0 if normr < tol * normb: if callback is not None: callback(norm(r)) return (postprocess(x), 0) # Scale tol by ||r_0||_2, we use the preconditioned residual # because this is left preconditioned GMRES. if normr != 0.0: tol = tol * normr # Use separate variable to track iterations. If convergence fails, we # cannot simply report niter = (outer-1)*max_outer + inner. Numerical # error could cause the inner loop to halt while the actual ||r|| > tol. niter = 0 # Begin GMRES for outer in range(max_outer): # Calculate vector w, which defines the Householder reflector # Take shortcut in calculating, # w = r + sign(r[1])*||r||_2*e_1 w = r beta = mysign(w[0]) * normr w[0] = w[0] + beta w[:] = w / norm(w) # Preallocate for Krylov vectors, Householder reflectors and # Hessenberg matrix # Space required is O(dimen*max_inner) # Givens Rotations Q = zeros((4 * max_inner, ), dtype=xtype) # upper Hessenberg matrix (made upper tri with Givens Rotations) H = zeros((max_inner, max_inner), dtype=xtype) # Householder reflectors W = zeros((max_inner + 1, dimen), dtype=xtype) W[0, :] = w # Multiply r with (I - 2*w*w.T), i.e. apply the Householder reflector # This is the RHS vector for the problem in the Krylov Space g = zeros((dimen, ), dtype=xtype) g[0] = -beta for inner in range(max_inner): # Calculate Krylov vector in two steps # (1) Calculate v = P_j = (I - 2*w*w.T)v, where k = inner v = -2.0 * conjugate(w[inner]) * w v[inner] = v[inner] + 1.0 # (2) Calculate the rest, v = P_1*P_2*P_3...P_{j-1}*ej. 
#for j in range(inner-1,-1,-1): # v -= 2.0*dot(conjugate(W[j,:]), v)*W[j,:] amg_core.apply_householders(v, ravel(W), dimen, inner - 1, -1, -1) # Calculate new search direction v = ravel(A * v) #Apply preconditioner v = ravel(M * v) ## Check for nan, inf #if isnan(v).any() or isinf(v).any(): # warn('inf or nan after application of preconditioner') # return(postprocess(x), -1) # Factor in all Householder orthogonal reflections on new search # direction #for j in range(inner+1): # v -= 2.0*dot(conjugate(W[j,:]), v)*W[j,:] amg_core.apply_householders(v, ravel(W), dimen, 0, inner + 1, 1) # Calculate next Householder reflector, w # w = v[inner+1:] + sign(v[inner+1])*||v[inner+1:]||_2*e_{inner+1) # Note that if max_inner = dimen, then this is unnecessary for the # last inner iteration, when inner = dimen-1. Here we do not need # to calculate a Householder reflector or Givens rotation because # nnz(v) is already the desired length, i.e. we do not need to # zero anything out. if inner != dimen - 1: if inner < (max_inner - 1): w = W[inner + 1, :] vslice = v[inner + 1:] alpha = norm(vslice) if alpha != 0: alpha = mysign(vslice[0]) * alpha # do not need the final reflector for future calculations if inner < (max_inner - 1): w[inner + 1:] = vslice w[inner + 1] += alpha w[:] = w / norm(w) # Apply new reflector to v # v = v - 2.0*w*(w.T*v) v[inner + 1] = -alpha v[inner + 2:] = 0.0 if inner > 0: # Apply all previous Givens Rotations to v amg_core.apply_givens(Q, v, dimen, inner) # Calculate the next Givens rotation, where j = inner Note that if # max_inner = dimen, then this is unnecessary for the last inner # iteration, when inner = dimen-1. Here we do not need to # calculate a Householder reflector or Givens rotation because # nnz(v) is already the desired length, i.e. we do not need to zero # anything out. if inner != dimen - 1: if v[inner + 1] != 0: [c, s] = rotg(v[inner], v[inner + 1]) Qblock = array([[c, s], [-conjugate(s), c]], dtype=xtype) Q[(inner * 4):((inner + 1) * 4)] = ravel(Qblock).copy() # Apply Givens Rotation to g, the RHS for the linear system # in the Krylov Subspace. Note that this dot does a matrix # multiply, not an actual dot product where a conjugate # transpose is taken g[inner:inner + 2] = dot(Qblock, g[inner:inner + 2]) # Apply effect of Givens Rotation to v v[inner] = dot(Qblock[0, :], v[inner:inner + 2]) v[inner + 1] = 0.0 # Write to upper Hessenberg Matrix, # the LHS for the linear system in the Krylov Subspace H[:, inner] = v[0:max_inner] niter += 1 # Don't update normr if last inner iteration, because # normr is calculated directly after this loop ends. if inner < max_inner - 1: normr = abs(g[inner + 1]) if normr < tol: break # Allow user access to residual if callback is not None: callback(normr) if keep_r: residuals.append(normr) # end inner loop, back to outer loop # Find best update to x in Krylov Space, V. Solve inner+1 x inner+1 # system. Apparently this is the best way to solve a triangular system # in the magical world of scipy #piv = arange(inner+1) #y = lu_solve((H[0:(inner+1), 0:(inner+1)], piv), g[0:(inner+1)], # trans=0) y = scipy.linalg.solve(H[0:(inner + 1), 0:(inner + 1)], g[0:(inner + 1)]) # Use Horner like Scheme to map solution, y, back to original space. # Note that we do not use the last reflector. 
update = zeros(x.shape, dtype=xtype) #for j in range(inner,-1,-1): # update[j] += y[j] # # Apply j-th reflector, (I - 2.0*w_j*w_j.T)*upadate # update -= 2.0*dot(conjugate(W[j,:]), update)*W[j,:] amg_core.householder_hornerscheme(update, ravel(W), ravel(y), dimen, inner, -1, -1) x[:] = x + update r = b - ravel(A * x) #Apply preconditioner r = ravel(M * r) normr = norm(r) ## Check for nan, inf #if isnan(r).any() or isinf(r).any(): # warn('inf or nan after application of preconditioner') # return(postprocess(x), -1) # Allow user access to residual if callback is not None: callback(normr) if keep_r: residuals.append(normr) # Has GMRES stagnated? indices = (x != 0) if indices.any(): change = max(abs(update[indices] / x[indices])) if change < 1e-12: # No change, halt return (postprocess(x), -1) # test for convergence if normr < tol: return (postprocess(x), 0) # end outer loop return (postprocess(x), niter)
def gcrotmk(A, b, x0=None, tol=1e-5, maxiter=1000, M=None, callback=None, m=20, k=None, CU=None, discard_C=False, truncate='oldest', atol=None): """ Solve a matrix equation using flexible GCROT(m,k) algorithm. Parameters ---------- A : {sparse matrix, ndarray, LinearOperator} The real or complex N-by-N matrix of the linear system. Alternatively, ``A`` can be a linear operator which can produce ``Ax`` using, e.g., ``scipy.sparse.linalg.LinearOperator``. b : ndarray Right hand side of the linear system. Has shape (N,) or (N,1). x0 : ndarray Starting guess for the solution. tol, atol : float, optional Tolerances for convergence, ``norm(residual) <= max(tol*norm(b), atol)``. The default for ``atol`` is `tol`. .. warning:: The default value for `atol` will be changed in a future release. For future compatibility, specify `atol` explicitly. maxiter : int, optional Maximum number of iterations. Iteration will stop after maxiter steps even if the specified tolerance has not been achieved. M : {sparse matrix, ndarray, LinearOperator}, optional Preconditioner for A. The preconditioner should approximate the inverse of A. gcrotmk is a 'flexible' algorithm and the preconditioner can vary from iteration to iteration. Effective preconditioning dramatically improves the rate of convergence, which implies that fewer iterations are needed to reach a given error tolerance. callback : function, optional User-supplied function to call after each iteration. It is called as callback(xk), where xk is the current solution vector. m : int, optional Number of inner FGMRES iterations per each outer iteration. Default: 20 k : int, optional Number of vectors to carry between inner FGMRES iterations. According to [2]_, good values are around m. Default: m CU : list of tuples, optional List of tuples ``(c, u)`` which contain the columns of the matrices C and U in the GCROT(m,k) algorithm. For details, see [2]_. The list given and vectors contained in it are modified in-place. If not given, start from empty matrices. The ``c`` elements in the tuples can be ``None``, in which case the vectors are recomputed via ``c = A u`` on start and orthogonalized as described in [3]_. discard_C : bool, optional Discard the C-vectors at the end. Useful if recycling Krylov subspaces for different linear systems. truncate : {'oldest', 'smallest'}, optional Truncation scheme to use. Drop: oldest vectors, or vectors with smallest singular values using the scheme discussed in [1,2]. See [2]_ for detailed comparison. Default: 'oldest' Returns ------- x : ndarray The solution found. info : int Provides convergence information: * 0 : successful exit * >0 : convergence to tolerance not achieved, number of iterations References ---------- .. [1] E. de Sturler, ''Truncation strategies for optimal Krylov subspace methods'', SIAM J. Numer. Anal. 36, 864 (1999). .. [2] J.E. Hicken and D.W. Zingg, ''A simplified and flexible variant of GCROT for solving nonsymmetric linear systems'', SIAM J. Sci. Comput. 32, 172 (2010). .. [3] M.L. Parks, E. de Sturler, G. Mackey, D.D. Johnson, S. Maiti, ''Recycling Krylov subspaces for sequences of linear systems'', SIAM J. Sci. Comput. 28, 1651 (2006). """ A, M, x, b, postprocess = make_system(A, M, x0, b) if not np.isfinite(b).all(): raise ValueError("RHS must contain only finite numbers") if truncate not in ('oldest', 'smallest'): raise ValueError("Invalid value for 'truncate': %r" % (truncate, )) if atol is None: warnings.warn( "scipy.sparse.linalg.gcrotmk called without specifying `atol`. 
" "The default value will change in the future. To preserve " "current behavior, set ``atol=tol``.", category=DeprecationWarning, stacklevel=2) atol = tol matvec = A.matvec psolve = M.matvec if CU is None: CU = [] if k is None: k = m axpy, dot, scal = None, None, None r = b - matvec(x) axpy, dot, scal, nrm2 = get_blas_funcs(['axpy', 'dot', 'scal', 'nrm2'], (x, r)) b_norm = nrm2(b) if b_norm == 0: x = b return (postprocess(x), 0) if discard_C: CU[:] = [(None, u) for c, u in CU] # Reorthogonalize old vectors if CU: # Sort already existing vectors to the front CU.sort(key=lambda cu: cu[0] is not None) # Fill-in missing ones C = np.empty((A.shape[0], len(CU)), dtype=r.dtype, order='F') us = [] j = 0 while CU: # More memory-efficient: throw away old vectors as we go c, u = CU.pop(0) if c is None: c = matvec(u) C[:, j] = c j += 1 us.append(u) # Orthogonalize Q, R, P = qr(C, overwrite_a=True, mode='economic', pivoting=True) del C # C := Q cs = list(Q.T) # U := U P R^-1, back-substitution new_us = [] for j in range(len(cs)): u = us[P[j]] for i in range(j): u = axpy(us[P[i]], u, u.shape[0], -R[i, j]) if abs(R[j, j]) < 1e-12 * abs(R[0, 0]): # discard rest of the vectors break u = scal(1.0 / R[j, j], u) new_us.append(u) # Form the new CU lists CU[:] = list(zip(cs, new_us))[::-1] if CU: axpy, dot = get_blas_funcs(['axpy', 'dot'], (r, )) # Solve first the projection operation with respect to the CU # vectors. This corresponds to modifying the initial guess to # be # # x' = x + U y # y = argmin_y || b - A (x + U y) ||^2 # # The solution is y = C^H (b - A x) for c, u in CU: yc = dot(c, r) x = axpy(u, x, x.shape[0], yc) r = axpy(c, r, r.shape[0], -yc) # GCROT main iteration for j_outer in range(maxiter): # -- callback if callback is not None: callback(x) beta = nrm2(r) # -- check stopping condition beta_tol = max(atol, tol * b_norm) if beta <= beta_tol and (j_outer > 0 or CU): # recompute residual to avoid rounding error r = b - matvec(x) beta = nrm2(r) if beta <= beta_tol: j_outer = -1 break ml = m + max(k - len(CU), 0) cs = [c for c, u in CU] try: Q, R, B, vs, zs, y, pres = _fgmres(matvec, r / beta, ml, rpsolve=psolve, atol=max(atol, tol * b_norm) / beta, cs=cs) y *= beta except LinAlgError: # Floating point over/underflow, non-finite result from # matmul etc. -- report failure. break # # At this point, # # [A U, A Z] = [C, V] G; G = [ I B ] # [ 0 H ] # # where [C, V] has orthonormal columns, and r = beta v_0. Moreover, # # || b - A (x + Z y + U q) ||_2 = || r - C B y - V H y - C q ||_2 = min! 
# # from which y = argmin_y || beta e_1 - H y ||_2, and q = -B y # # # GCROT(m,k) update # # Define new outer vectors # ux := (Z - U B) y ux = zs[0] * y[0] for z, yc in zip(zs[1:], y[1:]): ux = axpy(z, ux, ux.shape[0], yc) # ux += z*yc by = B.dot(y) for cu, byc in zip(CU, by): c, u = cu ux = axpy(u, ux, ux.shape[0], -byc) # ux -= u*byc # cx := V H y hy = Q.dot(R.dot(y)) cx = vs[0] * hy[0] for v, hyc in zip(vs[1:], hy[1:]): cx = axpy(v, cx, cx.shape[0], hyc) # cx += v*hyc # Normalize cx, maintaining cx = A ux # This new cx is orthogonal to the previous C, by construction try: alpha = 1 / nrm2(cx) if not np.isfinite(alpha): raise FloatingPointError() except (FloatingPointError, ZeroDivisionError): # Cannot update, so skip it continue cx = scal(alpha, cx) ux = scal(alpha, ux) # Update residual and solution gamma = dot(cx, r) r = axpy(cx, r, r.shape[0], -gamma) # r -= gamma*cx x = axpy(ux, x, x.shape[0], gamma) # x += gamma*ux # Truncate CU if truncate == 'oldest': while len(CU) >= k and CU: del CU[0] elif truncate == 'smallest': if len(CU) >= k and CU: # cf. [1,2] D = solve(R[:-1, :].T, B.T).T W, sigma, V = svd(D) # C := C W[:,:k-1], U := U W[:,:k-1] new_CU = [] for j, w in enumerate(W[:, :k - 1].T): c, u = CU[0] c = c * w[0] u = u * w[0] for cup, wp in zip(CU[1:], w[1:]): cp, up = cup c = axpy(cp, c, c.shape[0], wp) u = axpy(up, u, u.shape[0], wp) # Reorthogonalize at the same time; not necessary # in exact arithmetic, but floating point error # tends to accumulate here for cp, up in new_CU: alpha = dot(cp, c) c = axpy(cp, c, c.shape[0], -alpha) u = axpy(up, u, u.shape[0], -alpha) alpha = nrm2(c) c = scal(1.0 / alpha, c) u = scal(1.0 / alpha, u) new_CU.append((c, u)) CU[:] = new_CU # Add new vector to CU CU.append((cx, ux)) # Include the solution vector to the span CU.append((None, x.copy())) if discard_C: CU[:] = [(None, uz) for cz, uz in CU] return postprocess(x), j_outer + 1
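# Hedged usage sketch (ours): the public scipy.sparse.linalg.gcrotmk exposes
# the interface documented above, and make_system accepts dense ndarrays, so
# a tiny system exercises the whole outer loop. Passing atol explicitly also
# avoids the DeprecationWarning emitted above.
import numpy as np
from scipy.sparse.linalg import gcrotmk

A_demo = np.array([[4.0, 1.0], [1.0, 3.0]])
b_demo = np.array([1.0, 2.0])
x_demo, info = gcrotmk(A_demo, b_demo, atol=1e-12)
assert info == 0
assert np.allclose(A_demo.dot(x_demo), b_demo)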
def gmres_mgs(A, b, x0=None, tol=1e-5, restrt=None, maxiter=None, xtype=None, M=None, callback=None, residuals=None, reorth=False): ''' Generalized Minimum Residual Method (GMRES) GMRES iteratively refines the initial solution guess to the system Ax = b Modified Gram-Schmidt version Parameters ---------- A : {array, matrix, sparse matrix, LinearOperator} n x n, linear system to solve b : {array, matrix} right hand side, shape is (n,) or (n,1) x0 : {array, matrix} initial guess, default is a vector of zeros tol : float relative convergence tolerance, i.e. tol is scaled by the norm of the initial preconditioned residual restrt : {None, int} - if int, restrt is max number of inner iterations and maxiter is the max number of outer iterations - if None, do not restart GMRES, and max number of inner iterations is maxiter maxiter : {None, int} - if restrt is None, maxiter is the max number of inner iterations and GMRES does not restart - if restrt is int, maxiter is the max number of outer iterations, and restrt is the max number of inner iterations xtype : type dtype for the solution, default is automatic type detection M : {array, matrix, sparse matrix, LinearOperator} n x n, inverted preconditioner, i.e. solve M A x = M b. callback : function User-supplied function is called after each iteration as callback(xk), where xk is the current solution vector residuals : list residuals contains the preconditioned residual norm history, including the initial residual. reorth : boolean If True, then a check is made whether to re-orthogonalize the Krylov space each GMRES iteration Returns ------- (xNew, info) xNew : an updated guess to the solution of Ax = b info : halting status of gmres == ============================================= 0 successful exit >0 convergence to tolerance not achieved, return iteration count instead. This value is precisely the order of the Krylov space. <0 numerical breakdown, or illegal input == ============================================= Notes ----- - The LinearOperator class is in scipy.sparse.linalg.interface. Use this class if you prefer to define A or M as a mat-vec routine as opposed to explicitly constructing the matrix. A.psolve(..) is still supported as a legacy. - For robustness, modified Gram-Schmidt is used to orthogonalize the Krylov Space Givens Rotations are used to provide the residual norm each iteration Examples -------- >>> from pyamg.krylov import gmres >>> from pyamg.util.linalg import norm >>> import numpy as np >>> from pyamg.gallery import poisson >>> A = poisson((10,10)) >>> b = np.ones((A.shape[0],)) >>> (x,flag) = gmres(A,b, maxiter=2, tol=1e-8, orthog='mgs') >>> print norm(b - A*x) >>> 6.5428213057 References ---------- .. [1] Yousef Saad, "Iterative Methods for Sparse Linear Systems, Second Edition", SIAM, pp. 151-172, pp. 272-275, 2003 http://www-users.cs.umn.edu/~saad/books.html .. [2] C. T. 
    # Convert inputs to linear system, with error checking
    A, M, x, b, postprocess = make_system(A, M, x0, b, xtype)
    dimen = A.shape[0]

    # Ensure that warnings are always reissued from this function
    import warnings
    warnings.filterwarnings('always', module=r'pyamg\.krylov\._gmres_mgs')

    # Choose type
    if not hasattr(A, 'dtype'):
        Atype = upcast(x.dtype, b.dtype)
    else:
        Atype = A.dtype
    if not hasattr(M, 'dtype'):
        Mtype = upcast(x.dtype, b.dtype)
    else:
        Mtype = M.dtype
    xtype = upcast(Atype, x.dtype, b.dtype, Mtype)

    if restrt is not None:
        restrt = int(restrt)
    if maxiter is not None:
        maxiter = int(maxiter)

    # Get fast access to underlying BLAS routines
    # dotc is the conjugate dot, dotu does no conjugation
    [lartg] = get_lapack_funcs(['lartg'], [x])
    if np.iscomplexobj(np.zeros((1, ), dtype=xtype)):
        [axpy, dotu, dotc, scal] =\
            get_blas_funcs(['axpy', 'dotu', 'dotc', 'scal'], [x])
    else:
        # real type
        [axpy, dotu, dotc, scal] =\
            get_blas_funcs(['axpy', 'dot', 'dot', 'scal'], [x])

    # Make full use of direct access to BLAS by defining own norm
    def norm(z):
        return np.sqrt(np.real(dotc(z, z)))

    # Should norm(r) be kept
    if residuals == []:
        keep_r = True
    else:
        keep_r = False

    # Set number of outer and inner iterations
    if restrt:
        if maxiter:
            max_outer = maxiter
        else:
            max_outer = 1
        if restrt > dimen:
            warn('Setting number of inner iterations (restrt) to maximum '
                 'allowed, which is A.shape[0]')
            restrt = dimen
        max_inner = restrt
    else:
        max_outer = 1
        # Check for None before comparing, to avoid a TypeError on Python 3
        if maxiter is None:
            maxiter = min(dimen, 40)
        elif maxiter > dimen:
            warn('Setting number of inner iterations (maxiter) to maximum '
                 'allowed, which is A.shape[0]')
            maxiter = dimen
        max_inner = maxiter

    # Is this a one dimensional matrix?
    if dimen == 1:
        entry = np.ravel(A * np.array([1.0], dtype=xtype))
        return (postprocess(b / entry), 0)

    # Prep for method
    r = b - np.ravel(A * x)

    # Apply preconditioner
    r = np.ravel(M * r)
    normr = norm(r)
    if keep_r:
        residuals.append(normr)
    # Check for nan, inf
    # if isnan(r).any() or isinf(r).any():
    #    warn('inf or nan after application of preconditioner')
    #    return(postprocess(x), -1)

    # Check initial guess ( scaling by b, if b != 0,
    # must account for case when norm(b) is very small)
    normb = norm(b)
    if normb == 0.0:
        normb = 1.0
    if normr < tol * normb:
        return (postprocess(x), 0)

    # Scale tol by ||r_0||_2, we use the preconditioned residual
    # because this is left preconditioned GMRES.
    if normr != 0.0:
        tol = tol * normr

    # Use separate variable to track iterations.  If convergence fails, we
    # cannot simply report niter = (outer-1)*max_inner + inner.  Numerical
    # error could cause the inner loop to halt while the actual ||r|| > tol.
    niter = 0

    # Begin GMRES
    for outer in range(max_outer):

        # Preallocate for Givens Rotations, Hessenberg matrix and Krylov Space
        # Space required is O(dimen*max_inner).
        # NOTE:  We are dealing with row-major matrices, so we traverse in a
        #        row-major fashion,
        #        i.e., H and V's transpose is what we store.
        Q = []  # Givens Rotations
        # Upper Hessenberg matrix, which is then
        #   converted to upper tri with Givens Rots
        H = np.zeros((max_inner + 1, max_inner + 1), dtype=xtype)
        V = np.zeros((max_inner + 1, dimen), dtype=xtype)  # Krylov Space
        # vs store the pointers to each column of V.
        #   This saves a considerable amount of time.
vs = [] # v = r/normr V[0, :] = scal(1.0 / normr, r) vs.append(V[0, :]) # This is the RHS vector for the problem in the Krylov Space g = np.zeros((dimen, ), dtype=xtype) g[0] = normr for inner in range(max_inner): # New Search Direction v = V[inner + 1, :] v[:] = np.ravel(M * (A * vs[-1])) vs.append(v) normv_old = norm(v) # Check for nan, inf # if isnan(V[inner+1, :]).any() or isinf(V[inner+1, :]).any(): # warn('inf or nan after application of preconditioner') # return(postprocess(x), -1) # Modified Gram Schmidt for k in range(inner + 1): vk = vs[k] alpha = dotc(vk, v) H[inner, k] = alpha v[:] = axpy(vk, v, dimen, -alpha) normv = norm(v) H[inner, inner + 1] = normv # Re-orthogonalize if (reorth is True) and (normv_old == normv_old + 0.001 * normv): for k in range(inner + 1): vk = vs[k] alpha = dotc(vk, v) H[inner, k] = H[inner, k] + alpha v[:] = axpy(vk, v, dimen, -alpha) # Check for breakdown if H[inner, inner + 1] != 0.0: v[:] = scal(1.0 / H[inner, inner + 1], v) # Apply previous Givens rotations to H if inner > 0: apply_givens(Q, H[inner, :], inner) # Calculate and apply next complex-valued Givens Rotation # ==> Note that if max_inner = dimen, then this is unnecessary # for the last inner # iteration, when inner = dimen-1. if inner != dimen - 1: if H[inner, inner + 1] != 0: [c, s, r] = lartg(H[inner, inner], H[inner, inner + 1]) Qblock = np.array([[c, s], [-np.conjugate(s), c]], dtype=xtype) Q.append(Qblock) # Apply Givens Rotation to g, # the RHS for the linear system in the Krylov Subspace. g[inner:inner + 2] = np.dot(Qblock, g[inner:inner + 2]) # Apply effect of Givens Rotation to H H[inner, inner] = dotu(Qblock[0, :], H[inner, inner:inner + 2]) H[inner, inner + 1] = 0.0 niter += 1 # Don't update normr if last inner iteration, because # normr is calculated directly after this loop ends. if inner < max_inner - 1: normr = np.abs(g[inner + 1]) if normr < tol: break # Allow user access to the iterates if callback is not None: callback(x) if keep_r: residuals.append(normr) # end inner loop, back to outer loop # Find best update to x in Krylov Space V. Solve inner x inner system. y = sp.linalg.solve(H[0:inner + 1, 0:inner + 1].T, g[0:inner + 1]) update = np.ravel(np.mat(V[:inner + 1, :]).T * y.reshape(-1, 1)) x = x + update r = b - np.ravel(A * x) # Apply preconditioner r = np.ravel(M * r) normr = norm(r) # Check for nan, inf # if isnan(r).any() or isinf(r).any(): # warn('inf or nan after application of preconditioner') # return(postprocess(x), -1) # Allow user access to the iterates if callback is not None: callback(x) if keep_r: residuals.append(normr) # Has GMRES stagnated? indices = (x != 0) if indices.any(): change = np.max(np.abs(update[indices] / x[indices])) if change < 1e-12: # No change, halt return (postprocess(x), -1) # test for convergence if normr < tol: return (postprocess(x), 0) # end outer loop return (postprocess(x), niter)
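# Both gmres_mgs variants call an apply_givens helper that is not shown in
# this collection.  A minimal sketch consistent with the call sites above
# (each Q[j] is a 2x2 rotation acting on entries j and j+1 of a Hessenberg
# column); the helper in the original package may differ in detail:
import numpy as np

def apply_givens(Q, v, k):
    """Apply the first k stored 2x2 Givens rotations in Q to the vector v."""
    for j in range(k):
        v[j:j + 2] = np.dot(Q[j], v[j:j + 2])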
def gmres_mgs(A, b, x0=None, tol=1e-5, restrt=None, maxiter=None, M=None, callback=None, residuals=None, reorth=False): """Generalized Minimum Residual Method (GMRES) based on MGS. GMRES iteratively refines the initial solution guess to the system Ax = b. Modified Gram-Schmidt version. Left preconditioning, leading to preconditioned residuals. Parameters ---------- A : array, matrix, sparse matrix, LinearOperator n x n, linear system to solve b : array, matrix right hand side, shape is (n,) or (n,1) x0 : array, matrix initial guess, default is a vector of zeros tol : float Tolerance for stopping criteria, let r=r_k ||M r|| < tol ||M b|| if ||b||=0, then set ||M b||=1 for these tests. restrt : None, int - if int, restrt is max number of inner iterations and maxiter is the max number of outer iterations - if None, do not restart GMRES, and max number of inner iterations is maxiter maxiter : None, int - if restrt is None, maxiter is the max number of inner iterations and GMRES does not restart - if restrt is int, maxiter is the max number of outer iterations, and restrt is the max number of inner iterations - defaults to min(n,40) if restart=None M : array, matrix, sparse matrix, LinearOperator n x n, inverted preconditioner, i.e. solve M A x = M b. callback : function User-supplied function is called after each iteration as callback(xk), where xk is the current solution vector residuals : list preconditioned residual history in the 2-norm, including the initial preconditioned residual reorth : boolean If True, then a check is made whether to re-orthogonalize the Krylov space each GMRES iteration Returns ------- (xk, info) xk : an updated guess after k iterations to the solution of Ax = b info : halting status == ======================================= 0 successful exit >0 convergence to tolerance not achieved, return iteration count instead. <0 numerical breakdown, or illegal input == ======================================= Notes ----- The LinearOperator class is in scipy.sparse.linalg.interface. Use this class if you prefer to define A or M as a mat-vec routine as opposed to explicitly constructing the matrix. For robustness, modified Gram-Schmidt is used to orthogonalize the Krylov Space Givens Rotations are used to provide the residual norm each iteration The residual is the *preconditioned* residual. Examples -------- >>> from pyamg.krylov import gmres >>> from pyamg.util.linalg import norm >>> import numpy as np >>> from pyamg.gallery import poisson >>> A = poisson((10,10)) >>> b = np.ones((A.shape[0],)) >>> (x,flag) = gmres(A,b, maxiter=2, tol=1e-8, orthog='mgs') >>> print(f'{norm(b - A*x):.6}') 6.54282 References ---------- .. [1] Yousef Saad, "Iterative Methods for Sparse Linear Systems, Second Edition", SIAM, pp. 151-172, pp. 272-275, 2003 http://www-users.cs.umn.edu/~saad/books.html .. [2] C. T. 
    Kelley, http://www4.ncsu.edu/~ctk/matlab_roots.html

    """
    # Convert inputs to linear system, with error checking
    A, M, x, b, postprocess = make_system(A, M, x0, b)
    n = A.shape[0]

    # Ensure that warnings are always reissued from this function
    warnings.filterwarnings('always', module='pyamg.krylov._gmres_mgs')

    # Get fast access to underlying BLAS routines
    # dotc is the conjugate dot, dotu does no conjugation
    [lartg] = get_lapack_funcs(['lartg'], [x])
    if np.iscomplexobj(np.zeros((1, ), dtype=x.dtype)):
        [axpy, dotu, dotc, scal] =\
            get_blas_funcs(['axpy', 'dotu', 'dotc', 'scal'], [x])
    else:
        # real type
        [axpy, dotu, dotc, scal] =\
            get_blas_funcs(['axpy', 'dot', 'dot', 'scal'], [x])

    # Set number of outer and inner iterations
    # If no restarts,
    #     then set max_inner=maxiter and max_outer=n
    # If restarts are set,
    #     then set max_inner=restart and max_outer=maxiter
    if restrt:
        if maxiter:
            max_outer = maxiter
        else:
            max_outer = 1
        if restrt > n:
            warn('Setting restrt to maximum allowed, n.')
            restrt = n
        max_inner = restrt
    else:
        max_outer = 1
        # Check for None before comparing, to avoid a TypeError
        if maxiter is None:
            maxiter = min(n, 40)
        elif maxiter > n:
            warn('Setting maxiter to maximum allowed, n.')
            maxiter = n
        max_inner = maxiter

    # Is this a one dimensional matrix?
    if n == 1:
        entry = np.ravel(A @ np.array([1.0], dtype=x.dtype))
        return (postprocess(b / entry), 0)

    # Prep for method
    r = b - A @ x

    # Apply preconditioner
    r = M @ r

    normr = norm(r)
    if residuals is not None:
        residuals[:] = [normr]  # initial residual

    # Check initial guess if b != 0,
    normb = norm(b)
    if normb == 0.0:
        normMb = 1.0  # reset so that tol is unscaled
    else:
        normMb = norm(M @ b)

    # set the stopping criteria (see the docstring)
    if normr < tol * normMb:
        return (postprocess(x), 0)

    # Use separate variable to track iterations.  If convergence fails, we
    # cannot simply report niter = (outer-1)*max_inner + inner.  Numerical
    # error could cause the inner loop to halt while the actual ||r|| > tolerance.
    niter = 0

    # Begin GMRES
    for _outer in range(max_outer):

        # Preallocate for Givens Rotations, Hessenberg matrix and Krylov Space
        # Space required is O(n*max_inner).
        # NOTE:  We are dealing with row-major matrices, so we traverse in a
        #        row-major fashion,
        #        i.e., H and V's transpose is what we store.
        Q = []  # Givens Rotations
        # Upper Hessenberg matrix, which is then
        #   converted to upper tri with Givens Rots
        H = np.zeros((max_inner + 1, max_inner + 1), dtype=x.dtype)
        V = np.zeros((max_inner + 1, n), dtype=x.dtype)  # Krylov Space
        # vs store the pointers to each column of V.
        #   This saves a considerable amount of time.
        vs = []
        # v = r/normr
        V[0, :] = scal(1.0 / normr, r)
        vs.append(V[0, :])

        # This is the RHS vector for the problem in the Krylov Space
        g = np.zeros((n, ), dtype=x.dtype)
        g[0] = normr

        for inner in range(max_inner):
            # New Search Direction
            v = V[inner + 1, :]
            v[:] = np.ravel(M @ (A @ vs[-1]))
            vs.append(v)
            normv_old = norm(v)

            # Modified Gram Schmidt
            for k in range(inner + 1):
                vk = vs[k]
                alpha = dotc(vk, v)
                H[inner, k] = alpha
                v[:] = axpy(vk, v, n, -alpha)

            normv = norm(v)
            H[inner, inner + 1] = normv

            # Re-orthogonalize
            if (reorth is True) and (normv_old == normv_old + 0.001 * normv):
                for k in range(inner + 1):
                    vk = vs[k]
                    alpha = dotc(vk, v)
                    H[inner, k] = H[inner, k] + alpha
                    v[:] = axpy(vk, v, n, -alpha)

            # Check for breakdown
            if H[inner, inner + 1] != 0.0:
                v[:] = scal(1.0 / H[inner, inner + 1], v)

            # Apply previous Givens rotations to H
            if inner > 0:
                apply_givens(Q, H[inner, :], inner)

            # Calculate and apply next complex-valued Givens Rotation
            # ==> Note that if max_inner = n, then this is unnecessary
            #     for the last inner iteration, when inner = n-1.
            if inner != n - 1:
                if H[inner, inner + 1] != 0:
                    [c, s, r] = lartg(H[inner, inner], H[inner, inner + 1])
                    Qblock = np.array([[c, s], [-np.conjugate(s), c]],
                                      dtype=x.dtype)
                    Q.append(Qblock)

                    # Apply Givens Rotation to g,
                    #   the RHS for the linear system in the Krylov Subspace.
                    g[inner:inner + 2] = np.dot(Qblock, g[inner:inner + 2])

                    # Apply effect of Givens Rotation to H
                    H[inner, inner] = dotu(Qblock[0, :],
                                           H[inner, inner:inner + 2])
                    H[inner, inner + 1] = 0.0

            niter += 1

            # Do not update normr if last inner iteration, because
            # normr is calculated directly after this loop ends.
            if inner < max_inner - 1:
                normr = np.abs(g[inner + 1])
                if normr < tol * normMb:
                    break

                if residuals is not None:
                    residuals.append(normr)

                if callback is not None:
                    y = sp.linalg.solve(H[0:inner + 1, 0:inner + 1].T,
                                        g[0:inner + 1])
                    update = np.ravel(V[:inner + 1, :].T.dot(y.reshape(-1, 1)))
                    callback(x + update)

        # end inner loop, back to outer loop

        # Find best update to x in Krylov Space V.  Solve inner x inner system.
        y = sp.linalg.solve(H[0:inner + 1, 0:inner + 1].T, g[0:inner + 1])
        update = np.ravel(V[:inner + 1, :].T.dot(y.reshape(-1, 1)))
        x = x + update
        r = b - A @ x

        # Apply preconditioner
        r = M @ r
        normr = norm(r)

        # Allow user access to the iterates
        if callback is not None:
            callback(x)

        if residuals is not None:
            residuals.append(normr)

        # Has GMRES stagnated?
        indices = (x != 0)
        if indices.any():
            change = np.max(np.abs(update[indices] / x[indices]))
            if change < 1e-12:
                # No change, halt
                return (postprocess(x), -1)

        # test for convergence
        if normr < tol * normMb:
            return (postprocess(x), 0)

    # end outer loop

    return (postprocess(x), niter)
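# The rotation parameters used above come from LAPACK's lartg via
# get_lapack_funcs.  A standalone check (made-up numbers) that the 2x2 block
# built from (c, s) annihilates the subdiagonal entry, which is exactly how
# H[inner, inner + 1] is zeroed out:
import numpy as np
from scipy.linalg import get_lapack_funcs

lartg, = get_lapack_funcs(['lartg'], [np.zeros(1)])
c, s, r_out = lartg(3.0, 4.0)
Qblock_demo = np.array([[c, s], [-np.conjugate(s), c]])
print(Qblock_demo.dot([3.0, 4.0]))   # ~[5., 0.]: (f, g) is rotated onto (r, 0)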
def _fgmres(matvec, v0, m, atol, lpsolve=None, rpsolve=None, cs=(), outer_v=(), prepend_outer_v=False): """ FGMRES Arnoldi process, with optional projection or augmentation Parameters ---------- matvec : callable Operation A*x v0 : ndarray Initial vector, normalized to nrm2(v0) == 1 m : int Number of GMRES rounds atol : float Absolute tolerance for early exit lpsolve : callable Left preconditioner L rpsolve : callable Right preconditioner R CU : list of (ndarray, ndarray) Columns of matrices C and U in GCROT outer_v : list of ndarrays Augmentation vectors in LGMRES prepend_outer_v : bool, optional Whether augmentation vectors come before or after Krylov iterates Raises ------ LinAlgError If nans encountered Returns ------- Q, R : ndarray QR decomposition of the upper Hessenberg H=QR B : ndarray Projections corresponding to matrix C vs : list of ndarray Columns of matrix V zs : list of ndarray Columns of matrix Z y : ndarray Solution to ||H y - e_1||_2 = min! res : float The final (preconditioned) residual norm """ if lpsolve is None: lpsolve = lambda x: x if rpsolve is None: rpsolve = lambda x: x axpy, dot, scal, nrm2 = get_blas_funcs(['axpy', 'dot', 'scal', 'nrm2'], (v0, )) vs = [v0] zs = [] y = None res = np.nan m = m + len(outer_v) # Orthogonal projection coefficients B = np.zeros((len(cs), m), dtype=v0.dtype) # H is stored in QR factorized form Q = np.ones((1, 1), dtype=v0.dtype) R = np.zeros((1, 0), dtype=v0.dtype) eps = np.finfo(v0.dtype).eps breakdown = False # FGMRES Arnoldi process for j in range(m): # L A Z = C B + V H if prepend_outer_v and j < len(outer_v): z, w = outer_v[j] elif prepend_outer_v and j == len(outer_v): z = rpsolve(v0) w = None elif not prepend_outer_v and j >= m - len(outer_v): z, w = outer_v[j - (m - len(outer_v))] else: z = rpsolve(vs[-1]) w = None if w is None: w = lpsolve(matvec(z)) else: # w is clobbered below w = w.copy() w_norm = nrm2(w) # GCROT projection: L A -> (1 - C C^H) L A # i.e. orthogonalize against C for i, c in enumerate(cs): alpha = dot(c, w) B[i, j] = alpha w = axpy(c, w, c.shape[0], -alpha) # w -= alpha*c # Orthogonalize against V hcur = np.zeros(j + 2, dtype=Q.dtype) for i, v in enumerate(vs): alpha = dot(v, w) hcur[i] = alpha w = axpy(v, w, v.shape[0], -alpha) # w -= alpha*v hcur[i + 1] = nrm2(w) with np.errstate(over='ignore', divide='ignore'): # Careful with denormals alpha = 1 / hcur[-1] if np.isfinite(alpha): w = scal(alpha, w) if not (hcur[-1] > eps * w_norm): # w essentially in the span of previous vectors, # or we have nans. Bail out after updating the QR # solution. breakdown = True vs.append(w) zs.append(z) # Arnoldi LSQ problem # Add new column to H=Q@R, padding other columns with zeros Q2 = np.zeros((j + 2, j + 2), dtype=Q.dtype, order='F') Q2[:j + 1, :j + 1] = Q Q2[j + 1, j + 1] = 1 R2 = np.zeros((j + 2, j), dtype=R.dtype, order='F') R2[:j + 1, :] = R Q, R = qr_insert(Q2, R2, hcur, j, which='col', overwrite_qru=True, check_finite=False) # Transformed least squares problem # || Q R y - inner_res_0 * e_1 ||_2 = min! # Since R = [R'; 0], solution is y = inner_res_0 (R')^{-1} (Q^H)[:j,0] # Residual is immediately known res = abs(Q[0, -1]) # Check for termination if res < atol or breakdown: break if not np.isfinite(R[j, j]): # nans encountered, bail out raise LinAlgError() # -- Get the LSQ problem solution # The problem is triangular, but the condition number may be # bad (or in case of breakdown the last diagonal entry may be # zero), so use lstsq instead of trtrs. 
y, _, _, _, = lstsq(R[:j + 1, :j + 1], Q[0, :j + 1].conj()) B = B[:, :j + 1] return Q, R, B, vs, zs, y, res
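# _fgmres keeps H in QR-factorized form and grows it one Hessenberg column at
# a time with scipy.linalg.qr_insert, as in the padded Q2/R2 step above.  A
# minimal sketch of that first insertion (made-up column h; illustration
# only):
import numpy as np
from scipy.linalg import qr_insert

Q_demo = np.ones((1, 1))
R_demo = np.zeros((1, 0))
h = np.array([2.0, 1.0])          # first Hessenberg column (h00, h10)

Q2 = np.zeros((2, 2), order='F')
Q2[:1, :1] = Q_demo
Q2[1, 1] = 1
R2 = np.zeros((2, 0), order='F')
Q_demo, R_demo = qr_insert(Q2, R2, h, 0, which='col')

print(np.allclose(Q_demo @ R_demo, h.reshape(2, 1)))  # True
print(abs(Q_demo[0, -1]))   # plays the role of the residual `res` above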
def gmres_mgs(surf_array, field_array, X, b, param, ind0, timing, kernel):
    """
    GMRES solver.

    Arguments
    ----------
    surf_array : array, contains the surface classes of each region on the
                 surface.
    field_array: array, contains the Field classes of each region on the
                 surface.
    X          : array, initial guess.
    b          : array, right hand side.
    param      : class, parameters related to the surface.
    ind0       : class, it contains the indices related to the treecode
                 computation.
    timing     : class, it contains timing information for different parts
                 of the code.
    kernel     : pycuda source module.

    Returns
    --------
    X          : array, an updated guess to the solution.
    iteration  : int, number of outer iterations for convergence

    References
    ----------
    .. [1] Yousef Saad, "Iterative Methods for Sparse Linear Systems,
       Second Edition", SIAM, pp. 151-172, pp. 272-275, 2003
       http://www-users.cs.umn.edu/~saad/books.html
    .. [2] C. T. Kelley, http://www4.ncsu.edu/~ctk/matlab_roots.html
    """

    # Defining xtype as dtype of the problem, to decide which BLAS functions
    # import.
    xtype = upcast(X.dtype, b.dtype)

    # Get fast access to underlying BLAS routines
    # dotc is the conjugate dot, dotu does no conjugation
    [lartg] = get_lapack_funcs(['lartg'], [X])
    if numpy.iscomplexobj(numpy.zeros((1, ), dtype=xtype)):
        [axpy, dotu, dotc, scal] =\
            get_blas_funcs(['axpy', 'dotu', 'dotc', 'scal'], [X])
    else:
        # real type
        [axpy, dotu, dotc, scal] =\
            get_blas_funcs(['axpy', 'dot', 'dot', 'scal'], [X])

    # Make full use of direct access to BLAS by defining own norm
    def norm(z):
        return numpy.sqrt(numpy.real(dotc(z, z)))

    # Defining dimension
    dimen = len(X)

    max_iter = param.max_iter
    R = param.restart
    tol = param.tol

    # Set number of outer and inner iterations
    if R > dimen:
        warn('Setting number of inner iterations (restrt) to maximum '
             'allowed, which is A.shape[0]')
        R = dimen

    max_inner = R

    # max_outer should be max_iter/max_inner, but this might not be an
    # integer, so we take the ceiling of the division.
    # In the inner loop there is an if statement to break in case max_iter
    # is reached.
    max_outer = int(numpy.ceil(max_iter / max_inner))

    # Prep for method
    aux = gmres_dot(X, surf_array, field_array, ind0, param, timing, kernel)
    r = b - aux

    normr = norm(r)

    # Check initial guess ( scaling by b, if b != 0, must account for
    # case when norm(b) is very small)
    normb = norm(b)
    if normb == 0.0:
        normb = 1.0
    if normr < tol * normb:
        return X, 0

    iteration = 0

    # Here starts the GMRES
    for outer in range(max_outer):

        # Preallocate for Givens Rotations, Hessenberg matrix and Krylov Space
        # Space required is O(dimen*max_inner).
        # NOTE:  We are dealing with row-major matrices, so we traverse in a
        #        row-major fashion,
        #        i.e., H and V's transpose is what we store.
        Q = []  # Initializing Givens Rotations
        # Upper Hessenberg matrix, which is then
        # converted to upper triangular with Givens Rotations
        H = numpy.zeros((max_inner + 1, max_inner + 1), dtype=xtype)
        V = numpy.zeros((max_inner + 1, dimen), dtype=xtype)  # Krylov space
        # vs store the pointers to each column of V.
        # This saves a considerable amount of time.
        vs = []
        # v = r/normr
        V[0, :] = scal(1.0 / normr, r)  # scal is a wrapper of dscal --> x = a*x
        vs.append(V[0, :])

        # Saving initial residual to be used to calculate the rel_resid
        if iteration == 0:
            res_0 = normb

        # RHS vector in the Krylov space
        g = numpy.zeros((dimen, ), dtype=xtype)
        g[0] = normr

        for inner in range(max_inner):
            # New search direction
            v = V[inner + 1, :]
            v[:] = gmres_dot(vs[-1], surf_array, field_array, ind0, param,
                             timing, kernel)
            vs.append(v)

            # Modified Gram Schmidt
            for k in range(inner + 1):
                vk = vs[k]
                alpha = dotc(vk, v)
                H[inner, k] = alpha
                v[:] = axpy(vk, v, dimen, -alpha)  # y := a*x + y
                # axpy is a wrapper for daxpy (blas function)

            normv = norm(v)
            H[inner, inner + 1] = normv

            # Check for breakdown
            if H[inner, inner + 1] != 0.0:
                v[:] = scal(1.0 / H[inner, inner + 1], v)

            # Apply previous Givens rotations to H
            if inner > 0:
                apply_givens(Q, H[inner, :], inner)

            # Calculate and apply next complex-valued Givens rotation.
            # If max_inner = dimen, we don't need to calculate; this is
            # unnecessary for the last inner iteration, when inner = dimen - 1
            if inner != dimen - 1:
                if H[inner, inner + 1] != 0:
                    # lartg is a lapack function that computes the parameters
                    # for a Givens rotation
                    [c, s, _] = lartg(H[inner, inner], H[inner, inner + 1])
                    Qblock = numpy.array([[c, s], [-numpy.conjugate(s), c]],
                                         dtype=xtype)
                    Q.append(Qblock)

                    # Apply Givens Rotations to RHS for the linear system in
                    # the Krylov space.
                    g[inner:inner + 2] = numpy.dot(Qblock, g[inner:inner + 2])

                    # Apply Givens rotations to H
                    H[inner, inner] = dotu(Qblock[0, :],
                                           H[inner, inner:inner + 2])
                    H[inner, inner + 1] = 0.0

            iteration += 1
            if inner < max_inner - 1:
                normr = abs(g[inner + 1])
                rel_resid = normr / res_0

                if rel_resid < tol:
                    break

            if iteration % 1 == 0:  # print every iteration
                print('Iteration: {}, relative residual: {}'.format(
                    iteration, rel_resid))

            if (inner + 1 == R):
                print('Residual: {}. Restart...'.format(rel_resid))

            if iteration == max_iter:
                print('Warning!!!! '
                      'You have reached the maximum number of iterations: {}.'
                      .format(iteration))
                print('The run will stop. Check the residual behaviour, you '
                      'might have a bug. For future runs you might consider '
                      'changing the tolerance or increasing the number of '
                      'max_iter.')
                break

        # end inner loop, back to outer loop

        # Find best update to X in Krylov Space V.  Solve inner X inner system.
        y = scipy.linalg.solve(H[0:inner + 1, 0:inner + 1].T, g[0:inner + 1])
        update = numpy.ravel(numpy.dot(V[:inner + 1, :].T, y.reshape(-1, 1)))
        X = X + update
        aux = gmres_dot(X, surf_array, field_array, ind0, param, timing,
                        kernel)
        r = b - aux
        normr = norm(r)
        rel_resid = normr / res_0

        # test for convergence
        if rel_resid < tol:
            print('GMRES solve')
            print('Converged after {} iterations to a residual of {}'.format(
                iteration, rel_resid))
            print('Time weight vector: {}'.format(timing.time_mass))
            print('Time sort         : {}'.format(timing.time_sort))
            print('Time data transfer: {}'.format(timing.time_trans))
            print('Time P2M          : {}'.format(timing.time_P2M))
            print('Time M2M          : {}'.format(timing.time_M2M))
            print('Time M2P          : {}'.format(timing.time_M2P))
            print('Time P2P          : {}'.format(timing.time_P2P))
            print('\tTime analy: {}'.format(timing.time_an))

            return X, iteration

    # end outer loop

    return X, iteration
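# All three gmres_mgs variants close an inner cycle the same way: after the
# Givens sweeps, H[:inner+1, :inner+1] is triangular (stored row-major, i.e.
# transposed), so the Krylov coefficients come from one small solve and the
# update is V^T y.  A standalone sketch with made-up data:
import numpy as np
import scipy.linalg

H_demo = np.array([[2.0, 0.0],
                   [1.0, 3.0]])            # rows hold Hessenberg columns
g_demo = np.array([1.0, 2.0])
V_demo = np.random.RandomState(1).randn(2, 5)  # rows are Krylov basis vectors

y_demo = scipy.linalg.solve(H_demo.T, g_demo)  # H^T is upper triangular here
update_demo = np.ravel(np.dot(V_demo.T, y_demo.reshape(-1, 1)))
print(update_demo.shape)                       # (5,), same length as x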
def test_fast_dot(): """Check fast dot blas wrapper function""" if fast_dot is np.dot: return rng = np.random.RandomState(42) A = rng.random_sample([2, 10]) B = rng.random_sample([2, 10]) try: linalg.get_blas_funcs(['gemm'])[0] has_blas = True except (AttributeError, ValueError): has_blas = False if has_blas: # Test _fast_dot for invalid input. # Maltyped data. for dt1, dt2 in [['f8', 'f4'], ['i4', 'i4']]: assert_raises(ValueError, _fast_dot, A.astype(dt1), B.astype(dt2).T) # Malformed data. ## ndim == 0 E = np.empty(0) assert_raises(ValueError, _fast_dot, E, E) ## ndim == 1 assert_raises(ValueError, _fast_dot, A, A[0]) ## ndim > 2 assert_raises(ValueError, _fast_dot, A.T, np.array([A, A])) ## min(shape) == 1 assert_raises(ValueError, _fast_dot, A, A[0, :][None, :]) # test for matrix mismatch error assert_raises(ValueError, _fast_dot, A, A) # Test cov-like use case + dtypes. for dtype in ['f8', 'f4']: A = A.astype(dtype) B = B.astype(dtype) # col < row C = np.dot(A.T, A) C_ = fast_dot(A.T, A) assert_almost_equal(C, C_, decimal=5) C = np.dot(A.T, B) C_ = fast_dot(A.T, B) assert_almost_equal(C, C_, decimal=5) C = np.dot(A, B.T) C_ = fast_dot(A, B.T) assert_almost_equal(C, C_, decimal=5) # Test square matrix * rectangular use case. A = rng.random_sample([2, 2]) for dtype in ['f8', 'f4']: A = A.astype(dtype) B = B.astype(dtype) C = np.dot(A, B) C_ = fast_dot(A, B) assert_almost_equal(C, C_, decimal=5) C = np.dot(A.T, B) C_ = fast_dot(A.T, B) assert_almost_equal(C, C_, decimal=5) if has_blas: for x in [np.array([[d] * 10] * 2) for d in [np.inf, np.nan]]: assert_raises(ValueError, _fast_dot, x, x.T)
def _update_dict(dictionary, Y, code, verbose=False, return_r2=False,
                 random_state=None):
    """Update the dense dictionary factor in place.

    Parameters
    ----------
    dictionary: array of shape (n_features, n_atoms)
        Value of the dictionary at the previous iteration.

    Y: array of shape (n_features, n_samples)
        Data matrix.

    code: array of shape (n_atoms, n_samples)
        Sparse coding of the data against which to optimize the dictionary.

    verbose:
        Degree of output the procedure will print.

    return_r2: bool
        Whether to compute and return the residual sum of squares
        corresponding to the computed solution.

    random_state: int or RandomState
        Pseudo-random number generator state used for random sampling.

    Returns
    -------
    dictionary: array of shape (n_features, n_atoms)
        Updated dictionary.
    """
    n_atoms = len(code)
    n_features = Y.shape[0]
    random_state = check_random_state(random_state)
    # Residuals, computed 'in-place' for efficiency
    R = -np.dot(dictionary, code)
    R += Y
    R = np.asfortranarray(R)
    ger, = linalg.get_blas_funcs(('ger', ), (dictionary, code))
    for k in range(n_atoms):
        # R <- 1.0 * U_k * V_k^T + R
        R = ger(1.0, dictionary[:, k], code[k, :], a=R, overwrite_a=True)
        dictionary[:, k] = np.dot(R, code[k, :].T)
        # Scale k'th atom
        atom_norm_square = np.dot(dictionary[:, k], dictionary[:, k])
        if atom_norm_square < 1e-20:
            if verbose == 1:
                sys.stdout.write("+")
                sys.stdout.flush()
            elif verbose:
                print("Adding new random atom")
            dictionary[:, k] = random_state.randn(n_features)
            # Setting corresponding coefs to 0
            code[k, :] = 0.0
            dictionary[:, k] /= sqrt(np.dot(dictionary[:, k],
                                            dictionary[:, k]))
        else:
            dictionary[:, k] /= sqrt(atom_norm_square)
            # R <- -1.0 * U_k * V_k^T + R
            R = ger(-1.0, dictionary[:, k], code[k, :], a=R, overwrite_a=True)
    if return_r2:
        R **= 2
        # R is fortran-ordered. For numpy version < 1.6, sum does not
        # follow the quick striding first, and is thus inefficient on
        # fortran ordered data. We take a flat view of the data with no
        # striding
        R = as_strided(R, shape=(R.size, ), strides=(R.dtype.itemsize, ))
        R = np.sum(R)
        return dictionary, R
    return dictionary
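# The dictionary update above revolves around BLAS ger, the general rank-1
# update A <- alpha * x * y^T + A.  A minimal standalone illustration of the
# in-place pattern (the overwrite is only truly in place when A is
# Fortran-ordered, which is why R is passed through np.asfortranarray above):
import numpy as np
from scipy import linalg

x_demo = np.array([1.0, 2.0])
y_vec = np.array([3.0, 4.0, 5.0])
A_demo = np.zeros((2, 3), order='F')

ger, = linalg.get_blas_funcs(('ger', ), (A_demo, ))
A_demo = ger(1.0, x_demo, y_vec, a=A_demo, overwrite_a=True)

print(np.allclose(A_demo, np.outer(x_demo, y_vec)))  # True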
def _gram_omp(Gram, Xy, n_nonzero_coefs, tol_0=None, tol=None,
              copy_Gram=True, copy_Xy=True):
    """Orthogonal Matching Pursuit step on a precomputed Gram matrix.

    This function uses the Cholesky decomposition method.

    Parameters:
    -----------
    Gram: array, shape = (n_features, n_features)
        Gram matrix of the input data matrix

    Xy: array, shape = (n_features,)
        Input targets

    n_nonzero_coefs: int
        Targeted number of non-zero elements

    tol_0: float
        Squared norm of y, required if tol is not None.

    tol: float
        Targeted squared error, if not None overrides n_nonzero_coefs.

    copy_Gram: bool, optional
        Whether the gram matrix must be copied by the algorithm. A false
        value is only helpful if it is already Fortran-ordered, otherwise a
        copy is made anyway.

    copy_Xy: bool, optional
        Whether the covariance vector Xy must be copied by the algorithm.
        If False, it may be overwritten.

    Returns:
    --------
    gamma: array, shape = (n_nonzero_coefs,)
        Non-zero elements of the solution

    idx: array, shape = (n_nonzero_coefs,)
        Indices of the positions of the elements in gamma within the solution
        vector
    """
    Gram = Gram.copy('F') if copy_Gram else np.asfortranarray(Gram)

    if copy_Xy:
        Xy = Xy.copy()

    min_float = np.finfo(Gram.dtype).eps
    nrm2, swap = linalg.get_blas_funcs(('nrm2', 'swap'), (Gram, ))
    potrs, = get_lapack_funcs(('potrs', ), (Gram, ))

    # a list (not a range) so entries can be swapped to keep track of pivoting
    indices = list(range(len(Gram)))

    alpha = Xy
    tol_curr = tol_0
    delta = 0
    gamma = np.empty(0)
    n_active = 0

    max_features = len(Gram) if tol is not None else n_nonzero_coefs
    L = np.empty((max_features, max_features), dtype=Gram.dtype)
    L[0, 0] = 1.

    while True:
        lam = np.argmax(np.abs(alpha))
        if lam < n_active or alpha[lam] ** 2 < min_float:
            # selected same atom twice, or inner product too small
            warn(premature)
            break
        if n_active > 0:
            L[n_active, :n_active] = Gram[lam, :n_active]
            solve_triangular(L[:n_active, :n_active],
                             L[n_active, :n_active])
            v = nrm2(L[n_active, :n_active]) ** 2
            if 1 - v <= min_float:  # selected atoms are dependent
                warn(premature)
                break
            L[n_active, n_active] = np.sqrt(1 - v)
        Gram[n_active], Gram[lam] = swap(Gram[n_active], Gram[lam])
        Gram.T[n_active], Gram.T[lam] = swap(Gram.T[n_active], Gram.T[lam])
        indices[n_active], indices[lam] = indices[lam], indices[n_active]
        Xy[n_active], Xy[lam] = Xy[lam], Xy[n_active]
        n_active += 1
        # solves LL'x = y as a composition of two triangular systems
        gamma, _ = potrs(L[:n_active, :n_active], Xy[:n_active], lower=True,
                         overwrite_b=False)

        beta = np.dot(Gram[:, :n_active], gamma)
        alpha = Xy - beta
        if tol is not None:
            tol_curr += delta
            delta = np.inner(gamma, beta[:n_active])
            tol_curr -= delta
            if tol_curr <= tol:
                break
        elif n_active == max_features:
            break

    return gamma, indices[:n_active]
def _gram_omp(Gram, Xy, n_nonzero_coefs, eps_0=None, eps=None, overwrite_gram=False, overwrite_Xy=False): """ Solves a single Orthogonal Matching Pursuit problem using the Cholesky decomposition, based on the Gram matrix and more precomputations. Parameters: ----------- Gram: array, shape = (n_features, n_features) Gram matrix of the input data matrix Xy: array, shape = (n_features,) Input targets n_nonzero_coefs: int Targeted number of non-zero elements eps_0: float Squared norm of y, required if eps is not None. eps: float Targeted squared error, if not None overrides n_nonzero_coefs. overwrite_gram: bool, Whether the gram matrix can be overwritten by the algorithm. This is only helpful if it is already Fortran-ordered, otherwise a copy is made anyway. overwrite_xy: bool, Whether the covariance vector Xy can be overwritten by the algorithm. Returns: -------- gamma: array, shape = (n_nonzero_coefs,) Non-zero elements of the solution idx: array, shape = (n_nonzero_coefs,) Indices of the positions of the elements in gamma within the solution vector """ if not overwrite_gram: Gram = Gram.copy('F') else: Gram = np.asfortranarray(Gram) if not overwrite_Xy: Xy = Xy.copy() min_float = np.finfo(Gram.dtype).eps nrm2, swap = linalg.get_blas_funcs(('nrm2', 'swap'), (Gram, )) potrs, = get_lapack_funcs(('potrs', ), (Gram, )) idx = [] alpha = Xy eps_curr = eps_0 delta = 0 n_active = 0 max_features = len(Gram) if eps is not None else n_nonzero_coefs L = np.empty((max_features, max_features), dtype=Gram.dtype) L[0, 0] = 1. while True: lam = np.argmax(np.abs(alpha)) if lam < n_active or alpha[lam]**2 < min_float: # selected same atom twice, or inner product too small warn(premature) break if n_active > 0: L[n_active, :n_active] = Gram[lam, :n_active] solve_triangular(L[:n_active, :n_active], L[n_active, :n_active]) v = nrm2(L[n_active, :n_active])**2 if 1 - v <= min_float: # selected atoms are dependent warn(premature) break L[n_active, n_active] = np.sqrt(1 - v) idx.append(lam) Gram[n_active], Gram[lam] = swap(Gram[n_active], Gram[lam]) Gram.T[n_active], Gram.T[lam] = swap(Gram.T[n_active], Gram.T[lam]) Xy[n_active], Xy[lam] = Xy[lam], Xy[n_active] n_active += 1 # solves LL'x = y as a composition of two triangular systems gamma, _ = potrs(L[:n_active, :n_active], Xy[:n_active], lower=True, overwrite_b=False) beta = np.dot(Gram[:, :n_active], gamma) alpha = Xy - beta if eps is not None: eps_curr += delta delta = np.inner(gamma, beta[:n_active]) eps_curr -= delta if eps_curr <= eps: break elif n_active == max_features: break return gamma, idx
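# Both _gram_omp variants solve the normal equations through LAPACK potrs,
# which reuses an already-computed Cholesky factor instead of refactorizing.
# A small standalone check with a made-up SPD "Gram" matrix:
import numpy as np
from scipy.linalg import cholesky, get_lapack_funcs

G_demo = np.array([[4.0, 2.0],
                   [2.0, 3.0]])
b_demo = np.array([1.0, 1.0])

L_demo = cholesky(G_demo, lower=True)          # G = L L^T
potrs, = get_lapack_funcs(('potrs', ), (G_demo, ))
x_demo, info = potrs(L_demo, b_demo, lower=True)

print(info, np.allclose(G_demo.dot(x_demo), b_demo))  # 0 True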
def lgmres(A, b, x0=None, tol=1e-5, maxiter=1000, M=None, callback=None, inner_m=30, outer_k=3, outer_v=None, store_outer_Av=True, prepend_outer_v=False, atol=None): """ Solve a matrix equation using the LGMRES algorithm. The LGMRES algorithm [1]_ [2]_ is designed to avoid some problems in the convergence in restarted GMRES, and often converges in fewer iterations. Parameters ---------- A : {sparse matrix, ndarray, LinearOperator} The real or complex N-by-N matrix of the linear system. Alternatively, ``A`` can be a linear operator which can produce ``Ax`` using, e.g., ``scipy.sparse.linalg.LinearOperator``. b : ndarray Right hand side of the linear system. Has shape (N,) or (N,1). x0 : ndarray Starting guess for the solution. tol, atol : float, optional Tolerances for convergence, ``norm(residual) <= max(tol*norm(b), atol)``. The default for ``atol`` is `tol`. .. warning:: The default value for `atol` will be changed in a future release. For future compatibility, specify `atol` explicitly. maxiter : int, optional Maximum number of iterations. Iteration will stop after maxiter steps even if the specified tolerance has not been achieved. M : {sparse matrix, ndarray, LinearOperator}, optional Preconditioner for A. The preconditioner should approximate the inverse of A. Effective preconditioning dramatically improves the rate of convergence, which implies that fewer iterations are needed to reach a given error tolerance. callback : function, optional User-supplied function to call after each iteration. It is called as callback(xk), where xk is the current solution vector. inner_m : int, optional Number of inner GMRES iterations per each outer iteration. outer_k : int, optional Number of vectors to carry between inner GMRES iterations. According to [1]_, good values are in the range of 1...3. However, note that if you want to use the additional vectors to accelerate solving multiple similar problems, larger values may be beneficial. outer_v : list of tuples, optional List containing tuples ``(v, Av)`` of vectors and corresponding matrix-vector products, used to augment the Krylov subspace, and carried between inner GMRES iterations. The element ``Av`` can be `None` if the matrix-vector product should be re-evaluated. This parameter is modified in-place by `lgmres`, and can be used to pass "guess" vectors in and out of the algorithm when solving similar problems. store_outer_Av : bool, optional Whether LGMRES should store also A@v in addition to vectors `v` in the `outer_v` list. Default is True. prepend_outer_v : bool, optional Whether to put outer_v augmentation vectors before Krylov iterates. In standard LGMRES, prepend_outer_v=False. Returns ------- x : ndarray The converged solution. info : int Provides convergence information: - 0 : successful exit - >0 : convergence to tolerance not achieved, number of iterations - <0 : illegal input or breakdown Notes ----- The LGMRES algorithm [1]_ [2]_ is designed to avoid the slowing of convergence in restarted GMRES, due to alternating residual vectors. Typically, it often outperforms GMRES(m) of comparable memory requirements by some measure, or at least is not much worse. Another advantage in this algorithm is that you can supply it with 'guess' vectors in the `outer_v` argument that augment the Krylov subspace. If the solution lies close to the span of these vectors, the algorithm converges faster. 
This can be useful if several very similar matrices need to be inverted one after another, such as in Newton-Krylov iteration where the Jacobian matrix often changes little in the nonlinear steps. References ---------- .. [1] A.H. Baker and E.R. Jessup and T. Manteuffel, "A Technique for Accelerating the Convergence of Restarted GMRES", SIAM J. Matrix Anal. Appl. 26, 962 (2005). .. [2] A.H. Baker, "On Improving the Performance of the Linear Solver restarted GMRES", PhD thesis, University of Colorado (2003). Examples -------- >>> from scipy.sparse import csc_matrix >>> from scipy.sparse.linalg import lgmres >>> A = csc_matrix([[3, 2, 0], [1, -1, 0], [0, 5, 1]], dtype=float) >>> b = np.array([2, 4, -1], dtype=float) >>> x, exitCode = lgmres(A, b) >>> print(exitCode) # 0 indicates successful convergence 0 >>> np.allclose(A.dot(x), b) True """ A, M, x, b, postprocess = make_system(A, M, x0, b) if not np.isfinite(b).all(): raise ValueError("RHS must contain only finite numbers") if atol is None: warnings.warn( "scipy.sparse.linalg.lgmres called without specifying `atol`. " "The default value will change in the future. To preserve " "current behavior, set ``atol=tol``.", category=DeprecationWarning, stacklevel=2) atol = tol matvec = A.matvec psolve = M.matvec if outer_v is None: outer_v = [] axpy, dot, scal = None, None, None nrm2 = get_blas_funcs('nrm2', [b]) b_norm = nrm2(b) if b_norm == 0: x = b return (postprocess(x), 0) ptol_max_factor = 1.0 for k_outer in range(maxiter): r_outer = matvec(x) - b # -- callback if callback is not None: callback(x) # -- determine input type routines if axpy is None: if np.iscomplexobj(r_outer) and not np.iscomplexobj(x): x = x.astype(r_outer.dtype) axpy, dot, scal, nrm2 = get_blas_funcs( ['axpy', 'dot', 'scal', 'nrm2'], (x, r_outer)) # -- check stopping condition r_norm = nrm2(r_outer) if r_norm <= max(atol, tol * b_norm): break # -- inner LGMRES iteration v0 = -psolve(r_outer) inner_res_0 = nrm2(v0) if inner_res_0 == 0: rnorm = nrm2(r_outer) raise RuntimeError("Preconditioner returned a zero vector; " "|v| ~ %.1g, |M v| = 0" % rnorm) v0 = scal(1.0 / inner_res_0, v0) ptol = min(ptol_max_factor, max(atol, tol * b_norm) / r_norm) try: Q, R, B, vs, zs, y, pres = _fgmres(matvec, v0, inner_m, lpsolve=psolve, atol=ptol, outer_v=outer_v, prepend_outer_v=prepend_outer_v) y *= inner_res_0 if not np.isfinite(y).all(): # Overflow etc. in computation. There's no way to # recover from this, so we have to bail out. raise LinAlgError() except LinAlgError: # Floating point over/underflow, non-finite result from # matmul etc. -- report failure. return postprocess(x), k_outer + 1 # Inner loop tolerance control if pres > ptol: ptol_max_factor = min(1.0, 1.5 * ptol_max_factor) else: ptol_max_factor = max(1e-16, 0.25 * ptol_max_factor) # -- GMRES terminated: eval solution dx = zs[0] * y[0] for w, yc in zip(zs[1:], y[1:]): dx = axpy(w, dx, dx.shape[0], yc) # dx += w*yc # -- Store LGMRES augmentation vectors nx = nrm2(dx) if nx > 0: if store_outer_Av: q = Q.dot(R.dot(y)) ax = vs[0] * q[0] for v, qc in zip(vs[1:], q[1:]): ax = axpy(v, ax, ax.shape[0], qc) outer_v.append((dx / nx, ax / nx)) else: outer_v.append((dx / nx, None)) # -- Retain only a finite number of augmentation vectors while len(outer_v) > outer_k: del outer_v[0] # -- Apply step x += dx else: # didn't converge ... return postprocess(x), maxiter return postprocess(x), 0
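# The outer_v list is documented above as a way to carry augmentation vectors
# between related solves.  A sketch of that warm-start pattern on the same
# small system as the docstring example, with a nearby right-hand side:
import numpy as np
from scipy.sparse import csc_matrix
from scipy.sparse.linalg import lgmres

A_demo = csc_matrix([[3, 2, 0], [1, -1, 0], [0, 5, 1]], dtype=float)
b1 = np.array([2, 4, -1], dtype=float)
b2 = b1 + 0.01                          # a nearby right-hand side

outer_v = []                            # filled in place by lgmres
x1, info1 = lgmres(A_demo, b1, outer_v=outer_v, atol=1e-10)
# The second solve starts from the augmentation vectors of the first.
x2, info2 = lgmres(A_demo, b2, outer_v=outer_v, atol=1e-10)
print(info1, info2)                     # 0 0 on success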
def lars_path(X, y, Xy=None, Gram=None, max_features=None, max_iter=500,
              alpha_min=0, method='lar', overwrite_X=False,
              overwrite_Gram=False, verbose=False):
    """Compute Least Angle Regression and LASSO path

    Parameters
    -----------
    X: array, shape: (n_samples, n_features)
        Input data

    y: array, shape: (n_samples)
        Input targets

    max_features: integer, optional
        Maximum number of selected features.

    max_iter: integer, optional
        Maximum number of iterations to perform.

    Gram: None, 'auto', array, shape: (n_features, n_features), optional
        Precomputed Gram matrix (X' * X), if 'auto', the Gram
        matrix is precomputed from the given X, if there are more samples
        than features

    alpha_min: float, optional
        Minimum correlation along the path. It corresponds to the
        regularization parameter alpha parameter in the Lasso.

    method: 'lar' | 'lasso'
        Specifies the returned model. Select 'lar' for Least Angle
        Regression, 'lasso' for the Lasso.

    Returns
    --------
    alphas: array, shape: (max_features + 1,)
        Maximum of covariances (in absolute value) at each iteration.

    active: array, shape (max_features,)
        Indices of active variables at the end of the path.

    coefs: array, shape (n_features, max_features+1)
        Coefficients along the path

    See also
    --------
    :ref:`LassoLars`, :ref:`Lars`

    Notes
    ------
    * http://en.wikipedia.org/wiki/Least-angle_regression
    * http://en.wikipedia.org/wiki/Lasso_(statistics)#LASSO_method
    """
    n_features = X.shape[1]
    n_samples = y.size

    if max_features is None:
        max_features = min(n_samples, n_features)

    coefs = np.zeros((max_features + 1, n_features))
    alphas = np.zeros(max_features + 1)
    n_iter, n_active = 0, 0
    active, indices = list(), np.arange(n_features)
    # holds the sign of covariance
    sign_active = np.empty(max_features, dtype=np.int8)
    drop = False
    eps = np.finfo(X.dtype).eps

    # will hold the cholesky factorization. Only lower part is
    # referenced.
    L = np.empty((max_features, max_features), dtype=X.dtype)
    swap, nrm2 = linalg.get_blas_funcs(('swap', 'nrm2'), (X,))
    potrs, = get_lapack_funcs(('potrs',), (X,))

    if Gram is None:
        if not overwrite_X:
            # force copy. setting the array to be fortran-ordered
            # speeds up the calculation of the (partial) Gram matrix
            # and allows to easily swap columns
            X = X.copy('F')
    elif Gram == 'auto':
        Gram = None
        if X.shape[0] > X.shape[1]:
            Gram = np.dot(X.T, X)
    else:
        if not overwrite_Gram:
            Gram = Gram.copy()

    if Xy is None:
        Cov = np.dot(X.T, y)
    else:
        Cov = Xy.copy()

    if verbose:
        print("Step\t\tAdded\t\tDropped\t\tActive set size\t\tC")

    while 1:
        if Cov.size:
            C_idx = np.argmax(np.abs(Cov))
            C_ = Cov[C_idx]
            C = np.fabs(C_)
            # to match a for computing gamma_
        else:
            C = 0.
        alphas[n_iter] = C / n_samples

        # Check for early stopping
        if alphas[n_iter] < alpha_min:  # interpolate
            # interpolation factor 0 <= ss < 1
            if n_iter > 0:
                # In the first iteration, all alphas are zero, the formula
                # below would make ss a NaN
                ss = (alphas[n_iter - 1] - alpha_min) / \
                     (alphas[n_iter - 1] - alphas[n_iter])
                coefs[n_iter] = coefs[n_iter - 1] + \
                    ss * (coefs[n_iter] - coefs[n_iter - 1])
            alphas[n_iter] = alpha_min
            break

        if n_active == max_features or n_iter == max_iter:
            break

        if not drop:
            # Update the Cholesky factorization of (Xa * Xa')
            #
            #          ( L   0 )
            #   L  ->  (       ) , where L * w = b
            #          ( w   z )       and z = 1 - ||w||
            #
            # where u is the last added to the active set
            sign_active[n_active] = np.sign(C_)
            m, n = n_active, C_idx + n_active

            Cov[C_idx], Cov[0] = swap(Cov[C_idx], Cov[0])
            indices[n], indices[m] = indices[m], indices[n]
            Cov = Cov[1:]  # remove Cov[0]

            if Gram is None:
                X.T[n], X.T[m] = swap(X.T[n], X.T[m])
                c = nrm2(X.T[n_active]) ** 2
                L[n_active, :n_active] = \
                    np.dot(X.T[n_active], X.T[:n_active].T)
            else:
                # swap does only work inplace if matrix is fortran
                # contiguous ...
                Gram[m], Gram[n] = swap(Gram[m], Gram[n])
                Gram[:, m], Gram[:, n] = swap(Gram[:, m], Gram[:, n])
                c = Gram[n_active, n_active]
                L[n_active, :n_active] = Gram[n_active, :n_active]

            # Update the cholesky decomposition for the Gram matrix
            arrayfuncs.solve_triangular(L[:n_active, :n_active],
                                        L[n_active, :n_active])
            v = np.dot(L[n_active, :n_active], L[n_active, :n_active])
            diag = max(np.sqrt(np.abs(c - v)), eps)
            L[n_active, n_active] = diag

            active.append(indices[n_active])
            n_active += 1

            if verbose:
                print("%s\t\t%s\t\t%s\t\t%s\t\t%s" %
                      (n_iter, active[-1], '', n_active, C))

        # least squares solution
        least_squares, info = potrs(L[:n_active, :n_active],
                                    sign_active[:n_active], lower=True)

        # is this really needed ?
        AA = 1. / np.sqrt(np.sum(least_squares * sign_active[:n_active]))
        least_squares *= AA

        if Gram is None:
            # equiangular direction of variables in the active set
            eq_dir = np.dot(X.T[:n_active].T, least_squares)
            # correlation between each inactive variable and the
            # equiangular vector
            corr_eq_dir = np.dot(X.T[n_active:], eq_dir)
        else:
            # if huge number of features, this takes 50% of time, I
            # think could be avoided if we just update it using an
            # orthogonal (QR) decomposition of X
            corr_eq_dir = np.dot(Gram[:n_active, n_active:].T,
                                 least_squares)

        g1 = arrayfuncs.min_pos((C - Cov) / (AA - corr_eq_dir))
        g2 = arrayfuncs.min_pos((C + Cov) / (AA + corr_eq_dir))
        gamma_ = min(g1, g2, C / AA)

        # TODO: better names for these variables: z
        drop = False
        z = - coefs[n_iter, active] / least_squares
        z_pos = arrayfuncs.min_pos(z)
        if z_pos < gamma_:
            # some coefficients have changed sign
            idx = np.where(z == z_pos)[0]

            # update the sign, important for LAR
            sign_active[idx] = -sign_active[idx]

            if method == 'lasso':
                gamma_ = z_pos
            drop = True

        n_iter += 1

        if n_iter >= coefs.shape[0]:
            # resize the coefs and alphas array
            add_features = 2 * max(1, (max_features - n_active))
            coefs.resize((n_iter + add_features, n_features))
            alphas.resize(n_iter + add_features)

        coefs[n_iter, active] = coefs[n_iter - 1, active] + \
            gamma_ * least_squares

        # update correlations
        Cov -= gamma_ * corr_eq_dir

        if n_active > n_features:
            break

        # See if any coefficient has changed sign
        if drop and method == 'lasso':

            arrayfuncs.cholesky_delete(L[:n_active, :n_active], idx)

            n_active -= 1
            m, n = idx, n_active
            drop_idx = active.pop(idx)

            if Gram is None:
                # propagate dropped variable
                for i in range(idx, n_active):
                    X.T[i], X.T[i + 1] = swap(X.T[i], X.T[i + 1])
                    indices[i], indices[i + 1] = \
                        indices[i + 1], indices[i]  # yeah this is stupid

                # TODO: this could be updated
                residual = y - np.dot(X[:, :n_active], coefs[n_iter, active])
                temp = np.dot(X.T[n_active], residual)

                Cov = np.r_[temp, Cov]
            else:
                for i in range(idx, n_active):
                    indices[i], indices[i + 1] = \
                        indices[i + 1], indices[i]
                    Gram[i], Gram[i + 1] = swap(Gram[i], Gram[i + 1])
                    Gram[:, i], Gram[:, i + 1] = swap(Gram[:, i],
                                                      Gram[:, i + 1])

                # Cov_n = Cov_j + x_j * X + increment(betas) TODO:
                # will this still work with multiple drops ?

                # recompute covariance. Probably could be done better
                # wrong as Xy is not swapped with the rest of variables

                # TODO: this could be updated
                residual = y - np.dot(X, coefs[n_iter])
                temp = np.dot(X.T[drop_idx], residual)
                Cov = np.r_[temp, Cov]

            sign_active = np.delete(sign_active, idx)
            sign_active = np.append(sign_active, 0.)  # just to maintain size
            if verbose:
                print("%s\t\t%s\t\t%s\t\t%s\t\t%s" %
                      (n_iter, '', drop_idx, n_active, abs(temp)))

    # resize coefs in case of early stop
    alphas = alphas[:n_iter + 1]
    coefs = coefs[:n_iter + 1]

    return alphas, active, coefs.T
def norm(v): v = np.asarray(v) __nrm2, = linalg.get_blas_funcs(['nrm2'], [v]) return __nrm2(v)
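# For complex input, nrm2 already returns sqrt(sum(|v_i|^2)), so the norm
# helper defined above agrees with np.linalg.norm.  A quick check:
import numpy as np

v_demo = np.array([3.0 + 4.0j, 1.0 - 2.0j])
print(np.allclose(norm(v_demo), np.linalg.norm(v_demo)))  # True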
from scipy.linalg import get_blas_funcs
import time
import numpy as np

X = np.random.randn(10, 4)
Y = np.random.randn(7, 4).T  # shape (4, 7), so X @ Y is (10, 7)

# get_blas_funcs returns the single best-matching gemm for these operands
gemm = get_blas_funcs("gemm", [X, Y])

# Time each product on its own; print the timings after stopping the clock,
# so the cost of printing does not pollute the comparison
st = time.time()
C_gemm = gemm(1, X, Y)
t_gemm = time.time() - st

st = time.time()
C_dot = np.dot(X, Y)
t_dot = time.time() - st

st = time.time()
C_matmul = np.matmul(X, Y)
t_matmul = time.time() - st

print("Time comparison of blas gemm:", t_gemm)
print("Time comparison of np.dot:", t_dot)
print("Time comparison of np.matmul:", t_matmul)
def _gram_omp(Gram, Xy, n_nonzero_coefs, tol_0=None, tol=None, copy_Gram=True, copy_Xy=True, return_path=False): """Orthogonal Matching Pursuit step on a precomputed Gram matrix. This function uses the Cholesky decomposition method. Parameters ---------- Gram : array, shape (n_features, n_features) Gram matrix of the input data matrix Xy : array, shape (n_features,) Input targets n_nonzero_coefs : int Targeted number of non-zero elements tol_0 : float Squared norm of y, required if tol is not None. tol : float Targeted squared error, if not None overrides n_nonzero_coefs. copy_Gram : bool, optional Whether the gram matrix must be copied by the algorithm. A false value is only helpful if it is already Fortran-ordered, otherwise a copy is made anyway. copy_Xy : bool, optional Whether the covariance vector Xy must be copied by the algorithm. If False, it may be overwritten. return_path : bool, optional. Default: False Whether to return every value of the nonzero coefficients along the forward path. Useful for cross-validation. Returns ------- gamma : array, shape (n_nonzero_coefs,) Non-zero elements of the solution idx : array, shape (n_nonzero_coefs,) Indices of the positions of the elements in gamma within the solution vector coefs : array, shape (n_features, n_nonzero_coefs) The first k values of column k correspond to the coefficient value for the active features at that step. The lower left triangle contains garbage. Only returned if ``return_path=True``. n_active : int Number of active features at convergence. """ Gram = Gram.copy('F') if copy_Gram else np.asfortranarray(Gram) if copy_Xy or not Xy.flags.writeable: Xy = Xy.copy() min_float = np.finfo(Gram.dtype).eps nrm2, swap = linalg.get_blas_funcs(('nrm2', 'swap'), (Gram, )) potrs, = get_lapack_funcs(('potrs', ), (Gram, )) indices = np.arange(len(Gram)) # keeping track of swapping alpha = Xy tol_curr = tol_0 delta = 0 gamma = np.empty(0) n_active = 0 max_features = len(Gram) if tol is not None else n_nonzero_coefs L = np.empty((max_features, max_features), dtype=Gram.dtype) L[0, 0] = 1. 
if return_path: coefs = np.empty_like(L) while True: lam = np.argmax(np.abs(alpha)) if lam < n_active or alpha[lam]**2 < min_float: # selected same atom twice, or inner product too small warnings.warn(premature, RuntimeWarning, stacklevel=3) break if n_active > 0: L[n_active, :n_active] = Gram[lam, :n_active] linalg.solve_triangular(L[:n_active, :n_active], L[n_active, :n_active], trans=0, lower=1, overwrite_b=True, check_finite=False) v = nrm2(L[n_active, :n_active])**2 Lkk = Gram[lam, lam] - v if Lkk <= min_float: # selected atoms are dependent warnings.warn(premature, RuntimeWarning, stacklevel=3) break L[n_active, n_active] = sqrt(Lkk) else: L[0, 0] = sqrt(Gram[lam, lam]) Gram[n_active], Gram[lam] = swap(Gram[n_active], Gram[lam]) Gram.T[n_active], Gram.T[lam] = swap(Gram.T[n_active], Gram.T[lam]) indices[n_active], indices[lam] = indices[lam], indices[n_active] Xy[n_active], Xy[lam] = Xy[lam], Xy[n_active] n_active += 1 # solves LL'x = X'y as a composition of two triangular systems gamma, _ = potrs(L[:n_active, :n_active], Xy[:n_active], lower=True, overwrite_b=False) if return_path: coefs[:n_active, n_active - 1] = gamma beta = np.dot(Gram[:, :n_active], gamma) alpha = Xy - beta if tol is not None: tol_curr += delta delta = np.inner(gamma, beta[:n_active]) tol_curr -= delta if abs(tol_curr) <= tol: break elif n_active == max_features: break if return_path: return gamma, indices[:n_active], coefs[:, :n_active], n_active else: return gamma, indices[:n_active], n_active
def lars_path(X, y, Xy=None, Gram=None, max_iter=500, alpha_min=0,
              method='lar', copy_X=True, eps=np.finfo(np.float).eps,
              copy_Gram=True, verbose=0, return_path=True,
              return_n_iter=False):
    """Compute Least Angle Regression or Lasso path using LARS algorithm [1]

    The optimization objective for the case method='lasso' is::

    (1 / (2 * n_samples)) * ||y - Xw||^2_2 + alpha * ||w||_1

    in the case of method='lars', the objective function is only known in
    the form of an implicit equation (see discussion in [1])

    Parameters
    -----------
    X : array, shape: (n_samples, n_features)
        Input data.

    y : array, shape: (n_samples)
        Input targets.

    max_iter : integer, optional (default=500)
        Maximum number of iterations to perform, set to infinity for no limit.

    Gram : None, 'auto', array, shape: (n_features, n_features), optional
        Precomputed Gram matrix (X' * X), if ``'auto'``, the Gram
        matrix is precomputed from the given X, if there are more samples
        than features.

    alpha_min : float, optional (default=0)
        Minimum correlation along the path. It corresponds to the
        regularization parameter alpha parameter in the Lasso.

    method : {'lar', 'lasso'}, optional (default='lar')
        Specifies the returned model. Select ``'lar'`` for Least Angle
        Regression, ``'lasso'`` for the Lasso.

    eps : float, optional (default=``np.finfo(np.float).eps``)
        The machine-precision regularization in the computation of the
        Cholesky diagonal factors. Increase this for very ill-conditioned
        systems.

    copy_X : bool, optional (default=True)
        If ``False``, ``X`` is overwritten.

    copy_Gram : bool, optional (default=True)
        If ``False``, ``Gram`` is overwritten.

    verbose : int (default=0)
        Controls output verbosity.

    return_path : bool, optional (default=True)
        If ``return_path==True`` returns the entire path, else returns only
        the last point of the path.

    Returns
    --------
    alphas : array, shape: [n_alphas + 1]
        Maximum of covariances (in absolute value) at each iteration.
        ``n_alphas`` is either ``max_iter``, ``n_features`` or the number of
        nodes in the path with ``alpha >= alpha_min``, whichever is smaller.

    active : array, shape [n_alphas]
        Indices of active variables at the end of the path.

    coefs : array, shape (n_features, n_alphas + 1)
        Coefficients along the path

    n_iter : int
        Number of iterations run. Returned only if return_n_iter is set
        to True.

    See also
    --------
    lasso_path
    LassoLars
    Lars
    LassoLarsCV
    LarsCV
    sklearn.decomposition.sparse_encode

    References
    ----------
    .. [1] "Least Angle Regression", Efron et al.
       http://www-stat.stanford.edu/~tibs/ftp/lars.pdf

    .. [2] `Wikipedia entry on the Least-angle regression
           <http://en.wikipedia.org/wiki/Least-angle_regression>`_

    .. [3] `Wikipedia entry on the Lasso
           <http://en.wikipedia.org/wiki/Lasso_(statistics)#Lasso_method>`_

    """
    n_features = X.shape[1]
    n_samples = y.size
    max_features = min(max_iter, n_features)

    if return_path:
        coefs = np.zeros((max_features + 1, n_features))
        alphas = np.zeros(max_features + 1)
    else:
        coef, prev_coef = np.zeros(n_features), np.zeros(n_features)
        alpha, prev_alpha = np.array([0.]), np.array([0.])  # better ideas?

    n_iter, n_active = 0, 0
    active, indices = list(), np.arange(n_features)
    # holds the sign of covariance
    sign_active = np.empty(max_features, dtype=np.int8)
    drop = False

    # will hold the cholesky factorization. Only lower part is
    # referenced.
    # We are initializing this to "zeros" and not empty, because
    # it is passed to scipy linalg functions and thus if it has NaNs,
    # even if they are in the upper part that is not used, we
    # get errors raised.
# Once we support only scipy > 0.12 we can use check_finite=False and # go back to "empty" L = np.zeros((max_features, max_features), dtype=X.dtype) swap, nrm2 = linalg.get_blas_funcs(('swap', 'nrm2'), (X, )) solve_cholesky, = get_lapack_funcs(('potrs', ), (X, )) if Gram is None: if copy_X: # force copy. setting the array to be fortran-ordered # speeds up the calculation of the (partial) Gram matrix # and allows to easily swap columns X = X.copy('F') elif Gram == 'auto': Gram = None if X.shape[0] > X.shape[1]: Gram = np.dot(X.T, X) elif copy_Gram: Gram = Gram.copy() if Xy is None: Cov = np.dot(X.T, y) else: Cov = Xy.copy() if verbose: if verbose > 1: print("Step\t\tAdded\t\tDropped\t\tActive set size\t\tC") else: sys.stdout.write('.') sys.stdout.flush() tiny = np.finfo(np.float).tiny # to avoid division by 0 warning tiny32 = np.finfo(np.float32).tiny # to avoid division by 0 warning equality_tolerance = np.finfo(np.float32).eps while True: if Cov.size: C_idx = np.argmax(np.abs(Cov)) C_ = Cov[C_idx] C = np.fabs(C_) else: C = 0. if return_path: alpha = alphas[n_iter, np.newaxis] coef = coefs[n_iter] prev_alpha = alphas[n_iter - 1, np.newaxis] prev_coef = coefs[n_iter - 1] alpha[0] = C / n_samples if alpha[0] <= alpha_min + equality_tolerance: # early stopping if abs(alpha[0] - alpha_min) > equality_tolerance: # interpolation factor 0 <= ss < 1 if n_iter > 0: # In the first iteration, all alphas are zero, the formula # below would make ss a NaN ss = ((prev_alpha[0] - alpha_min) / (prev_alpha[0] - alpha[0])) coef[:] = prev_coef + ss * (coef - prev_coef) alpha[0] = alpha_min if return_path: coefs[n_iter] = coef break if n_iter >= max_iter or n_active >= n_features: break if not drop: ########################################################## # Append x_j to the Cholesky factorization of (Xa * Xa') # # # # ( L 0 ) # # L -> ( ) , where L * w = Xa' x_j # # ( w z ) and z = ||x_j|| # # # ########################################################## sign_active[n_active] = np.sign(C_) m, n = n_active, C_idx + n_active Cov[C_idx], Cov[0] = swap(Cov[C_idx], Cov[0]) indices[n], indices[m] = indices[m], indices[n] Cov_not_shortened = Cov Cov = Cov[1:] # remove Cov[0] if Gram is None: X.T[n], X.T[m] = swap(X.T[n], X.T[m]) c = nrm2(X.T[n_active])**2 L[n_active, :n_active] = \ np.dot(X.T[n_active], X.T[:n_active].T) else: # swap does only work inplace if matrix is fortran # contiguous ... Gram[m], Gram[n] = swap(Gram[m], Gram[n]) Gram[:, m], Gram[:, n] = swap(Gram[:, m], Gram[:, n]) c = Gram[n_active, n_active] L[n_active, :n_active] = Gram[n_active, :n_active] # Update the cholesky decomposition for the Gram matrix if n_active: linalg.solve_triangular(L[:n_active, :n_active], L[n_active, :n_active], trans=0, lower=1, overwrite_b=True, **solve_triangular_args) v = np.dot(L[n_active, :n_active], L[n_active, :n_active]) diag = max(np.sqrt(np.abs(c - v)), eps) L[n_active, n_active] = diag if diag < 1e-7: # The system is becoming too ill-conditioned. # We have degenerate vectors in our active set. # We'll 'drop for good' the last regressor added. # Note: this case is very rare. It is no longer triggered by the # test suite. The `equality_tolerance` margin added in 0.16.0 to # get early stopping to work consistently on all versions of # Python including 32 bit Python under Windows seems to make it # very difficult to trigger the 'drop for good' strategy. warnings.warn( 'Regressors in active set degenerate. ' 'Dropping a regressor, after %i iterations, ' 'i.e. 
alpha=%.3e, ' 'with an active set of %i regressors, and ' 'the smallest cholesky pivot element being %.3e' % (n_iter, alpha, n_active, diag), ConvergenceWarning) # XXX: need to figure a 'drop for good' way Cov = Cov_not_shortened Cov[0] = 0 Cov[C_idx], Cov[0] = swap(Cov[C_idx], Cov[0]) continue active.append(indices[n_active]) n_active += 1 if verbose > 1: print("%s\t\t%s\t\t%s\t\t%s\t\t%s" % (n_iter, active[-1], '', n_active, C)) if method == 'lasso' and n_iter > 0 and prev_alpha[0] < alpha[0]: # alpha is increasing. This is because the updates of Cov are # bringing in too much numerical error that is greater than # than the remaining correlation with the # regressors. Time to bail out warnings.warn( 'Early stopping the lars path, as the residues ' 'are small and the current value of alpha is no ' 'longer well controlled. %i iterations, alpha=%.3e, ' 'previous alpha=%.3e, with an active set of %i ' 'regressors.' % (n_iter, alpha, prev_alpha, n_active), ConvergenceWarning) break # least squares solution least_squares, info = solve_cholesky(L[:n_active, :n_active], sign_active[:n_active], lower=True) if least_squares.size == 1 and least_squares == 0: # This happens because sign_active[:n_active] = 0 least_squares[...] = 1 AA = 1. else: # is this really needed ? AA = 1. / np.sqrt(np.sum(least_squares * sign_active[:n_active])) if not np.isfinite(AA): # L is too ill-conditioned i = 0 L_ = L[:n_active, :n_active].copy() while not np.isfinite(AA): L_.flat[::n_active + 1] += (2**i) * eps least_squares, info = solve_cholesky( L_, sign_active[:n_active], lower=True) tmp = max(np.sum(least_squares * sign_active[:n_active]), eps) AA = 1. / np.sqrt(tmp) i += 1 least_squares *= AA if Gram is None: # equiangular direction of variables in the active set eq_dir = np.dot(X.T[:n_active].T, least_squares) # correlation between each unactive variables and # eqiangular vector corr_eq_dir = np.dot(X.T[n_active:], eq_dir) else: # if huge number of features, this takes 50% of time, I # think could be avoided if we just update it using an # orthogonal (QR) decomposition of X corr_eq_dir = np.dot(Gram[:n_active, n_active:].T, least_squares) g1 = arrayfuncs.min_pos((C - Cov) / (AA - corr_eq_dir + tiny)) g2 = arrayfuncs.min_pos((C + Cov) / (AA + corr_eq_dir + tiny)) gamma_ = min(g1, g2, C / AA) # TODO: better names for these variables: z drop = False z = -coef[active] / (least_squares + tiny32) z_pos = arrayfuncs.min_pos(z) if z_pos < gamma_: # some coefficients have changed sign idx = np.where(z == z_pos)[0][::-1] # update the sign, important for LAR sign_active[idx] = -sign_active[idx] if method == 'lasso': gamma_ = z_pos drop = True n_iter += 1 if return_path: if n_iter >= coefs.shape[0]: del coef, alpha, prev_alpha, prev_coef # resize the coefs and alphas array add_features = 2 * max(1, (max_features - n_active)) coefs = np.resize(coefs, (n_iter + add_features, n_features)) alphas = np.resize(alphas, n_iter + add_features) coef = coefs[n_iter] prev_coef = coefs[n_iter - 1] alpha = alphas[n_iter, np.newaxis] prev_alpha = alphas[n_iter - 1, np.newaxis] else: # mimic the effect of incrementing n_iter on the array references prev_coef = coef prev_alpha[0] = alpha[0] coef = np.zeros_like(coef) coef[active] = prev_coef[active] + gamma_ * least_squares # update correlations Cov -= gamma_ * corr_eq_dir # See if any coefficient has changed sign if drop and method == 'lasso': # handle the case when idx is not length of 1 [ arrayfuncs.cholesky_delete(L[:n_active, :n_active], ii) for ii in idx ] n_active -= 1 m, n = idx, 
n_active # handle the case when idx is not length of 1 drop_idx = [active.pop(ii) for ii in idx] if Gram is None: # propagate dropped variable for ii in idx: for i in range(ii, n_active): X.T[i], X.T[i + 1] = swap(X.T[i], X.T[i + 1]) # yeah this is stupid indices[i], indices[i + 1] = indices[i + 1], indices[i] # TODO: this could be updated residual = y - np.dot(X[:, :n_active], coef[active]) temp = np.dot(X.T[n_active], residual) Cov = np.r_[temp, Cov] else: for ii in idx: for i in range(ii, n_active): indices[i], indices[i + 1] = indices[i + 1], indices[i] Gram[i], Gram[i + 1] = swap(Gram[i], Gram[i + 1]) Gram[:, i], Gram[:, i + 1] = swap(Gram[:, i], Gram[:, i + 1]) # Cov_n = Cov_j + x_j * X + increment(betas) TODO: # will this still work with multiple drops ? # recompute covariance. Probably could be done better # wrong as Xy is not swapped with the rest of variables # TODO: this could be updated residual = y - np.dot(X, coef) temp = np.dot(X.T[drop_idx], residual) Cov = np.r_[temp, Cov] sign_active = np.delete(sign_active, idx) sign_active = np.append(sign_active, 0.) # just to maintain size if verbose > 1: print("%s\t\t%s\t\t%s\t\t%s\t\t%s" % (n_iter, '', drop_idx, n_active, abs(temp))) if return_path: # resize coefs in case of early stop alphas = alphas[:n_iter + 1] coefs = coefs[:n_iter + 1] if return_n_iter: return alphas, active, coefs.T, n_iter else: return alphas, active, coefs.T else: if return_n_iter: return alpha, active, coef, n_iter else: return alpha, active, coef
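# A minimal usage sketch for the lars_path above. It assumes the surrounding
# module provides the helpers the function relies on (np, linalg,
# get_lapack_funcs, arrayfuncs, solve_triangular_args, ConvergenceWarning,
# sys, warnings); the data and the name _demo_lars_path are illustrative
# only. With method='lasso', alphas decreases along the path and coefs.T
# holds one coefficient column per knot of the piecewise-linear path.
def _demo_lars_path():
    rng = np.random.RandomState(0)
    X = rng.randn(20, 5)
    y = np.dot(X, rng.randn(5)) + 0.01 * rng.randn(20)
    alphas, active, coefs = lars_path(X, y, method='lasso')
    # one coefficient vector per knot of the path
    assert coefs.shape == (5, len(alphas))
    # the maximal covariance shrinks monotonically along the path
    assert np.all(np.diff(alphas) <= 0)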
def lars_path(X, y, Xy=None, Gram=None, max_iter=500, alpha_min=0,
              method='lar', copy_X=True, eps=np.finfo(np.float64).eps,
              copy_Gram=True, verbose=False, return_path=True):
    """Compute Least Angle Regression and Lasso path

    The optimization objective for Lasso is::

        (1 / (2 * n_samples)) * ||y - Xw||^2_2 + alpha * ||w||_1

    Parameters
    ----------
    X : array, shape (n_samples, n_features)
        Input data.

    y : array, shape (n_samples,)
        Input targets.

    Xy : array-like, optional
        Xy = np.dot(X.T, y) that can be precomputed. It is useful
        only when the Gram matrix is precomputed.

    max_iter : integer, optional
        Maximum number of iterations to perform, set to infinity for no
        limit.

    Gram : None, 'auto', array, shape (n_features, n_features), optional
        Precomputed Gram matrix (X' * X). If 'auto', the Gram matrix is
        precomputed from the given X, if there are more samples than
        features.

    alpha_min : float, optional
        Minimum correlation along the path. It corresponds to the
        regularization parameter alpha in the Lasso.

    method : {'lar', 'lasso'}
        Specifies the returned model. Select 'lar' for Least Angle
        Regression, 'lasso' for the Lasso.

    eps : float, optional
        The machine-precision regularization in the computation of the
        Cholesky diagonal factors. Increase this for very ill-conditioned
        systems.

    copy_X : bool
        If False, X is overwritten.

    copy_Gram : bool
        If False, Gram is overwritten.

    verbose : bool or int, optional
        Controls output verbosity.

    return_path : bool, optional
        If True, returns the entire path, else returns only the last point
        of the path.

    Returns
    -------
    alphas : array, shape (max_features + 1,)
        Maximum of covariances (in absolute value) at each iteration.

    active : array, shape (max_features,)
        Indices of active variables at the end of the path.

    coefs : array, shape (n_features, max_features + 1)
        Coefficients along the path.

    See also
    --------
    lasso_path
    LassoLars
    Lars
    LassoLarsCV
    LarsCV
    sklearn.decomposition.sparse_encode

    Notes
    -----
    * http://en.wikipedia.org/wiki/Least-angle_regression
    * http://en.wikipedia.org/wiki/Lasso_(statistics)#LASSO_method

    """
    n_features = X.shape[1]
    n_samples = y.size
    max_features = min(max_iter, n_features)

    if return_path:
        coefs = np.zeros((max_features + 1, n_features))
        alphas = np.zeros(max_features + 1)
    else:
        coef, prev_coef = np.zeros(n_features), np.zeros(n_features)
        alpha, prev_alpha = np.array([0.]), np.array([0.])  # better ideas?

    n_iter, n_active = 0, 0
    active, indices = list(), np.arange(n_features)
    # holds the sign of covariance
    sign_active = np.empty(max_features, dtype=np.int8)
    drop = False

    # will hold the cholesky factorization. Only lower part is
    # referenced.
    L = np.empty((max_features, max_features), dtype=X.dtype)
    swap, nrm2 = linalg.get_blas_funcs(('swap', 'nrm2'), (X,))
    solve_cholesky, = get_lapack_funcs(('potrs',), (X,))

    if Gram is None:
        if copy_X:
            # force copy. setting the array to be fortran-ordered
            # speeds up the calculation of the (partial) Gram matrix
            # and allows to easily swap columns
            X = X.copy('F')
    elif Gram == 'auto':
        Gram = None
        if X.shape[0] > X.shape[1]:
            Gram = np.dot(X.T, X)
    elif copy_Gram:
        Gram = Gram.copy()

    if Xy is None:
        Cov = np.dot(X.T, y)
    else:
        Cov = Xy.copy()

    if verbose:
        if verbose > 1:
            print("Step\t\tAdded\t\tDropped\t\tActive set size\t\tC")
        else:
            sys.stdout.write('.')
            sys.stdout.flush()

    tiny = np.finfo(np.float64).tiny  # to avoid division by 0 warning
    tiny32 = np.finfo(np.float32).tiny  # to avoid division by 0 warning

    while True:
        if Cov.size:
            C_idx = np.argmax(np.abs(Cov))
            C_ = Cov[C_idx]
            C = np.fabs(C_)
        else:
            C = 0.

        if return_path:
            alpha = alphas[n_iter, np.newaxis]
            coef = coefs[n_iter]
            prev_alpha = alphas[n_iter - 1, np.newaxis]
            prev_coef = coefs[n_iter - 1]

        alpha[0] = C / n_samples
        if alpha[0] < alpha_min:  # early stopping
            # interpolation factor 0 <= ss < 1
            if n_iter > 0:
                # In the first iteration, all alphas are zero, the formula
                # below would make ss a NaN
                ss = (prev_alpha[0] - alpha_min) / (prev_alpha[0] - alpha[0])
                coef[:] = prev_coef + ss * (coef - prev_coef)
            alpha[0] = alpha_min
            if return_path:
                coefs[n_iter] = coef
            break

        if n_iter >= max_iter or n_active >= n_features:
            break

        if not drop:

            ##########################################################
            # Append x_j to the Cholesky factorization of (Xa * Xa') #
            #                                                        #
            #            ( L   0 )                                   #
            #     L  ->  (       )  , where L * w = Xa' x_j          #
            #            ( w   z )    and z = ||x_j||                #
            #                                                        #
            ##########################################################

            sign_active[n_active] = np.sign(C_)
            m, n = n_active, C_idx + n_active

            Cov[C_idx], Cov[0] = swap(Cov[C_idx], Cov[0])
            indices[n], indices[m] = indices[m], indices[n]
            Cov = Cov[1:]  # remove Cov[0]

            if Gram is None:
                X.T[n], X.T[m] = swap(X.T[n], X.T[m])
                c = nrm2(X.T[n_active]) ** 2
                L[n_active, :n_active] = \
                    np.dot(X.T[n_active], X.T[:n_active].T)
            else:
                # swap does only work inplace if matrix is fortran
                # contiguous ...
                Gram[m], Gram[n] = swap(Gram[m], Gram[n])
                Gram[:, m], Gram[:, n] = swap(Gram[:, m], Gram[:, n])
                c = Gram[n_active, n_active]
                L[n_active, :n_active] = Gram[n_active, :n_active]

            # Update the cholesky decomposition for the Gram matrix
            arrayfuncs.solve_triangular(L[:n_active, :n_active],
                                        L[n_active, :n_active])
            v = np.dot(L[n_active, :n_active], L[n_active, :n_active])
            diag = max(np.sqrt(np.abs(c - v)), eps)
            L[n_active, n_active] = diag

            active.append(indices[n_active])
            n_active += 1

            if verbose > 1:
                print("%s\t\t%s\t\t%s\t\t%s\t\t%s" % (n_iter, active[-1], '',
                                                      n_active, C))

        # least squares solution
        least_squares, info = solve_cholesky(L[:n_active, :n_active],
                                             sign_active[:n_active],
                                             lower=True)

        # is this really needed ?
        AA = 1. / np.sqrt(np.sum(least_squares * sign_active[:n_active]))
        if not np.isfinite(AA):
            # L is too ill-conditioned
            i = 0
            L_ = L[:n_active, :n_active].copy()
            while not np.isfinite(AA):
                L_.flat[::n_active + 1] += (2 ** i) * eps
                least_squares, info = solve_cholesky(L_,
                                                     sign_active[:n_active],
                                                     lower=True)
                tmp = max(np.sum(least_squares * sign_active[:n_active]),
                          eps)
                AA = 1. / np.sqrt(tmp)
                i += 1
        least_squares *= AA

        if Gram is None:
            # equiangular direction of variables in the active set
            eq_dir = np.dot(X.T[:n_active].T, least_squares)
            # correlation between each inactive variable and
            # equiangular vector
            corr_eq_dir = np.dot(X.T[n_active:], eq_dir)
        else:
            # if huge number of features, this takes 50% of time, I
            # think could be avoided if we just update it using an
            # orthogonal (QR) decomposition of X
            corr_eq_dir = np.dot(Gram[:n_active, n_active:].T,
                                 least_squares)

        g1 = arrayfuncs.min_pos((C - Cov) / (AA - corr_eq_dir + tiny))
        g2 = arrayfuncs.min_pos((C + Cov) / (AA + corr_eq_dir + tiny))
        gamma_ = min(g1, g2, C / AA)

        # TODO: better names for these variables: z
        drop = False
        z = -coef[active] / (least_squares + tiny32)
        z_pos = arrayfuncs.min_pos(z)
        if z_pos < gamma_:
            # some coefficients have changed sign
            idx = np.where(z == z_pos)[0]

            # update the sign, important for LAR
            sign_active[idx] = -sign_active[idx]

            if method == 'lasso':
                gamma_ = z_pos
            drop = True

        n_iter += 1

        if return_path:
            if n_iter >= coefs.shape[0]:
                del coef, alpha, prev_alpha, prev_coef
                # resize the coefs and alphas array
                add_features = 2 * max(1, (max_features - n_active))
                coefs.resize((n_iter + add_features, n_features))
                alphas.resize(n_iter + add_features)
            coef = coefs[n_iter]
            prev_coef = coefs[n_iter - 1]
            alpha = alphas[n_iter, np.newaxis]
            prev_alpha = alphas[n_iter - 1, np.newaxis]
        else:
            # mimic the effect of incrementing n_iter on the array references
            prev_coef = coef
            prev_alpha[0] = alpha[0]
            coef = np.zeros_like(coef)

        coef[active] = prev_coef[active] + gamma_ * least_squares

        # update correlations
        Cov -= gamma_ * corr_eq_dir

        # See if any coefficient has changed sign
        if drop and method == 'lasso':
            arrayfuncs.cholesky_delete(L[:n_active, :n_active], idx)

            n_active -= 1
            m, n = idx, n_active
            drop_idx = active.pop(idx)

            if Gram is None:
                # propagate dropped variable
                for i in range(idx, n_active):
                    X.T[i], X.T[i + 1] = swap(X.T[i], X.T[i + 1])
                    # yeah this is stupid
                    indices[i], indices[i + 1] = \
                        indices[i + 1], indices[i]

                # TODO: this could be updated
                residual = y - np.dot(X[:, :n_active], coef[active])
                temp = np.dot(X.T[n_active], residual)

                Cov = np.r_[temp, Cov]
            else:
                for i in range(idx, n_active):
                    indices[i], indices[i + 1] = \
                        indices[i + 1], indices[i]
                    Gram[i], Gram[i + 1] = swap(Gram[i], Gram[i + 1])
                    Gram[:, i], Gram[:, i + 1] = swap(Gram[:, i],
                                                      Gram[:, i + 1])

                # Cov_n = Cov_j + x_j * X + increment(betas) TODO:
                # will this still work with multiple drops ?

                # recompute covariance. Probably could be done better
                # wrong as Xy is not swapped with the rest of variables

                # TODO: this could be updated
                residual = y - np.dot(X, coef)
                temp = np.dot(X.T[drop_idx], residual)
                Cov = np.r_[temp, Cov]

            sign_active = np.delete(sign_active, idx)
            sign_active = np.append(sign_active, 0.)  # just to maintain size
            if verbose > 1:
                print("%s\t\t%s\t\t%s\t\t%s\t\t%s" % (n_iter, '', drop_idx,
                                                      n_active, abs(temp)))

    if return_path:
        # resize coefs in case of early stop
        alphas = alphas[:n_iter + 1]
        coefs = coefs[:n_iter + 1]

        return alphas, active, coefs.T
    else:
        return alpha, active, coef
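# The ASCII diagram inside lars_path describes how a new regressor x_j is
# appended to the Cholesky factor L of the active-set Gram matrix: solve
# L * w = Xa' x_j by forward substitution, then set the new pivot to
# z = sqrt(||x_j||^2 - ||w||^2), which is what the `diag` computation does.
# A small self-contained check of that identity (illustrative names; uses
# scipy.linalg as linalg, consistent with this module):
def _demo_cholesky_append():
    rng = np.random.RandomState(0)
    Xa = rng.randn(10, 3)   # columns already in the active set
    x_j = rng.randn(10)     # column being appended
    L = linalg.cholesky(np.dot(Xa.T, Xa), lower=True)
    w = linalg.solve_triangular(L, np.dot(Xa.T, x_j), lower=True)
    z = np.sqrt(np.dot(x_j, x_j) - np.dot(w, w))
    L_new = np.zeros((4, 4))
    L_new[:3, :3] = L
    L_new[3, :3] = w
    L_new[3, 3] = z
    Xa_new = np.column_stack([Xa, x_j])
    # L_new is the lower Cholesky factor of the enlarged Gram matrix
    assert np.allclose(np.dot(L_new, L_new.T), np.dot(Xa_new.T, Xa_new))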
def py_rect_maxvol(A, tol=1., maxK=None, min_add_K=None, minK=None,
                   start_maxvol_iters=10, identity_submatrix=True,
                   top_k_index=-1):
    """
    Python implementation of rectangular 2-volume maximization.

    See Also
    --------
    rect_maxvol
    """
    # tol2 - square of the parameter tol
    tol2 = tol ** 2
    # N - number of rows, r - number of columns of matrix A
    N, r = A.shape
    # some work on parameters
    if N <= r:
        return np.arange(N, dtype=np.int32), np.eye(N, dtype=A.dtype)
    if maxK is None or maxK > N:
        maxK = N
    if maxK < r:
        maxK = r
    if minK is None or minK < r:
        minK = r
    if minK > N:
        minK = N
    if min_add_K is not None:
        minK = max(minK, r + min_add_K)
    if minK > maxK:
        minK = maxK
        # raise ValueError('minK value cannot be greater than maxK value')
    if top_k_index == -1 or top_k_index > N:
        top_k_index = N
    if top_k_index < r:
        top_k_index = r
    # choose initial submatrix and coefficients according to the maxvol
    # algorithm
    index = np.zeros(N, dtype=np.int32)
    chosen = np.ones(top_k_index)
    tmp_index, C = py_maxvol(A, 1.05, start_maxvol_iters, top_k_index)
    index[:r] = tmp_index
    chosen[tmp_index] = 0
    C = np.asfortranarray(C)
    # compute square 2-norms of each row in the coefficients matrix C
    row_norm_sqr = np.array([chosen[i] * np.linalg.norm(C[i], 2) ** 2
                             for i in range(top_k_index)])
    # find maximum value in row_norm_sqr
    i = np.argmax(row_norm_sqr)
    K = r
    # set cgeru or zgeru for complex numbers and dger or sger
    # for float numbers
    try:
        ger = get_blas_funcs('geru', [C])
    except (AttributeError, ValueError):
        ger = get_blas_funcs('ger', [C])
    # augment maxvol submatrix with each iteration
    while (row_norm_sqr[i] > tol2 and K < maxK) or K < minK:
        # add i to index and recompute C and the square norms of each row
        # by the Sherman-Morrison formula
        index[K] = i
        chosen[i] = 0
        c = C[i].copy()
        v = C.dot(c.conj())
        l = 1.0 / (1 + v[i])
        ger(-l, v, c, a=C, overwrite_a=1)
        C = np.hstack([C, l * v.reshape(-1, 1)])
        row_norm_sqr -= (l * v[:top_k_index] * v[:top_k_index].conj()).real
        row_norm_sqr *= chosen
        # find maximum value in row_norm_sqr
        i = row_norm_sqr.argmax()
        K += 1
    # if identity_submatrix is True, set the submatrix corresponding to
    # the maxvol rows equal to the identity matrix
    if identity_submatrix:
        C[index[:K]] = np.eye(K, dtype=C.dtype)
    return index[:K].copy(), C
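# A hedged usage sketch for py_rect_maxvol. It assumes py_maxvol (called
# above but defined elsewhere in this module) and the numpy/scipy imports
# are available; the matrix and the name _demo_rect_maxvol are illustrative.
# The defining property being checked is that the coefficient matrix C
# reconstructs A from the selected rows, A ~= C * A[index].
def _demo_rect_maxvol():
    rng = np.random.RandomState(0)
    A = rng.randn(100, 10)
    index, C = py_rect_maxvol(A, tol=1.1)
    # at least r = 10 rows are selected
    assert len(index) >= 10
    # C maps the selected rows back to the full matrix
    assert np.allclose(np.dot(C, A[index]), A, atol=1e-8)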