def test_diag():
    """da.diag matches np.diag for 1d and 2d inputs, numpy- or dask-backed."""
    # Plain numpy vector.
    vec = np.arange(11)
    assert_eq(da.diag(vec), np.diag(vec))

    # Dask vector split across several chunks; the two independent calls
    # must build graphs with identical (deterministic) keys.
    vec = da.arange(11, chunks=3)
    assert_eq(da.diag(vec), np.diag(vec))
    assert sorted(da.diag(vec).dask) == sorted(da.diag(vec).dask)

    # A lazily-derived expression works the same way.
    vec = vec + vec + 3
    assert_eq(da.diag(vec), np.diag(vec))

    # Single chunk covering the whole vector.
    vec = da.arange(11, chunks=11)
    assert_eq(da.diag(vec), np.diag(vec))
    assert sorted(da.diag(vec).dask) == sorted(da.diag(vec).dask)

    # 2d input: diag extracts the diagonal instead of building a matrix.
    mat = np.arange(64).reshape((8, 8))
    assert_eq(da.diag(mat), np.diag(mat))
    dmat = da.from_array(mat, chunks=(4, 4))
    assert_eq(da.diag(dmat), np.diag(mat))
def calculate_halfmatrices(weights, argmax, fftsize=128):
    """Compute the half matrices of the weights and the argmax and
    reconstruct the full arrays.

    Parameters
    ----------
    weights : dask array
    argmax : dask array
    fftsize : int, default: 128

    Returns
    -------
    Wc : numpy array
        Computed weights, symmetric
    Mc : numpy array
        Computed locations of maxima
    """
    # The matrices are antisymmetric and diagonal shifts are zero, so only
    # the strict upper triangle (k=1) needs computing; reconstruct after.
    upper_argmax = da.triu(argmax, 1)
    upper_weights = da.triu(weights, 1)
    # Weights are symmetric: mirror the triangle and put the diagonal back.
    upper_weights = upper_weights + upper_weights.T + da.diag(da.diag(weights))
    # One compute call evaluates both graphs. Get a cup of coffee.
    Mc, Wc = da.compute(upper_argmax, upper_weights)
    # Undo the flatten: reconstruct 2D indices from global linear indices.
    Mc = np.stack(np.unravel_index(Mc, (fftsize * 2, fftsize * 2)))
    # Compensate for the fft-shift on the computed (upper) triangle.
    Mc -= np.triu(np.full_like(Mc, fftsize), 1)
    # Rebuild the full antisymmetric matrices.
    Mc = Mc - Mc.swapaxes(1, 2)
    return Wc, Mc
def dmd_dask(D, r, eig=None):
    """A dask implementation of Dynamic Mode Decomposition.

    Args:
        D - dask array
            D is a d x T array for which each column corresponds to an
            observation at a specific time.
        r - integer
            number of components (rank truncation).
        eig - string or None
            eig indicates the method to use to calculate eigenvalues.
            eig=None uses numpy.linalg.eig (not out-of-core);
            eig='dask' uses a dask eigensolver based on QR decomposition.

    Returns:
        mu - dask.array of length r
            dmd eigenvalues
        Phi - dask.array of dimensions d x r
            dmd modes
        s - dask.array
            singular values of D[:,:-1] - useful for determining the rank

    Raises:
        ValueError: if `eig` is neither None nor 'dask'.

    Examples:
        >>> mu,Phi,s = dmd_dask(D,r,eig=None)
    """
    # Time-shifted snapshot pairs.
    X_da = D[:, :-1]
    Y_da = D[:, 1:]
    # Thin SVD of the first snapshot matrix.
    u, s, v = da.linalg.svd(X_da)
    # Rank truncation. (The unused full Sigma matrix was removed; only its
    # inverse is needed below.)
    u = u[:, :r]
    Sig_inv = da.diag(1 / s)[:r, :r]
    v = v.conj().T[:, :r]
    # Build the reduced operator A~ = U* Y V Sigma^-1.
    Atil = da.dot(da.dot(da.dot(u.conj().T, Y_da), v), Sig_inv)
    if eig is None:
        mu, W = la.eig(Atil)
    elif eig == 'dask':
        mu, W = eig_dask(Atil, 10)
    else:
        # Previously an unknown value fell through to a NameError below.
        raise ValueError(f"unknown eig method: {eig!r}; expected None or 'dask'")
    # Build the DMD modes: Phi = Y V Sigma^-1 W.
    Phi = da.dot(da.dot(da.dot(Y_da, v), Sig_inv), W)
    return (mu, Phi, s)
def test_diag():
    """1d dask arrays of various chunkings agree with np.diag."""
    vec = da.arange(11, chunks=3)
    eq(da.diag(vec), np.diag(vec))

    # A lazily-derived expression should behave identically.
    vec = vec + vec + 3
    eq(da.diag(vec), np.diag(vec))

    # Single chunk covering the whole vector.
    vec = da.arange(11, chunks=11)
    eq(da.diag(vec), np.diag(vec))
def check_dmd_dask(D, mu, Phi, show_warning=True):
    """Check how close the approximation using DMD is to the original data.

    Args:
        D - dask array of snapshots (columns are successive time steps).
        mu - DMD eigenvalues.
        Phi - DMD modes.
        show_warning - when True, emit a warning if the reconstruction
            error exceeds the tolerance.

    Returns:
        None if the difference is within the tolerance.
        Displays a warning otherwise (when show_warning is True).
    """
    X = D[:, 0:-1]
    Y = D[:, 1:]
    # Reconstruct Y_est = Phi diag(mu) pinv(Phi) X, associating the products
    # right-to-left so the large d x d operator is never formed explicitly.
    Phi_inv = pinv_SVD(Phi)
    PhiMu = da.dot(Phi, da.diag(mu))
    Y_est = da.dot(PhiMu, da.dot(Phi_inv, X))
    diff = da.real(Y - Y_est)
    res = da.fabs(diff)
    rtol = 1.e-8
    atol = 1.e-5
    if da.all(res < atol + rtol * da.fabs(da.real(Y_est))).compute():
        return None
    # Bug fix: the show_warning flag was previously ignored and the warning
    # fired unconditionally.
    if show_warning:
        warn('dmd result does not satisfy Y=AX')
def test_SSPM_case3():
    """Successive batched power method recovers the top-k singular values and
    vectors of a diagonal matrix with a geometrically increasing spectrum,
    at every intermediate batch."""
    N, k, f = 500, 10, .1
    s_orig = np.array([1.01**i for i in range(1, N + 1)])
    SSPM = SuccessiveBatchedPowerMethod(k=k,
                                        sub_svd_start=True,
                                        tol=[1e-14, 1e-14],
                                        f=f,
                                        scoring_method=['q-vals', 'v-subspace'],
                                        factor=None)
    U_PM, S_PM, V_PM = SSPM.svd(da.diag(s_orig))
    # Final estimate: the k largest values, descending.
    np.testing.assert_array_almost_equal(s_orig[-k:][::-1], S_PM)

    # Each intermediate batch should already match the truncated SVD of the
    # sub-problem it saw (first f*(i+1)*N diagonal entries).
    history = zip(SSPM.history.iter['S'], SSPM.history.iter['V'])
    for i, (sub_S, sub_V) in enumerate(history):
        batch_size = int(f * (i + 1) * N)
        expected_s = sorted(s_orig[:batch_size], reverse=True)[:k]
        np.testing.assert_array_almost_equal(expected_s, sub_S)
        expected_v = np.zeros_like(sub_V)
        for j in range(k):
            expected_v[j, batch_size - k + j] = 1
        np.testing.assert_almost_equal(subspace_dist(expected_v, sub_V, sub_S), 0)
def da_diagsvd(s, M, N):
    """Construct the sigma matrix in SVD from singular values and size M, N.

    Parameters
    ----------
    s : (M,) or (N,) array_like
        Singular values
    M : int
        Size of the matrix whose singular values are `s`.
    N : int
        Size of the matrix whose singular values are `s`.

    Returns
    -------
    S : (M, N) ndarray
        The S-matrix in the singular value decomposition

    Raises
    ------
    ValueError
        If len(s) is neither M nor N.
    """
    core = da.diag(s)
    n_vals = len(s)
    if n_vals == M:
        # Wide matrix: pad with zero columns on the right.
        return da.block([core, da.zeros((M, N - M), dtype=s.dtype)])
    if n_vals == N:
        # Tall matrix: pad with zero rows below.
        return da.block([[core], [da.zeros((M - N, N), dtype=s.dtype)]])
    raise ValueError("Length of s must be M or N.")
def test_PowerMethod_project():
    """PowerMethod.project maps new samples onto the fitted right singular
    subspace for every combination of scale/center preprocessing."""
    N, P, k = 1000, 1000, 10
    svd_array = da.random.random(size=(N, P)).persist()
    proj_array = da.random.random(size=(10, P)).persist()
    mu = da.mean(svd_array, axis=0).persist()
    # Diagonal matrix of inverse column standard deviations.
    inv_std = da.diag(1 / da.std(svd_array, axis=0)).persist()
    for scale in [True, False]:
        for center in [True, False]:
            # Apply the same preprocessing the PowerMethod is configured with.
            svd_input = svd_array
            proj_input = proj_array
            if center:
                svd_input = svd_input - mu
                proj_input = proj_input - mu
            if scale:
                svd_input = svd_input.dot(inv_std)
                proj_input = proj_input.dot(inv_std)
            U, S, V = da.linalg.svd(svd_input)
            U_k, S_k, V_k = svd_to_trunc_svd(U, S, V, k=k)
            PM = PowerMethod(k=k, scale=scale, center=center, factor=None, tol=1e-12)
            U_PM, S_PM, V_PM = PM.svd(array=svd_array)
            np.testing.assert_array_almost_equal(
                PM.project(proj_array, onto=V_k.T), proj_input.dot(V_k.T))
def test_svd_compressed():
    """Compressed SVD of an exactly low-rank matrix recovers the norm,
    orthonormal factors, and the leading singular values."""
    m, n, rank = 2000, 250, 10
    np.random.seed(4321)
    # Build an exactly rank-`rank` matrix from two thin Gaussian factors.
    left = np.random.randn(m, rank)
    right = np.random.randn(rank, n)
    mat = left.dot(right)
    data = da.from_array(mat, chunks=(500, 50))

    u, s, vt = svd_compressed(data, rank, seed=4321, n_power_iter=2)
    usvt = da.dot(u, da.dot(da.diag(s), vt))

    # Average accuracy check on the reconstruction norm.
    tol = 0.2
    assert_eq(da.linalg.norm(usvt), np.linalg.norm(mat), rtol=tol, atol=tol)

    u = u[:, :rank]
    s = s[:rank]
    vt = vt[:rank, :]
    s_exact = np.linalg.svd(mat)[1][:rank]
    assert_eq(np.eye(rank, rank), da.dot(u.T, u))  # u must be orthonormal
    assert_eq(np.eye(rank, rank), da.dot(vt, vt.T))  # v must be orthonormal
    assert_eq(s, s_exact)  # s must contain the singular values
def jacobi_preconditioner(array, name=None):
    """Build a diagonal (Jacobi) preconditioner: elementwise 1/diag(array).

    `name` labels the resulting dask graph; defaults to a name derived from
    the input array. The operator must be square.
    """
    if name is None:
        name = 'jacobi-precond-' + array.name
    m, n = array.shape
    assert m == n, 'preconditioner expects square linear operator'
    diag = da.diag(array)
    return linop.DLODiagonal(da.core.map_blocks(da.reciprocal, diag, name=name))
def test_jacobi_preconditioner():
    """Jacobi preconditioners invert the diagonal of each operator flavour."""
    A = da.random.random((100, 100), chunks=20)
    diag_A = da.diag(A)
    diag_gram = da.diag(A.T.dot(A))

    # Dense operators: preconditioner is the inverse diagonal of A itself.
    assert is_inverse(pre.jacobi_preconditioner(A), diag_A)
    assert is_inverse(pre.jacobi_preconditioner(linop.DLODense(A)), diag_A)

    # Gram operators: diagonal of A'A, optionally shifted by regularization.
    assert is_inverse(pre.jacobi_preconditioner(linop.DLOGram(A)), diag_gram)
    assert is_inverse(
        pre.jacobi_preconditioner(linop.DLORegularizedGram(A)), 1 + diag_gram)

    mu = da.random.normal(1, 1, (), chunks=())
    assert is_inverse(
        pre.jacobi_preconditioner(
            linop.DLORegularizedGram(A, regularization=mu)), mu + diag_gram)
def pinv_SVD(X):
    """Pseudo-inverse of X via the dask SVD: pinv(X) = V S^-1 U*."""
    u, s, vh = da.linalg.svd(X)
    inv_sigma = da.diag(1 / s)
    # vh is the conjugate-transposed right factor returned by da.linalg.svd.
    return da.dot(vh.T.conj(), da.dot(inv_sigma, u.T.conj()))
def single_window(df, rgeno, tgeno, threads=1, max_memory=None, justd=False,
                  extend=False):
    """Compute LD-style D matrices for one window of reference/target genotypes.

    :param df: dataframe whose i_ref / i_tar columns hold this window's SNP
        column indices into rgeno / tgeno, plus a snp identifier column.
    :param rgeno: dask array of reference genotypes (samples x SNPs) — assumed
        layout; TODO confirm against callers.
    :param tgeno: dask array of target genotypes, same layout as rgeno.
    :param threads: thread count passed to estimate_chunks for rechunking.
    :param max_memory: memory cap passed to estimate_chunks.
    :param justd: if True, return (snp, D_r, D_t) without cotagging scores.
    :param extend: if True, mirror-pad the window to damp edge effects.
    :return: (snp, D_r, D_t) when justd, else a computed pandas dataframe with
        columns snp/ref/tar/cotag.
    """
    ridx = df.i_ref.values
    tidx = df.i_tar.values
    rg = rgeno[:, ridx]
    tg = tgeno[:, tidx]
    if extend:
        # Extend the genotype at both ends (mirror each half of the index,
        # dropping the pivot element) to avoid edge effects.
        ridx_a, ridx_b = np.array_split(ridx, 2)
        tidx_a, tidx_b = np.array_split(tidx, 2)
        rg = da.concatenate(
            [rgeno[:, (ridx_a[::-1][:-1])], rg, rgeno[:, (ridx_b[::-1][1:])]],
            axis=1)
        tg = da.concatenate(
            [tgeno[:, (tidx_a[::-1][:-1])], tg, tgeno[:, (tidx_b[::-1][1:])]],
            axis=1)
        # Correlation-like matrices on the padded window.
        D_r = da.dot(rg.T, rg) / rg.shape[0]
        D_t = da.dot(tg.T, tg) / tg.shape[0]
        # Remove the extras: strip the padded rows/columns so only the
        # original window remains.
        D_r = D_r[:, (ridx_a.shape[0] + 1):][:, :(ridx.shape[0])]
        D_r = D_r[(ridx_a.shape[0] + 1):, :][:(ridx.shape[0]), :]
        D_t = D_t[:, (tidx_a.shape[0] + 1):][:, :(tidx.shape[0])]
        D_t = D_t[(tidx_a.shape[0] + 1):, :][:(tidx.shape[0]), :]
        assert D_r.shape[1] == ridx.shape[0]
        assert D_t.shape[1] == tidx.shape[0]
    else:
        D_r = da.dot(rg.T, rg) / rg.shape[0]
        D_t = da.dot(tg.T, tg) / tg.shape[0]
    if justd:
        return df.snp, D_r, D_t
    # Per-SNP scores: diagonals of the cross and self matrix products.
    cot = da.diag(da.dot(D_r, D_t))
    ref = da.diag(da.dot(D_r, D_r))
    tar = da.diag(da.dot(D_t, D_t))
    stacked = da.stack([df.snp, ref, tar, cot], axis=1)
    # Rechunk to fit the available threads/memory budget.
    c_h_u_n_k_s = estimate_chunks(stacked.shape, threads, max_memory)
    stacked = da.rechunk(stacked, chunks=c_h_u_n_k_s)
    columns = ['snp', 'ref', 'tar', 'cotag']
    return dd.from_dask_array(stacked, columns=columns).compute()
def test_diag():
    """da.diag on cupy-backed dask arrays keeps a cupy _meta and matches
    cupy.diag."""
    vec = cupy.arange(11)
    dvec = da.from_array(vec, chunks=(4, ), asarray=False)
    assert type(dvec._meta) == cupy.core.core.ndarray
    assert_eq(dvec, dvec)  # Check that _meta and computed arrays match types
    assert_eq(da.diag(dvec), cupy.diag(vec))

    # Same checks on a derived lazy expression.
    vec = vec + vec + 3
    dvec = dvec + dvec + 3
    diag_lazy = da.diag(dvec)
    diag_cupy = cupy.diag(vec)
    assert type(diag_lazy._meta) == cupy.core.core.ndarray
    assert_eq(diag_lazy, diag_lazy)  # Check that _meta and computed arrays match types
    assert_eq(diag_lazy, diag_cupy)

    # 2d input: diag extracts the diagonal.
    mat = cupy.arange(64).reshape((8, 8))
    dmat = da.from_array(mat, chunks=(4, 4), asarray=False)
    assert type(dmat._meta) == cupy.core.core.ndarray
    assert_eq(dmat, dmat)  # Check that _meta and computed arrays match types
    assert_eq(da.diag(dmat), cupy.diag(mat))
def update_velocities(position, velocity, mass, G, epsilon):
    """Calculate the interactions between all particles and update the
    velocities.

    Args:
        position (dask array): dask array of all particle positions in
            cartesian coordinates (shape (n, 3), per the column indexing).
        velocity (dask array): dask array of all particle velocities in
            cartesian coordinates (shape (n, 3)).
        mass (dask array): dask array of all particle masses.
        G (float): gravitational constant.
        epsilon (float): softening parameter.

    Returns:
        velocity: updated particle velocities in cartesian coordinates
            (materialised as a numpy array by column_stack of computed parts).
    """
    # Pairwise separations per axis: dx[i, j] = x_i - x_j.
    dx = da.subtract.outer(position[:, 0], position[:, 0])
    dy = da.subtract.outer(position[:, 1], position[:, 1])
    dz = da.subtract.outer(position[:, 2], position[:, 2])
    # Softened squared distances; epsilon keeps the i == j self terms finite.
    r2 = da.square(dx) + da.square(dy) + da.square(dz) + da.square(epsilon)
    # Per-pair acceleration prefactor.
    # NOTE(review): the result scales as dx / r^2 rather than the Newtonian
    # dx / r^3, and no timestep dt appears — confirm this is the intended
    # integration scheme.
    coef = -G * mass[:]
    ax = coef * dx
    ay = coef * dy
    az = coef * dz
    ax_scaled = da.divide(ax, r2)
    ay_scaled = da.divide(ay, r2)
    az_scaled = da.divide(az, r2)
    # Net acceleration per particle: sum contributions over all others
    # (nansum tolerates any NaN pair terms).
    total_ax = da.nansum(ax_scaled, axis=1)
    total_ay = da.nansum(ay_scaled, axis=1)
    total_az = da.nansum(az_scaled, axis=1)
    # diag(add.outer(v, a)) picks v[i] + a[i]; equivalent to v + a but via an
    # O(n^2) intermediate.
    velocity_x = da.diag(da.add.outer(da.transpose(velocity)[0], total_ax))
    velocity_y = da.diag(da.add.outer(da.transpose(velocity)[1], total_ay))
    velocity_z = da.diag(da.add.outer(da.transpose(velocity)[2], total_az))
    # Materialise the three components into a single (n, 3) numpy array.
    velocity = np.column_stack((velocity_x.compute(), velocity_y.compute(), velocity_z.compute()))
    return velocity
def test_PowerMethod_subsvd_finds_eigenvectors_failure():
    """Without regularization (lmbd=0) the power method must fail to recover
    the top singular values of this ill-conditioned diagonal matrix."""
    N, k = 1000, 10
    spectrum = np.array([1.01**i for i in range(1, N + 1)])
    PM = PowerMethod(tol=1e-16, lmbd=0, max_iter=100)
    U_PM, S_PM, V_PM = PM.svd(da.diag(spectrum))
    expected_top = spectrum[-k:][::-1]
    # Even to 0 decimals the estimate should NOT match.
    with pytest.raises(AssertionError):
        np.testing.assert_array_almost_equal(expected_top, S_PM, decimal=0)
def test_tsqr_svd_regular_blocks():
    """TSQR-based SVD on regularly-chunked data reconstructs the matrix and
    yields orthonormal factors."""
    m, n = 20, 10
    mat = np.random.rand(m, n)
    data = da.from_array(mat, chunks=(10, n), name='A')

    u, s, vt = tsqr(data, compute_svd=True)
    reconstruction = da.dot(u, da.dot(da.diag(s), vt))
    s_exact = np.linalg.svd(mat)[1]

    assert_eq(mat, reconstruction)  # accuracy check
    assert_eq(np.eye(n, n), da.dot(u.T, u))  # u must be orthonormal
    assert_eq(np.eye(n, n), da.dot(vt, vt.T))  # v must be orthonormal
    assert_eq(s, s_exact)  # s must contain the singular values
def gram_rbf(X, threshold=1.0):
    """RBF (Gaussian) Gram matrix of the rows of X.

    The kernel bandwidth adapts to the data: it is ``threshold**2`` times the
    median squared pairwise distance, so `threshold` rescales an otherwise
    data-driven width.

    Args:
        X: 2d torch.Tensor or dask array whose rows are the sample vectors.
        threshold: bandwidth multiplier (default 1.0).

    Returns:
        Square Gram matrix with the same backend as X (torch or dask).

    Raises:
        ValueError: if X is neither a torch tensor nor a dask array.
    """
    # isinstance (rather than exact `type(X) ==` comparison) also accepts
    # subclasses such as torch.nn.Parameter.
    if isinstance(X, torch.Tensor):
        dot_products = X @ X.t()
        sq_norms = dot_products.diag()
        # ||xi - xj||^2 = ||xi||^2 + ||xj||^2 - 2 xi.xj
        sq_distances = -2 * dot_products + sq_norms[:, None] + sq_norms[None, :]
        sq_median_distance = sq_distances.median()
        return torch.exp(-sq_distances / (2 * threshold**2 * sq_median_distance))
    elif isinstance(X, da.Array):
        dot_products = X @ X.T
        sq_norms = da.diag(dot_products)
        sq_distances = -2 * dot_products + sq_norms[:, None] + sq_norms[None, :]
        # dask has no lazy median; the 50th percentile is its stand-in.
        sq_median_distance = da.percentile(sq_distances.ravel(), 50)
        return da.exp((-sq_distances / (2 * threshold**2 * sq_median_distance)))
    else:
        raise ValueError(f"expected torch.Tensor or dask array, got {type(X)}")
def eig_dask(A, nofIt=None):
    """A dask eigenvalue solver: assumes A is symmetric and uses the QR
    iteration to find eigenvalues and eigenvectors.

    nofIt: number of iterations (default is the size of A).
    Returns (eigenvalues, eigenvectors) as dask arrays.
    """
    if nofIt is None:
        nofIt = A.shape[0]
    iterate = A
    # NOTE: the second positional argument of da.eye is the chunk size.
    vectors = da.eye(A.shape[0], 100)
    for _ in range(nofIt):
        q, r = da.linalg.qr(iterate)
        # A_{k+1} = R Q converges (for symmetric A) to a diagonal matrix;
        # accumulating the Q factors yields the eigenvector basis.
        iterate = da.dot(r, q)
        vectors = da.dot(vectors, q)
    return (da.diag(iterate), vectors)
def test_SSPM_case2():
    """With a flat (all-ones) spectrum every intermediate batch already
    yields the correct top-k singular values."""
    N, k = 10000, 10
    spectrum = np.ones(N)
    SSPM = SuccessiveBatchedPowerMethod(k=k,
                                        sub_svd_start=True,
                                        tol=1e-12,
                                        factor=None)
    _, _, _ = SSPM.svd(da.diag(spectrum))
    for sub_S in SSPM.history.iter['S']:
        np.testing.assert_array_almost_equal(spectrum[:k], sub_S)
def test_tsqr(m, n, chunks, error_type):
    """TSQR on cupy-backed dask arrays: QR and SVD paths.

    When error_type is None the factorization must succeed and satisfy the
    shape, accuracy and orthonormality checks below; otherwise tsqr must
    raise error_type for this (m, n, chunks) combination.
    """
    mat = cupy.random.rand(m, n)
    data = da.from_array(mat, chunks=chunks, name="A", asarray=False)

    # qr: expected factor shapes
    m_q = m
    n_q = min(m, n)
    m_r = n_q
    n_r = n

    # svd: expected factor shapes
    m_u = m
    n_u = min(m, n)
    n_s = n_q
    m_vh = n_q
    n_vh = n
    d_vh = max(m_vh, n_vh)  # full matrix returned

    if error_type is None:
        # test QR
        q, r = da.linalg.tsqr(data)
        assert_eq((m_q, n_q), q.shape)  # shape check
        assert_eq((m_r, n_r), r.shape)  # shape check
        assert_eq(mat, da.dot(q, r))  # accuracy check
        assert_eq(cupy.eye(n_q, n_q), da.dot(q.T, q))  # q must be orthonormal
        assert_eq(r, np.triu(r.rechunk(r.shape[0])))  # r must be upper triangular

        # test SVD
        u, s, vh = da.linalg.tsqr(data, compute_svd=True)
        s_exact = np.linalg.svd(mat)[1]
        assert_eq(s, s_exact)  # s must contain the singular values
        assert_eq((m_u, n_u), u.shape)  # shape check
        assert_eq((n_s, ), s.shape)  # shape check
        assert_eq((d_vh, d_vh), vh.shape)  # shape check
        assert_eq(np.eye(n_u, n_u), da.dot(u.T, u), check_type=False)  # u must be orthonormal
        assert_eq(np.eye(d_vh, d_vh), da.dot(vh, vh.T), check_type=False)  # vh must be orthonormal
        assert_eq(mat, da.dot(da.dot(u, da.diag(s)), vh[:n_q]))  # accuracy check
    else:
        # This chunking/shape combination is unsupported on both code paths.
        with pytest.raises(error_type):
            q, r = da.linalg.tsqr(data)
        with pytest.raises(error_type):
            u, s, vh = da.linalg.tsqr(data, compute_svd=True)
def __new__(cls, arrays: Iterable[dask.array.core.Array], tree_reduce: bool = True):
    """Create a lazily-chained matrix product of `arrays`.

    1D entries are interpreted as diagonal matrices. The product is folded
    with either a balanced tree reduction (default) or a left-to-right
    linear reduction, and the resulting dask graph is re-wrapped as an
    instance of this class.

    Raises ValueError for a bare dask array, non-array elements, or
    non-chainable shapes; NotImplementedError for arrays with ndim > 2.
    """
    # TODO: Use minimize operation reduction with dynamic programming.
    if isinstance(arrays, dask.array.core.Array):
        raise ValueError(f"expected Iterable of dask.array.core.Array. Got dask.array.core.Array")
    arrays = list(arrays)
    if any(not isinstance(x, (dask.array.core.Array, np.ndarray)) for x in arrays):
        raise ValueError(f"expected Iterable of dask.array.core.Array, "
                         f"but got types {set(type(x) for x in arrays)}")
    # Choose the dot-product folding strategy.
    if tree_reduce:
        reduce = tree_reduction
    else:
        reduce = linear_reduction
    # Validate chainability: columns of each factor must equal the rows of
    # the next. A 1D array stands for a square diagonal matrix (n == p).
    prev_col = None
    for array in arrays:
        if array.ndim > 2:
            raise NotImplementedError(f'expected 1D or 2D arrays, got {array.ndim}D array')
        elif array.ndim == 1:
            n = p = array.shape[0]
        else:
            n, p = array.shape
        if prev_col is None:
            prev_col = p
        else:
            if prev_col != n:
                raise ValueError(f'expected chain-able dimensions, got {[a.shape for a in arrays]}')
            else:
                prev_col = p
    # Fold the chain into a single dask array (1D entries via da.diag).
    array = reduce((a if a.ndim == 2 else da.diag(a) for a in arrays), da.dot)
    # Re-wrap the reduced graph as an instance of this class.
    self = super(ChainedArray, cls).__new__(cls, array.dask, array.name, array.chunks,
                                            array.dtype, array._meta, array.shape)
    # Keep the construction inputs around for introspection / re-reduction.
    self.tree_reduce = tree_reduce
    self.reduce = reduce
    self._arrays = arrays
    self.array = array
    return self
def test_diag():
    """da.diag matches np.diag and builds deterministic graphs."""
    vec = da.arange(11, chunks=3)
    eq(da.diag(vec), np.diag(vec))
    # Two independent calls must produce identical (deterministic) keys.
    assert sorted(da.diag(vec).dask) == sorted(da.diag(vec).dask)

    # A lazily-derived expression behaves the same.
    vec = vec + vec + 3
    eq(da.diag(vec), np.diag(vec))

    # Single chunk covering the whole vector.
    vec = da.arange(11, chunks=11)
    eq(da.diag(vec), np.diag(vec))
    assert sorted(da.diag(vec).dask) == sorted(da.diag(vec).dask)
def test_no_chunks_svd():
    """SVD works when row (or all) chunk sizes are unknown (NaN)."""
    x = np.random.random((100, 10))
    u, s, v = np.linalg.svd(x, full_matrices=0)

    for chunks in [((np.nan, ) * 10, (10, )), ((np.nan, ) * 10, (np.nan, ))]:
        dx = da.from_array(x, chunks=(10, 10))
        dx._chunks = chunks  # simulate unknown chunk sizes
        du, ds, dv = da.linalg.svd(dx)
        assert_eq(s, ds)
        assert_eq(u.dot(np.diag(s)).dot(v), du.dot(da.diag(ds)).dot(dv))
        assert_eq(du.T.dot(du), np.eye(10))
        assert_eq(dv.T.dot(dv), np.eye(10))

    dx = da.from_array(x, chunks=(10, 10))
    dx._chunks = ((np.nan, ) * 10, (np.nan, ))
    # Singular vectors agree with numpy's up to sign.
    assert_eq(abs(v), abs(dv))
    assert_eq(abs(u), abs(du))
def test_PowerMethod_subsvd_finds_eigenvectors():
    """With regularization (lmbd=.1) PowerMethod recovers the top-k spectrum
    and the matching axis-aligned right singular subspace."""
    N, k = 1000, 10
    spectrum = np.array([1.01**i for i in range(1, N + 1)])
    PM = PowerMethod(tol=1e-16, factor=None, lmbd=.1, max_iter=100)
    U_PM, S_PM, V_PM = PM.svd(da.diag(spectrum))
    # Max Q-Val is 21,000
    np.testing.assert_array_almost_equal(spectrum[-k:][::-1], S_PM, decimal=0)
    # Expected right singular vectors: the last k coordinate axes.
    expected_v = np.zeros_like(V_PM).compute()
    for j in range(k):
        expected_v[j, N - k + j] = 1
    np.testing.assert_almost_equal(subspace_dist(V_PM, expected_v, S_PM), 0, decimal=5)
def test_tsqr(m, n, chunks, error_type):
    """Direct TSQR on numpy-backed dask arrays: QR and SVD paths.

    When error_type is None the factorization must succeed and satisfy the
    shape, accuracy and orthonormality checks below; otherwise tsqr must
    raise error_type for this (m, n, chunks) combination.
    """
    mat = np.random.rand(m, n)
    data = da.from_array(mat, chunks=chunks, name='A')

    # qr: expected factor shapes
    m_q = m
    n_q = min(m, n)
    m_r = n_q
    n_r = n

    # svd: expected factor shapes
    m_u = m
    n_u = min(m, n)
    n_s = n_q
    m_vh = n_q
    n_vh = n
    d_vh = max(m_vh, n_vh)  # full matrix returned

    if error_type is None:
        # test QR
        q, r = tsqr(data)
        assert_eq((m_q, n_q), q.shape)  # shape check
        assert_eq((m_r, n_r), r.shape)  # shape check
        assert_eq(mat, da.dot(q, r))  # accuracy check
        assert_eq(np.eye(n_q, n_q), da.dot(q.T, q))  # q must be orthonormal
        assert_eq(r, da.triu(r.rechunk(r.shape[0])))  # r must be upper triangular

        # test SVD
        u, s, vh = tsqr(data, compute_svd=True)
        s_exact = np.linalg.svd(mat)[1]
        assert_eq(s, s_exact)  # s must contain the singular values
        assert_eq((m_u, n_u), u.shape)  # shape check
        assert_eq((n_s,), s.shape)  # shape check
        assert_eq((d_vh, d_vh), vh.shape)  # shape check
        assert_eq(np.eye(n_u, n_u), da.dot(u.T, u))  # u must be orthonormal
        assert_eq(np.eye(d_vh, d_vh), da.dot(vh, vh.T))  # vh must be orthonormal
        assert_eq(mat, da.dot(da.dot(u, da.diag(s)), vh[:n_q]))  # accuracy check
    else:
        # This chunking/shape combination is unsupported on both code paths.
        with pytest.raises(error_type):
            q, r = tsqr(data)
        with pytest.raises(error_type):
            u, s, vh = tsqr(data, compute_svd=True)
def test_diag_2d_array_creation(k):
    """da.diag(v, k) builds the same shifted-diagonal matrix as np.diag."""
    # numpy-backed input vector
    vec = np.arange(11)
    assert_eq(da.diag(vec, k), np.diag(vec, k))

    # dask-backed input across several chunks; keys must be deterministic
    vec = da.arange(11, chunks=3)
    assert_eq(da.diag(vec, k), np.diag(vec, k))
    assert sorted(da.diag(vec, k).dask) == sorted(da.diag(vec, k).dask)

    # lazily-derived expression
    vec = vec + vec + 3
    assert_eq(da.diag(vec, k), np.diag(vec, k))

    # single-chunk dask vector
    vec = da.arange(11, chunks=11)
    assert_eq(da.diag(vec, k), np.diag(vec, k))
    assert sorted(da.diag(vec, k).dask) == sorted(da.diag(vec, k).dask)
def test_no_chunks_svd():
    """SVD tolerates unknown (NaN) chunk sizes along one or both axes."""
    x = np.random.random((100, 10))
    u, s, v = np.linalg.svd(x, full_matrices=0)

    unknown_rows = (np.nan,) * 10
    for chunks in [(unknown_rows, (10,)), (unknown_rows, (np.nan,))]:
        dx = da.from_array(x, chunks=(10, 10))
        dx._chunks = chunks  # simulate lost chunk-size information
        du, ds, dv = da.linalg.svd(dx)
        assert_eq(s, ds)
        assert_eq(u.dot(np.diag(s)).dot(v), du.dot(da.diag(ds)).dot(dv))
        assert_eq(du.T.dot(du), np.eye(10))
        assert_eq(dv.T.dot(dv), np.eye(10))

    dx = da.from_array(x, chunks=(10, 10))
    dx._chunks = (unknown_rows, (np.nan,))
    # Singular vectors agree with numpy's up to sign.
    assert_eq(abs(v), abs(dv))
    assert_eq(abs(u), abs(du))
def test_diag_bad_input(k):
    """da.diag rejects arrays that are not 1d/2d and non-array inputs."""
    # 3d numpy input
    bad = np.arange(2 * 3 * 4).reshape((2, 3, 4))
    with pytest.raises(ValueError, match="Array must be 1d or 2d only"):
        da.diag(bad, k)

    # 3d dask input
    bad = da.arange(2 * 3 * 4).reshape((2, 3, 4))
    with pytest.raises(ValueError, match="Array must be 1d or 2d only"):
        da.diag(bad, k)

    # scalar: not an array at all
    with pytest.raises(TypeError, match="v must be a dask array or numpy array"):
        da.diag(1, k)
def nearestPD(A, threads=1):
    """Find the nearest positive-definite matrix to input A.

    Python/Numpy port of John D'Errico's `nearestSPD` MATLAB code [1], which
    credits [2] from Ahmed Fasih.

    [1] https://www.mathworks.com/matlabcentral/fileexchange/42885-nearestspd
    [2] N.J. Higham, "Computing a nearest symmetric positive semidefinite
        matrix" (1988): https://doi.org/10.1016/0024-3795(88)90223-6

    :param A: square dask array to be projected toward the PD cone.
    :param threads: used only to pick chunk sizes for the identity matrix
        added in the correction loop.
    :return: a positive-definite dask array close to A.
    """
    # NOTE(review): np.linalg.eigvals on a dask array materialises it in
    # memory — confirm A is expected to fit on one worker.
    isPD = lambda x: da.all(np.linalg.eigvals(x) > 0).compute()
    # Symmetrize, then project via the Higham (1988) construction:
    # H is the symmetric polar factor of B.
    B = (A + A.T) / 2
    _, s, V = da.linalg.svd(B)
    H = da.dot(V.T, da.dot(da.diag(s), V))
    A2 = (B + H) / 2
    # Re-symmetrize to wash out numerical asymmetry.
    A3 = (A2 + A2.T) / 2
    if isPD(A3):
        return A3
    spacing = da.spacing(da.linalg.norm(A))
    # The above is different from [1]. It appears that MATLAB's `chol` Cholesky
    # decomposition will accept matrixes with exactly 0-eigenvalue, whereas
    # Numpy's will not. So where [1] uses `eps(mineig)` (where `eps` is Matlab
    # for `np.spacing`), we use the above definition. CAVEAT: our `spacing`
    # will be much larger than [1]'s `eps(mineig)`, since `mineig` is usually on
    # the order of 1e-16, and `eps(1e-16)` is on the order of 1e-34, whereas
    # `spacing` will, for Gaussian random matrixes of small dimension, be on
    # othe order of 1e-16. In practice, both ways converge, as the unit test
    # below suggests.
    eye_chunk = estimate_chunks((A.shape[0], A.shape[0]), threads=threads)[0]
    I = da.eye(A.shape[0], chunks=eye_chunk)
    k = 1
    # Nudge the spectrum upward until the matrix tests positive definite.
    while not isPD(A3):
        mineig = da.min(da.real(np.linalg.eigvals(A3)))
        A3 += I * (-mineig * k**2 + spacing)
        k += 1
    return A3
def test_ScaledArray_fromArrayMoment_array():
    """fromScaledArray applies the first array's moments to a second array.

    Fits a ScaledCenterArray on array1 and checks that an instance built
    from it centers/scales array2 with array1's mean and std, for every
    combination of scale / center / factor options.
    """
    N1, P = 7, 10
    N2 = 5
    array1 = da.random.random(size=(N1, P)).persist()
    mu = da.mean(array1, axis=0)
    # Diagonal matrix of inverse column standard deviations.
    std = da.diag(1/da.std(array1, axis=0))
    array2 = da.random.random(size=(N2, P)).persist()
    for scale in [True, False]:
        for center in [True, False]:
            for factor1 in [None, 'n', 'p']:
                sa1 = ScaledCenterArray(scale=scale, center=center, factor=factor1)
                sa1.fit(array1)
                # NOTE(review): factor_value is never used below — the effect
                # of factor2 on sa2.array appears untested; confirm intent.
                for factor2, factor_value in zip([None, 'n', 'p'], [1, N2, P]):
                    sa2 = ScaledCenterArray.fromScaledArray(array=array2,
                                                            scaled_array=sa1,
                                                            factor=factor2)
                    # Expected: center with array1's mean, scale by its
                    # inverse std.
                    sa2_array = array2
                    if center:
                        sa2_array = sa2_array - mu
                    if scale:
                        sa2_array = sa2_array.dot(std)
                    np.testing.assert_array_almost_equal(sa2.array, sa2_array)
def test_subspace_to_SVD_case2():
    """subspace_to_SVD recovers V = I and spans U from column slices of A.

    Construct A = U S V with
        U = [e1, e2, ..., eK] stacked over zero rows and a final all-ones
            row (N by K),
        S = (K, K-1, ..., 1),
        V = I (K by K),
    so A = np.diag-like product U.dot(diag(S)). subspace_to_SVD applied to
    growing column subsets of A should recover the identity right-singular
    subspace, and the recovered U should span the constructed one.
    """
    for N in range(2, 10):
        for K in range(2, N + 1):
            # Build U: identity block on top, ones in the last row.
            U = np.zeros((N, K))
            U[N - 1, :] = np.ones(K)
            U[:K, :K] = np.eye(K)
            V = da.eye(K)
            U = da.array(U)
            U_q, _ = da.linalg.qr(U)
            S = da.arange(K, 0, -1)
            A = U.dot(da.diag(S))
            for j in range(K, N + 1):
                subspace = A[:, 0:j]
                U_s, S_s, V_s = subspace_to_SVD(subspace, A, full_v=True)
                # Right singular subspace must coincide with the identity.
                np.testing.assert_almost_equal(subspace_dist(V_s, V, S), 0,
                                               decimal=decimals)
                # All K principal angles between U_q and U_s are zero
                # (cosines equal one); the remaining spectrum vanishes.
                _, l, _ = da.linalg.svd(U_q.dot(U_s.T))
                np.testing.assert_almost_equal(l[:K].compute(), np.ones(K))
                np.testing.assert_almost_equal(l[K:].compute(), np.zeros(N - K))