def correlated_eigs(
    percent: float = 25,
    shape: Tuple[int, int] = (1000, 500),
    noise: float = 0.1,
    log: bool = True,
    return_mats: bool = False,
) -> Union[ndarray, Tuple[ndarray, ndarray, ndarray]]:
    """[WIP]. Generate a correlated system for examination with, e.g. Marcenko-Pastur.

    Draws a standard-normal matrix of the given `shape`, overwrites `percent`%
    of its rows with noisy scaled copies of row 0 (introducing correlation),
    and returns the eigenvalues of the resulting correlation matrix.

    Returns `eigs` alone, or `(eigs, A, M)` when `return_mats` is True, where
    `A` is the correlated data matrix and `M` its correlation matrix.
    """
    mat = np.random.standard_normal(shape)
    # rows eligible for correlation — row 0 is the source, never a target
    row_pool = np.random.permutation(mat.shape[0] - 1) + 1
    n_corr = int(np.floor((percent / 100) * mat.shape[0]))
    targets = row_pool[:n_corr]

    # introduce correlation: each target row becomes a randomly signed,
    # randomly scaled, noise-perturbed copy of row 0
    choice, uniform, normal = np.random.choice, np.random.uniform, np.random.normal
    for row in targets:
        sign = choice([-1, 1])
        scale = uniform(1, 2)
        mat[row, :] = sign * scale * (mat[0, :] + normal(0, noise, size=mat.shape[1]))

    corr_mat = correlate_fast(mat)
    if log:
        print(f"\n{time.strftime('%H:%M:%S (%b%d)')} -- computing eigenvalues...")
    eigs = np.linalg.eigvalsh(corr_mat)
    if log:
        print(f"{time.strftime('%H:%M:%S (%b%d)')} -- computed eigenvalues.")

    # Marchenko-Pastur bulk edges for the pure-noise case
    n, t = shape
    root = np.sqrt(n / t)
    eig_min, eig_max = (1 - root) ** 2, (1 + root) ** 2
    print(f"Eigenvalues in ({eig_min},{eig_max}) are likely noise-related.")
    if return_mats:
        return eigs, mat, corr_mat
    return eigs
def test_transpose_trick() -> None:
    """Verify that `eigv` (transpose trick) matches direct eigvalsh computation
    for both correlation and covariance matrices."""
    # correlation matrices
    for _ in range(10):
        samples = np.random.standard_normal([1000, 250])
        direct = np.linalg.eigvalsh(correlate_fast(samples, ddof=1))[-250:]
        tricked = eigv(samples, covariance=False)
        assert np.allclose(direct, tricked)

    # covariance matrices
    ddof = 1
    for _ in range(10):
        samples = np.random.standard_normal([1000, 250])
        direct = np.linalg.eigvalsh(np.cov(samples, ddof=ddof))[-250:]
        tricked = eigv(samples, covariance=True)
        assert np.allclose(direct, tricked)
def test_correlated_gaussian_noise() -> None:
    """Visual check: unfold and plot eigenvalues for increasingly correlated
    Gaussian noise."""
    var = 0.1
    for percent in [25, 50, 75, 95]:
        signals = np.random.standard_normal([1000, 500])
        # rows eligible for correlation — row 0 is the source, never a target
        candidates = np.random.permutation(signals.shape[0] - 1) + 1
        n_corr = int(np.floor((percent / 100) * signals.shape[0]))
        # introduce correlation: noisy scaled copies of row 0
        for row in candidates[:n_corr]:
            signals[row, :] = np.random.uniform(1, 2) * signals[0, :] + np.random.normal(
                0, var, size=signals.shape[1]
            )
        corr_mat = correlate_fast(signals)
        eigs = get_eigs(corr_mat)
        print(f"\nPercent correlated noise: {percent}%")
        unfold_and_plot(eigs, f"\nCorrelated noise: {percent}%")
    plt.show()
def test_axes_configuring() -> None:
    """Visual check of plot-axes configuration: fit plots for correlated noise
    and for GOE-unfolded eigenvalues."""
    var = 0.1
    percent = 25
    signals = np.random.standard_normal([1000, 500])
    # rows eligible for correlation — row 0 is the source, never a target
    candidates = np.random.permutation(signals.shape[0] - 1) + 1
    n_corr = int(np.floor((percent / 100) * signals.shape[0]))
    # introduce correlation: noisy scaled copies of row 0
    for row in candidates[:n_corr]:
        signals[row, :] = np.random.uniform(1, 2) * signals[0, :] + np.random.normal(
            0, var, size=signals.shape[1]
        )
    corr_mat = correlate_fast(signals)
    eigs = get_eigs(corr_mat)
    print(f"\nPercent correlated noise: {percent}%")
    unfolded = Eigenvalues(eigs).unfold(degree=13)
    unfolded.plot_fit(mode="noblock")
    goe_unfolded(1000, log=True).plot_fit(mode="block")
def time_series_eigs(n: int = 1000, t: int = 200, dist: str = "normal", log: bool = True) -> ndarray:
    """Generate a correlation matrix for testing Marcenko-Pastur, other spectral observables.

    Parameters
    ----------
    n: int
        Number of time series (rows) to generate.
    t: int
        Number of time points (columns) per series.
    dist: str
        Distribution of the generated data. Currently only "normal" is supported.
    log: bool
        If True, print timestamped progress messages.

    Returns
    -------
    eigs: ndarray
        Eigenvalues of the correlation matrix of the generated data.

    Raises
    ------
    ValueError
        If `dist` is not "normal". (Previously an unsupported `dist` crashed
        with an unhelpful NameError because `M_time` was never assigned.)
    """
    if dist != "normal":
        raise ValueError(f"Unsupported `dist`: {dist!r}. Only 'normal' is currently implemented.")
    M_time = np.random.standard_normal([n, t])
    if log:
        print(
            f"\n{time.strftime('%H:%M:%S (%b%d)')} -- computing correlations..."
        )
    M = correlate_fast(M_time)
    if log:
        print(
            f"\n{time.strftime('%H:%M:%S (%b%d)')} -- computing eigenvalues..."
        )
    eigs = np.linalg.eigvalsh(M)
    if log:
        print(
            f"\n{time.strftime('%H:%M:%S (%b%d)')} -- computed eigenvalues...")
    return eigs
def from_time_series(
    cls: Type[Eigens],
    data: ndarray,
    covariance: bool = True,
    trim_zeros: bool = True,
    zeros: Union[float, Literal["negative", "heuristic"]] = "negative",
    time_axis: int = 1,
    use_sparse: bool = False,
    **sp_args: Any,
) -> Eigens:
    """Use Marchenko-Pastur and positive semi-definiteness to identify likely noise
    values and zero-valued eigenvalues due to floating point imprecision.

    Parameters
    ----------
    data: ndarray
        A 2-dimensional matrix of time-series data.

    covariance: bool
        If True (default) compute the eigenvalues of the covariance matrix.
        If False, use the correlation matrix.

    trim_zeros: bool
        If True (default) only return eigenvalues greater than `zeros`
        (e.g. remove values that are likely unstable or actually zero due to
        floating point precision limitations).

    zeros: float
        If a float, the smallest acceptable value for an eigenvalue not to be
        considered zero.
        If "negative", trim invalid negative eigenvalues (covariance and
        correlation matrices are positive semi-definite, so negatives must be
        numerical artifacts).
        If "heuristic", trim away eigenvalues likely to be unstable:
            - if the computed eigenvalues are `eigs`, and `emin = eigs.min()`
              with `emin < 0`, then trim to `eigs[eigs > 100 * np.abs(emin)]`
            - if `emin >= 0`, trim to `eigs[eigs > 0]`

    time_axis: int
        If 0, assumes `data.shape == (T, n)`; if 1 (default), assumes
        `data.shape == (n, T)`, where n is the number of features / variables,
        and T is the length (number of points) of each time series.

    use_sparse: bool
        Convert the interim correlation matrix to a sparse triangular matrix,
        and use `scipy.sparse.linalg.eigsh` to solve for the eigenvalues. This
        currently does not save memory (since we still compute an interim dense
        covariance matrix) but gives more control over what eigenvalues are
        returned.

    sp_args:
        Keyword arguments to pass to `scipy.sparse.linalg.eigsh`.

    Returns
    -------
    eigenvalues: Eigenvalues
        The Eigenvalues object, with extra time-series relevant data:
        - Eigenvalues.marcenko_endpoints: (float, float)
    """
    if len(data.shape) != 2:
        raise ValueError("Input `data` array must have dimension of 2.")
    if time_axis not in [0, 1]:
        raise ValueError("Invalid `time_axis`. Must be either 0 or 1.")
    if time_axis == 0:
        # normalize to (n, T) so that variables are rows
        data = data.T

    N, T = data.shape
    M, eigs = None, None
    if N <= T:  # no benefit from intermediate transposition
        M = np.cov(data, ddof=1) if covariance else correlate_fast(data, ddof=1)
        if use_sparse:
            # only the lower triangle is needed by eigsh for a symmetric matrix
            M = sparse.tril(M)
            if sp_args.get("return_eigenvectors") is True:
                raise ValueError(
                    "This function is intended only as a helper to extract eigenvalues from time-series."
                )
            # NOTE(review): scipy's eigsh returns (values, vectors) unless
            # return_eigenvectors=False is passed via sp_args — callers appear
            # expected to pass it; verify.
            eigs = sparse.linalg.eigsh(M, **sp_args)
        else:
            eigs = np.linalg.eigvalsh(M)
    else:
        # more variables than time points: use the transpose trick, which
        # solves the smaller T x T problem
        eigs = _eigs_via_transpose(data, covariance=covariance)

    if trim_zeros:
        if zeros == "heuristic":
            e_min = eigs.min()
            minval = 0
            if e_min <= 0:
                # -100 * e_min == 100 * |e_min| since e_min <= 0
                minval = -100 * e_min
            eigs = eigs[eigs > minval]
        elif zeros == "negative":
            eigs = eigs[eigs > 0]
        else:
            try:
                zeros = float(zeros)
            except ValueError as e:
                raise ValueError(
                    "`zeros` must be a either a float, 'heuristic' or 'negative'"
                ) from e
            eigs = eigs[eigs > zeros]

    eigenvalues = cls(eigs)
    N, T = data.shape
    eigenvalues._series_T = T
    eigenvalues._series_N = N
    # get some Marchenko-Pastur endpoints
    shift = 1 - eigs.max() / N
    r = np.sqrt(N / T)
    eigenvalues._marchenko = ((1 - r)**2, (1 + r)**2)
    # NOTE(review): the shifted endpoints are stored in (upper, lower) order
    # relative to `_marchenko` when `shift > 0` — confirm downstream consumers
    # expect this ordering.
    eigenvalues._marchenko_shifted = (shift * (1 + r)**2, shift * (1 - r)**2)
    return eigenvalues  # type: ignore
def test_uniform_noise() -> None:
    """Visual check: unfold and plot eigenvalues from uniform random noise."""
    samples = np.random.uniform(0, 1, size=[1000, 250])
    eigs = get_eigs(correlate_fast(samples))
    unfold_and_plot(eigs, "Uniform Noise")
def test_gaussian_noise() -> None:
    """Visual check: unfold and plot eigenvalues from Gaussian random noise."""
    samples = np.random.standard_normal([1000, 250])
    eigs = get_eigs(correlate_fast(samples))
    unfold_and_plot(eigs, "Gaussian Noise")