Пример #1
0
def correlated_eigs(
    percent: float = 25,
    shape: Tuple[int, int] = (1000, 500),
    noise: float = 0.1,
    log: bool = True,
    return_mats: bool = False,
) -> Union[ndarray, Tuple[ndarray, ndarray, ndarray]]:
    """[WIP]. Generate a correlated system for examination with, e.g.
    Marcenko-Pastur.

    Parameters
    ----------
    percent: float
        Percentage (0-100) of the rows to make correlated with the first row.

    shape: Tuple[int, int]
        Shape (n, t) of the generated standard-normal data matrix.

    noise: float
        Standard deviation of the Gaussian noise mixed into each correlated row.

    log: bool
        If True, print timestamped progress messages.

    return_mats: bool
        If True, also return the data matrix A and correlation matrix M.

    Returns
    -------
    eigs: ndarray
        The eigenvalues of the correlation matrix, or the tuple
        (eigs, A, M) if `return_mats` is True.
    """
    A = np.random.standard_normal(shape)
    # permute rows 1..n-1 so the reference row 0 is never selected
    correlated = np.random.permutation(A.shape[0] - 1) + 1
    last = int(np.floor((percent / 100) * A.shape[0]))
    corr_indices = correlated[:last]
    # introduce correlation in A: each chosen row becomes a random +/-[1, 2)
    # scaling of row 0 plus Gaussian noise
    ch, unif, norm = np.random.choice, np.random.uniform, np.random.normal
    for i in corr_indices:
        A[i, :] = ch([-1, 1]) * unif(
            1, 2) * (A[0, :] + norm(0, noise, size=A.shape[1]))
    M = correlate_fast(A)
    if log:
        print(
            f"\n{time.strftime('%H:%M:%S (%b%d)')} -- computing eigenvalues..."
        )
    eigs = np.linalg.eigvalsh(M)
    if log:
        print(f"{time.strftime('%H:%M:%S (%b%d)')} -- computed eigenvalues.")
    n, t = shape
    # Marchenko-Pastur support endpoints for aspect ratio n/t
    eig_min, eig_max = (1 - np.sqrt(n / t))**2, (1 + np.sqrt(n / t))**2
    # BUG FIX: this summary was previously printed unconditionally,
    # inconsistent with the `log` gating of every other message here
    if log:
        print(f"Eigenvalues in ({eig_min},{eig_max}) are likely noise-related.")

    if return_mats:
        return eigs, A, M
    return eigs
Пример #2
0
def test_transpose_trick() -> None:
    """Verify the transpose trick (`eigv`) reproduces directly-computed
    eigenvalues for both correlation and covariance matrices."""
    # correlation-matrix case
    for _ in range(10):
        data = np.random.standard_normal([1000, 250])
        direct = np.linalg.eigvalsh(correlate_fast(data, ddof=1))[-250:]
        via_transpose = eigv(data, covariance=False)
        assert np.allclose(direct, via_transpose)

    # covariance-matrix case
    ddof = 1
    for _ in range(10):
        data = np.random.standard_normal([1000, 250])
        direct = np.linalg.eigvalsh(np.cov(data, ddof=ddof))[-250:]
        via_transpose = eigv(data, covariance=True)
        assert np.allclose(direct, via_transpose)
Пример #3
0
def test_correlated_gaussian_noise() -> None:
    """Unfold and plot spectra as an increasing fraction of rows is made
    correlated with the first row of a Gaussian matrix."""
    var = 0.1
    for percent in [25, 50, 75, 95]:
        A = np.random.standard_normal([1000, 500])
        # permute rows 1..n-1 so the reference row 0 is never chosen
        candidates = np.random.permutation(A.shape[0] - 1) + 1
        n_corr = int(np.floor((percent / 100) * A.shape[0]))
        # introduce correlation in A
        for row in candidates[:n_corr]:
            scale = np.random.uniform(1, 2)
            A[row, :] = scale * A[0, :] + np.random.normal(
                0, var, size=A.shape[1])
        eigs = get_eigs(correlate_fast(A))
        print(f"\nPercent correlated noise: {percent}%")
        unfold_and_plot(eigs, f"\nCorrelated noise: {percent}%")
    plt.show()
Пример #4
0
def test_axes_configuring() -> None:
    """Smoke-test plot-fit axes configuration on a correlated-noise spectrum
    and on a GOE spectrum."""
    var = 0.1
    percent = 25
    A = np.random.standard_normal([1000, 500])
    # permute rows 1..n-1 so the reference row 0 is never chosen
    candidates = np.random.permutation(A.shape[0] - 1) + 1
    n_corr = int(np.floor((percent / 100) * A.shape[0]))
    # introduce correlation in A
    for row in candidates[:n_corr]:
        A[row, :] = np.random.uniform(1, 2) * A[0, :] + np.random.normal(
            0, var, size=A.shape[1])
    eigs = get_eigs(correlate_fast(A))
    print(f"\nPercent correlated noise: {percent}%")
    Eigenvalues(eigs).unfold(degree=13).plot_fit(mode="noblock")

    goe_unfolded(1000, log=True).plot_fit(mode="block")
Пример #5
0
def time_series_eigs(n: int = 1000,
                     t: int = 200,
                     dist: str = "normal",
                     log: bool = True) -> ndarray:
    """Generate a correlation matrix for testing Marcenko-Pastur, other spectral observables.

    Parameters
    ----------
    n: int
        Number of rows (variables) of the generated time-series matrix.

    t: int
        Number of columns (time points) of the generated matrix.

    dist: str
        Distribution of the generated data. Only "normal" is implemented.

    log: bool
        If True, print timestamped progress messages.

    Returns
    -------
    eigs: ndarray
        Eigenvalues of the correlation matrix of the generated data.

    Raises
    ------
    ValueError
        If `dist` is not "normal".
    """
    if dist == "normal":
        M_time = np.random.standard_normal([n, t])
    else:
        # BUG FIX: previously fell through with `M_time` unbound, so any other
        # `dist` value caused a confusing NameError below
        raise ValueError(f"Unsupported `dist`: {dist!r}. Only 'normal' is implemented.")

    if log:
        print(
            f"\n{time.strftime('%H:%M:%S (%b%d)')} -- computing correlations..."
        )
    M = correlate_fast(M_time)
    if log:
        print(
            f"\n{time.strftime('%H:%M:%S (%b%d)')} -- computing eigenvalues..."
        )
    eigs = np.linalg.eigvalsh(M)
    if log:
        print(
            f"\n{time.strftime('%H:%M:%S (%b%d)')} -- computed eigenvalues...")
    return eigs
Пример #6
0
    def from_time_series(
        cls: Type[Eigens],
        data: ndarray,
        covariance: bool = True,
        trim_zeros: bool = True,
        zeros: Union[float, Literal["negative", "heuristic"]] = "negative",
        time_axis: int = 1,
        use_sparse: bool = False,
        **sp_args: Any,
    ) -> Eigens:
        """Use Marchenko-Pastur and positive semi-definiteness to identify likely noise
        values and zero-valued eigenvalues due to floating point imprecision

        Parameters
        ----------
        data: ndarray
            A 2-dimensional matrix of time-series data.

        covariance: bool
            If True (default) compute the eigenvalues of the covariance matrix.
            If False, use the correlation matrix.

        trim_zeros: bool
            If True (default) only return eigenvalues greater than `zeros`
            (e.g. remove values that are likely unstable or actually zero due
            to floating point precision limitations).

        zeros: float
            If a float, the smallest acceptable value for an eigenvalue not to
            be considered zero.
            If "negative", trim invalid negative eigenvalues (e.g. because
            covariance and correlation matrices are positive semi-definite)
            If "heuristic", trim away eigenvalues likely to be unstable:
                - if computed eigenvalues are `eigs`, and if `emin = eigs.min()`,
                  `emin < 0`, then trim to `eigs[eigs > 100 * np.abs(emin)]`
                - if emin >= 0, trim to `eigs[eigs > 0]`

        time_axis: int
            If 1 (default), assumes data.shape == (n, T), where n is the number
            of features / variables, and T is the length (number of points) in
            each time series. If 0, assumes data.shape == (T, n) and the data
            is transposed internally.

        use_sparse: bool
            Convert the interim correlation matrix to a sparse triangular
            matrix, and use `scipy.sparse.linalg.eigsh` to solve for the
            eigenvalues. This currently does not save memory (since we still
            compute an interim dense covariance matrix) but gives more control
            over what eigenvalues are returned.

        sp_args:
            Keyword arguments to pass to scipy.sparse.linalg.eigsh.


        Returns
        -------
        eigenvalues: Eigenvalues
            The Eigenvalues object, with extra time-series relevant data:
            - Eigenvalues.marcenko_endpoints: (float, float)
        """
        if len(data.shape) != 2:
            raise ValueError("Input `data` array must have dimension of 2.")
        if time_axis not in [0, 1]:
            raise ValueError("Invalid `time_axis`. Must be either 0 or 1.")
        if time_axis == 0:
            # normalize to (n, T): variables along rows, time along columns
            data = data.T

        N, T = data.shape
        M, eigs = None, None
        if N <= T:  # no benefit from intermediate transposition
            M = np.cov(data, ddof=1) if covariance else correlate_fast(data,
                                                                       ddof=1)
            if use_sparse:
                M = sparse.tril(M)
                if sp_args.get("return_eigenvectors") is True:
                    raise ValueError(
                        "This function is intended only as a helper to extract eigenvalues from time-series."
                    )

                # NOTE(review): scipy's eigsh defaults to
                # return_eigenvectors=True, in which case this returns a
                # (values, vectors) tuple, not an array — looks like callers
                # are expected to pass return_eigenvectors=False in sp_args;
                # verify.
                eigs = sparse.linalg.eigsh(M, **sp_args)
            else:
                eigs = np.linalg.eigvalsh(M)
        else:
            # wide data (N > T): use the transpose trick to solve the smaller
            # T x T eigenproblem instead
            eigs = _eigs_via_transpose(data, covariance=covariance)

        if trim_zeros:
            if zeros == "heuristic":
                # keep only eigenvalues safely above the magnitude of the most
                # negative (i.e. numerically invalid) eigenvalue
                e_min = eigs.min()
                minval = 0
                if e_min <= 0:
                    minval = -100 * e_min
                eigs = eigs[eigs > minval]
            elif zeros == "negative":
                eigs = eigs[eigs > 0]
            else:
                try:
                    zeros = float(zeros)
                except ValueError as e:
                    raise ValueError(
                        "`zeros` must be a either a float, 'heuristic' or 'negative'"
                    ) from e
                eigs = eigs[eigs > zeros]

        eigenvalues = cls(eigs)
        N, T = data.shape
        eigenvalues._series_T = T
        eigenvalues._series_N = N
        # get some Marchenko-Pastur endpoints
        # NOTE(review): the shift factor and the reversed (1+r)/(1-r) ordering
        # of the shifted endpoints relative to `_marchenko` look deliberate but
        # are not derivable from this block — confirm against the reference
        # used for the shifted Marchenko-Pastur bound.
        shift = 1 - eigs.max() / N
        r = np.sqrt(N / T)
        eigenvalues._marchenko = ((1 - r)**2, (1 + r)**2)
        eigenvalues._marchenko_shifted = (shift * (1 + r)**2,
                                          shift * (1 - r)**2)
        return eigenvalues  # type: ignore
Пример #7
0
def test_uniform_noise() -> None:
    """Unfold and plot the spectrum of a Uniform(0, 1) noise matrix."""
    data = np.random.uniform(0, 1, size=[1000, 250])
    spectrum = get_eigs(correlate_fast(data))
    unfold_and_plot(spectrum, "Uniform Noise")
Пример #8
0
def test_gaussian_noise() -> None:
    """Unfold and plot the spectrum of a standard-normal noise matrix."""
    data = np.random.standard_normal([1000, 250])
    spectrum = get_eigs(correlate_fast(data))
    unfold_and_plot(spectrum, "Gaussian Noise")