Exemplo n.º 1
0
def test_brody_fit() -> None:
    """Sanity-check the output of `fit_brody` for both fitting methods.

    For several spectrum sizes, the unfolded eigenvalues are fit with the
    "spacing" (max spacing) and "mle" methods, and the returned spacings,
    empirical cdf and Brody cdf are checked for infinities / values
    outside [0, 1].
    """
    for N in [100, 250, 500, 1000]:
        unfolded = Eigenvalues(generate_eigs(N)).unfold(degree=7)
        # first fit via max spacing, then via mle
        for method in ("spacing", "mle"):
            fit = unfolded.fit_brody(method=method)

            if np.any(np.isinf(fit["spacings"])):
                raise ValueError("Return spacings contains infinities.")

            empirical = fit["ecdf"]
            if np.any(empirical < 0) or np.any(empirical > 1):
                raise ValueError("Invalid values in empirical cdf.")

            theoretical = fit["brody_cdf"]
            if np.any(theoretical < 0) or np.any(theoretical > 1):
                raise ValueError("Invalid values in brody cdf.")
Exemplo n.º 2
0
def test_trim_manual() -> None:
    """Manual trimming must agree with plain slicing of the raw values.

    Twenty random (start, end) index pairs are drawn and the result of
    `trim_manually` is compared against the corresponding slice.
    """
    vals = generate_eigs(2000)
    for _ in range(20):
        bounds = np.array(np.random.uniform(0, len(vals), 2), dtype=int)
        start, stop = np.sort(bounds)
        expected = np.copy(vals[start:stop])
        trimmed = Eigenvalues(vals).trim_manually(start, stop)
        assert np.allclose(expected, trimmed.vals)
Exemplo n.º 3
0
def precompute_levelvar(eigpaths: List[Path],
                        args: Any,
                        out: Path,
                        force: bool = False,
                        silent: bool = False) -> Path:
    """Compute the level variance of each saved spectrum and pickle the result.

    Every file in `eigpaths` is loaded, trimmed according to `args.trim`,
    unfolded with `args.unfold`, and its level variance is computed with
    `args.levelvar`. The results are collected in a DataFrame with one "L"
    column (taken from the first file processed) and one "sigma" column per
    file, named after the file stem. That DataFrame is pickled to `out`.

    Parameters
    ----------
    eigpaths: List[Path]
        Paths to `.npy` files of eigenvalues.

    args: Args
        Must provide `trim` (str), `unfold` (dict of keyword arguments for
        `Eigenvalues.unfold`) and `levelvar` (dict of keyword arguments for
        `level_variance`).

    out: Path
        Where the DataFrame is pickled.

    force: bool
        If False (default), don't recompute the values if `out` already
        exists.

    silent: bool
        If False (default) display a tqdm progress bar while calculating.

    Returns
    -------
    pickle: Path
        `out`, the path to the pickle file holding the precomputed values.
    """
    if not force and out.exists():
        return out
    var_df = pd.DataFrame()
    desc = "{} - Levelvar"
    pbar = tqdm(total=len(eigpaths),
                desc=desc.format("eigs-XX"),
                disable=silent)
    # try/finally so the progress bar is released even if one file fails
    try:
        for path in eigpaths:
            eigname = path.stem
            vals = np.load(path)
            if args.trim in ["(1,:)", "", "(0,:)"]:
                vals = vals[1:]  # smallest eigenvalue is always spurious here
            else:
                # NOTE(review): eval() executes arbitrary code. This is only
                # acceptable while args.trim comes from trusted configuration
                # such as "(1,-1)"; never pass user-supplied strings here.
                low, high = eval(args.trim)
                vals = vals[low:high]
            unfolded = Eigenvalues(vals).unfold(**args.unfold)
            pbar.set_description(desc=desc.format(eigname))
            levelvar = unfolded.level_variance(**args.levelvar)
            pbar.update()
            if "L" not in var_df.columns:
                # the L grid is stored once, from the first spectrum
                var_df["L"] = levelvar["L"]
            var_df[eigname] = levelvar["sigma"]
    finally:
        pbar.close()
    var_df.to_pickle(out)
    return out
Exemplo n.º 4
0
def precompute_brody(eigpaths: List[Path],
                     args: Any,
                     out: Path,
                     force: bool = False,
                     silent: bool = False) -> Path:
    """Compute the Brody parameter beta of each saved spectrum and pickle it.

    Every file in `eigpaths` is loaded, trimmed according to `args.trim`,
    unfolded with `args.unfold`, and fit with `fit_brody(**args.brody)`. The
    fitted "beta" values are stored in a DataFrame with a single row
    ("beta") and one column per file, named after the file stem. That
    DataFrame is pickled to `out`.

    Parameters
    ----------
    eigpaths: List[Path]
        Paths to `.npy` files of eigenvalues.

    args: Args
        Must provide `trim` (str), `unfold` (dict of keyword arguments for
        `Eigenvalues.unfold`) and `brody` (dict of keyword arguments for
        `fit_brody`).

    out: Path
        Where the DataFrame is pickled.

    force: bool
        If False (default), don't recompute the values if `out` already
        exists.

    silent: bool
        If False (default) display a tqdm progress bar while calculating.

    Returns
    -------
    pickle: Path
        `out`, the path to the pickle file holding the precomputed values.
    """
    if not force and out.exists():
        return out
    brod_df = pd.DataFrame(index=["beta"])
    desc = "{} - Brody"
    pbar = tqdm(total=len(eigpaths),
                desc=desc.format("eigs-XX"),
                disable=silent)
    # try/finally so the progress bar is released even if one file fails
    try:
        for path in eigpaths:
            eigname = path.stem
            vals = np.load(path)
            if args.trim in ["(1,:)", "", "(0,:)"]:
                vals = vals[1:]  # smallest eigenvalue is always spurious here
            else:
                # NOTE(review): eval() executes arbitrary code. This is only
                # acceptable while args.trim comes from trusted configuration
                # such as "(1,-1)"; never pass user-supplied strings here.
                low, high = eval(args.trim)
                vals = vals[low:high]
            unfolded = Eigenvalues(vals).unfold(**args.unfold)
            pbar.set_description(desc=desc.format(eigname))
            brody = unfolded.fit_brody(**args.brody)
            brod_df[eigname] = brody["beta"]
            # advance only once the fit for this file has actually finished
            pbar.update()
    finally:
        pbar.close()
    brod_df.to_pickle(out)
    return out
Exemplo n.º 5
0
def test_trim_reports() -> None:
    """Regression test for `TrimReport.best_overall` on a seeded spectrum.

    Checks the set of consistent smoothers and the best trim indices, then
    exercises `plot_trim_steps` in test mode.
    """
    report = Eigenvalues(generate_eigs(2000, seed=2)).trim_report()
    # only the last two entries of the returned 4-tuple are asserted on
    _, _, best_indices, consistent_smoothers = report.best_overall()

    expected_smoothers = np.sort(["poly_7", "poly_8", "poly_9"])
    assert np.array_equal(np.sort(consistent_smoothers), expected_smoothers)

    expected_indices = [(104, 1765), (231, 1765), (104, 2000)]
    assert np.array_equal(best_indices, expected_indices)

    report.plot_trim_steps(mode="test")
Exemplo n.º 6
0
def test_poisson() -> None:
    """Smoke-test NNSD plotting for a Poisson spectrum of 5000 eigenvalues."""
    poisson_vals = generate_eigs(5000, kind="poisson")
    plot_options = dict(
        title="Poisson Spacing Test",
        bins=10,
        kde=True,
        mode="test",
        ensembles=["poisson"],
    )
    Eigenvalues(poisson_vals).unfold().plot_nnsd(**plot_options)
Exemplo n.º 7
0
def test_unfold_methods() -> None:
    """Regression test for automatic trimming and unfolding (seed=2, N=500).

    The first and last values of the best-trimmed spectrum, and of the
    automatically trimmed-and-unfolded spectrum, are printed and compared
    against previously recorded values.
    """
    eigs = Eigenvalues(generate_eigs(500, seed=2))

    trimmed_vals = eigs.get_best_trimmed().vals
    trim_first, trim_last = trimmed_vals[0], trimmed_vals[-1]
    print("Trim starts and ends:")
    print(trim_first)
    print(trim_last)
    assert np.allclose(trim_first, -35.84918623729985)
    assert np.allclose(trim_last, 34.709818777689364)

    unfolded_vals = eigs.trim_unfold_auto().vals
    unf_first, unf_last = unfolded_vals[0], unfolded_vals[-1]
    print("Trim starts and ends:")
    print(unf_first)
    print(unf_last)
    assert np.allclose(unf_first, -2.473290621491799)
    assert np.allclose(unf_last, 504.2764217889801)
Exemplo n.º 8
0
def unfold_and_plot(eigs: ndarray, suptitle: str) -> None:
    """Auto-trim and unfold `eigs`, then plot its observables.

    Parameters
    ----------
    eigs: ndarray
        Raw eigenvalues.

    suptitle: str
        Figure title; the percentage of eigenvalues removed by trimming is
        appended to it.
    """
    auto_options = dict(max_trim=0.5,
                        max_iters=9,
                        poly_degrees=[13],
                        gompertz=False)
    unfolded = Eigenvalues(eigs).trim_unfold_auto(**auto_options)

    n_kept, n_total = len(unfolded.vals), len(eigs)
    trimmed = np.round(100 - 100 * n_kept / n_total, 1)

    # rigidity is computed before level variance, as in the plotting call
    rigidity = unfolded.spectral_rigidity(c_iters=10000, show_progress=True)
    levelvar = unfolded.level_variance(show_progress=True)

    _observables(
        eigs=unfolded.original_eigs,
        unfolded=unfolded.vals,
        rigidity_df=rigidity,
        levelvar_df=levelvar,
        suptitle=suptitle + f" ({trimmed}% removed)",
        mode="noblock",
    )
Exemplo n.º 9
0
    def _get_formatted_data(self) -> Dict[str, Tuple[DataFrame, DataFrame]]:
        """Load all data based on self.args, and reformat for ML classifiers.

        Each eigenvalue file and each marchenko pickle is read from disk
        exactly once and reused for every feature set derived from it.

        Returns
        -------
        data: Dict[str, Tuple[DataFrame, DataFrame]]
            Maps a feature-set name ("Raw Eigs", "Unfolded", "Largest",
            "Largest20", "Noise", "Noise (shift)", "Brody", "Rigidity",
            "Levelvar") to a pair of frames, one per group (built from
            `self.eigs1` / `self.eigs2` and index 0 / 1 of `self.marchenko`,
            `self.brody`, `self.rigidity`, `self.levelvar`).
        """
        trim_args = self.args.trim
        unf_args = self.args.unfold
        # see if the raw eigs alone are more useful than RMT stats
        raw1 = [np.load(p) for p in self.eigs1]
        raw2 = [np.load(p) for p in self.eigs2]

        # derived from the untouched loaded arrays, before any other processing
        largest1 = DataFrame([eigs.max() for eigs in raw1])
        largest2 = DataFrame([eigs.max() for eigs in raw2])
        largest20_1 = DataFrame([eigs[-20:] for eigs in raw1])
        largest20_2 = DataFrame([eigs[-20:] for eigs in raw2])

        unfolded1 = DataFrame(
            data=[Eigenvalues(_trimmed_from_args(eigs, trim_args)).unfold(**unf_args).vals for eigs in raw1]
        )
        unfolded2 = DataFrame(
            data=[Eigenvalues(_trimmed_from_args(eigs, trim_args)).unfold(**unf_args).vals for eigs in raw2]
        )

        # truncate every spectrum to the shortest length so rows line up
        l1, l2 = np.min([len(eigs) for eigs in raw1]), np.min([len(eigs) for eigs in raw2])
        l_shared = np.min([l1, l2])
        eigs1 = DataFrame(data=np.array([eigs[-l_shared:] for eigs in raw1]))  # use largest eigenvalues only
        eigs2 = DataFrame(data=np.array([eigs[-l_shared:] for eigs in raw2]))

        marchenko1 = pd.read_pickle(self.marchenko[0])
        marchenko2 = pd.read_pickle(self.marchenko[1])
        noise1 = marchenko1.loc["noise_ratio", :].T
        noise2 = marchenko2.loc["noise_ratio", :].T
        noise_shifted1 = marchenko1.loc["noise_ratio_shifted", :].T
        noise_shifted2 = marchenko2.loc["noise_ratio_shifted", :].T
        brody1 = pd.read_pickle(self.brody[0]).loc["beta"].T
        brody2 = pd.read_pickle(self.brody[1]).loc["beta"].T
        rig1 = pd.read_pickle(self.rigidity[0]).set_index("L").T  # must be (n_samples, n_features)
        rig2 = pd.read_pickle(self.rigidity[1]).set_index("L").T
        var1 = pd.read_pickle(self.levelvar[0]).set_index("L").T
        var2 = pd.read_pickle(self.levelvar[1]).set_index("L").T

        return {
            "Raw Eigs": (eigs1, eigs2),
            "Unfolded": (unfolded1, unfolded2),
            "Largest": (largest1, largest2),
            "Largest20": (largest20_1, largest20_2),
            "Noise": (noise1, noise2),
            "Noise (shift)": (noise_shifted1, noise_shifted2),
            "Brody": (brody1, brody2),
            "Rigidity": (rig1, rig2),
            "Levelvar": (var1, var2),
        }
Exemplo n.º 10
0
def test_init_sanity() -> None:
    """Check the basic structure of a freshly built trim report.

    Verifies the stored untrimmed values, the types of the summary and of
    the trim iterations, that the report can be written to CSV (the file is
    removed afterwards), and that the trim steps can be plotted.
    """
    eigs = Eigenvalues(generate_eigs(1000))
    report_options = dict(
        max_iters=9,
        poly_degrees=[5, 7, 9],
        spline_degrees=[],
        spline_smooths=[],
        show_progress=True,
    )
    report = eigs.trim_report(**report_options)

    assert np.allclose(report._untrimmed, eigs.original_eigenvalues)
    assert isinstance(report.summary, pd.DataFrame)
    iters = report._trim_iters
    assert isinstance(iters, list)
    assert isinstance(iters[0], TrimIter)

    # round-trip through a CSV in the current working directory
    csv_path = Path(".") / "trim_report.csv"
    report.to_csv(csv_path)
    assert csv_path.exists()
    csv_path.unlink()

    report.plot_trim_steps(mode="test")
Exemplo n.º 11
0
def test_axes_configuring() -> None:
    """Plot unfolding fits for a partially correlated system and for GOE.

    A 1000 x 500 standard-normal matrix has 25% of its rows (never row 0)
    replaced by a randomly scaled copy of row 0 plus Gaussian noise; the
    eigenvalues of its correlation matrix are unfolded and the fit is
    plotted, followed by the fit of a GOE spectrum.
    """
    var = 0.1
    percent = 25
    A = np.random.standard_normal([1000, 500])
    n_rows, n_cols = A.shape

    # candidate rows, shuffled; the +1 shift means row 0 is never selected
    candidates = np.random.permutation(n_rows - 1) + 1
    n_correlated = int(np.floor((percent / 100) * n_rows))

    # introduce correlation in A
    for row in candidates[:n_correlated]:
        scale = np.random.uniform(1, 2)
        noise = np.random.normal(0, var, size=n_cols)
        A[row, :] = scale * A[0, :] + noise

    eigs = get_eigs(correlate_fast(A))
    print(f"\nPercent correlated noise: {percent}%")
    Eigenvalues(eigs).unfold(degree=13).plot_fit(mode="noblock")

    goe_unfolded(1000, log=True).plot_fit(mode="block")
Exemplo n.º 12
0
def test_plot_rigidity() -> None:
    """Smoke-test the NNSD, level variance and spectral rigidity plots.

    Notes on how far in L the fit was observed to be good:

    - good fit for max_L=50 when using generate_eigs(10000)
    - good fit for max_L=55 when using generate_eigs(20000)
    - not likely to be good fit for max_L beyond 20 for generate_eigs(1000)

    ====== =========== ===============
    L good  len(eigs)      percent
    ====== =========== ===============
    30-40     2000
    30-50     8000      0.375 - 0.625
    50-70    10000        0.5 - 0.7
      50     20000          0.25
    ====== =========== ===============
    """
    raw = generate_eigs(2000, log=True)
    unfolded = Eigenvalues(raw).unfold(smoother="poly", degree=19)

    levelvar_L = np.arange(0.5, 100, 0.2)
    rigidity_L = np.arange(1, 200, 0.5)

    unfolded.plot_nnsd(mode="test")
    # unfolded.plot_next_nnsd(mode="test")
    unfolded.plot_level_variance(L=levelvar_L,
                                 mode="test",
                                 ensembles=["goe", "poisson"])
    unfolded.plot_spectral_rigidity(L=rigidity_L, c_iters=10000, mode="test")
Exemplo n.º 13
0
def _plot_group_nnsd_with_brody(unfoldeds, trim, bins, color, alpha, label,
                                ax) -> None:
    """Draw, for each unfolded spectrum of one group, its spacing histogram
    and its MLE-fitted Brody curve on `ax`; only the first curve is labelled."""
    for j, unf in enumerate(unfoldeds):
        spacings = unf.spacings
        if trim > 0.0:
            spacings = spacings[spacings <= trim]
        beta = fit_brody_mle(spacings)
        brody = brody_dist(spacings, beta)
        sbn.distplot(
            spacings,
            norm_hist=True,
            bins=bins,
            kde=False,
            axlabel="spacing (s)",
            color=color,
            hist_kws={"alpha": alpha},
            ax=ax,
        )
        sbn.lineplot(x=spacings,
                     y=brody,
                     color=color,
                     ax=ax,
                     alpha=0.9,
                     label=label if j == 0 else None,
                     linewidth=0.5)


def plot_pred_nnsd(
    args: Any,
    dataset_name: str,
    comparison: str,
    unfold: List[int] = [5, 7, 9, 11, 13],
    ensembles: bool = True,
    trim: float = 3.0,
    silent: bool = False,
    force: bool = False,
) -> None:
    """Plot per-subject NNSD histograms and Brody fits, one panel per
    unfolding degree, for the two groups of one comparison.

    Side effects: sets `ARGS.trim` and `ARGS.unfold["degree"]` on the
    module-level `ARGS`, and sets `args.normalize = False`.

    Parameters
    ----------
    args: Any
        Passed to `Pairings`; its `normalize` attribute is overwritten.

    dataset_name: str
        Passed to `Pairings`.

    comparison: str
        Label of the pairing to plot; must match exactly one pairing and
        have the form "<group1>_<sep>_<group2>".

    unfold: List[int]
        Unfolding polynomial degrees; one subplot column per degree. The
        list is only read, never modified.

    ensembles: bool
        If True (default), overlay the Poisson and GOE expected curves.

    trim: float
        Upper limit of the histogram range; if positive, spacings larger
        than this are discarded before fitting and plotting.

    silent: bool
        Currently unused.

    force: bool
        Currently unused.

    Raises
    ------
    ValueError
        If the number of pairings labelled `comparison` is not exactly one.
    """
    global ARGS
    # ARGS.fullpre = True
    BINS = np.linspace(0, trim, 20)
    # for trim_idx in ["(1,-1)", "(1,-20)"]:
    for trim_idx in ["(1,-1)"]:
        ARGS.trim = trim_idx
        args.normalize = False
        all_pairs = []
        for degree in unfold:
            ARGS.unfold["degree"] = degree
            pairings = Pairings(args, dataset_name)
            pairing = [p for p in pairings.pairs if p.label == comparison]
            if len(pairing) != 1:
                # covers both "no match" and "several matches"
                raise ValueError(
                    f"Expected exactly one pairing labelled {comparison!r}, "
                    f"found {len(pairing)}.")
            all_pairs.append(pairing[0])
        g1, _, g2 = all_pairs[0].label.split("_")  # groupnames
        fig: plt.Figure
        fig, axes = plt.subplots(nrows=1,
                                 ncols=len(all_pairs),
                                 sharex=True,
                                 squeeze=False)
        for i, (pair, unfold_degree) in enumerate(zip(all_pairs, unfold)):
            ax: plt.Axes = axes.flat[i]
            unfold_args = {**ARGS.unfold, **dict(degree=unfold_degree)}
            unf1 = [
                Eigenvalues(np.load(e)).unfold(**unfold_args)
                for e in pair.eigs1
            ]
            unf2 = [
                Eigenvalues(np.load(e)).unfold(**unfold_args)
                for e in pair.eigs2
            ]
            # histogram opacity scales inversely with the group size so the
            # stacked per-subject histograms add up to a readable density
            alpha1, alpha2 = 1 / len(unf1), 1 / len(unf2)

            _plot_group_nnsd_with_brody(unf1, trim, BINS, "#FD8208", alpha1,
                                        g1, ax)
            _plot_group_nnsd_with_brody(unf2, trim, BINS, "#000000", alpha2,
                                        g2, ax)

            if ensembles:
                # expected distributions for the classical ensembles
                s = np.linspace(0, trim, 10000)
                poisson = GDE.nnsd(spacings=s)
                goe = GOE.nnsd(spacings=s)
                sbn.lineplot(x=s,
                             y=poisson,
                             color="#08FD4F",
                             label="Poisson",
                             ax=ax,
                             alpha=0.5)
                sbn.lineplot(x=s,
                             y=goe,
                             color="#0066FF",
                             label="GOE",
                             ax=ax,
                             alpha=0.5)
            ax.legend().set_visible(False)
            ax.set_title(f"Unfolding Degree {unfold_degree}")
            ax.set_xlabel("")
            ax.set_ylabel("")
        # only the first panel shows a legend
        axes.flat[0].legend().set_visible(True)
        fig.text(0.5, 0.04, "spacing (s)", ha="center", va="center")  # xlabel
        fig.text(0.03,
                 0.5,
                 "p(s)",
                 ha="center",
                 va="center",
                 rotation="vertical")  # ylabel
        fig.set_size_inches(w=7, h=1.5)  # TMI full-page max width is 7 inches
        # fig.set_size_inches(w=3.5, h=3.5)  # TMI half-page max width is 3.5 inches
        fig.subplots_adjust(top=0.83,
                            bottom=0.2,
                            left=0.075,
                            right=0.955,
                            hspace=0.2,
                            wspace=0.23)
        make_plot(fig, show=False, fmt="png", fignum="9")
Exemplo n.º 14
0
                    nargs=1,
                    action="store")

# Script body: load a BOLD image and a mask, drop unusable voxels, compute the
# eigenvalues of the remaining time series, and save them with the data shape.
args = parser.parse_args()

# each option value is indexed with [0], i.e. the parser stores them as lists
img = nib.load(args.bold[0]).get_fdata()
mask = np.array(nib.load(args.mask[0]).get_fdata(), dtype=bool)

# flatten all leading axes into one, keeping the last axis separate
# (presumably voxels x timepoints - TODO confirm against the input format)
N, t = (np.prod(img.shape[:-1]), img.shape[-1])
img = img.reshape([N, t])
mask = mask.reshape(-1)

# remove dead or constant voxels
# (a row is masked out if its sum, its sum of squares, or its sample
# standard deviation is exactly zero)
for i, signal in enumerate(img):
    if np.sum(signal) == 0 or np.sum(signal * signal) == 0 or np.std(
            signal, ddof=1) == 0:
        mask[i] = False

# keep only the rows still selected by the mask
brain = img[mask, :]

eigs = Eigenvalues.from_time_series(brain, covariance=False, trim_zeros=False)
vals = eigs.vals

outfile = Path(args.outfile[0])
# the shape file sits next to the eigenvalue file, with "eigs" replaced by
# "shapes" in the file name only
shape_outfile = outfile.parent / outfile.name.replace("eigs", "shapes")
parent = outfile.resolve().parent.resolve()
# make sure the output directory exists before saving
mkdirp(parent)
np.save(outfile, vals, allow_pickle=False)
np.save(shape_outfile, np.array(brain.shape, dtype=int), allow_pickle=False)
print(f"Saved eigenvalues to {args.outfile[0]}")
Exemplo n.º 15
0
    def plot_nnsd(
        self,
        trim_args: str,
        unfold_args: dict,
        n_bins: int = 20,
        max_spacing: float = 5.0,
        title: str = None,
        outdir: Path = None,
    ) -> None:
        """Plot the nearest-neighbour spacing distributions of both groups.

        One panel per group: a histogram and KDE per subject, plus the
        Poisson and GOE expected curves. Each panel title reports the mean
        Brody parameter read from `self.brody`.

        Parameters
        ----------
        trim_args: str
            Either one of "(1,:)", "", "(0,:)" (drop only the smallest
            eigenvalue) or a string evaluating to a `(low, high)` slice pair.

        unfold_args: dict
            Keyword arguments for `Eigenvalues.unfold`.

        n_bins: int
            Number of histogram bins.

        max_spacing: float
            Spacings greater than or equal to this are dropped, and the
            theoretical curves are drawn on [0, max_spacing].

        title: str
            Figure title; defaults to "<label>: NNSD".

        outdir: Path
            If None (default) the figure is shown; otherwise it is saved as
            a PNG in this directory (created if needed).

        Returns
        -------
        None. Returns immediately, without plotting, for the labels
        "control_pre_v_parkinsons" and "control_v_park_pre".
        """
        # label is always g1_v_g2, we want "attention" to be orange, "nonattend"
        # to be black
        if self.label in ["rest_v_task", "nopain_v_pain", "control_v_control_pre", "park_pre_v_parkinsons"]:
            c1, c2 = "#000000", "#FD8208"
        elif self.label in [
            "allpain_v_nopain",
            "allpain_v_duloxetine",
            "high_v_low",
            "control_pre_v_park_pre",
            "control_v_parkinsons",
        ]:
            c1, c2 = "#FD8208", "#000000"
        elif self.label in ["control_pre_v_parkinsons", "control_v_park_pre"]:
            return  # meaningless comparison
        else:
            c1, c2 = "#EA00FF", "#FD8208"

        def _sorted_unfolded(paths):
            """Load, trim, unfold and sort the eigenvalues of each path."""
            results = []
            for path in paths:
                vals = np.load(path)
                if trim_args in ["(1,:)", "", "(0,:)"]:
                    vals = vals[1:]  # smallest eigenvalue is always spurious here
                else:
                    # NOTE(review): eval() executes arbitrary code. Only
                    # acceptable while trim_args comes from trusted
                    # configuration such as "(1,-1)".
                    low, high = eval(trim_args)
                    vals = vals[low:high]
                results.append(np.sort(Eigenvalues(vals).unfold(**unfold_args).vals))
            return results

        unfolded1 = _sorted_unfolded(self.eigs1)
        unfolded2 = _sorted_unfolded(self.eigs2)

        # trim largest histogram skewing spacing
        spacings1 = [s[s < max_spacing] for s in (np.diff(u) for u in unfolded1)]
        spacings2 = [s[s < max_spacing] for s in (np.diff(u) for u in unfolded2)]
        # .iloc[0] instead of float(Series), which pandas has deprecated
        mean_brody1 = np.round(float(pd.read_pickle(self.brody[0]).mean(axis=1).iloc[0]), 2)
        mean_brody2 = np.round(float(pd.read_pickle(self.brody[1]).mean(axis=1).iloc[0]), 2)

        _configure_sbn_style()
        fig, axes = plt.subplots(ncols=2, sharex=True, sharey=True)

        # plot curves for each group; opacity is based on the size of the
        # group being drawn (with a floor of 0.1)
        for group, color, ax in ((spacings1, c1, axes[0]), (spacings2, c2, axes[1])):
            for spacings in group:
                alpha = np.max([0.1, 1 / len(group)])
                sbn.distplot(
                    spacings,
                    norm_hist=True,
                    bins=n_bins,
                    kde=True,
                    axlabel="spacing (s)",
                    color=color,
                    kde_kws={"alpha": alpha},
                    hist_kws={"alpha": alpha},
                    ax=ax,
                )

        # plot theoretically-expected curves
        s = np.linspace(0, max_spacing, 5000)
        poisson_curve = Poisson.nnsd(spacings=s)
        goe_curve = GOE.nnsd(spacings=s)
        for ax in axes:
            sbn.lineplot(x=s, y=poisson_curve, color="#08FD4F", label="Poisson", ax=ax)
        for ax in axes:
            sbn.lineplot(x=s, y=goe_curve, color="#0066FF", label="GOE", ax=ax)

        # ensure all plots have identical axes
        for ax in axes:
            ax.set_ylim(top=2.0)

        fig.suptitle(f"{self.label}: NNSD" if title is None else title)
        for i, ax in enumerate(axes):
            ax.set_xlabel("spacing (s)")
            ax.set_ylabel("density p(s)", fontname="DejaVu Sans")
            ax.set_title(
                f"{self.subgroup1 if i == 0 else self.subgroup2} (<β> = {mean_brody1 if i == 0 else mean_brody2})"
            )
        if outdir is None:
            plt.show()
            plt.close()
        else:
            os.makedirs(outdir, exist_ok=True)
            prefix = _prefix(trim_args, unfold_args)
            outfile = outdir / f"{prefix}_{self.label}_nnsd.png"
            fig.set_size_inches(10, 5)
            plt.savefig(outfile, dpi=300)
            plt.close()
            print(f"Pooled nnsd plot saved to {relpath(outfile)}")
Exemplo n.º 16
0
def plot_subject_nnsd(args: Any,
                      n_bins: int = 20,
                      outfile: Path = None) -> None:
    """Plot, per subject, the NNSD of all of that subject's runs.

    Each subject gets one panel of a fixed 6 x 4 grid (so at most 24
    subjects can be drawn) showing a histogram per run, the mean KDE over
    runs, and the Poisson and GOE expected curves on [0, 3].

    Parameters
    ----------
    args: Any
        Must provide `trim` (str) and `unfold` (dict of keyword arguments
        for `Eigenvalues.unfold`).

    n_bins: int
        Number of histogram bins on [0, 3].

    outfile: Path
        If None (default) the figure is shown; otherwise it is saved there.
        The path must be located under `DATA_ROOT`, since it is reported
        relative to it.
    """
    subjects = get_subjects_dict()
    sbn.set_context("paper")
    sbn.set_style("ticks", {"ytick.left": False})
    c1 = "#000000"
    fig, axes = plt.subplots(ncols=6, nrows=4, sharex=True, sharey=True)
    # one progress tick per run actually present, rather than assuming 8 each
    n_runs = sum(len(subject["runs"]) for subject in subjects.values())
    pbar = tqdm(desc="NNSD", total=n_runs)
    for i, (subj_id, subject) in enumerate(subjects.items()):
        ax: Axes = axes.flat[i]
        eigpaths = subject["runs"]
        unfoldeds = []
        for path in eigpaths:
            vals = np.load(path)
            if args.trim in ["(1,:)", "", "(0,:)"]:
                vals = vals[1:]  # smallest eigenvalue is always spurious here
            else:
                # NOTE(review): eval() executes arbitrary code. Only
                # acceptable while args.trim comes from trusted
                # configuration such as "(1,-1)".
                low, high = eval(args.trim)
                vals = vals[low:high]
            unfoldeds.append(
                np.sort(Eigenvalues(vals).unfold(**args.unfold).vals))
        all_spacings = [np.diff(unfolded) for unfolded in unfoldeds]
        kde_gridsize = 1000
        kdes = np.empty([len(all_spacings), kde_gridsize], dtype=float)
        s = np.linspace(0, 3, kde_gridsize)
        bins = np.linspace(0, 3, n_bins + 1)
        for j, spacings in enumerate(all_spacings):
            # histogram uses only spacings in (0, 3]; the KDE below is
            # computed from the full set of spacings
            sbn.distplot(
                spacings[(spacings > 0) & (spacings <= 3)],
                norm_hist=True,
                bins=bins,
                kde=False,
                color=c1,
                hist_kws={
                    "alpha": 1.0 / 8,
                    "range": (0.0, 3.0)
                },
                ax=ax,
            )
            kdes[j, :] = _kde(spacings, grid=s)
            pbar.update()
        sbn.lineplot(x=s,
                     y=kdes.mean(axis=0),
                     color=c1,
                     label="Mean KDE",
                     ax=ax)
        sbn.lineplot(x=s,
                     y=Poisson.nnsd(spacings=s),
                     color="#08FD4F",
                     label="Poisson",
                     ax=ax)
        sbn.lineplot(x=s,
                     y=GOE.nnsd(spacings=s),
                     color="#0066FF",
                     label="GOE",
                     ax=ax)

        ax.set_title(f"subj-{subj_id}")
        ax.set_ylabel("")
        ax.legend(frameon=False, framealpha=0)
        plt.setp(ax.get_legend().get_texts(), fontsize="6")
        # drop the per-panel legend; a single figure-level legend is added
        # below. NOTE(review): _remove_legend is a private matplotlib method.
        handle, labels = ax.get_legend_handles_labels()
        ax._remove_legend(handle)
        ticks = [0, 1, 2, 3]
        ax.set_xticks(ticks)
        ax.set_xticklabels(ticks)
    pbar.close()

    fig.suptitle("Per-Subject NNSD")
    fig.set_size_inches(10, 6)
    fig.subplots_adjust(left=0.13, bottom=0.15, wspace=0.1, hspace=0.35)
    fig.text(0.5, 0.04, "spacing (s)", ha="center", va="center")  # xlabel
    fig.text(0.05,
             0.5,
             "density p(s)",
             ha="center",
             va="center",
             rotation="vertical")  # ylabel
    plt.setp(axes, xlim=(0.0, 3.0), ylim=(0.0, 1.2))  # better use of space
    # the shared legend is built from the handles of the last panel
    handles, labels = axes.flat[-1].get_legend_handles_labels()
    fig.legend(handles,
               labels,
               loc="center right",
               frameon=False,
               framealpha=0,
               fontsize="8")
    if outfile is None:
        plt.show()
    else:
        fig.savefig(str(outfile.resolve()), dpi=300)
        print(f"Saved NNSD plot to {str(outfile.relative_to(DATA_ROOT))}")
    plt.close()
Exemplo n.º 17
0
def test_brody_plot() -> None:
    """Smoke-test NNSD plots with Brody fits on 2 x 2 grids of panels.

    First GOE spectra of several sizes, then eigenvalues of the correlation
    matrix of random time series, untrimmed and trimmed, each with the
    "mle" and "spacings" Brody fits.
    """
    bw = 0.2
    # bw = "scott"
    mode: PlotMode = "test"
    ensembles = ["goe", "poisson"]

    # test GOE eigs
    for N in [100, 250, 500, 1000]:
        _configure_sbn_style()
        fig, axes = plt.subplots(2, 2)
        for panel in axes.flat:
            unfolded = Eigenvalues(generate_eigs(N)).unfold(degree=7)
            unfolded.plot_nnsd(
                brody=True,
                kde_bw=bw,
                title=f"GOE N={N}",
                ensembles=ensembles,
                mode="return",
                fig=fig,
                axes=panel,
            )
        _handle_plot_mode(mode, fig, axes)

    # test time series: (trim small eigenvalues?, brody fit, panel title)
    series_cases = [
        (False, "mle", "t-series (untrimmed)(MLE)"),
        (False, "spacings", "t-series (untrimmed)(spacings)"),
        (True, "spacings", "t-series (trimmed)(spacings)"),
        (True, "mle", "t-series (trimmed)(MLE)"),
    ]
    for trim_small, brody_fit, title in series_cases:
        _configure_sbn_style()
        fig, axes = plt.subplots(2, 2)
        for panel in axes.flat:
            series = np.random.standard_normal([1000, 250])
            eigs = np.linalg.eigvalsh(np.corrcoef(series))
            if trim_small:
                # keep only eigenvalues well above the smallest magnitude
                eigs = eigs[eigs > 100 * np.abs(eigs.min())]
            Eigenvalues(eigs).unfold(degree=7).plot_nnsd(
                brody=True,
                brody_fit=brody_fit,
                mode=mode,
                title=title,
                ensembles=ensembles,
                kde_bw=bw,
                fig=fig,
                axes=panel,
            )
        _handle_plot_mode(mode, fig, axes)

    if mode != "test":
        plt.show()
Exemplo n.º 18
0
def precompute_marchenko(eigpaths: List[Path],
                         out: Path,
                         force: bool = False,
                         silent: bool = False) -> Path:
    """Take the eigenvalues saved in `eigpaths`, compute the Marchenko-Pastur
    endpoints (both shifted and unshifted), and save that in a DataFrame at
    `out`. The DataFrame also contains the proportion of eigenvalues lying
    strictly within those bounds, as "noise_ratio" and "noise_ratio_shifted".

    The DataFrame has one column per file (named after the file stem) and
    the rows "low", "high", "low_shift", "high_shift", "noise_ratio",
    "noise_ratio_shifted".

    Parameters
    ----------
    eigpaths: List[Path]
        Paths to `.npy` files of eigenvalues. For each one, a companion
        file with "eigs" replaced by "shapes" in the file name must exist in
        the same directory and hold the pair (N, T), passed on as
        `n_series` and `series_length`.

    out: Path
        Where the DataFrame is pickled.

    force: bool
        If False (default), don't recompute the values if `out` already
        exists.

    silent: bool
        If False (default) display a tqdm progress bar while calculating.

    Returns
    -------
    pickle: Path
        `out`, the path to the pickle file holding the precomputed values.
    """
    if not force and out.exists():
        return out
    marchenko_df = pd.DataFrame(index=[
        "low", "high", "low_shift", "high_shift", "noise_ratio",
        "noise_ratio_shifted"
    ])
    desc = "{} - Marchenko"
    pbar = tqdm(total=len(eigpaths),
                desc=desc.format("eigs-XX"),
                disable=silent)
    # try/finally so the progress bar is released even if one file fails
    try:
        for path in eigpaths:
            eigname = path.stem
            vals = np.load(path)
            # trim the phoney zero eigenvalue due to correlation rank
            vals = vals[1:]
            # substitute in the file name only, so that a directory whose
            # name contains "eigs" is left alone
            shapes_path = path.parent / path.name.replace("eigs", "shapes")
            N, T = np.load(shapes_path)
            eigs = Eigenvalues(vals)
            _, marchenko = eigs.trim_marchenko_pastur(series_length=T,
                                                      n_series=N,
                                                      use_shifted=False)
            _, marchenko_shifted = eigs.trim_marchenko_pastur(
                series_length=T, n_series=N, use_shifted=True)
            noise_ratio = np.mean((vals > marchenko[0])
                                  & (vals < marchenko[1]))
            noise_ratio_shifted = np.mean((vals > marchenko_shifted[0])
                                          & (vals < marchenko_shifted[1]))
            marchenko_df[eigname] = [
                marchenko[0],
                marchenko[1],
                marchenko_shifted[0],
                marchenko_shifted[1],
                noise_ratio,
                noise_ratio_shifted,
            ]
            pbar.set_description(desc=desc.format(eigname))
            pbar.update()
    finally:
        pbar.close()
    marchenko_df.to_pickle(out)
    return out
Exemplo n.º 19
0
def test_unfold_compare() -> None:
    """Print ensemble-comparison tables for seeded GOE and Poisson spectra.

    Each case generates 2000 eigenvalues (seed=2), unfolds them with a
    degree-13 fit, and compares the result to a reference ensemble using
    the "msqd", "mad" and "corr" metrics. The Poisson-vs-GOE case is run
    twice.
    """
    metrics: List[Metric] = ["msqd", "mad", "corr"]
    rule = "=" * 80

    # (banner text, extra generate_eigs keyword arguments, reference ensemble)
    cases = [
        ("Comparing a GOE matrix to GOE", {}, GOE),
        ("Comparing a Poisson / GDE matrix to GOE", {"kind": "poisson"}, GOE),
        ("Comparing a Poisson / GDE matrix to GOE", {"kind": "poisson"}, GOE),
        ("Comparing a Poisson / GDE matrix to Poisson / GDE",
         {"kind": "poisson"}, GDE),
    ]
    for banner, gen_kwargs, reference in cases:
        print("\n")
        print(rule)
        print(banner)
        print(rule)
        raw = generate_eigs(2000, seed=2, **gen_kwargs)
        unfolded = Eigenvalues(raw).unfold(degree=13)
        df = unfolded.ensemble_compare(ensemble=reference,
                                       metrics=metrics,
                                       show_progress=True)
        print(df)