def make_surrogates(data, parcellation, scale, spatnull):
    """
    Generates surrogates for `data` using `spatnull` method

    Surrogates are generated independently for every `(data, dist, idx)`
    triplet yielded by `putils.yield_data_dist`. Entries of `data` that are
    NaN are left out of surrogate generation and are NaN in every surrogate.

    Parameters
    ----------
    data : (N,) pd.DataFrame
        Data for which to generate surrogates
    parcellation : {'atl-cammoun2012', 'atl-schaefer2018', 'vertex'}
        Name of parcellation; 'vertex' selects the sampled variant of the
        'burt2020' method
    scale : str
        Scale of `parcellation`, passed on to `putils.yield_data_dist`
    spatnull : {'burt2018', 'burt2020', 'moran'}
        Name of the method used to generate surrogates

    Returns
    -------
    surrogates : (N, `N_PERM`) np.ndarray
        Surrogate maps, one per column

    Raises
    ------
    ValueError
        If `spatnull` is not one of the supported methods
    """

    if spatnull not in ('burt2018', 'burt2020', 'moran'):
        raise ValueError(f'Cannot make surrogates for null method {spatnull}')

    # smallest non-NaN value across *all* of the data, so that the same
    # offset is applied to every subset in the 'burt2018' branch below
    darr = np.asarray(data)
    dmin = darr[np.logical_not(np.isnan(darr))].min()

    surrogates = np.zeros((len(data), N_PERM))
    # NOTE(review): `inverse` presumably requests inverse-distance weights
    # for the Moran method -- confirm against `putils.yield_data_dist`
    for hdata, dist, idx in putils.yield_data_dist(
        DISTDIR, parcellation, scale, data, inverse=(spatnull == 'moran')
    ):

        # handle NaNs before generating surrogates; should only be relevant
        # when using vertex-level data, but good nonetheless
        mask = np.logical_not(np.isnan(hdata))
        surrogates[idx[np.logical_not(mask)]] = np.nan
        hdata, dist, idx = hdata[mask], dist[np.ix_(mask, mask)], idx[mask]

        if spatnull == 'burt2018':
            # Box-Cox transformation requires positive data. The shift is
            # applied out-of-place because an in-place `+=` of a float onto
            # integer-typed data raises a casting error in NumPy
            hdata = hdata + (np.abs(dmin) + 0.1)
            surrogates[idx] = \
                burt.batch_surrogates(dist, hdata, n_surr=N_PERM, seed=SEED)
        elif spatnull == 'burt2020':
            if parcellation == 'vertex':
                # the sampled generator is given row-sorted distances along
                # with the indices that produce that ordering
                index = np.argsort(dist, axis=-1)
                dist = np.sort(dist, axis=-1)
                knn = 1000 if USE_KNN else len(hdata)
                surrogates[idx] = \
                    mapgen.Sampled(hdata, dist, index, knn=knn,
                                   seed=SEED)(N_PERM).T
            else:
                surrogates[idx] = \
                    mapgen.Base(hdata, dist, seed=SEED)(N_PERM, 50).T
        elif spatnull == 'moran':
            mrs = moran.MoranRandomization(joint=True, n_rep=N_PERM,
                                           tol=1e-6, random_state=SEED)
            # cap the number of threads used while fitting / randomizing
            with threadpoolctl.threadpool_limits(limits=2):
                surrogates[idx] = mrs.fit(dist).randomize(hdata).T

    return surrogates
def make_surrogates(data, parcellation, scale, spatnull, fn=None):
    """
    Generates surrogates for `data` using `spatnull` method

    Surrogates are generated separately for the left ('lh') and right ('rh')
    hemispheres, each with the distance matrix returned by `get_distmat`.
    Entries of `data` that are NaN are left out of surrogate generation and
    are NaN in every surrogate.

    Parameters
    ----------
    data : (N,) pd.DataFrame or np.ndarray
        Data for which to generate surrogates. When `data` has an index of
        string labels, rows are assigned to a hemisphere by the 'lh' / 'rh'
        prefix of their label; otherwise the first half of `data` is treated
        as the left hemisphere and the second half as the right
    parcellation : str
        Name of parcellation, passed on to `get_distmat`; 'vertex' selects
        the sampled variant of the 'burt2020' method
    scale : str
        Scale of `parcellation`, passed on to `get_distmat`
    spatnull : {'burt2018', 'burt2020', 'moran'}
        Name of the method used to generate surrogates
    fn : optional
        Passed through unchanged to `get_distmat`. Default: None

    Returns
    -------
    surrogates : (N, `N_PERM`) np.ndarray
        Surrogate maps, one per column

    Raises
    ------
    ValueError
        If `spatnull` is not one of the supported methods
    """
    if spatnull not in ('burt2018', 'burt2020', 'moran'):
        raise ValueError(f'Cannot make surrogates for null method {spatnull}')

    # smallest non-NaN value across *all* of the data, so that the same
    # offset is applied to both hemispheres in the 'burt2018' branch below
    darr = np.asarray(data)
    dmin = darr[np.logical_not(np.isnan(darr))].min()

    surrogates = np.zeros((len(data), N_PERM))
    for n, hemi in enumerate(('lh', 'rh')):
        dist = get_distmat(hemi, parcellation, scale, fn=fn)
        try:
            # labelled data: pick out rows whose label starts with `hemi`
            idx = np.asarray([
                row for row, label in enumerate(data.index)
                if label.startswith(hemi)
            ])
            hdata = np.squeeze(np.asarray(data.iloc[idx]))
        except AttributeError:
            # unlabelled data: first half is 'lh', second half is 'rh'
            idx = np.arange(n * (len(data) // 2), (n + 1) * (len(data) // 2))
            hdata = np.squeeze(data[idx])

        # handle NaNs before generating surrogates; should only be relevant
        # when using vertex-level data, but good nonetheless
        mask = np.logical_not(np.isnan(hdata))
        surrogates[idx[np.logical_not(mask)]] = np.nan
        hdata, dist, idx = hdata[mask], dist[np.ix_(mask, mask)], idx[mask]

        if spatnull == 'burt2018':
            # Box-Cox transformation requires positive data. The shift is
            # applied out-of-place because an in-place `+=` of a float onto
            # an integer-typed array raises a casting error in NumPy
            hdata = hdata + (np.abs(dmin) + 0.1)
            surrogates[idx] = \
                burt.batch_surrogates(dist, hdata, n_surr=N_PERM, seed=SEED)
        elif spatnull == 'burt2020':
            if parcellation == 'vertex':
                # the sampled generator is given row-sorted distances along
                # with the indices that produce that ordering
                index = np.argsort(dist, axis=-1)
                dist = np.sort(dist, axis=-1)
                surrogates[idx] = \
                    mapgen.Sampled(hdata, dist, index, seed=SEED)(N_PERM).T
            else:
                surrogates[idx] = \
                    mapgen.Base(hdata, dist, seed=SEED)(N_PERM, 50).T
        elif spatnull == 'moran':
            # convert distances to inverse-distance weights; the diagonal is
            # set to 1 first so that it stays finite (1) after inversion
            dist = dist.astype('float64')  # required for some reason...
            np.fill_diagonal(dist, 1)
            dist **= -1
            mrs = moran.MoranRandomization(joint=True, n_rep=N_PERM,
                                           tol=1e-6, random_state=SEED)
            surrogates[idx] = mrs.fit(dist).randomize(hdata).T

    return surrogates
def run_null(netclass, parc, scale, spintype):
    """
    Compares network-averaged myelin values against a spatial null model

    Previously generated null values are loaded from disk when the expected
    output file already exists; otherwise they are generated with the
    requested `spintype` method.

    Parameters
    ----------
    netclass : {'vek', 'yeo'}
        Network partition to test
    parc : str
        Name of parcellation to be used
    scale : str
        Scale of `parcellation` to be used
    spintype : str
        Name of spin method to be used

    Returns
    -------
    stats : pd.DataFrame
        Generated statistics with columns ['parcellation', 'scale', 'spintype',
        'netclass', 'network', 'zscore', 'pval']
    """
    data = load_data(netclass, parc, scale)

    def _permuted_netmeans(permuted, n_perm):
        # network means of every permuted map, stacked one row per map
        rows = [
            _get_netmeans(permuted[..., perm], data['networks'])
            for perm in range(n_perm)
        ]
        return np.vstack(rows)

    print(f'Running {spintype:>9} spins for {scale}: ', end='', flush=True)
    nullfile = (HCPDIR / parc / 'nulls' / netclass / spintype
                / f'{scale}_nulls.csv')

    if nullfile.exists():
        # nulls were generated on an earlier run; just re-use them
        permnets = np.loadtxt(nullfile, delimiter=',')
    elif spintype == 'cornblath':
        # this method spins vertices rather than parcels, so the parcellated
        # data are projected to the surface using pre-generated vertex spins
        spinfile = (SPDIR / 'vertex' / 'vazquez-rodriguez'
                    / 'fsaverage5_spins.csv')

        # annotation files are what map parcels onto surface vertices
        fetcher = getattr(nndata, f"fetch_{parc.replace('atl-', '')}")
        annotations = fetcher('fsaverage5', data_dir=ROIDIR)[scale]

        print('Pre-loading spins...', end='\b' * 20, flush=True)
        spins = np.loadtxt(spinfile, delimiter=',', dtype='int32')
        n_rotate = spins.shape[-1]

        # permdata will be an (R, T, n_rotate) array where `R` is regions
        # and `T` is 1 (myelination)
        permdata = nnsurf.spin_data(
            np.asarray(data['myelin']),
            version='fsaverage5',
            lhannot=annotations.lh,
            rhannot=annotations.rh,
            spins=spins,
            n_rotate=n_rotate,
            verbose=True,
        )
        permnets = _permuted_netmeans(permdata, n_rotate)
        putils.save_dir(nullfile, permnets)
    elif spintype in ('burt2018', 'burt2020'):
        surrdir = SURRDIR / parc / spintype / 'hcp'
        surrogates = get_surrogates(data['myelin'], surrdir, scale)
        permnets = _permuted_netmeans(surrogates, surrogates.shape[-1])
        putils.save_dir(nullfile, permnets)
    elif spintype == 'moran':
        n_surr = 10000
        surrogates = np.zeros((len(data['myelin']), n_surr))
        subsets = putils.yield_data_dist(DISTDIR, parc, scale, data['myelin'])
        for hemi, dist, idx in subsets:
            mrs = moran.MoranRandomization(
                joint=True, n_rep=n_surr, tol=1e-6, random_state=1234
            )
            mrs.fit(dist)
            randomized = mrs.randomize(hemi)
            surrogates[idx] = np.squeeze(randomized).T

        permnets = _permuted_netmeans(surrogates, surrogates.shape[-1])
        putils.save_dir(nullfile, permnets)
    else:
        # every other method works from parcel-level spin files
        spinfile = SPDIR / parc / spintype / f'{scale}_spins.csv'
        permnets = gen_permnets(
            data['myelin'], data['networks'], spinfile, nullfile
        )

    # empirical network averages, tested against the permuted ones
    real = _get_netmeans(data['myelin'], data['networks'])
    zscores, pvals = get_fwe(real, permnets)

    return pd.DataFrame({
        'parcellation': parc,
        'scale': scale,
        'spintype': spintype,
        'netclass': netclass,
        'network': list(NET_CODES[netclass].keys()),
        'zscore': zscores,
        'pval': pvals,
    })
# Example #4
0
    lhdata, rhdata = data[:end - start], data[end - start:]
    lhdist = np.loadtxt(DISTDIR / name / 'nomedial' / f'{scale}_lh_dist.csv',
                        delimiter=',')
    rhdist = np.loadtxt(DISTDIR / name / 'nomedial' / f'{scale}_rh_dist.csv',
                        delimiter=',')
    plot = np.hstack((burt.make_surrogate(lhdist, lhdata + 1, seed=1234),
                      burt.make_surrogate(rhdist, rhdata + 1, seed=1234)))
    save_brainmap(plot, FIGDIR / 'burt2018_surf.png', lh, rh, **OPTS)

    # burt 2020 (need to rescale to original data range)
    plot = np.hstack((nnutils.rescale(
        Base(lhdata, lhdist, seed=1234)(200, 50).T[:, 180], lhdata.min(),
        lhdata.max()),
                      nnutils.rescale(
                          Base(rhdata, rhdist, seed=1234)(200, 50).T[:, 180],
                          rhdata.min(), rhdata.max())))
    save_brainmap(plot, FIGDIR / 'burt2020_surf.png', lh, rh, **OPTS)

    # moran spectral randomization
    np.fill_diagonal(lhdist, 1)
    np.fill_diagonal(rhdist, 1)
    lhdist **= -1
    rhdist **= -1
    mrs = moran.MoranRandomization(joint=True,
                                   n_rep=1000,
                                   tol=1e-6,
                                   random_state=1234)
    plot = np.hstack((np.squeeze(mrs.fit(lhdist).randomize(lhdata)),
                      np.squeeze(mrs.fit(rhdist).randomize(rhdata))))[611]
    save_brainmap(plot, FIGDIR / 'moran_surf.png', lh, rh, **OPTS)
def run_null(parcellation, scale, spintype):
    """
    Counts significant pairwise correlations under a spatial null model

    Previously generated null correlations are loaded from disk when the
    expected output file already exists; otherwise they are generated with
    the requested `spintype` method.

    Parameters
    ----------
    parcellation : str
        Name of parcellation to be used
    scale : str
        Scale of `parcellation` to be used
    spintype : str
        Name of spin method to be used

    Returns
    -------
    stats : pd.DataFrame
        Generated statistics with columns ['parcellation', 'scale', 'spintype',
        'n_sig']
    """

    nsdata = load_data(parcellation, scale)

    def _permuted_corrs(permuted):
        # one stacked entry per permutation along the last axis of `permuted`
        rows = [
            _get_permcorr(nsdata, permuted[..., perm])
            for perm in range(permuted.shape[-1])
        ]
        return np.vstack(rows)

    print(f'Running {spintype:>9} spins for {scale}: ', end='', flush=True)
    nullfile = (NSDIR / parcellation / 'nulls' / spintype
                / f'{scale}_nulls.csv')

    if nullfile.exists():
        # nulls were generated on an earlier run; just re-use them
        permcorrs = np.loadtxt(nullfile).reshape(-1, 1)
    elif spintype == 'cornblath':
        # this method spins vertices rather than parcels, so the parcellated
        # data are projected to the surface using pre-generated vertex spins
        spinfile = (SPDIR / 'vertex' / 'vazquez-rodriguez'
                    / 'fsaverage5_spins.csv')

        # annotation files for the requested parcellation / scale
        fetcher = getattr(nndata, f"fetch_{parcellation.replace('atl-', '')}")
        annotations = fetcher('fsaverage5', data_dir=ROIDIR)[scale]

        # load the spins up front; permdata will be an (R, T, n_rotate) array
        print('Pre-loading spins...', end='\b' * 20, flush=True)
        spins = np.loadtxt(spinfile, delimiter=',', dtype='int32')
        permdata = nnsurf.spin_data(
            nsdata,
            version='fsaverage5',
            lhannot=annotations.lh,
            rhannot=annotations.rh,
            spins=spins,
            n_rotate=spins.shape[-1],
            verbose=True,
        )
        permcorrs = _permuted_corrs(permdata)
        putils.save_dir(nullfile, permcorrs)
    elif spintype in ('burt2018', 'burt2020'):
        surrdir = SURRDIR / parcellation / spintype / 'neurosynth'
        # build the permuted data from the surrogate resampling arrays
        print('Generating surrogates...', end='\b' * 24, flush=True)
        permdata = get_surrogates(nsdata, surrdir, scale)
        permcorrs = _permuted_corrs(permdata)
        putils.save_dir(nullfile, permcorrs)
    elif spintype == 'moran':
        n_surr = 10000
        surrogates = np.zeros(nsdata.shape + (n_surr,))
        subsets = putils.yield_data_dist(DISTDIR, parcellation, scale, nsdata)
        for hemi, dist, idx in subsets:
            mrs = moran.MoranRandomization(
                joint=True, n_rep=n_surr, tol=1e-6, random_state=1234
            )
            mrs.fit(dist)
            # move the leading axis of the randomized output to the end so
            # that it lines up with the trailing axis of `surrogates`
            randomized = mrs.randomize(hemi)
            surrogates[idx] = randomized.transpose(1, 2, 0)

        permcorrs = _permuted_corrs(surrogates)
        putils.save_dir(nullfile, permcorrs)
    else:
        # every other method works from parcel-level spin files
        spinfile = SPDIR / parcellation / spintype / f'{scale}_spins.csv'
        permcorrs = gen_permcorrs(nsdata, spinfile, nullfile)

    # drop rows with no data at all before computing empirical correlations
    complete = nsdata.dropna(axis=0, how='all')
    pvals = get_fwe(np.corrcoef(complete.T), permcorrs)

    # only count each pair once (upper triangle, excluding the diagonal)
    n_sig = np.sum(np.triu(pvals < ALPHA, k=1))
    stats = {
        'parcellation': parcellation,
        'scale': scale,
        'spintype': spintype,
        'n_sig': n_sig,
    }

    return pd.DataFrame(stats, index=[0])