Example #1
def run_permutation_ttest(tmin=None, tmax=None, p_threshold=0.05, n_permutations=1024, inverse_method='dSPM', n_jobs=1):
    for cond_id, cond_name in enumerate(events_id.keys()):
        #todo: calc the 36
        controls_data = get_morphed_epochs_stcs(tmin, tmax, cond_name, get_healthy_controls(),
            36, inverse_method)
        controls_data = abs(controls_data)
        for patient in get_patients():
            try:
                print(patient, cond_name)
                patient_data = get_morphed_epochs_stcs(tmin, tmax, cond_name, [patient], None, inverse_method)
                patient_data = abs(patient_data)
                print(patient_data.shape, controls_data.shape)
                data = controls_data - patient_data
                del patient_data
                gc.collect()
                data = np.transpose(data, [2, 1, 0])
                connectivity = spatial_tris_connectivity(grade_to_tris(5))
                t_threshold = -stats.distributions.t.ppf(p_threshold / 2., data.shape[0] - 1)
                T_obs, clusters, cluster_p_values, H0 = \
                    spatio_temporal_cluster_1samp_test(data, connectivity=connectivity, n_jobs=n_jobs,
                        threshold=t_threshold, n_permutations=n_permutations)
                results_file_name = op.join(LOCAL_ROOT_DIR, 'permutation_ttest_results', '{}_{}_{}'.format(patient, cond_name, inverse_method))
                np.savez(results_file_name, T_obs=T_obs, clusters=clusters, cluster_p_values=cluster_p_values, H0=H0)
                good_cluster_inds = np.where(cluster_p_values < 0.05)[0]
                print('good_cluster_inds: {}'.format(good_cluster_inds))
            except Exception:
                print('bummer! {}, {}'.format(patient, cond_name))
                print(traceback.format_exc())
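The threshold line in this example converts a p-value into a critical t statistic via SciPy's inverse CDF. A minimal sketch verifying the two-tailed conversion (assuming only SciPy; the 36 controls give df = 35):

from scipy import stats

p_threshold = 0.05
n_subjects = 36
# t.ppf(p/2, df) is the negative tail; negating gives the positive critical value
t_threshold = -stats.distributions.t.ppf(p_threshold / 2., n_subjects - 1)
# equivalent: stats.t.ppf(1 - p_threshold / 2., n_subjects - 1)
print(round(t_threshold, 3))  # ~2.030 for df = 35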
Example #2
def stat_clus(X, tstep, time_thre=0, n_per=8192, p_threshold=0.01, p=0.05, fn_clu_out=None):
    print('Computing connectivity.')
    connectivity = spatial_tris_connectivity(grade_to_tris(5))
    #    Note that X needs to be a multi-dimensional array of shape
    #    samples (subjects) x time x space, so we permute dimensions
    X = np.transpose(X, [2, 1, 0])
    n_subjects = X.shape[0]
    fsave_vertices = [np.arange(X.shape[-1] // 2), np.arange(X.shape[-1] // 2)]
    #    Now let's actually do the clustering. This can take a long time...
    #    Here we set the threshold quite high to reduce computation.
    t_threshold = -stats.distributions.t.ppf(p_threshold / 2., n_subjects - 1)
    print('Clustering.')
    max_step = int(time_thre * 0.001 / tstep) + 1
    T_obs, clusters, cluster_p_values, H0 = clu = \
        spatio_temporal_cluster_1samp_test(X, connectivity=connectivity, n_jobs=1, max_step=max_step,
                                        threshold=t_threshold, n_permutations=n_per)
    #    Now select the clusters that are sig. at p < 0.05 (note that this value
    #    is multiple-comparisons corrected).
    good_cluster_inds = np.where(cluster_p_values < p)[0]
    print('Number of significant clusters: %d' % good_cluster_inds.shape[0])
    ###########################################################################
    # Save the clusters as an stc file
    assert good_cluster_inds.shape[0] != 0, (
        'No significant clusters at p_threshold = %f; '
        'try a lower p_threshold' % p_threshold)
    np.savez(fn_clu_out, clu=clu, tstep=tstep, fsave_vertices=fsave_vertices)
Example #3
def stats(X, connectivity=None, n_jobs=-1):
    """Cluster statistics to control for multiple comparisons.

    Parameters
    ----------
    X : array, shape (n_samples, n_space, n_times)
        The data, chance is assumed to be 0.
    connectivity : None | sparse array, shape (n_space, n_space)
        The connectivity matrix to apply cluster correction. If None uses
        neighboring cells of X.
    n_jobs : int
        The number of parallel processors.
    """
    X = np.array(X)
    X = X[:, :, None] if X.ndim == 2 else X
    T_obs_, clusters, p_values, _ = spatio_temporal_cluster_1samp_test(
        X,
        out_type='mask',
        stat_fun=_stat_fun,
        n_permutations=1000,
        n_jobs=n_jobs,
        connectivity=connectivity)
    p_values_ = np.ones_like(X[0]).T
    for cluster, pval in zip(clusters, p_values):
        p_values_[cluster.T] = pval
    return np.squeeze(p_values_).T
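Note that `_stat_fun` is not defined in this snippet. In comparable code bases it usually wraps MNE's `ttest_1samp_no_p`; a minimal sketch of one plausible definition plus a call, with hypothetical data (not the author's actual helper):

import numpy as np
from mne.stats import ttest_1samp_no_p

def _stat_fun(x, sigma=0, method='relative'):
    # one-sample t-values, optionally with the "hat" variance correction
    return ttest_1samp_no_p(x, sigma=sigma, method=method)

rng = np.random.RandomState(0)
scores = rng.randn(20, 50) * 0.1 + 0.5  # (subjects, times) decoding scores
p_map = stats(scores - 0.5)             # one p-value per time point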
Example #4
def stats_tfce(X, n_permutations=2 ** 10, threshold=dict(start=0.1, step=0.1), n_jobs=2):
    X = np.array(X)
    T_obs_, clusters, p_values, _ = spatio_temporal_cluster_1samp_test(
        X, out_type="mask", stat_fun=_stat_fun, n_permutations=n_permutations, threshold=threshold, n_jobs=n_jobs
    )
    p_values = p_values.reshape(X.shape[1:])
    return p_values
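Passing a dict as `threshold` switches the test to threshold-free cluster enhancement (TFCE): cluster mass is integrated over the ladder of thresholds start, start + step, start + 2*step, ... instead of one fixed cutoff. A minimal self-contained sketch on synthetic data (all values illustrative):

import numpy as np
from mne.stats import spatio_temporal_cluster_1samp_test

rng = np.random.RandomState(42)
X = rng.randn(15, 40, 1)  # (subjects, times, space)
X[:, 10:20, :] += 0.8     # inject an effect in the middle of the window

_, _, p_values, _ = spatio_temporal_cluster_1samp_test(
    X, threshold=dict(start=0.1, step=0.1), n_permutations=256,
    out_type='mask')
print((p_values.reshape(X.shape[1:]) < 0.05).sum(), 'significant points')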
Example #5
def stats_tfce(X, n_permutations=1000, threshold=None, n_jobs=2):

    # calculate p-values using cluster permutation test
    _, _, p_values, _ = spatio_temporal_cluster_1samp_test(
        X,
        out_type='indices',
        stat_fun=_stat_fun,
        n_permutations=n_permutations,
        threshold=threshold,
        n_jobs=n_jobs)

    p_values = p_values.reshape(X.shape[1:])

    return p_values
Example #6
def stats_tfce(X, n_permutations=2 ** 10, threshold=dict(start=.1, step=.1), n_jobs=2):
    # threshold free cluster enhancement for GATs
    import numpy as np
    from mne.stats import spatio_temporal_cluster_1samp_test

    X = np.array(X)
    T_obs_, clusters, p_values, _ = spatio_temporal_cluster_1samp_test(
        X, out_type='mask',
        stat_fun=_stat_fun,
        n_permutations=n_permutations,
        threshold=threshold,
        n_jobs=n_jobs)
    p_values = p_values.reshape(X.shape[1:])
    return p_values
Example #7
def perform_statistics_2(morphed_data, parameter_cache, vector, p_value=None):
    """Performs the statistical analysis using spatial_tris_connectivity.

    :param morphed_data: Morphed data obtained from morph_data function
    :param parameter_cache: Morphed parameter cache obtained from morph_data function.
    :param vector: Method to perform modelling ('sLORETA' etc.)
    :param p_value: Statistical p-value
    :return: clu, good_cluster_inds
    """
    # Unpack parameter cache dictionary
    n_subjects = parameter_cache['n_subjects']
    n_times = parameter_cache['n_times']

    # Take on the absolute
    X = np.abs(morphed_data)

    # Obtain the paired contrast
    if vector is False:
        # Dimensions: (space, time, subjects)
        X = X[:, :, :, 0] - X[:, :, :, 1]
    else:
        # Dimensions: (space, vector, time, subjects)
        X = X[:, :, :, :, 0] - X[:, :, :, :, 1]

    print('Computing connectivity... ')
    connectivity_2 = mne.spatio_temporal_tris_connectivity(
        grade_to_tris(5), n_times)

    # Note that X needs to be a multi-dimensional array of shape [samples (subjects) x time x space]
    if vector is False:
        X = np.transpose(X, [2, 1, 0])
    else:
        X = np.transpose(X, [3, 2, 1, 0])  ##### TO DOUBLE CHECK #####

    # Perform the clustering
    p_threshold = p_value  # 0.001
    t_threshold = -stats.distributions.t.ppf(p_threshold / 2., n_subjects - 1)

    print('Clustering... ')
    T_obs, clusters, cluster_p_values, H0 = spatio_temporal_cluster_1samp_test(
        X, connectivity=connectivity_2, n_jobs=1, threshold=t_threshold)

    # Pack the outputs into tuple
    clu = (T_obs, clusters, cluster_p_values, H0)

    # Select the clusters that are sig. at p < p_value (Note this value is multiple-comparisons corrected)
    good_cluster_inds = np.where(cluster_p_values < p_value)[0]
    return clu, good_cluster_inds
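Unlike most examples here, which pass a purely spatial connectivity and let the cluster test extend it across time, `spatio_temporal_tris_connectivity` builds the full space-by-time matrix up front. A minimal comparison sketch using the old-style names (renamed to `*_adjacency` in MNE >= 0.21; `n_times` illustrative):

import mne

tris = mne.grade_to_tris(5)  # fsaverage ico-5 triangulation, both hemispheres
n_times = 25

# spatial-only: n_vertices x n_vertices; the cluster test handles time itself
conn_spatial = mne.spatial_tris_connectivity(tris)
# spatio-temporal: (n_vertices * n_times) square, temporal neighbors included
conn_st = mne.spatio_temporal_tris_connectivity(tris, n_times)
print(conn_spatial.shape, conn_st.shape)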
Example #8
def stats_decoding(scores, n_jobs=-1):
    chance = 0.5
    x = scores - chance

    X = x[:, :, None] if x.ndim == 2 else x
    T_obs_, clusters, p_values, _ = spatio_temporal_cluster_1samp_test(
        X, out_type='mask', n_permutations=2 ** 12,
        n_jobs=n_jobs, connectivity=None)

    p_values_ = np.ones_like(X[0]).T
    for cluster, pval in zip(clusters, p_values):
        p_values_[cluster.T] = pval
    p_vals = np.squeeze(p_values_).T

    return p_vals
Example #9
def stat_clus(X, tstep, n_per=8192, p_threshold=0.01, p=0.05, fn_clu_out=None):
    '''
      Calculate significant clusters using 1sample ttest.

      Parameters
      ----------
      X: array
        The shape of X should be (vertices, timepoints, subjects).
      tstep: float
        The interval between timepoints.
      n_per: int
        The number of permutations for the t-test.
      p_threshold: float
        The p-value used to form the cluster-entry threshold.
      p: float
        The corrected p-value for multiple comparisons.
      fn_clu_out: string
        The file name for saving the clusters.
    '''

    print('Computing connectivity.')
    connectivity = spatial_tris_connectivity(grade_to_tris(5))

    #    Note that X needs to be a multi-dimensional array of shape
    #    samples (subjects) x time x space, so we permute dimensions
    X = np.transpose(X, [2, 1, 0])
    n_subjects = X.shape[0]
    fsave_vertices = [np.arange(X.shape[-1] // 2), np.arange(X.shape[-1] // 2)]

    #    Now let's actually do the clustering. This can take a long time...
    #    Here we set the threshold quite high to reduce computation.
    t_threshold = -stats.distributions.t.ppf(p_threshold / 2., n_subjects - 1)
    print('Clustering.')
    T_obs, clusters, cluster_p_values, H0 = clu = \
        spatio_temporal_cluster_1samp_test(X, connectivity=connectivity,
                                           n_jobs=1, threshold=t_threshold,
                                           n_permutations=n_per)

    #    Now select the clusters that are sig. at p < 0.05 (note that this value
    #    is multiple-comparisons corrected).
    good_cluster_inds = np.where(cluster_p_values < p)[0]
    print('Number of significant clusters: %d' % good_cluster_inds.shape[0])

    # Save the clusters as stc file
    np.savez(fn_clu_out, clu=clu, tstep=tstep, fsave_vertices=fsave_vertices)
    assert good_cluster_inds.shape[0] != 0, (
        'No significant clusters at p_threshold = %f; '
        'try a lower p_threshold' % p_threshold)
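Because `clu` is a tuple of heterogeneous arrays, `np.savez` pickles it, so reading the file back requires `allow_pickle=True`. A minimal loading sketch (the file name is hypothetical):

import numpy as np
from mne.stats import summarize_clusters_stc

npz = np.load('clusters.npz', allow_pickle=True)
clu = tuple(npz['clu'])  # (T_obs, clusters, cluster_p_values, H0)
stc_vis = summarize_clusters_stc(clu, tstep=float(npz['tstep']),
                                 vertices=list(npz['fsave_vertices']),
                                 subject='fsaverage')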
Example #10
def gat_stats(X):
    """Statistical test applied across subjects."""
    from mne.stats import spatio_temporal_cluster_1samp_test
    # check input
    X = np.array(X)
    X = X[:, :, None] if X.ndim == 2 else X

    # stats function report p_value for each cluster
    T_obs_, clusters, p_values, _ = spatio_temporal_cluster_1samp_test(
        X, out_type='mask', n_permutations=2**12, n_jobs=-1, verbose=False)

    # format p_values to get same dimensionality as X
    p_values_ = np.ones_like(X[0]).T
    for cluster, pval in zip(clusters, p_values):
        p_values_[cluster.T] = pval

    return np.squeeze(p_values_).T
Example #11
def myStats(X, connectivity=None, n_jobs=-1, tail=0):

    X = np.array(X)
    X = X[:, :, None] if X.ndim == 2 else X
    T_obs_, clusters, p_values, _ = spatio_temporal_cluster_1samp_test(
        X,
        out_type='mask',
        stat_fun=_stat_fun,
        n_permutations=5000,
        n_jobs=n_jobs,
        connectivity=connectivity,
        tail=tail)
    p_values_ = np.ones_like(X[0].T)

    for cluster, pval in zip(clusters, p_values):
        p_values_[cluster.T] = pval

    return np.squeeze(p_values_).T
Example #12
def mne_spatio_temporal_cluster_1samp_test(X, **kwargs):

    # NOTE: threshold_tfce is defined but never passed below; with
    # threshold=None, MNE falls back to a default t-based threshold.
    threshold_tfce = dict(start=0, step=0.2)

    # from MNE
    # X : array, shape (n_observations, n_times, n_vertices)
    #         The data to be clustered. The first dimension should correspond to the
    #         difference between paired samples (observations) in two conditions.
    # T_obs, _, p_values, _ = spatio_temporal_cluster_1samp_test(X, n_permutations=1000,
    #                                     threshold=threshold_tfce, tail=1,
    #                                     n_jobs=1, buffer_size=None,
    #                                     connectivity=None)

    T_obs, clusters, cluster_p_values, H0 = spatio_temporal_cluster_1samp_test(
        X, n_permutations=1000, threshold=None, tail=1,
        n_jobs=1, buffer_size=None, connectivity=None, out_type='mask')

    return T_obs, clusters, cluster_p_values, H0
Example #13
    def __init__(self, insts, alpha=0.05, **kwargs):
        """
        Parameters
        ----------
        insts : list of Epochs | Evoked instances
            The contrast insts[0] - insts[-1] is converted into a data
            array X (dims = n * space * time).
        alpha : float
            significance level

        Can take spatio_temporal_cluster_1samp_test() parameters.

        """
        from mne.stats import spatio_temporal_cluster_1samp_test

        # Convert lists of Evoked into Epochs
        insts = [Evokeds_to_Epochs(i) if isinstance(i, list) else i
                 for i in insts]

        # Apply contrast: n * space * time
        X = np.array(insts[0]._data - insts[-1]._data).transpose([0, 2, 1])

        # Run stats
        self.T_obs_, clusters, p_values, _ = \
            spatio_temporal_cluster_1samp_test(X, out_type='mask', **kwargs)

        # Save sorted sig clusters
        inds = np.argsort(p_values)
        clusters = np.array(clusters)[inds, :, :]
        p_values = p_values[inds]
        inds = np.where(p_values < alpha)[0]
        self.sig_clusters_ = clusters[inds, :, :]
        self.p_values_ = p_values[inds]

        # By default, keep meta data from first epoch
        self.insts = insts
        self.times = self.insts[0].times
        self.info = self.insts[0].info
        self.ch_names = self.insts[0].ch_names

        return
Example #14
    def __init__(self, insts, alpha=0.05, **kwargs):
        """
        Parameters
        ----------
        insts : list of Epochs | Evoked instances
            The contrast insts[0] - insts[-1] is converted into a data
            array X (dims = n * space * time).
        alpha : float
            significance level

        Can take spatio_temporal_cluster_1samp_test() parameters.

        """
        from mne.stats import spatio_temporal_cluster_1samp_test

        # Convert lists of Evoked into Epochs
        insts = [Evokeds_to_Epochs(i) if isinstance(i, list) else i
                 for i in insts]

        # Apply contrast: n * space * time
        X = np.array(insts[0]._data - insts[-1]._data).transpose([0, 2, 1])

        # Run stats
        self.T_obs_, clusters, p_values, _ = \
            spatio_temporal_cluster_1samp_test(X, out_type='mask', **kwargs)

        # Save sorted sig clusters
        inds = np.argsort(p_values)
        clusters = np.array(clusters)[inds, :, :]
        p_values = p_values[inds]
        inds = np.where(p_values < alpha)[0]
        self.sig_clusters_ = clusters[inds, :, :]
        self.p_values_ = p_values[inds]

        # By default, keep meta data from first epoch
        self.insts = insts
        self.times = self.insts[0].times
        self.info = self.insts[0].info
        self.ch_names = self.insts[0].ch_names

        return
Example #15
from scipy import stats as stats

tail=0
p_threshold=0.05

src_fname = '/nashome1/wexu/MNE_data/AVLearn/subjects/fsaverage/bem/fsaverage-ico-5-src.fif'
src = mne.read_source_spaces(src_fname)
connectivity = mne.spatial_src_connectivity(src)

t_threshold = -stats.distributions.t.ppf(p_threshold / (1.+(tail==0)), len(X) - 1)
sigma = 1e-3  # sigma for the "hat" method
from functools import partial
stat_fun_hat = partial(ttest_1samp_no_p, sigma=sigma)
print('Clustering.')
T_obs, clusters, cluster_p_values, H0 = clu = \
    spatio_temporal_cluster_1samp_test(X, connectivity=connectivity, n_jobs=-1,
                                       stat_fun=stat_fun_hat,
                                       threshold=t_threshold, n_permutations=1000)
 

print('summary stats')
good_cluster_inds = np.where(cluster_p_values < 0.05)[0]
for ind in good_cluster_inds:
    inds_t, inds_v = clusters[ind]
    inds_t = inds_t * tstep
    inds_p = cluster_p_values[ind]
    print(' cluster   %d \n p value:  %f \n time:     %s \n clusters: %s '
          % (ind, inds_p, inds_t, inds_v))

print('Visualizing clusters.')
fsave_vertices = [np.arange(10242), np.arange(10242)]
stc_all_cluster_vis = summarize_clusters_stc(clu, tstep=0.005, tmin=stat_tmin,
                                             vertices=fsave_vertices,
                                             subject='fsaverage', p_thresh=0.05)

Folder_name='/nashome1/wexu/Results/MNE_Results/AVLearn/'
Example #16
def cluster_test_main(gat, A, chance_level=np.pi / 6,
                      alpha=0.05, n_permutations=2 ** 11,
                      threshold=dict(start=1., step=.2), lims=None,
                      ylabel='Performance', title=None):
    """This function takes as inputs one array X and computes cluster analysis
    and plots associated graphs.
    Input:
    gat: gat object is used only to retrieve useful information to plot, like
        time points and gat.plot functions.
        gat.scores_ is replaced by X
    X: ndimensional array representing gat or diagonal performance.
        If A is diagonal, its dimensions should be n_subjects * n_time
        If A is GAT, its dimensions should be n_subjects * n_time * n_time
    chance_level: chance level to test against.
        pi/6 for circular data and 0 for deviations are normally used
    """
    # ensure A is an ndarray
    if not isinstance(A, np.ndarray):
        A = np.array(A)

    # define X
    X = A - chance_level

    # define time points
    times = gat.train_times['times_']

    # ------ Run stats
    T_obs_, clusters, p_values, _ = spatio_temporal_cluster_1samp_test(
                                           X,
                                           out_type='mask',
                                           n_permutations=n_permutations,
                                           connectivity=None,
                                           threshold=threshold,
                                           n_jobs=-1)

    # ------ combine clusters and retrieve min p_values for each feature
    p_values = np.min(np.logical_not(clusters) +
                      [clusters[c] * p for c, p in enumerate(p_values)],
                      axis=0)
    x, y = np.meshgrid(gat.train_times['times_'],
                       gat.test_times_['times_'][0],
                       copy=False, indexing='xy')


    # PLOT
    # ------ Plot GAT
    gat.scores_ = np.mean(A, axis=0)
    if lims is None:
        lims = [np.min(gat.scores_), np.max(gat.scores_)]
    fig = gat.plot(vmin=lims[0], vmax=lims[1],
                   show=False)
    ax = fig.axes[0]
    ax.contour(x, y, p_values < alpha, colors='black', levels=[0])
    #plt.title(title)
    plt.show()

    # ------ Plot Decoding
    scores_diag = np.transpose([A[:, t, t] for t in range(len(times))])
    fig, ax = plt.subplots(1)
    plot_eb(times, np.mean(scores_diag, axis=0),
            np.std(scores_diag, axis=0) / np.sqrt(scores_diag.shape[0]),
            color='blue', ax=ax)
    ymin, ymax = ax.get_ylim()
    sig_times = times[np.where(np.diag(p_values) < alpha)[0]]
    sfreq = (times[1] - times[0]) / 1000
    fill_betweenx_discontinuous(ax, ymin, ymax, sig_times, freq=sfreq,
                                color='gray', alpha=.3)
    ax.axhline(chance_level, color='k', linestyle='--', label="Chance level")
    ax.set_xlabel('Time (s)')
    ax.set_ylabel(ylabel)
    #plt.title(title)
    plt.show()
Example #17
    X_dys = np.transpose(X_dys, [1, 2, 0])
    X_con = np.transpose(X_con, [1, 2, 0])
    X_all = np.concatenate((X_con, X_dys), axis=0)

    # find clims
    # clustering
    n_subject_pairs = X_all.shape[0]
    t_threshold = -stats.distributions.t.ppf(p_initial_threshold / 2.,
                                             n_subject_pairs - 1)
    print('Clustering.')

    stat_fun = ttest_1samp_no_p
    T_obs, clusters, cluster_p_values, H0 = clu = \
        spatio_temporal_cluster_1samp_test(X_all, connectivity=connectivity,
                                           n_jobs=4, n_permutations=100,
                                           threshold=t_threshold, t_power=1,
                                           buffer_size=None, out_type='indices',
                                           verbose=True, stat_fun=stat_fun)

    #    Now let's build a convenient representation of each cluster, where each
    #    cluster becomes a "time point" in the SourceEstimate
    tstep = stc.tstep
    fsave_vertices = [np.arange(10242), np.arange(10242)]
    stc_all_cluster_vis = summarize_clusters_stc_AT(clu,
                                                    vertices=fsave_vertices,
                                                    subject='fsaverage')
    clim = dict(kind='value',
                lims=[
                    np.percentile(stc_all_cluster_vis.data[:, 0], clim_low),
                    np.percentile(stc_all_cluster_vis.data[:, 0], clim_mid),
                    np.percentile(stc_all_cluster_vis.data[:, 0], clim_high)
Example #18
def sample1_clus(fn_list,
                 n_per=8192,
                 pct=99,
                 p=0.01,
                 tail=1,
                 del_vers=None,
                 n_jobs=1):
    '''
      Calculate significant clusters using a one-sample t-test.

      Parameters
      ----------
      fn_list: list
        Paths of the group arrays.
      n_per: int
        The number of permutations for the t-test.
      pct: int or float
        The percentile of the baseline distribution used as the
        cluster-entry threshold.
      p: float
        The corrected p-value for multiple comparisons.
      tail: 1 or 0
        If tail=1, run a one-tailed test;
        if tail=0, run a two-tailed test.
      del_vers: None or '_exclu_vers'
        If '_exclu_vers', exclude the vertices in the medial wall.
    '''

    print('Computing connectivity.')
    connectivity = spatial_tris_connectivity(grade_to_tris(5))

    # Using the percentile of baseline array as the distribution threshold
    for fn_npz in fn_list:

        npz = np.load(fn_npz)
        tstep = npz['tstep'].flatten()[0]
        #    Note that X needs to be a multi-dimensional array of shape
        #    samples (subjects) x time x space, so we permute dimensions
        X = npz['X']
        X_b = X[1]
        X = X[0]
        fn_path = os.path.dirname(fn_npz)
        name = os.path.basename(fn_npz)

        if tail == 1:
            fn_out = fn_path + '/clu1sample_%s' % name[:name.rfind(
                '.npz')] + '_%d_%dtail_pct%.3f.npz' % (n_per, tail, pct)
            X = np.abs(X)
            t_threshold = np.percentile(np.abs(X_b), pct)
        elif tail == 0:
            fn_out = fn_path + '/clu1sample_%s' % name[:name.rfind(
                '.npz')] + '_%d_%dtail_pct%.3f.npz' % (n_per, tail + 2, pct)
            t_threshold = np.percentile(X_b, pct)

        fsave_vertices = [
            np.arange(X.shape[-1] // 2),
            np.arange(X.shape[-1] // 2)
        ]

        #n_subjects = X.shape[0]
        #t_threshold = -stats.distributions.t.ppf(0.01/(1+(tail==0)), n_subjects-1)

        print('Clustering.')
        T_obs, clusters, cluster_p_values, H0 = clu = \
            spatio_temporal_cluster_1samp_test(X, connectivity=connectivity,
                                            n_jobs=n_jobs, threshold=t_threshold,
                                            n_permutations=n_per, tail=tail, spatial_exclude=del_vers)

        #    Now select the clusters that are sig. at p < 0.05 (note that this value
        #    is multiple-comparisons corrected).
        good_cluster_inds = np.where(cluster_p_values < p)[0]
        print('Number of significant clusters: %d' % good_cluster_inds.shape[0])

        # Save the clusters as stc file
        np.savez(fn_out, clu=clu, tstep=tstep, fsave_vertices=fsave_vertices)
        assert good_cluster_inds.shape[0] != 0, (
            'No significant clusters at the pct = %.3f percentile threshold; '
            'consider lowering it' % pct)
Example #19
#    To use an algorithm optimized for spatio-temporal clustering, we
#    just pass the spatial connectivity matrix (instead of spatio-temporal)
print('Computing connectivity.')
connectivity = spatial_tris_connectivity(grade_to_tris(5))

#    Note that X needs to be a multi-dimensional array of shape
#    samples (subjects) x time x space, so we permute dimensions
X = np.transpose(X, [2, 1, 0])

#    Now let's actually do the clustering. This can take a long time...
#    Here we set the threshold quite high to reduce computation.
p_threshold = 0.001
t_threshold = -stats.distributions.t.ppf(p_threshold / 2., n_subjects - 1)
print('Clustering.')
T_obs, clusters, cluster_p_values, H0 = \
    spatio_temporal_cluster_1samp_test(X, connectivity=connectivity, n_jobs=2,
                                       threshold=t_threshold)
#    Now select the clusters that are sig. at p < 0.05 (note that this value
#    is multiple-comparisons corrected).
good_cluster_inds = np.where(cluster_p_values < 0.05)[0]

###############################################################################
# Visualize the clusters

print('Visualizing clusters.')

#    Now let's build a convenient representation of each cluster, where each
#    cluster becomes a "time point" in the SourceEstimate
data = np.zeros((n_vertices_fsave, n_times))
data_summary = np.zeros((n_vertices_fsave, len(good_cluster_inds) + 1))
for ii, cluster_ind in enumerate(good_cluster_inds):
    data.fill(0)
Example #20
    spatial_exclude = np.hstack((fsave_vertices[0][nnl],
                                 fsave_vertices[0][nnr] + 10242))
    # # # adjacency = mne.spatial_src_adjacency(inv_op_SD['src'])
    source_space = mne.grade_to_tris(5)
    # as we only have one hemisphere we only need half the connectivity
    print('Computing connectivity.')
    connectivity = mne.spatial_tris_connectivity(source_space)
    p_threshold = 0.05
    t_threshold = -stats.distributions.t.ppf(p_threshold / 2., n_subjects - 1)
       
    #     print('Clustering.')
    T_obs, clusters, cluster_p_values, H0 = clu = \
        spatio_temporal_cluster_1samp_test(Y, connectivity=connectivity,
                                           n_jobs=10, threshold=t_threshold,
                                           n_permutations=5000,
                                           spatial_exclude=spatial_exclude,
                                           step_down_p=0.05, t_power=1)


       
    # print('Visualizing clusters.')
         
    fsave_vertices = [np.arange(10242),np.arange(10242)]
    
    #     # Build a convenient representation of each cluster, where each
    #     # cluster becomes a "time point" in the SourceEstimate
    stc_all_cluster_vis = summarize_clusters_stc(clu, tstep=tstep * 1000,
                                                 vertices=fsave_vertices)
     
    idx = stc_all_cluster_vis.time_as_index(times=stc_all_cluster_vis.times)
    data = stc_all_cluster_vis.data[:, idx]
Example #21
        X[si, ri, :] = np.convolve(X[si, ri, :], gaussian, "same")
    for ci in range(X.shape[2]):
        X[si, :, ci] = np.convolve(X[si, :, ci], gaussian, "same")

###############################################################################
# Do some statistics

#    Note that X needs to be a multi-dimensional array of shape
#    samples (subjects) x time x space, so we permute dimensions
X = X.reshape((n_subjects, 1, n_src))

#    Now let's do some clustering using the standard method. Note that not
#    specifying a connectivity matrix implies grid-like connectivity, which
#    we want here:
T_obs, clusters, p_values, H0 = spatio_temporal_cluster_1samp_test(
    X, n_jobs=2, threshold=threshold, connectivity=connectivity, tail=1, n_permutations=n_permutations
)

#    Let's put the cluster data in a readable format
ps = np.zeros(width * width)
for cl, p in zip(clusters, p_values):
    ps[cl[1]] = -np.log10(p)
ps = ps.reshape((width, width))
T_obs = T_obs.reshape((width, width))

#     To do a Bonferroni correction on these data is simple:
p = stats.distributions.t.sf(T_obs, n_subjects - 1)
p_bon = -np.log10(bonferroni_correction(p)[1])

#    Now let's do some clustering using the standard method with "hat":
stat_fun = partial(ttest_1samp_no_p, sigma=sigma)
Example #22
    scores = list()
    for fname in subjects:
        with open(fname, 'rb') as f:
            [gat, score] = pickle.load(f)
        scores.append(score)
    scores = np.array(scores) - .5
    gat_list = [gat]

# STATS #######################################################################
from mne.stats import spatio_temporal_cluster_1samp_test
start = np.where(gat_list[0].train_times_['times'] >= 0.)[0][0]
# start = 0
X = scores[:, start::1, start::1]
T_obs_, clusters, p_values_, _ = spatio_temporal_cluster_1samp_test(
    X,
    out_type='mask',
    n_permutations=128,
    threshold=dict(start=2, step=2.),
    n_jobs=4)

p_values = p_values_.reshape(X.shape[1:])
h = p_values < .05

# PLOT ########################################################################
import matplotlib.pyplot as plt
from sandbox.graphs.utils import plot_graph, annotate_graph, animate_graph

times = 1e3 * gat_list[0].train_times_['times'][start:]
mean_scores = np.mean(X, axis=0)

# Summary figure
fig, ax = plt.subplots(1)
Example #23
X = np.transpose(X, [2, 1, 0])
print(np.shape(X))


###############################################################################

#    Now let's actually do the clustering. This can take a long time...
#    Here we set the threshold quite high to reduce computation.
p_threshold = 0.01  # 0.001
t_threshold = -stats.distributions.t.ppf(p_threshold / 2., n_subjects - 1)
print('Clustering.')
print(t_threshold)
print(np.shape(X))
T_obs, clusters, cluster_p_values, H0 = clu = \
    spatio_temporal_cluster_1samp_test(X, connectivity=connectivity, n_jobs=2,
                                       threshold=t_threshold)

print(cluster_p_values)
#    Now select the clusters that are sig. at p < 0.1 (note that this value
#    is multiple-comparisons corrected).
good_cluster_inds = np.where(cluster_p_values < 0.1)[0]


################################################################################
# Visualize the clusters

print('Visualizing clusters.')
import os
os.environ["SUBJECTS_DIR"] = "/mnt/file1/binder/KRNS/anatomies/surfaces/"
os.environ["subjects_dir"] = "/mnt/file1/binder/KRNS/anatomies/surfaces/"
Example #24
     # timecourse labels
     lab = ['short gap', 'long gap'][kk]
     _ = axs[ii].annotate(lab, (0, stim_y), xytext=(-6, 0),
                          textcoords='offset points', color=col,
                          ha='right', va='center', fontsize=9,
                          fontstyle='italic')
 # cue label
 _ = axs[ii].annotate('cue', xy=(stim_times[1], stim_ymax + thk),
                      xytext=(0, 2), textcoords='offset points',
                      fontsize=9, fontstyle='italic', ha='center',
                      va='bottom', color=cue)
 # stats
 if plot_signif:
     thresh = -1 * distributions.t.ppf(0.05 / 2, len(contr_diff) - 1)
     result = spatio_temporal_cluster_1samp_test(
         contr_diff, threshold=thresh, stat_fun=stat_fun, n_jobs=6,
         buffer_size=None, n_permutations=np.inf)
     tvals, clusters, cluster_pvals, H0 = result
     signif = np.where(np.array([p <= 0.05 for p in cluster_pvals]))[0]
     signif_clusters = [clusters[s] for s in signif]
     signif_cluster_pvals = cluster_pvals[signif]
     # plot stats
     for clu, pv in zip(signif_clusters, signif_cluster_pvals):
         '''
         # this index tells direction of tval, hence could be used to
         # decide which color to draw the significant cluster region
         # based on which curve is higher:
         idx = (np.sign(tvals[clu[0][0], 0]).astype(int) + 1) // 2
         '''
         clu = clu[0]
         cluster_ymin = ylim[0] * np.ones_like(t[clu])
Example #25
def test_cluster_permutation_t_test_with_connectivity():
    """Test cluster level permutations T-test with connectivity matrix."""
    try:
        try:
            from sklearn.feature_extraction.image import grid_to_graph
        except ImportError:
            from scikits.learn.feature_extraction.image import grid_to_graph
    except ImportError:
        return

    out = permutation_cluster_1samp_test(condition1_1d, n_permutations=500)
    connectivity = grid_to_graph(1, condition1_1d.shape[1])
    out_connectivity = permutation_cluster_1samp_test(
        condition1_1d, n_permutations=500, connectivity=connectivity)
    assert_array_equal(out[0], out_connectivity[0])
    for a, b in zip(out_connectivity[1], out[1]):
        assert_true(np.sum(out[0][a]) == np.sum(out[0][b]))
        assert_true(np.all(a[b]))

    # test spatio-temporal with no time connectivity (repeat spatial pattern)
    connectivity_2 = sparse.coo_matrix(
        linalg.block_diag(connectivity.asfptype().todense(),
                          connectivity.asfptype().todense()))
    condition1_2 = np.concatenate((condition1_1d, condition1_1d), axis=1)

    out_connectivity_2 = permutation_cluster_1samp_test(
        condition1_2, n_permutations=500, connectivity=connectivity_2)
    # make sure we were operating on the same values
    split = len(out[0])
    assert_array_equal(out[0], out_connectivity_2[0][:split])
    assert_array_equal(out[0], out_connectivity_2[0][split:])

    # make sure we really got 2x the number of original clusters
    n_clust_orig = len(out[1])
    assert_true(len(out_connectivity_2[1]) == 2 * n_clust_orig)

    # Make sure that we got the old ones back
    n_pts = condition1_1d.shape[1]
    data_1 = set([np.sum(out[0][b[:n_pts]]) for b in out[1]])
    data_2 = set([
        np.sum(out_connectivity_2[0][a[:n_pts]])
        for a in out_connectivity_2[1][:]
    ])
    assert_true(len(data_1.intersection(data_2)) == len(data_1))

    # now use the other algorithm
    condition1_3 = np.reshape(condition1_2, (40, 2, 350))
    out_connectivity_3 = mnestats.spatio_temporal_cluster_1samp_test(
        condition1_3,
        n_permutations=500,
        connectivity=connectivity,
        max_step=0,
        threshold=1.67,
        check_disjoint=True)
    # make sure we were operating on the same values
    split = len(out[0])
    assert_array_equal(out[0], out_connectivity_3[0][0])
    assert_array_equal(out[0], out_connectivity_3[0][1])

    # make sure we really got 2x the number of original clusters
    assert_true(len(out_connectivity_3[1]) == 2 * n_clust_orig)

    # Make sure that we got the old ones back
    data_1 = set([np.sum(out[0][b[:n_pts]]) for b in out[1]])
    data_2 = set([
        np.sum(out_connectivity_3[0][a[0], a[1]])
        for a in out_connectivity_3[1]
    ])
    assert_true(len(data_1.intersection(data_2)) == len(data_1))
Example #26
assert sensor_adjacency.shape == \
    (len(tfr_epochs.ch_names), len(tfr_epochs.ch_names))
assert epochs_power.data.shape == (len(epochs), len(tfr_epochs.ch_names),
                                   len(tfr_epochs.freqs),
                                   len(tfr_epochs.times))
adjacency = mne.stats.combine_adjacency(sensor_adjacency,
                                        len(tfr_epochs.freqs),
                                        len(tfr_epochs.times))

# our adjacency is square with each dim matching the data size
assert adjacency.shape[0] == adjacency.shape[1] == \
    len(tfr_epochs.ch_names) * len(tfr_epochs.freqs) * len(tfr_epochs.times)

# %%

threshold = 3.
n_permutations = 50  # Warning: 50 is way too small for real-world analysis.
# T_obs, clusters, cluster_p_values, H0 = \
#     permutation_cluster_1samp_test(epochs_power, n_permutations=n_permutations,
#                                    threshold=threshold, tail=0,
#                                    connectivity=None,
#                                    out_type='mask', verbose=True)

T_obs, clusters, cluster_p_values, H0 = \
    spatio_temporal_cluster_1samp_test(epochs_power, n_permutations=n_permutations,
                                   threshold=threshold, tail=0,
                                   connectivity=None,
                                   out_type='mask', verbose=True)

Example #27
# %%
# just pass the spatial adjacency matrix (instead of spatio-temporal)
print('Computing adjacency.')
adjacency = mne.spatial_src_adjacency(src)

#    Note that X needs to be a multi-dimensional array of shape
#    samples (subjects) x time x space, so we permute dimensions
X = np.transpose(X, [2, 1, 0])

#    Now let's actually do the clustering. This can take a long time...
#    Here we set the threshold quite high to reduce computation.
p_threshold = 0.001
t_threshold = -stats.distributions.t.ppf(p_threshold / 2., n_subjects - 1)
print('Clustering.')
T_obs, clusters, cluster_p_values, H0 = clu = \
    spatio_temporal_cluster_1samp_test(X, adjacency=adjacency, n_jobs=1,
                                       threshold=t_threshold, buffer_size=None,
                                       verbose=True)
#    Now select the clusters that are sig. at p < 0.05 (note that this value
#    is multiple-comparisons corrected).
good_cluster_inds = np.where(cluster_p_values < 0.05)[0]

# %%
# Visualize the clusters
# ----------------------
print('Visualizing clusters.')

#    Now let's build a convenient representation of each cluster, where each
#    cluster becomes a "time point" in the SourceEstimate
stc_all_cluster_vis = summarize_clusters_stc(clu, tstep=tstep,
                                             vertices=fsave_vertices,
                                             subject='fsaverage')
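The summary source estimate can then be rendered on the fsaverage surface, where the first "time point" holds the union of all significant clusters. A minimal plotting sketch (`subjects_dir` and the clim values are assumptions, not taken from the snippet):

brain = stc_all_cluster_vis.plot(hemi='both', views='lateral',
                                 subjects_dir=subjects_dir,
                                 time_label='temporal extent (ms)',
                                 clim=dict(kind='value', pos_lims=[0, 1, 40]))
# brain.save_image('clusters.png')  # optional snapshot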
Example #28
def test_cluster_permutation_t_test_with_connectivity():
    """Test cluster level permutations T-test with connectivity matrix."""
    try:
        try:
            from sklearn.feature_extraction.image import grid_to_graph
        except ImportError:
            from scikits.learn.feature_extraction.image import grid_to_graph
    except ImportError:
        return

    out = permutation_cluster_1samp_test(condition1_1d, n_permutations=500)
    connectivity = grid_to_graph(1, condition1_1d.shape[1])
    out_connectivity = permutation_cluster_1samp_test(condition1_1d,
                  n_permutations=500, connectivity=connectivity)
    assert_array_equal(out[0], out_connectivity[0])
    for a, b in zip(out_connectivity[1], out[1]):
        assert_true(np.sum(out[0][a]) == np.sum(out[0][b]))
        assert_true(np.all(a[b]))

    # test spatio-temporal with no time connectivity (repeat spatial pattern)
    connectivity_2 = sparse.coo_matrix(
        linalg.block_diag(connectivity.asfptype().todense(),
                          connectivity.asfptype().todense()))
    condition1_2 = np.concatenate((condition1_1d,
                                   condition1_1d), axis=1)

    out_connectivity_2 = permutation_cluster_1samp_test(condition1_2,
                    n_permutations=500, connectivity=connectivity_2)
    # make sure we were operating on the same values
    split = len(out[0])
    assert_array_equal(out[0], out_connectivity_2[0][:split])
    assert_array_equal(out[0], out_connectivity_2[0][split:])

    # make sure we really got 2x the number of original clusters
    n_clust_orig = len(out[1])
    assert_true(len(out_connectivity_2[1]) == 2 * n_clust_orig)

    # Make sure that we got the old ones back
    n_pts = condition1_1d.shape[1]
    data_1 = set([np.sum(out[0][b[:n_pts]]) for b in out[1]])
    data_2 = set([np.sum(out_connectivity_2[0][a[:n_pts]]) for a in
        out_connectivity_2[1][:]])
    assert_true(len(data_1.intersection(data_2)) == len(data_1))

    # now use the other algorithm
    condition1_3 = np.reshape(condition1_2, (40, 2, 350))
    out_connectivity_3 = mnestats.spatio_temporal_cluster_1samp_test(
                             condition1_3, n_permutations=500,
                             connectivity=connectivity, max_step=0,
                             threshold=1.67, check_disjoint=True)
    # make sure we were operating on the same values
    split = len(out[0])
    assert_array_equal(out[0], out_connectivity_3[0][0])
    assert_array_equal(out[0], out_connectivity_3[0][1])

    # make sure we really got 2x the number of original clusters
    assert_true(len(out_connectivity_3[1]) == 2 * n_clust_orig)

    # Make sure that we got the old ones back
    data_1 = set([np.sum(out[0][b[:n_pts]]) for b in out[1]])
    data_2 = set([np.sum(out_connectivity_3[0][a[0], a[1]]) for a in
        out_connectivity_3[1]])
    assert_true(len(data_1.intersection(data_2)) == len(data_1))
Example #29
def run_sensor_stats():
    for c in np.arange(len(config.stats_params)):
       
        # organise data and analysis parameters
        dat0_files = config.stats_params[c]['dat0_files']
        dat1_files = config.stats_params[c]['dat1_files']
        condnames = config.stats_params[c]['condnames']
        tmin, tmax = config.stats_params[c]['statwin']
        n_permutations = config.stats_params[c]['n_permutations']
        p_threshold = config.stats_params[c]['threshold']
        tail = config.stats_params[c]['tail']
        if tail == 0:
            p_threshold = p_threshold / 2
            tail_x = 1
        else:
            tail_x = tail

        if config.stats_params[c].get('multi-subject', False):
            # we will run the same analysis on each subject separately
            nruns = len(dat0_files)
            ismulti = True      
        else:
            nruns = 1     
            ismulti = False     

        results = [] # to store the results later

        for statrun in np.arange(nruns):                      
            
            if ismulti:
                # we will run the same analysis on each subject separately
                dat0, evokeds0, connectivity = collect_data([dat0_files[statrun]],condnames[0],tmin,tmax,ismulti)
                dat1, evokeds1, _ = collect_data([dat1_files[statrun]],condnames[1],tmin,tmax,ismulti)    
            else:
                # collect together the data to be compared
                dat0, evokeds0, connectivity = collect_data(dat0_files,condnames[0],tmin,tmax,ismulti)
                dat1, evokeds1, _ = collect_data(dat1_files,condnames[1],tmin,tmax,ismulti)        
                
            alldata = [] 

            # fix threshold to be one-sided if requested
            if not isinstance(p_threshold, dict):  # i.e. is NOT TFCE
                if config.stats_params[c]['stat'] == 'indep':
                    stat_fun = ttest_ind_no_p
                    if len(dat0_files) == 1: # ie is single subject stats
                        df = dat0.data.shape[0] - 1 + dat1.data.shape[0] - 1                        
                    else:
                        df = len(dat0_files) - 1 + len(dat1_files) - 1                                                            
                else: # ie is dependent data, and so is one-sample t test
                    # this will only ever be group data...
                    # If the length of dat0_files and dat1_files are different it'll crash later anyway
                    stat_fun = ttest_1samp_no_p
                    df = len(dat0_files) - 1
                threshold_stat = stats.distributions.t.ppf(1. - p_threshold, df) * tail_x
            else: # i.e. is TFCE
                threshold_stat = p_threshold      
        
            # run the stats
            if config.stats_params[c]['stat'] == 'indep':
                alldata = [dat0,dat1]
                cluster_stats = spatio_temporal_cluster_test(alldata, n_permutations=n_permutations,
                                                        threshold=threshold_stat, 
                                                        tail=tail, stat_fun=stat_fun,
                                                        n_jobs=1, buffer_size=None,
                                                        connectivity=connectivity)
            elif config.stats_params[c]['stat'] == 'dep':
                # we have to use 1-sample t-tests here so also need to subtract conditions
                alldata = dat0 - dat1
                cluster_stats = spatio_temporal_cluster_1samp_test(alldata, n_permutations=n_permutations,
                                                        threshold=threshold_stat, 
                                                        tail=tail, stat_fun=stat_fun,
                                                        n_jobs=1, buffer_size=None,
                                                        connectivity=connectivity)

            # extract stats of interest
            T_obs, clusters, p_values, _ = cluster_stats
            good_cluster_inds = np.where(p_values < config.stats_params[c]['p_accept'])[0]

            # tell the user the results
            print('There are {} significant clusters'.format(good_cluster_inds.size))
            if good_cluster_inds.size != 0:
                print('p-values: {}'.format(p_values[good_cluster_inds]))
            else:
                if p_values.any():
                    print('Minimum p-value: {}'.format(np.min(p_values)))
                else:
                    print('No clusters found')

            # some final averaging and tidying
            if len(evokeds0) == 1:
                dat0_avg = evokeds0[0].average()
                dat1_avg = evokeds1[0].average()
            else:
                dat0_avg = mne.grand_average(evokeds0)
                dat1_avg = mne.grand_average(evokeds1)
            diffcond_avg = mne.combine_evoked([dat0_avg, -dat1_avg], 'equal')
            
            # get sensor positions via layout
            pos = mne.find_layout(evokeds0[0].info).pos

            ## EVENTUALLY I WILL PUT THE PLOTTING IN A SEPARATE FUNCTION...
            do_plot = False
            
            if do_plot:
                # loop over clusters
                for i_clu, clu_idx in enumerate(good_cluster_inds):
                    # unpack cluster information, get unique indices
                    time_inds, space_inds = np.squeeze(clusters[clu_idx])
                    ch_inds = np.unique(space_inds)
                    time_inds = np.unique(time_inds)   

                    # get topography for F stat
                    f_map = T_obs[time_inds, ...].mean(axis=0)

                    # get topography of difference
                    time_shift = evokeds0[0].time_as_index(tmin)      # fix windowing shift
                    print('time_shift = {}'.format(time_shift))
                    sig_times_idx = time_inds + time_shift
                    diff_topo = np.mean(diffcond_avg.data[:,sig_times_idx],axis=1)
                    sig_times = evokeds0[0].times[sig_times_idx]
                    
                    # create spatial mask
                    mask = np.zeros((f_map.shape[0], 1), dtype=bool)
                    mask[ch_inds, :] = True

                    # initialize figure
                    fig, ax_topo = plt.subplots(1, 1, figsize=(10, 3))

                    # plot average difference and mark significant sensors
                    image, _ = plot_topomap(diff_topo, pos, mask=mask, axes=ax_topo, cmap='RdBu_r',
                                            vmin=np.min, vmax=np.max, show=False)

                    # create additional axes (for ERF and colorbar)
                    divider = make_axes_locatable(ax_topo)

                    # add axes for colorbar
                    ax_colorbar = divider.append_axes('right', size='5%', pad=0.05)
                    plt.colorbar(image, cax=ax_colorbar)
                    ax_topo.set_xlabel(
                        'Mean difference ({:0.3f} - {:0.3f} s)'.format(*sig_times[[0, -1]]))

                    # add new axis for time courses and plot time courses
                    ax_signals = divider.append_axes('right', size='300%', pad=1.2)
                    title = 'Cluster #{0}, {1} sensor'.format(i_clu + 1, len(ch_inds))
                    if len(ch_inds) > 1:
                        title += "s (mean)"
                    plot_compare_evokeds([dat0_avg, dat1_avg], title=title, picks=ch_inds, axes=ax_signals,
                                            colors=None, show=False,
                                            split_legend=False, truncate_yaxis='max_ticks')

                    # plot temporal cluster extent
                    ymin, ymax = ax_signals.get_ylim()
                    ax_signals.fill_betweenx((ymin, ymax), sig_times[0], sig_times[-1],
                                                color='orange', alpha=0.3)

                    # clean up viz
                    mne.viz.tight_layout(fig=fig)
                    fig.subplots_adjust(bottom=.05)
                    plt.show()   

            results.append({                    
                'cluster_stats': cluster_stats,
                'good_cluster_inds': good_cluster_inds,
                'alldata': alldata,
                'evokeds0': evokeds0,
                'evokeds1': evokeds1
            })

        # save
        save_name = op.join(config.stat_path, config.stats_params[c]['analysis_name'] + '.dat')        
        pickle_out = open(save_name,'wb')
        pickle.dump(results, pickle_out)
        pickle_out.close()
Example #30
adjacency, ch_names = find_ch_adjacency(info, ch_type=ch_type)
# set cluster threshold
# threshold = 1.0
# set family-wise p-value
# p_accept = 0.05
p_accept = 0.14

X_low = X[y == LOW_CONF_EPOCH, ...].transpose(0, 2, 1)
X_high = X[y == HIGH_CONF_EPOCH, ...].transpose(0, 2, 1)
X_diff = X_high - X_low

cluster_stats = spatio_temporal_cluster_1samp_test(
    X_diff,
    n_permutations=100,
    # threshold=threshold,
    # tail=1,
    n_jobs=1,
    # buffer_size=None,
    adjacency=adjacency,
)

T_obs, clusters, p_values, _ = cluster_stats
good_cluster_inds = np.where(p_values < p_accept)[0]

colors = {"low": "crimson", "high": 'steelblue'}
linestyles = {"low": '-', "high": '--'}

# organize data for plotting
evokeds = {"low": erf_low, "high": erf_high}
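With `good_cluster_inds` in hand, each cluster is typically inspected by comparing the two evoked responses over just its sensors. A minimal sketch reusing the names above (the plotting choices are assumptions):

import numpy as np
import mne

for clu_idx in good_cluster_inds:
    time_inds, space_inds = np.squeeze(clusters[clu_idx])
    picks = np.unique(space_inds)  # sensors belonging to this cluster
    mne.viz.plot_compare_evokeds(evokeds, picks=picks, colors=colors,
                                 linestyles=linestyles, show=True)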

Example #31
#
# ------------------
#
# .. note::
#     X needs to be a multi-dimensional array of shape
#     samples (subjects) x time x space, so we permute dimensions:
X = X.reshape((n_subjects, 1, n_src))

###############################################################################
# Now let's do some clustering using the standard method.
#
# .. note::
#     Not specifying a connectivity matrix implies grid-like connectivity,
#     which we want here:
T_obs, clusters, p_values, H0 = \
    spatio_temporal_cluster_1samp_test(X, n_jobs=1, threshold=threshold,
                                       connectivity=connectivity,
                                       tail=1, n_permutations=n_permutations)

#    Let's put the cluster data in a readable format
ps = np.zeros(width * width)
for cl, p in zip(clusters, p_values):
    ps[cl[1]] = -np.log10(p)
ps = ps.reshape((width, width))
T_obs = T_obs.reshape((width, width))

#     To do a Bonferroni correction on these data is simple:
p = stats.distributions.t.sf(T_obs, n_subjects - 1)
p_bon = -np.log10(bonferroni_correction(p)[1])

#    Now let's do some clustering using the standard method with "hat":
stat_fun = partial(ttest_1samp_no_p, sigma=sigma)
Example #32
    op.join(subjects_dir, 'fsaverage', 'bem', 'fsaverage-5-src.fif'))
connectivity = spatial_src_connectivity(fsaverage_src)
# something like 0.01 is a more typical value here (or use TFCE!), but
# for speed here we'll use 0.001 (fewer clusters to handle)
p_threshold = 0.001
t_threshold = -stats.distributions.t.ppf(p_threshold / 2., len(X) - 1)

###############################################################################
# Here we could do an exact test with ``n_permutations=2**(len(X)-1)``,
# i.e. 32768 permutations, but this would take a long time. For speed and
# simplicity we'll do 1024.

stat_fun = partial(ttest_1samp_no_p, sigma=1e-3)
T_obs, clusters, cluster_p_values, H0 = clu = \
    spatio_temporal_cluster_1samp_test(
        X, connectivity=connectivity, n_jobs=N_JOBS, threshold=t_threshold,
        stat_fun=stat_fun, buffer_size=None, seed=0, step_down_p=0.05,
        verbose=True)

good_cluster_inds = np.where(cluster_p_values < 0.05)[0]
for ind in good_cluster_inds:
    print('Found cluster with p=%g' % (cluster_p_values[ind], ))

###############################################################################
# Visualize the results:

stc_all_cluster_vis = summarize_clusters_stc(clu,
                                             tstep=tstep,
                                             vertices=fsaverage_vertices,
                                             subject='fsaverage')
pos_lims = [0, 0.1, 100 if l_freq is None else 30]
brain = stc_all_cluster_vis.plot(hemi='both',
Example #33
# To use an algorithm optimized for spatio-temporal clustering, we
# just pass the spatial connectivity matrix (instead of spatio-temporal)
print('Computing connectivity.')
connectivity = mne.spatial_src_connectivity(src)

#    Note that X needs to be a multi-dimensional array of shape
#    samples (subjects) x time x space, so we permute dimensions
X = np.transpose(X, [2, 1, 0])

#    Now let's actually do the clustering. This can take a long time...
#    Here we set the threshold quite high to reduce computation.
p_threshold = 0.001
t_threshold = -stats.distributions.t.ppf(p_threshold / 2., n_subjects - 1)
print('Clustering.')
T_obs, clusters, cluster_p_values, H0 = clu = \
    spatio_temporal_cluster_1samp_test(X, connectivity=connectivity, n_jobs=1,
                                       threshold=t_threshold, buffer_size=None)
#    Now select the clusters that are sig. at p < 0.05 (note that this value
#    is multiple-comparisons corrected).
good_cluster_inds = np.where(cluster_p_values < 0.05)[0]

###############################################################################
# Visualize the clusters
# ----------------------
print('Visualizing clusters.')

#    Now let's build a convenient representation of each cluster, where each
#    cluster becomes a "time point" in the SourceEstimate
stc_all_cluster_vis = summarize_clusters_stc(clu, tstep=tstep,
                                             vertices=fsave_vertices,
                                             subject='fsaverage')
Example #34
        print(
            'Computing connectivity'
        )  # need to use connectivity instead of adjacency in old mne version (0.19)
        src = mne.read_source_spaces(
            op.join(fsMRI_dir, 'fsaverage', 'bem', 'fsaverage-ico-5-src.fif'))
        connectivity = mne.spatial_src_connectivity(src)
        fsaverage_vertices = [s['vertno'] for s in src]

        # Clustering
        print('Clustering')
        # p_threshold = 0.05
        # t_threshold = -stats.distributions.t.ppf(p_threshold / 2., n_subjects - 1)
        T_obs, clusters, cluster_p_values, H0 = clu = spatio_temporal_cluster_1samp_test(
            X,
            connectivity=connectivity,
            n_jobs=6,
            n_permutations=500,
            threshold=None,
            buffer_size=None,
            verbose=True)  # with connectivity instead of adjacency

        # Save or plot clusters (if any significant one)
        good_cluster_inds = np.where(cluster_p_values < 0.05)[0]
        if len(good_cluster_inds) > 0:
            print(analysis_name + ': ' + str(len(good_cluster_inds)) +
                  ' good clusters')
            print('Saving clusters to ' +
                  op.join(results_path, analysis_name +
                          '_sources_clusters.pickle'))
            with open(
                    op.join(results_path,
                            analysis_name + '_sources_clusters.pickle'),
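As the comment in this example notes, the keyword was renamed from `connectivity` to `adjacency` in later MNE releases. A minimal version-robust sketch, assuming `src` and `X` are already prepared as above:

import mne
from mne.stats import spatio_temporal_cluster_1samp_test

if hasattr(mne, 'spatial_src_adjacency'):  # MNE >= 0.21
    kwargs = dict(adjacency=mne.spatial_src_adjacency(src))
else:                                      # older releases, e.g. 0.19
    kwargs = dict(connectivity=mne.spatial_src_connectivity(src))

clu = spatio_temporal_cluster_1samp_test(
    X, n_permutations=500, threshold=None, n_jobs=1, **kwargs)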
Example #35
###############################################################################
# Compute statistic

#    To use an algorithm optimized for spatio-temporal clustering, we
#    just pass the spatial connectivity matrix (instead of spatio-temporal)
print('Computing connectivity.')
connectivity = spatial_tris_connectivity(grade_to_tris(5))

#    Note that X needs to be a multi-dimensional array of shape
#    samples (subjects) x time x space, so we permute dimensions
X = np.transpose(X, [2, 1, 0])
#    Now let's actually do the clustering. This can take a long time...
t_threshold = -stats.distributions.t.ppf(p_threshold / 2., subjectCount - 1)
print('Clustering with a threshold of t = {}'.format(t_threshold))
clu = spatio_temporal_cluster_1samp_test(X,
                                         connectivity=connectivity,
                                         n_jobs=multitasking,
                                         threshold=t_threshold)
T_obs, clusters, cluster_p_values, H0 = clu
# Save this data for later reference
tFile = open(tFilename, 'wb')
pickle.dump(T_obs, tFile)
tFile.close()

clusterFile = open(clusterFilename, 'wb')
pickle.dump(clusters, clusterFile)
clusterFile.close()

pFile = open(pFilename, 'wb')
pickle.dump(cluster_p_values, pFile)
pFile.close()
Example #36
     # cue label
     _ = ax.annotate('cue',
                     xy=(stim_times[1], stim_ymax + thk),
                     xytext=(0, 1.5),
                     textcoords='offset points',
                     fontsize=9,
                     fontstyle='italic',
                     ha='center',
                     va='bottom',
                     color=cue)
 # stats
 if plot_signif:
     thresh = -1 * distributions.t.ppf(0.05 / 2, len(contr_diff) - 1)
     result = spatio_temporal_cluster_1samp_test(contr_diff,
                                                 threshold=thresh,
                                                 stat_fun=stat_fun,
                                                 n_jobs=6,
                                                 buffer_size=None,
                                                 n_permutations=np.inf)
     tvals, clusters, cluster_pvals, H0 = result
     signif = np.where(np.array([p <= 0.05 for p in cluster_pvals]))[0]
     signif_clusters = [clusters[s] for s in signif]
     signif_cluster_pvals = cluster_pvals[signif]
     # plot stats
     for clu, pv in zip(signif_clusters, signif_cluster_pvals):
         '''
         # this index tells direction of tval, hence could be used to
         # decide which color to draw the significant cluster region
         # based on which curve is higher:
         idx = (np.sign(tvals[clu[0][0], 0]).astype(int) + 1) // 2
         '''
         clu = clu[0]
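        # Hedged sketch of how the loop might finish: shade the temporal
        # extent of this cluster (assumes a `times` vector aligned with
        # the samples in contr_diff).
        ax.axvspan(times[clu[0]], times[clu[-1]], color='0.8', alpha=0.5,
                   zorder=0)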
Example #37
0
    def ttest(self, threshold_dict=None):
        """ Calculate one sample t-test across each voxel (two-sided)

        Args:
            threshold_dict: a dictionary of threshold parameters {'unc': .001} or {'fdr': .05} or {'permutation': 'tfce', 'n_permutations': 5000}

        Returns:
            out: dictionary of regression statistics in Brain_Data instances {'t','p'}
        
        """

        t = deepcopy(self)
        p = deepcopy(self)

        if threshold_dict is not None:
            if 'permutation' in threshold_dict:
                # Convert data to correct shape (subjects, time, space)
                data_convert_shape = deepcopy(self.data)
                data_convert_shape = np.expand_dims(data_convert_shape, axis=1)
                if 'n_permutations' in threshold_dict:
                    n_permutations = threshold_dict['n_permutations']
                else:
                    n_permutations = 1000
                    warnings.warn(
                        'n_permutations not set:  running with 1000 permutations'
                    )

                if 'connectivity' in threshold_dict:
                    connectivity = threshold_dict['connectivity']
                else:
                    connectivity = None

                if 'n_jobs' in threshold_dict:
                    n_jobs = threshold_dict['n_jobs']
                else:
                    n_jobs = 1

                if threshold_dict['permutation'] == 'tfce':
                    perm_threshold = dict(start=0, step=0.2)
                else:
                    perm_threshold = None

                if 'stat_fun' in threshold_dict:
                    stat_fun = threshold_dict['stat_fun']
                else:
                    stat_fun = ttest_1samp_no_p

                t.data, clusters, p_values, h0 = spatio_temporal_cluster_1samp_test(
                    data_convert_shape,
                    tail=0,
                    threshold=perm_threshold,
                    stat_fun=stat_fun,
                    connectivity=connectivity,
                    n_permutations=n_permutations,
                    n_jobs=n_jobs)

                t.data = t.data.squeeze()

                p = deepcopy(t)
                for cl, pval in zip(clusters, p_values):
                    p.data[cl[1][0]] = pval
            else:
                t.data, p.data = ttest_1samp(self.data, 0, 0)
        else:
            t.data, p.data = ttest_1samp(self.data, 0, 0)

        if threshold_dict is not None:
            if type(threshold_dict) is dict:
                if 'unc' in threshold_dict:
                    thr = threshold_dict['unc']
                elif 'fdr' in threshold_dict:
                    thr = fdr(p.data, q=threshold_dict['fdr'])
                elif 'permutation' in threshold_dict:
                    thr = .05
                thr_t = threshold(t, p, thr)
                out = {'t': t, 'p': p, 'thr_t': thr_t}
            else:
                raise ValueError(
                    "threshold_dict is not a dictionary.  Make sure it is in the form of {'unc':.001} or {'fdr':.05}"
                )
        else:
            out = {'t': t, 'p': p}

        return out
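A usage sketch for the permutation branch, where dat stands in for any Brain_Data instance (hypothetical variable):

out = dat.ttest(threshold_dict={'permutation': 'tfce',
                                'n_permutations': 5000, 'n_jobs': 2})
thr_t = out['thr_t']  # t-map thresholded at p < .05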
Example #38
0
def extract_roi(stc, src, label=None, thresh=0.5):
    """Extract a functional ROI.

    Parameters
    ----------
    stc : instance of SourceEstimate
        The source estimate data. The maximum positive peak will be selected.
        If you want the maximum negative peak, consider passing
        abs(stc) or -stc.
    src : instance of SourceSpaces
        The associated source space.
    label : instance of Label | None
        The label within which to select the peak.
        Can be None to use the entire STC.
    thresh : float
        Threshold value (relative to the peak value) above which vertices
        will be taken.

    Returns
    -------
    func_label : instance of Label
        The functional ROI.
    max_vert : int
        The peak vertex number within its hemisphere.
    max_vidx : int
        The row index of the peak vertex in ``stc.data``.
    max_tidx : int
        The column index of the peak time point in ``stc.data``.
    """
    assert isinstance(stc, SourceEstimate)
    if label is None:
        stc_label = stc.copy()
    else:
        stc_label = stc.in_label(label)
    del label
    max_vidx, max_tidx = np.unravel_index(np.argmax(stc_label.data),
                                          stc_label.data.shape)
    max_val = stc_label.data[max_vidx, max_tidx]
    if max_vidx < len(stc_label.vertices[0]):
        hemi = 'lh'
        max_vert = stc_label.vertices[0][max_vidx]
        max_vidx = list(stc.vertices[0]).index(max_vert)
    else:
        hemi = 'rh'
        max_vert = stc_label.vertices[1][max_vidx - len(stc_label.vertices[0])]
        max_vidx = list(stc.vertices[1]).index(max_vert)
        max_vidx += len(stc.vertices[0])
    del stc_label
    assert max_val == stc.data[max_vidx, max_tidx]

    # Get contiguous vertices within 50%
    threshold = max_val * thresh
    connectivity = spatial_src_adjacency(src, verbose='error')  # holes
    _, clusters, _, _ = spatio_temporal_cluster_1samp_test(
        np.array([stc.data]), threshold, n_permutations=1,
        stat_fun=lambda x: x.mean(0), tail=1,
        connectivity=connectivity)
    for cluster in clusters:
        if max_vidx in cluster[0] and max_tidx in cluster[1]:
            break  # found our cluster
    else:  # in case we did not "break"
        raise RuntimeError('Clustering failed somehow!')
    if hemi == 'lh':
        verts = stc.vertices[0][cluster]
    else:
        verts = stc.vertices[1][cluster - len(stc.vertices[0])]
    func_label = Label(verts, hemi=hemi, subject=stc.subject)
    func_label = func_label.fill(src)
    return func_label, max_vert, max_vidx, max_tidx
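A usage sketch, assuming stc and src come from earlier in the pipeline (hypothetical variables); per the docstring, pass abs(stc) to target the strongest peak regardless of sign:

func_label, max_vert, max_vidx, max_tidx = extract_roi(abs(stc), src,
                                                       thresh=0.5)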
    for ci in range(X.shape[2]):
        X[si, :, ci] = np.convolve(X[si, :, ci], gaussian, 'same')

###############################################################################
# Do some statistics

#    Note that X needs to be a multi-dimensional array of shape
#    samples (subjects) x time x space, so we reshape accordingly
X = X.reshape((n_subjects, 1, n_src))

#    Now let's do some clustering using the standard method. Note that not
#    specifying a connectivity matrix implies grid-like connectivity, which
#    we want here:
T_obs, clusters, p_values, H0 = \
    spatio_temporal_cluster_1samp_test(X, n_jobs=2, threshold=threshold,
                                       connectivity=connectivity,
                                       tail=1, n_permutations=n_permutations)

#    Let's put the cluster data in a readable format
ps = np.zeros(width * width)
for cl, p in zip(clusters, p_values):
    ps[cl[1]] = -np.log10(p)
ps = ps.reshape((width, width))
T_obs = T_obs.reshape((width, width))

#    Doing a Bonferroni correction on these data is simple:
p = stats.distributions.t.sf(T_obs, n_subjects - 1)
p_bon = -np.log10(bonferroni_correction(p)[1])

#    Now let's do some clustering using the standard method with "hat":
stat_fun = partial(ttest_1samp_no_p, sigma=sigma)
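The fragment stops after defining the "hat" statistic; a sketch of the repeated clustering call it presumably feeds, mirroring the call above:

T_obs_hat, clusters_hat, p_values_hat, H0_hat = \
    spatio_temporal_cluster_1samp_test(X, n_jobs=2, threshold=threshold,
                                       connectivity=connectivity, tail=1,
                                       n_permutations=n_permutations,
                                       stat_fun=stat_fun)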
               coords_as_verts=True,
               hemi='rh',
               color='blue',
               scale_factor=0.6,
               alpha=0.5)
plt.figure()
plt.plot(1e3 * stc.times, stc.data[::100, :].T)
plt.xlabel('time (ms)')
plt.ylabel('sLORETA value')
plt.show()

#%% SPATIOTEMPORAL CLUSTERING. NOTE: this took too much memory, so it is no longer run.
print(stc.data.shape)  # run the def above with 'evoked_LSF'
print(stc2.data.shape)  # run the def above with 'evoked_HSF', saved once in stc2
n_vertices_sample, n_times = stc2.data.shape
n_subjects = 3

np.random.seed(0)
X = randn(n_vertices_sample, n_times, n_subjects, 2) * 10
X[:, :, :, 0] += stc2.data[:, :, np.newaxis]
X[:, :, :, 1] += stc.data[:, :, np.newaxis]

X = np.abs(X)  # only magnitude
X = X[:, :, :, 0] - X[:, :, :, 1]  # make paired contrast
X = np.transpose(X, [2, 1, 0])
print(X)
print('Clustering.')
T_obs, clusters, cluster_p_values, H0 = clu = \
    spatio_temporal_cluster_1samp_test(X, n_jobs=1, threshold=0.05)
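Note that threshold=0.05 above is interpreted by MNE as a raw t-value, not a p-value; a hedged sketch of deriving a genuine t threshold instead, with the default buffer_size kept (rather than None) to bound memory use:

from scipy import stats
t_threshold = -stats.distributions.t.ppf(0.05 / 2., n_subjects - 1)
T_obs, clusters, cluster_p_values, H0 = clu = \
    spatio_temporal_cluster_1samp_test(X, n_jobs=1, threshold=t_threshold,
                                       buffer_size=1000)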
                                                 np.array(gat.scores_)[:, :, None]),
                                                axis=2)

                # STATS
                # ------ Parameters XXX to be transfered to config?
                alpha = 0.05
                n_permutations = 2 ** 11
                threshold = dict(start=.2, step=.2)

                X = scores.transpose((2, 0, 1)) - .5

                # ------ Run stats
                T_obs_, clusters, p_values, _ = spatio_temporal_cluster_1samp_test(
                                                       X,
                                                       out_type='mask',
                                                       n_permutations=n_permutations,
                                                       connectivity=None,
                                                       threshold=threshold,
                                                       n_jobs=-1)

                # ------ combine clusters and retrieve min p_values for each feature
                p_values = np.min(np.logical_not(clusters) +
                                  [clusters[c] * p for c, p in enumerate(p_values)],
                                  axis=0)
                x, y = np.meshgrid(gat.train_times_['times'],
                                   gat.test_times_['times'][0],
                                   copy=False, indexing='xy')


                # PLOT
                # ------ Plot GAT
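                # Hedged sketch of the GAT plot the snippet was building
                # toward (plt assumed to be matplotlib.pyplot):
                fig, ax = plt.subplots()
                im = ax.pcolormesh(x, y, np.mean(X, axis=0).T,
                                   cmap='RdBu_r', vmin=-.2, vmax=.2)
                ax.contour(x, y, (p_values < alpha).T, levels=[.5],
                           colors='k')
                ax.set_xlabel('Train time (s)')
                ax.set_ylabel('Test time (s)')
                fig.colorbar(im, ax=ax)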
Example #42
0
    for fname in subjects:
        with open(fname, 'rb') as f:
            [gat, score] = pickle.load(f)
        scores.append(score)
    scores = np.array(scores) - .5
    gat_list = [gat]


# STATS #######################################################################
from mne.stats import spatio_temporal_cluster_1samp_test
start = np.where(gat_list[0].train_times_['times'] >= 0.)[0][0]
# start = 0
X = scores[:, start::1, start::1]
T_obs_, clusters, p_values_, _ = spatio_temporal_cluster_1samp_test(
    X,
    out_type='mask',
    n_permutations=128,
    threshold=dict(start=2, step=2.),
    n_jobs=4)

p_values = p_values_.reshape(X.shape[1:])
h = p_values < .05


# PLOT ########################################################################
import matplotlib.pyplot as plt
from sandbox.graphs.utils import plot_graph, annotate_graph, animate_graph

times = 1e3 * gat_list[0].train_times_['times'][start:]
mean_scores = np.mean(X, axis=0)

# Summary figure
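The summary figure itself is cut off; a hedged sketch of one way it could be drawn from the quantities above:

fig, ax = plt.subplots()
extent = [times[0], times[-1], times[0], times[-1]]
im = ax.imshow(mean_scores.T, extent=extent, origin='lower',
               cmap='RdBu_r', vmin=-.2, vmax=.2)
ax.contour(h.T, levels=[.5], colors='k', extent=extent, origin='lower')
ax.set_xlabel('Train time (ms)')
ax.set_ylabel('Test time (ms)')
fig.colorbar(im, ax=ax)
plt.show()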