def run_permutation_ttest(tmin=None, tmax=None, p_threshold=0.05,
                          n_permutations=1024, inverse_method='dSPM',
                          n_jobs=1):
    for cond_id, cond_name in enumerate(events_id.keys()):
        # todo: calc the 36
        controls_data = get_morphed_epochs_stcs(tmin, tmax, cond_name,
                                                get_healthy_controls(), 36,
                                                inverse_method)
        controls_data = abs(controls_data)
        for patient in get_patients():
            try:
                print(patient, cond_name)
                patient_data = get_morphed_epochs_stcs(tmin, tmax, cond_name,
                                                       [patient], None,
                                                       inverse_method)
                patient_data = abs(patient_data)
                print(patient_data.shape, controls_data.shape)
                data = controls_data - patient_data
                del patient_data
                gc.collect()
                data = np.transpose(data, [2, 1, 0])
                connectivity = spatial_tris_connectivity(grade_to_tris(5))
                t_threshold = -stats.distributions.t.ppf(p_threshold / 2.,
                                                         data.shape[0] - 1)
                T_obs, clusters, cluster_p_values, H0 = \
                    spatio_temporal_cluster_1samp_test(
                        data, connectivity=connectivity, n_jobs=n_jobs,
                        threshold=t_threshold, n_permutations=n_permutations)
                results_file_name = op.join(
                    LOCAL_ROOT_DIR, 'permutation_ttest_results',
                    '{}_{}_{}'.format(patient, cond_name, inverse_method))
                np.savez(results_file_name, T_obs=T_obs, clusters=clusters,
                         cluster_p_values=cluster_p_values, H0=H0)
                good_cluster_inds = np.where(cluster_p_values < 0.05)[0]
                print('good_cluster_inds: {}'.format(good_cluster_inds))
            except Exception:
                print('bummer! {}, {}'.format(patient, cond_name))
                print(traceback.format_exc())
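A note on the cluster-forming threshold used above and in many snippets below (a hedged aside, not part of any original example): because Student's t distribution is symmetric, negating the lower-tail quantile gives the two-tailed critical value, so an equivalent and arguably clearer spelling is:

# Two-tailed t critical value for cluster formation; df = n_subjects - 1.
# By symmetry of the t distribution, -t.ppf(p/2, df) == t.ppf(1 - p/2, df).
from scipy import stats

p_threshold = 0.05
n_subjects = 14  # illustrative value
t_threshold = stats.t.ppf(1 - p_threshold / 2., n_subjects - 1)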
def stat_clus(X, tstep, time_thre=0, n_per=8192, p_threshold=0.01, p=0.05,
              fn_clu_out=None):
    print('Computing connectivity.')
    connectivity = spatial_tris_connectivity(grade_to_tris(5))
    # Note that X needs to be a multi-dimensional array of shape
    # samples (subjects) x time x space, so we permute dimensions
    X = np.transpose(X, [2, 1, 0])
    n_subjects = X.shape[0]
    fsave_vertices = [np.arange(X.shape[-1] // 2),
                      np.arange(X.shape[-1] // 2)]
    # Now let's actually do the clustering. This can take a long time...
    # Here we set the threshold quite high to reduce computation.
    t_threshold = -stats.distributions.t.ppf(p_threshold / 2., n_subjects - 1)
    print('Clustering.')
    max_step = int(time_thre * 0.001 / tstep) + 1
    T_obs, clusters, cluster_p_values, H0 = clu = \
        spatio_temporal_cluster_1samp_test(X, connectivity=connectivity,
                                           n_jobs=1, max_step=max_step,
                                           threshold=t_threshold,
                                           n_permutations=n_per)
    # Now select the clusters that are sig. at p < 0.05 (note that this value
    # is multiple-comparisons corrected).
    good_cluster_inds = np.where(cluster_p_values < p)[0]
    print('the number of significant clusters is: %d'
          % good_cluster_inds.size)

    ###########################################################################
    # Save the clusters as stc file
    # ----------------------
    assert good_cluster_inds.size != 0, \
        ('Current p_threshold is %f; maybe you need to set a lower '
         'p_threshold' % p_threshold)
    np.savez(fn_clu_out, clu=clu, tstep=tstep, fsave_vertices=fsave_vertices)
def stats(X, connectivity=None, n_jobs=-1):
    """Cluster statistics to control for multiple comparisons.

    Parameters
    ----------
    X : array, shape (n_samples, n_space, n_times)
        The data; chance is assumed to be 0.
    connectivity : None | array, shape (n_space, n_space)
        The connectivity matrix used for cluster correction. If None,
        neighboring cells of X are used.
    n_jobs : int
        The number of parallel processors.
    """
    X = np.array(X)
    X = X[:, :, None] if X.ndim == 2 else X
    T_obs_, clusters, p_values, _ = spatio_temporal_cluster_1samp_test(
        X, out_type='mask', stat_fun=_stat_fun, n_permutations=1000,
        n_jobs=n_jobs, connectivity=connectivity)
    p_values_ = np.ones_like(X[0]).T
    for cluster, pval in zip(clusters, p_values):
        p_values_[cluster.T] = pval
    return np.squeeze(p_values_).T
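A minimal usage sketch for the `stats` helper above, assuming `_stat_fun` is defined elsewhere in the same module (it is not shown in this snippet); the input here is simulated decoding scores tested against chance:

import numpy as np

rng = np.random.RandomState(0)
# subjects x train times x test times, hovering around a chance level of 0.5
scores = rng.randn(20, 30, 30) * 0.05 + 0.5
p_vals = stats(scores - 0.5)  # test the deviation from chance against 0
sig_mask = p_vals < 0.05      # same time x time shape as one subject's scores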
def stats_tfce(X, n_permutations=2 ** 10, threshold=dict(start=0.1, step=0.1),
               n_jobs=2):
    X = np.array(X)
    T_obs_, clusters, p_values, _ = spatio_temporal_cluster_1samp_test(
        X, out_type="mask", stat_fun=_stat_fun, n_permutations=n_permutations,
        threshold=threshold, n_jobs=n_jobs)
    p_values = p_values.reshape(X.shape[1:])
    return p_values
def stats_tfce(X, n_permutations=1000, threshold=None, n_jobs=2):
    # calculate p-values using cluster permutation test
    _, _, p_values, _ = spatio_temporal_cluster_1samp_test(
        X, out_type='indices', stat_fun=_stat_fun,
        n_permutations=n_permutations, threshold=threshold, n_jobs=n_jobs)
    # note: reshaping per-cluster p-values to the data shape assumes
    # TFCE-style output, where every point forms its own "cluster"
    p_values = p_values.reshape(X.shape[1:])
    return p_values
def stats_tfce(X, n_permutations=2 ** 10, threshold=dict(start=.1, step=.1),
               n_jobs=2):
    # threshold-free cluster enhancement (TFCE) for GATs
    import numpy as np
    from mne.stats import spatio_temporal_cluster_1samp_test
    X = np.array(X)
    T_obs_, clusters, p_values, _ = spatio_temporal_cluster_1samp_test(
        X, out_type='mask', stat_fun=_stat_fun, n_permutations=n_permutations,
        threshold=threshold, n_jobs=n_jobs)
    p_values = p_values.reshape(X.shape[1:])
    return p_values
def perform_statistics_2(morphed_data, parameter_cache, vector, p_value=None):
    """Performs the statistical analysis using spatial_tris_connectivity.

    :param morphed_data: Morphed data obtained from the morph_data function.
    :param parameter_cache: Morphed parameter cache obtained from the
        morph_data function.
    :param vector: Method to perform modelling ('sLORETA' etc.)
    :param p_value: Statistical p-value
    :return: clu, good_cluster_inds
    """
    # Unpack parameter cache dictionary
    n_subjects = parameter_cache['n_subjects']
    n_times = parameter_cache['n_times']

    # Take the absolute value
    X = np.abs(morphed_data)

    # Obtain the paired contrast
    if vector is False:
        X = X[:, :, :, 0] - X[:, :, :, 1]  # Dimensions: (space, time, subjects)
    else:
        X = X[:, :, :, :, 0] - X[:, :, :, :, 1]  # (space, vector, time, subjects)

    print('Computing connectivity... ')
    connectivity_2 = mne.spatio_temporal_tris_connectivity(
        grade_to_tris(5), n_times)

    # Note that X needs to be a multi-dimensional array of shape
    # [samples (subjects) x time x space]
    if vector is False:
        X = np.transpose(X, [2, 1, 0])
    else:
        X = np.transpose(X, [3, 2, 1, 0])  ##### TO DOUBLE CHECK #####

    # Perform the clustering
    p_threshold = p_value  # 0.001
    t_threshold = -stats.distributions.t.ppf(p_threshold / 2., n_subjects - 1)
    print('Clustering... ')
    T_obs, clusters, cluster_p_values, H0 = spatio_temporal_cluster_1samp_test(
        X, connectivity=connectivity_2, n_jobs=1, threshold=t_threshold)

    # Pack the outputs into a tuple
    clu = (T_obs, clusters, cluster_p_values, H0)

    # Select the clusters that are sig. at p < p_value
    # (note this value is multiple-comparisons corrected)
    good_cluster_inds = np.where(cluster_p_values < p_value)[0]

    return clu, good_cluster_inds
def stats_decoding(scores, n_jobs=-1):
    chance = 0.5
    x = scores - chance
    X = x[:, :, None] if x.ndim == 2 else x
    T_obs_, clusters, p_values, _ = spatio_temporal_cluster_1samp_test(
        X, out_type='mask', n_permutations=2 ** 12, n_jobs=n_jobs,
        connectivity=None)
    p_values_ = np.ones_like(X[0]).T
    for cluster, pval in zip(clusters, p_values):
        p_values_[cluster.T] = pval
    p_vals = np.squeeze(p_values_).T
    return p_vals
def stat_clus(X, tstep, n_per=8192, p_threshold=0.01, p=0.05, fn_clu_out=None):
    '''Calculate significant clusters using a 1-sample t-test.

    Parameters
    ----------
    X : array
        The shape of X should be (vertices, timepoints, subjects).
    tstep : float
        The interval between timepoints.
    n_per : int
        The number of permutations for the t-test.
    p_threshold : float
        The significance threshold for forming clusters.
    p : float
        The corrected p-value for multiple comparisons.
    fn_clu_out : string
        The filename for saving clusters.
    '''
    print('Computing connectivity.')
    connectivity = spatial_tris_connectivity(grade_to_tris(5))
    # Note that X needs to be a multi-dimensional array of shape
    # samples (subjects) x time x space, so we permute dimensions
    X = np.transpose(X, [2, 1, 0])
    n_subjects = X.shape[0]
    fsave_vertices = [np.arange(X.shape[-1] // 2),
                      np.arange(X.shape[-1] // 2)]
    # Now let's actually do the clustering. This can take a long time...
    # Here we set the threshold quite high to reduce computation.
    t_threshold = -stats.distributions.t.ppf(p_threshold / 2., n_subjects - 1)
    print('Clustering.')
    T_obs, clusters, cluster_p_values, H0 = clu = \
        spatio_temporal_cluster_1samp_test(X, connectivity=connectivity,
                                           n_jobs=1, threshold=t_threshold,
                                           n_permutations=n_per)
    # Now select the clusters that are sig. at p < 0.05 (note that this value
    # is multiple-comparisons corrected).
    good_cluster_inds = np.where(cluster_p_values < p)[0]
    print('the number of significant clusters is: %d'
          % good_cluster_inds.size)
    # Save the clusters as stc file
    np.savez(fn_clu_out, clu=clu, tstep=tstep, fsave_vertices=fsave_vertices)
    assert good_cluster_inds.size != 0, \
        ('Current p_threshold is %f; maybe you need to set a lower '
         'p_threshold' % p_threshold)
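A hypothetical call of `stat_clus` above, with X assembled in the (vertices, timepoints, subjects) order its docstring asks for; the vertex count matches the fsaverage ico-5 grade implied by spatial_tris_connectivity(grade_to_tris(5)):

import numpy as np

n_vertices, n_times, n_subjects = 20484, 50, 12  # ico-5 fsaverage: 2 x 10242
X = np.random.randn(n_vertices, n_times, n_subjects)  # real STC data goes here
# With pure noise the final assert will typically fire (no clusters survive);
# this only illustrates the expected shapes and argument order.
stat_clus(X, tstep=0.001, n_per=1024, p_threshold=0.01, p=0.05,
          fn_clu_out='clusters.npz')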
def gat_stats(X):
    """Statistical test applied across subjects."""
    from mne.stats import spatio_temporal_cluster_1samp_test
    # check input
    X = np.array(X)
    X = X[:, :, None] if X.ndim == 2 else X
    # stats function reports a p-value for each cluster
    T_obs_, clusters, p_values, _ = spatio_temporal_cluster_1samp_test(
        X, out_type='mask', n_permutations=2 ** 12, n_jobs=-1, verbose=False)
    # format p_values to get the same dimensionality as X
    p_values_ = np.ones_like(X[0]).T
    for cluster, pval in zip(clusters, p_values):
        p_values_[cluster.T] = pval
    return np.squeeze(p_values_).T
def myStats(X, connectivity=None, n_jobs=-1, tail=0):
    X = np.array(X)
    X = X[:, :, None] if X.ndim == 2 else X
    T_obs_, clusters, p_values, _ = spatio_temporal_cluster_1samp_test(
        X, out_type='mask', stat_fun=_stat_fun, n_permutations=5000,
        n_jobs=n_jobs, connectivity=connectivity, tail=tail)
    p_values_ = np.ones_like(X[0].T)
    for cluster, pval in zip(clusters, p_values):
        p_values_[cluster.T] = pval
    return np.squeeze(p_values_).T
def mne_spatio_temporal_cluster_1samp_test(X, **kwargs):
    threshold_tfce = dict(start=0, step=0.2)
    # from MNE:
    # X : array, shape (n_observations, n_times, n_vertices)
    #     The data to be clustered. The first dimension should correspond to
    #     the difference between paired samples (observations) in two
    #     conditions.
    # T_obs, _, p_values, _ = spatio_temporal_cluster_1samp_test(
    #     X, n_permutations=1000, threshold=threshold_tfce, tail=1,
    #     n_jobs=1, buffer_size=None, connectivity=None)
    T_obs, clusters, cluster_p_values, H0 = spatio_temporal_cluster_1samp_test(
        X, n_permutations=1000, threshold=None, tail=1, n_jobs=1,
        buffer_size=None, connectivity=None, out_type='mask')
    return T_obs, clusters, cluster_p_values, H0
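The `threshold_tfce` dict defined (but left unused) above is how MNE enables threshold-free cluster enhancement: passing a dict with `start` and `step` keys instead of a scalar switches the clustering to TFCE, in which case every data point receives its own corrected p-value. A sketch with simulated data:

import numpy as np
from mne.stats import spatio_temporal_cluster_1samp_test

X = np.random.randn(15, 40, 64)  # observations x times x vertices/sensors
T_obs, _, p_values, _ = spatio_temporal_cluster_1samp_test(
    X, threshold=dict(start=0, step=0.2),  # TFCE instead of a fixed t cutoff
    n_permutations=256, tail=1, n_jobs=1)
p_map = p_values.reshape(X.shape[1:])  # one corrected p per time/space point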
def __init__(self, insts, alpha=0.05, **kwargs):
    """
    Parameters
    ----------
    insts : list of Epochs | list of lists of Evoked
        The instances to contrast; their data arrays have
        dims n * space * time.
    alpha : float
        significance level

    Can take spatio_temporal_cluster_1samp_test() parameters.
    """
    from mne.stats import spatio_temporal_cluster_1samp_test

    # Convert lists of evokeds into Epochs
    insts = [Evokeds_to_Epochs(i) if type(i) is list else i for i in insts]

    # Apply contrast: n * space * time
    X = np.array(insts[0]._data - insts[-1]._data).transpose([0, 2, 1])

    # Run stats
    self.T_obs_, clusters, p_values, _ = \
        spatio_temporal_cluster_1samp_test(X, out_type='mask', **kwargs)

    # Save sorted significant clusters
    inds = np.argsort(p_values)
    clusters = np.array(clusters)[inds, :, :]
    p_values = p_values[inds]
    inds = np.where(p_values < alpha)[0]
    self.sig_clusters_ = clusters[inds, :, :]
    self.p_values_ = p_values[inds]

    # By default, keep metadata from the first epoch
    self.insts = insts
    self.times = self.insts[0].times
    self.info = self.insts[0].info
    self.ch_names = self.insts[0].ch_names
    return
from functools import partial

from scipy import stats as stats

tail = 0
p_threshold = 0.05

src_fname = '/nashome1/wexu/MNE_data/AVLearn/subjects/fsaverage/bem/fsaverage-ico-5-src.fif'
src = mne.read_source_spaces(src_fname)
connectivity = mne.spatial_src_connectivity(src)

t_threshold = -stats.distributions.t.ppf(p_threshold / (1. + (tail == 0)),
                                         len(X) - 1)

sigma = 1e-3  # sigma for the "hat" method
stat_fun_hat = partial(ttest_1samp_no_p, sigma=sigma)

print('Clustering.')
T_obs, clusters, cluster_p_values, H0 = clu = \
    spatio_temporal_cluster_1samp_test(X, connectivity=connectivity,
                                       n_jobs=-1, stat_fun=stat_fun_hat,
                                       threshold=t_threshold,
                                       n_permutations=1000)

print('summary stats')
good_cluster_inds = np.where(cluster_p_values < 0.05)[0]
for ind in good_cluster_inds:
    inds_t, inds_v = clusters[ind]
    inds_t = inds_t * tstep
    inds_p = cluster_p_values[ind]
    print(' cluster %d \n p value: %f \n time: %s \n clusters: %s '
          % (ind, inds_p, inds_t, inds_v))

print('Visualizing clusters.')
fsave_vertices = [np.arange(10242), np.arange(10242)]
stc_all_cluster_vis = summarize_clusters_stc(clu, tstep=0.005, tmin=stat_tmin,
                                             vertices=fsave_vertices,
                                             subject='fsaverage',
                                             p_thresh=0.05)

Folder_name = '/nashome1/wexu/Results/MNE_Results/AVLearn/'
def cluster_test_main(gat, A, chance_level=np.pi / 6, alpha=0.05,
                      n_permutations=2 ** 11,
                      threshold=dict(start=1., step=.2), lims=None,
                      ylabel='Performance', title=None):
    """Take an array A, run a cluster analysis and plot associated graphs.

    Input:
    gat: gat object, used only to retrieve useful information for plotting,
        like time points and gat.plot functions. gat.scores_ is replaced by A.
    A: n-dimensional array representing GAT or diagonal performance.
        If A is diagonal, its dimensions should be n_subjects * n_time.
        If A is GAT, its dimensions should be n_subjects * n_time * n_time.
    chance_level: chance level to test against. pi/6 for circular data and
        0 for deviations are normally used.
    """
    # check that A is an array, otherwise convert
    if not (type(A).__module__ == np.__name__):
        A = np.array(A)
    # define X
    X = A - chance_level
    # define time points
    times = gat.train_times['times_']

    # ------ Run stats
    T_obs_, clusters, p_values, _ = spatio_temporal_cluster_1samp_test(
        X, out_type='mask', n_permutations=n_permutations, connectivity=None,
        threshold=threshold, n_jobs=-1)

    # ------ combine clusters and retrieve min p_values for each feature
    p_values = np.min(np.logical_not(clusters) +
                      [clusters[c] * p for c, p in enumerate(p_values)],
                      axis=0)
    x, y = np.meshgrid(gat.train_times['times_'],
                       gat.test_times_['times_'][0],
                       copy=False, indexing='xy')

    # PLOT
    # ------ Plot GAT
    gat.scores_ = np.mean(A, axis=0)
    if lims is None:
        lims = [np.min(gat.scores_), np.max(gat.scores_)]
    fig = gat.plot(vmin=lims[0], vmax=lims[1], show=False)
    ax = fig.axes[0]
    ax.contour(x, y, p_values < alpha, colors='black', levels=[0])
    # plt.title(title)
    plt.show()

    # ------ Plot Decoding
    scores_diag = np.transpose([A[:, t, t] for t in range(len(times))])
    fig, ax = plt.subplots(1)
    plot_eb(times, np.mean(scores_diag, axis=0),
            np.std(scores_diag, axis=0) / np.sqrt(scores_diag.shape[0]),
            color='blue', ax=ax)
    ymin, ymax = ax.get_ylim()
    sig_times = times[np.where(np.diag(p_values) < alpha)[0]]
    sfreq = (times[1] - times[0]) / 1000
    fill_betweenx_discontinuous(ax, ymin, ymax, sig_times, freq=sfreq,
                                color='gray', alpha=.3)
    ax.axhline(chance_level, color='k', linestyle='--', label="Chance level")
    ax.set_xlabel('Time (s)')
    ax.set_ylabel(ylabel)
    # plt.title(title)
    plt.show()
X_dys = np.transpose(X_dys, [1, 2, 0])
X_con = np.transpose(X_con, [1, 2, 0])
X_all = np.concatenate((X_con, X_dys), axis=0)

# find clims

# clustering
n_subject_pairs = X_all.shape[0]
t_threshold = -stats.distributions.t.ppf(p_initial_threshold / 2.,
                                         n_subject_pairs - 1)
print('Clustering.')
stat_fun = ttest_1samp_no_p
T_obs, clusters, cluster_p_values, H0 = clu = \
    spatio_temporal_cluster_1samp_test(X_all, connectivity=connectivity,
                                       n_jobs=4, n_permutations=100,
                                       threshold=t_threshold, t_power=1,
                                       buffer_size=None, out_type='indices',
                                       verbose=True, stat_fun=stat_fun)

# Now let's build a convenient representation of each cluster, where each
# cluster becomes a "time point" in the SourceEstimate
tstep = stc.tstep
fsave_vertices = [np.arange(10242), np.arange(10242)]
stc_all_cluster_vis = summarize_clusters_stc_AT(clu, vertices=fsave_vertices,
                                                subject='fsaverage')
clim = dict(kind='value', lims=[
    np.percentile(stc_all_cluster_vis.data[:, 0], clim_low),
    np.percentile(stc_all_cluster_vis.data[:, 0], clim_mid),
    np.percentile(stc_all_cluster_vis.data[:, 0], clim_high)])
def sample1_clus(fn_list, n_per=8192, pct=99, p=0.01, tail=1, del_vers=None,
                 n_jobs=1):
    '''Calculate significant clusters using a 1-sample t-test.

    Parameters
    ----------
    fn_list : list
        Paths of group arrays.
    n_per : int
        The number of permutations for the t-test.
    pct : int or float
        The percentile of the baseline distribution.
    p : float
        The corrected p-value for multiple comparisons.
    tail : 1 or 0
        If tail=1, a 1-tailed test; if tail=0, a 2-tailed test.
    del_vers : None or _exclu_vers
        If '_exclu_vers', delete the vertices in the medial wall.
    '''
    print('Computing connectivity.')
    connectivity = spatial_tris_connectivity(grade_to_tris(5))
    # Use the percentile of the baseline array as the distribution threshold
    for fn_npz in fn_list:
        npz = np.load(fn_npz)
        tstep = npz['tstep'].flatten()[0]
        # Note that X needs to be a multi-dimensional array of shape
        # samples (subjects) x time x space, so we permute dimensions
        X = npz['X']
        X_b = X[1]
        X = X[0]
        fn_path = os.path.dirname(fn_npz)
        name = os.path.basename(fn_npz)
        if tail == 1:
            fn_out = fn_path + '/clu1sample_%s' % name[:name.rfind('.npz')] \
                + '_%d_%dtail_pct%.3f.npz' % (n_per, tail, pct)
            X = np.abs(X)
            t_threshold = np.percentile(np.abs(X_b), pct)
        elif tail == 0:
            fn_out = fn_path + '/clu1sample_%s' % name[:name.rfind('.npz')] \
                + '_%d_%dtail_pct%.3f.npz' % (n_per, tail + 2, pct)
            t_threshold = np.percentile(X_b, pct)
        fsave_vertices = [np.arange(X.shape[-1] // 2),
                          np.arange(X.shape[-1] // 2)]
        # n_subjects = X.shape[0]
        # t_threshold = -stats.distributions.t.ppf(
        #     0.01 / (1 + (tail == 0)), n_subjects - 1)
        print('Clustering.')
        T_obs, clusters, cluster_p_values, H0 = clu = \
            spatio_temporal_cluster_1samp_test(X, connectivity=connectivity,
                                               n_jobs=n_jobs,
                                               threshold=t_threshold,
                                               n_permutations=n_per,
                                               tail=tail,
                                               spatial_exclude=del_vers)
        # Now select the clusters that are sig. at p < 0.05 (note that this
        # value is multiple-comparisons corrected).
        good_cluster_inds = np.where(cluster_p_values < p)[0]
        print('the number of significant clusters is: %d'
              % good_cluster_inds.size)
        # Save the clusters as stc file
        np.savez(fn_out, clu=clu, tstep=tstep, fsave_vertices=fsave_vertices)
        assert good_cluster_inds.size != 0, \
            ('No significant clusters; maybe you need to set a lower '
             'pct or p threshold')
# To use an algorithm optimized for spatio-temporal clustering, we
# just pass the spatial connectivity matrix (instead of spatio-temporal)
print('Computing connectivity.')
connectivity = spatial_tris_connectivity(grade_to_tris(5))

# Note that X needs to be a multi-dimensional array of shape
# samples (subjects) x time x space, so we permute dimensions
X = np.transpose(X, [2, 1, 0])

# Now let's actually do the clustering. This can take a long time...
# Here we set the threshold quite high to reduce computation.
p_threshold = 0.001
t_threshold = -stats.distributions.t.ppf(p_threshold / 2., n_subjects - 1)
print('Clustering.')
T_obs, clusters, cluster_p_values, H0 = \
    spatio_temporal_cluster_1samp_test(X, connectivity=connectivity, n_jobs=2,
                                       threshold=t_threshold)

# Now select the clusters that are sig. at p < 0.05 (note that this value
# is multiple-comparisons corrected).
good_cluster_inds = np.where(cluster_p_values < 0.05)[0]

###############################################################################
# Visualize the clusters

print('Visualizing clusters.')

# Now let's build a convenient representation of each cluster, where each
# cluster becomes a "time point" in the SourceEstimate
data = np.zeros((n_vertices_fsave, n_times))
data_summary = np.zeros((n_vertices_fsave, len(good_cluster_inds) + 1))
for ii, cluster_ind in enumerate(good_cluster_inds):
    data.fill(0)
spatial_exclude = np.hstack((fsave_vertices[0][nnl],
                             fsave_vertices[0][nnr] + 10242))

# adjacency = mne.spatial_src_adjacency(inv_op_SD['src'])
source_space = mne.grade_to_tris(5)
# as we only have one hemisphere, we only need half the connectivity
print('Computing connectivity.')
connectivity = mne.spatial_tris_connectivity(source_space)
p_threshold = 0.05
t_threshold = -stats.distributions.t.ppf(p_threshold / 2., n_subjects - 1)

print('Clustering.')
T_obs, clusters, cluster_p_values, H0 = clu = \
    spatio_temporal_cluster_1samp_test(Y, connectivity=connectivity,
                                       n_jobs=10, threshold=t_threshold,
                                       n_permutations=5000,
                                       spatial_exclude=spatial_exclude,
                                       step_down_p=0.05, t_power=1)

print('Visualizing clusters.')
fsave_vertices = [np.arange(10242), np.arange(10242)]
# Build a convenient representation of each cluster, where each
# cluster becomes a "time point" in the SourceEstimate
stc_all_cluster_vis = summarize_clusters_stc(clu, tstep=tstep * 1000,
                                             vertices=fsave_vertices)
idx = stc_all_cluster_vis.time_as_index(times=stc_all_cluster_vis.times)
data = stc_all_cluster_vis.data[:, idx]
        X[si, ri, :] = np.convolve(X[si, ri, :], gaussian, "same")
    for ci in range(X.shape[2]):
        X[si, :, ci] = np.convolve(X[si, :, ci], gaussian, "same")

###############################################################################
# Do some statistics

# Note that X needs to be a multi-dimensional array of shape
# samples (subjects) x time x space, so we permute dimensions
X = X.reshape((n_subjects, 1, n_src))

# Now let's do some clustering using the standard method. Note that not
# specifying a connectivity matrix implies grid-like connectivity, which
# we want here:
T_obs, clusters, p_values, H0 = spatio_temporal_cluster_1samp_test(
    X, n_jobs=2, threshold=threshold, connectivity=connectivity, tail=1,
    n_permutations=n_permutations)

# Let's put the cluster data in a readable format
ps = np.zeros(width * width)
for cl, p in zip(clusters, p_values):
    ps[cl[1]] = -np.log10(p)
ps = ps.reshape((width, width))
T_obs = T_obs.reshape((width, width))

# To do a Bonferroni correction on these data is simple:
p = stats.distributions.t.sf(T_obs, n_subjects - 1)
p_bon = -np.log10(bonferroni_correction(p)[1])

# Now let's do some clustering using the standard method with "hat":
stat_fun = partial(ttest_1samp_no_p, sigma=sigma)
scores = list()
for fname in subjects:
    with open(fname, 'rb') as f:
        [gat, score] = pickle.load(f)
    scores.append(score)
scores = np.array(scores) - .5
gat_list = [gat]

# STATS #######################################################################
from mne.stats import spatio_temporal_cluster_1samp_test

start = np.where(gat_list[0].train_times_['times'] >= 0.)[0][0]
# start = 0
X = scores[:, start::1, start::1]
T_obs_, clusters, p_values_, _ = spatio_temporal_cluster_1samp_test(
    X, out_type='mask', n_permutations=128, threshold=dict(start=2, step=2.),
    n_jobs=4)
p_values = p_values_.reshape(X.shape[1:])
h = p_values < .05

# PLOT ########################################################################
import matplotlib.pyplot as plt
from sandbox.graphs.utils import plot_graph, annotate_graph, animate_graph

times = 1e3 * gat_list[0].train_times_['times'][start:]
mean_scores = np.mean(X, axis=0)

# Summary figure
fig, ax = plt.subplots(1)
X = np.transpose(X, [2, 1, 0])
print(np.shape(X))

###############################################################################
# Now let's actually do the clustering. This can take a long time...
# Here we set the threshold quite high to reduce computation.
p_threshold = 0.01  # 0.001
t_threshold = -stats.distributions.t.ppf(p_threshold / 2., n_subjects - 1)
print('Clustering.')
print(t_threshold)
print(np.shape(X))
T_obs, clusters, cluster_p_values, H0 = clu = \
    spatio_temporal_cluster_1samp_test(X, connectivity=connectivity, n_jobs=2,
                                       threshold=t_threshold)
print(cluster_p_values)
# Now select the clusters that are sig. at p < 0.05 (note that this value
# is multiple-comparisons corrected).
good_cluster_inds = np.where(cluster_p_values < 0.1)[0]

###############################################################################
# Visualize the clusters
print('Visualizing clusters.')
import os
os.environ["SUBJECTS_DIR"] = "/mnt/file1/binder/KRNS/anatomies/surfaces/"
os.environ["subjects_dir"] = "/mnt/file1/binder/KRNS/anatomies/surfaces/"
# timecourse labels
lab = ['short gap', 'long gap'][kk]
_ = axs[ii].annotate(lab, (0, stim_y), xytext=(-6, 0),
                     textcoords='offset points', color=col, ha='right',
                     va='center', fontsize=9, fontstyle='italic')
# cue label
_ = axs[ii].annotate('cue', xy=(stim_times[1], stim_ymax + thk),
                     xytext=(0, 2), textcoords='offset points', fontsize=9,
                     fontstyle='italic', ha='center', va='bottom', color=cue)
# stats
if plot_signif:
    thresh = -1 * distributions.t.ppf(0.05 / 2, len(contr_diff) - 1)
    result = spatio_temporal_cluster_1samp_test(
        contr_diff, threshold=thresh, stat_fun=stat_fun, n_jobs=6,
        buffer_size=None, n_permutations=np.inf)
    tvals, clusters, cluster_pvals, H0 = result
    signif = np.where(np.array([p <= 0.05 for p in cluster_pvals]))[0]
    signif_clusters = [clusters[s] for s in signif]
    signif_cluster_pvals = cluster_pvals[signif]
    # plot stats
    for clu, pv in zip(signif_clusters, signif_cluster_pvals):
        '''
        # this index tells direction of tval, hence could be used to
        # decide which color to draw the significant cluster region
        # based on which curve is higher:
        idx = (np.sign(tvals[clu[0][0], 0]).astype(int) + 1) // 2
        '''
        clu = clu[0]
        cluster_ymin = ylim[0] * np.ones_like(t[clu])
def test_cluster_permutation_t_test_with_connectivity():
    """Test cluster level permutations T-test with connectivity matrix."""
    try:
        try:
            from sklearn.feature_extraction.image import grid_to_graph
        except ImportError:
            from scikits.learn.feature_extraction.image import grid_to_graph
    except ImportError:
        return
    out = permutation_cluster_1samp_test(condition1_1d, n_permutations=500)
    connectivity = grid_to_graph(1, condition1_1d.shape[1])
    out_connectivity = permutation_cluster_1samp_test(
        condition1_1d, n_permutations=500, connectivity=connectivity)
    assert_array_equal(out[0], out_connectivity[0])
    for a, b in zip(out_connectivity[1], out[1]):
        assert_true(np.sum(out[0][a]) == np.sum(out[0][b]))
        assert_true(np.all(a[b]))

    # test spatio-temporal with no time connectivity (repeat spatial pattern)
    connectivity_2 = sparse.coo_matrix(
        linalg.block_diag(connectivity.asfptype().todense(),
                          connectivity.asfptype().todense()))
    condition1_2 = np.concatenate((condition1_1d, condition1_1d), axis=1)
    out_connectivity_2 = permutation_cluster_1samp_test(
        condition1_2, n_permutations=500, connectivity=connectivity_2)
    # make sure we were operating on the same values
    split = len(out[0])
    assert_array_equal(out[0], out_connectivity_2[0][:split])
    assert_array_equal(out[0], out_connectivity_2[0][split:])

    # make sure we really got 2x the number of original clusters
    n_clust_orig = len(out[1])
    assert_true(len(out_connectivity_2[1]) == 2 * n_clust_orig)

    # Make sure that we got the old ones back
    n_pts = condition1_1d.shape[1]
    data_1 = set([np.sum(out[0][b[:n_pts]]) for b in out[1]])
    data_2 = set([np.sum(out_connectivity_2[0][a[:n_pts]])
                  for a in out_connectivity_2[1][:]])
    assert_true(len(data_1.intersection(data_2)) == len(data_1))

    # now use the other algorithm
    condition1_3 = np.reshape(condition1_2, (40, 2, 350))
    out_connectivity_3 = mnestats.spatio_temporal_cluster_1samp_test(
        condition1_3, n_permutations=500, connectivity=connectivity,
        max_step=0, threshold=1.67, check_disjoint=True)
    # make sure we were operating on the same values
    split = len(out[0])
    assert_array_equal(out[0], out_connectivity_3[0][0])
    assert_array_equal(out[0], out_connectivity_3[0][1])

    # make sure we really got 2x the number of original clusters
    assert_true(len(out_connectivity_3[1]) == 2 * n_clust_orig)

    # Make sure that we got the old ones back
    data_1 = set([np.sum(out[0][b[:n_pts]]) for b in out[1]])
    data_2 = set([np.sum(out_connectivity_3[0][a[0], a[1]])
                  for a in out_connectivity_3[1]])
    assert_true(len(data_1.intersection(data_2)) == len(data_1))
assert sensor_adjacency.shape == \
    (len(tfr_epochs.ch_names), len(tfr_epochs.ch_names))
assert epochs_power.data.shape == (
    len(epochs), len(tfr_epochs.ch_names),
    len(tfr_epochs.freqs), len(tfr_epochs.times))
adjacency = mne.stats.combine_adjacency(
    sensor_adjacency, len(tfr_epochs.freqs), len(tfr_epochs.times))

# our adjacency is square with each dim matching the data size
assert adjacency.shape[0] == adjacency.shape[1] == \
    len(tfr_epochs.ch_names) * len(tfr_epochs.freqs) * len(tfr_epochs.times)

# %%
threshold = 3.
n_permutations = 50  # Warning: 50 is way too small for real-world analysis.
# T_obs, clusters, cluster_p_values, H0 = \
#     permutation_cluster_1samp_test(epochs_power,
#                                    n_permutations=n_permutations,
#                                    threshold=threshold, tail=0,
#                                    connectivity=None,
#                                    out_type='mask', verbose=True)
T_obs, clusters, cluster_p_values, H0 = \
    spatio_temporal_cluster_1samp_test(epochs_power,
                                       n_permutations=n_permutations,
                                       threshold=threshold, tail=0,
                                       connectivity=None,
                                       out_type='mask', verbose=True)

# %%
# just pass the spatial adjacency matrix (instead of spatio-temporal)
print('Computing adjacency.')
adjacency = mne.spatial_src_adjacency(src)

# Note that X needs to be a multi-dimensional array of shape
# samples (subjects) x time x space, so we permute dimensions
X = np.transpose(X, [2, 1, 0])

# Now let's actually do the clustering. This can take a long time...
# Here we set the threshold quite high to reduce computation.
p_threshold = 0.001
t_threshold = -stats.distributions.t.ppf(p_threshold / 2., n_subjects - 1)
print('Clustering.')
T_obs, clusters, cluster_p_values, H0 = clu = \
    spatio_temporal_cluster_1samp_test(X, adjacency=adjacency, n_jobs=1,
                                       threshold=t_threshold,
                                       buffer_size=None, verbose=True)

# Now select the clusters that are sig. at p < 0.05 (note that this value
# is multiple-comparisons corrected).
good_cluster_inds = np.where(cluster_p_values < 0.05)[0]

# %%
# Visualize the clusters
# ----------------------
print('Visualizing clusters.')

# Now let's build a convenient representation of each cluster, where each
# cluster becomes a "time point" in the SourceEstimate
stc_all_cluster_vis = summarize_clusters_stc(clu, tstep=tstep,
                                             vertices=fsave_vertices,
                                             subject='fsaverage')
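The snippet above uses the newer `adjacency` keyword, while most other examples here use `connectivity`; the argument and the helper functions (spatial_src_connectivity / spatial_tris_connectivity) were renamed to their `_adjacency` counterparts around MNE 0.21, so which spelling works depends on the installed version. A version-tolerant sketch (the try/except shape is ours, not from any snippet):

import mne

try:  # MNE >= 0.21
    adjacency = mne.spatial_src_adjacency(src)
    cluster_kwargs = dict(adjacency=adjacency)
except AttributeError:  # older MNE
    adjacency = mne.spatial_src_connectivity(src)
    cluster_kwargs = dict(connectivity=adjacency)
# ...then: spatio_temporal_cluster_1samp_test(X, n_jobs=1, **cluster_kwargs)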
def run_sensor_stats():
    for c in np.arange(len(config.stats_params)):

        # organise data and analysis parameters
        dat0_files = config.stats_params[c]['dat0_files']
        dat1_files = config.stats_params[c]['dat1_files']
        condnames = config.stats_params[c]['condnames']
        tmin, tmax = config.stats_params[c]['statwin']
        n_permutations = config.stats_params[c]['n_permutations']
        p_threshold = config.stats_params[c]['threshold']
        tail = config.stats_params[c]['tail']
        if tail == 0:
            p_threshold = p_threshold / 2
            tail_x = 1
        else:
            tail_x = tail

        if 'multi-subject' in config.stats_params[c] and \
                config.stats_params[c]['multi-subject'] == True:
            # we will run the same analysis on each subject separately
            nruns = len(dat0_files)
            ismulti = True
        else:
            nruns = 1
            ismulti = False

        results = []  # to store the results later

        for statrun in np.arange(nruns):

            if ismulti:
                # we will run the same analysis on each subject separately
                dat0, evokeds0, connectivity = collect_data(
                    [dat0_files[statrun]], condnames[0], tmin, tmax, ismulti)
                dat1, evokeds1, _ = collect_data(
                    [dat1_files[statrun]], condnames[1], tmin, tmax, ismulti)
            else:
                # collect together the data to be compared
                dat0, evokeds0, connectivity = collect_data(
                    dat0_files, condnames[0], tmin, tmax, ismulti)
                dat1, evokeds1, _ = collect_data(
                    dat1_files, condnames[1], tmin, tmax, ismulti)

            alldata = []

            # fix threshold to be one-sided if requested
            if not isinstance(p_threshold, dict):  # i.e. is NOT TFCE
                if config.stats_params[c]['stat'] == 'indep':
                    stat_fun = ttest_ind_no_p
                    if len(dat0_files) == 1:  # i.e. is single-subject stats
                        df = dat0.data.shape[0] - 1 + dat1.data.shape[0] - 1
                    else:
                        df = len(dat0_files) - 1 + len(dat1_files) - 1
                else:
                    # i.e. is dependent data, and so is a one-sample t-test.
                    # This will only ever be group data... If the lengths of
                    # dat0_files and dat1_files differ, it'll crash later
                    # anyway.
                    stat_fun = ttest_1samp_no_p
                    df = len(dat0_files) - 1
                threshold_stat = stats.distributions.t.ppf(
                    1. - p_threshold, df) * tail_x
            else:  # i.e. is TFCE
                threshold_stat = p_threshold

            # run the stats
            if config.stats_params[c]['stat'] == 'indep':
                alldata = [dat0, dat1]
                cluster_stats = spatio_temporal_cluster_test(
                    alldata, n_permutations=n_permutations,
                    threshold=threshold_stat, tail=tail, stat_fun=stat_fun,
                    n_jobs=1, buffer_size=None, connectivity=connectivity)
            elif config.stats_params[c]['stat'] == 'dep':
                # we have to use 1-sample t-tests here, so we also need to
                # subtract conditions
                alldata = dat0 - dat1
                cluster_stats = spatio_temporal_cluster_1samp_test(
                    alldata, n_permutations=n_permutations,
                    threshold=threshold_stat, tail=tail, stat_fun=stat_fun,
                    n_jobs=1, buffer_size=None, connectivity=connectivity)

            # extract stats of interest
            T_obs, clusters, p_values, _ = cluster_stats
            good_cluster_inds = np.where(
                p_values < config.stats_params[c]['p_accept'])[0]

            # tell the user the results
            print('There are {} significant clusters'.format(
                good_cluster_inds.size))
            if good_cluster_inds.size != 0:
                print('p-values: {}'.format(p_values[good_cluster_inds]))
            else:
                if p_values.any():
                    print('Minimum p-value: {}'.format(np.min(p_values)))
                else:
                    print('No clusters found')

            # some final averaging and tidying
            if len(evokeds0) == 1:
                dat0_avg = evokeds0[0].average()
                dat1_avg = evokeds1[0].average()
            else:
                dat0_avg = mne.grand_average(evokeds0)
                dat1_avg = mne.grand_average(evokeds1)
            diffcond_avg = mne.combine_evoked([dat0_avg, -dat1_avg], 'equal')

            # get sensor positions via layout
            pos = mne.find_layout(evokeds0[0].info).pos

            ## EVENTUALLY I WILL PUT THE PLOTTING IN A SEPARATE FUNCTION...
do_plot = False
if do_plot:
    # loop over clusters
    for i_clu, clu_idx in enumerate(good_cluster_inds):
        # unpack cluster information, get unique indices
        time_inds, space_inds = np.squeeze(clusters[clu_idx])
        ch_inds = np.unique(space_inds)
        time_inds = np.unique(time_inds)

        # get topography for F stat
        f_map = T_obs[time_inds, ...].mean(axis=0)

        # get topography of difference
        time_shift = evokeds0[0].time_as_index(tmin)  # fix windowing shift
        print('time_shift = {}'.format(time_shift))
        sig_times_idx = time_inds + time_shift
        diff_topo = np.mean(diffcond_avg.data[:, sig_times_idx], axis=1)
        sig_times = evokeds0[0].times[sig_times_idx]

        # create spatial mask
        mask = np.zeros((f_map.shape[0], 1), dtype=bool)
        mask[ch_inds, :] = True

        # initialize figure
        fig, ax_topo = plt.subplots(1, 1, figsize=(10, 3))

        # plot average difference and mark significant sensors
        image, _ = plot_topomap(diff_topo, pos, mask=mask, axes=ax_topo,
                                cmap='RdBu_r', vmin=np.min, vmax=np.max,
                                show=False)

        # create additional axes (for ERF and colorbar)
        divider = make_axes_locatable(ax_topo)

        # add axes for colorbar
        ax_colorbar = divider.append_axes('right', size='5%', pad=0.05)
        plt.colorbar(image, cax=ax_colorbar)
        ax_topo.set_xlabel(
            'Mean difference ({:0.3f} - {:0.3f} s)'.format(*sig_times[[0, -1]]))

        # add new axis for time courses and plot time courses
        ax_signals = divider.append_axes('right', size='300%', pad=1.2)
        title = 'Cluster #{0}, {1} sensor'.format(i_clu + 1, len(ch_inds))
        if len(ch_inds) > 1:
            title += "s (mean)"
        plot_compare_evokeds([dat0_avg, dat1_avg], title=title, picks=ch_inds,
                             axes=ax_signals, colors=None, show=False,
                             split_legend=False, truncate_yaxis='max_ticks')

        # plot temporal cluster extent
        ymin, ymax = ax_signals.get_ylim()
        ax_signals.fill_betweenx((ymin, ymax), sig_times[0], sig_times[-1],
                                 color='orange', alpha=0.3)

        # clean up viz
        mne.viz.tight_layout(fig=fig)
        fig.subplots_adjust(bottom=.05)
        plt.show()

results.append({
    'cluster_stats': cluster_stats,
    'good_cluster_inds': good_cluster_inds,
    'alldata': alldata,
    'evokeds0': evokeds0,
    'evokeds1': evokeds1,
})

# save
save_name = op.join(config.stat_path,
                    config.stats_params[c]['analysis_name'] + '.dat')
pickle_out = open(save_name, 'wb')
pickle.dump(results, pickle_out)
pickle_out.close()
adjacency, ch_names = find_ch_adjacency(info, ch_type=ch_type)

# set cluster threshold
# threshold = 1.0

# set family-wise p-value
# p_accept = 0.05
p_accept = 0.14

X_low = X[y == LOW_CONF_EPOCH, ...].transpose(0, 2, 1)
X_high = X[y == HIGH_CONF_EPOCH, ...].transpose(0, 2, 1)
X_diff = X_high - X_low

cluster_stats = spatio_temporal_cluster_1samp_test(
    X_diff,
    n_permutations=100,
    # threshold=threshold,
    # tail=1,
    n_jobs=1,
    # buffer_size=None,
    adjacency=adjacency,
)

T_obs, clusters, p_values, _ = cluster_stats
good_cluster_inds = np.where(p_values < p_accept)[0]

colors = {"low": "crimson", "high": 'steelblue'}
linestyles = {"low": '-', "high": '--'}

# organize data for plotting
evokeds = {"low": erf_low, "high": erf_high}
# ------------------
#
# .. note::
#     X needs to be a multi-dimensional array of shape
#     samples (subjects) x time x space, so we permute dimensions:

X = X.reshape((n_subjects, 1, n_src))

###############################################################################
# Now let's do some clustering using the standard method.
#
# .. note::
#     Not specifying a connectivity matrix implies grid-like connectivity,
#     which we want here:

T_obs, clusters, p_values, H0 = \
    spatio_temporal_cluster_1samp_test(X, n_jobs=1, threshold=threshold,
                                       connectivity=connectivity, tail=1,
                                       n_permutations=n_permutations)

# Let's put the cluster data in a readable format
ps = np.zeros(width * width)
for cl, p in zip(clusters, p_values):
    ps[cl[1]] = -np.log10(p)
ps = ps.reshape((width, width))
T_obs = T_obs.reshape((width, width))

# To do a Bonferroni correction on these data is simple:
p = stats.distributions.t.sf(T_obs, n_subjects - 1)
p_bon = -np.log10(bonferroni_correction(p)[1])

# Now let's do some clustering using the standard method with "hat":
stat_fun = partial(ttest_1samp_no_p, sigma=sigma)
    op.join(subjects_dir, 'fsaverage', 'bem', 'fsaverage-5-src.fif'))
connectivity = spatial_src_connectivity(fsaverage_src)

# something like 0.01 is a more typical value here (or use TFCE!), but
# for speed here we'll use 0.001 (fewer clusters to handle)
p_threshold = 0.001
t_threshold = -stats.distributions.t.ppf(p_threshold / 2., len(X) - 1)

###############################################################################
# Here we could do an exact test with ``n_permutations=2**(len(X)-1)``,
# i.e. 32768 permutations, but this would take a long time. For speed and
# simplicity we'll do 1024.
stat_fun = partial(ttest_1samp_no_p, sigma=1e-3)
T_obs, clusters, cluster_p_values, H0 = clu = \
    spatio_temporal_cluster_1samp_test(
        X, connectivity=connectivity, n_jobs=N_JOBS, threshold=t_threshold,
        stat_fun=stat_fun, buffer_size=None, seed=0, step_down_p=0.05,
        verbose=True)
good_cluster_inds = np.where(cluster_p_values < 0.05)[0]
for ind in good_cluster_inds:
    print('Found cluster with p=%g' % (cluster_p_values[ind],))

###############################################################################
# Visualize the results:
stc_all_cluster_vis = summarize_clusters_stc(clu, tstep=tstep,
                                             vertices=fsaverage_vertices,
                                             subject='fsaverage')
pos_lims = [0, 0.1, 100 if l_freq is None else 30]
brain = stc_all_cluster_vis.plot(hemi='both',
# To use an algorithm optimized for spatio-temporal clustering, we
# just pass the spatial connectivity matrix (instead of spatio-temporal)
print('Computing connectivity.')
connectivity = mne.spatial_src_connectivity(src)

# Note that X needs to be a multi-dimensional array of shape
# samples (subjects) x time x space, so we permute dimensions
X = np.transpose(X, [2, 1, 0])

# Now let's actually do the clustering. This can take a long time...
# Here we set the threshold quite high to reduce computation.
p_threshold = 0.001
t_threshold = -stats.distributions.t.ppf(p_threshold / 2., n_subjects - 1)
print('Clustering.')
T_obs, clusters, cluster_p_values, H0 = clu = \
    spatio_temporal_cluster_1samp_test(X, connectivity=connectivity, n_jobs=1,
                                       threshold=t_threshold,
                                       buffer_size=None)

# Now select the clusters that are sig. at p < 0.05 (note that this value
# is multiple-comparisons corrected).
good_cluster_inds = np.where(cluster_p_values < 0.05)[0]

###############################################################################
# Visualize the clusters
# ----------------------
print('Visualizing clusters.')

# Now let's build a convenient representation of each cluster, where each
# cluster becomes a "time point" in the SourceEstimate
stc_all_cluster_vis = summarize_clusters_stc(clu, tstep=tstep,
                                             vertices=fsave_vertices,
                                             subject='fsaverage')
# need to use connectivity instead of adjacency in old mne versions (0.19)
print('Computing connectivity')
src = mne.read_source_spaces(
    op.join(fsMRI_dir, 'fsaverage', 'bem', 'fsaverage-ico-5-src.fif'))
connectivity = mne.spatial_src_connectivity(src)
fsaverage_vertices = [s['vertno'] for s in src]

# Clustering
print('Clustering')
# p_threshold = 0.05
# t_threshold = -stats.distributions.t.ppf(p_threshold / 2., n_subjects - 1)
T_obs, clusters, cluster_p_values, H0 = clu = \
    spatio_temporal_cluster_1samp_test(
        X, connectivity=connectivity, n_jobs=6, n_permutations=500,
        threshold=None, buffer_size=None,
        verbose=True)  # with connectivity instead of adjacency

# Save or plot clusters (if any significant one)
good_cluster_inds = np.where(cluster_p_values < 0.05)[0]
if len(good_cluster_inds) > 0:
    print(analysis_name + ': ' + str(len(good_cluster_inds)) +
          ' good clusters')
    print('Saving clusters to ' +
          op.join(results_path, analysis_name + '_sources_clusters.pickle'))
    with open(
            op.join(results_path,
                    analysis_name + '_sources_clusters.pickle'),
###############################################################################
# Compute statistic

# To use an algorithm optimized for spatio-temporal clustering, we
# just pass the spatial connectivity matrix (instead of spatio-temporal)
print('Computing connectivity.')
connectivity = spatial_tris_connectivity(grade_to_tris(5))

# Note that X needs to be a multi-dimensional array of shape
# samples (subjects) x time x space, so we permute dimensions
X = np.transpose(X, [2, 1, 0])

# Now let's actually do the clustering. This can take a long time...
t_threshold = -stats.distributions.t.ppf(p_threshold / 2., subjectCount - 1)
print('Clustering with a threshold of t = {}'.format(t_threshold))
clu = spatio_temporal_cluster_1samp_test(X, connectivity=connectivity,
                                         n_jobs=multitasking,
                                         threshold=t_threshold)
T_obs, clusters, cluster_p_values, H0 = clu

# Save this data for later reference
with open(tFilename, 'wb') as tFile:
    pickle.dump(T_obs, tFile)
with open(clusterFilename, 'wb') as clusterFile:
    pickle.dump(clusters, clusterFile)
with open(pFilename, 'wb') as pFile:
    pickle.dump(cluster_p_values, pFile)
# cue label
_ = ax.annotate('cue', xy=(stim_times[1], stim_ymax + thk), xytext=(0, 1.5),
                textcoords='offset points', fontsize=9, fontstyle='italic',
                ha='center', va='bottom', color=cue)
# stats
if plot_signif:
    thresh = -1 * distributions.t.ppf(0.05 / 2, len(contr_diff) - 1)
    result = spatio_temporal_cluster_1samp_test(
        contr_diff, threshold=thresh, stat_fun=stat_fun, n_jobs=6,
        buffer_size=None, n_permutations=np.inf)
    tvals, clusters, cluster_pvals, H0 = result
    signif = np.where(np.array([p <= 0.05 for p in cluster_pvals]))[0]
    signif_clusters = [clusters[s] for s in signif]
    signif_cluster_pvals = cluster_pvals[signif]
    # plot stats
    for clu, pv in zip(signif_clusters, signif_cluster_pvals):
        '''
        # this index tells direction of tval, hence could be used to
        # decide which color to draw the significant cluster region
        # based on which curve is higher:
        idx = (np.sign(tvals[clu[0][0], 0]).astype(int) + 1) // 2
        '''
        clu = clu[0]
def ttest(self, threshold_dict=None):
    """Calculate one-sample t-test across each voxel (two-sided).

    Args:
        threshold_dict: a dictionary of threshold parameters {'unc':.001}
            or {'fdr':.05} or {'permutation':'tfce','n_permutations':5000}

    Returns:
        out: dictionary of regression statistics in Brain_Data instances
            {'t','p'}
    """
    t = deepcopy(self)
    p = deepcopy(self)

    if threshold_dict is not None:
        if 'permutation' in threshold_dict:
            # Convert data to correct shape (subjects, time, space)
            data_convert_shape = deepcopy(self.data)
            data_convert_shape = np.expand_dims(data_convert_shape, axis=1)
            if 'n_permutations' in threshold_dict:
                n_permutations = threshold_dict['n_permutations']
            else:
                n_permutations = 1000
                warnings.warn('n_permutations not set: running with 1000 '
                              'permutations')

            if 'connectivity' in threshold_dict:
                connectivity = threshold_dict['connectivity']
            else:
                connectivity = None

            if 'n_jobs' in threshold_dict:
                n_jobs = threshold_dict['n_jobs']
            else:
                n_jobs = 1

            if threshold_dict['permutation'] == 'tfce':
                perm_threshold = dict(start=0, step=0.2)
            else:
                perm_threshold = None

            if 'stat_fun' in threshold_dict:
                stat_fun = threshold_dict['stat_fun']
            else:
                stat_fun = ttest_1samp_no_p

            t.data, clusters, p_values, h0 = \
                spatio_temporal_cluster_1samp_test(
                    data_convert_shape, tail=0, threshold=perm_threshold,
                    stat_fun=stat_fun, connectivity=connectivity,
                    n_permutations=n_permutations, n_jobs=n_jobs)

            t.data = t.data.squeeze()

            p = deepcopy(t)
            for cl, pval in zip(clusters, p_values):
                p.data[cl[1][0]] = pval
        else:
            t.data, p.data = ttest_1samp(self.data, 0, 0)
    else:
        t.data, p.data = ttest_1samp(self.data, 0, 0)

    if threshold_dict is not None:
        if type(threshold_dict) is dict:
            if 'unc' in threshold_dict:
                thr = threshold_dict['unc']
            elif 'fdr' in threshold_dict:
                thr = fdr(p.data, q=threshold_dict['fdr'])
            elif 'permutation' in threshold_dict:
                thr = .05
            thr_t = threshold(t, p, thr)
            out = {'t': t, 'p': p, 'thr_t': thr_t}
        else:
            raise ValueError(
                "threshold_dict is not a dictionary. Make sure it is in "
                "the form of {'unc':.001} or {'fdr':.05}")
    else:
        out = {'t': t, 'p': p}
    return out
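A hypothetical call of the `ttest` method above, requesting the TFCE permutation branch (the class it belongs to is not shown in this snippet; `dat` below is an assumed instance of it):

# `dat` is assumed to be an instance of the class defining ttest() above.
out = dat.ttest(threshold_dict={'permutation': 'tfce',
                                'n_permutations': 5000,
                                'n_jobs': -1})
t_stats, p_map, thresholded = out['t'], out['p'], out['thr_t']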
def extract_roi(stc, src, label=None, thresh=0.5):
    """Extract a functional ROI.

    Parameters
    ----------
    stc : instance of SourceEstimate
        The source estimate data. The maximum positive peak will be selected.
        If you want the maximum negative peak, consider passing abs(stc) or
        -stc.
    src : instance of SourceSpaces
        The associated source space.
    label : instance of Label | None
        The label within which to select the peak. Can be None to use the
        entire STC.
    thresh : float
        Threshold value (relative to the peak value) above which vertices
        will be taken.

    Returns
    -------
    roi : instance of Label
        The functional ROI.
    """
    assert isinstance(stc, SourceEstimate)
    if label is None:
        stc_label = stc.copy()
    else:
        stc_label = stc.in_label(label)
        del label
    max_vidx, max_tidx = np.unravel_index(np.argmax(stc_label.data),
                                          stc_label.data.shape)
    max_val = stc_label.data[max_vidx, max_tidx]
    if max_vidx < len(stc_label.vertices[0]):
        hemi = 'lh'
        max_vert = stc_label.vertices[0][max_vidx]
        max_vidx = list(stc.vertices[0]).index(max_vert)
    else:
        hemi = 'rh'
        max_vert = stc_label.vertices[1][max_vidx -
                                         len(stc_label.vertices[0])]
        max_vidx = list(stc.vertices[1]).index(max_vert)
        max_vidx += len(stc.vertices[0])
    del stc_label
    assert max_val == stc.data[max_vidx, max_tidx]

    # Get contiguous vertices within 50%
    threshold = max_val * thresh
    connectivity = spatial_src_adjacency(src, verbose='error')  # holes
    _, clusters, _, _ = spatio_temporal_cluster_1samp_test(
        np.array([stc.data]), threshold, n_permutations=1,
        stat_fun=lambda x: x.mean(0), tail=1, connectivity=connectivity)
    for cluster in clusters:
        if max_vidx in cluster[0] and max_tidx in cluster[1]:
            break  # found our cluster
    else:  # in case we did not "break"
        raise RuntimeError('Clustering failed somehow!')
    if hemi == 'lh':
        verts = stc.vertices[0][cluster]
    else:
        verts = stc.vertices[1][cluster - len(stc.vertices[0])]
    func_label = Label(verts, hemi=hemi, subject=stc.subject)
    func_label = func_label.fill(src)
    return func_label, max_vert, max_vidx, max_tidx
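A hypothetical usage of `extract_roi` above; the file paths are illustrative, not from the original snippet, and abs(stc) follows the docstring's suggestion for selecting the maximum-magnitude peak:

import mne

stc = mne.read_source_estimate('subject-lh.stc')          # illustrative path
src = mne.read_source_spaces('fsaverage-ico-5-src.fif')   # illustrative path
roi, max_vert, max_vidx, max_tidx = extract_roi(abs(stc), src, thresh=0.5)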
               coords_as_verts=True, hemi='rh', color='blue',
               scale_factor=0.6, alpha=0.5)

plt.figure()
plt.plot(1e3 * stc.times, stc.data[::100, :].T)
plt.xlabel('time (ms)')
plt.ylabel('sLORETA value')
plt.show()

# %% SPATIOTEMPORAL CLUSTERING.
# !!!! TOOK TOO MUCH MEMORY. WON'T RUN IT ANYMORE !!!!
print(stc.data.shape)   # run def above with 'evoked_LSF'
print(stc2.data.shape)  # run def above with 'evoked_HSF', saved once in stc2
n_vertices_sample, n_times = stc2.data.shape
n_subjects = 3
np.random.seed(0)
X = randn(n_vertices_sample, n_times, n_subjects, 2) * 10
X[:, :, :, 0] += stc2.data[:, :, np.newaxis]
X[:, :, :, 1] += stc.data[:, :, np.newaxis]
X = np.abs(X)  # only magnitude
X = X[:, :, :, 0] - X[:, :, :, 1]  # make paired contrast
X = np.transpose(X, [2, 1, 0])
print(X)

print('Clustering.')
T_obs, clusters, cluster_p_values, H0 = clu = \
    spatio_temporal_cluster_1samp_test(X, n_jobs=1, threshold=0.05)
                        np.array(gat.scores_)[:, :, None]), axis=2)

# STATS
# ------ Parameters  XXX to be transferred to config?
alpha = 0.05
n_permutations = 2 ** 11
threshold = dict(start=.2, step=.2)

X = scores.transpose((2, 0, 1)) - .5

# ------ Run stats
T_obs_, clusters, p_values, _ = spatio_temporal_cluster_1samp_test(
    X, out_type='mask', n_permutations=n_permutations, connectivity=None,
    threshold=threshold, n_jobs=-1)

# ------ combine clusters and retrieve min p_values for each feature
p_values = np.min(np.logical_not(clusters) +
                  [clusters[c] * p for c, p in enumerate(p_values)],
                  axis=0)
x, y = np.meshgrid(gat.train_times['times_'],
                   gat.test_times_['times_'][0],
                   copy=False, indexing='xy')

# PLOT
# ------ Plot GAT