def find_clusters(subject, contrast_name, t_val, atlas, volume_name, input_fol='',
                  load_from_annotation=True, n_jobs=1):
    """Cluster the per-hemisphere contrast and save the labels the clusters overlap.

    For every hemisphere in ``utils.HEMIS`` the contrast is clustered with
    ``mne_clusters._find_clusters`` at threshold ``t_val``; the clusters are then
    handed to ``find_clusters_overlapped_labeles``. The collected results are
    saved with ``utils.save`` under
    ``BLENDER_ROOT_DIR/<subject>/fmri/clusters_labels_<volume_name>.pkl``.

    Parameters
    ----------
    subject : subject identifier, used to build paths.
    contrast_name : not used by the active code (only by commented-out code).
    t_val : clustering threshold; also stored under the ``'threshold'`` key.
    atlas : forwarded to ``find_clusters_overlapped_labeles``.
    volume_name : forwarded to ``init_clusters`` and used in the output file name.
    input_fol : input folder; ``''`` selects ``BLENDER_ROOT_DIR/<subject>/fmri``.
    load_from_annotation, n_jobs : forwarded to ``find_clusters_overlapped_labeles``.

    Returns
    -------
    None. The result is only written to disk.
    """
    if input_fol == '':
        input_fol = op.join(BLENDER_ROOT_DIR, subject, 'fmri')
    contrast, connectivity, verts = init_clusters(subject, volume_name, input_fol)

    overlapped = []
    for hemi in utils.HEMIS:
        hemi_clusters, _ = mne_clusters._find_clusters(
            contrast[hemi], t_val, connectivity=connectivity[hemi])
        hemi_labels = find_clusters_overlapped_labeles(
            subject, hemi_clusters, contrast[hemi], atlas, hemi, verts[hemi],
            load_from_annotation, n_jobs)
        if hemi_labels is None:
            print("Can't find clusters in {}!".format(hemi))
            continue
        overlapped.extend(hemi_labels)

    clusters_labels = dict(threshold=t_val, values=overlapped)
    # The output always goes to the subject's fmri folder, regardless of input_fol.
    output_fname = op.join(
        BLENDER_ROOT_DIR, subject, 'fmri', 'clusters_labels_{}.pkl'.format(volume_name))
    print('Saving clusters labels: {}'.format(output_fname))
    utils.save(clusters_labels, output_fname)
def find_clusters_tval_hist(subject, contrast_name, output_fol, input_fol='', n_jobs=1):
    """Cluster the contrast over a range of thresholds and save all the clusters.

    Thresholds run over ``np.arange(2, 20, 0.1)``. For each threshold and each
    hemisphere in ``utils.HEMIS`` the clusters returned by
    ``mne_clusters._find_clusters`` are stored in ``clusters[tval][hemi]``.
    The whole dictionary is saved with ``utils.save`` to
    ``<output_fol>/clusters_tval_hist.pkl``.

    Parameters
    ----------
    subject : subject identifier, forwarded to ``init_clusters``.
    contrast_name : forwarded to ``init_clusters``.
    output_fol : folder the pickle file is written to.
    input_fol : forwarded to ``init_clusters``.
    n_jobs : not used by this function.

    Returns
    -------
    None. The result is only written to disk.
    """
    contrast, connectivity, _ = init_clusters(subject, contrast_name, input_fol)
    clusters = {}
    for tval in np.arange(2, 20, 0.1):
        clusters[tval] = {}
        try:
            for hemi in utils.HEMIS:
                clusters[tval][hemi], _ = mne_clusters._find_clusters(
                    contrast[hemi], tval, connectivity=connectivity[hemi])
        except Exception as err:
            # Best effort: report the failing threshold and go on with the next one.
            print('error with tval {}: {}'.format(tval, err))
            continue
        # Size of the largest cluster per hemisphere. default=0 covers thresholds
        # for which no cluster survives (max() of an empty sequence would raise).
        largest = {
            hemi: max(map(len, clusters[tval].get(hemi, [])), default=0)
            for hemi in ('rh', 'lh')}
        print('tval: {:.2f}, len rh: {}, lh: {}'.format(tval, largest['rh'], largest['lh']))
    utils.save(clusters, op.join(output_fol, 'clusters_tval_hist.pkl'))
def _find_clusters_mne(data, threshold, adjacency, argname, min_adj_ch=0, full=True):
    """Find clusters in ``data`` with mne's private ``_find_clusters`` (two-tailed).

    Parameters
    ----------
    data : array whose ``shape`` is remembered; it is flattened with ``ravel()``
        before clustering when ``adjacency`` is given.
    threshold : passed to ``_find_clusters`` as ``threshold``.
    adjacency : adjacency structure or None; passed to ``_find_clusters`` under
        the keyword named by ``argname``.
    argname : name of the keyword argument that carries ``adjacency``.
    min_adj_ch : not used by this function.
    full : when true, clusters are converted to arrays of the original data shape.

    Returns
    -------
    clusters, cluster_stats : as returned by ``_find_clusters``; with ``full``
        the clusters are reshaped to ``data``'s original shape.
    """
    from mne.stats.cluster_level import (
        _find_clusters, _cluster_indices_to_mask)

    original_shape = data.shape
    has_adjacency = adjacency is not None
    if has_adjacency:
        data = data.ravel()

    adjacency_kwarg = {argname: adjacency}
    clusters, cluster_stats = _find_clusters(
        data, threshold=threshold, tail=0, **adjacency_kwarg)

    if not full:
        return clusters, cluster_stats

    if has_adjacency:
        # With adjacency the clusters are index lists - turn them into masks first.
        clusters = _cluster_indices_to_mask(clusters, np.prod(data.shape))
    reshaped = [mask.reshape(original_shape) for mask in clusters]
    return reshaped, cluster_stats
def find_clusters(subject, contrast_name, t_val, atlas, volume_name, input_fol='',
                  load_from_annotation=True, n_jobs=1):
    """Find contrast clusters in both hemispheres and save their overlapping labels.

    The contrast of each hemisphere in ``utils.HEMIS`` is clustered by
    ``mne_clusters._find_clusters`` using ``t_val`` as threshold, and
    ``find_clusters_overlapped_labeles`` is called on the resulting clusters.
    A dictionary with the keys ``'threshold'`` and ``'values'`` is saved via
    ``utils.save`` to
    ``BLENDER_ROOT_DIR/<subject>/fmri/clusters_labels_<volume_name>.pkl``.

    Parameters
    ----------
    subject : subject identifier, used to build paths.
    contrast_name : unused here (referenced only by commented-out code).
    t_val : cluster-forming threshold.
    atlas : passed on to ``find_clusters_overlapped_labeles``.
    volume_name : passed on to ``init_clusters``; part of the output file name.
    input_fol : input folder, ``''`` means ``BLENDER_ROOT_DIR/<subject>/fmri``.
    load_from_annotation, n_jobs : passed on to ``find_clusters_overlapped_labeles``.

    Returns
    -------
    None.
    """
    if input_fol == '':
        input_fol = op.join(BLENDER_ROOT_DIR, subject, 'fmri')
    contrast, connectivity, verts = init_clusters(subject, volume_name, input_fol)
    result = {'threshold': t_val, 'values': []}

    for hemi in utils.HEMIS:
        hemi_contrast = contrast[hemi]
        found, _ = mne_clusters._find_clusters(
            hemi_contrast, t_val, connectivity=connectivity[hemi])
        labels_for_hemi = find_clusters_overlapped_labeles(
            subject, found, hemi_contrast, atlas, hemi, verts[hemi],
            load_from_annotation, n_jobs)
        if labels_for_hemi is not None:
            result['values'].extend(labels_for_hemi)
        else:
            print("Can't find clusters in {}!".format(hemi))

    # Saved next to the subject's fmri data, independent of input_fol.
    target = op.join(
        BLENDER_ROOT_DIR, subject, 'fmri', 'clusters_labels_{}.pkl'.format(volume_name))
    print('Saving clusters labels: {}'.format(target))
    utils.save(result, target)
def find_clusters_tval_hist(subject, contrast_name, output_fol, input_fol='', n_jobs=1):
    """Compute the contrast clusters for a sweep of thresholds and pickle them.

    The thresholds are ``np.arange(2, 20, 0.1)``. ``clusters[tval][hemi]`` holds
    the clusters ``mne_clusters._find_clusters`` returns for hemisphere ``hemi``
    (taken from ``utils.HEMIS``) at threshold ``tval``. The dictionary is saved
    with ``utils.save`` as ``<output_fol>/clusters_tval_hist.pkl``.

    Parameters
    ----------
    subject : subject identifier, forwarded to ``init_clusters``.
    contrast_name : forwarded to ``init_clusters``.
    output_fol : folder the pickle file is written to.
    input_fol : forwarded to ``init_clusters``.
    n_jobs : not used by this function.

    Returns
    -------
    None.
    """
    contrast, connectivity, _ = init_clusters(subject, contrast_name, input_fol)
    clusters = {}
    for tval in np.arange(2, 20, 0.1):
        clusters[tval] = {}
        try:
            for hemi in utils.HEMIS:
                clusters[tval][hemi], _ = mne_clusters._find_clusters(
                    contrast[hemi], tval, connectivity=connectivity[hemi])
        except Exception as err:
            # Keep the sweep going, but say which threshold failed and why.
            print('error with tval {}: {}'.format(tval, err))
            continue
        # Largest cluster size of each hemisphere; 0 when the threshold leaves
        # no cluster (an empty sequence would make a plain max() raise).
        rh_len = max(map(len, clusters[tval].get('rh', [])), default=0)
        lh_len = max(map(len, clusters[tval].get('lh', [])), default=0)
        print('tval: {:.2f}, len rh: {}, lh: {}'.format(tval, rh_len, lh_len))
    utils.save(clusters, op.join(output_fol, 'clusters_tval_hist.pkl'))
f1=band_info[band]["freqs"][-1])) for epo_idx in range(data_temp.shape[0]): sel_inds = (dm["Block"]==cond) & (dm["Wav"]==wav) & (dm["Subj"]==sub) dm_new = dm_new.append(dm[sel_inds]) data.append(data_temp[epo_idx,]) group_id.append(sub_idx) idx_border += 1 idx_borders[-1].append(idx_border) data = np.array(data)[...,0] group_id = np.array(group_id) col_idx = dm_new.columns.get_loc(indep_var) formula = "Brain ~ {} + Block + Wav".format(indep_var) tvals, coeffs = mass_uv_mixedlmm(formula, dm_new, data, group_id, exclude=exclude) # find clusters clusters, cluster_stats = _find_clusters(tvals,threshold=threshold,connectivity=connectivity,include=include) main_result = {"formula":formula, "tvals":tvals, "coeffs":coeffs, "cluster_stats":cluster_stats} with open("{}dics_{}_{}_main_result".format(proc_dir, indep_var, band), "wb") as f: pickle.dump(main_result,f) # permute all_perm_cluster_stats = [] for i in range(perm_n): print("Permutation {} of {}".format(i, perm_n)) dm_perm = dm_new.copy() for idx_border in idx_borders: temp_slice = dm_perm[indep_var][idx_border[0]:idx_border[1]].copy() temp_slice = temp_slice.sample(frac=1) dm_perm.iloc[idx_border[0]:idx_border[1],col_idx] = temp_slice.values perm_tvals, _ = mass_uv_mixedlmm(formula, dm_perm, data, group_id, exclude=exclude)
# sensors=True) # ############################################################################### # 7) Plot results # set up channel adjacency matrix n_tests = betas.shape[1] adjacency, ch_names = find_ch_adjacency(epochs_info, ch_type='eeg') adjacency = _setup_adjacency(adjacency, n_tests, n_times) # threshold parameters for clustering threshold = dict(start=0.2, step=0.2) clusters, cluster_stats = _find_clusters(t_clust, t_power=1, threshold=threshold, adjacency=adjacency, tail=0) # get significant clusters cl_sig_mask = cluster_stats > cluster_thresh cl_sig_mask = np.transpose(cl_sig_mask.reshape((n_times, n_channels)), (1, 0)) cluster_stats = np.transpose(cluster_stats.reshape((n_times, n_channels)), (1, 0)) # create evoked object containing the resulting t-values cluster_map = dict() cluster_map['ST-clustering effect of cue (B-A)'] = EvokedArray( cluster_stats, epochs_info, tmin)
fr = [4, 30] frange = list(np.arange(fr[0], fr[1] + 1)) f_subs = [[0, 3], [7, 26]] ###### first get our clusters/masks stc_clu = mne.read_source_estimate("{}{}stc_clu_{}-{}Hz_{}_{}_A-lh.stc".format( proc_dir, stat_dir, fr[0], fr[1], cond_str, thresh_str)) stc_clu.subject = "fsaverage" with open( "{}{}clu_{}-{}Hz_{}_{}_A".format(proc_dir, stat_dir, fr[0], fr[1], cond_str, thresh_str), "rb") as f: f_obs, clusters, cluster_pv, H0 = pickle.load(f) f_thresh = np.quantile(H0, 0.95) # stc_clu = mne.stats.summarize_clusters_stc(clu,subject="fsaverage", # p_thresh=0.05, # vertices=stc_clu.vertices) meta_clusts = _find_clusters(stc_clu.data[:, 0], 1e-8, connectivity=cnx)[0] clust_labels = [] for mc in meta_clusts: temp_stc = stc_clu.copy() temp_stc.data[:] = np.zeros((temp_stc.data.shape[0], 1)) temp_stc.data[mc, 0] = 1 lab = [x for x in mne.stc_to_label(temp_stc, src=fs_src) if x][0] clust_labels.append(lab) X = np.zeros( (len(subjs), len(clust_labels), fr[1] - fr[0] + 1, len(conds), len(wavs))) for sub_idx, sub in enumerate(subjs): src = mne.read_source_spaces("{}{}_{}-src.fif".format( proc_dir, sub, spacing)) vertnos = [s["vertno"] for s in src] morph = mne.compute_source_morph(src,
# calculate Pearson's r for each vertex to Behavioral variable of the subject X_Rval = np.empty(X_diff.shape[1]) X_R_Tval = np.empty(X_diff.shape[1]) for vert_idx in range(X_diff.shape[1]): X_Rval[vert_idx], p = stats.pearsonr(X_diff[:,vert_idx],Behav) # calculate an according t-value for each r X_R_Tval = (X_Rval * np.sqrt((len(sub_dict)-2))) / np.sqrt(1 - X_Rval**2) # setup for clustering -- t-thresholds n_subjects=len(sub_dict) p_threshold = 0.05 t_threshold = -stats.distributions.t.ppf(p_threshold / 2., n_subjects - 1) src = mne.read_source_spaces("{}fsaverage-src.fif".format(mri_dir)) connectivity = mne.spatial_src_connectivity(src) # find clusters in the T-vals clusters, cluster_stats = _find_clusters(X_R_Tval,threshold=t_threshold, connectivity=connectivity, tail=0) # plot uncorrected correlation t-values on fsaverage X_R_Tval = np.expand_dims(X_R_Tval, axis=1) SOMA_all_stc_diff.data = X_R_Tval SOMA_all_stc_diff.plot(subjects_dir=mri_dir,subject='fsaverage',surface='white',hemi='both',time_viewer=True,colormap='coolwarm',clim={'kind':'value','pos_lims':(0,1.5,3)}) # do the random sign flip permutation # setup n_perms = 500 cluster_H0 = np.zeros(n_perms) # here comes the loop for i in range(n_perms): if i in [10,20,50,100,200,300,400]: print("{} th iteration".format(i)) # permute the behavioral values over subjects
resampled_betas[subj_ind, :] = resampled_betas[subj_ind, :] - \ betas.mean(axis=0) # compute t-values for bootstrap sample t_val = resampled_betas.mean(axis=0) / se # transform to f-values f_vals = t_val**2 # transpose for clustering f_vals = f_vals.reshape((n_channels, n_times)) f_vals = np.transpose(f_vals, (1, 0)) f_vals = f_vals.ravel() # compute clustering on squared t-values (i.e., f-values) clusters, cluster_stats = _find_clusters(f_vals, threshold=threshold, connectivity=connectivity, tail=1) # save max cluster mass. Combined, the max cluster mass values from # computed on the basis of the bootstrap samples provide an approximation # of the cluster mass distribution under H0 if len(clusters): cluster_H0[i] = cluster_stats.max() else: cluster_H0[i] = np.nan # save max f-value f_H0[i] = f_vals.max() ############################################################################### # estimate t-test based on original phase coherence betas
def cluster_1d(data, connectivity=None):
    """Cluster ``data`` with mne's private ``_find_clusters`` at a threshold of 0.5.

    Parameters
    ----------
    data : passed unchanged to ``_find_clusters``.
    connectivity : optional connectivity; when given it is converted with
        ``sparse.coo_matrix`` before being passed on.

    Returns
    -------
    Whatever ``_find_clusters`` returns.
    """
    from mne.stats.cluster_level import _find_clusters

    coo_connectivity = (
        None if connectivity is None else sparse.coo_matrix(connectivity))
    return _find_clusters(data, 0.5, connectivity=coo_connectivity)
def cluster_based_regression(data, preds, conn, n_permutations=1000, progressbar=True):
    """Cluster-based permutation test on regression t values.

    t values are computed with ``compute_regression_t(data, preds)``, clustered
    with mne's ``_find_clusters`` (threshold 2., two-tailed), and compared with
    the largest positive / negative cluster statistics obtained after randomly
    permuting the observations of ``preds``.

    Parameters
    ----------
    data : array unpacked as ``(n_obs, n_times, n_channels)``, i.e. observations
        are the first dimension and channels/vertices the last.
    preds : predictors; indexed along the first axis with a permutation of the
        observations.
    conn : connectivity, passed to mne's ``_setup_connectivity``.
    n_permutations : number of permutations.
    progressbar : falsy - no progress bar; ``'text'`` - ``tqdm.tqdm``; any other
        truthy value - ``tqdm.tqdm_notebook``.

    Returns
    -------
    t_values : t values of the non-permuted regression.
    clusters : list of boolean masks of shape ``(n_times, n_channels)``, sorted
        by p value.
    cluster_p : p value of each cluster. When no cluster is found, permutations
        are skipped and the third element is instead the (empty)
        ``cluster_stats`` returned by ``_find_clusters``.
    """
    from mypy.stats import compute_regression_t
    from mne.stats.cluster_level import (_setup_connectivity, _find_clusters,
                                         _cluster_indices_to_mask)

    n_obs, n_times, n_channels = data.shape
    connectivity = _setup_connectivity(conn, n_channels * n_times, n_times)

    # regression on non-permuted data
    t_values = compute_regression_t(data, preds)
    clusters, cluster_stats = _find_clusters(t_values.ravel(), threshold=2.,
                                             tail=0, connectivity=connectivity)
    clusters = _cluster_indices_to_mask(clusters, n_channels * n_times)
    clusters = [clst.reshape((n_times, n_channels)) for clst in clusters]

    if not clusters:
        print('No clusters found, permutations are not performed.')
        return t_values, clusters, cluster_stats
    msg = 'Found {} clusters, computing permutations.'
    print(msg.format(len(clusters)))

    # The progress bar is created only now, so that the early return above
    # does not leave an empty bar behind.
    pbar = None
    if progressbar:
        if not progressbar == 'text':
            from tqdm import tqdm_notebook
            pbar = tqdm_notebook(total=n_permutations)
        else:
            from tqdm import tqdm
            pbar = tqdm(total=n_permutations)

    pos_dist = np.zeros(n_permutations)
    neg_dist = np.zeros(n_permutations)

    # compute permutations
    try:
        for perm in range(n_permutations):
            perm_inds = np.random.permutation(n_obs)
            perm_tvals = compute_regression_t(data, preds[perm_inds])
            _, perm_cluster_stats = _find_clusters(
                perm_tvals.ravel(), threshold=2., tail=0,
                connectivity=connectivity)

            # if any clusters were found - add max statistic
            if perm_cluster_stats.shape[0] > 0:
                max_val = perm_cluster_stats.max()
                min_val = perm_cluster_stats.min()
                if max_val > 0:
                    pos_dist[perm] = max_val
                if min_val < 0:
                    neg_dist[perm] = min_val

            if pbar is not None:
                pbar.update(1)
    finally:
        # release the bar even when a permutation fails
        if pbar is not None:
            pbar.close()

    # compute permutation probability
    cluster_p = np.array([(pos_dist > cluster_stat).mean() if cluster_stat > 0
                          else (neg_dist < cluster_stat).mean()
                          for cluster_stat in cluster_stats])
    cluster_p *= 2  # because we use two-tail
    cluster_p[cluster_p > 1.] = 1.  # probability has to be <= 1.

    # sort clusters by p value
    cluster_order = np.argsort(cluster_p)
    cluster_p = cluster_p[cluster_order]
    clusters = [clusters[i] for i in cluster_order]

    return t_values, clusters, cluster_p
def cluster_correction_mcp(x, x_p, th, tail=1, **kwargs):
    """Cluster-based correction for multiple comparisons (non-parametric).

    Computes p-values corrected for multiple comparisons at the cluster
    level. Clusters are detected separately for each ROI (first axis of
    ``x``) but can extend over the remaining dimensions (e.g. frequencies,
    times etc.).

    Parameters
    ----------
    x : array_like
        Array of true effect size of shape (n_roi, ...)
    x_p : array_like
        Array of permutations of shape (n_perm, n_roi, ...)
    th : float
        The threshold to use. A ``dict`` threshold is handled separately
        when ``tail`` is -1 (see the inline comments).
    tail : {-1, 0, 1}
        Type of comparison. Use -1 for the lower part of the distribution,
        1 for the higher part and 0 for both
    kwargs : dict | {}
        Additional arguments are sent to
        :func:`mne.stats.cluster_level._find_clusters`

    Returns
    -------
    pvalues : array_like
        Array of p-values of shape (n_roi, n_times), clipped to
        ``[1 / n_perm, 1]``
    """
    n_perm = x_p.shape[0]
    n_roi = x.shape[0]
    assert tail in [-1, 0, 1]
    kwargs['tail'] = tail
    dict_threshold = isinstance(th, dict)
    logger.info(f" Cluster detection (threshold={th}; tail={tail})")

    # -------------------------------------------------------------------------
    # clusters of the true effect, one entry per ROI
    cl_loc, cl_mass = [], []
    for roi in range(n_roi):
        roi_loc, roi_mass = _find_clusters(x[roi, ...], th, **kwargs)
        # for non-tfce, clusters are returned as a list of tuples
        roi_loc = [c[0] if isinstance(c, tuple) else c for c in roi_loc]
        # update cluster mass according to the tail (done in place)
        if tail == 0:
            np.abs(roi_mass, out=roi_mass)
        elif tail == -1 and not dict_threshold:
            roi_mass *= -1
        # save where clusters have been found and cluster size
        cl_loc.append(roi_loc)
        cl_mass.append(roi_mass)

    # -------------------------------------------------------------------------
    # clusters of the permutations: one extreme cluster mass per (ROI, perm)
    cl_p_mass = []
    for roi in range(n_roi):
        roi_null = []
        for perm in range(n_perm):
            _, perm_mass = _find_clusters(x_p[perm, roi, ...], th, **kwargs)
            # if no cluster have been found, set a cluster mass of 0
            if len(perm_mass) == 0:
                perm_mass = [0]  # noqa
            masses = np.r_[tuple(perm_mass)]
            if tail == 0:
                # max of absolute cluster size
                roi_null.append(np.abs(masses).max())
            elif tail == 1 or (tail == -1 and dict_threshold):
                # max cluster size
                roi_null.append(masses.max())
            elif tail == -1:
                # min cluster size
                roi_null.append(masses.min())
        cl_p_mass.append(roi_null)

    # array conversion of shape (n_roi, n_perm)
    cl_p_mass = np.asarray(cl_p_mass)
    # for maximum statistics, repeat the max across ROI
    max_over_roi = cl_p_mass.max(axis=0, keepdims=True)
    cl_p_mass = np.tile(max_over_roi, (n_roi, 1))

    pv = _clusters_to_pvalues(x.shape, n_perm, cl_loc, cl_mass, cl_p_mass)
    return np.clip(pv, 1. / n_perm, 1.)
data.append(data_temp[epo_idx, ]) idx_border += 1 idx_borders[-1].append(idx_border) data = np.array(data) dm_new = dm_new.drop("Subj", axis=1) for wav in wavs: dm_new = dm_new.drop(wav, axis=1) Y = Vectorizer().fit_transform(data) linear_model = LinearRegression(fit_intercept=False) linear_model.fit(dm_new, Y) pred_col = predictor_vars.index('Angenehm') coefs = get_coef(linear_model, 'coef_') betas = coefs[:, pred_col] # find clusters clusters, cluster_stats = _find_clusters(betas, threshold=threshold, connectivity=connectivity, tail=1) for i in range(perm_n): resampled_data = data.copy() print("{} of {}".format(i, perm_n)) # shuffle data within subject/condition for border in idx_borders: resample_inds = random.choice(range(border[0], border[1]), replace=False) resampled_data[border[0]:border[1], ] = resampled_data[resample_inds, ] Y = Vectorizer().fit_transform(resampled_data) linear_model.fit(dm_new, Y) coefs = get_coef(linear_model, 'coef_') betas = coefs[:, pred_col] # compute clustering on squared t-values (i.e., f-values) re_clusters, re_cluster_stats = _find_clusters(betas,
def _permutation_cluster_test_AT(X, threshold, tail, n_permutations,
                                 connectivity, n_jobs, seed, max_step,
                                 exclude, step_down_p, t_power, out_type,
                                 check_disjoint, buffer_size):
    """Aux Function.

    Note. X is required to be a list. For each ``i`` in
    ``range(n_permutations)`` the clusters of ``X[i][0]`` are computed and the
    size of the largest one is recorded.

    ``seed``, ``step_down_p`` and ``buffer_size`` are accepted but not used;
    ``n_jobs`` is only validated with ``check_n_jobs``; ``exclude`` is only
    checked for its size.

    Returns
    -------
    max_clu_lens : array of length ``n_permutations`` with the size of the
        largest cluster found in each ``X[i][0]`` (0 if none was found).
    clusters : the clusters of the last processed element (an empty list when
        ``n_permutations`` is 0), formatted according to ``out_type``.

    Raises
    ------
    ValueError
        If ``out_type`` is invalid, if the signs of ``tail`` and ``threshold``
        are incompatible, if the elements of ``X`` differ in sample shape, or
        if ``exclude`` does not have ``n_tests`` elements.
    """
    n_jobs = check_n_jobs(n_jobs)
    if out_type not in ['mask', 'indices']:
        raise ValueError('out_type must be either \'mask\' or \'indices\'')
    if not isinstance(threshold, dict) and (tail < 0 and threshold > 0 or
                                            tail > 0 and threshold < 0 or
                                            tail == 0 and threshold < 0):
        raise ValueError(
            'incompatible tail and threshold signs, got %s and %s'
            % (tail, threshold))

    # check dimensions for each group in X (a list at this stage).
    X = [x[:, np.newaxis] if x.ndim == 1 else x for x in X]
    n_times = X[0].shape[0]

    sample_shape = X[0].shape[1:]
    for x in X:
        if x.shape[1:] != sample_shape:
            raise ValueError('All samples mush have the same size')

    # NOTE: the data is not flattened here, so n_tests is the size of the
    # second dimension only.
    n_tests = X[0].shape[1]

    use_connectivity = connectivity is not None and connectivity is not False
    if use_connectivity:
        connectivity = cluster_level._setup_connectivity(
            connectivity, n_tests, n_times)

    if (exclude is not None) and not exclude.size == n_tests:
        raise ValueError('exclude must be the same shape as X[0]')

    # determine if connectivity itself can be separated into disjoint sets
    if check_disjoint is True and use_connectivity:
        partitions = cluster_level._get_partitions_from_connectivity(
            connectivity, n_times)
    else:
        partitions = None

    max_clu_lens = np.zeros(n_permutations)
    # defined up front so that the return below works for n_permutations == 0
    clusters = []
    for i in range(n_permutations):
        include = None
        clusters, cluster_stats = cluster_level._find_clusters(
            X[i][0], threshold, tail, connectivity, max_step=max_step,
            include=include, partitions=partitions, t_power=t_power,
            show_info=True)

        logger.info('Found %d clusters' % len(clusters))

        # convert clusters to old format
        if use_connectivity:
            # our algorithms output lists of indices by default
            if out_type == 'mask':
                clusters = cluster_level._cluster_indices_to_mask(
                    clusters, n_tests)
        else:
            # ndimage outputs slices or boolean masks by default
            if out_type == 'indices':
                clusters = cluster_level._cluster_mask_to_indices(clusters)

        # The clusters should have the same shape as the samples
        clusters = cluster_level._reshape_clusters(clusters, sample_shape)

        max_clu_len = 0
        for cluster in clusters:
            if isinstance(cluster, np.ndarray) and cluster.dtype == bool:
                # boolean mask: the cluster size is the number of True entries
                clu_len = int(np.count_nonzero(cluster))
            else:
                # index format: one index array per dimension
                clu_len = len(cluster[0])
            max_clu_len = max(max_clu_len, clu_len)

        logger.info('Max cluster length %d' % max_clu_len)
        max_clu_lens[i] = max_clu_len

    return max_clu_lens, clusters