def get_colorinfo(r_name, clusters):
    """Derive the colour-scale settings used to plot regressor `r_name`.

    The number of significant effects is estimated from the clusters of
    `r_name` as the sum of ``(end_t - start_t) / 10 + 1`` over all clusters
    (presumably one time point every 10 time units -- confirm against the
    time resolution of the data).

    * With at least 12 significant effects the source data is loaded with
      the clusters as mask (non-significant effects become NaN) and the
      scale spans minimum / median / maximum of the absolute values of the
      remaining effects.
    * Otherwise the data is loaded without mask and the scale is anchored
      at the 0.95, 0.99 and 0.999 quantiles of the absolute values of all
      effects.

    Relies on the module-level names `ss`, `basefile`, `use_basefile` and
    `show_measure`.

    Parameters
    ----------
    r_name : label used to select rows of `clusters` via ``.loc`` and passed
        to ``ss.load_src_df`` as the regressor to load.
    clusters : object supporting ``.loc[r_name]`` with ``start_t`` and
        ``end_t`` attributes; also handed to ``ss.load_src_df`` as the mask.

    Returns
    -------
    dict with keys 'fmin', 'fmid', 'fmax', 'transparent' and 'colormap'.
    """
    # count significant effects: every cluster contributes its extent in
    # steps of 10, both end points included
    own_clusters = clusters.loc[r_name]
    extent = own_clusters.end_t - own_clusters.start_t
    Nsig = (extent / 10 + 1).sum()

    # mask with the clusters only if there are sufficiently many significant
    # effects; with too few effects after FDR the data stays unmasked
    enough_significant = Nsig >= 12
    mask = clusters if enough_significant else None
    src_df_masked = ss.load_src_df(basefile, r_name, mask, use_basefile)

    if show_measure not in src_df_masked.columns:
        ss.add_measure(src_df_masked, show_measure)

    magnitudes = src_df_masked[show_measure].abs()

    if enough_significant:
        # colormap follows the distribution of the significant effects
        fmin = magnitudes.min()
        fmid = magnitudes.median()
        fmax = magnitudes.max()
    else:
        # colormap follows the upper tail of the distribution of all effects
        fmin = magnitudes.quantile(0.95)
        fmid = magnitudes.quantile(0.99)
        fmax = magnitudes.quantile(0.999)

    return {
        'fmin': fmin,
        'fmid': fmid,
        'fmax': fmax,
        'transparent': True,
        'colormap': 'auto',
    }
stes = stds / np.sqrt(betas.iloc[:, 0].count()) tvals, pvals = scipy.stats.ttest_1samp(betas, 0, axis=0) return pd.Series(np.r_[betas.mean().values, stds, stes, tvals, pvals, -np.log10(pvals)], index=pd.MultiIndex.from_product( [['mean', 'std', 'ste', 'tval', 'pval', 'mlog10p'], row.index.levels[2]], names=['measure', 'regressor']), name=row.name) sl = fl.apply(statfun, axis=1) ss.add_measure(sl, 'mlog10p_fdr') #%% define colors for plotting r_colors = { 'intercept': 'C0', 'dot_x_time': 'C1', 'dot_y_time': 'C2', 'percupt_x_time': 'C3', 'percupt_y_time': 'C4' } r_labels = { 'percupt_y_time': 'PU-y', 'percupt_x_time': 'PU-x', 'dot_x_time': 'evidence', 'dot_y_time': 'y-coord', 'intercept': 'intercept'
elif r_name == 'response': if response_aligned: x_times = [-30, 0, 30, 50] else: x_times = [780, 820, 890] # load the selected regressors and mask as desired src_df_masked = pd.concat( [ss.load_src_df(basefile, reg, mask, use_basefile) for reg in regressors], keys=regressors, names=['regressor', 'label', 'time']) times = src_df_masked.index.levels[2] if show_measure not in src_df_masked.columns: ss.add_measure(src_df_masked, show_measure) if basefile.startswith('source_sequential'): # flip sign of all non-nan values in accev for which there is a nan value # in dot_x 100 ms later - these are the effects that are only present in # accumulated evidence, but not in dot_x fliptimes = times[times <= times[-1] - 100] accevvals = src_df_masked.loc[('accev', slice(None), fliptimes), show_measure] dotxvals = src_df_masked.loc[('dot_x', slice(None), fliptimes + 100), show_measure] flip = np.ones(dotxvals.size) flip[dotxvals.isnull().values] = -1 print('number of flips = %d' % np.sum(flip < 0)) # additionally flip all non-nan values for which the sign of the effect differs # note that the meaning of accev is such that its sign is flipped with respect
ind = fl.index.get_level_values('label').map( lambda x: x.startswith(labels[-1])) evoked.append(fl[ind].groupby('time').mean()) evoked = pd.concat(evoked, keys=labels, names=['label', 'time']) evoked_sl = pd.DataFrame(evoked.mean(axis=1)) evoked_sl.columns = ['mean'] evoked_sl['ste'] = evoked.std(axis=1) / pd.np.sqrt(evoked.shape[1]) evoked_sl['top'] = evoked_sl['mean'] + 2 * evoked_sl.ste evoked_sl['bottom'] = evoked_sl['mean'] - 2 * evoked_sl.ste tvals, pvals = ttest_1samp(evoked, 0, axis=1) evoked_sl['tval'] = tvals evoked_sl['mlog10p'] = -pd.np.log10(pvals) ss.add_measure(evoked_sl, 'p_fdr') print('largest absolute average t-values:') print(evoked_sl.groupby('time').mean().abs().tval.sort_values().tail()) #%% plot time course fig, ax = plt.subplots() lr = dict(L='left', R='right') sigy = dict(L=-0.025, R=-0.027) cols = dict(L='C0', R='C1') for label in labels: sl = evoked_sl.loc[label] ax.plot(sl.index, sl['mean'], label=lr[label[0]], color=cols[label[0]])
# exclude time-outs, trial_time, intercept, response # trial normalisation of data, local normalisation of DM basefile = 'source_singledot_201808291410.h5' show_measure = 'abstval' fdr_alpha = 0.01 #%% determine statistically significant effects sl = pd.read_hdf(os.path.join( helpers.resultsdir, basefile), 'second_level').loc[0] winnames = sl.index.get_level_values('time').unique() ss.add_measure(sl, 'mlog10p_fdr') sig = sl[sl[('mlog10p_fdr', r_name)] > -np.log10(fdr_alpha)].xs( r_name, level='regressor', axis=1) srcdf = sl.xs(r_name, level='regressor', axis=1).copy() srcdf[srcdf['mlog10p_fdr'] < -np.log10(fdr_alpha)] = 0 if show_measure not in srcdf.columns: ss.add_measure(srcdf, show_measure) #%% define some plotting functions def get_colorinfo(srcdf, measure): srcdf = srcdf[(srcdf[measure] != 0) & srcdf[measure].notna()] return {'fmin': srcdf[measure].abs().min(),
make_figures = True # vertices of pre-motor and motor areas, baseline (-0.3, 0), first 5 dots, # trialregs_dot=0, source GLM, sum_dot_y, constregs=0 for 1st dot, # subject-specific normalisation of DM without centering and scaling by std # label_tc normalised across trials, times and subjects basefile = 'source_sequential_201711271306.h5' src_df = pd.concat([ ss.load_src_df(basefile, r_name, use_basefile=True) for r_name in r_names ], keys=r_names, names=['regressor', 'label', 'time']) # this performs FDR-correction across all regressors, vertices and times ss.add_measure(src_df, 'p_fdr') #%% prepare plotting def get_colorinfo(measure, src_df, fdr_alpha=0.01): # find measure value that is the first one with p-value equal or smaller # than fdr_alpha pdiff = src_df.p_fdr - fdr_alpha try: fmid = src_df[pdiff <= 0].sort_values('p_fdr')[measure].abs().iloc[-1] except IndexError: print('No FDR-corrected significant effects!') if measure == 'tval': fmin = src_df[measure].abs().min() fmax = src_df[measure].abs().max() colorinfo = {
for r_name in r_names ], r_names) sl = second_level.loc[(labels, timeslice), ([measure, 'mlog10p'], r_names)].stack('regressor') # get rid of the data that I don't want to show and shouldn't influence # multiple comparison correction for r_name, label in labels.iteritems(): other = set(r_names).difference(set([r_name])).pop() sl.loc[(label, slice(None), other), measure] = np.nan sl.dropna(inplace=True) ss.add_measure(sl, 'p_fdr') #sl['significant'] = sl.p_fdr < 0.01 sl['significant'] = sl.mlog10p > -np.log10(0.01) #%% plot example time courses for the brain area which has the largest overall # average effect fig, axes = plt.subplots(1, 2, sharex=True, sharey=True, figsize=[7.5, 3]) rlabels = dict(dot_x='evidence', dot_y='y-coordinate') for r_name, ax in zip(r_names, axes): label = labels[r_name] l, l1 = plot_single_source_signal(r_name, label, ax, t_slice=timeslice)
#%% save all significant r_name clusters to csv-file def to_csv(areas, fname): areas['label'] = areas['label'].map(lambda x: x[:-7]) areas.to_csv(fname) to_csv( clusters.loc[r_name].copy().sort_values('start_t')[[ 'label', 'region', 'start_t', 'end_t', 'log10p' ]], os.path.join(figdir, 'significant_clusters_{}.csv'.format(r_name))) #%% src_df = ss.load_src_df(basefile, r_name, None, use_basefile) if show_measure not in src_df.columns: ss.add_measure(src_df, show_measure) labels = src_df.index.levels[0] def get_average_effects(twin, wname, top=5): winclusters = clusters.loc[r_name] winclusters = winclusters[((winclusters.start_t >= twin[0]) & (winclusters.start_t <= twin[1])) | ((winclusters.end_t >= twin[0]) & (winclusters.end_t <= twin[1]))] areas = winclusters.label.unique() time = int(np.mean(twin)) avsrcdf = pd.DataFrame(np.zeros((labels.size, src_df.shape[1])),