# Build a long-format results table: one row per (train-experiment, model,
# window) group, holding the group's mean/std decoding score and a resampled
# p-value against the 0.5 chance level, then correct p-values for multiple
# comparisons within each (train, model) pair.
cols = [
    'model',
    'pval',
    'score_mean',
    'score_std',
    'train',
    'window',
]
df_post = {name: [] for name in cols}
for (train, model_name, window), df_sub in df_plot.groupby(
        ['experiment_train', 'model', 'window']):
    df_sub  # NOTE(review): no-op expression (notebook leftover) — safe to delete
    # One-tailed nonparametric test of the group's scores against chance (0.5);
    # presumably returns an array of n_ps p-value estimates — TODO confirm
    # against utils.resample_ttest.
    ps = utils.resample_ttest(df_sub['score'].values, 0.5,
                              n_ps=100,
                              n_permutation=int(1e4),
                              one_tail=True)
    df_post['model'].append(model_name)
    df_post['window'].append(window)
    df_post['train'].append(train)
    df_post['score_mean'].append(df_sub['score'].values.mean())
    df_post['score_std'].append(df_sub['score'].values.std())
    # Collapse the p-value estimates to a single mean p-value per group.
    df_post['pval'].append(ps.mean())
df_post = pd.DataFrame(df_post)
# Multiple-comparison correction, applied within each (train, model) pair
# across its windows.
temp = []
for (train, model_name), df_sub in df_post.groupby(['train', 'model']):
    # Sort ascending by p-value first; step-down correction procedures
    # presumably expect sorted input — verify against utils.MCPConverter.
    df_sub = df_sub.sort_values(['pval'])
    ps = df_sub['pval'].values
    # MCPConverter name suggests it computes several multiple-comparison
    # corrections at once — confirm in utils.
    converter = utils.MCPConverter(pvals=ps)
    d = converter.adjust_many()
    # NOTE(review): fragment truncated here — the loop body continues beyond
    # this chunk.
# NOTE(review): this chunk begins mid-call — the line below is the tail of a
# function invocation whose callee and opening arguments are outside this view.
feature_names, target_name, results, participant, experiment, dot_dir, window=n_back)
c = pd.DataFrame(results)  # transform a dictionary object to a data frame
c.to_csv('../results/Pos_6_features.csv', index=False)  # save as a csv
# Round-trip through CSV; presumably normalizes dtypes/index — confirm intent.
c = pd.read_csv('../results/Pos_6_features.csv')
# now it is the nonparametric t test with random resampling
ttest = dict(model=[], window=[], ps_mean=[], ps_std=[])
for (model, window), c_sub in c.groupby(['model', 'window']):
    ps = resample_ttest(
        c_sub['score'].values,      # numpy-array
        baseline=0.5,               # the value we want to compare against to
        n_ps=500,                   # estimate the p value 500 times
        n_permutation=int(5e4)      # use 50000 resamplings to estimate 1 p value
    )
    ttest['model'].append(model)
    ttest['window'].append(window)
    ttest['ps_mean'].append(np.mean(ps))
    ttest['ps_std'].append(np.std(ps))
    # Progress/diagnostic print of the mean p-value per (model, window).
    print('{} window {} {:.3f}'.format(model, window, np.mean(ps)))
d = pd.DataFrame(ttest)  # transform a dictionary object to data frame
# now it is the p value correction for multiple comparison
# note that the correction is done within each model along the number of windows
# , and we have 3 models
temp = []
for model, d_ in d.groupby('model'):  # for each model
    # Ordering of rows by ascending mean p-value; used to reorder columns.
    idx_sort = np.argsort(d_['ps_mean'].values)
    for name in d_.columns:
        # NOTE(review): fragment truncated — the inner loop body continues
        # beyond this chunk.
# NOTE(review): chunk begins mid-call — these are the trailing keyword
# arguments of a wide-to-long reshape (melt-style call); the callee and its
# opening arguments are outside this view.
value_vars = ['mean_variance','best_variance'], var_name = 'Variance Explained', value_name = 'Differences of Variance Explained')
# Accumulators for per-(condition, roi, model) statistics.
df_stat = dict(
        condition = [],
        roi = [],
        model = [],
        diff_mean = [],
        diff_std = [],
        ps_mean = [],
        ps_std = [],)
col = 'mean_variance'
for (condition,roi,model),df_sub in df_difference.groupby(['condition','roi','Model']):
    df_sub  # NOTE(review): no-op expression (notebook leftover) — safe to delete
    # Resampled test of the variance-explained differences against a baseline
    # of 0 (i.e. "no difference").
    ps = utils.resample_ttest(df_sub[col].values,baseline = 0,
                              n_ps = 100,
                              n_permutation = int(5e4))
    df_stat['condition'].append(condition)
    df_stat['roi'].append(roi)
    df_stat['model'].append(model)
    df_stat['diff_mean'].append(df_sub[col].values.mean())
    df_stat['diff_std'].append(df_sub[col].values.std())
    df_stat['ps_mean'].append(ps.mean())
    df_stat['ps_std'].append(ps.std())
df_stat = pd.DataFrame(df_stat)
# Multiple-comparison correction within each condition.
temp = []
for (condition),df_sub in df_stat.groupby(['condition']):
    # Ascending p-values; step-down correction procedures presumably expect
    # sorted input — verify against utils.MCPConverter.
    df_sub = df_sub.sort_values(['ps_mean'])
    converter = utils.MCPConverter(pvals = df_sub['ps_mean'].values)
    d = converter.adjust_many()
    # Keep the Bonferroni-corrected column.
    df_sub['ps_corrected'] = d['bonferroni'].values
    # NOTE(review): fragment truncated — the loop body continues beyond this
    # chunk.
att.to_csv('../results/ATT_6_features.csv', index=False)
df = pos.copy()
######################### compared against chance level ###############
# Keep only windows 1-4 (exclusive bounds on 0 and 5).
df = df[(0 < df['window']) & (df['window'] < 5)]
results = dict(
    model=[],
    window=[],
    ps_mean=[],
    ps_std=[],
)
for (model, window), df_sub in df.groupby(['model', 'window']):
    # Sort by subject so the score vector has a deterministic order.
    df_sub = df_sub.sort_values('sub')
    # Two-tailed resampled test of decoding scores against chance (0.5).
    ps = resample_ttest(df_sub['score'].values, 0.5,
                        one_tail=False,
                        n_ps=1000,
                        n_permutation=10000)
    results['model'].append(model)
    results['window'].append(window)
    results['ps_mean'].append(ps.mean())
    results['ps_std'].append(ps.std())
results = pd.DataFrame(results)
# Multiple-comparison correction within each model across its windows.
temp = []
for model, df_sub in results.groupby('model'):
    # Reorder every column of the per-model slice by ascending mean p-value
    # before correction.
    idx_sort = np.argsort(df_sub['ps_mean'])
    for name in results.columns:
        df_sub[name] = df_sub[name].values[idx_sort]
    convert = MCPConverter(pvals=df_sub['ps_mean'].values)
    df_pvals = convert.adjust_many()
    # Keep the Bonferroni-corrected column.
    df_sub['ps_corrected'] = df_pvals['bonferroni'].values
    # NOTE(review): fragment truncated — the loop body continues beyond this
    # chunk.
# NOTE(review): chunk begins mid-dict — these are the trailing entries of a
# results-accumulator literal (ttest_results) whose opening is outside this
# view.
window=[],
attribute=[],
ps_mean=[],
ps_std=[],
value_mean=[],
value_std=[],
baseline=[],
)
for (model_name, window, attribute), df_sub in df_pos_plot.groupby(
        ['model_name', 'window', 'attr']):
    print(model_name, window, attribute, df_sub['value'].values.mean())
    # Model-specific null baseline: 1 for LogisticRegression values, 0 for
    # RandomForest values — presumably odds-ratio-like coefficients vs.
    # feature importances; confirm how 'value' is produced upstream.
    if model_name == 'LogisticRegression':
        baseline = 1
        ps = resample_ttest(df_sub['value'].values,
                            baseline=baseline,
                            n_ps=100,
                            n_permutation=int(1e6),
                            one_tail=False)
    elif model_name == 'RandomForest':
        baseline = 0
        ps = resample_ttest(df_sub['value'].values,
                            baseline=baseline,
                            n_ps=100,
                            n_permutation=int(1e6),
                            one_tail=False)
    # NOTE(review): no else branch — if model_name matches neither string,
    # `baseline`/`ps` silently carry over from the previous iteration (or
    # raise NameError on the first); consider an explicit else/raise.
    ttest_results['model_name'].append(model_name)
    ttest_results['window'].append(window)
    ttest_results['attribute'].append(attribute)
    ttest_results['ps_mean'].append(ps.mean())
    ttest_results['ps_std'].append(ps.std())
    ttest_results['value_mean'].append(df_sub['value'].values.mean())
    # NOTE(review): fragment truncated — the remaining appends (value_std,
    # baseline) continue beyond this chunk.
# NOTE(review): chunk begins mid-dict — these are the trailing entries of a
# results-accumulator literal (ttest_results) whose opening is outside this
# view.
window=[],
attribute=[],
ps_mean=[],
ps_std=[],
value_mean=[],
value_std=[],
baseline=[],
)
for (model_name, attribute, window), df_sub in c_test.groupby(['Models', 'Attributes', 'Window']):
    # Model-specific null baseline: 1/3 for the RandomForestClassifier
    # (presumably 3-class chance accuracy — confirm class count upstream)
    # and 1 for LogisticRegression values.
    if model_name == "RandomForestClassifier":
        baseline = 1 / 3.
        # One-tailed resampled test against the chance baseline; n_ps and
        # n_permutation are defined outside this chunk.
        ps = resample_ttest(
            df_sub['Values'].values,
            baseline=baseline,
            n_ps=n_ps,
            n_permutation=n_permutation,
            one_tail=True,
        )
    elif model_name == "LogisticRegression":
        baseline = 1
        ps = resample_ttest(
            df_sub['Values'].values,
            baseline=baseline,
            n_ps=n_ps,
            n_permutation=n_permutation,
            one_tail=True,
        )
    # NOTE(review): no else branch — an unrecognized model name reuses stale
    # `baseline`/`ps` from the previous iteration (or raises NameError on the
    # first); consider an explicit else/raise.
    ttest_results['model_name'].append(model_name)
    ttest_results['window'].append(window)
    ttest_results['attribute'].append(attribute)
    # NOTE(review): fragment truncated — the remaining appends continue beyond
    # this chunk.