예제 #1
0
# Summarise the scores of every (experiment_train, model, window) group:
# mean, standard deviation, and the mean of the resampled p values obtained
# by testing the group's scores against 0.5.
cols = ['model', 'pval', 'score_mean', 'score_std', 'train', 'window']
df_post = dict((name, []) for name in cols)
group_keys = ['experiment_train', 'model', 'window']
for (train, model_name, window), df_sub in df_plot.groupby(group_keys):
    scores = df_sub['score'].values
    ps = utils.resample_ttest(scores,
                              0.5,
                              n_ps=100,
                              n_permutation=int(1e4),
                              one_tail=True)
    # One summary row per group; every key matches an entry of `cols`.
    row = {
        'model': model_name,
        'window': window,
        'train': train,
        'score_mean': scores.mean(),
        'score_std': scores.std(),
        'pval': ps.mean(),
    }
    for name, value in row.items():
        df_post[name].append(value)
# Column order of the resulting frame follows `cols`.
df_post = pd.DataFrame(df_post)
# For each (train, model) group of the summary table, order the rows by
# p value and hand the p values to utils.MCPConverter (presumably a
# multiple-comparison correction helper -- confirm against utils).
# NOTE(review): the remainder of this loop is not visible in this excerpt;
# nothing is appended to `temp` within the lines shown.
temp = []
for (train, model_name), df_sub in df_post.groupby(['train', 'model']):
    df_sub = df_sub.sort_values(['pval'])  # ascending order (pandas default)
    ps = df_sub['pval'].values
    converter = utils.MCPConverter(pvals=ps)
    d = converter.adjust_many()  # adjusted values are bound to `d`
                                                           feature_names,
                                                           target_name,
                                                           results,
                                                           participant,
                                                           experiment,
                                                           dot_dir,
                                                           window=n_back)
# Transform the results dictionary into a data frame, save it as a CSV file
# and load it back from that file.
csv_path = '../results/Pos_6_features.csv'
c = pd.DataFrame(results)
c.to_csv(csv_path, index=False)
c = pd.read_csv(csv_path)

# Nonparametric t test with random resampling, run once per
# (model, window) group against a baseline of 0.5.
ttest = {'model': [], 'window': [], 'ps_mean': [], 'ps_std': []}
for (model, window), c_sub in c.groupby(['model', 'window']):
    ps = resample_ttest(
        c_sub['score'].values,  # numpy array of scores
        baseline=0.5,  # the value the scores are compared against
        n_ps=500,  # estimate the p value 500 times
        n_permutation=int(5e4),  # 50000 resamplings per p-value estimate
    )
    ps_mean = np.mean(ps)
    row = {
        'model': model,
        'window': window,
        'ps_mean': ps_mean,
        'ps_std': np.std(ps),
    }
    for key, value in row.items():
        ttest[key].append(value)
    print('{} window {} {:.3f}'.format(model, window, ps_mean))
# Transform the accumulated dictionary into a data frame.
d = pd.DataFrame(ttest)
# Next, correct the p values for multiple comparisons.
# Note that the correction is done within each model, across its windows,
# and we have 3 models.
temp = []
for model, d_ in d.groupby('model'):  # for each model
    idx_sort = np.argsort(d_['ps_mean'].values)
    for name in d_.columns:
                  value_vars = ['mean_variance','best_variance'],
                  var_name = 'Variance Explained',
                  value_name = 'Differences of Variance Explained')

# For every (condition, roi, Model) group, test the values of column `col`
# against a baseline of 0 and record the group's mean/std together with
# the mean/std of the resampled p values.
col = 'mean_variance'
stat_keys = [
    'condition',
    'roi',
    'model',
    'diff_mean',
    'diff_std',
    'ps_mean',
    'ps_std',
]
df_stat = {key: [] for key in stat_keys}
for (condition, roi, model), df_sub in df_difference.groupby(
        ['condition', 'roi', 'Model']):
    values = df_sub[col].values
    ps = utils.resample_ttest(values,
                              baseline=0,
                              n_ps=100,
                              n_permutation=int(5e4))
    row = {
        'condition': condition,
        'roi': roi,
        'model': model,
        'diff_mean': values.mean(),
        'diff_std': values.std(),
        'ps_mean': ps.mean(),
        'ps_std': ps.std(),
    }
    for key, value in row.items():
        df_stat[key].append(value)
df_stat = pd.DataFrame(df_stat)

# Within each condition: sort the rows by mean p value, pass those p values
# to utils.MCPConverter, and store the 'bonferroni' column of the
# adjust_many() output as 'ps_corrected'.
# NOTE(review): nothing is appended to `temp` in the lines visible here;
# the loop appears to be cut off in this excerpt.
temp = []
for (condition),df_sub in df_stat.groupby(['condition']):
    df_sub = df_sub.sort_values(['ps_mean'])  # ascending order (pandas default)
    converter = utils.MCPConverter(pvals = df_sub['ps_mean'].values)
    d = converter.adjust_many()
    # rows of `d` are taken positionally, i.e. in the sorted order above
    df_sub['ps_corrected'] = d['bonferroni'].values
    att.to_csv('../results/ATT_6_features.csv', index=False)

    # ---- compare the scores against chance level (0.5) ----
    # Work on a copy of `pos`, keeping only windows strictly between 0 and 5.
    df = pos.copy()
    window_in_range = (df['window'] > 0) & (df['window'] < 5)
    df = df[window_in_range]
    results = {'model': [], 'window': [], 'ps_mean': [], 'ps_std': []}
    for (model, window), df_sub in df.groupby(['model', 'window']):
        # rows are ordered by the 'sub' column before testing
        df_sub = df_sub.sort_values('sub')
        ps = resample_ttest(df_sub['score'].values,
                            0.5,
                            one_tail=False,
                            n_ps=1000,
                            n_permutation=10000)
        row = {
            'model': model,
            'window': window,
            'ps_mean': ps.mean(),
            'ps_std': ps.std(),
        }
        for key, value in row.items():
            results[key].append(value)
    results = pd.DataFrame(results)
    # Per model: reorder every column by ascending 'ps_mean', then store the
    # 'bonferroni' column of MCPConverter.adjust_many() as 'ps_corrected'.
    # NOTE(review): nothing is appended to `temp` in the lines visible here;
    # the loop appears to be cut off in this excerpt.
    temp = []
    for model, df_sub in results.groupby('model'):
        # positions that would sort this group's mean p values ascending
        idx_sort = np.argsort(df_sub['ps_mean'])
        for name in results.columns:
            # overwrite each column positionally with its values in sorted
            # order; the group's index labels are not reordered
            df_sub[name] = df_sub[name].values[idx_sort]
        convert = MCPConverter(pvals=df_sub['ps_mean'].values)
        df_pvals = convert.adjust_many()
        df_sub['ps_corrected'] = df_pvals['bonferroni'].values
예제 #5
0
    window=[],
    attribute=[],
    ps_mean=[],
    ps_std=[],
    value_mean=[],
    value_std=[],
    baseline=[],
)
# Baseline that each model's values are tested against.
baseline_by_model = {
    'LogisticRegression': 1,
    'RandomForest': 0,
}
# For every (model_name, window, attr) group, run a two-tailed resampled
# t test of the group's values against the model-specific baseline and
# record the p-value statistics.
for (model_name, window, attribute), df_sub in df_pos_plot.groupby(
        ['model_name', 'window', 'attr']):
    values = df_sub['value'].values
    print(model_name, window, attribute, values.mean())
    # Fail loudly on an unexpected model. Previously an unmatched model fell
    # through the if/elif and either hit a NameError (first group) or reused
    # `ps`/`baseline` left over from the previous group.
    if model_name not in baseline_by_model:
        raise ValueError(
            'no baseline defined for model {!r}'.format(model_name))
    baseline = baseline_by_model[model_name]
    ps = resample_ttest(values,
                        baseline=baseline,
                        n_ps=100,
                        n_permutation=int(1e6),
                        one_tail=False)
    ttest_results['model_name'].append(model_name)
    ttest_results['window'].append(window)
    ttest_results['attribute'].append(attribute)
    ttest_results['ps_mean'].append(ps.mean())
    ttest_results['ps_std'].append(ps.std())
    ttest_results['value_mean'].append(values.mean())
     window=[],
     attribute=[],
     ps_mean=[],
     ps_std=[],
     value_mean=[],
     value_std=[],
     baseline=[],
 )
 for (model_name, attribute,
      window), df_sub in c_test.groupby(['Models', 'Attributes', 'Window']):
     if model_name == "RandomForestClassifier":
         baseline = 1 / 3.
         ps = resample_ttest(
             df_sub['Values'].values,
             baseline=baseline,
             n_ps=n_ps,
             n_permutation=n_permutation,
             one_tail=True,
         )
     elif model_name == "LogisticRegression":
         baseline = 1
         ps = resample_ttest(
             df_sub['Values'].values,
             baseline=baseline,
             n_ps=n_ps,
             n_permutation=n_permutation,
             one_tail=True,
         )
     ttest_results['model_name'].append(model_name)
     ttest_results['window'].append(window)
     ttest_results['attribute'].append(attribute)