Example 1
import numpy as np
from mne.stats import permutation_t_test


def mne_permutation_ttest(group, threshold, fc, tail=1):
    # `dataset` is assumed to be a module-level object that maps a group
    # name to the indices of that group's subjects
    fc_group_1 = [fc[i] for i in dataset.group_indices[group]]
    tv, pv, H0 = permutation_t_test(np.array(fc_group_1), tail=tail, n_jobs=3)
    # Work on a -log10 scale and zero out p-values above the threshold
    pv = -np.log10(pv)
    thresh_log = -np.log10(threshold)
    ind_threshold = np.where(pv < thresh_log)
    pv[ind_threshold] = 0
    return pv
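A usage sketch with stand-in data; the dataset object, group name, and array shapes below are invented for illustration:

from types import SimpleNamespace

dataset = SimpleNamespace(group_indices={'patients': [0, 1, 2, 3, 4]})
fc = list(np.random.randn(5, 10))  # 5 subjects x 10 connectivity values
log_pv = mne_permutation_ttest('patients', 0.05, fc)
print(log_pv)  # -log10(p) values, zeroed where p exceeds the threshold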
Example 3
def pair_perm_max_stat_t_test(cond1, cond2):
    """Paired permutation t-test with max-statistic correction for multiple comparisons."""
    import numpy as np
    from mne.stats import permutation_t_test
    try:
        assert np.all(cond1.shape == cond2.shape)
    except AssertionError:
        raise ValueError('Different sizes for cond1 with shape {} and '
                         'cond2 with shape {}'.format(cond1.shape, cond2.shape))
    except AttributeError:
        raise AttributeError('Both conditions should be of type numpy.ndarray')
    # Test the paired differences against zero
    diff = cond1 - cond2
    T_obs, p_vals_corr, H0 = permutation_t_test(diff, n_permutations=100000)
    return p_vals_corr
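A quick usage sketch with synthetic paired data; the shapes and effect size are invented for illustration:

import numpy as np

rng = np.random.RandomState(0)
cond1 = rng.randn(20, 5) + 0.5  # 20 subjects x 5 sensors, shifted mean
cond2 = rng.randn(20, 5)        # same shape, centered on zero
p_corr = pair_perm_max_stat_t_test(cond1, cond2)
print(p_corr)  # one family-wise corrected p-value per sensor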
Example 4
import numpy as np
import mne
from mne.stats import permutation_t_test

# (assumes `raw`, `events`, `event_id`, `tmin`, and `tmax` from earlier setup)
include = []  # or stim channel ['STI 014']
raw.info['bads'] += ['MEG 2443', 'EEG 053']  # bads + 2 more

# pick MEG Gradiometers
picks = mne.pick_types(raw.info, meg='grad', eeg=False, stim=False, eog=True,
                       include=include, exclude='bads')
epochs = mne.Epochs(raw, events, event_id, tmin, tmax, picks=picks,
                    baseline=(None, 0), reject=dict(grad=4000e-13, eog=150e-6))
data = epochs.get_data()
times = epochs.times

temporal_mask = np.logical_and(0.04 <= times, times <= 0.06)
data = np.mean(data[:, :, temporal_mask], axis=2)

n_permutations = 50000
T0, p_values, H0 = permutation_t_test(data, n_permutations, n_jobs=2)

significant_sensors = picks[p_values <= 0.05]
significant_sensors_names = [raw.ch_names[k] for k in significant_sensors]

print("Number of significant sensors : %d" % len(significant_sensors))
print("Sensors names : %s" % significant_sensors_names)

###############################################################################
# View location of significantly active sensors

evoked = mne.EvokedArray(-np.log10(p_values)[:, np.newaxis],
                         epochs.info, tmin=0.)

# Extract mask and indices of active sensors in layout
stats_picks = mne.pick_channels(evoked.ch_names, significant_sensors_names)
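The original example presumably continues by visualizing the map; a minimal sketch using Evoked.plot_topomap, where the styling arguments are assumptions rather than part of the original:

evoked.plot_topomap(times=[0.], ch_type='grad', scalings=1.,
                    time_format=None, units='-log10(p)')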
Example 5
include = []  # or stim channel ['STI 014']
raw.info['bads'] += ['MEG 2443', 'EEG 053']  # bads + 2 more

# pick MEG Gradiometers
picks = mne.pick_types(raw.info, meg='grad', eeg=False, stim=False, eog=True,
                       include=include, exclude='bads')
epochs = mne.Epochs(raw, events, event_id, tmin, tmax, picks=picks,
                    baseline=(None, 0), reject=dict(grad=4000e-13, eog=150e-6))
data = epochs.get_data()
times = epochs.times

temporal_mask = np.logical_and(0.04 <= times, times <= 0.06)
data = np.squeeze(np.mean(data[:, :, temporal_mask], axis=2))

n_permutations = 50000
T0, p_values, H0 = permutation_t_test(data, n_permutations, n_jobs=2)

significant_sensors = picks[p_values <= 0.05]
significant_sensors_names = [raw.info['ch_names'][k]
                             for k in significant_sensors]

print "Number of significant sensors : %d" % len(significant_sensors)
print "Sensors names : %s" % significant_sensors_names

###############################################################################
# View location of significantly active sensors
import matplotlib.pyplot as plt

# load sensor layout
from mne.channels import read_layout
layout = read_layout('Vectorview-grad')
Example 6
import numpy as np
import matplotlib.pyplot as plt
from mne.decoding import cross_val_multiscore
from sklearn.model_selection import permutation_test_score

# The opening lines of this snippet were truncated in the source; the
# argument list matches sklearn's permutation_test_score, so the call head
# and result names below are a reconstruction (assumed, not original):
score, permutation_scores, pvalue = permutation_test_score(time_decod,
                                                           X,
                                                           y,
                                                           scoring="accuracy",
                                                           cv=3,
                                                           n_permutations=100,
                                                           n_jobs=1)

scores = cross_val_multiscore(time_decod, X, y, cv=cv, n_jobs=1)
# Mean scores across cross-validation splits (keep the per-split `scores`
# for the permutation test below)
mean_scores = np.mean(scores, axis=0)

class_balance = np.mean(y == y[0])
class_balance = max(class_balance, 1. - class_balance)

# Plot
fig, ax = plt.subplots()
ax.plot(epochs.times, mean_scores, label='score')
ax.axhline(class_balance, color='k', linestyle='--', label='chance')
ax.set_xlabel('Time (s)')
ax.set_ylabel('AUC')  # Area Under the Curve
ax.legend()
ax.axvline(.0, color='k', linestyle='-')
ax.set_title('Sensor space decoding')
plt.show()

from mne.stats import permutation_t_test

# permutation_t_test expects a 2-D (n_samples, n_tests) array and tests the
# mean against zero, so test the per-split scores relative to chance level
n_permutations = 50000
T0, p_values, H0 = permutation_t_test(scores - class_balance, n_permutations,
                                      n_jobs=1)
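A plausible follow-up, not in the original snippet: map the corrected p-values back onto the time axis.

sig_times = epochs.times[p_values <= 0.05]
print("Time points decoding above chance : %s" % sig_times)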
Example 7
# (which is :math:`2^{N_{samp}}-1` for a one-tailed and
# :math:`2^{N_{samp}-1}-1` for a two-tailed test, not counting the
# veridical distribution), instead of randomly exchanging conditions
# the null is formed from using all possible exchanges. This is known
# as a permutation test (or exact test).
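#
# As a quick numeric check of those counts (the subject count here is
# invented for illustration):

n_samp = 10  # hypothetical number of subjects
print(2 ** n_samp - 1)        # 1023 possible one-tailed exchanges
print(2 ** (n_samp - 1) - 1)  # 511 possible two-tailed exchanges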

# Here we have to do a bit of gymnastics to get our function to do
# a permutation test without correcting for multiple comparisons:

X.shape = (n_subjects, n_src)  # flatten the array for simplicity
titles.append('Permutation')
ts.append(np.zeros(width * width))
ps.append(np.zeros(width * width))
mccs.append(False)
for ii in range(n_src):
    ts[-1][ii], ps[-1][ii] = permutation_t_test(X[:, [ii]], verbose=False)[:2]
plot_t_p(ts[-1], ps[-1], titles[-1], mccs[-1])

###############################################################################
# Multiple comparisons
# --------------------
# So far, we have done no correction for multiple comparisons. This is
# potentially problematic for these data because there are
# :math:`40 \cdot 40 = 1600` tests being performed. If we use a threshold
# p < 0.05 for each individual test, we would expect many voxels to be declared
# significant even if there were no true effect. In other words, we would make
# many **type I errors** (adapted from `here <errors_>`_):
#
# .. rst-class:: skinnytable
#
#   +----------+--------+------------------+------------------+
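#
# As a quick back-of-the-envelope check of the numbers above:

n_tests = 40 * 40
alpha = 0.05
print(n_tests * alpha)  # ~80 tests expected to cross p < 0.05 by chance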
Example 8
#           instead of randomly exchanging conditions the null is formed
#           from using all possible exchanges. This is known as a permutation
#           test (or exact test) form of a non-parametric resampling test.

# Here we have to do a bit of gymnastics to get our function to do
# a permutation test without correcting for multiple comparisons:

# Let's flatten the array for simplicity
X.shape = (n_subjects, n_src)
titles.append('Permutation')
ts.append(np.zeros(width * width))
ps.append(np.zeros(width * width))
mccs.append(False)
for ii in range(n_src):
    ts[-1][ii], ps[-1][ii] = \
        permutation_t_test(X[:, [ii]], verbose=(ii == 0))[:2]
plot_t_p(ts[-1], ps[-1], titles[-1], mccs[-1])

###############################################################################
# Multiple comparisons
# --------------------
# So far, we have done no correction for multiple comparisons. This is
# potentially problematic for these data because there are
# :math:`40 \times 40 = 1600` tests being performed. If we just use
# a threshold ``p < 0.05`` for all of our tests, we would expect many
# voxels to be declared significant even if there were no true effect.
# In other words, we would make many **type I errors** (adapted from
# `here <https://en.wikipedia.org/wiki/Type_I_and_type_II_errors>`_):
#
# .. rst-class:: skinnytable
#
Example 10
def test_measures_corr(subjects):
    # (relies on module-level imports: numpy as np, pandas as pd, seaborn,
    # matplotlib.pyplot as plt, os.path as op, plus study_path and project
    # helpers such as set_dif_and_rt_exp and load_decoding)
    from pandas.plotting import scatter_matrix
    from statsmodels.sandbox.stats import multicomp
    from mne.stats import permutation_t_test
    from scipy.stats import wilcoxon
    plt.style.use('ggplot')

    log_df = pd.read_csv(op.join(study_path, 'tables', 's_trial_dat.csv'))
    log_df = log_df.drop(['Unnamed: 0', 'Unnamed: 0.1', 'Unnamed: 0.1.1', 'a', 'b', 'c', 'd', 'e', 'f'], axis=1)
    log_df = set_dif_and_rt_exp(log_df)

    clf_scores = load_decoding(subjects)
    scores = [sc.scores_ for sc in clf_scores]
    times = clf_scores[0].train_times_['times']
    tds = np.array([np.diag(sc) for sc in scores])
    mean_td = np.mean(tds, axis=0)
    mean_scores = np.mean(tds[:, (times > -0.2) & (times < 0.7)], axis=1)
    clf_peak = np.max(tds, axis=1)
    clf_lat = times[np.argmax(tds, axis=1)]

    fig, ax = plt.subplots(3, 1, figsize=(18, 5), sharex=True, sharey=False)
    for ix, (c_mark, color) in enumerate(zip([90, 70], ['blue', 'red'])):
        ax[0].hist(log_df.loc[log_df.condition == c_mark, 'rt_exp'], bins=np.linspace(-1, 5, 31), align='left', alpha=0.5, color=color)
        ax[1].hist(log_df.loc[log_df.condition == c_mark, 'dif'], bins=np.linspace(-1, 5, 61), align='left', alpha=0.5, color=color)
    ax[1].set_xticks(np.linspace(-1, 5, 31))
    ax[1].set_ylim([0, 200])
    ax[1].set_xlim([-1.2, 5])
    ax[1].legend(['S2 Longer', 'S2 Shorter'])
    ax[2].plot(times[times > -0.7], mean_td[times > -0.7])
    ax[2].set_ylim([0.5, 0.65])
    ax[0].vlines(0, ymin=0, ymax=200, linestyles='--')
    ax[1].vlines(0, ymin=0, ymax=200, linestyles='--')
    ax[2].vlines(0, ymin=0.5, ymax=0.65, linestyles='--')

    corr_dat = pd.read_csv(op.join(study_path, 'tables', 'pow_table.csv'))
    corr_dat = corr_dat.drop('Unnamed: 0', axis=1)
    conn_dat = pd.read_csv(op.join(study_path, 'tables', 'conn_x_subj_lon_sho.csv'), header=None)
    corr_dat['conn_lon'] = conn_dat[0]
    corr_dat['conn_sho'] = conn_dat[1]
    corr_dat['conn_dif'] = corr_dat['conn_lon'] - corr_dat['conn_sho']
    rt_lon = log_df[log_df.condition == 90][['RT']].groupby(log_df['subject'], as_index=False).agg([np.nanmedian])['RT']['nanmedian'].tolist()
    rt_sho = log_df[log_df.condition == 70][['RT']].groupby(log_df['subject'], as_index=False).agg([np.nanmedian])['RT']['nanmedian'].tolist()
    rt = log_df[['RT']].groupby(log_df['subject'], as_index=False).agg([np.nanmedian])['RT']['nanmedian'].tolist()
    corr_dat['rt_sho'] = rt_sho
    corr_dat['rt_lon'] = rt_lon
    corr_dat['rt'] = rt
    corr_dat['clf'] = mean_scores
    corr_dat['clf_pk'] = clf_peak
    corr_dat['clf_lat'] = clf_lat
    corr_dat['acc_lon'] = log_df[log_df.condition == 90][['Accuracy']].groupby(log_df['subject'], as_index=False).agg(np.nanmean)
    corr_dat['acc_sho'] = log_df[log_df.condition == 70][['Accuracy']].groupby(log_df['subject'], as_index=False).agg(np.nanmean)
    corr_dat['acc'] = log_df[['Accuracy']].groupby(log_df['subject'], as_index=False).agg(np.nanmean)
    corr_dat['rt_exp'] = log_df[log_df.condition == 70][['rt_exp']].groupby(log_df['subject'], as_index=False).agg(np.nanmedian)
    corr_dat['abs'] = np.abs(corr_dat['clf_lat'] - corr_dat['rt_exp'])
    corr_dat['pow_dif'] = np.abs(corr_dat['pow_lon'] - corr_dat['pow_sho'])
    corr_dat.corr()
    corr_dat.to_csv(op.join(study_path, 'tables', 'corr_table.csv'))
    # plt.style.use('classic')
    # scatter_matrix(corr_dat[['clf_pk', 'rt_lon', 'rt_sho']])
    # corr_dat.corr()

    n_perm = 10000
    # r_sho, p_sho = permutation_pearson(corr_dat['rt_sho'], corr_dat['clf_pk'], n_perm)
    # r_lon, p_lon = permutation_pearson(corr_dat['rt_lon'], corr_dat['clf_pk'], n_perm)

    r_dec_rt, p_dec_rt = permutation_pearson(corr_dat['rt'], corr_dat['clf_pk'], n_perm)
    r_conn_rt, p_conn_rt = permutation_pearson(corr_dat['rt'], corr_dat['conn_dif'], n_perm)
    r_tf_rt, p_tf_rt = permutation_pearson(corr_dat['rt'], corr_dat['pow_dif'], n_perm)

    r_dec_lon, p_dec_lon = permutation_pearson(corr_dat['acc_lon'], corr_dat['clf_pk'], n_perm)
    r_dec_sho, p_dec_sho = permutation_pearson(corr_dat['acc_sho'], corr_dat['clf_pk'], n_perm)

    r_conn_lon, p_conn_lon = permutation_pearson(corr_dat['acc_lon'], corr_dat['conn_lon'], n_perm)
    r_conn_sho, p_conn_sho = permutation_pearson(corr_dat['acc_sho'], corr_dat['conn_sho'], n_perm)

    r_tf_lon, p_tf_lon = permutation_pearson(corr_dat['acc_lon'], corr_dat['pow_lon'], n_perm)
    r_tf_sho, p_tf_sho = permutation_pearson(corr_dat['acc_sho'], corr_dat['pow_sho'], n_perm)

    all_p = [p_dec_rt, p_conn_rt, p_tf_rt, p_dec_lon, p_dec_sho, p_conn_lon, p_conn_sho, p_tf_lon, p_tf_sho]

    p_corr = multicomp.multipletests([p_dec_rt, p_conn_rt, p_tf_rt],
                                     method='fdr_bh')

    seaborn.regplot(corr_dat['rt'], corr_dat['clf_pk'], ci=None)
    plt.title('r = %0.3f  p = %0.3f' % (r_dec_rt, p_dec_rt))
    plt.savefig(op.join(study_path, 'figures', 'clf_pk_vs_RT_all.eps'), dpi=300)

    fig, ax = plt.subplots(1, 2, sharey=True, sharex=True)
    seaborn.regplot(corr_dat['acc_lon'], corr_dat['conn_dif'], ax=ax[0], ci=None)
    seaborn.regplot(corr_dat['acc_sho'], corr_dat['conn_dif'], ax=ax[1], ci=None)
    [ax[ix].set_title('r = {} p = {}' .format(round(r, 3), round(p, 3))) for ix, (r, p) in enumerate(zip([r_conn_lon, r_conn_sho],
                                                                                                         [p_conn_lon, p_conn_sho]))]
    [ax[ix].set_ylabel('Accuracy') for ix in range(len(ax))]
    [ax[ix].set_xlabel('Connectivity Difference (wSMI) \n %s' % lab) for ix, lab in enumerate(['S2 Longer', 'S2 Shorter'])]
    # ax[0].set_xlim(0, 0.05)
    fig.savefig(op.join(study_path, 'figures', 'conn_vs_acc.eps'), dpi=300)

    seaborn.regplot(corr_dat['pow_lon'], corr_dat['acc_lon'], ci=None)
    plt.title('r = %0.3f p = %0.3f' % (r_tf_lon, p_tf_lon))
    plt.savefig(op.join(study_path, 'figures', 'pow_lon_vs_acc_lon.eps'), dpi=300)

    seaborn.regplot(corr_dat['conn_lon'], corr_dat['acc_lon'], ci=None)


    plt.violinplot([corr_dat['conn_lon'], corr_dat['conn_sho']])

    T_obs, p_values, H0 = permutation_t_test(np.array(corr_dat['abs'], ndmin=2).T, n_permutations=10000, tail=1)
    corr_dat['abs'].plot(kind='box')
    plt.title('p = %.8f' % p_values[0])
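The helper permutation_pearson is used above but never defined in this snippet; below is a minimal sketch of what it plausibly does (Pearson r with a permutation-based p-value), where the seed and the two-tailed convention are assumptions:

import numpy as np
from scipy.stats import pearsonr


def permutation_pearson(x, y, n_perm, seed=42):
    """Plausible sketch: Pearson r with a permutation p-value."""
    rng = np.random.RandomState(seed)  # seed is an assumption
    x = np.asarray(x, float)
    y = np.asarray(y, float)
    r_obs, _ = pearsonr(x, y)
    # Null distribution: correlation after shuffling one variable
    null = np.array([pearsonr(rng.permutation(x), y)[0]
                     for _ in range(n_perm)])
    # Two-tailed: fraction of shuffles with |r| at least as large as observed
    p = (np.sum(np.abs(null) >= abs(r_obs)) + 1.0) / (n_perm + 1.0)
    return r_obs, p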
Example 11
from scipy.stats import ttest_1samp
from mne.stats import permutation_t_test

i = 0
for val in col_values:
    t, p = ttest_1samp(col_values, val)
    if p < 0.05 and val < np.mean(col_values):
        print(names[i])
        print(p)
    i = i + 1

col_values.shape = (np.shape(col_values)[0], 1)

i = 0
for val in col_values:
    print("perm")
    values = np.subtract(col_values, val)
    T, p, H0 = permutation_t_test(values, n_permutations=1000)
    if p[0] < 0.05 and val < np.mean(col_values):
        print(names[i])
        print(p[0])
    i += 1

# Choose the column with the top hit, then identify any row name with a
# significant difference by t-test

# Create or get path: single expression value vs all, excluding itself
res_top_hit_path = run_sim_single(top_hit[0],
                                  top_hit[1],
                                  top_hit[2],
                                  exclude=exclude)