示例#1
0
    def dabest_net_measures(self):
        """
        Compute statistics on the graph measures and pickle the result.

        The graph-measure pickle located via ``self.find`` is loaded into
        ``self.Net_df``.  For every frequency band,
        ``self._parallel_net_dabest`` is run once per graph measure in a
        pool of 10 worker processes and the per-measure results are
        concatenated.  The three p-value columns of each band are then
        Bonferroni- and FDR-corrected (alpha = 0.05) across that band's
        rows.  All bands are finally concatenated and written to a pickle
        below ``self.NetMeasuresDir``.

        NOTE(review): ``self.Net_df`` is not read in this method --
        presumably ``_parallel_net_dabest`` uses it; confirm.
        """
        source_file = self.find(suffix='Graph-Measures-' + self.net_version,
                                filetype='.pkl')
        self.Net_df = pd.read_pickle(source_file)
        print('Started Graph Measure Stats.')

        # (raw p-value column, prefix of the corrected output columns)
        pvalue_columns = (
            ('pvalue_students_t', 't'),
            ('pvalue_welch', 'welch'),
            ('pvalue_mann_whitney', 'mann_whit'),
        )

        band_frames = []
        for band in self.FrequencyBands.keys():
            # One (measure, band) argument pair per graph measure.
            jobs = [(measure, band) for measure in self.GraphMeasures.keys()]
            with Pool(10) as workers:
                measure_frames = workers.starmap(self._parallel_net_dabest,
                                                 jobs)

            band_df = pd.concat(measure_frames)
            band_df['Frequency'] = band

            # Multiple-comparison correction within this frequency band.
            for source, prefix in pvalue_columns:
                _, bon_corrected = bonferroni_correction(band_df[source],
                                                         alpha=0.05)
                _, fdr_corrected = fdr_correction(band_df[source],
                                                  alpha=0.05,
                                                  method='indep')
                band_df[prefix + '_bon_corrected'] = bon_corrected
                band_df[prefix + '_fdr_corrected'] = fdr_corrected

            band_frames.append(band_df)

        # Combine all bands and save the DataFrame to file.
        combined = pd.concat(band_frames)
        out_name = self.createFileName(
            suffix='Graph-Measures-DABEST-' + self.net_version,
            filetype='.pkl')
        out_path = self.createFilePath(self.NetMeasuresDir, self.net_version,
                                       out_name)
        combined.to_pickle(out_path)
        print('Graph Measure Statistics done.')
示例#2
0
def test_multi_pval_correction():
    """Check Bonferroni and FDR p-value correction on simulated data.

    Gaussian noise of shape (10, 1000, 10) gets an offset of 4.0 on 50 of
    its tests; a one-sample t-test against 0 is then corrected with both
    procedures and the rejection counts / thresholds are sanity-checked.
    """
    alpha = 0.05
    rng = np.random.RandomState(0)
    X = rng.randn(10, 1000, 10)
    X[:, :50, 0] += 4.0  # 50 significant tests

    t_vals, p_vals = stats.ttest_1samp(X, 0)
    abs_t = np.abs(t_vals)

    n_samples = X.shape[0]
    n_tests = X.size / n_samples
    dof = n_samples - 1
    thresh_uncorrected = stats.t.ppf(1.0 - alpha, dof)
    thresh_bonferroni = stats.t.ppf(1.0 - alpha / n_tests, dof)

    def check_fdr_rejections(reject_mask):
        # Smallest |t| that is still rejected acts as the FDR threshold.
        thresh_fdr = np.min(abs_t[reject_mask])
        assert_true(0 <= (reject_mask.sum() - 50) <= 50 * 1.05)
        assert_true(thresh_uncorrected <= thresh_fdr <= thresh_bonferroni)

    # Bonferroni: outputs keep the 2D shape of the p-value array.
    reject_bonferroni, pval_bonferroni = bonferroni_correction(p_vals, alpha)
    assert_true(pval_bonferroni.ndim == 2)
    assert_true(reject_bonferroni.ndim == 2)

    fwer = np.mean(reject_bonferroni)
    assert_almost_equal(fwer, alpha, 1)

    # FDR, 'indep' method.
    reject_fdr, pval_fdr = fdr_correction(p_vals, alpha=alpha, method='indep')
    assert_true(pval_fdr.ndim == 2)
    assert_true(reject_fdr.ndim == 2)
    check_fdr_rejections(reject_fdr)

    # FDR, 'negcorr' method.
    reject_fdr, pval_fdr = fdr_correction(p_vals, alpha=alpha,
                                          method='negcorr')
    check_fdr_rejections(reject_fdr)
示例#3
0
def test_multi_pval_correction():
    """Check Bonferroni and FDR p-value correction on simulated data.

    Gaussian noise of shape (10, 1000, 10) gets an offset of 4.0 on 50 of
    its tests; a one-sample t-test against 0 is then corrected with both
    procedures and the corrected values, rejection masks and thresholds
    are checked.
    """
    alpha = 0.05
    rng = np.random.RandomState(0)
    X = rng.randn(10, 1000, 10)
    X[:, :50, 0] += 4.0  # 50 significant tests

    t_vals, p_vals = stats.ttest_1samp(X, 0)
    abs_t = np.abs(t_vals)

    n_samples = X.shape[0]
    n_tests = X.size / n_samples
    dof = n_samples - 1
    thresh_uncorrected = stats.t.ppf(1.0 - alpha, dof)
    thresh_bonferroni = stats.t.ppf(1.0 - alpha / n_tests, dof)

    def check_fdr_rejections(reject_mask):
        # Smallest |t| that is still rejected acts as the FDR threshold.
        thresh_fdr = np.min(abs_t[reject_mask])
        assert 0 <= (reject_mask.sum() - 50) <= 50 * 1.05
        assert thresh_uncorrected <= thresh_fdr <= thresh_bonferroni

    # Bonferroni: 2D outputs, p-values scaled by the number of tests, and a
    # rejection mask consistent with the corrected p-values.
    reject_bonferroni, pval_bonferroni = bonferroni_correction(p_vals, alpha)
    assert pval_bonferroni.ndim == 2
    assert reject_bonferroni.ndim == 2
    assert_allclose(pval_bonferroni / 10000, p_vals)
    assert_array_equal(reject_bonferroni, pval_bonferroni < alpha)

    fwer = np.mean(reject_bonferroni)
    assert_almost_equal(fwer, alpha, 1)

    # FDR, 'indep' method.
    reject_fdr, pval_fdr = fdr_correction(p_vals, alpha=alpha, method='indep')
    assert pval_fdr.ndim == 2
    assert reject_fdr.ndim == 2
    check_fdr_rejections(reject_fdr)
    # Unknown method names are refused; alpha=0 rejects nothing.
    pytest.raises(ValueError, fdr_correction, p_vals, alpha, method='blah')
    assert np.all(fdr_correction(p_vals, alpha=0)[0] == 0)

    # FDR, 'negcorr' method.
    reject_fdr, pval_fdr = fdr_correction(p_vals, alpha=alpha,
                                          method='negcorr')
    check_fdr_rejections(reject_fdr)
# Script excerpt: epoch the recording, run a one-sample t-test on one
# channel, and derive uncorrected / Bonferroni / FDR thresholds for plotting.
# NOTE(review): `raw`, `events`, `tmin`, `tmax` and `picks` are defined
# outside this excerpt.
event_id = 1
# Rejection thresholds keyed by 'grad' and 'eog' -- presumably peak-to-peak
# amplitude limits per channel type; confirm units against the MNE docs.
reject = dict(grad=4000e-13, eog=150e-6)
epochs = mne.Epochs(raw, events, event_id, tmin, tmax, picks=picks,
                    baseline=(None, 0), reject=reject)
X = epochs.get_data()  # as 3D matrix
X = X[:, 0, :]  # take only one channel to get a 2D array

###############################################################################
# Compute statistic
# One-sample t-test of X against 0 along the first axis.
T, pval = stats.ttest_1samp(X, 0)
alpha = 0.05

n_samples, n_tests = X.shape
# t quantile at 1 - alpha with n_samples - 1 degrees of freedom.
threshold_uncorrected = stats.t.ppf(1.0 - alpha, n_samples - 1)

reject_bonferroni, pval_bonferroni = bonferroni_correction(pval, alpha=alpha)
# Same quantile, with alpha divided by the number of tests.
threshold_bonferroni = stats.t.ppf(1.0 - alpha / n_tests, n_samples - 1)

reject_fdr, pval_fdr = fdr_correction(pval, alpha=alpha, method='indep')
# Smallest |T| among the FDR-rejected tests serves as the FDR threshold.
threshold_fdr = np.min(np.abs(T)[reject_fdr])

###############################################################################
# Plot
# Scale the time axis by 1e3 (presumably seconds -> milliseconds; confirm).
times = 1e3 * epochs.times

import matplotlib.pyplot as plt
plt.close('all')
plt.plot(times, T, 'k', label='T-stat')
xmin, xmax = plt.xlim()
# Horizontal dashed line at the uncorrected threshold across the x-range.
plt.hlines(threshold_uncorrected, xmin, xmax, linestyle='--', colors='k',
           label='p=0.05 (uncorrected)', linewidth=2)
#     which we want here:
# NOTE(review): `X`, `threshold`, `connectivity`, `n_permutations`, `width`,
# `n_subjects` and `sigma` are defined outside this excerpt.
T_obs, clusters, p_values, H0 = \
    spatio_temporal_cluster_1samp_test(X, n_jobs=1, threshold=threshold,
                                       connectivity=connectivity,
                                       tail=1, n_permutations=n_permutations)

#    Let's put the cluster data in a readable format
# Each cluster's p-value is stored as -log10(p) at the indices in cl[1];
# entries that belong to no cluster stay 0.
ps = np.zeros(width * width)
for cl, p in zip(clusters, p_values):
    ps[cl[1]] = -np.log10(p)
ps = ps.reshape((width, width))
T_obs = T_obs.reshape((width, width))

#     To do a Bonferroni correction on these data is simple:
# Upper-tail p-values from the t survival function with n_subjects - 1
# degrees of freedom, then Bonferroni-corrected and shown as -log10.
p = stats.distributions.t.sf(T_obs, n_subjects - 1)
p_bon = -np.log10(bonferroni_correction(p)[1])

#    Now let's do some clustering using the standard method with "hat":
# Same cluster test as above, but with `ttest_1samp_no_p` (sigma bound via
# partial) as the statistic function.
stat_fun = partial(ttest_1samp_no_p, sigma=sigma)
T_obs_hat, clusters, p_values, H0 = \
    spatio_temporal_cluster_1samp_test(X, n_jobs=1, threshold=threshold,
                                       connectivity=connectivity,
                                       tail=1, n_permutations=n_permutations,
                                       stat_fun=stat_fun)

#    Let's put the cluster data in a readable format
# Same -log10(p) layout as `ps` above, for the "hat" statistic.
ps_hat = np.zeros(width * width)
for cl, p in zip(clusters, p_values):
    ps_hat[cl[1]] = -np.log10(p)
ps_hat = ps_hat.reshape((width, width))
T_obs_hat = T_obs_hat.reshape((width, width))
#        \mathrm{E}(\frac{N_{\mathrm{type\ I}}}{N_{\mathrm{reject}}}
#        \mid N_{\mathrm{reject}} > 0) \cdot
#        \mathrm{P}(N_{\mathrm{reject}} > 0 \mid H_0)
#
# We cover some techniques that control FWER and FDR below.
#
# Bonferroni correction
# ^^^^^^^^^^^^^^^^^^^^^
# Perhaps the simplest way to deal with multiple comparisons, `Bonferroni
# correction <https://en.wikipedia.org/wiki/Bonferroni_correction>`__
# conservatively multiplies the p-values by the number of comparisons to
# control the FWER.

# NOTE(review): `titles`, `ts`, `ps`, `mccs` and `plot_t_p` are defined
# outside this excerpt.
titles.append('Bonferroni')
ts.append(ts[-1])  # reuse the previous entry's t-values; only p-values change
ps.append(bonferroni_correction(ps[0])[1])  # corrected from the first entry
mccs.append(True)
plot_t_p(ts[-1], ps[-1], titles[-1], mccs[-1])

###############################################################################
# False discovery rate (FDR) correction
# ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
# Typically FDR is performed with the Benjamini-Hochberg procedure, which
# is less restrictive than Bonferroni correction for large numbers of
# comparisons (fewer type II errors), but provides less strict control of type
# I errors.

titles.append('FDR')
ts.append(ts[-1])  # reuse the previous entry's t-values; only p-values change
ps.append(fdr_correction(ps[0])[1])  # corrected from the first entry
mccs.append(True)
示例#7
0
def testwise_correction_mcp(x, x_p, tail=1, mcp='maxstat'):
    """Test-wise correction for MCP using non-parametric statistics.

    This function can be used to correct the p-values for multiple comparisons
    at the test level (i.e at each time point, each roi, each frequencies
    etc.). This kind of correction usually suffers from a low statistical power
    (i.e if an effect is present, you might miss it because the correction is
    to conservative).

    Parameters
    ----------
    x : array_like
        Array of true effect
    x_p : array_like
        Array of permutations of shape (n_perm, ...) where the other dimensions
        should be the same as `x`
    tail : {-1, 0, 1}
        Type of comparison. Use -1 for the lower part of the distribution,
        1 for the higher part and 0 for both
    mcp : {'maxstat', 'fdr', 'bonferroni'}
        Method to use for correcting p-values for the multiple comparison
        problem. By default, maximum statistics is used

    Returns
    -------
    pvalues : array_like
        Array of pvalues corrected for MCP with the same shape as the input `x`
    """
    assert tail in [-1, 0, 1]
    assert mcp in ['maxstat', 'fdr', 'bonferroni']
    assert isinstance(x, np.ndarray) and isinstance(x_p, np.ndarray)
    n_perm = x_p.shape[0]

    logger.info(f"    Perform correction for MCP (mcp={mcp}; tail={tail})")

    # -------------------------------------------------------------------------
    # change the distribution according to the tail (support inplace operation)
    if tail == 1:  # upper part of the distribution
        pass
    elif tail == -1:  # bottom part of the distribution
        x, x_p = -x, -x_p
    elif tail == 0:  # both part of the distribution
        x, x_p = np.abs(x), np.abs(x_p)
    x = x[np.newaxis, ...]

    # -------------------------------------------------------------------------
    # mcp correction
    if mcp == 'maxstat':
        x_p_sh = tuple([n_perm] + [1] * (x.ndim - 1))
        # maximum over all dimensions except the perm one
        x_p = x_p.reshape(n_perm, -1).max(1).reshape(*x_p_sh)
        pv = (x <= x_p).sum(0) / n_perm
        pv = np.clip(pv, 1. / n_perm, 1.)
    else:
        pv = (x <= x_p).sum(0) / n_perm
        if mcp == 'fdr':
            pv = fdr_correction(pv, .05)[1]
        if mcp == 'bonferroni':
            pv = bonferroni_correction(pv, .05)[1]
    pv = np.clip(pv, 0., 1.)

    return pv
示例#8
0
def test_bonferroni_pval_clip():
    """Check that Bonferroni-corrected p-values never exceed 1.0."""
    raw_pvals = (0.2, 0.9)
    _, corrected = bonferroni_correction(raw_pvals)
    largest = corrected.max()
    assert largest <= 1.0
示例#9
0
    def test_region_GBC(self):
        """
        Compute region-wise t-tests between global connectivity values.

        The GBC pickle located via ``self.find`` is loaded. For every
        region in ``self.RegionNames`` and every band in
        ``self.FrequencyBands``, the 'FEP' group is compared against the
        'Control' group with a Levene test (equal variances), a Welch
        t-test and a standard Student t-test. The Student p-values are
        then Bonferroni- and FDR-corrected (alpha = 0.05) separately
        within each frequency band, and the resulting table is pickled
        under ``self.EdgeStatsDir``.

        The saved DataFrame has the columns 'Frequency', 'Region',
        't-value', 'p-value', 'welch-t-value', 'welch-p-value',
        'levene-p-value', 'Bonferroni' and 'FDR'.
        """
        from mne.stats import bonferroni_correction, fdr_correction
        df = pd.read_pickle(
            self.find(suffix='GBC', filetype='.pkl', Freq=self.Frequencies))
        # Result Dictionary
        testdict = {
            'Frequency': [],
            'Region': [],
            't-value': [],
            'p-value': [],
            'welch-t-value': [],
            'welch-p-value': [],
            'levene-p-value': []
        }
        print('Started Statstical Test.')
        for Region in self.RegionNames:
            print(f'Testing {Region}')
            # One row per (Subject, Group), one column per frequency band.
            # pivot_table aggregates duplicates with its default (mean).
            df_pivot = df.pivot_table(index=['Subject', 'Group'],
                                      columns='Frequency',
                                      values=Region).reset_index()
            df_control = df_pivot[df_pivot['Group'] == 'Control']
            df_fep = df_pivot[df_pivot['Group'] == 'FEP']
            for Freq in self.FrequencyBands.keys():
                testdict['Frequency'].append(Freq)
                testdict['Region'].append(Region)
                # Test for equal variance, levene test
                _, pval = scipy.stats.levene(df_fep[Freq], df_control[Freq])
                testdict['levene-p-value'].append(pval)
                # Welch test, valid when variances are not equal
                t, pval = scipy.stats.ttest_ind(df_fep[Freq],
                                                df_control[Freq],
                                                equal_var=False)
                testdict['welch-t-value'].append(t)
                testdict['welch-p-value'].append(pval)
                # Standard t-test
                t, pval = scipy.stats.ttest_ind(df_fep[Freq],
                                                df_control[Freq],
                                                equal_var=True)
                testdict['t-value'].append(t)
                testdict['p-value'].append(pval)

        # Transform to DataFrame
        df = pd.DataFrame(testdict)

        print('Bonferroni Correction.')
        # Calculate Bonferroni and FDR correction
        # Set up columns. `np.nan` is used because the `np.NaN` alias was
        # removed in NumPy 2.0.
        df['Bonferroni'] = df['FDR'] = np.nan

        for Freq in self.FrequencyBands.keys():
            # Correct only across the regions of the current band.
            band_mask = df['Frequency'] == Freq
            band_pvals = df.loc[band_mask, 'p-value']
            _, p_bon = bonferroni_correction(band_pvals, alpha=0.05)
            _, p_fdr = fdr_correction(band_pvals,
                                      alpha=0.05,
                                      method='indep')
            df.loc[band_mask, 'Bonferroni'] = p_bon
            df.loc[band_mask, 'FDR'] = p_fdr

        # Save Results
        FileName = self.createFileName(suffix='GBC-Region-T-Test',
                                       filetype='.pkl',
                                       Freq=self.Frequencies)
        FilePath = self.createFilePath(self.EdgeStatsDir, 'GBC', FileName)
        df.to_pickle(FilePath)