예제 #1
0
def two_group_reproducibility(effect_sizes,
                              emphasis_primary,
                              nl=90,
                              sl=30,
                              alpha=0.05,
                              N=25,
                              n_iter=10,
                              method=tst):
    """Estimate reproducibility in the two-group model over a grid of
    effect sizes and amounts of emphasis on the primary study.

    For every (effect size, emphasis) pair the simulation is repeated
    n_iter times; each repetition draws an independent primary and
    follow-up data set and applies fwer_replicability to them.

    Input arguments:
    ================
    effect_sizes : ndarray [n_effect_sizes, ]
        The tested effect sizes.

    emphasis_primary : ndarray [n_emphasis_values, ]
        The tested amounts of emphasis on the primary study.

    nl : int
        The length of a side of the simulated grid. The decision vector
        returned by fwer_replicability is reshaped to [nl, nl].

    sl : int
        The length of a side of the signal region.

    alpha : float
        The critical level forwarded to fwer_replicability.

    N : int
        The sample size forwarded to square_grid_model.

    n_iter : int
        The number of repetitions of each simulation.

    method : function
        The correction procedure forwarded to fwer_replicability.

    Output arguments
    ================
    reproducibility : ndarray [n_effect_sizes, n_emphasis_values]
        The ratio tp / (tp + fn), averaged over the n_iter repetitions,
        at each tested effect size and amount of emphasis.
    """
    rates = np.zeros([len(effect_sizes), len(emphasis_primary), n_iter])

    # Nested loops visit the indices in the same (last-index-fastest)
    # order as np.ndindex, so the sequence of simulated data sets is
    # unchanged.
    for i_delta, delta in enumerate(effect_sizes):
        for i_emph, emphasis in enumerate(emphasis_primary):
            for i_rep in range(n_iter):
                # Draw independent primary and follow-up data sets.
                primary = square_grid_model(nl, sl, N, delta,
                                            equal_var=True)[0]
                followup = square_grid_model(nl, sl, N, delta,
                                             equal_var=True)[0]

                # Decide which effects replicate and put the decisions
                # back onto the grid.
                decision = fwer_replicability(primary.flatten(),
                                              followup.flatten(),
                                              emphasis, method, alpha)
                decision = np.reshape(decision, [nl, nl])

                true_pos, _, _, false_neg = grid_model_counts(
                    decision, nl, sl)
                rates[i_delta, i_emph, i_rep] = (
                    true_pos / float(true_pos + false_neg))

    # Average over the repetitions.
    return np.mean(rates, axis=2)
예제 #2
0
def two_group_model_power(nl=90,
                          sl=30,
                          deltas=np.linspace(0.2, 2.4, 12),
                          alpha=0.05,
                          N=25,
                          n_iter=10,
                          verbose=True):
    """Simulate data under the two-group model and compute the empirical
    power of the tst procedure as a function of effect size.

    Input arguments:
    ================
    nl : int
        The length of a side of the simulated grid. There will be a total
        of nl squared tests.
    sl : int
        The length of a side of the signal region. In the simulation, there
        will be a total of sl squared tests where the alternative
        hypothesis is true.
    deltas : ndarray
        The tested effect sizes.
    alpha : float
        The desired critical level.
    N : int
        Sample size in both of the two groups.
    n_iter : int
        Number of iterations used for estimating the power.
    verbose : bool
        Flag for deciding whether to print progress reports to the console.

    Output arguments:
    =================
    pwr : ndarray [n_deltas, ]
        The empirical power at each effect size, averaged over the
        n_iter iterations.
    """
    # One row per effect size, one column per iteration.
    power = np.zeros([len(deltas), n_iter])

    for row, effect in enumerate(deltas):
        if verbose:
            print('Effect size: %1.3f' % effect)
        for col in range(n_iter):
            simulated = square_grid_model(nl, sl, N, effect,
                                          equal_var=True)[0]
            # Apply the procedure to the flattened data and restore the
            # grid layout of the decisions.
            decision = tst(simulated.flatten(), alpha).reshape(nl, nl)
            true_pos, _, _, false_neg = grid_model_counts(decision, nl, sl)
            power[row, col] = empirical_power(true_pos,
                                              true_pos + false_neg)

    # Average over the iterations.
    return power.mean(axis=1)
예제 #3
0
def permutation_test_fwer_replicability(effect_sizes,
                                        emphasis_primary,
                                        nl=90,
                                        sl=30,
                                        alpha=0.05,
                                        N=25,
                                        n_iter=20,
                                        t_threshold=1.0):
    """Estimate and plot reproducibility in the two-group model using
    FWER replicability (fwer_rftrep) with the rft_2d procedure.

    NOTE: despite the function name, the permutation-test based variant
    is disabled (it survives only as commented-out code inside the
    loop). The active code path passes rft_2d to fwer_rftrep.

    Input arguments:
    ================
    effect_sizes : ndarray
        Tested effect sizes (Cohen's d's).

    emphasis_primary : ndarray
        Amount of emphasis placed on the primary study.

    nl, sl : int
        The sizes of the noise and signal regions respectively.

    alpha : float
        The desired critical level.

    N : int
        Sample size in each of the two groups.

    n_iter : int
        Number of repetitions of the simulation at each distinct
        effect size.

    t_threshold : float
        The t-threshold intended for the permutation test. Currently
        unused; kept so that existing callers keep working.

    Output arguments:
    =================
    None. The reproducibility, averaged over the n_iter repetitions, is
    plotted against the effect sizes and the figure is shown.
    """
    n_effect_sizes = len(effect_sizes)
    n_emphasis = len(emphasis_primary)
    reproducibility = np.zeros([n_effect_sizes, n_emphasis, n_iter])

    # Estimate reproducibility at each effect size and emphasis value.
    for ind in np.ndindex(n_effect_sizes, n_emphasis, n_iter):
        delta, emphasis = effect_sizes[ind[0]], emphasis_primary[ind[1]]

        # Simulate the primary and the follow-up study. The second output
        # of square_grid_model is used here (presumably the test
        # statistics - TODO confirm against square_grid_model).
        T_primary = square_grid_model(nl, sl, N, delta)[1]
        T_followup = square_grid_model(nl, sl, N, delta)[1]

        # Disabled permutation-test variant working on the raw data
        # (*_p = primary study, *_f = follow-up study):
        # X_raw_p, Y_raw_p = square_grid_model(nl, sl, N, delta)[2:4]
        # X_raw_f, Y_raw_f = square_grid_model(nl, sl, N, delta)[2:4]
        # R = fwer_prep(X_raw_p, Y_raw_p, X_raw_f, Y_raw_f,
        #               tfr_permutation_test, emphasis, alpha)
        R = fwer_rftrep(T_primary, T_followup, rft_2d, emphasis, alpha)
        tp, _, _, fn = grid_model_counts(R, nl, sl)
        reproducibility[ind] = tp / float(tp + fn)

    # Average over the repetitions.
    reproducibility = np.mean(reproducibility, axis=2)

    # Visualize the results: one curve per emphasis value (column),
    # drawn once with point markers and once as a line.
    sns.set_style('white')
    fig = plt.figure(figsize=(8, 5))
    ax = fig.add_subplot(111)
    ax.plot(effect_sizes, reproducibility, '.')
    ax.plot(effect_sizes, reproducibility, '-')
    fig.tight_layout()
    plt.show()
예제 #4
0
def direct_replication_fwer_partial_conjunction():
    """Compare the partial conjunction and FWER replicability methods on
    data simulated from the two-group model and plot the outcome.

    One primary and one follow-up experiment are simulated per effect
    size. The fraction of the sl**2 true effects declared reproducible
    is computed for FWER replicability at every emphasis value and for
    the partial conjunction method, and the three groups of curves are
    drawn with plot_logistic.
    """
    # Simulation settings.
    N, nl, sl = 25, 90, 30
    effect_sizes = np.linspace(0.6, 2.4, 12)
    method = lsu  # hochberg #bonferroni
    emphasis = np.asarray(
        [0.02, 0.05, 0.10, 0.30, 0.50, 0.70, 0.90, 0.95, 0.98])
    n_effect_sizes, n_emphasis = len(effect_sizes), len(emphasis)

    # Generate the test data: uncorrected p-values of the primary and
    # the follow-up experiment, one grid per effect size.
    print('Simulating primary and follow-up experiments ..')
    pvals_pri = np.zeros([n_effect_sizes, nl, nl])
    pvals_sec = np.zeros(pvals_pri.shape)
    for row, delta in enumerate(effect_sizes):
        pvals_pri[row] = square_grid_model(nl, sl, N, delta)[0]
        pvals_sec[row] = square_grid_model(nl, sl, N, delta)[0]

    # FWER replicability, evaluated for every emphasis value.
    print('Estimating reproducibility: FWER replicability ..')
    repr_fwer = np.zeros([n_effect_sizes, n_emphasis])
    for row in range(n_effect_sizes):
        for col in range(n_emphasis):
            found = fwer_replicability(pvals_pri[row].flatten(),
                                       pvals_sec[row].flatten(),
                                       emphasis[col], method)
            found = np.reshape(found, [nl, nl])
            # Fraction of the true effects that were reproduced.
            true_pos = grid_model_counts(found, nl, sl)[0]
            repr_fwer[row, col] = true_pos / float(sl**2)

    # Partial conjunction, which takes no emphasis parameter.
    print('Estimating reproducibility: Partial conjuction ..')
    repr_part = np.zeros([n_effect_sizes])
    for row in range(n_effect_sizes):
        found = partial_conjuction(pvals_pri[row].flatten(),
                                   pvals_sec[row].flatten(), method)
        found = np.reshape(found, [nl, nl])
        true_pos = grid_model_counts(found, nl, sl)[0]
        repr_part[row] = true_pos / float(sl**2)

    # Visualize the data: emphasis <= 0.5 in black, emphasis > 0.5 in
    # green, partial conjunction in blue.
    sns.set_style('white')
    fig = plt.figure(figsize=(8, 5))
    ax = fig.add_subplot(111)

    low_emphasis = emphasis <= 0.5
    high_emphasis = emphasis > 0.5
    plot_logistic(effect_sizes, repr_fwer[:, low_emphasis], ax=ax,
                  color='k')
    plot_logistic(effect_sizes, repr_fwer[:, high_emphasis], ax=ax,
                  color='g')
    plot_logistic(effect_sizes, repr_part, ax=ax, color='b')

    ax.set_xlabel('Effect size')
    ax.set_ylabel('Reproducibility rate')

    fig.tight_layout()
    plt.show()
예제 #5
0
def rvalue_test(effect_sizes=np.linspace(0.2, 2.4, 12),
                emphasis=np.asarray([0.02, 0.5, 0.98]),
                method=tst,
                nl=90,
                sl=30,
                N=25,
                alpha=0.05,
                n_iter=10):
    """Simulate primary and follow-up experiments with the two-group
    model and test which effects are reproducible using the FDR r-value
    method.

    Input arguments:
    ================
    effect_sizes : ndarray [n_effect_sizes, ]
        The tested effect sizes.

    emphasis : ndarray [n_emphasis, ]
        The tested amounts of emphasis placed on the primary study.

    method : function
        The applied correction procedure. A procedure named 'qvalue' is
        expected to return a tuple whose first element is the decision.

    nl, sl : int
        The sizes of the noise and signal regions respectively.

    N : int
        The sample size in both groups.

    alpha : float
        The critical level. Default value is 0.05.

    n_iter : int
        The number of repetitions of each simulation.

    Output arguments:
    =================
    reproducibility : ndarray [n_effect_sizes, n_emphasis]
        The ratio tp / (tp + fn) averaged over the n_iter repetitions.
        Repetitions without any significant primary-study result
        contribute a zero.
    """
    rates = np.zeros([n_iter, len(effect_sizes), len(emphasis)])

    for ind in np.ndindex(*rates.shape):
        print(ind)
        _, i_delta, i_emph = ind

        # Simulate primary and follow-up experiments.
        delta, emph = effect_sizes[i_delta], emphasis[i_emph]
        p1 = square_grid_model(delta=delta, nl=nl, sl=sl, N=N)[0]
        p2 = square_grid_model(delta=delta, nl=nl, sl=sl, N=N)[0]

        # Test which hypotheses are significant in the primary study;
        # these are the ones selected for the follow-up study.
        returns_tuple = method.__name__ == 'qvalue'
        selected = method(p1.flatten(), alpha)
        if returns_tuple:
            selected = selected[0]
        selected = np.reshape(selected, [nl, nl])

        # Nothing was selected: leave the zero entry in place.
        if not (np.sum(selected) > 0):
            continue

        # Apply the r-value method to the selected hypotheses only. The
        # remaining ones keep an r-value of one.
        rvals = fdr_rvalue(p1=p1[selected],
                           p2=p2[selected],
                           m=nl**2,
                           c2=emph)
        R = np.ones(p1.shape)
        R[selected] = rvals

        true_pos, _, _, false_neg = grid_model_counts(R < alpha, nl, sl)
        rates[ind] = true_pos / float(true_pos + false_neg)

    # Average over the repetitions.
    return np.mean(rates, axis=0)
예제 #6
0
# NOTE(review): this is a fragment of a longer script. Y_tst, Y_sidak,
# Y_fdr, Y_qvalue, Y_rft, X, X_raw, Y_raw, nl, sl and alpha are defined
# outside the visible part.

# Put the flat decision vector back onto the nl-by-nl grid.
Y_tst = Y_tst.reshape(nl, nl)

# Permutation test on the raw data with 100 permutations and a
# threshold of 1.
Y_permutation = tfr_permutation_test(X_raw,
                                     Y_raw,
                                     n_permutations=100,
                                     alpha=alpha,
                                     threshold=1)

# Holm-Bonferroni procedure on the flattened X, reshaped to the grid.
# X is presumably the grid of uncorrected p-values (it is thresholded
# at alpha below) - TODO confirm.
Y_holm = holm_bonferroni(X.flatten(), alpha=alpha)
Y_holm = Y_holm.reshape(nl, nl)

# Hochberg procedure on the same input, reshaped to the grid.
Y_hochberg = hochberg(X.flatten(), alpha=alpha)
Y_hochberg = Y_hochberg.reshape(nl, nl)
"""Visualize the results."""
# For each method: plot the decisions on the grid and set a title with
# the method name followed by the two values returned by roc() for the
# counts from grid_model_counts().

# No correction: threshold X at alpha directly.
fig_nocor = plot_grid_model(X < alpha, nl, sl)
t_nocor = 'Uncorrected %1.3f %1.3f' % roc(grid_model_counts(X < alpha, nl, sl))
fig_nocor.axes[0].set_title(t_nocor)

fig_sidak = plot_grid_model(Y_sidak, nl, sl)
t_sidak = 'Sidak %1.3f %1.3f' % roc(grid_model_counts(Y_sidak, nl, sl))
fig_sidak.axes[0].set_title(t_sidak)

fig_fdr = plot_grid_model(Y_fdr, nl, sl)
t_fdr = 'FDR %1.3f %1.3f' % roc(grid_model_counts(Y_fdr, nl, sl))
fig_fdr.axes[0].set_title(t_fdr)

fig_qvalue = plot_grid_model(Y_qvalue, nl, sl)
t_qvalue = 'Q-value %1.3f %1.3f' % roc(grid_model_counts(Y_qvalue, nl, sl))
fig_qvalue.axes[0].set_title(t_qvalue)

fig_rft = plot_grid_model(Y_rft, nl, sl)
예제 #7
0
def two_group_model_power(deltas,
                          method,
                          nl=90,
                          sl=30,
                          alpha=0.05,
                          N=25,
                          n_iter=20,
                          verbose=True):
    """Function for generating data under two-group model at various effect
    sizes and computing the corresponding empirical power and false
    positive rate.

    Input arguments:
    ================
    deltas : ndarray
        The tested effect sizes.

    method : function
        The applied correction procedure, called as method(data, alpha)
        on the flattened simulated data. It may return either the
        decisions directly or a tuple whose first element holds the
        decisions (as the q-value method does).

    nl : int
        The length of a side of the simulated grid. There will be a total
        of nl squared tests.

    sl : int
        The length of a side of the signal region. In the simulation, there
        will be a total of sl squared tests where the alternative
        hypothesis is true.

    alpha : float
        The desired critical level.

    N : int
        Sample size in both of the two groups.

    n_iter : int
        Number of iterations used for estimating the power.

    verbose : bool
        Flag for deciding whether to print progress reports to the console.

    Output arguments:
    =================
    pwr : ndarray [n_deltas, ]
        The empirical power at each tested effect size, averaged over
        the n_iter iterations.

    fpr : ndarray [n_deltas, ]
        The corresponding false positive rates, fp / (nl**2 - sl**2),
        averaged over the n_iter iterations.
    """
    # Allocate memory for the per-iteration results.
    n_deltas = len(deltas)
    pwr = np.zeros([n_deltas, n_iter])
    fpr = np.zeros([n_deltas, n_iter])

    # Denominator of the false positive rate; does not change between
    # iterations.
    n_null = float(nl**2 - sl**2)

    # Simulate data at each effect size and compute empirical power.
    for i, delta in enumerate(deltas):
        if verbose:
            print('Effect size: %1.3f' % delta)
        for j in range(n_iter):
            # NOTE: output arguments 1-4 of square_grid_model are needed
            # for permutation testing and 1-2 for RFT based testing; those
            # procedures take different inputs and are not supported via
            # `method` here.
            X = square_grid_model(nl, sl, N, delta, equal_var=True)[0]

            Y = method(X.flatten(), alpha)
            # Procedures such as the q-value method return a tuple with
            # the decisions as the first element.
            if isinstance(Y, tuple):
                Y = Y[0]
            Y = np.reshape(Y, [nl, nl])

            tp, fp, _, fn = grid_model_counts(Y, nl, sl)
            pwr[i, j] = empirical_power(tp, tp + fn)
            fpr[i, j] = float(fp) / n_null

    # Average over the iterations.
    return np.mean(pwr, axis=1), np.mean(fpr, axis=1)