Exemplo n.º 1
0
def test_benchmark():
    """Print the throughput of fishers_vec and of _scipy_fishers_vec.

    Both rates are reported as individual tests per second: the number of
    2 x 2 tables evaluated by one vectorized call divided by the wall-clock
    duration of that call, averaged over the repetitions. Nothing is
    asserted; the figures are only printed so they can be compared.
    """
    a, b, c, d = _gen_rand_abcd()
    # One call evaluates one test per element of the count vectors, so this
    # (not the number of repetitions) is the numerator of the rate.
    n_tests = len(a)

    n = 100
    res = np.zeros(n)
    for i in range(n):
        # perf_counter is monotonic and high resolution, which suits
        # timing short intervals better than time.time().
        startT = time.perf_counter()
        fishersapi.fishers_vec(a,
                               b,
                               c,
                               d,
                               alternative='two-sided')
        res[i] = n_tests / (time.perf_counter() - startT)
    print('Imported test: %1.2f tests per second' % np.mean(res))

    # The second implementation is timed with a single repetition only.
    n = 1
    res_scipy = np.zeros(n)
    for i in range(n):
        startT = time.perf_counter()
        fishersapi._scipy_fishers_vec(a,
                                      b,
                                      c,
                                      d,
                                      alternative='two-sided')
        res_scipy[i] = n_tests / (time.perf_counter() - startT)
    print('scipy test: %1.2f tests per second' % np.mean(res_scipy))
Exemplo n.º 2
0
def test_fishers_vec():
    """Testing the vectorized version against scipy on random data.

    For each alternative ('two-sided', 'less', 'greater') the odds-ratios
    and p-values returned by fishersapi.fishers_vec are compared
    element-wise with the results of stats.fisher_exact applied to every
    2 x 2 table [[a, b], [c, d]] individually.
    """
    a, b, c, d = _gen_rand_abcd()
    n = len(a)

    for alt in ['two-sided', 'less', 'greater']:
        ORs, pvalues = fishersapi.fishers_vec(a, b, c, d, alternative=alt)
        scipy_pvalues, scipy_ORs = np.zeros(n), np.zeros(n)
        for i in range(n):
            scipy_ORs[i], scipy_pvalues[i] = stats.fisher_exact(
                [[a[i], b[i]], [c[i], d[i]]], alternative=alt)
        # The comparisons must run inside the loop; placed after it they
        # would only ever verify the last alternative.
        npt.assert_allclose(ORs, scipy_ORs, rtol=1e-4,
                            err_msg='alternative=%s' % alt)
        npt.assert_allclose(pvalues, scipy_pvalues, rtol=1e-4,
                            err_msg='alternative=%s' % alt)
Exemplo n.º 3
0
def test_fishers_vec_minn():
    """Check the min_n argument of the vectorized version on random data.

    min_n is set to the median of the per-table totals (a + b + c + d).
    For each alternative, the number of non-NaN p-values returned by
    fishersapi.fishers_vec must equal the number of tables whose total is
    at least that median.
    """
    a, b, c, d = _gen_rand_abcd()

    counts = a + b + c + d
    # The threshold does not depend on the alternative, so compute it once.
    min_n = np.median(counts)
    gtmin = np.sum(counts >= min_n)
    for alt in ['two-sided', 'less', 'greater']:
        _, pvalues = fishersapi.fishers_vec(a,
                                            b,
                                            c,
                                            d,
                                            alternative=alt,
                                            min_n=min_n)
        npt.assert_equal(gtmin, (~np.isnan(pvalues)).sum())
Exemplo n.º 4
0
def _fisherNBR(res_df, ct_cols):
    """Run a Fisher's exact test on every row of res_df.

    The four columns named in ct_cols hold, for each row, the cells of a
    2 x 2 contingency table in "flattened" order:

        ct_cols[0]   a   X+/MEM+
        ct_cols[1]   b   X+/MEM-
        ct_cols[2]   c   X-/MEM+
        ct_cols[3]   d   X-/MEM-

    With this ordering an OR > 1, i.e. (a / c) / (b / d), indicates
    enrichment of X within the cluster. The relative rate of X inside
    vs. outside the cluster, (a / (a + c)) / (b / (b + d)), is returned
    as well.

    Parameters
    ----------
    res_df : pd.DataFrame [ntests x 4]
        Each row contains a set of 4 counts to be tested.
    ct_cols : list
        Names of the four count columns, in the order shown above.

    Returns
    -------
    res : dict
        Keys 'RR', 'OR' and 'pvalue', each mapping to a vector with one
        entry per row of res_df. 'OR' and 'pvalue' are the two outputs of
        fishers_vec called with alternative='two-sided'."""
    # Pull the four count vectors out of the frame in a, b, c, d order.
    in_pos, out_pos, in_neg, out_neg = (
        res_df[ct_cols[k]].values for k in range(4))

    odds_ratio, p_value = fishers_vec(in_pos,
                                      out_pos,
                                      in_neg,
                                      out_neg,
                                      alternative='two-sided')

    # NOTE: zero denominators are not special-cased here.
    rate_inside = in_pos / (in_pos + in_neg)
    rate_outside = out_pos / (out_pos + out_neg)
    rel_rate = rate_inside / rate_outside

    return {'RR': rel_rate, 'OR': odds_ratio, 'pvalue': p_value}
Exemplo n.º 5
0
def test_integers():
    """Plain integer inputs to fishers_vec must give scalar OR and p-value."""
    result = fishersapi.fishers_vec(10, 2, 15, 3)
    odds_ratio, p_value = result
    assert np.isscalar(odds_ratio)
    assert np.isscalar(p_value)