示例#1
0
def test_fdr_bky():
    """Check fdrcorrection_twostage on the worked example from the BKY paper.

    The corrected p-values themselves are not asserted here. Reference
    rejection counts reported for this data set in the paper are:
    single step-up 4, two-stage 8, iterated two-step 9. Only the
    two-stage (TST) variant is exercised below.
    """
    pvals = [
        0.0001, 0.0004, 0.0019, 0.0095, 0.0201,
        0.0278, 0.0298, 0.0344, 0.0459, 0.3240,
        0.4262, 0.5719, 0.6528, 0.7590, 1.000,
    ]
    expected_alphas = [0.047619, 0.0649]
    expected_rejections = 8

    res_tst = fdrcorrection_twostage(pvals, alpha=0.05, iter=False)

    # The last element of the result carries the alpha levels; its first two
    # entries must match the reference values to three decimals (the second
    # one is the alpha_star used in stage 2 of TST).
    assert_almost_equal(expected_alphas, res_tst[-1][:2], 3)

    # The first element is the rejection mask; its sum is the number of
    # rejected hypotheses.
    assert_equal(expected_rejections, res_tst[0].sum())
示例#2
0
def test_fdr_bky():
    """Two-stage FDR correction reproduces the BKY paper example.

    Reference rejection counts from the paper for these p-values:
    single step-up 4, two-stage 8, iterated two-step 9. This test covers
    the non-iterated two-stage procedure only and does not assert the
    corrected p-values.
    """
    # Example p-values taken from BKY.
    low_pvals = [0.0001, 0.0004, 0.0019, 0.0095, 0.0201, 0.0278, 0.0298,
                 0.0344, 0.0459]
    high_pvals = [0.3240, 0.4262, 0.5719, 0.6528, 0.7590, 1.000]
    pvals = low_pvals + high_pvals

    res_tst = fdrcorrection_twostage(pvals, alpha=0.05, iter=False)

    # Alpha levels live in the last element of the result; compare the first
    # two to 3 decimals (alpha_star for stage 2 is the second value).
    assert_almost_equal([0.047619, 0.0649], res_tst[-1][:2], decimal=3)
    # Number of rejections must match the two-stage count from the paper.
    assert_equal(8, res_tst[0].sum())
示例#3
0
# Notebook display: preview the first rows of the T5 table.
df_comments_sorted_T5.head()


# %%
# Compare expected and observed counts with a chi-square test, one test per
# row label of df_comments_sorted_T5. Each result is appended to chi_result
# as [row label, test result].
# NOTE(review): chi_result and all_sum are not defined in this cell; they are
# presumably created in an earlier cell (chi_result as a list) -- confirm.
for idx in df_comments_sorted_T5.index:
    # Expected counts: column totals of df_comments_sorted scaled by this
    # row's share of all_sum (presumably the grand total -- TODO confirm).
    expected = df_comments_sorted.sum(0) * (df_comments_sorted.loc[idx,:].sum() / all_sum)
    # Observed counts come from the T5 table, not from df_comments_sorted.
    # NOTE(review): confirm both tables share the same columns and that the
    # observed and expected totals agree; scipy's chisquare (if that is what
    # is imported here) documents that the two sums must match.
    observed = df_comments_sorted_T5.loc[idx,:]
    chi_result.append([idx,chisquare(observed,f_exp=expected)])


# %%
# Flatten chi_result into a table with one row per tested label: the label
# itself plus the .statistic and .pvalue attributes of its test result.
df_chi = pd.DataFrame([[a[0],a[1].statistic,a[1].pvalue] for a in chi_result])
df_chi.columns = ['condition','statistic','pvalue']
# Multiple-testing adjustment: keep element [1] of the two-stage FDR result.
# NOTE(review): presumably statsmodels' fdrcorrection_twostage, where index 1
# is the array of corrected p-values and the call uses the library's default
# alpha and method -- confirm against the import.
df_chi['adj_pvalue'] = fdrcorrection_twostage(df_chi['pvalue'])[1]
# Notebook display: preview the first rows.
df_chi.head()


# %%

# Keep only rows whose adjusted p-value is below 0.05, ordered by the raw
# (unadjusted) p-value in ascending order.
df_chi_sorted = df_chi[df_chi['adj_pvalue'] < 0.05].sort_values('pvalue')

# Notebook display: show the ten rows with the smallest raw p-values.
df_chi_sorted.head(10)


# %%
plt.figure(figsize=(12,12))
i =1
for idx in df_chi_sorted['condition'][:10]: