Пример #1
0
    'direction': dirs,
    'regulation_type': txtypes
}
# One row per combination of the option lists stored in ``x`` (cartesian
# product of its values); the columns are named after the keys of ``x``.
df = pd.DataFrame(list(itertools.product(*x.values())), columns=list(x))
print('getting fishers results...')
# Each row produces a pair that is unpacked into the two columns named below.
temp = df.apply(Fishers_mins_w_regsize, axis=1, minlist=minlist, minov=minov)
fisher = pd.DataFrame(temp.tolist(),
                      columns=['Fisher_p_value', 'regulome_size'])

# Both frames share the same default index, so this is a row-wise join.
df = df.merge(fisher, how='outer', left_index=True, right_index=True)

#fdr only for the tests that were actually done
print('getting fdr...')
# ``isna``/``notna`` also cope with None/object columns, where ``np.isnan``
# raises a TypeError.
nan = df[df.Fisher_p_value.isna()]
# Take an explicit copy: adding a column to a boolean-indexed slice is chained
# assignment and triggers pandas' SettingWithCopyWarning.
notnan = df[df.Fisher_p_value.notna()].copy()
# NOTE(review): ``mult`` is presumably statsmodels' ``multipletests`` and
# element [1] its corrected p-values -- confirm against the import.
notnan['Fisher_FDR'] = mult(notnan['Fisher_p_value'], method='fdr_bh')[1]
# Untested rows come first and get NaN in the new column; row order matches
# the previous implementation.
df = pd.concat([nan, notnan])
df['Fisher_FDR'] = df['Fisher_FDR'].astype(float)


#interpret on/off
def ONOFF(row):
    """Label one result row as 'ON' or 'OFF'.

    Reads ``row.Fisher_FDR``, ``row.direction`` and ``row.regulation_type``.
    Only rows with ``Fisher_FDR < 0.05`` are labelled.

    Returns 'ON' or 'OFF' for the combinations handled below. In the code
    visible here, every other combination falls through and returns None.

    NOTE(review): there is no branch for direction 'down' with
    'Transcriptional Repressor'. By symmetry that would presumably be 'ON';
    confirm whether the function was truncated or the case is intentionally
    left unlabelled.
    """
    if row.Fisher_FDR < 0.05:
        if row.direction == 'up':
            # Targets go up: an activator is on, a repressor is off.
            if row.regulation_type == 'Transcriptional Activator':
                return 'ON'
            elif row.regulation_type == 'Transcriptional Repressor':
                return 'OFF'
        elif row.direction == 'down':
            # Targets go down: an activator is off.
            if row.regulation_type == 'Transcriptional Activator':
                return 'OFF'
# Read the gene-set file with a fixed block of 300 integer column labels
# (0..299); column 0 becomes the index below.
# NOTE(review): presumably needed because .gmt rows are ragged -- confirm that
# no gene set has more than 299 members, or it will not fit this layout.
er = pd.read_csv('genesets.gmt', sep='\t', names=list(range(300)))

cand = pd.concat([go, er]).set_index(0)

##remove this to check everything
#cand=cand[cand.index.isin(finalsets)]
#print(cand.shape)

# One test per (dxset, candset) pair.
rslt = pd.DataFrame(list(product(sets.keys(), cand.index)),
                    columns=['dxset', 'candset'])
rslt['pval'] = rslt.apply(fishers, axis=1)

# FDR only over the tests that produced a p-value; untested pairs keep NaN.
# ``isna``/``notna`` also cope with None/object columns, where ``np.isnan``
# raises. The explicit copies avoid assigning a column on a boolean-indexed
# slice (chained assignment / SettingWithCopyWarning).
nan = rslt[rslt.pval.isna()].copy()
nan['fdr'] = np.nan
notnan = rslt[rslt.pval.notna()].copy()
# NOTE(review): ``mult`` is presumably statsmodels' ``multipletests`` and
# element [1] its corrected p-values -- confirm against the import.
notnan['fdr'] = mult(notnan.pval, method='fdr_bh')[1]
rslt = pd.concat([nan, notnan])
rslt.reset_index(drop=True, inplace=True)

# Reshape to a candset x dxset matrix of FDR values. The arguments are passed
# by keyword because DataFrame.pivot no longer accepts them positionally in
# pandas >= 2.0.
fdrs = rslt.pivot(index='candset', columns='dxset', values='fdr')
print(fdrs.shape)
# Drop candidate sets that were never tested against any dxset.
fdrs.dropna(how='all', inplace=True)
print(fdrs.shape)
# Remaining untested cells count as not significant.
fdrs.fillna(1., inplace=True)
# Keep candidate sets that are significant for at least one dxset. Reduce the
# boolean mask itself: reducing the masked values (fdrs[fdrs < 0.05]) treats
# an FDR of exactly 0.0 as False and would drop the strongest hits.
fdrs = fdrs[(fdrs < 0.05).any(axis=1)]
print(fdrs.shape)
fdrs.to_csv('all_geneset_enrichment_fdr.tsv', sep='\t')
# -log10 scale for the second output file; an FDR of 0.0 becomes inf here.
logged = -np.log10(fdrs)
logged.to_csv('all_geneset_enrichment_fdr_log_.tsv', sep='\t')

##read stuff back in after the first run
Пример #3
0
            temp.columns = ['shadow_p_value','shadow_overlap']
            new = pd.concat([new,temp],axis=1)
            overlaps = pd.concat([overlaps,new])

## number of altered TFs in Fishers, reported for each time point
print('FISHERS:')
for time in times:
    # Rows of ``tfs`` belonging to the current time point.
    timedf = tfs.loc[tfs.time == time]
    distinct_regulators = timedf.regulator.unique()
    print('%s altered tfs at time %s' % (len(distinct_regulators), time))
    
#fdr only for the tests that were actually done
print('Calculating FDR...')
# ``isna``/``notna`` also cope with None/object columns, where ``np.isnan``
# raises a TypeError.
nan = overlaps[overlaps.shadow_p_value.isna()]
# Take an explicit copy: adding a column to a boolean-indexed slice is chained
# assignment and triggers pandas' SettingWithCopyWarning.
notnan = overlaps[overlaps.shadow_p_value.notna()].copy()
# NOTE(review): ``mult`` is presumably statsmodels' ``multipletests`` and
# element [1] its corrected p-values -- confirm against the import.
notnan['shadow_FDR'] = mult(notnan['shadow_p_value'],method='fdr_bh')[1]
# Untested rows come first and get NaN in the new column; row order matches
# the previous implementation.
overlaps = pd.concat([nan,notnan])
overlaps['shadow_FDR'] = overlaps['shadow_FDR'].astype(float)

# Output file name is suffixed with ``tx``.
overlaps.to_csv('TFs_shadow_'+tx+'.csv',index=False)

##for TF pairs that overlap, choose which TF is the better one
print('Calculating new Fishers exact tests with differential expression...')
# Only the pairs whose overlap is significant are re-tested.
df = overlaps[overlaps.shadow_FDR<0.05]
pvalue_columns = ['p_value_1','p_value_2','p_value_intersection']
if df.empty:
    # With no significant pairs, apply(...).tolist() and the column rename
    # below would fail, so build the empty result frame directly.
    new = pd.DataFrame(columns=pvalue_columns, dtype=float)
else:
    # Each row yields three p-values, unpacked into the three columns.
    new = pd.DataFrame(df.apply(Fishers_shadow_choose,axis =1,minlist = minlist, minov= minov).tolist())
    new.columns = pvalue_columns
# Reset the index so the positional rows of ``new`` line up with ``df``.
df = pd.concat([df.reset_index(drop=True),new],axis=1)

#combine the two new columns of p values, take fdr, then put the fdrs back in the dataframe
print('Calculating FDR...')
ser1= df.p_value_1