Exemplo n.º 1
0
)
# Load the bird-host table with get_freqs from the two Gallus_gallus files
# found under `dir`.
bird_host_freqs = get_freqs(
    os.path.join(dir, "Gallus_gallus.init.elm_aa_freq"),
    os.path.join(dir, "elmdict_Gallus_gallus.init"),
)

# Keys that occur for exactly one of the two host classes.
mammal_keys = set(mammal_pre.keys())
bird_keys = set(bird_pre.keys())
mammal = mammal_keys - bird_keys
bird = bird_keys - mammal_keys

# Control sets: drop anything already in the matching test set, then drop
# whatever is left in both controls so the two control sets are disjoint.
mammal_control_pre2 = set(mammal_control_pre.keys()) - mammal
bird_control_pre2 = set(bird_control_pre.keys()) - bird
both_control = bird_control_pre2 & mammal_control_pre2
mammal_control = mammal_control_pre2 - both_control
bird_control = bird_control_pre2 - both_control

# Sanity output: by construction each of these intersections is empty, so
# every line should read "intr 0".
# A single pre-formatted argument keeps the output identical under the
# Python 2 print statement and the Python 3 print function.
for first, second in (
    (mammal_control, mammal),
    (bird_control, bird),
    (mammal, bird),
    (mammal_control, bird_control),
):
    print("intr %d" % len(first & second))

# Mammal test set vs. its control; evaluate() yields a pair of values that
# form one row of the table handed to the Fisher test.
m1, m2 = evaluate("MAMMAL", mammal, mammal_host_freqs, bird_host_freqs)
c1, c2 = evaluate(
    "MAMMAL CONTROL", mammal_control, mammal_host_freqs, bird_host_freqs
)
mammal_pval = utils_stats.fisher_positive_pval((m1, m2), (c1, c2))
print(mammal_pval)

# Same comparison for birds; note the host tables are passed in the
# opposite order.
b1, b2 = evaluate("BIRD", bird, bird_host_freqs, mammal_host_freqs)
bc1, bc2 = evaluate(
    "BIRD CONTROL", bird_control, bird_host_freqs, mammal_host_freqs
)
bird_pval = utils_stats.fisher_positive_pval((b1, b2), (bc1, bc2))
print(bird_pval)

# Hand both result rows (counts followed by the p-value) to the LaTeX writer.
write_latex(
    (m1, m2, c1, c2, mammal_pval),
    (b1, b2, bc1, bc2, bird_pval),
    outfile,
)
Exemplo n.º 2
0
    # Each row holds four tab-separated numeric columns:
    # entrez, delNS1, vRNA, replication.
    for line in f:
        columns = line.strip().split('\t')
        entrez, delNS1, vRNA, replication = map(float, columns)
        # The ID is parsed as a float first, then normalised to an integer
        # string for use as a dictionary key.
        ID = str(int(entrez))
        # Every row is recorded in all_rnai; rows with a negative vRNA value
        # are additionally flagged in replication_rnai.
        all_rnai[ID] = True
        if vRNA < 0.0:
            replication_rnai[ID] = True

# Background: IDs that are in network_genes and also have an entry in
# all_rnai. rep_set: IDs flagged in replication_rnai.
bg = set(all_rnai.keys()) & network_genes
rep_set = set(replication_rnai.keys())

# NOTE(review): hubs & rep_set is not intersected with bg, unlike
# rep_nonHubs below. The four cells only partition bg if every hub is in
# network_genes -- confirm against where `hubs` is built.
rep_hubs_count = len(hubs & rep_set)

# Each print passes one pre-formatted string so the output is identical
# under the Python 2 print statement and the Python 3 print function.
print('background %d' % len(bg))
print('hubs in background %d' % len(bg & hubs))
print('replication bg %d' % len(rep_set & bg))
print('replication hubs %d' % rep_hubs_count)

# Split the background into hub / non-hub cells, keeping the flagged IDs
# out of the two background cells.
rep_nonHubs = (rep_set & bg) - hubs
bg_hubs = (bg & hubs) - rep_set
bg_nonHubs = (bg - hubs) - rep_nonHubs

bg_hubs_count, bg_nonhubs_count = len(bg_hubs), len(bg_nonHubs)
rep_nonhubs_count = len(rep_nonHubs)

print('%d %d' % (bg_hubs_count, bg_nonhubs_count))
print('%d %d' % (rep_hubs_count, rep_nonhubs_count))

# Test both directions of association on the same 2x2 table; fresh lists
# are built for each call, as in the original.
p_pval = utils_stats.fisher_positive_pval(
    [bg_hubs_count, bg_nonhubs_count],
    [rep_hubs_count, rep_nonhubs_count],
)
n_pval = utils_stats.fisher_negative_pval(
    [bg_hubs_count, bg_nonhubs_count],
    [rep_hubs_count, rep_nonhubs_count],
)
print('positive assoc %s' % (p_pval,))
print('neg assoc %s' % (n_pval,))
Exemplo n.º 3
0
# Walk every ELM in elm2freq that is not in test_elms: record it as a
# control, count it, and (unless it is in ignore_elms) score it with
# test_it, which also receives the open .notest file.
with open(''.join(['mammal_bird.different.', str(cut), '.notest']), 'w') as f:
    for elm in elm2freq:
        if elm in test_elms:
            continue
        control_elms[elm] = True
        non_virus_elms_all += 1
        if elm in ignore_elms:
            continue
        # test_it returns a pair that is added to the two running totals.
        count, same = test_it(elm, elm2freq, f)
        non_virus_elms += count
        non_virus_elms_same += same
                
# Cells of the 2x2 table: the first pair is built from the virus_* and
# test_elms counters, the second from the non_virus_* and control_elms
# counters.
diff_diff = virus_elm_count - virus_elms_same
diff_same = (
    virus_elms_same
    + len(test_elms.keys())
    - virus_elm_count
    - len(utils_graph.intersectLists([test_elms, ignore_elms]))
)
diff_bg_diff = non_virus_elms - non_virus_elms_same
diff_bg_same = (
    non_virus_elms_same
    + non_virus_elms_all
    - non_virus_elms
    - len(utils_graph.intersectLists([control_elms, ignore_elms]))
)

# Write the p-value followed by the two table rows, tab-separated.
with open(''.join([str(cut), '.different.results']), 'w') as f:
    p = utils_stats.fisher_positive_pval(
        [diff_diff, diff_same],
        [diff_bg_diff, diff_bg_same],
    )
    f.write('\t'.join(['pvalue', str(p)]) + '\n')
    f.write('\t'.join(['virus', str(diff_diff), str(diff_same)]) + '\n')
    f.write('\t'.join(['nvirus', str(diff_bg_diff), str(diff_bg_same)]) + '\n')

# test_elms_2 gets a True entry for each member of common_all_elms that is
# not already in test_elms.
test_elms_2 = {}
for c in common_all_elms:
    if c in test_elms:
        continue
    test_elms_2[c] = True

# Zero the counters before the next pass.
virus_elms_same = virus_elm_count = non_virus_elms_same = 0
with open('mammal_bird.same.' + str(cut) + '.test', 'w') as f:
Exemplo n.º 4
0
            out.write(elm + '\tSAME\n')
    return same

# For this pass the test set is common_all_elms itself (same object, not a
# copy).
test_elms = common_all_elms
virus_elms_same = non_virus_elms_same = 0

# Score each test ELM; test_it writes to the .test file and its return
# value is accumulated into virus_elms_same.
with open(''.join(['mammal_bird.same.', str(cut), '.test']), 'w') as f:
    for elm in test_elms:
        virus_elms_same = virus_elms_same + test_it(elm, elm2freq, f)

# Count and score every ELM in elm2freq that is outside test_elms; per-ELM
# output goes to the .notest file via test_it.
non_virus_elms = 0
with open(''.join(['mammal_bird.same.', str(cut), '.notest']), 'w') as f:
    for elm in elm2freq:
        if elm in test_elms:
            continue
        non_virus_elms += 1
        non_virus_elms_same += test_it(elm, elm2freq, f)

# Build the 2x2 table (total minus "same" count, then "same" count) for the
# test ELMs and for the remaining ELMs, then write the p-value and both
# rows as tab-separated lines.
virus_elm_count = len(test_elms.keys())
virus_row = [virus_elm_count - virus_elms_same, virus_elms_same]
non_virus_row = [non_virus_elms - non_virus_elms_same, non_virus_elms_same]

with open(''.join([str(cut), '.same.results']), 'w') as f:
    # The Fisher call receives copies so the rows written below are
    # exactly the values computed above.
    p = utils_stats.fisher_positive_pval(list(virus_row), list(non_virus_row))
    f.write('\t'.join(['pvalue', str(p)]) + '\n')
    f.write('\t'.join(['virus', str(virus_row[0]), str(virus_row[1])]) + '\n')
    f.write(
        '\t'.join(['nvirus', str(non_virus_row[0]), str(non_virus_row[1])])
        + '\n'
    )