# Fragment: compares mammal-only and bird-only key sets against control sets
# and writes the results with write_latex().  (Python 2 print statements.)
#
# The leading ")" closes a call that starts before this line (not visible here).
#
# Steps, in statement order:
#   * bird_host_freqs is loaded by get_freqs() from two files under `dir`:
#     "Gallus_gallus.init.elm_aa_freq" and "elmdict_Gallus_gallus.init".
#   * mammal / bird: keys found only in mammal_pre / only in bird_pre.
#   * mammal_control_pre2 / bird_control_pre2: control keys with the
#     corresponding test set removed.
#   * both_control: keys shared by the two control sets; it is removed from
#     each, so mammal_control and bird_control end up disjoint.
#   * The four "intr" prints report intersection sizes.  By construction every
#     one of these intersections is empty, so each line should print 0; they
#     act as sanity checks.
#   * evaluate() is called with a label, a key set and the two host frequency
#     tables; its result is unpacked as a pair.  Note that the frequency
#     arguments are swapped for the BIRD calls (bird first, mammal second).
#     NOTE(review): the meaning of the two returned values is not visible
#     here -- confirm against evaluate().
#   * utils_stats.fisher_positive_pval() receives the test pair and the
#     control pair; the p-value is printed.
#   * write_latex() receives one 5-tuple per host group (test pair, control
#     pair, p-value) plus `outfile`.
) bird_host_freqs = get_freqs( os.path.join(dir, "Gallus_gallus.init.elm_aa_freq"), os.path.join(dir, "elmdict_Gallus_gallus.init") ) mammal = set(mammal_pre.keys()) - set(bird_pre.keys()) bird = set(bird_pre.keys()) - set(mammal_pre.keys()) mammal_control_pre2 = set(mammal_control_pre.keys()) - mammal bird_control_pre2 = set(bird_control_pre.keys()) - bird both_control = bird_control_pre2 & mammal_control_pre2 mammal_control = mammal_control_pre2 - both_control bird_control = bird_control_pre2 - both_control print "intr", len(mammal_control & mammal) print "intr", len(bird_control & bird) print "intr", len(mammal & bird) print "intr", len(mammal_control & bird_control) m1, m2 = evaluate("MAMMAL", mammal, mammal_host_freqs, bird_host_freqs) c1, c2 = evaluate("MAMMAL CONTROL", mammal_control, mammal_host_freqs, bird_host_freqs) mammal_pval = utils_stats.fisher_positive_pval((m1, m2), (c1, c2)) print mammal_pval b1, b2 = evaluate("BIRD", bird, bird_host_freqs, mammal_host_freqs) bc1, bc2 = evaluate("BIRD CONTROL", bird_control, bird_host_freqs, mammal_host_freqs) bird_pval = utils_stats.fisher_positive_pval((b1, b2), (bc1, bc2)) print bird_pval write_latex((m1, m2, c1, c2, mammal_pval), (b1, b2, bc1, bc2, bird_pval), outfile)
# Fragment: reads a tab-separated RNAi table from `f` (opened before this
# line) and tests whether hub genes are associated with the replication set.
# (Python 2 print statements.)
#
# Parsing:
#   * Each row is split on tabs into exactly four fields, all converted to
#     float: entrez, delNS1, vRNA, replication.  A row with a different field
#     count or a non-numeric field raises ValueError.
#   * ID is the entrez value rendered as an integer string.
#   * Rows with vRNA < 0 are recorded in replication_rnai.  delNS1 and
#     replication are parsed but not used in the visible code.
#   * NOTE(review): indentation was lost, so it cannot be told from here
#     whether `all_rnai[ID] = True` runs for every row or only inside the
#     `if vRNA < 0` branch -- confirm against the original layout.
#
# Set construction:
#   * bg          = network_genes restricted to IDs present in all_rnai.
#   * rep_set     = IDs recorded in replication_rnai.
#   * rep_nonHubs = (rep_set & bg) - hubs
#   * bg_hubs     = (bg & hubs) - rep_set
#   * bg_nonHubs  = (bg - hubs) - rep_nonHubs
#
# Test: fisher_positive_pval / fisher_negative_pval are both called with
#   [len(bg_hubs), len(bg_nonHubs)] and [len(hubs & rep_set), len(rep_nonHubs)].
#   NOTE(review): `hubs & rep_set` is not intersected with bg, whereas
#   rep_nonHubs is.  The two rows are only comparable if every replication
#   hub is also in bg -- verify that hubs is a subset of network_genes.
for line in f: [entrez, delNS1, vRNA, replication] = [float(x) for x in line.strip().split('\t')] ID = str(int(entrez)) if vRNA < float(0): replication_rnai[ID] = True all_rnai[ID] = True bg = set(network_genes & set(all_rnai.keys())) rep_set = set(replication_rnai.keys()) print 'background', len(bg) print 'hubs in background', len(bg & hubs) print 'replication bg', len(rep_set & bg) print 'replication hubs', len(hubs & rep_set) rep_nonHubs = set(set(rep_set & bg) - hubs) bg_hubs = set(bg & hubs) - rep_set bg_nonHubs = set(bg - hubs) - rep_nonHubs print len(bg_hubs), len(bg_nonHubs) print len(hubs & rep_set), len(rep_nonHubs) p_pval = utils_stats.fisher_positive_pval([len(bg_hubs), len(bg_nonHubs)], [len(hubs & rep_set), len(rep_nonHubs)]) n_pval = utils_stats.fisher_negative_pval([len(bg_hubs), len(bg_nonHubs)], [len(hubs & rep_set), len(rep_nonHubs)]) print 'positive assoc', p_pval print 'neg assoc', n_pval
# Fragment: finishes the "different" comparison for threshold `cut`, writes
# its results file, then resets state for the "same" comparison.
#
# Control pass (output file 'mammal_bird.different.<cut>.notest'):
#   * Every key of elm2freq that is not in test_elms is marked in
#     control_elms and counted in non_virus_elms_all.
#   * Keys not in ignore_elms are passed to test_it(elm, elm2freq, f); the
#     result is unpacked as (count, same) and added to non_virus_elms and
#     non_virus_elms_same.
#   * NOTE(review): indentation was lost; the nesting of the
#     `if not elm in ignore_elms` branch under the first `if` is presumed,
#     not visible -- confirm against the original layout.
#
# Contingency counts (virus_elm_count and virus_elms_same are computed before
# this fragment):
#   diff_diff    = virus_elm_count - virus_elms_same
#   diff_same    = virus_elms_same + len(test_elms) - virus_elm_count
#                  - len(intersectLists([test_elms, ignore_elms]))
#   diff_bg_diff = non_virus_elms - non_virus_elms_same
#   diff_bg_same = non_virus_elms_same + non_virus_elms_all - non_virus_elms
#                  - len(intersectLists([control_elms, ignore_elms]))
#   NOTE(review): utils_graph.intersectLists presumably returns the entries
#   common to the given collections -- verify against its definition.
#
# Results file '<cut>.different.results' gets three tab-separated lines:
#   pvalue (from fisher_positive_pval), virus counts, nvirus counts.
#
# Afterwards test_elms_2 collects the members of common_all_elms that are not
# in test_elms, and three counters are reset to 0.  test_elms_2 is not used in
# the visible code.  The final `with open(...)` header opens
# 'mammal_bird.same.<cut>.test'; its body lies beyond this line.
with open('mammal_bird.different.' + str(cut) + '.notest', 'w') as f: for elm in elm2freq: if not elm in test_elms: control_elms[elm] = True non_virus_elms_all += 1 if not elm in ignore_elms: count,same = test_it(elm, elm2freq, f) non_virus_elms += count non_virus_elms_same += same diff_diff = virus_elm_count-virus_elms_same diff_same = virus_elms_same + len(test_elms.keys())-virus_elm_count-len(utils_graph.intersectLists([test_elms,ignore_elms])) diff_bg_diff = non_virus_elms-non_virus_elms_same diff_bg_same = non_virus_elms_same + non_virus_elms_all-non_virus_elms-len(utils_graph.intersectLists([control_elms,ignore_elms])) with open(str(cut) + '.different.results', 'w') as f: p = utils_stats.fisher_positive_pval([diff_diff,diff_same], [diff_bg_diff,diff_bg_same]) f.write('pvalue\t' + str(p) + '\n') f.write('virus\t' + str(diff_diff) + '\t' + str(diff_same) + '\n') f.write('nvirus\t' + str(diff_bg_diff) + '\t' + str(diff_bg_same) + '\n') test_elms_2 = {} for c in common_all_elms: if not c in test_elms: test_elms_2[c] = True virus_elms_same = 0 virus_elm_count = 0 non_virus_elms_same = 0 with open('mammal_bird.same.' + str(cut) + '.test', 'w') as f:
# Fragment: begins with the tail of a function whose header is not visible
# (it writes "<elm>\tSAME\n" to `out` and returns `same`), followed by the
# script section that runs the "same" comparison for threshold `cut`.
#
# Script section:
#   * test_elms is set to common_all_elms.
#   * Test pass ('mammal_bird.same.<cut>.test'): the result of
#     test_it(elm, elm2freq, f) is summed over test_elms into virus_elms_same.
#   * Control pass ('mammal_bird.same.<cut>.notest'): keys of elm2freq that
#     are not in test_elms are counted in non_virus_elms and their test_it()
#     results are summed into non_virus_elms_same.
#     NOTE(review): indentation was lost; it is presumed that the test_it()
#     call sits inside the `if not elm in test_elms` branch -- confirm
#     against the original layout.
#   * virus_elm_count = number of keys in test_elms (test_elms must offer
#     .keys(), i.e. behave like a dict).
#   * fisher_positive_pval is called with
#       [virus_elm_count - virus_elms_same, virus_elms_same] and
#       [non_virus_elms - non_virus_elms_same, non_virus_elms_same].
#   * Results file '<cut>.same.results' gets three tab-separated lines:
#     pvalue, virus counts, nvirus counts (same two values as passed to the
#     test, in the same order).
out.write(elm + '\tSAME\n') return same test_elms = common_all_elms virus_elms_same = 0 non_virus_elms_same = 0 with open('mammal_bird.same.' + str(cut) + '.test', 'w') as f: for elm in test_elms: virus_elms_same += test_it(elm, elm2freq, f) non_virus_elms = 0 with open('mammal_bird.same.' + str(cut) + '.notest', 'w') as f: for elm in elm2freq: if not elm in test_elms: non_virus_elms += 1 non_virus_elms_same += test_it(elm, elm2freq, f) virus_elm_count = len(test_elms.keys()) with open(str(cut) + '.same.results', 'w') as f: p = utils_stats.fisher_positive_pval([virus_elm_count- virus_elms_same, virus_elms_same], [non_virus_elms- non_virus_elms_same, non_virus_elms_same]) f.write('pvalue\t' + str(p) + '\n') f.write('virus\t' + str(virus_elm_count-virus_elms_same) + '\t' + str(virus_elms_same) + '\n') f.write('nvirus\t' + str(non_virus_elms-non_virus_elms_same) + '\t' + str(non_virus_elms_same) + '\n')