def cooccur(united, options, Motifs_List, Motifs_DB, list_size, db_size):
    """Score motif pairs from the input list against the DB and build edges.

    Pair counts are obtained with ``countPairs`` for both the input list and
    the DB. For every pair present in the list-side counts, one input tuple
    ``(list_count, list_size, db_count, db_size)`` is built and fed to
    ``enrichmen_ratio`` and ``hypergeo_cdf``; the resulting p-values are
    adjusted with ``holm_adjustment`` and everything is handed to
    ``formatEdges``.

    Args:
        united: Passed through unchanged to ``countPairs`` and ``formatEdges``.
        options: Run options. ``options.list_file`` (via ``getBasename``)
            labels the list-side count; a truthy ``options.parallel`` selects
            ``ppCacl`` (and is passed to it as its first argument), otherwise
            ``ssCacl`` is used.
        Motifs_List: Motif data for the input list, passed to ``countPairs``.
        Motifs_DB: Motif data for the DB, passed to ``countPairs``.
        list_size: Second element of every input tuple
            (presumably the size of the input list -- confirm with caller).
        db_size: Fourth element of every input tuple
            (presumably the size of the DB -- confirm with caller).

    Returns:
        The value returned by ``formatEdges``.

    Raises:
        KeyError: If a list pair is missing from the DB counts under both
            the "A,B" and the "B,A" key.
        ValueError: If a pair key is not two comma-separated integers.
    """
    list_pair_counts = countPairs(
        united, options, Motifs_List, getBasename(options.list_file)
    )
    db_pair_counts = countPairs(united, options, Motifs_DB, "DB")

    print("%d Pairs to Cacl Co-occuring" % len(list_pair_counts))

    inputs = []
    for pair in list_pair_counts:
        uid_a, uid_b = map(int, pair.split(','))
        try:
            db_count = db_pair_counts[pair]
        except KeyError:
            # The DB counts may store the same pair in the opposite order.
            db_count = db_pair_counts["%s,%s" % (uid_b, uid_a)]
        inputs.append((list_pair_counts[pair], list_size, db_count, db_size))

    parallel = options.parallel

    def run_calc(func):
        # Same dispatch for every statistic: parallel runner when requested,
        # serial runner otherwise.
        if parallel:
            return ppCacl(parallel, inputs, func)
        return ssCacl(inputs, func)

    print("Cacling Enrichment Ratio")
    ratios = run_calc(enrichmen_ratio)
    print("Cacling pValues")
    p_values = run_calc(hypergeo_cdf)

    print("Adjusting pValues")
    adjusted_p_values = holm_adjustment(p_values)

    # `inputs`, `ratios` and the p-value lists follow the iteration order of
    # `list_pair_counts`, which is passed along with them.
    return formatEdges(
        united, list_pair_counts, inputs, ratios, p_values, adjusted_p_values
    )
def enrichment(united, options, Motifs_List, Motifs_DB, list_size, db_size):
    """Score each motif found in the input list against the DB and build nodes.

    Per-motif counts are obtained with ``countSeqNames`` for both the input
    list and the DB. For every key in the list-side counts, one input tuple
    ``(list_count, list_size, db_count, db_size)`` is built and fed to
    ``enrichmen_ratio`` and ``hypergeo_cdf_enrich``; the resulting p-values
    are adjusted with ``holm_adjustment`` and everything is handed to
    ``formatNodes``.

    Args:
        united: Passed through unchanged to ``formatNodes``.
        options: Run options. A truthy ``options.parallel`` selects
            ``ppCacl`` (and is passed to it as its first argument), otherwise
            ``ssCacl`` is used.
        Motifs_List: Motif data for the input list, passed to
            ``countSeqNames``.
        Motifs_DB: Motif data for the DB, passed to ``countSeqNames``.
        list_size: Second element of every input tuple; also forwarded to
            ``formatNodes``.
        db_size: Fourth element of every input tuple; also forwarded to
            ``formatNodes``.

    Returns:
        The value returned by ``formatNodes``.

    Raises:
        KeyError: If a key of the list-side counts is absent from the
            DB-side counts.
    """
    print("Counting Motifs")
    list_counts = countSeqNames(Motifs_List)
    db_counts = countSeqNames(Motifs_DB)
    print("%s Motifs to Cacl Enrichment" % len(list_counts))

    inputs = [
        (list_counts[uid], list_size, db_counts[uid], db_size)
        for uid in list_counts
    ]

    parallel = options.parallel

    def run_calc(func):
        # Same dispatch for every statistic: parallel runner when requested,
        # serial runner otherwise.
        if parallel:
            return ppCacl(parallel, inputs, func)
        return ssCacl(inputs, func)

    ratios = run_calc(enrichmen_ratio)
    p_values = run_calc(hypergeo_cdf_enrich)
    adjusted_p_values = holm_adjustment(p_values)

    # The result lists follow the iteration order of `list_counts`, which is
    # passed along with them.
    return formatNodes(
        united, list_counts, list_size, db_counts, db_size,
        ratios, p_values, adjusted_p_values
    )