Exemplo n.º 1
0
def cooccur(united, options, Motifs_List, Motifs_DB, list_size, db_size):
    """Score every motif pair counted in the input list and return edges.

    Pairs are counted with ``countPairs`` for both the input list and the
    database.  For each pair seen in the list, a tuple
    ``(count_in_list, list_size, count_in_db, db_size)`` is built and passed
    to ``enrichmen_ratio`` and ``hypergeo_cdf``.  The resulting p-values are
    adjusted with ``holm_adjustment`` and everything is handed to
    ``formatEdges``.

    Args:
        united: Passed through unchanged to ``countPairs`` and ``formatEdges``.
        options: Must expose ``list_file`` (used to label the list counts)
            and ``parallel``.  When ``parallel`` is truthy it is forwarded as
            the first argument of ``ppCacl``; otherwise ``ssCacl`` is used.
        Motifs_List: Motifs of the input list, as accepted by ``countPairs``.
        Motifs_DB: Motifs of the database, as accepted by ``countPairs``.
        list_size: Second element of every input tuple.
        db_size: Fourth element of every input tuple.

    Returns:
        Whatever ``formatEdges`` returns for the scored pairs.

    Raises:
        ValueError: If a pair key is not two comma-separated integers.
        KeyError: If a pair is missing from the database counts in both
            orientations.
    """
    PairedMotifs_Cnts_List = countPairs(united, options, Motifs_List, getBasename(options.list_file))
    PairedMotifs_Cnts_DB = countPairs(united, options, Motifs_DB, "DB")

    # Single-argument print(...) emits the same text as the print statement.
    print("%d Pairs to Cacl Co-occuring" % len(PairedMotifs_Cnts_List))

    inputs = []
    for pair in PairedMotifs_Cnts_List:
        # Keys look like "uidA,uidB"; parsing also validates the key format.
        (uidA, uidB) = map(int, pair.split(','))
        try:
            db_count = PairedMotifs_Cnts_DB[pair]
        except KeyError:
            # The database may have stored the same pair in reverse order.
            db_count = PairedMotifs_Cnts_DB["%s,%s" % (uidB, uidA)]
        inputs.append((PairedMotifs_Cnts_List[pair], list_size, db_count, db_size))

    print("Cacling Enrichment Ratio")
    if options.parallel:
        ERs = ppCacl(options.parallel, inputs, enrichmen_ratio)
    else:
        ERs = ssCacl(inputs, enrichmen_ratio)

    print("Cacling pValues")
    if options.parallel:
        Ps = ppCacl(options.parallel, inputs, hypergeo_cdf)
    else:
        Ps = ssCacl(inputs, hypergeo_cdf)

    print("Adjusting pValues")
    adjPs = holm_adjustment(Ps)
    return formatEdges(united, PairedMotifs_Cnts_List, inputs, ERs, Ps, adjPs)
Exemplo n.º 2
0
def enrichment(united, options, Motifs_List, Motifs_DB, list_size, db_size):
    """Score every motif counted in the input list and return nodes.

    Motifs are counted with ``countSeqNames`` for both the input list and
    the database.  For each uid seen in the list, a tuple
    ``(count_in_list, list_size, count_in_db, db_size)`` is built and passed
    to ``enrichmen_ratio`` and ``hypergeo_cdf_enrich``.  The p-values are
    then adjusted with ``holm_adjustment`` and the results are handed to
    ``formatNodes``.

    ``options.parallel`` selects the runner: when truthy it is forwarded as
    the first argument of ``ppCacl``; otherwise ``ssCacl`` is used.  A uid
    present in the list counts but absent from the database counts raises
    ``KeyError``.
    """
    print("Counting Motifs")
    list_counts = countSeqNames(Motifs_List)
    db_counts = countSeqNames(Motifs_DB)

    print("%s Motifs to Cacl Enrichment" % len(list_counts))
    inputs = []
    for uid in list_counts:
        inputs.append((list_counts[uid], list_size, db_counts[uid], db_size))

    # Pick the runner once; both statistics go through the same path.
    workers = options.parallel
    if workers:
        calc = lambda func: ppCacl(workers, inputs, func)
    else:
        calc = lambda func: ssCacl(inputs, func)

    ERs = calc(enrichmen_ratio)
    Ps = calc(hypergeo_cdf_enrich)
    adjPs = holm_adjustment(Ps)

    return formatNodes(united, list_counts, list_size,
                       db_counts, db_size, ERs, Ps, adjPs)