Exemplo n.º 1
def tamo2tamo(file, outname):
    """Fill in missing statistics for the motifs in ``file`` and save them.

    Motifs are loaded with ``MotifTools.load``, scored against the probe ids
    read from a FASTA file, and written out with ``MotifTools.save_motifs``.

    Args:
        file: Path of the motif file to load.
        outname: Path the updated motifs are saved to.

    Module globals used:
        fsafile: When truthy, the FASTA file to use; otherwise the FASTA
            file is looked up with ``find_fsa(file)``.
        probefile: When falsy, ``PROBESET`` is rebuilt as
            ``MotifMetrics.ProbeSet('YEAST')``.
        PROBESET: Probe set used for scoring. It must already be set by the
            caller when ``probefile`` is truthy.
    """
    global probefile, PROBESET, fsafile

    # The conservation ROC-AUC step was switched off in the original code
    # (`if 0 and ...`); it stays disabled behind this flag.
    compute_cra = False

    motifs = MotifTools.load(file)
    # An explicitly configured FASTA file wins over the one derived from
    # the motif file name.
    if fsafile:
        fsaname = fsafile
    else:
        fsaname = find_fsa(file)

    # Parenthesised single-argument print emits the same text under the
    # Python 2 print statement and the Python 3 print function.
    print('# FSA  %s' % (fsaname,))
    fsaD = MotifMetrics.fasta2seqs(fsaname, 'want_dict')
    probes = list(fsaD.keys())
    probe_ids = set(probes)  # O(1) membership tests inside the motif loop
    if not probefile:
        PROBESET = MotifMetrics.ProbeSet('YEAST')

    print("# %d motifs" % len(motifs))
    for motif in motifs:
        # Each statistic is computed only while it still holds the value
        # tested here (presumably the "unset" default -- confirm against
        # the motif class). E_site, E_chi2, E_seq and MNCP are not touched.
        if motif.pvalue == 1:
            motif.pvalue = PROBESET.p_value(motif, probes, 'v')
        if motif.church == 1:
            motif.church = PROBESET.church(motif, probes, 'v')
        if motif.ROC_auc is None:
            motif.ROC_auc = PROBESET.ROC_AUC(motif, probes, 'v')
        if motif.frac is None:
            motif.frac = PROBESET.frac(motif, probes, 'v', 0.7)
        if motif.numbound == 0:
            matching = PROBESET.matching_ids(motif, [], factor=0.7)
            matchbound = [x for x in matching if x in probe_ids]
            motif.numbound = len(probes)
            motif.nummotif = len(matching)
            motif.numboundmotif = len(matchbound)
        if compute_cra and motif.CRA is None:
            CRA, Cfrac = PROBESET.cons_ROC_AUC(motif, probes, 'v',
                                               tuple='YES')
            motif.CRA = CRA
            motif.Cfrac = Cfrac

    MotifTools.save_motifs(motifs, outname)
Exemplo n.º 2
def tamo2tamo(file, outname):
    """Fill in missing statistics for the motifs in ``file`` and save them.

    Motifs are loaded with ``MotifTools.load``, scored against the probe ids
    read from a FASTA file, and written out with ``MotifTools.save_motifs``.

    Args:
        file: Path of the motif file to load.
        outname: Path the updated motifs are saved to.

    Module globals used:
        fsafile: When truthy, the FASTA file to use; otherwise the FASTA
            file is looked up with ``find_fsa(file)``.
        probefile: When falsy, ``PROBESET`` is rebuilt as
            ``MotifMetrics.ProbeSet('YEAST')``.
        PROBESET: Probe set used for scoring. It must already be set by the
            caller when ``probefile`` is truthy.
    """
    global probefile, PROBESET, fsafile

    # The conservation ROC-AUC step was switched off in the original code
    # (`if 0 and ...`); it stays disabled behind this flag.
    compute_cra = False

    motifs = MotifTools.load(file)
    # An explicitly configured FASTA file wins over the one derived from
    # the motif file name.
    if fsafile:
        fsaname = fsafile
    else:
        fsaname = find_fsa(file)

    # Parenthesised single-argument print emits the same text under the
    # Python 2 print statement and the Python 3 print function.
    print('# FSA  %s' % (fsaname,))
    fsaD = MotifMetrics.fasta2seqs(fsaname, 'want_dict')
    probes = list(fsaD.keys())
    probe_ids = set(probes)  # O(1) membership tests inside the motif loop
    if not probefile:
        PROBESET = MotifMetrics.ProbeSet('YEAST')

    print("# %d motifs" % len(motifs))
    for motif in motifs:
        # Each statistic is computed only while it still holds the value
        # tested here (presumably the "unset" default -- confirm against
        # the motif class). E_site, E_chi2, E_seq and MNCP are not touched.
        if motif.pvalue == 1:
            motif.pvalue = PROBESET.p_value(motif, probes, 'v')
        if motif.church == 1:
            motif.church = PROBESET.church(motif, probes, 'v')
        if motif.ROC_auc is None:
            motif.ROC_auc = PROBESET.ROC_AUC(motif, probes, 'v')
        if motif.frac is None:
            motif.frac = PROBESET.frac(motif, probes, 'v', 0.7)
        if motif.numbound == 0:
            matching = PROBESET.matching_ids(motif, [], factor=0.7)
            matchbound = [x for x in matching if x in probe_ids]
            motif.numbound = len(probes)
            motif.nummotif = len(matching)
            motif.numboundmotif = len(matchbound)
        if compute_cra and motif.CRA is None:
            # Best effort: a failure here leaves CRA/Cfrac unset instead of
            # aborting the whole run. Catch Exception rather than using a
            # bare except so KeyboardInterrupt/SystemExit still propagate.
            try:
                CRA, Cfrac = PROBESET.cons_ROC_AUC(motif, probes, 'v',
                                                   tuple='YES')
            except Exception:
                pass
            else:
                motif.CRA = CRA
                motif.Cfrac = Cfrac

    # Persist the result; without this step `outname` was never used.
    MotifTools.save_motifs(motifs, outname)
Exemplo n.º 3
def ace2tamo(filename, tamoname):
    """Score the motifs of an AlignACE or MEME result file against a probe set.

    The result file is parsed with ``AlignAce.AlignAce`` (``.ace``) or
    ``Meme.Meme`` (``.meme``). Every motif then gets its missing statistics
    filled in, a ``seednum`` and a ``maxscore`` of -100, and the average and
    standard deviation (``Arith.avestd``) of its scan scores over 100
    ``bogus_kmers`` is printed. MEME motifs additionally get a ``MAP`` value
    and are sorted in place.

    Args:
        filename: Path to the motif-discovery output; must end in ``.ace``
            or ``.meme``.
        tamoname: Not used by this code. NOTE(review): presumably the
            output path -- confirm whether a save step is missing.

    Raises:
        ValueError: If ``filename`` ends in neither ``.ace`` nor ``.meme``.

    Module globals used:
        probefile: When falsy, ``PROBESET`` is rebuilt as
            ``MotifMetrics.ProbeSet('HUMAN_250')``.
        PROBESET: Probe set used for scoring; the FASTA sequences are added
            to its ``probes`` mapping.
    """
    global probefile, PROBESET

    # Raw strings keep `\.` a regex escape rather than a string escape.
    is_meme = bool(re.search(r'\.meme$', filename))
    if re.search(r'\.ace$', filename):
        mdobject = AlignAce.AlignAce(filename)
    elif is_meme:
        mdobject = Meme.Meme(filename)
    else:
        # Previously this fell through and failed later with an
        # UnboundLocalError on `mdobject`.
        raise ValueError(
            "ace2tamo: expected a .ace or .meme file, got %r" % (filename,))

    fsaname = find_fsa(mdobject.fastafile)
    fsaD = MotifMetrics.fasta2seqs(fsaname, 'want_dict')
    probes = list(fsaD.keys())
    if not probefile:
        PROBESET = MotifMetrics.ProbeSet('HUMAN_250')
    for key, seq in fsaD.items():
        PROBESET.probes[key] = seq

    for motif in mdobject.motifs:
        # Each statistic is computed only while it still holds the value
        # tested here (presumably the "unset" default -- confirm against
        # the motif class). E_chi2 is not computed.
        if motif.pvalue == 1:
            motif.pvalue = PROBESET.p_value(motif, probes, 'v')
        if motif.church == 1:
            motif.church = PROBESET.church(motif, probes, 'v')
        if motif.E_site is None:
            motif.E_site = PROBESET.E_sitef(motif, probes, 3, 'v')
        if motif.E_seq is None:
            motif.E_seq = PROBESET.E_seq(motif, probes, 'v')
        if motif.ROC_auc is None:
            motif.ROC_auc = PROBESET.ROC_AUC(motif, probes, 'v')
        if motif.MNCP is None:
            motif.MNCP = PROBESET.MNCP(motif, probes, 'v')
        if is_meme:
            # -log10 of the MEME e-value.
            motif.MAP = -math.log(motif.evalue) / math.log(10)

    for seednum, motif in enumerate(mdobject.motifs):
        motif.seednum = seednum
        kmers = motif.bogus_kmers(100)
        motif.maxscore = -100
        scores = [motif.scan(kmer)[2][0] for kmer in kmers]
        print(Arith.avestd(scores))

    if is_meme:
        # Two stable sorts, as before: the final order is by church, with
        # ties left in pvalue order. key= replaces the Python-2-only cmp.
        mdobject.motifs.sort(key=lambda m: m.pvalue)
        mdobject.motifs.sort(key=lambda m: m.church)

Exemplo n.º 4
def ace2tamo(filename, tamoname):
    """Score the motifs of an AlignACE or MEME result file against a probe set.

    The result file is parsed with ``AlignAce.AlignAce`` (``.ace``) or
    ``Meme.Meme`` (``.meme``). Every motif then gets its missing statistics
    filled in, a ``seednum`` and a ``maxscore`` of -100, and the average and
    standard deviation (``Arith.avestd``) of its scan scores over 100
    ``bogus_kmers`` is printed. MEME motifs additionally get a ``MAP`` value
    and are sorted in place.

    Args:
        filename: Path to the motif-discovery output; must end in ``.ace``
            or ``.meme``.
        tamoname: Not used by this code. NOTE(review): presumably the
            output path -- confirm whether a save step is missing.

    Raises:
        ValueError: If ``filename`` ends in neither ``.ace`` nor ``.meme``.

    Module globals used:
        probefile: When falsy, ``PROBESET`` is rebuilt as
            ``MotifMetrics.ProbeSet('HUMAN_250')``.
        PROBESET: Probe set used for scoring; the FASTA sequences are added
            to its ``probes`` mapping.
    """
    global probefile, PROBESET

    # Raw strings keep `\.` a regex escape rather than a string escape.
    is_meme = bool(re.search(r'\.meme$', filename))
    if re.search(r'\.ace$', filename):
        mdobject = AlignAce.AlignAce(filename)
    elif is_meme:
        mdobject = Meme.Meme(filename)
    else:
        # Previously this fell through and failed later with an
        # UnboundLocalError on `mdobject`.
        raise ValueError(
            "ace2tamo: expected a .ace or .meme file, got %r" % (filename,))

    fsaname = find_fsa(mdobject.fastafile)
    fsaD = MotifMetrics.fasta2seqs(fsaname, 'want_dict')
    probes = list(fsaD.keys())
    if not probefile:
        PROBESET = MotifMetrics.ProbeSet('HUMAN_250')
    for key, seq in fsaD.items():
        PROBESET.probes[key] = seq

    for motif in mdobject.motifs:
        # Each statistic is computed only while it still holds the value
        # tested here (presumably the "unset" default -- confirm against
        # the motif class). E_chi2 is not computed.
        if motif.pvalue == 1:
            motif.pvalue = PROBESET.p_value(motif, probes, 'v')
        if motif.church == 1:
            motif.church = PROBESET.church(motif, probes, 'v')
        if motif.E_site is None:
            motif.E_site = PROBESET.E_sitef(motif, probes, 3, 'v')
        if motif.E_seq is None:
            motif.E_seq = PROBESET.E_seq(motif, probes, 'v')
        if motif.ROC_auc is None:
            motif.ROC_auc = PROBESET.ROC_AUC(motif, probes, 'v')
        if motif.MNCP is None:
            motif.MNCP = PROBESET.MNCP(motif, probes, 'v')
        if is_meme:
            # -log10 of the MEME e-value.
            motif.MAP = -math.log(motif.evalue) / math.log(10)

    for seednum, motif in enumerate(mdobject.motifs):
        motif.seednum = seednum
        kmers = motif.bogus_kmers(100)
        motif.maxscore = -100
        scores = [motif.scan(kmer)[2][0] for kmer in kmers]
        print(Arith.avestd(scores))

    if is_meme:
        # Two stable sorts, as before: the final order is by church, with
        # ties left in pvalue order. key= replaces the Python-2-only cmp.
        mdobject.motifs.sort(key=lambda m: m.pvalue)
        mdobject.motifs.sort(key=lambda m: m.church)

Exemplo n.º 5
def motifs2tamo(motifs, outname):
    """Fill in missing statistics on already-loaded motifs.

    The FASTA file is located with ``find_fsa(outname)`` and its sequence
    ids are used as the probe list for every statistic.

    Args:
        motifs: Sequence of motif objects; they are updated in place.
        outname: Name passed to ``find_fsa`` to locate the FASTA file.
            NOTE(review): nothing is written to this path in the code shown
            here -- confirm whether a save step follows.

    Module globals used:
        probefile: When falsy, ``PROBESET`` is rebuilt as
            ``MotifMetrics.ProbeSet('YEAST')``.
        PROBESET: Probe set used for scoring. It must already be set by the
            caller when ``probefile`` is truthy.
    """
    global probefile, PROBESET
    fsaname = find_fsa(outname)
    fsaD = MotifMetrics.fasta2seqs(fsaname, 'want_dict')
    probes = list(fsaD.keys())
    if not probefile:
        PROBESET = MotifMetrics.ProbeSet('YEAST')

    # Parenthesised single-argument print emits the same text under the
    # Python 2 print statement and the Python 3 print function.
    print("# %d motifs" % len(motifs))
    for motif in motifs:
        # Each statistic is computed only while it still holds the value
        # tested here (presumably the "unset" default -- confirm against
        # the motif class). E_chi2 is not computed.
        if motif.pvalue == 1:
            motif.pvalue = PROBESET.p_value(motif, probes, 'v')
        if motif.church == 1:
            motif.church = PROBESET.church(motif, probes, 'v')
        if motif.E_site is None:
            motif.E_site = PROBESET.E_sitef(motif, probes, 3, 'v')
        if motif.E_seq is None:
            motif.E_seq = PROBESET.E_seq(motif, probes, 'v')
        if motif.ROC_auc is None:
            motif.ROC_auc = PROBESET.ROC_AUC(motif, probes, 'v')
        if motif.MNCP is None:
            motif.MNCP = PROBESET.MNCP(motif, probes, 'v')