Пример #1
def main():
    """Print pseudocounted n-mer frequencies (widths 1-6) for a FASTA file.

    The FASTA path is taken from ``sys.argv[1]``.  For every width ``w`` the
    table starts with a pseudocount of 1 for each word returned by
    ``permute(w)``; every observed count is then added to both the word and
    its reverse complement.  A header line and one ``<nmer> <frequency>``
    line per word (sorted by word) are written to standard output.
    """
    seqsD = Fasta.load(sys.argv[1])
    seqs = seqsD.values()
    for w in range(1, 7):
        allnmers = permute(w)
        nmersT = MotifTools.top_nmers(w, seqs, 'with counts', 'purge Ns')

        nmersD = {}
        total = 0
        for nmer in allnmers:
            nmersD[nmer] = 1  # Pseudo count
            total += 1

        for nmer, count in nmersT:
            try:
                rc = MotifTools.revcomplement(nmer)
                nmersD[nmer] = nmersD[nmer] + count
                nmersD[rc] = nmersD[rc] + count
                total = total + 2 * count
            except KeyError:
                # The word (or its reverse complement) is not one of the
                # permute(w) words, so it is left out of the table.
                continue

        print("# freq in %s (total %d with pseudocounts)" % (sys.argv[1], total))
        # Sorted so that the listing is reproducible between runs.
        for nmer in sorted(nmersD):
            print("%-7s %20.17f" % (nmer, float(nmersD[nmer]) / total))
Пример #2
def main():
    """Write a pseudocounted n-mer frequency table to standard output.

    Reads the FASTA file named by ``sys.argv[1]`` and, for each width from 1
    to 6, counts every n-mer on both strands: each count reported by
    ``MotifTools.top_nmers`` is credited to the word and to its reverse
    complement, on top of a pseudocount of 1 per word from ``permute``.
    Each width produces a ``# freq in ...`` header followed by one line per
    word, in sorted order.
    """
    seqs = Fasta.load(sys.argv[1]).values()
    for width in range(1, 7):
        words = permute(width)
        observed = MotifTools.top_nmers(width, seqs, 'with counts', 'purge Ns')

        # Every word starts at 1 so that no frequency is ever zero.
        tally = {}
        total = 0
        for word in words:
            tally[word] = 1
            total += 1

        for word, count in observed:
            try:
                partner = MotifTools.revcomplement(word)
                tally[word] = tally[word] + count
                tally[partner] = tally[partner] + count
                total = total + 2 * count
            except KeyError:
                # Words outside the permute() alphabet are ignored.
                continue

        print("# freq in %s (total %d with pseudocounts)" % (sys.argv[1], total))
        for word in sorted(tally):
            print("%-7s %20.17f" % (word, float(tally[word]) / total))
Пример #3
def main(fastafile, outDirectory):
    """Write pseudocounted n-mer frequencies of *fastafile* to a ``.freq`` file.

    Parameters:
        fastafile    -- path of the FASTA file to analyse.
        outDirectory -- directory that receives ``<basename>.freq``, where
                        ``<basename>`` is the part of *fastafile* after the
                        last ``/``.

    For each width from 1 to 6 a header line and one ``<nmer> <frequency>``
    line per word (sorted by word) are appended to the report; the complete
    report is written once, after all widths have been processed.
    """
    # History (AD, 1/2/09): 'fastafile' became a parameter so that the
    # function can be called from another script.
    basename = fastafile.split('/')[-1]
    seqsD = Fasta.load(fastafile)
    seqs = seqsD.values()
    output = []
    for w in range(1, 7):
        allnmers = permute(w)
        nmersT = MotifTools.top_nmers(w, seqs, 'with counts', 'purge Ns')

        nmersD = {}
        total = 0
        for nmer in allnmers:
            nmersD[nmer] = 1  # Pseudo count
            total += 1

        for nmer, count in nmersT:
            try:
                rc = MotifTools.revcomplement(nmer)
                nmersD[nmer] = nmersD[nmer] + count
                nmersD[rc] = nmersD[rc] + count
                total = total + 2 * count
            except KeyError:
                # Words that are not in the permute(w) table are ignored.
                continue

        # AD 02-27-09: each entry carries its own '\n' so the file has one
        # record per line.
        output.append("# freq in %s (total %d with pseudocounts)\n" % (basename, total))
        for nmer in sorted(nmersD):
            output.append("%-7s %20.17f\n" % (nmer, float(nmersD[nmer]) / total))

    # NOTE(review): the statement that wrote each entry was missing from this
    # copy of the function; writing the collected lines verbatim is assumed.
    outPath = '%s/%s.freq' % (outDirectory, basename)
    outFile = open(outPath, 'w')
    try:
        outFile.writelines(output)
    finally:
        outFile.close()
Пример #4
 def study_seqs(self,seqs):
     """Store log2 frequencies of the 1- to 5-mers of *seqs* in ``self.D``.

     For each depth the counts reported by ``MotifTools.top_nmers`` are
     normalised by their sum.  A word that differs from its reverse
     complement shares its count with it, so both receive
     ``log2(0.5 * count / total)``; a word that equals its own reverse
     complement receives ``log2(count / total)``.  Finally
     ``self.highestorder`` is set to 5.
     """
     log_of_2 = math.log(2)
     for depth in range(1, 6):
         nmersT = MotifTools.top_nmers(depth, seqs, "TUPLES")
         total = 0
         for nmer, count in nmersT:
             total = total + count
         for nmer, count in nmersT:
             rc = MotifTools.revcomplement(nmer)
             if rc != nmer:
                 # Half of the count belongs to each strand.
                 f_2 = math.log(0.5 * float(count) / total) / log_of_2
                 self.D[nmer] = f_2
                 self.D[rc] = f_2
             else:
                 self.D[nmer] = math.log(float(count) / total) / log_of_2
     # Disabled debugging dump: range(0) never iterates.  Widen the range to
     # print the per-depth probabilities and their sum.
     for depth in range(0):
         total = 0
         for k in self.D.keys():
             if len(k) == depth:
                 total = total + pow(2, self.D[k])
                 print("%s %s" % (k, pow(2, self.D[k])))
         print("%s %s" % (depth, total))
     self.highestorder = 5
Пример #5
 def freq_from_seqs(self,seqs):
     """Fill ``self.F`` with pseudocounted frequencies of the 1- to 6-mers.

     Sets ``self.highestorder`` to 6.  For every width ``w`` a copy of the
     words returned by ``permute(w)`` is stored in ``self.nmers_by_size[w]``,
     each word starts with a pseudocount of 1, and every count reported by
     ``MotifTools.top_nmers`` is added to the word and to its reverse
     complement.  The resulting tallies divided by the grand total are
     stored in ``self.F``.
     """
     self.highestorder = 6
     for w in range(1, 7):
         allnmers = permute(w)
         nmersT = MotifTools.top_nmers(w, seqs, 'with counts', 'purge Ns')
         self.nmers_by_size[w] = allnmers[:]

         nmersD = {}
         total = 0.0  # float, so the division below is never truncated
         for nmer in allnmers:  # Pseudo count
             nmersD[nmer] = 1
             total = total + 1

         for nmer, count in nmersT:
             try:
                 rc = MotifTools.revcomplement(nmer)
                 nmersD[nmer] = nmersD[nmer] + count
                 nmersD[rc] = nmersD[rc] + count
                 total = total + 2 * count
             except KeyError:
                 # Words that are not in the permute(w) table are ignored.
                 continue

         for nmer, tally in list(nmersD.items()):
             rc = MotifTools.revcomplement(nmer)
             f = tally / total
             self.F[nmer] = f
             self.F[rc] = f
Пример #6
 def freq_from_seqs_old(self,seqs):
     """Fill ``self.F`` with frequencies of the 1- to 5-mers (older variant).

     Sets ``self.highestorder`` to 4.  For each depth the first element of
     every ``MotifTools.top_nmers`` tuple is recorded in
     ``self.nmers_by_size[depth]`` and the counts are normalised by their
     sum.  A word equal to its own reverse complement keeps its full
     frequency; any other word gets half, and the same value is stored for
     its reverse complement.
     """
     self.highestorder = 4
     for depth in range(1, 6):
         nmersT = MotifTools.top_nmers(depth, seqs, "TUPLES")
         self.nmers_by_size[depth] = [entry[0] for entry in nmersT]
         total = 0
         for nmer, count in nmersT:
             total = total + count
         for nmer, count in nmersT:
             rc = MotifTools.revcomplement(nmer)
             if nmer == rc:
                 # Correct the top_nmers palindrome count: nothing to share.
                 f = float(count) / total
             else:
                 f = float(count) / total / 2
             self.F[nmer] = f
             self.F[rc] = f
     # For debugging: range(0) never iterates, widen it to dump the table.
     for depth in range(0):
         total = 0
         for k in self.F.keys():
             if len(k) == depth:
                 total = total + self.F[k]
                 print("%s %s" % (k, self.F[k]))
         print("%s %s" % (depth, total))
Пример #7
    def all_Wmers(self,N,seq):
        """Evaluate the background term of every width-N window of *seq*.

        For each start position ``i`` the method computes ``logQtot``: the
        background log-probability of the whole sequence minus a correction
        for the window plus up to ``highestorder`` flanking letters on each
        side, with ``MCP`` terms added back for the window positions whose
        inverted ``self.mask`` entry is non-zero.

        NOTE(review): in this version the Wmer construction is commented out,
        ``forw``, ``rev`` and ``W`` are never filled and nothing is returned,
        so the computed values are discarded -- confirm against the upstream
        implementation before relying on this method.
        """
        forw = []
        rev  = []
        seqrc = MotifTools.revcomplement(seq)
        Mlh = theMarkovBackground.highestorder
        Mlb = theMarkovBackground.logbackground
        MCP = theMarkovBackground.CP
        Fbg = Mlb(seq)
        Rbg = Mlb(seqrc)
        # A real list (not a lazy map object) because it is indexed below.
        nmask = [1 - x for x in self.mask]

        # ?? QUESTION: Is it sensible to compute the background probabilities
        # this way?
        # 1) BG of complementary strand is taken as equal to primary strand.
        # 2) Letters inside the motif window are not used for conditional
        #    probabilities.  As a result, the calculation essentially breaks
        #    down to the log probability the background emits the sequence to
        #    the left of the window plus the log probability the background
        #    emits the sequence to the right.
        # 3) I've worked out an efficient way to compute this by
        #    a) Compute the background probability for the entire
        #       probe/sequence
        #    b) (Quick) Compute logQdiff below
        #    c) Subtract

        for i in range(len(seq)-N+1):
            subseq = seq[i:i+N]

            # Build Wmer information
            #Wtmp        = Wmer(subseq)
            left        = seq[0:i]
            right       = seq[i+N:]
            #Wtmp.lflank = left
            #Wtmp.rflank = right
            #if i==0: Wtmp.src    = seq
            #Wtmp.srcQ   = Fbg
            #Wtmp.i      = i

            # This is the fast way
            lflank = left[-Mlh:]
            rflank = right[0:Mlh]
            logQdiff = Mlb(lflank + subseq + rflank) - Mlb(lflank) - Mlb(rflank)
            logQtot = Fbg - logQdiff

            # Add a bit back for intervening bases in the "gap"
            gapbg = 0
            for p in range(N):
                gapbg = gapbg + MCP[subseq[p]] * nmask[p]
            logQtot = logQtot + gapbg

            # Build Wmer-reverse complement information
            #Wtmprc = Wmer(Wtmp.rc)
            #Wtmprc.lflank = seqrc[0:-(i+N)]  #Check this in case it is ever necessary
            #if i!=0:
            #    Wtmprc.rflank = seqrc[-i:]   #Necessary [11-12-02]
            #    Wtmprc.rflank = ''
            #Wtmprc.logQtot = Wtmp.logQtot
            #Wtmprc.srcQ    = Wtmp.srcQ
            #Wtmprc.i       = i
        W = []
        #seq.c_wmerbgs = MDsupport.list2double(map(lambda x: x.logQtot, W))
Пример #8
 def has_wmer(self,wmer):
     """Return True if *wmer* or its reverse complement is in ``self.wmers``.

     NOTE(review): the body of the ``if`` was missing from this copy of the
     method; returning the truth value of the visible condition is assumed.
     """
     if wmer in self.wmers:
         return True
     # Only compute the reverse complement when the direct lookup fails.
     return MotifTools.revcomplement(wmer) in self.wmers
Пример #9
def _with_revcomps(seqs, revcomp):
    """Return the distinct items of *seqs* plus their reverse complements."""
    found = {}
    for s in seqs:
        found[s] = 1
        found[revcomp(s)] = 1
    return list(found.keys())


def probOvlp(A,B,thresh=0.7,verbose=None):
    """Score the overlap between the high-scoring sequences of two motifs.

    The motif with the larger ``width`` is treated as "wide", the other as
    "narrow" (A is wide on a tie).  For each motif the sequences returned by
    ``bestseqs(thresh * maxscore)`` are collected together with their reverse
    complements, and the number of (narrow, wide) pairs in which the narrow
    sequence occurs inside the wide sequence or its reverse complement is
    counted.

    Parameters:
        A, B    -- motif objects providing ``width``, ``maxscore``,
                   ``bestseqs`` and slicing.
        thresh  -- fraction of ``maxscore`` used as the ``bestseqs`` cutoff.
        verbose -- when true, the counts are written to standard output.

    Returns the value of ``Arith.hypgeomsummore`` for those counts.

    The sequence lists are cached on the motifs as ``bestWide`` and
    ``bestNarrow``.  NOTE(review): a cached list is reused whatever *thresh*
    is passed on later calls -- confirm that this is intended.
    """
    if A.width >= B.width:
        Wide, Narrow = A, B
    else:
        Wide, Narrow = B, A

    RC = MotifTools.revcomplement

    # Slice of the wide motif from -1 to width+1 -- presumably one extra
    # position on each side; TODO confirm against the motif class.
    newWide = Wide[-1, Wide.width + 1]
    if 'bestWide' in Wide.__dict__:
        bestWide = Wide.bestWide
    else:
        bestWide = _with_revcomps(newWide.bestseqs(thresh * newWide.maxscore), RC)
        Wide.bestWide = bestWide  # cached on the original, unsliced motif
    Wide = newWide

    if 'bestNarrow' in Narrow.__dict__:
        bestNarrow = Narrow.bestNarrow
    else:
        bestNarrow = _with_revcomps(Narrow.bestseqs(thresh * Narrow.maxscore), RC)
        Narrow.bestNarrow = bestNarrow

    countNarrow = len(bestNarrow)
    countWide   = len(bestWide)

    numtotal    = math.pow(4, Wide.width)
    fudgefactor = math.pow(4, Wide.width - Narrow.width)

    bestWideTups = [(x, RC(x)) for x in bestWide]

    # NOTE(review): the original kept an empty ``delj`` list and a trailing
    # removal loop whose body was missing; nothing was ever appended to it,
    # so no wide sequence is removed after a match and one wide sequence may
    # be counted for several narrow sequences.  Confirm the intended count.
    countBoth = 0
    for m_narrow in bestNarrow:
        for forward, reverse in bestWideTups:
            if (m_narrow in forward) or (m_narrow in reverse):
                countBoth += 1

    if verbose:
        import sys
        # No newline: the trailing blank keeps later output on the same line.
        sys.stdout.write('%10d %10d %10d %10d | %10d  %5d ' % (
            countWide, numtotal, countNarrow * fudgefactor, countBoth,
            countNarrow, Wide.width - Narrow.width) + ' ')
    p = Arith.hypgeomsummore(countWide,                 #Num Interesting
                             numtotal,                  #All k-mers
                             countNarrow * fudgefactor, #Number picked
                             countBoth                ) #Number found
    return p
