Пример #1
0
    def parse_out(self, fo):
        motifs = []
        nucs = {"A":0,"C":1,"G":2,"T":3}
        pseudo = 0.0 # Should be 1/sqrt(# of seqs)
        aligns = {}
        for line in fo.readlines():
            if line.startswith("#"):
                pass
            elif len(line) > 1:
                vals = line.strip().split("\t")
                id, site = [x.strip().split(" ")[1].replace('"',"") for x in vals[8].split(";") if x]
                #if vals[6] == "+":
                if site.upper().find("N") == -1:
                    aligns.setdefault(id, []).append(site)
                #else:
                #    print site, rc(site)
                #    aligns.setdefault(id, []).append(rc(site))
                        
        for id, align in aligns.items():
            #print id, len(align)

            width = len(align[0])
            pfm =  [[0 for x in range(4)] for x in range(width)]
            for row in align:
                for i in range(len(row)):
                    pfm[i][nucs[row[i]]] += 1
            total = float(len(align))
            pwm = [[(x + pseudo/4)/total+(pseudo) for x in row] for row in pfm]
            m = Motif()
            m.align = align[:]
            m.pwm = pwm[:]
            m.pfm = pfm[:]
            m.id = id
            motifs.append(m)    
        return motifs
Пример #2
0
    def parse_out(self, fo):
        """
        Convert MotifSampler output to motifs

        Parameters
        ----------
        fo : file-like
            File object containing MotifSampler output.

        Returns
        -------
        motifs : list
            List of Motif instances.
        """
        motifs = []
        nucs = {"A": 0, "C": 1, "G": 2, "T": 3}
        pseudo = 0.0  # Should be 1/sqrt(# of seqs)
        aligns = {}
        for line in fo.readlines():
            if line.startswith("#"):
                pass
            elif len(line) > 1:
                vals = line.strip().split("\t")
                m_id, site = [
                    x.strip().split(" ")[1].replace('"', "")
                    for x in vals[8].split(";")
                    if x
                ]
                # if vals[6] == "+":
                if site.upper().find("N") == -1:
                    aligns.setdefault(m_id, []).append(site)
                # else:
                #    print site, rc(site)
                #    aligns.setdefault(id, []).append(rc(site))

        for m_id, align in aligns.items():
            # print id, len(align)

            width = len(align[0])
            pfm = [[0 for x in range(4)] for x in range(width)]
            for row in align:
                for i in range(len(row)):
                    pfm[i][nucs[row[i]]] += 1
            total = float(len(align))
            pwm = [[(x + pseudo / 4) / total + (pseudo) for x in row] for row in pfm]
            m = Motif()
            m.align = align[:]
            m.pwm = pwm[:]
            m.pfm = pfm[:]
            m.id = m_id
            motifs.append(m)
        return motifs