Exemplo n.º 1
0
def run_meme(seqs, outdir, meme_settings):
    """Run MEME motif discovery on the given sequences and return the MEME record.

    The sequences are written as FASTA (via ``seqs_to_fasta``) to a temporary
    file, the MEME executable is invoked on that file, and the text output
    ``meme.txt`` found in ``outdir`` is parsed with ``MEME.read``.

    Parameters:
        seqs: sequences to search; passed unchanged to ``seqs_to_fasta``.
        outdir: output directory handed to MEME with ``-oc``; ``meme.txt`` is
            read from it afterwards.
        meme_settings: mapping that must contain ``'path'`` (the MEME
            executable) and may contain any of ``'cons'``, ``'nmotifs'``,
            ``'mod'``, ``'minw'``, ``'maxw'``; each one present is forwarded
            as ``-<name> <value>``.

    Returns:
        The record produced by ``MEME.read`` for ``<outdir>/meme.txt``.
    """
    # Write through the temp file's own handle and keep it open while MEME
    # runs; the context manager closes (and thereby removes) it afterwards
    # instead of leaving a dangling handle until garbage collection.
    with tempfile.NamedTemporaryFile(mode='w') as infile:
        infile.write(seqs_to_fasta(seqs))
        infile.flush()  # make sure MEME sees the complete file on disk

        # Base shell command
        cmd = [
            meme_settings['path'],
            infile.name,
            '-oc',
            outdir,
            '-dna',  # sequences use DNA alphabet
            '-revcomp',  # allow sites on both strands
            '-maxsize',  # max dataset size in chars
            10000000,
        ]

        # Extend the command with the optional settings that were supplied
        for opt in ['cons', 'nmotifs', 'mod', 'minw', 'maxw']:
            if opt in meme_settings:
                cmd.extend(['-' + opt, meme_settings[opt]])

        # Build a real list: a lazy map() would be exhausted by the join
        # below on Python 3 and call() would then receive nothing.
        cmd = [str(arg) for arg in cmd]

        # Single-argument print() behaves the same on Python 2 and 3
        print(' '.join(cmd))
        call(cmd)

    # Parse text output of MEME
    with open(os.path.join(outdir, 'meme.txt')) as f:
        rec = MEME.read(f)
    return rec
Exemplo n.º 2
0
    def import_from_MEME(self, filename, n=1, mode='biotools'):
        """Import one motif from a MEME text output file (meme.txt).

        If the output holds several motifs, motif number ``n`` is used; the
        first motif is ``n=1``, which is also the default. The motif is looked
        up by the name ``'Motif <n>'``.

        Parameters:
            filename: path of the MEME output file to read.
            n: 1-based number of the motif to import.
            mode: ``'biopython'`` returns the Biopython motif object without
                touching ``self``; ``'biotools'`` (default) stores the motif
                on this instance instead.

        Returns:
            The Biopython motif when ``mode == 'biopython'``; otherwise
            ``None`` (``self._L`` and ``self._n`` are set as a side effect).

        Raises:
            UserWarning: if ``mode`` is neither ``'biopython'`` nor
                ``'biotools'`` (raised after the file has been parsed).
        """
        import Bio.Motif.Parsers.MEME as MEME

        # Use a context manager so the file handle is always closed, even if
        # parsing raises; previously the handle was simply left open.
        with open(filename) as f:
            MEME_object = MEME.read(f)

        motif_name = 'Motif ' + str(n)
        biopython_motif = MEME_object.get_motif_by_name(motif_name)

        if mode == 'biopython':
            return biopython_motif
        if mode != 'biotools':
            raise UserWarning('Not a valid mode.')

        internal_n = len(biopython_motif)
        # This comprehension stands in for initializePositions: one CharDict
        # is built from each indexed element of the Biopython motif.
        self._L = [CharDict(biopython_motif[i]) for i in range(internal_n)]
        self._n = internal_n
Exemplo n.º 3
0
def parse(handle, format):
    """Parse the output file of a motif finding program.

    The ``format`` string selects the parser; matching is case-sensitive.
    Currently supported formats:
     - AlignAce:      AlignAce output file format
     - MEME:          MEME output file motif
     - TRANSFAC:      TRANSFAC database file format
     - pfm:           JASPAR-style position-frequency matrix
     - sites:         JASPAR-style sites file
     - jaspar-pfm:    JASPAR-style position-frequency matrix [DEPRECATED]
     - jaspar-sites:  JASPAR-style sites file [DEPRECATED]
    Files in the pfm and sites formats hold a single motif only, so
    Bio.Motif.read() is more convenient than Bio.Motif.parse() for them.

    The lowercase names "alignace" and "meme" select the newer parsers and
    return the parsed record itself; "AlignAce" and "MEME" use the old
    parsers and return an iterator over the record's motifs. Any other
    format name raises ValueError.

    For example:

    >>> from Bio import Motif
    >>> for motif in Motif.parse(open("Motif/alignace.out"),"AlignAce"):
    ...     print motif.consensus()
    TCTACGATTGAG
    CTGCACCTAGCTACGAGTGAG
    GTGCCCTAAGCATACTAGGCG
    GCCACTAGCAGAGCAGGGGGC
    CGACTCAGAGGTT
    CCACGCTAAGAGAAGTGCCGGAG
    GCACGTCCCTGAGCA
    GTCCATCGCAAAGCGTGGGGC
    GAGATCAGAGGGCCG
    TGGACGCGGGG
    GACCAGAGCCTCGCATGGGGG
    AGCGCGCGTG
    GCCGGTTGCTGTTCATTAGG
    ACCGACGGCAGCTAAAAGGG
    GACGCCGGGGAT
    CGACTCGCGCTTACAAGG
    >>> for motif in Motif.parse(open("Motif/alignace.out"),"alignace"):
    ...     print motif.consensus
    TCTACGATTGAG
    CTGCAGCTAGCTACGAGTGAG
    GTGCTCTAAGCATAGTAGGCG
    GCCACTAGCAGAGCAGGGGGC
    CGACTCAGAGGTT
    CCACGCTAAGAGAGGTGCCGGAG
    GCGCGTCGCTGAGCA
    GTCCATCGCAAAGCGTGGGGC
    GGGATCAGAGGGCCG
    TGGAGGCGGGG
    GACCAGAGCTTCGCATGGGGG
    GGCGTGCGTG
    GCTGGTTGCTGTTCATTAGG
    GCCGGCGGCAGCTAAAAGGG
    GAGGCCGGGGAT
    CGACTCGTGCTTAGAAGG
    """
    # Each parser module is imported only when its format is requested.
    if format == "AlignAce":
        # Old Motif code: hand back an iterator over the record's motifs
        from Bio.Motif.Parsers import AlignAce as legacy_alignace
        return iter(legacy_alignace.read(handle).motifs)

    if format == "alignace":
        # Newer parser: the record itself is returned
        from Bio.Motif import AlignAce as alignace_parser
        return alignace_parser.read(handle)

    if format == "MEME":
        # Old Motif code: hand back an iterator over the record's motifs
        from Bio.Motif.Parsers import MEME as legacy_meme
        return iter(legacy_meme.read(handle).motifs)

    if format == "meme":
        from Bio.Motif import MEME as meme_parser
        return meme_parser.read(handle)

    if format == "TRANSFAC":
        from Bio.Motif import TRANSFAC
        return TRANSFAC.read(handle)

    if format in ('pfm', 'sites'):
        # Single-motif formats: wrap the one motif in a list
        from Bio.Motif import Jaspar
        single_motif = Jaspar.read(handle, format)
        return [single_motif]

    if format == "jaspar-pfm":
        deprecated_motif = OldMotif()._from_jaspar_pfm(handle)
        return iter([deprecated_motif])

    if format == "jaspar-sites":
        deprecated_motif = OldMotif()._from_jaspar_sites(handle)
        return iter([deprecated_motif])

    raise ValueError("Unknown format %s" % format)
    # NOTE(review): the beginning of this snippet is not visible here -- the
    # enclosing def/loop header and the initialisation of found, not_found,
    # promoter_array, myseq and the path variables must be checked there.

    # Copy the FASTA input line by line into a new file, replacing every
    # occurrence of the ">WEIGHTS" placeholder with weights_string.
    fin = open(fasta_sequences, "r") 
    fout = open(fasta_with_weights, "w") 

    for line in fin:
        fout.write( line.replace (">WEIGHTS", weights_string) )

    fin.close()
    fout.close()
    
    # Run the command-line meme tool (DNA alphabet, both strands, motif
    # width between 8 and 14).
    # NOTE(review): the input passed is the literal "seqweights.fasta", not
    # fasta_with_weights -- confirm both refer to the same file.
    subprocess.call(["meme", "-dna", "-revcomp", "seqweights.fasta", "-minw", "8", "-maxw", "14"])

    # Read meme.txt relative to output_directory; the working directory is
    # restored to current_directory further down.
    os.chdir(output_directory)
    # NOTE(review): this handle is never closed in the visible code.
    f = open("meme.txt")

    record = MEME.read(f)
    # Visit every instance of every motif. Instances whose sequence name
    # equals myseq are counted in found and each covered position is
    # incremented in promoter_array; all other instances are counted in
    # not_found (so both counters are per instance, not per motif).
    for motif in record.motifs:
        for instance in motif.instances:
            if instance.sequence_name == myseq:
                print instance.sequence_name, instance.start, instance.length 
                found += 1
                for i in range(instance.length):
                    position = instance.start+i - 1 #subtract one to adjust for meme indexing from 1 rather than 0
                    promoter_array[position] += 1
            else: 
                print "not found"
                not_found += 1
                
    os.chdir(current_directory)
    
# Fraction (0..1, not multiplied by 100) of examined instances that matched.
# NOTE(review): the print statements above indicate Python 2, where "/" on two
# ints is floor division -- if both counters are ints this yields only 0 or 1.
# It also raises ZeroDivisionError when found + not_found == 0. Confirm the
# counters' initial values/types.
percent_found = found / (found+not_found)