Exemplo n.º 1
0
def parse(handle, format, strict=True):
    """Parse the motifs in an output file from a motif finding program.

    The format name is lower-cased before it is matched, so case is
    ignored. Currently supported formats:
     - AlignAce:         AlignAce output file format
     - ClusterBuster:    Cluster Buster position frequency matrix format
     - XMS:              XMS matrix format
     - MEME:             MEME output file motif
     - MINIMAL:          MINIMAL MEME output file motif
     - MAST:             MAST output file motif
     - TRANSFAC:         TRANSFAC database file format
     - pfm-four-columns: Generic position-frequency matrix format with four columns. (cisbp, homer, hocomoco, neph, tiffin)
     - pfm-four-rows:    Generic position-frequency matrix format with four rows. (scertf, yetfasco, hdpi, idmmpmm, flyfactor survey)
     - pfm:              JASPAR-style position-frequency matrix
     - jaspar:           JASPAR-style multiple PFM format
     - sites:            JASPAR-style sites file

    As files in the pfm and sites formats contain only a single motif,
    it is easier to use Bio.motifs.read() instead of Bio.motifs.parse()
    for those.

    For example:

    >>> from Bio import motifs
    >>> with open("motifs/alignace.out") as handle:
    ...     for m in motifs.parse(handle, "AlignAce"):
    ...         print(m.consensus)
    ...
    TCTACGATTGAG
    CTGCACCTAGCTACGAGTGAG
    GTGCCCTAAGCATACTAGGCG
    GCCACTAGCAGAGCAGGGGGC
    CGACTCAGAGGTT
    CCACGCTAAGAGAAGTGCCGGAG
    GCACGTCCCTGAGCA
    GTCCATCGCAAAGCGTGGGGC
    GAGATCAGAGGGCCG
    TGGACGCGGGG
    GACCAGAGCCTCGCATGGGGG
    AGCGCGCGTG
    GCCGGTTGCTGTTCATTAGG
    ACCGACGGCAGCTAAAAGGG
    GACGCCGGGGAT
    CGACTCGCGCTTACAAGG

    If strict is True (default), the parser will raise a ValueError if the
    file contents do not strictly comply with the specified file format.
    Note that strict is handed on to the TRANSFAC reader only; the readers
    for all other formats are called without it.

    The return value is whatever the read() function of the selected
    format-specific module returns. A ValueError is raised if the format
    name is not one of those listed above.
    """
    fmt = format.lower()

    # Each reader module is imported only when its format is requested,
    # so an unknown format is rejected without importing any of them.
    if fmt == "alignace":
        from Bio.motifs import alignace

        return alignace.read(handle)

    if fmt == "meme":
        from Bio.motifs import meme

        return meme.read(handle)

    if fmt == "minimal":
        from Bio.motifs import minimal

        return minimal.read(handle)

    if fmt == "clusterbuster":
        from Bio.motifs import clusterbuster

        return clusterbuster.read(handle)

    if fmt in ("pfm-four-columns", "pfm-four-rows"):
        from Bio.motifs import pfm

        # One module serves both layouts; it is told which one to expect.
        return pfm.read(handle, fmt)

    if fmt == "xms":
        from Bio.motifs import xms

        return xms.read(handle)

    if fmt == "mast":
        from Bio.motifs import mast

        return mast.read(handle)

    if fmt == "transfac":
        from Bio.motifs import transfac

        return transfac.read(handle, strict)

    if fmt in ("pfm", "sites", "jaspar"):
        from Bio.motifs import jaspar

        # One module serves all three JASPAR-style variants.
        return jaspar.read(handle, fmt)

    raise ValueError("Unknown format %s" % fmt)
Exemplo n.º 2
0
def parse(handle, format, strict=True):
    """Read motifs from the output file of a motif finding program.

    Dispatches to the read() function of the Bio.motifs submodule that
    handles the requested format.

    Currently supported formats (case is ignored):
     - AlignAce:         AlignAce output file format
     - ClusterBuster:    Cluster Buster position frequency matrix format
     - XMS:              XMS matrix format
     - MEME:             MEME output file motif
     - MINIMAL:          MINIMAL MEME output file motif
     - MAST:             MAST output file motif
     - TRANSFAC:         TRANSFAC database file format
     - pfm-four-columns: Generic position-frequency matrix format with four columns. (cisbp, homer, hocomoco, neph, tiffin)
     - pfm-four-rows:    Generic position-frequency matrix format with four rows. (scertf, yetfasco, hdpi, idmmpmm, flyfactor survey)
     - pfm:              JASPAR-style position-frequency matrix
     - jaspar:           JASPAR-style multiple PFM format
     - sites:            JASPAR-style sites file

    As files in the pfm and sites formats contain only a single motif,
    it is easier to use Bio.motifs.read() instead of Bio.motifs.parse()
    for those.

    For example:

    >>> from Bio import motifs
    >>> with open("motifs/alignace.out") as handle:
    ...     for m in motifs.parse(handle, "AlignAce"):
    ...         print(m.consensus)
    ...
    TCTACGATTGAG
    CTGCACCTAGCTACGAGTGAG
    GTGCCCTAAGCATACTAGGCG
    GCCACTAGCAGAGCAGGGGGC
    CGACTCAGAGGTT
    CCACGCTAAGAGAAGTGCCGGAG
    GCACGTCCCTGAGCA
    GTCCATCGCAAAGCGTGGGGC
    GAGATCAGAGGGCCG
    TGGACGCGGGG
    GACCAGAGCCTCGCATGGGGG
    AGCGCGCGTG
    GCCGGTTGCTGTTCATTAGG
    ACCGACGGCAGCTAAAAGGG
    GACGCCGGGGAT
    CGACTCGCGCTTACAAGG

    If strict is True (default), the parser will raise a ValueError if the
    file contents do not strictly comply with the specified file format.
    Only the TRANSFAC reader receives the strict argument; it is not
    passed to the readers for the other formats.

    Raises ValueError if the format name is not recognized.
    """
    kind = format.lower()

    # Readers that need more than the handle are dealt with first.
    if kind == "transfac":
        from Bio.motifs import transfac
        return transfac.read(handle, strict)
    if kind in ('pfm-four-columns', 'pfm-four-rows'):
        from Bio.motifs import pfm
        return pfm.read(handle, kind)
    if kind in ('pfm', 'sites', 'jaspar'):
        from Bio.motifs import jaspar
        return jaspar.read(handle, kind)

    # The remaining readers all share the read(handle) signature, so only
    # the module differs; import the matching one under a common alias.
    if kind == "alignace":
        from Bio.motifs import alignace as reader
    elif kind == "meme":
        from Bio.motifs import meme as reader
    elif kind == "minimal":
        from Bio.motifs import minimal as reader
    elif kind == "clusterbuster":
        from Bio.motifs import clusterbuster as reader
    elif kind == "xms":
        from Bio.motifs import xms as reader
    elif kind == "mast":
        from Bio.motifs import mast as reader
    else:
        raise ValueError("Unknown format %s" % kind)
    return reader.read(handle)