Exemplo n.º 1
0
def _handle_convert(in_handle, in_format, out_handle, out_format, alphabet=None):
    """Convert between two sequence formats using open handles (PRIVATE).

    The ``_converter`` table is consulted for a dedicated routine
    registered under the ``(in_format, out_format)`` pair.  When one is
    found it is called with ``(in_handle, out_handle, alphabet)`` and its
    result is returned.  Otherwise the input is parsed with
    ``SeqIO.parse`` and handed to ``SeqIO.write``, whose result is
    returned (presumably the number of records written - confirm against
    SeqIO.write).
    """
    format_pair = (in_format, out_format)
    try:
        direct_converter = _converter[format_pair]
    except KeyError:
        # No dedicated routine registered for this pair of formats.
        direct_converter = None

    if not direct_converter:
        # Generic route: parse into records, then write them back out.
        parsed_records = SeqIO.parse(in_handle, in_format, alphabet)
        return SeqIO.write(parsed_records, out_handle, out_format)

    return direct_converter(in_handle, out_handle, alphabet)
Exemplo n.º 2
0
    def __init__(self,
                 dir_path=None,
                 version=None,
                 scop=None,
                 astral_file=None,
                 db_handle=None):
        """
        Initialise the astral database.

        You must provide either a directory of SCOP files:

        dir_path - string, the path to location of the scopseq-x.xx directory
                   (not the directory itself), and
        version   -a version number.

        or, a FASTA file:

        astral_file - string, a path to a fasta file (which will be loaded in memory)

        or, a MYSQL database:

        db_handle - a database handle for a MYSQL database containing a table
                    'astral' with the astral data in it.  This can be created
                    using writeToSQL.
        """

        if astral_file is None and dir_path is None and db_handle is None:
            raise RuntimeError(
                "Need either file handle, or (dir_path + " +
                "version) or database handle to construct Astral")
        if not scop:
            raise RuntimeError("Must provide a Scop instance to construct")

        self.scop = scop
        self.db_handle = db_handle

        if not astral_file and not db_handle:
            if dir_path is None or version is None:
                raise RuntimeError("must provide dir_path and version")

            self.version = version
            self.path = os.path.join(dir_path, "scopseq-%s" % version)
            astral_file = "astral-scopdom-seqres-all-%s.fa" % self.version
            astral_file = os.path.join(self.path, astral_file)

        if astral_file:
            #Build a dictionary of SeqRecord objects in the FASTA file, IN MEMORY
            self.fasta_dict = SeqIO.to_dict(SeqIO.parse(astral_file, "fasta"))

        self.astral_file = astral_file
        self.EvDatasets = {}
        self.EvDatahash = {}
        self.IdDatasets = {}
        self.IdDatahash = {}
Exemplo n.º 3
0
def _SeqIO_to_alignment_iterator(handle,
                                 format,
                                 alphabet=None,
                                 seq_count=None):
    """Uses Bio.SeqIO to create an MultipleSeqAlignment iterator (PRIVATE).

    Arguments:
     - handle    - handle to the file.
     - format    - string describing the file format.
     - alphabet  - optional Alphabet object, useful when the sequence type
                   cannot be automatically inferred from the file itself
                   (e.g. fasta, phylip, clustal)
     - seq_count - Optional integer, number of sequences expected in each
                   alignment.  Recommended for fasta format files.

    If seq_count is omitted (default) then all the sequences in the file
    are combined into a single MultipleSeqAlignment; nothing is yielded
    when the file holds no records.

    Raises ValueError if seq_count is given and the records left over at
    the end of the file do not fill a complete alignment.
    """
    from SAP.Bio import SeqIO
    assert format in SeqIO._FormatToIterator

    if seq_count:
        # Use the count to split the records into batches.
        records = []
        for record in SeqIO.parse(handle, format, alphabet):
            records.append(record)
            if len(records) == seq_count:
                yield MultipleSeqAlignment(records, alphabet)
                records = []
        if records:
            raise ValueError("Check seq_count argument, not enough sequences?")
    else:
        # Must assume that there is a single alignment using all
        # the SeqRecord objects:
        records = list(SeqIO.parse(handle, format, alphabet))
        if records:
            yield MultipleSeqAlignment(records, alphabet)
    # Falling off the end finishes the generator.  An explicit
    # "raise StopIteration" here would be turned into RuntimeError under
    # PEP 479 (Python 3.7+), so it has been removed.
Exemplo n.º 4
0
def align(cmdline,
          pair,
          kbyte=None,
          force_type=None,
          dry_run=False,
          quiet=False,
          debug=False):
    """Run an external alignment command on a pair of FASTA files.

    Arguments:
     - cmdline    - the command, passed on to _build_align_cmdline and
                    joined with spaces for the error message.
     - pair       - sequence of exactly two FASTA file names.
     - kbyte      - passed on to _build_align_cmdline; when the command
                    fails and kbyte is not 0, one retry is made with
                    kbyte=0 (the "linear model").
     - force_type - passed on to _build_align_cmdline.
     - dry_run    - if true, only print the command that would be run
                    and return None.
     - quiet      - passed on to _build_align_cmdline.
     - debug      - if true, echo the command line to stderr.

    Returns a read-mode temporary file handle the command was told to
    write its output to (None for a dry run).

    Raises ValueError if pair does not hold exactly two items, and
    OSError if the command still fails (exit status > 1) with kbyte=0.
    """
    if not pair or len(pair) != 2:
        raise ValueError("Expected pair of filename, not %s" % repr(pair))

    output_file = tempfile.NamedTemporaryFile(mode='r')
    input_files = (tempfile.NamedTemporaryFile(mode="w"),
                   tempfile.NamedTemporaryFile(mode="w"))

    succeeded = False
    try:
        if dry_run:
            print(
                _build_align_cmdline(cmdline, pair, output_file.name, kbyte,
                                     force_type, quiet))
            return None

        for filename, input_file in zip(pair, input_files):
            # Pipe the file through Biopython's Fasta parser/writer
            # to make sure it conforms to the Fasta standard (in particular,
            # Wise2 may choke on long lines in the Fasta file)
            with open(filename) as source:
                records = SeqIO.parse(source, 'fasta')
                SeqIO.write(records, input_file, 'fasta')
            # The external program reads the file by name, so the data
            # must be on disk before it starts.
            input_file.flush()

        input_file_names = [input_file.name for input_file in input_files]

        cmdline_str = _build_align_cmdline(cmdline, input_file_names,
                                           output_file.name, kbyte,
                                           force_type, quiet)

        if debug:
            sys.stderr.write("%s\n" % cmdline_str)

        # os.system returns a wait status; the exit code is in the high byte.
        status = os.system(cmdline_str) >> 8
        succeeded = status <= 1
    finally:
        # Closing removes the temporary files right away rather than
        # whenever they happen to be garbage collected.
        for input_file in input_files:
            input_file.close()
        if not succeeded:
            output_file.close()

    if succeeded:
        return output_file

    if kbyte != 0:  # possible memory problem; could be None
        sys.stderr.write("INFO trying again with the linear model\n")
        return align(cmdline, pair, 0, force_type, dry_run, quiet, debug)

    raise OSError("%s returned %s" % (" ".join(cmdline), status))
Exemplo n.º 5
0
def _SeqIO_to_alignment_iterator(handle, format, alphabet=None, seq_count=None):
    """Uses Bio.SeqIO to create an MultipleSeqAlignment iterator (PRIVATE).

    Arguments:
     - handle    - handle to the file.
     - format    - string describing the file format.
     - alphabet  - optional Alphabet object, useful when the sequence type
                   cannot be automatically inferred from the file itself
                   (e.g. fasta, phylip, clustal)
     - seq_count - Optional integer, number of sequences expected in each
                   alignment.  Recommended for fasta format files.

    If seq_count is omitted (default) then all the sequences in the file
    are combined into a single MultipleSeqAlignment (an empty file yields
    nothing).  If seq_count is given, a ValueError is raised when the
    final batch of records is incomplete.
    """
    from SAP.Bio import SeqIO
    assert format in SeqIO._FormatToIterator

    if not seq_count:
        # Must assume that there is a single alignment using all
        # the SeqRecord objects:
        records = list(SeqIO.parse(handle, format, alphabet))
        if records:
            yield MultipleSeqAlignment(records, alphabet)
        # A bare return ends the generator cleanly; "raise StopIteration"
        # becomes RuntimeError under PEP 479 (Python 3.7+).
        return

    # Use the count to split the records into batches.
    records = []
    for record in SeqIO.parse(handle, format, alphabet):
        records.append(record)
        if len(records) == seq_count:
            yield MultipleSeqAlignment(records, alphabet)
            records = []
    if records:
        raise ValueError("Check seq_count argument, not enough sequences?")
Exemplo n.º 6
0
def align(cmdline, pair, kbyte=None, force_type=None, dry_run=False, quiet=False, debug=False):
    """Run an external alignment command on a pair of FASTA files.

    Both inputs are first rewritten through the FASTA parser/writer into
    temporary files, and the command line built by _build_align_cmdline
    is then run with os.system.

    Arguments:
     - cmdline    - the command, passed on to _build_align_cmdline and
                    joined with spaces for the error message.
     - pair       - sequence of exactly two FASTA file names.
     - kbyte      - passed on to _build_align_cmdline; a failed run is
                    retried once with kbyte=0 unless it already was 0.
     - force_type - passed on to _build_align_cmdline.
     - dry_run    - if true, print the command instead of running it and
                    return None.
     - quiet      - passed on to _build_align_cmdline.
     - debug      - if true, write the command line to stderr.

    Returns a read-mode temporary filehandle for the output file (None
    for a dry run).  Raises ValueError for a malformed pair and OSError
    when the command exits with a status above 1 even with kbyte=0.
    """
    if not pair or len(pair) != 2:
        raise ValueError("Expected pair of filename, not %s" % repr(pair))

    output_file = tempfile.NamedTemporaryFile(mode='r')
    input_files = tempfile.NamedTemporaryFile(mode="w"), tempfile.NamedTemporaryFile(mode="w")

    keep_output = False
    try:
        if dry_run:
            print(_build_align_cmdline(cmdline,
                                       pair,
                                       output_file.name,
                                       kbyte,
                                       force_type,
                                       quiet))
            return None

        for filename, input_file in zip(pair, input_files):
            # Pipe the file through Biopython's Fasta parser/writer
            # to make sure it conforms to the Fasta standard (in particular,
            # Wise2 may choke on long lines in the Fasta file)
            with open(filename) as fasta_handle:
                SeqIO.write(SeqIO.parse(fasta_handle, 'fasta'), input_file, 'fasta')
            # Flush so the external program sees the complete file.
            input_file.flush()

        input_file_names = [input_file.name for input_file in input_files]

        cmdline_str = _build_align_cmdline(cmdline,
                                           input_file_names,
                                           output_file.name,
                                           kbyte,
                                           force_type,
                                           quiet)

        if debug:
            sys.stderr.write("%s\n" % cmdline_str)

        # Exit code lives in the high byte of the wait status.
        status = os.system(cmdline_str) >> 8
        keep_output = status <= 1
    finally:
        # Release the temporary files deterministically; only a successful
        # run hands the output handle back to the caller.
        for input_file in input_files:
            input_file.close()
        if not keep_output:
            output_file.close()

    if keep_output:
        return output_file

    if kbyte != 0: # possible memory problem; could be None
        sys.stderr.write("INFO trying again with the linear model\n")
        return align(cmdline, pair, 0, force_type, dry_run, quiet, debug)

    raise OSError("%s returned %s" % (" ".join(cmdline), status))
Exemplo n.º 7
0
    def __init__( self, dir_path=None, version=None, scop=None,
                  astral_file=None, db_handle=None):
        """
        Initialise the astral database.

        You must provide either a directory of SCOP files:

        dir_path - string, the path to location of the scopseq-x.xx directory
                   (not the directory itself), and
        version   -a version number.

        or, a FASTA file:

        astral_file - string, a path to a fasta file (which will be loaded in memory)

        or, a MYSQL database:

        db_handle - a database handle for a MYSQL database containing a table
                    'astral' with the astral data in it.  This can be created
                    using writeToSQL.
        """

        if astral_file is None and dir_path is None and db_handle is None:
            raise RuntimeError("Need either file handle, or (dir_path + "
                       + "version) or database handle to construct Astral")
        if not scop:
            raise RuntimeError("Must provide a Scop instance to construct")

        self.scop = scop
        self.db_handle = db_handle

        if not astral_file and not db_handle:
            if dir_path is None or version is None:
                raise RuntimeError("must provide dir_path and version")

            self.version = version
            self.path = os.path.join( dir_path, "scopseq-%s" % version)
            astral_file = "astral-scopdom-seqres-all-%s.fa" % self.version
            astral_file = os.path.join(self.path, astral_file)

        if astral_file:
            #Build a dictionary of SeqRecord objects in the FASTA file, IN MEMORY
            self.fasta_dict = SeqIO.to_dict(SeqIO.parse(astral_file, "fasta"))

        self.astral_file = astral_file
        self.EvDatasets = {}
        self.EvDatahash = {}
        self.IdDatasets = {}
        self.IdDatahash = {}
Exemplo n.º 8
0
def _handle_convert(in_handle,
                    in_format,
                    out_handle,
                    out_format,
                    alphabet=None):
    """Handle-based format conversion for SeqIO (PRIVATE).

    Uses the routine stored in ``_converter`` under the key
    ``(in_format, out_format)`` when there is one, calling it with
    ``(in_handle, out_handle, alphabet)``.  If no routine is registered
    the records are parsed from ``in_handle`` and written to
    ``out_handle`` via ``SeqIO.parse`` / ``SeqIO.write``.  Whatever the
    chosen route returns is passed back to the caller.
    """
    try:
        shortcut = _converter[(in_format, out_format)]
    except KeyError:
        shortcut = None

    if shortcut:
        # A dedicated routine exists for this pair of formats.
        return shortcut(in_handle, out_handle, alphabet)

    # Fall back to the generic parse-then-write route.
    record_stream = SeqIO.parse(in_handle, in_format, alphabet)
    return SeqIO.write(record_stream, out_handle, out_format)
Exemplo n.º 9
0
    def _count_codons(self, fasta_file):
        """Count the codons of every record in a FASTA file (PRIVATE).

        The result is stored in ``self.codon_count``, which starts as a
        copy of ``CodonsDict`` and is incremented once per codon found.
        Each sequence is read in consecutive steps of three characters.

        Arguments:
         - fasta_file - path of the FASTA file to read.

        Raises TypeError when a three-character step (or a shorter
        trailing remainder) is not a key of the codon table.
        """
        with open(fasta_file, 'r') as handle:

            # Work on a copy so the CodonsDict template itself is untouched.
            self.codon_count = CodonsDict.copy()

            # iterate over sequence and count all the codons in the FastaFile.
            for cur_record in SeqIO.parse(handle, "fasta"):
                # Always upper-case the sequence.  Converting only when the
                # whole sequence was lower case left mixed-case input (for
                # example soft-masked sequences) unconverted, so valid
                # codons were rejected as illegal.  Presumably the codon
                # table keys are upper case - confirm against CodonsDict.
                dna_sequence = str(cur_record.seq).upper()
                for i in range(0, len(dna_sequence), 3):
                    codon = dna_sequence[i:i + 3]
                    if codon in self.codon_count:
                        self.codon_count[codon] += 1
                    else:
                        raise TypeError("illegal codon %s in gene: %s" % (codon, cur_record.id))
Exemplo n.º 10
0
                elif isinstance(value, (int, float, basestring)):

                    attr = {"name": key, "value": str(value)}
                    self.xml_generator.startElement("property",
                                                    AttributesImpl(attr))
                    self.xml_generator.endElement("property")


if __name__ == "__main__":
    # Quick round-trip self test: parse the example SeqXML file, write it
    # to an in-memory handle and to stdout, then re-parse the in-memory
    # copy and write that to stdout as well.
    print("Running quick self test")

    from SAP.Bio import SeqIO
    import sys

    with open("Tests/SeqXML/protein_example.xml", "r") as xml_source:
        parsed_records = list(SeqIO.parse(xml_source, "seqxml"))

    from SAP.Bio._py3k import StringIO
    memory_handle = StringIO()

    # Same records go to the in-memory handle first, then to stdout.
    for destination in (memory_handle, sys.stdout):
        SeqIO.write(parsed_records, destination, "seqxml")
    print("")

    # Rewind before reading back what was just written.
    memory_handle.seek(0)
    reparsed_records = list(SeqIO.parse(memory_handle, "seqxml"))

    SeqIO.write(reparsed_records, sys.stdout, "seqxml")
    print("")
Exemplo n.º 11
0
                            self.xml_generator.endElement("property")

                elif isinstance(value, (int, float, basestring)):

                    attr = {"name": key, "value": str(value)}
                    self.xml_generator.startElement(
                        "property", AttributesImpl(attr))
                    self.xml_generator.endElement("property")

if __name__ == "__main__":
    # Self test: read the example file, echo it, and check that the
    # written output can itself be parsed and written again.
    print("Running quick self test")

    from SAP.Bio import SeqIO
    import sys

    example_path = "Tests/SeqXML/protein_example.xml"
    with open(example_path, "r") as source:
        first_pass = list(SeqIO.parse(source, "seqxml"))

    from SAP.Bio._py3k import StringIO
    round_trip = StringIO()

    SeqIO.write(first_pass, round_trip, "seqxml")
    SeqIO.write(first_pass, sys.stdout, "seqxml")
    print("")

    # Re-read from the start of the in-memory copy.
    round_trip.seek(0)
    second_pass = list(SeqIO.parse(round_trip, "seqxml"))

    SeqIO.write(second_pass, sys.stdout, "seqxml")
    print("")