def _handle_convert(in_handle, in_format, out_handle, out_format, alphabet=None):
    """SeqIO conversion function (PRIVATE).

    Looks up ``(in_format, out_format)`` in the module-level ``_converter``
    table. When a (truthy) entry is found it is called with the input
    handle, the output handle and the alphabet, and its result is returned.
    Otherwise the records are read with ``SeqIO.parse`` and written back
    out with ``SeqIO.write``, whose result is returned.
    """
    converter = _converter.get((in_format, out_format))
    if not converter:
        # No dedicated converter for this format pair: fall back to the
        # generic parse-then-write route.
        parsed = SeqIO.parse(in_handle, in_format, alphabet)
        return SeqIO.write(parsed, out_handle, out_format)
    return converter(in_handle, out_handle, alphabet)
def __init__(self, dir_path=None, version=None, scop=None, astral_file=None,
             db_handle=None):
    """Initialise the astral database.

    One of three data sources must be supplied:

    * a directory of SCOP files:
        dir_path - string, the path to the location of the scopseq-x.xx
                   directory (not the directory itself), together with
        version  - a version number;
    * a FASTA file:
        astral_file - string, a path to a fasta file (which will be
                      loaded in memory);
    * a MYSQL database:
        db_handle - a database handle for a MYSQL database containing a
                    table 'astral' with the astral data in it. This can
                    be created using writeToSQL.

    A Scop instance (``scop``) is always required.

    Raises RuntimeError when no data source is given, when ``scop`` is
    missing, or when the directory route is taken without both
    ``dir_path`` and ``version``.
    """
    sources = (astral_file, dir_path, db_handle)
    if all(source is None for source in sources):
        raise RuntimeError("Need either file handle, or (dir_path + "
                           "version) or database handle to construct Astral")
    if not scop:
        raise RuntimeError("Must provide a Scop instance to construct")

    self.scop = scop
    self.db_handle = db_handle

    if not (astral_file or db_handle):
        # Neither a FASTA path nor a database: derive the FASTA path from
        # the SCOP directory layout.
        if version is None or dir_path is None:
            raise RuntimeError("must provide dir_path and version")
        self.version = version
        self.path = os.path.join(dir_path, "scopseq-%s" % version)
        astral_file = os.path.join(
            self.path, "astral-scopdom-seqres-all-%s.fa" % self.version)

    if astral_file:
        # Build a dictionary of SeqRecord objects in the FASTA file,
        # IN MEMORY
        fasta_records = SeqIO.parse(astral_file, "fasta")
        self.fasta_dict = SeqIO.to_dict(fasta_records)

    self.astral_file = astral_file

    # All four lookup tables start out empty.
    self.EvDatasets = {}
    self.EvDatahash = {}
    self.IdDatasets = {}
    self.IdDatahash = {}
def _SeqIO_to_alignment_iterator(handle, format, alphabet=None, seq_count=None):
    """Use Bio.SeqIO to create a MultipleSeqAlignment iterator (PRIVATE).

    Arguments:
     - handle    - handle to the file.
     - format    - string describing the file format.
     - alphabet  - optional Alphabet object, useful when the sequence type
                   cannot be automatically inferred from the file itself
                   (e.g. fasta, phylip, clustal)
     - seq_count - Optional integer, number of sequences expected in each
                   alignment.  Recommended for fasta format files.

    If count is omitted (default) then all the sequences in the file are
    combined into a single MultipleSeqAlignment.

    Raises ValueError if seq_count is given and the final batch of records
    is shorter than seq_count.
    """
    from SAP.Bio import SeqIO
    assert format in SeqIO._FormatToIterator

    if seq_count:
        # Use the count to split the records into batches.
        records = []
        for record in SeqIO.parse(handle, format, alphabet):
            records.append(record)
            if len(records) == seq_count:
                yield MultipleSeqAlignment(records, alphabet)
                records = []
        if records:
            # Left-over records that do not fill a complete batch.
            raise ValueError("Check seq_count argument, not enough sequences?")
    else:
        # Must assume that there is a single alignment using all
        # the SeqRecord objects:
        records = list(SeqIO.parse(handle, format, alphabet))
        if records:
            yield MultipleSeqAlignment(records, alphabet)
    # The generator finishes by falling off the end.  An explicit
    # "raise StopIteration" here would be converted into a RuntimeError
    # under PEP 479 (Python 3.7+), so it is deliberately omitted.
def align(cmdline, pair, kbyte=None, force_type=None, dry_run=False,
          quiet=False, debug=False):
    """Run an external pairwise alignment command and return its output.

    Arguments:
     - cmdline    - sequence of strings describing the command; passed to
                    _build_align_cmdline and joined with spaces in the
                    error message.
     - pair       - the two FASTA filenames to align.
     - kbyte      - forwarded to _build_align_cmdline; if the command fails
                    and kbyte is not 0, the call is retried with kbyte=0.
     - force_type - forwarded to _build_align_cmdline.
     - dry_run    - if true, only print the command line and return None.
     - quiet      - forwarded to _build_align_cmdline.
     - debug      - if true, write the command line to stderr before
                    running it.

    Returns a filehandle (a named temporary file opened for reading) that
    the command was told to write its output to, or None for a dry run.

    Raises ValueError if pair does not hold exactly two items, and OSError
    if the command still exits with a status above 1 when kbyte is 0.
    """
    if not pair or len(pair) != 2:
        raise ValueError("Expected pair of filename, not %s" % repr(pair))

    output_file = tempfile.NamedTemporaryFile(mode='r')
    input_files = (tempfile.NamedTemporaryFile(mode="w"),
                   tempfile.NamedTemporaryFile(mode="w"))

    if dry_run:
        print(_build_align_cmdline(cmdline,
                                   pair,
                                   output_file.name,
                                   kbyte,
                                   force_type,
                                   quiet))
        return

    for filename, input_file in zip(pair, input_files):
        # Pipe the file through Biopython's Fasta parser/writer
        # to make sure it conforms to the Fasta standard (in particular,
        # Wise2 may choke on long lines in the Fasta file)
        # The source file is opened in a "with" block so its handle is
        # closed as soon as the records have been copied.
        with open(filename) as source_handle:
            records = SeqIO.parse(source_handle, 'fasta')
            SeqIO.write(records, input_file, 'fasta')
        input_file.flush()

    input_file_names = [input_file.name for input_file in input_files]

    cmdline_str = _build_align_cmdline(cmdline,
                                       input_file_names,
                                       output_file.name,
                                       kbyte,
                                       force_type,
                                       quiet)

    if debug:
        sys.stderr.write("%s\n" % cmdline_str)

    # os.system returns a wait status; the high byte is the exit code
    # (POSIX encoding).
    status = os.system(cmdline_str) >> 8

    if status > 1:
        if kbyte != 0:  # possible memory problem; could be None
            sys.stderr.write("INFO trying again with the linear model\n")
            return align(cmdline, pair, 0, force_type, dry_run, quiet, debug)
        else:
            raise OSError("%s returned %s" % (" ".join(cmdline), status))

    return output_file
def align(cmdline, pair, kbyte=None, force_type=None, dry_run=False,
          quiet=False, debug=False):
    """Run an external pairwise alignment command and return its output.

    Arguments:
     - cmdline    - sequence of strings describing the command; passed to
                    _build_align_cmdline and joined with spaces in the
                    error message.
     - pair       - the two FASTA filenames to align.
     - kbyte      - forwarded to _build_align_cmdline; if the command fails
                    and kbyte is not 0, the call is retried with kbyte=0.
     - force_type - forwarded to _build_align_cmdline.
     - dry_run    - if true, only print the command line and return None.
     - quiet      - forwarded to _build_align_cmdline.
     - debug      - if true, write the command line to stderr before
                    running it.

    Returns a filehandle (a named temporary file opened for reading) that
    the command was told to write its output to, or None for a dry run.

    Raises ValueError if pair does not hold exactly two items, and OSError
    if the command still exits with a status above 1 when kbyte is 0.
    """
    if not pair or len(pair) != 2:
        raise ValueError("Expected pair of filename, not %s" % repr(pair))

    output_file = tempfile.NamedTemporaryFile(mode='r')
    input_files = (tempfile.NamedTemporaryFile(mode="w"),
                   tempfile.NamedTemporaryFile(mode="w"))

    if dry_run:
        print(_build_align_cmdline(cmdline, pair, output_file.name, kbyte,
                                   force_type, quiet))
        return

    for filename, input_file in zip(pair, input_files):
        # Pipe the file through Biopython's Fasta parser/writer
        # to make sure it conforms to the Fasta standard (in particular,
        # Wise2 may choke on long lines in the Fasta file)
        # Using "with" guarantees the source handle is closed once its
        # records have been copied, instead of leaking it.
        with open(filename) as fasta_handle:
            SeqIO.write(SeqIO.parse(fasta_handle, 'fasta'),
                        input_file, 'fasta')
        input_file.flush()

    input_file_names = [input_file.name for input_file in input_files]

    cmdline_str = _build_align_cmdline(cmdline, input_file_names,
                                       output_file.name, kbyte, force_type,
                                       quiet)

    if debug:
        sys.stderr.write("%s\n" % cmdline_str)

    # os.system returns a wait status; the high byte is the exit code
    # (POSIX encoding).
    status = os.system(cmdline_str) >> 8

    if status > 1:
        if kbyte != 0:  # possible memory problem; could be None
            sys.stderr.write("INFO trying again with the linear model\n")
            return align(cmdline, pair, 0, force_type, dry_run, quiet, debug)
        else:
            raise OSError("%s returned %s" % (" ".join(cmdline), status))

    return output_file
def __init__(self, dir_path=None, version=None, scop=None, astral_file=None,
             db_handle=None):
    """Initialise the astral database.

    The sequence data may come from any one of:

    a directory of SCOP files:
        dir_path - string, the path to the location of the scopseq-x.xx
                   directory (not the directory itself), and
        version  - a version number.

    a FASTA file:
        astral_file - string, a path to a fasta file (which will be
                      loaded in memory)

    a MYSQL database:
        db_handle - a database handle for a MYSQL database containing a
                    table 'astral' with the astral data in it. This can
                    be created using writeToSQL.

    ``scop`` must always be a Scop instance.  RuntimeError is raised if
    no source is supplied, if ``scop`` is missing, or if the directory
    form is used without both ``dir_path`` and ``version``.
    """
    nothing_given = (astral_file is None
                     and dir_path is None
                     and db_handle is None)
    if nothing_given:
        message = ("Need either file handle, or (dir_path + "
                   "version) or database handle to construct Astral")
        raise RuntimeError(message)

    if not scop:
        raise RuntimeError("Must provide a Scop instance to construct")

    self.scop = scop
    self.db_handle = db_handle

    use_directory = not astral_file and not db_handle
    if use_directory:
        if dir_path is None or version is None:
            raise RuntimeError("must provide dir_path and version")

        self.version = version
        self.path = os.path.join(dir_path, "scopseq-%s" % version)
        fasta_name = "astral-scopdom-seqres-all-%s.fa" % self.version
        astral_file = os.path.join(self.path, fasta_name)

    if astral_file:
        # Build a dictionary of SeqRecord objects in the FASTA file,
        # IN MEMORY
        self.fasta_dict = SeqIO.to_dict(SeqIO.parse(astral_file, "fasta"))

    self.astral_file = astral_file

    # The four lookup tables are created empty.
    self.EvDatasets = {}
    self.EvDatahash = {}
    self.IdDatasets = {}
    self.IdDatahash = {}
def _count_codons(self, fasta_file):
    """Count the codons of every record in a FASTA file.

    The counts are stored in ``self.codon_count``, which is reset to a
    fresh copy of ``CodonsDict`` on every call.

    Arguments:
     - fasta_file - path of the FASTA file to read.

    Each sequence is upper-cased and then cut into consecutive
    three-letter slices starting at position 0.  Sequences are always
    upper-cased, so lower-case and mixed-case records are counted the
    same way as upper-case ones.

    Raises TypeError if a slice is not a key of the codon table; this
    includes a trailing slice shorter than three letters.
    """
    with open(fasta_file, 'r') as handle:
        # make the codon dictionary local
        self.codon_count = CodonsDict.copy()

        # iterate over sequence and count all the codons in the FastaFile.
        for cur_record in SeqIO.parse(handle, "fasta"):
            # Normalise case once per record (presumably the codon table
            # is keyed in upper case - verify against CodonsDict).
            dna_sequence = str(cur_record.seq).upper()
            for i in range(0, len(dna_sequence), 3):
                codon = dna_sequence[i:i + 3]
                if codon in self.codon_count:
                    self.codon_count[codon] += 1
                else:
                    raise TypeError("illegal codon %s in gene: %s"
                                    % (codon, cur_record.id))
elif isinstance(value, (int, float, basestring)): attr = {"name": key, "value": str(value)} self.xml_generator.startElement("property", AttributesImpl(attr)) self.xml_generator.endElement("property") if __name__ == "__main__": print("Running quick self test") from SAP.Bio import SeqIO import sys with open("Tests/SeqXML/protein_example.xml", "r") as fileHandle: records = list(SeqIO.parse(fileHandle, "seqxml")) from SAP.Bio._py3k import StringIO stringHandle = StringIO() SeqIO.write(records, stringHandle, "seqxml") SeqIO.write(records, sys.stdout, "seqxml") print("") stringHandle.seek(0) records = list(SeqIO.parse(stringHandle, "seqxml")) SeqIO.write(records, sys.stdout, "seqxml") print("")
self.xml_generator.endElement("property") elif isinstance(value, (int, float, basestring)): attr = {"name": key, "value": str(value)} self.xml_generator.startElement( "property", AttributesImpl(attr)) self.xml_generator.endElement("property") if __name__ == "__main__": print("Running quick self test") from SAP.Bio import SeqIO import sys with open("Tests/SeqXML/protein_example.xml", "r") as fileHandle: records = list(SeqIO.parse(fileHandle, "seqxml")) from SAP.Bio._py3k import StringIO stringHandle = StringIO() SeqIO.write(records, stringHandle, "seqxml") SeqIO.write(records, sys.stdout, "seqxml") print("") stringHandle.seek(0) records = list(SeqIO.parse(stringHandle, "seqxml")) SeqIO.write(records, sys.stdout, "seqxml") print("")