def hits(self):
    """Generate one homology search hit per entry of ``self.root.hits``.

    Every hit identifier is split on "_".  The first field is used as the
    hit identifier and the second as the chain; when the identifier holds
    no underscore the chain is the empty string.  The query and match
    sequences of the hit's first alignment are wrapped in a two-row
    ``clustal_alignment`` whose program label is "WU-BLAST".

    Yields:
        ``bioinformatics.homology_search_hit`` objects, in the order of
        ``self.root.hits``.
    """
    from iotbx import bioinformatics

    for entry in self.root.hits:
        fields = entry.identifier.split("_")
        assert 0 < len(fields)

        if len(fields) != 1:
            # Only the first two fields are used; extra ones are ignored.
            pdb, chain = fields[:2]
        else:
            pdb, chain = fields[0], ""

        # Only the first alignment of each hit is reported.
        first = entry.alignments[0]
        pairwise = bioinformatics.clustal_alignment(
            names=["target", "%s_%s" % (pdb, chain)],
            alignments=[first.query.seq, first.match.seq],
            program="WU-BLAST",
        )

        yield bioinformatics.homology_search_hit(
            identifier=pdb,
            chain=chain,
            annotation=entry.description,
            alignment=pairwise,
        )
def hits(self):
    """Generate homology search hits from a multiple alignment.

    Row 0 of ``self.alignment`` is treated as the target; rows from index 1
    up to (but excluding) ``self.max_count`` are reported as hits.  Each row
    name is split on "_": the first field is the hit identifier, the second
    the chain (empty string when there is no underscore).  When the
    alignment object has no ``descriptions`` attribute, the aligned
    sequence itself is used as the annotation.

    Yields:
        ``bioinformatics.homology_search_hit`` objects, each carrying a
        two-row ``clustal_alignment`` (target row, hit row) with program
        label "<unknown>".
    """
    window = slice(1, self.max_count)
    hit_names = self.alignment.names[window]
    hit_rows = self.alignment.alignments[window]

    try:
        annotations = self.alignment.descriptions[window]

    except AttributeError:
        # No descriptions available: fall back to the sequences themselves.
        annotations = hit_rows

    target_row = self.alignment.alignments[0]

    for (label, row, note) in zip(hit_names, hit_rows, annotations):
        fields = label.split("_")
        assert 0 < len(fields)

        if len(fields) != 1:
            # Only the first two fields are used; extra ones are ignored.
            pdb, chain = fields[:2]
        else:
            pdb, chain = fields[0], ""

        pairwise = bioinformatics.clustal_alignment(
            names=["target", "%s_%s" % (pdb, chain)],
            alignments=[target_row, row],
            program="<unknown>",
        )

        yield bioinformatics.homology_search_hit(
            identifier=pdb,
            chain=chain,
            annotation=note,
            alignment=pairwise,
        )
def get_muscle_alignment_ordered(sequences, out = None):
    """Align ``sequences`` and return the rows in the caller's order.

    Each distinct sequence object (as decided by dict-key equality) is given
    a temporary name "Chain_<n>", where <n> is the 1-based position of its
    first occurrence.  The distinct sequences are passed, newline-joined, to
    ``get_muscle_alignment``; the aligned rows are then looked up by their
    temporary name and emitted once per input sequence, so repeated inputs
    share the same aligned row.

    Args:
        sequences: iterable of hashable objects exposing ``.sequence`` and
            ``.name``.
        out: forwarded unchanged to ``get_muscle_alignment``.

    Returns:
        A ``bioinformatics.clustal_alignment`` whose names are the original
        ``seq.name`` values and whose program is taken from the alignment
        returned by ``get_muscle_alignment``.
    """
    from iotbx import bioinformatics

    name_for = {}

    for (position, seq) in enumerate(sequences, start=1):
        # The first occurrence fixes the temporary name; repeats keep it.
        name_for.setdefault(seq, "Chain_%d" % position)

    fasta_records = (
        str(bioinformatics.sequence(name=label, sequence=seq.sequence))
        for (seq, label) in name_for.items()
    )
    alignment = get_muscle_alignment(
        fasta_sequences="\n".join(fasta_records),
        out=out,
    )

    lookup = dict(zip(alignment.names, alignment.alignments))
    assert all(label in lookup for label in name_for.values())

    ordered_names = [seq.name for seq in sequences]
    ordered_rows = [lookup[name_for[seq]] for seq in sequences]

    return bioinformatics.clustal_alignment(
        names=ordered_names,
        alignments=ordered_rows,
        program=alignment.program,
    )
def hits(self):
    """Generate homology search hits from the last search iteration.

    Only the final entry of ``self.root.iterations`` is reported.  Each hit
    accession is split on "_": the first field is the hit identifier and the
    second the chain (empty string when there is no underscore).  The query
    and hit sequences of the first HSP are wrapped in a two-row
    ``clustal_alignment`` with program label "NCBI-BLAST".

    Yields:
        ``bioinformatics.homology_search_hit`` objects; nothing at all when
        ``self.root.iterations`` is empty.
    """
    if not self.root.iterations:
        # A plain return ends the generator.  ``raise StopIteration`` inside
        # a generator body is turned into RuntimeError by PEP 479
        # (Python 3.7+), so it must not be used here.
        return

    from iotbx import bioinformatics

    for h in self.root.iterations[-1].hits:
        pieces = h.accession.split("_")
        assert 0 < len(pieces)

        if len(pieces) == 1:
            pdb = pieces[0]
            chain = ""
        else:
            # Only the first two fields are used; extra ones are ignored.
            (pdb, chain) = pieces[:2]

        # Only the first HSP of each hit is reported.
        first_hsp = h.hsps[0]
        alignment = bioinformatics.clustal_alignment(
            names=["target", "%s_%s" % (pdb, chain)],
            alignments=[first_hsp.query.seq, first_hsp.hit.seq],
            program="NCBI-BLAST",
        )

        yield bioinformatics.homology_search_hit(
            identifier=pdb,
            chain=chain,
            annotation=h.annotation,
            alignment=alignment,
        )
def hits(self):
    """Yield one homology search hit per hit of the final search iteration.

    The last element of ``self.root.iterations`` supplies the hits.  A hit
    accession is split on "_" into an identifier (first field) and a chain
    (second field, or the empty string if the accession has no underscore).
    For each hit, the query and hit sequences of its first HSP form a
    two-row ``clustal_alignment`` labelled "NCBI-BLAST".

    Yields:
        ``bioinformatics.homology_search_hit`` objects.  The generator is
        simply empty when ``self.root.iterations`` is empty.
    """
    if not self.root.iterations:
        # Finish the generator with ``return``: under PEP 479 (enforced from
        # Python 3.7) an explicit ``raise StopIteration`` here would reach
        # the caller as RuntimeError instead of ending iteration.
        return

    from iotbx import bioinformatics

    for h in self.root.iterations[-1].hits:
        pieces = h.accession.split("_")
        assert 0 < len(pieces)

        if len(pieces) == 1:
            pdb, chain = pieces[0], ""
        else:
            # Fields beyond the second are ignored.
            pdb, chain = pieces[:2]

        # Only the first HSP contributes to the reported alignment.
        hsp = h.hsps[0]
        alignment = bioinformatics.clustal_alignment(
            names=["target", "%s_%s" % (pdb, chain)],
            alignments=[hsp.query.seq, hsp.hit.seq],
            program="NCBI-BLAST",
        )

        yield bioinformatics.homology_search_hit(
            identifier=pdb,
            chain=chain,
            annotation=h.annotation,
            alignment=alignment,
        )
def hits(self):
    """Yield a homology search hit for each entry in ``self.root.hits``.

    The identifier of a hit is split on "_" to obtain the hit identifier
    (first field) and the chain (second field; empty string when the
    identifier has no underscore).  The first alignment of the hit provides
    the query and match sequences for a two-row ``clustal_alignment``
    labelled "WU-BLAST".

    Yields:
        ``bioinformatics.homology_search_hit`` objects, one per hit.
    """
    from iotbx import bioinformatics

    for record in self.root.hits:
        parts = record.identifier.split("_")
        assert 0 < len(parts)

        if len(parts) != 1:
            # Fields beyond the second are ignored.
            pdb, chain = parts[:2]
        else:
            pdb, chain = parts[0], ""

        row_names = ["target", "%s_%s" % (pdb, chain)]
        # Only the first alignment of each hit is used.
        best = record.alignments[0]
        row_seqs = [best.query.seq, best.match.seq]

        two_row = bioinformatics.clustal_alignment(
            names=row_names,
            alignments=row_seqs,
            program="WU-BLAST",
        )

        yield bioinformatics.homology_search_hit(
            identifier=pdb,
            chain=chain,
            annotation=record.description,
            alignment=two_row,
        )
def get_muscle_alignment_ordered(sequences, out = None):
    """Align ``sequences`` and return the rows in the caller's order.

    Each distinct sequence object (as decided by dict-key equality) gets a
    temporary name "Chain_<n>", where <n> is the 1-based position of its
    first occurrence.  The distinct sequences are passed, newline-joined, to
    ``get_muscle_alignment``, which returns the alignment together with a
    collection of error messages.

    Error handling:
      * A message containing "Invalid character" triggers re-validation of
        every sequence with ``validate_sequence``; sequences for which it
        reports anything are dropped, and the output is then built from the
        remaining distinct sequences only.
      * Any other non-empty message is raised as ``Sorry``.

    Args:
        sequences: iterable of hashable objects exposing ``.sequence`` and
            ``.name``.
        out: forwarded unchanged to ``get_muscle_alignment``.

    Returns:
        A ``bioinformatics.clustal_alignment`` whose names are the
        ``seq.name`` values of the retained sequences and whose program is
        taken from the returned alignment.

    Raises:
        Sorry: for any non-empty error message other than the
            invalid-character one.
    """
    from iotbx import bioinformatics
    from iotbx.pdb.amino_acid_codes import validate_sequence

    name_for = {}

    for (i, seq) in enumerate(sequences, start=1):
        # The first occurrence fixes the temporary name; repeats keep it.
        name_for.setdefault(seq, "Chain_%d" % i)

    alignment, errors = get_muscle_alignment(
        fasta_sequences="\n".join(
            str(bioinformatics.sequence(name=name, sequence=seq.sequence))
            for (seq, name) in name_for.items()
        ),
        out=out,
    )

    # check for errors and handle:
    #   invalid characters in sequences
    for error in errors:
        error = error.strip()

        if "Invalid character" in error:
            # Iterate over a snapshot of the keys: removing entries while
            # iterating the dict itself raises "dictionary changed size
            # during iteration" on Python 3.
            for seq in list(name_for):
                invalid = validate_sequence(
                    seq.sequence,
                    protein=True,
                    strict_protein=False,
                    nucleic_acid=True,
                    strict_nucleic_acid=False,
                )

                if len(invalid) > 0:
                    name_for.pop(seq)

            # From here on only the surviving distinct sequences are
            # reported; a concrete list avoids holding a live dict view.
            sequences = list(name_for)

        elif len(error) > 0:
            # NOTE(review): Sorry is not imported in this function; it is
            # presumably provided at module level - confirm.
            raise Sorry(error)

    lookup = dict(zip(alignment.names, alignment.alignments))
    assert all(n in lookup for n in name_for.values())

    return bioinformatics.clustal_alignment(
        names=[seq.name for seq in sequences],
        alignments=[lookup[name_for[seq]] for seq in sequences],
        program=alignment.program,
    )