def align_chain(self, i):
    """
    Align chain ``i`` against every entry of ``self.sequences`` and pick the
    best match.

    A candidate replaces the current best when its sequence identity (unknown
    'X' residues skipped) is strictly greater than the best so far, which
    starts at ``self.minimum_identity``.  When the two identities are judged
    equal by ``approx_equal_core``, the candidate wins only if its matched
    range is no wider than the current best and its sequence is strictly
    shorter.

    Returns a tuple ``(alignment, sequence_name, sequence_index)``; all three
    are None when no candidate was accepted.
    """
    import mmtbx.alignment
    target = self.chains[i]
    winner = (None, None, None)
    top_identity = self.minimum_identity
    narrowest = sys.maxsize
    shortest = sys.maxsize
    for index, candidate in enumerate(self.sequences):
        aligner = mmtbx.alignment.align(
            seq_a=target.sequence,
            seq_b=candidate.sequence)
        aligned = aligner.extract_alignment()
        score = aligned.calculate_sequence_identity(skip_chars=['X'])
        # Span between the last and the first matched ('m') position; used
        # together with the sequence length to break ties between alignments
        # of equal identity (narrowest match range, shortest sequence).
        last_match = aligned.match_codes.rfind('m')
        first_match = aligned.match_codes.find('m')
        span = last_match - first_match
        n_residues = len(candidate.sequence)
        accept = score > top_identity
        if not accept:
            # Only consult the approximate comparison when the candidate is
            # not strictly better.
            accept = (
                approx_equal_core(score, top_identity, 1.e-6, 1.e10, None, "")
                and span <= narrowest
                and n_residues < shortest)
        if not accept:
            continue
        top_identity = score
        winner = (aligned, candidate.name, index)
        narrowest = span
        shortest = n_residues
    return winner
def find_similar_chains(pdb_hierarchy,
                        sequence,
                        source_info=None,
                        first_chain_only=False,
                        remove_alt_confs=True,
                        reset_chain_id=None,
                        min_identity=0.95,
                        min_identity_epsilon=0.02,
                        log=null_out()):
    """
    Collect the protein chains of a single-model hierarchy that resemble the
    target sequence, each copied into its own new PDB hierarchy.

    A chain qualifies when its sequence identity to ``sequence`` (unknown 'X'
    residues skipped) is at least ``min_identity - min_identity_epsilon``.
    The copy is given ``reset_chain_id`` as its chain ID when that is not
    None, otherwise the original ID.  With ``remove_alt_confs`` set, only the
    first atom group of each residue group is kept, and only if its altloc is
    blank or 'A'.  The copied atoms then receive ADPs from the extracted
    x-ray structure after it has been converted to isotropic.

    Returns a list of ``related_chain`` objects, each recording the original
    chain ID, the new hierarchy and the identity.  When ``first_chain_only``
    is true the search stops after the first qualifying chain.  ``log`` is
    accepted but not used here.
    """
    import mmtbx.alignment
    import iotbx.pdb.hierarchy
    hierarchy_api = iotbx.pdb.hierarchy
    matches = []
    for chain in pdb_hierarchy.only_model().chains():
        if not chain.is_protein():
            continue
        padded = chain.as_padded_sequence(
            pad=True,
            substitute_unknown='X',
            pad_at_start=False)
        pairwise = mmtbx.alignment.align(
            seq_a=padded,
            seq_b=sequence).extract_alignment()
        identity = pairwise.calculate_sequence_identity(skip_chars=['X'])
        if not (identity >= min_identity - min_identity_epsilon):
            continue
        # Build a fresh root -> model -> chain container for the copy.
        root = hierarchy_api.root()
        model = hierarchy_api.model()
        root.append_model(model)
        if reset_chain_id is None:
            copy_id = chain.id
        else:
            copy_id = reset_chain_id
        chain_copy = hierarchy_api.chain(id=copy_id)
        model.append_chain(chain_copy)
        for residue_group in chain.residue_groups():
            rg_copy = hierarchy_api.residue_group(
                resseq=residue_group.resseq,
                icode=residue_group.icode)
            for k, atom_group in enumerate(residue_group.atom_groups()):
                keep = (not remove_alt_confs) or (
                    k == 0 and atom_group.altloc.strip() in ['', 'A'])
                if keep:
                    rg_copy.append_atom_group(atom_group.detached_copy())
            chain_copy.append_residue_group(rg_copy)
        # Round-trip through an x-ray structure to make the ADPs isotropic.
        xray_structure = root.extract_xray_structure()
        xray_structure.convert_to_isotropic()
        root.atoms().set_adps_from_scatterers(
            xray_structure.scatterers(),
            xray_structure.unit_cell())
        matches.append(related_chain(
            source_info=source_info,
            chain_id=chain.id,
            pdb_hierarchy=root,
            identity=identity))
        if first_chain_only:
            break
    return matches
def find_similar_chains(pdb_hierarchy,
                        sequence,
                        source_info=None,
                        first_chain_only=False,
                        remove_alt_confs=True,
                        reset_chain_id=None,
                        min_identity=0.95,
                        min_identity_epsilon=0.02,
                        log=null_out()):
    """
    Extract every protein chain whose fractional sequence identity to the
    target reaches ``min_identity - min_identity_epsilon``.

    Each matching chain is copied into a brand-new hierarchy (root, model,
    chain) and wrapped in a ``related_chain`` together with its identity,
    its original chain ID and ``source_info``.  ``reset_chain_id``, when not
    None, becomes the ID of the copied chain.  If ``remove_alt_confs`` is
    true, an atom group is copied only when it is the first one in its
    residue group and its altloc is blank or 'A'.  ADPs on the copy are set
    from the extracted x-ray structure after conversion to isotropic.

    Returns the list of ``related_chain`` objects; with ``first_chain_only``
    the loop ends right after the first match.  ``log`` is not used by this
    function.
    """
    import mmtbx.alignment
    import iotbx.pdb.hierarchy
    found = []
    for chain in pdb_hierarchy.only_model().chains():
        if not chain.is_protein():
            continue
        chain_sequence = chain.as_padded_sequence(
            pad=True, substitute_unknown='X', pad_at_start=False)
        aligner = mmtbx.alignment.align(seq_a=chain_sequence, seq_b=sequence)
        identity = aligner.extract_alignment().calculate_sequence_identity(
            skip_chars=['X'])
        if not (identity >= min_identity - min_identity_epsilon):
            continue
        new_root = iotbx.pdb.hierarchy.root()
        new_model = iotbx.pdb.hierarchy.model()
        new_root.append_model(new_model)
        target_id = chain.id if reset_chain_id is None else reset_chain_id
        extracted = iotbx.pdb.hierarchy.chain(id=target_id)
        new_model.append_chain(extracted)
        for source_rg in chain.residue_groups():
            groups = source_rg.atom_groups()
            target_rg = iotbx.pdb.hierarchy.residue_group(
                resseq=source_rg.resseq,
                icode=source_rg.icode)
            for position, group in enumerate(groups):
                if remove_alt_confs:
                    # Drop everything except a leading blank/'A' conformer.
                    if position > 0:
                        continue
                    if group.altloc.strip() not in ['', 'A']:
                        continue
                target_rg.append_atom_group(group.detached_copy())
            extracted.append_residue_group(target_rg)
        # Isotropic ADPs are obtained via the x-ray structure representation.
        xrs = new_root.extract_xray_structure()
        xrs.convert_to_isotropic()
        scatterers = xrs.scatterers()
        new_root.atoms().set_adps_from_scatterers(scatterers, xrs.unit_cell())
        hit = related_chain(
            source_info=source_info,
            chain_id=chain.id,
            pdb_hierarchy=new_root,
            identity=identity)
        found.append(hit)
        if first_chain_only:
            break
    return found
def align_chain(self, i):
    """
    Align chain ``i`` against every entry of ``self.sequences`` and return
    the best match.

    A candidate becomes the new best when its sequence identity (unknown 'X'
    residues skipped) is strictly greater than the best so far, which starts
    at ``self.minimum_identity``, or when the identity is exactly equal and
    its matched range is strictly narrower.

    Returns a tuple ``(alignment, sequence_name, sequence_index)``; all three
    are None when no candidate was accepted.
    """
    import mmtbx.alignment
    chain = self.chains[i]
    best_alignment = None
    best_sequence = None
    best_seq_id = None
    best_identity = self.minimum_identity
    # sys.maxint no longer exists on Python 3; sys.maxsize is available on
    # both Python 2.6+ and Python 3 and serves the same "larger than any real
    # width" purpose here.
    best_width = sys.maxsize
    for i_seq, seq_object in enumerate(self.sequences):
        alignment = mmtbx.alignment.align(
            seq_a=chain.sequence,
            seq_b=seq_object.sequence).extract_alignment()
        identity = alignment.calculate_sequence_identity(skip_chars=['X'])
        # if the identities of two alignments are equal, then we prefer the
        # alignment that has the narrowest range for the match
        width = (alignment.match_codes.rfind('m')
                 - alignment.match_codes.find('m'))
        if ((identity > best_identity) or
                (identity == best_identity and width < best_width)):
            best_identity = identity
            best_alignment = alignment
            best_sequence = seq_object.name
            best_seq_id = i_seq
            best_width = width
    return best_alignment, best_sequence, best_seq_id