def exercise_model_utils():
    """Exercise three helpers of the ``building`` module.

    1. ``get_nearby_water_selection`` around the TYR residue of the 1YJP
       test model must select atoms 59-63.
    2. ``generate_sidechain_clusters`` may return an empty result only for
       ALA, GLY and UNK among the idealized residues.
    3. ``show_chain_resseq_ranges`` must compress a list of residue ids
       (including insertion codes) into range notation.
    """
    pdb_in = get_1yjp_pdb()
    # The value is not used below; the call is kept so that the accessor
    # chain is still exercised on the test model.
    residue = pdb_in.hierarchy.only_model().chains()[0].residue_groups()[0].only_atom_group()
    sele = pdb_in.hierarchy.atom_selection_cache().selection("resname TYR")
    water_sel = building.get_nearby_water_selection(
        pdb_hierarchy=pdb_in.hierarchy,
        xray_structure=pdb_in.input.xray_structure_simple(),
        selection=sele)
    assert (list(water_sel.iselection()) == [59, 60, 61, 62, 63])
    from mmtbx.monomer_library import idealized_aa
    from mmtbx.monomer_library import server
    mon_lib_srv = server.server()
    ideal_dict = idealized_aa.residue_dict()
    # .values() works on both Python 2 and 3 (iteritems() is Python 2 only),
    # and the dictionary key was never used inside the loop.
    for hierarchy in ideal_dict.values():
        residue = hierarchy.only_model().only_chain().only_residue_group().only_atom_group()
        result = building.generate_sidechain_clusters(residue, mon_lib_srv)
        if (len(result) == 0):
            # no side-chain clusters for UNK as well
            assert (residue.resname in ["ALA", "GLY", "UNK"]), residue.resname
    # show_chain_resseq_ranges
    resids = [(1, ''), (2, ''), (2, 'A'), (4, ''), (5, ''), (6, ''), (10, 'B')]
    import iotbx.pdb.hierarchy
    chain = iotbx.pdb.hierarchy.chain(id='A')
    for (resseq, icode) in resids:
        rg = iotbx.pdb.hierarchy.residue_group(resseq="%4d" % resseq, icode=icode)
        chain.append_residue_group(rg)
    out = StringIO()
    building.show_chain_resseq_ranges(chain.residue_groups(), out=out, prefix=" ")
    assert out.getvalue() == """ chain 'A': 1-2A,4-6,10B\n""", out.getvalue()
def __init__(self, target_map, pdb_hierarchy, xray_structure,
        geometry_restraints_manager, rotamer_eval, d_min):
    """Store the constructor arguments and load the residue libraries.

    After construction the instance additionally carries ``ideal_dict``
    (the result of ``idealized_aa.residue_dict()``) and ``mon_lib_srv``
    (a new ``mmtbx.monomer_library.server.server()``).
    """
    # Must stay the first statement: locals() is snapshotted here, before
    # the imports below bind further local names.
    # NOTE(review): adopt_init_args presumably copies each argument onto
    # self under the same name - confirm against its definition.
    adopt_init_args(self, locals())
    from mmtbx.monomer_library import idealized_aa
    import mmtbx.monomer_library.server
    self.ideal_dict = idealized_aa.residue_dict()
    self.mon_lib_srv = mmtbx.monomer_library.server.server()
def exercise_00():
    """Check the idealized amino-acid library for size, coverage and geometry.

    The residue dictionary must hold 48 entries and contain every
    three-letter code (lower-cased) known to ``iotbx.pdb.amino_acid_codes``.
    Each residue, with and without the ``_h`` suffix, is then run through
    ``pdb_interpretation`` and its bond/angle deviations and rmsZ values
    are printed and asserted to stay below fixed thresholds.
    """
    ideal = iaa.residue_dict()
    assert len(ideal) == 48
    for code in iotbx.pdb.amino_acid_codes.one_letter_given_three_letter:
        assert code.lower() in ideal
    #
    mon_lib_srv = monomer_library.server.server()
    ener_lib = monomer_library.server.ener_lib()
    for code in iotbx.pdb.amino_acid_codes.one_letter_given_three_letter:
        base = code.lower()
        for key in (base, base + "_h"):
            # Each residue is stored as a module-level PDB string in iaa.
            records = flex.std_string(vars(iaa)[key].splitlines())
            processed = monomer_library.pdb_interpretation.process(
                mon_lib_srv=mon_lib_srv,
                ener_lib=ener_lib,
                raw_records=records)
            sites = processed.xray_structure().sites_cart()
            restraints = processed.geometry_restraints_manager(
                show_energies=False,
                plain_pairs_radius=5.0)
            energies = restraints.energies_sites(sites_cart=sites)
            bonds = energies.bond_deviations()
            angles = energies.angle_deviations()
            bonds_z = energies.bond_deviations_z()
            angles_z = energies.angle_deviations_z()
            label = "%5s" % key
            print(label,
                  "bonds : %5.3f %5.3f %5.3f" % bonds,
                  "angles : %5.3f %5.3f %5.3f" % angles)
            assert angles[2] < 1.2, angles[2]
            assert bonds[2] < 0.005, bonds[2]
            print(label,
                  "bonds rmsZ: %5.3f %5.3f %5.3f" % bonds_z,
                  "angles rmsZ: %5.3f %5.3f %5.3f" % angles_z)
            assert angles_z[2] < 0.7, angles_z[2]
            assert bonds_z[2] < 0.7, bonds_z[2]
def exercise_00():
    """Check the idealized amino-acid library for size, coverage and geometry.

    The residue dictionary must hold 44 entries and contain every
    three-letter code (lower-cased) known to ``iotbx.pdb.amino_acid_codes``.
    Each residue, with and without the ``_h`` suffix, is then run through
    ``pdb_interpretation`` and its bond/angle deviations and rmsZ values
    are printed and asserted to stay below fixed thresholds.
    """
    d = iaa.residue_dict()
    assert len(d) == 44
    for aac in iotbx.pdb.amino_acid_codes.one_letter_given_three_letter:
        assert aac.lower() in d
    #
    mon_lib_srv = monomer_library.server.server()
    ener_lib = monomer_library.server.ener_lib()
    for aac in iotbx.pdb.amino_acid_codes.one_letter_given_three_letter:
        aac = aac.lower()
        for aac_ in [aac, aac + "_h"]:
            residue_as_string = iaa.__dict__[aac_]
            rs = flex.std_string(residue_as_string.splitlines())
            processed_pdb_file = monomer_library.pdb_interpretation.process(
                mon_lib_srv=mon_lib_srv,
                ener_lib=ener_lib,
                raw_records=rs)
            sites_cart = processed_pdb_file.xray_structure().sites_cart()
            grm = processed_pdb_file.geometry_restraints_manager(
                show_energies=False,
                plain_pairs_radius=5.0)
            es = grm.energies_sites(sites_cart=sites_cart)
            b = es.bond_deviations()
            a = es.angle_deviations()
            b_z = es.bond_deviations_z()
            a_z = es.angle_deviations_z()
            # A single pre-joined string keeps the output identical to the
            # old comma-separated print statement while remaining valid
            # syntax on both Python 2 and Python 3.
            print(" ".join([
                "%5s" % aac_,
                "bonds : %5.3f %5.3f %5.3f" % b,
                "angles : %5.3f %5.3f %5.3f" % a]))
            assert a[2] < 1.2, a[2]
            assert b[2] < 0.005, b[2]
            print(" ".join([
                "%5s" % aac_,
                "bonds rmsZ: %5.3f %5.3f %5.3f" % b_z,
                "angles rmsZ: %5.3f %5.3f %5.3f" % a_z]))
            assert a_z[2] < 0.7, a_z[2]
            assert b_z[2] < 0.7, b_z[2]
def extend_protein_model(pdb_hierarchy, mon_lib_srv, add_hydrogens=None,
        selection=None):
    """
    Rebuild a sidechain by substituting an ideal amino acid and rotating the
    sidechain to match the old conformation as closely as possible.
    Limited functionality:
      1) Amino-acids only, 2) side chain atoms only. 3) Not termini aware
      4) Not aware of v2.3 vs v3.2 atom names e.g. HB1,HB2 vs HB2,HB3

    :param pdb_hierarchy: hierarchy modified in place; must hold one model
    :param mon_lib_srv: monomer library server passed to the completeness
      check and to extend_residue
    :param add_hydrogens: True always uses the "_h" ideal residue; False
      skips residues whose only missing atoms are H/D/T; None uses the "_h"
      ideal residue only when the residue already contains element H
    :param selection: accepted for interface compatibility.
      NOTE(review): this function has never applied the selection - every
      residue is considered regardless of its value. Confirm whether
      callers expect filtering before changing that.
    :returns: number of atom groups that were rebuilt
    """
    from mmtbx.monomer_library import idealized_aa
    from mmtbx.rotamer import rotamer_eval
    ideal_dict = idealized_aa.residue_dict()
    # First letter of every missing atom name collapses to exactly one of
    # these sets when only hydrogen isotopes are missing.
    hydrogen_only = ({"H"}, {"D"}, {"T"})
    partial_sidechains = []
    for chain in pdb_hierarchy.only_model().chains():
        for residue_group in chain.residue_groups():
            for residue in residue_group.atom_groups():
                if residue.resname.lower() not in ideal_dict:
                    continue
                missing_atoms = rotamer_eval.eval_residue_completeness(
                    residue=residue,
                    mon_lib_srv=mon_lib_srv,
                    ignore_hydrogens=False)
                if (len(missing_atoms) > 0):
                    all_h = {s.strip()[0] for s in missing_atoms} in hydrogen_only
                    if (add_hydrogens is False and all_h):
                        continue
                    partial_sidechains.append(residue)
    # Replacement happens in a second pass so the hierarchy is not modified
    # while it is being traversed.
    for residue in partial_sidechains:
        residue_elements = [e.strip() for e in residue.atoms().extract_element()]
        res_key = residue.resname.lower()
        if (add_hydrogens is None):
            if ("H" in residue_elements):
                res_key += "_h"
        if (add_hydrogens is True):
            res_key += "_h"
        target_atom_group = ideal_dict[res_key].only_model().only_chain().\
            only_residue_group().only_atom_group()
        new_residue = extend_residue(
            residue=residue,
            target_atom_group=target_atom_group,
            mon_lib_srv=mon_lib_srv)
        # The former post-extension completeness check was dropped: its
        # result was discarded (the assertion using it was commented out).
        rg = residue.parent()
        rg.remove_atom_group(residue)
        rg.append_atom_group(new_residue.detached_copy())
    pdb_hierarchy.atoms().reset_i_seq()
    pdb_hierarchy.atoms().reset_serial()
    return len(partial_sidechains)
def __init__ (self, target_map, pdb_hierarchy, xray_structure,
        geometry_restraints_manager, rotamer_eval, d_min) :
    """Store the constructor arguments and load the residue libraries.

    After construction the instance additionally carries ``ideal_dict``
    (the result of ``idealized_aa.residue_dict()``) and ``mon_lib_srv``
    (a new ``mmtbx.monomer_library.server.server()``).
    """
    # Must stay the first statement: locals() is snapshotted here, before
    # the imports below bind further local names.
    # NOTE(review): adopt_init_args presumably copies each argument onto
    # self under the same name - confirm against its definition.
    adopt_init_args(self, locals())
    from mmtbx.monomer_library import idealized_aa
    import mmtbx.monomer_library.server
    self.ideal_dict = idealized_aa.residue_dict()
    self.mon_lib_srv = mmtbx.monomer_library.server.server()
def place_side_chains(hierarchy, original_h, rotamer_manager, placing_range):
    """Re-place side chains for the residues listed in ``placing_range``.

    For every residue group of ``hierarchy`` whose ``resseq`` is contained
    in ``placing_range`` the atom group is first cut down to the four
    backbone atoms (N, CA, C, O); then ``side_chain_placement`` is called
    with the atom group of the same residue number taken from
    ``original_h`` as reference.

    :param hierarchy: hierarchy modified in place; each affected residue
      group must hold exactly one atom group
    :param original_h: hierarchy supplying the reference atom groups
    :param rotamer_manager: passed through to ``side_chain_placement``
    :param placing_range: container of ``resseq`` values to process
    """
    asc = original_h.atom_selection_cache()
    gly_atom_names = set([" N ", " CA ", " C ", " O "])
    for rg in hierarchy.residue_groups():
        if rg.resseq not in placing_range:
            continue
        # cut extra atoms
        ag = rg.only_atom_group()
        for atom in ag.atoms():
            if (atom.name not in gly_atom_names):
                ag.remove_atom(atom=atom)
        # get ag from original hierarchy
        orig_ag = original_h.select(
            asc.selection("resseq %d" % rg.resseq_as_int())
        ).models()[0].chains()[0].residue_groups()[0].atom_groups()[0]
        # The idealized-residue lookup that used to sit here was removed:
        # its result was never used, and it raised KeyError for residue
        # names that are absent from the ideal dictionary.
        side_chain_placement(ag, orig_ag, rotamer_manager)
def place_side_chains(hierarchy, original_h, rotamer_manager, placing_range):
    """Re-place side chains for the residues listed in ``placing_range``.

    Every residue group of ``hierarchy`` whose ``resseq`` is found in
    ``placing_range`` is stripped down to its N/CA/C/O atoms and then
    handed to ``side_chain_placement`` together with the atom group of the
    same residue number from ``original_h``.
    """
    # The dictionary is built exactly as before although nothing below
    # reads it.
    ideal_res_dict = idealized_aa.residue_dict()
    selection_cache = original_h.atom_selection_cache()
    backbone_names = {" N ", " CA ", " C ", " O "}
    for residue_group in hierarchy.residue_groups():
        if rg_is_skipped(residue_group, placing_range):
            continue
        # Trim the atom group to backbone-only.
        target_ag = residue_group.only_atom_group()
        for atom in target_ag.atoms():
            if atom.name in backbone_names:
                continue
            target_ag.remove_atom(atom=atom)
        # Fetch the matching atom group from the original hierarchy.
        resseq_selection = selection_cache.selection(
            "resseq %d" % residue_group.resseq_as_int())
        selected_h = original_h.select(resseq_selection)
        first_rg = selected_h.models()[0].chains()[0].residue_groups()[0]
        reference_ag = first_rg.atom_groups()[0]
        side_chain_placement(target_ag, reference_ag, rotamer_manager)


def rg_is_skipped(residue_group, placing_range):
    """Return True when the residue group's resseq is not in placing_range."""
    return not (residue_group.resseq in placing_range)
def check_missing_atom(pdb_filename):
    """Report whether a PDB file contains a residue with missing heavy atoms.

    Only residue groups holding exactly one atom group are inspected, and
    only residues whose lower-cased name is a key of the idealized residue
    dictionary. Hydrogens are ignored by the completeness check.

    :param pdb_filename: path of the PDB file to read; the file must
      contain a single model
    :returns: True as soon as one inspected residue has missing atoms,
      False otherwise
    """
    pdb_inp = iotbx.pdb.input(file_name=pdb_filename)
    pdb_hierarchy = pdb_inp.construct_hierarchy()
    ideal_dict = idealized_aa.residue_dict()
    for chain in pdb_hierarchy.only_model().chains():
        for residue_group in chain.residue_groups():
            if (residue_group.atom_groups_size() != 1):
                continue
            for residue in residue_group.atom_groups():
                if residue.resname.lower() not in ideal_dict:
                    continue
                missing_atoms = rotamer_eval.eval_residue_completeness(
                    residue=residue,
                    mon_lib_srv=mon_lib_server,
                    ignore_hydrogens=True)
                if (len(missing_atoms) > 0):
                    return True
    return False
def __init__(self,
        pdb_hierarchy,
        params=None,
        secondary_structure_annotation=None,
        reference_map=None,
        crystal_symmetry=None,
        grm=None,
        rama_manager=None,
        rotamer_manager=None,
        log=null_out(),
        verbose=False,
        tried_rama_angles={},
        tried_final_rama_angles={},
        n_run=0):
    """Set up loop idealization and run the CCD trials immediately.

    The constructor copies ``pdb_hierarchy`` into ``self.resulting_pdb_h``
    and then repeats idealization passes while all of these hold: fewer
    than ``params.number_of_ccd_trials`` passes were made, ``params.enabled``
    is true, and either Ramachandran outliers remain or (with
    ``params.make_all_trans``) bad omega angles remain.

    Raises ``Sorry`` when ``pdb_hierarchy`` contains more than one model.
    The reference map is used only when both ``reference_map`` and
    ``crystal_symmetry`` are given.

    NOTE(review): this method reached review with its line breaks
    collapsed; block nesting below was reconstructed from syntax and should
    be confirmed against version control.
    NOTE(review): ``tried_rama_angles`` and ``tried_final_rama_angles``
    default to dict literals, which are created once and mutated below, so
    instances built without these arguments share state. Confirm whether
    that is intended.
    """
    if len(pdb_hierarchy.models()) > 1:
        raise Sorry("Multi-model files are not supported")
    self.original_pdb_h = pdb_hierarchy
    self.secondary_structure_annotation=secondary_structure_annotation
    # Selection cache is built on the input hierarchy and reused for every
    # selection made below.
    asc = pdb_hierarchy.atom_selection_cache()
    self.xrs = pdb_hierarchy.extract_xray_structure(crystal_symmetry=crystal_symmetry)
    self.reference_map = reference_map
    self.resulting_pdb_h = pdb_hierarchy.deep_copy()
    self.resulting_pdb_h.reset_atom_i_seqs()
    self.params = self.process_params(params)
    self.log = log
    self.verbose = verbose
    self.grm = grm
    self.r = rama_manager
    self.ideal_res_dict = idealized_aa.residue_dict()
    self.n_run = n_run
    if self.r is None:
        self.r = rama_eval()
    self.rotamer_manager = rotamer_manager
    if self.rotamer_manager is None:
        self.rotamer_manager = RotamerEval()
    ram = ramalyze.ramalyze(pdb_hierarchy=pdb_hierarchy)
    self.p_initial_rama_outliers = ram.out_percent
    self.p_before_minimization_rama_outliers = None
    self.p_after_minimiaztion_rama_outliers = None
    # Exactly one of map / symmetry given: the map cannot be used.
    n_inputs = [reference_map, crystal_symmetry].count(None)
    if not (n_inputs == 0 or n_inputs == 2):
        print >> log, "Need to have both map and symmetry info. Not using map."
        self.reference_map = None
    # here we are recording what CCD solutions were used to fix particular
    # outliers to not use the same in the next CCD try.
    # Nested dict. First level:
    # key: chain id, value: dict
    # key: resid (string), value: list of tried variants.
    self.tried_rama_angles = tried_rama_angles
    self.tried_final_rama_angles = tried_final_rama_angles
    # Outlier percentage = newline count of the outlier listing divided by
    # the residue count, times 100.
    berkeley_count = utils.list_rama_outliers_h(self.resulting_pdb_h).count("\n")
    self.berkeley_p_before_minimization_rama_outliers = \
        berkeley_count/float(self.resulting_pdb_h.overall_counts().n_residues)*100
    n_bad_omegas = utils.n_bad_omegas(self.resulting_pdb_h)
    self.berkeley_p_after_minimiaztion_rama_outliers = self.berkeley_p_before_minimization_rama_outliers
    self.ref_exclusion_selection = ""
    self.number_of_ccd_trials = 0
    # print "logic expr outcome:", (self.number_of_ccd_trials < 10 and self.berkeley_p_before_minimization_rama_outliers > 0.001)
    # print self.number_of_ccd_trials < 10
    # print "berkeley before rama out:", self.berkeley_p_before_minimization_rama_outliers
    if (self.berkeley_p_before_minimization_rama_outliers <= 0.001 and
            (n_bad_omegas<1 and self.params.make_all_trans)):
        print >> self.log, "No ramachandran outliers, skipping CCD step."
    # These two prints go to stdout, not to self.log.
    print "n_bad_omegas", n_bad_omegas
    print "self.params.make_all_trans",self.params.make_all_trans
    if not self.params.enabled:
        print >> self.log, "Loop idealization is not enabled, use 'enabled=True'."
    while (self.number_of_ccd_trials < self.params.number_of_ccd_trials
            and (self.berkeley_p_after_minimiaztion_rama_outliers > 0.001 or
                (n_bad_omegas>=1 and self.params.make_all_trans))
            and self.params.enabled):
        print >> self.log, "CCD try number, outliers:", self.number_of_ccd_trials, self.berkeley_p_before_minimization_rama_outliers
        processed_chain_ids = []
        for chain in self.resulting_pdb_h.only_model().chains():
            if chain.id not in self.tried_rama_angles.keys():
                self.tried_rama_angles[chain.id] = {}
            if chain.id not in self.tried_final_rama_angles.keys():
                self.tried_final_rama_angles[chain.id] = {}
            print >> self.log, "Idealizing chain %s" % chain.id
            # A chain id is processed only the first time it is seen.
            if chain.id not in processed_chain_ids:
                processed_chain_ids.append(chain.id)
            else:
                continue
            selection = "protein and chain %s and (name N or name CA or name C or name O)" % chain.id
            sel = asc.selection("chain %s" % chain.id)
            chain_h = self.resulting_pdb_h.select(sel)
            m = chain_h.only_model()
            i = 0
            cutted_chain_h = None
            # Keep the first chain of the selection; remove any further
            # chains that share the same id.
            for c in m.chains():
                if i == 0:
                    cutted_chain_h = iotbx.pdb.hierarchy.new_hierarchy_from_chain(c)
                else:
                    print >> self.log, "WARNING!!! Duplicating chain ids! Only the first chain will be processed."
                    print >> self.log, " Removing chain %s with %d residues" % (c.id, len(c.residues()))
                    m.remove_chain(c)
                i += 1
            exclusions, ch_h = self.idealize_chain(
                hierarchy=(cutted_chain_h if cutted_chain_h else chain_h),
                tried_rama_angles_for_chain=self.tried_rama_angles[chain.id],
                tried_final_rama_angles_for_chain=self.tried_final_rama_angles[chain.id])
            if ch_h is not None:
                set_xyz_smart(
                    # dest_h=self.resulting_pdb_h,
                    dest_h=chain,
                    source_h=ch_h)
                for resnum in exclusions:
                    selection += " and not resseq %s" % resnum
            # Per-chain selections are chained with " or "; the trailing
            # separator is cut off after the chain loop.
            self.ref_exclusion_selection += "(%s) or " % selection
            print "self.tried_rama_angles", self.tried_rama_angles
            print "self.tried_final_rama_angles", self.tried_final_rama_angles
        #
        # dumping and reloading hierarchy to do proper rounding of coordinates
        self.resulting_pdb_h = iotbx.pdb.input(
            source_info=None,
            lines=self.resulting_pdb_h.as_pdb_string()).construct_hierarchy()
        berkeley_count = utils.list_rama_outliers_h(self.resulting_pdb_h).count("\n")
        self.berkeley_p_before_minimization_rama_outliers = \
            berkeley_count/float(self.resulting_pdb_h.overall_counts().n_residues)*100
        if len(self.ref_exclusion_selection) > 0:
            self.ref_exclusion_selection = self.ref_exclusion_selection[:-3]
        ram = ramalyze.ramalyze(pdb_hierarchy=self.resulting_pdb_h)
        self.p_before_minimization_rama_outliers = ram.out_percent
        # Cross-check the two outlier counts; on mismatch the model is
        # written out for inspection.
        duke_count = ram.get_outliers_count_and_fraction()[0]
        if berkeley_count != duke_count:
            print >> self.log, "Discrepancy between berkeley and duke after ccd:", berkeley_count, duke_count
            self.resulting_pdb_h.write_pdb_file(file_name="%d%s_discrepancy.pdb" % (self.number_of_ccd_trials, self.params.output_prefix))
        if self.params.debug:
            self.resulting_pdb_h.write_pdb_file(
                file_name="%d%s_all_not_minized.pdb" % (self.number_of_ccd_trials,
                    self.params.output_prefix))
        if self.params.minimize_whole:
            print >> self.log, "minimizing whole chain..."
            print >> self.log, "self.ref_exclusion_selection", self.ref_exclusion_selection
            # print >> sel
            # XXX but first let's check and fix rotamers...
            print >> self.log, "Fixing/checking rotamers in loop idealization..."
            # NOTE(review): excl_sel is computed but not read afterwards.
            excl_sel = self.ref_exclusion_selection
            if len(excl_sel) == 0:
                excl_sel = None
            non_outliers_for_check = asc.selection("(%s)" % self.ref_exclusion_selection)
            pre_result_h = mmtbx.utils.fix_rotamer_outliers(
                pdb_hierarchy=self.resulting_pdb_h,
                grm=self.grm.geometry,
                xrs=self.xrs,
                map_data=self.reference_map,
                radius=5,
                mon_lib_srv=None,
                rotamer_manager=self.rotamer_manager,
                backrub_range=None, # don't sample backrub at this point
                non_outliers_to_check=non_outliers_for_check, # bool selection
                asc=asc,
                verbose=True,
                log=self.log)
            # Without a map minimize with the Ramachandran wrapper,
            # otherwise minimize against the map.
            if self.reference_map is None:
                minimize_wrapper_for_ramachandran(
                    hierarchy=self.resulting_pdb_h,
                    xrs=self.xrs,
                    original_pdb_h=self.original_pdb_h,
                    excl_string_selection=self.ref_exclusion_selection,
                    grm=self.grm,
                    log=None,
                    ss_annotation=self.secondary_structure_annotation)
            else:
                mwwm = minimize_wrapper_with_map(
                    pdb_h=self.resulting_pdb_h,
                    xrs=self.xrs,
                    target_map=self.reference_map,
                    grm=self.grm,
                    ss_annotation=self.secondary_structure_annotation,
                    number_of_cycles=Auto,
                    log=self.log)
        if self.params.debug:
            self.resulting_pdb_h.write_pdb_file(
                file_name="%d%s_all_minized.pdb" % (self.number_of_ccd_trials,
                    self.params.output_prefix))
        # Refresh the statistics that drive the loop condition.
        ram = ramalyze.ramalyze(pdb_hierarchy=self.resulting_pdb_h)
        self.p_after_minimiaztion_rama_outliers = ram.out_percent
        berkeley_count = utils.list_rama_outliers_h(self.resulting_pdb_h).count("\n")
        duke_count = ram.get_outliers_count_and_fraction()[0]
        n_bad_omegas = utils.n_bad_omegas(self.resulting_pdb_h)
        self.berkeley_p_after_minimiaztion_rama_outliers = \
            berkeley_count/float(self.resulting_pdb_h.overall_counts().n_residues)*100
        if berkeley_count != duke_count:
            print >> self.log, "Discrepancy between berkeley and duke after min:", berkeley_count, duke_count
        else:
            print >> self.log, "Number of Rama outliers after min:", berkeley_count
        print >> self.log, "Number of bad omegas:", n_bad_omegas
        self.number_of_ccd_trials += 1
def secondary_structure_from_sequence(pdb_str,
        sequence=None, pdb_hierarchy_template=None, rotamer_manager=None):
    """ Return pdb.hierarchy with secondary structure according to sequence or
    reference hierarchy. If reference hierarchy provided, the resulting
    hierarchy will be rigid body aligned to it. Residue numbers will start
    from 1.
    pdb_str - "ideal" structure at least 2 residues long.
    sequence - string with sequence (one-letter codes)
    pdb_hierarchy_template - reference hierarchy.
    rotamer_manager - passed to side_chain_placement; a RotamerEval() is
      created when omitted.

    Exactly one of sequence / pdb_hierarchy_template must be given
    (asserted). Raises Sorry when the template is empty or when fewer than
    one residue is requested.
    """
    if rotamer_manager is None:
        rotamer_manager = RotamerEval()
    pht = pdb_hierarchy_template
    # Identity tests against None throughout: they do not depend on how the
    # hierarchy or sequence objects implement comparison or truthiness.
    use_template = pht is not None
    assert (sequence is None) != (pht is None)
    if use_template:
        lk = len(pht.altloc_indices().keys())
        if lk == 0:
            raise Sorry(
                "Hierarchy template in secondary_structure_from_sequence is empty")
        else:
            assert lk == 1, \
                "Alternative conformations are not supported"
    if sequence is not None:
        number_of_residues = len(sequence)
    else:
        number_of_residues = len(
            pht.models()[0].chains()[0].conformers()[0].residues())
    if number_of_residues < 1:
        raise Sorry('sequence should contain at least one residue.')
    ideal_res_dict = idealized_aa.residue_dict()
    real_res_list = None
    if use_template:
        real_res_list = pht.models()[0].chains()[0].residue_groups()
    pdb_hierarchy = iotbx.pdb.input(source_info=None, lines=pdb_str).\
        construct_hierarchy()
    truncate_to_poly_gly(pdb_hierarchy)
    chain = pdb_hierarchy.models()[0].chains()[0]
    current_gly_ag = chain.residue_groups()[0].atom_groups()[0]
    new_chain = iotbx.pdb.hierarchy.chain(id="A")
    new_chain.pre_allocate_residue_groups(
        number_of_additional_residue_groups=number_of_residues)
    r, t = get_r_t_matrices_from_structure(pdb_str)
    for j in range(number_of_residues):
        # put ALA
        rg = iotbx.pdb.hierarchy.residue_group(icode="")
        rg.resseq = j+1
        new_chain.append_residue_group(residue_group=rg)
        ag_to_place = current_gly_ag.detached_copy()
        rg.append_atom_group(atom_group=ag_to_place)
        # Advance the template atom group by one rotation/translation step
        # so the next copy lands at the next residue position.
        current_gly_ag.atoms().set_xyz(
            r.elems*current_gly_ag.atoms().extract_xyz()+t.elems)
        if use_template:
            current_reference_ag = real_res_list[j].atom_groups()[0]
        else:
            current_reference_ag = \
                ideal_res_dict[three_one[sequence[j]].lower()].models()[0].\
                chains()[0].residue_groups()[0].atom_groups()[0]
        side_chain_placement(ag_to_place, current_reference_ag, rotamer_manager)
    new_pdb_h = iotbx.pdb.hierarchy.new_hierarchy_from_chain(new_chain)
    # align to real
    if use_template:
        fixed_sites, moving_sites = get_matching_sites_cart_in_both_h(
            pht, new_pdb_h)
        assert len(fixed_sites) == len(moving_sites)
        lsq_fit_obj = superpose.least_squares_fit(
            reference_sites=fixed_sites,
            other_sites=moving_sites)
        new_pdb_h.atoms().set_xyz(
            lsq_fit_obj.r.elems*new_pdb_h.atoms().extract_xyz()
            + lsq_fit_obj.t.elems)
    return new_pdb_h
def correct_sequence(pdb_hierarchy, sequences, truncate_to_cbeta=False,
        out=sys.stdout):
    """
    Modify the sequence for the pdb hierarchy to match that of the aligned
    sequence. This will remove incompatible atoms; the sidechains will still
    need to be extended separately. For proteins only - mismatches in nucleic
    acids will only result in a warning.

    :param pdb_hierarchy: iotbx.pdb.hierarchy.root object
    :param sequences: list of iotbx.bioinformatics.sequence objects
    :param truncate_to_cbeta: chop off entire sidechain to C-beta (default:
      leave common atoms in place)
    :param out: output filehandle (default = stdout)
    :returns: number of atom_group objects renamed
    """
    from mmtbx.monomer_library import idealized_aa
    import mmtbx.validation.sequence
    from iotbx.pdb.amino_acid_codes import three_letter_given_one_letter
    if out is None:
        # "print >> None" used to fall back to stdout; keep that behaviour.
        out = sys.stdout
    seq_validation = mmtbx.validation.sequence.validation(
        pdb_hierarchy=pdb_hierarchy,
        sequences=sequences,
        log=out)
    for chain_seq in seq_validation.chains:
        if (chain_seq.chain_type == mmtbx.validation.sequence.NUCLEIC_ACID):
            if (len(chain_seq.mismatch) > 0):
                # The format has two placeholders; previously only the chain
                # id was supplied, which raised TypeError.
                out.write(
                    " WARNING: will skip %d mismatches in nucleic acid chain '%s'\n" %
                    (len(chain_seq.mismatch), chain_seq.chain_id))
    res_dict = idealized_aa.residue_dict()
    # Atom names of each ideal residue without hydrogens, keyed by the
    # lower-case residue name.
    expected_names = {}
    for resname, ideal_res in res_dict.items():
        if "_h" not in resname:
            expected_names[resname] = set([a.name for a in ideal_res.atoms()])
    n_changed = 0
    for chain in pdb_hierarchy.only_model().chains():
        if (not chain.is_protein()):
            continue
        for chain_seq in seq_validation.chains:
            if (chain.id != chain_seq.chain_id) or (len(chain_seq.mismatch) == 0):
                continue
            for residue_group in chain.residue_groups():
                resid = residue_group.resid()
                if (resid not in chain_seq.mismatch):
                    continue
                idx = chain_seq.mismatch.index(resid)
                new_code = chain_seq.actual_code[idx]
                new_resname = three_letter_given_one_letter.get(new_code)
                if (new_resname is None):
                    continue
                expected_atoms = expected_names[new_resname.lower()]
                if (truncate_to_cbeta):
                    expected_atoms = expected_names["ala"]
                # Remember the name before renaming; reading it afterwards
                # made the report show the new name on both sides.
                old_resname = residue_group.atom_groups()[0].resname
                # Counted over all atom groups of the residue.
                n_removed = 0
                for atom_group in residue_group.atom_groups():
                    n_changed += 1
                    atom_group.resname = new_resname
                    for atom in atom_group.atoms():
                        if (atom.name not in expected_atoms):
                            atom_group.remove_atom(atom)
                            n_removed += 1
                out.write(" chain '%s' %s %s --> %s (%d atoms removed)\n" %
                    (chain.id, resid, old_resname, new_resname, n_removed))
    pdb_hierarchy.atoms().reset_i_seq()
    return n_changed
def extend_protein_model(pdb_hierarchy,
        selection=None,
        hydrogens=Auto,
        max_atoms_missing=None,
        log=None,
        modify_segids=True,
        prefilter_callback=None,
        idealized_residue_dict=None,
        skip_non_protein_chains=True):
    """
    Replace all sidechains with missing non-hydrogen atoms in a PDB hierarchy.

    :param pdb_hierarchy: hierarchy modified in place; must hold one model
    :param selection: boolean atom selection; a residue is considered only
      when all of its atoms are selected (default: all atoms)
    :param hydrogens: passed through to extend_residue
    :param max_atoms_missing: when given, only residues missing fewer atoms
      than this are rebuilt
    :param log: object with a write() method (default: null_out())
    :param modify_segids: set the segid of every rebuilt atom to "XXXX"
    :param prefilter_callback: callable(residue) -> bool deciding whether a
      candidate residue is rebuilt (default: accept all)
    :param idealized_residue_dict: replacement for the built-in idealized
      residue dictionary. NOTE(review): with a custom dictionary residues
      are not pre-filtered here; the former key lookup in this branch had
      no effect (it ended in ``pass``) and was removed.
    :param skip_non_protein_chains: skip chains for which is_protein() is
      false
    :returns: number of atom groups that were rebuilt
    """
    from mmtbx.monomer_library import idealized_aa
    from mmtbx.rotamer import rotamer_eval
    import mmtbx.monomer_library.server
    from iotbx.pdb import common_residue_names_get_class
    from scitbx.array_family import flex
    if (prefilter_callback is not None):
        assert hasattr(prefilter_callback, "__call__")
    else:
        prefilter_callback = lambda r: True
    ideal_dict = idealized_residue_dict
    if (ideal_dict is None):
        ideal_dict = idealized_aa.residue_dict()
    if (log is None):
        log = null_out()
    mon_lib_srv = mmtbx.monomer_library.server.server()
    pdb_atoms = pdb_hierarchy.atoms()
    if (selection is None):
        selection = flex.bool(pdb_atoms.size(), True)
    partial_sidechains = []
    # Messages go through log.write() so the function behaves the same on
    # Python 2 and 3 ("print >> log" raises TypeError on Python 3).
    for chain in pdb_hierarchy.only_model().chains():
        if (not chain.is_protein()) and (skip_non_protein_chains):
            log.write(" skipping non-protein chain '%s'\n" % chain.id)
            continue
        for residue_group in chain.residue_groups():
            atom_groups = residue_group.atom_groups()
            if (len(atom_groups) > 1):
                log.write(" %s %s has multiple conformations, skipping\n" %
                    (chain.id, residue_group.resid()))
                continue
            residue = atom_groups[0]
            i_seqs = residue.atoms().extract_i_seq()
            residue_sel = selection.select(i_seqs)
            if (not residue_sel.all_eq(True)):
                continue
            if (idealized_residue_dict is None):
                res_class = common_residue_names_get_class(residue.resname)
                if (res_class != "common_amino_acid"):
                    log.write(" skipping non-standard residue %s\n" %
                        residue.resname)
                    continue
            missing_atoms = rotamer_eval.eval_residue_completeness(
                residue=residue,
                mon_lib_srv=mon_lib_srv,
                ignore_hydrogens=True)
            if (len(missing_atoms) > 0):
                log.write(" missing %d atoms in %s: %s\n" % (
                    len(missing_atoms), residue.id_str(),
                    ",".join(missing_atoms)))
                if ((max_atoms_missing is None) or
                        (len(missing_atoms) < max_atoms_missing)):
                    if (prefilter_callback(residue)):
                        partial_sidechains.append(residue)
    # Replacement happens in a second pass so the hierarchy is not modified
    # while it is being traversed.
    for residue in partial_sidechains:
        new_residue = extend_residue(residue=residue,
            ideal_dict=ideal_dict,
            hydrogens=hydrogens,
            mon_lib_srv=mon_lib_srv,
            match_conformation=True)
        if (modify_segids):
            for atom in new_residue.atoms():
                atom.segid = "XXXX"
        rg = residue.parent()
        rg.remove_atom_group(residue)
        rg.append_atom_group(new_residue.detached_copy())
    pdb_hierarchy.atoms().reset_i_seq()
    pdb_hierarchy.atoms().reset_serial()
    return len(partial_sidechains)
def correct_sequence(pdb_hierarchy, sequences, truncate_to_cbeta=False,
        out=sys.stdout):
    """
    Modify the sequence for the pdb hierarchy to match that of the aligned
    sequence. This will remove incompatible atoms; the sidechains will still
    need to be extended separately. For proteins only - mismatches in nucleic
    acids will only result in a warning.

    :param pdb_hierarchy: iotbx.pdb.hierarchy.root object
    :param sequences: list of iotbx.bioinformatics.sequence objects
    :param truncate_to_cbeta: chop off entire sidechain to C-beta (default:
      leave common atoms in place)
    :param out: output filehandle (default = stdout)
    :returns: number of atom_group objects renamed
    """
    from mmtbx.monomer_library import idealized_aa
    import mmtbx.validation.sequence
    from iotbx.pdb.amino_acid_codes import three_letter_given_one_letter
    seq_validation = mmtbx.validation.sequence.validation(
        pdb_hierarchy=pdb_hierarchy,
        sequences=sequences,
        log=out)
    for chain_seq in seq_validation.chains:
        if (chain_seq.chain_type == mmtbx.validation.sequence.NUCLEIC_ACID):
            if (len(chain_seq.mismatch) > 0):
                # The format has two placeholders; previously only the chain
                # id was supplied, which raised TypeError.
                print(" WARNING: will skip %d mismatches in nucleic acid chain '%s'" %
                    (len(chain_seq.mismatch), chain_seq.chain_id), file=out)
    res_dict = idealized_aa.residue_dict()
    # Atom names of each ideal residue without hydrogens, keyed by the
    # lower-case residue name.
    expected_names = {
        resname: set([a.name for a in ideal_res.atoms()])
        for resname, ideal_res in res_dict.items()
        if "_h" not in resname
    }
    n_changed = 0
    for chain in pdb_hierarchy.only_model().chains():
        if (not chain.is_protein()):
            continue
        for chain_seq in seq_validation.chains:
            if (chain.id != chain_seq.chain_id) or (len(chain_seq.mismatch) == 0):
                continue
            for residue_group in chain.residue_groups():
                resid = residue_group.resid()
                if (resid not in chain_seq.mismatch):
                    continue
                idx = chain_seq.mismatch.index(resid)
                new_code = chain_seq.actual_code[idx]
                new_resname = three_letter_given_one_letter.get(new_code)
                if (new_resname is None):
                    continue
                expected_atoms = expected_names[new_resname.lower()]
                if (truncate_to_cbeta):
                    expected_atoms = expected_names["ala"]
                # Remember the name before renaming; reading it afterwards
                # made the report show the new name on both sides.
                old_resname = residue_group.atom_groups()[0].resname
                # Counted over all atom groups of the residue.
                n_removed = 0
                for atom_group in residue_group.atom_groups():
                    n_changed += 1
                    atom_group.resname = new_resname
                    for atom in atom_group.atoms():
                        if (atom.name not in expected_atoms):
                            atom_group.remove_atom(atom)
                            n_removed += 1
                print(" chain '%s' %s %s --> %s (%d atoms removed)" %
                    (chain.id, resid, old_resname, new_resname, n_removed),
                    file=out)
    pdb_hierarchy.atoms().reset_i_seq()
    return n_changed
def extend_protein_model(pdb_hierarchy,
        selection=None,
        hydrogens=Auto,
        max_atoms_missing=None,
        log=None,
        modify_segids=True,
        prefilter_callback=None,
        idealized_residue_dict=None,
        skip_non_protein_chains=True):
    """
    Replace all sidechains with missing non-hydrogen atoms in a PDB hierarchy.

    :param pdb_hierarchy: hierarchy modified in place; must hold one model
    :param selection: boolean atom selection; a residue is considered only
      when all of its atoms are selected (default: all atoms)
    :param hydrogens: passed through to extend_residue
    :param max_atoms_missing: when given, only residues missing fewer atoms
      than this are rebuilt
    :param log: object with a write() method (default: null_out())
    :param modify_segids: set the segid of every rebuilt atom to "XXXX"
    :param prefilter_callback: callable(residue) -> bool deciding whether a
      candidate residue is rebuilt (default: accept all)
    :param idealized_residue_dict: replacement for the built-in idealized
      residue dictionary. NOTE(review): with a custom dictionary residues
      are not pre-filtered here; the former key lookup in this branch had
      no effect (it ended in ``pass``) and was removed.
    :param skip_non_protein_chains: skip chains for which is_protein() is
      false
    :returns: number of atom groups that were rebuilt
    """
    from mmtbx.monomer_library import idealized_aa
    from mmtbx.rotamer import rotamer_eval
    import mmtbx.monomer_library.server
    from iotbx.pdb import common_residue_names_get_class
    from scitbx.array_family import flex
    if (prefilter_callback is not None):
        assert hasattr(prefilter_callback, "__call__")
    else:
        prefilter_callback = lambda r: True
    ideal_dict = idealized_residue_dict
    if (ideal_dict is None):
        ideal_dict = idealized_aa.residue_dict()
    if (log is None):
        log = null_out()
    mon_lib_srv = mmtbx.monomer_library.server.server()
    pdb_atoms = pdb_hierarchy.atoms()
    if (selection is None):
        selection = flex.bool(pdb_atoms.size(), True)
    use_builtin_dict = idealized_residue_dict is None
    partial_sidechains = []
    # Messages go through log.write() so the function behaves the same on
    # Python 2 and 3 ("print >> log" raises TypeError on Python 3).
    for chain in pdb_hierarchy.only_model().chains():
        if (not chain.is_protein()) and (skip_non_protein_chains):
            log.write(" skipping non-protein chain '%s'\n" % chain.id)
            continue
        for residue_group in chain.residue_groups():
            atom_groups = residue_group.atom_groups()
            if (len(atom_groups) > 1):
                log.write(" %s %s has multiple conformations, skipping\n" %
                    (chain.id, residue_group.resid()))
                continue
            residue = atom_groups[0]
            i_seqs = residue.atoms().extract_i_seq()
            residue_sel = selection.select(i_seqs)
            if (not residue_sel.all_eq(True)):
                continue
            if use_builtin_dict:
                res_class = common_residue_names_get_class(residue.resname)
                if (res_class != "common_amino_acid"):
                    log.write(" skipping non-standard residue %s\n" %
                        residue.resname)
                    continue
            missing_atoms = rotamer_eval.eval_residue_completeness(
                residue=residue,
                mon_lib_srv=mon_lib_srv,
                ignore_hydrogens=True)
            if (len(missing_atoms) == 0):
                continue
            log.write(" missing %d atoms in %s: %s\n" % (
                len(missing_atoms), residue.id_str(),
                ",".join(missing_atoms)))
            within_limit = ((max_atoms_missing is None) or
                (len(missing_atoms) < max_atoms_missing))
            if within_limit and prefilter_callback(residue):
                partial_sidechains.append(residue)
    # Replacement happens in a second pass so the hierarchy is not modified
    # while it is being traversed.
    for residue in partial_sidechains:
        new_residue = extend_residue(residue=residue,
            ideal_dict=ideal_dict,
            hydrogens=hydrogens,
            mon_lib_srv=mon_lib_srv,
            match_conformation=True)
        if (modify_segids):
            for atom in new_residue.atoms():
                atom.segid = "XXXX"
        rg = residue.parent()
        rg.remove_atom_group(residue)
        rg.append_atom_group(new_residue.detached_copy())
    pdb_hierarchy.atoms().reset_i_seq()
    pdb_hierarchy.atoms().reset_serial()
    return len(partial_sidechains)