def validate_residues(self):
  """Populate self.residues with one ValidationResidue per residue of
  self.pdb_hierarchy by walking overlapping residue triplets.

  The middle residue of every triplet is appended; the first triplet
  additionally contributes its leading residue (index=0) and a triplet
  flagged with three.end contributes its trailing residue (index=2), so
  chain-terminal residues are not skipped.
  """
  from mmtbx.conformation_dependent_library import generate_protein_threes
  from mmtbx.rotamer import ramachandran_eval, rotamer_eval
  # this is so we generate rama_eval only once
  rama_eval = ramachandran_eval.RamachandranEval()
  rota_eval = rotamer_eval.RotamerEval()
  rotamer_id = rotamer_eval.RotamerID() # loads in the rotamer names
  threes = generate_protein_threes(hierarchy=self.pdb_hierarchy,
                                   include_non_linked=True,
                                   backbone_only=False,
                                   geometry=None)
  for i, three in enumerate(threes):
    # first triplet: also emit its leading residue
    if i == 0:
      self.residues.append(
        ValidationResidue(three, rama_eval, rota_eval, rotamer_id, index=0))
    # every triplet: emit its middle residue (default index)
    self.residues.append(
      ValidationResidue(three, rama_eval, rota_eval, rotamer_id))
    # chain-final triplet: also emit its trailing residue
    if three.end:
      self.residues.append(
        ValidationResidue(three, rama_eval, rota_eval, rotamer_id, index=2))
def exercise_phi_psi_extraction():
  """Regression test (Python 2 syntax): for a battery of small PDB record
  sets, check that generate_protein_threes yields the expected number of
  dummy dihedral proxies both with and without only_psi_phi_pairs.

  Each test case pairs [expected_without, expected_with_pairs_only] counts
  with a raw-records string defined at module level.
  """
  for n_prox, raw_records in [
      ([0, 0], rec_1_residue),
      ([0, 0], rec_2_residues),
      ([4, 2], rec_3_residues),
      ([6, 4], rec_4_residues),
      ([0, 0], rec_2_chains),
      ([0, 0], rec_2_segids),
      ([8, 4], rec_2_acs_edge),
      ([8, 4], rec_2_acs_middle),
      ([6, 4], rec_4_residues_isertions),
      ([12, 10], pdb_1yjp),
      ([8, 4], pdb_1yjp_minus_4),
      ([4, 2], rec_3_res_ac_h),
      ([8, 4], rec_2_acs_middle_one_atom_1),
      ([8, 4], rec_2_acs_middle_one_atom_2),
      ([8, 4], rec_2_acs_middle_one_atom_3),
      ]:
    tmp_hierarchy = iotbx.pdb.input(
      source_info=None,
      lines=flex.split_lines(raw_records)).construct_hierarchy()
    # opp is both the index into n_prox and the only_psi_phi_pairs flag
    for opp in range(2):
      proxies = []
      for three in generate_protein_threes(
          hierarchy=tmp_hierarchy,
          geometry=None):
        ppp = three.get_dummy_dihedral_proxies(only_psi_phi_pairs=opp)
        print three, 'ppp', len(ppp)
        proxies.extend(ppp)
      print len(proxies), n_prox
      assert len(proxies) == n_prox[opp], \
        "Expected %d, got %d" % (
          n_prox[opp],
          len(proxies),
          )
def update_restraints(hierarchy,
                      geometry, # restraints_manager,
                      current_geometry=None, # xray_structure!!
                      sites_cart=None,
                      cdl_proxies=None,
                      ideal=True,
                      esd=True,
                      esd_factor=1.0,
                      log=None,
                      verbose=False,
                      ):
  """Apply conformation-dependent (CDL) restraint updates to ``geometry``
  for every protein triplet in ``hierarchy`` and return the updated
  geometry.

  :param hierarchy: pdb_hierarchy whose atoms define the triplets
  :param geometry: geometry restraints manager to update in place
  :param current_geometry: optional xray structure; if given, its
    sites_cart are used (mutually exclusive with ``sites_cart``)
  :param sites_cart: optional coordinates copied onto hierarchy atoms
  :param cdl_proxies: proxies passed through to apply_updates
  :param ideal, esd, esd_factor: forwarded to apply_updates
  :param log: optional stream for reporting incomplete updates
  :returns: ``geometry`` (also mutated in place)
  """
  # module-level registry is re-created on every call so averaged updates
  # from a previous run do not leak into this one
  global registry
  registry = RestraintsRegistry()
  if current_geometry:
    assert not sites_cart
    sites_cart = current_geometry.sites_cart()
  if sites_cart:
    pdb_atoms = hierarchy.atoms()
    # XXX PDB_TRANSITION VERY SLOW
    for j_seq, atom in enumerate(pdb_atoms):
      atom.xyz = sites_cart[j_seq]
  threes = None
  average_updates = 0
  total_updates = 0
  for threes in generate_protein_threes(hierarchy,
                                        geometry,
                                        omega_cdl=True,
                                        # verbose=verbose,
                                        ):
    threes.apply_updates = apply_updates
    # cis peptides are skipped: the CDL tables apply to trans groups
    if threes.cis_group():
      if verbose and 0:
        print "cis " * 20
        print threes
      continue
    restraint_values = get_restraint_values(threes)
    if restraint_values is None:
      continue
    # leading "I" marks values that must be averaged via the registry
    if restraint_values[0] == "I":
      average_updates += 1
    else:
      total_updates += 1
    threes.apply_updates(threes,
                         restraint_values,
                         cdl_proxies,
                         ideal=ideal,
                         esd=esd,
                         esd_factor=esd_factor)
  if registry.n:
    threes.apply_average_updates(registry)
    # NOTE(review): unconditional assert 0 — apparently debug scaffolding
    # making the averaged-update path always raise; confirm intent
    assert 0
  geometry.reset_internals()
  if verbose and threes and threes.errors:
    if log:
      log.write(" Residues not completely updated with CDL restraints\n\n")
    for line in threes.errors:
      if log:
        log.write("%s\n" % line)
      else:
        print line
  return geometry
def exercise_phi_psi_extraction():
  """Check that generate_protein_threes produces the expected number of
  dummy dihedral proxies for a battery of small structures, both with and
  without only_psi_phi_pairs.
  """
  cases = [
      ([0, 0], rec_1_residue),
      ([0, 0], rec_2_residues),
      ([4, 2], rec_3_residues),
      ([6, 4], rec_4_residues),
      ([0, 0], rec_2_chains),
      ([0, 0], rec_2_segids),
      ([8, 4], rec_2_acs_edge),
      ([8, 4], rec_2_acs_middle),
      ([6, 4], rec_4_residues_isertions),
      ([12, 10], pdb_1yjp),
      ([8, 4], pdb_1yjp_minus_4),
      ([4, 2], rec_3_res_ac_h),
      ([8, 4], rec_2_acs_middle_one_atom_1),
      ([8, 4], rec_2_acs_middle_one_atom_2),
      ([8, 4], rec_2_acs_middle_one_atom_3),
  ]
  for n_prox, raw_records in cases:
    hierarchy = iotbx.pdb.input(
        source_info=None,
        lines=flex.split_lines(raw_records)).construct_hierarchy()
    # only_pairs doubles as the index into the expected-count pair
    for only_pairs in (0, 1):
      collected = []
      for three in generate_protein_threes(hierarchy=hierarchy,
                                           geometry=None):
        dummies = three.get_dummy_dihedral_proxies(
            only_psi_phi_pairs=only_pairs)
        print(three, 'ppp', len(dummies))
        collected.extend(dummies)
      print(len(collected), n_prox)
      assert len(collected) == n_prox[only_pairs], \
          "Expected %d, got %d" % (
              n_prox[only_pairs],
              len(collected),
          )
def extract_proxies(self):
  """Build self.proxies (shared_phi_psi_proxy) with one phi/psi proxy per
  unique protein triplet in the selected part of self.pdb_hierarchy,
  then report the count to self.log.  (Python 2 print syntax.)
  """
  self.proxies = ext.shared_phi_psi_proxy()
  from mmtbx.conformation_dependent_library import generate_protein_threes
  selected_h = self.pdb_hierarchy.select(self.bool_atom_selection)
  n_seq = flex.max(selected_h.atoms().extract_i_seq())
  for three in generate_protein_threes(
      hierarchy=selected_h,
      geometry=None):
    rc = three.get_phi_psi_atoms()
    # triplets with incomplete backbones yield None
    if rc is None: continue
    phi_atoms, psi_atoms = rc
    rama_key = three.get_ramalyze_key()
    # phi atoms plus the last psi atom give the 5 i_seqs of the proxy
    i_seqs = [atom.i_seq for atom in phi_atoms] + [psi_atoms[-1].i_seq]
    resnames = three.get_resnames()
    r_name = resnames[1]
    assert rama_key in range(6)
    text_rama_key = ramalyze.res_types[rama_key]
    assert text_rama_key in ["general", "glycine", "cis-proline",
                             "trans-proline", "pre-proline",
                             "isoleucine or valine"]
    proxy = ext.phi_psi_proxy(
      residue_name=r_name,
      residue_type=text_rama_key,
      i_seqs=i_seqs)
    # avoid duplicate proxies (alternate conformations can repeat i_seqs)
    if not is_proxy_present(self.proxies, n_seq, proxy):
      self.proxies.append(proxy)
  print >> self.log, ""
  print >> self.log, " %d Ramachandran restraints generated." % (
    self.get_n_proxies())
def extract_proxies(self, log):
  """Build self.proxies (shared_phi_psi_proxy) with one phi/psi proxy per
  unique protein triplet in the selected part of self.pdb_hierarchy,
  reporting the final count to ``log``.  (Python 2 print syntax.)
  """
  self.proxies = ext.shared_phi_psi_proxy()
  from mmtbx.conformation_dependent_library import generate_protein_threes
  selected_h = self.pdb_hierarchy.select(self.bool_atom_selection)
  n_seq = flex.max(selected_h.atoms().extract_i_seq())
  for three in generate_protein_threes(hierarchy=selected_h, geometry=None):
    rc = three.get_phi_psi_atoms()
    # triplets with incomplete backbones yield None
    if rc is None: continue
    phi_atoms, psi_atoms = rc
    rama_key = three.get_ramalyze_key()
    # phi atoms plus the last psi atom give the 5 i_seqs of the proxy
    i_seqs = [atom.i_seq for atom in phi_atoms] + [psi_atoms[-1].i_seq]
    resnames = three.get_resnames()
    r_name = resnames[1]
    assert rama_key in range(6)
    text_rama_key = ramalyze.res_types[rama_key]
    assert text_rama_key in [
      "general", "glycine", "cis-proline", "trans-proline",
      "pre-proline", "isoleucine or valine"
      ]
    proxy = ext.phi_psi_proxy(residue_name=r_name,
                              residue_type=text_rama_key,
                              i_seqs=i_seqs)
    # avoid duplicate proxies (alternate conformations can repeat i_seqs)
    if not is_proxy_present(self.proxies, n_seq, proxy):
      self.proxies.append(proxy)
  print >> log, ""
  print >> log, " %d Ramachandran restraints generated." % (
    self.get_n_proxies())
def run():
  """Exercise omega (peptide-bond) dihedral classification on a multi-model
  test PDB: writes 'tst_multi.pdb', iterates protein triplets and compares
  cis/trans classification against the expected ``refine`` and
  ``omegalyze`` flag lists, then sweeps omega limits 0..180.
  """
  filename = 'tst_multi.pdb'
  f = open(filename, 'w')
  f.write(pdb_lines)
  f.close()
  pdb_inp = pdb.input(filename)
  pdb_hierarchy = pdb_inp.construct_hierarchy()
  from mmtbx.conformation_dependent_library.tst_rdl import \
    get_geometry_restraints_manager
  geometry_restraints_manager = get_geometry_restraints_manager(filename)
  pdb_hierarchy.reset_i_seq_if_necessary()
  # expected cis flags at the default limit; inline comments give the
  # omega value of each triplet
  refine = [False, # -179
            True,  # -44
            False, # 86
            True,  # -22
            False, # -179
            ]
  refine += [True] * 5
  refine += [False] * 14
  # expected cis flags at limit=30 (omegalyze convention)
  omegalyze = [False,
               False,
               False,
               True,
               False,
               ]
  omegalyze += [True] * 3
  omegalyze += [False] * 16
  from mmtbx.conformation_dependent_library import generate_protein_threes
  for i, threes in enumerate(generate_protein_threes(
      pdb_hierarchy,
      geometry_restraints_manager,
      cdl_class=True,
      #verbose=verbose,
      )):
    print(i, threes)
    print(' omega %5.1f' % threes.get_omega_value())
    print(" cis? %-5s %s" % (threes.cis_group(), threes.cis_group(limit=30)))
    print(" trans? %-5s %s" % (threes.trans_group(), threes.trans_group(limit=30)))
    print(" rama %s" % threes.get_ramalyze_key())
    print(' conf %s' % threes.is_pure_main_conf())
    assert threes.cis_group() == refine[i], '%s!=%s' % (threes.cis_group(),
                                                        refine[i])
    assert threes.cis_group(limit=30) == omegalyze[i]
  # continue using the last triplet and keep incrementing i so the sweep
  # consumes the remainder of the refine list
  for j in range(0, 181, 10):
    i += 1
    print(" %3d %-5s %-8s %-5s" % (
      j,
      threes._define_omega_a_la_duke_using_limit(j) == 'cis',
      threes._define_omega_a_la_duke_using_limit(j, limit=30),
      refine[i],
      ))
    assert (threes._define_omega_a_la_duke_using_limit(j) == 'cis'
            ) == refine[i]
def extract_proxies(self, hierarchy):
  """Rebuild the Oldfield and Emsley phi/psi proxy arrays for
  ``hierarchy``, optionally filtering residues by their Ramachandran
  evaluation (favored / allowed / outlier) when self.need_filtering is
  set, and report counts to self.log.
  """
  self.hierarchy = hierarchy
  selected_h = hierarchy.select(self.bool_atom_selection)
  n_seq = flex.max(selected_h.atoms().extract_i_seq())
  # Drop all previous proxies
  self._oldfield_proxies = ext.shared_phi_psi_proxy()
  self._emsley_proxies = ext.shared_phi_psi_proxy()
  # it would be great to save rama_eval, but the fact that this is called in
  # pdb_interpretation, not in mmtbx.model makes it impossible
  if self.need_filtering:
    self.rama_eval = rama_eval()
  for three in generate_protein_threes(hierarchy=selected_h, geometry=None):
    rc = three.get_phi_psi_atoms()
    # triplets with incomplete backbones yield None
    if rc is None: continue
    rama_key = three.get_ramalyze_key()
    if self.need_filtering:
      angles = three.get_phi_psi_angles()
      rama_score = self.rama_eval.get_score(rama_key, angles[0], angles[1])
      r_evaluation = self.rama_eval.evaluate_score(rama_key, rama_score)
    phi_atoms, psi_atoms = rc
    i_seqs = [atom.i_seq for atom in phi_atoms] + [psi_atoms[-1].i_seq]
    resnames = three.get_resnames()
    r_name = resnames[1]
    assert rama_key in range(6)
    text_rama_key = ramalyze.res_types[rama_key]
    assert text_rama_key in [
      "general", "glycine", "cis-proline", "trans-proline",
      "pre-proline", "isoleucine or valine"
      ]
    proxy = ext.phi_psi_proxy(residue_name=r_name,
                              residue_type=text_rama_key,
                              i_seqs=i_seqs)
    # pick where to put...
    if self.params.rama_potential == "oldfield":
      if self.need_filtering:
        # route by Ramachandran evaluation and the restrain_* parameters;
        # allowed/outlier residues may fall back to Emsley restraints
        if r_evaluation == ramalyze.RAMALYZE_FAVORED:
          self.append_oldfield_proxies(proxy, n_seq)
        elif r_evaluation == ramalyze.RAMALYZE_ALLOWED and \
            self.params.restrain_rama_allowed:
          self.append_oldfield_proxies(proxy, n_seq)
        elif r_evaluation == ramalyze.RAMALYZE_OUTLIER and \
            self.params.restrain_rama_outliers:
          self.append_oldfield_proxies(proxy, n_seq)
        elif self.params.restrain_allowed_outliers_with_emsley:
          self.append_emsley_proxies(proxy, n_seq)
      else:
        # no filtering: everything is restrained with Oldfield
        self.append_oldfield_proxies(proxy, n_seq)
    else: # self.params.rama_potential == "emsley":
      self.append_emsley_proxies(proxy, n_seq)
  print("", file=self.log)
  print(" %d Ramachandran restraints generated." % (
    self.get_n_proxies()), file=self.log)
  print(" %d Oldfield and %d Emsley." % (
    self.get_n_oldfield_proxies(),
    self.get_n_emsley_proxies()), file=self.log)
def get_phi_psi_atoms(hierarchy):
  """Collect ([phi_atoms, psi_atoms], rama_key) for every protein triplet
  in ``hierarchy``.

  Fix: three.get_phi_psi_atoms() returns None for triplets with an
  incomplete backbone; the original unpacked it unconditionally and raised
  TypeError.  Placeholders (None, None) are recorded instead, matching the
  sibling implementation of this function elsewhere in the file.

  :param hierarchy: pdb_hierarchy to scan
  :returns: list of ([phi_atoms, psi_atoms], rama_key) tuples
  """
  phi_psi_atoms = []
  for three in generate_protein_threes(
      hierarchy=hierarchy,
      geometry=None):
    rc = three.get_phi_psi_atoms()
    if rc is not None:
      phi_atoms, psi_atoms = rc
    else:
      # incomplete backbone: keep the entry but with placeholders
      phi_atoms, psi_atoms = None, None
    rama_key = three.get_ramalyze_key()
    # print "rama_key", rama_key
    phi_psi_atoms.append(([phi_atoms, psi_atoms], rama_key))
  return phi_psi_atoms
def get_phi_psi_atoms(hierarchy):
  """Return a list of ([phi_atoms, psi_atoms], rama_key) tuples, one per
  protein triplet in ``hierarchy``; atoms are (None, None) when the
  triplet's backbone is incomplete.
  """
  collected = []
  for triplet in generate_protein_threes(hierarchy=hierarchy, geometry=None):
    pair = triplet.get_phi_psi_atoms()
    if pair is None:
      phi_atoms = psi_atoms = None
    else:
      phi_atoms, psi_atoms = pair
    key = triplet.get_ramalyze_key()
    collected.append(([phi_atoms, psi_atoms], key))
  return collected
def test_phi_psi_key(hierarchy,
                     filename,
                     restraints_manager,
                     ):
  """Check each triplet's CDL key (with force_plus_one) against the
  expected keys stored in filenames[filename][1].  (Python 2 print.)
  """
  for i, threes in enumerate(cdl.generate_protein_threes(
      hierarchy,
      #restraints_manager=restraints_manager
      geometry=restraints_manager.geometry,
      )
    ):
    key = threes.get_cdl_key(force_plus_one=True)
    print key, filenames[filename][1]
    assert key == filenames[filename][1][i]
def test_phi_psi_key(hierarchy,
                     filename,
                     restraints_manager,
                     ):
  """Check each triplet's CDL key (with force_plus_one) against the
  expected keys stored in filenames[filename][1].  (Python 2 print.)
  """
  for i, threes in enumerate(cdl.generate_protein_threes(
      hierarchy,
      #restraints_manager=restraints_manager
      geometry=restraints_manager.geometry,
      )
    ):
    key = threes.get_cdl_key(force_plus_one=True)
    print key, filenames[filename][1]
    assert key == filenames[filename][1][i]
def add_main_chain_atoms(hierarchy,
                         geometry_restraints_manager,
                         verbose=False,
                         ):
  """For every protein triplet in ``hierarchy``, delegate to
  add_main_chain_atoms_to_protein_three.

  :param hierarchy: pdb_hierarchy to scan
  :param geometry_restraints_manager: passed to generate_protein_threes
  :param verbose: forwarded to generate_protein_threes
  """
  from mmtbx.conformation_dependent_library import generate_protein_threes
  for three in generate_protein_threes(hierarchy,
                                       geometry_restraints_manager,
                                       verbose=verbose,
                                       ):
    print(three)
    add_main_chain_atoms_to_protein_three(three)
  # NOTE(review): unconditional assert 0 — this function always raises
  # after the loop; looks like debug scaffolding, confirm before use
  assert 0
def get_dihedrals_and_phi_psi(model):
  """Collect the model's dihedral proxies plus dummy phi/psi proxies into
  a single de-duplicated registry and return its proxies.
  """
  from cctbx.geometry_restraints import dihedral_proxy_registry
  from mmtbx.conformation_dependent_library import generate_protein_threes
  registry = dihedral_proxy_registry(strict_conflict_handling=True)
  registry.initialize_table()
  # start from a copy of the geometry manager's own dihedral proxies
  grm = model._processed_pdb_file.geometry_restraints_manager()
  for proxy in grm.get_dihedral_proxies().deep_copy():
    registry.add_if_not_duplicated(proxy)
  # then fold in dummy phi/psi proxies for every protein triplet
  for triplet in generate_protein_threes(hierarchy=model.get_hierarchy(),
                                         geometry=None):
    for proxy in triplet.get_dummy_dihedral_proxies(only_psi_phi_pairs=False):
      registry.add_if_not_duplicated(proxy)
  return registry.proxies
def get_phi_psi_dict(pdb_hierarchy):
  """Map '|<residue id_str>:<altloc>|' keys to phi/psi angle values for
  every protein triplet; altloc is ' ' unless a relevant backbone atom
  of the middle residue carries an alternate conformation id.
  """
  angle_map = {}
  for triplet in generate_protein_threes(hierarchy=pdb_hierarchy,
                                         geometry=None):
    angles = triplet.get_phi_psi_angles()
    middle = triplet[1]
    altloc_char = ' '
    for atom in middle.atoms():
      if atom.name not in relevant_atom_names:
        continue
      if len(atom.parent().altloc) != 0:
        altloc_char = atom.parent().altloc
        break
    key = '|%s:%s|' % (middle.id_str(), altloc_char)
    angle_map[key] = angles
  return angle_map
def test_average(hierarchy,
                 filename,
                 restraints_manager,
                 ):
  """Check that the expected averaged-restraint key for ``filename``
  (filenames[filename][3][0]) shows up in the triplet registry.
  (Python 2 print.)
  """
  for i, threes in enumerate(cdl.generate_protein_threes(
      hierarchy,
      geometry=restraints_manager.geometry,
      )
    ):
    if threes.registry.n:
      atoms = hierarchy.atoms()
      for key in threes.registry.n:
        print key
        for atom in key:
          print atoms[atom].quote()
      #assert threes.registry.n.keys() == filenames[filename][3]
      assert filenames[filename][3][0] in threes.registry.n
def test_average(hierarchy,
                 filename,
                 restraints_manager,
                 ):
  """Check that the expected averaged-restraint key for ``filename``
  (filenames[filename][3][0]) shows up in the triplet registry.
  (Python 2 print.)
  """
  for i, threes in enumerate(cdl.generate_protein_threes(
      hierarchy,
      geometry=restraints_manager.geometry,
      )
    ):
    if threes.registry.n:
      atoms = hierarchy.atoms()
      for key in threes.registry.n:
        print key
        for atom in key:
          print atoms[atom].quote()
      #assert threes.registry.n.keys() == filenames[filename][3]
      assert filenames[filename][3][0] in threes.registry.n
def get_complete_dihedral_proxies_2(model, log=None):
  """Collect the model's dihedral proxies plus dummy phi/psi proxies into
  a single de-duplicated registry and return its proxies.

  Fix: the original bound ``log = StringIO`` (the class object) instead of
  instantiating a stream with ``StringIO()``; any later write to the
  default log would fail.

  :param model: mmtbx model manager with a restraints manager
  :param log: optional stream; a fresh in-memory stream is created when None
  :returns: de-duplicated dihedral proxies
  """
  from six.moves import cStringIO as StringIO
  from cctbx.geometry_restraints import dihedral_proxy_registry
  from mmtbx.conformation_dependent_library import generate_protein_threes
  if log is None:
    # instantiate the stream; binding the class itself was a latent bug
    log = StringIO()
  dihedral_registry = dihedral_proxy_registry(strict_conflict_handling=True)
  dihedral_registry.initialize_table()
  grm = model.get_restraints_manager().geometry
  dihedral_proxies = grm.get_dihedral_proxies().deep_copy()
  for p in dihedral_proxies:
    dihedral_registry.add_if_not_duplicated(p)
  for three in generate_protein_threes(hierarchy=model.get_hierarchy(),
                                       geometry=None):
    proxies = three.get_dummy_dihedral_proxies(only_psi_phi_pairs=False)
    for p in proxies:
      dihedral_registry.add_if_not_duplicated(p)
  return dihedral_registry.proxies
def get_phi_psi_atoms(hierarchy, omega=False):
  """Return per-triplet entries ([phi_atoms, psi_atoms], rama_key) or,
  when ``omega`` is true, ([phi_atoms, psi_atoms], rama_key, omega_value).
  Atom entries are (None, None) for incomplete backbones.
  """
  entries = []
  for triplet in generate_protein_threes(hierarchy=hierarchy,
                                         geometry=None,
                                         cdl_class=True):
    pair = triplet.get_phi_psi_atoms()
    if pair is None:
      phi_atoms = psi_atoms = None
    else:
      phi_atoms, psi_atoms = pair
    rama_key = triplet.get_ramalyze_key()
    record = [phi_atoms, psi_atoms]
    if omega:
      entries.append((record, rama_key, triplet.get_omega_value()))
    else:
      entries.append((record, rama_key))
  return entries
def get_dihedrals_and_phi_psi(processed_pdb_file):
  """Collect the processed file's dihedral proxies plus dummy phi/psi
  proxies into a single de-duplicated registry and return its proxies.
  """
  from cctbx.geometry_restraints import dihedral_proxy_registry
  from mmtbx.conformation_dependent_library import generate_protein_threes
  registry = dihedral_proxy_registry(strict_conflict_handling=True)
  registry.initialize_table()
  # seed with a copy of the existing dihedral proxies
  grm = processed_pdb_file.geometry_restraints_manager()
  for proxy in grm.get_dihedral_proxies().deep_copy():
    registry.add_if_not_duplicated(proxy)
  # add dummy phi/psi proxies for every protein triplet
  source_hierarchy = processed_pdb_file.all_chain_proxies.pdb_hierarchy
  for triplet in generate_protein_threes(hierarchy=source_hierarchy,
                                         geometry=None):
    for proxy in triplet.get_dummy_dihedral_proxies(only_psi_phi_pairs=False):
      registry.add_if_not_duplicated(proxy)
  return registry.proxies
def validate_residues(self) :
  """Populate self.residues with one ValidationResidue per residue of
  self.pdb_hierarchy by walking overlapping residue triplets; terminal
  residues are covered via index=0 (first triplet) and index=2 (triplet
  flagged with three.end).
  """
  from mmtbx.conformation_dependent_library import generate_protein_threes
  from mmtbx.rotamer import ramachandran_eval,rotamer_eval
  # this is so we generate rama_eval only once
  rama_eval = ramachandran_eval.RamachandranEval()
  rota_eval = rotamer_eval.RotamerEval()
  rotamer_id = rotamer_eval.RotamerID() # loads in the rotamer names
  threes = generate_protein_threes(
    hierarchy = self.pdb_hierarchy,
    include_non_linked=True,
    backbone_only=False,
    geometry=None)
  for i,three in enumerate(threes) :
    # first triplet: also emit its leading residue
    if i == 0 :
      self.residues.append(ValidationResidue(three,rama_eval,
        rota_eval,rotamer_id,index=0))
    # every triplet: emit its middle residue (default index)
    self.residues.append(ValidationResidue(three,rama_eval,
      rota_eval,rotamer_id))
    # chain-final triplet: also emit its trailing residue
    if three.end :
      self.residues.append(ValidationResidue(three,rama_eval,
        rota_eval,rotamer_id,index=2))
def test_cdl_lookup(hierarchy,
                    filename,
                    restraints_manager,
                    ):
  """Check omega_database lookups: per triplet, derive the residue-type
  group and truncated CDL key, then compare the first four restraint
  values against filenames[filename][2].  (Python 2 print.)
  """
  for i, threes in enumerate(cdl.generate_protein_threes(
      hierarchy,
      #restraints_manager=restraints_manager
      geometry=restraints_manager.geometry,
      )
    ):
    res_type_group = cdl_utils.get_res_type_group(
      threes[1].resname,
      threes[2].resname,
      )
    # only the last two components of the key index the omega database
    key = threes.get_cdl_key(force_plus_one=True)
    key = key[-2:]
    restraint_values = omega_database[res_type_group][key]
    print i, key, restraint_values[:4], filenames[filename][2]
    # reset the shared registry between triplets
    del threes.registry.n
    threes.registry.n = {}
    assert restraint_values[:4] == filenames[filename][2][i]
def test_cdl_lookup(hierarchy,
                    filename,
                    restraints_manager,
                    ):
  """Check omega_database lookups: per triplet, derive the residue-type
  group and truncated CDL key, then compare the first four restraint
  values against filenames[filename][2].  (Python 2 print.)
  """
  for i, threes in enumerate(cdl.generate_protein_threes(
      hierarchy,
      #restraints_manager=restraints_manager
      geometry=restraints_manager.geometry,
      )
    ):
    res_type_group = cdl_utils.get_res_type_group(
      threes[1].resname,
      threes[2].resname,
      )
    # only the last two components of the key index the omega database
    key = threes.get_cdl_key(force_plus_one=True)
    key = key[-2:]
    print 'res_type_group',res_type_group,key
    restraint_values = omega_database[res_type_group][key]
    print i, key, restraint_values[:4], filenames[filename][2]
    # reset the shared registry between triplets
    del threes.registry.n
    threes.registry.n = {}
    assert restraint_values[:4] == filenames[filename][2][i]
def __init__(self, models, log):
  """Set up the rama-z scorer: load the top8000 phi/psi database, record
  calibration constants, then classify every residue of every model by
  secondary-structure type (H/S/L) and collect its phi/psi angles into
  self.res_info.

  :param models: list of mmtbx model managers (only the first model of a
    multi-model hierarchy is analysed)
  :param log: output stream kept on self.log
  """
  db_path = libtbx.env.find_in_repositories(
    relative_path="chem_data/rama_z/top8000_rama_z_dict.pkl",
    test=os.path.isfile)
  self.log = log
  # this takes ~0.15 seconds, so I don't see a need to cache it somehow.
  self.db = easy_pickle.load(db_path)
  # =========================================================================
  # change keys in pickle to Python 3 string
  # very temporary fix until pickle is updated
  if sys.version_info.major == 3:
    from libtbx.utils import to_str
    for key in list(self.db.keys()):
      self.db[to_str(key)] = self.db[key]
      for subkey in list(self.db[key].keys()):
        self.db[to_str(key)][to_str(subkey)] = self.db[key][subkey]
  # =========================================================================
  # per-ss-type (mean, sd) calibration constants; 'W' = whole structure
  self.calibration_values = {
    'H': (-0.045355950779513175, 0.1951165524439217),
    'S': (-0.0425581278436754, 0.20068584887814633),
    'L': (-0.018457764754231075, 0.15788374669456848),
    'W': (-0.016806654295023003, 0.12044960331869274)}
  self.residue_counts = {"H": 0, "S": 0, "L": 0}
  self.z_score = {"H": None, "S": None, "L": None, 'W': None}
  self.means = {"H": {}, "S": {}, "L": {}}
  self.stds = {"H": {}, "S": {}, "L": {}}
  # phi/psi grid geometry used by the database
  self.phi_step = 4
  self.psi_step = 4
  self.n_phi_half = 45
  self.n_psi_half = 45
  # this is needed to disable e.g. selection functionality when
  # multiple models are present
  self.n_models = len(models)
  self.res_info = []
  for model in models:
    if model.get_hierarchy().models_size() > 1:
      # multi-model (e.g. NMR) input: analyse only the first model
      hierarchy = iotbx.pdb.hierarchy.root()
      m = model.get_hierarchy().models()[0].detached_copy()
      hierarchy.append_model(m)
      asc = hierarchy.atom_selection_cache()
    else:
      hierarchy = model.get_hierarchy()
      asc = model.get_atom_selection_cache()
    # conservative CA-based secondary-structure annotation
    sec_str_master_phil = iotbx.phil.parse(sec_str_master_phil_str)
    ss_params = sec_str_master_phil.fetch().extract()
    ss_params.secondary_structure.protein.search_method = "from_ca"
    ss_params.secondary_structure.from_ca_conservative = True
    ssm = ss_manager(hierarchy,
                     atom_selection_cache=asc,
                     geometry_restraints_manager=None,
                     sec_str_from_pdb_file=None,
                     # params=None,
                     params=ss_params.secondary_structure,
                     was_initialized=False,
                     mon_lib_srv=None,
                     verbose=-1,
                     log=null_out(),
                     # log=sys.stdout,
                     )
    filtered_ann = ssm.actual_sec_str.deep_copy()
    filtered_ann.remove_short_annotations(
      helix_min_len=4,
      sheet_min_len=4,
      keep_one_stranded_sheets=True)
    self.helix_sel = asc.selection(
      filtered_ann.overall_helices_selection())
    self.sheet_sel = asc.selection(
      filtered_ann.overall_sheets_selection())
    used_atoms = set()
    for three in generate_protein_threes(hierarchy=hierarchy, geometry=None):
      main_residue = three[1]
      phi_psi_atoms = three.get_phi_psi_atoms()
      if phi_psi_atoms is None:
        continue
      phi_atoms, psi_atoms = phi_psi_atoms
      # stringified i_seq list de-duplicates triplets sharing atoms
      key = [x.i_seq for x in phi_atoms] + [psi_atoms[-1].i_seq]
      key = "%s" % key
      if key not in used_atoms:
        phi, psi = three.get_phi_psi_angles()
        rkey = three.get_ramalyze_key()
        resname = main_residue.resname
        ss_type = self._figure_out_ss(three)
        self.res_info.append(["", rkey, resname, ss_type, phi, psi])
        self.residue_counts[ss_type] += 1
        used_atoms.add(key)
  self.residue_counts["W"] = self.residue_counts["H"] + \
    self.residue_counts["S"] + self.residue_counts["L"]
def __init__ (self, pdb_hierarchy,
    outliers_only=False,
    show_errors=False,
    out=sys.stdout,
    quiet=False) :
  """Run Ramachandran analysis over ``pdb_hierarchy``: classify every
  residue triplet, score it, collect per-residue ``ramachandran`` results
  (optionally outliers only) and accumulate summary statistics.

  :param pdb_hierarchy: hierarchy to analyse
  :param outliers_only: keep only outlier results in self.results
  :param show_errors: unused here — presumably consumed by base class;
    TODO confirm
  :param out, quiet: see base class ``validation``
  """
  # Optimization hint: make it possible to pass
  # ramachandran_eval.RamachandranEval() from outside.
  # Better - convert this to using mmtbx.model.manager where
  # RamachandranEval is already available.
  validation.__init__(self)
  self.n_allowed = 0
  self.n_favored = 0
  self.n_type = [ 0 ] * 6
  self._outlier_i_seqs = flex.size_t()
  pdb_atoms = pdb_hierarchy.atoms()
  all_i_seqs = pdb_atoms.extract_i_seq()
  if (all_i_seqs.all_eq(0)) :
    pdb_atoms.reset_i_seq()
  use_segids = utils.use_segids_in_place_of_chainids(
    hierarchy=pdb_hierarchy)
  analysis = ""
  output_list = []
  count_keys = []
  uniqueness_keys = []
  r = ramachandran_eval.RamachandranEval()
  ##if use_segids:
  ##  chain_id = utils.get_segid_as_chainid(chain=chain)
  ##else:
  ##  chain_id = chain.id
  for three in generate_protein_threes(hierarchy=pdb_hierarchy, geometry=None):
    main_residue = three[1]
    phi_psi_atoms = three.get_phi_psi_atoms()
    if phi_psi_atoms is None:
      continue
    phi_atoms, psi_atoms = phi_psi_atoms
    phi = get_dihedral(phi_atoms)
    psi = get_dihedral(psi_atoms)
    coords = get_center(main_residue) #should find the CA of the center residue
    if (phi is not None and psi is not None):
      # classify the middle residue: GLY / cis-PRO / trans-PRO /
      # pre-PRO / ILE-VAL / general
      res_type = RAMA_GENERAL
      #self.n_total += 1
      if (main_residue.resname[0:3] == "GLY"):
        res_type = RAMA_GLYCINE
      elif (main_residue.resname[0:3] == "PRO"):
        is_cis = is_cis_peptide(three)
        if is_cis:
          res_type = RAMA_CISPRO
        else:
          res_type = RAMA_TRANSPRO
      elif (three[2].resname == "PRO"):
        res_type = RAMA_PREPRO
      elif (main_residue.resname[0:3] == "ILE" or \
            main_residue.resname[0:3] == "VAL"):
        res_type = RAMA_ILE_VAL
      #self.n_type[res_type] += 1
      value = r.evaluate(res_types[res_type], [phi, psi])
      ramaType = self.evaluateScore(res_type, value)
      is_outlier = ramaType == RAMALYZE_OUTLIER
      c_alphas = None
      # XXX only save kinemage data for outliers
      if is_outlier :
        c_alphas = get_cas_from_three(three)
        assert (len(c_alphas) == 3)
        markup = self.as_markup_for_kinemage(c_alphas)
      else:
        markup = None
      result = ramachandran(
        model_id=main_residue.parent().parent().parent().id,
        chain_id=main_residue.parent().parent().id,
        resseq=main_residue.resseq,
        icode=main_residue.icode,
        resname=main_residue.resname,
        #altloc=main_residue.parent().altloc,
        altloc=get_altloc_from_three(three),
        segid=None, # XXX ???
        phi=phi,
        psi=psi,
        rama_type=ramaType,
        res_type=res_type,
        score=value*100,
        outlier=is_outlier,
        xyz=coords,
        markup=markup)
      #if result.chain_id+result.resseq+result.icode not in count_keys:
      result_key = result.model_id+result.chain_id+result.resseq+result.icode
      # count each residue once: blank/'A' altloc only, first occurrence
      if result.altloc in ['','A'] and result_key not in count_keys:
        self.n_total += 1
        self.n_type[res_type] += 1
        self.add_to_validation_counts(ramaType)
        count_keys.append(result_key)
      if (not outliers_only or is_outlier) :
        if (result.altloc != '' or result_key not in uniqueness_keys):
          #the threes/conformers method results in some redundant result
          # calculations in structures with alternates. Using the
          # uniqueness_keys list prevents redundant results being added to
          # the final list
          self.results.append(result)
          uniqueness_keys.append(result_key)
        if is_outlier :
          i_seqs = main_residue.atoms().extract_i_seq()
          assert (not i_seqs.all_eq(0))
          self._outlier_i_seqs.extend(i_seqs)
  self.results.sort(key=lambda r: r.model_id+r.id_str())
  out_count, out_percent = self.get_outliers_count_and_fraction()
  fav_count, fav_percent = self.get_favored_count_and_fraction()
  self.out_percent = out_percent * 100.0
  self.fav_percent = fav_percent * 100.0
def extract_proxies(self, hierarchy):
  """Rebuild all Ramachandran proxy arrays (Oldfield, Emsley, emsley8k,
  Phi/Psi/2) for ``hierarchy``.  Each residue triplet is evaluated as
  favored/allowed/outlier and routed to the potential selected by the
  matching self.params.* option; counts are reported to self.log.
  """
  def _get_motifs():
    # lazy helper: Phi/Psi/2 motif extraction is only needed when at
    # least one residue is routed to the phi_psi_2 potential
    from phenix.programs.phi_psi_2 import results_manager as pp2
    pp2_manager = pp2(model=None,
                      log=self.log)
    phi_psi_2_motifs = pp2_manager.get_overall_motif_count_and_output(
      None,
      self.hierarchy,
      return_rama_restraints=True,
      )
    return phi_psi_2_motifs
  phi_psi_2_motifs = None
  favored = ramalyze.RAMALYZE_FAVORED
  allowed = ramalyze.RAMALYZE_ALLOWED
  outlier = ramalyze.RAMALYZE_OUTLIER
  self.hierarchy = hierarchy
  bool_atom_selection = self._determine_bool_atom_selection(hierarchy)
  selected_h = hierarchy.select(bool_atom_selection)
  n_seq = flex.max(selected_h.atoms().extract_i_seq())
  # Drop all previous proxies
  self._oldfield_proxies = ext.shared_phi_psi_proxy()
  self._emsley_proxies = ext.shared_phi_psi_proxy()
  self._emsley8k_proxies = ext.shared_phi_psi_proxy()
  self._phi_psi_2_proxies = ext.shared_phi_psi_proxy()
  # it would be great to save rama_eval, but the fact that this is called in
  # pdb_interpretation, not in mmtbx.model makes it impossible
  self.rama_eval = rama_eval()
  outl = []
  for three in generate_protein_threes(hierarchy=selected_h, geometry=None):
    rc = three.get_phi_psi_atoms()
    # triplets with incomplete backbones yield None
    if rc is None: continue
    rama_key = three.get_ramalyze_key()
    angles = three.get_phi_psi_angles()
    rama_score = self.rama_eval.get_score(rama_key, angles[0], angles[1])
    r_eval = self.rama_eval.evaluate_score(rama_key, rama_score)
    phi_atoms, psi_atoms = rc
    i_seqs = [atom.i_seq for atom in phi_atoms] + [psi_atoms[-1].i_seq]
    resnames = three.get_resnames()
    r_name = resnames[1]
    assert rama_key in range(6)
    text_rama_key = ramalyze.res_types[rama_key]
    assert text_rama_key in [
      "general", "glycine", "cis-proline", "trans-proline",
      "pre-proline", "isoleucine or valine"
      ]
    # pick where to put...
    # favored/allowed/outlier each select a potential via params
    ev_match_dict = {favored: self.params.favored,
                     allowed: self.params.allowed,
                     outlier: self.params.outlier}
    r_type = ev_match_dict[r_eval]
    if r_type == 'oldfield':
      proxy = ext.phi_psi_proxy(residue_type=text_rama_key,
                                i_seqs=i_seqs,
                                weight=1) # XXX Not used in oldfield
      self.append_oldfield_proxies(proxy, n_seq)
      ### THIS IS CRUEL. REMOVE ONCE favored/allowed/outlier are made multiple!
      if (self.params.inject_emsley8k_into_oldfield_favored):
        proxy = ext.phi_psi_proxy(residue_type=text_rama_key,
                                  i_seqs=i_seqs,
                                  weight=5)
        self.append_emsley8k_proxies(proxy, n_seq)
      ###
    elif r_type == 'emsley':
      weight = self.params.emsley.weight
      proxy = ext.phi_psi_proxy(residue_type=text_rama_key,
                                i_seqs=i_seqs,
                                weight=weight)
      self.append_emsley_proxies(proxy, n_seq)
    elif r_type == 'emsley8k':
      # weight depends on the Ramachandran evaluation
      if (r_eval is favored):
        weight = self.params.emsley8k.weight_favored
      elif (r_eval is allowed):
        weight = self.params.emsley8k.weight_allowed
      elif (r_eval is outlier):
        weight = self.params.emsley8k.weight_outlier
      else:
        raise RuntimeError("Rama eveluation failed.")
      proxy = ext.phi_psi_proxy(residue_type=text_rama_key,
                                i_seqs=i_seqs,
                                weight=weight)
      self.append_emsley8k_proxies(proxy, n_seq)
    elif r_type == 'phi_psi_2':
      from phenix.pdb_tools.phi_psi_2_data import \
        get_phi_psi_key_for_rama_proxy
      if phi_psi_2_motifs is None:
        phi_psi_2_motifs = _get_motifs()
      if (r_eval is favored):
        strategy = self.params.phi_psi_2.favored_strategy
      elif (r_eval is allowed):
        strategy = self.params.phi_psi_2.allowed_strategy
      elif (r_eval is outlier):
        strategy = self.params.phi_psi_2.outlier_strategy
      else:
        raise RuntimeError("Rama eveluation failed.")
      # 'closest' strategy encodes the triplet's phi/psi in its name
      if strategy == 'closest':
        strategy += '_%0.1f_%0.1f' % tuple(three.get_phi_psi_angles())
      pp2_key = get_phi_psi_key_for_rama_proxy(phi_psi_2_motifs,
                                               three,
                                               strategy=strategy,
                                               )
      if pp2_key is None: continue
      weight = 1
      proxy = ext.phi_psi_proxy(residue_type=pp2_key,
                                i_seqs=i_seqs,
                                weight=weight)
      outl.append([proxy.residue_type, three])
      self.append_phi_psi_2_proxies(proxy, n_seq)
    elif (r_type is None):
      # potential explicitly disabled for this evaluation class
      pass
    else:
      raise RuntimeError("Not an option: %s" % str(r_type))
  print("", file=self.log)
  print(" %d Ramachandran restraints generated." % (
    self.get_n_proxies()), file=self.log)
  print(" %d Oldfield, %d Emsley, %d emsley8k and %d Phi/Psi/2." % (
    self.get_n_oldfield_proxies(),
    self.get_n_emsley_proxies(),
    self.get_n_emsley8k_proxies(),
    self.get_n_phi_psi_2_proxies()), file=self.log)
  if outl:
    # NOTE(review): this header goes to stdout while the rows go to
    # self.log — looks unintentional; confirm
    print(' Rama restraints by Phi/Psi/2')
    for pp2, three in outl:
      print(' %s : %s' % (three[1].id_str(), pp2.split('|')[0]),
            file=self.log)
def fix_rama_outlier(self,
    pdb_hierarchy, out_res_num_list, prefix="", minimize=True,
    ss_annotation=None, tried_rama_angles_for_chain={},
    tried_final_rama_angles_for_chain={}):
  """Try to fix Ramachandran outliers listed in out_res_num_list by
  enumerating starting phi/psi conformations, closing the loop with CCD
  and (optionally) real-space/geometry minimization.

  Returns a new hierarchy with the accepted fix, or the unmodified copy of
  the input hierarchy when no acceptable solution is found.

  NOTE(review): tried_rama_angles_for_chain / tried_final_rama_angles_for_chain
  are mutable default arguments that are also mutated in place — presumably
  intentional so callers share the accumulated history; confirm.
  """

  def comb_pair_in_bad_pairs(comb_pair, bad_pairs):
    # True if comb_pair (or a +/-20 degree neighbour, wrapped into
    # [-180, 180]) was already tried (present in bad_pairs).
    if None in comb_pair:
      return False
    all_combs = [comb_pair]
    all_combs.append((comb_pair[0]-20, comb_pair[1]))
    all_combs.append((comb_pair[0]+20, comb_pair[1]))
    all_combs.append((comb_pair[0], comb_pair[1]-20))
    all_combs.append((comb_pair[0], comb_pair[1]+20))
    all_c_adj = []
    for p in all_combs:
      new_p = p
      if p[0] > 180:
        new_p = (p[0]-180, p[1])
      if p[0] < -180:
        new_p = (p[0]+180, p[1])
      if p[1] > 180:
        new_p = (p[0], p[1]-180)
      # NOTE(review): this repeats the p[0] test — presumably p[1] < -180
      # was intended; left as-is (behavior-preserving documentation pass).
      if p[0] < -180:
        new_p = (p[0], p[1]+180)
      all_c_adj.append(new_p)
    for p in all_c_adj:
      if p in bad_pairs:
        return True
    return False

  # Work on copies; the original is the fallback return value.
  original_pdb_h = pdb_hierarchy.deep_copy()
  original_pdb_h.reset_atom_i_seqs()
  original_pdb_h_asc = original_pdb_h.atom_selection_cache()
  chain_id = original_pdb_h.only_model().only_chain().id
  all_results = []
  # Search variants (forward and backward directions); commented-out
  # forward-only / backward-only lists removed for brevity.
  variants_searches = [
    #ccd_radius, change_all, change_radius, direction_forward
    ((1, False, 0, True ),1),
    ((1, False, 0, False),1),
    ((2, False, 0, True ),1),
    ((2, False, 0, False),1),
    ((3, False, 0, True ),2),
    ((3, False, 0, False),2),
    ((2, True,  1, True ),1),
    ((2, True,  1, False),1),
    ((3, True,  1, True ),2),
    ((3, True,  1, False),2),
    ((3, True,  2, True ),3),
    ((3, True,  2, False),3),
  ]
  # Only keep variants at or below the configured search level.
  decided_variants = []
  for variant, level in variants_searches:
    if level <= self.params.variant_search_level:
      decided_variants.append(variant)

  for ccd_radius, change_all, change_radius, direction_forward in decided_variants:
    # while ccd_radius <= 3:
    fixing_omega = False
    print >> self.log, " Starting optimization with radius=%d, " % ccd_radius,
    print >> self.log, "change_all=%s, change_radius=%d, " % (change_all, change_radius),
    print >> self.log, "direction=forward" if direction_forward else "direction=backwards"
    self.log.flush()
    # Split the hierarchy into the moving fragment around the outliers and
    # the fixed anchor atoms used by CCD.
    (moving_h, moving_ref_atoms_iseqs, fixed_ref_atoms, m_selection,
        contains_ss_element, anchor_present) = get_fixed_moving_parts(
            pdb_hierarchy=pdb_hierarchy,
            out_res_num_list=out_res_num_list,
            n_following=ccd_radius,
            n_previous=ccd_radius,
            ss_annotation=ss_annotation,
            direction_forward=direction_forward,
            log=self.log)
    # print " moving_ref_atoms_iseqs", moving_ref_atoms_iseqs
    print " moving_h resseqs:", [x.resseq for x in moving_h.residue_groups()]
    moving_h_set = []
    all_angles_combination_f = starting_conformations.get_all_starting_conformations(
        moving_h,
        change_radius,
        n_outliers=len(out_res_num_list),
        direction_forward=direction_forward,
        cutoff=self.params.variant_number_cutoff,
        change_all=change_all,
        # log=self.log,
        check_omega=self.params.make_all_trans,
        )
    if len(all_angles_combination_f) == 0:
      print "In starting conformations - outlier was fixed?"
      # return result
    else:
      # Filter out phi/psi combinations already tried for these residues.
      filter_out = []  # [[tried values],[tried values],...]
      for three in generate_protein_threes(
          hierarchy=moving_h,
          geometry=None):
        if three[1].resseq in tried_rama_angles_for_chain.keys():
          filter_out.append(tried_rama_angles_for_chain[three[1].resseq])
        else:
          filter_out.append((None, None))
      ff_all_angles = []
      print "filter_out", filter_out
      for comb in all_angles_combination_f:
        good = True
        for comb_pair, bad_pairs in zip(comb, filter_out):
          if bad_pairs == (None, None):
            continue
          if comb_pair_in_bad_pairs(comb_pair, bad_pairs):
            good = False
            break
        if good:
          ff_all_angles.append(comb)
      print "len(all_angles_combination_f)", len(all_angles_combination_f)
      print "len(ff_all_angles)", len(ff_all_angles)
      n_added = 0
      n_all_combination = len(ff_all_angles)
      if n_all_combination == 0:
        print >> self.log, "Strange - got 0 combinations."
      # Sample at most variant_number_cutoff combinations, evenly spaced.
      i_max = min(self.params.variant_number_cutoff, n_all_combination)
      # assert i_max > 0
      step = 0
      if i_max > 1:
        step = float(n_all_combination-1)/float(i_max-1)
      if step < 1:
        step = 1
      for i in range(i_max):
        comb = ff_all_angles[int(round(step*i))]
        setted_h, fixed_omega = starting_conformations.set_rama_angles(
            moving_h,
            list(comb),
            direction_forward=direction_forward,
            check_omega=self.params.make_all_trans)
        fixing_omega = fixing_omega or fixed_omega
        moving_h_set.append(setted_h)
        # print >> self.log, "Model %d, angles:" % i, comb
        if self.params.make_all_trans and utils.n_bad_omegas(moving_h_set[-1]) != 0:
          # Sanity check: make_all_trans must leave no bad omegas.
          print "Model_%d_angles_%s.pdb" % (i, comb),
          print "got ", utils.n_bad_omegas(moving_h_set[-1]), "bad omegas"
          moving_h_set[-1].write_pdb_file("Model_%d_angles_%s.pdb" % (i, comb))
          utils.list_omega(moving_h_set[-1], self.log)
          assert 0
    if len(moving_h_set) == 0:
      # outlier was fixed before somehow...
      # or there's a bug in get_starting_conformations
      print >> self.log, "outlier was fixed before somehow"
      return original_pdb_h
    print "self.tried_rama_angles inside", self.tried_rama_angles
    print "tried_rama_angles_for_chain", tried_rama_angles_for_chain
    print "checking values", ccd_radius, change_all, change_radius, direction_forward
    for i, h in enumerate(moving_h_set):
      resulting_rmsd = None
      n_iter = 0
      if anchor_present:
        # Cyclic coordinate descent loop closure against the fixed anchor.
        fixed_ref_atoms_coors = [x.xyz for x in fixed_ref_atoms]
        ccd_obj = ccd_cpp(fixed_ref_atoms_coors, h, moving_ref_atoms_iseqs)
        ccd_obj.run(direction_forward=direction_forward,
            save_states=self.params.save_states)
        resulting_rmsd = ccd_obj.resulting_rmsd
        n_iter = ccd_obj.n_iter
        if self.params.save_states:
          states = ccd_obj.states
          states.write(file_name="%s%s_%d_%s_%d_%i_states.pdb" % (
              chain_id, out_res_num_list[0], ccd_radius, change_all,
              change_radius, i))
      map_target = 0
      if self.reference_map is not None:
        map_target = maptbx.real_space_target_simple(
            unit_cell   = self.xrs.crystal_symmetry().unit_cell(),
            density_map = self.reference_map,
            sites_cart  = h.atoms().extract_xyz())
      mc_rmsd = get_main_chain_rmsd_range(moving_h, h, all_atoms=True)
      if self.verbose:
        print >> self.log, "Resulting anchor and backbone RMSDs, mapcc, n_iter for model %d:" % i,
        print >> self.log, resulting_rmsd, ",", mc_rmsd, ",", map_target, ",", n_iter
        self.log.flush()
      #
      # setting new coordinates
      #
      moved_with_side_chains_h = pdb_hierarchy.deep_copy()
      # setting xyz
      for i_source, i_dest in enumerate(m_selection):
        moved_with_side_chains_h.atoms()[i_dest].set_xyz(h.atoms()[i_source].xyz)
      #
      # placing side-chains
      #
      placing_range = get_res_nums_around(moved_with_side_chains_h,
          center_resnum_list=out_res_num_list,
          n_following=ccd_radius,
          n_previous=ccd_radius,
          include_intermediate=True,
          avoid_ss_annot=ss_annotation)
      place_side_chains(moved_with_side_chains_h, original_pdb_h,
          original_pdb_h_asc, self.rotamer_manager, placing_range,
          self.ideal_res_dict)
      #
      # finalizing with geometry_minimization
      #
      # determining angles of interest
      print "Recording picked angle for outliers"
      threes = generate_protein_threes(
          hierarchy=h,
          geometry=None)
      start_angles = []
      final_angles = []
      for angle_pair, three in zip(ff_all_angles[int(round(step*i))], threes):
        if three[1].resseq in out_res_num_list:
          start_angles.append((three[1].resseq, angle_pair))
          ps_angles = three.get_phi_psi_angles()
          final_angles.append((three[1].resseq, tuple(ps_angles)))
      # Skip solutions whose final angles duplicate a previously found one.
      if (not self.ccd_solution_is_duplicated(
          final_angles=final_angles,
          tried_final_rama_angles_for_chain=tried_final_rama_angles_for_chain)):
        all_results.append((moved_with_side_chains_h.deep_copy(),
            mc_rmsd, resulting_rmsd, map_target, n_iter))
      else:
        continue
      if self.ccd_solution_is_ok(
          anchor_rmsd=resulting_rmsd,
          mc_rmsd=mc_rmsd,
          n_outliers=len(out_res_num_list),
          ccd_radius=ccd_radius,
          change_all_angles=change_all,
          change_radius=change_radius,
          contains_ss_element=contains_ss_element,
          fixing_omega=fixing_omega):
        print "Choosen result (mc_rmsd, anchor_rmsd, map_target, n_iter):", mc_rmsd, resulting_rmsd, map_target, n_iter
        # Save to tried_ccds
        for rn, angles in start_angles:
          if rn not in tried_rama_angles_for_chain.keys():
            tried_rama_angles_for_chain[rn] = []
          tried_rama_angles_for_chain[rn].append(angles)
        # Save final angles
        for rn, angles in final_angles:
          if rn not in tried_final_rama_angles_for_chain.keys():
            tried_final_rama_angles_for_chain[rn] = []
          tried_final_rama_angles_for_chain[rn].append(angles)
        print >> self.log, "Ended up with", final_angles
        print >> self.log, "Updated tried_rama_angles_for_chain:", tried_rama_angles_for_chain
        print >> self.log, "Updated tried_final_rama_angles_for_chain:", tried_final_rama_angles_for_chain
        self.log.flush()
        if minimize:
          print >> self.log, "minimizing..."
          # NOTE(review): 'xrs' is not defined in this scope — presumably
          # self.xrs was intended; confirm against callers.
          if self.reference_map is None:
            minimize_wrapper_for_ramachandran(
                hierarchy=moved_with_side_chains_h,
                xrs=xrs,
                original_pdb_h=original_pdb_h,
                log=self.log,
                grm=self.grm,
                ss_annotation=self.secondary_structure_annotation)
          else:
            mwwm = minimize_wrapper_with_map(
                pdb_h=moved_with_side_chains_h,
                xrs=xrs,
                target_map=self.reference_map,
                grm=self.grm,
                ss_annotation=self.secondary_structure_annotation,
                log=self.log)
        final_rmsd = get_main_chain_rmsd_range(moved_with_side_chains_h,
            original_pdb_h, placing_range)
        print >> self.log, "FINAL RMSD after minimization:", final_rmsd
        return moved_with_side_chains_h
  # No solution passed ccd_solution_is_ok: rank candidates by backbone RMSD.
  all_results.sort(key=lambda tup: tup[1])
  if self.verbose:
    print >> self.log, "ALL RESULTS:"
    i = 0
    for ar in all_results:
      print >> self.log, ar[1:],
      if ar[2] < 0.4:
        # fn = "variant_%d.pdb" % i
        # ar[0].write_pdb_file(file_name=fn)
        # print fn
        i += 1
      else:
        print >> self.log, " no output"
  if self.params.force_rama_fixes:
    # find and apply the best variant from all_results. This would be the one
    # with the smallest rmsd given satisfactory closure
    print >> self.log, "Applying the best found variant:",
    i = 0
    while i < len(all_results) and all_results[i][2] > 1.5:
      i += 1
    # apply
    # === duplication!!!!
    if i < len(all_results):
      print >> self.log, all_results[i][1:]
      if minimize:
        print >> self.log, "minimizing..."
        # NOTE(review): 'xrs' undefined here too — see note above.
        if self.reference_map is None:
          minimize_wrapper_for_ramachandran(
              hierarchy=all_results[i][0],
              xrs=xrs,
              original_pdb_h=original_pdb_h,
              log=self.log,
              grm=self.grm,
              ss_annotation=self.secondary_structure_annotation)
        else:
          mwwm = minimize_wrapper_with_map(
              pdb_h=all_results[i][0],
              xrs=xrs,
              target_map=self.reference_map,
              grm=self.grm,
              ss_annotation=self.secondary_structure_annotation,
              log=self.log)
      final_rmsd = get_main_chain_rmsd_range(all_results[i][0],
          original_pdb_h, placing_range)
      print >> self.log, "FINAL RMSD after minimization:", final_rmsd
      return all_results[i][0]
    else:
      print >> self.log, " NOT FOUND!"
      for i in all_results:
        print >> self.log, i[1:]
    # === end of duplication!!!!
  else:
    print >> self.log, "Epic FAIL: failed to fix rama outlier:", out_res_num_list
    print >> self.log, "  Options were: (mc_rmsd, resultign_rmsd, n_iter)"
    for i in all_results:
      print >> self.log, i[1:]
  return original_pdb_h
def extract_proxies(self, hierarchy):
  """Build Ramachandran restraint proxies for every protein residue triplet.

  Each residue with defined phi/psi is scored with self.rama_eval and,
  depending on whether it is favored/allowed/outlier and on self.params,
  gets an Oldfield, Emsley or emsley8k phi/psi proxy appended to the
  corresponding shared proxy array.  Previously accumulated proxies are
  discarded first.  A summary is printed to self.log.

  Fix: corrected the misspelled error message "Rama eveluation failed."
  """
  favored = ramalyze.RAMALYZE_FAVORED
  allowed = ramalyze.RAMALYZE_ALLOWED
  outlier = ramalyze.RAMALYZE_OUTLIER
  self.hierarchy = hierarchy
  bool_atom_selection = self._determine_bool_atom_selection(hierarchy)
  selected_h = hierarchy.select(bool_atom_selection)
  n_seq = flex.max(selected_h.atoms().extract_i_seq())
  # Drop all previous proxies
  self._oldfield_proxies = ext.shared_phi_psi_proxy()
  self._emsley_proxies = ext.shared_phi_psi_proxy()
  self._emsley8k_proxies = ext.shared_phi_psi_proxy()
  # it would be great to save rama_eval, but the fact that this is called in
  # pdb_interpretation, not in mmtbx.model makes it impossible
  for three in generate_protein_threes(hierarchy=selected_h, geometry=None):
    rc = three.get_phi_psi_atoms()
    if rc is None:
      continue
    rama_key = three.get_ramalyze_key()
    angles = three.get_phi_psi_angles()
    rama_score = self.rama_eval.get_score(rama_key, angles[0], angles[1])
    r_eval = self.rama_eval.evaluate_score(rama_key, rama_score)
    phi_atoms, psi_atoms = rc
    # i_seqs: the five atoms defining phi plus the last psi atom.
    i_seqs = [atom.i_seq for atom in phi_atoms] + [psi_atoms[-1].i_seq]
    resnames = three.get_resnames()
    r_name = resnames[1]
    assert rama_key in range(6)
    text_rama_key = ramalyze.res_types[rama_key]
    assert text_rama_key in ["general", "glycine", "cis-proline",
        "trans-proline", "pre-proline", "isoleucine or valine"]
    # pick where to put...
    ev_match_dict = {
        favored: self.params.favored,
        allowed: self.params.allowed,
        outlier: self.params.outlier}
    r_type = ev_match_dict[r_eval]
    if r_type == 'oldfield':
      proxy = ext.phi_psi_proxy(
          residue_type=text_rama_key,
          i_seqs=i_seqs,
          weight=1)  # XXX Not used in oldfield
      self.append_oldfield_proxies(proxy, n_seq)
      ### THIS IS CRUEL. REMOVE ONCE favored/allowed/outlier are made multiple!
      if (self.params.inject_emsley8k_into_oldfield_favored):
        proxy = ext.phi_psi_proxy(
            residue_type=text_rama_key,
            i_seqs=i_seqs,
            weight=5)
        self.append_emsley8k_proxies(proxy, n_seq)
      ###
    elif r_type == 'emsley':
      weight = self.params.emsley.weight
      proxy = ext.phi_psi_proxy(
          residue_type=text_rama_key,
          i_seqs=i_seqs,
          weight=weight)
      self.append_emsley_proxies(proxy, n_seq)
    elif r_type == 'emsley8k':
      # Weight depends on the Ramachandran evaluation category.
      if (r_eval is favored):
        weight = self.params.emsley8k.weight_favored
      elif (r_eval is allowed):
        weight = self.params.emsley8k.weight_allowed
      elif (r_eval is outlier):
        weight = self.params.emsley8k.weight_outlier
      else:
        raise RuntimeError("Rama evaluation failed.")
      proxy = ext.phi_psi_proxy(
          residue_type=text_rama_key,
          i_seqs=i_seqs,
          weight=weight)
      self.append_emsley8k_proxies(proxy, n_seq)
    elif (r_type is None):
      pass
    else:
      raise RuntimeError("Not an option: %s" % str(r_type))
  print("", file=self.log)
  print(" %d Ramachandran restraints generated." % (
      self.get_n_proxies()), file=self.log)
  print(" %d Oldfield and %d Emsley and %d emsley8k." % (
      self.get_n_oldfield_proxies(),
      self.get_n_emsley_proxies(),
      self.get_n_emsley8k_proxies()), file=self.log)
# NOTE(review): the statements before `if __name__` are the tail of an
# enclosing method (it reads self.geometry) whose beginning lies outside
# this chunk; indentation reconstructed — confirm against the full file.
          # set ideal bond length to the accumulated average for this key
          bond.distance_ideal = averages[key]/averages.n[key]
        elif len(key)==3:
          # angle keys may be stored reversed: mirror the count
          rkey = (key[2],key[1],key[0])
          averages.n[rkey]=averages.n[key]
    for angle in self.geometry.angle_proxies:
      if angle.i_seqs in averages.n:
        key = angle.i_seqs
        if key not in averages:
          assert 0
        # set ideal angle to the accumulated average for this key
        angle.angle_ideal = averages[key]/averages.n[key]

if __name__=="__main__":
  # Smoke-test driver: print properties of every protein three-residue
  # window of the PDB file given on the command line.
  import sys
  from iotbx import pdb
  from test_rdl import get_geometry_restraints_manager
  filename=sys.argv[1]
  pdb_inp = pdb.input(filename)
  pdb_hierarchy = pdb_inp.construct_hierarchy()
  geometry_restraints_manager = get_geometry_restraints_manager(filename)
  pdb_hierarchy.reset_i_seq_if_necessary()
  from mmtbx.conformation_dependent_library import generate_protein_threes
  for threes in generate_protein_threes(pdb_hierarchy,
                                        geometry_restraints_manager,
                                        #verbose=verbose,
                                        ):
    print threes
    print " cis? %s" % threes.cis_group()
    print " rama %s" % threes.get_ramalyze_key()
    print ' conf %s' % threes.is_pure_main_conf()
  print "OK"
def add_terminal_hydrogens_threes(hierarchy,
                                  geometry_restraints_manager,
                                  terminate_all_N_terminals=False,
                                  terminate_all_C_terminals=False,
                                  use_capping_hydrogens=False,
                                  append_to_end_of_model=False,
                                  verbose=False,
                                  ):
  """Add terminal hydrogens/oxygens (or capping hydrogens) to protein chains.

  Walks three-residue windows; at each chain start adds N-terminal
  hydrogens, at each chain end adds C-terminal oxygens, optionally using
  capping hydrogens.  Returns the list of atom groups collected from the
  add_* helpers (non-empty return values only).

  NOTE(review): terminate_all_N_terminals / terminate_all_C_terminals are
  accepted but never read in this body — presumably handled by a caller or
  dead parameters; confirm.
  """
  from mmtbx.conformation_dependent_library import generate_protein_threes
  additional_hydrogens = []  #hierarchy_utils.smart_add_atoms()
  for three in generate_protein_threes(
      hierarchy,
      geometry_restraints_manager,
      #include_non_linked=False,
      backbone_only=False,
      include_linked_via_restraints_manager=True,
      verbose=verbose,
      ):
    bond_params_table = geometry_restraints_manager.bond_params_table

    def get_bonds():
      # Build a symmetric lookup of bonded i_seq pairs within residue_group.
      # NOTE(review): closes over 'residue_group', which is assigned below
      # before each call — order-dependent by design.
      bonds = {}
      for i, a1 in enumerate(residue_group.atoms()):
        for j, a2 in enumerate(residue_group.atoms()):
          if i >= j:
            continue
          bond = three.bond_params_table.lookup(a1.i_seq, a2.i_seq)
          if bond:
            bonds[(a1.i_seq, a2.i_seq)] = True
            bonds[(a2.i_seq, a1.i_seq)] = True
      return bonds

    if use_capping_hydrogens:
      # Convert any CYS HG where needed before capping.
      for i in range(len(three)):
        residue_group = three.get_residue_group_from_hierarchy(
          hierarchy,
          i)
        rc = conditional_add_cys_hg_to_atom_group(
          geometry_restraints_manager,
          residue_group)
      #assert not rc, '%s' % rc
    if three.start:
      # First window of a chain: cap/protonate the N terminus.
      residue_group = three.get_residue_group_from_hierarchy(
        hierarchy,
        0)
      rc = add_n_terminal_hydrogens_to_residue_group(
        residue_group,
        bonds=get_bonds(),
        use_capping_hydrogens=use_capping_hydrogens,
        append_to_end_of_model=append_to_end_of_model,
      )
      if rc: additional_hydrogens.append(rc)
    if three.end:
      # Last window of a chain: add C-terminal oxygens.
      residue_group = three.get_residue_group_from_hierarchy(
        hierarchy,
        2)
      rc = add_c_terminal_oxygens_to_residue_group(
        residue_group,
        bonds=get_bonds(),
        use_capping_hydrogens=use_capping_hydrogens,
        append_to_end_of_model=append_to_end_of_model,
      )
      if rc: additional_hydrogens.append(rc)
  return additional_hydrogens
def __init__ (self, pdb_hierarchy,
    outliers_only=False,
    show_errors=False,
    out=sys.stdout,
    quiet=False) :
  """Run Ramachandran analysis over all protein residues of pdb_hierarchy.

  Classifies each residue (general / glycine / cis- or trans-proline /
  pre-proline / Ile-Val), scores its phi/psi with RamachandranEval, and
  records per-residue results plus favored/allowed/outlier counts.
  Alternate-conformer duplicates are filtered via count/uniqueness keys.

  NOTE(review): show_errors and quiet are accepted but not read in this
  body — presumably consumed elsewhere; confirm.
  """
  validation.__init__(self)
  self.n_allowed = 0
  self.n_favored = 0
  self.n_type = [ 0 ] * 6
  self._outlier_i_seqs = flex.size_t()
  pdb_atoms = pdb_hierarchy.atoms()
  all_i_seqs = pdb_atoms.extract_i_seq()
  if (all_i_seqs.all_eq(0)) :
    pdb_atoms.reset_i_seq()
  use_segids = utils.use_segids_in_place_of_chainids(
    hierarchy=pdb_hierarchy)
  analysis = ""
  output_list = []
  count_keys = []
  uniqueness_keys = []
  r = ramachandran_eval.RamachandranEval()
  ##if use_segids:
  ##  chain_id = utils.get_segid_as_chainid(chain=chain)
  ##else:
  ##  chain_id = chain.id
  for three in generate_protein_threes(hierarchy=pdb_hierarchy, geometry=None):
    main_residue = three[1]
    phi_psi_atoms = three.get_phi_psi_atoms()
    if phi_psi_atoms is None:
      continue
    phi_atoms, psi_atoms = phi_psi_atoms
    phi = get_dihedral(phi_atoms)
    psi = get_dihedral(psi_atoms)
    coords = get_center(main_residue) #should find the CA of the center residue
    if (phi is not None and psi is not None):
      res_type = RAMA_GENERAL
      #self.n_total += 1
      # Residue-type classification; proline splits on cis/trans peptide.
      if (main_residue.resname[0:3] == "GLY"):
        res_type = RAMA_GLYCINE
      elif (main_residue.resname[0:3] == "PRO"):
        is_cis = is_cis_peptide(three)
        if is_cis:
          res_type = RAMA_CISPRO
        else:
          res_type = RAMA_TRANSPRO
      elif (three[2].resname == "PRO"):
        res_type = RAMA_PREPRO
      elif (main_residue.resname[0:3] == "ILE" or \
            main_residue.resname[0:3] == "VAL"):
        res_type = RAMA_ILE_VAL
      #self.n_type[res_type] += 1
      value = r.evaluate(res_types[res_type], [phi, psi])
      ramaType = self.evaluateScore(res_type, value)
      is_outlier = ramaType == RAMALYZE_OUTLIER
      c_alphas = None
      # XXX only save kinemage data for outliers
      if is_outlier :
        c_alphas = get_cas_from_three(three)
        assert (len(c_alphas) == 3)
        markup = self.as_markup_for_kinemage(c_alphas)
      else:
        markup = None
      result = ramachandran(
        chain_id=main_residue.parent().parent().id,
        resseq=main_residue.resseq,
        icode=main_residue.icode,
        resname=main_residue.resname,
        #altloc=main_residue.parent().altloc,
        altloc=get_altloc_from_three(three),
        segid=None, # XXX ???
        phi=phi,
        psi=psi,
        rama_type=ramaType,
        res_type=res_type,
        score=value*100,
        outlier=is_outlier,
        xyz=coords,
        markup=markup)
      # Count each residue once: blank/'A' altloc and an unseen key.
      #if result.chain_id+result.resseq+result.icode not in count_keys:
      if result.altloc in ['','A'] and result.chain_id+result.resseq+result.icode not in count_keys:
        self.n_total += 1
        self.n_type[res_type] += 1
        self.add_to_validation_counts(ramaType)
        count_keys.append(result.chain_id+result.resseq+result.icode)
      if (not outliers_only or is_outlier) :
        if (result.altloc != '' or
            result.chain_id+result.resseq+result.icode not in uniqueness_keys):
          #the threes/conformers method results in some redundant result
          # calculations in structures with alternates. Using the
          # uniqueness_keys list prevents redundant results being added to
          # the final list
          self.results.append(result)
          uniqueness_keys.append(result.chain_id+result.resseq+result.icode)
      if is_outlier :
        i_seqs = main_residue.atoms().extract_i_seq()
        assert (not i_seqs.all_eq(0))
        self._outlier_i_seqs.extend(i_seqs)
  self.results.sort(key=lambda r: r.id_str())
  out_count, out_percent = self.get_outliers_count_and_fraction()
  fav_count, fav_percent = self.get_favored_count_and_fraction()
  self.out_percent = out_percent * 100.0
  self.fav_percent = fav_percent * 100.0
def update_restraints(hierarchy,
                      geometry, # restraints_manager
                      current_geometry=None, # xray_structure!!
                      sites_cart=None,
                      cdl_proxies=None,
                      ideal=True,
                      esd=True,
                      esd_factor=1.,
                      log=None,
                      verbose=False,
                      ):
  """Apply conformation-dependent (omega CDL) restraint updates to geometry.

  Optionally refreshes atom coordinates from current_geometry/sites_cart,
  then walks omega-CDL three-residue windows, skips cis groups, looks up
  restraint values and applies them via apply_updates.  Averaged ("I")
  updates are resolved through the module-global RestraintsRegistry.
  Returns the (mutated) geometry.
  """
  global registry
  registry = RestraintsRegistry()
  if current_geometry:
    # Coordinates come from the xray structure, not from sites_cart.
    assert not sites_cart
    sites_cart = current_geometry.sites_cart()
  if sites_cart:
    pdb_atoms = hierarchy.atoms()
    # XXX PDB_TRANSITION VERY SLOW
    for j_seq, atom in enumerate(pdb_atoms):
      atom.xyz = sites_cart[j_seq]
  threes = None
  average_updates = 0
  total_updates = 0
  for threes in generate_protein_threes(
      hierarchy,
      geometry,
      cdl_class=True,
      omega_cdl=True,
      #verbose=verbose,
      ):
    threes.apply_updates = apply_updates
    if threes.cis_group(omega_cdl=True):
      if verbose and 0:
        print 'cis ' * 20
        print threes
      continue
    restraint_values = get_restraint_values(threes)
    if restraint_values is None:
      continue
    if restraint_values[0] == "I":
      # "I" marks values that must be averaged across overlapping windows.
      average_updates += 1
    else:
      total_updates += 1
    threes.apply_updates(threes,
                         restraint_values,
                         cdl_proxies,
                         ideal=ideal,
                         esd=esd,
                         esd_factor=esd_factor,
                         )
  if registry.n:
    threes.apply_average_updates(registry)
    # NOTE(review): deliberate hard stop after averaging — presumably
    # unfinished code path; confirm before relying on it.
    assert 0
  geometry.reset_internals()
  if verbose and threes and threes.errors:
    if log:
      log.write("  Residues not completely updated with CDL restraints\n\n")
    for line in threes.errors:
      if log:
        log.write("%s\n" % line)
      else:
        print line
  return geometry
# NOTE(review): the statements before `if __name__` are the tail of an
# enclosing method (it reads self.geometry) whose beginning lies outside
# this chunk; indentation reconstructed — confirm against the full file.
          # set ideal bond length to the accumulated average for this key
          bond.distance_ideal = averages[key]/averages.n[key]
        elif len(key)==3:
          # angle keys may be stored reversed: mirror the count
          rkey = (key[2],key[1],key[0])
          averages.n[rkey]=averages.n[key]
    for angle in self.geometry.angle_proxies:
      if angle.i_seqs in averages.n:
        key = angle.i_seqs
        if key not in averages:
          assert 0
        # set ideal angle to the accumulated average for this key
        angle.angle_ideal = averages[key]/averages.n[key]

if __name__=="__main__":
  # Smoke-test driver: print properties of every protein three-residue
  # window of the PDB file given on the command line.
  import sys
  from iotbx import pdb
  from test_rdl import get_geometry_restraints_manager
  filename=sys.argv[1]
  pdb_inp = pdb.input(filename)
  pdb_hierarchy = pdb_inp.construct_hierarchy()
  geometry_restraints_manager = get_geometry_restraints_manager(filename)
  pdb_hierarchy.reset_i_seq_if_necessary()
  from mmtbx.conformation_dependent_library import generate_protein_threes
  for threes in generate_protein_threes(pdb_hierarchy,
                                        geometry_restraints_manager,
                                        #verbose=verbose,
                                        ):
    print threes
    print " cis? %s" % threes.cis_group()
    print " rama %s" % threes.get_ramalyze_key()
    print ' conf %s' % threes.is_pure_main_conf()
  print "OK"
def __init__(self, model, log):
  """Set up rama-z scoring for a model.

  Loads the Top8000-derived phi/psi distribution database and the rmsd
  estimator from chem_data, runs conservative CA-based secondary-structure
  detection, and collects per-residue (rama key, resname, SS type, phi,
  psi) records plus per-SS-type residue counts.

  NOTE(review): calibration_values / phi_step / n_phi_half etc. are
  constants presumably matched to the pickled database; confirm before
  changing either side.
  """
  db_path = libtbx.env.find_in_repositories(
    relative_path="chem_data/rama_z/top8000_rama_z_dict.pkl",
    test=os.path.isfile)
  rmsd_path = libtbx.env.find_in_repositories(
    relative_path="chem_data/rama_z/rmsd.pkl",
    test=os.path.isfile)
  self.log = log
  # this takes ~0.15 seconds, so I don't see a need to cache it somehow.
  self.db = easy_pickle.load(db_path)
  self.rmsd_estimator = easy_pickle.load(rmsd_path)
  # Per-SS-type (H/S/L and whole-model W) calibration (mean, sd) pairs.
  self.calibration_values = {
      'H': (-0.045355950779513175, 0.1951165524439217),
      'S': (-0.0425581278436754, 0.20068584887814633),
      'L': (-0.018457764754231075, 0.15788374669456848),
      'W': (-0.016806654295023003, 0.12044960331869274)}
  self.residue_counts = {"H": 0, "S": 0, "L":0}
  self.z_score = {"H": None, "S": None, "L":None, 'W': None}
  self.interpolation_fs = {"H": {}, "S": {}, "L": {}}
  self.means = {"H": {}, "S": {}, "L": {}}
  self.stds = {"H": {}, "S": {}, "L": {}}
  # Grid geometry of the phi/psi histogram in the database.
  self.phi_step = 4
  self.psi_step = 4
  self.n_phi_half = 45
  self.n_psi_half = 45
  self.res_info = []
  asc = model.get_atom_selection_cache()
  sec_str_master_phil = iotbx.phil.parse(sec_str_master_phil_str)
  ss_params = sec_str_master_phil.fetch().extract()
  # Conservative CA-only secondary-structure search.
  ss_params.secondary_structure.protein.search_method = "from_ca"
  ss_params.secondary_structure.from_ca_conservative = True
  self.ssm = ss_manager(model.get_hierarchy(),
      atom_selection_cache=asc,
      geometry_restraints_manager=None,
      sec_str_from_pdb_file=None,
      # params=None,
      params = ss_params.secondary_structure,
      was_initialized=False,
      mon_lib_srv=None,
      verbose=-1,
      log=null_out(),
      # log=sys.stdout,
      )
  # Ignore very short helices/sheets when building the H/S selections.
  filtered_ann = self.ssm.actual_sec_str.deep_copy()
  filtered_ann.remove_short_annotations(
      helix_min_len=4,
      sheet_min_len=4,
      keep_one_stranded_sheets=True)
  self.helix_sel = asc.selection(filtered_ann.overall_helices_selection())
  self.sheet_sel = asc.selection(filtered_ann.overall_sheets_selection())
  used_atoms = set()
  for three in generate_protein_threes(hierarchy=model.get_hierarchy(),
      geometry=None):
    main_residue = three[1]
    phi_psi_atoms = three.get_phi_psi_atoms()
    if phi_psi_atoms is None:
      continue
    phi_atoms, psi_atoms = phi_psi_atoms
    # Deduplicate residues seen through multiple windows/conformers by the
    # string of their phi/psi atom i_seqs.
    key = [x.i_seq for x in phi_atoms]+[psi_atoms[-1].i_seq]
    key = "%s" % key
    if key not in used_atoms:
      phi, psi = three.get_phi_psi_angles()
      rkey = three.get_ramalyze_key()
      resname = main_residue.resname
      ss_type = self._figure_out_ss(three)
      self.res_info.append( ["", rkey, resname, ss_type, phi, psi] )
      self.residue_counts[ss_type] += 1
      used_atoms.add(key)
  self.residue_counts["W"] = self.residue_counts["H"] + \
      self.residue_counts["S"] + self.residue_counts["L"]
  for i in self.res_info:
    print(i, file=self.log)