def get_sequence_from_pdb(file_name=None,text=None,hierarchy=None):
    """Return the one-letter sequences of all chains, one chain per line.

    The hierarchy is taken from the first available source, in this order:
    ``hierarchy`` itself, PDB-format ``text``, or the contents of
    ``file_name``.

    Parameters:
      file_name: path of a model file; read only if neither ``hierarchy``
        nor ``text`` is supplied.
      text: model file contents as a single string.
      hierarchy: an already-built hierarchy exposing ``models()``.

    Returns:
      A string holding one line per chain (each terminated by a newline).
      Residue names missing from the three-letter -> one-letter table
      contribute nothing to the line.

    Raises:
      Sorry: if none of the three inputs is supplied.
    """
    if not hierarchy:
        # read from PDB
        if not text:
            if not file_name:
                from libtbx.utils import Sorry
                raise Sorry("Missing file for get_sequence_from_pdb: %s" %(
                    file_name))
            # Use a context manager so the file handle is always closed.
            with open(file_name) as model_file:
                text = model_file.read()
        import iotbx.pdb
        pdb_inp = iotbx.pdb.input(lines=text.splitlines(),source_info="None")
        import mmtbx.model
        mm = mmtbx.model.manager(
            model_input = pdb_inp,
            stop_for_unknowns = False)
        hierarchy = mm.get_hierarchy()

    from iotbx.pdb import amino_acid_codes as aac
    one_letter_code_dict = aac.one_letter_given_three_letter

    chain_sequences = []
    for model in hierarchy.models():
        for chain in model.chains():
            letters = []
            for rg in chain.residue_groups():
                # Only the first atom group of a residue group contributes,
                # so a residue with several atom groups is counted once.
                for atom_group in rg.atom_groups():
                    letters.append(
                        one_letter_code_dict.get(atom_group.resname, ""))
                    break
            chain_sequences.append("".join(letters))
    return "".join(sequence + "\n" for sequence in chain_sequences)
def show(pdb_hierarchy, tm, xrs, grm, prefix):
    """Print overall and per-residue map correlations for a model.

    First prints one summary line with the correlation between the map
    computed from ``xrs`` and ``tm.data``, together with the third element
    of the bond and angle deviations reported by ``grm``. Then copies the
    coordinates of ``xrs`` into ``pdb_hierarchy`` and prints, for every
    residue, its name, its rotamer evaluation and the correlation of the two
    maps on the grid points within a radius of 2 around its atoms.

    Parameters:
      pdb_hierarchy: hierarchy to update from ``xrs`` and iterate over.
      tm: target map object; ``tm.data`` is compared with the computed map.
      xrs: x-ray structure providing sites and the unit cell.
      grm: restraints manager exposing ``energies_sites``.
      prefix: label printed at the start of every output line.
    """
    computed_map = compute_map(target_map=tm, xray_structure=xrs)
    overall_cc = flex.linear_correlation(
        x=computed_map.as_1d(), y=tm.data.as_1d()).coefficient()
    energies = grm.energies_sites(sites_cart=xrs.sites_cart())
    angle_rmsd = energies.angle_deviations()[2]
    bond_rmsd = energies.bond_deviations()[2]
    print("%s: overall CC: %6.4f rmsd_bonds=%6.3f rmsd_angles=%6.3f" % (
        prefix, overall_cc, bond_rmsd, angle_rmsd))
    pdb_hierarchy.adopt_xray_structure(xrs)
    rotamer_manager = RotamerEval()
    residue_line = "%s: %4s %10s CC: %6.4f"
    for model in pdb_hierarchy.models():
        for chain in model.chains():
            for residue in chain.residues():
                residue_sites = residue.atoms().extract_xyz()
                # Grid points around the atoms of this residue.
                around = maptbx.grid_indices_around_sites(
                    unit_cell=xrs.unit_cell(),
                    fft_n_real=computed_map.focus(),
                    fft_m_real=computed_map.all(),
                    sites_cart=residue_sites,
                    site_radii=flex.double(residue_sites.size(), 2))
                residue_cc = flex.linear_correlation(
                    x=computed_map.select(around).as_1d(),
                    y=tm.data.select(around).as_1d()).coefficient()
                print(residue_line % (
                    prefix,
                    residue.resname,
                    rotamer_manager.evaluate_residue(residue),
                    residue_cc))
def common_map_values(pdb_hierarchy, unit_cell, map_data):
    """Return a typical map value for every (chain, residue name, atom name).

    Map values are interpolated at each atom position and grouped under the
    key ``"<chain id>_<resname>_<stripped atom name>"``. Each group is
    reduced to the mean of its values lying strictly between one third and
    three times the group mean. Groups with 10 or fewer values surviving
    that filter get the equally filtered mean over all atoms instead.

    Parameters:
      pdb_hierarchy: hierarchy whose atoms are sampled.
      unit_cell: used to fractionalize atom coordinates.
      map_data: map offering ``eight_point_interpolation``.

    Returns:
      dict mapping each key to its representative map value.
    """
    def mean_filtered(values):
        # Boolean selection of values strictly inside (mean/3, mean*3).
        mean = flex.mean_default(values, 0)
        return (values < mean*3) & (values > mean/3)

    values_by_key = {}
    for model in pdb_hierarchy.models():
        for chain in model.chains():
            for residue_group in chain.residue_groups():
                for conformer in residue_group.conformers():
                    residue = conformer.only_residue()
                    for atom in residue.atoms():
                        site_frac = unit_cell.fractionalize(atom.xyz)
                        map_value = map_data.eight_point_interpolation(
                            site_frac)
                        key = "%s_%s_%s"%(
                            chain.id, residue.resname, atom.name.strip())
                        if key not in values_by_key:
                            values_by_key[key] = flex.double()
                        values_by_key[key].append(map_value)

    pooled = flex.double()
    for values in values_by_key.values():
        pooled.extend(values)
    overall_mean = flex.mean_default(pooled.select(mean_filtered(pooled)), 0)

    result = {}
    for key, values in values_by_key.items():
        keep = mean_filtered(values)
        if keep.count(True) > 10:
            result[key] = flex.mean_default(values.select(keep), 0)
        else:
            # Too few reliable samples: fall back to the global value.
            result[key] = overall_mean
    return result
def run(args):
    """Strip atoms not listed in ``ala_atom_names`` and write 'polyala.pdb'.

    Reads the single model file named in ``args``, prints its composition,
    then walks the hierarchy printing chain ids, residue ids and, for atom
    groups whose residue name is in ``aa_resnames``, every atom that gets
    removed. The edited hierarchy is written to 'polyala.pdb'.

    Parameters:
      args: list holding exactly one model file name.

    Raises:
      RuntimeError: if ``args`` does not contain exactly one entry.
    """
    if len(args) != 1:
        raise RuntimeError("Please specify one pdb file name.")
    model_filename = args[0]
    pdb_inp = iotbx.pdb.input(file_name=model_filename)
    model_manager = mmtbx.model.manager(model_input=pdb_inp)
    model_manager.composition().show(log=sys.stdout)
    pdb_hierarchy = model_manager.get_hierarchy()
    print('\nLoop over hierarchy:')
    for hierarchy_model in pdb_hierarchy.models():
        for chain in hierarchy_model.chains():
            print('Chain: ', chain.id)
            for rg in chain.residue_groups():
                print(' Resnumber: ', rg.resid())
                for ag in rg.atom_groups():
                    if ag.resname not in aa_resnames:
                        continue
                    print(' Resname: %s, Altloc: %s' % (
                        ag.resname, ag.altloc))
                    # NOTE(review): nesting was reconstructed from a
                    # whitespace-collapsed source; atom removal is assumed
                    # to apply only to amino-acid atom groups - confirm.
                    for atom in ag.atoms():
                        if atom.name in ala_atom_names:
                            continue
                        print(' %s' % atom.name)
                        ag.remove_atom(atom=atom)
    #print(help(pdb_hierarchy.write_pdb_file))
    pdb_hierarchy.write_pdb_file(file_name='polyala.pdb')
def have_conformers(pdb_hierarchy):
    """Tell whether any residue group in the hierarchy has conformers.

    Parameters:
      pdb_hierarchy: object exposing ``models()`` -> ``chains()`` ->
        ``residue_groups()``.

    Returns:
      True as soon as one residue group reports ``have_conformers()``,
      False if none does (including an empty hierarchy).
    """
    return any(
        residue_group.have_conformers()
        for model in pdb_hierarchy.models()
        for chain in model.chains()
        for residue_group in chain.residue_groups())
def build_sym_atom_hash(pdb_hierarchy):
    """Map each atom of a symmetric side-chain pair to its partner.

    For ASP (OD1/OD2), GLU (OE1/OE2) and PHE/TYR (CD1/CD2) residues, looks
    up the two atoms by name in every conformer and records their
    ``i_seq`` values in both directions.

    Parameters:
      pdb_hierarchy: object exposing ``models()`` -> ``chains()`` ->
        ``conformers()`` -> ``residues()`` -> ``atoms()``.

    Returns:
      dict ``{i_seq: partner_i_seq}``. Residues missing either atom of the
      pair contribute nothing.
    """
    # Residue name (upper case) -> names of the two interchangeable atoms.
    symmetric_atom_names = {
        'ASP': (' OD1', ' OD2'),
        'GLU': (' OE1', ' OE2'),
        'PHE': (' CD1', ' CD2'),
        'TYR': (' CD1', ' CD2'),
    }
    sym_atom_hash = dict()
    for model in pdb_hierarchy.models():
        for chain in model.chains():
            for conformer in chain.conformers():
                for residue in conformer.residues():
                    pair = symmetric_atom_names.get(residue.resname.upper())
                    if pair is None:
                        continue
                    atom1, atom2 = pair
                    atom1_i_seq = None
                    atom2_i_seq = None
                    for atom in residue.atoms():
                        if atom.name == atom1:
                            atom1_i_seq = atom.i_seq
                        elif atom.name == atom2:
                            atom2_i_seq = atom.i_seq
                    # Identity test: i_seq 0 is a valid index and must not
                    # be confused with "not found".
                    if atom1_i_seq is not None and atom2_i_seq is not None:
                        sym_atom_hash[atom1_i_seq] = atom2_i_seq
                        sym_atom_hash[atom2_i_seq] = atom1_i_seq
    return sym_atom_hash
def get_c_alpha_hinges(pdb_hierarchy, xray_structure=None, selection=None):
    """Collect, per interior C-alpha, the hinge nodes and the moving atoms.

    Every atom group that has CA, C, N and O atoms (all of them selected)
    yields one backbone record. For each record that has a connected
    predecessor and successor (as judged by
    ``check_residues_are_connected``), an entry keyed by the CA ``i_seq`` is
    stored as ``[nodes, moving]``:

      * ``nodes``  - ``i_seq`` of the previous and the next CA;
      * ``moving`` - ``i_seq`` of the previous C and O, the next N and, when
        present, the next H.

    Parameters:
      pdb_hierarchy: hierarchy to scan.
      xray_structure: optional source of coordinates; used only to size the
        default selection.
      selection: optional boolean array indexed by ``i_seq``; defaults to
        all True.

    Returns:
      dict ``{ca_i_seq: [nodes, moving]}``.
    """
    #
    # used in rotamer_search.py
    if xray_structure is not None:
        sites_cart = xray_structure.sites_cart()
    else:
        sites_cart = pdb_hierarchy.atoms().extract_xyz()
    if selection is None:
        selection = flex.bool(len(sites_cart), True)

    # NOTE(review): PDB atom names are normally four characters wide; the
    # three-character literals below are reproduced as they appear in the
    # source, which may have had runs of spaces collapsed - confirm.
    name_ca, name_c, name_n, name_o, name_h = " CA ", " C ", " N ", " O ", " H "
    wanted = (name_ca, name_c, name_n, name_o, name_h)

    c_alphas = []
    for model in pdb_hierarchy.models():
        for chain in model.chains():
            for residue_group in chain.residue_groups():
                for atom_group in residue_group.atom_groups():
                    # Last atom seen with a given name wins.
                    by_name = {}
                    for atom in atom_group.atoms():
                        if atom.name in wanted:
                            by_name[atom.name] = atom
                    cur_ca = by_name.get(name_ca)
                    cur_c = by_name.get(name_c)
                    cur_n = by_name.get(name_n)
                    cur_o = by_name.get(name_o)
                    cur_h = by_name.get(name_h)
                    required = (cur_ca, cur_c, cur_n, cur_o)
                    if any(atom is None for atom in required):
                        continue
                    if not all(selection[atom.i_seq] for atom in required):
                        continue
                    moving_tpl = (cur_n, cur_c, cur_o)
                    if cur_h is not None:
                        moving_tpl = moving_tpl + (cur_h,)
                    c_alphas.append((cur_ca, moving_tpl))

    c_alpha_hinges = {}
    # The first and last records have no neighbour on one side.
    for index in range(1, len(c_alphas) - 1):
        before = c_alphas[index - 1]
        here = c_alphas[index]
        after = c_alphas[index + 1]
        prev_connected = check_residues_are_connected(before[0], here[0])
        next_connected = check_residues_are_connected(here[0], after[0])
        if not (prev_connected and next_connected):
            continue
        nodes = (before[0].i_seq, after[0].i_seq)
        moving = (before[1][1].i_seq, before[1][2].i_seq, after[1][0].i_seq)
        if len(after[1]) > 3:
            moving = moving + (after[1][3].i_seq,)
        c_alpha_hinges[here[0].i_seq] = [nodes, moving]
    return c_alpha_hinges
def run(args):
    """Template: load one model file and walk its hierarchy.

    Prints the model composition and each chain id; the residue-group loop
    body is a placeholder for user code.

    Parameters:
      args: list holding exactly one model file name.

    Raises:
      RuntimeError: if ``args`` does not contain exactly one entry.
    """
    if len(args) != 1:
        raise RuntimeError("Please specify one pdb file name.")
    model_filename = args[0]
    pdb_inp = iotbx.pdb.input(file_name=model_filename)
    model_manager = mmtbx.model.manager(model_input=pdb_inp)
    model_manager.composition().show(log=sys.stdout)
    pdb_hierarchy = model_manager.get_hierarchy()
    print('\nLoop over hierarchy:')
    for hierarchy_model in pdb_hierarchy.models():
        for chain in hierarchy_model.chains():
            print('Chain: ', chain.id)
            for rg in chain.residue_groups():
                pass  # Add your code here
def set_chain_id_by_region(m, m_ca, regions_list, log=sys.stdout):
    """Rebuild ``m`` so that each region ends up in its own chain.

    ``regions_list`` assigns a region number to each atom of ``m_ca``. The
    assignment is transferred to all atoms of ``m`` through the residue
    sequence number (atoms whose residue number is unknown get region 0).
    For each unique region the matching part of ``m`` is selected, its first
    chain of its first model is renamed according to
    ``get_region_name_dict`` and the pieces are merged with ``add_model``.

    Parameters:
      m: full model.
      m_ca: reduced model whose atoms line up with ``regions_list``.
      regions_list: region number per atom of ``m_ca``.
      log: stream receiving the per-region selection summary.

    Returns:
      group_args with ``model`` (the merged model) and ``chainid_list``.
    """
    # Set chainid based on regions_list
    reduced_atoms = m_ca.get_hierarchy().atoms()  # new
    unique_regions = get_unique_values(regions_list)
    region_name_dict, chainid_list = get_region_name_dict(m, unique_regions)

    # Residue number -> region (a later atom overrides an earlier one).
    region_by_resseq = {}
    for atom, region_number in zip(reduced_atoms, regions_list):
        resseq = atom.parent().parent().resseq_as_int()
        region_by_resseq[resseq] = region_number

    # And apply to full model
    full_region_list = flex.int()
    for atom in m.get_hierarchy().atoms():
        resseq = atom.parent().parent().resseq_as_int()
        full_region_list.append(region_by_resseq.get(resseq, 0))

    # Now create new model with chains based on region list
    full_new_model = None
    print("\nSelection list based on domains:", file=log)
    for region_number in unique_regions:
        new_m = m.select(full_region_list == region_number)
        selection_string = selection_string_from_model(
            new_m.apply_selection_string("name ca or name P"))
        print("%s (%s residues) " % (
            selection_string,
            new_m.get_hierarchy().overall_counts().n_residues), file=log)
        # Now put all of new_m in a chain with chain.id = str(region_number)
        for model in new_m.get_hierarchy().models()[:1]:  # only one model
            for chain in model.chains()[:1]:  # only allowing one chain
                chain.id = region_name_dict[region_number]
        if full_new_model:
            full_new_model = add_model(full_new_model, new_m)
        else:
            full_new_model = new_m
    m = full_new_model
    # All done
    return group_args(
        group_args_type='model_info',
        model=m,
        chainid_list=chainid_list)
def get_ligands(self, ph):
    """Return the atom selections of residue groups that look like ligands.

    A residue group counts as a ligand when at least one of its residue
    names has a class outside the polymer / water / element classes listed
    in ``exclude``.

    Parameters:
      ph: hierarchy exposing ``models()`` -> ``chains()`` ->
        ``residue_groups()``.

    Returns:
      dict keyed by ``(model.id, chain.id, rg.resseq)`` whose values are the
      ``i_seq`` selections of all atoms of the residue group.
    """
    # Store ligands as list of iselections --> better way? Careful if H will
    # be added at some point!
    get_class = iotbx.pdb.common_residue_names_get_class
    exclude = [
        "common_amino_acid",
        "modified_amino_acid",
        "common_rna_dna",
        "modified_rna_dna",
        "ccp4_mon_lib_rna_dna",
        "common_water",
        "common_element",
    ]
    ligand_isel_dict = {}
    for model in ph.models():
        for chain in model.chains():
            for rg in chain.residue_groups():
                for resname in rg.unique_resnames():
                    if get_class(name=resname) in exclude:
                        continue
                    iselection = rg.atoms().extract_i_seq()
                    ligand_isel_dict[(model.id, chain.id, rg.resseq)] = \
                        iselection
    return ligand_isel_dict
def hierarchy_from_selection(pdb_hierarchy, selection, log):
    """Build a single-model, single-chain hierarchy from a selection.

    The selected part of ``pdb_hierarchy`` is scanned chain by chain and
    conformer by conformer. Conformers that are neither protein nor nucleic
    acid are ignored. The first accepted conformer fixes the alternate
    location id; for every accepted conformer with that id a detached copy
    of its chain is added to the new model. Conformers with a different id
    only produce a message on ``log``.

    Parameters:
      pdb_hierarchy: source hierarchy.
      selection: selection passed to ``pdb_hierarchy.select``.
      log: stream with a ``write`` method; ``None`` means standard output.

    Returns:
      A new ``iotbx.pdb.hierarchy.root`` holding one model with one chain.

    Raises:
      Sorry: if the resulting model does not contain exactly one chain
        (this also fires when no chain was accepted at all).
    """
    import sys
    import iotbx.pdb.hierarchy
    # ``print >> log`` is Python 2 only syntax (a TypeError at run time on
    # Python 3); writing to the stream directly works on both.
    out = sys.stdout if log is None else log
    temp_hierarchy = pdb_hierarchy.select(selection)
    altloc = None
    hierarchy = iotbx.pdb.hierarchy.root()
    model = iotbx.pdb.hierarchy.model()
    for chain in temp_hierarchy.chains():
        for conformer in chain.conformers():
            if not conformer.is_protein() and not conformer.is_na():
                continue
            if altloc is None or conformer.altloc == altloc:
                model.append_chain(chain.detached_copy())
                altloc = conformer.altloc
            else:
                out.write(
                    "* Multiple alternate conformations found, using altid %s *"
                    % altloc + "\n")
    if len(model.chains()) != 1:
        raise Sorry("more than one chain in selection")
    hierarchy.append_model(model)
    return hierarchy
def run(args):
    """Truncate amino-acid residues to the atoms in ``ala_atom_names``.

    Reads the single model file named in ``args``, prints its composition,
    removes from every residue group accepted by ``rg_has_amino_acid`` all
    atoms whose name is not in ``ala_atom_names`` and prints the counts. If
    anything was removed, the result is written next to the input as
    ``<input stem>_truncated_to_ala.pdb`` with the input crystal symmetry.

    Parameters:
      args: list holding exactly one model file name.

    Raises:
      RuntimeError: if ``args`` does not contain exactly one entry.
    """
    if len(args) != 1:
        raise RuntimeError("Please specify one pdb file name.")
    model_filename = args[0]
    pdb_inp = iotbx.pdb.input(file_name=model_filename)
    model_manager = mmtbx.model.manager(model_input=pdb_inp)
    model_manager.composition().show(log=sys.stdout)
    pdb_hierarchy = model_manager.get_hierarchy()

    n_amino_acid_residues = 0
    n_other_residues = 0
    n_atoms_removed = 0
    for hierarchy_model in pdb_hierarchy.models():
        for chain in hierarchy_model.chains():
            for rg in chain.residue_groups():
                if not rg_has_amino_acid(rg):
                    n_other_residues += 1
                    continue
                n_amino_acid_residues += 1
                for ag in rg.atom_groups():
                    for atom in ag.atoms():
                        if atom.name in ala_atom_names:
                            continue
                        ag.remove_atom(atom=atom)
                        n_atoms_removed += 1

    print("\nNumber of amino acid residues:", n_amino_acid_residues)
    print("Number of other residues:", n_other_residues)
    print("Number of atoms removed:", n_atoms_removed)
    if n_atoms_removed == 0:
        # Nothing changed, so no output file is produced.
        return
    stem = os.path.splitext(model_filename)[0]
    output_pdb = stem + "_truncated_to_ala.pdb"
    print("Writing file: ", output_pdb)
    pdb_hierarchy.write_pdb_file(
        file_name=output_pdb,
        crystal_symmetry=pdb_inp.crystal_symmetry())
def rotatable(pdb_hierarchy, mon_lib_srv, restraints_manager, log):
    """
    General tool to identify rotatable H, such as C-O-H, C-H3, in any molecule.

    Amino-acid residues that are neither first nor last in their chain are
    analysed with the residue-specific rotatable-bond tables. Every other
    residue is analysed through a tardy model built from the bond proxies of
    ``restraints_manager``; this second path is skipped entirely when
    ``restraints_manager`` is None. PRO residues and single-atom waters are
    always skipped.

    Parameters:
      pdb_hierarchy: hierarchy to scan.
      mon_lib_srv: monomer library server passed to
        ``rotatable_bonds.axes_and_atoms_aa_specific``.
      restraints_manager: restraints manager or None.
      log: stream passed to the residue-specific analysis.

    Returns:
      list of ``[axis, rot_atoms]`` where ``axis`` is a pair of ``i_seq``
      values and ``rot_atoms`` the ``i_seq`` values of the H/D atoms
      rotating about it.

    NOTE(review): the nesting below was reconstructed from a
    whitespace-collapsed source - confirm against the repository copy.
    """
    result = []

    def analyze_group_aa_specific(g, atoms):
        # Keep the (axis, atoms) entries of ``g`` whose axis holds no H/D
        # and whose rotating atoms are all H/D; indices in ``g`` are local
        # to ``atoms`` and are converted to i_seq. Returns None if nothing
        # is kept.
        result = []
        for gi in g:
            assert len(gi[0]) == 2  # because this is axis
            assert len(
                gi[1]) > 0  # because these are atoms rotating about this axis
            # condition 1: axis does not contain H or D
            a1, a2 = atoms[gi[0][0]], atoms[gi[0][1]]
            e1 = a1.element.strip().upper()
            e2 = a2.element.strip().upper()
            condition_1 = [e1, e2].count("H") == 0 and [e1, e2].count("D") == 0
            # condition 2: all atoms to rotate are H or D
            condition_2 = True
            rot_atoms = []
            for gi1i in gi[1]:
                if (not atoms[gi1i].element.strip().upper() in ["H", "D"]):
                    condition_2 = False
                    break
            rot_atoms = []
            axis = None
            if (condition_1 and condition_2):
                axis = [a1.i_seq, a2.i_seq]
                for gi1i in gi[1]:
                    rot_atoms.append(atoms[gi1i].i_seq)
                result.append([axis, rot_atoms])
        # NOTE(review): chained comparison, evaluated as
        # ``len(result) > 0 and 0 is not None``, i.e. just ``len(result) > 0``.
        if (len(result) > 0 is not None): return result
        else: return None

    def analyze_group_general(g, atoms, bps, psel):
        # Same idea as above for arbitrary residues, with two extra
        # requirements: an axis atom must be bonded (per ``bps``) to H/D
        # atoms that are among the rotating atoms, and none of those H/D
        # atoms may have its i_seq in ``psel`` (collected from the
        # planarity proxies by the caller). Returns None if nothing is kept.
        result = []
        for gi in g:
            condition_1, condition_2, condition_3 = None, None, None
            assert len(gi[0]) == 2  # because this is axis
            assert len(
                gi[1]) > 0  # because these are atoms rotating about this axis
            # condition 1: axis does not contain H or D
            a1, a2 = atoms[gi[0][0]], atoms[gi[0][1]]
            e1 = a1.element.strip().upper()
            e2 = a2.element.strip().upper()
            condition_1 = [e1, e2].count("H") == 0 and [e1, e2].count("D") == 0
            s1 = set(gi[1])
            if (condition_1):
                # condition 2: all atoms to rotate are H or D
                condition_2 = True
                for gi1i in gi[1]:
                    if (not atoms[gi1i].element.strip().upper() in ["H", "D"]):
                        condition_2 = False
                        break
                if (condition_2):
                    # condition 3: one of axis atoms is terminal (bonded to
                    # another axis atom and hydrogens
                    condition_3 = False
                    for gia in gi[0]:
                        bonds_involved_into = []
                        for bp in bps:
                            if (gia in bp.i_seqs):
                                for i_seq in bp.i_seqs:
                                    if (atoms[i_seq].element.strip().upper() in ["H", "D"]):
                                        bonds_involved_into.append(i_seq)
                        s2 = set(bonds_involved_into)
                        s = list(s1 & s2)
                        if (len(s) > 0): condition_3 = True
                    #
                    # At this point ``bonds_involved_into`` holds the H/D
                    # neighbours of the LAST axis atom only.
                    if (condition_1 and condition_2 and condition_3):
                        axis = [a1.i_seq, a2.i_seq]
                        rot_atoms = []
                        in_plane = False
                        for i in bonds_involved_into:
                            if (atoms[i].i_seq in psel): in_plane = True
                            rot_atoms.append(atoms[i].i_seq)
                        if (not in_plane): result.append([axis, rot_atoms])
        # NOTE(review): same accidental chained comparison as in
        # analyze_group_aa_specific; equivalent to ``len(result) > 0``.
        if (len(result) > 0 is not None): return result
        else: return None

    # ``psel`` is only defined (and only used) when a restraints manager is
    # available.
    if (restraints_manager is not None):
        psel = flex.size_t()
        for p in restraints_manager.geometry.planarity_proxies:
            psel.extend(p.i_seqs)
    # very handy for debugging: do not remove
    #NAMES = pdb_hierarchy.atoms().extract_name()
    #
    get_class = iotbx.pdb.common_residue_names_get_class
    import scitbx.graph.tardy_tree
    for model in pdb_hierarchy.models():
        for chain in model.chains():
            residue_groups = chain.residue_groups()
            n_residues = len(residue_groups)
            for i_rg, residue_group in enumerate(residue_groups):
                first_or_last = i_rg == 0 or i_rg + 1 == n_residues
                conformers = residue_group.conformers()  # unused below
                for conformer in residue_group.conformers():
                    for residue in conformer.residues():
                        if (residue.resname.strip().upper() == "PRO"): continue
                        atoms = residue.atoms()
                        if (get_class(name=residue.resname) == "common_water"
                                and len(atoms) == 1):
                            continue
                        if (get_class(name=residue.resname) == "common_amino_acid"
                                and not first_or_last):
                            fr = rotatable_bonds.axes_and_atoms_aa_specific(
                                residue=residue,
                                mon_lib_srv=mon_lib_srv,
                                remove_clusters_with_all_h=False,
                                log=log)
                            if (fr is not None):
                                r = analyze_group_aa_specific(g=fr, atoms=atoms)
                                if (r is not None):
                                    for r_ in r: result.append(r_)
                        elif (restraints_manager is not None):
                            elements = atoms.extract_element()  # unused below
                            names = atoms.extract_name()  # unused below
                            # create tardy_model
                            iselection = atoms.extract_i_seq()
                            sites_cart = atoms.extract_xyz()
                            masses = [1] * sites_cart.size()
                            labels = range(sites_cart.size())
                            grm_i = restraints_manager.select(iselection)
                            bps, asu = grm_i.geometry.get_all_bond_proxies(
                                sites_cart=sites_cart)
                            edge_list = []
                            for bp in bps: edge_list.append(bp.i_seqs)
                            fixed_vertex_lists = []  # unused below
                            tmp_r = []
                            # try all possible edges (bonds) as potential
                            # fixed vertices and accept only non-redundant
                            for bp in bps:
                                tardy_tree = scitbx.graph.tardy_tree.construct(
                                    sites=sites_cart,
                                    edge_list=edge_list,
                                    fixed_vertex_lists=[bp.i_seqs])
                                tardy_model = scitbx.rigid_body.tardy_model(
                                    labels=labels,
                                    sites=sites_cart,
                                    masses=masses,
                                    tardy_tree=tardy_tree,
                                    potential_obj=None)
                                fr = rotatable_bonds.axes_and_atoms_aa_specific(
                                    residue=residue,
                                    mon_lib_srv=mon_lib_srv,
                                    remove_clusters_with_all_h=False,
                                    log=None,
                                    tardy_model=tardy_model)
                                if (fr is not None):
                                    r = analyze_group_general(
                                        g=fr, atoms=atoms, bps=bps, psel=psel)
                                    if (r is not None and len(r) > 0):
                                        for r_ in r:
                                            # NOTE(review): the membership
                                            # test is duplicated; the inner
                                            # one is redundant.
                                            if (not r_ in tmp_r):
                                                if (not r_ in tmp_r): tmp_r.append(r_)
                            for r in tmp_r:
                                result.append(r)
    # very handy for debugging: do not remove
    #for r_ in result:
    #  print " analyze_group:", r_, \
    #    [NAMES[i] for i in r_[0]], [NAMES[i] for i in r_[1]], residue.resname
    return result
def exercise_multi_model_single_chain():
    # Regression test: an mmCIF atom_site loop whose first model (model
    # number 1) holds two chains, A and B, while models 2-5 hold chain A
    # only. Checks that the constructed hierarchy has 5 models and 48 atoms
    # with the expected per-model and per-chain atom counts.
    # NOTE(review): the literal below is reproduced exactly as it appears in
    # this copy of the file; its original line breaks may have been lost -
    # confirm against the repository version.
    inp_txt = """ data_5UZL loop_ _atom_site.group_PDB _atom_site.id _atom_site.type_symbol _atom_site.label_atom_id _atom_site.label_alt_id _atom_site.label_comp_id _atom_site.label_asym_id _atom_site.label_entity_id _atom_site.label_seq_id _atom_site.pdbx_PDB_ins_code _atom_site.Cartn_x _atom_site.Cartn_y _atom_site.Cartn_z _atom_site.occupancy _atom_site.B_iso_or_equiv _atom_site.pdbx_formal_charge _atom_site.auth_seq_id _atom_site.auth_comp_id _atom_site.auth_asym_id _atom_site.auth_atom_id _atom_site.pdbx_PDB_model_num ATOM 1 N N . ASN A 1 1 ? 1.329 0.000 0.000 1.00 1.00 ? 1 ASN A N 1 ATOM 2 C CA . ASN A 1 1 ? 2.093 -0.001 -1.242 1.00 64.21 ? 1 ASN A CA 1 ATOM 3 C C . ASN A 1 1 ? 1.973 -1.345 -1.954 1.00 21.54 ? 1 ASN A C 1 ATOM 4 O O . ASN A 1 1 ? 2.071 -1.423 -3.178 1.00 42.13 ? 1 ASN A O 1 ATOM 5 C CB . ASN A 1 1 ? 3.565 0.309 -0.960 1.00 52.42 ? 1 ASN A CB 1 ATOM 6 C CG . ASN A 1 1 ? 4.305 0.774 -2.199 1.00 64.34 ? 1 ASN A CG 1 ATOM 7 O OD1 . ASN A 1 1 ? 4.331 0.081 -3.217 1.00 14.30 ? 1 ASN A OD1 1 ATOM 8 N ND2 . ASN A 1 1 ? 4.913 1.952 -2.118 1.00 64.45 ? 1 ASN A ND2 1 ATOM 1 N N . ASN B 1 1 ? 1.329 0.000 0.000 1.00 1.00 ? 1 ASN B N 1 ATOM 2 C CA . ASN B 1 1 ? 2.093 -0.001 -1.242 1.00 64.21 ? 1 ASN B CA 1 ATOM 3 C C . ASN B 1 1 ? 1.973 -1.345 -1.954 1.00 21.54 ? 1 ASN B C 1 ATOM 4 O O . ASN B 1 1 ? 2.071 -1.423 -3.178 1.00 42.13 ? 1 ASN B O 1 ATOM 5 C CB . ASN B 1 1 ? 3.565 0.309 -0.960 1.00 52.42 ? 1 ASN B CB 1 ATOM 6 C CG . ASN B 1 1 ? 4.305 0.774 -2.199 1.00 64.34 ? 1 ASN B CG 1 ATOM 7 O OD1 . ASN B 1 1 ? 4.331 0.081 -3.217 1.00 14.30 ? 1 ASN B OD1 1 ATOM 8 N ND2 . ASN B 1 1 ? 4.913 1.952 -2.118 1.00 64.45 ? 1 ASN B ND2 1 ATOM 542 N N . ASN A 1 1 ? 1.728 -3.986 -1.323 1.00 51.14 ? 1 ASN A N 2 ATOM 543 C CA . ASN A 1 1 ? 2.250 -2.656 -1.616 1.00 71.03 ? 1 ASN A CA 2 ATOM 544 C C . ASN A 1 1 ? 1.152 -1.749 -2.162 1.00 53.34 ? 1 ASN A C 2 ATOM 545 O O . ASN A 1 1 ? 0.899 -1.718 -3.367 1.00 12.41 ? 
1 ASN A O 2 ATOM 546 C CB . ASN A 1 1 ? 3.399 -2.747 -2.622 1.00 42.32 ? 1 ASN A CB 2 ATOM 547 C CG . ASN A 1 1 ? 4.579 -3.531 -2.082 1.00 65.14 ? 1 ASN A CG 2 ATOM 548 O OD1 . ASN A 1 1 ? 5.209 -3.132 -1.102 1.00 55.44 ? 1 ASN A OD1 2 ATOM 549 N ND2 . ASN A 1 1 ? 4.886 -4.654 -2.722 1.00 35.14 ? 1 ASN A ND2 2 ATOM 1083 N N . ASN A 1 1 ? 0.315 -4.452 -3.331 1.00 42.01 ? 1 ASN A N 3 ATOM 1084 C CA . ASN A 1 1 ? 0.480 -3.854 -2.011 1.00 52.12 ? 1 ASN A CA 3 ATOM 1085 C C . ASN A 1 1 ? 0.359 -2.335 -2.083 1.00 54.35 ? 1 ASN A C 3 ATOM 1086 O O . ASN A 1 1 ? 0.991 -1.690 -2.920 1.00 11.31 ? 1 ASN A O 3 ATOM 1087 C CB . ASN A 1 1 ? 1.836 -4.241 -1.419 1.00 2.14 ? 1 ASN A CB 3 ATOM 1088 C CG . ASN A 1 1 ? 1.801 -4.332 0.095 1.00 41.02 ? 1 ASN A CG 3 ATOM 1089 O OD1 . ASN A 1 1 ? 1.394 -3.390 0.775 1.00 22.22 ? 1 ASN A OD1 3 ATOM 1090 N ND2 . ASN A 1 1 ? 2.229 -5.470 0.629 1.00 42.11 ? 1 ASN A ND2 3 ATOM 1624 N N . ASN A 1 1 ? 0.304 3.617 0.905 1.00 11.20 ? 1 ASN A N 4 ATOM 1625 C CA . ASN A 1 1 ? 0.052 2.602 -0.112 1.00 4.42 ? 1 ASN A CA 4 ATOM 1626 C C . ASN A 1 1 ? 1.337 1.862 -0.471 1.00 21.12 ? 1 ASN A C 4 ATOM 1627 O O . ASN A 1 1 ? 2.321 2.471 -0.891 1.00 53.04 ? 1 ASN A O 4 ATOM 1628 C CB . ASN A 1 1 ? -0.547 3.244 -1.365 1.00 30.21 ? 1 ASN A CB 4 ATOM 1629 C CG . ASN A 1 1 ? 0.091 4.580 -1.692 1.00 41.01 ? 1 ASN A CG 4 ATOM 1630 O OD1 . ASN A 1 1 ? 1.289 4.658 -1.967 1.00 74.10 ? 1 ASN A OD1 4 ATOM 1631 N ND2 . ASN A 1 1 ? -0.708 5.640 -1.663 1.00 53.14 ? 1 ASN A ND2 4 ATOM 2165 N N . ASN A 1 1 ? 1.889 1.883 -2.225 1.00 51.45 ? 1 ASN A N 5 ATOM 2166 C CA . ASN A 1 1 ? 1.702 0.513 -1.762 1.00 4.32 ? 1 ASN A CA 5 ATOM 2167 C C . ASN A 1 1 ? 2.979 -0.302 -1.945 1.00 2.51 ? 1 ASN A C 5 ATOM 2168 O O . ASN A 1 1 ? 3.721 -0.105 -2.907 1.00 43.02 ? 1 ASN A O 5 ATOM 2169 C CB . ASN A 1 1 ? 0.548 -0.150 -2.516 1.00 42.12 ? 1 ASN A CB 5 ATOM 2170 C CG . ASN A 1 1 ? 0.810 -0.243 -4.007 1.00 71.20 ? 1 ASN A CG 5 ATOM 2171 O OD1 . ASN A 1 1 ? 1.124 -1.314 -4.528 1.00 12.34 ? 
1 ASN A OD1 5 ATOM 2172 N ND2 . ASN A 1 1 ? 0.682 0.881 -4.702 1.00 14.44 ? 1 ASN A ND2 5 """
    pdb_in = iotbx.pdb.input(lines=(inp_txt).splitlines(), source_info=None)
    pdb_hierarchy = pdb_in.construct_hierarchy()
    # pdb_hierarchy.show()
    # Five models in total: 16 atoms in the first one plus 4 x 8 atoms.
    assert len(pdb_hierarchy.models()) == 5
    assert pdb_hierarchy.atoms_size() == 48, pdb_hierarchy.atoms_size()
    for m_i, model in enumerate(pdb_hierarchy.models()):
        if m_i == 0:
            # First model: two chains of 8 atoms each.
            assert model.atoms_size() == 16, "%d, %s" % (model.atoms_size(), model.id)
            assert len(model.chains()) == 2
            for c in model.chains():
                assert c.atoms_size() == 8, "%d, %s" % (model.atoms_size(), model.id)
        else:
            # Remaining models: a single chain of 8 atoms.
            assert model.atoms_size() == 8, "%d, %s" % (model.atoms_size(), model.id)
            assert model.only_chain().atoms_size() == 8, "%d, %s" % (
                model.only_chain().atoms_size(), model.id)
def initialize(self):
    """Recompute all per-residue monitors and the global statistics.

    For every residue group with at most one conformer a ``residue_monitor``
    is stored in ``self.residue_monitors``. Amino acids get separate atom
    selections and map correlations for backbone (N, CA, C, O, CB), side
    chain and all atoms, plus the C / N selections and the rotamer status;
    other residues get only the all-atom selection and correlation.

    Afterwards the global attributes are refreshed: ``five_cc``, the three
    ``map_cc_*`` values, ``rmsd_a`` / ``rmsd_b`` (only when a geometry
    restraints manager is present), ``dist_from_start`` and
    ``number_of_rotamer_outliers``. The hierarchy / x-ray structure
    synchronisation is asserted on entry and on exit.
    """
    self.assert_pdb_hierarchy_xray_structure_sync()
    # residue monitors
    self.residue_monitors = []
    backbone_atoms = ["N", "CA", "C", "O", "CB"]
    get_class = iotbx.pdb.common_residue_names_get_class
    sites_cart = self.xray_structure.sites_cart()
    current_map = self.compute_map(xray_structure=self.xray_structure)

    def cc_or_none(sites):
        # Map correlation around ``sites``; None for an empty selection.
        if sites.size() == 0:
            return None
        return self.map_cc(sites_cart=sites, other_map=current_map)

    for model in self.pdb_hierarchy.models():
        for chain in model.chains():
            for residue_group in chain.residue_groups():
                # Residue groups with alternative conformations are ignored.
                if len(residue_group.conformers()) > 1:
                    continue
                for conformer in residue_group.conformers():
                    residue = conformer.only_residue()
                    id_str = "%s%s%s" % (
                        chain.id, residue.resname, residue.resseq.strip())

                    if get_class(residue.resname) != "common_amino_acid":
                        isel_all = residue.atoms().extract_i_seq()
                        sites_all = sites_cart.select(isel_all)
                        cc_all = self.map_cc(
                            sites_cart=sites_all, other_map=current_map)
                        self.residue_monitors.append(
                            residue_monitor(
                                residue=residue,
                                id_str=id_str,
                                selection_all=isel_all,
                                map_cc_all=cc_all))
                        continue

                    isel_backbone = flex.size_t()
                    isel_sidechain = flex.size_t()
                    isel_all = flex.size_t()
                    isel_c = flex.size_t()
                    isel_n = flex.size_t()
                    for atom in residue.atoms():
                        atom_name = atom.name.strip()
                        isel_all.append(atom.i_seq)
                        if atom_name in backbone_atoms:
                            isel_backbone.append(atom.i_seq)
                        else:
                            isel_sidechain.append(atom.i_seq)
                        if atom_name == "C":
                            isel_c.append(atom.i_seq)
                        if atom_name == "N":
                            isel_n.append(atom.i_seq)
                    sites_all = sites_cart.select(isel_all)
                    sites_sidechain = sites_cart.select(isel_sidechain)
                    sites_backbone = sites_cart.select(isel_backbone)
                    cc_sidechain = cc_or_none(sites_sidechain)
                    cc_all = cc_or_none(sites_all)
                    cc_backbone = cc_or_none(sites_backbone)
                    self.residue_monitors.append(
                        residue_monitor(
                            residue=residue,
                            id_str=id_str,
                            selection_sidechain=isel_sidechain,
                            selection_backbone=isel_backbone,
                            selection_all=isel_all,
                            selection_c=isel_c,
                            selection_n=isel_n,
                            map_cc_sidechain=cc_sidechain,
                            map_cc_backbone=cc_backbone,
                            map_cc_all=cc_all,
                            rotamer_status=self.rotamer_manager.
                            evaluate_residue(residue)))
    # globals
    self.five_cc = five_cc(
        map=self.target_map_object.map_data,
        xray_structure=self.xray_structure,
        d_min=self.target_map_object.d_min)
    self.map_cc_whole_unit_cell = self.map_cc(other_map=current_map)
    self.map_cc_around_atoms = self.map_cc(
        other_map=current_map, sites_cart=sites_cart)
    self.map_cc_per_atom = self.map_cc(
        other_map=current_map, sites_cart=sites_cart, per_atom=True)
    if self.geometry_restraints_manager is not None:
        energies = self.geometry_restraints_manager.energies_sites(
            sites_cart=sites_cart)
        self.rmsd_a = energies.angle_deviations()[2]
        self.rmsd_b = energies.bond_deviations()[2]
    self.dist_from_start = flex.mean(
        self.xray_structure_start.distances(other=self.xray_structure))
    self.number_of_rotamer_outliers = sum(
        1 for monitor in self.residue_monitors
        if monitor.rotamer_status == "OUTLIER")
    self.assert_pdb_hierarchy_xray_structure_sync()
def __init__(self, pdb_hierarchy, crystal_symmetry, restraints_manager = None, rotamer_evaluator = None, map_data = None, diff_map_data = None, map_data_scale = 2.5, diff_map_data_threshold = -2.5, cmv = None):
    """Classify side chains as rotamer outliers and/or poorly fitting.

    Walks all single-conformer amino-acid residues (ALA and GLY excluded,
    as are residues on special positions and residues whose side chain
    takes part in a bond to another residue) and fills three boolean atom
    selections: ``_sel_all``, ``_sel_outliers`` and ``_sel_poormap``.
    Summary lines are collected in ``self.mes``.

    Parameters:
      pdb_hierarchy: model hierarchy.
      crystal_symmetry: provides the unit cell.
      restraints_manager: optional; used to find inter-residue side-chain
        bonds.
      rotamer_evaluator: optional; when None every considered residue is
        counted as an outlier.
      map_data: optional map; an atom is "poor" when its value is below
        the reference value for its key divided by ``map_data_scale``.
      diff_map_data: optional difference map; an atom is "poor" when its
        value is below ``diff_map_data_threshold``.
      map_data_scale: divisor applied to the reference map value.
      diff_map_data_threshold: lower limit for difference map values.
      cmv: optional precomputed reference values as returned by
        ``common_map_values``; computed here if omitted and ``map_data``
        is given.

    NOTE(review): the nesting below was reconstructed from a
    whitespace-collapsed source - confirm against the repository copy.
    """
    t0 = time.time()
    if(cmv is None and map_data is not None):
        cmv = common_map_values(
            pdb_hierarchy = pdb_hierarchy,
            unit_cell = crystal_symmetry.unit_cell(),
            map_data = map_data)
    get_class = iotbx.pdb.common_residue_names_get_class
    mainchain=["C","N","O","CA","CB"]
    # Exclude side-chains involved into covalent bonds
    if(restraints_manager is not None):
        exclude_selection = flex.size_t()
        atoms = pdb_hierarchy.atoms()
        bond_proxies_simple, asu = restraints_manager.get_all_bond_proxies(
            sites_cart = atoms.extract_xyz())
        for proxy in bond_proxies_simple:
            i,j = proxy.i_seqs
            # is i the same as atoms[i].i_seq ? Shall I assert this?
            resseq_i = atoms[i].parent().parent().resseq
            resseq_j = atoms[j].parent().parent().resseq
            resname_i = atoms[i].parent().resname
            resname_j = atoms[j].parent().resname
            i_aa = get_class(resname_i)=="common_amino_acid"
            j_aa = get_class(resname_j)=="common_amino_acid"
            # Bond between two different residue numbers, neither end being
            # a main-chain atom, and at least one end an amino acid.
            if(resseq_i != resseq_j and (i_aa or j_aa) and
               not atoms[i].name.strip() in mainchain and
               not atoms[j].name.strip() in mainchain):
                if(i_aa): exclude_selection.append(atoms[i].i_seq)
                if(j_aa): exclude_selection.append(atoms[j].i_seq)
    #
    self.crystal_symmetry = crystal_symmetry
    unit_cell = crystal_symmetry.unit_cell()
    self.pdb_hierarchy = pdb_hierarchy
    self.special_position_indices = self._get_special_position_indices()
    self.cntr_residues = 0
    self.cntr_poormap = 0
    self.cntr_outliers = 0
    self.mes = []
    self._sel_outliers = flex.bool(pdb_hierarchy.atoms().size(), False)
    self._sel_poormap = flex.bool(pdb_hierarchy.atoms().size(), False)
    self._sel_all = flex.bool(pdb_hierarchy.atoms().size(), False)
    def skip(residue):
        # True for residues that must not be evaluated at all.
        if(get_class(residue.resname) != "common_amino_acid"): return True
        if(residue.resname.strip().upper() in ["ALA","GLY"]): return True
        if(self._on_spacial_position(residue)): return True
        if(restraints_manager is not None):
            for atom in residue.atoms():
                if(atom.i_seq in exclude_selection): return True
        return False
    for model in pdb_hierarchy.models():
        for chain in model.chains():
            for residue_group in chain.residue_groups():
                conformers = residue_group.conformers()
                # Residue groups with alternative conformations are ignored.
                if(len(conformers)>1): continue
                for conformer in residue_group.conformers():
                    residue = conformer.only_residue()
                    if(skip(residue)): continue
                    self.cntr_residues += 1 # count all
                    outlier = False
                    if(rotamer_evaluator is None or
                       rotamer_evaluator.evaluate_residue(residue)=="OUTLIER"):
                        outlier = True
                        self.cntr_outliers += 1
                    for atom in residue.atoms():
                        self._sel_all[atom.i_seq] = True
                        if(outlier): self._sel_outliers[atom.i_seq] = True
                    atoms = residue.atoms()
                    need_fix = False
                    poor_mainchain = False
                    # Always do MSE and MET
                    if(not need_fix and residue.resname in ["MSE", "MET"]): need_fix=True
                    # Check maps now
                    if(not need_fix):
                        # The scan stops at the first poor non-hydrogen atom,
                        # so atom order decides whether a main-chain or a
                        # side-chain atom is the one that triggers.
                        for atom in residue.atoms():
                            if(atom.element_is_hydrogen()): continue
                            an = atom.name.strip()
                            # Check map
                            if(map_data is not None):
                                key="%s_%s_%s"%(chain.id, residue.resname, an)
                                m_ref = cmv[key]
                                sf = unit_cell.fractionalize(atom.xyz)
                                m_cur = map_data.eight_point_interpolation(sf)
                                if(an in mainchain and m_cur < m_ref/map_data_scale):
                                    poor_mainchain = True
                                    break
                                if(not an in mainchain and m_cur < m_ref/map_data_scale):
                                    need_fix = True
                                    break
                            # Check Fo-Fc map
                            if(diff_map_data is not None):
                                sf = unit_cell.fractionalize(atom.xyz)
                                mv_diff = diff_map_data.eight_point_interpolation(sf)
                                if(an in mainchain and mv_diff < diff_map_data_threshold):
                                    poor_mainchain = True
                                    break
                                if(not an in mainchain and mv_diff<diff_map_data_threshold):
                                    need_fix = True
                                    break
                        # A poor main-chain atom vetoes fitting the side chain.
                        if(poor_mainchain): need_fix = False
                    #
                    if(need_fix):
                        self._sel_poormap = self._sel_poormap.set_selected(
                            atoms.extract_i_seq(), True)
                        self.cntr_poormap+=1
    #
    # NOTE(review): the first summary number adds the poor-map and outlier
    # counters, so a residue that is both is counted twice - confirm that
    # this is intended.
    fmt = "%-d residues out of total %-d (non-ALA, GLY, PRO) need a fit."
    self.mes.append(
        fmt%(self.cntr_poormap+self.cntr_outliers, self.cntr_residues))
    self.mes.append(" rotamer outliers: %d"%self.cntr_outliers)
    self.mes.append(" poor density : %d"%self.cntr_poormap)
    self.mes.append("time to evaluate : %-6.3f"%(time.time()-t0))
def residue_iteration(pdb_hierarchy, xray_structure, selection, target_map_data, model_map_data, residual_map_data, mon_lib_srv, rsr_manager, optimize_hd, params, log):
    """Try to improve the map fit of each amino-acid side chain in turn.

    For every amino-acid residue that ``select_map.is_refinement_needed``
    flags, a ``residue_rsr_monitor`` describing the starting state is
    appended to the result. Candidate conformations then come from the
    rotamer iterator and/or a torsion grid search (depending on ``params``),
    optionally followed by restrained real-space refinement. Accepted
    coordinates are written to the residue atoms and, at the very end, to
    ``xray_structure``. A table line is printed to ``log`` for residues
    whose two map scores both changed by more than 0.01.

    Parameters:
      pdb_hierarchy, xray_structure: the model, in both representations.
      selection: passed to ``include_residue_selection``.
      target_map_data, model_map_data, residual_map_data: the maps used for
        scoring; the first two must share focus and all.
      mon_lib_srv: NOTE(review): ignored - it is overwritten by a fresh
        server on the first line of the body.
      rsr_manager: provides ``refine_restrained``.
      optimize_hd: forwarded as ``ignore_hd`` and
        ``remove_clusters_with_all_h``.
      params: options (``ignore_alt_conformers``, ``poor_cc_threshold``,
        ``use_rotamer_iterator``, ``torsion_grid_search``,
        ``torsion_search``, ``real_space_refine_rotamer``).
      log: output stream.

    Returns:
      list of ``residue_rsr_monitor`` objects, one per residue processed.

    NOTE(review): ``print >> log`` is Python 2 syntax. The nesting below was
    reconstructed from a whitespace-collapsed source - confirm against the
    repository copy.
    """
    mon_lib_srv = mmtbx.monomer_library.server.server()
    assert target_map_data.focus() == model_map_data.focus()
    assert target_map_data.all() == model_map_data.all()
    # Table header and row formats.
    fmt1 = " |--------START--------| |-----FINAL----|"
    fmt2 = " residue map_cc 2mFo-DFc mFo-DFc 2mFo-DFc mFo-DFc" \
        " rotamer n_rot max_moved"
    fmt3 = " %12s%7.4f %8.2f %7.2f %8.2f %7.2f %7s %5d %8.3f"
    print >> log, fmt1
    print >> log, fmt2
    unit_cell = xray_structure.unit_cell()
    map_selector = select_map(
        unit_cell = xray_structure.unit_cell(),
        target_map_data = target_map_data,
        model_map_data = model_map_data)
    map_selector.initialize_rotamers()
    get_class = iotbx.pdb.common_residue_names_get_class
    # The three counters below are updated but never reported or returned.
    n_other_residues = 0
    n_amino_acids_ignored = 0
    n_amino_acids_scored = 0
    # Working copy of all coordinates; accepted changes accumulate here.
    sites_cart_start = xray_structure.sites_cart()
    result = []
    for model in pdb_hierarchy.models():
        for chain in model.chains():
            for residue_group in chain.residue_groups():
                conformers = residue_group.conformers()
                if(params.ignore_alt_conformers and len(conformers)>1): continue
                for conformer in residue_group.conformers():
                    residue = conformer.only_residue()
                    if(get_class(residue.resname) == "common_amino_acid"):
                        residue_iselection = residue.atoms().extract_i_seq()
                        sites_cart_residue = xray_structure.sites_cart().select(residue_iselection)
                        residue.atoms().set_xyz(new_xyz=sites_cart_residue)
                        max_moved_dist = 0
                        sites_cart_residue_start = sites_cart_residue.deep_copy()
                        # XXX assume that "atoms" are the same in residue and residue_groups
                        if(map_selector.is_refinement_needed(
                           residue_group = residue_group,
                           residue = residue,
                           cc_limit = params.poor_cc_threshold,
                           ignore_hd = optimize_hd)):
                            residue_id_str = residue.id_str(suppress_segid=1)[-12:]
                            rsel, rs = include_residue_selection(
                                selection = selection,
                                residue_iselection = residue_iselection)
                            cc_start = map_selector.get_cc(
                                sites_cart = sites_cart_residue,
                                residue_iselection = residue_iselection)
                            rotamer_id_best = None
                            # Scores whole-residue candidates against both maps.
                            rev = rotamer_evaluator(
                                sites_cart_start = sites_cart_residue,
                                unit_cell = unit_cell,
                                two_mfo_dfc_map = target_map_data,
                                mfo_dfc_map = residual_map_data)
                            residue_sites_best = sites_cart_residue.deep_copy()
                            rm = residue_rsr_monitor(
                                residue_id_str = residue_id_str,
                                selection = residue_iselection.deep_copy(),
                                sites_cart = sites_cart_residue.deep_copy(),
                                twomfodfc = rev.t1_start,
                                mfodfc = rev.t2_start,
                                cc = cc_start)
                            result.append(rm)
                            axes_and_atoms_to_rotate = rotatable_bonds.\
                                axes_and_atoms_aa_specific(
                                    residue = residue,
                                    mon_lib_srv = mon_lib_srv,
                                    remove_clusters_with_all_h = optimize_hd,
                                    log = log)
                            if(axes_and_atoms_to_rotate is not None and
                               len(axes_and_atoms_to_rotate) > 0):
                                # initialize criteria for first rotatable atom in each cluster
                                # (the last cluster is scored on all of its atoms)
                                rev_first_atoms = []
                                for i_aa, aa in enumerate(axes_and_atoms_to_rotate):
                                    if(i_aa == len(axes_and_atoms_to_rotate)-1):
                                        sites_aa = flex.vec3_double()
                                        for aa_ in aa[1]:
                                            sites_aa.append(sites_cart_residue[aa_])
                                    else:
                                        sites_aa = flex.vec3_double([sites_cart_residue[aa[1][0]]])
                                    rev_i = rotamer_evaluator(
                                        sites_cart_start = sites_aa,
                                        unit_cell = unit_cell,
                                        two_mfo_dfc_map = target_map_data,
                                        mfo_dfc_map = residual_map_data)
                                    rev_first_atoms.append(rev_i)
                                # get rotamer iterator
                                rotamer_iterator = lockit.get_rotamer_iterator(
                                    mon_lib_srv = mon_lib_srv,
                                    residue = residue,
                                    atom_selection_bool = None)
                                if(rotamer_iterator is None):
                                    n_amino_acids_ignored += 1
                                    n_rotamers = 0
                                    print >> log, "No rotamers for: %s. Use torsion grid search."%\
                                        residue_id_str
                                    residue_sites_best, rotamer_id_best = torsion_search(
                                        residue_evaluator = rev,
                                        cluster_evaluators = rev_first_atoms,
                                        axes_and_atoms_to_rotate = axes_and_atoms_to_rotate,
                                        rotamer_sites_cart = sites_cart_residue,
                                        rotamer_id_best = rotamer_id_best,
                                        residue_sites_best = residue_sites_best,
                                        rotamer_id = None,
                                        params = None)
                                else:
                                    n_amino_acids_scored += 1
                                    n_rotamers = 0
                                    if(not params.use_rotamer_iterator):
                                        # Grid search from the current coordinates only.
                                        if(params.torsion_grid_search):
                                            residue_sites_best, rotamer_id_best = torsion_search(
                                                residue_evaluator = rev,
                                                cluster_evaluators = rev_first_atoms,
                                                axes_and_atoms_to_rotate = axes_and_atoms_to_rotate,
                                                rotamer_sites_cart = sites_cart_residue,
                                                rotamer_id_best = rotamer_id_best,
                                                residue_sites_best = residue_sites_best,
                                                rotamer_id = None,
                                                params = params.torsion_search)
                                    else:
                                        # Try every rotamer, optionally refining each
                                        # by a torsion grid search.
                                        for rotamer, rotamer_sites_cart in rotamer_iterator:
                                            n_rotamers += 1
                                            if(params.torsion_grid_search):
                                                residue_sites_best, rotamer_id_best = torsion_search(
                                                    residue_evaluator = rev,
                                                    cluster_evaluators = rev_first_atoms,
                                                    axes_and_atoms_to_rotate = axes_and_atoms_to_rotate,
                                                    rotamer_sites_cart = rotamer_sites_cart,
                                                    rotamer_id_best = rotamer_id_best,
                                                    residue_sites_best = residue_sites_best,
                                                    rotamer_id = rotamer.id,
                                                    params = params.torsion_search)
                                            else:
                                                if(rev.is_better(sites_cart = rotamer_sites_cart)):
                                                    rotamer_id_best = rotamer.id
                                                    residue_sites_best = rotamer_sites_cart.deep_copy()
                                # Apply the best candidate found above.
                                residue.atoms().set_xyz(new_xyz=residue_sites_best)
                                max_moved_dist = flex.max(flex.sqrt(
                                    (sites_cart_residue_start-residue_sites_best).dot()))
                                if(not params.real_space_refine_rotamer):
                                    sites_cart_start = sites_cart_start.set_selected(
                                        residue_iselection, residue_sites_best)
                                else:
                                    # Refine with restraints; keep the refined sites
                                    # only if they score better.
                                    tmp = sites_cart_start.set_selected(
                                        residue_iselection, residue_sites_best)
                                    sites_cart_refined = rsr_manager.refine_restrained(
                                        tmp.select(rsel), rsel, rs)
                                    if(rev.is_better(sites_cart = sites_cart_refined)):
                                        sites_cart_start = sites_cart_start.set_selected(
                                            residue_iselection, sites_cart_refined)
                                        residue.atoms().set_xyz(new_xyz=sites_cart_refined)
                                        max_moved_dist = flex.max(flex.sqrt(
                                            (sites_cart_residue_start - sites_cart_refined).dot()))
                                # Report only residues whose two scores both changed.
                                if(abs(rev.t1_best-rev.t1_start) > 0.01 and
                                   abs(rev.t2_best-rev.t2_start) > 0.01):
                                    print >> log, fmt3 % (
                                        residue_id_str, cc_start, rev.t1_start, rev.t2_start,
                                        rev.t1_best, rev.t2_best, rotamer_id_best, n_rotamers,
                                        max_moved_dist)
    xray_structure.set_sites_cart(sites_cart_start)
    return result
def residue_iteration(pdb_hierarchy, xray_structure, selection,
                      target_map_data, model_map_data, residual_map_data,
                      mon_lib_srv, rsr_manager, optimize_hd, params, log):
    """
    Re-fit side chains of common amino acids against the maps, one residue
    at a time, and report the changes to ``log``.

    For every conformer residue whose class is "common_amino_acid" and which
    ``select_map.is_refinement_needed`` flags, candidate conformations are
    scored with a ``rotamer_evaluator`` (library rotamers and/or
    ``torsion_search``). The best candidate is written back into the
    hierarchy and, if ``params.real_space_refine_rotamer`` is set, passed to
    ``rsr_manager.refine_restrained``; the refined sites are kept only when
    the evaluator rates them better.

    Parameters:
      pdb_hierarchy: hierarchy that is traversed; residue coordinates are
        updated in place.
      xray_structure: source of the starting sites; receives the final sites
        through ``set_sites_cart`` just before returning.
      selection: forwarded to ``include_residue_selection``.
      target_map_data, model_map_data: maps handed to ``select_map``; they
        must agree in ``focus()`` and ``all()`` (asserted).
        ``target_map_data`` is also the ``two_mfo_dfc_map`` of the evaluators.
      residual_map_data: the ``mfo_dfc_map`` of the evaluators.
      mon_lib_srv: accepted for interface compatibility; see the note at the
        top of the body.
      rsr_manager: object providing ``refine_restrained``.
      optimize_hd: forwarded as ``ignore_hd`` and
        ``remove_clusters_with_all_h``.
      params: reads ``ignore_alt_conformers``, ``poor_cc_threshold``,
        ``use_rotamer_iterator``, ``torsion_grid_search``, ``torsion_search``
        and ``real_space_refine_rotamer``.
      log: writable stream for the progress table.

    Returns:
      List of ``residue_rsr_monitor`` objects, one per residue selected for
      re-fitting, built from the values seen before the fit.
    """
    # NOTE(review): the server passed by the caller is discarded and a fresh
    # one is built. Behavior is kept as found - confirm whether intended.
    mon_lib_srv = mmtbx.monomer_library.server.server()
    assert target_map_data.focus() == model_map_data.focus()
    assert target_map_data.all() == model_map_data.all()
    fmt1 = " |--------START--------| |-----FINAL----|"
    fmt2 = " residue map_cc 2mFo-DFc mFo-DFc 2mFo-DFc mFo-DFc" \
        " rotamer n_rot max_moved"
    fmt3 = " %12s%7.4f %8.2f %7.2f %8.2f %7.2f %7s %5d %8.3f"
    # The print function is required here: "print >> log" raises TypeError
    # once print is a function, as it is elsewhere in this file.
    print(fmt1, file=log)
    print(fmt2, file=log)
    unit_cell = xray_structure.unit_cell()
    map_selector = select_map(
        unit_cell=xray_structure.unit_cell(),
        target_map_data=target_map_data,
        model_map_data=model_map_data)
    map_selector.initialize_rotamers()
    get_class = iotbx.pdb.common_residue_names_get_class
    sites_cart_start = xray_structure.sites_cart()
    result = []
    for model in pdb_hierarchy.models():
        for chain in model.chains():
            for residue_group in chain.residue_groups():
                conformers = residue_group.conformers()
                if params.ignore_alt_conformers and len(conformers) > 1:
                    continue
                for conformer in conformers:
                    residue = conformer.only_residue()
                    if get_class(residue.resname) != "common_amino_acid":
                        continue
                    residue_iselection = residue.atoms().extract_i_seq()
                    sites_cart_residue = xray_structure.sites_cart().select(
                        residue_iselection)
                    residue.atoms().set_xyz(new_xyz=sites_cart_residue)
                    max_moved_dist = 0
                    sites_cart_residue_start = sites_cart_residue.deep_copy()
                    # XXX assume that "atoms" are the same in residue and
                    # residue_groups
                    if not map_selector.is_refinement_needed(
                            residue_group=residue_group,
                            residue=residue,
                            cc_limit=params.poor_cc_threshold,
                            ignore_hd=optimize_hd):
                        continue
                    residue_id_str = residue.id_str(suppress_segid=1)[-12:]
                    rsel, rs = include_residue_selection(
                        selection=selection,
                        residue_iselection=residue_iselection)
                    cc_start = map_selector.get_cc(
                        sites_cart=sites_cart_residue,
                        residue_iselection=residue_iselection)
                    rotamer_id_best = None
                    rev = rotamer_evaluator(
                        sites_cart_start=sites_cart_residue,
                        unit_cell=unit_cell,
                        two_mfo_dfc_map=target_map_data,
                        mfo_dfc_map=residual_map_data)
                    residue_sites_best = sites_cart_residue.deep_copy()
                    # Record the starting state before anything is moved.
                    result.append(residue_rsr_monitor(
                        residue_id_str=residue_id_str,
                        selection=residue_iselection.deep_copy(),
                        sites_cart=sites_cart_residue.deep_copy(),
                        twomfodfc=rev.t1_start,
                        mfodfc=rev.t2_start,
                        cc=cc_start))
                    axes_and_atoms_to_rotate = \
                        rotatable_bonds.axes_and_atoms_aa_specific(
                            residue=residue,
                            mon_lib_srv=mon_lib_srv,
                            remove_clusters_with_all_h=optimize_hd,
                            log=log)
                    if (axes_and_atoms_to_rotate is None or
                            len(axes_and_atoms_to_rotate) == 0):
                        continue
                    # Initialize criteria for the first rotatable atom in
                    # each cluster; the last cluster uses all of its atoms.
                    last_cluster = len(axes_and_atoms_to_rotate) - 1
                    rev_first_atoms = []
                    for i_aa, aa in enumerate(axes_and_atoms_to_rotate):
                        if i_aa == last_cluster:
                            sites_aa = flex.vec3_double()
                            for aa_ in aa[1]:
                                sites_aa.append(sites_cart_residue[aa_])
                        else:
                            sites_aa = flex.vec3_double(
                                [sites_cart_residue[aa[1][0]]])
                        rev_first_atoms.append(rotamer_evaluator(
                            sites_cart_start=sites_aa,
                            unit_cell=unit_cell,
                            two_mfo_dfc_map=target_map_data,
                            mfo_dfc_map=residual_map_data))
                    # Keyword arguments shared by every torsion_search call.
                    search_kwargs = dict(
                        residue_evaluator=rev,
                        cluster_evaluators=rev_first_atoms,
                        axes_and_atoms_to_rotate=axes_and_atoms_to_rotate)
                    # Get the rotamer iterator.
                    rotamer_iterator = lockit.get_rotamer_iterator(
                        mon_lib_srv=mon_lib_srv,
                        residue=residue,
                        atom_selection_bool=None)
                    n_rotamers = 0
                    if rotamer_iterator is None:
                        print("No rotamers for: %s. Use torsion grid search."
                              % residue_id_str, file=log)
                        residue_sites_best, rotamer_id_best = torsion_search(
                            rotamer_sites_cart=sites_cart_residue,
                            rotamer_id_best=rotamer_id_best,
                            residue_sites_best=residue_sites_best,
                            rotamer_id=None,
                            params=None,
                            **search_kwargs)
                    elif not params.use_rotamer_iterator:
                        if params.torsion_grid_search:
                            residue_sites_best, rotamer_id_best = \
                                torsion_search(
                                    rotamer_sites_cart=sites_cart_residue,
                                    rotamer_id_best=rotamer_id_best,
                                    residue_sites_best=residue_sites_best,
                                    rotamer_id=None,
                                    params=params.torsion_search,
                                    **search_kwargs)
                    else:
                        for rotamer, rotamer_sites_cart in rotamer_iterator:
                            n_rotamers += 1
                            if params.torsion_grid_search:
                                residue_sites_best, rotamer_id_best = \
                                    torsion_search(
                                        rotamer_sites_cart=rotamer_sites_cart,
                                        rotamer_id_best=rotamer_id_best,
                                        residue_sites_best=residue_sites_best,
                                        rotamer_id=rotamer.id,
                                        params=params.torsion_search,
                                        **search_kwargs)
                            elif rev.is_better(sites_cart=rotamer_sites_cart):
                                rotamer_id_best = rotamer.id
                                residue_sites_best = \
                                    rotamer_sites_cart.deep_copy()
                    # NOTE(review): the block had lost its indentation; the
                    # placement of this write-back after all search branches
                    # was reconstructed from the data flow - confirm.
                    residue.atoms().set_xyz(new_xyz=residue_sites_best)
                    max_moved_dist = flex.max(flex.sqrt(
                        (sites_cart_residue_start - residue_sites_best).dot()))
                    if not params.real_space_refine_rotamer:
                        sites_cart_start = sites_cart_start.set_selected(
                            residue_iselection, residue_sites_best)
                    else:
                        tmp = sites_cart_start.set_selected(
                            residue_iselection, residue_sites_best)
                        sites_cart_refined = rsr_manager.refine_restrained(
                            tmp.select(rsel), rsel, rs)
                        if rev.is_better(sites_cart=sites_cart_refined):
                            sites_cart_start = sites_cart_start.set_selected(
                                residue_iselection, sites_cart_refined)
                            residue.atoms().set_xyz(
                                new_xyz=sites_cart_refined)
                            max_moved_dist = flex.max(flex.sqrt(
                                (sites_cart_residue_start -
                                 sites_cart_refined).dot()))
                    # Report only residues whose two map scores both changed.
                    if (abs(rev.t1_best - rev.t1_start) > 0.01 and
                            abs(rev.t2_best - rev.t2_start) > 0.01):
                        print(fmt3 % (
                            residue_id_str, cc_start, rev.t1_start,
                            rev.t2_start, rev.t1_best, rev.t2_best,
                            rotamer_id_best, n_rotamers, max_moved_dist),
                            file=log)
    xray_structure.set_sites_cart(sites_cart_start)
    return result
def exercise(d_min=5, random_seed=1111111):
    """
    Shake a model, then real-space refine it against the target map under
    four ways of setting chi-torsion restraints.

    The modes are "no", "yes_add_once", "yes_add_per_residue" and
    "yes_manual". Scores are printed through show() before and after each
    refinement. Files written: start.pdb, poor.pdb and final_<mode>.pdb.
    The only check made is that refinement produced a result.
    """
    inp = get_pdb_inputs(pdb_str=pdb_str)
    good_xrs = inp.xrs.deep_copy_scatterers()
    target_map = get_tmo(inp=inp, d_min=d_min)
    inp.ph.write_pdb_file(file_name="start.pdb")
    show(prefix="GOOD",
         pdb_hierarchy=inp.ph,
         tm=target_map,
         xrs=good_xrs,
         grm=inp.grm.geometry)

    reference_ph = inp.ph.deep_copy()
    reference_ph.reset_i_seq_if_necessary()

    # Per-residue reference coordinates and selections: once for all atoms,
    # once with atoms named "O" left out.
    all_sites, all_selections = [], []
    chi_sites, chi_selections = [], []
    for model in inp.ph.models():
        for chain in model.chains():
            for residue in chain.residues():
                atoms = residue.atoms()
                all_sites.append(atoms.extract_xyz())
                all_selections.append(atoms.extract_i_seq())
                kept_xyz = flex.vec3_double()
                kept_i_seq = flex.size_t()
                for atom in atoms:
                    if atom.name.strip().upper() != "O":
                        kept_xyz.append(atom.xyz)
                        kept_i_seq.append(atom.i_seq)
                chi_sites.append(kept_xyz)
                chi_selections.append(kept_i_seq)

    poor_xrs = shake_sites(xrs=good_xrs.deep_copy_scatterers(),
                           random=False,
                           shift=2.0,
                           grm=inp.grm)
    inp.ph.adopt_xray_structure(poor_xrs)
    inp.ph.write_pdb_file(file_name="poor.pdb")

    # Modes that add restraints residue by residue, with the reference data
    # each of them uses.
    per_residue_reference = {
        "yes_add_per_residue": (all_sites, all_selections),
        "yes_manual": (chi_sites, chi_selections),
    }
    geometry = inp.grm.geometry
    banner = "*" * 79
    for mode in ("no", "yes_add_once", "yes_add_per_residue", "yes_manual"):
        # it's essential to update grm
        inp.grm.energies_sites(sites_cart=good_xrs.sites_cart())
        inp.ph.adopt_xray_structure(poor_xrs)
        random.seed(random_seed)
        flex.set_random_seed(random_seed)
        print(banner)
        print("use_reference_torsion:", mode)
        print(banner)
        show(prefix="START",
             pdb_hierarchy=inp.ph,
             tm=target_map,
             xrs=poor_xrs,
             grm=geometry)

        if mode == "yes_add_once":
            geometry.remove_chi_torsion_restraints_in_place()
            geometry.add_chi_torsion_restraints_in_place(
                pdb_hierarchy=reference_ph,
                sites_cart=good_xrs.sites_cart(),
                chi_angles_only=True,
                sigma=1)
        elif mode in per_residue_reference:
            ref_sites, ref_selections = per_residue_reference[mode]
            geometry.remove_chi_torsion_restraints_in_place()
            for sites_cart, selection in zip(ref_sites, ref_selections):
                geometry.add_chi_torsion_restraints_in_place(
                    pdb_hierarchy=reference_ph,
                    sites_cart=sites_cart,
                    selection=selection,
                    chi_angles_only=True,
                    sigma=1)

        work_xrs = poor_xrs.deep_copy_scatterers()
        everything = flex.bool(work_xrs.scatterers().size(), True)
        refiner = individual_sites.simple(
            target_map=target_map.data,
            selection=everything,
            real_space_gradients_delta=d_min / 4,
            max_iterations=500,
            geometry_restraints_manager=geometry)
        refined = individual_sites.refinery(
            refiner=refiner,
            optimize_weight=True,
            xray_structure=work_xrs,
            start_trial_weight_value=50,
            rms_bonds_limit=0.02,
            rms_angles_limit=2.0)
        assert refined.sites_cart_result is not None
        work_xrs = work_xrs.replace_sites_cart(refined.sites_cart_result)
        inp.ph.adopt_xray_structure(work_xrs)
        show(prefix="FINAL",
             pdb_hierarchy=inp.ph,
             tm=target_map,
             xrs=work_xrs,
             grm=geometry)
        inp.ph.write_pdb_file(file_name="final_%s.pdb" % str(mode))
def split_model_with_pae(model, m, pae_matrix, maximum_domains=None,
                         pae_power=1., pae_cutoff=5.,
                         pae_graph_resolution=1., minimum_domain_length=10,
                         log=sys.stdout):
    """
    Identify groups of atoms in a model that form compact units, using a
    predicted alignment error matrix (pae_matrix).

    Normally used after trimming low-confidence regions in predicted models
    to isolate domains that are likely to have indeterminate relationships.

    Parameters:
      model: model before trimming. Its count of residues having a CA or P
        atom must match both dimensions of pae_matrix.
      m: model object holding the input model after trimming; the domain
        selections are applied to this model.
      pae_matrix: matrix of predicted aligned errors (e.g., from
        AlphaFold2), NxN, N = number of residues in model.
      maximum_domains: accepted for interface compatibility; this function
        does not currently use it.
      pae_power (default=1): each edge in the graph will be weighted
        proportional to (1/pae**pae_power).
      pae_cutoff (default=5): graph edges will only be created for residue
        pairs with pae<pae_cutoff.
      pae_graph_resolution (default=1): regulates how aggressive the
        clustering algorithm is. Smaller values lead to larger clusters.
        Value should be larger than zero, and values larger than 5 are
        unlikely to be useful.
      minimum_domain_length: a region whose selection is smaller than this
        is skipped completely.
      log: stream for progress messages.

    Returns:
      group_args object (group_args_type 'model_info') with members:
        model: new model in which each kept region is placed in its own
          chain, with the chain id taken from the mapping returned by
          get_region_name_dict. None if no region was kept.
        chainid_list: list of chain ids as returned by get_region_name_dict.

    Raises:
      Sorry: if the shape of pae_matrix is not (N, N).
    """
    print("\nSelecting domains with predicted alignment error estimates",
          file=log)

    # Count residues of the untrimmed model that have a CA or P atom.
    n = model.apply_selection_string(
        '(name ca or name p)').get_hierarchy().overall_counts().n_residues

    # Make sure the matrix matches. The shape is joined item by item so the
    # message can also be built when the matrix is not two-dimensional.
    shape = tuple(pae_matrix.shape)
    if shape != (n, n):
        raise Sorry(
            "The pae matrix has a size of (%s) " % (
                ",".join(str(dim) for dim in shape)) +
            "but the number of residues in the model is %s" % (n))

    from mmtbx.secondary_structure.find_ss_from_ca import get_first_resno
    first_resno = get_first_resno(model.get_hierarchy())

    # Assign all CA in model to a region.
    from mmtbx.domains_from_pae import get_domain_selections_from_pae_matrix
    selection_list = get_domain_selections_from_pae_matrix(
        pae_matrix=pae_matrix,
        pae_power=pae_power,
        pae_cutoff=pae_cutoff,
        graph_resolution=pae_graph_resolution,
        first_resno=first_resno,
    )

    # And apply to the trimmed model: decide which regions are big enough.
    unique_regions = list(range(len(selection_list)))
    keep_list = []
    # The hierarchy is not modified in this loop, so one cache serves all
    # selections.
    asc1 = m.get_hierarchy().atom_selection_cache()
    for region_selection in selection_list:
        # NOTE(review): this looks like a count of selected atoms of `m`,
        # while the message below reports it as residues - confirm.
        n_selected = asc1.selection(region_selection).count(True)
        if n_selected >= minimum_domain_length:
            keep_list.append(True)
        else:
            keep_list.append(False)
            print("Skipping region '%s' with size of only %s residues" % (
                region_selection, n_selected), file=log)

    region_name_dict, chainid_list = get_region_name_dict(
        m, unique_regions, keep_list=keep_list)
    print("\nSelection list based on PAE values:", file=log)

    # Now create a new model with chains based on the region list.
    full_new_model = None
    for keep, region_selection, region_number in zip(
            keep_list, selection_list, unique_regions):
        if not keep:
            continue
        new_m = m.apply_selection_string(region_selection)
        print("%s (%s residues) " % (
            region_selection,
            new_m.get_hierarchy().overall_counts().n_residues), file=log)
        # Relabel the first chain of the first model of new_m with the name
        # of its region.
        for region_model in new_m.get_hierarchy().models()[:1]:
            for chain in region_model.chains()[:1]:
                chain.id = region_name_dict[region_number]
        if full_new_model:
            full_new_model = add_model(full_new_model, new_m)
        else:
            full_new_model = new_m

    # All done.
    return group_args(group_args_type='model_info',
                      model=full_new_model,
                      chainid_list=chainid_list)