def get_torsion_id(dp, name_hash, phi_psi=False, chi_only=False, omega=False):
    """Return the residue identifier shared by the atoms of a dihedral proxy.

    Used in torsion_ncs.  Each ``name_hash`` value is treated as an atom
    label whose first four characters are the atom name and whose remainder
    (altloc column onwards) identifies the residue.

    Parameters:
      dp -- object exposing ``i_seqs``, the atom indices of the dihedral.
      name_hash -- mapping from i_seq to atom label string.
      phi_psi -- if true, return the residue id of the second atom without
        any further checks.
      chi_only -- if true, return None unless at least one atom is something
        other than N, CA, C, O, CB or OXT.
      omega -- if true (and phi_psi is false), return a 2-tuple with the
        residue ids of the first and fourth atoms.
        LIMITATION: doesn't work with segIDs currently.

    Returns the residue id string, the 2-tuple described above, or None when
    the atoms do not all carry the same residue id, the residue is not
    classified as "common_amino_acid", or the chi_only requirement fails.
    """
    i_seqs = dp.i_seqs
    if phi_psi:
        return name_hash[i_seqs[1]][4:]
    if omega:
        return name_hash[i_seqs[0]][4:], name_hash[i_seqs[3]][4:]

    # Atom names are compared stripped so the test does not depend on how
    # the four-column name field happens to be padded.
    non_chi_names = ("N", "CA", "C", "O", "CB", "OXT")
    residue_id = None
    found_chi_atom = False
    for i_seq in i_seqs:
        label = name_hash[i_seq]
        atom_name = label[:4]
        cur_id = label[4:]
        if residue_id is None:
            residue_id = cur_id
        elif cur_id != residue_id:
            # NOTE: the id includes the altloc column, so atoms that differ
            # only in altloc are treated as belonging to different residues.
            return None
        resname = cur_id[1:4]
        if common_residue_names_get_class(
                resname,
                consider_ccp4_mon_lib_rna_dna=True) != "common_amino_acid":
            return None
        if chi_only and atom_name.strip() not in non_chi_names:
            found_chi_atom = True
    if chi_only and not found_chi_atom:
        return None
    return residue_id
def pperp_outliers(hierarchy, chain):
    """Return the kinemage vector list flagging base-P perpendicular outliers.

    ``rna_validate.rna_puckers`` is run on ``hierarchy``; every residue of
    ``chain`` that is classified as "common_rna_dna", passes
    ``residue_analysis`` as RNA without problems, and whose C1' label is
    among the reported outliers contributes five vectors: the perpendicular
    itself plus four short marker strokes around its end point.
    """
    kin_out = "@vectorlist {ext} color= magenta master= {base-P perp}\n"
    rv = rna_validate.rna_puckers(pdb_hierarchy=hierarchy)
    outliers = rv.results
    params = rna_sugar_pucker_analysis.master_phil.extract()
    outlier_key_list = [outlier.id_str() for outlier in outliers]
    for conformer in chain.conformers():
        for residue in conformer.residues():
            if common_residue_names_get_class(residue.resname) != "common_rna_dna":
                continue
            ra1 = residue_analysis(
                residue_atoms=residue.atoms(),
                distance_tolerance=params.bond_detection_distance_tolerance)
            if ra1.problems is not None or not ra1.is_rna:
                continue
            # Residues without a usable C1' atom are skipped silently.
            try:
                c1_atom = residue.find_atom_by(name=" C1'")
                key = c1_atom.pdb_label_columns()[4:]
            except Exception:
                continue
            if key not in outlier_key_list:
                continue

            # Prefer the P-based perpendicular; fall back to the O3'-based one.
            perp_pair = rv.pucker_perp_xyz[key]
            if perp_pair[0] is not None:
                perp_xyz = perp_pair[0]  # p_perp_xyz
            else:
                perp_xyz = perp_pair[1]  # o3p_perp_xyz

            # The 2'/3' guess uses a different cutoff for each distance kind.
            dist_pair = rv.pucker_dist[key]
            if dist_pair[0] is not None:
                perp_dist, cutoff = dist_pair[0], 2.9
            else:
                perp_dist, cutoff = dist_pair[1], 2.4
            pucker_text = " 2'?" if perp_dist < cutoff else " 3'?"

            key = key[0:4].lower() + key[4:] + pucker_text
            start, end = perp_xyz[0], perp_xyz[1]
            kin_out += kin_vec(key, start, key, end)

            a = matrix.col(end)
            b = matrix.col(c1_atom.xyz)
            c = (a - b).normalize()
            new = a - (c * .8)
            kin_out += kin_vec(key, end, key, tuple(new), 4)
            new = a + (c * .4)
            kin_out += kin_vec(key, end, key, tuple(new), 4)

            # Rotate the last stroke about the perpendicular: first by 90
            # degrees, then the rotated stroke by a further 180 degrees.
            r_vec = matrix.col(end) - matrix.col(start)
            for angle in (90, 180):
                r = r_vec.axis_and_angle_as_r3_rotation_matrix(
                    angle=angle, deg=True)
                new = r * (new - a) + a
                kin_out += kin_vec(key, end, key, tuple(new), 4)
    return kin_out
def modernize_rna_resname(resname):
    """Convert an old-style nucleic-acid residue name to the modern form.

    Names that ``common_residue_names_get_class`` (with CCP4 monomer-library
    names enabled) does not classify as "common_rna_dna" or
    "ccp4_mon_lib_rna_dna" are returned unchanged, as are recognised names
    that match none of the patterns below.

    Every converted name is right-justified to the three-column residue-name
    field, so it can be spliced back into a fixed-width atom label:
      one letter X           -> "  X"
      "DX" (any case)        -> " DX"
      "XD" (any case)        -> " DX"
      "XR" (any case)        -> "  X"
      ADE / CYT / GUA / URI  -> "  A" / "  C" / "  G" / "  U"
    """
    # Classify once; the result is needed for both accepted classes.
    res_class = common_residue_names_get_class(
        resname, consider_ccp4_mon_lib_rna_dna=True)
    if res_class not in ("common_rna_dna", "ccp4_mon_lib_rna_dna"):
        return resname

    tmp_resname = resname.strip()
    if len(tmp_resname) == 1:
        return tmp_resname.rjust(3)
    if len(tmp_resname) == 2:
        first = tmp_resname[0:1].upper()
        second = tmp_resname[1:].upper()
        if first == 'D':
            return tmp_resname.upper().rjust(3)
        if second == 'D':
            return ("D" + first).rjust(3)
        if second == 'R':
            return first.rjust(3)
    elif tmp_resname in ("ADE", "CYT", "GUA", "URI"):
        return tmp_resname[0:1].upper().rjust(3)
    return resname
def modernize_rna_resname(resname):
    """Return the modern spelling of a nucleic-acid residue name.

    Only names classified by ``common_residue_names_get_class`` (CCP4
    monomer-library names included) as "common_rna_dna" or
    "ccp4_mon_lib_rna_dna" are rewritten; anything else, and any recognised
    name matching none of the rules, is returned as given.

    Rewritten names are always padded on the left to three columns so the
    result keeps the width of the residue-name field:
      "X" -> "  X";  "DX" -> " DX";  "XD" -> " DX";  "XR" -> "  X";
      "ADE", "CYT", "GUA", "URI" -> "  A", "  C", "  G", "  U".
    """
    nucleic_classes = ("common_rna_dna", "ccp4_mon_lib_rna_dna")
    # A single classification call serves both class comparisons.
    if common_residue_names_get_class(
            resname, consider_ccp4_mon_lib_rna_dna=True) in nucleic_classes:
        name = resname.strip()
        modern = None
        if len(name) == 1:
            modern = name
        elif len(name) == 2:
            if name[0:1].upper() == 'D':
                modern = name.upper()
            elif name[1:].upper() == 'D':
                modern = "D" + name[0:1].upper()
            elif name[1:].upper() == 'R':
                modern = name[0:1].upper()
        elif name in ("ADE", "CYT", "GUA", "URI"):
            modern = name[0:1].upper()
        if modern is not None:
            # Right-justify to the 3-column residue-name width.
            return modern.rjust(3)
    return resname
def pperp_outliers(hierarchy, chain):
    """Build the "base-P perp" kinemage vector list for one chain.

    The outlier ids come from ``rna_validate.rna_puckers(hierarchy).results``.
    For each qualifying residue of ``chain`` (class "common_rna_dna", RNA per
    ``residue_analysis`` with no problems, C1' label among the outliers) the
    perpendicular and four marker strokes at its end are appended with
    ``kin_vec``.  Returns the accumulated text, header line included.
    """
    rv = rna_validate.rna_puckers(pdb_hierarchy=hierarchy)
    outliers = rv.results
    params = rna_sugar_pucker_analysis.master_phil.extract()
    outlier_key_list = []
    for outlier in outliers:
        outlier_key_list.append(outlier.id_str())

    pieces = ["@vectorlist {ext} color= magenta master= {base-P perp}\n"]
    for conformer in chain.conformers():
        for residue in conformer.residues():
            res_class = common_residue_names_get_class(residue.resname)
            if res_class != "common_rna_dna":
                continue
            ra1 = residue_analysis(
                residue_atoms=residue.atoms(),
                distance_tolerance=params.bond_detection_distance_tolerance)
            if ra1.problems is not None:
                continue
            if not ra1.is_rna:
                continue
            try:
                key = residue.find_atom_by(name=" C1'").pdb_label_columns()[4:]
            except Exception:
                # No usable C1' atom: nothing to draw for this residue.
                continue
            if key not in outlier_key_list:
                continue

            # p_perp_xyz when available, otherwise o3p_perp_xyz
            perp_xyz = rv.pucker_perp_xyz[key][0]
            if perp_xyz is None:
                perp_xyz = rv.pucker_perp_xyz[key][1]

            # The pucker guess threshold depends on which distance is used.
            perp_dist = rv.pucker_dist[key][0]
            threshold = 2.9
            if perp_dist is None:
                perp_dist = rv.pucker_dist[key][1]
                threshold = 2.4
            if perp_dist < threshold:
                pucker_text = " 2'?"
            else:
                pucker_text = " 3'?"

            key = key[0:4].lower() + key[4:]
            key += pucker_text
            pieces.append(kin_vec(key, perp_xyz[0], key, perp_xyz[1]))

            a = matrix.col(perp_xyz[1])
            b = matrix.col(residue.find_atom_by(name=" C1'").xyz)
            c = (a - b).normalize()
            for signed_length in (-.8, .4):
                new = a + (c * signed_length)
                pieces.append(kin_vec(key, perp_xyz[1], key, tuple(new), 4))

            # Successive rotations of the last stroke about the perpendicular.
            r_vec = matrix.col(perp_xyz[1]) - matrix.col(perp_xyz[0])
            r = r_vec.axis_and_angle_as_r3_rotation_matrix(angle=90, deg=True)
            new = r * (new - a) + a
            pieces.append(kin_vec(key, perp_xyz[1], key, tuple(new), 4))
            r = r_vec.axis_and_angle_as_r3_rotation_matrix(angle=180, deg=True)
            new = r * (new - a) + a
            pieces.append(kin_vec(key, perp_xyz[1], key, tuple(new), 4))
    return "".join(pieces)
def get_res_type(res_name):
    """(string) -> string

    Classify a residue by name.  The class from
    ``common_residue_names_get_class`` is used unless it is 'other', in
    which case ``get_type`` is consulted instead.  A missing type is
    reported as the string 'None' rather than the None object.
    """
    res_type = common_residue_names_get_class(res_name)
    if res_type == 'other':
        res_type = get_type(res_name)
    return 'None' if res_type is None else res_type
def build_i_seq_hash(pdb_hierarchy):
    """Map a normalised atom label to the atom's i_seq.

    The label is assembled from the atom's ``pdb_label_columns()`` with the
    residue name passed through ``modernize_rna_resname`` and, for residues
    that then classify as "common_rna_dna", the atom name passed through
    ``modernize_rna_atom_name``.  The segid is appended at the end.
    """
    name_i_seq_hash = {}
    for atom in pdb_hierarchy.atoms():
        labels = atom.pdb_label_columns()
        atom_name = labels[0:4]
        altloc_col = labels[4:5]
        tail = labels[8:]
        updated_resname = modernize_rna_resname(labels[5:8])
        if common_residue_names_get_class(updated_resname) == "common_rna_dna":
            updated_atom = modernize_rna_atom_name(atom=atom_name)
        else:
            updated_atom = atom_name
        key = "".join(
            [updated_atom, altloc_col, updated_resname, tail, atom.segid])
        name_i_seq_hash[key] = atom.i_seq
    return name_i_seq_hash
def build_i_seq_hash(pdb_hierarchy):
    """Return a dict from modernised atom label (plus segid) to i_seq.

    For every atom the label columns are split into atom name, altloc,
    residue name and remainder.  The residue name goes through
    ``modernize_rna_resname``; the atom name goes through
    ``modernize_rna_atom_name`` only when the updated residue name is
    classified as "common_rna_dna".
    """
    result = dict()
    for atom in pdb_hierarchy.atoms():
        columns = atom.pdb_label_columns()
        new_resname = modernize_rna_resname(columns[5:8])
        new_atom_name = columns[0:4]
        if common_residue_names_get_class(new_resname) == "common_rna_dna":
            new_atom_name = modernize_rna_atom_name(atom=new_atom_name)
        label = new_atom_name + columns[4:5] + new_resname + columns[8:]
        result[label + atom.segid] = atom.i_seq
    return result
def get_torsion_id(dp, name_hash, phi_psi=False, chi_only=False, omega=False):
    """Identify the residue that a dihedral proxy belongs to.

    Used in torsion_ncs.  ``name_hash[i_seq]`` is an atom label: characters
    0-3 are the atom name, everything from the altloc column on is the
    residue id.

    Parameters:
      dp -- proxy object with an ``i_seqs`` sequence of atom indices.
      name_hash -- mapping i_seq -> atom label string.
      phi_psi -- short-circuit: return the residue id of ``i_seqs[1]``.
      omega -- short-circuit (checked after phi_psi): return the residue ids
        of ``i_seqs[0]`` and ``i_seqs[3]`` as a 2-tuple.
        LIMITATION: doesn't work with segIDs currently.
      chi_only -- require at least one atom outside N, CA, C, O, CB, OXT.

    Returns a residue id string, a 2-tuple (omega), or None if the atoms
    carry different residue ids, the residue is not a "common_amino_acid",
    or ``chi_only`` is set and only the listed atoms are present.
    """
    if phi_psi:
        return name_hash[dp.i_seqs[1]][4:]
    elif omega:
        return (name_hash[dp.i_seqs[0]][4:],
                name_hash[dp.i_seqs[3]][4:])

    # Compared against stripped atom names, so the padding of the
    # four-column name field cannot cause a mismatch.
    mainchain_like = frozenset(("N", "CA", "C", "O", "CB", "OXT"))
    torsion_id = None
    chi_atoms = False
    for i_seq in dp.i_seqs:
        entry = name_hash[i_seq]
        cur_id = entry[4:]
        if torsion_id is None:
            torsion_id = cur_id
        if cur_id != torsion_id:
            # The residue id still contains the altloc character, so a
            # change of altloc alone also ends up here.
            return None
        res_class = common_residue_names_get_class(
            cur_id[1:4], consider_ccp4_mon_lib_rna_dna=True)
        if res_class != "common_amino_acid":
            return None
        if chi_only and entry[:4].strip() not in mainchain_like:
            chi_atoms = True
    if chi_only and not chi_atoms:
        return None
    return torsion_id
def get_phil_base_pairs(pdb_hierarchy, nonbonded_proxies, prefix=None,
                        params=None, log=sys.stdout, add_segid=None,
                        verbose=-1):
    """Return PHIL text describing base pairs found from nonbonded contacts.

    Nonbonded pairs (sorted by "delta") are scanned while their distance is
    below the h-bond cutoff (``params.hbond_distance_cutoff``, or 3.4 when
    ``params`` is None).  A contact is a candidate when both residues are
    nucleic acids, both atoms are N or O with no "P" or "'" in their names,
    the residues are not consecutive, the altlocs agree, and
    ``final_link_direction_check`` accepts it.  A candidate becomes a base
    pair record only if ``get_h_bonds_for_basepair`` finds more than one
    h-bond.

    Returns the concatenated records, wrapped as "<prefix> {...}" when
    ``prefix`` is given, or "" when the proxies cannot be sorted.
    """
    if params is None:
        hbond_distance_cutoff = 3.4
    else:
        hbond_distance_cutoff = params.hbond_distance_cutoff
    atoms = pdb_hierarchy.atoms()
    sites_cart = atoms.extract_xyz()
    get_sorted_result = nonbonded_proxies.get_sorted(
        by_value="delta", sites_cart=sites_cart)
    if get_sorted_result is None:
        return ""
    sorted_nonb, n_not_shown = get_sorted_result
    na_classes = ["common_rna_dna", "ccp4_mon_lib_rna_dna"]
    hb_elements = ["N", "O"]

    # Get potential hbonds
    hbonds = []
    for record in sorted_nonb:
        if not (record[3] < hbond_distance_cutoff):
            break
        (labels, i_seq, j_seq, dist, vdw_distance, sym_op_j, rt_mx) = record
        a1 = atoms[i_seq]
        ag1 = a1.parent()
        a2 = atoms[j_seq]
        ag2 = a2.parent()
        # The checks run in the original order and stop at the first failure.
        if common_residue_names_get_class(
                ag1.resname,
                consider_ccp4_mon_lib_rna_dna=True) not in na_classes:
            continue
        if common_residue_names_get_class(
                ag2.resname,
                consider_ccp4_mon_lib_rna_dna=True) not in na_classes:
            continue
        if a1.element not in hb_elements or a2.element not in hb_elements:
            continue
        if not (a1.name.find("P") < 0 and a2.name.find("P") < 0):
            continue
        if not (a1.name.find("'") < 0 and a2.name.find("'") < 0):
            continue
        if consecutive_residues(a1, a2):
            continue
        if ag1.altloc.strip() != ag2.altloc.strip():
            continue
        if final_link_direction_check(a1, a2):
            hbonds.append((i_seq, j_seq))

    # check and define basepairs
    pairs = []
    for first_i, second_i in hbonds:
        if verbose > 1:
            print >> log, "Making pair with", atoms[first_i].id_str(), atoms[second_i].id_str()
        new_hbonds, class_number = get_h_bonds_for_basepair(
            atoms[first_i],
            atoms[second_i],
            distance_cutoff=hbond_distance_cutoff,
            log=log,
            verbose=verbose)
        if verbose > 1:
            print >> log, " Picked class: %d, number of h-bonds under cutoff:%d" % (
                class_number, len(new_hbonds)),
        if len(new_hbonds) > 1:
            p = make_phil_base_pair_record(
                atoms[first_i].parent(), atoms[second_i].parent(), params,
                saenger_class=class_number, add_segid=add_segid)
            if verbose > 1:
                print >> log, " OK"
            pairs.append(p)
        elif verbose > 0:
            # NOTE(review): this message is built but never written anywhere.
            s = " ".join([
                "Basepairing for residues '%s' and '%s'" % (
                    atoms[first_i].id_str()[10:-1],
                    atoms[second_i].id_str()[10:-1]),
                "was rejected because only 1 h-bond was found"])
            if verbose > 1:
                print >> log, "Rejected"

    phil_str = ""
    for pair_phil in pairs:
        phil_str += pair_phil
    if prefix is None:
        return phil_str
    return "%s {\n%s}" % (prefix, phil_str)
def get_kin_lots(chain, bond_hash, i_seq_name_hash, pdbID=None, index=0,
                 show_hydrogen=True):
    """Build the kinemage display lists for one chain.

    Parameters:
      chain -- chain object providing ``residue_groups()`` and ``id``.
      bond_hash -- mapping i_seq -> iterable of bonded i_seqs.
      i_seq_name_hash -- mapping i_seq -> atom label; the first four
        characters are taken as the atom name.
      pdbID -- identifier appended to every point label.
      index -- passed to ``get_chain_color`` to choose the chain colour.
      show_hydrogen -- when false, bonds involving H/D atoms are not drawn.

    Returns a string holding, in this order and only when non-empty: the
    mainchain, mainchain-H, C-alpha trace, sidechain, sidechain-H, water,
    virtual backbone, het, ion and het-H lists.

    Atom names are compared stripped of padding throughout, so the logic
    does not depend on how the four-column name field is aligned.
    """
    mc_atoms = ["N", "CA", "C", "O", "OXT",
                "P", "OP1", "OP2", "OP3", "O5'", "C5'", "C4'", "O4'", "C1'",
                "C3'", "O3'", "C2'", "O2'"]
    mc_h_veclist = ""
    sc_h_veclist = ""
    ion_list = ""
    kin_out = ""
    color = get_chain_color(index)
    mc_veclist = "@vectorlist {mc} color= %s master= {mainchain}\n" % color
    sc_veclist = "@vectorlist {sc} color= cyan master= {sidechain}\n"
    ca_trace = "@vectorlist {Calphas} color= %s master= {Calphas}\n" % color
    virtual_bb = "@vectorlist {Virtual BB} color= %s off master= {Virtual BB}\n" % color
    water_list = "@balllist {water O} color= peachtint radius= 0.15 master= {water}\n"
    hets = "@vectorlist {het} color= pink master= {hets}\n"
    het_h = "@vectorlist {ht H} color= gray nobutton master= {hets} master= {H's}\n"
    if show_hydrogen:
        mc_h_veclist = \
            "@vectorlist {mc H} color= gray nobutton master= {mainchain} master= {H's}\n"
        sc_h_veclist = \
            "@vectorlist {sc H} color= gray nobutton master= {sidechain} master= {H's}\n"
    prev_resid = None
    cur_resid = None
    # Per-altloc linkage atoms of the previous residue group.
    prev_C_xyz = {}
    prev_C_key = {}
    prev_CA_xyz = {}
    prev_CA_key = {}
    prev_O3_xyz = {}
    prev_O3_key = {}
    # Virtual-backbone atoms, indexed by integer residue number.
    p_hash_key = {}
    p_hash_xyz = {}
    c1_hash_key = {}
    c1_hash_xyz = {}
    c4_hash_key = {}
    c4_hash_xyz = {}
    # Membership is tested for every bond, so a set keeps that test cheap.
    drawn_bonds = set()
    for residue_group in chain.residue_groups():
        altloc_hash = {}
        iseq_altloc = {}
        cur_C_xyz = {}
        cur_C_key = {}
        cur_CA_xyz = {}
        cur_CA_key = {}
        cur_O3_xyz = {}
        cur_O3_key = {}
        resseq = residue_group.resseq_as_int()
        # Record which altlocs each atom name occurs in.
        for atom_group in residue_group.atom_groups():
            altloc = atom_group.altloc
            for atom in atom_group.atoms():
                altloc_hash.setdefault(atom.name.strip(), []).append(altloc)
                iseq_altloc[atom.i_seq] = altloc
        cur_resid = residue_group.resid()
        for conformer in residue_group.conformers():
            for residue in conformer.residues():
                cur_resid = residue.resid()
                res_class = common_residue_names_get_class(residue.resname)
                key_hash = {}
                xyz_hash = {}
                het_hash = {}
                altloc = conformer.altloc
                if altloc == '':
                    altloc = ' '
                for atom in residue.atoms():
                    name = atom.name.strip()
                    cur_altlocs = altloc_hash.get(name, [])
                    if cur_altlocs == ['']:
                        cur_altloc = ' '
                    elif altloc in cur_altlocs:
                        cur_altloc = altloc
                    else:
                        # TO_DO: handle branching from altlocs
                        # This must be an assignment: a comparison here left
                        # cur_altloc unbound or stale from the previous atom.
                        cur_altloc = ' '
                    key = "%s%s%s %s%s B%.2f %s" % (
                        atom.name.lower(),
                        cur_altloc.lower(),
                        residue.resname.lower(),
                        chain.id,
                        residue_group.resid(),
                        atom.b,
                        pdbID)
                    key_hash[name] = key
                    xyz_hash[name] = atom.xyz
                    if res_class == "common_amino_acid":
                        if name == 'C':
                            cur_C_xyz[altloc] = atom.xyz
                            cur_C_key[altloc] = key
                        if name == 'CA':
                            cur_CA_xyz[altloc] = atom.xyz
                            cur_CA_key[altloc] = key
                            if len(prev_CA_key) > 0 and len(prev_CA_xyz) > 0:
                                if int(resseq) - int(prev_resid[0:4]) == 1:
                                    # Best-effort link to the previous CA of
                                    # the same altloc, else the blank altloc.
                                    try:
                                        prev_key = prev_CA_key.get(altloc)
                                        prev_xyz = prev_CA_xyz.get(altloc)
                                        if prev_key is None:
                                            prev_key = prev_CA_key.get(' ')
                                            prev_xyz = prev_CA_xyz.get(' ')
                                        if prev_key is None:
                                            continue
                                        ca_trace += kin_vec(
                                            prev_key, prev_xyz, key, atom.xyz)
                                    except Exception:
                                        pass
                        if name == 'N':
                            if len(prev_C_key) > 0 and len(prev_C_xyz) > 0:
                                if int(resseq) - int(prev_resid[0:4]) == 1:
                                    # Peptide bond from the previous C.
                                    try:
                                        prev_key = prev_C_key.get(altloc)
                                        prev_xyz = prev_C_xyz.get(altloc)
                                        if prev_key is None:
                                            prev_key = prev_C_key.get(' ')
                                            prev_xyz = prev_C_xyz.get(' ')
                                        if prev_key is None:
                                            continue
                                        mc_veclist += kin_vec(
                                            prev_key, prev_xyz, key, atom.xyz)
                                    except Exception:
                                        pass
                    elif res_class == "common_rna_dna":
                        if name == "O3'":
                            cur_O3_xyz[altloc] = atom.xyz
                            cur_O3_key[altloc] = key
                        elif name == 'P':
                            if len(prev_O3_key) > 0 and len(prev_O3_xyz) > 0:
                                if int(resseq) - int(prev_resid[0:4]) == 1:
                                    # Phosphodiester link from the previous
                                    # O3'.  NOTE: when no previous O3' is
                                    # found the `continue` also skips
                                    # recording this P below.
                                    try:
                                        prev_key = prev_O3_key.get(altloc)
                                        prev_xyz = prev_O3_xyz.get(altloc)
                                        if prev_key is None:
                                            prev_key = prev_O3_key.get(' ')
                                            prev_xyz = prev_O3_xyz.get(' ')
                                        if prev_key is None:
                                            continue
                                        mc_veclist += kin_vec(
                                            prev_key, prev_xyz, key, atom.xyz)
                                    except Exception:
                                        pass
                            p_hash_key[resseq] = key
                            p_hash_xyz[resseq] = atom.xyz
                        elif name == "C1'":
                            c1_hash_key[resseq] = key
                            c1_hash_xyz[resseq] = atom.xyz
                        elif name == "C4'":
                            c4_hash_key[resseq] = key
                            c4_hash_xyz[resseq] = atom.xyz
                    elif res_class == "common_element":
                        ion_list += "{%s} %.3f %.3f %.3f\n" % (
                            key, atom.xyz[0], atom.xyz[1], atom.xyz[2])
                    elif (res_class == "other") and (len(residue.atoms()) == 1):
                        # Single-atom residues of unknown class count as ions.
                        ion_list += "{%s} %.3f %.3f %.3f\n" % (
                            key, atom.xyz[0], atom.xyz[1], atom.xyz[2])
                    elif residue.resname.lower() == 'hoh':
                        if name == 'O':
                            water_list += "{%s} P %.3f %.3f %.3f\n" % (
                                key, atom.xyz[0], atom.xyz[1], atom.xyz[2])
                    else:
                        het_hash[name] = [key, atom.xyz]

                if res_class == "common_rna_dna":
                    # Virtual backbone: C4'(i-1)-P(i), P(i)-C4'(i), C4'(i)-C1'(i).
                    # Segments whose atoms were not recorded are skipped.
                    try:
                        virtual_bb += kin_vec(c4_hash_key[resseq - 1],
                                              c4_hash_xyz[resseq - 1],
                                              p_hash_key[resseq],
                                              p_hash_xyz[resseq])
                    except Exception:
                        pass
                    try:
                        virtual_bb += kin_vec(p_hash_key[resseq],
                                              p_hash_xyz[resseq],
                                              c4_hash_key[resseq],
                                              c4_hash_xyz[resseq])
                    except Exception:
                        pass
                    try:
                        virtual_bb += kin_vec(c4_hash_key[resseq],
                                              c4_hash_xyz[resseq],
                                              c1_hash_key[resseq],
                                              c1_hash_xyz[resseq])
                    except Exception:
                        pass

                for atom in residue.atoms():
                    try:
                        cur_bonds = bond_hash[atom.i_seq]
                    except Exception:
                        continue
                    for bond in cur_bonds:
                        atom_1 = i_seq_name_hash.get(atom.i_seq)
                        if atom_1 is not None:
                            atom_1 = atom_1[0:4].strip()
                        atom_2 = i_seq_name_hash.get(bond)
                        if atom_2 is not None:
                            atom_2 = atom_2[0:4].strip()
                        if atom_1 is None or atom_2 is None:
                            continue
                        # handle altlocs ########
                        if (key_hash.get(atom_1) is None) or \
                           (key_hash.get(atom_2) is None):
                            continue
                        drawn_key = key_hash[atom_1] + key_hash[atom_2]
                        if drawn_key in drawn_bonds:
                            continue
                        altloc_2 = iseq_altloc.get(bond)
                        if altloc_2 != altloc and altloc_2 != '':
                            continue
                        #########################
                        involves_h = (
                            atom_1.startswith('H') or atom_2.startswith('H') or
                            atom_1.startswith('D') or atom_2.startswith('D'))
                        if res_class in ('other', 'common_small_molecule'):
                            if involves_h:
                                if show_hydrogen:
                                    try:
                                        het_h += kin_vec(het_hash[atom_1][0],
                                                         het_hash[atom_1][1],
                                                         het_hash[atom_2][0],
                                                         het_hash[atom_2][1])
                                    except Exception:
                                        pass
                            else:
                                try:
                                    hets += kin_vec(het_hash[atom_1][0],
                                                    het_hash[atom_1][1],
                                                    het_hash[atom_2][0],
                                                    het_hash[atom_2][1])
                                except Exception:
                                    pass
                        elif res_class in ("common_amino_acid",
                                           "common_rna_dna"):
                            if atom_1 in mc_atoms and atom_2 in mc_atoms:
                                try:
                                    if atom_1 == "C" and atom_2 == "N":
                                        # inter-residue link, drawn above
                                        pass
                                    elif atom_1 == "O3'" and atom_2 == "P":
                                        # inter-residue link, drawn above
                                        pass
                                    else:
                                        mc_veclist += kin_vec(
                                            key_hash[atom_1], xyz_hash[atom_1],
                                            key_hash[atom_2], xyz_hash[atom_2])
                                except Exception:
                                    pass
                            elif involves_h:
                                if show_hydrogen:
                                    if atom_1 in mc_atoms or atom_2 in mc_atoms:
                                        try:
                                            mc_h_veclist += kin_vec(
                                                key_hash[atom_1],
                                                xyz_hash[atom_1],
                                                key_hash[atom_2],
                                                xyz_hash[atom_2])
                                        except Exception:
                                            pass
                                    else:
                                        try:
                                            sc_h_veclist += kin_vec(
                                                key_hash[atom_1],
                                                xyz_hash[atom_1],
                                                key_hash[atom_2],
                                                xyz_hash[atom_2])
                                        except Exception:
                                            pass
                            else:
                                try:
                                    sc_veclist += kin_vec(
                                        key_hash[atom_1], xyz_hash[atom_1],
                                        key_hash[atom_2], xyz_hash[atom_2])
                                except Exception:
                                    pass
                        drawn_bonds.add(drawn_key)
        prev_CA_xyz = cur_CA_xyz
        prev_CA_key = cur_CA_key
        prev_C_xyz = cur_C_xyz
        prev_C_key = cur_C_key
        prev_resid = cur_resid
        prev_O3_key = cur_O3_key
        prev_O3_xyz = cur_O3_xyz
    ion_kin = None
    if len(ion_list) > 1:
        ion_kin = get_ions(ion_list)
    # clean up empty lists: a list holding only its header line is dropped
    if len(mc_veclist.splitlines()) > 1:
        kin_out += mc_veclist
    if len(mc_h_veclist.splitlines()) > 1:
        kin_out += mc_h_veclist
    if len(ca_trace.splitlines()) > 1:
        kin_out += ca_trace
    if len(sc_veclist.splitlines()) > 1:
        kin_out += sc_veclist
    if len(sc_h_veclist.splitlines()) > 1:
        kin_out += sc_h_veclist
    if len(water_list.splitlines()) > 1:
        kin_out += water_list
    if len(virtual_bb.splitlines()) > 1:
        kin_out += virtual_bb
    if len(hets.splitlines()) > 1:
        kin_out += hets
    if ion_kin is not None:
        kin_out += ion_kin
    if len(het_h.splitlines()) > 1:
        kin_out += het_h
    return kin_out
def get_phil_base_pairs(pdb_hierarchy, nonbonded_proxies, prefix=None,
                        params=None, log=sys.stdout, add_segid=None,
                        verbose=-1):
    """Detect nucleic-acid base pairs and return them as PHIL text.

    Step 1 collects candidate h-bonds: nonbonded contacts (sorted by
    "delta") shorter than the cutoff -- 3.4, or
    ``params.hbond_distance_cutoff`` when ``params`` is given -- between N/O
    base atoms of two non-consecutive nucleic-acid residues with matching
    altlocs that also pass ``final_link_direction_check``.
    Step 2 keeps a candidate when ``get_h_bonds_for_basepair`` reports more
    than one h-bond and formats it with ``make_phil_base_pair_record``.

    The result is the concatenation of the records, optionally enclosed in
    "<prefix> {" ... "}".  An empty string is returned when
    ``nonbonded_proxies.get_sorted`` yields None.
    """
    result = ""
    hbond_distance_cutoff = 3.4
    if params is not None:
        hbond_distance_cutoff = params.hbond_distance_cutoff
    atoms = pdb_hierarchy.atoms()
    get_sorted_result = nonbonded_proxies.get_sorted(
        by_value="delta", sites_cart=atoms.extract_xyz())
    if get_sorted_result is None:
        return result
    sorted_nonb, n_not_shown = get_sorted_result

    # Get potential hbonds
    nucleic = ["common_rna_dna", "ccp4_mon_lib_rna_dna"]
    hbonds = []
    pos = 0
    total = len(sorted_nonb)
    while pos < total and sorted_nonb[pos][3] < hbond_distance_cutoff:
        (labels, i_seq, j_seq, dist, vdw_distance, sym_op_j, rt_mx) = \
            sorted_nonb[pos]
        pos += 1
        a1 = atoms[i_seq]
        ag1 = a1.parent()
        a2 = atoms[j_seq]
        ag2 = a2.parent()
        # Nested tests keep the original left-to-right short-circuit order.
        if (common_residue_names_get_class(
                ag1.resname, consider_ccp4_mon_lib_rna_dna=True) in nucleic and
                common_residue_names_get_class(
                    ag2.resname, consider_ccp4_mon_lib_rna_dna=True) in nucleic):
            if a1.element in ["N", "O"] and a2.element in ["N", "O"]:
                no_phosphate = a1.name.find("P") < 0 and a2.name.find("P") < 0
                no_sugar = a1.name.find("'") < 0 and a2.name.find("'") < 0
                if no_phosphate and no_sugar:
                    if (not consecutive_residues(a1, a2) and
                            ag1.altloc.strip() == ag2.altloc.strip() and
                            final_link_direction_check(a1, a2)):
                        hbonds.append((i_seq, j_seq))

    # check and define basepairs
    pairs = []
    for hb in hbonds:
        atom_a = atoms[hb[0]]
        atom_b = atoms[hb[1]]
        if verbose > 1:
            print >> log, "Making pair with", atom_a.id_str(), atom_b.id_str()
        new_hbonds, class_number = get_h_bonds_for_basepair(
            atom_a, atom_b, distance_cutoff=hbond_distance_cutoff, log=log,
            verbose=verbose)
        if verbose > 1:
            print >> log, " Picked class: %d, number of h-bonds under cutoff:%d" % (class_number, len(new_hbonds)),
        if len(new_hbonds) > 1:
            p = make_phil_base_pair_record(
                atom_a.parent(), atom_b.parent(), params,
                saenger_class=class_number, add_segid=add_segid)
            if verbose > 1:
                print >> log, " OK"
            pairs.append(p)
        else:
            if verbose > 0:
                # NOTE(review): the message is assembled but never printed.
                s = " ".join(["Basepairing for residues '%s' and '%s'" % (
                    atom_a.id_str()[10:-1], atom_b.id_str()[10:-1]),
                    "was rejected because only 1 h-bond was found"])
                if verbose > 1:
                    print >> log, "Rejected"

    phil_str = ""
    for pair_phil in pairs:
        phil_str = phil_str + pair_phil
    if prefix is not None:
        result = "%s {\n%s}" % (prefix, phil_str)
    else:
        result = phil_str
    return result
def filter_before_build(
        pdb_hierarchy,
        fmodel,
        geometry_restraints_manager,
        selection=None,
        params=None,
        verbose=True,
        log=sys.stdout):
    """
    Pick residues suitable for building alternate conformations - by default,
    this means no MolProbity/geometry outliers, good fit to map, no missing
    atoms, and no pre-existing alternates, but with significant difference
    density nearby.

    Parameters:
      pdb_hierarchy -- model hierarchy; its atom count must equal the number
        of scatterers in ``fmodel.xray_structure``.  Atom i_seqs are reset.
      fmodel -- used for validation and for the difference maps.
      geometry_restraints_manager -- passed to the MolProbity validation.
      selection -- boolean selection of atoms to consider; residue groups not
        fully selected are skipped.  Defaults to all atoms.
      params -- filter parameters; parsed from ``filter_params_str`` if None.
      verbose -- print the validation summary and per-residue density notes.
      log -- output stream.

    Returns the list of atom groups that passed every filter.
    Raises Sorry if no residue passes.
    """
    from mmtbx.validation import molprobity
    from mmtbx.rotamer import rotamer_eval
    import mmtbx.monomer_library.server
    from mmtbx import building
    from iotbx.pdb import common_residue_names_get_class
    from scitbx.array_family import flex
    if (selection is None):
        selection = flex.bool(fmodel.xray_structure.scatterers().size(), True)
    pdb_atoms = pdb_hierarchy.atoms()
    assert (pdb_atoms.size() == fmodel.xray_structure.scatterers().size())
    pdb_atoms.reset_i_seq()
    full_validation = molprobity.molprobity(
        pdb_hierarchy=pdb_hierarchy,
        fmodel=fmodel,
        geometry_restraints_manager=geometry_restraints_manager,
        outliers_only=False,
        rotamer_library="8000")
    if (verbose):
        full_validation.show(out=log)
    multi_criterion = full_validation.as_multi_criterion_view()
    if (params is None):
        params = libtbx.phil.parse(filter_params_str).extract()
    mon_lib_srv = mmtbx.monomer_library.server.server()
    two_fofc_map, fofc_map = building.get_difference_maps(fmodel=fmodel)
    residues = []
    make_sub_header("Identifying candidates for building", out=log)
    # TODO parallelize
    for chain in pdb_hierarchy.only_model().chains():
        if (not chain.is_protein()):
            continue
        for residue_group in chain.residue_groups():
            atom_groups = residue_group.atom_groups()
            id_str = residue_group.id_str()
            i_seqs = residue_group.atoms().extract_i_seq()
            residue_sel = selection.select(i_seqs)
            if (not residue_sel.all_eq(True)):
                continue
            if (len(atom_groups) > 1):
                print >> log, " %s is already multi-conformer" % id_str
                continue
            atom_group = atom_groups[0]
            res_class = common_residue_names_get_class(atom_group.resname)
            if (res_class != "common_amino_acid"):
                print >> log, " %s: non-standard residue" % id_str
                continue
            missing_atoms = rotamer_eval.eval_residue_completeness(
                residue=atom_group,
                mon_lib_srv=mon_lib_srv,
                ignore_hydrogens=True)
            if (len(missing_atoms) > 0):
                # residues modeled as pseudo-ALA are allowed by default;
                # partially missing sidechains are more problematic
                if ((building.is_stub_residue(atom_group)) and
                        (not params.ignore_stub_residues)):
                    pass
                else:
                    # The format needs one placeholder per argument; with a
                    # single %s this line raised TypeError instead of logging.
                    print >> log, " %s: missing or incomplete sidechain (%d atoms missing)" % \
                        (id_str, len(missing_atoms))
                    continue
            validation = multi_criterion.get_residue_group_data(residue_group)
            is_outlier = is_validation_outlier(validation, params)
            if (is_outlier):
                print >> log, " %s" % str(validation)
                continue
            if (params.use_difference_map):
                i_seqs_no_hd = building.get_non_hydrogen_atom_indices(
                    residue_group)
                map_stats = building.local_density_quality(
                    fofc_map=fofc_map,
                    two_fofc_map=two_fofc_map,
                    atom_selection=i_seqs_no_hd,
                    xray_structure=fmodel.xray_structure,
                    radius=params.sampling_radius)
                if ((map_stats.number_of_atoms_below_fofc_map_level() == 0) and
                        (map_stats.fraction_of_nearby_grid_points_above_cutoff() == 0)):
                    if (verbose):
                        print >> log, " no difference density for %s" % id_str
                    continue
            residues.append(residue_group.only_atom_group())
    if (len(residues) == 0):
        raise Sorry("No residues passed the filtering criteria.")
    print >> log, ""
    print >> log, "Alternate conformations will be tried for %d residue(s):" % \
        len(residues)
    building.show_chain_resseq_ranges(residues, out=log, prefix=" ")
    print >> log, ""
    return residues
def extend_protein_model(pdb_hierarchy, selection=None, hydrogens=Auto,
                         max_atoms_missing=None, log=None, modify_segids=True,
                         prefilter_callback=None, idealized_residue_dict=None,
                         skip_non_protein_chains=True):
    """
    Rebuild every sidechain in a PDB hierarchy that lacks non-hydrogen atoms.

    Single-conformer, fully selected residues with missing atoms (fewer than
    ``max_atoms_missing`` when that is set) which ``prefilter_callback``
    accepts are replaced in place by the result of ``extend_residue``.  When
    ``modify_segids`` is true the atoms of each rebuilt residue get segid
    "XXXX".  Atom i_seqs and serials are reset afterwards.

    Returns the number of residues that were rebuilt.
    """
    from mmtbx.monomer_library import idealized_aa
    from mmtbx.rotamer import rotamer_eval
    import mmtbx.monomer_library.server
    from iotbx.pdb import common_residue_names_get_class
    from scitbx.array_family import flex
    if (prefilter_callback is None):
        prefilter_callback = lambda r: True
    else:
        assert hasattr(prefilter_callback, "__call__")
    ideal_dict = idealized_residue_dict
    if (ideal_dict is None):
        ideal_dict = idealized_aa.residue_dict()
    if (log is None):
        log = null_out()
    mon_lib_srv = mmtbx.monomer_library.server.server()
    pdb_atoms = pdb_hierarchy.atoms()
    if (selection is None):
        selection = flex.bool(pdb_atoms.size(), True)

    partial_sidechains = []
    for chain in pdb_hierarchy.only_model().chains():
        if (not chain.is_protein()) and (skip_non_protein_chains):
            print >> log, " skipping non-protein chain '%s'" % chain.id
            continue
        for residue_group in chain.residue_groups():
            atom_groups = residue_group.atom_groups()
            if (len(atom_groups) > 1):
                print >> log, " %s %s has multiple conformations, skipping" % \
                    (chain.id, residue_group.resid())
                continue
            residue = atom_groups[0]
            i_seqs = residue.atoms().extract_i_seq()
            if (not selection.select(i_seqs).all_eq(True)):
                continue
            if (idealized_residue_dict is None):
                res_class = common_residue_names_get_class(residue.resname)
                if (res_class != "common_amino_acid"):
                    print >> log, " skipping non-standard residue %s" % residue.resname
                    continue
            else:
                # NOTE(review): a residue missing from the supplied dict is
                # looked up here but not skipped - confirm that is intended.
                key = residue.resname.lower()
                if (hydrogens == True):
                    key = key + "_h"
                if (not key in idealized_residue_dict.keys()):
                    pass
            missing_atoms = rotamer_eval.eval_residue_completeness(
                residue=residue,
                mon_lib_srv=mon_lib_srv,
                ignore_hydrogens=True)
            n_missing = len(missing_atoms)
            if (n_missing == 0):
                continue
            print >> log, " missing %d atoms in %s: %s" % (
                n_missing, residue.id_str(), ",".join(missing_atoms))
            if ((max_atoms_missing is not None) and
                    (not n_missing < max_atoms_missing)):
                continue
            if (prefilter_callback(residue)):
                partial_sidechains.append(residue)

    for residue in partial_sidechains:
        new_residue = extend_residue(
            residue=residue,
            ideal_dict=ideal_dict,
            hydrogens=hydrogens,
            mon_lib_srv=mon_lib_srv,
            match_conformation=True)
        if (modify_segids):
            for atom in new_residue.atoms():
                atom.segid = "XXXX"
        # Swap the incomplete atom group for a detached copy of the new one.
        rg = residue.parent()
        rg.remove_atom_group(residue)
        rg.append_atom_group(new_residue.detached_copy())
    pdb_hierarchy.atoms().reset_i_seq()
    pdb_hierarchy.atoms().reset_serial()
    return len(partial_sidechains)
def extend_protein_model(pdb_hierarchy,
                         selection=None,
                         hydrogens=Auto,
                         max_atoms_missing=None,
                         log=None,
                         modify_segids=True,
                         prefilter_callback=None,
                         idealized_residue_dict=None,
                         skip_non_protein_chains=True):
    """
    Replace all sidechains with missing non-hydrogen atoms in a PDB hierarchy.

    Candidates are collected first: residues with a single conformation, all
    atoms selected, at least one missing non-hydrogen atom, a missing-atom
    count below ``max_atoms_missing`` (if given) and approval from
    ``prefilter_callback``.  Each candidate is then swapped for the atom
    group produced by ``extend_residue``, optionally tagged with segid
    "XXXX".  The return value is the number of candidates.
    """
    from mmtbx.monomer_library import idealized_aa
    from mmtbx.rotamer import rotamer_eval
    import mmtbx.monomer_library.server
    from iotbx.pdb import common_residue_names_get_class
    from scitbx.array_family import flex
    if (prefilter_callback is not None):
        assert hasattr(prefilter_callback, "__call__")
    else:
        prefilter_callback = lambda r: True
    if (idealized_residue_dict is None):
        ideal_dict = idealized_aa.residue_dict()
    else:
        ideal_dict = idealized_residue_dict
    if (log is None):
        log = null_out()
    mon_lib_srv = mmtbx.monomer_library.server.server()
    pdb_atoms = pdb_hierarchy.atoms()
    if (selection is None):
        selection = flex.bool(pdb_atoms.size(), True)
    partial_sidechains = []
    for chain in pdb_hierarchy.only_model().chains():
        if (skip_non_protein_chains) and (not chain.is_protein()):
            print >> log, " skipping non-protein chain '%s'" % chain.id
            continue
        for residue_group in chain.residue_groups():
            conformations = residue_group.atom_groups()
            if (len(conformations) > 1):
                print >> log, " %s %s has multiple conformations, skipping" % \
                    (chain.id, residue_group.resid())
                continue
            residue = conformations[0]
            residue_sel = selection.select(residue.atoms().extract_i_seq())
            if (not residue_sel.all_eq(True)):
                continue
            if (idealized_residue_dict is not None):
                # NOTE(review): the lookup result is not acted upon; residues
                # absent from the supplied dict still proceed.
                key = residue.resname.lower()
                if (hydrogens == True):
                    key = key + "_h"
                if (not key in idealized_residue_dict.keys()):
                    pass
            elif (common_residue_names_get_class(residue.resname) !=
                    "common_amino_acid"):
                print >> log, " skipping non-standard residue %s" % residue.resname
                continue
            missing_atoms = rotamer_eval.eval_residue_completeness(
                residue=residue,
                mon_lib_srv=mon_lib_srv,
                ignore_hydrogens=True)
            if (len(missing_atoms) > 0):
                print >> log, " missing %d atoms in %s: %s" % (
                    len(missing_atoms), residue.id_str(),
                    ",".join(missing_atoms))
                within_limit = ((max_atoms_missing is None) or
                                (len(missing_atoms) < max_atoms_missing))
                if (within_limit) and (prefilter_callback(residue)):
                    partial_sidechains.append(residue)
    for residue in partial_sidechains:
        new_residue = extend_residue(residue=residue,
                                     ideal_dict=ideal_dict,
                                     hydrogens=hydrogens,
                                     mon_lib_srv=mon_lib_srv,
                                     match_conformation=True)
        if (modify_segids):
            for atom in new_residue.atoms():
                atom.segid = "XXXX"
        parent_group = residue.parent()
        parent_group.remove_atom_group(residue)
        parent_group.append_atom_group(new_residue.detached_copy())
    pdb_hierarchy.atoms().reset_i_seq()
    pdb_hierarchy.atoms().reset_serial()
    return len(partial_sidechains)