def analyze_dihedral(self):
    """
    Deprecated. Please use class Dihedral_Analisys.

    Compute the dihedral angles (in degrees) around the covalent bond and
    write them to ``<self.path>/<DIHEDRAL_OUTPUT>``.

    The first angle is CA - CB - covalent residue atom - covalent ligand
    atom (the original comment names the residue atom SG).  Every further
    angle is CB - covalent residue atom - covalent ligand atom - X, for each
    ligand atom X returned by a neighbour search of radius 2 around the
    covalent ligand atom, skipping atoms named like the covalent atom itself.

    The file content is every angle prefixed by a single space, i.e. the
    exact text the previous ``reduce``-based implementation produced.
    """
    cov_atm_name = self.ligand_dict['cov_atm']
    cov_atm_lig = self.ligand.child_dict[cov_atm_name]

    ## dihedral between CA < CB < SG < ligand
    angles = [
        math.degrees(
            bp.calc_dihedral(
                self.covalent_res.child_dict['CA'].get_vector(),
                self.covalent_res.child_dict['CB'].get_vector(),
                self.covalent_atm_res.get_vector(),
                cov_atm_lig.get_vector()))
    ]

    ## all dihedral of CB < SG < ligand-covalent-atom < other ligand atoms
    ns = bp.NeighborSearch(list(self.ligand.get_atom()))
    for neighbor in ns.search(cov_atm_lig.get_coord(), 2):
        if neighbor.name == cov_atm_name:
            # the search also returns the covalent atom itself
            continue
        angles.append(
            math.degrees(
                bp.calc_dihedral(
                    self.covalent_res.child_dict['CB'].get_vector(),
                    self.covalent_atm_res.get_vector(),
                    cov_atm_lig.get_vector(),
                    neighbor.get_vector())))

    # Context manager so the file handle is flushed and closed deterministically.
    with open('/'.join([self.path, DIHEDRAL_OUTPUT]), 'w') as out_file:
        out_file.write(''.join(' ' + str(angle) for angle in angles))
def annotate_fallback(chain_list):
    """
    Detect canonical basepairs with an ad-hoc fallback implementation.

    Used if neither DSSR nor MC-Annotate are available. This does not work
    well for missing atoms or modified residues.

    :param chain_list: The chains to annotate. It is iterated several times,
                       so it must be a re-iterable collection.
    :return: A tuple ``(bpseq, seq_ids)``. ``bpseq`` is a string with one
             line ``"<1-based index> <residue name> <1-based partner index
             or 0>"`` per residue; ``seq_ids`` is the list of residue ids in
             the same order (chains sorted by id).
    """
    kdtree = bpdb.NeighborSearch(
        [atom for chain in chain_list for atom in chain.get_atoms()])
    pairs = kdtree.search_all(10, "R")

    conflict_msg = ("More than one basepair detected for {}."
                    " Ignoring {}-{} because {}-{} is already"
                    " part of the structure")
    basepairs = {}
    # Sorted, so conflicting basepairs are deterministically solved
    for res1, res2 in sorted(pairs):
        if res1.resname.strip() not in RNA_RESIDUES or res1.id[0].startswith(
                "H_"):
            continue
        if res2.resname.strip() not in RNA_RESIDUES or res2.id[0].startswith(
                "H_"):
            continue
        try:
            if not is_basepair_pair(res1, res2):
                continue
            res1_id = fgr.resid_from_biopython(res1)
            res2_id = fgr.resid_from_biopython(res2)
            # A residue keeps the first partner it was assigned; later
            # candidates are reported and dropped.
            for this_id, other_id in ((res1_id, res2_id), (res2_id, res1_id)):
                if this_id in basepairs:
                    warnings.warn(conflict_msg.format(
                        this_id, this_id, other_id, this_id,
                        basepairs[this_id]))
                    break
            else:
                basepairs[res1_id] = res2_id
                basepairs[res2_id] = res1_id
        except KeyError as e:
            log.debug("Missing atom %s. %s has atoms %s, %s has atoms %s", e,
                      res1, res1.child_dict, res2, res2.child_dict)

    seq_ids = [
        fgr.resid_from_biopython(residue)
        for chain in sorted(chain_list, key=lambda x: x.id)
        for residue in chain
    ]

    # 1-based position of the first occurrence of every residue id; replaces
    # a linear seq_ids.index() call per paired residue.
    first_position = {}
    for position, seqid in enumerate(seq_ids, 1):
        first_position.setdefault(seqid, position)

    chain_dict = {c.id: c for c in chain_list}
    lines = []
    for position, seqid in enumerate(seq_ids, 1):
        if seqid in basepairs:
            partner = first_position[basepairs[seqid]]
        else:
            partner = 0
        lines.append("{} {} {}\n".format(
            position,
            chain_dict[seqid.chain][seqid.resid].resname.strip(),
            partner))
    return "".join(lines), seq_ids
def get_atom_neighbors(self, atom, atoms, cov_dist=2):
    """
    Return the atoms of ``atoms`` found within ``cov_dist`` of ``atom``.

    Atoms whose name starts with 'H' and atoms that share ``atom``'s name
    (which includes ``atom`` itself) are left out of the result.
    """
    search_tree = bp.NeighborSearch(atoms)
    kept = []
    for candidate in search_tree.search(atom.get_coord(), cov_dist):
        if candidate.name.startswith('H'):
            continue  # hydrogens are filtered out
        if candidate.name == atom.name:
            continue  # the query atom (or a namesake) is filtered out
        kept.append(candidate)
    return kept
def is_contact(res_1, other_atoms, cutoff):
    """
    Tell whether any atom of ``res_1`` lies within ``cutoff`` of ``other_atoms``.

    :param res_1: Iterable of atoms (for example a residue).
    :param other_atoms: The atoms to search against.
    :param cutoff: Search radius, in the coordinate unit (angstrom per the
                   original comment).
    :return: True as soon as one atom of ``res_1`` has a neighbour among
             ``other_atoms``; False otherwise, including when
             ``other_atoms`` is empty.
    """
    other_atoms = list(other_atoms)
    if not other_atoms:
        # Nothing to be in contact with; also avoids building a search tree
        # from an empty atom list, which Bio.PDB does not support.
        return False

    # The search tree only depends on other_atoms, so build it once instead
    # of once per atom of res_1.
    ns = PDB.NeighborSearch(other_atoms)
    for atom in res_1:
        neighbors = ns.search(atom.get_coord(), cutoff)
        residue_list = PDB.Selection.unfold_entities(neighbors, 'R')  # R for residues
        if len(residue_list) > 0:
            return True
    return False
def interchain_contacts(struct):
    """
    Return the atom pairs of ``struct`` that are within 2.8 of each other
    and whose grandparents (``atom.parent.parent``) differ.

    :param struct: The entity whose atoms are searched.
    :return: A list of ``(atom, atom)`` tuples.
    """
    atoms = bpdb.Selection.unfold_entities(struct, 'A')
    close_pairs = bpdb.NeighborSearch(atoms).search_all(2.8)
    return [
        (first, second)
        for first, second in close_pairs
        if first.parent.parent != second.parent.parent
    ]
def noncovalent_distances(chain, cutoff=0.3):
    '''
    Return the distances between all atom pairs of the chain which are
    closer than cutoff to each other and which ``is_covalent`` does not
    report as covalently bonded.

    :param chain: The Bio.PDB chain.
    :param cutoff: The maximum distance
    :return: A list with ``ftuv.magnitude(pair[1] - pair[0])`` for every
             retained pair.
    '''
    atoms = bpdb.Selection.unfold_entities(chain, 'A')
    tree = bpdb.NeighborSearch(atoms)
    distances = []
    for pair in tree.search_all(cutoff):
        if is_covalent(pair):
            continue
        distances.append(ftuv.magnitude(pair[1] - pair[0]))
    return distances
def num_noncovalent_clashes(chain):
    '''
    Count the non-covalent clashes of a chain.

    A clash is a pair of atoms found by a neighbour search with radius 1.9
    that ``is_covalent`` does not report as covalently linked.

    :param chain: The chain to evaluate
    :return: The number of non-covalent clashes.
    '''
    atoms = bpdb.Selection.unfold_entities(chain, 'A')
    close_pairs = bpdb.NeighborSearch(atoms).search_all(1.9)
    return sum(1 for pair in close_pairs if not is_covalent(pair))
def neighbourhood(atoms,radius,show):
    """
    Collect the residue contacts between different chains.

    Every pair of atoms closer than ``radius`` whose chains have different
    ids is reported through ``printverbose`` and recorded in both
    directions.

    :param atoms: Atoms to search.
    :param radius: Search radius.
    :param show: Passed through to ``printverbose``.
    :return: A set of ``(residue, chain id, residue, chain id)`` tuples.
    """
    unfold = PDB.Selection.unfold_entities
    contacts = set()
    for atom_a, atom_b in PDB.NeighborSearch(atoms).search_all(radius, 'A'):
        chain_a = unfold([atom_a], 'C')[0]
        chain_b = unfold([atom_b], 'C')[0]
        residue_a = unfold([atom_a], 'R')[0]
        residue_b = unfold([atom_b], 'R')[0]
        if chain_a.get_id() == chain_b.get_id():
            continue  # same chain id: not an inter-chain contact

        message = (" %s %s (Atom %s) (chain %s) contact with %s %s (Atom %s) (chain %s) \n"
                   % (residue_a.get_resname(), residue_a.get_id()[1],
                      atom_a.get_id(), chain_a.get_id(),
                      residue_b.get_resname(), residue_b.get_id()[1],
                      atom_b.get_id(), chain_b.get_id()))
        printverbose(sys.stdout, show, message)
        contacts.add((residue_a, chain_a.get_id(), residue_b, chain_b.get_id()))
        contacts.add((residue_b, chain_b.get_id(), residue_a, chain_a.get_id()))
    return contacts
def _find_covalent_atom(self):
    """
    Locate the receptor/ligand atom pair that forms the covalent bond.

    All ligand atoms and the atoms of every residue whose hetero flag
    (``res.id[0]``) is a blank are searched for pairs closer than
    ``self.covalent_distance``.  Among the pairs that join one such residue
    atom with one ligand atom and involve no atom whose name starts with
    'H', the closest pair is selected.

    Sets ``self.receptor_cov_atom`` and ``self.ligand_cov_atom``.

    :raises ValueError: If no receptor/ligand pair is found within
                        ``self.covalent_distance``.
    """
    ligand_id = self.ligand.id

    def on_receptor(atom):
        return atom.parent.id[0] == ' '

    def on_ligand(atom):
        return atom.parent.id == ligand_id

    atoms = list(self.ligand.get_atom())  # ligand atoms
    for res in self.structure.get_residues():
        if res.id[0] == ' ':
            # extend in place; rebuilding the list per residue is quadratic
            atoms.extend(res.get_list())

    ns = bp.NeighborSearch(atoms)
    neighbors = ns.search_all(self.covalent_distance)

    # keep only intermolecular pairs
    covalent = [cpl for cpl in neighbors
                if (on_receptor(cpl[0]) and on_ligand(cpl[1]))
                or (on_ligand(cpl[0]) and on_receptor(cpl[1]))]
    # drop pairs that involve a hydrogen
    covalent = [cpl for cpl in covalent
                if not (cpl[0].name.startswith('H')
                        or cpl[1].name.startswith('H'))]
    if not covalent:
        # min() would raise the same exception type with an opaque message
        raise ValueError(
            "No receptor-ligand atom pair found within the covalent "
            "distance {}".format(self.covalent_distance))

    cov_cpl = min(covalent, key=lambda cpl: abs(cpl[0] - cpl[1]))

    ## decouple ligand & receptor covalent atoms
    self.receptor_cov_atom = [atom for atom in cov_cpl if on_receptor(atom)][0]
    self.ligand_cov_atom = [atom for atom in cov_cpl if on_ligand(atom)][0]
def enumerate_interactions_kdtree(model):
    """
    Return the RNA residues of ``model`` that are close to a non-RNA residue.

    Only atoms whose name starts with C, N or O take part in the search
    (radius 6).  A residue pair is kept when at least one of the two close
    atoms has a name listed in ``all_side_chains``.  An RNA residue is
    reported when its partner is not an RNA residue and owns at least one
    atom whose name starts with C or N.

    :param model: Entity providing ``get_atoms()``.
    :return: A set of RNA residues; empty if no relevant atom exists.
    """
    def is_rna(residue):
        # RNA residue name and not flagged as hetero ("H_...")
        return (residue.resname.strip() in RNA_RESIDUES
                and not residue.id[0].startswith("H_"))

    heavy_atoms = []
    for atom in model.get_atoms():
        if atom.name[0] in ("C", "N", "O"):
            heavy_atoms.append(atom)
    if not heavy_atoms:
        return set()

    close_atom_pairs = bpdb.NeighborSearch(heavy_atoms).search_all(6, "A")

    residue_pairs = set()
    for atom_a, atom_b in close_atom_pairs:
        if not (atom_a.name in all_side_chains
                or atom_b.name in all_side_chains):
            continue
        parent_a = atom_a.get_parent()
        parent_b = atom_b.get_parent()
        # NOTE(review): ids are compared without the chain, so residues with
        # equal ids in different chains are skipped too - confirm intended.
        if parent_a.id == parent_b.id:
            continue
        if parent_a < parent_b:
            residue_pairs.add((parent_a, parent_b))
        else:
            residue_pairs.add((parent_b, parent_a))

    interacting_residues = set()
    for first, second in residue_pairs:
        first_is_rna = is_rna(first)
        second_is_rna = is_rna(second)
        if first_is_rna == second_is_rna:
            # both RNA or both non-RNA: nothing to report
            continue
        if first_is_rna:
            rna_res, other_res = first, second
        else:
            rna_res, other_res = second, first
        log.debug("%s(chain %s) and %s(chain %s, resname %s) are close",
                  rna_res, rna_res.parent.id, other_res, other_res.parent.id,
                  other_res.resname.strip())
        # Only consider C and N. So no ions etc
        has_c_or_n = any(a.name[0] in ("C", "N")
                         for a in other_res.get_atoms())
        if has_c_or_n:
            interacting_residues.add(rna_res)
        else:
            log.debug("but %s has wrong atoms %s", other_res,
                      [a.name for a in other_res.get_atoms()])
    log.debug("Interacting: %s", interacting_residues)
    return interacting_residues
def are_clashing(chain_one, chain_two, max_clashes=300, contact_distance=1.0):
    """
    Compare the CA atoms of two chains and check for clashes according to
    the contact distance.

    :param chain_one: First chain; its CA atoms are indexed for the search.
    :param chain_two: Second chain; each of its CA atoms is queried.
    :param max_clashes: Number of close CA pairs at which the chains are
                        declared clashing.
    :param contact_distance: Search radius around every CA atom of
                             ``chain_two``.
    :return: True once ``max_clashes`` close pairs have been counted, False
             otherwise (also when either chain has no CA atom).
    """
    atoms_one = [atom for atom in chain_one.get_atoms() if atom.get_id() == 'CA']
    atoms_two = [atom for atom in chain_two.get_atoms() if atom.get_id() == 'CA']

    # Bio.PDB's NeighborSearch fails on an empty atom list; without CA atoms
    # on one side there is nothing that could clash.
    if not atoms_one or not atoms_two:
        return False

    ns = pdb.NeighborSearch(atoms_one)
    clashes = 0
    for atom_two in atoms_two:
        for _ in ns.search(atom_two.get_coord(), contact_distance, 'A'):
            clashes += 1
            if clashes == max_clashes:
                if main.options.verbose:
                    sys.stderr.write("Clash Found!\n")
                return True
    return False
def has_clashes(move_atoms, model):
    """
    Compare the backbone atoms of the moving chain with the backbone atoms
    of the model.

    Backbone atoms are those whose id is "CA" or "C1'".  A moving atom
    counts as clashing when the model has at least one backbone atom within
    a radius of 2 around it.

    :param move_atoms: Atoms of the chain that is being placed.
    :param model: Entity providing ``get_atoms()``.
    :return: True if at least 3% of the moving backbone atoms clash, False
             otherwise (also when either side has no backbone atom).
    """
    backbone = {"CA", "C1'"}
    chain_atoms = [atom for atom in move_atoms if atom.id in backbone]
    model_atoms = [atom for atom in model.get_atoms() if atom.id in backbone]

    # No backbone atoms on one side: nothing can clash.  This also avoids a
    # division by zero and building a search tree from an empty atom list.
    if not chain_atoms or not model_atoms:
        return False

    ns = PDB.NeighborSearch(model_atoms)
    clashes = sum(bool(ns.search(atom.coord, 2)) for atom in chain_atoms)
    # float() keeps the ratio a true division on Python 2 as well
    return clashes / float(len(chain_atoms)) >= 0.03
def clashing_filter(chain_one, chain_two, max_clashes=30, contact_distance=2.0):
    """
    Check two different chains for clashes between their CA / C5' atoms.

    :param chain_one: First chain; its CA and C5' atoms are indexed for the
                      search.
    :param chain_two: Second chain; each of its CA and C5' atoms is queried.
    :param max_clashes: Number of close atom pairs at which the chains are
                        declared clashing.
    :param contact_distance: Search radius around every queried atom.
    :return: True once ``max_clashes`` close pairs have been counted, False
             otherwise (also when either chain has no CA / C5' atom).
    """
    reference_ids = ('CA', "C5'")
    atoms_one = [atom for atom in chain_one.get_atoms()
                 if atom.get_id() in reference_ids]
    atoms_two = [atom for atom in chain_two.get_atoms()
                 if atom.get_id() in reference_ids]

    # Bio.PDB's NeighborSearch fails on an empty atom list; without
    # reference atoms on one side there is nothing that could clash.
    if not atoms_one or not atoms_two:
        return False

    ns = pdb.NeighborSearch(atoms_one)
    clashes = 0
    for atom_two in atoms_two:
        for _ in ns.search(atom_two.get_coord(), contact_distance, 'A'):
            clashes += 1
            if clashes == max_clashes:
                if main.options.verbose:
                    sys.stderr.write("Clash Found!\n")
                return True
    return False
def has_clashes(move_atoms, model, clash_dist):
    """Compares the backbone atoms of the moving chain with the backbone
    atoms of the model. Returns a boolean value based on the presence and
    abundance of clashes.

    Keyword arguments:
    move_atoms -- set of atoms from the chain that is potentially going to
                  be added to the macrocomplex
    model -- current macrocomplex model in construction
    clash_dist -- clash distance between 2 atoms; converted with float(), so
                  a numeric string is accepted. No default is defined here,
                  the caller has to supply it.

    Considerations:
    Atoms whose id is "CA" or "C1'" are used as backbone atoms.
    Threshold: if at least 3% of the backbone atoms of the chain that is
    being compared to the model show clashes, <True> is returned. Hence,
    that chain won't be added to the model. <False> is returned when either
    side has no backbone atom at all."""
    backbone = {"CA", "C1'"}
    chain_atoms = [atom for atom in move_atoms if atom.id in backbone]
    model_atoms = [atom for atom in model.get_atoms() if atom.id in backbone]

    # No backbone atoms on one side: nothing can clash.  This also avoids a
    # division by zero and building a search tree from an empty atom list.
    if not chain_atoms or not model_atoms:
        return False

    radius = float(clash_dist)
    ns = PDB.NeighborSearch(model_atoms)
    clashes = sum(bool(ns.search(atom.coord, radius)) for atom in chain_atoms)
    # float() keeps the ratio a true division on Python 2 as well
    return clashes / float(len(chain_atoms)) >= 0.03
def HydrogenBondFilter(receptor_atoms, molecule,affinity):
    """
    Check every pose of a ligand file for hydrogen bonds to the target atoms.

    For each atom in the module-level ``targets`` the module globals
    ``use_angle`` / ``targethydrogen`` are set (a hydrogen of the target's
    residue within 1.1 of the target switches ``use_angle`` on), then every
    pose of the ligand is handed to ``HBondCheck`` together with the ligand
    atoms found within 3.6 of the target whose digit-stripped name is in
    ``Filter_atoms``.

    :param receptor_atoms: List of receptor atoms (concatenated with the
                           pose atoms for the neighbour search).
    :param molecule: Path of the ligand PDB file.
    :param affinity: Passed through to ``HBondCheck``.
    :return: One entry per pose: the first non-0.0 result ``HBondCheck``
             gave for that pose over all targets, else the result of the
             first target.
    """
    global targethydrogen, use_angle

    digits = "0123456789"

    def strip_digits(name):
        # Works on Python 2 and 3; str.translate(None, digits) is Python 2 only.
        return "".join(ch for ch in name if ch not in digits)

    ligand_id = os.path.basename(molecule).replace(".pdb","")
    ligand = parser.get_structure(ligand_id, molecule)
    checklist = []

    ### Loop through list of all atoms we know contribute to hydrogen bonds from the protein.
    ### At least one is essential for function so poses only pass if they form at least one
    ### with the defined targets
    for target in targets:
        target_res = target.get_parent()
        protein_hydrogen = [atom for atom in bp.Selection.unfold_entities(target_res, "A")
                            if ("H" in atom.get_id()) and target - atom <= 1.1]
        if protein_hydrogen:
            use_angle = True
            targethydrogen = protein_hydrogen[0]
        else:
            use_angle = False

        PosePassInfo = []
        ### Grab the hydrogen bond info of each pose of each drug. Unfold the atoms of the drug
        ### and combine them with the atoms of the receptor. Then search for close atoms
        for posenum, pose in enumerate(ligand):
            pose_atoms = bp.Selection.unfold_entities(pose, "A")
            all_atoms = receptor_atoms + pose_atoms
            ns = bp.NeighborSearch(all_atoms)
            ### Search for atoms within 3.6 of the target, then keep the ligand atoms that
            ### are capable of hydrogen bond formation
            close_atoms = ns.search(target.coord, 3.6)
            Hbond_LigAtoms = [atom for atom in close_atoms
                              if (atom in pose_atoms)
                              and (strip_digits(atom.get_name()) in Filter_atoms)]
            ### HBondCheck decides per pose; per the original comment passing poses carry
            ### their ligand efficiency and failing poses 0.0
            Pass = HBondCheck(Hbond_LigAtoms,pose_atoms,posenum,affinity,target)
            PosePassInfo.append(Pass)

        ### In case of multiple possible hydrogen bonds a list is kept of poses that have
        ### passed at least one of them
        if checklist:
            for posenum, entry in enumerate(PosePassInfo):
                if (entry != 0.0) and (checklist[posenum] == 0.0):
                    checklist[posenum] = entry
        else:
            checklist = PosePassInfo
    return checklist
def process_queue(id):
    """
    Build the environment string for row ``id`` of ``dfHag`` and queue it.

    The row names a PDB entry, chain, residue and atom.  The structure is
    parsed from a hard-coded directory, the named atom is looked up, its
    neighbours within 2.8 are passed to ``get_environment`` and the
    resulting comma separated string is put on ``process_queue.q``.

    :param id: Positional row index into the module-level ``dfHag``.
    :return: The environment string; empty if no atom matched.
    """
    pdir = "/home/kenneth/proj/proMin/proteins/hagai/2018/pdbs"
    df = dfHag.iloc[id]
    # print(df)
    pdbFile = os.path.join(pdir, df.pdb + ".pdb")
    # Create a PDBParser object
    parser = PDBParser()  # PERMISSIVE = 0 will list all errors with PDB file
    STR = parser.get_structure(df.pdb, os.path.join(pdbFile))
    envComp = ""
    atoms = STR[0][df.chain].get_atoms()
    atomID = ""
    for atom in atoms:
        # print(atom.get_fullname())
        # print(atom.get_fullname().replace(' ',''))
        if atom.get_name() == df.atomName:
            resID = atom.get_parent().get_full_id()
            # print(resID,'blah')
            # resID[3] is the residue id tuple: its first field must contain
            # the residue name and its second field must equal the number.
            if (resID[3][0].find(df.resName) > -1) and (int(resID[3][1]) == df.resNum):
                atomID = resID[3]
            # Nothing matched so far: report and move on to the next atom.
            if atomID == '':
                print(df.coord, resID, 'blank')
                continue
            chain = STR[0][df.chain]
            # Re-fetch the atom from the matched residue (atomID), which may
            # be a different residue than the one currently iterated.
            atom = chain[atomID][atom.get_name()]
            atom_list = PDB.Selection.unfold_entities(STR[0], 'A')
            ns = PDB.NeighborSearch(atom_list)
            # Rebinding `atoms` does not affect the running loop, which keeps
            # iterating the generator it was started with.
            atoms = ns.search(atom.get_coord(), 2.8, level='A')
            # print(atoms)
            # NOTE(review): resID is the full id of the atom currently
            # iterated, not necessarily of the matched residue - confirm.
            envComp = df.coord + "," + df.resName + "," + ",".join(
                map(str, list(resID))) + "," + get_environment(atoms)
            # print(envComp)
    process_queue.q.put(envComp)
    return envComp
def Collision_Check(model_atom_list, addition_atom_list, radius):
    """Return a list of tuples with the interacting residue numbers and the
    coordinates in conflict.

    Input:
    -model_atom_list = list of atoms taken as reference for the collisions
    -addition_atom_list = list of atoms checked against the model
    -radius = radius (per the original text in angstroms) that has to stay
     empty around each atom

    Output:
    -collisions_list = list of tuples of strings: (residue number of the
     added atom, residue number of the model residue found within the
     radius, coordinates of the added atom)
    """
    search_tree = pdb.NeighborSearch(model_atom_list)
    collisions_list = []
    for candidate in addition_atom_list:
        for residue in search_tree.search(candidate.get_coord(), radius, "R"):
            own_number = str(candidate.get_parent().get_id()[1])
            hit_number = str(residue.get_id()[1])
            position = str(candidate.get_coord())
            collisions_list.append((own_number, hit_number, position))
    return collisions_list
STR = get_cif_STR(cifPath) try: RES = STR[0][chain][("H_" + LIG, int(resID), " ")] except Exception as e: print(" ", i.Microenvironment_ID, ("H_" + LIG, int(resID), " ")) print(e) continue resName = RES.get_resname() feATOMS = [atom for atom in RES.get_atoms() if "FE" in atom.get_name()] atom_list = PDB.Selection.unfold_entities(STR, "A") ns = PDB.NeighborSearch(atom_list) for atom in feATOMS: atomName = atom.get_name() feAtom = { "ligName": resName, "ligChain": chain, "resID": resID, "ligAtom": atomName, "Metal": i.Metal, "ligCoords": atom.get_coord() } annotations = {attr: getattr(i, attr) for attr in hagaiNames} neighbors = ns.search(atom.get_coord(), 3, level='A') neigh_dict = { i.Microenvironment_ID: {
directory = sys.argv[1] # Folder with the chain pdbs distance = float(sys.argv[2]) directory_out = os.path.join(directory, 'pairs') # Folder where pairs will be saved PDBparser = PDB.PDBParser(QUIET=True) done = [] for pdb_1 in list(filter(lambda x: x.endswith('.pdb'), os.listdir(directory))): for pdb_2 in list(filter(lambda x: x.endswith('.pdb'), os.listdir(directory))): if (pdb_1 == pdb_2): continue # Not to duplicate itself elif not (pdb_1, pdb_2) in done or not (pdb_2, pdb_1) in done: # Not to duplicate pairs done.append((pdb_1, pdb_2)) structure1 = PDBparser.get_structure(pdb_1[:-4], os.path.join(directory,pdb_1)) structure2 = PDBparser.get_structure(pdb_2[:-4], os.path.join(directory,pdb_2)) for chain in structure2.get_chains(): atoms = list(chain.get_atoms()) ns = PDB.NeighborSearch(atoms) # An object to search chains near an atom for target_atom in structure1.get_atoms(): near = ns.search(target_atom.coord, distance) if(near and (pdb_1[-5:-4],pdb_2[-5:-4]) not in done and (pdb_2[-5:-4],pdb_1[-5:-4]) not in done): # If there's an atom near, they interact print('Joining chain {a} and {b}'.format(a=pdb_1[-5:-4], b=pdb_2[-5:-4])) done.append((pdb_1[-5:-4],pdb_2[-5:-4])) io = PDB.PDBIO() structure1[0].child_list += [chain for chain in structure2.get_chains()] # Joins all the chains io.set_structure(structure1) if(not os.path.exists(os.path.join(directory_out))): os.mkdir(os.path.join(directory_out)) p = re.search('^[A-Za-z0-9]+_([A-Za-z0-9]+).pdb$', pdb_1 ).group(1) q = re.search('^[A-Za-z0-9]+_([A-Za-z0-9]+).pdb$', pdb_2 ).group(1) io.save(os.path.join(directory_out,pdb_1[:-5]+p+q+'.pdb')) print(os.path.join(directory,pdb_1[:-4]+'.fa')) if(os.path.exists(os.path.join(directory,pdb_1[:-4]+'.fa'))) and os.path.exists(os.path.join(directory,pdb_2[:-4]+'.fa')):
def process_queue(id):
    """
    Describe the neighbourhood of every FE atom of one microenvironment.

    ``id`` is split on '.' and '_' into PDB code, ligand name, chain,
    residue number and function.  The ligand residue is looked up in the
    mmCIF structure and, for each of its atoms whose name contains "FE", a
    dict is built with the atom's own data plus one group of ``N_<k>_*``
    entries per neighbouring atom within 3 (the atom itself, at distance 0,
    is skipped).

    :param id: Microenvironment id, a key of ``feDF['Microenvironment_ID']``.
    :return: A list with one dict per FE atom.  If the residue cannot be
             found the problem is printed and an empty list is returned.
             The returned list is also put on ``process_queue.q``.
    """
    series = feDF.loc[feDF['Microenvironment_ID'] == id].to_dict()
    # `function` is unused, but unpacking five names validates the id layout.
    pdb, LIG, chain, resID, function = re.split('[._]', id)
    cifPath = os.path.join(pdir, pdb + ".cif")
    STR = get_cif_STR(cifPath)

    output = []
    try:
        RES = STR[0][chain][("H_" + LIG, int(resID), " ")]
    except Exception as e:
        print(" ", series['Microenvironment_ID'], ("H_" + LIG, int(resID), " "))
        print(e)
        # Best effort: report the miss and hand back an empty result instead
        # of failing later on the unbound RES.
        process_queue.q.put(output)
        return output

    resName = RES.get_resname()
    feATOMS = [atom for atom in RES.get_atoms() if "FE" in atom.get_name()]
    atom_list = PDB.Selection.unfold_entities(STR[0], "A")
    ns = PDB.NeighborSearch(atom_list)

    for atom in feATOMS:
        atom_coord = atom.get_coord()
        feAtom = {
            "id_hagai": id,
            "ligName": resName,
            "ligChain": chain,
            "resID": resID,
            "ligAtom": atom.get_name(),
            "Metal": list(series['Metal'].values())[0],
            "ligX": atom_coord[0],
            "ligY": atom_coord[1],
            "ligZ": atom_coord[2]
        }
        neighbors = ns.search(atom_coord, 3, level='A')
        c = 0  # running index of the neighbours written for this atom
        for neigh in neighbors:
            dist = neigh - atom
            if dist != 0:
                prefix = "N_" + str(c)
                neigh_coord = neigh.get_coord()
                feAtom[prefix + "_resName"] = neigh.get_parent().get_resname()
                feAtom[prefix + "_atomName"] = neigh.get_name()
                feAtom[prefix + "_resNum"] = neigh.get_parent().get_id()[1]
                feAtom[prefix + "_x"] = neigh_coord[0]
                feAtom[prefix + "_y"] = neigh_coord[1]
                feAtom[prefix + "_z"] = neigh_coord[2]
                feAtom[prefix + "_dist"] = dist
                feAtom[prefix + "_dx"] = neigh_coord[0] - atom_coord[0]
                feAtom[prefix + "_dy"] = neigh_coord[1] - atom_coord[1]
                feAtom[prefix + "_dz"] = neigh_coord[2] - atom_coord[2]
                # Advance the index so the next neighbour gets its own keys
                # (the former `count += 1` hit an undefined name).
                c += 1
        output.append(feAtom)

    process_queue.q.put(output)
    return output
def is_steric_clash(structure, rotating_chain, distance_for_clash=2.5):
    """Check if there is a steric clash between a rotating chain and current structure.

    Return False (no clash), 1 (clash between two different chains) or 2 (same chain).
    Also returns the ids of the chains in structure that are clashing
    (None when there is no clash).

    Keyword arguments:
    structure -- whole structure
    rotating_chain -- chain to be analyzed with respect to structure
    distance_for_clash -- threshold to consider clash between atoms. Default = 2.5 (Angstroms)

    Clash criteria: more than 20 atom/neighbour hits at a distance lower than
    distance_for_clash
    Same chain criteria: RMSD between them <= 3.0
    """
    # initialize the neighbor search
    NS = pdb.NeighborSearch(list(structure.get_atoms()))
    clashing_chains = set()  # the set of clashing chains (in structure)
    n_clashes = 0  # the number of clashes
    for at in rotating_chain.get_atoms():
        neighbors = NS.search(at.get_coord(), distance_for_clash)
        if len(neighbors) > 0:
            for neigh in neighbors:
                # atom -> residue -> chain: record the id of the chain hit
                clashing_chains.add(neigh.get_parent().get_parent().id)
                # every (rotating atom, structure atom) hit counts once
                n_clashes += 1
    if len(clashing_chains) > 1 and n_clashes > 20:
        # a clash against different chains:
        val_to_return = 1
    elif len(clashing_chains) == 1 and n_clashes > 20 and rotating_chain.id[
            1] == list(clashing_chains)[0][1]:
        # a clash MAYBE because you are trying to superimpose something in the place it was already
        # (only the second character of the two chain ids is compared)
        # define the clashing chain:
        clash_chain = structure[0][list(clashing_chains)[0]]
        res_chain1 = list(clash_chain.get_residues())
        res_chain2 = list(rotating_chain.get_residues())
        # so first we obtain a list of the common residues
        common_res_s1 = get_list_of_common_res(res_chain1, res_chain2)
        common_res_s2 = get_list_of_common_res(res_chain2, res_chain1)
        # then we obtain the coordinates of their atoms to use them later
        common_atoms_s1 = np.array([
            list(x.get_coord())
            for x in get_atom_list_from_res_list(common_res_s1)
        ])
        common_atoms_s2 = np.array([
            list(x.get_coord())
            for x in get_atom_list_from_res_list(common_res_s2)
        ])
        # the RMSD is only computed when both sides provide the same number
        # of atoms
        if len(common_atoms_s1) == len(common_atoms_s2):
            RMSD = rmsd.kabsch_rmsd(common_atoms_s1, common_atoms_s2)
        else:
            RMSD = 1000  # like being different
        if RMSD <= 3.0:
            # it is the same chain
            val_to_return = 2
        else:
            # it is another chain or the same with different structure
            val_to_return = 1
    elif n_clashes > 20:
        # it is one chain and the previous conditions are not fulfilled
        val_to_return = 1
    else:
        # no clash
        val_to_return = False
    if val_to_return:
        return val_to_return, clashing_chains
    else:
        return val_to_return, None
def count_receptor_contacts(cls, paths, complexname, receptor_chain,
                            ligand_chain, nwindows, dbf, model_db_file,
                            query_dict):
    """
    Count number of paths contacting each receptor residue.

    For every model file of every path, residue pairs within 5.0 that join
    the ligand chain with another chain are collected.  Each receptor
    residue is mapped to the set of path ids that contact it, and every
    path is scored with the summed sizes of the sets it appears in.  The
    scores are written to ``<complex id>_receptor_occupancy.csv`` next to
    ``model_db_file``.

    Nothing is done if both the contacts PDB file and the score file are
    already present according to ``shared.missing``.

    :param paths: DataFrame with a ``pathsid`` column; columns starting
                  with "window" are dropped before the lookup.
    :param complexname: Complex name, part of the output file names.
    :param receptor_chain: Receptor chain ids; the first one is used to
                           address the receptor chain in the structure.
    :param ligand_chain: Id of the ligand chain.
    :param nwindows: Number of windows, part of the output file names.
    :param dbf: Passed through to ``cls.get_paths``.
    :param model_db_file: Model database file; its directory receives the
                          output.
    :param query_dict: Passed through to ``cls.get_paths``.
    :raises PlotPathsError: If a model file has no atoms, or a contact has
                            no receptor side.
    """
    wd = os.path.dirname(model_db_file)
    pdbwindowid = "{complexname}{receptor_chain}{ligand_chain}{nwindows}".format(
        complexname=complexname,
        receptor_chain=receptor_chain,
        ligand_chain=ligand_chain,
        nwindows=nwindows)
    outfile = os.path.join(wd, "{0}_path_contacts.pdb".format(pdbwindowid))
    path_score_file = os.path.join(
        wd, "{0}_receptor_occupancy.csv".format(pdbwindowid))
    if not shared.missing(outfile) and not shared.missing(path_score_file):
        logging.debug("%s exists", outfile)
        return

    cutoff = 5.0
    residue_fmt = "{chain}_{resname}{resid[1]}"

    def make_key(residue):
        __, __, chainid, residueid = residue.get_full_id()
        return residue_fmt.format(chain=chainid,
                                  resname=residue.get_resname(),
                                  resid=residueid)

    # Drop window1 (modelid) column
    orig_window_vars = [
        x for x in paths.columns.values.tolist() if x.startswith("window")
    ]
    paths = paths.drop(orig_window_vars, axis=1)

    # Get model filepaths for paths
    filepaths = cls.get_paths(paths[['pathsid']],
                              dbf=dbf,
                              model_db_file=model_db_file,
                              query_dict=query_dict)
    window_vars = [
        x for x in filepaths.columns.values.tolist() if x.startswith("window")
    ]

    parser = PDB.PDBParser(QUIET=True)
    modelid = 0
    receptor_contacts = collections.defaultdict(set)
    structure = None  # stays None when there is no model file at all
    for _, row in filepaths.iterrows():
        pathsid = row['pathsid']
        for fn in (row[w] for w in window_vars):
            # Parse the hydrogen-stripped version of the model file
            structure = parser.get_structure(
                os.path.splitext(os.path.basename(fn))[0], shared.strip_h(fn))
            atoms = [
                atom for chain in structure[modelid] for residue in chain
                for atom in residue
            ]
            if not atoms:
                raise PlotPathsError("No atoms in %s" % fn)
            ns = PDB.NeighborSearch(atoms)
            for res1, res2 in ns.search_all(radius=cutoff, level="R"):
                __, __, c1, r1 = res1.get_full_id()
                __, __, c2, r2 = res2.get_full_id()
                # Skip if chains are both ligand or both receptor
                if (c1 == ligand_chain) == (c2 == ligand_chain):
                    continue
                if c1 in receptor_chain:
                    key = make_key(res1)
                elif c2 in receptor_chain:
                    key = make_key(res2)
                else:
                    raise PlotPathsError("Neither %s nor %s is receptor" %
                                         (c1, c2))
                receptor_contacts[key].add(pathsid)

    # Convert from defaultdict to normal dict
    receptor_contacts = dict(receptor_contacts)

    # Count paths contacting each receptor residue
    emptyset = set()
    # Chains have been combined
    r_ch = receptor_chain[0]
    # Deliberately using last structure from loop; skipped when no model
    # file was read, which used to fail on the unbound name.
    if structure is not None:
        for residue in structure[modelid][r_ch]:
            count = len(receptor_contacts.get(make_key(residue), emptyset))
            for atom in residue:
                atom.set_bfactor(count)
    # NOTE: writing the b-factor annotated structure to `outfile` is
    # currently disabled, so only the score file below is produced.

    # Count receptor contacts for each path
    path_score_dict = collections.defaultdict(int)
    # .values() / list(.items()) behave the same on Python 2 and 3
    for contacts in receptor_contacts.values():
        n_contacts = len(contacts)
        for pathid in contacts:
            path_score_dict[pathid] += n_contacts
    path_score_df = pd.DataFrame(list(path_score_dict.items()),
                                 columns=["pathid", "occupancyscore"])
    path_score_df.to_csv(path_score_file, index=False)
def __init__(self, protein_pdbpath: str, distance: float = 1.5):
    """
    Parse a protein PDB file and index all of its atoms for neighbour
    searches.

    :param protein_pdbpath: Path of the PDB file to parse.
    :param distance: Stored as ``self.radius``.
    """
    pdb_parser = PDB.PDBParser(QUIET=True, PERMISSIVE=True)
    structure = pdb_parser.get_structure('protein', protein_pdbpath)
    all_atoms = list(structure.get_atoms())
    self.kd = PDB.NeighborSearch(all_atoms)
    self.radius = distance
# Per-residue descriptors computed from `s`; each Bio.PDB property object
# exposes its results as a dict, a key list and a value list.
HSEB = PDB.HSExposureCB(s)
HSEB_dict = HSEB.property_dict
HSEB_keys = HSEB.property_keys
HSEB_list = HSEB.property_list
depth = PDB.ResidueDepth(s)
dep_dict = depth.property_dict
dep_keys = depth.property_keys
dep_list = depth.property_list
# NOTE(review): the PDB file name passed to DSSP is hard-coded.
dssp = PDB.DSSP(s, "3skpFH.pdb")
dssp_dict = dssp.property_dict
# Neighbours within 8 of every atom in ca_list (presumably the C-alpha
# atoms - confirm), keyed by (chain id, residue id) of the atom's residue.
nb_dict = {}
nb = PDB.NeighborSearch(ca_list)
for a in ca_list:
    t = nb.search(a.get_coord(), 8)
    aa = a.get_parent()
    aa_id = (aa.get_parent().get_id(), aa.get_id())
    nb_dict[aa_id] = t
# Column-wise table of the residues in aa_list: ids and residue names.
dic = {}
dic["res_id"] = []
for a in aa_list:
    dic["res_id"].append(a.get_id())
dic["res_name"] = []
for a in aa_list:
    dic["res_name"].append(a.get_resname())
def calculate_sphere_variance(structure, chain, md_df, mapping):
    """
    Calculates the sphere variance and returns the corresponding statistic
    Works on a single seq/structure

    Besides its parameters the function reads the names ``bs``, ``radius``
    and ``exclude``, which are not defined in this function.

    :param structure: Bio.PDB structure object
    :param chain: Chain ID as str
    :param md_df: Dataframe containing sensitivity (masked_dump file)
    :param mapping: Dict mapping sensitivity coordinates to PDB coordinates
    :return: A data frame containing the statistics per GO
    """
    # get the sequence (the first 'AA' entry is not part of it):
    seq_len = len(md_df['AA'][1:].tolist())
    # get the list of gos: the part before the first '_' of every 'GO:' column
    gos = {c.split('_')[0] for c in md_df.columns if c.startswith('GO:')}
    # residue ids that the mapping can produce
    allowed = set(mapping.values())
    # get the residues:
    residues = structure[0][chain]
    sphere_variances_per_go = {go: [] for go in gos}
    mean_variances = {go: 0. for go in gos}
    mean_fakes = {go: np.zeros(bs) for go in gos}
    p_vals = {}
    emp_vals = {}
    bs_mean = {}
    bs_median = {}
    # for residue in protein
    n_clean = 0  # number of positions that contributed a variance
    for seq_idx in range(-1, seq_len):
        if seq_idx == -1:
            # leading entry: set all to nan
            for go in gos:
                sphere_variances_per_go[go].append(np.nan)
        elif seq_idx not in mapping:
            # position has no counterpart in the structure
            for go in gos:
                sphere_variances_per_go[go].append(np.nan)
            continue
        else:
            try:
                res = residues[mapping[seq_idx]]
                ca = res['CA']
            # NOTE(review): bare except - any failure to fetch the residue or
            # its CA atom is treated as "no data" for this position.
            except:
                for go in gos:
                    sphere_variances_per_go[go].append(np.nan)
                continue
            # get the neighbors:
            glob_resseq = mapping[seq_idx][1]
            center = ca.get_coord()
            # NOTE(review): the search tree is rebuilt from all atoms of the
            # structure for every position; it does not depend on seq_idx.
            search = pdb.NeighborSearch(atom_list=list(structure.get_atoms()))
            neighbors = search.search(center=center, radius=radius, level="R")
            n_clean_neighbors = 0
            imps = {x: [] for x in gos}
            fakes = {x: [] for x in gos}
            for n in neighbors:
                het_atm, resseq, _ = n.id
                # only residues with a blank hetero flag and a non-negative
                # residue number
                if het_atm.strip() == '' and resseq >= 0:
                    # at least `exclude` positions away in residue numbering
                    # and reachable through the mapping
                    if abs(resseq - glob_resseq) >= exclude and n.id in allowed:
                        for go in gos:
                            try:
                                imps[go].append(n.sensitivity[go])
                                fakes[go].append(n.fake[go])
                                # incremented once per GO term and neighbour
                                n_clean_neighbors += 1
                            except AttributeError:
                                # residue carries no sensitivity/fake data
                                break
            if n_clean_neighbors == 0:
                for go in gos:
                    sphere_variances_per_go[go].append(np.nan)
                continue
            for go in gos:
                current_var = np.nanvar(imps[go])
                current_fake = np.nanvar(fakes[go], axis=0)
                sphere_variances_per_go[go].append(current_var)
                # NaN variances are added as 0 to the running sums
                mean_variances[go] += np.nan_to_num(current_var)
                mean_fakes[go] += np.nan_to_num(current_fake)
            n_clean += 1
    # normalize the variances (skipped unless at least two positions
    # contributed; the statistics dicts then stay empty):
    if n_clean > 1:
        for go in gos:
            mean_variances[go] /= n_clean
            mean_fakes[go] /= n_clean
            # do the test: fake means against the observed mean variance
            _, p_vals[go] = stats.ttest_1samp(mean_fakes[go],
                                              popmean=mean_variances[go],
                                              nan_policy="omit")
            # share of fake means that do not exceed the observed one, with
            # an add-one correction
            emp_vals[go] = (
                1 + np.sum(mean_fakes[go] <= mean_variances[go])) / (1 + bs)
            bs_mean[go] = np.nanmean(mean_fakes[go])
            bs_median[go] = np.nanmedian(mean_fakes[go])
    # sphere_variances_per_go is filled above but not part of the result
    record = {
        "mean_variances": mean_variances,
        "p_vals": p_vals,
        "emp_vals": emp_vals,
        "bs_mean": bs_mean,
        "bs_median": bs_median
    }
    df_svar = pd.DataFrame.from_records(record)
    return df_svar
def _stoichiometry_fulfilled(current_number_of_chains, stoichiometry):
    '''
    Returns True when every chain type counted so far has reached exactly the
    number requested in the stoichiometry.
    '''
    reached = sum(1 for key in current_number_of_chains
                  if current_number_of_chains[key] == stoichiometry[key])
    return reached == len(stoichiometry)


def build_complex(threshold, distance, stoichiometry, sequences, structures, verbose, initial):
    '''
    This is the core function of the program. Takes a list of structures and
    tries to join them in a single model, taking into account different
    parameters, such as an initial model, a threshold for the pairwise
    alignments and the distance to consider that two chains are in the same
    position and must be discarded (different atoms cannot occupy the same
    space)

    threshold     -- minimum alignment score; compared both with the score of
                     the best alignments between the model and a structure and
                     with the normalized global alignment score of a chain
                     against the fasta sequences
    distance      -- radius used in the neighbor search that counts clashes
    stoichiometry -- mapping from sequence id to the wanted number of chains;
                     a falsy value disables every stoichiometry check
    sequences     -- fasta records (their .id and .seq are used)
    structures    -- list of structures to join, at least two
    verbose       -- print progress messages when true
    initial       -- optional structure used to start the model

    Returns the structure with the model built. Raises ValueError when fewer
    than two structures are given, and errors.chain_in_stoic_not_in_fasta when
    a fasta sequence id is not a key of the stoichiometry (checked while the
    model is initialized from the first structure).
    '''
    seqs = get_fastas_from_structs(structures, sequences)

    # Initializing an empty structure to save the model
    full_structure = PDB.Structure.Structure('full')
    full_structure.add(PDB.Model.Model('model'))

    # Generator of the ids that will be assigned to the chains of the model
    ids = _chain_id()
    pair_num = len(structures)

    # How many chains of every type in the stoichiometry the model has so far
    if stoichiometry:
        current_number_of_chains = {chain_id: 0 for chain_id in set(stoichiometry)}

    # We need at least 2 pdbs with an interaction. Can be the same one repeated.
    if pair_num < 2:
        raise ValueError('Needed at least 2 pdbs to superpose')

    failed = []
    current = 0
    done = []

    # If an initial structure was given, introduce it into the model
    if initial:
        if verbose:
            print('Initializing complex')
        init_seqs = get_fastas_from_structs([initial], sequences)
        seqs[full_structure.id] = {}
        # For each chain, add it to the model with sequential ids: A->B-> etc
        for chain in initial.get_chains():
            chain_id = next(ids)
            seqs[full_structure.id][chain_id] = init_seqs[initial.id][chain.id]
            chain_new = PDB.Chain.Chain(chain_id)
            chain_new.child_list = list(chain.get_residues())
            model = next(full_structure.get_models())
            if stoichiometry:
                # Find which stoichiometry sequence(s) this chain corresponds to
                chain_sequence = _get_chain_sequence(chain_new)
                for seq in sequences:
                    seq_alignment = pairwise2.align.globalxx(
                        chain_sequence, seq.seq, one_alignment_only=True)
                    if (seq_alignment[0][2] / len(seq_alignment[0][0]) > threshold
                            and current_number_of_chains[seq.id] < stoichiometry[seq.id]):
                        model.child_list.append(chain_new)
                        current_number_of_chains[seq.id] += 1
                if _stoichiometry_fulfilled(current_number_of_chains, stoichiometry):
                    print('Stoichiometry fullfilled!')
                    return full_structure
            # NOTE(review): with a stoichiometry, a chain matched above is
            # appended here a second time (the superimposition branch below
            # guards this append with an `else`) - confirm the intent.
            model.child_list.append(chain_new)
        done.append(initial.id)

    if verbose:
        print('Starting to build')

    # The main loop of the function
    while True:
        if verbose:
            print('Loop #%i' % current)
            print('Current number of chains: %i' % len(list(full_structure.get_chains())))

        # All the structures were added correctly (non stoichiometric uses)
        if not stoichiometry and len(done) == len(structures):
            break

        # Once every structure is marked as done, each further iteration would
        # only skip to the next one, so the loop could never end.
        done_ids = set(done)
        if all(candidate.id in done_ids for candidate in structures):
            if verbose:
                print('All the pdbs were already used\nFinishing the model in the current state')
            break

        structure = structures[current % len(structures)]

        # If it is already in, continue to the next one
        if structure.id in done:
            current += 1
            continue

        # A structure that failed three times since the last successful
        # addition means that no more chains can be added: the model stays the
        # same no matter which of the remaining structures is tried.
        repeats = [failed.count(element) for element in failed]
        if 3 in repeats:
            if verbose:
                print('Some pdbs could not be joined.\nAnd endless loop started and no more chains could be added\nFinishing the model in the current state')
            break

        # Empty model: initialize it with the chains of the current structure
        if len(list(full_structure.get_chains())) == 0:
            if verbose:
                print('Initializing complex')
            seqs[full_structure.id] = {}
            for chain in structure.get_chains():
                chain_id = next(ids)  # Selecting the next id available
                # Assigning the new chain its sequence
                seqs[full_structure.id][chain_id] = seqs[structure.id][chain.id]
                # New chain with the new id holding the residues of the original
                chain_new = PDB.Chain.Chain(chain_id)
                chain_new.child_list = list(chain.get_residues())
                model = next(full_structure.get_models())
                if stoichiometry:
                    # Need the exact sequence to align
                    chain_sequence = _get_chain_sequence(chain_new)
                    # Align with every sequence in the fasta to know which one it is
                    for seq in sequences:
                        seq_alignment = pairwise2.align.globalxx(
                            chain_sequence, seq.seq, one_alignment_only=True)
                        if seq.id not in current_number_of_chains:
                            # The sequence id is not a key of the stoichiometry
                            raise errors.chain_in_stoic_not_in_fasta(seq.id)
                        # If homologous and does not surpass the stoichiometry, add it
                        if (seq_alignment[0][2] / len(seq_alignment[0][0]) > threshold
                                and current_number_of_chains[seq.id] < stoichiometry[seq.id]):
                            model.child_list.append(chain_new)
                            current_number_of_chains[seq.id] += 1
                    if _stoichiometry_fulfilled(current_number_of_chains, stoichiometry):
                        print('Stoichiometry fullfilled!')
                        return full_structure
                # NOTE(review): same possible double append as for the initial
                # structure when a stoichiometry is given - confirm the intent.
                model.child_list.append(chain_new)
            done.append(structure.id)
            current += 1
            continue

        # The Ensemble class abstracts the alignment and the superimposition
        pair = Ensemble(full_structure, structure)
        pair_seqs = (seqs[full_structure.id], seqs[structure.id])
        alignment = pair.get_best_alignment(pair_seqs)

        # Only alignments scoring above the threshold are considered homologs
        for align in alignment:
            if align[1] > threshold:
                # Errors raised by the superimposition propagate to the caller
                atoms_of_chains = pair.superimpose(align[0][0], align[0][1])
                neighbor = PDB.NeighborSearch(list(full_structure.get_atoms()))

                # Count the atoms of the candidate chains that have at least
                # one atom of the model within `distance`; each atom is
                # visited exactly once.
                clashes = 0
                for chain in atoms_of_chains:
                    for atom in chain.get_atoms():
                        close_atoms = neighbor.search(atom.get_coord(), distance)
                        if len(close_atoms) > 0:
                            clashes += 1

                if clashes < 10 and pair.rms < 0.05:
                    # The chains can be added, so let's add them
                    for chain in atoms_of_chains:
                        chain_id = next(ids)  # Choosing an id
                        # New chain that receives the residues to join
                        chain2 = PDB.Chain.Chain(chain_id)
                        chain2.child_list += list(chain.get_residues())
                        model = next(full_structure.get_models())
                        if stoichiometry:
                            # Taking the stoichiometry into account
                            chain_sequence = _get_chain_sequence(chain)
                            for seq in sequences:
                                seq_alignment = pairwise2.align.globalxx(
                                    chain_sequence, seq.seq, one_alignment_only=True)
                                # This chain corresponds to this stoichiometry chain
                                if (seq_alignment[0][2] / len(seq_alignment[0][0]) > threshold
                                        and current_number_of_chains[seq.id] < stoichiometry[seq.id]):
                                    model.child_list.append(chain2)
                                    current_number_of_chains[seq.id] += 1
                            fulfilled = _stoichiometry_fulfilled(
                                current_number_of_chains, stoichiometry)
                            if verbose:
                                print('Current number of chains: %i' % sum(current_number_of_chains.values()))
                            if fulfilled:
                                if verbose:
                                    print('Stoichiometry fullfilled!')
                                return full_structure
                        else:
                            model.child_list.append(chain2)
                    if verbose and stoichiometry:
                        print('Current number of chains: %i' % sum(current_number_of_chains.values()))
                    done.append(structure.id)
                    failed = []
                    break
                else:
                    # Pdbs clash
                    # NOTE(review): read as the `else` of the clash check;
                    # the nesting is ambiguous in the source - confirm.
                    failed.append(structure.id)
        current += 1

    return full_structure