def searchNeighborAtom(substruct_parsed, lig_query_parsed, struct_type, log_file, thresold_superimposed_ribose=2.5, thresold_superimposed_pi=3):
    """
    Collect the atoms of a query ligand that lie close to a reference
    substructure.

    args: -> substruct_parsed: list of atoms (dictionaries with at least a
             "resName" key) of the reference substructure
          -> lig_query_parsed: list of atoms of the query ligand
          -> struct_type: "ribose" compares against every atom of the
             substructure; any other value compares against the atoms
             returned by retrievePi(substruct_parsed)
          -> log_file: object with a write() method, receives one line when
             nothing is found
          -> thresold_superimposed_ribose: distance cutoff used for "ribose"
          -> thresold_superimposed_pi: distance cutoff used otherwise
    return: list of copies of the query atoms within the cutoff, without
            duplicates, in order of discovery ([] when nothing is found)
    """
    # Both substructure kinds share the same search; only the reference
    # atoms and the cutoff differ.
    if struct_type == "ribose":
        l_atom_anchor = substruct_parsed
        thresold = thresold_superimposed_ribose
    else:
        l_atom_anchor = retrievePi(substruct_parsed)
        thresold = thresold_superimposed_pi

    l_atom_substituate = []
    for atom_anchor in l_atom_anchor:
        for atom_query in lig_query_parsed:
            if parsePDB.distanceTwoatoms(atom_anchor, atom_query) <= thresold:
                # a shallow copy compares equal to its source, so the
                # duplicate test can be done before copying
                if atom_query not in l_atom_substituate:
                    l_atom_substituate.append(copy(atom_query))

    # control out empty
    if not l_atom_substituate:
        # an empty substructure has no residue name to report; do not crash
        # while writing the log line
        res_name = substruct_parsed[0]["resName"] if substruct_parsed else ""
        log_file.write("[Not substituate] -> " + res_name + struct_type + "\n")

    return l_atom_substituate
def searchNeighborAtom(substruct_parsed, lig_query_parsed, struct_type, log_file, thresold_superimposed_ribose = 2.5, thresold_superimposed_pi = 3) :
    """
    Search the query-ligand atoms superimposed on a reference substructure.

    args: -> substruct_parsed: atoms (dictionaries, "resName" key is read) of
             the reference substructure
          -> lig_query_parsed: atoms of the query ligand
          -> struct_type: "ribose" uses all substructure atoms with
             thresold_superimposed_ribose; every other type uses the atoms
             given by retrievePi() with thresold_superimposed_pi
          -> log_file: opened file (only write() is used)
    return: list of copied query atoms found under the cutoff, each atom
            only once; [] if none (a "[Not substituate]" line is logged)
    """
    is_ribose = struct_type == "ribose"
    l_atom_interest = substruct_parsed if is_ribose else retrievePi(substruct_parsed)
    cutoff = thresold_superimposed_ribose if is_ribose else thresold_superimposed_pi

    l_atom_substituate = []
    for atom_interest in l_atom_interest :
        for atom_query in lig_query_parsed :
            if parsePDB.distanceTwoatoms(atom_interest, atom_query) > cutoff :
                continue
            out = copy(atom_query)
            if out not in l_atom_substituate :
                l_atom_substituate.append(out)

    if l_atom_substituate :
        return l_atom_substituate

    # control out empty -> the residue name is only known when the
    # substructure holds at least one atom
    if substruct_parsed :
        res_name = substruct_parsed[0]["resName"]
    else :
        res_name = ""
    log_file.write("[Not substituate] -> " + res_name + struct_type + "\n")
    return []
def RMSDTwoList (l_atom1, l_atom2) :
    """
    Compute the RMSD between two lists of paired atoms.

    The atom at index i of l_atom1 is compared with the atom at index i of
    l_atom2; both must carry the same "name" and the same "resName".

    args: -> l_atom1, l_atom2: lists of atoms (dictionaries with "name" and
             "resName" keys, accepted by parsePDB.distanceTwoatoms)
    return: [RMSD all atoms, RMSD CA atoms, maximal pair distance, number of
            pairs], or [] when the lists are empty, have different lengths
            or are not correctly paired. The CA value is 0.0 when the lists
            contain no CA atom.
    """
    nb_atom = len(l_atom1)
    if nb_atom != len(l_atom2) or nb_atom == 0 :
        print("ERROR - RMSD: list length different or null")
        return []

    sum_square_all = 0.0
    sum_square_ca = 0.0
    nb_ca = 0
    d_max = 0.0

    for atom1, atom2 in zip(l_atom1, l_atom2) :
        # a pair is wrong as soon as the atom name OR the residue name differs
        if atom1["name"] != atom2["name"] or atom1["resName"] != atom2["resName"] :
            print("%s %s" % (atom1["name"], atom2["name"]))
            print("ERROR")
            return []

        d_atom = parsePDB.distanceTwoatoms(atom1, atom2)
        # RMSD is the root of the MEAN SQUARED deviation, so the squared
        # distance is accumulated, not the distance itself
        square_d = d_atom * d_atom
        sum_square_all = sum_square_all + square_d

        if atom1["name"] == "CA" :
            sum_square_ca = sum_square_ca + square_d
            nb_ca = nb_ca + 1

        if d_atom > d_max :
            d_max = d_atom

    # no CA in the selection -> avoid the division by zero
    if nb_ca == 0 :
        RMSD_ca = 0.0
    else :
        RMSD_ca = sqrt(sum_square_ca / nb_ca)

    return [sqrt(sum_square_all / nb_atom), RMSD_ca, d_max, nb_atom]
def retrieveSubstructSuperimposed (name_lig, thresold_BS = 4.5, thresold_superimposed_ribose = 2.5, thresold_superimposed_pi = 3, thresold_shaep = 0.4):
    """
    Extract the parts of query ligands superimposed on the reference
    substructures of name_lig and score them with ShaEP.

    For every reference folder (4-character name) of the dataset:
      - each query ligand file "<3 char ligand>_<4 char PDB>..." is moved
        with its superimposition matrix
      - for each reference substructure, the neighbouring query atoms are
        written in a "substituent_*.pdb" file and compared with ShaEP
      - when the best similarity reaches thresold_shaep, the complex, the
        binding site (atoms within thresold_BS of the ligand) and the SMILES
        of the substituent are also written / stored
    Summary files (log, ShaEP tables, SMILES lists, quality counters) are
    written in the result folder of name_lig.

    args: -> name_lig: name of the reference ligand
          -> thresold_BS: distance cutoff defining the binding site
          -> thresold_superimposed_ribose, thresold_superimposed_pi: cutoffs
             forwarded to neighborSearch.searchNeighborAtom
          -> thresold_shaep: minimal ShaEP best similarity to keep a
             substituent
    return: 1
    """

    # ouput
    p_dir_dataset = pathManage.dataset(name_lig)
    p_dir_result = pathManage.result(name_lig)
    l_folder_ref = listdir(p_dir_dataset)

    # log control
    p_log = open(p_dir_result + "log_superimposed.txt", "w")

    # control extraction
    d_control = {}
    d_control["pr ref"] = 0
    d_control["lig query"] = 0
    d_control["subref"] = {}
    d_control["subref empty"] = {}
    d_control["out sheap"] = {}
    filout_control = open(p_dir_result + "quality_extraction.txt", "w")

    # stock smile code
    d_smile = {}

    # sheap control: "list" keeps the paths, the other keys the opened files
    d_filout_sheap = {}
    d_filout_sheap["list"] = [p_dir_result + "shaep_global.txt"]
    d_filout_sheap["global"] = open(p_dir_result + "shaep_global.txt", "w")
    d_filout_sheap["global"].write("name\tbest_similarity\tshape_similarity\tESP_similarity\n")

    for ref_folder in l_folder_ref :

        # control folder reference name
        if len(ref_folder) != 4 :
            p_log.write("[ERROR folder] -> " + ref_folder + "\n")
            continue

        # reference
        p_lig_ref = pathManage.findligandRef(p_dir_dataset + ref_folder + "/", name_lig)
        try :
            lig_ref_parsed = parsePDB.loadCoordSectionPDB(p_lig_ref, "HETATM")
        except Exception :
            # str() -> the finder may not return a string when nothing is found
            p_log.write("[ERROR ligand ref] -> " + str(p_lig_ref) + "\n")
            continue

        # control
        d_control["pr ref"] = d_control["pr ref"] + 1

        # output by reference
        p_dir_result_ref = pathManage.result(name_lig + "/" + ref_folder)
        d_filout_superimposed = {}
        d_filout_superimposed["global"] = open(p_dir_result_ref + "all_ligand_aligned.pdb", "w")
        d_filout_superimposed["sheap"] = open(p_dir_result_ref + "all_ligand_aligned_" + str(thresold_shaep) + ".pdb", "w")

        # write lig ref -> connect matrix correct in all reference and all sheap
        writePDBfile.coordinateSection(d_filout_superimposed["global"], lig_ref_parsed, "HETATM", connect_matrix = 1)
        writePDBfile.coordinateSection(d_filout_superimposed["sheap"], lig_ref_parsed, "HETATM", connect_matrix = 1)

        # inspect folder dataset
        l_pdbfile = listdir(p_dir_dataset + ref_folder + "/")
        for pdbfile in l_pdbfile :
            # no ligand file
            if len(pdbfile.split("_")) == 1 :
                continue
            pdbfile = pdbfile[:-4]  # remove extension

            # only files <ligand 3 char>_<PDB 4 char>..., from another PDB than the reference
            l_elem_name = pdbfile.split("_")
            if len(l_elem_name[0]) != 3 or len(l_elem_name[1]) != 4 or l_elem_name[1] == ref_folder :
                continue
            lig_query = l_elem_name[0]
            PDB_query = l_elem_name[1]

            p_lig = p_dir_dataset + ref_folder + "/" + pdbfile + ".pdb"
            if p_lig_ref == p_lig :
                continue

            # pass case where ligand replace same ligand -> does not need run
            if lig_query == name_lig :
                p_log.write("[REMOVE] -> same ligand substituate\n")
                continue

            # parsed ligand query
            lig_parsed = parsePDB.loadCoordSectionPDB(p_lig, "HETATM")

            # find matrix of rotation
            p_matrix = pathManage.findMatrix(p_lig_ref, p_lig, name_lig)
            # control file matrix exist
            if not path.exists(p_matrix) :
                p_log.write("[ERROR] -> Matrix transloc " + p_lig_ref + " " + p_lig + " " + name_lig + "\n")
                continue

            # control
            d_control["lig query"] = d_control["lig query"] + 1

            # find the path of complex used (ligand file name without its 4 first characters)
            name_file_lig = p_lig.split("/")[-1]
            p_complex = p_dir_dataset + ref_folder + "/" + name_file_lig[4:]

            # ligand rotated -> change the referentiel
            superposeStructure.applyMatrixLigand(lig_parsed, p_matrix)

            # use substruct
            l_p_substruct_ref = pathManage.findSubstructRef(pathManage.dataset(name_lig) + ref_folder + "/", name_lig)
            for p_substruct_ref in l_p_substruct_ref :
                # ribose or phosphate
                struct_type = p_substruct_ref.split("_")[-2]
                substruct_parsed = parsePDB.loadCoordSectionPDB(p_substruct_ref, "HETATM")

                l_atom_substituate = neighborSearch.searchNeighborAtom(substruct_parsed, lig_parsed, struct_type, p_log, thresold_superimposed_ribose = thresold_superimposed_ribose, thresold_superimposed_pi = thresold_superimposed_pi)

                # control find
                if len(l_atom_substituate) == 0 :
                    d_control["subref empty"][struct_type] = d_control["subref empty"].get(struct_type, 0) + 1
                    continue
                d_control["subref"][struct_type] = d_control["subref"].get(struct_type, 0) + 1

                # write PDB file, convert smile
                p_substituate_pdb = p_dir_result_ref + "substituent_" + lig_query + "_" + PDB_query + "_" + struct_type + ".pdb"
                writePDBfile.coordinateSection(p_substituate_pdb, l_atom_substituate, recorder="HETATM", header=0, connect_matrix = 1)

                # sheap reference on part of ligand
                p_sheap = runOtherSoft.runShaep(p_substruct_ref, p_substituate_pdb, p_substituate_pdb[0:-4] + ".hit", clean = 0)
                val_sheap = parseShaep.parseOutputShaep(p_sheap)
                if val_sheap == {} :
                    p_log.write("[ERROR] -> ShaEP " + p_substituate_pdb + " " + p_substruct_ref + "\n")
                    d_control["out sheap"][struct_type] = d_control["out sheap"].get(struct_type, 0) + 1
                    continue

                # one ShaEP table by type of substructure, opened at first use
                if not struct_type in d_filout_sheap.keys() :
                    p_filout_type = p_dir_result + "shaep_global_" + struct_type + ".txt"
                    d_filout_sheap[struct_type] = open(p_filout_type, "w")
                    d_filout_sheap[struct_type].write("name\tbest_similarity\tshape_similarity\tESP_similarity\n")
                    d_filout_sheap["list"].append(p_filout_type)

                # write value in ShaEP control
                best_similarity = str(val_sheap["best_similarity"])
                line_sheap = ref_folder + "_" + str(PDB_query) + "_" + struct_type + "_" + str(lig_query) + "\t" + best_similarity + "\t" + str(val_sheap["shape_similarity"]) + "\t" + str(val_sheap["ESP_similarity"]) + "\n"
                d_filout_sheap[struct_type].write(line_sheap)
                d_filout_sheap["global"].write(line_sheap)

                # rename file substituent with shaEP value and keep the new name
                p_substituate_scored = p_substituate_pdb[:-4] + "_" + best_similarity + ".pdb"
                rename(p_substituate_pdb, p_substituate_scored)
                p_substituate_pdb = p_substituate_scored

                # write all substruct in global file
                header_lig = str(name_file_lig) + "_" + best_similarity
                writePDBfile.coordinateSection(d_filout_superimposed["global"], lig_parsed, recorder= "HETATM", header = header_lig, connect_matrix = 1)

                # control sheap thresold
                if not (float(val_sheap["best_similarity"]) >= thresold_shaep) :
                    d_control["out sheap"][struct_type] = d_control["out sheap"].get(struct_type, 0) + 1
                    continue

                # write subligand superimposed selected in global files
                writePDBfile.coordinateSection(d_filout_superimposed["sheap"], lig_parsed, recorder= "HETATM", header = header_lig, connect_matrix = 1)

                ############
                # write BS #
                ############
                # not only protein superimposed -> also ion and water
                l_atom_complex = parsePDB.loadCoordSectionPDB(p_complex)
                superposeStructure.applyMatrixProt(l_atom_complex, p_matrix)
                p_file_cx = p_dir_result_ref + "CX_" + name_file_lig
                # write CX
                writePDBfile.coordinateSection(p_file_cx, l_atom_complex, recorder="ATOM", header= name_file_lig, connect_matrix = 0)

                # search atom in BS
                l_atom_binding_site = []
                for atom_complex in l_atom_complex :
                    for atom_substruct in lig_parsed :
                        if parsePDB.distanceTwoatoms(atom_substruct, atom_complex) <= thresold_BS :
                            if not atom_complex in l_atom_binding_site :
                                l_atom_binding_site.append(deepcopy(atom_complex))
                            # atom already classified, other ligand atoms change nothing
                            break

                # retrieve complet residue
                l_atom_BS_res = parsePDB.getResidues(l_atom_binding_site, l_atom_complex)

                # write binding site
                p_binding = p_dir_result_ref + "BS_" + name_file_lig
                writePDBfile.coordinateSection(p_binding, l_atom_BS_res, "ATOM", p_binding, connect_matrix = 0)

                # smile code substituate analysis
                # Step smile -> not conversion if shaep not validate
                smile_find = runOtherSoft.babelConvertPDBtoSMILE(p_substituate_pdb)
                d_smile_type = d_smile.setdefault(struct_type, {})
                if not smile_find in d_smile_type.keys() :
                    d_smile_type[smile_find] = {}
                    d_smile_type[smile_find]["count"] = 1
                    d_smile_type[smile_find]["PDB"] = [PDB_query]
                    d_smile_type[smile_find]["ligand"] = [lig_query]
                    d_smile_type[smile_find]["ref"] = [ref_folder]
                else :
                    d_smile_type[smile_find]["count"] = d_smile_type[smile_find]["count"] + 1
                    d_smile_type[smile_find]["PDB"].append(PDB_query)
                    d_smile_type[smile_find]["ligand"].append(lig_query)
                    d_smile_type[smile_find]["ref"].append(ref_folder)

        tool.closeDicoFile(d_filout_superimposed)

    # sheap control
    tool.closeDicoFile(d_filout_sheap)
    for p_file_sheap in d_filout_sheap["list"] :
        runOtherSoft.RhistogramMultiple(p_file_sheap)

    # write list of smile
    for substruct in d_smile.keys() :
        p_list_smile = pathManage.result(name_lig) + "list_" + substruct + "_" + str(thresold_shaep) + "_smile.txt"
        filout_smile = open(p_list_smile, "w")
        for smile_code in d_smile[substruct].keys() :
            l_lig = d_smile[substruct][smile_code]["ligand"]
            l_PDB = d_smile[substruct][smile_code]["PDB"]
            l_ref = d_smile[substruct][smile_code]["ref"]
            filout_smile.write(str(smile_code) + "\t" + str(d_smile[substruct][smile_code]["count"]) + "\t" + " ".join(l_PDB) + "\t" + " ".join(l_ref) + "\t" + " ".join(l_lig) + "\n")
        filout_smile.close()
    p_log.close()

    # control
    filout_control.write("NB ref: " + str(d_control["pr ref"]) + "\n")
    filout_control.write("Ligand query: " + str(d_control["lig query"]) + "\n")
    for k in d_control["subref"].keys() :
        filout_control.write("LSR " + str(k) + ": " + str(d_control["subref"][k]) + "\n")
    for k in d_control["subref empty"].keys() :
        filout_control.write("NB LSR empty " + str(k) + ": " + str(d_control["subref empty"][k]) + "\n")
    for k in d_control["out sheap"].keys() :
        filout_control.write("LSR out by sheap " + str(k) + ": " + str(d_control["out sheap"][k]) + "\n")
    filout_control.write("**********************\n\n")
    for k in d_control["subref"].keys() :
        # a type for which ShaEP never rejected anything has no "out sheap" entry
        filout_control.write("LSR keep" + str(k) + ": " + str(d_control["subref"][k] - d_control["out sheap"].get(k, 0)) + "\n")
    filout_control.close()

    return 1
def enantiomer(l_ligand, name_folder_final, debug = 1) :
    """
    Write intra-ligand oxygen distances of the reference ligands found in
    the "final_<name_folder_final>" result folder (TODO in the original:
    file output).

    For each ligand of l_ligand three files are opened in the "enantiomer/"
    sub-folder: "<ligand>_O4O5" (distance O4'-O5'), "<ligand>_O3OP"
    (smallest distance between a sugar oxygen and the alpha phosphate
    oxygens) and "<ligand>_OPOP" (smallest distance between O1A/O2A and
    O1B/O2B, ATP and ADP only). Each file is then given to
    runOtherSoft.Rhistogram. Only the first "LGD_REF_A*.pdb" file found for
    a given PDB code is used.

    args: -> l_ligand: list of ligand names
          -> name_folder_final: suffix of the final result folder
          -> debug: when true, print the folders crossed
    return: NONE
    """

    pr_final = pathManage.result("final_" + name_folder_final)
    pr_enantiomer = pathManage.generatePath(pr_final + "enantiomer/")

    l_ref = []
    d_filout = {}
    for ligand in l_ligand :
        d_filout[ligand] = {}
        d_filout[ligand]["O3OP"] = open(pr_enantiomer + ligand + "_" + "O3OP", "w")
        d_filout[ligand]["O4O5"] = open(pr_enantiomer + ligand + "_" + "O4O5", "w")
        d_filout[ligand]["OPOP"] = open(pr_enantiomer + ligand + "_" + "OPOP", "w")

    l_pr_type_ref = listdir(pr_final)
    for pr_type_ref in l_pr_type_ref :
        if debug : print("1 %s" % pr_type_ref)
        # case where pr_substruct is a file not a folder
        try : l_pr_sub = listdir(pr_final + pr_type_ref + "/")
        except OSError : continue

        for pr_sub in l_pr_sub :
            if debug : print("2 %s" % pr_sub)
            # case cycle -> replace it by its sub-folders; the loop is left
            # immediately because the list has just been modified
            if pr_sub == "cycle" :
                l_pr_sub.remove("cycle")
                l_pr_sub_cycle = listdir(pr_final + pr_type_ref + "/cycle")
                for pr_sub_cycle in l_pr_sub_cycle :
                    l_pr_sub.append("cycle/" + pr_sub_cycle)
                break

        for pr_sub in l_pr_sub :
            if debug : print("3 %s" % pr_sub)
            # not a folder -> skip it instead of reusing the previous list
            try : l_pr_ref = listdir(pr_final + pr_type_ref + "/" + pr_sub)
            except OSError : continue

            for pr_ref in l_pr_ref :
                if debug : print("4 %s" % pr_ref)
                pr_LGD = pr_final + pr_type_ref + "/" + pr_sub + "/" + pr_ref + "/LGD/"
                # case no folder
                try : l_file = listdir(pr_LGD)
                except OSError : continue

                for name_file in l_file :
                    if not (search("LGD_REF_A", name_file) and search(".pdb", name_file)) :
                        continue

                    l_elem_name = name_file.split("_")
                    PDB_ref = l_elem_name[3][:4]
                    if PDB_ref in l_ref :
                        print("!!!!! IN")
                        break
                    l_ref.append(PDB_ref)

                    ligand = l_elem_name[2]
                    # ligand not requested -> no output file opened for it
                    if ligand not in d_filout :
                        continue

                    l_atom_ligand = parsePDB.loadCoordSectionPDB(pr_LGD + name_file, "HETATM")

                    # atoms by name, rebuilt for every file so that nothing
                    # is inherited from the previous ligand
                    d_atom = {}
                    for atom_ligand in l_atom_ligand :
                        d_atom[atom_ligand["name"]] = atom_ligand

                    # d O4 - O5 (file skipped when one atom is missing)
                    try : d_O4O5 = parsePDB.distanceTwoatoms(d_atom["O4'"], d_atom["O5'"])
                    except Exception : continue
                    d_filout[ligand]["O4O5"].write(pr_ref + "_" + pr_type_ref + "\t" + str(d_O4O5) + "\n")

                    # d O3 - OP
                    if ligand == "AMP" :
                        atom_sugar = d_atom.get("O3'")
                        l_name_OP = ["O1P", "O2P", "O3P"]
                    else :
                        # NOTE(review): measured from O4' (not O3') as in the
                        # original code although the file is named O3OP -> to confirm
                        atom_sugar = d_atom.get("O4'")
                        l_name_OP = ["O1A", "O2A", "O3A"]

                    d_minO3OP = 100
                    name_minO3OP = None
                    if atom_sugar is not None :
                        for atom_ligand in l_atom_ligand :
                            if atom_ligand["name"] in l_name_OP :
                                d_tempO3OP = parsePDB.distanceTwoatoms(atom_sugar, atom_ligand)
                                if d_tempO3OP < d_minO3OP :
                                    d_minO3OP = d_tempO3OP
                                    name_minO3OP = atom_ligand["name"]
                    # written only when a phosphate oxygen has been found in THIS ligand
                    if name_minO3OP is not None :
                        d_filout[ligand]["O3OP"].write(pr_ref + "_" + pr_type_ref + "_" + str(name_minO3OP) + "\t" + str(d_minO3OP) + "\n")

                    # d OP OP
                    if ligand == "ATP" or ligand == "ADP" :
                        l_name_need = ["O1A", "O2A", "O1B", "O2B"]
                        if len([name for name in l_name_need if name in d_atom]) == len(l_name_need) :
                            d_OP = {}
                            d_OP["O1AO1B"] = parsePDB.distanceTwoatoms(d_atom["O1A"], d_atom["O1B"])
                            d_OP["O1AO2B"] = parsePDB.distanceTwoatoms(d_atom["O1A"], d_atom["O2B"])
                            d_OP["O2AO1B"] = parsePDB.distanceTwoatoms(d_atom["O2A"], d_atom["O1B"])
                            d_OP["O2AO2B"] = parsePDB.distanceTwoatoms(d_atom["O2A"], d_atom["O2B"])

                            k_min = min(d_OP, key = d_OP.get)
                            d_filout[ligand]["OPOP"].write(pr_ref + "_" + pr_type_ref + "_" + str(k_min) + "\t" + str(d_OP[k_min]) + "\n")

    # close files
    for lig in l_ligand :
        for type_dist in d_filout[lig].keys() :
            p_file = d_filout[lig][type_dist].name
            d_filout[lig][type_dist].close()
            runOtherSoft.Rhistogram(p_file, type_dist, brk = 20)
def _incrementCount(d_count, key):
    """Add one to d_count[key], the entry is created when it does not exist."""
    d_count[key] = d_count.get(key, 0) + 1


def _stockSmile(d_smile, struct_type, smile_find, PDB_query, lig_query, ref_folder):
    """Record in d_smile one occurrence of smile_find for struct_type."""
    d_by_smile = d_smile.setdefault(struct_type, {})
    if smile_find not in d_by_smile:
        d_by_smile[smile_find] = {"count": 0, "PDB": [], "ligand": [], "ref": []}
    d_found = d_by_smile[smile_find]
    d_found["count"] = d_found["count"] + 1
    d_found["PDB"].append(PDB_query)
    d_found["ligand"].append(lig_query)
    d_found["ref"].append(ref_folder)


def _searchBindingSite(l_atom_complex, l_atom_lig, thresold_BS):
    """Return copies of the complex atoms at thresold_BS or less of one ligand atom."""
    l_atom_binding_site = []
    for atom_complex in l_atom_complex:
        for atom_lig in l_atom_lig:
            if parsePDB.distanceTwoatoms(atom_lig, atom_complex) <= thresold_BS:
                if atom_complex not in l_atom_binding_site:
                    l_atom_binding_site.append(deepcopy(atom_complex))
                # the atom is classified, no need to test the other ligand atoms
                break
    return l_atom_binding_site


def retrieveSubstructSuperimposed(name_lig,
                                  thresold_BS=4.5,
                                  thresold_superimposed_ribose=2.5,
                                  thresold_superimposed_pi=3,
                                  thresold_shaep=0.4):
    """
    Extract and score the query-ligand parts superimposed on the reference
    substructures of name_lig.

    Every 4-character folder of the dataset is a reference. Each query
    ligand file of the folder ("<3 char ligand>_<4 char PDB>...") is moved
    with its superimposition matrix; the atoms found close to each reference
    substructure are written in a "substituent_*.pdb" file and compared to
    the substructure with ShaEP. Substituents whose best similarity reaches
    thresold_shaep also get their complex ("CX_"), binding site ("BS_") and
    SMILES recorded. Log, ShaEP tables, SMILES lists and quality counters
    are written in the result folder of name_lig.

    args: -> name_lig: name of the reference ligand
          -> thresold_BS: distance cutoff of the binding site
          -> thresold_superimposed_ribose, thresold_superimposed_pi: cutoffs
             given to neighborSearch.searchNeighborAtom
          -> thresold_shaep: minimal ShaEP best similarity to keep a
             substituent
    return: 1
    """
    header_sheap = "name\tbest_similarity\tshape_similarity\tESP_similarity\n"

    # ouput
    p_dir_dataset = pathManage.dataset(name_lig)
    p_dir_result = pathManage.result(name_lig)
    l_folder_ref = listdir(p_dir_dataset)

    # log control
    p_log = open(p_dir_result + "log_superimposed.txt", "w")

    # control extraction
    d_control = {
        "pr ref": 0,
        "lig query": 0,
        "subref": {},
        "subref empty": {},
        "out sheap": {},
    }
    filout_control = open(p_dir_result + "quality_extraction.txt", "w")

    # stock smile code
    d_smile = {}

    # sheap control: "list" stores the paths, other keys the opened files
    d_filout_sheap = {}
    d_filout_sheap["list"] = [p_dir_result + "shaep_global.txt"]
    d_filout_sheap["global"] = open(p_dir_result + "shaep_global.txt", "w")
    d_filout_sheap["global"].write(header_sheap)

    for ref_folder in l_folder_ref:
        # control folder reference name
        if len(ref_folder) != 4:
            p_log.write("[ERROR folder] -> " + ref_folder + "\n")
            continue

        pr_ref = p_dir_dataset + ref_folder + "/"

        # reference
        p_lig_ref = pathManage.findligandRef(pr_ref, name_lig)
        try:
            lig_ref_parsed = parsePDB.loadCoordSectionPDB(p_lig_ref, "HETATM")
        except Exception:
            # str() -> the path may not be a string when no ligand was found
            p_log.write("[ERROR ligand ref] -> " + str(p_lig_ref) + "\n")
            continue

        # control
        d_control["pr ref"] = d_control["pr ref"] + 1

        # output by reference
        p_dir_result_ref = pathManage.result(name_lig + "/" + ref_folder)
        d_filout_superimposed = {}
        d_filout_superimposed["global"] = open(
            p_dir_result_ref + "all_ligand_aligned.pdb", "w")
        d_filout_superimposed["sheap"] = open(
            p_dir_result_ref + "all_ligand_aligned_" + str(thresold_shaep) +
            ".pdb", "w")

        # write lig ref -> connect matrix correct in all reference and all sheap
        writePDBfile.coordinateSection(d_filout_superimposed["global"],
                                       lig_ref_parsed, "HETATM",
                                       connect_matrix=1)
        writePDBfile.coordinateSection(d_filout_superimposed["sheap"],
                                       lig_ref_parsed, "HETATM",
                                       connect_matrix=1)

        # inspect folder dataset
        for pdbfile in listdir(pr_ref):
            # no ligand file
            if len(pdbfile.split("_")) == 1:
                continue
            pdbfile = pdbfile[:-4]  # remove extension

            # keep <ligand 3 char>_<PDB 4 char>... coming from another PDB
            l_elem_name = pdbfile.split("_")
            if len(l_elem_name[0]) != 3 or len(
                    l_elem_name[1]) != 4 or l_elem_name[1] == ref_folder:
                continue
            lig_query = l_elem_name[0]
            PDB_query = l_elem_name[1]

            p_lig = pr_ref + pdbfile + ".pdb"
            if p_lig_ref == p_lig:
                continue

            # pass case where ligand replace same ligand -> does not need run
            if lig_query == name_lig:
                p_log.write("[REMOVE] -> same ligand substituate\n")
                continue

            # parsed ligand query
            lig_parsed = parsePDB.loadCoordSectionPDB(p_lig, "HETATM")

            # find matrix of rotation and control that the file exists
            p_matrix = pathManage.findMatrix(p_lig_ref, p_lig, name_lig)
            if not path.exists(p_matrix):
                p_log.write("[ERROR] -> Matrix transloc " + p_lig_ref + " " +
                            p_lig + " " + name_lig + "\n")
                continue

            # control
            d_control["lig query"] = d_control["lig query"] + 1

            # path of complex used: ligand file name minus its 4 first characters
            name_file_lig = p_lig.split("/")[-1]
            p_complex = pr_ref + name_file_lig[4:]

            # ligand rotated -> change the referentiel
            superposeStructure.applyMatrixLigand(lig_parsed, p_matrix)

            # use substruct
            l_p_substruct_ref = pathManage.findSubstructRef(
                pathManage.dataset(name_lig) + ref_folder + "/", name_lig)
            for p_substruct_ref in l_p_substruct_ref:
                # ribose or phosphate
                struct_type = p_substruct_ref.split("_")[-2]
                substruct_parsed = parsePDB.loadCoordSectionPDB(
                    p_substruct_ref, "HETATM")

                l_atom_substituate = neighborSearch.searchNeighborAtom(
                    substruct_parsed,
                    lig_parsed,
                    struct_type,
                    p_log,
                    thresold_superimposed_ribose=thresold_superimposed_ribose,
                    thresold_superimposed_pi=thresold_superimposed_pi)

                # control find
                if len(l_atom_substituate) == 0:
                    _incrementCount(d_control["subref empty"], struct_type)
                    continue
                _incrementCount(d_control["subref"], struct_type)

                # write PDB file, convert smile
                p_substituate_pdb = (p_dir_result_ref + "substituent_" +
                                     lig_query + "_" + PDB_query + "_" +
                                     struct_type + ".pdb")
                writePDBfile.coordinateSection(p_substituate_pdb,
                                               l_atom_substituate,
                                               recorder="HETATM",
                                               header=0,
                                               connect_matrix=1)

                # sheap reference on part of ligand
                p_sheap = runOtherSoft.runShaep(p_substruct_ref,
                                                p_substituate_pdb,
                                                p_substituate_pdb[0:-4] +
                                                ".hit",
                                                clean=0)
                val_sheap = parseShaep.parseOutputShaep(p_sheap)
                if val_sheap == {}:
                    p_log.write("[ERROR] -> ShaEP " + p_substituate_pdb +
                                " " + p_substruct_ref + "\n")
                    _incrementCount(d_control["out sheap"], struct_type)
                    continue

                # one ShaEP table by substructure type, opened at first use
                if struct_type not in d_filout_sheap:
                    p_filout_type = (p_dir_result + "shaep_global_" +
                                     struct_type + ".txt")
                    d_filout_sheap[struct_type] = open(p_filout_type, "w")
                    d_filout_sheap[struct_type].write(header_sheap)
                    d_filout_sheap["list"].append(p_filout_type)

                # write value in ShaEP control
                best_similarity = str(val_sheap["best_similarity"])
                line_sheap = (ref_folder + "_" + str(PDB_query) + "_" +
                              struct_type + "_" + str(lig_query) + "\t" +
                              best_similarity + "\t" +
                              str(val_sheap["shape_similarity"]) + "\t" +
                              str(val_sheap["ESP_similarity"]) + "\n")
                d_filout_sheap[struct_type].write(line_sheap)
                d_filout_sheap["global"].write(line_sheap)

                # rename file substituent with shaEP value, keep the new name
                p_substituate_scored = (p_substituate_pdb[:-4] + "_" +
                                        best_similarity + ".pdb")
                rename(p_substituate_pdb, p_substituate_scored)
                p_substituate_pdb = p_substituate_scored

                # write all substruct in global file
                header_lig = str(name_file_lig) + "_" + best_similarity
                writePDBfile.coordinateSection(
                    d_filout_superimposed["global"],
                    lig_parsed,
                    recorder="HETATM",
                    header=header_lig,
                    connect_matrix=1)

                # control sheap thresold
                if not (float(val_sheap["best_similarity"]) >= thresold_shaep):
                    _incrementCount(d_control["out sheap"], struct_type)
                    continue

                # write subligand superimposed selected in global files
                writePDBfile.coordinateSection(
                    d_filout_superimposed["sheap"],
                    lig_parsed,
                    recorder="HETATM",
                    header=header_lig,
                    connect_matrix=1)

                ############
                # write BS #
                ############
                # not only protein superimposed -> also ion and water
                l_atom_complex = parsePDB.loadCoordSectionPDB(p_complex)
                superposeStructure.applyMatrixProt(l_atom_complex, p_matrix)

                # write CX
                p_file_cx = p_dir_result_ref + "CX_" + name_file_lig
                writePDBfile.coordinateSection(p_file_cx,
                                               l_atom_complex,
                                               recorder="ATOM",
                                               header=name_file_lig,
                                               connect_matrix=0)

                # search atom in BS and retrieve complet residue
                l_atom_binding_site = _searchBindingSite(
                    l_atom_complex, lig_parsed, thresold_BS)
                l_atom_BS_res = parsePDB.getResidues(l_atom_binding_site,
                                                     l_atom_complex)

                # write binding site
                p_binding = p_dir_result_ref + "BS_" + name_file_lig
                writePDBfile.coordinateSection(p_binding,
                                               l_atom_BS_res,
                                               "ATOM",
                                               p_binding,
                                               connect_matrix=0)

                # smile code substituate analysis
                # Step smile -> not conversion if shaep not validate
                smile_find = runOtherSoft.babelConvertPDBtoSMILE(
                    p_substituate_pdb)
                _stockSmile(d_smile, struct_type, smile_find, PDB_query,
                            lig_query, ref_folder)

        tool.closeDicoFile(d_filout_superimposed)

    # sheap control
    tool.closeDicoFile(d_filout_sheap)
    for p_file_sheap in d_filout_sheap["list"]:
        runOtherSoft.RhistogramMultiple(p_file_sheap)

    # write list of smile
    for substruct in d_smile.keys():
        p_list_smile = (pathManage.result(name_lig) + "list_" + substruct +
                        "_" + str(thresold_shaep) + "_smile.txt")
        filout_smile = open(p_list_smile, "w")
        for smile_code in d_smile[substruct].keys():
            d_found = d_smile[substruct][smile_code]
            filout_smile.write(
                str(smile_code) + "\t" + str(d_found["count"]) + "\t" +
                " ".join(d_found["PDB"]) + "\t" + " ".join(d_found["ref"]) +
                "\t" + " ".join(d_found["ligand"]) + "\n")
        filout_smile.close()
    p_log.close()

    # control
    filout_control.write("NB ref: " + str(d_control["pr ref"]) + "\n")
    filout_control.write("Ligand query: " + str(d_control["lig query"]) +
                         "\n")
    for k in d_control["subref"].keys():
        filout_control.write("LSR " + str(k) + ": " +
                             str(d_control["subref"][k]) + "\n")
    for k in d_control["subref empty"].keys():
        filout_control.write("NB LSR empty " + str(k) + ": " +
                             str(d_control["subref empty"][k]) + "\n")
    for k in d_control["out sheap"].keys():
        filout_control.write("LSR out by sheap " + str(k) + ": " +
                             str(d_control["out sheap"][k]) + "\n")
    filout_control.write("**********************\n\n")
    for k in d_control["subref"].keys():
        # no "out sheap" entry exists for a type never rejected by ShaEP
        nb_keep = d_control["subref"][k] - d_control["out sheap"].get(k, 0)
        filout_control.write("LSR keep" + str(k) + ": " + str(nb_keep) + "\n")
    filout_control.close()

    return 1
def analyseIons (pr_dataset, name_ligand, p_filout, thresold_max_interaction = 4.0) :
    """
    Describe the position of the ions around the phosphates of the reference
    ligands of a dataset.

    For every reference folder (4-character name) of pr_dataset, the HETATM
    records of the complex whose residue name belongs to the module-level
    list l_ions are compared with the phosphate atoms given by
    retrieveTwoAtomForAngle (three atoms are read for ATP, two otherwise).
    One line (distances, angle(s), atom serials) is written in p_filout for
    each ion at less than 10 of the two first phosphate atoms. Two other
    files, built from p_filout without its 4 last characters, receive the
    global counts ("...count.txt") and the number of complexes by ion
    ("...byIons_<name_ligand>", also given to runOtherSoft.barplot).

    args: -> pr_dataset: path of the dataset folder (with final "/")
          -> name_ligand: name of the ligand, "ATP" adds the third
             phosphate columns
          -> p_filout: path of the main output file
          -> thresold_max_interaction: distance under which an ion is
             counted as interacting with two phosphates
    return: NONE
    """

    l_folder_ref = listdir(pr_dataset)
    flag_ATP = name_ligand == "ATP"
    p_prefix = p_filout[0:-4]

    # dictionnary of counting
    d_count = {}
    d_count["CX"] = 0
    d_count["CX + ions"] = 0
    d_count["BS + ions"] = 0
    d_count["BS + 1-ion"] = 0
    d_count["BS + 2-ions"] = 0
    d_count["BS + more-ions"] = 0
    d_count["Interact-1"] = 0
    d_count["Interact-2"] = 0

    # dictionnary by ions
    d_ions = {}

    # "with" -> the file is closed even if one complex cannot be parsed
    with open(p_filout, "w") as filout :
        if flag_ATP :
            filout.write("PDB\tIon\tD1\tD2\tD3\tAngle1\tAngle2\tAt1\tAt2\tA3\n")
        else :
            filout.write("PDB\tIon\tD1\tD2\tAngle\tAt1\tAt2\n")

        for ref_folder in l_folder_ref :
            only_one = 0
            if len(ref_folder) != 4 :
                continue
            d_count["CX"] = d_count["CX"] + 1
            l_temp = []  # ions already counted for this complex

            # path and complex
            p_lig_ref = pathManage.findligandRef(pr_dataset + ref_folder + "/", name_ligand)
            p_complex = pathManage.findPDBRef(pr_dataset + ref_folder + "/")

            # parsing
            lig_ref_parsed = parsePDB.loadCoordSectionPDB(p_lig_ref, "HETATM")
            l_het_parsed = parsePDB.loadCoordSectionPDB(p_complex, "HETATM")

            # retrieve phosphate
            l_pi = retrieveTwoAtomForAngle(lig_ref_parsed, name_ligand)
            if l_pi == [] :  # case ligand without phosphate
                continue

            flag_interact = 0
            flag_between_1 = 0
            flag_between_2 = 0
            for het_parsed in l_het_parsed :
                name_ion = het_parsed["resName"]
                if not name_ion in l_ions :
                    continue

                d_count["CX + ions"] = d_count["CX + ions"] + 1

                # each kind of ion is counted once by complex
                if not name_ion in d_ions :
                    d_ions[name_ion] = 0
                if not name_ion in l_temp :
                    d_ions[name_ion] = d_ions[name_ion] + 1
                    l_temp.append(name_ion)

                PDB_id = ref_folder
                d1 = parsePDB.distanceTwoatoms(l_pi[0], het_parsed)
                d2 = parsePDB.distanceTwoatoms(l_pi[1], het_parsed)
                if flag_ATP :
                    d3 = parsePDB.distanceTwoatoms(l_pi[2], het_parsed)
                    angle_bis = parsePDB.angleVector(l_pi[1], het_parsed, l_pi[2])
                angle = parsePDB.angleVector(l_pi[0], het_parsed, l_pi[1])

                if not (d1 < 10 and d2 < 10) :
                    continue

                # count by ion name, only the first ion close to the ligand
                # in each complex increases it
                if not name_ion in d_count :
                    d_count[name_ion] = 0
                if only_one == 0 :
                    d_count[name_ion] = d_count[name_ion] + 1
                    only_one = 1

                d_count["BS + ions"] = d_count["BS + ions"] + 1
                flag_interact = flag_interact + 1

                if d1 < thresold_max_interaction and d2 < thresold_max_interaction :
                    flag_between_1 = flag_between_1 + 1

                if flag_ATP :
                    if d3 < thresold_max_interaction and d2 < thresold_max_interaction :
                        flag_between_2 = flag_between_2 + 1
                    filout.write(str(PDB_id) + "\t" + str(name_ion) + "\t" + str(d1) + "\t" + str(d2) + "\t" + str(d3) + "\t" + str(angle) + "\t" + str(angle_bis) + "\t" + str(l_pi[0]["serial"]) + "\t" + str(l_pi[1]["serial"]) + "\t" + str(l_pi[2]["serial"]) + "\n")
                else :
                    filout.write(str(PDB_id) + "\t" + str(name_ion) + "\t" + str(d1) + "\t" + str(d2) + "\t" + str(angle) + "\t" + str(l_pi[0]["serial"]) + "\t" + str(l_pi[1]["serial"]) + "\n")

            if flag_interact == 1 :
                d_count["BS + 1-ion"] = d_count["BS + 1-ion"] + 1
            elif flag_interact == 2 :
                d_count["BS + 2-ions"] = d_count["BS + 2-ions"] + 1
            elif flag_interact > 2 :
                d_count["BS + more-ions"] = d_count["BS + more-ions"] + 1

            # the flags are never negative, adding zero changes nothing
            d_count["Interact-1"] = d_count["Interact-1"] + flag_between_1
            d_count["Interact-2"] = d_count["Interact-2"] + flag_between_2

    with open(p_prefix + "count.txt", "w") as filout_count :
        filout_count.write("CX: " + str(d_count["CX"]) + "\n")
        filout_count.write("CX + ions: " + str(d_count["CX + ions"]) + "\n")
        filout_count.write("BS + ions: " + str(d_count["BS + ions"]) + "\n")
        filout_count.write("BS + 1-ion: " + str(d_count["BS + 1-ion"]) + "\n")
        filout_count.write("BS + 2-ions: " + str(d_count["BS + 2-ions"]) + "\n")
        filout_count.write("BS + more-ions: " + str(d_count["BS + more-ions"]) + "\n")
        filout_count.write("Interact Pi-alpha + Pi-beta: " + str(d_count["Interact-1"]) + "\n")
        filout_count.write("Interact Pi-beta + Pi-gama: " + str(d_count["Interact-2"]) + "\n")

    p_by_ion = p_prefix + "byIons_" + name_ligand
    with open(p_by_ion, "w") as filout_by_ion :
        for k in d_ions.keys() :
            filout_by_ion.write(str(k.capitalize()) + "\t" + str(d_ions[k]) + "\n")

    runOtherSoft.barplot(p_by_ion)
def computeRMSDBS (p_ref, p_query, p_substruct, pr_result, thresold_BS = 6) :
    """
    Compute the RMSD between the binding site of a reference protein and
    the equivalent atoms of a query protein.

    The binding site is built from the reference ATOM records found at
    thresold_BS or less of one atom of the substructure, completed by
    parsePDB.getResidues. Each binding-site atom is paired with the closest
    query atom (under 100) having the same residue name and the same atom
    name, then the pairs are given to RMSDTwoList.

    args: -> p_ref: path of the reference protein PDB file
          -> p_query: path of the query protein PDB file
          -> p_substruct: path of the substructure PDB file
          -> pr_result: result folder (not used, kept for the callers)
          -> thresold_BS: distance cutoff defining the binding site
    return: the list returned by RMSDTwoList + [flag_identic_crystal]
            (1 when every paired atom keeps the same residue number "resSeq",
            0 otherwise), or [] when one binding-site atom has no equivalent
            in the query or when RMSDTwoList fails
    """

    l_atom_query_parsed = parsePDB.loadCoordSectionPDB(p_query, "ATOM")
    l_atom_ref_parsed = parsePDB.loadCoordSectionPDB(p_ref, "ATOM")
    l_atom_substruct = parsePDB.loadCoordSectionPDB(p_substruct)

    l_BS_ref = []
    for atom_substruct in l_atom_substruct :
        for atom_ref in l_atom_ref_parsed :
            d_atom = parsePDB.distanceTwoatoms(atom_substruct, atom_ref)
            if d_atom <= thresold_BS :
                l_BS_ref.append(atom_ref)

    # retrieve residue full
    l_BS_ref = parsePDB.getResidues(l_BS_ref, l_atom_ref_parsed)

    l_BS_query = []
    flag_identic_crystal = 1
    for atomBS_ref in l_BS_ref :
        # closest equivalent atom, searched again for EVERY reference atom so
        # that the match of the previous atom is never reused
        d_closest = 100.0
        atom_closest = None
        for atom_query in l_atom_query_parsed :
            if atom_query["resName"] == atomBS_ref["resName"] and atom_query["name"] == atomBS_ref["name"] :
                d = parsePDB.distanceTwoatoms(atom_query, atomBS_ref)
                if d < d_closest :
                    d_closest = d
                    atom_closest = atom_query

        # case structure not found -> the two lists cannot be paired
        if atom_closest is None :
            return []

        l_BS_query.append(deepcopy(atom_closest))
        # identic check number
        if atom_closest["resSeq"] != atomBS_ref["resSeq"] :
            flag_identic_crystal = 0

    l_RMSD = RMSDTwoList(l_BS_query, l_BS_ref)
    if l_RMSD == [] :
        return []
    return l_RMSD + [flag_identic_crystal]