def _printDebug(*l_val):
    """
    Print the values separated by one space, on a single line.
    Works with Python 2 and Python 3 (replaces the print statement).
    """
    print(" ".join([str(val) for val in l_val]))


def filterBlastResult(d_dataset, p_dir_dataset, substruct, thresold_RX=2.5, thresold_blast=1e-4, debug=1):
    """
    Filter the blast hits of each reference PDB on e-value, ligands and resolution
    args: -> d_dataset: dictionary by reference PDB ID, each entry needs the keys
             "conserve" (entry skipped when equal to 0) and
             "align" (dictionary hit "PDB_chain" -> value compared to thresold_blast)
          -> p_dir_dataset: dataset folder, hit files are imported in p_dir_dataset + pdb_ref + "/"
          -> substruct: ligand ID, a hit containing it in its ligand list is rejected
          -> thresold_RX: maximal resolution accepted
          -> thresold_blast: maximal value accepted in "align"
          -> debug: 1 to print control lines
    return: None, d_dataset is modified in place -> the accepted hits are stored
            (upper case) in d_dataset[pdb_ref]["blast"], only one chain by PDB
            (text before "_") is kept
    NB: the PDB file and the chain files of a hit rejected on its ligands are removed,
        a hit rejected on its resolution keeps its files
    """

    for pdb_ref in d_dataset:
        d_ref = d_dataset[pdb_ref]
        if d_ref["conserve"] == 0:
            continue

        for pdb_blast_chain in d_ref["align"]:
            e_value = d_ref["align"][pdb_blast_chain]
            if debug == 1:
                _printDebug(e_value, pdb_ref, thresold_blast)

            # keep only hits under the threshold and remove the alignment on the reference itself
            if not (e_value <= thresold_blast and pdb_blast_chain[0:4].upper() != pdb_ref):
                continue

            if debug == 1:
                _printDebug("CONTROL", pdb_blast_chain, e_value)

            # download PDB file of the hit in the folder of the reference
            pdb_blast = pdb_blast_chain[0:4]
            p_pdb_blast = downloadFile.importPDB(pdb_blast, p_dir_dataset + pdb_ref + "/", dir_by_PDB=0, dbPDB="/home/borrel/PDB/")
            if p_pdb_blast == 0:
                continue

            # split the chains in the reference folder
            l_queries_by_chain = separeByChain(p_pdb_blast)

            # resolution not found -> 100.0, rejected below by the threshold
            # Exception and not a bare except, to let KeyboardInterrupt / SystemExit go through
            try:
                RX = parsePDB.resolution(p_pdb_blast)
            except Exception:
                RX = 100.0

            l_ligand = parsePDB.retrieveListLigand(p_pdb_blast)
            if debug == 1:
                _printDebug(p_pdb_blast, l_ligand, RX, "control blast\n" + " ".join(l_queries_by_chain) + "\n")

            # remove apo forms (no ligand) and hits that contain the substructure ligand itself
            # NOTE(review): the original comment said "remove not substituent", the test
            # removes hits that DO contain substruct -> behavior kept, to confirm
            if l_ligand == [] or substruct in l_ligand:
                remove(p_pdb_blast)
                for queries_by_chain in l_queries_by_chain:
                    remove(queries_by_chain)
                continue

            # case NMR structure -> resolution not convertible to float, hit skipped
            try:
                RX = float(RX)
            except (TypeError, ValueError):
                continue

            if debug == 1:
                _printDebug("----", RX, thresold_RX, "----")

            if RX <= thresold_RX:
                pdb_blast_chain_up = pdb_blast_chain.upper()
                PDB_blast_up = pdb_blast_chain_up.split("_")[0]
                l_blast = d_ref.setdefault("blast", [])

                # control if another chain of the same PDB is already in the list
                if not any(PDB_in.split("_")[0] == PDB_blast_up for PDB_in in l_blast):
                    l_blast.append(pdb_blast_chain_up)

                # printed here because the key "blast" exists for sure
                if debug == 1:
                    _printDebug(l_blast, pdb_ref)
def extractReference(p_list_ligand, p_dir_dataset, p_dir_result, substruct):
    """
    Build the reference dataset of the PDB entries associated with one ligand
    args: -> p_list_ligand: path of the ligand list file, parsed with tool.parseLigandPDBList
          -> p_dir_dataset: folder where PDB and fasta files are imported (one folder by PDB)
          -> p_dir_result: folder where "resolution_ref.txt" is written
          -> substruct: ligand ID, key used in the parsed ligand list
    return: dictionary by PDB ID (upper case) with the keys "p_pdb", "p_fasta",
            "p_pdb_chain", "p_fasta_chain", "conserve" (always 1 here) and "RX"
    NB: the paths of the local PDB database and of the global fasta file are hard-coded
    """

    # struct reference
    d_dataset = {}

    # retrieve list of ligand in PDB
    d_ligand = tool.parseLigandPDBList(p_list_ligand)

    # download PDB and fasta associated, entries with a failed import (0) are skipped
    for PDB_ID in d_ligand[substruct]:
        PDB_ID = PDB_ID.upper()
        p_pdb = downloadFile.importPDB(PDB_ID, p_dir_dataset, dir_by_PDB=1, debug=1, dbPDB="/home/borrel/PDB/")
        p_fasta = downloadFile.importFasta(PDB_ID, p_dir_dataset, dir_by_PDB=1, debug=1, fastaGlobal="/home/borrel/Yue_project/pdb_seqres.txt")

        if p_pdb != 0 and p_fasta != 0:
            l_p_pdb_chain = separeByChain(p_pdb)
            l_p_fasta_chain = separeChainFasta(p_fasta)

            d_dataset[PDB_ID] = {
                "p_pdb": p_pdb,
                "p_fasta": p_fasta,
                "p_pdb_chain": l_p_pdb_chain,
                "p_fasta_chain": l_p_fasta_chain,
                "conserve": 1,
            }

    # plot resolution
    p_file_RX = p_dir_result + "resolution_ref.txt"

    # with -> the file is closed even if the parsing of one PDB fails
    with open(p_file_RX, "w") as file_RX:
        for PDB_ID in d_dataset:
            RX = parsePDB.resolution(d_dataset[PDB_ID]["p_pdb"])

            # resolution not convertible to float -> 100.0 stored in the dataset
            try:
                d_dataset[PDB_ID]["RX"] = float(RX)
            except (TypeError, ValueError):
                d_dataset[PDB_ID]["RX"] = 100.0

            # the raw value is written in the file, not the 100.0 default
            file_RX.write(PDB_ID + "\t" + str(RX) + "\n")

    runOtherSoft.Rhistogram(p_file_RX, "RX_ref_no_filter")

    return d_dataset
def extractReference(p_list_ligand, p_dir_dataset, p_dir_result, substruct):
    """
    Extract the reference PDB entries of one ligand and write their resolutions
    args: -> p_list_ligand: path of the ligand list file, parsed with tool.parseLigandPDBList
          -> p_dir_dataset: folder where PDB and fasta files are imported (one folder by PDB)
          -> p_dir_result: folder where "resolution_ref.txt" is written
          -> substruct: ligand ID, key used in the parsed ligand list
    return: dictionary by PDB ID (upper case) with the keys "p_pdb", "p_fasta",
            "p_pdb_chain", "p_fasta_chain", "conserve" (always 1 here) and "RX"
    NB: the paths of the local PDB database and of the global fasta file are hard-coded
    """

    # struct reference
    d_dataset = {}

    # retrieve list of ligand in PDB
    d_ligand = tool.parseLigandPDBList(p_list_ligand)

    # download PDB and fasta associated, a failed import returns 0 and the entry is skipped
    for PDB_ID in d_ligand[substruct]:
        PDB_ID = PDB_ID.upper()
        p_pdb = downloadFile.importPDB(PDB_ID, p_dir_dataset, dir_by_PDB=1, debug=1, dbPDB="/home/borrel/PDB/")
        p_fasta = downloadFile.importFasta(PDB_ID, p_dir_dataset, dir_by_PDB=1, debug=1, fastaGlobal="/home/borrel/Yue_project/pdb_seqres.txt")

        if p_pdb == 0 or p_fasta == 0:
            continue

        l_p_pdb_chain = separeByChain(p_pdb)
        l_p_fasta_chain = separeChainFasta(p_fasta)

        d_dataset[PDB_ID] = {
            "p_pdb": p_pdb,
            "p_fasta": p_fasta,
            "p_pdb_chain": l_p_pdb_chain,
            "p_fasta_chain": l_p_fasta_chain,
            "conserve": 1,
        }

    # plot resolution
    p_file_RX = p_dir_result + "resolution_ref.txt"

    # context manager -> no file handle leak if one resolution parsing raises
    with open(p_file_RX, "w") as file_RX:
        for PDB_ID in d_dataset:
            RX = parsePDB.resolution(d_dataset[PDB_ID]["p_pdb"])

            # only conversion errors are caught, 100.0 is the default resolution
            try:
                d_dataset[PDB_ID]["RX"] = float(RX)
            except (TypeError, ValueError):
                d_dataset[PDB_ID]["RX"] = 100.0

            # the file keeps the raw value returned by the parser
            file_RX.write(PDB_ID + "\t" + str(RX) + "\n")

    runOtherSoft.Rhistogram(p_file_RX, "RX_ref_no_filter")

    return d_dataset
def _printDebug(*l_val):
    """
    Print the values separated by one space, on a single line.
    Works with Python 2 and Python 3 (replaces the print statement).
    """
    print(" ".join([str(val) for val in l_val]))


def filterBlastResult(d_dataset, p_dir_dataset, substruct, thresold_RX=2.5, thresold_blast=1e-4, debug=1):
    """
    Filter resolution PDB
    Filter evalue
    args: -> d_dataset: dictionary by reference PDB ID, each entry needs the keys
             "conserve" (entry skipped when equal to 0) and
             "align" (dictionary hit "PDB_chain" -> value compared to thresold_blast)
          -> p_dir_dataset: dataset folder, hit files are imported in p_dir_dataset + pdb_ref + "/"
          -> substruct: ligand ID, a hit containing it in its ligand list is rejected
          -> thresold_RX: maximal resolution accepted
          -> thresold_blast: maximal value accepted in "align"
          -> debug: 1 to print control lines
    return: None, d_dataset is modified in place -> the accepted hits are stored
            (upper case) in d_dataset[pdb_ref]["blast"], only one chain by PDB
            (text before "_") is kept
    NB: the PDB file and the chain files of a hit rejected on its ligands are removed,
        a hit rejected on its resolution keeps its files
    """

    for pdb_ref in d_dataset:
        d_ref = d_dataset[pdb_ref]
        if d_ref["conserve"] == 0:
            continue

        d_align = d_ref["align"]
        for pdb_blast_chain in d_align:
            e_value = d_align[pdb_blast_chain]

            # filter e.value
            if debug == 1:
                _printDebug(e_value, pdb_ref, thresold_blast)

            # remove hits over the threshold and the alignment on the same protein
            if not (e_value <= thresold_blast and pdb_blast_chain[0:4].upper() != pdb_ref):
                continue

            if debug == 1:
                _printDebug("CONTROL", pdb_blast_chain, e_value)

            # download PDB files
            pdb_blast = pdb_blast_chain[0:4]
            p_pdb_blast = downloadFile.importPDB(pdb_blast, p_dir_dataset + pdb_ref + "/", dir_by_PDB=0, dbPDB="/home/borrel/PDB/")
            if p_pdb_blast == 0:
                continue

            # divide chains in the reference folder
            l_queries_by_chain = separeByChain(p_pdb_blast)

            # parsing failure -> default 100.0, rejected below by the threshold
            # (Exception only, KeyboardInterrupt / SystemExit are not swallowed)
            try:
                RX = parsePDB.resolution(p_pdb_blast)
            except Exception:
                RX = 100.0

            l_ligand = parsePDB.retrieveListLigand(p_pdb_blast)
            if debug == 1:
                _printDebug(p_pdb_blast, l_ligand, RX, "control blast\n" + " ".join(l_queries_by_chain) + "\n")

            # remove apo forms (no ligand) and hits that contain the substructure ligand itself
            # NOTE(review): the original comment said "remove not substituent", the test
            # removes hits that DO contain substruct -> behavior kept, to confirm
            if l_ligand == [] or substruct in l_ligand:
                remove(p_pdb_blast)
                for queries_by_chain in l_queries_by_chain:
                    remove(queries_by_chain)
                continue

            # case NMR structure -> no numeric resolution, hit skipped
            try:
                RX = float(RX)
            except (TypeError, ValueError):
                continue

            if debug == 1:
                _printDebug("----", RX, thresold_RX, "----")

            if RX <= thresold_RX:
                pdb_blast_chain_up = pdb_blast_chain.upper()
                PDB_blast_up = pdb_blast_chain_up.split("_")[0]
                l_blast = d_ref.setdefault("blast", [])

                # control if another chain of the same PDB is already in the list
                if not any(PDB_in.split("_")[0] == PDB_blast_up for PDB_in in l_blast):
                    l_blast.append(pdb_blast_chain_up)

                # the key "blast" is defined at this point -> no KeyError possible
                if debug == 1:
                    _printDebug(l_blast, pdb_ref)