def computeFP(self, FPtype):
    """Compute the fingerprint of every chemical listed in ``self.prSMI``.

    For each entry of ``self.prSMI`` the CAS is the file name up to its
    first dot.  The first line of ``self.prSMI + CAS + ".smi"`` is read as
    the SMILES, the chemical is prepared in the ``SMIclean/`` folder and
    ``chem.computeFP(FPtype)`` is called.  When that call returns 1 the
    chemical is reported with "ERROR FP" and left out of the result.

    :param FPtype: fingerprint type, forwarded unchanged to
        ``chem.computeFP``.

    Side effects: creates ``self.prDesc + "SMIclean/"``, sets
    ``self.prSMIclean`` and stores the ``{CAS: chem.FP}`` mapping in
    ``self.dFP``.  Nothing is returned.
    """
    # set SMI after cleaning
    prSMIclean = self.prDesc + "SMIclean/"
    pathFolder.createFolder(prSMIclean)
    self.prSMIclean = prSMIclean

    # List the folder once; the total is only used for the progress line,
    # so there is no need to rescan the directory for every chemical.
    lpSMI = listdir(self.prSMI)
    nbSMI = len(lpSMI)

    dFP = {}
    for i, pSMI in enumerate(lpSMI, 1):
        cas = pSMI.split("/")[-1].split(".")[0]
        # progress: CAS, rank, total (same text as the former
        # ``print cas, i, total`` statement)
        print("%s %s %s" % (cas, i, nbSMI))

        psmiles = self.prSMI + cas + ".smi"
        if not path.exists(psmiles):
            continue

        # ``with`` closes the handle even if the file is empty and the
        # indexing below raises.
        with open(psmiles, "r") as fsmiles:
            smiles = fsmiles.readlines()[0].strip()

        # chemical
        chem = chemical.chemical(cas, smiles)
        chem.prepareChem(prSMIclean)
        error = chem.computeFP(FPtype)
        if error == 1:
            print("ERROR FP")
            continue
        dFP[cas] = chem.FP

    self.dFP = dFP
def search_release(term):
    """Search the web for *term* and return the populated release.

    Spaces in *term* are replaced by "+" before it is handed to the
    ``search`` method of a ``chemical.chemical()`` object.  When several
    results come back they are listed on stdout and the user types the
    index of the one to keep.

    Returns the object produced by ``getReleaseInfo`` for the selected
    result, or ``None`` when the search yields nothing.
    """
    sFktname = "search_release"
    query = term.replace(" ", "+")
    ePrint(1, sFktname, "search the web")

    # search on chemical
    query_chemical = chemical.chemical()
    results = query_chemical.search(query)

    # nothing found: give up early
    if len(results) == 0:
        ePrint(1, sFktname, "nothing found on chemical")
        return None

    # several hits: let the user pick one and keep only that entry
    if len(results) > 1:
        ePrint(1, sFktname, "Multiple Links found. Make a Choice\n")
        for k, entry in enumerate(results):
            print("{0:3d}:\t{1}".format(k, entry[1]))
        print("\n")
        choice = int(input(" <-- "))
        picked = results[choice]
        results = [[picked[0], picked[1]]]

    # each result is indexed as (info page, short info)
    first = results[0]

    # create a release instance and feed it
    rel = release()
    rel.shortinfo = first[1]
    rel.infopage = first[0]
    return query_chemical.getReleaseInfo(rel)
def computeDesc(self, opera=0, RDkitPhysico=1, pOperaDesc=""):
    """Build (or reuse) the 1D/2D descriptor table of the SMILES folder.

    The table is written to ``self.prDesc + "tableDesc1D2D"``, suffixed
    with "Opera" when ``opera == 1`` and with "NoRDKITPhyChem" when
    ``RDkitPhysico == 0``.  If that file already exists and is larger than
    100 bytes it is reused as is.  Otherwise one row per ``.smi`` file of
    ``self.prSMI`` is computed and written through ``chem.writeDesc``.

    :param opera: 0 to use only the "1D2D" descriptors; any other value
        also adds the "Opera" descriptor names and loads ``pOperaDesc``.
        Opera values are attached to each chemical only when ``opera == 1``.
    :param RDkitPhysico: forwarded to ``chemical.getLdesc``.
    :param pOperaDesc: argument given to ``loadAllOperaDesc``.
    :return: path of the descriptor table.  It is now returned after a
        fresh computation as well as on a cache hit.

    Side effects: creates the ``SMIclean/`` and ``DESCbyCAS/`` folders
    under ``self.prDesc``, sets ``self.pdesc1D2D``, ``self.prSMIclean`` and
    ``self.prDescByCAS`` and, when computing, truncates
    ``self.prDesc + "log.log"``.
    """
    # The file name encodes the options so each combination has its own
    # cached table.
    pdesc1D2D = self.prDesc + "tableDesc1D2D"
    if opera == 1:
        pdesc1D2D = pdesc1D2D + "Opera"
    if RDkitPhysico == 0:
        pdesc1D2D = pdesc1D2D + "NoRDKITPhyChem"
    self.pdesc1D2D = pdesc1D2D

    prSMIclean = self.prDesc + "SMIclean/"
    pathFolder.createFolder(prSMIclean)
    self.prSMIclean = prSMIclean

    prDescbyCAS = self.prDesc + "DESCbyCAS/"
    pathFolder.createFolder(prDescbyCAS)
    self.prDescByCAS = prDescbyCAS

    # Reuse an existing table unless it is (nearly) empty.
    if path.exists(pdesc1D2D) and path.getsize(pdesc1D2D) > 100:
        return pdesc1D2D

    plog = self.prDesc + "log.log"
    # Both handles are closed even when a chemical fails midway.
    with open(plog, "w") as flog, open(pdesc1D2D, "w") as fdesc1D2D:
        print("%s No found" % pdesc1D2D)

        ldesc = chemical.getLdesc("1D2D", RDkitPhysico)
        doperaDesc = None
        if opera != 0:
            ldesc = ldesc + chemical.getLdesc("Opera", RDkitPhysico)
            doperaDesc = loadAllOperaDesc(pOperaDesc)
        fdesc1D2D.write("CAS\t" + "\t".join(ldesc) + "\n")

        for pSMI in listdir(self.prSMI):
            cas = pSMI.split("/")[-1].split(".")[0]
            psmiles = self.prSMI + cas + ".smi"
            if not path.exists(psmiles):
                continue

            with open(psmiles, "r") as fsmiles:
                smiles = fsmiles.readlines()[0].strip()

            # chemical
            chem = chemical.chemical(cas, smiles)
            chem.prepareChem(prSMIclean)
            chem.compute1D2DDesc(prDescbyCAS)
            if opera == 1:
                chem.loadOperaDesc(doperaDesc, flog)
            chem.writeTablesDescCAS(prDescbyCAS)

            # Write in the table
            chem.writeDesc(ldesc, fdesc1D2D)

    return pdesc1D2D
def _pick_track(tracks, sFktname, message):
    # Return the only track of *tracks*, or ask the user to choose one
    # when there are several.
    if len(tracks) > 1:
        ePrint(1, sFktname, message)
        for k, t in enumerate(tracks):
            print("{0:3d}:\t{1}".format(k, t))
        print("\n")
        choice = int(input(" <-- "))
        return tracks[choice]
    return tracks[0]


def search_clever(tr):
    """Find the track *tr*, first in the cached releases, then on the web.

    Every release of ``cache.rels_found`` is asked for the track through
    ``search_track``.  The first release that answers with something other
    than ``None`` wins.  Otherwise a search term is built from
    ``tr.artist`` and ``tr.title`` and searched on the web.  The selected
    release is appended to the cache and asked for the track.

    Whenever several candidates exist (links or tracks) they are listed on
    stdout and the user types the index of the one to use.

    :param tr: object exposing ``artist`` and ``title`` strings.
    :return: the matching track, or ``None`` when the web search finds
        nothing, the release page does not contain the track, or no search
        term can be built because artist or title is empty.
    """
    sFktname = "search_clever"

    # build search-term ("&", "(" and ")" are dropped, spaces become "+")
    term = None
    if tr.artist != "" and tr.title != "":
        term = tr.artist + "+" + tr.title
        for old, new in (("&", ""), ("(", ""), (")", ""), (" ", "+")):
            term = term.replace(old, new)

    # first try to find the track in the release cache
    for rel in cache.rels_found:
        ePrint(1, sFktname, "search in cache")
        res = rel.search_track(tr)
        if res is None:
            continue
        return _pick_track(
            res, sFktname,
            "Multiple tracks found in release, make a choice")

    # Without artist and title there is no term to search for.  This used
    # to fail with a NameError on the unbound ``term``.
    if term is None:
        ePrint(1, sFktname, "artist or title missing, cannot search the web")
        return None

    ePrint(1, sFktname, "search the web")

    # search on chemical
    query_chemical = chemical.chemical()
    results = query_chemical.search(term)

    # case if nothing was found
    if len(results) == 0:
        ePrint(1, sFktname, "nothing found on chemical")
        return None

    # case, if more than one was found
    if len(results) > 1:
        ePrint(1, sFktname, "Multiple Links found. Make a Choice\n")
        for k, entry in enumerate(results):
            print("{0:3d}:\t{1}".format(k, entry[1]))
        print("\n")
        choice = int(input(" <-- "))
        results = [[results[choice][0], results[choice][1]]]

    # create a release instance and feed it
    rel = release()
    rel.shortinfo = results[0][1]
    rel.infopage = results[0][0]
    rel = query_chemical.getReleaseInfo(rel)

    # append the release to the cache
    cache.rels_found.append(rel)

    # search track in release
    res = rel.search_track(tr)
    if res is None:
        ePrint(1, sFktname, "Track not found in release page")
        return None

    # The chosen track itself is returned, exactly like the cache branch.
    # Previously the choice was indexed once more with [0].
    return _pick_track(
        res, sFktname,
        "Multible tracks found in release, make a choice")
def computeDesc(passay, PRDESC, PRSMI, prout, nbfile=1, update=0):
    """Compute descriptors for the chemicals of an assay and write them out.

    The assay table *passay* is loaded with ``toolbox.loadMatrix``.  The
    "RESULT_UNIT" entry is ignored and the remaining chemicals are
    processed in random order.  A chemical is dropped when its SMILES
    contains "error" (case-insensitive), when its "Active" field is
    "Inconclusive", or when the chemical's log contains "error" after
    ``prepareChem``, ``compute1D2DDesc`` or ``computeOpera``.

    :param passay: path of the assay table.
    :param PRDESC: folder prefix where per-chemical descriptor files
        (``<ID>.txt``) are written and read back.
    :param PRSMI: folder given to ``prepareChem``.
    :param prout: output prefix; the matrix goes to ``prout + "descMat"``
        and the affinity file to ``prout + "aff.txt"``.
    :param nbfile: 1 to write a single matrix with an "Aff" column; any
        other value to also write the separate affinity file.
    :param update: when 0, existing output files are reused; also
        forwarded to ``computeOpera`` and ``writeTablesDesc``.
    :return: ``pdescout`` when ``nbfile == 1``, else ``[pdescout, paff]``.
    """
    pdescout = prout + "descMat"
    paff = prout + "aff.txt"

    # by pass: reuse what is already on disk
    if update == 0 and path.exists(pdescout):
        if nbfile == 1:
            return pdescout
        if nbfile == 2 and path.exists(paff):
            return [pdescout, paff]

    dchem = toolbox.loadMatrix(passay)
    lchemID = [chemID for chemID in dchem if chemID != "RESULT_UNIT"]
    shuffle(lchemID)

    # Keep only the chemicals whose descriptors could be computed.  Every
    # rejected chemical is skipped the same way.  The old index-based loop
    # stepped back one position after a computeOpera failure, which
    # reprocessed a chemical and could index position -1.
    lkept = []
    for chemID in lchemID:
        smi = dchem[chemID]["SMILES"]
        # case of the table is computed before
        if search("error", smi.lower()):
            continue
        if dchem[chemID]["Active"] == "Inconclusive":
            continue

        # compute descriptors
        cchem = chemical.chemical(chemID, smi)
        cchem.prepareChem(PRSMI)
        if search("error", cchem.log.lower()):
            continue
        cchem.compute1D2DDesc(PRDESC)
        if search("error", cchem.log.lower()):
            continue
        cchem.computeOpera(update=update)
        if search("error", cchem.log.lower()):
            continue
        cchem.writeTablesDesc(PRDESC, update=update)
        lkept.append(chemID)

    ldesc = chemical.getLdesc("1D2D", 1) + chemical.getLdesc("Opera", 0)

    # NOTE(review): when nbfile != 1 the header has no "Aff" column although
    # every row still ends with the affinity value.  Kept as is; confirm
    # which layout the downstream readers expect.
    header = "ID," + ",".join(ldesc)
    if nbfile == 1:
        header = header + ",Aff"

    laff = []
    with open(pdescout, "w") as fildesc:
        fildesc.write(header + "\n")
        for chemID in lkept:
            print(chemID)
            aff = 1 if dchem[chemID]["Active"] == "Active" else 0

            pdesc = PRDESC + chemID + ".txt"
            if not path.exists(pdesc):
                continue
            drow = toolbox.loadMatrix(pdesc)[chemID]
            lval = [str(drow[desc]) if desc in drow else "NA"
                    for desc in ldesc]
            fildesc.write(
                chemID + "," + ",".join(lval) + "," + str(aff) + "\n")
            laff.append(chemID + "\t" + str(aff) + "\n")

    if nbfile == 1:
        return pdescout

    with open(paff, "w") as filaff:
        filaff.write("ID\tAff\n")
        filaff.writelines(laff)
    return [pdescout, paff]
def predictSMI(self, nameChemical, smiles, plot=0, verbose=0):
    """Predict the cluster enrichment of one chemical given its SMILES.

    Results are looked up in ``<self.prout><nameChemical>/pred`` first.
    If that file exists it is loaded with ``toolbox.loadMatrix`` and
    returned directly.  Otherwise descriptors and fingerprints are
    computed for the chemical and one enrichment is collected for every
    channel / cell / descriptor type of ``self.dcluster``:

    * types whose name contains "Desc" are resolved with
      ``runExternalSoft.findCluster``;
    * every other type is treated as a fingerprint.  The chemical of
      ``self.cDB.dFP`` with the highest similarity is found, and the
      enrichment of the cluster recorded for it in ``self.ChemClust``
      is used.

    :param nameChemical: name used for the result folder and the chemical.
    :param smiles: SMILES of the chemical.
    :param plot: when 1, ``self.writeResultBySMI`` is called on the result.
    :param verbose: when 1, intermediate values are printed.
    :return: ``{"<cell>_<channel>": {typeDesc: enrichment}}`` or the
        content of the cached prediction file.
    :raises ValueError: from ``max`` when ``self.cDB.dFP`` is empty and a
        fingerprint type has to be evaluated.
    """
    prresult = pathFolder.createFolder(self.prout + nameChemical + "/")
    ppred = prresult + "pred"
    if path.exists(ppred):
        return toolbox.loadMatrix(ppred)

    chem = chemical.chemical(nameChemical, smiles)
    chem.prepareChem(prresult)
    chem.compute1D2DDesc(prresult)
    chem.writeTablesDescCAS(prresult)
    chem.computeFP(typeFP="All")

    dpred = {}
    for channel in self.dcluster:
        for cell in self.dcluster[channel]:
            dcell = self.dcluster[channel][cell]
            kpred = str(cell) + "_" + str(channel)
            dpred[kpred] = {}

            for typeDesc in dcell:
                if verbose == 1:
                    print("%s %s %s" % (channel, cell, typeDesc))
                    print(list(dcell.keys()))

                lpart = typeDesc.split("-")
                if search("Desc", typeDesc):
                    distMeth = lpart[1]
                    aggMeth = lpart[2]
                    enrichment = runExternalSoft.findCluster(
                        self.cDB.pdesc1D2Dclean, chem.pdesc,
                        dcell[typeDesc]["files"][0],
                        dcell[typeDesc]["files"][1],
                        distMeth, aggMeth)
                else:
                    # generate FP
                    typeFP = lpart[0]
                    metricAgg = lpart[-1]
                    metric = metricAgg.split("_")[0]
                    if verbose == 1:
                        print("%s %s" % (typeFP, metric))

                    # similarity of the query to every reference chemical
                    dFP = {}
                    for CASID in self.cDB.dFP:
                        if verbose == 1:
                            print(self.cDB.dFP[CASID])
                            print(chem.FP[typeFP])
                            print(metric)
                        dFP[CASID] = float(toolbox.computeSimilarityFP(
                            self.cDB.dFP[CASID][typeFP], chem.FP[typeFP],
                            metric))

                    # Closest reference chemical.  Both sides are already
                    # floats, so they are compared directly.  The former
                    # test wrapped the comparison itself in float().  No
                    # break, so on ties the last one in iteration order
                    # is kept.
                    maxSim = max(dFP.values())
                    for CASID in dFP:
                        if dFP[CASID] == maxSim:
                            CASclose = CASID

                    if verbose == 1:
                        print(CASclose)
                        print("%s %s" % (channel, cell))
                        print(self.ChemClust[CASclose][channel][cell])

                    clusterfound = self.ChemClust[CASclose][channel][cell][
                        str(typeFP) + "-" + str(metricAgg)]
                    enrichment = dcell[typeDesc][clusterfound]['Enrichment']

                dpred[kpred][typeDesc] = enrichment

    if plot == 1:
        self.writeResultBySMI(dpred, prresult)
    return dpred