def testSubStructureSearchBase(self):
    """Test prefiltered substructure searches using indexed components as queries.

    For each of the first ``numMols`` entries in the chemical component index the
    molecule is fetched from the molecule provider, the index is prefiltered with
    ``prefilterIndex()``, and ``searchSubStructure()`` is run over the prefiltered
    identifier list.  The search status and the ``__resultContains()`` check for
    the query identifier must both be truthy.
    """
    matchOpts = self.__myKwargs.get("matchOpts", "sub-struct-graph-relaxed")
    numProc = self.__numProcSearch
    # Molecule provider and the search utility built on top of it
    molProvider = OeMoleculeProvider(**self.__myKwargs)
    self.assertTrue(molProvider.testCache())
    subSearchUtils = OeSubStructSearchUtils(molProvider)
    # Chemical component index provider
    indexProvider = ChemCompIndexProvider(**self.__myKwargs)
    self.assertTrue(indexProvider.testCache(minCount=self.__minCount))
    indexD = indexProvider.getIndex()
    # Fetch the molecule for the first index key up front (result is replaced in the loop)
    firstId = next(iter(indexD))
    queryMol = molProvider.getMol(firstId)
    #
    numMols = 10
    indexItems = list(indexD.items())
    for ccId, _ in indexItems[:numMols]:
        # ---- timing covers both the prefilter and the search for this component
        startTime = time.time()
        queryMol = molProvider.getMol(ccId)
        candidateIdL = subSearchUtils.prefilterIndex(queryMol, indexProvider, matchOpts=matchOpts)
        prefilterElapsed = time.time() - startTime
        logger.info("%s search length %d in (%.4f seconds)", ccId, len(candidateIdL), prefilterElapsed)
        #
        retStatus, matchL = subSearchUtils.searchSubStructure(
            queryMol,
            ccIdList=candidateIdL,
            matchOpts=matchOpts,
            numProc=numProc,
        )
        searchElapsed = time.time() - startTime
        logger.info("%s result length %d in (%.4f seconds)", ccId, len(matchL), searchElapsed)
        self.assertTrue(retStatus)
        self.assertTrue(self.__resultContains(ccId, matchL))
def testFingerPrintSearch(self):
    """Test fingerprint searches using indexed components as their own queries.

    For the first ``numMols`` entries of the chemical component index, and for
    every fingerprint type in ``self.__fpTypeList``, ``searchFingerPrints()`` is
    run with the configured minimum score and maximum result count.  Each search
    must report a truthy status and pass the ``__resultContains()`` check for the
    query identifier.
    """
    oemp = OeMoleculeProvider(**self.__myKwargs)
    # This will reload the oe binary cache.
    oeMol = oemp.getMol("004")
    self.assertGreaterEqual(len(list(oeMol.GetAtoms())), 12)
    #
    # The molecule cache status must be asserted here; otherwise it is silently
    # overwritten by the index cache status below and a bad cache goes unnoticed.
    ok = oemp.testCache()
    self.assertTrue(ok)
    ccmP = ChemCompIndexProvider(**self.__myKwargs)
    ccIdxD = ccmP.getIndex()
    ok = ccmP.testCache(minCount=self.__minCount)
    self.assertTrue(ok)
    minScore = 0.50
    maxResults = 50
    numMols = 50
    oesU = OeSearchUtils(oemp, fpTypeList=self.__fpTypeList)
    # ---- a single timer spans the searches for all components and fingerprint types
    startTime = time.time()
    for ccId, _ in list(ccIdxD.items())[:numMols]:
        for fpType in self.__fpTypeList:
            oeMol = oemp.getMol(ccId)
            retStatus, mL = oesU.searchFingerPrints(
                oeMol,
                fpType=fpType,
                minFpScore=minScore,
                maxFpResults=maxResults,
            )
            self.assertTrue(retStatus)
            self.assertTrue(self.__resultContains(ccId, mL))
            # self.assertGreaterEqual(len(mL), 1)
    logger.info("%s fingerprints search on %d in (%.4f seconds)", len(self.__fpTypeList), numMols, time.time() - startTime)
def testSubStructureSearchWithFingerPrint(self):
    """Test fingerprint-assisted substructure searches using indexed components as queries.

    For the first ``numMols`` entries of the chemical component index, and for
    every fingerprint type in ``self.__fpTypeList``,
    ``searchSubStructureWithFingerPrint()`` is run with the "graph-relaxed" match
    options.  Each search must report a truthy status and pass the
    ``__resultContains()`` check for the query identifier.
    """
    oemp = OeMoleculeProvider(**self.__myKwargs)
    #
    # The molecule cache status must be asserted here; otherwise it is silently
    # overwritten by the index cache status below and a bad cache goes unnoticed.
    ok = oemp.testCache()
    self.assertTrue(ok)
    ccmP = ChemCompIndexProvider(**self.__myKwargs)
    ccIdxD = ccmP.getIndex()
    ok = ccmP.testCache(minCount=self.__minCount)
    self.assertTrue(ok)
    minFpScore = 0.40
    maxFpResults = 50
    numMols = 20
    matchOpts = "graph-relaxed"
    oesU = OeSearchUtils(oemp, fpTypeList=self.__fpTypeList)
    # ---- a single timer spans the searches for all components and fingerprint types
    startTime = time.time()
    for ccId, _ in list(ccIdxD.items())[:numMols]:
        for fpType in self.__fpTypeList:
            oeMol = oemp.getMol(ccId)
            retStatus, mL = oesU.searchSubStructureWithFingerPrint(
                oeMol,
                fpType,
                minFpScore,
                maxFpResults,
                matchOpts=matchOpts,
            )
            self.assertTrue(retStatus)
            self.assertTrue(self.__resultContains(ccId, mL))
    logger.info("%s fingerprints search on %d in (%.4f seconds)", len(self.__fpTypeList), numMols, time.time() - startTime)
def testSubStructureSearch(self):
    """Test exhaustive substructure searches using indexed components as queries.

    For each of the first ``numMols`` entries of the chemical component index,
    ``searchSubStructure()`` is run with the "relaxed" match options.  Each search
    must report a truthy status and pass the ``__resultContains()`` check for the
    query identifier.
    """
    oemp = OeMoleculeProvider(**self.__myKwargs)
    # The molecule cache status must be asserted here; otherwise it is silently
    # overwritten by the index cache status below and a bad cache goes unnoticed.
    ok = oemp.testCache()
    self.assertTrue(ok)
    ccmP = ChemCompIndexProvider(**self.__myKwargs)
    ccIdxD = ccmP.getIndex()
    ok = ccmP.testCache(minCount=self.__minCount)
    self.assertTrue(ok)
    oesU = OeSearchUtils(oemp, fpTypeList=self.__fpTypeList)
    numMols = 10
    for ccId, _ in list(ccIdxD.items())[:numMols]:
        # ---- per-component timing
        startTime = time.time()
        oeMol = oemp.getMol(ccId)
        retStatus, mL = oesU.searchSubStructure(oeMol, matchOpts="relaxed")
        logger.info("%s match length %d in (%.4f seconds)", ccId, len(mL), time.time() - startTime)
        self.assertTrue(retStatus)
        self.assertTrue(self.__resultContains(ccId, mL))
def __testBuildMoleculeCacheFiles(self, **kwargs):
    """Test build OE cache files from full component dictionary

    The cache is first rebuilt (``useCache=False``) and verified, then reloaded
    through a second provider (``useCache=True``) whose fingerprint databases,
    molecule database/index and (optionally) substructure search database are
    checked against minimum expected sizes.

    Args:
        ccUrlTarget (str, optional): passed through to OeMoleculeProvider (default None)
        birdUrlTarget (str, optional): passed through to OeMoleculeProvider (default None)
        molLimit (int, optional): when non-zero, the minimum expected counts are
            ``molLimit - 2``; otherwise 30000.  It is only used for these thresholds
            and for logging in this method (default 0)
        quietFlag (bool, optional): passed to the rebuilding provider (default True)
        molBuildType (str, optional): molecule build type (default "ideal-xyz")
        fpTypeList (list, optional): fingerprint types to build and verify (default ["TREE"])
        screenTypeList (list, optional): screen types to build (default [])
        ccFileNamePrefix (str, optional): passed through to OeMoleculeProvider (default "cc")
        oeFileNamePrefix (str, optional): passed through to OeMoleculeProvider (default "oe")

    Returns:
        bool: True when all assertions pass
    """
    ccUrlTarget = kwargs.get("ccUrlTarget", None)
    birdUrlTarget = kwargs.get("birdUrlTarget", None)
    molLimit = kwargs.get("molLimit", 0)
    quietFlag = kwargs.get("quietFlag", True)
    molBuildType = kwargs.get("molBuildType", "ideal-xyz")
    fpTypeList = kwargs.get("fpTypeList", ["TREE"])
    screenTypeList = kwargs.get("screenTypeList", [])
    ccFileNamePrefix = kwargs.get("ccFileNamePrefix", "cc")
    oeFileNamePrefix = kwargs.get("oeFileNamePrefix", "oe")
    #
    startTime = time.time()
    # First pass: rebuild the cache files from scratch
    oemp = OeMoleculeProvider(
        ccUrlTarget=ccUrlTarget,
        birdUrlTarget=birdUrlTarget,
        cachePath=self.__cachePath,
        ccFileNamePrefix=ccFileNamePrefix,
        oeFileNamePrefix=oeFileNamePrefix,
        molBuildType=molBuildType,
        useCache=False,
        quietFlag=quietFlag,
        fpTypeList=fpTypeList,
        screenTypeList=screenTypeList,
    )
    ok = oemp.testCache()
    self.assertTrue(ok)
    # Second pass: reload from the cache just written
    oemp = OeMoleculeProvider(
        ccUrlTarget=ccUrlTarget,
        birdUrlTarget=birdUrlTarget,
        cachePath=self.__cachePath,
        ccFileNamePrefix=ccFileNamePrefix,
        oeFileNamePrefix=oeFileNamePrefix,
        molBuildType=molBuildType,
        useCache=True,
    )
    endTime = time.time()
    logger.info(">> Completed load molBuildType %r molLimit %r (%.4f seconds)", molBuildType, molLimit, endTime - startTime)
    #
    # --- allow a small shortfall (deltaMol) relative to the requested molecule limit
    deltaMol = 2
    minMol = minNumFp = molLimit - deltaMol if molLimit else 30000
    for fpType in fpTypeList:
        # Check the database for the fingerprint type being iterated
        # (previously hard-coded to "TREE", so other types were never verified).
        fpDb = oemp.getFingerPrintDb(fpType=fpType)
        numFp = fpDb.NumFingerPrints()
        logger.debug("fpType %r length %d", fpType, numFp)
        self.assertGreaterEqual(numFp, minNumFp)
    #
    ccId = "004"
    oeMol = oemp.getMol(ccId)
    numAtoms = len(list(oeMol.GetAtoms()))
    logger.debug("%s atom count %d", ccId, numAtoms)
    #
    # The SMILES-based build type is checked against a lower atom-count floor
    if molBuildType in ["oe-iso-smiles"]:
        self.assertGreaterEqual(numAtoms, 12)
    else:
        self.assertGreaterEqual(numAtoms, 20)
    #
    oeDb, oeDbIdx = oemp.getOeMolDatabase()
    logger.debug("Type db %r length %d type idx %r length %d", type(oeDb), oeDb.NumMols(), type(oeDbIdx), len(oeDbIdx))
    self.assertGreaterEqual(oeDb.NumMols(), minMol)
    self.assertGreaterEqual(len(oeDbIdx), minMol)
    #
    if molBuildType in ["oe-iso-smiles"] and screenTypeList:
        ssDb = oemp.getSubSearchDb()
        self.assertGreaterEqual(ssDb.NumMolecules(), minMol)
    return True