def testSubStructureSearchScreened(self):
    """Run screened substructure searches for the leading entries of the component index.

    For each of the first ``numMols`` index entries that carries an "oe-smiles"
    descriptor, the descriptor is converted to a query molecule and passed to
    OeSearchUtils.searchSubStructureScreened(). Entries for which
    self.__resultContains() returns a falsy value are collected and logged as
    misses; misses are reported only and do not fail the test.
    """
    oeioU = OeIoUtils()
    oemp = OeMoleculeProvider(**self.__myKwargs)
    # Check each provider's cache status on its own so that neither result
    # can mask the other.
    self.assertTrue(oemp.testCache())
    ccmP = ChemCompIndexProvider(**self.__myKwargs)
    ccIdxD = ccmP.getIndex()
    self.assertTrue(ccmP.testCache(minCount=self.__minCount))
    oesU = OeSearchUtils(oemp, screenType=self.__screenType, numProc=self.__numProc)
    numMols = 20
    missL = []
    for ccId, ccD in list(ccIdxD.items())[:numMols]:
        if "oe-smiles" not in ccD:
            # Nothing to build a query from for this entry.
            continue
        startTime = time.time()
        logger.info("Search %s %r", ccId, ccD["oe-smiles"])
        oeQMol = oeioU.smartsToQmol(ccD["oe-smiles"])
        retStatus, mL = oesU.searchSubStructureScreened(oeQMol, maxMatches=100)
        if retStatus:
            logger.info("%s (status=%r) match length %d in (%.4f seconds)", ccId, retStatus, len(mL), time.time() - startTime)
        if not self.__resultContains(ccId, mL):
            missL.append(ccId)
        # NOTE: the match count is deliberately not asserted here; misses are only logged.
    logger.info("Missed searches (%d) %r", len(missL), missL)
def __subStructureSearchScreened(self, numMols, **kwargs):
    """Run screened substructure searches over the component index for each screen type.

    Every selected index entry is used as its own query, once per requested
    build type, and entries for which self.__resultContains() returns a falsy
    value are logged as misses.

    Args:
        numMols (int): maximum number of index entries to search; a falsy value
            selects the whole index
        **kwargs: forwarded to self.__getSearchDataProviders(); additionally read
            here for "buildTypeList" (default ["oe-iso-smiles"]) and
            "screenTypeList" (default ["SMARTS"])

    Returns:
        bool: always True -- misses are logged, not returned to the caller
    """
    descriptorTypeL = kwargs.get("buildTypeList", ["oe-iso-smiles"])
    screenTypeL = kwargs.get("screenTypeList", ["SMARTS"])
    oesmP, ccIdxD = self.__getSearchDataProviders(**kwargs)
    for screenType in screenTypeL:
        oesU = OeSearchUtils(oesmP, screenType=screenType, numProc=self.__numProc)
        oeioU = OeIoUtils()
        missedL = []
        # Clamp the requested count to the size of the index.
        indexSize = len(ccIdxD)
        numMols = min(indexSize, numMols) if numMols else indexSize
        targetIdL = list(ccIdxD.keys())[:numMols]
        for ii, ccId in enumerate(targetIdL):
            ccD = ccIdxD[ccId]
            for buildType in descriptorTypeL:
                if buildType not in ccD:
                    continue
                tag = ccId + ":" + buildType
                descriptor = ccD[buildType]
                if screenType == "SMARTS":
                    # The descriptor is first converted to SMILES, which is then
                    # built as a "SMARTS" query.
                    smiles = oeioU.descriptorToSmiles(descriptor, buildType, messageTag=tag)
                    oeQMol = oeioU.descriptorToMol(smiles, "SMARTS", messageTag=tag)
                else:
                    oeQMol = oeioU.descriptorToQMol(descriptor, "SMARTS", messageTag=tag)
                if not oeQMol:
                    logger.debug("%s build failed for %s - skipping", ccId, buildType)
                    continue
                startTime = time.time()
                retStatus, mL = oesU.searchSubStructureScreened(oeQMol, maxMatches=100)
                if retStatus:
                    logger.debug(
                        "%s - %s - %s (status=%r) match length %d in (%.4f seconds)",
                        ccId,
                        buildType,
                        screenType,
                        retStatus,
                        len(mL),
                        time.time() - startTime,
                    )
                if not self.__resultContains(ccId, mL):
                    missedL.append((ccId, buildType, screenType))
            # Progress report every 100 index entries.
            if ii % 100 == 0:
                logger.info("Completed %d of %d missed count %d", ii, numMols, len(missedL))
        logger.info("Screen %r missed searches (%d) %r", screenType, len(missedL), missedL)
    return True
def testSubStructureSearchScreenedFiltered(self):
    """Run screened substructure searches against the "filtered" cache files.

    Providers are configured with the "cc-filtered"/"oe-filtered" file name
    prefixes. For each of the first ``numMols`` index entries that carries an
    "oe-smiles" descriptor, the descriptor is converted to a query molecule and
    passed to OeSearchUtils.searchSubStructureScreened(). Entries for which
    self.__resultContains() returns a falsy value are collected and logged as
    misses; misses are reported only and do not fail the test.
    """
    myKwargs = {
        "cachePath": self.__cachePath,
        "useCache": True,
        "fpTypeList": self.__fpTypeList,
        "ccFileNamePrefix": "cc-filtered",
        "oeFileNamePrefix": "oe-filtered",
        "molBuildType": "oe-iso-smiles",
        "limitPerceptions": False,
    }
    oeioU = OeIoUtils()
    oemp = OeMoleculeProvider(**myKwargs)
    # Check each provider's cache status on its own so that neither result
    # can mask the other.
    self.assertTrue(oemp.testCache())
    ccmP = ChemCompIndexProvider(**myKwargs)
    ccIdxD = ccmP.getIndex()
    self.assertTrue(ccmP.testCache(minCount=self.__minCount))
    oesU = OeSearchUtils(oemp, screenType=self.__screenType, numProc=self.__numProc)
    numMols = 5000
    missL = []
    for ccId, ccD in list(ccIdxD.items())[:numMols]:
        if "oe-smiles" not in ccD:
            # Nothing to build a query from for this entry.
            continue
        startTime = time.time()
        logger.info("Search %s %r", ccId, ccD["oe-smiles"])
        oeQMol = oeioU.smartsToQmol(ccD["oe-smiles"])
        retStatus, mL = oesU.searchSubStructureScreened(oeQMol, maxMatches=100)
        logger.info("%s (status=%r)match length %d in (%.4f seconds)", ccId, retStatus, len(mL), time.time() - startTime)
        if not self.__resultContains(ccId, mL):
            missL.append(ccId)
        # NOTE: the match count is deliberately not asserted here; misses are only logged.
    logger.info("Missed searches (%d) %r", len(missL), missL)