def __fingerPrintScores(self, numMols, **kwargs):
    """Run fingerprint score searches over the component index and report self-match misses.

    For each of the first ``numMols`` entries in the component index, a molecule
    is built from every requested descriptor type and searched with each
    configured fingerprint type. A search counts as a self hit when
    ``__resultContains`` reports the query component identifier in the match list.

    Args:
        numMols (int): maximum number of index entries to search; a falsy
            value (``0`` or ``None``) searches the entire index.
        **kwargs: options read here and also forwarded unchanged to
            ``__getSearchDataProviders``:

            - maxResults (int): maximum matches requested per search (default 50).
            - limitPerceptions (bool): passed to the descriptor-to-molecule
              builder (default False).
            - fpTypeCuttoffList (list): ``(fpType, minFpScore)`` tuples
              (default ``[("TREE", 0.6)]``).
            - buildTypeList (list): descriptor keys to build from
              (default ``["oe-iso-smiles"]``).
            - doDisplay (bool): when True, depict each missed build type
              aligned against the "oe-iso-smiles" descriptor (default False).

    Returns:
        bool: always True; failures are raised by the unittest assertions.
    """
    maxFpResults = kwargs.get("maxResults", 50)
    limitPerceptions = kwargs.get("limitPerceptions", False)
    fpTypeCuttoffList = kwargs.get("fpTypeCuttoffList", [("TREE", 0.6)])
    buildTypeList = kwargs.get("buildTypeList", ["oe-iso-smiles"])
    doDisplay = kwargs.get("doDisplay", False)
    #
    oesmP, ccIdxD = self.__getSearchDataProviders(**kwargs)
    oesU = OeSearchUtils(oesmP, fpTypeList=[tup[0] for tup in fpTypeCuttoffList])
    oeioU = OeIoUtils()
    # This will reload the oe binary cache.
    oeMol = oesmP.getMol("004")
    self.assertGreaterEqual(len(list(oeMol.GetAtoms())), 12)
    #
    # ccId -> [(buildType, fpType, hitCount)] for every individual search lacking a self hit
    missedFpD = {}
    # ccId -> [buildType] for build types where no fingerprint type produced a self hit
    missedBuildD = {}
    numMols = min(len(ccIdxD), numMols) if numMols else len(ccIdxD)
    logger.info("Begin finger print score search on %d molecules", numMols)
    # ----
    startTime = time.time()
    # numDone is a 1-based count so the progress message reports molecules actually completed.
    for numDone, (ccId, ccD) in enumerate(list(ccIdxD.items())[:numMols], start=1):
        for buildType in buildTypeList:
            if buildType not in ccD:
                logger.debug("%s missing descriptor %r", ccId, buildType)
                continue
            oeMol = oeioU.descriptorToMol(
                ccD[buildType],
                buildType,
                limitPerceptions=limitPerceptions,
                messageTag=ccId + ":" + buildType,
            )
            if not oeMol:
                logger.debug("%s build failed for %s - skipping", ccId, buildType)
                continue
            maxHits = 0
            minHits = maxFpResults
            selfHit = False
            #
            startTime1 = time.time()
            for fpType, minFpScore in fpTypeCuttoffList:
                retStatus, mL = oesU.getFingerPrintScores(oeMol, fpType, minFpScore, maxFpResults)
                self.assertTrue(retStatus)
                numHits = len(mL)
                logger.debug("%s fpType %r hits %d", ccId, fpType, numHits)
                maxHits = max(maxHits, numHits)
                minHits = min(minHits, numHits)
                matchedSelf = self.__resultContains(ccId, mL)
                selfHit = selfHit or matchedSelf
                if not matchedSelf:
                    missedFpD.setdefault(ccId, []).append((buildType, fpType, numHits))
            #
            if not selfHit:
                missedBuildD.setdefault(ccId, []).append(buildType)
            #
            if maxHits < 1 or not selfHit:
                logger.info(
                    "%s buildType %r min hits %d max hits %d (%.4f seconds)",
                    ccId,
                    buildType,
                    minHits,
                    maxHits,
                    time.time() - startTime1,
                )
        if numDone % 100 == 0:
            logger.info("Completed %d of %d missed count %d", numDone, numMols, len(missedBuildD))
    # ------
    for ccId, bTL in missedBuildD.items():
        logger.info("%s missed all fptypes: buildtype list %r", ccId, bTL)
        if ccId in missedFpD:
            logger.info("%s unmatched by fpTypes %r", ccId, missedFpD[ccId])
    #
    if doDisplay:
        for ccId, bTL in missedBuildD.items():
            idxD = ccIdxD[ccId]
            if "oe-iso-smiles" not in idxD:
                continue
            for bT in bTL:
                self.__displayAlignedDescriptorPair(
                    ccId,
                    idxD["oe-iso-smiles"],
                    "oe-iso-smiles",
                    idxD[bT],
                    bT,
                    title=None,
                    limitPerceptions=True,
                )
    logger.info("%s fingerprints search on %d in (%.4f seconds)", len(fpTypeCuttoffList), numMols, time.time() - startTime)
    return True
def testFingerPrintScores(self):
    """Exercise fingerprint score searches for several descriptor build types.

    For the first 20 entries of the chemical component index, a molecule is
    built from each available descriptor (SMILES variants and InChI) and
    searched with every ``(fpType, minFpScore)`` pair in the configured cutoff
    list. Searches that do not return the query component itself are collected
    and logged; only the provider cache checks and the search return status
    are asserted.
    """
    oemp = OeMoleculeProvider(**self.__myKwargs)
    # Check the molecule provider cache status instead of silently overwriting it below.
    ok = oemp.testCache()
    self.assertTrue(ok)
    ccmP = ChemCompIndexProvider(**self.__myKwargs)
    ccIdxD = ccmP.getIndex()
    ok = ccmP.testCache(minCount=self.__minCount)
    self.assertTrue(ok)
    limitPerceptions = False
    maxResults = 100
    numMols = 20
    buildTypeList = ["oe-iso-smiles", "oe-smiles", "acdlabs-smiles", "cactvs-iso-smiles", "cactvs-smiles", "inchi"]
    oeioU = OeIoUtils()
    oesU = OeSearchUtils(oemp, fpTypeList=self.__fpTypeList)
    # ccId -> [(buildType, fpType, hitCount)] for every individual search lacking a self hit
    missedFpD = {}
    # ccId -> [buildType] for build types where no fingerprint type produced a self hit
    missedBuildD = {}
    # ----
    startTime = time.time()
    for ccId, ccD in list(ccIdxD.items())[:numMols]:
        for buildType in buildTypeList:
            if buildType not in ccD:
                logger.info("%s missing descriptor %r", ccId, buildType)
                continue
            logger.debug("Search %s %r", ccId, ccD[buildType])
            # Reset for every descriptor so a failed build can never fall through
            # to the search with the molecule left over from a previous iteration.
            oeMol = None
            if buildType == "inchi":
                # The factory is used for InChI so the regenerated InChI can be
                # compared with the indexed value.
                oemf = OeMoleculeFactory()
                oemf.setDescriptor(ccD["inchi"], "inchi", ccId)
                ok = oemf.build(molBuildType="inchi", limitPerceptions=limitPerceptions)
                if ok:
                    oeMol = oemf.getMol()
                    regenInchi = oemf.getInChI()
                    if regenInchi != ccD["inchi"]:
                        logger.info("%s regenerated InChI differs\n%r\n%s", ccId, ccD["inchi"], regenInchi)
                else:
                    logger.info("%s build failed with InChI %r", ccId, ccD["inchi"])
            else:
                oeMol = oeioU.smilesToMol(ccD[buildType], limitPerceptions=limitPerceptions)
            if not oeMol:
                continue
            maxHits = 0
            minHits = maxResults
            selfHit = False
            #
            for fpType, minFpScore in self.__fpTypeCuttoffList:
                retStatus, mL = oesU.getFingerPrintScores(oeMol, fpType, minFpScore, maxResults)
                self.assertTrue(retStatus)
                numHits = len(mL)
                logger.info("%s fpType %r hits %d", ccId, fpType, numHits)
                maxHits = max(maxHits, numHits)
                minHits = min(minHits, numHits)
                matchedSelf = self.__resultContains(ccId, mL)
                selfHit = selfHit or matchedSelf
                if not matchedSelf:
                    missedFpD.setdefault(ccId, []).append((buildType, fpType, numHits))
            #
            if not selfHit:
                missedBuildD.setdefault(ccId, []).append(buildType)
            logger.info("%s buildType %r min hits %d max hits %d", ccId, buildType, minHits, maxHits)
    #
    for ccId, bTL in missedBuildD.items():
        logger.info("%s missed build type list %r", ccId, bTL)
        if ccId in missedFpD:
            logger.info("%s unmatched fpTypes %r", ccId, missedFpD[ccId])
    #
    # Manual switch: set to True to depict missed build types against "oe-iso-smiles".
    doDepict = False
    if doDepict:
        for ccId, bTL in missedBuildD.items():
            idxD = ccIdxD[ccId]
            if "oe-iso-smiles" not in idxD:
                continue
            for bT in bTL:
                self.__displayAlignedDescriptorPair(
                    ccId,
                    idxD["oe-iso-smiles"],
                    "oe-iso-smiles",
                    idxD[bT],
                    bT,
                    title=None,
                    limitPerceptions=True,
                )
    # Report the number of fingerprint types actually searched (the cutoff list iterated above).
    logger.info("%s fingerprints search on %d in (%.4f seconds)", len(self.__fpTypeCuttoffList), numMols, time.time() - startTime)