def getSubSearchDb(self,
                   screenType="SMARTS",
                   numProc=1,
                   forceRefresh=False):
     """Return the screened substructure search database, loading it on demand.

     Args:
         screenType (str, optional): screen type used to select the database
             file name and passed to the loader. Defaults to "SMARTS".
         numProc (int, optional): forwarded to the loader as numProc. Defaults to 1.
         forceRefresh (bool, optional): reload even when a database is already
             held by this instance. Defaults to False.

     Returns:
         object: the value returned by OeIoUtils.loadOeSubSearchDatabase(),
             which is retained on the instance for later calls
     """
     # NOTE(review): only one database is retained, whatever the screenType, so a
     # later call with a different screenType gets the earlier database unless
     # forceRefresh is set -- confirm this is intended.
     if self.__ssDb and not forceRefresh:
         return self.__ssDb
     loader = OeIoUtils()
     dbFilePath = os.path.join(self.__dirPath,
                               self.__getSubSearchFileName(screenType))
     logger.info("Opening screened substructure search database %r", dbFilePath)
     self.__ssDb = loader.loadOeSubSearchDatabase(dbFilePath,
                                                  screenType,
                                                  numProc=numProc)
     return self.__ssDb
 def __exhaustiveSubStructureSearch(self, numMols, **kwargs):
     """Exhaustive substructure search.

     For each of the first numMols index entries, build a query molecule from
     every requested descriptor type, run a "graph-strict" substructure search
     and assert that the search succeeds and that its result contains the
     query component itself.

     Args:
         numMols (int): slice bound applied to the list of index items
         **kwargs: "limitPerceptions" (bool, default False) and "buildTypeList"
             (list, default ["oe-iso-smiles"]) are read here; all keywords are
             also forwarded to __getSearchDataProviders()

     Returns:
         bool: True when every search passes; any failure is reported through
             self.fail()
     """
     try:
         limitPerceptions = kwargs.get("limitPerceptions", False)
         buildTypeList = kwargs.get("buildTypeList", ["oe-iso-smiles"])
         oesmP, ccIdxD = self.__getSearchDataProviders(**kwargs)
         oesU = OeSearchUtils(oesmP, fpTypeList=[])
         oeioU = OeIoUtils()
         #
         for ccId, ccD in list(ccIdxD.items())[:numMols]:
             matchCount = 0
             mtS = set()
             for buildType in buildTypeList:
                 if buildType not in ccD:
                     continue
                 oeMol = oeioU.descriptorToMol(
                     ccD[buildType],
                     buildType,
                     limitPerceptions=limitPerceptions,
                     messageTag=ccId + ":" + buildType)
                 if not oeMol:
                     logger.error(
                         "%s %s build query molecule build fails (skipping)",
                         ccId, buildType)
                     continue
                 # ----
                 startTime = time.time()
                 retStatus, mL = oesU.searchSubStructure(
                     oeMol, matchOpts="graph-strict")
                 # Evaluate the self-match once (and only for a successful
                 # search) instead of re-running it for each branch and assert.
                 selfMatched = bool(retStatus) and self.__resultContains(ccId, mL)
                 if not retStatus:
                     logger.info("%s match fails for build type %s",
                                 ccId, buildType)
                 elif not selfMatched:
                     logger.info(
                         "%s failed match length %d build type %s in (%.4f seconds)",
                         ccId, len(mL), buildType,
                         time.time() - startTime)
                 else:
                     mtS.update(m.ccId for m in mL)
                     matchCount += 1
                 self.assertTrue(retStatus)
                 self.assertTrue(selfMatched)
             if matchCount:
                 logger.info("%s MATCHES %d: %r", ccId, matchCount, mtS)
             else:
                 logger.info("%s NO MATCHES", ccId)
             # ----
         return True
     except Exception as e:
         logger.exception("Failing with %s", str(e))
         # Carry the cause into the test report rather than failing silently.
         self.fail(str(e))
     return False
示例#3
0
    def makeFiles(self, fmt="sdf"):
        """Write a molecule file for every loaded chemical component.

        Each file is written to
        <fileDirPath>/<fmt>/<first character of ccId>/<ccId>_<ideal|model>.<fmt>,
        where "ideal" is used when the molecule build type is "ideal-xyz" and
        "model" otherwise.

        Args:
            fmt (str, optional): output format and file extension; one of
                "mol", "mol2", "mol2h" or "sdf". Defaults to "sdf".

        Returns:
            bool: True when the loop over all molecules completes; False for an
                unsupported format, a license failure or any exception
        """
        try:
            if fmt not in ("mol", "mol2", "mol2h", "sdf"):
                return False
            if not self.__setLicense(self.__licensePath):
                logger.error("Invalid license details - exiting")
                return False
            # The file-name suffix and the writer do not vary per molecule, so
            # resolve them once instead of inside two duplicated branches.
            suffix = "_ideal." if self.__molBuildType == "ideal-xyz" else "_model."
            oeioU = OeIoUtils()
            for ccId, oeMol in self.__oeMolD.items():
                filePath = os.path.join(self.__fileDirPath, fmt, ccId[0], ccId + suffix + fmt)
                oeioU.write(filePath, oeMol, constantMol=True)

            return True
        except Exception as e:
            logger.exception("Failing with %s", str(e))
        return False
 def getFingerPrintDb(self, fpType, fpDbType="STANDARD", rebuild=False):
     """Return the fingerprint database for a fingerprint type, loading it on demand.

     Args:
         fpType (str): fingerprint type; used as the cache key and to select
             the fast fingerprint database file name
         fpDbType (str, optional): database type forwarded to the loader.
             Defaults to "STANDARD".
         rebuild (bool, optional): reload even when a database is already
             cached for fpType. Defaults to False.

     Returns:
         object: the cached fingerprint database, or None when no database
             could be loaded for fpType. If a rebuild fails, a previously
             cached database is returned unchanged.
     """
     if fpType not in self.__fpDbD or rebuild:
         oeIo = OeIoUtils()
         fastFpDbPath = os.path.join(self.__dirPath,
                                     self.__getFastFpDbFileName(fpType))
         oeMolDbFilePath = os.path.join(self.__dirPath,
                                        self.__getOeMolDbFileName())
         fpDb = oeIo.loadOeFingerPrintDatabase(oeMolDbFilePath,
                                               fastFpDbPath,
                                               inMemory=True,
                                               fpType=fpType,
                                               fpDbType=fpDbType)
         if fpDb:
             self.__fpDbD[fpType] = fpDb
         else:
             logger.error("Unable to load fingerprint database for fpType %r",
                          fpType)
     # A failed load leaves no entry for fpType; return None instead of
     # raising KeyError from a plain subscript.
     return self.__fpDbD.get(fpType)
 def getOeMolD(self):
     """Return the OE molecule cache, reading the binary cache file on first use.

     Returns:
         object: the value held in the instance cache (presumably a dictionary
             of molecules -- its length is logged after loading), or None if
             loading raises an exception
     """
     try:
         if self.__oeMolD:
             return self.__oeMolD
         tStart = time.time()
         reader = OeIoUtils()
         cacheFilePath = os.path.join(self.__dirPath,
                                      self.__getOeSearchMolFileName())
         self.__oeMolD = reader.readOeBinaryMolCache(cacheFilePath)
         logger.info("Loading OE binary molecule cache length %d (%.4f seconds)",
                     len(self.__oeMolD), time.time() - tStart)
         return self.__oeMolD
     except Exception as e:
         logger.exception("Failing with %s", str(e))
     return None
示例#6
0
    def __toMolFile(self, oeMol, molfilePath, **kwargs):
        """Write an OE molecule to a molecule file.

        Args:
            oeMol (object): instance of an OE graph molecule
            molfilePath (string): file path for molfile (type determined by extension)
            **kwargs: accepted but not used

        Returns:
            bool: True if the write call returns without raising, or False otherwise
        """
        try:
            _ = kwargs
            OeIoUtils().write(molfilePath, oeMol, constantMol=True)
        except Exception as e:
            logger.exception("Failing with %s", str(e))
            return False
        return True
 def __getMol(self,
              query,
              queryType,
              queryId,
              limitPerceptions=False,
              suppressHydrogens=True):
     """Build the query molecule from a component identifier or a descriptor.

     Args:
         query (str): component identifier when queryType is "CC", otherwise
             the descriptor string
         queryType (str): "CC" fetches the molecule from the search molecule
             provider; any other value is passed to descriptorToMol() as the
             descriptor type
         queryId (str): title set on the molecule; also the message tag used
             for descriptor conversion
         limitPerceptions (bool, optional): forwarded to descriptorToMol().
             Defaults to False.
         suppressHydrogens (bool, optional): pass the molecule through
             OeIoUtils.suppressHydrogens(). Defaults to True.

     Returns:
         object: the molecule with its title set to queryId
     """
     oeioU = OeIoUtils()
     useProvider = queryType == "CC"
     if useProvider:
         mol = self.__oesmP.getMol(query)
     else:
         mol = oeioU.descriptorToMol(query,
                                     queryType,
                                     limitPerceptions=limitPerceptions,
                                     messageTag=queryId)
     # NOTE(review): other code in this file tests the descriptorToMol() result
     # for falsiness; if a failed build can reach this point, SetTitle() below
     # will raise -- confirm callers handle that.
     if suppressHydrogens:
         mol = oeioU.suppressHydrogens(mol)
     mol.SetTitle(queryId)
     return mol
示例#8
0
    def testDepictSMILES(self):
        """Test case - depict benzene built from a SMILES string and write the image."""
        try:
            svgPath = os.path.join(self.__workPath, "benzene-from-smi.svg")
            benzeneMol = OeIoUtils().smilesToMol("c1ccccc1")

            displayOpts = {
                "labelAtomName": False,
                "labelAtomCIPStereo": True,
                "labelBondCIPStereo": True,
                "labelAtomIndex": False,
                "labelBondIndex": False,
                "bondDisplayWidth": 1.0,
            }
            depictor = OeDepict()
            depictor.setMolTitleList([("benzene", benzeneMol, "Title for benzene")])
            depictor.setDisplayOptions(**displayOpts)
            depictor.setGridOptions(rows=1, cols=1)
            depictor.prepare()
            depictor.write(svgPath)
        except Exception as e:
            logger.exception("Failing with %s", str(e))
            self.fail()
示例#9
0
    def testSubStructureSearchScreenedFiltered(self):
        """Test case - screened substructure search over the filtered component index.

        Each of the first 5000 index entries carrying an "oe-smiles" descriptor
        is converted to a query molecule and searched (maximum 100 matches).
        Components that are not found among their own search results are
        collected and logged; they do not fail the test.
        """
        myKwargs = {
            "cachePath": self.__cachePath,
            "useCache": True,
            "fpTypeList": self.__fpTypeList,
            "ccFileNamePrefix": "cc-filtered",
            "oeFileNamePrefix": "oe-filtered",
            "molBuildType": "oe-iso-smiles",
            "limitPerceptions": False,
        }
        oeioU = OeIoUtils()
        oemp = OeMoleculeProvider(**myKwargs)
        ok = oemp.testCache()
        # Previously this status was overwritten below without being checked.
        self.assertTrue(ok)
        ccmP = ChemCompIndexProvider(**myKwargs)
        ccIdxD = ccmP.getIndex()
        ok = ccmP.testCache(minCount=self.__minCount)
        self.assertTrue(ok)
        oesU = OeSearchUtils(oemp,
                             screenType=self.__screenType,
                             numProc=self.__numProc)
        numMols = 5000
        missL = []
        for ccId, ccD in list(ccIdxD.items())[:numMols]:
            # ----
            startTime = time.time()
            if "oe-smiles" not in ccD:
                continue
            logger.info("Search %s %r", ccId, ccD["oe-smiles"])
            oeQMol = oeioU.smartsToQmol(ccD["oe-smiles"])
            retStatus, mL = oesU.searchSubStructureScreened(oeQMol,
                                                            maxMatches=100)
            logger.info("%s (status=%r)match length %d in (%.4f seconds)",
                        ccId, retStatus, len(mL),
                        time.time() - startTime)
            if not self.__resultContains(ccId, mL):
                missL.append(ccId)

            # self.assertGreaterEqual(len(mL), 1)
            # ----
        logger.info("Missed searches (%d) %r", len(missL), missL)
示例#10
0
 def testIoOps(self):
     """Test IO operation on generated related molecules"""
     try:
         oeIoU = OeIoUtils()
         mU = MarshalUtil()
         mU.mkdir(self.__molfileDirPath)
         ccMolD = self.__getChemCompDefs()
         oemf = OeMoleculeFactory()
         for ccId, ccObj in list(ccMolD.items())[:10]:
             # ----
             tId = oemf.setChemCompDef(ccObj)
             self.assertEqual(tId, ccId)
             relatedIdxD = oemf.buildRelated(limitPerceptions=False)
             logger.info("%s generated %d molecular forms", ccId,
                         len(relatedIdxD))
             for sId, idxD in relatedIdxD.items():
                 logger.info("sId %r smiles %r", sId, idxD["smiles"])
                 # Write each form twice, rebuilding the molecule from the
                 # descriptor for every output file.
                 for fileExt in (".mol2", ".mol"):
                     outPath = os.path.join(self.__molfileDirPath,
                                            sId + fileExt)
                     builtMol = oeIoU.descriptorToMol(idxD["smiles"],
                                                      "oe-iso-smiles",
                                                      limitPerceptions=False,
                                                      messageTag=None)
                     oeIoU.write(outPath,
                                 builtMol,
                                 constantMol=True,
                                 addSdTags=True)
             # ----
     except Exception as e:
         logger.exception("Failing with %s", str(e))
         self.fail()
示例#11
0
    def alignMoleculePair(self,
                          refIdentifier,
                          refIdentifierType,
                          fitIdentifier,
                          fitIdentifierType,
                          imagePath=None,
                          **kwargs):
        """Create an aligned depiction of a reference and a fit molecule.

        Each molecule is given as an InChI or SMILES descriptor or as a PDB
        identifier.

        Args:
            refIdentifier (str): descriptor or identifier of the reference molecule
            refIdentifierType (str): "smiles", "inchi" or "identifierpdb" (case-insensitive)
            fitIdentifier (str): descriptor or identifier of the fit molecule
            fitIdentifierType (str): "smiles", "inchi" or "identifierpdb" (case-insensitive)
            imagePath (str, optional): output image path; when omitted a path
                from __makeImagePath() is used
            **kwargs: forwarded to __depictAlignedPair()

        Returns:
            str: the image path on success, or None on any failure (including
                an unsupported identifier type)
        """
        try:
            imagePath = imagePath if imagePath else self.__makeImagePath()
            oeio = OeIoUtils()
            # Holds the search molecule provider once it has been created; it is
            # only needed for "identifierpdb" lookups.
            providerCache = []

            def buildMol(identifier, identifierType):
                """Convert one identifier of the given type to a molecule."""
                idType = identifierType.lower()
                if idType == "smiles":
                    return oeio.smilesToMol(identifier)
                if idType == "inchi":
                    return oeio.inchiToMol(identifier)
                if idType == "identifierpdb":
                    if not providerCache:
                        ccsw = ChemCompSearchWrapper()
                        providerCache.append(ccsw.getSearchMoleculeProvider())
                    return providerCache[0].getMol(identifier)
                # Previously an unknown type left the molecule variable unbound
                # and surfaced as an UnboundLocalError.
                raise ValueError("Unsupported identifier type %r" % identifierType)

            # ---
            oeMolRef = buildMol(refIdentifier, refIdentifierType)
            oeMolFit = buildMol(fitIdentifier, fitIdentifierType)
            # ---
            logger.info("oeMolRef atoms %r", oeMolRef.NumAtoms())
            logger.info("oeMolFit atoms %r", oeMolFit.NumAtoms())

            displayIdRef = "Ref"
            displayIdFit = "Fit"
            ok = self.__depictAlignedPair(oeMolRef, displayIdRef, oeMolFit,
                                          displayIdFit, imagePath, **kwargs)
            return imagePath if ok else None
        except Exception as e:
            logger.exception("Failing with %s", str(e))
        return None
示例#12
0
 def testDepictOneSDF(self):
     """Test case -  get, read, build OE molecule from SDF file, and depict the molecule."""
     try:
         # Use an output name specific to this test; the earlier name
         # ("benzene-from-smi.svg") was the output file of the SMILES depiction test.
         imagePath = os.path.join(self.__workPath, "ATP-from-sdf.svg")
         sdfPath = os.path.join(self.__dataPath, "ATP.sdf")
         oeio = OeIoUtils()
         oeMolL = oeio.fileToMols(sdfPath)
         #
         oed = OeDepict()
         # Only the first molecule read from the file is depicted.
         oed.setMolTitleList([("ATP", oeMolL[0], "Title for ATP")])
         oed.setDisplayOptions(labelAtomName=True,
                               labelAtomCIPStereo=True,
                               labelBondCIPStereo=True,
                               labelAtomIndex=False,
                               labelBondIndex=False,
                               bondDisplayWidth=0.5)
         oed.setGridOptions(rows=1, cols=1)
         oed.prepare()
         oed.write(imagePath)
     except Exception as e:
         logger.exception("Failing with %s", str(e))
         self.fail()
示例#13
0
 def depictMolecule(self,
                    identifier,
                    identifierType,
                    imagePath=None,
                    **kwargs):
     """Create depiction from InChI, SMILES descriptors or PDB identifier.

     Args:
         identifier (str): descriptor or identifier of the molecule
         identifierType (str): "smiles", "inchi" or "identifierpdb" (case-insensitive)
         imagePath (str, optional): output image path; when omitted a path
             from __makeImagePath() is used
         **kwargs: forwarded to __depictOne()

     Returns:
         str: the image path on success, or None on any failure (including an
             unsupported identifier type)
     """
     try:
         imagePath = imagePath if imagePath else self.__makeImagePath()
         oeio = OeIoUtils()
         idType = identifierType.lower()
         if idType == "smiles":
             oeMol = oeio.smilesToMol(identifier)
         elif idType == "inchi":
             oeMol = oeio.inchiToMol(identifier)
         elif idType == "identifierpdb":
             ccsw = ChemCompSearchWrapper()
             oesmP = ccsw.getSearchMoleculeProvider()
             oeMol = oesmP.getMol(identifier)
         else:
             # Previously an unknown type left oeMol unbound and surfaced as an
             # UnboundLocalError in the generic handler below.
             logger.error("Unsupported identifier type %r", identifierType)
             return None
         #
         ok = self.__depictOne(oeMol, imagePath, **kwargs)
         return imagePath if ok else None
     except Exception as e:
         logger.exception("Failing with %s", str(e))
     return None
 def __displayAlignedDescriptorPair(self,
                                    ccId,
                                    descrRef,
                                    buildTypeRef,
                                    descrFit,
                                    buildTypeFit,
                                    title=None,
                                    limitPerceptions=True):
     """Depict the MCS alignment of two descriptor-built molecules for one component.

     Args:
         ccId (str): component identifier; labels both molecules and is part
             of the image file name
         descrRef (str): descriptor for the reference molecule
         buildTypeRef (str): descriptor type of descrRef
         descrFit (str): descriptor for the fit molecule
         buildTypeFit (str): descriptor type of descrFit
         title (str, optional): image file name prefix; defaults to
             "<buildTypeRef>-<buildTypeFit>"
         limitPerceptions (bool, optional): forwarded to descriptorToMol().
             Defaults to True.
     """
     oeioU = OeIoUtils()
     builtMols = []
     # Reference first, then fit -- same conversion for both.
     for descr, buildType in ((descrRef, buildTypeRef), (descrFit, buildTypeFit)):
         builtMols.append(
             oeioU.descriptorToMol(descr,
                                   buildType,
                                   limitPerceptions=limitPerceptions,
                                   messageTag=ccId + ":" + buildType))
     refMol, fitMol = builtMols
     #
     aligner = OeDepictMCSAlignPage()
     aligner.setSearchType(sType="graph-relaxed", minAtomMatchFraction=0.50)
     aligner.setDisplayOptions(labelAtomName=True,
                               labelAtomCIPStereo=True,
                               labelAtomIndex=False,
                               labelBondIndex=False,
                               highlightStyleFit="ballAndStickInverse",
                               bondDisplayWidth=0.5)
     aligner.setRefMol(refMol, ccId)
     aligner.setFitMol(fitMol, ccId)
     filePrefix = title or (buildTypeRef + "-" + buildTypeFit)
     svgPath = os.path.join(self.__workPath, filePrefix + "-" + ccId + ".svg")
     logger.info("Using image path %r", svgPath)
     alignedL = aligner.alignPair(imagePath=svgPath)
     if not alignedL:
         return
     logger.info("%s aligned image path %r", ccId, svgPath)
     for refCc, refAtm, fitCc, fitAtm in alignedL:
         logger.debug("%5s %-5s %5s %-5s", refCc, refAtm, fitCc, fitAtm)
 def __exhaustiveSubStructureSearch(self, numMols, **kwargs):
     """Exhaustive substructure search.

     For each of the first numMols index entries, build a query molecule from
     every requested descriptor type, run a "graph-strict" substructure search
     and assert that the search succeeds and that its result contains the
     query component itself.

     Args:
         numMols (int): slice bound applied to the list of index items
         **kwargs: "limitPerceptions" (bool, default False) and "buildTypeList"
             (list, default ["oe-iso-smiles"]) are read here; all keywords are
             also forwarded to __getSearchDataProviders()

     Returns:
         bool: True when every search passes; any failure is reported through
             self.fail()
     """
     try:
         limitPerceptions = kwargs.get("limitPerceptions", False)
         buildTypeList = kwargs.get("buildTypeList", ["oe-iso-smiles"])
         oesmP, ccIdxD = self.__getSearchDataProviders(**kwargs)
         oesU = OeSearchUtils(oesmP, fpTypeList=[])
         oeioU = OeIoUtils()
         #
         for ccId, ccD in list(ccIdxD.items())[:numMols]:
             for buildType in buildTypeList:
                 if buildType not in ccD:
                     continue
                 oeMol = oeioU.descriptorToMol(
                     ccD[buildType],
                     buildType,
                     limitPerceptions=limitPerceptions,
                     messageTag=ccId + ":" + buildType)
                 if not oeMol:
                     # Record the skipped query so that an unsearched
                     # component is visible in the log.
                     logger.error(
                         "%s %s build query molecule build fails (skipping)",
                         ccId, buildType)
                     continue
                 # ----
                 startTime = time.time()
                 retStatus, mL = oesU.searchSubStructure(
                     oeMol, matchOpts="graph-strict")
                 # Evaluate the self-match once and reuse it for the assert.
                 selfMatched = self.__resultContains(ccId, mL)
                 if not selfMatched:
                     logger.info(
                         "%s match length %d build type %s in (%.4f seconds)",
                         ccId, len(mL), buildType,
                         time.time() - startTime)
                 self.assertTrue(retStatus)
                 self.assertTrue(selfMatched)
             # ----
         return True
     except Exception as e:
         logger.exception("Failing with %s", str(e))
         # Carry the cause into the test report rather than failing silently.
         self.fail(str(e))
     return False
    def __fingerPrintScores(self, numMols, **kwargs):
        """Run fingerprint score searches for a list of component IDs and report self-match misses.

        Args:
            numMols (int): maximum number of IDs taken from failedIdList; a
                falsy value means the size of the component index
            **kwargs: read here (all keywords are also forwarded to
                __getSearchDataProviders()) -
                maxResults (int): maximum hits requested per search (default 50)
                limitPerceptions (bool): forwarded to descriptorToMol() (default True)
                fpTypeCuttoffList (list): (fingerprint type, minimum score)
                    tuples (default [("TREE", 0.6)])
                buildTypeList (list): descriptor types used to build query
                    molecules (default ["oe-iso-smiles"])
                doDisplay (bool): depict each missed build type aligned against
                    the "oe-iso-smiles" form (default False)
                failedIdList (list): component IDs to search (default [])
        """
        maxFpResults = kwargs.get("maxResults", 50)
        limitPerceptions = kwargs.get("limitPerceptions", True)
        fpTypeCuttoffList = kwargs.get("fpTypeCuttoffList", [("TREE", 0.6)])
        buildTypeList = kwargs.get("buildTypeList", ["oe-iso-smiles"])
        doDisplay = kwargs.get("doDisplay", False)
        failedIdList = kwargs.get("failedIdList", [])
        #
        oesmP, ccIdxD = self.__getSearchDataProviders(**kwargs)
        oesU = OeSearchUtils(oesmP,
                             fpTypeList=[tup[0] for tup in fpTypeCuttoffList])
        oeioU = OeIoUtils()
        # This will reload the oe binary cache.
        oeMol = oesmP.getMol("004")
        self.assertGreaterEqual(len(list(oeMol.GetAtoms())), 12)
        #
        # ccId -> [(buildType, fpType, hit count)] for searches that missed the query itself
        missedFpD = {}
        # ccId -> [buildType] for build types missed by every fingerprint type
        missedBuildD = {}
        numMols = min(len(ccIdxD), numMols) if numMols else len(ccIdxD)
        logger.info("Begin finger print score search on %d molecules", numMols)
        # ----
        startTime = time.time()
        # for ccId, ccD in list(ccIdxD.items())[:numMols]:
        for ii, ccId in enumerate(failedIdList[:numMols]):
            ccD = ccIdxD[ccId]
            for buildType in buildTypeList:
                if buildType not in ccD:
                    logger.debug("%s missing descriptor %r", ccId, buildType)
                    continue
                oeMol = oeioU.descriptorToMol(
                    ccD[buildType],
                    buildType,
                    limitPerceptions=limitPerceptions,
                    messageTag=ccId + ":" + buildType)
                if not oeMol:
                    logger.debug("%s build failed for %s - skipping", ccId,
                                 buildType)
                    continue
                maxHits = 0
                minHits = maxFpResults
                selfHit = False
                #
                startTime1 = time.time()
                for fpType, minFpScore in fpTypeCuttoffList:
                    retStatus, mL = oesU.getFingerPrintScores(
                        oeMol, fpType, minFpScore, maxFpResults)
                    self.assertTrue(retStatus)
                    logger.debug("%s fpType %r hits %d", ccId, fpType,
                                 len(mL))
                    maxHits = max(maxHits, len(mL))
                    minHits = min(minHits, len(mL))
                    matchedSelf = self.__resultContains(ccId, mL)
                    selfHit = selfHit or matchedSelf
                    if not matchedSelf:
                        missedFpD.setdefault(ccId, []).append(
                            (buildType, fpType, len(mL)))
                #
                if not selfHit:
                    missedBuildD.setdefault(ccId, []).append(buildType)
                #
                if maxHits < 1 or not selfHit:
                    logger.info(
                        "%s MISSED for buildType %r min hits %d max hits %d (%.4f seconds)",
                        ccId, buildType, minHits, maxHits,
                        time.time() - startTime1)
                else:
                    logger.debug(
                        "%s MATCHED for buildType %r min hits %d max hits %d (%.4f seconds)",
                        ccId, buildType, minHits, maxHits,
                        time.time() - startTime1)
            if ii % 100 == 0:
                logger.info(
                    "Completed %d of %d missed count %d in (%.4f seconds)", ii,
                    len(failedIdList), len(missedBuildD),
                    time.time() - startTime)

        # ------
        for ccId, bTL in missedBuildD.items():
            logger.info("%s missed all fptypes:  buildtype list %r", ccId, bTL)
            # Report the per-fingerprint misses for each missed component. This
            # check used to sit outside the loop, where it saw only a leftover
            # ccId (or none at all when nothing had been iterated).
            if ccId in missedFpD:
                logger.info("%s unmatched by fpTypes %r", ccId, missedFpD[ccId])

        #
        if doDisplay:
            for ccId, bTL in missedBuildD.items():
                idxD = ccIdxD[ccId]
                if "oe-iso-smiles" in idxD:
                    for bT in bTL:
                        self.__displayAlignedDescriptorPair(
                            ccId,
                            idxD["oe-iso-smiles"],
                            "oe-iso-smiles",
                            idxD[bT],
                            bT,
                            title=None,
                            limitPerceptions=True)

        logger.info("%s fingerprints search on %d in (%.4f seconds)",
                    len(fpTypeCuttoffList), numMols,
                    time.time() - startTime)
    def __sssWithFingerPrintFromDescriptor(self, numMols, **kwargs):
        """Run fingerprint-filtered substructure searches from descriptors and report self-match misses.

        Args:
            numMols (int): maximum number of index entries to search; a falsy
                value means the whole component index
            **kwargs: read here (all keywords are also forwarded to
                __getSearchDataProviders()) -
                maxResults (int): maximum hits requested per search (default 50)
                limitPerceptions (bool): forwarded to descriptorToMol() (default False)
                fpTypeCuttoffList (list): (fingerprint type, minimum score)
                    tuples (default [("TREE", 0.6)])
                buildTypeList (list): descriptor types used to build query
                    molecules (default ["oe-iso-smiles"])
                doDisplay (bool): depict each missed build type aligned against
                    the "oe-iso-smiles" form (default False)

        Returns:
            bool: True when the run completes (search failures raise through the assertions)
        """
        maxFpResults = kwargs.get("maxResults", 50)
        limitPerceptions = kwargs.get("limitPerceptions", False)
        fpTypeCuttoffList = kwargs.get("fpTypeCuttoffList", [("TREE", 0.6)])
        buildTypeList = kwargs.get("buildTypeList", ["oe-iso-smiles"])
        doDisplay = kwargs.get("doDisplay", False)
        #
        oesmP, ccIdxD = self.__getSearchDataProviders(**kwargs)
        oesU = OeSearchUtils(oesmP,
                             fpTypeList=[tup[0] for tup in fpTypeCuttoffList])
        oeioU = OeIoUtils()
        # This will reload the oe binary cache.
        oeMol = oesmP.getMol("004")
        self.assertGreaterEqual(len(list(oeMol.GetAtoms())), 12)

        # matchOpts = "graph-relaxed"
        matchOpts = "graph-strict"
        # ccId -> [buildType] for build types missed by every fingerprint type
        missedD = {}
        # ccId -> [(buildType, fpType, hit count)] for searches that missed the query itself
        missedFpD = {}
        numMols = min(len(ccIdxD), numMols) if numMols else len(ccIdxD)
        logger.info(
            "Begin substructure search w/ finger print filter on %d molecules",
            numMols)
        # ----
        startTime = time.time()
        for ii, ccId in enumerate(list(ccIdxD.keys())[:numMols]):
            ccD = ccIdxD[ccId]
            for buildType in buildTypeList:
                if buildType not in ccD:
                    logger.debug("%s missing descriptor %r", ccId, buildType)
                    continue
                startTime1 = time.time()
                oeMol = oeioU.descriptorToMol(
                    ccD[buildType],
                    buildType,
                    limitPerceptions=limitPerceptions,
                    messageTag=ccId + ":" + buildType)
                if not oeMol:
                    logger.debug("%s build failed for %s - skipping", ccId,
                                 buildType)
                    continue
                maxHits = 0
                minHits = maxFpResults
                selfHit = False
                for fpType, minFpScore in fpTypeCuttoffList:
                    retStatus, mL = oesU.searchSubStructureWithFingerPrint(
                        oeMol,
                        fpType,
                        minFpScore,
                        maxFpResults,
                        matchOpts=matchOpts)
                    self.assertTrue(retStatus)
                    logger.debug("%s fpType %r hits %d", ccId, fpType,
                                 len(mL))
                    maxHits = max(maxHits, len(mL))
                    minHits = min(minHits, len(mL))
                    matchedSelf = self.__resultContains(ccId, mL)
                    selfHit = selfHit or matchedSelf
                    if not matchedSelf:
                        missedFpD.setdefault(ccId, []).append(
                            (buildType, fpType, len(mL)))
                if not selfHit:
                    missedD.setdefault(ccId, []).append(buildType)

                if maxHits < 1 or not selfHit:
                    logger.info(
                        "%s (%r) buildType %r min hits %d max hits %d (%.4f seconds)",
                        ccId, selfHit, buildType, minHits, maxHits,
                        time.time() - startTime1)
            if ii % 100 == 0:
                logger.info("Completed %d of %d missed count %d", ii, numMols,
                            len(missedD))
        #
        for ccId, missL in missedD.items():
            logger.info("%s missed list %r", ccId, missL)
            if ccId in missedFpD:
                logger.info("%s unmatched for fpTypes %r", ccId,
                            missedFpD[ccId])
        # ----
        if doDisplay:
            # Display straight from missedD. The earlier code grouped a tuple
            # list that was never populated, so nothing was ever displayed.
            for ccId, buildTypeL in missedD.items():
                idxD = ccIdxD[ccId]
                if "oe-iso-smiles" in idxD:
                    for buildType in buildTypeL:
                        self.__displayAlignedDescriptorPair(
                            ccId,
                            idxD["oe-iso-smiles"],
                            "oe-iso-smiles",
                            idxD[buildType],
                            buildType,
                            title=None,
                            limitPerceptions=True)

        logger.info("%s fingerprints search on %d in (%.4f seconds)",
                    len(fpTypeCuttoffList), numMols,
                    time.time() - startTime)
        return True
    def __reload(self, **kwargs):
        """Reload the dictionary of OE molecules and related data artifacts for chemical component definitions.

        Args:
            limitPerceptions(bool): process input descriptors in essentially verbatim mode (default: True)
            fpTypeList (list): fingerprint type (TREE,PATH,MACCS,CIRCULAR,LINGO)
            screenTypeList (list): fast sub search screen type (MOLECULE, SMARTS, MDL, ... )
            useCache (bool, optional): flag to use cached files. Defaults to True.
            cachePath (str): path to the top cache directory. Defaults to '.'.
            numProc (int): number processors to engage in screen substructure search database generation.
            suppressHydrogens (bool, optional): flag to suppress explicit hydrogens in the OE data store.
            molLimit (int):

        Returns:
            (bool) : True for success or False othewise

        """
        try:
            useCache = kwargs.get("useCache", True)
            cachePath = kwargs.get("cachePath", ".")
            numProc = kwargs.get("numProc", 2)
            molLimit = kwargs.get("molLimit", None)
            fpTypeList = kwargs.get(
                "fpTypeList", ["TREE", "PATH", "MACCS", "CIRCULAR", "LINGO"])
            # screenTypeList = kwargs.get("screenTypeList", ["SMARTS"])
            screenTypeList = kwargs.get("screenTypeList", None)

            limitPerceptions = kwargs.get("limitPerceptions", False)
            suppressHydrogens = kwargs.get("suppressHydrogens", False)
            quietFlag = kwargs.get("quietFlag", True)
            logSizes = kwargs.get("logSizes", False)
            fpDbType = "STANDARD"
            buildScreenedDb = True
            #
            oeCount = 0
            errCount = 0
            failIdList = []
            oeIo = OeIoUtils(quietFlag=quietFlag)
            # --------
            oeSearchMolFilePath = os.path.join(self.__dirPath,
                                               self.__getOeSearchMolFileName())
            if not useCache or (useCache
                                and not self.__mU.exists(oeSearchMolFilePath)):
                cmpKwargs = {
                    k: v
                    for k, v in kwargs.items()
                    if k not in ["cachePath", "useCache", "molLimit"]
                }
                ccsiP = ChemCompSearchIndexProvider(cachePath=cachePath,
                                                    useCache=True,
                                                    molLimit=molLimit,
                                                    **cmpKwargs)
                ok = ccsiP.testCache(minCount=molLimit, logSizes=logSizes)
                # ----
                ccIdxD = ccsiP.getIndex() if ok else {}
                idxCount = len(ccIdxD)
                # ------- JDW OE mol construction here -----
                startTime = time.time()
                oeCount, errCount, failIdList = oeIo.buildOeBinaryMolCacheFromIndex(
                    oeSearchMolFilePath,
                    ccIdxD,
                    quietFlag=quietFlag,
                    fpTypeList=fpTypeList,
                    limitPerceptions=limitPerceptions,
                    suppressHydrogens=suppressHydrogens)
                if failIdList:
                    logger.info("failures %r", failIdList)
                endTime = time.time()
                logger.info(
                    "Constructed %d/%d cached oeMols  (unconverted %d) (%.4f seconds)",
                    oeCount, idxCount, errCount, endTime - startTime)
            # --------
            oeMolDbFilePath = os.path.join(self.__dirPath,
                                           self.__getOeMolDbFileName())
            if not useCache or (useCache
                                and not self.__mU.exists(oeMolDbFilePath)):
                startTime = time.time()
                molCount = oeIo.createOeBinaryDatabaseAndIndex(
                    oeSearchMolFilePath, oeMolDbFilePath)
                endTime = time.time()
                logger.info(
                    "Created and stored %d indexed oeMols in OE database format (%.4f seconds)",
                    molCount, endTime - startTime)

            # --------
            if fpDbType == "FAST":
                for fpType in fpTypeList:
                    startTime = time.time()
                    #  Fast FP search database file names
                    fpPath = os.path.join(self.__dirPath,
                                          self.__getFastFpDbFileName(fpType))
                    if not useCache or (useCache
                                        and not self.__mU.exists(fpPath)):
                        ok = oeIo.createOeFingerPrintDatabase(oeMolDbFilePath,
                                                              fpPath,
                                                              fpType=fpType)
                        endTime = time.time()
                        logger.info(
                            "Created and stored %s fingerprint database (%.4f seconds)",
                            fpType, endTime - startTime)
            # --------
            if buildScreenedDb and screenTypeList:
                for screenType in screenTypeList:
                    startTime = time.time()
                    fp = os.path.join(self.__dirPath,
                                      self.__getSubSearchFileName(screenType))
                    if not useCache or (useCache and not self.__mU.exists(fp)):
                        ok = oeIo.createOeSubSearchDatabase(
                            oeSearchMolFilePath,
                            fp,
                            screenType=screenType,
                            numProc=numProc)
                        endTime = time.time()
                        logger.info(
                            "Constructed screened substructure database (status %r) with screenType %s (%.4f seconds)",
                            ok, screenType, endTime - startTime)
                        # ---------
                        ssDb = oeIo.loadOeSubSearchDatabase(
                            fp, screenType=screenType, numProc=numProc)
                        ok = ssDb.NumMolecules() == oeCount
                        # ----------
            #
            return True
        except Exception as e:
            logger.exception("Failing with %s", str(e))
        return False
示例#19
0
    def buildSearchFiles(self, **kwargs):
        """Build cif, sdf (optional), and mol2 (optional) files for components in the chemical component search index.
           Exclude ions or other extraneous molecules lacking bonds (entries rejected by the
           CIF and OE molecule checks are skipped).

        Args:
            ccUrlTarget (str): locator for source chemical component dictionary (default: None)
            birdUrlTarget (str): locator for source BIRD dictionary (default: None)
            limitPerceptions (bool): restrict automatic perceptions in OE molecular build operations (default: False)
            numProc (int): number of processors (default: 2)
            useCache (bool): use existing resource file where possible (default: True)
            molLimit (str):  limit the number of ingested chemical components (default: None)
            quietFlag (bool): suppress output in OE library operations (default: True)
            fpTypeList (list): fingerprint types passed to the OE search molecule provider (default: [])
            screenTypeList (list): screen types passed to the OE search molecule provider (default: [])
            minCount (int): minimum count used when testing the chemical component provider cache (default: 0)
            useSdf (bool): write an SDF file for each search index entry (default: True)
            useMol2 (bool): write a mol2 file for each search index entry (default: False)

        Returns:
            (int): number of search index entries that passed the CIF and OE molecule checks
        """
        cachePath = self.__cachePath
        ccUrlTarget = kwargs.get("ccUrlTarget", None)
        birdUrlTarget = kwargs.get("birdUrlTarget", None)
        molLimit = kwargs.get("molLimit", None)
        quietFlag = kwargs.get("quietFlag", True)
        fpTypeList = kwargs.get("fpTypeList", [])
        screenTypeList = kwargs.get("screenTypeList", [])
        ccFileNamePrefix = "cc-%s" % self.__prefix if self.__prefix else "cc-full"
        oeFileNamePrefix = "oe-%s" % self.__prefix if self.__prefix else "oe-cc-full"
        numProc = kwargs.get("numProc", 2)
        minCount = kwargs.get("minCount", 0)
        useCache = kwargs.get("useCache", True)
        useSdf = kwargs.get("useSdf", True)
        useMol2 = kwargs.get("useMol2", False)
        limitPerceptions = kwargs.get("limitPerceptions", False)
        logSizes = False
        #
        startTime = time.time()
        ccmP = ChemCompMoleculeProvider(cachePath=cachePath,
                                        useCache=useCache,
                                        ccFileNamePrefix=ccFileNamePrefix,
                                        ccUrlTarget=ccUrlTarget,
                                        birdUrlTarget=birdUrlTarget,
                                        molLimit=molLimit)
        ok = ccmP.testCache(minCount=minCount, logSizes=logSizes)
        logger.info(
            "Completed chemical component provider load %r (%.4f seconds)", ok,
            time.time() - startTime)
        #
        startTime = time.time()
        oesmp = OeSearchMoleculeProvider(
            ccUrlTarget=ccUrlTarget,
            birdUrlTarget=birdUrlTarget,
            cachePath=cachePath,
            ccFileNamePrefix=ccFileNamePrefix,
            oeFileNamePrefix=oeFileNamePrefix,
            useCache=useCache,
            quietFlag=quietFlag,
            fpTypeList=fpTypeList,
            screenTypeList=screenTypeList,
            numProc=numProc,
            molLimit=molLimit,
            limitPerceptions=limitPerceptions,
        )
        ok = oesmp.testCache()
        logger.info("Completed OE molecule provider load %r (%.4f seconds)",
                    ok,
                    time.time() - startTime)
        #
        startTime = time.time()
        ccSIdxP = ChemCompSearchIndexProvider(
            cachePath=cachePath,
            useCache=useCache,
            ccFileNamePrefix=ccFileNamePrefix,
            limitPerceptions=limitPerceptions,
            numProc=numProc)
        ok = ccSIdxP.testCache()
        logger.info(
            "Completed chemical component search index load %r (%.4f seconds)",
            ok,
            time.time() - startTime)
        #
        ccSIdx = ccSIdxP.getIndex() if ccSIdxP and ok else {}
        logger.info("Search index status %r index length %d", ok, len(ccSIdx))
        #
        mU = MarshalUtil()
        oeU = OeIoUtils(dirPath=cachePath)
        numMols = 0
        searchFileDirPath = self.getSearchDirFilePath()
        pathTupList = []
        for sId in ccSIdx:
            # Search index identifiers carry the component identifier before the first "|"
            ccId = sId.split("|")[0]
            # All files for a component live under <searchDir>/<first character>/<ccId>/
            ccDirPath = os.path.join(searchFileDirPath, ccId[0], ccId)
            #
            # standard CIF definition (evaluated for every index entry of the component)
            cifPath = os.path.join(ccDirPath, ccId + ".cif")
            if not (useCache and mU.exists(cifPath)):
                ccMol = ccmP.getMol(ccId)
                if not self.__checkCif(ccMol):
                    continue
                mU.doExport(cifPath, [ccMol], fmt="mmcif")
            #
            # Sanity checks on the generated OE molecule
            #
            oeMol = oesmp.getMol(sId)
            if not self.__checkOeMol(oeMol):
                continue
            #
            # CIF for index entries whose identifier differs from the parent component
            cifPath = os.path.join(ccDirPath, sId + ".cif")
            if sId != ccId and not (useCache and mU.exists(cifPath)):
                oeccU = OeChemCompUtils()
                ok = oeccU.addOeMol(sId,
                                    oeMol,
                                    missingModelXyz=True,
                                    writeIdealXyz=False)
                if ok:
                    oeccU.write(cifPath)

            if useSdf:
                molFilePath = os.path.join(ccDirPath, sId + ".sdf")
                if not (useCache and mU.exists(molFilePath)):
                    ok = oeU.write(molFilePath,
                                   oeMol,
                                   constantMol=False,
                                   addSdTags=True)
                    if ok:
                        pathTupList.append((sId, molFilePath, "sdf"))
            #
            if useMol2:
                mol2FilePath = os.path.join(ccDirPath, sId + ".mol2")
                if not (useCache and mU.exists(mol2FilePath)):
                    # Capture the status of this write; previously the stale status of an
                    # earlier operation decided whether the mol2 path was recorded.
                    ok = oeU.write(mol2FilePath,
                                   oeMol,
                                   constantMol=False,
                                   addSdTags=True)
                    if ok:
                        pathTupList.append((sId, mol2FilePath, "mol2"))
            numMols += 1
        #
        self.__storePathList(pathTupList)
        return numMols
    def __reload(self, **kwargs):
        """Regenerate, where needed, the OE molecule store and derived search databases for chemical component definitions.

        Each artifact (serialized OE molecule file, indexed OE database, fast fingerprint
        databases, screened substructure databases) is rebuilt when caching is disabled or
        when the corresponding file does not exist.

        Args:
            molBuildType (str): build type used to construct OE molecules (default: "model-xyz").
                                Screened substructure databases are only generated for "oe-iso-smiles".
            limitPerceptions(bool): passed to the OE molecule cache builder (default: False)
            fpTypeList (list): fingerprint types (default: TREE, PATH, MACCS, CIRCULAR, LINGO)
            screenTypeList (list): fast sub search screen types (default: [])
            useCache (bool, optional): flag to use cached files. Defaults to True.
            cachePath (str): path to the top cache directory. Defaults to '.'.
            numProc (int): number processors to engage in screen substructure search database generation (default: 2)
            molLimit (int, optional): limiting number of molecules in data store (default: 0)
            suppressHydrogens (bool, optional): flag to suppress explicit hydrogens in the OE data store (default: False)
            quietFlag (bool, optional): passed to the OE I/O utilities (default: True)
            logSizes (bool, optional): passed to the chemical component provider cache test (default: False)

        Returns:
            (int): count of OE molecules reported by the molecule cache build, or 0 when the
                   existing molecule file was reused

        """
        # Option harvesting
        quietFlag = kwargs.get("quietFlag", True)
        logSizes = kwargs.get("logSizes", False)
        useCache = kwargs.get("useCache", True)
        cachePath = kwargs.get("cachePath", ".")
        molLimit = kwargs.get("molLimit", 0)
        numProc = kwargs.get("numProc", 2)
        molBuildType = kwargs.get("molBuildType", "model-xyz")
        limitPerceptions = kwargs.get("limitPerceptions", False)
        suppressHydrogens = kwargs.get("suppressHydrogens", False)
        fpTypeList = kwargs.get("fpTypeList",
                                ["TREE", "PATH", "MACCS", "CIRCULAR", "LINGO"])
        screenTypeList = kwargs.get("screenTypeList", [])
        # Fast fingerprint databases are only produced when this is switched to "FAST"
        fpDbType = "STANDARD"
        #
        oeCount = 0
        oeIo = OeIoUtils(quietFlag=quietFlag)

        def _rebuildRequired(targetPath):
            # True when caching is off or the target artifact is missing
            return not (useCache and self.__mU.exists(targetPath))

        # -------- serialized OE molecule file
        oeMolFilePath = os.path.join(self.__dirPath, self.__getOeMolFileName())
        if _rebuildRequired(oeMolFilePath):
            # Forward every option except those set explicitly on the provider
            reservedKeys = ("cachePath", "useCache", "molLimit")
            providerKwargs = {
                key: val
                for key, val in kwargs.items() if key not in reservedKeys
            }
            ccmP = ChemCompMoleculeProvider(cachePath=cachePath,
                                            useCache=True,
                                            molLimit=molLimit,
                                            **providerKwargs)
            if ccmP.testCache(minCount=molLimit, logSizes=logSizes):
                ccObjD = ccmP.getMolD()
            else:
                ccObjD = {}
            ccCount = len(ccObjD)
            #
            t0 = time.time()
            oeCount, errCount, failIdList = oeIo.buildOeBinaryMolCache(
                oeMolFilePath,
                ccObjD,
                molBuildType=molBuildType,
                quietFlag=quietFlag,
                fpTypeList=fpTypeList,
                limitPerceptions=limitPerceptions,
                suppressHydrogens=suppressHydrogens)
            logger.info(
                "Stored %d/%d OeMols (suppressH = %r) created with molBuildType %r (unconverted %d)",
                oeCount, ccCount, suppressHydrogens, molBuildType, errCount)
            if failIdList:
                logger.info("%r failures %r", molBuildType, failIdList)
            elapsed = time.time() - t0
            logger.info("Constructed %d/%d cached oeMols (%.4f seconds)",
                        oeCount, ccCount, elapsed)

        # -------- indexed OE molecule database
        oeMolDbFilePath = os.path.join(self.__dirPath,
                                       self.__getOeMolDbFileName())
        if _rebuildRequired(oeMolDbFilePath):
            t0 = time.time()
            molCount = oeIo.createOeBinaryDatabaseAndIndex(
                oeMolFilePath, oeMolDbFilePath)
            elapsed = time.time() - t0
            logger.info(
                "Created and stored %d indexed OeMols in OE database format (%.4f seconds)",
                molCount, elapsed)

        # -------- fast fingerprint search databases
        if fpDbType == "FAST":
            for fpType in fpTypeList:
                t0 = time.time()
                fpPath = os.path.join(self.__dirPath,
                                      self.__getFastFpDbFileName(fpType))
                if not _rebuildRequired(fpPath):
                    continue
                ok = oeIo.createOeFingerPrintDatabase(oeMolDbFilePath,
                                                      fpPath,
                                                      fpType=fpType)
                elapsed = time.time() - t0
                logger.info(
                    "Created and stored %s fingerprint database (%.4f seconds)",
                    fpType, elapsed)

        # -------- screened substructure search databases
        if molBuildType in ("oe-iso-smiles", ):
            for screenType in screenTypeList:
                t0 = time.time()
                ssPath = os.path.join(self.__dirPath,
                                      self.__getSubSearchFileName(screenType))
                if not _rebuildRequired(ssPath):
                    continue
                ok = oeIo.createOeSubSearchDatabase(oeMolFilePath,
                                                    ssPath,
                                                    screenType=screenType,
                                                    numProc=numProc)
                elapsed = time.time() - t0
                logger.info(
                    "Constructed screened substructure database (status %r) with screenType %s (%.4f seconds)",
                    ok, screenType, elapsed)
                # Reload the new database and compare its size with the build count
                # (the comparison result is not used further here)
                ssDb = oeIo.loadOeSubSearchDatabase(ssPath,
                                                    screenType=screenType,
                                                    numProc=numProc)
                ok = ssDb.NumMolecules() == oeCount
        return oeCount
示例#21
0
    def testSssWithFingerPrintFromDescriptor(self):
        """Fingerprint-screened substructure search using molecules built from indexed descriptors.

        For the first numMols entries of the chemical component index, a query molecule is
        built from each available descriptor (SMILES variants and InChI) and searched with
        every configured fingerprint type/cutoff.  Every search must return a successful
        status; components that are not found among their own results are only logged.
        """
        oemp = OeMoleculeProvider(**self.__myKwargs)
        # The status of this cache test is superseded by the index provider test below
        ok = oemp.testCache()
        ccmP = ChemCompIndexProvider(**self.__myKwargs)
        ccIdxD = ccmP.getIndex()
        ok = ccmP.testCache(minCount=self.__minCount)
        self.assertTrue(ok)
        limitPerceptions = False
        # minFpScore = 0.5
        maxFpResults = 50
        matchOpts = "graph-relaxed"
        numMols = 20
        oeioU = OeIoUtils()
        oesU = OeSearchUtils(oemp, fpTypeList=self.__fpTypeList)
        missTupL = []
        missedD = {}
        missedFpD = {}
        # ----
        startTime = time.time()
        for ccId, ccD in list(ccIdxD.items())[:numMols]:
            for buildType in [
                    "oe-iso-smiles", "oe-smiles", "acdlabs-smiles",
                    "cactvs-iso-smiles", "cactvs-smiles", "inchi"
            ]:
                if buildType in ccD:
                    logger.debug("Search %s %r", ccId, ccD[buildType])
                    # Reset for every descriptor so that a failed build is skipped below
                    # instead of searching with the molecule from a previous descriptor.
                    oeMol = None
                    if buildType in ["inchi"]:
                        oemf = OeMoleculeFactory()
                        oemf.setDescriptor(ccD["inchi"], "inchi", ccId)
                        ok = oemf.build(molBuildType="inchi",
                                        limitPerceptions=limitPerceptions)
                        if not ok:
                            logger.info("%s build failed with InChI %r", ccId,
                                        ccD["inchi"])
                        else:
                            oeMol = oemf.getMol()
                            if oemf.getInChI() != ccD["inchi"]:
                                logger.info(
                                    "%s regenerated InChI differs\n%r\n%s",
                                    ccId, ccD["inchi"], oemf.getInChI())
                    else:
                        oeMol = oeioU.smilesToMol(
                            ccD[buildType], limitPerceptions=limitPerceptions)
                    if not oeMol:
                        continue
                    # Track the spread of hit counts over fingerprint types and whether
                    # any fingerprint type recovered the query component itself
                    maxHits = 0
                    minHits = maxFpResults
                    selfHit = False
                    for fpType, minFpScore in self.__fpTypeCuttoffList:
                        retStatus, mL = oesU.searchSubStructureWithFingerPrint(
                            oeMol,
                            fpType,
                            minFpScore,
                            maxFpResults,
                            matchOpts=matchOpts)
                        self.assertTrue(retStatus)
                        logger.debug("%s fpType %r hits %d", ccId, fpType,
                                     len(mL))
                        maxHits = max(maxHits, len(mL))
                        minHits = min(minHits, len(mL))
                        matchedSelf = self.__resultContains(ccId, mL)
                        selfHit = selfHit or matchedSelf
                        if not matchedSelf:
                            missedFpD.setdefault(ccId, []).append(
                                (buildType, fpType, len(mL)))
                    if not selfHit:
                        missedD.setdefault(ccId, []).append(buildType)

                    logger.info("%s (%r) buildType %r min hits %d max hits %d",
                                ccId, selfHit, buildType, minHits, maxHits)
                else:
                    logger.info("%s missing descriptor %r", ccId, buildType)
        #
        for ccId, missL in missedD.items():
            logger.info("%s missed list %r", ccId, missL)
            if ccId in missedFpD:
                logger.info("%s unmatched for fpTypes %r", ccId,
                            missedFpD[ccId])
        # ----
        # Optional depiction of missed descriptor pairs (disabled; missTupL is not
        # populated by the loop above)
        doDepict = False
        if doDepict:
            mD = {}
            for missTup in missTupL:
                mD.setdefault(missTup[0], []).append(missTup[1])

            for ccId, buildTypeL in mD.items():
                idxD = ccIdxD[ccId]
                if "oe-iso-smiles" in idxD:
                    for buildType in buildTypeL:
                        self.__displayAlignedDescriptorPair(
                            ccId,
                            idxD["oe-iso-smiles"],
                            "oe-iso-smiles",
                            idxD[buildType],
                            buildType,
                            title=None,
                            limitPerceptions=True)

        logger.info("%s fingerprints search on %d in (%.4f seconds)",
                    len(self.__fpTypeList), numMols,
                    time.time() - startTime)
示例#22
0
    def __getMiscFile(self, filePath, suppressHydrogens=False, importType="2D", title=None, largestPart=False):
        """Read a miscellaneous chemical file and build an OE molecule and feature
        dictionary for comparison.  Only the first molecule read from the file is used.

        Args:
            filePath (str): path of the chemical file to read
            suppressHydrogens (bool, optional): return the hydrogen-suppressed graph molecule. Defaults to False.
            importType (str, optional): "3D" requests a 3D import and fills the "xyz" feature with
                                        per-atom details; otherwise "xyz" is an empty list. Defaults to "2D".
            title (str, optional): title assigned to the molecule; when unset the title read from the file is kept.
            largestPart (bool, optional): passed through to the file reader. Defaults to False.

        Returns:
            (tuple): (identifier, OE molecule, feature dictionary) or (None, None, None) when the
                     file yields no molecule or any processing step fails

        """
        try:
            oeioU = OeIoUtils()
            oeMolL = oeioU.fileToMols(filePath, use3D=importType == "3D", largestPart=largestPart)
            if not oeMolL:
                # Report the empty read explicitly rather than relying on an IndexError below
                logger.error("No molecules read from %s", filePath)
                return None, None, None
            logger.info("Read (%d) from %s ", len(oeMolL), filePath)
            oeMol = oeMolL[0]

            ccId = title if title else oeMol.GetTitle()
            if title:
                oeMol.SetTitle(ccId)
            #
            oemf = OeMoleculeFactory()
            if not self.__verbose:
                oemf.setQuiet()
            oemf.setOeMol(oeMol, ccId)
            #
            # NOTE(review): the returned feature dictionary is not used; the call is retained in
            # case it has side effects on the factory - confirm before removing.
            oemf.getOeMoleculeFeatures()
            if self.__verbose:
                logger.info("  Title              = %s", title)
                logger.info("  Title OEMF         = %s", oemf.getTitle())
                logger.info("  SMILES             = %s", oemf.getCanSMILES())
                logger.info("  SMILES (stereo)    = %s", oemf.getIsoSMILES())
                logger.info("  Formula (Hill)     = %s", oemf.getFormula())
                logger.info("  InChI key          = %s", oemf.getInChIKey())
                logger.info("  InChI              = %s", oemf.getInChI())
            #
            ccId = oemf.getTitle()
            if suppressHydrogens:
                tMol = oemf.getGraphMolSuppressH()
            else:
                tMol = oemf.getMol()

            # Per-atom details are only collected for 3D imports
            molXyzL = []
            if importType == "3D":
                for atm in tMol.GetAtoms():
                    xyzL = oechem.OEFloatArray(3)
                    tMol.GetCoords(atm, xyzL)
                    molXyzL.append(
                        ComponentAtomDetails(
                            atIdx=atm.GetIdx(),
                            atNo=atm.GetAtomicNum(),
                            atName=atm.GetName(),
                            atType=atm.GetType(),
                            x=xyzL[0],
                            y=xyzL[1],
                            z=xyzL[2],
                            atFormalCharge=atm.GetFormalCharge(),
                        )
                    )
            fD = {
                "Formula": oemf.getFormula(),
                "SMILES": oemf.getCanSMILES(),
                "SMILES_STEREO": oemf.getIsoSMILES(),
                "InChI": oemf.getInChI(),
                "InChIKey": oemf.getInChIKey(),
                "xyz": molXyzL,
            }
            if self.__verbose:
                # Coordinates are fetched here only for the debug listing
                for atm in tMol.GetAtoms():
                    xyzL = oechem.OEFloatArray(3)
                    tMol.GetCoords(atm, xyzL)
                    logger.debug("atom  %s %s %s %s %r", atm.GetIdx(), atm.GetAtomicNum(), atm.GetName(), atm.GetType(), xyzL)

            fD["OEMOL"] = tMol
            return (ccId, tMol, fD)
        except Exception as e:
            logger.exception("Failing with %s", str(e))

        return None, None, None