Пример #1
0
 def getSubSearchDb(self,
                    screenType="SMARTS",
                    numProc=1,
                    forceRefresh=False):
     if not self.__ssDb or forceRefresh:
         oeIo = OeIoUtils()
         fp = os.path.join(self.__dirPath,
                           self.__getSubSearchFileName(screenType))
         logger.info("Opening screened substructure search database %r", fp)
         self.__ssDb = oeIo.loadOeSubSearchDatabase(fp,
                                                    screenType,
                                                    numProc=numProc)
     return self.__ssDb
Пример #2
0
    def __reload(self, **kwargs):
        """Reload the dictionary of OE molecules and related data artifacts for chemical component definitions.

        Args:
            molBuildType (str):  coordinates to use in building OE molecules from CIF components (model, ideal or None)
            limitPerceptions(bool): process input descriptors in essentially verbatim mode (default: True)
            fpTypeList (list): fingerprint type (TREE,PATH,MACCS,CIRCULAR,LINGO)
            screenTypeList (list): fast sub search screen type (MOLECULE, SMARTS, MDL, ... )
            useCache (bool, optional): flag to use cached files. Defaults to True.
            cachePath (str): path to the top cache directory. Defaults to '.'.
            numProc (int): number processors to engage in screen substructure search database generation.
            molLimit (int, optional): limiting number of molecules in data store (default: 0 no limit)
            suppressHydrogens (bool, optional): flag to suppress explicit hydrogens in the OE data store.

        Returns:
            (dict): dictionary of constructed OE molecules

        """
        useCache = kwargs.get("useCache", True)
        cachePath = kwargs.get("cachePath", ".")
        numProc = kwargs.get("numProc", 2)
        molLimit = kwargs.get("molLimit", 0)
        fpTypeList = kwargs.get("fpTypeList",
                                ["TREE", "PATH", "MACCS", "CIRCULAR", "LINGO"])
        # screenTypeList = kwargs.get("screenTypeList", ["SMARTS"])
        screenTypeList = kwargs.get("screenTypeList", [])
        molBuildType = kwargs.get("molBuildType", "model-xyz")
        limitPerceptions = kwargs.get("limitPerceptions", False)
        quietFlag = kwargs.get("quietFlag", True)
        suppressHydrogens = kwargs.get("suppressHydrogens", False)
        logSizes = kwargs.get("logSizes", False)
        fpDbType = "STANDARD"
        #
        ccCount = 0
        oeCount = 0
        errCount = 0
        failIdList = []
        oeIo = OeIoUtils(quietFlag=quietFlag)
        # --------
        oeMolFilePath = os.path.join(self.__dirPath, self.__getOeMolFileName())
        if not useCache or (useCache and not self.__mU.exists(oeMolFilePath)):
            cmpKwargs = {
                k: v
                for k, v in kwargs.items()
                if k not in ["cachePath", "useCache", "molLimit"]
            }
            ccmP = ChemCompMoleculeProvider(cachePath=cachePath,
                                            useCache=True,
                                            molLimit=molLimit,
                                            **cmpKwargs)
            ok = ccmP.testCache(minCount=molLimit, logSizes=logSizes)
            ccObjD = ccmP.getMolD() if ok else {}
            ccCount = len(ccObjD)
            # -------
            startTime = time.time()
            oeCount, errCount, failIdList = oeIo.buildOeBinaryMolCache(
                oeMolFilePath,
                ccObjD,
                molBuildType=molBuildType,
                quietFlag=quietFlag,
                fpTypeList=fpTypeList,
                limitPerceptions=limitPerceptions,
                suppressHydrogens=suppressHydrogens)
            logger.info(
                "Stored %d/%d OeMols (suppressH = %r) created with molBuildType %r (unconverted %d)",
                oeCount, ccCount, suppressHydrogens, molBuildType, errCount)
            if failIdList:
                logger.info("%r failures %r", molBuildType, failIdList)
            endTime = time.time()
            logger.info("Constructed %d/%d cached oeMols (%.4f seconds)",
                        oeCount, ccCount, endTime - startTime)
        # --------
        oeMolDbFilePath = os.path.join(self.__dirPath,
                                       self.__getOeMolDbFileName())
        if not useCache or (useCache
                            and not self.__mU.exists(oeMolDbFilePath)):
            startTime = time.time()
            molCount = oeIo.createOeBinaryDatabaseAndIndex(
                oeMolFilePath, oeMolDbFilePath)
            endTime = time.time()
            logger.info(
                "Created and stored %d indexed OeMols in OE database format (%.4f seconds)",
                molCount, endTime - startTime)

        # --------
        if fpDbType == "FAST":
            for fpType in fpTypeList:
                startTime = time.time()
                #  Fast FP search database file names
                fpPath = os.path.join(self.__dirPath,
                                      self.__getFastFpDbFileName(fpType))
                if not useCache or (useCache and not self.__mU.exists(fpPath)):
                    ok = oeIo.createOeFingerPrintDatabase(oeMolDbFilePath,
                                                          fpPath,
                                                          fpType=fpType)
                    endTime = time.time()
                    logger.info(
                        "Created and stored %s fingerprint database (%.4f seconds)",
                        fpType, endTime - startTime)
        # --------
        if molBuildType in ["oe-iso-smiles"]:
            for screenType in screenTypeList:
                startTime = time.time()
                fp = os.path.join(self.__dirPath,
                                  self.__getSubSearchFileName(screenType))
                if not useCache or (useCache and not self.__mU.exists(fp)):
                    ok = oeIo.createOeSubSearchDatabase(oeMolFilePath,
                                                        fp,
                                                        screenType=screenType,
                                                        numProc=numProc)
                    endTime = time.time()
                    logger.info(
                        "Constructed screened substructure database (status %r) with screenType %s (%.4f seconds)",
                        ok, screenType, endTime - startTime)
                    # ---------
                    ssDb = oeIo.loadOeSubSearchDatabase(fp,
                                                        screenType=screenType,
                                                        numProc=numProc)
                    ok = ssDb.NumMolecules() == oeCount
                    # ----------
        return oeCount
    def __reload(self, **kwargs):
        """Reload the dictionary of OE molecules and related data artifacts for chemical component definitions.

        Args:
            limitPerceptions(bool): process input descriptors in essentially verbatim mode (default: True)
            fpTypeList (list): fingerprint type (TREE,PATH,MACCS,CIRCULAR,LINGO)
            screenTypeList (list): fast sub search screen type (MOLECULE, SMARTS, MDL, ... )
            useCache (bool, optional): flag to use cached files. Defaults to True.
            cachePath (str): path to the top cache directory. Defaults to '.'.
            numProc (int): number processors to engage in screen substructure search database generation.
            suppressHydrogens (bool, optional): flag to suppress explicit hydrogens in the OE data store.
            molLimit (int):

        Returns:
            (bool) : True for success or False othewise

        """
        try:
            useCache = kwargs.get("useCache", True)
            cachePath = kwargs.get("cachePath", ".")
            numProc = kwargs.get("numProc", 2)
            molLimit = kwargs.get("molLimit", None)
            fpTypeList = kwargs.get(
                "fpTypeList", ["TREE", "PATH", "MACCS", "CIRCULAR", "LINGO"])
            # screenTypeList = kwargs.get("screenTypeList", ["SMARTS"])
            screenTypeList = kwargs.get("screenTypeList", None)

            limitPerceptions = kwargs.get("limitPerceptions", False)
            suppressHydrogens = kwargs.get("suppressHydrogens", False)
            quietFlag = kwargs.get("quietFlag", True)
            logSizes = kwargs.get("logSizes", False)
            fpDbType = "STANDARD"
            buildScreenedDb = True
            #
            oeCount = 0
            errCount = 0
            failIdList = []
            oeIo = OeIoUtils(quietFlag=quietFlag)
            # --------
            oeSearchMolFilePath = os.path.join(self.__dirPath,
                                               self.__getOeSearchMolFileName())
            if not useCache or (useCache
                                and not self.__mU.exists(oeSearchMolFilePath)):
                cmpKwargs = {
                    k: v
                    for k, v in kwargs.items()
                    if k not in ["cachePath", "useCache", "molLimit"]
                }
                ccsiP = ChemCompSearchIndexProvider(cachePath=cachePath,
                                                    useCache=True,
                                                    molLimit=molLimit,
                                                    **cmpKwargs)
                ok = ccsiP.testCache(minCount=molLimit, logSizes=logSizes)
                # ----
                ccIdxD = ccsiP.getIndex() if ok else {}
                idxCount = len(ccIdxD)
                # ------- JDW OE mol construction here -----
                startTime = time.time()
                oeCount, errCount, failIdList = oeIo.buildOeBinaryMolCacheFromIndex(
                    oeSearchMolFilePath,
                    ccIdxD,
                    quietFlag=quietFlag,
                    fpTypeList=fpTypeList,
                    limitPerceptions=limitPerceptions,
                    suppressHydrogens=suppressHydrogens)
                if failIdList:
                    logger.info("failures %r", failIdList)
                endTime = time.time()
                logger.info(
                    "Constructed %d/%d cached oeMols  (unconverted %d) (%.4f seconds)",
                    oeCount, idxCount, errCount, endTime - startTime)
            # --------
            oeMolDbFilePath = os.path.join(self.__dirPath,
                                           self.__getOeMolDbFileName())
            if not useCache or (useCache
                                and not self.__mU.exists(oeMolDbFilePath)):
                startTime = time.time()
                molCount = oeIo.createOeBinaryDatabaseAndIndex(
                    oeSearchMolFilePath, oeMolDbFilePath)
                endTime = time.time()
                logger.info(
                    "Created and stored %d indexed oeMols in OE database format (%.4f seconds)",
                    molCount, endTime - startTime)

            # --------
            if fpDbType == "FAST":
                for fpType in fpTypeList:
                    startTime = time.time()
                    #  Fast FP search database file names
                    fpPath = os.path.join(self.__dirPath,
                                          self.__getFastFpDbFileName(fpType))
                    if not useCache or (useCache
                                        and not self.__mU.exists(fpPath)):
                        ok = oeIo.createOeFingerPrintDatabase(oeMolDbFilePath,
                                                              fpPath,
                                                              fpType=fpType)
                        endTime = time.time()
                        logger.info(
                            "Created and stored %s fingerprint database (%.4f seconds)",
                            fpType, endTime - startTime)
            # --------
            if buildScreenedDb and screenTypeList:
                for screenType in screenTypeList:
                    startTime = time.time()
                    fp = os.path.join(self.__dirPath,
                                      self.__getSubSearchFileName(screenType))
                    if not useCache or (useCache and not self.__mU.exists(fp)):
                        ok = oeIo.createOeSubSearchDatabase(
                            oeSearchMolFilePath,
                            fp,
                            screenType=screenType,
                            numProc=numProc)
                        endTime = time.time()
                        logger.info(
                            "Constructed screened substructure database (status %r) with screenType %s (%.4f seconds)",
                            ok, screenType, endTime - startTime)
                        # ---------
                        ssDb = oeIo.loadOeSubSearchDatabase(
                            fp, screenType=screenType, numProc=numProc)
                        ok = ssDb.NumMolecules() == oeCount
                        # ----------
            #
            return True
        except Exception as e:
            logger.exception("Failing with %s", str(e))
        return False