示例#1
0
    def getCCDefObj(self, dataContainer, molBuildType="model-xyz", suppressHydrogens=False):
        """Build OE molecule from the input chemical component definition object."""
        #
        oemf = OeMoleculeFactory()
        if not self.__verbose:
            oemf.setQuiet()
        ccId = oemf.setChemCompDef(dataContainer)
        oemf.build(molBuildType=molBuildType)

        if self.__verbose:
            logger.info("  CCId               = %s", ccId)
            logger.info("  Title              = %s", oemf.getTitle())
            logger.info("  SMILES             = %s", oemf.getCanSMILES())
            logger.info("  SMILES (stereo)    = %s", oemf.getIsoSMILES())
            logger.info("  Formula (Hill)     = %s", oemf.getFormula())
            logger.info("  InChI key          = %s", oemf.getInChIKey())
            logger.info("  InChI              = %s", oemf.getInChI())

        fD = {}
        fD = {"Formula": oemf.getFormula(), "SMILES": oemf.getCanSMILES(), "SMILES_STEREO": oemf.getIsoSMILES(), "InChI": oemf.getInChI(), "InChIKey": oemf.getInChIKey()}

        if suppressHydrogens:
            tMol = oemf.getGraphMolSuppressH()
        else:
            tMol = oemf.getMol()

        fD["OEMOL"] = tMol
        fD["xyz"] = oemf.getAtomDetails(xyzType="model")

        return (ccId, tMol, fD)
示例#2
0
    def getCCDefFile(self, ccFilePath, molBuildType="model-xyz", suppressHydrogens=False):
        """Fetch the molecule definition (ccPath) and build OE molecules
        for comparison.

        """
        #
        mU = MarshalUtil(workPath=self.__workPath)
        rdCcObjL = mU.doImport(ccFilePath, fmt="mmcif")
        oemf = OeMoleculeFactory()
        if not self.__verbose:
            oemf.setQuiet()
        ccId = oemf.setChemCompDef(rdCcObjL[0])
        oemf.build(molBuildType=molBuildType)

        if self.__verbose:
            logger.info("  CCId               = %s", ccId)
            logger.info("  Title              = %s", oemf.getTitle())
            logger.info("  SMILES             = %s", oemf.getCanSMILES())
            logger.info("  SMILES (stereo)    = %s", oemf.getIsoSMILES())
            logger.info("  Formula (Hill)     = %s", oemf.getFormula())
            logger.info("  InChI key          = %s", oemf.getInChIKey())
            logger.info("  InChI              = %s", oemf.getInChI())

        fD = {}
        fD = {"Formula": oemf.getFormula(), "SMILES": oemf.getCanSMILES(), "SMILES_STEREO": oemf.getIsoSMILES(), "InChI": oemf.getInChI(), "InChIKey": oemf.getInChIKey()}

        if suppressHydrogens:
            tMol = oemf.getGraphMolSuppressH()
        else:
            tMol = oemf.getMol()

        fD["OEMOL"] = tMol
        fD["xyz"] = oemf.getAtomDetails(xyzType="model")

        return (ccId, tMol, fD)
示例#3
0
    def testBuildCifFromOE(self):
        """Build chemical component definitions from OE Mol object"""
        try:
            ccMolD = self.__getChemCompDefs()
            oemf = OeMoleculeFactory()
            #
            for ccId, ccObj in list(ccMolD.items())[:10]:
                # ----
                tId = oemf.setChemCompDef(ccObj)
                self.assertEqual(tId, ccId)
                ok = oemf.build(molBuildType="model-xyz")
                self.assertTrue(ok)
                fp = os.path.join(self.__ccCifPath, ccId + "-gen.cif")
                oeMol = oemf.getMol()
                oeccU = OeChemCompUtils()
                ok = oeccU.addOeMol(ccId,
                                    oeMol,
                                    missingModelXyz=False,
                                    writeIdealXyz=False)
                self.assertTrue(ok)
                ok = oeccU.write(fp)

                # ----
        except Exception as e:
            logger.exception("Failing with %s", str(e))
            self.fail()
示例#4
0
    def __buildChemCompSearchIndex(self, ccObjD, descrD, limitPerceptions=False, molLimit=None):
        """Internal method return a dictionary of extracted chemical component descriptors and formula."""
        rD = {}
        try:
            for ii, ccId in enumerate(ccObjD, 1):
                if molLimit and ii > molLimit:
                    break
                # ----
                oemf = OeMoleculeFactory()
                oemf.setQuiet()
                tId = oemf.setChemCompDef(ccObjD[ccId])
                if tId != ccId:
                    logger.error("%s chemical component definition import error", ccId)
                # ----
                oemf.clearExternalDescriptors()
                for smi in descrD[ccId] if ccId in descrD else []:
                    oemf.addExternalDescriptor("smiles", smi, "chemaxon-smiles")
                # ----
                smiD = oemf.buildRelated(limitPerceptions=limitPerceptions)
                logger.debug("%s related molecular forms %d", ccId, len(smiD))
                rD.update(smiD)
        except Exception as e:
            logger.exception("Failing with %s", str(e))

        return rD
示例#5
0
 def __buildChemCompSearchIndex(self, procName, ccIdList, descrD, limitPerceptions=False, quietFlag=False):
     """Internal method return a dictionary of extracted chemical component descriptors and formula."""
     rL = []
     fL = []
     try:
         for ccId in ccIdList:
             if ccId not in self.__ccObjD:
                 logger.error("%s missing chemical definition for %s", procName, ccId)
                 fL.append(ccId)
                 continue
             dataContainer = self.__ccObjD[ccId]
             # ----
             oemf = OeMoleculeFactory()
             if quietFlag:
                 oemf.setQuiet()
             tId = oemf.setChemCompDef(dataContainer)
             if tId != ccId:
                 logger.error("%s %s chemical component definition import error", procName, ccId)
                 fL.append(ccId)
                 continue
             # ----
             oemf.clearExternalDescriptors()
             for smi in descrD[ccId] if ccId in descrD else []:
                 oemf.addExternalDescriptor("smiles", smi, "chemaxon-smiles")
             # ----
             relD = oemf.buildRelated(limitPerceptions=limitPerceptions)
             logger.debug("%s %s related molecular forms %d", procName, ccId, len(relD))
             if relD:
                 rL.extend([relD[v] for v in relD])
             else:
                 fL.append(ccId)
     except Exception as e:
         logger.exception("%s failing with %s", procName, str(e))
     return rL, fL
示例#6
0
    def testRoundTripOps(self):
        """Test IO operation on generated related molecules"""
        try:
            oeIoU = OeIoUtils()
            mU = MarshalUtil()
            mU.mkdir(self.__molfileDirPath)
            ccMolD = self.__getChemCompDefs()
            oemf = OeMoleculeFactory()
            for ccId, ccObj in list(ccMolD.items())[:10]:
                # ----
                tId = oemf.setChemCompDef(ccObj)
                self.assertEqual(tId, ccId)
                relatedIdxD = oemf.buildRelated(limitPerceptions=False)
                logger.info("%s generated %d molecular forms", ccId,
                            len(relatedIdxD))
                for sId, idxD in relatedIdxD.items():
                    logger.info("sId %r smiles %r", sId, idxD["smiles"])
                    mol2Path = os.path.join(self.__molfileDirPath,
                                            sId + ".mol2")
                    oeMol = oeIoU.descriptorToMol(idxD["smiles"],
                                                  "oe-iso-smiles",
                                                  limitPerceptions=False,
                                                  messageTag=None)
                    oeIoU.write(mol2Path,
                                oeMol,
                                constantMol=True,
                                addSdTags=True)
                    tMolL = oeIoU.fileToMols(mol2Path)
                    #
                    nextMol2Path = os.path.join(self.__molfileDirPath,
                                                sId + "-next.mol2")
                    oeIoU.write(nextMol2Path,
                                tMolL[0],
                                constantMol=True,
                                addSdTags=True)

                    sdfPath = os.path.join(self.__molfileDirPath, sId + ".mol")
                    oeMol = oeIoU.descriptorToMol(idxD["smiles"],
                                                  "oe-iso-smiles",
                                                  limitPerceptions=False,
                                                  messageTag=None)
                    oeIoU.write(sdfPath,
                                oeMol,
                                constantMol=True,
                                addSdTags=True)
                    #
                    tMolL = oeIoU.fileToMols(sdfPath)
                    nextSdfPath = os.path.join(self.__molfileDirPath,
                                               sId + "-next.sdf")
                    oeIoU.write(nextSdfPath,
                                tMolL[0],
                                constantMol=True,
                                addSdTags=True)
                # ----
        except Exception as e:
            logger.exception("Failing with %s", str(e))
            self.fail()
示例#7
0
 def __displayAlignedDescriptorPair(self,
                                    ccId,
                                    descrRef,
                                    buildTypeRef,
                                    descrFit,
                                    buildTypeFit,
                                    title=None,
                                    limitPerceptions=True):
     oemfRef = OeMoleculeFactory()
     oemfRef.setDescriptor(descrRef, buildTypeRef, ccId)
     oemfRef.build(molBuildType=buildTypeRef,
                   limitPerceptions=limitPerceptions)
     oeMolRef = oemfRef.getMol()
     #
     oemfFit = OeMoleculeFactory()
     oemfFit.setDescriptor(descrFit, buildTypeFit, ccId)
     oemfFit.build(molBuildType=buildTypeFit,
                   limitPerceptions=limitPerceptions)
     oeMolFit = oemfFit.getMol()
     #
     oed = OeDepictMCSAlignPage()
     oed.setSearchType(sType="graph-relaxed", minAtomMatchFraction=0.50)
     oed.setDisplayOptions(labelAtomName=True,
                           labelAtomCIPStereo=True,
                           labelAtomIndex=False,
                           labelBondIndex=False,
                           highlightStyleFit="ballAndStickInverse",
                           bondDisplayWidth=0.5)
     oed.setRefMol(oeMolRef, ccId)
     oed.setFitMol(oeMolFit, ccId)
     myTitle = title if title else buildTypeRef + "-" + buildTypeFit
     imgPath = os.path.join(self.__workPath, myTitle + "-" + ccId + ".svg")
     logger.info("Using image path %r", imgPath)
     aML = oed.alignPair(imagePath=imgPath)
     if aML:
         logger.info("%s aligned image path %r", ccId, imgPath)
         for (rCC, rAt, tCC, tAt) in aML:
             logger.debug("%5s %-5s %5s %-5s", rCC, rAt, tCC, tAt)
    def prefilterIndex(self,
                       oeQueryMol,
                       idxP,
                       matchOpts="relaxed",
                       skipFeatures=False):
        """Filter the full search index base on minimum chemical formula an feature criteria.

        Args:
            oeQueryMol (object): search target moleculed (OEMol)
            idxP (object): instance ChemCompSearchIndexProvider()
            matchOpts (str, optional): search criteria options. Defaults to "default".
            skipFeatures (bool, optional): skip feature filters. Defaults to False.

        Returns:
            (list): list of chemical component identifiers in the filtered search space
        """
        startTime = time.time()
        oemf = OeMoleculeFactory()
        oemf.setOeMol(oeQueryMol, "queryTarget")
        typeCountD = oemf.getElementCounts(useSymbol=True)
        # ccIdL1 = idxP.filterMinimumMolecularFormula(typeCountD)
        #
        featureCountD = oemf.getFeatureCounts() if not skipFeatures else {}
        # Adjust filter according to search options
        if matchOpts in matchOpts in [
                "relaxed", "graph-relaxed", "simple",
                "sub-struct-graph-relaxed"
        ]:
            for ky in ["rings_ar", "at_ar", "at_ch"]:
                featureCountD.pop(ky, None)
        elif matchOpts in [
                "relaxed-stereo", "graph-relaxed-stereo",
                "sub-struct-graph-relaxed-stereo", "graph-relaxed-stereo-sdeq",
                "sub-struct-graph-relaxed-stereo-sdeq"
        ]:
            for ky in ["rings_ar", "at_ar"]:
                featureCountD.pop(ky, None)
        elif matchOpts in [
                "default", "strict", "graph-strict", "graph-default",
                "sub-struct-graph-strict"
        ]:
            pass
        ccIdL = idxP.filterMinimumFormulaAndFeatures(typeCountD, featureCountD)
        logger.info(
            "Pre-filtering results for formula+feature %d (%.4f seconds)",
            len(ccIdL),
            time.time() - startTime)
        return ccIdL
 def testBuildRelated(self):
     """Test build molecules chemical definitions -"""
     try:
         logger.info("Starting")
         ccMolD = self.__getChemCompDefs()
         oemf = OeMoleculeFactory(quietMode=True)
         relD = {}
         for ccId, ccObj in list(ccMolD.items())[:100]:
             # ----
             tId = oemf.setChemCompDef(ccObj)
             self.assertEqual(tId, ccId)
             tD = oemf.buildRelated(limitPerceptions=False)
             logger.info("%s related molecular forms %d", ccId, len(tD))
             relD.update(tD)
         logger.info("Total molecular forms (%d)", len(relD))
         # ----
     except Exception as e:
         logger.exception("Failing with %s", str(e))
         self.fail()
示例#10
0
    def write(self, filePath, oeMol, constantMol=False, addSdTags=True):
        """Write an oeMol with format type inferred from the filePath extension (e.g. .mol)

        Args:
            filePath (str): filepath with a chemical type extension
            constantMol (bool, optional): copies molecule before performing format specific perceptions

        Returns:
            bool: True for success or False otherwise
        """
        try:
            molId = os.path.splitext(os.path.basename(filePath))[0]
            fmt = os.path.splitext(os.path.basename(filePath))[1][1:].lower()
            #
            if addSdTags:
                oemf = OeMoleculeFactory()
                oemf.setOeMol(oeMol, molId)
                oemf.addSdTags()
                oeMol = oemf.getMol()
            #
            self.__mU.mkdir(os.path.dirname(filePath))
            ofs = oechem.oemolostream()
            ofs.open(filePath)
            logger.debug("Writing (fmt=%s) molId %s path %s title %s", fmt,
                         molId, filePath, oeMol.GetTitle())
            #
            if constantMol:
                oechem.OEWriteConstMolecule(ofs, oeMol)
            else:
                oechem.OEWriteMolecule(ofs, oeMol)
            #
            # If this is a mol2 file, we need to replace the resname
            if fmt.startswith("mol2"):
                # If this is a mol2/mol2h substitute the default substructure id
                with open(filePath, "r", encoding="utf-8") as ifh:
                    lines = ifh.readlines()
                lines = [line.replace("<0>", molId) for line in lines]
                with open(filePath, "w", encoding="utf-8") as ofh:
                    ofh.writelines(lines)
            return True
        except Exception as e:
            logger.exception("Failing for %s with %s", filePath, str(e))
        return False
示例#11
0
    def stringToMols(self, txt, sType="mol2", use3D=False):
        """Parse the input string as input format type (sType) returning a list of
        molecule objects (OeGraphMol)

        Args:
            txt (str): string text of molecule data
            sType (str, optional): string data format (mol2, sdf, smiles) . Defaults to "mol2".

        Returns:
            list: list of OeGraphMol() objects
        """
        #
        mL = []
        oemf = OeMoleculeFactory()
        try:
            if sType not in ["mol2", "sdf", "smiles"]:
                logger.error("Unsupported string data format")
                return None
            fD = {
                "mol2": oechem.OEFormat_MOL2,
                "sdf": oechem.OEFormat_SDF,
                "smiles": oechem.OEFormat_SMI
            }
            ifs = oechem.oemolistream()
            ifs.SetFormat(fD["sType"])
            if not ifs.openstring(txt):
                logger.error("Unable open string data for molecule reader")
                return None
            for tMol in ifs.GetOEGraphMols():
                oeMol = oechem.OEGraphMol(tMol)
                if use3D:
                    mL.append(
                        oemf.updateOePerceptions3D(
                            oeMol, aromaticModel=oechem.OEAroModelOpenEye))
                else:
                    mL.append(
                        oemf.updateOePerceptions2D(
                            oeMol, aromaticModel=oechem.OEAroModelOpenEye))

        except Exception as e:
            logger.exception("Failing with %s", str(e))
        return mL
示例#12
0
 def chemCompToMol(self,
                   ccdFilePath,
                   molBuildType="model-xyz",
                   quietFlag=False):
     retMolL = []
     try:
         rdCcObjL = self.__mU.doImport(ccdFilePath, fmt="mmcif")
         logger.info("Read %s with %d definitions", ccdFilePath,
                     len(rdCcObjL))
         oemf = OeMoleculeFactory()
         if quietFlag:
             oemf.setQuiet()
         for ccObj in rdCcObjL:
             ccId = oemf.setChemCompDef(ccObj)
             if ccId:
                 ok = oemf.build(molBuildType=molBuildType)
                 if ok:
                     oeMol = oemf.getMol()
                     retMolL.append(oeMol)
     except Exception as e:
         logger.exception("Loading %s failing with %s", ccdFilePath, str(e))
     return retMolL
 def testDepictByBuildType(self):
     """Compare depictions constructed molecules with various builds from chemical defintions -"""
     try:
         ccIdList = self.__ccIdList
         ccMolD = self.__getChemCompDefs()
         #
         limitPerceptions = True
         molBuildTypeL = ["model-xyz", "ideal-xyz", "connection-table", "oe-iso-smiles"]
         #
         startTime = time.time()
         oefm = OeMoleculeFactory()
         for molBuildType in molBuildTypeL:
             for ccId in ccIdList:
                 ccObj = ccMolD[ccId]
                 # ----
                 tId = oefm.setChemCompDef(ccObj)
                 self.assertEqual(tId, ccId)
                 ok = oefm.build(molBuildType=molBuildType, limitPerceptions=limitPerceptions)
                 if not ok:
                     logger.info("Build using %r failed for %s", molBuildType, ccId)
                     continue
                 #
                 oeMol = oefm.getGraphMol()
                 pS = "-limited" if limitPerceptions else ""
                 imagePath = os.path.join(self.__workPath, ccId + "-%s%s.svg" % (molBuildType, pS))
                 oed = OeDepict()
                 title = ""
                 oed.setMolTitleList([(ccId, oeMol, title)])
                 oed.setDisplayOptions(labelAtomName=False, labelAtomCIPStereo=True, labelAtomIndex=False, labelBondIndex=False, cellBorders=False, bondDisplayWidth=0.5)
                 oed.setGridOptions(rows=1, cols=1)
                 oed.prepare()
                 oed.write(imagePath)
         logger.info("Completed depictions on %d molecules (%.4f seconds)", len(ccIdList) * len(molBuildTypeL), time.time() - startTime)
         #
     except Exception as e:
         logger.exception("Failing with %s", str(e))
         self.fail()
 def testBuilders(self):
     try:
         ccMolD = self.__getChemCompDefs()
         quietFlag = False
         molBuildTypeL = ["model-xyz", "ideal-xyz"]
         for molBuildType in molBuildTypeL:
             oemf = OeMoleculeFactory()
             if quietFlag:
                 oemf.setQuiet()
             #
             eCount = 0
             for tId, ccObj in ccMolD.items():
                 ccId = oemf.setChemCompDef(ccObj)
                 self.assertEqual(tId, ccId)
                 logger.debug("Building %s using molBuildType %r", ccId,
                              molBuildType)
                 if ccId:
                     ok = oemf.build(molBuildType=molBuildType)
                     logger.debug(
                         "Comparing built component %s using molBuildType %r",
                         ccId, molBuildType)
                     # ok = oemf.compare()
                     ok = True
                     if not ok:
                         logger.info("Failing on %s molBuildType %r", ccId,
                                     molBuildType)
                         eCount += 1
                     # self.assertTrue(ok)
                 else:
                     logger.error("Cannot process %r", ccObj.getName())
             logger.info(
                 "Processed %d components molBuildType %r errors %d",
                 len(ccMolD), molBuildType, eCount)
             #
     except Exception as e:
         logger.exception("Failing with %s", str(e))
         self.fail()
示例#15
0
    def fileToMols(self, filePath, use3D=False, largestPart=False):
        """Parse the input path returning a list of molecule objects (OeGraphMol).

        Args:
            filePath (str): file path must have strandard recognized extension ('mol', 'sdf', 'smi', 'oeb').

        Returns:
            list : list of OeGraphMol() objects

        """
        mL = []
        oemf = OeMoleculeFactory()
        try:
            ifs = oechem.oemolistream()
            if ifs.open(filePath):
                for tMol in ifs.GetOEGraphMols():
                    oeMol = oechem.OEGraphMol(tMol)
                    # if oechem.OEReadMolecule(ifs, oeMol):
                    if largestPart:
                        molL = oemf.getParts(oeMol)
                        if len(molL) > 0:
                            oeMol = molL[0]
                            logger.info(
                                "Using largest bonded molecule part (%d/%d)",
                                len(molL), oeMol.NumAtoms())
                    if use3D:
                        mL.append(
                            oemf.updateOePerceptions3D(
                                oeMol, aromaticModel=oechem.OEAroModelOpenEye))
                    else:
                        mL.append(
                            oemf.updateOePerceptions2D(
                                oeMol, aromaticModel=oechem.OEAroModelOpenEye))
        except Exception as e:
            logger.exception("Failing with %s", str(e))
        return mL
    def testBuildRelatedExtra(self):
        """Test build molecules chemical definitions including extra chemaxon descriptors"""

        try:
            caxP = ChemAxonDescriptorProvider(
                ccUrlTarget=self.__ccUrlTarget,
                birdUrlTarget=self.__birdUrlTarget,
                cachePath=self.__cachePath,
                useCache=True,
                ccFileNamePrefix="cc-abbrev")
            ok = caxP.testCache()
            self.assertTrue(ok)
            descrD = caxP.getDescriptorIndex()
            #
            ccMolD = self.__getChemCompDefs()
            oemf = OeMoleculeFactory(quietMode=True)
            relD = {}
            for ccId, ccObj in list(ccMolD.items())[:100]:
                # ----
                tId = oemf.setChemCompDef(ccObj)
                self.assertEqual(tId, ccId)
                #
                oemf.clearExternalDescriptors()
                for smi in descrD[ccId] if ccId in descrD else []:
                    oemf.addExternalDescriptor("smiles", smi,
                                               "chemaxon-smiles")
                #
                tD = oemf.buildRelated(limitPerceptions=False)
                logger.info("%s related molecular forms %d", ccId, len(tD))
                relD.update(tD)
            logger.info("Total molecular forms (%d)", len(relD))

            # ----
        except Exception as e:
            logger.exception("Failing with %s", str(e))
            self.fail()
示例#17
0
    def buildOeBinaryMolCacheFromIndex(self,
                                       filePath,
                                       ccIdxD,
                                       quietFlag=False,
                                       fpTypeList=None,
                                       limitPerceptions=False,
                                       suppressHydrogens=False):
        """Build cache of OEGraphMol() objects from the input chemical component search index.

        Args:
            filePath (str): output cache file path
            ccIdxD (dict): search index dictionary
            quietFlag (bool, optional): suppress OE output. Defaults to False.
            fpTypeList (list, optional): list of fingerprint types. Defaults to None.
            limitPerceptions (bool, optional): suppress automatic chemical perceptions. Defaults to False.
            suppressHydrogens (bool, optional): suppress explicit hydrogen count. Defaults to False.

        Returns:
            (int, int, list): chem comp success count, error count, chem comp identifier failure list
        """
        failIdList = []
        ccCount = 0
        errCount = 0
        startTime = time.time()
        try:
            ofs = oechem.oemolostream()
            ofs.SetFormat(oechem.OEFormat_OEB)
            if ofs.open(filePath):
                oemf = OeMoleculeFactory()
                if quietFlag:
                    oemf.setQuiet()
                for searchCcId, ccIdx in ccIdxD.items():
                    oemf.setDescriptor(ccIdx["smiles"], "oe-iso-smiles",
                                       searchCcId)
                    ok = oemf.build(molBuildType="oe-iso-smiles",
                                    limitPerceptions=limitPerceptions)
                    if ok and fpTypeList:
                        fpOk = oemf.addFingerPrints(fpTypeList)
                        if not fpOk:
                            logger.info("Fingerprint generation fails for %r",
                                        searchCcId)
                    if ok:
                        if not suppressHydrogens:
                            oemf.addExplicitHydrogens()
                            oemf.setSimpleAtomNames()
                        oeMol = oemf.getMol(
                            suppressHydrogens=suppressHydrogens)
                        oechem.OEWriteMolecule(ofs, oeMol)
                        ccCount += 1
                    if not ok:
                        # build failed incomplete component (e.g. missing atoms or bonds)
                        errCount += 1
                        failIdList.append(searchCcId)
            else:
                logger.error("Unable to open cache database %s", filePath)
                errCount += 1
        except Exception as e:
            logger.exception("Failing with %s", str(e))
        #
        endTime = time.time()
        logger.info("Completed operation at %s (%.4f seconds)",
                    time.strftime("%Y %m %d %H:%M:%S", time.localtime()),
                    endTime - startTime)
        return ccCount, errCount, failIdList
示例#18
0
    def buildOeBinaryMolCache(self,
                              filePath,
                              ccObjD,
                              molBuildType="model-xyz",
                              quietFlag=False,
                              fpTypeList=None,
                              limitPerceptions=False,
                              suppressHydrogens=False):
        """Build cache of OEMol() objects from the input chemical component definition list.

        Args:
            filePath (str): output cache file path
            ccObjD (dict):  chemical component object dictionary
            molBuildType (str, optional): [description]. Defaults to "model-xyz".
            quietFlag (bool, optional): [description]. Defaults to False.
            fpTypeList (list, optional): fingerprint type list. Defaults to None.
            limitPerceptions (bool, optional): suppress automatic chemical perceptions. Defaults to False.
            suppressHydrogens (bool, optional): suppress explicit hydrogen count. Defaults to False.

        Returns:
            (int, int, list): chem comp success count, error count, chem comp identifier failure list

        """
        ok = False
        startTime = time.time()
        failIdList = []
        ccCount = 0
        errCount = 0
        try:
            ofs = oechem.oemolostream()
            ofs.SetFormat(oechem.OEFormat_OEB)
            if ofs.open(filePath):
                oemf = OeMoleculeFactory()
                if quietFlag:
                    oemf.setQuiet()
                for ccId, ccObj in ccObjD.items():
                    tId = oemf.setChemCompDef(ccObj)
                    if tId and tId == ccId:
                        ok = oemf.build(molBuildType=molBuildType,
                                        limitPerceptions=limitPerceptions)
                        if ok and fpTypeList:
                            fpOk = oemf.addFingerPrints(fpTypeList)
                            if not fpOk:
                                logger.info(
                                    "Fingerprint generation fails for %r",
                                    ccId)
                        if ok:
                            oeMol = oemf.getMol(
                                suppressHydrogens=suppressHydrogens)
                            oechem.OEWriteMolecule(ofs, oeMol)
                            ccCount += 1
                    if not ok or not tId:
                        # build failed incomplete component (e.g. missing atoms or bonds)
                        errCount += 1
                        failIdList.append(ccId)
            else:
                logger.error("Unable to open cache database %s", filePath)
                errCount += 1
        except Exception as e:
            logger.exception("Failing with %s", str(e))
        #
        endTime = time.time()
        logger.info("Completed operation at %s (%.4f seconds)",
                    time.strftime("%Y %m %d %H:%M:%S", time.localtime()),
                    endTime - startTime)
        return ccCount, errCount, failIdList
    def testSelfConsistency(self):
        """Compare constructed molecule with underlying chemical definitions -"""
        try:
            failL = []
            ccMolD = self.__getChemCompDefs()
            #
            # molBuildTypeL = ["model-xyz", "ideal-xyz", None]
            # molBuildTypeL = [None]
            #
            oemf = OeMoleculeFactory()
            macmp = MoleculeAnnotationsCompare()

            limitPerceptions = False
            # buildTypeRef = "oe-iso-smiles"
            buildTypeRef = "model-xyz"
            filterHydrogens = False
            if buildTypeRef in [
                    "oe-iso-smiles", "oe-smiles", "cactvs-smiles",
                    "cactvs-iso-smiles", "acdlabs-smiles", "inchi"
            ]:
                filterHydrogens = True
            #
            for ccId, ccObj in ccMolD.items():
                # ----
                tId = oemf.setChemCompDef(ccObj)
                self.assertEqual(tId, ccId)
                ok = oemf.build(molBuildType=buildTypeRef,
                                limitPerceptions=limitPerceptions,
                                normalize=False)
                if not ok:
                    logger.info("Build using %r failed for %s", buildTypeRef,
                                ccId)
                    continue
                #
                doTautomers = False
                if doTautomers:
                    tautomerMolL = oemf.getTautomerMolList()
                    logger.info("%s number reasonable tautomers %d", ccId,
                                len(tautomerMolL))
                #
                refFD = macmp.getChemCompFeatures(
                    ccObj,
                    descriptorProgram="OPENEYE",
                    filterHydrogens=filterHydrogens)
                tstFD = oemf.getOeMoleculeFeatures(
                    filterHydrogens=filterHydrogens)
                # logger.info("tstFD %r", tstFD)
                ok, retCmp = macmp.compare(refFD,
                                           tstFD,
                                           tstInfo="Openeye ISO SMILES")
                if not ok:
                    logger.info("Comparison failed build type %r and %r",
                                buildTypeRef, ccId)
                    logger.debug(
                        "diff -> atomatic atoms %r stereo atoms %r bond types %r aromatic bonds %r",
                        retCmp.difAromaticAtoms,
                        retCmp.difStereoAtoms,
                        retCmp.difTypeBonds,
                        retCmp.difAromaticBonds,
                    )
                    failL.append(ccId)
                #
            logger.info("Failures (%d) %r: ", len(failL), failL)
        except Exception as e:
            logger.exception("Failing with %s", str(e))
            self.fail()
    def testCompareByBuildType(self):
        """Compare depictions constructed molecules with various builds from chemical defintions -
        all build types 8769 (all)
        connect - smiles 6743
        model vs iso smiles 5937
        ideal va iso smiles  7047
        """
        doDepict = False
        ccResultD = {}
        genResultD = {}
        smilesByBuildTypeD = {}
        try:
            ccMolD, ccIdxD = self.__getChemCompDefs()
            #
            limitPerceptions = True
            # molBuildTypeL = ["model-xyz", "ideal-xyz", "connection-table", "oe-iso-smiles"]
            molBuildTypeL = ["ideal-xyz", "oe-iso-smiles"]
            #
            startTime = time.time()
            oefm = OeMoleculeFactory()
            oefm.setQuiet()
            for molBuildType in molBuildTypeL:
                for ccId, idxD in ccIdxD.items():
                    ccObj = ccMolD[ccId]
                    # ----
                    ccIsoSmiles = idxD["oe-iso-smiles"]
                    ccSmiles = idxD["oe-smiles"]
                    # ----
                    tId = oefm.setChemCompDef(ccObj)
                    if not tId:
                        logger.info("Skipping bad component %r", ccId)
                        continue
                    self.assertEqual(tId, ccId)
                    ok = oefm.build(molBuildType=molBuildType,
                                    limitPerceptions=limitPerceptions)
                    if not ok:
                        logger.info("Build using %r failed for %s",
                                    molBuildType, ccId)
                        continue
                    # ------
                    oeMol = oefm.getGraphMol()
                    oeIsoSmiles = oefm.getIsoSMILES()
                    oeSmiles = oefm.getCanSMILES()
                    ccEq = oeIsoSmiles == ccIsoSmiles and oeSmiles == ccSmiles
                    #
                    oefmR = OeMoleculeFactory()
                    oefmR.setQuiet()
                    ccIdGen = ccId + "_gen"
                    oefmR.setDescriptor(oeIsoSmiles, "oe-iso-smiles", ccIdGen)
                    ok = oefmR.build(molBuildType="oe-iso-smiles",
                                     limitPerceptions=limitPerceptions)
                    if not ok:
                        logger.info("Build using %r failed for %s",
                                    molBuildType, ccIdGen)
                        continue
                    # ------
                    #
                    # oeMolGen = oefmR.getGraphMol()
                    oeIsoSmilesGen = oefmR.getIsoSMILES()
                    oeSmilesGen = oefmR.getCanSMILES()
                    genEq = oeIsoSmiles == oeIsoSmilesGen and oeSmiles == oeSmilesGen
                    smilesByBuildTypeD.setdefault(ccId, {}).setdefault(
                        molBuildType, []).append(oeIsoSmilesGen)
                    #
                    logger.debug("%s buildType %s ccEq %r genEq %r", ccId,
                                 molBuildType, ccEq, genEq)
                    if not ccEq:
                        ccResultD.setdefault(molBuildType, []).append(ccId)
                    if not genEq:
                        genResultD.setdefault(molBuildType, []).append(ccId)

                    if doDepict:
                        pS = "-limited" if limitPerceptions else ""
                        imagePath = os.path.join(
                            self.__workPath,
                            ccId + "-%s%s.svg" % (molBuildType, pS))
                        oed = OeDepict()
                        title = ""
                        oed.setMolTitleList([(ccId, oeMol, title)])
                        oed.setDisplayOptions(labelAtomName=False,
                                              labelAtomCIPStereo=True,
                                              labelAtomIndex=False,
                                              labelBondIndex=False,
                                              cellBorders=False,
                                              bondDisplayWidth=0.5)
                        oed.setGridOptions(rows=1, cols=1)
                        oed.prepare()
                        oed.write(imagePath)
            logger.info(
                "Completed comparing %d molecules in %d builds (%.4f seconds)",
                len(ccIdxD), len(molBuildTypeL),
                time.time() - startTime)
            #
            #
            for molBuildType in molBuildTypeL:
                if molBuildType in genResultD:
                    logger.info("GEN %s (%d) %r", molBuildType,
                                len(genResultD[molBuildType]),
                                genResultD[molBuildType])

            numDiff = 0
            for ccId, btD in smilesByBuildTypeD.items():
                tS = set()
                for molBuildType, sL in btD.items():
                    tS.add(sL[0])
                if len(tS) > 1:
                    numDiff += 1
                    logger.debug("%s diff smiles (%d) %r", ccId, len(tS), tS)
            logger.info("Components with inconsistent SMILES %d", numDiff)
            #
        except Exception as e:
            logger.exception("Failing with %s", str(e))
            self.fail()
    def __testReproduceDescriptors(self, molBuildType, limitPerceptions=True):
        #
        ccMolD, ccIdxD = self.__getChemCompDefs()
        oemf = OeMoleculeFactory()
        countD = defaultdict(int)
        for ccId, ccDef in ccMolD.items():
            tId = oemf.setChemCompDef(ccDef)
            if ccId != tId:
                continue
            oemf.build(molBuildType=molBuildType,
                       limitPerceptions=limitPerceptions)
            oeMol = oemf.getMol()
            #
            countD["total components"] += 1
            if ccId not in ccIdxD:
                logger.info("Missing ccIndex entry for %s", ccId)
                continue
            ccdD = ccIdxD[ccId]
            if ccdD["ambiguous"]:
                countD["ambiguous component"] += 1
                continue
            #
            countD["total molecules"] += 1

            nativeCanIsoSmiles = oechem.OECreateIsoSmiString(oeMol)
            canIsoSmiles = oechem.OEMolToSmiles(oeMol)
            isoSmiles = oemf.getIsoSMILES()
            canSmiles = oemf.getCanSMILES()
            # check interal consistency
            if nativeCanIsoSmiles != isoSmiles:
                logger.error("%s stored and calculated OE smiles differ %s %s",
                             ccId, nativeCanIsoSmiles, isoSmiles)
            if canIsoSmiles != isoSmiles:
                logger.error(
                    "%s calculated OE ISO and canonical smiles differ %s %s",
                    ccId, isoSmiles, canIsoSmiles)

            # compare with archived values
            if isoSmiles != ccdD["oe-iso-smiles"]:
                logger.info("%s ISO SMILES differ \nccd: %r  \nOE:  %r", ccId,
                            ccdD["oe-iso-smiles"], isoSmiles)
                countD["iso_smiles_diff"] += 1
            # ----------
            if canSmiles != ccdD["oe-smiles"]:
                logger.info("%s CAN SMILES differ \nccd: %r  \nOE:  %r", ccId,
                            ccdD["oe-smiles"], canSmiles)
                countD["smiles_diff"] += 1

            formula = oemf.getFormula()
            if formula.upper() != ccdD["formula"].upper():
                logger.debug("%s formulas differ \nccd: %r  \nOE:  %r", ccId,
                             ccdD["formula"], formula)
                countD["formula_diff"] += 1
            # ---------
            inchiKey = oemf.getInChIKey()
            if inchiKey != ccdD["inchikey"]:
                logger.debug("%s InChI keys differ \nccd: %r  \nOE:  %r", ccId,
                             ccdD["inchikey"], inchiKey)
                countD["inchikey_diff"] += 1
            #
            inchi = oemf.getInChI()
            if inchi != ccdD["inchi"]:
                logger.debug("%s InChIs differ \nccd: %r  \nOE:  %r", ccId,
                             ccdD["inchi"], inchi)
                countD["inchi_diff"] += 1
        #
        #
        for ky, vl in countD.items():
            logger.info("%-12s %6d", ky, vl)
示例#22
0
    def testSssWithFingerPrintFromDescriptor(self):
        oemp = OeMoleculeProvider(**self.__myKwargs)
        ok = oemp.testCache()
        ccmP = ChemCompIndexProvider(**self.__myKwargs)
        ccIdxD = ccmP.getIndex()
        ok = ccmP.testCache(minCount=self.__minCount)
        self.assertTrue(ok)
        limitPerceptions = False
        # minFpScore = 0.5
        maxFpResults = 50
        matchOpts = "graph-relaxed"
        numMols = 20
        oeioU = OeIoUtils()
        oesU = OeSearchUtils(oemp, fpTypeList=self.__fpTypeList)
        missTupL = []
        missedD = {}
        missedFpD = {}
        # ----
        startTime = time.time()
        for ccId, ccD in list(ccIdxD.items())[:numMols]:
            for buildType in [
                    "oe-iso-smiles", "oe-smiles", "acdlabs-smiles",
                    "cactvs-iso-smiles", "cactvs-smiles", "inchi"
            ]:
                if buildType in ccD:
                    logger.debug("Search %s %r", ccId, ccD[buildType])
                    if buildType in ["inchi"]:
                        oemf = OeMoleculeFactory()
                        oemf.setDescriptor(ccD["inchi"], "inchi", ccId)
                        ok = oemf.build(molBuildType="inchi",
                                        limitPerceptions=limitPerceptions)
                        if not ok:
                            logger.info("%s build failed with InChI %r", ccId,
                                        ccD["inchi"])
                        else:
                            oeMol = oemf.getMol()
                            if oemf.getInChI() != ccD["inchi"]:
                                logger.info(
                                    "%s regenerated InChI differs\n%r\n%s",
                                    ccId, ccD["inchi"], oemf.getInChI())
                    else:
                        oeMol = oeioU.smilesToMol(
                            ccD[buildType], limitPerceptions=limitPerceptions)
                    if not oeMol:
                        continue
                    maxHits = 0
                    minHits = maxFpResults
                    selfHit = False
                    for fpType, minFpScore in self.__fpTypeCuttoffList:
                        retStatus, mL = oesU.searchSubStructureWithFingerPrint(
                            oeMol,
                            fpType,
                            minFpScore,
                            maxFpResults,
                            matchOpts=matchOpts)
                        self.assertTrue(retStatus)
                        logger.debug("%s fpType %r hits %d", ccId, fpType,
                                     len(mL))
                        maxHits = max(maxHits, len(mL))
                        minHits = min(minHits, len(mL))
                        matchedSelf = self.__resultContains(ccId, mL)
                        selfHit = selfHit or matchedSelf
                        if not matchedSelf:
                            missedFpD.setdefault(ccId, []).append(
                                (buildType, fpType, len(mL)))
                    if not selfHit:
                        missedD.setdefault(ccId, []).append(buildType)

                    logger.info("%s (%r) buildType %r min hits %d max hits %d",
                                ccId, selfHit, buildType, minHits, maxHits)
                else:
                    logger.info("%s missing descriptor %r", ccId, buildType)
        #
        for ccId, missL in missedD.items():
            logger.info("%s missed list %r", ccId, missL)
            if ccId in missedFpD:
                logger.info("%s unmatched for fpTypes %r", ccId,
                            missedFpD[ccId])
        # ----
        doDepict = False
        if doDepict:
            mD = {}
            for missTup in missTupL:
                mD.setdefault(missTup[0], []).append(missTup[1])

            for ccId, buildTypeL in mD.items():
                idxD = ccIdxD[ccId]
                if "oe-iso-smiles" in idxD:
                    for buildType in buildTypeL:
                        self.__displayAlignedDescriptorPair(
                            ccId,
                            idxD["oe-iso-smiles"],
                            "oe-iso-smiles",
                            idxD[buildType],
                            buildType,
                            title=None,
                            limitPerceptions=True)

        logger.info("%s fingerprints search on %d in (%.4f seconds)",
                    len(self.__fpTypeList), numMols,
                    time.time() - startTime)
    def testRoundTrip(self):
        """Round trip smiles comparisons -"""
        try:
            ccMolD = self.__getChemCompDefs()
            # useCache = True
            # quietFlag = False
            # molBuildTypeL = ["model-xyz", "ideal-xyz", None]
            # molBuildTypeL = [None]
            buildTypeRef = "oe-iso-smiles"
            oemf1 = OeMoleculeFactory()
            oemf2 = OeMoleculeFactory()
            #
            for ccId, ccObj in ccMolD.items():
                # ----
                ccIt = iter(PdbxChemCompIt(ccObj))
                cc = next(ccIt)
                formula = cc.getFormulaWithCharge()
                # ccId = cc.getId()
                ccName = cc.getName()
                ifCharge = cc.getFormalChargeAsInt()
                isAmbiguous = cc.getAmbiguousFlag() in ["Y", "y"]
                isCurrent = cc.getReleaseStatus() in ["REL"]
                logger.debug("%s name %r formula %r charge %d", ccId, ccName,
                             formula, ifCharge)
                # ----
                ccId = oemf1.setChemCompDef(ccObj)
                ok = oemf1.build(molBuildType=buildTypeRef,
                                 limitPerceptions=False)
                if not ok:
                    logger.info(
                        "Build using %r failed for %s (ambiguous flag %r current %r)",
                        buildTypeRef, ccId, isAmbiguous, isCurrent)
                #
                isDiff = False
                #
                if isDiff:
                    genIsoSmi = oemf1.getCanSMILES()
                    oemf2 = OeMoleculeFactory()
                    oemf2.setDescriptor(genIsoSmi, "oe-iso-smiles", ccId)
                    oemf2.build(molBuildType="oe-iso-smiles",
                                limitPerceptions=False)
                    regenIsoSmi = oemf2.getIsoSMILES()
                    if genIsoSmi != regenIsoSmi:
                        logger.info(
                            "%s  regenerated ISOSMILES differ \n -- INP: %s\n -- OUT: %s",
                            ccId, genIsoSmi, regenIsoSmi)

                    oed = OeDepictMCSAlignPage()
                    oed.setDisplayOptions(
                        labelAtomName=True,
                        labelAtomCIPStereo=True,
                        labelAtomIndex=False,
                        labelBondIndex=False,
                        labelBondCIPStereo=True,
                        highlightStyleFit="ballAndStickInverse",
                        highLightNotMatchColorRef="pink",
                        bondDisplayWidth=0.5,
                    )
                    oed.setRefMol(oemf1.getGraphMol(), ccId)
                    oed.setFitMol(oemf2.getGraphMol(), ccId)
                    imgPath = os.path.join(
                        self.__workPath,
                        "compare-assigned-" + ccId + "-calc-" + ccId + ".svg")
                    logger.info("Using image path %r", imgPath)
                    aML = oed.alignPair(imagePath=imgPath)
                    if aML:
                        for (rCC, rAt, tCC, tAt) in aML:
                            logger.info("%5s %-5s %5s %-5s", rCC, rAt, tCC,
                                        tAt)
                else:
                    logger.debug("%s matched all cases", ccId)

        except Exception as e:
            logger.exception("Failing with %s", str(e))
            self.fail()
    def __buildChemCompIndex(self,
                             cD,
                             molBuildType="model-xyz",
                             doFeatures=True):
        """Internal method return a dictionary of extracted chemical component descriptors and formula."""
        rD = {}
        try:
            quietFlag = True
            for _, dataContainer in cD.items():
                ccIt = iter(PdbxChemCompIt(dataContainer))
                cc = next(ccIt, None)
                ccId = cc.getId()
                formula = str(cc.getFormula()).replace(" ", "")
                ambiguousFlag = cc.getAmbiguousFlag().upper() in ["Y", "YES"]
                tch = cc.getFormalCharge()
                fcharge = int(tch) if tch and tch not in [".", "?"] else 0
                #
                logger.debug("ccId %r formula %r ambiguous %r fcharge %r",
                             ccId, formula, ambiguousFlag, fcharge)
                if fcharge:
                    sign = "+" if fcharge > 0 else "-"
                    mag = str(abs(fcharge)) if abs(fcharge) > 1 else ""
                    formula = formula + sign + mag
                #
                atIt = PdbxChemCompAtomIt(dataContainer)
                typeCounts = defaultdict(int)
                for at in atIt:
                    aType = at.getType().upper()
                    typeCounts[aType] += 1
                #
                rD[ccId] = {
                    "formula": formula,
                    "type-counts": typeCounts,
                    "ambiguous": ambiguousFlag,
                    "feature-counts": {}
                }
                desIt = PdbxChemCompDescriptorIt(dataContainer)
                for des in desIt:
                    desBuildType = des.getMolBuildType()
                    tS = des.getDescriptor()
                    descr = tS.strip() if tS else None
                    if not descr:
                        continue
                    if desBuildType in [
                            "oe-iso-smiles", "oe-smiles", "acdlabs-smiles",
                            "cactvs-iso-smiles", "cactvs-smiles", "inchi",
                            "inchikey"
                    ]:
                        rD[ccId][desBuildType] = descr
                    else:
                        logger.error("%s unexpected descriptor build type %r",
                                     ccId, desBuildType)
                if doFeatures:
                    oemf = OeMoleculeFactory()
                    if quietFlag:
                        oemf.setQuiet()
                    tId = oemf.setChemCompDef(dataContainer)
                    if tId != ccId:
                        logger.error(
                            "%s chemical component definition import error",
                            ccId)
                        continue
                    ok = oemf.build(molBuildType=molBuildType)
                    if ok:
                        rD[ccId]["feature-counts"] = oemf.getFeatureCounts()

        except Exception as e:
            logger.exception("Failing with %s", str(e))

        return rD
示例#25
0
    def __getMiscFile(self, filePath, suppressHydrogens=False, importType="2D", title=None, largestPart=False):
        """Fetch a miscellaneous chemical file (ccPath) and build OE molecules
        for comparison.

        """
        try:
            oeioU = OeIoUtils()
            oeMolL = oeioU.fileToMols(filePath, use3D=importType == "3D", largestPart=largestPart)
            logger.info("Read (%d) from %s ", len(oeMolL), filePath)
            oeMol = oeMolL[0]

            ccId = title if title else oeMol.GetTitle()
            if title:
                oeMol.SetTitle(ccId)
            #
            oemf = OeMoleculeFactory()
            if not self.__verbose:
                oemf.setQuiet()
            oemf.setOeMol(oeMol, ccId)
            #
            fD = oemf.getOeMoleculeFeatures()
            if self.__verbose:
                logger.info("  Title              = %s", title)
                logger.info("  Title OEMF         = %s", oemf.getTitle())
                logger.info("  SMILES             = %s", oemf.getCanSMILES())
                logger.info("  SMILES (stereo)    = %s", oemf.getIsoSMILES())
                logger.info("  Formula (Hill)     = %s", oemf.getFormula())
                logger.info("  InChI key          = %s", oemf.getInChIKey())
                logger.info("  InChI              = %s", oemf.getInChI())
            #
            ccId = oemf.getTitle()
            if suppressHydrogens:
                tMol = oemf.getGraphMolSuppressH()
            else:
                tMol = oemf.getMol()

            molXyzL = []
            if importType == "3D":
                for atm in tMol.GetAtoms():
                    xyzL = oechem.OEFloatArray(3)
                    tMol.GetCoords(atm, xyzL)
                    molXyzL.append(
                        ComponentAtomDetails(
                            atIdx=atm.GetIdx(),
                            atNo=atm.GetAtomicNum(),
                            atName=atm.GetName(),
                            atType=atm.GetType(),
                            x=xyzL[0],
                            y=xyzL[1],
                            z=xyzL[2],
                            atFormalCharge=atm.GetFormalCharge(),
                        )
                    )
            fD = {}
            fD = {
                "Formula": oemf.getFormula(),
                "SMILES": oemf.getCanSMILES(),
                "SMILES_STEREO": oemf.getIsoSMILES(),
                "InChI": oemf.getInChI(),
                "InChIKey": oemf.getInChIKey(),
                "xyz": molXyzL,
            }
            for atm in tMol.GetAtoms():
                xyzL = oechem.OEFloatArray(3)
                tMol.GetCoords(atm, xyzL)
                if self.__verbose:
                    logger.debug("atom  %s %s %s %s %r", atm.GetIdx(), atm.GetAtomicNum(), atm.GetName(), atm.GetType(), xyzL)

            fD["OEMOL"] = tMol
            return (ccId, tMol, fD)
        except Exception as e:
            logger.exception("Failing with %s", str(e))

        return None, None, None