def getCCDefObj(self, dataContainer, molBuildType="model-xyz", suppressHydrogens=False):
    """Build an OE molecule from the input chemical component definition object.

    Args:
        dataContainer: chemical component definition handed to
            OeMoleculeFactory.setChemCompDef().
        molBuildType (str, optional): build type forwarded to OeMoleculeFactory.build().
        suppressHydrogens (bool, optional): when True the molecule is taken from
            getGraphMolSuppressH(), otherwise from getMol().

    Returns:
        tuple: (ccId, molecule, featureD) where ccId is the value returned by
        setChemCompDef() and featureD has the keys "Formula", "SMILES",
        "SMILES_STEREO", "InChI", "InChIKey", "OEMOL" and "xyz".
    """
    factory = OeMoleculeFactory()
    if not self.__verbose:
        factory.setQuiet()
    ccId = factory.setChemCompDef(dataContainer)
    factory.build(molBuildType=molBuildType)
    #
    if self.__verbose:
        logger.info(" CCId = %s", ccId)
        # Each descriptor is fetched immediately before it is logged.
        descriptorLogL = (
            (" Title = %s", factory.getTitle),
            (" SMILES = %s", factory.getCanSMILES),
            (" SMILES (stereo) = %s", factory.getIsoSMILES),
            (" Formula (Hill) = %s", factory.getFormula),
            (" InChI key = %s", factory.getInChIKey),
            (" InChI = %s", factory.getInChI),
        )
        for logFormat, fetch in descriptorLogL:
            logger.info(logFormat, fetch())
    #
    # The "xyz" entry always requests the "model" coordinates from the factory.
    featureD = {
        "Formula": factory.getFormula(),
        "SMILES": factory.getCanSMILES(),
        "SMILES_STEREO": factory.getIsoSMILES(),
        "InChI": factory.getInChI(),
        "InChIKey": factory.getInChIKey(),
        "OEMOL": factory.getGraphMolSuppressH() if suppressHydrogens else factory.getMol(),
        "xyz": factory.getAtomDetails(xyzType="model"),
    }
    return (ccId, featureD["OEMOL"], featureD)
def getCCDefFile(self, ccFilePath, molBuildType="model-xyz", suppressHydrogens=False):
    """Read a chemical component definition file and build an OE molecule from it.

    The file is imported with MarshalUtil using the "mmcif" format and the first
    imported container is passed to getCCDefObj(), which performs the molecule
    build, the optional verbose logging and the feature extraction.

    Args:
        ccFilePath (str): path of the chemical component definition file.
        molBuildType (str, optional): build type forwarded to getCCDefObj().
        suppressHydrogens (bool, optional): forwarded to getCCDefObj().

    Returns:
        tuple: (ccId, molecule, featureD) exactly as returned by getCCDefObj().

    Note:
        The first container is indexed without a guard, so an import that yields
        no containers raises from the indexing expression.
    """
    mU = MarshalUtil(workPath=self.__workPath)
    rdCcObjL = mU.doImport(ccFilePath, fmt="mmcif")
    # Only the first container in the file is used; any others are ignored.
    return self.getCCDefObj(rdCcObjL[0], molBuildType=molBuildType, suppressHydrogens=suppressHydrogens)
def __testReproduceDescriptors(self, molBuildType, limitPerceptions=True):
    """Rebuild each chemical component and compare its descriptors with the indexed values.

    Every definition returned by __getChemCompDefs() is built with
    OeMoleculeFactory. The SMILES, formula, InChI key and InChI reported by the
    factory are compared with the corresponding entries of the component index;
    each mismatch is logged and counted, and the counters are logged at the end.

    Args:
        molBuildType (str): build type forwarded to OeMoleculeFactory.build().
        limitPerceptions (bool, optional): forwarded to OeMoleculeFactory.build().
    """
    ccMolD, ccIdxD = self.__getChemCompDefs()
    factory = OeMoleculeFactory()
    tallyD = defaultdict(int)
    for ccId, ccDef in ccMolD.items():
        # Skip definitions for which setChemCompDef() reports a different identifier.
        if ccId != factory.setChemCompDef(ccDef):
            continue
        factory.build(molBuildType=molBuildType, limitPerceptions=limitPerceptions)
        builtMol = factory.getMol()
        #
        tallyD["total components"] += 1
        if ccId not in ccIdxD:
            logger.info("Missing ccIndex entry for %s", ccId)
            continue
        archivedD = ccIdxD[ccId]
        if archivedD["ambiguous"]:
            tallyD["ambiguous component"] += 1
            continue
        #
        tallyD["total molecules"] += 1
        oeIsoSmiles = oechem.OECreateIsoSmiString(builtMol)
        oeMolSmiles = oechem.OEMolToSmiles(builtMol)
        factoryIsoSmiles = factory.getIsoSMILES()
        factoryCanSmiles = factory.getCanSMILES()
        # check internal consistency
        if oeIsoSmiles != factoryIsoSmiles:
            logger.error("%s stored and calculated OE smiles differ %s %s", ccId, oeIsoSmiles, factoryIsoSmiles)
        if oeMolSmiles != factoryIsoSmiles:
            logger.error("%s calculated OE ISO and canonical smiles differ %s %s", ccId, factoryIsoSmiles, oeMolSmiles)
        # compare with archived values
        archivedIsoSmiles = archivedD["oe-iso-smiles"]
        if factoryIsoSmiles != archivedIsoSmiles:
            logger.info("%s ISO SMILES differ \nccd: %r \nOE: %r", ccId, archivedIsoSmiles, factoryIsoSmiles)
            tallyD["iso_smiles_diff"] += 1
        # ----------
        archivedCanSmiles = archivedD["oe-smiles"]
        if factoryCanSmiles != archivedCanSmiles:
            logger.info("%s CAN SMILES differ \nccd: %r \nOE: %r", ccId, archivedCanSmiles, factoryCanSmiles)
            tallyD["smiles_diff"] += 1
        # Formulas are compared without regard to letter case.
        formula = factory.getFormula()
        formulaDiffers = formula.upper() != archivedD["formula"].upper()
        if formulaDiffers:
            logger.debug("%s formulas differ \nccd: %r \nOE: %r", ccId, archivedD["formula"], formula)
            tallyD["formula_diff"] += 1
        # ---------
        inchiKey = factory.getInChIKey()
        archivedInchiKey = archivedD["inchikey"]
        if inchiKey != archivedInchiKey:
            logger.debug("%s InChI keys differ \nccd: %r \nOE: %r", ccId, archivedInchiKey, inchiKey)
            tallyD["inchikey_diff"] += 1
        #
        inchi = factory.getInChI()
        archivedInchi = archivedD["inchi"]
        if inchi != archivedInchi:
            logger.debug("%s InChIs differ \nccd: %r \nOE: %r", ccId, archivedInchi, inchi)
            tallyD["inchi_diff"] += 1
    #
    # Summary of all counters accumulated above.
    for label, count in tallyD.items():
        logger.info("%-12s %6d", label, count)
def __getMiscFile(self, filePath, suppressHydrogens=False, importType="2D", title=None, largestPart=False):
    """Read a miscellaneous chemical file and build an OE molecule from its first entry.

    Args:
        filePath (str): path of the chemical file read with OeIoUtils.fileToMols().
        suppressHydrogens (bool, optional): when True the molecule is taken from
            getGraphMolSuppressH(), otherwise from getMol().
        importType (str, optional): "3D" requests a 3D import and populates the
            "xyz" feature with per-atom coordinates; any other value leaves
            "xyz" as an empty list.
        title (str, optional): when given, replaces the title of the molecule
            that was read.
        largestPart (bool, optional): forwarded to OeIoUtils.fileToMols().

    Returns:
        tuple: (ccId, molecule, featureD) where featureD has the keys "Formula",
        "SMILES", "SMILES_STEREO", "InChI", "InChIKey", "xyz" and "OEMOL".
        (None, None, None) is returned when no molecule could be read or when
        any step raises.
    """
    try:
        oeioU = OeIoUtils()
        oeMolL = oeioU.fileToMols(filePath, use3D=importType == "3D", largestPart=largestPart)
        # An empty (or missing) result is an expected failure mode: report it
        # explicitly rather than through an IndexError traceback.
        if not oeMolL:
            logger.error("No molecules read from %s", filePath)
            return None, None, None
        logger.info("Read (%d) from %s ", len(oeMolL), filePath)
        oeMol = oeMolL[0]
        ccId = title if title else oeMol.GetTitle()
        if title:
            oeMol.SetTitle(ccId)
        #
        oemf = OeMoleculeFactory()
        if not self.__verbose:
            oemf.setQuiet()
        oemf.setOeMol(oeMol, ccId)
        #
        # NOTE(review): the returned features were never used (the dictionary is
        # rebuilt below); the call is retained in case it has side effects on the
        # factory -- confirm and remove.
        oemf.getOeMoleculeFeatures()
        if self.__verbose:
            logger.info(" Title = %s", title)
            logger.info(" Title OEMF = %s", oemf.getTitle())
            logger.info(" SMILES = %s", oemf.getCanSMILES())
            logger.info(" SMILES (stereo) = %s", oemf.getIsoSMILES())
            logger.info(" Formula (Hill) = %s", oemf.getFormula())
            logger.info(" InChI key = %s", oemf.getInChIKey())
            logger.info(" InChI = %s", oemf.getInChI())
        #
        # The identifier returned to the caller is the title held by the factory.
        ccId = oemf.getTitle()
        tMol = oemf.getGraphMolSuppressH() if suppressHydrogens else oemf.getMol()
        #
        molXyzL = []
        if importType == "3D":
            for atm in tMol.GetAtoms():
                xyzL = oechem.OEFloatArray(3)
                tMol.GetCoords(atm, xyzL)
                molXyzL.append(
                    ComponentAtomDetails(
                        atIdx=atm.GetIdx(),
                        atNo=atm.GetAtomicNum(),
                        atName=atm.GetName(),
                        atType=atm.GetType(),
                        x=xyzL[0],
                        y=xyzL[1],
                        z=xyzL[2],
                        atFormalCharge=atm.GetFormalCharge(),
                    )
                )
        fD = {
            "Formula": oemf.getFormula(),
            "SMILES": oemf.getCanSMILES(),
            "SMILES_STEREO": oemf.getIsoSMILES(),
            "InChI": oemf.getInChI(),
            "InChIKey": oemf.getInChIKey(),
            "xyz": molXyzL,
            "OEMOL": tMol,
        }
        # Per-atom diagnostics: the coordinates are fetched only when they will
        # actually be logged.
        if self.__verbose:
            for atm in tMol.GetAtoms():
                xyzL = oechem.OEFloatArray(3)
                tMol.GetCoords(atm, xyzL)
                logger.debug("atom %s %s %s %s %r", atm.GetIdx(), atm.GetAtomicNum(), atm.GetName(), atm.GetType(), xyzL)
        return (ccId, tMol, fD)
    except Exception as e:
        # Best-effort boundary: any failure is logged with its traceback and
        # reported to the caller as a tuple of None values.
        logger.exception("Failing with %s", str(e))
    return None, None, None