def prefilterIndex(self, oeQueryMol, idxP, matchOpts="relaxed", skipFeatures=False):
    """Filter the full search index based on minimum chemical formula and feature criteria.

    Element counts (and, unless skipped, feature counts) are computed for the query
    molecule and handed to idxP.filterMinimumFormulaAndFeatures(). Depending on
    matchOpts, some feature keys are removed before filtering so that they do not
    constrain the search space.

    Args:
        oeQueryMol (object): search target molecule (OEMol)
        idxP (object): instance of ChemCompSearchIndexProvider()
        matchOpts (str, optional): search criteria options. Defaults to "relaxed".
        skipFeatures (bool, optional): skip feature filters. Defaults to False.

    Returns:
        (list): list of chemical component identifiers in the filtered search space
    """
    startTime = time.time()
    oemf = OeMoleculeFactory()
    oemf.setOeMol(oeQueryMol, "queryTarget")
    typeCountD = oemf.getElementCounts(useSymbol=True)
    # ccIdL1 = idxP.filterMinimumMolecularFormula(typeCountD)
    #
    featureCountD = oemf.getFeatureCounts() if not skipFeatures else {}
    # Adjust filter according to search options
    if matchOpts in ["relaxed", "graph-relaxed", "simple", "sub-struct-graph-relaxed"]:
        # Relaxed matching: drop the aromatic and chiral feature keys.
        dropKeyL = ["rings_ar", "at_ar", "at_ch"]
    elif matchOpts in [
        "relaxed-stereo",
        "graph-relaxed-stereo",
        "sub-struct-graph-relaxed-stereo",
        "graph-relaxed-stereo-sdeq",
        "sub-struct-graph-relaxed-stereo-sdeq",
    ]:
        # Relaxed matching with stereo: keep "at_ch", drop only the aromatic keys.
        dropKeyL = ["rings_ar", "at_ar"]
    else:
        # "default", "strict", "graph-strict", "graph-default", "sub-struct-graph-strict"
        # and any unrecognized option: use every feature count unchanged.
        dropKeyL = []
    for ky in dropKeyL:
        # pop() with a default tolerates absent keys (e.g. when skipFeatures is set).
        featureCountD.pop(ky, None)
    #
    ccIdL = idxP.filterMinimumFormulaAndFeatures(typeCountD, featureCountD)
    logger.info("Pre-filtering results for formula+feature %d (%.4f seconds)", len(ccIdL), time.time() - startTime)
    return ccIdL
def __buildChemCompIndex(self, cD, molBuildType="model-xyz", doFeatures=True):
    """Internal method returning a dictionary of extracted chemical component descriptors and formula.

    Args:
        cD (dict): mapping whose values are chemical component data containers
            (the keys are not used here)
        molBuildType (str, optional): build type passed to OeMoleculeFactory.build().
            Defaults to "model-xyz".
        doFeatures (bool, optional): build each molecule and record its feature counts.
            Defaults to True.

    Returns:
        (dict): one entry per component identifier of the form
            {"formula": <str>, "type-counts": {<atom type>: <count>}, "ambiguous": <bool>,
             "feature-counts": {...}, <descriptor build type>: <descriptor>, ...}

    Any exception raised while processing is logged and the entries collected up to
    that point are returned.
    """
    rD = {}
    knownDescriptorTypes = [
        "oe-iso-smiles",
        "oe-smiles",
        "acdlabs-smiles",
        "cactvs-iso-smiles",
        "cactvs-smiles",
        "inchi",
        "inchikey",
    ]
    try:
        for dataContainer in cD.values():
            cc = next(iter(PdbxChemCompIt(dataContainer)), None)
            if cc is None:
                # Skip this container rather than letting an AttributeError abort the
                # whole index build via the outer exception handler.
                logger.error("Skipping data container with no chemical component record")
                continue
            ccId = cc.getId()
            formula = str(cc.getFormula()).replace(" ", "")
            ambiguousFlag = cc.getAmbiguousFlag().upper() in ["Y", "YES"]
            tch = cc.getFormalCharge()
            # Missing or placeholder ("." / "?") charge values are treated as zero.
            fcharge = int(tch) if tch and tch not in [".", "?"] else 0
            #
            logger.debug("ccId %r formula %r ambiguous %r fcharge %r", ccId, formula, ambiguousFlag, fcharge)
            if fcharge:
                # Append the net charge to the formula: "+" / "-" for unit charge,
                # "+2" / "-3" etc. for larger magnitudes.
                sign = "+" if fcharge > 0 else "-"
                mag = str(abs(fcharge)) if abs(fcharge) > 1 else ""
                formula = formula + sign + mag
            #
            typeCounts = defaultdict(int)
            for at in PdbxChemCompAtomIt(dataContainer):
                typeCounts[at.getType().upper()] += 1
            #
            rD[ccId] = {"formula": formula, "type-counts": typeCounts, "ambiguous": ambiguousFlag, "feature-counts": {}}
            for des in PdbxChemCompDescriptorIt(dataContainer):
                desBuildType = des.getMolBuildType()
                tS = des.getDescriptor()
                descr = tS.strip() if tS else None
                if not descr:
                    continue
                if desBuildType in knownDescriptorTypes:
                    rD[ccId][desBuildType] = descr
                else:
                    logger.error("%s unexpected descriptor build type %r", ccId, desBuildType)
            if doFeatures:
                oemf = OeMoleculeFactory()
                oemf.setQuiet()
                tId = oemf.setChemCompDef(dataContainer)
                if tId != ccId:
                    # The entry stays in rD with empty feature counts.
                    logger.error("%s chemical component definition import error", ccId)
                    continue
                ok = oemf.build(molBuildType=molBuildType)
                if ok:
                    rD[ccId]["feature-counts"] = oemf.getFeatureCounts()
    except Exception as e:
        logger.exception("Failing with %s", str(e))
    return rD