def SerializeInput(itf):
    """Index the input molecules, serialize the index, then read it back.

    Reads structures from the ``-input`` file, adds each one to a matched
    pair analyzer using its 0-based record number as the index, writes the
    analyzer to the ``-output`` file and finally reloads that same file into
    a fresh analyzer.

    The two "EXPORT-COMPRESS" sections only construct analyzers to show how
    export compression is enabled; the analyzer that is actually indexed and
    written is the default-constructed ``mmp`` further down.  The
    ``@ <SNIPPET-...>`` markers look like anchors for documentation
    extraction and are kept verbatim.

    :param itf: command line interface object providing ``-input`` and
        ``-output`` string parameters.
    :returns: ``True`` when the end of the function is reached.
    """
    ims = oechem.oemolistream()
    if not ims.open(itf.GetString("-input")):
        # interpolate the filename; it used to be appended after a literal "%s"
        oechem.OEThrow.Fatal("Unable to open %s for reading" % itf.GetString("-input"))

    # NOTE(review): with the level raised to Warning the OEThrow.Info calls
    # below are presumably not shown, which would explain the extra print()s
    oechem.OEThrow.SetLevel(oechem.OEErrorLevel_Warning)

    # @ <SNIPPET-MATCHEDPAIRANALYZER-EXPORT-COMPRESS-MMP>
    # create analyzer class with defaults
    # - compression option disabled by default
    mmpAnalyzer = oemedchem.OEMatchedPairAnalyzer()
    # for serialization, enable export compression to
    # remove singleton index nodes by modifying analyzer
    mmpAnalyzer.ModifyOptions(oemedchem.OEMatchedPairOptions_ExportCompression, 0)
    # @ </SNIPPET-MATCHEDPAIRANALYZER-EXPORT-COMPRESS-MMP>

    # @ <SNIPPET-MATCHEDPAIRANALYZER-EXPORT-COMPRESS-OPTS>
    # create options class with defaults
    # - compression option disabled by default
    mmpOpts = oemedchem.OEMatchedPairAnalyzerOptions()
    # for serialization, enable export compression to
    # remove singleton index nodes by modifying analyzer
    mmpOpts.SetOptions(oemedchem.OEMatchedPairOptions_Default |
                       oemedchem.OEMatchedPairOptions_ExportCompression)
    # create analyzer class with compression option enabled
    mmpAnalyzer = oemedchem.OEMatchedPairAnalyzer(mmpOpts)
    # @ </SNIPPET-MATCHEDPAIRANALYZER-EXPORT-COMPRESS-OPTS>

    # @ <SNIPPET-MATCHEDPAIRANALYZER-SERIALIZE-INDEX>
    mmp = oemedchem.OEMatchedPairAnalyzer()
    for recindex, mol in enumerate(ims.GetOEGraphMols()):
        # AddMol hands back the supplied index on success; anything else is
        # treated as a status code and reported by name
        status = mmp.AddMol(mol, recindex)
        if status != recindex:
            oechem.OEThrow.Warning(
                "{0}: molecule indexing error, status={1}".format(
                    recindex, oemedchem.OEMatchedPairIndexStatusName(status)))

    print("Index complete, matched pairs = {0}".format(mmp.NumMatchedPairs()))

    # check for output filename with .mmpidx extension
    mmpexport = itf.GetString("-output")
    if not oemedchem.OEIsMatchedPairAnalyzerFileType(mmpexport):
        # NOTE(review): only informational here; the reload step below runs
        # the same file type check on this name and reports it via Fatal
        oechem.OEThrow.Info('Not a valid matched pair index output file, ' + mmpexport)
    elif not oemedchem.OEWriteMatchedPairAnalyzer(mmpexport, mmp):
        oechem.OEThrow.Fatal("Index serialization failed")
    else:
        oechem.OEThrow.Info("Index serialization complete")
    # @ </SNIPPET-MATCHEDPAIRANALYZER-SERIALIZE-INDEX>
    print("Index serialization complete")

    mmpimport = mmpexport

    # now try to reload serialized index
    # @ <SNIPPET-MATCHEDPAIRANALYZER-DESERIALIZE-INDEX>
    mmp = oemedchem.OEMatchedPairAnalyzer()
    if not oemedchem.OEIsMatchedPairAnalyzerFileType(mmpimport):
        oechem.OEThrow.Fatal('Not a valid matched pair index input file, ' + mmpimport)
    elif not oemedchem.OEReadMatchedPairAnalyzer(mmpimport, mmp):
        oechem.OEThrow.Fatal("Index deserialization failed")
    else:
        oechem.OEThrow.Info("Index deserialization complete")
    # @ </SNIPPET-MATCHEDPAIRANALYZER-DESERIALIZE-INDEX>
    print("Index deserialization complete")

    return True
def FindSimpleMatchedPairs(itf):
    """Index the input structures and print each transform with its pairs.

    Every molecule from the ``-input`` file is reduced to its largest
    component, stripped of atom and bond stereo, and added to a matched pair
    analyzer under a 1-based record number.  Indexing stops early once
    ``-maxrec`` records have been processed (when that value is non-zero).
    The transforms extracted from the index are then printed together with
    the record numbers of the molecules forming each matched pair.

    The ``@ <SNIPPET-...>`` markers look like anchors for documentation
    extraction and are kept verbatim.

    :param itf: command line interface object providing the ``-input``
        string and ``-maxrec`` integer parameters.
    :returns: ``True`` when the end of the function is reached.
    """
    ims = oechem.oemolistream()
    if not ims.open(itf.GetString("-input")):
        # interpolate the filename; it used to be appended after a literal "%s"
        oechem.OEThrow.Fatal("Unable to open %s for reading" % itf.GetString("-input"))

    maxrecs = itf.GetInt("-maxrec")

    oechem.OEThrow.SetLevel(oechem.OEErrorLevel_Warning)

    # @ <SNIPPET-FINDSIMPLEMATCHEDPAIRS-EXAMPLE>
    # create options class with defaults
    mmpOpts = oemedchem.OEMatchedPairAnalyzerOptions()
    # for 'simple' pairs, alter default indexing options
    # - single cuts only, heavy atom substituents only (HMember indexing off)
    # NOTE(review): ComboCuts is enabled alongside SingleCuts here - confirm
    # that this still matches the "single cuts only" description above
    mmpOpts.SetOptions(oemedchem.OEMatchedPairOptions_SingleCuts |
                       oemedchem.OEMatchedPairOptions_ComboCuts |
                       oemedchem.OEMatchedPairOptions_UniquesOnly)
    # - limit substituent size to no more than 20% of input structure
    mmpOpts.SetIndexableFragmentRange(80., 100.)

    # create analyzer class with nondefault options
    mmpAnalyzer = oemedchem.OEMatchedPairAnalyzer(mmpOpts)

    # ignore common index status returns
    # - exact names in a set, so an empty or partial status name can no
    #   longer match the way it did with a substring test on a joined string
    ignoreStatus = frozenset(('FragmentRangeFilter',
                              'DuplicateStructure',
                              'FragmentationLimitFilter',
                              'HeavyAtomFilter'))

    # index the input structures
    for recindex, mol in enumerate(ims.GetOEGraphMols(), start=1):
        # consider only the largest input fragment
        oechem.OEDeleteEverythingExceptTheFirstLargestComponent(mol)

        # ignore stereochemistry
        oechem.OEUncolorMol(mol,
                            (oechem.OEUncolorStrategy_RemoveAtomStereo |
                             oechem.OEUncolorStrategy_RemoveBondStereo))

        # explicitly provide a 1-based index to refer to indexed structures
        # - to allow references back to external data elsewhere
        status = mmpAnalyzer.AddMol(mol, recindex)
        if status != recindex:
            statusName = oemedchem.OEMatchedPairIndexStatusName(status)
            if statusName not in ignoreStatus:
                oechem.OEThrow.Warning(
                    "{0}: molecule indexing error, status={1}".format(
                        recindex, statusName))

        # if limiting input, quit after limit
        if maxrecs and recindex >= maxrecs:
            break

    print("Index complete, matched pairs = {0}".format(
        mmpAnalyzer.NumMatchedPairs()))

    # specify how transforms are extracted (direction and allowed properties)
    extractMode = (
        oemedchem.OEMatchedPairTransformExtractMode_Sorted |
        oemedchem.OEMatchedPairTransformExtractMode_NoSMARTS |
        oemedchem.OEMatchedPairTransformExtractMode_AddMCSCorrespondence)

    extractOptions = oemedchem.OEMatchedPairTransformExtractOptions()
    # specify amount of chemical context at the site of the substituent change
    # in the transform
    extractOptions.SetContext(oemedchem.OEMatchedPairContext_Bond0)
    extractOptions.SetOptions(extractMode)

    # walk the transforms and print the matched pairs
    transforms = oemedchem.OEMatchedPairGetTransforms(mmpAnalyzer, extractOptions)
    for xfmidx, mmpxform in enumerate(transforms, start=1):
        print("{0:2} {1}".format(xfmidx, mmpxform.GetTransform()))
        # dump matched molecular pairs and index identifiers
        # (recindex from indexing loop above)
        for mmppair in mmpxform.GetMatchedPairs():
            print("\tmatched pair molecule indices=({0},{1})".format(
                mmppair.FromIndex(), mmppair.ToIndex()))
    # @ </SNIPPET-FINDSIMPLEMATCHEDPAIRS-EXAMPLE>

    return True
def MMPTransformList(itf):
    """List the transforms stored in a matched pair index file.

    Loads the index named by ``-mmpindex`` and walks its transforms at the
    chemical context selected by ``-context`` (0, 1, 2, 3 or A).  With
    ``-printlist`` each transform and its matched pairs (including all SD
    data tags) are printed.  With ``-datafield`` the per-pair difference
    (to minus from) of that SD field is collected for every transform, and
    the transforms are then printed sorted by descending absolute average
    difference.

    :param itf: command line interface object providing ``-mmpindex``,
        ``-context``, ``-printlist`` and optionally ``-datafield``.
    :returns: ``None`` when neither ``-printlist`` nor ``-datafield`` was
        given or after printing the sorted summary; ``0`` when no data field
        was requested.
    """

    def _sd_float(getter, tag):
        # SD data that does not parse as a number is reported as None
        try:
            return float(getter(tag))
        except ValueError:
            return None

    def _by_descending_magnitude(entry):
        return -abs(float(entry.avg))

    # validate and load the MMP index
    index_path = itf.GetString("-mmpindex")
    if not oemedchem.OEIsMatchedPairAnalyzerFileType(index_path):
        oechem.OEThrow.Fatal(
            'Not a valid matched pair index input file, {}'.format(index_path))

    analyzer = oemedchem.OEMatchedPairAnalyzer()
    if not oemedchem.OEReadMatchedPairAnalyzer(index_path, analyzer):
        oechem.OEThrow.Fatal("Unable to load index {}".format(index_path))

    if not analyzer.NumMols():
        oechem.OEThrow.Fatal(
            'No records in loaded MMP index file: {}'.format(index_path))

    if not analyzer.NumMatchedPairs():
        oechem.OEThrow.Fatal(
            'No matched pairs found in MMP index file, ' +
            'use -fragGe,-fragLe options to extend indexing range')

    # map the first character of -context onto a context constant;
    # the 0-bond context stays in place when the character is not recognised
    context_by_key = {
        '0': oemedchem.OEMatchedPairContext_Bond0,
        '1': oemedchem.OEMatchedPairContext_Bond1,
        '2': oemedchem.OEMatchedPairContext_Bond2,
        '3': oemedchem.OEMatchedPairContext_Bond3,
        'a': oemedchem.OEMatchedPairContext_AllBonds,
        'A': oemedchem.OEMatchedPairContext_AllBonds,
    }
    context = oemedchem.OEMatchedPairContext_Bond0
    context_key = itf.GetString("-context")[:1]
    if context_key in context_by_key:
        context = context_by_key[context_key]
    else:
        oechem.OEThrow.Fatal("Invalid context specified: " +
                             context_key + ", only 0|1|2|3|A allowed")

    show_transforms = itf.GetBool("-printlist")

    # SD data field name, when one was supplied
    field = itf.GetString("-datafield") if itf.HasString("-datafield") else None

    if field is None and not show_transforms:
        oechem.OEThrow.Info(
            'Specify one of -datafield or -printlist, otherwise nothing to do!'
        )
        return

    extractOptions = oemedchem.OEMatchedPairTransformExtractOptions()
    # amount of chemical context at the site of the substituent change
    extractOptions.SetContext(context)
    # how transforms are extracted (direction and allowed properties)
    extractOptions.SetOptions(
        oemedchem.OEMatchedPairTransformExtractMode_Sorted |
        oemedchem.OEMatchedPairTransformExtractMode_NoSMARTS)

    # walk the transforms from the indexed matched pairs
    summaries = []
    all_transforms = oemedchem.OEMatchedPairGetTransforms(analyzer, extractOptions)
    for transform_no, transform in enumerate(all_transforms, start=1):
        if show_transforms:
            print("{0:2} {1}".format(transform_no, transform.GetTransform()))

        # property deltas collected for this transform
        deltas = []
        for pair_no, pair in enumerate(transform.GetMatchedPairs(), start=1):
            pieces = ["\t{0:2}: ({1:2},{2:2})".format(pair_no,
                                                      pair.FromIndex(),
                                                      pair.ToIndex())]
            for tag in pair.GetDataTags():
                pieces.append(" {0}=({1},{2})".format(
                    tag, pair.GetFromSDData(tag), pair.GetToSDData(tag)))
                if tag != field:
                    continue
                before = _sd_float(pair.GetFromSDData, tag)
                after = _sd_float(pair.GetToSDData, tag)
                if before is not None and after is not None:
                    deltas.append(after - before)
            if show_transforms:
                print("".join(pieces))

        # transforms without any usable property values are left out
        if deltas:
            summaries.append(
                MMPXform(transform, average(deltas), stddev(deltas), len(deltas)))

    if not field:
        return 0

    if not summaries:
        oechem.OEThrow.Error(
            "No matched pairs found with {} data".format(field))

    print("")
    print("*** Transforms sorted by delta ({})".format(field))

    summaries.sort(key=_by_descending_magnitude)
    for rank, entry in enumerate(summaries, start=1):
        no_smarts = (extractOptions.GetOptions() &
                     oemedchem.OEMatchedPairTransformExtractMode_NoSMARTS)
        # not 'invertable' if SMARTS qualifiers were applied
        if no_smarts != 0 and entry.avg < 0.:
            entry.avg = -1. * entry.avg
            entry.xform.Invert()
        print("{0:2} {2}=(avg={3:.2f},stdev={4:.2f},num={5}) {1}".format(
            rank, entry.xform.GetTransform(), field,
            entry.avg, entry.std, entry.num))
def MMPIndex(itf):
    """Build a matched pair index from a structure file and write it out.

    Molecules are read from ``-input``, optionally stripped of salts
    (``-stripsalts``) and stereo (``-stripstereo``), filtered for SD data
    according to ``-keepSD`` / ``-allSD`` / ``-clearSD`` and added to a
    matched pair analyzer under their 0-based record number.  At most
    ``-maxrec`` records are processed when that value is positive, and a
    progress line is logged every ``-status`` records when that value is
    non-zero.  The resulting index is serialized to ``-output``.

    SD data option precedence: ``-keepSD`` overrides both other options,
    then ``-clearSD`` overrides ``-allSD``.

    :param itf: command line interface object carrying the options above as
        well as ``-verbose``, ``-vverbose``, ``-exportcompress`` and the
        indexing options consumed by ``OESetupMatchedPairIndexOptions``.
    :returns: ``0`` when the index was written.
    """
    # output index file
    mmpindexfile = itf.GetString("-output")
    if not oemedchem.OEIsMatchedPairAnalyzerFileType(mmpindexfile):
        # adjacent literals instead of a backslash continuation inside the
        # string, which embedded the source indentation in the message
        oechem.OEThrow.Fatal("Output file is not a matched pair index type - "
                             "needs .mmpidx extension: {}"
                             .format(mmpindexfile))

    # create options class with defaults
    mmpopts = oemedchem.OEMatchedPairAnalyzerOptions()
    # set up options from command line
    if not oemedchem.OESetupMatchedPairIndexOptions(mmpopts, itf):
        oechem.OEThrow.Fatal("Error setting matched pair indexing options!")

    # input structures to index
    ifsindex = oechem.oemolistream()
    if not ifsindex.open(itf.GetString("-input")):
        oechem.OEThrow.Fatal("Unable to open {} for reading"
                             .format(itf.GetString("-input")))

    # get requested verbosity setting; -vverbose implies -verbose
    verbose = itf.GetBool("-verbose")
    vverbose = itf.GetBool("-vverbose")
    if vverbose:
        verbose = vverbose

    maxrec = max(itf.GetInt("-maxrec"), 0)
    statusrec = itf.GetInt("-status")

    if itf.GetBool("-exportcompress"):
        if not mmpopts.SetOptions(mmpopts.GetOptions() |
                                  oemedchem.OEMatchedPairOptions_ExportCompression):
            oechem.OEThrow.Warning("Error enabling export compression!")

    stripstereo = itf.GetBool("-stripstereo")
    stripsalts = itf.GetBool("-stripsalts")

    keepFields = []
    if itf.HasString("-keepSD"):
        keepFields = list(itf.GetStringList("-keepSD"))
        if verbose:
            oechem.OEThrow.Info('Retaining SD data fields: {}'.format(' '.join(keepFields)))

    # resolve the three SD data options into one consistent setting
    alldata = itf.GetBool("-allSD")
    cleardata = itf.GetBool("-clearSD")
    if keepFields:
        if verbose and (alldata or cleardata):
            oechem.OEThrow.Info("Option -keepSD overriding -allSD, -clearSD")
        alldata = False
        cleardata = False
    elif cleardata:
        alldata = False
        if verbose:
            oechem.OEThrow.Info("Forced clearing of all input SD data")
    elif alldata:
        if verbose:
            oechem.OEThrow.Info("Retaining all input SD data")
        cleardata = False
    elif verbose:
        oechem.OEThrow.Info("No SD data handling option specified, -allSD assumed")

    # '-CLEARSD' / '-ALLSD' are marker entries handed to FilterSDData below
    if cleardata:
        keepFields = ['-CLEARSD']
    elif alldata or not keepFields:
        keepFields = ['-ALLSD']

    if verbose:
        if not mmpopts.HasIndexableFragmentHeavyAtomRange():
            oechem.OEThrow.Info("Indexing all fragments")
        else:
            oechem.OEThrow.Info("Limiting fragment cores to {0:.2f}-{1:.2f}% of input molecules"
                                .format(mmpopts.GetIndexableFragmentRangeMin(),
                                        mmpopts.GetIndexableFragmentRangeMax()))
        if statusrec:
            oechem.OEThrow.Info("Status output after every {0} records".format(statusrec))
        if maxrec:
            oechem.OEThrow.Info("Indexing a maximum of {0} records".format(maxrec))

        if itf.GetBool("-exportcompress"):
            oechem.OEThrow.Info("Removing singleton index nodes from index")

        if stripstereo:
            oechem.OEThrow.Info("Stripping stereo")

        if stripsalts:
            oechem.OEThrow.Info("Stripping salts")

        # report the resolved setting rather than the raw -clearSD flag, so
        # a -keepSD override is not announced as "clearing all data"
        if cleardata:
            oechem.OEThrow.Info("Clearing all input SD data")
        elif alldata:
            oechem.OEThrow.Info("Retaining all input SD data")
        elif keepFields:
            # space separated, matching the earlier -keepSD message
            oechem.OEThrow.Info('Retaining floating point SD data fields: {}'
                                .format(' '.join(keepFields)))

    # create indexing engine
    mmp = oemedchem.OEMatchedPairAnalyzer(mmpopts)

    # interpret SD fields as floating point data
    validdata = FilterSDData(keepFields, True)

    # add molecules to be indexed
    record = 0
    unindexed = 0
    for mol in ifsindex.GetOEGraphMols():
        if not alldata:
            # filter the input molecule SD data based on allowed fields
            validdata.FilterMolData(mol)

        if stripsalts:
            oechem.OEDeleteEverythingExceptTheFirstLargestComponent(mol)

        if stripstereo:
            oechem.OEUncolorMol(mol,
                                (oechem.OEUncolorStrategy_RemoveAtomStereo |
                                 oechem.OEUncolorStrategy_RemoveBondStereo |
                                 oechem.OEUncolorStrategy_RemoveGroupStereo))

        # AddMol hands back the supplied record number on success; anything
        # else is treated as a status code for a structure that was skipped
        status = mmp.AddMol(mol, record)
        if status != record:
            unindexed += 1
            if vverbose:
                oechem.OEThrow.Info('Input structure not added to index, record=%d status=%s' %
                                    (record, oemedchem.OEMatchedPairIndexStatusName(status)))
        record += 1
        if maxrec and record >= maxrec:
            break
        if statusrec and (record % statusrec) == 0:
            oechem.OEThrow.Info("Records: {} Indexed: {} Unindexed: {}"
                                .format(record, (record - unindexed), unindexed))

    if not mmp.NumMols():
        oechem.OEThrow.Fatal('No records in index structure file')

    if not mmp.NumMatchedPairs():
        oechem.OEThrow.Fatal('No matched pairs found from indexing, ' +
                             'use -fragGe,-fragLe options to extend indexing range')

    if not oemedchem.OEWriteMatchedPairAnalyzer(mmpindexfile, mmp):
        oechem.OEThrow.Fatal('Error serializing MMP index: {}'
                             .format(mmpindexfile))

    # return some status information
    oechem.OEThrow.Info("Records: {}, Indexed: {}, matched pairs: {:,d}"
                        .format(record, mmp.NumMols(), mmp.NumMatchedPairs()))
    return 0
def MMPTransform(itf):
    """Apply the transforms of a matched pair index to input structures.

    Each molecule read from ``-input`` is run through
    ``OEMatchedPairApplyTransforms`` against the index loaded from
    ``-mmpindex``, using the chemical context chosen by ``-context``
    (0, 1, 2, 3 or A) and the ``-minpairs`` threshold.  Every generated
    molecule is written to ``-output``.  With ``-nowarnings`` the OEThrow
    output is redirected into an in-memory stream that is cleared around
    each molecule's transformations.

    :param itf: command line interface object providing ``-input``,
        ``-output``, ``-mmpindex``, ``-context``, ``-minpairs``,
        ``-verbose`` and ``-nowarnings``.
    :returns: ``0`` after the input/output molecule counts were printed.
    """
    # input structure(s) to process
    source = oechem.oemolistream()
    if not source.open(itf.GetString("-input")):
        oechem.OEThrow.Fatal("Unable to open %s for reading"
                             % itf.GetString("-input"))

    # validate and load the MMP index
    index_path = itf.GetString("-mmpindex")
    if not oemedchem.OEIsMatchedPairAnalyzerFileType(index_path):
        oechem.OEThrow.Fatal(
            'Not a valid matched pair index input file, {}'.format(index_path))

    analyzer = oemedchem.OEMatchedPairAnalyzer()
    if not oemedchem.OEReadMatchedPairAnalyzer(index_path, analyzer):
        oechem.OEThrow.Fatal("Unable to load index {}".format(index_path))

    if not analyzer.NumMols():
        oechem.OEThrow.Fatal(
            'No records in loaded MMP index file: {}'.format(index_path))

    if not analyzer.NumMatchedPairs():
        oechem.OEThrow.Fatal(
            'No matched pairs found in MMP index file, ' +
            'use -fragGe,-fragLe options to extend indexing range')

    # output (transformed) structure(s)
    sink = oechem.oemolostream()
    if not sink.open(itf.GetString("-output")):
        oechem.OEThrow.Fatal("Unable to open %s for writing"
                             % itf.GetString("-output"))

    # map the first character of -context onto a context constant;
    # the 0-bond context stays in place when the character is not recognised
    context_by_key = {
        '0': oemedchem.OEMatchedPairContext_Bond0,
        '1': oemedchem.OEMatchedPairContext_Bond1,
        '2': oemedchem.OEMatchedPairContext_Bond2,
        '3': oemedchem.OEMatchedPairContext_Bond3,
        'a': oemedchem.OEMatchedPairContext_AllBonds,
        'A': oemedchem.OEMatchedPairContext_AllBonds,
    }
    context = oemedchem.OEMatchedPairContext_Bond0
    context_key = itf.GetString("-context")[:1]
    if context_key in context_by_key:
        context = context_by_key[context_key]
    else:
        oechem.OEThrow.Fatal("Invalid context specified: " +
                             context_key + ", only 0|1|2|3|A allowed")

    verbose = itf.GetBool("-verbose")

    # return some status information
    if verbose:
        oechem.OEThrow.Info("{}: molecules: {:d}, matched pairs: {:,d}".format(
            index_path, analyzer.NumMols(), analyzer.NumMatchedPairs()))

    minpairs = itf.GetInt("-minpairs")
    if verbose and minpairs > 1:
        oechem.OEThrow.Info(
            'Requiring at least %d matched pairs to apply transformations' % minpairs)

    # optionally capture OEThrow output in a throwaway in-memory stream
    captured = None
    if itf.GetBool("-nowarnings"):
        captured = oechem.oeosstream()
        oechem.OEThrow.SetOutputStream(captured)

    read_count = 0
    written_count = 0
    for read_count, mol in enumerate(source.GetOEGraphMols(), start=1):
        products = oemedchem.OEMatchedPairApplyTransforms(
            mol, analyzer, context, minpairs)
        if not products.IsValid():
            if verbose:
                # as minpairs increases, fewer transformed mols are generated
                # - output if requested
                label = mol.GetTitle()
                if not label:
                    label = 'Record ' + str(read_count)
                oechem.OEThrow.Info("%s did not produce any output" % label)
            continue

        if captured is not None:
            captured.clear()

        for product in products:
            written_count += 1
            oechem.OEWriteMolecule(sink, product)

        if captured is not None:
            captured.clear()

    if not read_count:
        oechem.OEThrow.Fatal('No records in input structure file to transform')

    if not written_count:
        oechem.OEThrow.Warning('No transformed structures generated')

    print("Input molecules={} Output molecules={}".format(read_count, written_count))

    return 0