Exemplo n.º 1
0
def SerializeInput(itf):
    """Index the input structures, serialize the index and reload it.

    Molecules are read from the ``-input`` file and added to a matched pair
    analyzer, the analyzer is written to the ``-output`` file, and that same
    file is then read back into a fresh analyzer.

    :param itf: command line interface object supplying the ``-input`` and
        ``-output`` string parameters
    :return: ``True`` when the end of the function is reached; failures are
        reported through ``oechem.OEThrow.Fatal``
    """
    infile = itf.GetString("-input")
    ims = oechem.oemolistream()
    if not ims.open(infile):
        # substitute the filename with '%'; concatenating it left a literal
        # '%s' in the reported message
        oechem.OEThrow.Fatal("Unable to open %s for reading" % infile)

    oechem.OEThrow.SetLevel(oechem.OEErrorLevel_Warning)

    # NOTE: the two mmpAnalyzer objects built below only illustrate the two
    #   ways of enabling export compression; neither is used later in this
    #   function

    # @ <SNIPPET-MATCHEDPAIRANALYZER-EXPORT-COMPRESS-MMP>
    # create analyzer class with defaults
    #   - compression option disabled by default
    mmpAnalyzer = oemedchem.OEMatchedPairAnalyzer()
    # for serialization, enable export compression to
    #   remove singleton index nodes by modifying analyzer
    mmpAnalyzer.ModifyOptions(oemedchem.OEMatchedPairOptions_ExportCompression,
                              0)
    # @ </SNIPPET-MATCHEDPAIRANALYZER-EXPORT-COMPRESS-MMP>

    # @ <SNIPPET-MATCHEDPAIRANALYZER-EXPORT-COMPRESS-OPTS>
    # create options class with defaults
    #   - compression option disabled by default
    mmpOpts = oemedchem.OEMatchedPairAnalyzerOptions()
    # for serialization, enable export compression to
    #   remove singleton index nodes by modifying the options
    mmpOpts.SetOptions(oemedchem.OEMatchedPairOptions_Default
                       | oemedchem.OEMatchedPairOptions_ExportCompression)
    # create analyzer class with compression option enabled
    mmpAnalyzer = oemedchem.OEMatchedPairAnalyzer(mmpOpts)
    # @ </SNIPPET-MATCHEDPAIRANALYZER-EXPORT-COMPRESS-OPTS>

    # @ <SNIPPET-MATCHEDPAIRANALYZER-SERIALIZE-INDEX>
    mmp = oemedchem.OEMatchedPairAnalyzer()
    for recindex, mol in enumerate(ims.GetOEGraphMols()):
        # AddMol is expected to hand back the record index it was given;
        #   any other value is treated as an indexing status code
        status = mmp.AddMol(mol, recindex)
        if status != recindex:
            oechem.OEThrow.Warning(
                "{0}: molecule indexing error, status={1}".format(
                    recindex, oemedchem.OEMatchedPairIndexStatusName(status)))

    print("Index complete, matched pairs = {0}".format(mmp.NumMatchedPairs()))

    # check for output filename with .mmpidx extension
    mmpexport = itf.GetString("-output")
    if not oemedchem.OEIsMatchedPairAnalyzerFileType(mmpexport):
        # report as fatal, like every other failure in this function: nothing
        #   was written, so the completion message below must not follow
        oechem.OEThrow.Fatal('Not a valid matched pair index output file, ' +
                             mmpexport)
    elif not oemedchem.OEWriteMatchedPairAnalyzer(mmpexport, mmp):
        oechem.OEThrow.Fatal("Index serialization failed")
    else:
        oechem.OEThrow.Info("Index serialization complete")
    # @ </SNIPPET-MATCHEDPAIRANALYZER-SERIALIZE-INDEX>
    print("Index serialization complete")

    mmpimport = mmpexport
    # now try to reload serialized index
    # @ <SNIPPET-MATCHEDPAIRANALYZER-DESERIALIZE-INDEX>
    mmp = oemedchem.OEMatchedPairAnalyzer()
    if not oemedchem.OEIsMatchedPairAnalyzerFileType(mmpimport):
        oechem.OEThrow.Fatal('Not a valid matched pair index input file, ' +
                             mmpimport)
    elif not oemedchem.OEReadMatchedPairAnalyzer(mmpimport, mmp):
        oechem.OEThrow.Fatal("Index deserialization failed")
    else:
        oechem.OEThrow.Info("Index deserialization complete")
    # @ </SNIPPET-MATCHEDPAIRANALYZER-DESERIALIZE-INDEX>
    print("Index deserialization complete")

    return True
Exemplo n.º 2
0
def FindSimpleMatchedPairs(itf):
    """Index the input structures and print the matched pair transforms.

    Molecules are read from the ``-input`` file (at most ``-maxrec`` of them
    when that value is non-zero), reduced to their largest component with
    atom and bond stereo removed, and added to a matched pair analyzer under
    a 1-based record index. Every extracted transform is then printed along
    with the record indices of its matched pairs.

    :param itf: command line interface object supplying the ``-input``
        string parameter and the ``-maxrec`` integer parameter
    :return: ``True`` when the end of the function is reached
    """
    infile = itf.GetString("-input")
    ims = oechem.oemolistream()
    if not ims.open(infile):
        # substitute the filename with '%'; concatenating it left a literal
        # '%s' in the reported message
        oechem.OEThrow.Fatal("Unable to open %s for reading" % infile)

    maxrecs = itf.GetInt("-maxrec")

    oechem.OEThrow.SetLevel(oechem.OEErrorLevel_Warning)

    # @ <SNIPPET-FINDSIMPLEMATCHEDPAIRS-EXAMPLE>
    # create options class with defaults
    mmpOpts = oemedchem.OEMatchedPairAnalyzerOptions()

    # for 'simple' pairs, alter default indexing options
    # - single cuts only, heavy atom substituents only (HMember indexing off)
    mmpOpts.SetOptions(oemedchem.OEMatchedPairOptions_SingleCuts
                       | oemedchem.OEMatchedPairOptions_ComboCuts
                       | oemedchem.OEMatchedPairOptions_UniquesOnly)
    # - limit substituent size to no more than 20% of input structure
    mmpOpts.SetIndexableFragmentRange(80., 100.)

    # create analyzer class with nondefault options
    mmpAnalyzer = oemedchem.OEMatchedPairAnalyzer(mmpOpts)

    # ignore common index status returns
    #   (status names are matched as substrings of this comma-joined string)
    sIgnoreStatus = 'FragmentRangeFilter,DuplicateStructure,'
    sIgnoreStatus += 'FragmentationLimitFilter,HeavyAtomFilter'

    # index the input structures
    for recindex, mol in enumerate(ims.GetOEGraphMols(), start=1):
        # consider only the largest input fragment
        oechem.OEDeleteEverythingExceptTheFirstLargestComponent(mol)
        # ignore stereochemistry
        oechem.OEUncolorMol(mol, (oechem.OEUncolorStrategy_RemoveAtomStereo
                                  | oechem.OEUncolorStrategy_RemoveBondStereo))

        # explicitly provide a 1-based index to refer to indexed structures
        #   - to allow references back to external data elsewhere
        status = mmpAnalyzer.AddMol(mol, recindex)
        if status != recindex:
            statusName = oemedchem.OEMatchedPairIndexStatusName(status)
            if statusName not in sIgnoreStatus:
                oechem.OEThrow.Warning(
                    "{0}: molecule indexing error, status={1}".format(
                        recindex, statusName))
        # if limiting input, quit after limit
        if maxrecs and recindex >= maxrecs:
            break

    print("Index complete, matched pairs = {0}".format(
        mmpAnalyzer.NumMatchedPairs()))

    # specify how transforms are extracted (direction and allowed properties)
    extractMode = (
        oemedchem.OEMatchedPairTransformExtractMode_Sorted
        | oemedchem.OEMatchedPairTransformExtractMode_NoSMARTS
        | oemedchem.OEMatchedPairTransformExtractMode_AddMCSCorrespondence)

    extractOptions = oemedchem.OEMatchedPairTransformExtractOptions()
    # specify amount of chemical context at the site of the substituent change
    #   in the transform
    extractOptions.SetContext(oemedchem.OEMatchedPairContext_Bond0)
    extractOptions.SetOptions(extractMode)

    # walk the transforms and print the matched pairs
    transforms = oemedchem.OEMatchedPairGetTransforms(mmpAnalyzer,
                                                      extractOptions)
    for xfmidx, mmpxform in enumerate(transforms, start=1):
        print("{0:2} {1}".format(xfmidx, mmpxform.GetTransform()))
        # dump matched molecular pairs and index identifiers
        #   (recindex from indexing loop above)
        for mmppair in mmpxform.GetMatchedPairs():
            print("\tmatched pair molecule indices=({0},{1})".format(
                mmppair.FromIndex(), mmppair.ToIndex()))
    # @ </SNIPPET-FINDSIMPLEMATCHEDPAIRS-EXAMPLE>

    return True
def MMPTransformList(itf):
    """List the transforms of a matched pair index, optionally ranked by data.

    Loads the index named by ``-mmpindex`` and walks its transforms using the
    chemical context selected with ``-context``. With ``-printlist`` each
    transform and its matched pairs are printed. With ``-datafield`` the
    per-pair difference (to - from) of that SD data field is collected, and
    the transforms are printed again sorted by the magnitude of their average
    difference.

    Relies on ``MMPXform``, ``average`` and ``stddev`` being defined elsewhere
    in this module.

    :param itf: command line interface object supplying ``-mmpindex``,
        ``-context``, ``-printlist`` and (optionally) ``-datafield``
    :return: ``0`` on every path that returns; failures are reported through
        ``oechem.OEThrow.Fatal``
    """
    # check MMP index
    mmpimport = itf.GetString("-mmpindex")
    if not oemedchem.OEIsMatchedPairAnalyzerFileType(mmpimport):
        oechem.OEThrow.Fatal(
            'Not a valid matched pair index input file, {}'.format(mmpimport))

    # load MMP index
    mmp = oemedchem.OEMatchedPairAnalyzer()
    if not oemedchem.OEReadMatchedPairAnalyzer(mmpimport, mmp):
        oechem.OEThrow.Fatal("Unable to load index {}".format(mmpimport))

    if not mmp.NumMols():
        oechem.OEThrow.Fatal(
            'No records in loaded MMP index file: {}'.format(mmpimport))

    if not mmp.NumMatchedPairs():
        oechem.OEThrow.Fatal(
            'No matched pairs found in MMP index file, ' +
            'use -fragGe,-fragLe options to extend indexing range')

    # request a specific context for the transform activity; only the first
    #   character of the option value is examined
    contexts = {
        '0': oemedchem.OEMatchedPairContext_Bond0,
        '1': oemedchem.OEMatchedPairContext_Bond1,
        '2': oemedchem.OEMatchedPairContext_Bond2,
        '3': oemedchem.OEMatchedPairContext_Bond3,
        'a': oemedchem.OEMatchedPairContext_AllBonds,
        'A': oemedchem.OEMatchedPairContext_AllBonds,
    }
    askcontext = itf.GetString("-context")[:1]
    if askcontext not in contexts:
        oechem.OEThrow.Fatal("Invalid context specified: " + askcontext +
                             ", only 0|1|2|3|A allowed")
    # 0-bonds remains the fallback context
    chemctxt = contexts.get(askcontext, oemedchem.OEMatchedPairContext_Bond0)

    bPrintTransforms = itf.GetBool("-printlist")
    # if a data field was specified, retrieve the SD data field name
    field = None
    if itf.HasString("-datafield"):
        field = itf.GetString("-datafield")

    if not bPrintTransforms and field is None:
        oechem.OEThrow.Info(
            'Specify one of -datafield or -printlist, otherwise nothing to do!'
        )
        # return 0 here too so that every exit path yields the same value
        return 0

    extractOptions = oemedchem.OEMatchedPairTransformExtractOptions()
    # specify amount of chemical context at the site of the substituent change
    extractOptions.SetContext(chemctxt)
    # controls how transforms are extracted (direction and allowed properties)
    extractOptions.SetOptions(
        oemedchem.OEMatchedPairTransformExtractMode_Sorted
        | oemedchem.OEMatchedPairTransformExtractMode_NoSMARTS)

    # walk the transforms from the indexed matched pairs
    xforms = []
    transforms = oemedchem.OEMatchedPairGetTransforms(mmp, extractOptions)
    for xfmidx, mmpxform in enumerate(transforms, start=1):
        if bPrintTransforms:
            print("{0:2} {1}".format(xfmidx, mmpxform.GetTransform()))

        # compute delta property
        prop = []
        for mmpidx, mmppair in enumerate(mmpxform.GetMatchedPairs(), start=1):
            mmpinfo = "\t{0:2}: ({1:2},{2:2})".format(mmpidx,
                                                      mmppair.FromIndex(),
                                                      mmppair.ToIndex())
            for tag in mmppair.GetDataTags():
                mmpinfo = mmpinfo + " {0}=({1},{2})".format(
                    tag, mmppair.GetFromSDData(tag), mmppair.GetToSDData(tag))
                if tag != field:
                    continue
                try:
                    fromData = float(mmppair.GetFromSDData(tag))
                    toData = float(mmppair.GetToSDData(tag))
                except ValueError:
                    # non-numeric data on either side: no delta for this pair
                    continue
                prop.append(toData - fromData)

            if bPrintTransforms:
                print(mmpinfo)

        # skip if property not found
        if prop:
            xforms.append(
                MMPXform(mmpxform, average(prop), stddev(prop), len(prop)))

    if not field:
        return 0

    if not xforms:
        oechem.OEThrow.Error(
            "No matched pairs found with {} data".format(field))

    print("")
    print("*** Transforms sorted by delta ({})".format(field))

    # largest average change first, regardless of sign
    xforms.sort(key=lambda c: -abs(float(c.avg)))

    # not 'invertable' if SMARTS qualifiers were applied; the extraction
    #   options do not change inside the loop, so test them once
    invertible = (extractOptions.GetOptions()
                  & oemedchem.OEMatchedPairTransformExtractMode_NoSMARTS) != 0
    for idx, xfm in enumerate(xforms, start=1):
        if invertible and xfm.avg < 0.:
            # report a negative average as the inverted transform with a
            #   positive average
            xfm.avg = -1. * xfm.avg
            xfm.xform.Invert()
        print("{0:2} {2}=(avg={3:.2f},stdev={4:.2f},num={5}) {1}".format(
            idx, xfm.xform.GetTransform(), field, xfm.avg, xfm.std, xfm.num))

    return 0
Exemplo n.º 4
0
def MMPIndex(itf):
    """Build a matched pair index from input structures and write it to disk.

    Reads molecules from the ``-input`` file, optionally filters their SD
    data, strips salts and/or stereo, adds them to a matched pair analyzer
    configured from the command line, and serializes the analyzer to the
    ``-output`` file.

    SD data handling precedence: ``-keepSD`` overrides ``-allSD`` and
    ``-clearSD``; ``-clearSD`` overrides ``-allSD``; with none of them given
    the ``-ALLSD`` marker is passed to the filter.

    Relies on ``FilterSDData`` being defined elsewhere in this module.

    :param itf: command line interface object supplying the indexing options
    :return: ``0`` when the end of the function is reached; failures are
        reported through ``oechem.OEThrow.Fatal``
    """
    # output index file
    mmpindexfile = itf.GetString("-output")
    if not oemedchem.OEIsMatchedPairAnalyzerFileType(mmpindexfile):
        # adjacent literals rather than a backslash continuation inside the
        #   string, which embedded the next line's indentation in the message
        oechem.OEThrow.Fatal("Output file is not a matched pair index type - "
                             "needs .mmpidx extension: {}"
                             .format(mmpindexfile))

    # create options class with defaults
    mmpopts = oemedchem.OEMatchedPairAnalyzerOptions()
    # set up options from command line
    if not oemedchem.OESetupMatchedPairIndexOptions(mmpopts, itf):
        oechem.OEThrow.Fatal("Error setting matched pair indexing options!")

    # input structures to index
    ifsindex = oechem.oemolistream()
    if not ifsindex.open(itf.GetString("-input")):
        oechem.OEThrow.Fatal("Unable to open {} for reading"
                             .format(itf.GetString("-input")))

    # get requested verbosity setting; -vverbose implies -verbose
    verbose = itf.GetBool("-verbose")
    vverbose = itf.GetBool("-vverbose")
    if vverbose:
        verbose = vverbose

    # negative record limits are treated as "no limit"
    maxrec = max(itf.GetInt("-maxrec"), 0)
    statusrec = itf.GetInt("-status")

    exportcompress = itf.GetBool("-exportcompress")
    if exportcompress:
        if not mmpopts.SetOptions(mmpopts.GetOptions() |
                                  oemedchem.OEMatchedPairOptions_ExportCompression):
            oechem.OEThrow.Warning("Error enabling export compression!")

    stripstereo = itf.GetBool("-stripstereo")
    stripsalts = itf.GetBool("-stripsalts")

    keepFields = []
    if itf.HasString("-keepSD"):
        keepFields = [field for field in itf.GetStringList("-keepSD")]
        if verbose:
            oechem.OEThrow.Info('Retaining SD data fields: {}'.format(' '.join(keepFields)))

    alldata = itf.GetBool("-allSD")
    cleardata = itf.GetBool("-clearSD")

    # resolve conflicting SD data options: -keepSD > -clearSD > -allSD
    if keepFields:
        if verbose and (alldata or cleardata):
            oechem.OEThrow.Info("Option -keepSD overriding -allSD, -clearSD")
        alldata = False
        cleardata = False
    elif cleardata:
        alldata = False
        if verbose:
            oechem.OEThrow.Info("Forced clearing of all input SD data")
    elif alldata:
        if verbose:
            oechem.OEThrow.Info("Retaining all input SD data")
        cleardata = False
    elif verbose:
        oechem.OEThrow.Info("No SD data handling option specified, -allSD assumed")

    # the filter is driven by marker entries when no explicit fields are kept
    if cleardata:
        keepFields = ['-CLEARSD']
    elif alldata or not keepFields:
        keepFields = ['-ALLSD']

    if verbose:
        if not mmpopts.HasIndexableFragmentHeavyAtomRange():
            oechem.OEThrow.Info("Indexing all fragments")
        else:
            oechem.OEThrow.Info("Limiting fragment cores to {0:.2f}-{1:.2f}% of input molecules"
                                .format(mmpopts.GetIndexableFragmentRangeMin(),
                                        mmpopts.GetIndexableFragmentRangeMax()))
        if statusrec:
            oechem.OEThrow.Info("Status output after every {0} records".format(statusrec))
        if maxrec:
            oechem.OEThrow.Info("Indexing a maximum of {0} records".format(maxrec))

        if exportcompress:
            oechem.OEThrow.Info("Removing singleton index nodes from index")

        if stripstereo:
            oechem.OEThrow.Info("Stripping stereo")

        if stripsalts:
            oechem.OEThrow.Info("Stripping salts")

        # report the resolved setting, not the raw -clearSD flag, so the
        #   summary agrees with the -keepSD override applied above
        if cleardata:
            oechem.OEThrow.Info("Clearing all input SD data")
        elif alldata:
            oechem.OEThrow.Info("Retaining all input SD data")
        elif keepFields:
            # space separated, matching the -keepSD message above
            oechem.OEThrow.Info('Retaining floating point SD data fields: {}'
                                .format(' '.join(keepFields)))

    # create indexing engine
    mmp = oemedchem.OEMatchedPairAnalyzer(mmpopts)

    # interpret SD fields as floating point data
    validdata = FilterSDData(keepFields, True)

    # add molecules to be indexed
    record = 0
    unindexed = 0
    for mol in ifsindex.GetOEGraphMols():
        if not alldata:
            # filter the input molecule SD data based on allowed fields
            validdata.FilterMolData(mol)

        if stripsalts:
            oechem.OEDeleteEverythingExceptTheFirstLargestComponent(mol)

        if stripstereo:
            oechem.OEUncolorMol(mol,
                                (oechem.OEUncolorStrategy_RemoveAtomStereo |
                                 oechem.OEUncolorStrategy_RemoveBondStereo |
                                 oechem.OEUncolorStrategy_RemoveGroupStereo))

        # a returned value other than the record number is treated as a
        #   status code for a structure that was not indexed
        status = mmp.AddMol(mol, record)
        if status != record:
            unindexed += 1
            if vverbose:
                oechem.OEThrow.Info('Input structure not added to index, record=%d status=%s' %
                                    (record, oemedchem.OEMatchedPairIndexStatusName(status)))
        record += 1
        if maxrec and record >= maxrec:
            break
        if statusrec and (record % statusrec) == 0:
            oechem.OEThrow.Info("Records: {} Indexed: {} Unindexed: {}"
                                .format(record, (record - unindexed), unindexed))

    if not mmp.NumMols():
        oechem.OEThrow.Fatal('No records in index structure file')

    if not mmp.NumMatchedPairs():
        oechem.OEThrow.Fatal('No matched pairs found from indexing, ' +
                             'use -fragGe,-fragLe options to extend indexing range')

    if not oemedchem.OEWriteMatchedPairAnalyzer(mmpindexfile, mmp):
        oechem.OEThrow.Fatal('Error serializing MMP index: {}'
                             .format(mmpindexfile))

    # return some status information
    oechem.OEThrow.Info("Records: {}, Indexed: {}, matched pairs: {:,d}"
                        .format(record,
                                mmp.NumMols(),
                                mmp.NumMatchedPairs()))
    return 0
Exemplo n.º 5
0
def MMPTransform(itf):
    """Apply the transforms of a matched pair index to input structures.

    Each molecule read from ``-input`` is passed, together with the index
    loaded from ``-mmpindex``, the context chosen with ``-context`` and the
    ``-minpairs`` threshold, to ``OEMatchedPairApplyTransforms``. Every
    resulting molecule is written to ``-output``. A summary of the input and
    output counts is printed at the end.

    :param itf: command line interface object supplying ``-input``,
        ``-output``, ``-mmpindex``, ``-context``, ``-minpairs``, ``-verbose``
        and ``-nowarnings``
    :return: ``0`` when the end of the function is reached
    """
    # input structure(s) to process
    ifsmols = oechem.oemolistream()
    if not ifsmols.open(itf.GetString("-input")):
        oechem.OEThrow.Fatal("Unable to open %s for reading" %
                             itf.GetString("-input"))

    # the index file must have a matched pair index file type
    mmpimport = itf.GetString("-mmpindex")
    if not oemedchem.OEIsMatchedPairAnalyzerFileType(mmpimport):
        oechem.OEThrow.Fatal(
            'Not a valid matched pair index input file, {}'.format(mmpimport))

    # read the index and make sure it holds something usable
    mmp = oemedchem.OEMatchedPairAnalyzer()
    if not oemedchem.OEReadMatchedPairAnalyzer(mmpimport, mmp):
        oechem.OEThrow.Fatal("Unable to load index {}".format(mmpimport))

    if not mmp.NumMols():
        oechem.OEThrow.Fatal(
            'No records in loaded MMP index file: {}'.format(mmpimport))

    if not mmp.NumMatchedPairs():
        oechem.OEThrow.Fatal(
            'No matched pairs found in MMP index file, ' +
            'use -fragGe,-fragLe options to extend indexing range')

    # output (transformed) structure(s)
    ofs = oechem.oemolostream()
    if not ofs.open(itf.GetString("-output")):
        oechem.OEThrow.Fatal("Unable to open %s for writing" %
                             itf.GetString("-output"))

    # map the first character of -context onto a chemistry context
    contextByKey = {
        '0': oemedchem.OEMatchedPairContext_Bond0,
        '1': oemedchem.OEMatchedPairContext_Bond1,
        '2': oemedchem.OEMatchedPairContext_Bond2,
        '3': oemedchem.OEMatchedPairContext_Bond3,
        'a': oemedchem.OEMatchedPairContext_AllBonds,
        'A': oemedchem.OEMatchedPairContext_AllBonds,
    }
    askcontext = itf.GetString("-context")[:1]
    if askcontext not in contextByKey:
        oechem.OEThrow.Fatal("Invalid context specified: " + askcontext +
                             ", only 0|1|2|3|A allowed")
    # 0-bonds stays the fallback context
    chemctxt = contextByKey.get(askcontext,
                                oemedchem.OEMatchedPairContext_Bond0)

    verbose = itf.GetBool("-verbose")

    # report what was loaded
    if verbose:
        oechem.OEThrow.Info("{}: molecules: {:d}, matched pairs: {:,d}".format(
            mmpimport, mmp.NumMols(), mmp.NumMatchedPairs()))

    minpairs = itf.GetInt("-minpairs")
    if verbose and minpairs > 1:
        oechem.OEThrow.Info(
            'Requiring at least %d matched pairs to apply transformations' %
            minpairs)

    # with -nowarnings, messages are redirected into an in-memory stream
    #   that is cleared around the output of each molecule
    errs = None
    if itf.GetBool("-nowarnings"):
        errs = oechem.oeosstream()
        oechem.OEThrow.SetOutputStream(errs)

    orec = 0
    ocnt = 0
    for orec, mol in enumerate(ifsmols.GetOEGraphMols(), start=1):
        products = oemedchem.OEMatchedPairApplyTransforms(mol, mmp, chemctxt,
                                                          minpairs)
        if not products.IsValid():
            if verbose:
                # as minpairs increases, fewer transformed mols are generated - output if requested
                name = mol.GetTitle() or 'Record ' + str(orec)
                oechem.OEThrow.Info("%s did not produce any output" % name)
            continue

        if errs is not None:
            errs.clear()
        for outmol in products:
            ocnt += 1
            oechem.OEWriteMolecule(ofs, outmol)
        if errs is not None:
            errs.clear()

    if not orec:
        oechem.OEThrow.Fatal('No records in input structure file to transform')

    if not ocnt:
        oechem.OEThrow.Warning('No transformed structures generated')

    print("Input molecules={} Output molecules={}".format(orec, ocnt))

    return 0