Пример #1
0
def PerformSearch():
    """Search the input molecules for a SMARTS pattern and report the counts.

    The query molecule is built from OptionsInfo["Pattern"] and the input
    molecules are read from OptionsInfo["Infile"]. Both are handed, together
    with the two writers returned by SetupMoleculeWriters(), to
    ProcessMolecules(). Writers that are not None are closed afterwards and a
    summary of the returned counts is printed.
    """

    # Query molecule for the specified SMARTS...
    QueryMol = Chem.MolFromSmarts(OptionsInfo["Pattern"])

    # Molecule reader...
    InfileName = OptionsInfo["Infile"]
    MiscUtil.PrintInfo("\nProcessing file %s..." % InfileName)
    InputMols = RDKitUtil.ReadMolecules(InfileName,
                                        **OptionsInfo["InfileParams"])

    # Molecule writers...
    MainWriter, FilteredWriter = SetupMoleculeWriters()

    Counts = ProcessMolecules(InputMols, QueryMol, MainWriter, FilteredWriter)
    TotalCount, ValidCount, RemainingCount = Counts

    # Either writer may be None; close only the ones that exist...
    for OpenWriter in (MainWriter, FilteredWriter):
        if OpenWriter is not None:
            OpenWriter.close()

    # Report the counts...
    SummaryLines = (
        ("\nTotal number of molecules: %d", TotalCount),
        ("Number of valid molecules: %d", ValidCount),
        ("Number of ignored molecules: %d", TotalCount - ValidCount),
        ("\nNumber of remaining molecules: %d", RemainingCount),
        ("Number of filtered molecules: %d", ValidCount - RemainingCount),
    )
    for Template, Count in SummaryLines:
        MiscUtil.PrintInfo(Template % Count)
def PerformConstrainedMinimization():
    """Perform constrained minimization.

    Retrieves the reference molecule via RetrieveReferenceMolecule(), reads
    the input molecules from OptionsInfo["Infile"] (after setting
    AllowEmptyMols in OptionsInfo["InfileParams"]), passes both with a writer
    for OptionsInfo["Outfile"] to ProcessMolecules(), and prints a summary of
    the returned counts.
    """

    # Read and validate reference molecule...
    RefMol = RetrieveReferenceMolecule()

    # Setup a molecule reader for input file...
    MiscUtil.PrintInfo("\nProcessing file %s..." % OptionsInfo["Infile"])
    OptionsInfo["InfileParams"]["AllowEmptyMols"] = True
    Mols = RDKitUtil.ReadMolecules(OptionsInfo["Infile"], **OptionsInfo["InfileParams"])

    # Set up a molecule writer...
    Writer = RDKitUtil.MoleculesWriter(OptionsInfo["Outfile"], **OptionsInfo["OutfileParams"])
    if Writer is None:
        MiscUtil.PrintError("Failed to setup a writer for output file %s " % OptionsInfo["Outfile"])
    MiscUtil.PrintInfo("Generating file %s..." % OptionsInfo["Outfile"])

    try:
        MolCount, ValidMolCount, CoreScaffoldMissingCount, MinimizationFailedCount = ProcessMolecules(RefMol, Mols, Writer)
    finally:
        # Close the writer even when processing raises...
        if Writer is not None:
            Writer.close()

    MiscUtil.PrintInfo("\nTotal number of molecules: %d" % MolCount)
    MiscUtil.PrintInfo("Number of valid molecules: %d" % ValidMolCount)
    MiscUtil.PrintInfo("Number of molecules with missing core scaffold: %d" % CoreScaffoldMissingCount)
    MiscUtil.PrintInfo("Number of molecules failed during conformation generation or minimization: %d" % MinimizationFailedCount)
    # Ignored = invalid molecules plus the valid ones that failed either step...
    MiscUtil.PrintInfo("Number of ignored molecules: %d" % (MolCount - ValidMolCount + CoreScaffoldMissingCount + MinimizationFailedCount))
Пример #3
0
def PerformFiltering():
    """Filter the input molecules against ChEMBL SMARTS patterns.

    Pattern molecules are built by SetupChEMBLPatternMols() from the patterns
    returned by RetrieveChEMBLPatterns(). They are passed, with the molecules
    read from OptionsInfo["Infile"] and the two writers returned by
    SetupMoleculeWriters(), to ProcessMolecules(). Writers that are not None
    are closed afterwards and a summary of the returned counts is printed.
    """

    # ChEMBL pattern molecules...
    MiscUtil.PrintInfo(
        "\nSetting up ChEMBL pattern molecules for performing substructure search..."
    )
    PatternMols = SetupChEMBLPatternMols(RetrieveChEMBLPatterns())

    # Molecule reader...
    InfileName = OptionsInfo["Infile"]
    MiscUtil.PrintInfo("\nProcessing file %s..." % InfileName)
    InputMols = RDKitUtil.ReadMolecules(InfileName,
                                        **OptionsInfo["InfileParams"])

    # Molecule writers...
    MainWriter, FilteredWriter = SetupMoleculeWriters()

    TotalCount, ValidCount, RemainingCount = ProcessMolecules(
        InputMols, PatternMols, MainWriter, FilteredWriter)

    # Either writer may be None; close only the ones that exist...
    for OpenWriter in (MainWriter, FilteredWriter):
        if OpenWriter is not None:
            OpenWriter.close()

    IgnoredCount = TotalCount - ValidCount
    FilteredCount = ValidCount - RemainingCount

    MiscUtil.PrintInfo("\nTotal number of molecules: %d" % TotalCount)
    MiscUtil.PrintInfo("Number of valid molecules: %d" % ValidCount)
    MiscUtil.PrintInfo("Number of ignored molecules: %d" % IgnoredCount)

    MiscUtil.PrintInfo("\nNumber of remaining molecules: %d" % RemainingCount)
    MiscUtil.PrintInfo("Number of filtered molecules: %d" % FilteredCount)
Пример #4
0
def GenerateConformers():
    """Generate conformers.

    Reads molecules from OptionsInfo["Infile"], passes them with a writer for
    OptionsInfo["Outfile"] to ProcessMolecules(), and prints a summary of the
    returned counts.
    """

    # Setup a molecule reader...
    MiscUtil.PrintInfo("\nProcessing file %s..." % OptionsInfo["Infile"])
    Mols = RDKitUtil.ReadMolecules(OptionsInfo["Infile"],
                                   **OptionsInfo["InfileParams"])

    # Set up a molecule writer...
    Writer = RDKitUtil.MoleculesWriter(OptionsInfo["Outfile"],
                                       **OptionsInfo["OutfileParams"])
    if Writer is None:
        MiscUtil.PrintError("Failed to setup a writer for output file %s " %
                            OptionsInfo["Outfile"])
    MiscUtil.PrintInfo("Generating file %s..." % OptionsInfo["Outfile"])

    try:
        MolCount, ValidMolCount, ConfGenFailedCount = ProcessMolecules(
            Mols, Writer)
    finally:
        # Close the writer even when processing raises...
        if Writer is not None:
            Writer.close()

    MiscUtil.PrintInfo("\nTotal number of molecules: %d" % MolCount)
    MiscUtil.PrintInfo("Number of valid molecules: %d" % ValidMolCount)
    MiscUtil.PrintInfo(
        "Number of molecules failed during conformation generation or minimization: %d"
        % ConfGenFailedCount)
    # Ignored = invalid molecules plus the valid ones that failed...
    MiscUtil.PrintInfo("Number of ignored molecules: %d" %
                       (MolCount - ValidMolCount + ConfGenFailedCount))
Пример #5
0
def CalculateEnergy():
    """Calculate single point energies.

    Reads molecules from OptionsInfo["Infile"], passes them with a writer for
    OptionsInfo["Outfile"] to ProcessMolecules(), and prints a summary of the
    returned counts.
    """

    # Setup a molecule reader...
    MiscUtil.PrintInfo("\nProcessing file %s..." % OptionsInfo["Infile"])
    Mols = RDKitUtil.ReadMolecules(OptionsInfo["Infile"],
                                   **OptionsInfo["InfileParams"])

    # Set up a molecule writer...
    Writer = RDKitUtil.MoleculesWriter(OptionsInfo["Outfile"],
                                       **OptionsInfo["OutfileParams"])
    if Writer is None:
        MiscUtil.PrintError("Failed to setup a writer for output file %s " %
                            OptionsInfo["Outfile"])
    MiscUtil.PrintInfo("Generating file %s..." % OptionsInfo["Outfile"])

    try:
        MolCount, ValidMolCount, EnergyFailedCount = ProcessMolecules(Mols, Writer)
    finally:
        # Close the writer even when processing raises...
        if Writer is not None:
            Writer.close()

    MiscUtil.PrintInfo("\nTotal number of molecules: %d" % MolCount)
    MiscUtil.PrintInfo("Number of valid molecules: %d" % ValidMolCount)
    MiscUtil.PrintInfo(
        "Number of molecules failed during energy calculation: %d" %
        EnergyFailedCount)
    # Ignored = invalid molecules plus the valid ones that failed...
    MiscUtil.PrintInfo("Number of ignored molecules: %d" %
                       (MolCount - ValidMolCount + EnergyFailedCount))
Пример #6
0
def RetrieveReactantsMolecules():
    """Read the molecules of every reactant file.

    Returns a list with one entry per file in OptionsInfo["ReactantFilesList"];
    each entry is the list of molecules from that file that are neither None
    nor empty. When OptionsInfo["UseReactantNames"] is set, a molecule whose
    name is empty is given the name "React<file number>Mol<molecule number>",
    both numbers counted from 1.
    """

    MiscUtil.PrintInfo("\nProcessing reactant file(s)...")

    AllReactantsMols = []
    ReactantFiles = OptionsInfo["ReactantFilesList"]
    AssignNames = OptionsInfo["UseReactantNames"]

    for ReactantNum, ReactantFile in enumerate(ReactantFiles, start=1):
        MiscUtil.PrintInfo("\nProcessing reactant file: %s..." % ReactantFile)

        FileMols = RDKitUtil.ReadMolecules(ReactantFile,
                                           **OptionsInfo["InfileParams"])

        KeptMols = []
        # Pre-set so the total is defined even when the file has no molecules...
        TotalCount = 0

        for TotalCount, Mol in enumerate(FileMols, start=1):
            if Mol is None:
                continue

            if RDKitUtil.IsMolEmpty(Mol):
                EmptyMolName = RDKitUtil.GetMolName(Mol, TotalCount)
                MiscUtil.PrintWarning("Ignoring empty molecule: %s" %
                                      EmptyMolName)
                continue

            # Assign a generated name to unnamed molecules...
            if AssignNames and not len(RDKitUtil.GetMolName(Mol)):
                Mol.SetProp("_Name",
                            "React%dMol%d" % (ReactantNum, TotalCount))

            KeptMols.append(Mol)

        AllReactantsMols.append(KeptMols)

        # Every kept molecule is a valid one, so the list length is the count...
        ValidCount = len(KeptMols)

        MiscUtil.PrintInfo("Total number of molecules: %d" % TotalCount)
        MiscUtil.PrintInfo("Number of valid molecules: %d" % ValidCount)
        MiscUtil.PrintInfo("Number of ignored molecules: %d" %
                           (TotalCount - ValidCount))

    return AllReactantsMols
Пример #7
0
def CalculatePartialCharges():
    """Calculate partial atomic charges.

    Reads molecules from OptionsInfo["Infile"]. For each molecule that is
    neither None nor empty, hydrogens are added and
    CalculateMolPartialCharges() is called; a molecule for which it returns an
    empty result is counted as failed, otherwise the charges are written out
    through WriteMolPartialCharges(). A summary of the counts is printed at
    the end.
    """

    Infile = OptionsInfo["Infile"]
    Outfile = OptionsInfo["Outfile"]

    MiscUtil.PrintInfo("Calculating partial atomic charges...")

    # Setup a molecule reader...
    MiscUtil.PrintInfo("\nProcessing file %s..." % Infile)
    Mols = RDKitUtil.ReadMolecules(Infile, **OptionsInfo["InfileParams"])

    # Setup a writer...
    Compute2DCoords = OptionsInfo["OutfileParams"]["Compute2DCoords"]
    Writer = RDKitUtil.MoleculesWriter(Outfile, **OptionsInfo["OutfileParams"])

    if Writer is None:
        MiscUtil.PrintError("Failed to setup a writer for output file %s " % Outfile)

    MiscUtil.PrintInfo("Generating file %s..." % Outfile)

    # Process molecules...
    MolCount, ValidMolCount, CalcFailedCount = [0] * 3

    try:
        for Mol in Mols:
            MolCount += 1
            if Mol is None:
                continue

            if RDKitUtil.IsMolEmpty(Mol):
                MolName = RDKitUtil.GetMolName(Mol, MolCount)
                MiscUtil.PrintWarning("Ignoring empty molecule: %s" % MolName)
                continue

            ValidMolCount += 1
            MolWithHs = Chem.AddHs(Mol)

            # Retrieve charges...
            PartialCharges = CalculateMolPartialCharges(MolWithHs, MolCount)
            if not len(PartialCharges):
                CalcFailedCount += 1
                continue

            # Write out charges...
            WriteMolPartialCharges(Writer, MolWithHs, PartialCharges, Compute2DCoords)
    finally:
        # Close the writer even when processing raises...
        if Writer is not None:
            Writer.close()

    MiscUtil.PrintInfo("\nTotal number of molecules: %d" % MolCount)
    MiscUtil.PrintInfo("Number of valid molecules: %d" % ValidMolCount)
    MiscUtil.PrintInfo("Number of molecules failed during calculation of partial charges: %d" % CalcFailedCount)
    # Ignored = invalid molecules plus the valid ones that failed...
    MiscUtil.PrintInfo("Number of ignored molecules: %d" % (MolCount - ValidMolCount + CalcFailedCount))
Пример #8
0
def ConvertFileFormat():
    """Read molecules from the input file and write them to the output file.

    The molecules read from OptionsInfo["Infile"] are passed to
    RDKitUtil.WriteMolecules() for OptionsInfo["Outfile"]; the two counts it
    returns are printed along with their difference.
    """

    SourceFile = OptionsInfo["Infile"]
    TargetFile = OptionsInfo["Outfile"]

    # Read...
    MiscUtil.PrintInfo("\nReading file %s..." % SourceFile)
    InputMols = RDKitUtil.ReadMolecules(SourceFile,
                                        **OptionsInfo["InfileParams"])
    MiscUtil.PrintInfo("Total number of molecules: %d" % len(InputMols))

    # Write...
    MiscUtil.PrintInfo("\nGenerating file %s..." % TargetFile)
    WriteCounts = RDKitUtil.WriteMolecules(TargetFile, InputMols,
                                           **OptionsInfo["OutfileParams"])
    TotalCount, ProcessedCount = WriteCounts

    for Template, Count in (
        ("Total number of molecules: %d", TotalCount),
        ("Number of molecules processed: %d", ProcessedCount),
        ("Number of molecules ignored: %d", TotalCount - ProcessedCount),
    ):
        MiscUtil.PrintInfo(Template % Count)
Пример #9
0
def PerformFunctionalGroupsSearch():
    """Retrieve functional groups information and perform search.

    After ProcessFunctionalGroupsInfo() and
    SetupFunctionalGroupsSMARTSPatterns(), the molecules read from
    OptionsInfo["Infile"] are passed with the pattern molecules and the
    writers returned by SetupMoleculeWriters() to ProcessMolecules(). The
    overall counts are printed, followed by one "name,count" line per entry
    of OptionsInfo["SpecifiedFunctionalGroups"]; names flagged in
    OptionsInfo["SpecifiedFunctionalGroupsNegateMatch"] are prefixed with '!'.
    """

    # Process functional groups info...
    ProcessFunctionalGroupsInfo()

    # Setup pattern mols for functional group SMARTS...
    GroupsPatternMols = SetupFunctionalGroupsSMARTSPatterns()

    # Setup a molecule reader...
    MiscUtil.PrintInfo("\nProcessing file %s..." % OptionsInfo["Infile"])
    Mols = RDKitUtil.ReadMolecules(OptionsInfo["Infile"],
                                   **OptionsInfo["InfileParams"])

    # Set up molecule writers...
    Writer, GroupOutfilesWriters = SetupMoleculeWriters()

    try:
        MolCount, ValidMolCount, RemainingMolCount, GroupsPatternMatchCountList = ProcessMolecules(
            Mols, GroupsPatternMols, Writer, GroupOutfilesWriters)
    finally:
        # Close all writers even when processing raises...
        if Writer is not None:
            Writer.close()
        for GroupOutfileWriter in GroupOutfilesWriters:
            GroupOutfileWriter.close()

    MiscUtil.PrintInfo("\nTotal number of molecules: %d" % MolCount)
    MiscUtil.PrintInfo("Number of valid molecules: %d" % ValidMolCount)
    MiscUtil.PrintInfo("Number of ignored molecules: %d" %
                       (MolCount - ValidMolCount))

    MiscUtil.PrintInfo(
        "\nTotal number of molecules matched against specified match criteria: %d"
        % RemainingMolCount)

    MiscUtil.PrintInfo(
        "\nNumber of molecules matched against individual functional groups:")
    MiscUtil.PrintInfo("FunctionalGroupName,MatchCount")

    # The negate flags and match counts are indexed in step with the group names...
    NegateMatchList = OptionsInfo["SpecifiedFunctionalGroupsNegateMatch"]
    for GroupIndex, GroupName in enumerate(OptionsInfo["SpecifiedFunctionalGroups"]):
        if NegateMatchList[GroupIndex]:
            GroupName = '!' + GroupName
        GroupMatchCount = GroupsPatternMatchCountList[GroupIndex]
        MiscUtil.PrintInfo("%s,%d" % (GroupName, GroupMatchCount))
Пример #10
0
def RemoveSalts():
    """Identify and remove salts from molecules.

    Reads molecules from OptionsInfo["Infile"], passes them with the writer
    returned by SetupMoleculeWriter() to ProcessMolecules(), and prints a
    summary of the returned counts.
    """

    # Setup a molecule reader...
    MiscUtil.PrintInfo("\nProcessing file %s..." % OptionsInfo["Infile"])
    Mols = RDKitUtil.ReadMolecules(OptionsInfo["Infile"], **OptionsInfo["InfileParams"])

    # Set up a molecule writer...
    Writer = SetupMoleculeWriter()

    try:
        MolCount, ValidMolCount, SaltsMolCount = ProcessMolecules(Mols, Writer)
    finally:
        # Close the writer even when processing raises...
        if Writer is not None:
            Writer.close()

    MiscUtil.PrintInfo("\nTotal number of molecules: %d" % MolCount)
    MiscUtil.PrintInfo("Number of valid molecules: %d" % ValidMolCount)
    MiscUtil.PrintInfo("Number of ignored molecules: %d" % (MolCount - ValidMolCount))

    MiscUtil.PrintInfo("\nNumber of molecules containing salts: %d" % (SaltsMolCount))
def PerformCalculations():
    """Run the descriptor calculation over the input file and report counts.

    Molecules read from OptionsInfo["Infile"] are passed with the writer
    returned by SetupMoleculeWriter() to ProcessMolecules(). The writer is
    closed when it is not None and the returned counts are printed.
    """

    # Molecule reader...
    InfileName = OptionsInfo["Infile"]
    MiscUtil.PrintInfo("\nProcessing file %s..." % InfileName)
    InputMols = RDKitUtil.ReadMolecules(InfileName,
                                        **OptionsInfo["InfileParams"])

    # Writer...
    OutWriter = SetupMoleculeWriter()

    # Process molecules...
    TotalCount, ValidCount = ProcessMolecules(InputMols, OutWriter)

    if OutWriter is not None:
        OutWriter.close()

    for Template, Count in (
        ("\nTotal number of molecules: %d", TotalCount),
        ("Number of valid molecules: %d", ValidCount),
        ("Number of ignored molecules: %d", TotalCount - ValidCount),
    ):
        MiscUtil.PrintInfo(Template % Count)
Пример #12
0
def CalculatePartialCharges():
    """Run the partial charge calculation over the input file and report counts.

    Molecules read from OptionsInfo["Infile"] are passed with the writer
    returned by SetupMoleculeWriter() to ProcessMolecules(). The writer is
    closed when it is not None and the returned counts are printed.
    """

    # Molecule reader...
    InfileName = OptionsInfo["Infile"]
    MiscUtil.PrintInfo("\nProcessing file %s..." % InfileName)
    InputMols = RDKitUtil.ReadMolecules(InfileName,
                                        **OptionsInfo["InfileParams"])

    # Molecule writer...
    OutWriter = SetupMoleculeWriter()

    Counts = ProcessMolecules(InputMols, OutWriter)
    TotalCount, ValidCount, FailedCount = Counts

    if OutWriter is not None:
        OutWriter.close()

    # Ignored = invalid molecules plus the valid ones that failed...
    IgnoredCount = TotalCount - ValidCount + FailedCount

    MiscUtil.PrintInfo("\nTotal number of molecules: %d" % TotalCount)
    MiscUtil.PrintInfo("Number of valid molecules: %d" % ValidCount)
    MiscUtil.PrintInfo(
        "Number of molecules failed during calculation of partial charges: %d"
        % FailedCount)
    MiscUtil.PrintInfo("Number of ignored molecules: %d" % IgnoredCount)
Пример #13
0
def GenerateMolecularFrameworks():
    """Generate Bemis Murcko molecular frameworks.

    Reads molecules from OptionsInfo["Infile"] and, for each molecule that is
    neither None nor empty, derives a framework: the result of
    MurckoScaffold.MakeScaffoldGeneric(Mol) when
    OptionsInfo["UseGraphFrameworks"] is set, otherwise
    MurckoScaffold.GetScaffoldForMol(Mol). Frameworks are written to
    OptionsInfo["Outfile"].

    When OptionsInfo["RemoveDuplicateFrameworks"] is set, a framework whose
    canonical SMILES has already been seen goes to
    OptionsInfo["DuplicatesOutfile"] instead. When
    OptionsInfo["SortFrameworks"] is set, frameworks are collected with their
    heavy atom counts and written at the end through SortAndWriteFrameworks()
    using OptionsInfo["DescendingSortOrder"].

    A summary of the framework and molecule counts is printed at the end.
    """

    Infile = OptionsInfo["Infile"]
    Outfile = OptionsInfo["Outfile"]

    UseChirality = OptionsInfo["UseChirality"]

    RemoveDuplicateFrameworks = OptionsInfo["RemoveDuplicateFrameworks"]
    UseGraphFrameworks = OptionsInfo["UseGraphFrameworks"]

    SortFrameworks = OptionsInfo["SortFrameworks"]

    # Tracking containers are only filled when sorting is requested...
    FrameworkMolIDs = []
    FrameworkMolIDToMolMap = {}
    FrameworkMolIDToAtomCountMap = {}

    DuplicateFrameworkMolIDs = []
    DuplicateFrameworkMolIDToMolMap = {}
    DuplicateFrameworkMolIDToAtomCountMap = {}

    DuplicatesOutfile = ""
    if RemoveDuplicateFrameworks:
        DuplicatesOutfile = OptionsInfo["DuplicatesOutfile"]

    # Setup a molecule reader...
    MiscUtil.PrintInfo("\nProcessing file %s..." % Infile)
    Mols = RDKitUtil.ReadMolecules(Infile, **OptionsInfo["InfileParams"])

    # Set up a molecular framework writer...
    Writer = RDKitUtil.MoleculesWriter(Outfile, **OptionsInfo["OutfileParams"])
    if Writer is None:
        MiscUtil.PrintError("Failed to setup a writer for output file %s " % Outfile)

    # Set up a duplicate molecular framework writer...
    DuplicatesWriter = None
    if RemoveDuplicateFrameworks:
        DuplicatesWriter = RDKitUtil.MoleculesWriter(DuplicatesOutfile, **OptionsInfo["OutfileParams"])
        # Check the duplicates writer itself, not the main writer...
        if DuplicatesWriter is None:
            MiscUtil.PrintError("Failed to setup a writer for duplicates output file %s " % DuplicatesOutfile)

    if RemoveDuplicateFrameworks:
        MiscUtil.PrintInfo("Generating files: %s and %s..." % (Outfile, DuplicatesOutfile))
    else:
        MiscUtil.PrintInfo("Generating file %s..." % Outfile)

    # Process molecules...
    MolCount = 0
    ValidMolCount = 0

    FrameworksCount = 0
    UniqueFrameworksCount = 0
    DuplicateFrameworksCount = 0

    # Canonical SMILES of the frameworks seen so far...
    SeenCanonicalSMILES = set()

    Compute2DCoords = OptionsInfo["OutfileParams"]["Compute2DCoords"]

    try:
        for Mol in Mols:
            MolCount += 1

            if Mol is None:
                continue

            if RDKitUtil.IsMolEmpty(Mol):
                MolName = RDKitUtil.GetMolName(Mol, MolCount)
                MiscUtil.PrintWarning("Ignoring empty molecule: %s" % MolName)
                continue

            ValidMolCount += 1

            if UseGraphFrameworks:
                FrameworksMol = MurckoScaffold.MakeScaffoldGeneric(Mol)
            else:
                FrameworksMol = MurckoScaffold.GetScaffoldForMol(Mol)

            if Compute2DCoords:
                AllChem.Compute2DCoords(FrameworksMol)

            if SortFrameworks:
                HeavyAtomCount = FrameworksMol.GetNumHeavyAtoms()

            FrameworksCount += 1

            if RemoveDuplicateFrameworks:
                CanonicalSMILES = Chem.MolToSmiles(FrameworksMol, isomericSmiles = UseChirality, canonical = True)
                if CanonicalSMILES in SeenCanonicalSMILES:
                    DuplicateFrameworksCount += 1
                    if SortFrameworks:
                        # Track duplicate frameworks...
                        DuplicateFrameworkMolIDs.append(DuplicateFrameworksCount)
                        DuplicateFrameworkMolIDToMolMap[DuplicateFrameworksCount] = FrameworksMol
                        DuplicateFrameworkMolIDToAtomCountMap[DuplicateFrameworksCount] = HeavyAtomCount
                    else:
                        # Write it out...
                        DuplicatesWriter.write(FrameworksMol)
                else:
                    UniqueFrameworksCount += 1
                    SeenCanonicalSMILES.add(CanonicalSMILES)
                    if SortFrameworks:
                        # Track unique frameworks...
                        FrameworkMolIDs.append(UniqueFrameworksCount)
                        FrameworkMolIDToMolMap[UniqueFrameworksCount] = FrameworksMol
                        FrameworkMolIDToAtomCountMap[UniqueFrameworksCount] = HeavyAtomCount
                    else:
                        # Write it out...
                        Writer.write(FrameworksMol)
            elif SortFrameworks:
                # Track for sorting...
                FrameworkMolIDs.append(FrameworksCount)
                FrameworkMolIDToMolMap[FrameworksCount] = FrameworksMol
                FrameworkMolIDToAtomCountMap[FrameworksCount] = HeavyAtomCount
            else:
                # Write it out...
                Writer.write(FrameworksMol)

        if SortFrameworks:
            ReverseOrder = OptionsInfo["DescendingSortOrder"]
            SortAndWriteFrameworks(Writer, FrameworkMolIDs, FrameworkMolIDToMolMap, FrameworkMolIDToAtomCountMap, ReverseOrder)
            if RemoveDuplicateFrameworks:
                SortAndWriteFrameworks(DuplicatesWriter, DuplicateFrameworkMolIDs, DuplicateFrameworkMolIDToMolMap, DuplicateFrameworkMolIDToAtomCountMap, ReverseOrder)
    finally:
        # Close the writers even when processing raises...
        if Writer is not None:
            Writer.close()
        if DuplicatesWriter is not None:
            DuplicatesWriter.close()

    MiscUtil.PrintInfo("\nTotal number of molecular frameworks: %d" % FrameworksCount)
    if RemoveDuplicateFrameworks:
        MiscUtil.PrintInfo("Number of unique molecular frameworks: %d" % UniqueFrameworksCount)
        MiscUtil.PrintInfo("Number of duplicate molecular frameworks: %d" % DuplicateFrameworksCount)

    MiscUtil.PrintInfo("\nTotal number of molecules: %d" % MolCount)
    MiscUtil.PrintInfo("Number of valid molecules: %d" % ValidMolCount)
    MiscUtil.PrintInfo("Number of ignored molecules: %d" % (MolCount - ValidMolCount))
Пример #14
0
def RemoveSalts():
    """Identify and remove salts from molecules.

    Reads molecules from OptionsInfo["Infile"] and calls RemoveMolSalts() on
    each molecule that is neither None nor empty, counting those it reports as
    salty. Unless OptionsInfo["CountMode"] is set, the unsalted molecule is
    written to OptionsInfo["Outfile"]. A SaltRemover built from
    OptionsInfo["SaltsFile"] and OptionsInfo["SaltsSMARTS"] is passed to
    RemoveMolSalts() unless OptionsInfo["SaltsByComponentsMode"] is set, in
    which case None is passed. A summary of the counts is printed at the end.
    """

    Infile = OptionsInfo["Infile"]
    Outfile = OptionsInfo["Outfile"]

    CountMode = OptionsInfo["CountMode"]

    # Setup a molecule reader...
    MiscUtil.PrintInfo("\nProcessing file %s..." % Infile)
    Mols = RDKitUtil.ReadMolecules(Infile, **OptionsInfo["InfileParams"])

    # Set up a molecule writer; nothing is written in count mode...
    Writer = None
    if not CountMode:
        Writer = RDKitUtil.MoleculesWriter(Outfile,
                                           **OptionsInfo["OutfileParams"])
        if Writer is None:
            MiscUtil.PrintError("Failed to setup a writer for output file %s " %
                                Outfile)

        MiscUtil.PrintInfo("Generating file %s..." % (Outfile))

    try:
        # Set up a salt remover...
        SaltsByComponentsMode = OptionsInfo["SaltsByComponentsMode"]
        Remover = None
        if not SaltsByComponentsMode:
            Remover = SaltRemover(defnFilename=OptionsInfo["SaltsFile"],
                                  defnData=OptionsInfo["SaltsSMARTS"],
                                  defnFormat=InputFormat.SMARTS)

        # Process molecules...
        MolCount = 0
        ValidMolCount = 0
        SaltsMolCount = 0

        Compute2DCoords = OptionsInfo["OutfileParams"]["Compute2DCoords"]

        for Mol in Mols:
            MolCount += 1

            if Mol is None:
                continue

            if RDKitUtil.IsMolEmpty(Mol):
                MolName = RDKitUtil.GetMolName(Mol, MolCount)
                MiscUtil.PrintWarning("Ignoring empty molecule: %s" % MolName)
                continue

            ValidMolCount += 1

            UnsaltedMol, SaltyStatus = RemoveMolSalts(Mol, Remover, MolCount)

            if SaltyStatus:
                SaltsMolCount += 1

            if not CountMode:
                if Compute2DCoords:
                    AllChem.Compute2DCoords(UnsaltedMol)
                Writer.write(UnsaltedMol)
    finally:
        # Close the writer even when processing raises...
        if Writer is not None:
            Writer.close()

    MiscUtil.PrintInfo("\nTotal number of molecules: %d" % MolCount)
    MiscUtil.PrintInfo("Number of valid molecules: %d" % ValidMolCount)
    MiscUtil.PrintInfo("Number of ignored molecules: %d" %
                       (MolCount - ValidMolCount))

    MiscUtil.PrintInfo("\nNumber of molecules containing salts: %d" %
                       (SaltsMolCount))
Пример #15
0
def PerformFiltering():
    """Filter molecules using SMARTS specified in PAINS filter file.

    Pattern molecules are built by SetupPAINSPatternMols() from the patterns
    returned by RetrievePAINSPatterns() for OptionsInfo["PAINSMode"]. Each
    molecule from OptionsInfo["Infile"] that is neither None nor empty is
    tested with DoesMoleculeContainsPAINSPattern(); it is counted when the
    result equals OptionsInfo["NegateMatch"] and, unless
    OptionsInfo["CountMode"] is set, written to OptionsInfo["Outfile"].
    A summary of the counts is printed at the end.
    """

    Infile = OptionsInfo["Infile"]
    Outfile = OptionsInfo["Outfile"]

    CountMode = OptionsInfo["CountMode"]
    NegateMatch = OptionsInfo["NegateMatch"]
    PAINSMode = OptionsInfo["PAINSMode"]

    # Setup PAINS patterns and pattern mols...
    PAINSPatterns = RetrievePAINSPatterns(PAINSMode)
    PAINSPatternMols = SetupPAINSPatternMols(PAINSPatterns)

    # Setup a molecule reader...
    MiscUtil.PrintInfo("\nProcessing file %s..." % Infile)
    Mols = RDKitUtil.ReadMolecules(Infile, **OptionsInfo["InfileParams"])

    # Set up a molecule writer; nothing is written in count mode...
    Writer = None
    if not CountMode:
        Writer = RDKitUtil.MoleculesWriter(Outfile,
                                           **OptionsInfo["OutfileParams"])
        if Writer is None:
            MiscUtil.PrintError("Failed to setup a writer for output file %s " %
                                Outfile)
        MiscUtil.PrintInfo("Generating file %s..." % Outfile)

    try:
        Compute2DCoords = OptionsInfo["OutfileParams"]["Compute2DCoords"]

        # Process molecules...
        MolCount = 0
        ValidMolCount = 0
        FilteredCount = 0

        for Mol in Mols:
            MolCount += 1

            if Mol is None:
                continue

            if RDKitUtil.IsMolEmpty(Mol):
                MolName = RDKitUtil.GetMolName(Mol, MolCount)
                MiscUtil.PrintWarning("Ignoring empty molecule: %s" % MolName)
                continue

            ValidMolCount += 1

            MolMatched = DoesMoleculeContainsPAINSPattern(Mol, PAINSPatternMols)
            if MolMatched == NegateMatch:
                FilteredCount += 1
                if not CountMode:
                    if Compute2DCoords:
                        AllChem.Compute2DCoords(Mol)
                    Writer.write(Mol)
    finally:
        # Close the writer even when processing raises...
        if Writer is not None:
            Writer.close()

    MiscUtil.PrintInfo("\nTotal number of molecules: %d" % MolCount)
    MiscUtil.PrintInfo("Number of valid molecules: %d" % ValidMolCount)
    MiscUtil.PrintInfo("Number of ignored molecules: %d" %
                       (MolCount - ValidMolCount))

    MiscUtil.PrintInfo("\nTotal number of filtered molecules: %d" %
                       FilteredCount)
def PerformCalculations():
    """Calculate descriptors for a specified list of descriptors.

    For each molecule from OptionsInfo["Infile"] that is neither None nor
    empty, every descriptor named in OptionsInfo["SpecifiedDescriptorNames"]
    is computed with the function registered under
    DescriptorNamesMap["ComputeFunction"] and formatted with
    FormatCalculatedValue() using OptionsInfo["Precision"].

    When OptionsInfo["TextOutFileMode"] is set, the output is a delimited text
    file with a header line and one line per molecule (optional SMILES
    column, molecule ID, descriptor values). Otherwise the values are set as
    properties on the molecule, which is written through a molecule writer.
    A summary of the counts is printed at the end.
    """

    Infile = OptionsInfo["Infile"]
    Outfile = OptionsInfo["Outfile"]

    DescriptorNames = OptionsInfo["SpecifiedDescriptorNames"]
    DescriptorCount = len(DescriptorNames)

    TextOutFileMode = OptionsInfo["TextOutFileMode"]
    TextOutFileDelim = OptionsInfo["TextOutFileDelim"]
    SMILESOut = OptionsInfo["SMILESOut"]

    Precision = OptionsInfo["Precision"]

    MiscUtil.PrintInfo("Calculating %d molecular descriptor(s) for each molecule..." % DescriptorCount)

    # Setup a molecule reader...
    MiscUtil.PrintInfo("\nProcessing file %s..." % Infile)
    Mols = RDKitUtil.ReadMolecules(Infile, **OptionsInfo["InfileParams"])

    # Setup a writer: a plain file handle in text mode, a molecule writer otherwise...
    Compute2DCoords = OptionsInfo["OutfileParams"]["Compute2DCoords"]
    if TextOutFileMode:
        Writer = open(Outfile, "w")
    else:
        Writer = RDKitUtil.MoleculesWriter(Outfile, **OptionsInfo["OutfileParams"])

    if Writer is None:
        MiscUtil.PrintError("Failed to setup a writer for output file %s " % Outfile)

    MiscUtil.PrintInfo("Generating file %s..." % Outfile)

    MolCount = 0
    ValidMolCount = 0

    try:
        # Write out headers for a text file...
        if TextOutFileMode:
            LineWords = []
            if SMILESOut:
                LineWords.append("SMILES")
            LineWords.append("MolID")
            LineWords.extend(DescriptorNames)
            Line = TextOutFileDelim.join(LineWords)
            Writer.write("%s\n" % Line)

        # Process molecules...
        for Mol in Mols:
            MolCount += 1
            if Mol is None:
                continue

            if RDKitUtil.IsMolEmpty(Mol):
                MolName = RDKitUtil.GetMolName(Mol, MolCount)
                MiscUtil.PrintWarning("Ignoring empty molecule: %s" % MolName)
                continue

            ValidMolCount += 1

            # Calculate descriptors, in the order the names were specified...
            CalculatedValues = []
            for Name in DescriptorNames:
                ComputeFunction = DescriptorNamesMap["ComputeFunction"][Name]
                Value = FormatCalculatedValue(ComputeFunction(Mol), Precision)
                CalculatedValues.append(Value)

            # Write out calculated values...
            if TextOutFileMode:
                LineWords = []
                if SMILESOut:
                    SMILES = Chem.MolToSmiles(Mol, isomericSmiles = True, canonical = True)
                    LineWords.append(SMILES)

                MolName = RDKitUtil.GetMolName(Mol, MolCount)
                LineWords.append(MolName)
                LineWords.extend(CalculatedValues)
                Line = TextOutFileDelim.join(LineWords)
                Writer.write("%s\n" % Line)
            else:
                for Name, Value in zip(DescriptorNames, CalculatedValues):
                    Mol.SetProp(Name, Value)

                if Compute2DCoords:
                    AllChem.Compute2DCoords(Mol)

                Writer.write(Mol)
    finally:
        # Close the file handle or writer even when processing raises...
        if Writer is not None:
            Writer.close()

    MiscUtil.PrintInfo("\nTotal number of molecules: %d" % MolCount)
    MiscUtil.PrintInfo("Number of valid molecules: %d" % ValidMolCount)
    MiscUtil.PrintInfo("Number of ignored molecules: %d" % (MolCount - ValidMolCount))
def GenerateConformers():
    """Generate conformers.

    For each molecule from OptionsInfo["Infile"] that is neither None nor
    empty, calls GenerateMolConformers() when
    OptionsInfo["SkipForceFieldMinimization"] is set and
    GenerateAndMinimizeMolConformers() otherwise, passing the writer for
    OptionsInfo["Outfile"]. A falsy return status counts the molecule as
    failed. A summary of the counts is printed at the end.
    """

    Infile = OptionsInfo["Infile"]
    Outfile = OptionsInfo["Outfile"]

    # Setup a molecule reader...
    MiscUtil.PrintInfo("\nProcessing file %s..." % Infile)
    Mols = RDKitUtil.ReadMolecules(Infile, **OptionsInfo["InfileParams"])

    # Set up a molecule writer...
    Writer = RDKitUtil.MoleculesWriter(Outfile, **OptionsInfo["OutfileParams"])
    if Writer is None:
        MiscUtil.PrintError("Failed to setup a writer for output file %s " %
                            Outfile)
    MiscUtil.PrintInfo("Generating file %s...\n" % Outfile)

    # Process molecules...
    MolCount = 0
    ValidMolCount = 0
    ConfGenFailedCount = 0

    try:
        SkipMinimization = OptionsInfo["SkipForceFieldMinimization"]

        if SkipMinimization:
            MiscUtil.PrintInfo(
                "Generating conformers without performing energy minimization...\n"
            )
        else:
            MiscUtil.PrintInfo(
                "Generating conformers and performing energy minimization...\n")

        for Mol in Mols:
            MolCount += 1

            if Mol is None:
                continue

            if RDKitUtil.IsMolEmpty(Mol):
                MolName = RDKitUtil.GetMolName(Mol, MolCount)
                MiscUtil.PrintWarning("Ignoring empty molecule: %s" % MolName)
                continue

            ValidMolCount += 1

            if SkipMinimization:
                Status = GenerateMolConformers(Mol, MolCount, Writer)
            else:
                Status = GenerateAndMinimizeMolConformers(Mol, MolCount, Writer)

            if not Status:
                ConfGenFailedCount += 1
    finally:
        # Close the writer even when processing raises...
        if Writer is not None:
            Writer.close()

    MiscUtil.PrintInfo("\nTotal number of molecules: %d" % MolCount)
    MiscUtil.PrintInfo("Number of valid molecules: %d" % ValidMolCount)
    MiscUtil.PrintInfo(
        "Number of molecules failed during conformation generation or minimization: %d"
        % ConfGenFailedCount)
    # Ignored = invalid molecules plus the valid ones that failed...
    MiscUtil.PrintInfo("Number of ignored molecules: %d" %
                       (MolCount - ValidMolCount + ConfGenFailedCount))
Пример #18
0
def RemoveDuplicates():
    """Identify and remove duplicate molecules based on canonical SMILES.

    Reads molecules from OptionsInfo["Infile"] and generates a canonical
    SMILES string for every molecule that is neither None nor empty. The
    first molecule seen with a given SMILES string is counted as unique;
    any later molecule with the same string is counted as a duplicate.

    Unless OptionsInfo["CountMode"] is set, unique molecules are written to
    OptionsInfo["Outfile"] and duplicates to OptionsInfo["DuplicatesOutfile"].
    Summary counts are printed once all molecules have been processed.
    """

    Infile = OptionsInfo["Infile"]
    Outfile = OptionsInfo["Outfile"]
    DuplicatesOutfile = OptionsInfo["DuplicatesOutfile"]

    CountMode = OptionsInfo["CountMode"]
    UseChirality = OptionsInfo["UseChirality"]

    # Setup a molecule reader...
    MiscUtil.PrintInfo("\nProcessing file %s..." % Infile)
    Mols = RDKitUtil.ReadMolecules(Infile, **OptionsInfo["InfileParams"])

    MolCount = 0
    ValidMolCount = 0

    UniqueMolCount = 0
    DuplicateMolCount = 0

    Writer = None
    DuplicatesWriter = None
    try:
        # Set up molecule writers; none are needed when only counting...
        if not CountMode:
            Writer = RDKitUtil.MoleculesWriter(Outfile, **OptionsInfo["OutfileParams"])
            if Writer is None:
                MiscUtil.PrintError("Failed to setup a writer for output file %s " % Outfile)
            DuplicatesWriter = RDKitUtil.MoleculesWriter(DuplicatesOutfile, **OptionsInfo["OutfileParams"])
            if DuplicatesWriter is None:
                MiscUtil.PrintError("Failed to setup a writer for output file %s " % DuplicatesOutfile)

            MiscUtil.PrintInfo("Generating files %s and %s..." % (Outfile, DuplicatesOutfile))

        # Canonical SMILES strings seen so far; only membership is needed.
        SeenCanonicalSMILES = set()
        Compute2DCoords = OptionsInfo["OutfileParams"]["Compute2DCoords"]

        # Process molecules...
        for Mol in Mols:
            MolCount += 1

            if Mol is None:
                continue

            if RDKitUtil.IsMolEmpty(Mol):
                MolName = RDKitUtil.GetMolName(Mol, MolCount)
                MiscUtil.PrintWarning("Ignoring empty molecule: %s" % MolName)
                continue

            ValidMolCount += 1

            CanonicalSMILES = Chem.MolToSmiles(Mol, isomericSmiles=UseChirality, canonical=True)

            # 2D coordinates only matter for molecules that get written out.
            if Compute2DCoords and not CountMode:
                AllChem.Compute2DCoords(Mol)

            if CanonicalSMILES in SeenCanonicalSMILES:
                DuplicateMolCount += 1
                if not CountMode:
                    DuplicatesWriter.write(Mol)
            else:
                UniqueMolCount += 1
                SeenCanonicalSMILES.add(CanonicalSMILES)
                if not CountMode:
                    Writer.write(Mol)
    finally:
        # Close whatever was opened, even if processing raised part way.
        if Writer is not None:
            Writer.close()

        if DuplicatesWriter is not None:
            DuplicatesWriter.close()

    MiscUtil.PrintInfo("\nTotal number of molecules: %d" % MolCount)
    MiscUtil.PrintInfo("Number of valid molecules: %d" % ValidMolCount)
    MiscUtil.PrintInfo("Number of ignored molecules: %d" % (MolCount - ValidMolCount))

    MiscUtil.PrintInfo("\nTotal number of unique molecules: %d" % UniqueMolCount)
    MiscUtil.PrintInfo("Total number of duplicate molecules: %d" % DuplicateMolCount)
Пример #19
0
def PerformSearch():
    """Search the input molecules for the specified SMARTS pattern.

    A molecule counts as a match when the result of its substructure
    search differs from OptionsInfo["NegateMatch"]. Matching molecules are
    written to OptionsInfo["Outfile"] unless OptionsInfo["CountMode"] is
    set. Summary counts are printed after all molecules are processed.
    """

    InputPath = OptionsInfo["Infile"]
    OutputPath = OptionsInfo["Outfile"]

    CountOnly = OptionsInfo["CountMode"]
    InvertMatch = OptionsInfo["NegateMatch"]
    ChiralityAware = OptionsInfo["UseChirality"]

    # Build the query molecule from the SMARTS string...
    QueryMol = Chem.MolFromSmarts(OptionsInfo["Pattern"])

    # Open the input molecules...
    MiscUtil.PrintInfo("\nProcessing file %s..." % InputPath)
    InputMols = RDKitUtil.ReadMolecules(InputPath, **OptionsInfo["InfileParams"])

    # A writer is required only when matches are actually written out...
    MolWriter = None
    if not CountOnly:
        MolWriter = RDKitUtil.MoleculesWriter(OutputPath, **OptionsInfo["OutfileParams"])
        if MolWriter is None:
            MiscUtil.PrintError("Failed to setup a writer for output fie %s " % OutputPath)
        MiscUtil.PrintInfo("Generating file %s..." % OutputPath)

    Need2DCoords = OptionsInfo["OutfileParams"]["Compute2DCoords"]

    # Walk through the molecules, tallying as we go...
    TotalCount = 0
    UsableCount = 0
    HitCount = 0

    for Mol in InputMols:
        TotalCount += 1

        if Mol is None:
            continue

        if RDKitUtil.IsMolEmpty(Mol):
            EmptyMolName = RDKitUtil.GetMolName(Mol, TotalCount)
            MiscUtil.PrintWarning("Ignoring empty molecule: %s" % EmptyMolName)
            continue

        UsableCount += 1

        # A match status equal to the negate flag means "not selected".
        if Mol.HasSubstructMatch(QueryMol, useChirality=ChiralityAware) == InvertMatch:
            continue

        HitCount += 1
        if CountOnly:
            continue

        if Need2DCoords:
            AllChem.Compute2DCoords(Mol)
        MolWriter.write(Mol)

    if MolWriter is not None:
        MolWriter.close()

    MiscUtil.PrintInfo("\nTotal number of molecules: %d" % TotalCount)
    MiscUtil.PrintInfo("Number of valid molecules: %d" % UsableCount)
    MiscUtil.PrintInfo("Number of ignored molecules: %d" % (TotalCount - UsableCount))

    SummaryFormat = (
        "\nTotal number of not matched molecules: %d"
        if InvertMatch
        else "\nTotal number of matched molecules: %d"
    )
    MiscUtil.PrintInfo(SummaryFormat % HitCount)
def PerformEnumeration():
    """Enumerate stereoisomers for each input molecule.

    Every molecule that is neither None nor empty is expanded into its
    stereoisomers using options taken from OptionsInfo. Each isomer is
    named "<MolName>_Isomer<N>" and written to OptionsInfo["Outfile"].
    Per-molecule isomer counts and overall totals are printed.
    """

    InputPath = OptionsInfo["Infile"]
    OutputPath = OptionsInfo["Outfile"]

    # Open the input molecules...
    MiscUtil.PrintInfo("\nProcessing file %s..." % InputPath)
    InputMols = RDKitUtil.ReadMolecules(InputPath, **OptionsInfo["InfileParams"])

    # Open the output writer...
    MolWriter = RDKitUtil.MoleculesWriter(OutputPath, **OptionsInfo["OutfileParams"])
    if MolWriter is None:
        MiscUtil.PrintError("Failed to setup a writer for output fie %s " % OutputPath)
    MiscUtil.PrintInfo("Generating file %s...\n" % OutputPath)

    # Translate command line options into enumeration options...
    EnumerationOptions = StereoEnumerationOptions(
        tryEmbedding=OptionsInfo["DiscardNonPhysical"],
        onlyUnassigned=OptionsInfo["UnassignedOnly"],
        maxIsomers=OptionsInfo["MaxIsomers"],
    )

    Need2DCoords = OptionsInfo["OutfileParams"]["Compute2DCoords"]

    TotalCount = 0
    UsableCount = 0

    for Mol in InputMols:
        TotalCount += 1

        if Mol is None:
            continue

        if RDKitUtil.IsMolEmpty(Mol):
            EmptyMolName = RDKitUtil.GetMolName(Mol, TotalCount)
            MiscUtil.PrintWarning("Ignoring empty molecule: %s" % EmptyMolName)
            continue

        UsableCount += 1

        ParentName = RDKitUtil.GetMolName(Mol, TotalCount)

        # Name and write out each stereoisomer; the counter stays at 0
        # when the enumeration yields nothing.
        IsomerTotal = 0
        Isomers = EnumerateStereoisomers(Mol, options=EnumerationOptions)
        for IsomerTotal, Isomer in enumerate(Isomers, start=1):
            Isomer.SetProp("_Name", "%s_Isomer%d" % (ParentName, IsomerTotal))

            if Need2DCoords:
                AllChem.Compute2DCoords(Isomer)

            MolWriter.write(Isomer)

        MiscUtil.PrintInfo("Number of stereoisomers written for %s: %d" % (ParentName, IsomerTotal))

    if MolWriter is not None:
        MolWriter.close()

    MiscUtil.PrintInfo("\nTotal number of molecules: %d" % TotalCount)
    MiscUtil.PrintInfo("Number of valid molecules: %d" % UsableCount)
    MiscUtil.PrintInfo("Number of ignored molecules: %d" % (TotalCount - UsableCount))
Пример #21
0
def RemoveInvalidMolecules():
    """Identify and remove invalid molecules.

    Reads molecules from OptionsInfo["Infile"]. Entries that are None or
    empty are reported as invalid; all others are counted as valid.

    Unless OptionsInfo["CountMode"] is set, valid molecules are written to
    OptionsInfo["Outfile"], and the names of invalid ones are written, one
    per line below a "MolName" header, to OptionsInfo["InvalidMolsOutfile"].
    Summary counts are printed once all molecules have been processed.

    Raises:
        OSError: if the invalid molecules output file cannot be opened.
    """

    Infile = OptionsInfo["Infile"]
    Outfile = OptionsInfo["Outfile"]
    InvalidMolsOutfile = OptionsInfo["InvalidMolsOutfile"]

    CountMode = OptionsInfo["CountMode"]

    # Setup a molecule reader...
    MiscUtil.PrintInfo("\nProcessing file %s..." % Infile)
    Mols = RDKitUtil.ReadMolecules(Infile, **OptionsInfo["InfileParams"])

    MolCount = 0
    ValidMolCount = 0

    Writer = None
    InvalidMolsWriter = None
    try:
        if not CountMode:
            # Set up a molecule writer...
            Writer = RDKitUtil.MoleculesWriter(Outfile,
                                               **OptionsInfo["OutfileParams"])
            if Writer is None:
                MiscUtil.PrintError(
                    "Failed to setup a writer for output file %s " % Outfile)

            # Set up a writer for the names of invalid molecules. open()
            # raises on failure instead of returning None, so there is
            # nothing to check here.
            InvalidMolsWriter = open(InvalidMolsOutfile, "w")
            InvalidMolsWriter.write("MolName\n")

            MiscUtil.PrintInfo("Generating files %s and %s..." %
                               (Outfile, InvalidMolsOutfile))

        Compute2DCoords = OptionsInfo["OutfileParams"]["Compute2DCoords"]
        SetSMILESMolProps = OptionsInfo["OutfileParams"]["SetSMILESMolProps"]

        # Process molecules...
        FirstMol = True
        for Mol in Mols:
            MolCount += 1

            if Mol is None:
                # No molecule object to ask for a name; use its position.
                MolName = "Mol%s" % MolCount
                MiscUtil.PrintWarning("Ignoring invalid molecule:  %s" %
                                      MolName)
                if not CountMode:
                    InvalidMolsWriter.write("%s\n" % MolName)
                continue

            if RDKitUtil.IsMolEmpty(Mol):
                MolName = RDKitUtil.GetMolName(Mol, MolCount)
                MiscUtil.PrintWarning("Ignoring invalid empty molecule: %s" %
                                      MolName)
                if not CountMode:
                    InvalidMolsWriter.write("%s\n" % MolName)
                continue

            ValidMolCount += 1

            if CountMode:
                # Nothing to write...
                FirstMol = False
                continue

            # Writer properties are set once, from the first valid molecule.
            if FirstMol:
                FirstMol = False
                if SetSMILESMolProps:
                    RDKitUtil.SetWriterMolProps(Writer, Mol)

            if Compute2DCoords:
                AllChem.Compute2DCoords(Mol)

            Writer.write(Mol)
    finally:
        # Close whatever was opened, even if processing raised part way.
        if Writer is not None:
            Writer.close()

        if InvalidMolsWriter is not None:
            InvalidMolsWriter.close()

    MiscUtil.PrintInfo("\nTotal number of molecules: %d" % MolCount)
    MiscUtil.PrintInfo("Number of valid molecules: %d" % ValidMolCount)
    MiscUtil.PrintInfo("Number of invalid molecules: %d" %
                       (MolCount - ValidMolCount))
def PerformSearch():
    """Perform search using SMARTS pattern for specified functional groups.

    Each molecule that is neither None nor empty is tested against every
    specified functional group pattern, and a per-group match count is
    kept. The per-group results are then combined into a single verdict
    for the molecule by DoesMolMeetSpecifiedMatchCriteria.

    Unless OptionsInfo["CountMode"] is set, a matched molecule is written
    either to the single combined writer (when
    OptionsInfo["CombineMatchResults"] is set) or to the writer of every
    group whose pattern it matched. Overall and per-group counts are
    printed at the end.
    """

    Infile = OptionsInfo["Infile"]
    Outfile = OptionsInfo["Outfile"]

    Groups = OptionsInfo["SpecifiedFunctionalGroups"]
    GroupsNegateMatch = OptionsInfo["SpecifiedFunctionalGroupsNegateMatch"]
    GroupsPatternMols = OptionsInfo["SpecifiedFunctionalGroupsPatternMols"]

    GroupsOutfiles = OptionsInfo["SpecifiedFunctionalGroupsOutfiles"]
    GroupsCount = len(Groups)

    CombineMatchResults = OptionsInfo["CombineMatchResults"]
    AndCombineOperatorMode = OptionsInfo["AndCombineOperatorMode"]

    CountMode = OptionsInfo["CountMode"]
    UseChirality = OptionsInfo["UseChirality"]

    # Setup a molecule reader...
    MiscUtil.PrintInfo("\nProcessing file %s..." % Infile)
    Mols = RDKitUtil.ReadMolecules(Infile, **OptionsInfo["InfileParams"])

    # Initialize pattern mols match count and status...
    GroupsPatternMolsMatchCount = [0] * GroupsCount
    GroupsPatternMolsMatchStatus = [False] * GroupsCount

    MolCount = 0
    ValidMolCount = 0
    MatchCount = 0

    Writer = None
    GroupOutfilesWriters = []
    try:
        # Set up molecule writers; none are needed when only counting...
        if not CountMode:
            Writer, GroupOutfilesWriters = SetupMoleculeWriters(
                CombineMatchResults, Outfile, GroupsOutfiles)

        Compute2DCoords = OptionsInfo["OutfileParams"]["Compute2DCoords"]

        # Process molecules...
        for Mol in Mols:
            MolCount += 1

            if Mol is None:
                continue

            if RDKitUtil.IsMolEmpty(Mol):
                MolName = RDKitUtil.GetMolName(Mol, MolCount)
                MiscUtil.PrintWarning("Ignoring empty molecule: %s" % MolName)
                continue

            ValidMolCount += 1

            # Match pattern mols; the status list is reused across molecules
            # and fully overwritten here for the current one...
            for GroupIndex in range(GroupsCount):
                GroupsPatternMolsMatchStatus[GroupIndex] = DoesPatternMolMatch(
                    GroupsPatternMols[GroupIndex], Mol, UseChirality,
                    GroupsNegateMatch[GroupIndex])
                if GroupsPatternMolsMatchStatus[GroupIndex]:
                    GroupsPatternMolsMatchCount[GroupIndex] += 1

            # Match mol against all specified criteria...
            MolMatched = DoesMolMeetSpecifiedMatchCriteria(
                GroupsPatternMolsMatchStatus, CombineMatchResults,
                AndCombineOperatorMode)
            if MolMatched:
                MatchCount += 1

            # Nothing to write...
            if CountMode or (not MolMatched):
                continue

            # Write out matched molecules...
            if Compute2DCoords:
                AllChem.Compute2DCoords(Mol)

            if CombineMatchResults:
                Writer.write(Mol)
            else:
                for GroupIndex in range(GroupsCount):
                    if GroupsPatternMolsMatchStatus[GroupIndex]:
                        GroupOutfilesWriters[GroupIndex].write(Mol)
    finally:
        # Close whatever was opened, even if processing raised part way.
        if Writer is not None:
            Writer.close()
        for GroupOutfileWriter in GroupOutfilesWriters:
            GroupOutfileWriter.close()

    MiscUtil.PrintInfo("\nTotal number of molecules: %d" % MolCount)
    MiscUtil.PrintInfo("Number of valid molecules: %d" % ValidMolCount)
    MiscUtil.PrintInfo("Number of ignored molecules: %d" %
                       (MolCount - ValidMolCount))

    MiscUtil.PrintInfo(
        "\nTotal number of molecules matched against specified match criteria: %d"
        % MatchCount)

    MiscUtil.PrintInfo(
        "\nNumber of molecules matched against individual functional groups:")
    MiscUtil.PrintInfo("FunctionalGroupName,MatchCount")

    for GroupIndex in range(GroupsCount):
        GroupName = Groups[GroupIndex]
        # Negated groups are reported with a leading '!'.
        if GroupsNegateMatch[GroupIndex]:
            GroupName = '!' + GroupName
        GroupMatchCount = GroupsPatternMolsMatchCount[GroupIndex]
        MiscUtil.PrintInfo("%s,%d" % (GroupName, GroupMatchCount))