def PerformSearch():
    """Search input molecules with the specified SMARTS pattern and report counts.

    The pattern is taken from OptionsInfo["Pattern"] and the molecules from
    OptionsInfo["Infile"]. The per-molecule work is delegated to
    ProcessMolecules; this function wires up the reader and writers, closes
    the writers and prints the summary of the returned counts.
    """

    # Pattern molecule for the substructure query...
    QueryMol = Chem.MolFromSmarts(OptionsInfo["Pattern"])

    # Molecule reader...
    InputFile = OptionsInfo["Infile"]
    MiscUtil.PrintInfo("\nProcessing file %s..." % InputFile)
    InputMols = RDKitUtil.ReadMolecules(InputFile, **OptionsInfo["InfileParams"])

    # Molecule writers...
    Writer, WriterFiltered = SetupMoleculeWriters()

    MolCount, ValidMolCount, RemainingMolCount = ProcessMolecules(
        InputMols, QueryMol, Writer, WriterFiltered)

    # Either writer may be None, so close only the ones that exist...
    for OutWriter in (Writer, WriterFiltered):
        if OutWriter is not None:
            OutWriter.close()

    Summary = (
        ("\nTotal number of molecules: %d", MolCount),
        ("Number of valid molecules: %d", ValidMolCount),
        ("Number of ignored molecules: %d", MolCount - ValidMolCount),
        ("\nNumber of remaining molecules: %d", RemainingMolCount),
        ("Number of filtered molecules: %d", ValidMolCount - RemainingMolCount),
    )
    for Template, Value in Summary:
        MiscUtil.PrintInfo(Template % Value)
def PerformConstrainedMinimization():
    """Drive constrained minimization of the input molecules against a reference.

    Retrieves the reference molecule, reads molecules from
    OptionsInfo["Infile"] (with "AllowEmptyMols" forced to True in
    OptionsInfo["InfileParams"]), hands both to ProcessMolecules together
    with the output writer and prints a summary of the returned counts.
    """

    # Reference molecule...
    ReferenceMol = RetrieveReferenceMolecule()

    # Molecule reader for the input file...
    InputFile = OptionsInfo["Infile"]
    MiscUtil.PrintInfo("\nProcessing file %s..." % InputFile)
    OptionsInfo["InfileParams"]["AllowEmptyMols"] = True
    InputMols = RDKitUtil.ReadMolecules(InputFile, **OptionsInfo["InfileParams"])

    # Molecule writer...
    OutputFile = OptionsInfo["Outfile"]
    Writer = RDKitUtil.MoleculesWriter(OutputFile, **OptionsInfo["OutfileParams"])
    if Writer is None:
        MiscUtil.PrintError("Failed to setup a writer for output fie %s " % OptionsInfo["Outfile"])
    MiscUtil.PrintInfo("Generating file %s..." % OptionsInfo["Outfile"])

    Counts = ProcessMolecules(ReferenceMol, InputMols, Writer)
    MolCount, ValidMolCount, CoreScaffoldMissingCount, MinimizationFailedCount = Counts

    if Writer is not None:
        Writer.close()

    IgnoredCount = MolCount - ValidMolCount + CoreScaffoldMissingCount + MinimizationFailedCount
    Summary = (
        ("\nTotal number of molecules: %d", MolCount),
        ("Number of valid molecules: %d", ValidMolCount),
        ("Number of molecules with missing core scaffold: %d", CoreScaffoldMissingCount),
        ("Number of molecules failed during conformation generation or minimization: %d", MinimizationFailedCount),
        ("Number of ignored molecules: %d", IgnoredCount),
    )
    for Template, Value in Summary:
        MiscUtil.PrintInfo(Template % Value)
def PerformFiltering():
    """Filter input molecules with the SMARTS patterns from the ChEMBL filters.

    Sets up the ChEMBL pattern molecules, reads molecules from
    OptionsInfo["Infile"], delegates the per-molecule work to
    ProcessMolecules and prints a summary of the returned counts.
    """

    # ChEMBL patterns and the corresponding pattern molecules...
    MiscUtil.PrintInfo(
        "\nSetting up ChEMBL pattern molecules for performing substructure search..."
    )
    PatternMols = SetupChEMBLPatternMols(RetrieveChEMBLPatterns())

    # Molecule reader...
    InputFile = OptionsInfo["Infile"]
    MiscUtil.PrintInfo("\nProcessing file %s..." % InputFile)
    InputMols = RDKitUtil.ReadMolecules(InputFile, **OptionsInfo["InfileParams"])

    # Molecule writers...
    Writer, WriterFiltered = SetupMoleculeWriters()

    MolCount, ValidMolCount, RemainingMolCount = ProcessMolecules(
        InputMols, PatternMols, Writer, WriterFiltered)

    # Either writer may be None, so close only the ones that exist...
    for OutWriter in (Writer, WriterFiltered):
        if OutWriter is not None:
            OutWriter.close()

    Summary = (
        ("\nTotal number of molecules: %d", MolCount),
        ("Number of valid molecules: %d", ValidMolCount),
        ("Number of ignored molecules: %d", MolCount - ValidMolCount),
        ("\nNumber of remaining molecules: %d", RemainingMolCount),
        ("Number of filtered molecules: %d", ValidMolCount - RemainingMolCount),
    )
    for Template, Value in Summary:
        MiscUtil.PrintInfo(Template % Value)
def GenerateConformers():
    """Drive conformer generation for the molecules in the input file.

    Reads molecules from OptionsInfo["Infile"], sets up a writer for
    OptionsInfo["Outfile"], delegates the work to ProcessMolecules and
    prints a summary of the returned counts.
    """

    # Molecule reader...
    InputFile = OptionsInfo["Infile"]
    MiscUtil.PrintInfo("\nProcessing file %s..." % InputFile)
    InputMols = RDKitUtil.ReadMolecules(InputFile, **OptionsInfo["InfileParams"])

    # Molecule writer...
    Writer = RDKitUtil.MoleculesWriter(OptionsInfo["Outfile"], **OptionsInfo["OutfileParams"])
    if Writer is None:
        MiscUtil.PrintError("Failed to setup a writer for output fie %s " % OptionsInfo["Outfile"])
    MiscUtil.PrintInfo("Generating file %s..." % OptionsInfo["Outfile"])

    MolCount, ValidMolCount, ConfGenFailedCount = ProcessMolecules(InputMols, Writer)

    if Writer is not None:
        Writer.close()

    IgnoredCount = MolCount - ValidMolCount + ConfGenFailedCount
    Summary = (
        ("\nTotal number of molecules: %d", MolCount),
        ("Number of valid molecules: %d", ValidMolCount),
        ("Number of molecules failed during conformation generation or minimization: %d", ConfGenFailedCount),
        ("Number of ignored molecules: %d", IgnoredCount),
    )
    for Template, Value in Summary:
        MiscUtil.PrintInfo(Template % Value)
def CalculateEnergy():
    """Drive the single point energy calculation for the input molecules.

    Reads molecules from OptionsInfo["Infile"], sets up a writer for
    OptionsInfo["Outfile"], delegates the work to ProcessMolecules and
    prints a summary of the returned counts.
    """

    # Molecule reader...
    InputFile = OptionsInfo["Infile"]
    MiscUtil.PrintInfo("\nProcessing file %s..." % InputFile)
    InputMols = RDKitUtil.ReadMolecules(InputFile, **OptionsInfo["InfileParams"])

    # Molecule writer...
    Writer = RDKitUtil.MoleculesWriter(OptionsInfo["Outfile"], **OptionsInfo["OutfileParams"])
    if Writer is None:
        MiscUtil.PrintError("Failed to setup a writer for output fie %s " % OptionsInfo["Outfile"])
    MiscUtil.PrintInfo("Generating file %s..." % OptionsInfo["Outfile"])

    MolCount, ValidMolCount, EnergyFailedCount = ProcessMolecules(InputMols, Writer)

    if Writer is not None:
        Writer.close()

    IgnoredCount = MolCount - ValidMolCount + EnergyFailedCount
    Summary = (
        ("\nTotal number of molecules: %d", MolCount),
        ("Number of valid molecules: %d", ValidMolCount),
        ("Number of molecules failed during energy calculation: %d", EnergyFailedCount),
        ("Number of ignored molecules: %d", IgnoredCount),
    )
    for Template, Value in Summary:
        MiscUtil.PrintInfo(Template % Value)
def RetrieveReactantsMolecules():
    """Read every reactant file and collect its usable molecules.

    Returns a list with one entry per file in
    OptionsInfo["ReactantFilesList"]; each entry is the list of molecules
    from that file that are neither None nor empty. When
    OptionsInfo["UseReactantNames"] is set, molecules without a name get a
    generated "React<file number>Mol<molecule number>" name.
    """

    MiscUtil.PrintInfo("\nProcessing reactant file(s)...")

    MolsPerReactant = []
    ReactantFilesList = OptionsInfo["ReactantFilesList"]
    UseReactantNames = OptionsInfo["UseReactantNames"]

    # Reactant numbers are 1-based...
    for ReactantNum, ReactantFile in enumerate(ReactantFilesList, start=1):
        MiscUtil.PrintInfo("\nProcessing reactant file: %s..." % ReactantFile)
        FileMols = RDKitUtil.ReadMolecules(ReactantFile, **OptionsInfo["InfileParams"])

        KeptMols = []
        MolCount = 0
        ValidMolCount = 0
        for Mol in FileMols:
            MolCount += 1

            if Mol is None:
                continue

            if RDKitUtil.IsMolEmpty(Mol):
                EmptyMolName = RDKitUtil.GetMolName(Mol, MolCount)
                MiscUtil.PrintWarning("Ignoring empty molecule: %s" % EmptyMolName)
                continue

            ValidMolCount += 1

            # Assign a generated name to unnamed molecules...
            if UseReactantNames:
                CurrentName = RDKitUtil.GetMolName(Mol)
                if len(CurrentName) == 0:
                    Mol.SetProp("_Name", "React%dMol%d" % (ReactantNum, MolCount))

            KeptMols.append(Mol)

        MolsPerReactant.append(KeptMols)

        # Per-file summary...
        MiscUtil.PrintInfo("Total number of molecules: %d" % MolCount)
        MiscUtil.PrintInfo("Number of valid molecules: %d" % ValidMolCount)
        MiscUtil.PrintInfo("Number of ignored molecules: %d" % (MolCount - ValidMolCount))

    return MolsPerReactant
def CalculatePartialCharges():
    """Calculate partial atomic charges for each input molecule and write them out.

    Molecules that are None or empty are skipped. Hydrogens are added
    before the charges are calculated; a molecule for which no charges
    come back is counted as failed and not written.
    """

    Infile = OptionsInfo["Infile"]
    Outfile = OptionsInfo["Outfile"]

    MiscUtil.PrintInfo("Calculating partial atomic charges...")

    # Molecule reader...
    MiscUtil.PrintInfo("\nProcessing file %s..." % Infile)
    InputMols = RDKitUtil.ReadMolecules(Infile, **OptionsInfo["InfileParams"])

    # Writer...
    Compute2DCoords = OptionsInfo["OutfileParams"]["Compute2DCoords"]
    Writer = RDKitUtil.MoleculesWriter(Outfile, **OptionsInfo["OutfileParams"])
    if Writer is None:
        MiscUtil.PrintError("Failed to setup a writer for output fie %s " % Outfile)
    MiscUtil.PrintInfo("Generating file %s..." % Outfile)

    # Walk through the molecules...
    MolCount = 0
    ValidMolCount = 0
    CalcFailedCount = 0
    for Mol in InputMols:
        MolCount += 1

        if Mol is None:
            continue

        if RDKitUtil.IsMolEmpty(Mol):
            EmptyMolName = RDKitUtil.GetMolName(Mol, MolCount)
            MiscUtil.PrintWarning("Ignoring empty molecule: %s" % EmptyMolName)
            continue

        ValidMolCount += 1

        HydrogenatedMol = Chem.AddHs(Mol)

        # An empty result signals a failed calculation...
        Charges = CalculateMolPartialCharges(HydrogenatedMol, MolCount)
        if len(Charges) == 0:
            CalcFailedCount += 1
            continue

        WriteMolPartialCharges(Writer, HydrogenatedMol, Charges, Compute2DCoords)

    if Writer is not None:
        Writer.close()

    IgnoredCount = MolCount - ValidMolCount + CalcFailedCount
    Summary = (
        ("\nTotal number of molecules: %d", MolCount),
        ("Number of valid molecules: %d", ValidMolCount),
        ("Number of molecules failed during calculation of partial charges: %d", CalcFailedCount),
        ("Number of ignored molecules: %d", IgnoredCount),
    )
    for Template, Value in Summary:
        MiscUtil.PrintInfo(Template % Value)
def ConvertFileFormat():
    """Read molecules from the input file and write them to the output file.

    The file names come from OptionsInfo["Infile"] and
    OptionsInfo["Outfile"]; reading and writing are done by
    RDKitUtil.ReadMolecules and RDKitUtil.WriteMolecules.
    """

    SourceFile = OptionsInfo["Infile"]
    TargetFile = OptionsInfo["Outfile"]

    # Read...
    MiscUtil.PrintInfo("\nReading file %s..." % SourceFile)
    InputMols = RDKitUtil.ReadMolecules(SourceFile, **OptionsInfo["InfileParams"])
    MiscUtil.PrintInfo("Total number of molecules: %d" % len(InputMols))

    # Write...
    MiscUtil.PrintInfo("\nGenerating file %s..." % TargetFile)
    WriteCounts = RDKitUtil.WriteMolecules(TargetFile, InputMols, **OptionsInfo["OutfileParams"])
    MolCount, ProcessedMolCount = WriteCounts

    Summary = (
        ("Total number of molecules: %d", MolCount),
        ("Number of molecules processed: %d", ProcessedMolCount),
        ("Number of molecules ignored: %d", MolCount - ProcessedMolCount),
    )
    for Template, Value in Summary:
        MiscUtil.PrintInfo(Template % Value)
def PerformFunctionalGroupsSearch():
    """Set up the functional group patterns, run the search and report counts.

    The per-molecule work is delegated to ProcessMolecules. Afterwards the
    overall counts are printed, followed by a "FunctionalGroupName,MatchCount"
    table with one row per entry in OptionsInfo["SpecifiedFunctionalGroups"];
    names of negated groups are prefixed with '!'.
    """

    # Functional groups information...
    ProcessFunctionalGroupsInfo()

    # Pattern molecules for the functional group SMARTS...
    PatternMols = SetupFunctionalGroupsSMARTSPatterns()

    # Molecule reader...
    InputFile = OptionsInfo["Infile"]
    MiscUtil.PrintInfo("\nProcessing file %s..." % InputFile)
    InputMols = RDKitUtil.ReadMolecules(InputFile, **OptionsInfo["InfileParams"])

    # Molecule writers...
    Writer, GroupOutfilesWriters = SetupMoleculeWriters()

    Results = ProcessMolecules(InputMols, PatternMols, Writer, GroupOutfilesWriters)
    MolCount, ValidMolCount, RemainingMolCount, GroupsPatternMatchCountList = Results

    if Writer is not None:
        Writer.close()
    for PerGroupWriter in GroupOutfilesWriters:
        PerGroupWriter.close()

    Summary = (
        ("\nTotal number of molecules: %d", MolCount),
        ("Number of valid molecules: %d", ValidMolCount),
        ("Number of ignored molecules: %d", MolCount - ValidMolCount),
        ("\nTotal number of molecules matched against specified match criteria: %d", RemainingMolCount),
    )
    for Template, Value in Summary:
        MiscUtil.PrintInfo(Template % Value)

    # Per-group match counts...
    MiscUtil.PrintInfo(
        "\nNumber of molecuels matched against individual functional groups:")
    MiscUtil.PrintInfo("FunctionalGroupName,MatchCount")

    for Position, Label in enumerate(OptionsInfo["SpecifiedFunctionalGroups"]):
        if OptionsInfo["SpecifiedFunctionalGroupsNegateMatch"][Position]:
            Label = '!' + Label
        Matches = GroupsPatternMatchCountList[Position]
        MiscUtil.PrintInfo("%s,%d" % (Label, Matches))
def RemoveSalts():
    """Drive salt removal for the input molecules and report counts.

    Reads molecules from OptionsInfo["Infile"], delegates the work to
    ProcessMolecules with the writer from SetupMoleculeWriter and prints a
    summary of the returned counts.
    """

    # Molecule reader...
    InputFile = OptionsInfo["Infile"]
    MiscUtil.PrintInfo("\nProcessing file %s..." % InputFile)
    InputMols = RDKitUtil.ReadMolecules(InputFile, **OptionsInfo["InfileParams"])

    # Molecule writer (may be None)...
    Writer = SetupMoleculeWriter()

    MolCount, ValidMolCount, SaltsMolCount = ProcessMolecules(InputMols, Writer)

    if Writer is not None:
        Writer.close()

    Summary = (
        ("\nTotal number of molecules: %d", MolCount),
        ("Number of valid molecules: %d", ValidMolCount),
        ("Number of ignored molecules: %d", MolCount - ValidMolCount),
        ("\nNumber of molecules coontaining salts: %d", SaltsMolCount),
    )
    for Template, Value in Summary:
        MiscUtil.PrintInfo(Template % Value)
def PerformCalculations():
    """Drive the descriptor calculation for the input molecules and report counts.

    Reads molecules from OptionsInfo["Infile"], delegates the work to
    ProcessMolecules with the writer from SetupMoleculeWriter and prints a
    summary of the returned counts.
    """

    # Molecule reader...
    InputFile = OptionsInfo["Infile"]
    MiscUtil.PrintInfo("\nProcessing file %s..." % InputFile)
    InputMols = RDKitUtil.ReadMolecules(InputFile, **OptionsInfo["InfileParams"])

    # Writer (may be None)...
    Writer = SetupMoleculeWriter()

    # Process molecules...
    MolCount, ValidMolCount = ProcessMolecules(InputMols, Writer)

    if Writer is not None:
        Writer.close()

    Summary = (
        ("\nTotal number of molecules: %d", MolCount),
        ("Number of valid molecules: %d", ValidMolCount),
        ("Number of ignored molecules: %d", MolCount - ValidMolCount),
    )
    for Template, Value in Summary:
        MiscUtil.PrintInfo(Template % Value)
def CalculatePartialCharges():
    """Drive the partial charge calculation for the input molecules.

    Reads molecules from OptionsInfo["Infile"], delegates the work to
    ProcessMolecules with the writer from SetupMoleculeWriter and prints a
    summary of the returned counts.
    """

    # Molecule reader...
    InputFile = OptionsInfo["Infile"]
    MiscUtil.PrintInfo("\nProcessing file %s..." % InputFile)
    InputMols = RDKitUtil.ReadMolecules(InputFile, **OptionsInfo["InfileParams"])

    # Molecule writer (may be None)...
    Writer = SetupMoleculeWriter()

    MolCount, ValidMolCount, CalcFailedCount = ProcessMolecules(InputMols, Writer)

    if Writer is not None:
        Writer.close()

    IgnoredCount = MolCount - ValidMolCount + CalcFailedCount
    Summary = (
        ("\nTotal number of molecules: %d", MolCount),
        ("Number of valid molecules: %d", ValidMolCount),
        ("Number of molecules failed during calculation of partial charges: %d", CalcFailedCount),
        ("Number of ignored molecules: %d", IgnoredCount),
    )
    for Template, Value in Summary:
        MiscUtil.PrintInfo(Template % Value)
def GenerateMolecularFrameworks():
    """Generate Bemis Murcko molecular frameworks for the input molecules.

    Options read from OptionsInfo:
      - "UseGraphFrameworks": use MurckoScaffold.MakeScaffoldGeneric instead
        of MurckoScaffold.GetScaffoldForMol.
      - "RemoveDuplicateFrameworks": frameworks whose canonical SMILES was
        already seen go to OptionsInfo["DuplicatesOutfile"] instead of
        OptionsInfo["Outfile"]; "UseChirality" controls isomeric SMILES.
      - "SortFrameworks": frameworks are collected during the loop and
        written afterwards by SortAndWriteFrameworks, keyed by heavy atom
        count, in the order given by OptionsInfo["DescendingSortOrder"].

    Molecules that are None or empty are skipped. A summary of the counts
    is printed at the end.
    """

    Infile = OptionsInfo["Infile"]
    Outfile = OptionsInfo["Outfile"]

    UseChirality = OptionsInfo["UseChirality"]

    RemoveDuplicateFrameworks = OptionsInfo["RemoveDuplicateFrameworks"]
    UseGraphFrameworks = OptionsInfo["UseGraphFrameworks"]

    SortFrameworks = OptionsInfo["SortFrameworks"]

    # Tracking containers are only filled when sorting is requested; they are
    # always bound so that no later branch can hit an undefined name...
    FrameworkMolIDs = []
    FrameworkMolIDToMolMap = {}
    FrameworkMolIDToAtomCountMap = {}

    DuplicateFrameworkMolIDs = []
    DuplicateFrameworkMolIDToMolMap = {}
    DuplicateFrameworkMolIDToAtomCountMap = {}

    DuplicatesOutfile = ""
    if RemoveDuplicateFrameworks:
        DuplicatesOutfile = OptionsInfo["DuplicatesOutfile"]

    # Setup a molecule reader...
    MiscUtil.PrintInfo("\nProcessing file %s..." % Infile)
    Mols = RDKitUtil.ReadMolecules(Infile, **OptionsInfo["InfileParams"])

    # Set up a molecular framework writer...
    Writer = RDKitUtil.MoleculesWriter(Outfile, **OptionsInfo["OutfileParams"])
    if Writer is None:
        MiscUtil.PrintError("Failed to setup a writer for output fie %s " % Outfile)

    # Set up a duplicate molecular framework writer...
    DuplicatesWriter = None
    if RemoveDuplicateFrameworks:
        DuplicatesWriter = RDKitUtil.MoleculesWriter(DuplicatesOutfile, **OptionsInfo["OutfileParams"])
        # Check the duplicates writer itself; the previous code re-checked
        # Writer here, so a failed duplicates writer went undetected...
        if DuplicatesWriter is None:
            MiscUtil.PrintError("Failed to setup a writer for duplicates output fie %s " % DuplicatesOutfile)

    if RemoveDuplicateFrameworks:
        MiscUtil.PrintInfo("Generating files: %s and %s..." % (Outfile, DuplicatesOutfile))
    else:
        MiscUtil.PrintInfo("Generating file %s..." % Outfile)

    # Process molecules...
    MolCount = 0
    ValidMolCount = 0

    FrameworksCount = 0
    UniqueFrameworksCount = 0
    DuplicateFrameworksCount = 0

    # Canonical SMILES of the frameworks seen so far...
    CanonicalSMILESMap = {}

    Compute2DCoords = OptionsInfo["OutfileParams"]["Compute2DCoords"]

    for Mol in Mols:
        MolCount += 1

        if Mol is None:
            continue

        if RDKitUtil.IsMolEmpty(Mol):
            MolName = RDKitUtil.GetMolName(Mol, MolCount)
            MiscUtil.PrintWarning("Ignoring empty molecule: %s" % MolName)
            continue

        ValidMolCount += 1

        if UseGraphFrameworks:
            FrameworksMol = MurckoScaffold.MakeScaffoldGeneric(Mol)
        else:
            FrameworksMol = MurckoScaffold.GetScaffoldForMol(Mol)

        if Compute2DCoords:
            AllChem.Compute2DCoords(FrameworksMol)

        # Heavy atom count is only needed as the sort key...
        if SortFrameworks:
            HeavyAtomCount = FrameworksMol.GetNumHeavyAtoms()

        FrameworksCount += 1

        if RemoveDuplicateFrameworks:
            CanonicalSMILES = Chem.MolToSmiles(FrameworksMol, isomericSmiles = UseChirality, canonical = True)
            if CanonicalSMILES in CanonicalSMILESMap:
                DuplicateFrameworksCount += 1
                if SortFrameworks:
                    # Track duplicate frameworks...
                    DuplicateFrameworkMolIDs.append(DuplicateFrameworksCount)
                    DuplicateFrameworkMolIDToMolMap[DuplicateFrameworksCount] = FrameworksMol
                    DuplicateFrameworkMolIDToAtomCountMap[DuplicateFrameworksCount] = HeavyAtomCount
                else:
                    # Write it out...
                    DuplicatesWriter.write(FrameworksMol)
            else:
                UniqueFrameworksCount += 1
                CanonicalSMILESMap[CanonicalSMILES] = CanonicalSMILES
                if SortFrameworks:
                    # Track unique frameworks...
                    FrameworkMolIDs.append(UniqueFrameworksCount)
                    FrameworkMolIDToMolMap[UniqueFrameworksCount] = FrameworksMol
                    FrameworkMolIDToAtomCountMap[UniqueFrameworksCount] = HeavyAtomCount
                else:
                    # Write it out...
                    Writer.write(FrameworksMol)
        elif SortFrameworks:
            # Track for sorting...
            FrameworkMolIDs.append(FrameworksCount)
            FrameworkMolIDToMolMap[FrameworksCount] = FrameworksMol
            FrameworkMolIDToAtomCountMap[FrameworksCount] = HeavyAtomCount
        else:
            # Write it out...
            Writer.write(FrameworksMol)

    # Deferred output of the tracked frameworks...
    if SortFrameworks:
        ReverseOrder = OptionsInfo["DescendingSortOrder"]
        SortAndWriteFrameworks(Writer, FrameworkMolIDs, FrameworkMolIDToMolMap, FrameworkMolIDToAtomCountMap, ReverseOrder)
        if RemoveDuplicateFrameworks:
            SortAndWriteFrameworks(DuplicatesWriter, DuplicateFrameworkMolIDs, DuplicateFrameworkMolIDToMolMap, DuplicateFrameworkMolIDToAtomCountMap, ReverseOrder)

    if Writer is not None:
        Writer.close()
    if DuplicatesWriter is not None:
        DuplicatesWriter.close()

    MiscUtil.PrintInfo("\nTotal number of molecular frameworks: %d" % FrameworksCount)
    if RemoveDuplicateFrameworks:
        MiscUtil.PrintInfo("Number of unique molecular frameworks: %d" % UniqueFrameworksCount)
        MiscUtil.PrintInfo("Number of duplicate molecular frameworks: %d" % DuplicateFrameworksCount)

    MiscUtil.PrintInfo("\nTotal number of molecules: %d" % MolCount)
    MiscUtil.PrintInfo("Number of valid molecules: %d" % ValidMolCount)
    MiscUtil.PrintInfo("Number of ignored molecules: %d" % (MolCount - ValidMolCount))
def RemoveSalts():
    """Identify and remove salts from the input molecules.

    Every valid molecule is passed to RemoveMolSalts, which returns the
    unsalted molecule and a status flag; molecules with a true flag are
    counted as containing salts. Unless OptionsInfo["CountMode"] is set,
    the unsalted molecule is written to OptionsInfo["Outfile"]. When
    OptionsInfo["SaltsByComponentsMode"] is not set, a SaltRemover built
    from OptionsInfo["SaltsFile"] and OptionsInfo["SaltsSMARTS"] is handed
    to RemoveMolSalts; otherwise None is passed.

    Molecules that are None or empty are skipped. A summary of the counts
    is printed at the end.
    """

    Infile = OptionsInfo["Infile"]
    Outfile = OptionsInfo["Outfile"]

    CountMode = OptionsInfo["CountMode"]

    # Setup a molecule reader...
    MiscUtil.PrintInfo("\nProcessing file %s..." % Infile)
    Mols = RDKitUtil.ReadMolecules(Infile, **OptionsInfo["InfileParams"])

    # Set up a molecule writer; nothing is written in count mode...
    Writer = None
    if not CountMode:
        Writer = RDKitUtil.MoleculesWriter(Outfile, **OptionsInfo["OutfileParams"])
        if Writer is None:
            MiscUtil.PrintError("Failed to setup a writer for output fie %s " % Outfile)
        MiscUtil.PrintInfo("Generating file %s..." % (Outfile))

    # Set up a salt remover...
    SaltsByComponentsMode = OptionsInfo["SaltsByComponentsMode"]
    Remover = None
    if not SaltsByComponentsMode:
        Remover = SaltRemover(defnFilename=OptionsInfo["SaltsFile"], defnData=OptionsInfo["SaltsSMARTS"], defnFormat=InputFormat.SMARTS)

    # Process molecules...
    MolCount = 0
    ValidMolCount = 0
    SaltsMolCount = 0

    Compute2DCoords = OptionsInfo["OutfileParams"]["Compute2DCoords"]

    for Mol in Mols:
        MolCount += 1

        if Mol is None:
            continue

        if RDKitUtil.IsMolEmpty(Mol):
            MolName = RDKitUtil.GetMolName(Mol, MolCount)
            MiscUtil.PrintWarning("Ignoring empty molecule: %s" % MolName)
            continue

        ValidMolCount += 1

        UnsaltedMol, SaltyStatus = RemoveMolSalts(Mol, Remover, MolCount)

        if SaltyStatus:
            SaltsMolCount += 1

        # The unsalted molecule is written whether or not salts were found...
        if not CountMode:
            if Compute2DCoords:
                AllChem.Compute2DCoords(UnsaltedMol)
            Writer.write(UnsaltedMol)

    if Writer is not None:
        Writer.close()

    MiscUtil.PrintInfo("\nTotal number of molecules: %d" % MolCount)
    MiscUtil.PrintInfo("Number of valid molecules: %d" % ValidMolCount)
    MiscUtil.PrintInfo("Number of ignored molecules: %d" % (MolCount - ValidMolCount))

    MiscUtil.PrintInfo("\nNumber of molecules coontaining salts: %d" % (SaltsMolCount))
def PerformFiltering():
    """Filter input molecules with the SMARTS patterns from the PAINS filters.

    A valid molecule is counted, and written unless
    OptionsInfo["CountMode"] is set, when the result of
    DoesMoleculeContainsPAINSPattern equals OptionsInfo["NegateMatch"].
    Molecules that are None or empty are skipped. A summary of the counts
    is printed at the end.
    """

    Infile = OptionsInfo["Infile"]
    Outfile = OptionsInfo["Outfile"]
    CountMode = OptionsInfo["CountMode"]
    NegateMatch = OptionsInfo["NegateMatch"]
    PAINSMode = OptionsInfo["PAINSMode"]

    # PAINS patterns and the corresponding pattern molecules...
    PatternMols = SetupPAINSPatternMols(RetrievePAINSPatterns(PAINSMode))

    # Molecule reader...
    MiscUtil.PrintInfo("\nProcessing file %s..." % Infile)
    InputMols = RDKitUtil.ReadMolecules(Infile, **OptionsInfo["InfileParams"])

    # Molecule writer; nothing is written in count mode...
    Writer = None
    WriteOutput = not CountMode
    if WriteOutput:
        Writer = RDKitUtil.MoleculesWriter(Outfile, **OptionsInfo["OutfileParams"])
        if Writer is None:
            MiscUtil.PrintError("Failed to setup a writer for output fie %s " % Outfile)
        MiscUtil.PrintInfo("Generating file %s..." % Outfile)

    Compute2DCoords = OptionsInfo["OutfileParams"]["Compute2DCoords"]

    # Walk through the molecules...
    MolCount = 0
    ValidMolCount = 0
    FilteredCount = 0
    for Mol in InputMols:
        MolCount += 1

        if Mol is None:
            continue

        if RDKitUtil.IsMolEmpty(Mol):
            EmptyMolName = RDKitUtil.GetMolName(Mol, MolCount)
            MiscUtil.PrintWarning("Ignoring empty molecule: %s" % EmptyMolName)
            continue

        ValidMolCount += 1

        ContainsPattern = DoesMoleculeContainsPAINSPattern(Mol, PatternMols)
        if ContainsPattern != NegateMatch:
            continue

        FilteredCount += 1
        if WriteOutput:
            if Compute2DCoords:
                AllChem.Compute2DCoords(Mol)
            Writer.write(Mol)

    if Writer is not None:
        Writer.close()

    Summary = (
        ("\nTotal number of molecules: %d", MolCount),
        ("Number of valid molecules: %d", ValidMolCount),
        ("Number of ignored molecules: %d", MolCount - ValidMolCount),
        ("\nTotal number of filtered molecules: %d", FilteredCount),
    )
    for Template, Value in Summary:
        MiscUtil.PrintInfo(Template % Value)
def PerformCalculations():
    """Calculate the specified descriptors for each input molecule.

    The descriptor names come from OptionsInfo["SpecifiedDescriptorNames"];
    each value is computed with the function registered under
    DescriptorNamesMap["ComputeFunction"] and formatted by
    FormatCalculatedValue with OptionsInfo["Precision"].

    In text output mode (OptionsInfo["TextOutFileMode"]) a delimited text
    file is written: a header line, then one line per molecule with an
    optional SMILES column, the molecule name and the values. Otherwise the
    values are set as molecule properties and the molecule is written with
    an RDKitUtil molecule writer.

    Molecules that are None or empty are skipped. A summary of the counts
    is printed at the end.
    """

    Infile = OptionsInfo["Infile"]
    Outfile = OptionsInfo["Outfile"]

    DescriptorNames = OptionsInfo["SpecifiedDescriptorNames"]
    DescriptorCount = len(DescriptorNames)

    TextOutFileMode = OptionsInfo["TextOutFileMode"]
    TextOutFileDelim = OptionsInfo["TextOutFileDelim"]
    SMILESOut = OptionsInfo["SMILESOut"]

    Precision = OptionsInfo["Precision"]

    MiscUtil.PrintInfo("Calculating %d molecular descriptor(s) for each molecule..." % DescriptorCount)

    # Setup a molecule reader...
    MiscUtil.PrintInfo("\nProcessing file %s..." % Infile)
    Mols = RDKitUtil.ReadMolecules(Infile, **OptionsInfo["InfileParams"])

    # Setup a writer: a plain text file or a molecule writer...
    Compute2DCoords = OptionsInfo["OutfileParams"]["Compute2DCoords"]
    if TextOutFileMode:
        Writer = open(Outfile, "w")
    else:
        Writer = RDKitUtil.MoleculesWriter(Outfile, **OptionsInfo["OutfileParams"])
    if Writer is None:
        MiscUtil.PrintError("Failed to setup a writer for output fie %s " % Outfile)
    MiscUtil.PrintInfo("Generating file %s..." % Outfile)

    # Write out headers for a text file...
    if TextOutFileMode:
        LineWords = []
        if SMILESOut:
            LineWords.append("SMILES")
        LineWords.append("MolID")
        LineWords.extend(DescriptorNames)
        Writer.write("%s\n" % TextOutFileDelim.join(LineWords))

    # Process molecules...
    MolCount = 0
    ValidMolCount = 0

    for Mol in Mols:
        MolCount += 1

        if Mol is None:
            continue

        if RDKitUtil.IsMolEmpty(Mol):
            MolName = RDKitUtil.GetMolName(Mol, MolCount)
            MiscUtil.PrintWarning("Ignoring empty molecule: %s" % MolName)
            continue

        ValidMolCount += 1

        # Calculate descriptors, in the order of DescriptorNames...
        CalculatedValues = []
        for Name in DescriptorNames:
            ComputeFunction = DescriptorNamesMap["ComputeFunction"][Name]
            CalculatedValues.append(FormatCalculatedValue(ComputeFunction(Mol), Precision))

        # Write out calculated values...
        if TextOutFileMode:
            LineWords = []
            if SMILESOut:
                SMILES = Chem.MolToSmiles(Mol, isomericSmiles = True, canonical = True)
                LineWords.append(SMILES)
            LineWords.append(RDKitUtil.GetMolName(Mol, MolCount))
            LineWords.extend(CalculatedValues)
            Writer.write("%s\n" % TextOutFileDelim.join(LineWords))
        else:
            # Attach each value to the molecule under its descriptor name...
            for Name, Value in zip(DescriptorNames, CalculatedValues):
                Mol.SetProp(Name, Value)
            if Compute2DCoords:
                AllChem.Compute2DCoords(Mol)
            Writer.write(Mol)

    if Writer is not None:
        Writer.close()

    MiscUtil.PrintInfo("\nTotal number of molecules: %d" % MolCount)
    MiscUtil.PrintInfo("Number of valid molecules: %d" % ValidMolCount)
    MiscUtil.PrintInfo("Number of ignored molecules: %d" % (MolCount - ValidMolCount))
def GenerateConformers():
    """Generate conformers for each input molecule and report counts.

    Depending on OptionsInfo["SkipForceFieldMinimization"], each valid
    molecule is handed to GenerateMolConformers or to
    GenerateAndMinimizeMolConformers together with the output writer; a
    false status is counted as a failure. Molecules that are None or empty
    are skipped. A summary of the counts is printed at the end.
    """

    Infile = OptionsInfo["Infile"]
    Outfile = OptionsInfo["Outfile"]

    # Molecule reader...
    MiscUtil.PrintInfo("\nProcessing file %s..." % Infile)
    InputMols = RDKitUtil.ReadMolecules(Infile, **OptionsInfo["InfileParams"])

    # Molecule writer...
    Writer = RDKitUtil.MoleculesWriter(Outfile, **OptionsInfo["OutfileParams"])
    if Writer is None:
        MiscUtil.PrintError("Failed to setup a writer for output fie %s " % Outfile)
    MiscUtil.PrintInfo("Generating file %s...\n" % Outfile)

    MolCount = 0
    ValidMolCount = 0
    ConfGenFailedCount = 0

    # Announce the mode...
    SkipMinimization = OptionsInfo["SkipForceFieldMinimization"]
    if not SkipMinimization:
        MiscUtil.PrintInfo(
            "Generating conformers and performing energy minimization...\n")
    else:
        MiscUtil.PrintInfo(
            "Generating conformers without performing energy minimization...\n"
        )

    # Walk through the molecules...
    for Mol in InputMols:
        MolCount += 1

        if Mol is None:
            continue

        if RDKitUtil.IsMolEmpty(Mol):
            EmptyMolName = RDKitUtil.GetMolName(Mol, MolCount)
            MiscUtil.PrintWarning("Ignoring empty molecule: %s" % EmptyMolName)
            continue

        ValidMolCount += 1

        if not SkipMinimization:
            Succeeded = GenerateAndMinimizeMolConformers(Mol, MolCount, Writer)
        else:
            Succeeded = GenerateMolConformers(Mol, MolCount, Writer)

        if not Succeeded:
            ConfGenFailedCount += 1

    if Writer is not None:
        Writer.close()

    IgnoredCount = MolCount - ValidMolCount + ConfGenFailedCount
    Summary = (
        ("\nTotal number of molecules: %d", MolCount),
        ("Number of valid molecules: %d", ValidMolCount),
        ("Number of molecules failed during conformation generation or minimization: %d", ConfGenFailedCount),
        ("Number of ignored molecules: %d", IgnoredCount),
    )
    for Template, Value in Summary:
        MiscUtil.PrintInfo(Template % Value)
def RemoveDuplicates():
    """Split input molecules into unique and duplicate ones by canonical SMILES.

    The first molecule with a given canonical SMILES is counted as unique;
    every later one with the same SMILES is counted as a duplicate.
    OptionsInfo["UseChirality"] controls whether isomeric SMILES are used.
    Unless OptionsInfo["CountMode"] is set, unique molecules are written to
    OptionsInfo["Outfile"] and duplicates to
    OptionsInfo["DuplicatesOutfile"]. Molecules that are None or empty are
    skipped. A summary of the counts is printed at the end.
    """

    Infile = OptionsInfo["Infile"]
    Outfile = OptionsInfo["Outfile"]
    DuplicatesOutfile = OptionsInfo["DuplicatesOutfile"]
    CountMode = OptionsInfo["CountMode"]
    UseChirality = OptionsInfo["UseChirality"]

    # Molecule reader...
    MiscUtil.PrintInfo("\nProcessing file %s..." % Infile)
    InputMols = RDKitUtil.ReadMolecules(Infile, **OptionsInfo["InfileParams"])

    # Molecule writers; nothing is written in count mode...
    Writer = None
    DuplicatesWriter = None
    WriteOutput = not CountMode
    if WriteOutput:
        Writer = RDKitUtil.MoleculesWriter(Outfile, **OptionsInfo["OutfileParams"])
        if Writer is None:
            MiscUtil.PrintError("Failed to setup a writer for output fie %s " % Outfile)

        DuplicatesWriter = RDKitUtil.MoleculesWriter(DuplicatesOutfile, **OptionsInfo["OutfileParams"])
        if DuplicatesWriter is None:
            MiscUtil.PrintError("Failed to setup a writer for output fie %s " % DuplicatesOutfile)

        MiscUtil.PrintInfo("Generating files %s and %s..." % (Outfile, DuplicatesOutfile))

    # Walk through the molecules...
    MolCount = 0
    ValidMolCount = 0
    UniqueMolCount = 0
    DuplicateMolCount = 0

    # Canonical SMILES seen so far...
    SeenSMILES = set()

    Compute2DCoords = OptionsInfo["OutfileParams"]["Compute2DCoords"]

    for Mol in InputMols:
        MolCount += 1

        if Mol is None:
            continue

        if RDKitUtil.IsMolEmpty(Mol):
            EmptyMolName = RDKitUtil.GetMolName(Mol, MolCount)
            MiscUtil.PrintWarning("Ignoring empty molecule: %s" % EmptyMolName)
            continue

        ValidMolCount += 1

        MolSMILES = Chem.MolToSmiles(Mol, isomericSmiles = UseChirality, canonical = True)

        # Coordinates only matter when the molecule gets written...
        if Compute2DCoords and WriteOutput:
            AllChem.Compute2DCoords(Mol)

        if MolSMILES in SeenSMILES:
            DuplicateMolCount += 1
            TargetWriter = DuplicatesWriter
        else:
            UniqueMolCount += 1
            SeenSMILES.add(MolSMILES)
            TargetWriter = Writer

        if WriteOutput:
            TargetWriter.write(Mol)

    for OutWriter in (Writer, DuplicatesWriter):
        if OutWriter is not None:
            OutWriter.close()

    Summary = (
        ("\nTotal number of molecules: %d", MolCount),
        ("Number of valid molecules: %d", ValidMolCount),
        ("Number of ignored molecules: %d", MolCount - ValidMolCount),
        ("\nTotal number of unique molecules: %d", UniqueMolCount),
        ("Total number of duplicate molecules: %d", DuplicateMolCount),
    )
    for Template, Value in Summary:
        MiscUtil.PrintInfo(Template % Value)
def PerformSearch():
    """Search input molecules for the specified SMARTS pattern.

    A valid molecule is counted, and written unless
    OptionsInfo["CountMode"] is set, when the result of its substructure
    match differs from OptionsInfo["NegateMatch"]. Molecules that are None
    or empty are skipped. A summary of the counts is printed at the end.
    """

    Infile = OptionsInfo["Infile"]
    Outfile = OptionsInfo["Outfile"]
    CountMode = OptionsInfo["CountMode"]
    NegateMatch = OptionsInfo["NegateMatch"]
    UseChirality = OptionsInfo["UseChirality"]

    # Pattern molecule for the substructure query...
    QueryMol = Chem.MolFromSmarts(OptionsInfo["Pattern"])

    # Molecule reader...
    MiscUtil.PrintInfo("\nProcessing file %s..." % Infile)
    InputMols = RDKitUtil.ReadMolecules(Infile, **OptionsInfo["InfileParams"])

    # Molecule writer; nothing is written in count mode...
    Writer = None
    WriteOutput = not CountMode
    if WriteOutput:
        Writer = RDKitUtil.MoleculesWriter(Outfile, **OptionsInfo["OutfileParams"])
        if Writer is None:
            MiscUtil.PrintError("Failed to setup a writer for output fie %s " % Outfile)
        MiscUtil.PrintInfo("Generating file %s..." % Outfile)

    # Walk through the molecules...
    MolCount = 0
    ValidMolCount = 0
    MatchCount = 0

    Compute2DCoords = OptionsInfo["OutfileParams"]["Compute2DCoords"]

    for Mol in InputMols:
        MolCount += 1

        if Mol is None:
            continue

        if RDKitUtil.IsMolEmpty(Mol):
            EmptyMolName = RDKitUtil.GetMolName(Mol, MolCount)
            MiscUtil.PrintWarning("Ignoring empty molecule: %s" % EmptyMolName)
            continue

        ValidMolCount += 1

        HasPattern = Mol.HasSubstructMatch(QueryMol, useChirality = UseChirality)
        if HasPattern == NegateMatch:
            continue

        MatchCount += 1
        if WriteOutput:
            if Compute2DCoords:
                AllChem.Compute2DCoords(Mol)
            Writer.write(Mol)

    if Writer is not None:
        Writer.close()

    MiscUtil.PrintInfo("\nTotal number of molecules: %d" % MolCount)
    MiscUtil.PrintInfo("Number of valid molecules: %d" % ValidMolCount)
    MiscUtil.PrintInfo("Number of ignored molecules: %d" % (MolCount - ValidMolCount))

    # The label of the final count depends on the match direction...
    if not NegateMatch:
        MiscUtil.PrintInfo("\nTotal number of matched molecules: %d" % MatchCount)
    else:
        MiscUtil.PrintInfo("\nTotal number of not matched molecules: %d" % MatchCount)
def PerformEnumeration():
    """Enumerate stereoisomers of each input molecule and write them out.

    Enumeration options are built from OptionsInfo["DiscardNonPhysical"],
    OptionsInfo["UnassignedOnly"] and OptionsInfo["MaxIsomers"]. Each
    isomer is named "<molecule name>_Isomer<number>", numbered from 1, and
    written to OptionsInfo["Outfile"]. Molecules that are None or empty
    are skipped. A summary of the counts is printed at the end.
    """

    Infile = OptionsInfo["Infile"]
    Outfile = OptionsInfo["Outfile"]

    # Molecule reader...
    MiscUtil.PrintInfo("\nProcessing file %s..." % Infile)
    InputMols = RDKitUtil.ReadMolecules(Infile, **OptionsInfo["InfileParams"])

    # Molecule writer...
    Writer = RDKitUtil.MoleculesWriter(Outfile, **OptionsInfo["OutfileParams"])
    if Writer is None:
        MiscUtil.PrintError("Failed to setup a writer for output fie %s " % Outfile)
    MiscUtil.PrintInfo("Generating file %s...\n" % Outfile)

    # Stereo enumeration options...
    EnumerationOptions = StereoEnumerationOptions(
        tryEmbedding=OptionsInfo["DiscardNonPhysical"],
        onlyUnassigned=OptionsInfo["UnassignedOnly"],
        maxIsomers=OptionsInfo["MaxIsomers"])

    # Walk through the molecules...
    MolCount = 0
    ValidMolCount = 0

    Compute2DCoords = OptionsInfo["OutfileParams"]["Compute2DCoords"]

    for Mol in InputMols:
        MolCount += 1

        if Mol is None:
            continue

        if RDKitUtil.IsMolEmpty(Mol):
            EmptyMolName = RDKitUtil.GetMolName(Mol, MolCount)
            MiscUtil.PrintWarning("Ignoring empty molecule: %s" % EmptyMolName)
            continue

        ValidMolCount += 1

        ParentName = RDKitUtil.GetMolName(Mol, MolCount)

        # Generate, name and write the stereoisomers; the counter stays 0
        # when none are produced...
        IsomerCount = 0
        for IsomerCount, IsomerMol in enumerate(EnumerateStereoisomers(Mol, options=EnumerationOptions), start=1):
            IsomerMol.SetProp("_Name", "%s_Isomer%d" % (ParentName, IsomerCount))

            if Compute2DCoords:
                AllChem.Compute2DCoords(IsomerMol)

            Writer.write(IsomerMol)

        MiscUtil.PrintInfo("Number of stereoisomers written for %s: %d" % (ParentName, IsomerCount))

    if Writer is not None:
        Writer.close()

    Summary = (
        ("\nTotal number of molecules: %d", MolCount),
        ("Number of valid molecules: %d", ValidMolCount),
        ("Number of ignored molecules: %d", MolCount - ValidMolCount),
    )
    for Template, Value in Summary:
        MiscUtil.PrintInfo(Template % Value)
def RemoveInvalidMolecules():
    """Identify and remove invalid molecules.

    Reads molecules from OptionsInfo["Infile"]. A molecule is treated as
    invalid when the reader yields None for it or when
    RDKitUtil.IsMolEmpty() is true for it. Unless OptionsInfo["CountMode"]
    is set, valid molecules are written to OptionsInfo["Outfile"] and the
    names of invalid molecules are written, one per line below a "MolName"
    header, to OptionsInfo["InvalidMolsOutfile"]. Summary counts are always
    printed at the end.

    Both output files are closed even when processing fails part way.
    """

    Infile = OptionsInfo["Infile"]
    Outfile = OptionsInfo["Outfile"]
    InvalidMolsOutfile = OptionsInfo["InvalidMolsOutfile"]

    CountMode = OptionsInfo["CountMode"]

    # Setup a molecule reader...
    MiscUtil.PrintInfo("\nProcessing file %s..." % Infile)
    Mols = RDKitUtil.ReadMolecules(Infile, **OptionsInfo["InfileParams"])

    Writer = None
    InvalidMolsWriter = None

    MolCount = 0
    ValidMolCount = 0

    try:
        if not CountMode:
            # Set up a molecule writer...
            Writer = RDKitUtil.MoleculesWriter(Outfile, **OptionsInfo["OutfileParams"])
            if Writer is None:
                MiscUtil.PrintError("Failed to setup a writer for output fie %s " % Outfile)

            # Set up a invalid molecule name writer. open() signals failure
            # by raising, never by returning None...
            try:
                InvalidMolsWriter = open(InvalidMolsOutfile, "w")
            except (IOError, OSError):
                MiscUtil.PrintError("Failed to open output fie %s " % InvalidMolsOutfile)
                # NOTE(review): PrintError presumably terminates the run;
                # re-raise so the failure still propagates if it returns.
                raise
            InvalidMolsWriter.write("MolName\n")

            MiscUtil.PrintInfo("Generating files %s and %s..." % (Outfile, InvalidMolsOutfile))

        # Process molecules...
        Compute2DCoords = OptionsInfo["OutfileParams"]["Compute2DCoords"]
        SetSMILESMolProps = OptionsInfo["OutfileParams"]["SetSMILESMolProps"]

        for Mol in Mols:
            MolCount += 1

            if Mol is None:
                # No molecule object to take a name from; use its position...
                MolName = "Mol%s" % MolCount
                MiscUtil.PrintWarning("Ignoring invalid molecule: %s" % MolName)
                if InvalidMolsWriter is not None:
                    InvalidMolsWriter.write("%s\n" % MolName)
                continue

            if RDKitUtil.IsMolEmpty(Mol):
                MolName = RDKitUtil.GetMolName(Mol, MolCount)
                MiscUtil.PrintWarning("Ignoring invalid empty molecule: %s" % MolName)
                if InvalidMolsWriter is not None:
                    InvalidMolsWriter.write("%s\n" % MolName)
                continue

            ValidMolCount += 1

            # Nothing to write...
            if CountMode:
                continue

            # Writer properties are set up once, from the first valid molecule...
            if ValidMolCount == 1 and SetSMILESMolProps:
                RDKitUtil.SetWriterMolProps(Writer, Mol)

            if Compute2DCoords:
                AllChem.Compute2DCoords(Mol)

            Writer.write(Mol)
    finally:
        # Release both output files regardless of how processing ended...
        if InvalidMolsWriter is not None:
            InvalidMolsWriter.close()
        if Writer is not None:
            Writer.close()

    MiscUtil.PrintInfo("\nTotal number of molecules: %d" % MolCount)
    MiscUtil.PrintInfo("Number of valid molecules: %d" % ValidMolCount)
    MiscUtil.PrintInfo("Number of invalid molecules: %d" % (MolCount - ValidMolCount))
def PerformSearch():
    """Search the input molecules for the specified functional groups.

    Each non-empty molecule read from OptionsInfo["Infile"] is tested
    against every specified functional group pattern via
    DoesPatternMolMatch(), and the per-group results are combined by
    DoesMolMeetSpecifiedMatchCriteria(). Unless OptionsInfo["CountMode"] is
    set, a molecule meeting the criteria is written either to the single
    combined writer or to the writer of each group it matched. Overall and
    per-group match counts are printed at the end.
    """

    InputFile = OptionsInfo["Infile"]
    OutputFile = OptionsInfo["Outfile"]

    GroupNames = OptionsInfo["SpecifiedFunctionalGroups"]
    GroupNegateFlags = OptionsInfo["SpecifiedFunctionalGroupsNegateMatch"]
    GroupPatternMols = OptionsInfo["SpecifiedFunctionalGroupsPatternMols"]
    GroupOutputFiles = OptionsInfo["SpecifiedFunctionalGroupsOutfiles"]
    NumGroups = len(GroupNames)

    CombineResults = OptionsInfo["CombineMatchResults"]
    AndMode = OptionsInfo["AndCombineOperatorMode"]

    CountOnly = OptionsInfo["CountMode"]
    MatchChirality = OptionsInfo["UseChirality"]

    # Open the input molecules...
    MiscUtil.PrintInfo("\nProcessing file %s..." % InputFile)
    InputMols = RDKitUtil.ReadMolecules(InputFile, **OptionsInfo["InfileParams"])

    # Writers are only needed when matches are written out...
    CombinedWriter = None
    PerGroupWriters = []
    if not CountOnly:
        CombinedWriter, PerGroupWriters = SetupMoleculeWriters(
            CombineResults, OutputFile, GroupOutputFiles)

    # Per-group running totals and the match status of the current molecule...
    GroupMatchTotals = [0] * NumGroups
    GroupMatchStatus = [False] * NumGroups

    Generate2DCoords = OptionsInfo["OutfileParams"]["Compute2DCoords"]

    # TotalCount keeps the 1-based position of the last molecule seen and
    # stays 0 for an empty input...
    TotalCount = 0
    ValidCount = 0
    SelectedCount = 0
    for TotalCount, Mol in enumerate(InputMols, 1):
        if Mol is None:
            continue

        if RDKitUtil.IsMolEmpty(Mol):
            EmptyMolName = RDKitUtil.GetMolName(Mol, TotalCount)
            MiscUtil.PrintWarning("Ignoring empty molecule: %s" % EmptyMolName)
            continue

        ValidCount += 1

        # Test the molecule against each functional group in turn...
        for Index in range(NumGroups):
            Matched = DoesPatternMolMatch(
                GroupPatternMols[Index], Mol, MatchChirality, GroupNegateFlags[Index])
            GroupMatchStatus[Index] = Matched
            if Matched:
                GroupMatchTotals[Index] += 1

        # Combine the individual results into an overall verdict...
        if not DoesMolMeetSpecifiedMatchCriteria(GroupMatchStatus, CombineResults, AndMode):
            continue
        SelectedCount += 1

        if CountOnly:
            continue

        # Write out the selected molecule...
        if Generate2DCoords:
            AllChem.Compute2DCoords(Mol)

        if CombineResults:
            CombinedWriter.write(Mol)
            continue

        for Index, Matched in enumerate(GroupMatchStatus):
            if Matched:
                PerGroupWriters[Index].write(Mol)

    if CombinedWriter is not None:
        CombinedWriter.close()
    for GroupWriter in PerGroupWriters:
        GroupWriter.close()

    MiscUtil.PrintInfo("\nTotal number of molecules: %d" % TotalCount)
    MiscUtil.PrintInfo("Number of valid molecules: %d" % ValidCount)
    MiscUtil.PrintInfo("Number of ignored molecules: %d" % (TotalCount - ValidCount))

    MiscUtil.PrintInfo(
        "\nTotal number of molecules matched against specified match criteria: %d" % SelectedCount)

    MiscUtil.PrintInfo(
        "\nNumber of molecuels matched against individual functional groups:")
    MiscUtil.PrintInfo("FunctionalGroupName,MatchCount")

    for Index, Label in enumerate(GroupNames):
        # Negated groups are reported with a leading '!'...
        if GroupNegateFlags[Index]:
            Label = '!' + Label
        MiscUtil.PrintInfo("%s,%d" % (Label, GroupMatchTotals[Index]))