def PerformOneToOneAlignment(ValidRefMols, ValidProbeMols, Writer):
    """Align probe molecules against reference molecules pair by pair.

    The probe molecule at each position is paired with the reference molecule
    at the same position. When the two collections differ in size, warnings
    are issued and only the leading pairs are processed.

    Arguments:
        ValidRefMols (list): Reference molecules.
        ValidProbeMols (list): Probe molecules.
        Writer (object): Writer handed to PerformAlignmentAndWrieOutput.

    Returns:
        int : Number of pairs for which a failed alignment was reported.

    """

    RefTotal = len(ValidRefMols)
    ProbeTotal = len(ValidProbeMols)
    PairTotal = min(RefTotal, ProbeTotal)

    if RefTotal != ProbeTotal:
        MiscUtil.PrintWarning("Number of valid reference molecules, %d, is not equal to number of valid probe molecules, %d .\n" % (RefTotal, ProbeTotal))
        MiscUtil.PrintWarning("Pairwise alignment will be performed only for first %s molecules.\n" % (PairTotal))

    # Process molecule pairs; MolNum is the 1-based position used for naming...
    FailedTotal = 0
    for MolNum in range(1, PairTotal + 1):
        RefMol = ValidRefMols[MolNum - 1]
        ProbeMol = ValidProbeMols[MolNum - 1]

        RefMolName = RDKitUtil.GetMolName(RefMol, MolNum)
        ProbeMolName = RDKitUtil.GetMolName(ProbeMol, MolNum)

        if not PerformAlignmentAndWrieOutput(RefMol, ProbeMol, RefMolName, ProbeMolName, Writer):
            FailedTotal += 1

    return FailedTotal
def CalculateOneToOneRMSDValues(OutFH, OutDelim):
    """Calculate RMSD values between reference and probe files paired by position.

    File counts come from OptionsInfo["RefFilesNames"] and
    OptionsInfo["ProbeFilesNames"]. When the counts differ, warnings are issued
    and only the leading pairs are processed. For each pair, both files are
    loaded, one delimited line (reference ID, probe ID, RMSD) is written to
    OutFH, and both loaded objects are deleted.

    Arguments:
        OutFH (object): Output handle providing a write method.
        OutDelim (str): Delimiter placed between output columns.

    """

    RefTotal = len(OptionsInfo["RefFilesNames"])
    ProbeTotal = len(OptionsInfo["ProbeFilesNames"])
    PairTotal = min(RefTotal, ProbeTotal)

    if RefTotal != ProbeTotal:
        MiscUtil.PrintWarning("Number of reference files, %d, is not equal to number of probe files, %d .\n" % (RefTotal, ProbeTotal))
        MiscUtil.PrintWarning("Pairwise RMSD will be calculated only for first %s files.\n" % (PairTotal))

    # Process files; the same index addresses the reference and the probe file...
    for Index in range(PairTotal):
        LoadRefFile(Index)
        LoadProbeFile(Index)

        RMSD = CalculateRMSDValue(Index, Index)

        RefID = OptionsInfo["RefFilesInfo"]["FilesRoots"][Index]
        ProbeID = OptionsInfo["ProbeFilesInfo"]["FilesRoots"][Index]
        OutFH.write("%s%s%s%s%s\n" % (RefID, OutDelim, ProbeID, OutDelim, RMSD))

        DeleteRefObject(Index)
        DeleteProbeObject(Index)
def ProcessMoleculesUsingSingleProcess(Mols, Writer):
    """Calculate energy for each molecule in the current process and write it out.

    None entries are skipped silently. Empty molecules are skipped with a
    warning unless OptionsInfo["QuietMode"] is set. Molecules for which the
    energy calculation fails are counted and not written.

    Arguments:
        Mols (iterable): Molecules to process; entries may be None.
        Writer (object): Writer handed to WriteMolecule.

    Returns:
        tuple : (MolCount, ValidMolCount, EnergyFailedCount).

    """

    MiscUtil.PrintInfo("\nCalculating energy...")

    MolCount = 0
    ValidMolCount = 0
    EnergyFailedCount = 0

    # MolCount tracks the 1-based position of the current molecule...
    for MolCount, Mol in enumerate(Mols, 1):
        if Mol is None:
            continue

        if RDKitUtil.IsMolEmpty(Mol):
            if not OptionsInfo["QuietMode"]:
                MiscUtil.PrintWarning("Ignoring empty molecule: %s" % RDKitUtil.GetMolName(Mol, MolCount))
            continue

        ValidMolCount += 1

        CalcStatus, Energy = CalculateMoleculeEnergy(Mol, MolCount)
        if not CalcStatus:
            if not OptionsInfo["QuietMode"]:
                MiscUtil.PrintWarning("Failed to calculate energy for molecule %s" % RDKitUtil.GetMolName(Mol, MolCount))
            EnergyFailedCount += 1
            continue

        WriteMolecule(Writer, Mol, "%.2f" % Energy)

    return (MolCount, ValidMolCount, EnergyFailedCount)
def CalculateOneToOneRMSDValues(ValidRefMols, ValidProbeMols, OutFH, OutDelim):
    """Calculate RMSD values between reference and probe molecules paired by position.

    When the two collections differ in size, warnings are issued and only the
    leading pairs are processed. One delimited line containing reference name,
    probe name and RMSD is written to OutFH for each pair.

    Arguments:
        ValidRefMols (list): Reference molecules.
        ValidProbeMols (list): Probe molecules.
        OutFH (object): Output handle providing a write method.
        OutDelim (str): Delimiter placed between output columns.

    """

    RefTotal = len(ValidRefMols)
    ProbeTotal = len(ValidProbeMols)
    PairTotal = min(RefTotal, ProbeTotal)

    if RefTotal != ProbeTotal:
        MiscUtil.PrintWarning("Number of valid reference molecules, %d, is not equal to number of valid probe molecules, %d .\n" % (RefTotal, ProbeTotal))
        MiscUtil.PrintWarning("Pairwise RMSD will be calculated only for first %s molecules.\n" % (PairTotal))

    # Process molecule pairs; MolNum is the 1-based position used for naming...
    for MolNum in range(1, PairTotal + 1):
        RefMol = ValidRefMols[MolNum - 1]
        ProbeMol = ValidProbeMols[MolNum - 1]

        RefMolName = RDKitUtil.GetMolName(RefMol, MolNum)
        ProbeMolName = RDKitUtil.GetMolName(ProbeMol, MolNum)

        RMSD = CalculateRMSDValue(RefMol, ProbeMol)

        OutFH.write("%s%s%s%s%s\n" % (RefMolName, OutDelim, ProbeMolName, OutDelim, RMSD))
def SetupCoreScaffoldByMCS(RefMol, Mol, MolCount):
    """Setup a reference molecule core containing common scaffold atoms between
    a pair of molecules using MCS.

    The MCS search is configured from OptionsInfo["MCSParams"]. No core is
    generated when the search is cancelled, when it yields an empty SMARTS
    string, or when the core has fewer atoms or bonds than the "MinNumAtoms"
    and "MinNumBonds" parameters. A warning is issued in each of these cases
    unless OptionsInfo["QuietMode"] is set.

    Arguments:
        RefMol (object): Reference molecule.
        Mol (object): Input molecule.
        MolCount (int): Position of the input molecule, used for naming it in
            warnings.

    Returns:
        object : Result of GenerateCoreMol for the reference molecule and the
            MCS SMARTS string, or None when no acceptable core was found.

    """

    MCSParams = OptionsInfo["MCSParams"]

    MCSResultObject = rdFMCS.FindMCS(
        [RefMol, Mol],
        maximizeBonds=MCSParams["MaximizeBonds"],
        threshold=MCSParams["Threshold"],
        timeout=MCSParams["TimeOut"],
        verbose=MCSParams["Verbose"],
        matchValences=MCSParams["MatchValences"],
        ringMatchesRingOnly=MCSParams["RingMatchesRingOnly"],
        completeRingsOnly=MCSParams["CompleteRingsOnly"],
        matchChiralTag=MCSParams["MatchChiralTag"],
        atomCompare=MCSParams["AtomCompare"],
        bondCompare=MCSParams["BondCompare"],
        seedSmarts=MCSParams["SeedSMARTS"])

    # A cancelled search and an empty SMARTS string are reported the same way...
    SMARTSCore = None if MCSResultObject.canceled else MCSResultObject.smartsString
    if not SMARTSCore:
        if not OptionsInfo["QuietMode"]:
            MiscUtil.PrintWarning("MCS failed to identify a common core scaffold between reference molecule and input molecule %s. Specify a different set of parameters using \"-m, --mcsParams\" option and try again." % (RDKitUtil.GetMolName(Mol, MolCount)))
        return None

    CoreNumAtoms = MCSResultObject.numAtoms
    if CoreNumAtoms < MCSParams["MinNumAtoms"]:
        if not OptionsInfo["QuietMode"]:
            MiscUtil.PrintWarning("Number of atoms, %d, in core scaffold identified by MCS is less than, %d, as specified by \"minNumAtoms\" parameter in \"-m, --mcsParams\" option." % (CoreNumAtoms, MCSParams["MinNumAtoms"]))
        return None

    CoreNumBonds = MCSResultObject.numBonds
    if CoreNumBonds < MCSParams["MinNumBonds"]:
        if not OptionsInfo["QuietMode"]:
            MiscUtil.PrintWarning("Number of bonds, %d, in core scaffold identified by MCS is less than, %d, as specified by \"minNumBonds\" parameter in \"-m, --mcsParams\" option." % (CoreNumBonds, MCSParams["MinNumBonds"]))
        return None

    return GenerateCoreMol(RefMol, SMARTSCore)
def GenerateAndMinimizeConformers(Mol, MolCount, Writer):
    """Generate and minimize conformers for a molecule and write out the
    lowest energy conformer.

    Hydrogens are added first when OptionsInfo["AddHydrogens"] is set and
    removed before writing when OptionsInfo["RemoveHydrogens"] is set. Each
    embedded conformer is minimized using UFF or MMFF as selected by
    OptionsInfo["UseUFF"] and OptionsInfo["UseMMFF"]. A conformer that fails
    to converge triggers a warning but remains a candidate.

    Arguments:
        Mol (object): Molecule to process.
        MolCount (int): Position of the molecule, used for naming it in
            warnings.
        Writer (object): Writer providing a write method accepting a molecule
            and a confId keyword.

    Returns:
        bool : True when a conformer was written; False when embedding,
            minimization or energy retrieval failed.

    """

    if OptionsInfo["AddHydrogens"]:
        Mol = Chem.AddHs(Mol)

    ConfIDs = EmbedMolecule(Mol)
    if not len(ConfIDs):
        MolName = RDKitUtil.GetMolName(Mol, MolCount)
        MiscUtil.PrintWarning("Minimization couldn't be performed for molecule %s: Embedding failed...\n" % MolName)
        return False

    CalcEnergyMap = {}
    for ConfID in ConfIDs:
        try:
            if OptionsInfo["UseUFF"]:
                Status = AllChem.UFFOptimizeMolecule(Mol, confId=ConfID, maxIters=OptionsInfo["MaxIters"])
            elif OptionsInfo["UseMMFF"]:
                Status = AllChem.MMFFOptimizeMolecule(Mol, confId=ConfID, maxIters=OptionsInfo["MaxIters"])
            else:
                MiscUtil.PrintError("Minimization couldn't be performed: Specified forcefield, %s, is not supported" % OptionsInfo["ForceField"])
        except RuntimeError as ErrMsg:
            MolName = RDKitUtil.GetMolName(Mol, MolCount)
            MiscUtil.PrintWarning("Minimization couldn't be performed for molecule %s:\n%s\n" % (MolName, ErrMsg))
            return False

        EnergyStatus, Energy = GetConformerEnergy(Mol, ConfID)
        if not EnergyStatus:
            MolName = RDKitUtil.GetMolName(Mol, MolCount)
            MiscUtil.PrintWarning("Failed to retrieve calculated energy for conformation number %d of molecule %s. Try again after removing any salts or cleaning up the molecule...\n" % (ConfID, MolName))
            return False

        # A non-zero status only triggers a warning; the energy is still recorded...
        if Status != 0:
            MolName = RDKitUtil.GetMolName(Mol, MolCount)
            MiscUtil.PrintWarning("Minimization failed to converge for conformation number %d of molecule %s in %d steps. Try using higher value for \"--maxIters\" option...\n" % (ConfID, MolName, OptionsInfo["MaxIters"]))

        CalcEnergyMap[ConfID] = Energy

    # min() picks the first lowest energy conformer, as the stable sort did...
    MinEnergyConfID = min(ConfIDs, key=lambda ConfID: CalcEnergyMap[ConfID])

    if OptionsInfo["RemoveHydrogens"]:
        Mol = Chem.RemoveHs(Mol)

    Writer.write(Mol, confId=MinEnergyConfID)

    return True
def MinimizeMolecule(Mol, MolNum=None):
    """Minimize a molecule using the forcefield selected in OptionsInfo.

    Hydrogens are added with coordinates first when OptionsInfo["AddHydrogens"]
    is set and removed at the end when OptionsInfo["RemoveHydrogens"] is set.
    A failure to converge triggers a warning but is not treated as a failure.
    Warnings are suppressed when OptionsInfo["QuietMode"] is set.

    Arguments:
        Mol (object): Molecule to minimize.
        MolNum (int): Position of the molecule, used for naming it in warnings.

    Returns:
        tuple : (Mol, Status, Energy). Status is False only when minimization
            raised an exception, in which case Energy is None. Energy is also
            None when OptionsInfo["EnergyOut"] is not set. When energy
            retrieval succeeds, Energy is a string with two decimal places.
            Otherwise it is the value returned by GetEnergy.

    """

    if OptionsInfo["AddHydrogens"]:
        Mol = Chem.AddHs(Mol, addCoords=True)

    Status = 0
    try:
        if OptionsInfo["UseUFF"]:
            Status = AllChem.UFFOptimizeMolecule(Mol, maxIters=OptionsInfo["MaxIters"])
        elif OptionsInfo["UseMMFF"]:
            Status = AllChem.MMFFOptimizeMolecule(Mol, maxIters=OptionsInfo["MaxIters"], mmffVariant=OptionsInfo["MMFFVariant"])
        else:
            MiscUtil.PrintError("Minimization couldn't be performed: Specified forcefield, %s, is not supported" % OptionsInfo["ForceField"])
    except (ValueError, RuntimeError, Chem.rdchem.KekulizeException) as ErrMsg:
        if not OptionsInfo["QuietMode"]:
            MolName = RDKitUtil.GetMolName(Mol, MolNum)
            MiscUtil.PrintWarning("Minimization couldn't be performed for molecule %s:\n%s\n" % (MolName, ErrMsg))
        return (Mol, False, None)

    # A non-zero status only triggers a warning...
    if Status != 0:
        if not OptionsInfo["QuietMode"]:
            MolName = RDKitUtil.GetMolName(Mol, MolNum)
            MiscUtil.PrintWarning("Minimization failed to converge for molecule %s in %d steps. Try using higher value for \"--maxIters\" option...\n" % (MolName, OptionsInfo["MaxIters"]))

    Energy = None
    if OptionsInfo["EnergyOut"]:
        EnergyStatus, Energy = GetEnergy(Mol)
        if EnergyStatus:
            Energy = "%.2f" % Energy
        elif not OptionsInfo["QuietMode"]:
            MolName = RDKitUtil.GetMolName(Mol, MolNum)
            MiscUtil.PrintWarning("Failed to retrieve calculated energy for molecule %s. Try again after removing any salts or cleaning up the molecule...\n" % (MolName))

    if OptionsInfo["RemoveHydrogens"]:
        Mol = Chem.RemoveHs(Mol)

    return (Mol, True, Energy)
def EmbedMolecule(Mol, MolNum=None):
    """Embed conformations for a molecule.

    Embedding parameters are taken from OptionsInfo. A ValueError raised
    during embedding is reported as a warning, unless OptionsInfo["QuietMode"]
    is set, and results in an empty list.

    Arguments:
        Mol (object): Molecule to embed.
        MolNum (int): Position of the molecule, used for naming it in warnings.

    Returns:
        object : Conformer IDs returned by AllChem.EmbedMultipleConfs, or an
            empty list when embedding raised ValueError.

    """

    EmbedParams = {
        "numConfs": OptionsInfo["MaxConfs"],
        "randomSeed": OptionsInfo["RandomSeed"],
        "enforceChirality": OptionsInfo["EnforceChirality"],
        "useExpTorsionAnglePrefs": OptionsInfo["UseExpTorsionAnglePrefs"],
        "useBasicKnowledge": OptionsInfo["UseBasicKnowledge"],
    }

    try:
        return AllChem.EmbedMultipleConfs(Mol, **EmbedParams)
    except ValueError as ErrMsg:
        if not OptionsInfo["QuietMode"]:
            MolName = RDKitUtil.GetMolName(Mol, MolNum)
            MiscUtil.PrintWarning("Embedding failed for molecule %s:\n%s\n" % (MolName, ErrMsg))

    return []
def PerformAlignmentAndWrieOutput(RefMol, ProbeMol, RefMolName, ProbeMolName, Writer):
    """Perform alignment and write the probe molecule to the output file.

    The alignment method is selected by the "UseRMSD", "UseBestRMSD",
    "UseOpen3A" and "UseCrippenOpen3A" flags in OptionsInfo. The probe
    molecule is written out whether or not the alignment succeeded.

    Arguments:
        RefMol (object): Reference molecule.
        ProbeMol (object): Probe molecule.
        RefMolName (str): Reference molecule name used in the warning.
        ProbeMolName (str): Probe molecule name used in the warning.
        Writer (object): Writer providing a write method.

    Returns:
        bool : False when the alignment raised RuntimeError or ValueError;
            True otherwise.

    """

    Aligned = True
    try:
        # RMSD values and scores returned by the alignment calls are not used...
        if OptionsInfo["UseRMSD"]:
            rdMolAlign.AlignMol(ProbeMol, RefMol, maxIters=OptionsInfo["MaxIters"])
        elif OptionsInfo["UseBestRMSD"]:
            # NOTE(review): the argument order here is (RefMol, ProbeMol), unlike
            # the other branches; which argument GetBestRMS transforms appears
            # to depend on the RDKit version - confirm against the one in use.
            AllChem.GetBestRMS(RefMol, ProbeMol)
        elif OptionsInfo["UseOpen3A"]:
            rdMolAlign.GetO3A(ProbeMol, RefMol).Align()
        elif OptionsInfo["UseCrippenOpen3A"]:
            rdMolAlign.GetCrippenO3A(ProbeMol, RefMol).Align()
        else:
            MiscUtil.PrintError("Alignment couldn't be performed: Specified alignment value, %s, is not supported" % OptionsInfo["Alignment"])
    except (RuntimeError, ValueError):
        Aligned = False
        MiscUtil.PrintWarning("Alignment failed between reference molecule, %s, and probe molecule, %s.\nWriting unaligned probe molecule...\n" % (RefMolName, ProbeMolName))

    # Write out probe molecule, aligned or not...
    Writer.write(ProbeMol)

    return Aligned
def CalculateMolPartialCharges(Mol, MolCount):
    """Calculate partial atomic charges for a molecule.

    MMFF charges are calculated when OptionsInfo["MMFFChargesMode"] is set;
    Gasteiger charges otherwise. Charges are formatted as strings using
    OptionsInfo["Precision"] decimal places.

    Arguments:
        Mol (object): Molecule to process.
        MolCount (int): Position of the molecule, used for naming it in
            warnings.

    Returns:
        tuple : (Status, PartialCharges). Status is False and the list is
            empty when MMFF parameters are missing for the molecule.

    """

    if OptionsInfo["MMFFChargesMode"]:
        if not AllChem.MMFFHasAllMoleculeParams(Mol):
            MolName = RDKitUtil.GetMolName(Mol, MolCount)
            MiscUtil.PrintWarning("Failed to calculate MMFF partial charges for molecule, %s: Missing forcefield parameters" % MolName)
            return (False, [])

        MMFFProp = AllChem.MMFFGetMoleculeProperties(Mol)
        RawCharges = [MMFFProp.GetMMFFPartialCharge(AtomIndex) for AtomIndex in range(Mol.GetNumAtoms())]
    else:
        # NOTE(review): "AllowParamFailure" is passed straight through as
        # throwOnParamFailure; the two names read as opposites - confirm the
        # intended polarity against how the option is set up.
        rdPartialCharges.ComputeGasteigerCharges(Mol, nIter=OptionsInfo["NumIters"], throwOnParamFailure=OptionsInfo["AllowParamFailure"])
        RawCharges = [Atom.GetProp("_GasteigerCharge") for Atom in Mol.GetAtoms()]

    # Format charges...
    FormattedCharges = ["%.*f" % (OptionsInfo["Precision"], float(Value)) for Value in RawCharges]

    return (True, FormattedCharges)
def WorkerProcess(EncodedMolInfo):
    """Calculate partial charges for one encoded molecule in a worker process.

    Arguments:
        EncodedMolInfo (sequence): Pair containing the molecule index and the
            base64 encoded molecule string; the string may be None.

    Returns:
        list : [MolIndex, EncodedMolWithHs, CalcStatus, PartialCharges]. For a
            missing or empty molecule, the list is
            [MolIndex, None, False, None].

    """

    MolIndex, EncodedMol = EncodedMolInfo
    MolNum = MolIndex + 1

    if EncodedMol is not None:
        Mol = RDKitUtil.MolFromBase64EncodedMolString(EncodedMol)
        if not RDKitUtil.IsMolEmpty(Mol):
            MolWithHs = Chem.AddHs(Mol)
            PickleFlags = Chem.PropertyPickleOptions.MolProps | Chem.PropertyPickleOptions.PrivateProps
            EncodedMolWithHs = RDKitUtil.MolToBase64EncodedMolString(MolWithHs, PropertyPickleFlags=PickleFlags)

            # Retrieve charges...
            CalcStatus, PartialCharges = CalculateMolPartialCharges(MolWithHs, MolNum)

            return [MolIndex, EncodedMolWithHs, CalcStatus, PartialCharges]

        MiscUtil.PrintWarning("Ignoring empty molecule: %s" % RDKitUtil.GetMolName(Mol, MolNum))

    return [MolIndex, None, False, None]
def ProcessMoleculesUsingSingleProcess(Mols, Writer):
    """Calculate partial atomic charges for molecules in the current process.

    None entries are skipped silently and empty molecules with a warning.
    Hydrogens are added before charges are calculated. Molecules for which
    the calculation fails are counted and not written.

    Arguments:
        Mols (iterable): Molecules to process; entries may be None.
        Writer (object): Writer handed to WriteMolPartialCharges.

    Returns:
        tuple : (MolCount, ValidMolCount, CalcFailedCount).

    """

    MiscUtil.PrintInfo("Calculating partial atomic charges...")

    Compute2DCoords = OptionsInfo["OutfileParams"]["Compute2DCoords"]

    MolCount = 0
    ValidMolCount = 0
    CalcFailedCount = 0

    # MolCount tracks the 1-based position of the current molecule...
    for MolCount, Mol in enumerate(Mols, 1):
        if Mol is None:
            continue

        if RDKitUtil.IsMolEmpty(Mol):
            MiscUtil.PrintWarning("Ignoring empty molecule: %s" % RDKitUtil.GetMolName(Mol, MolCount))
            continue

        ValidMolCount += 1

        MolWithHs = Chem.AddHs(Mol)

        # Retrieve charges...
        CalcStatus, PartialCharges = CalculateMolPartialCharges(MolWithHs, MolCount)

        if CalcStatus:
            # Write out charges...
            WriteMolPartialCharges(Writer, MolWithHs, PartialCharges, Compute2DCoords)
        else:
            CalcFailedCount += 1

    return (MolCount, ValidMolCount, CalcFailedCount)
def WorkerProcess(EncodedMolInfo):
    """Generate conformers for one encoded molecule in a worker process.

    Arguments:
        EncodedMolInfo (sequence): Pair containing the molecule index and the
            base64 encoded molecule string; the string may be None.

    Returns:
        list : [MolIndex, EncodedMol, CalcStatus, ConfIDs, ConfEnergies]. For
            a missing or empty molecule, the list is
            [MolIndex, None, False, None, None].

    """

    MolIndex, EncodedMol = EncodedMolInfo
    MolNum = MolIndex + 1

    # Result reported for molecules that can't be processed...
    SkippedResult = [MolIndex, None, False, None, None]

    if EncodedMol is None:
        return SkippedResult

    Mol = RDKitUtil.MolFromBase64EncodedMolString(EncodedMol)
    if RDKitUtil.IsMolEmpty(Mol):
        if not OptionsInfo["QuietMode"]:
            MiscUtil.PrintWarning("Ignoring empty molecule: %s" % RDKitUtil.GetMolName(Mol, MolNum))
        return SkippedResult

    Mol, CalcStatus, ConfIDs, ConfEnergies = GenerateMolConformers(Mol, MolNum)

    PickleFlags = Chem.PropertyPickleOptions.MolProps | Chem.PropertyPickleOptions.PrivateProps
    EncodedResultMol = RDKitUtil.MolToBase64EncodedMolString(Mol, PropertyPickleFlags=PickleFlags)

    return [MolIndex, EncodedResultMol, CalcStatus, ConfIDs, ConfEnergies]
def ProcessMoleculesUsingSingleProcess(Mols, Writer):
    """Calculate descriptors for molecules in the current process and write them out.

    The number of descriptors is taken from
    OptionsInfo["SpecifiedDescriptorNames"]. None entries are skipped
    silently and empty molecules with a warning.

    Arguments:
        Mols (iterable): Molecules to process; entries may be None.
        Writer (object): Writer handed to WriteDescriptorValues.

    Returns:
        tuple : (MolCount, ValidMolCount).

    """

    DescriptorsCount = len(OptionsInfo["SpecifiedDescriptorNames"])
    DescriptorsLabel = "descriptors" if DescriptorsCount > 1 else "descriptor"
    MiscUtil.PrintInfo("\nCalculating %d molecular %s for each molecule..." % (DescriptorsCount, DescriptorsLabel))

    MolCount = 0
    ValidMolCount = 0

    for MolIndex, Mol in enumerate(Mols):
        # MolCount is the 1-based position; MolIndex is its 0-based counterpart...
        MolCount = MolIndex + 1

        if Mol is None:
            continue

        if RDKitUtil.IsMolEmpty(Mol):
            MolName = RDKitUtil.GetMolName(Mol, MolCount)
            MiscUtil.PrintWarning("Ignoring empty molecule: %s" % MolName)
            continue

        ValidMolCount += 1

        # Calculate and write descriptor values...
        CalculatedValues = CalculateDescriptorValues(MolIndex, Mol)
        WriteDescriptorValues(Mol, MolCount, Writer, CalculatedValues)

    return (MolCount, ValidMolCount)
def WriteChainFile(MolName, ChainID, ChainFile):
    """Write out a file for a chain.

    The chain selection is narrowed according to the "ChainsMode",
    "KeepSolvents" and "KeepInorganics" flags in OptionsInfo. A temporary
    PyMOL object is created from the selection and deleted after saving. A
    warning is issued when the output file doesn't exist afterwards.

    Arguments:
        MolName (str): Name of the PyMOL object containing the chain.
        ChainID (str): Chain identifier.
        ChainFile (str): Name of the output file.

    """

    MiscUtil.PrintInfo("\nGenerating output file %s..." % ChainFile)

    ChainName = "%s_Chain%s" % (MolName, ChainID)

    # Assemble selection from the base chain selection and optional exclusions...
    SelectionParts = ["%s and (chain %s)" % (MolName, ChainID)]
    if not OptionsInfo["ChainsMode"]:
        SelectionParts.append(" and (not organic)")
    if not OptionsInfo["KeepSolvents"]:
        SelectionParts.append(" and (not solvent)")
    if not OptionsInfo["KeepInorganics"]:
        SelectionParts.append(" and (not inorganic)")
    ChainSelection = "(%s)" % "".join(SelectionParts)

    MiscUtil.PrintInfo("Chain selection: %s" % ChainSelection)

    pymol.cmd.create(ChainName, ChainSelection)
    pymol.cmd.save(ChainFile, ChainSelection)
    pymol.cmd.delete(ChainName)

    if os.path.exists(ChainFile):
        return

    MiscUtil.PrintWarning("Failed to generate Chain file, %s..." % (ChainFile))
def WorkerProcess(EncodedMolInfo):
    """Perform constrained minimization for one encoded molecule in a worker process.

    The reference molecule is taken from OptionsInfo["RefMol"].

    Arguments:
        EncodedMolInfo (sequence): Pair containing the molecule index and the
            base64 encoded molecule string; the string may be None.

    Returns:
        list : [MolIndex, EncodedMol, CoreScaffoldMissingStatus, CalcStatus,
            Energy, ScaffoldEmbedRMSD]. EncodedMol is None for a missing or
            empty molecule and when no core scaffold could be set up; in the
            last case CoreScaffoldMissingStatus is True.

    """

    MolIndex, EncodedMol = EncodedMolInfo

    CoreScaffoldMissingStatus = False
    CalcStatus = False
    Energy = None
    ScaffoldEmbedRMSD = None

    if EncodedMol is None:
        return [MolIndex, None, CoreScaffoldMissingStatus, CalcStatus, Energy, ScaffoldEmbedRMSD]

    RefMol = OptionsInfo["RefMol"]
    Mol = RDKitUtil.MolFromBase64EncodedMolString(EncodedMol)
    if RDKitUtil.IsMolEmpty(Mol):
        if not OptionsInfo["QuietMode"]:
            MolName = RDKitUtil.GetMolName(Mol, (MolIndex + 1))
            MiscUtil.PrintWarning("Ignoring empty molecule: %s" % MolName)
        return [MolIndex, None, CoreScaffoldMissingStatus, CalcStatus, Energy, ScaffoldEmbedRMSD]

    # Setup a reference molecule core containing common scaffold atoms...
    RefMolCore = SetupCoreScaffold(RefMol, Mol, (MolIndex + 1))
    if RefMolCore is None:
        CoreScaffoldMissingStatus = True
        # Keep the same element order as every other return so that the
        # missing scaffold flag isn't reported in the CalcStatus position...
        return [MolIndex, None, CoreScaffoldMissingStatus, CalcStatus, Energy, ScaffoldEmbedRMSD]

    Mol, CalcStatus, Energy, ScaffoldEmbedRMSD = ConstrainAndMinimizeMolecule(Mol, RefMolCore, (MolIndex + 1))

    PickleFlags = Chem.PropertyPickleOptions.MolProps | Chem.PropertyPickleOptions.PrivateProps
    EncodedResultMol = RDKitUtil.MolToBase64EncodedMolString(Mol, PropertyPickleFlags=PickleFlags)

    return [MolIndex, EncodedResultMol, CoreScaffoldMissingStatus, CalcStatus, Energy, ScaffoldEmbedRMSD]
def RetrieveReferenceMolecule():
    """Retrieve and validate the reference molecule.

    The reference file name is taken from OptionsInfo["RefFile"]. Before
    reading, OptionsInfo["InfileParams"]["AllowEmptyMols"] is set to False.
    The first valid molecule is used as the reference molecule. When
    OptionsInfo["UseScaffoldSMARTS"] is set, the SMARTS pattern in
    OptionsInfo["ScaffoldSMARTS"] must be valid and must match the reference
    molecule; otherwise an error is reported.

    Returns:
        object : First valid molecule from the reference file.

    """

    RefFile = OptionsInfo["RefFile"]

    MiscUtil.PrintInfo("\nProcessing file %s..." % (RefFile))
    OptionsInfo["InfileParams"]["AllowEmptyMols"] = False
    ValidRefMols, RefMolCount, ValidRefMolCount = RDKitUtil.ReadAndValidateMolecules(RefFile, **OptionsInfo["InfileParams"])

    if ValidRefMolCount == 0:
        MiscUtil.PrintError("The reference file, %s, contains no valid molecules." % RefFile)
    elif ValidRefMolCount > 1:
        MiscUtil.PrintWarning("The reference file, %s, contains, %d, valid molecules. Using first molecule as the reference molecule..." % (RefFile, ValidRefMolCount))

    RefMol = ValidRefMols[0]

    if not OptionsInfo["UseScaffoldSMARTS"]:
        return RefMol

    # Validate specified scaffold SMARTS against the reference molecule...
    ScaffoldPatternMol = Chem.MolFromSmarts(OptionsInfo["ScaffoldSMARTS"])
    if ScaffoldPatternMol is None:
        MiscUtil.PrintError("Failed to create scaffold pattern molecule. The scaffold SMARTS pattern, %s, specified using \"-s, --scaffold\" option is not valid." % (OptionsInfo["ScaffoldSMARTS"]))

    if not RefMol.HasSubstructMatch(ScaffoldPatternMol):
        MiscUtil.PrintError("The scaffold SMARTS pattern, %s, specified using \"-s, --scaffold\" option, is missing in the first valid reference molecule." % (OptionsInfo["ScaffoldSMARTS"]))

    return RefMol
def ProcessMoleculesUsingSingleProcess(RefMol, Mols, Writer):
    """Perform constrained minimization of molecules in the current process.

    None entries are skipped silently. Empty molecules are skipped with a
    warning unless OptionsInfo["QuietMode"] is set. Molecules without a core
    scaffold and molecules for which minimization fails are counted and not
    written.

    Arguments:
        RefMol (object): Reference molecule.
        Mols (iterable): Molecules to process; entries may be None.
        Writer (object): Writer handed to WriteMolecule.

    Returns:
        tuple : (MolCount, ValidMolCount, CoreScaffoldMissingCount,
            MinimizationFailedCount).

    """

    MolCount = 0
    ValidMolCount = 0
    CoreScaffoldMissingCount = 0
    MinimizationFailedCount = 0

    # MolCount tracks the 1-based position of the current molecule...
    for MolCount, Mol in enumerate(Mols, 1):
        if Mol is None:
            continue

        if RDKitUtil.IsMolEmpty(Mol):
            if not OptionsInfo["QuietMode"]:
                MiscUtil.PrintWarning("Ignoring empty molecule: %s" % RDKitUtil.GetMolName(Mol, MolCount))
            continue

        ValidMolCount += 1

        # Setup a reference molecule core containing common scaffold atoms...
        RefMolCore = SetupCoreScaffold(RefMol, Mol, MolCount)
        if RefMolCore is None:
            CoreScaffoldMissingCount += 1
            continue

        Mol, CalcStatus, Energy, ScaffoldEmbedRMSD = ConstrainAndMinimizeMolecule(Mol, RefMolCore, MolCount)

        if CalcStatus:
            WriteMolecule(Writer, Mol, Energy, ScaffoldEmbedRMSD)
        else:
            MinimizationFailedCount += 1

    return (MolCount, ValidMolCount, CoreScaffoldMissingCount, MinimizationFailedCount)
def CheckOptionTableClassColorValues(OptionName, ColorsList):
    """Check names of table color classes and issue a warning for unknown names.

    Arguments:
        OptionName (str): Name of the command line option, used in warnings.
        ColorsList (list): Color class names to check.

    """

    KnownClassColors = (
        "thead-dark", "thead-light",
        "table-primary", "table-success", "table-danger", "table-info",
        "table-warning", "table-active", "table-secondary", "table-light",
        "table-dark",
        "bg-primary", "bg-success", "bg-danger", "bg-info", "bg-warning",
        "bg-secondary", "bg-dark", "bg-light",
    )

    for Color in ColorsList:
        if Color in KnownClassColors:
            continue
        MiscUtil.PrintWarning("The color class name, %s, specified using option \"%s\" appears to be a unknown name..." % (Color, OptionName))
def ProcessMoleculesUsingSingleProcess(Mols, GroupsPatternMols, Writer, GroupOutfilesWriters):
    """Search molecules for functional groups in the current process.

    None entries are skipped silently and empty molecules with a warning.
    For the first valid molecule, writer properties are set up on all
    available writers when SMILES molecule properties are requested in
    OptionsInfo["OutfileParams"]. Only molecules reported as matched by
    MatchMolecule are written out.

    Arguments:
        Mols (iterable): Molecules to process; entries may be None.
        GroupsPatternMols (list): Pattern molecules handed to MatchMolecule.
        Writer (object): Main writer; may be None.
        GroupOutfilesWriters (list): Writers for individual groups; entries
            may be None.

    Returns:
        tuple : (MolCount, ValidMolCount, RemainingMolCount,
            GroupsPatternsMatchCountList).

    """

    MiscUtil.PrintInfo("\nSearching functional groups...")

    Compute2DCoords = OptionsInfo["OutfileParams"]["Compute2DCoords"]
    CombineMatchResults = OptionsInfo["CombineMatchResults"]
    SetSMILESMolProps = OptionsInfo["OutfileParams"]["SetSMILESMolProps"]

    GroupsPatternsMatchCountList = [0] * len(OptionsInfo["SpecifiedFunctionalGroups"])

    MolCount = 0
    ValidMolCount = 0
    RemainingMolCount = 0

    WriterPropsPending = True
    for MolCount, Mol in enumerate(Mols, 1):
        if Mol is None:
            continue

        if RDKitUtil.IsMolEmpty(Mol):
            MiscUtil.PrintWarning("Ignoring empty molecule: %s" % RDKitUtil.GetMolName(Mol, MolCount))
            continue

        ValidMolCount += 1

        # Set up writer properties once using the first valid molecule...
        if WriterPropsPending:
            WriterPropsPending = False
            if SetSMILESMolProps:
                if Writer is not None:
                    RDKitUtil.SetWriterMolProps(Writer, Mol)
                for GroupOutfileWriter in GroupOutfilesWriters:
                    if GroupOutfileWriter is not None:
                        RDKitUtil.SetWriterMolProps(GroupOutfileWriter, Mol)

        # Match molecule against functional group patterns...
        MolMatched, GroupsPatternMatchStatusList = MatchMolecule(Mol, GroupsPatternMols)

        # Update functional group match count...
        for GroupIndex, MatchStatus in enumerate(GroupsPatternMatchStatusList):
            if MatchStatus:
                GroupsPatternsMatchCountList[GroupIndex] += 1

        if MolMatched:
            RemainingMolCount += 1
            WriteMolecule(Writer, GroupOutfilesWriters, Mol, Compute2DCoords, CombineMatchResults, GroupsPatternMatchStatusList)

    return (MolCount, ValidMolCount, RemainingMolCount, GroupsPatternsMatchCountList)
def MoleculesWriter(FileName, **KeyWordArgs):
    """Set up a molecule writer.

    Arguments:
        FileName (str): Name of a file with complete path.
        **KeyWordArgs (dictionary) : Parameter name and value pairs for writing
            and processing molecules. Names not known to this function are
            ignored.

    Returns:
        RDKit object : Molecule writer, or None for a non supported file type.

    Notes:
        The file extension is used to determine type of the file and set up
        an appropriate file writer. A warning is issued for a non supported
        file type.

    """

    # Start with default values and override them with specified values...
    WriterArgs = {
        "Compute2DCoords": False,
        "Kekulize": False,
        "SMILESDelimiter": ' ',
        "SMILESIsomeric": True,
        "SMILESTitleLine": True,
        "SMILESMolName": True,
    }
    WriterArgs.update({Name: KeyWordArgs[Name] for Name in WriterArgs if Name in KeyWordArgs})

    if MiscUtil.CheckFileExt(FileName, "sdf sd"):
        Writer = Chem.SDWriter(FileName)
        if WriterArgs["Kekulize"]:
            Writer.SetKekulize(True)
        return Writer

    if MiscUtil.CheckFileExt(FileName, "pdb"):
        return Chem.PDBWriter(FileName)

    if MiscUtil.CheckFileExt(FileName, "smi"):
        # Text for the name column in the title line. Blank indicates not to
        # include name column in the output file...
        NameHeader = 'Name' if WriterArgs["SMILESMolName"] else ''
        return Chem.SmilesWriter(
            FileName,
            delimiter=WriterArgs["SMILESDelimiter"],
            nameHeader=NameHeader,
            includeHeader=WriterArgs["SMILESTitleLine"],
            isomericSmiles=WriterArgs["SMILESIsomeric"],
            kekuleSmiles=WriterArgs["Kekulize"])

    MiscUtil.PrintWarning("RDKitUtil.WriteMolecules: Non supported file type: %s" % FileName)

    return None
def GenerateMolConformersWithoutMinimization(Mol, MolNum=None):
    """Generate conformers for a molecule without performing minimization.

    Conformers are aligned when OptionsInfo["AlignConformers"] is set.
    Energies are collected only when OptionsInfo["EnergyOut"] is set.
    Informational and warning messages are suppressed when
    OptionsInfo["QuietMode"] is set.

    Arguments:
        Mol (object): Molecule to process.
        MolNum (int): Position of the molecule, used for naming it in messages.

    Returns:
        list : [Mol, Status, ConfIDsList, ConfEnergies]. Status is False and
            the last two entries are None when embedding produced no
            conformers. ConfEnergies is None when energies weren't requested;
            otherwise it holds one string per conformer, with "NotAvailable"
            for conformers whose energy couldn't be retrieved.

    """

    ConfIDs = EmbedMolecule(Mol, MolNum)
    if not len(ConfIDs):
        if not OptionsInfo["QuietMode"]:
            MolName = RDKitUtil.GetMolName(Mol, MolNum)
            MiscUtil.PrintWarning("Conformation generation couldn't be performed for molecule %s: Embedding failed...\n" % MolName)
        return [Mol, False, None, None]

    if OptionsInfo["AlignConformers"]:
        AllChem.AlignMolConformers(Mol)

    if not OptionsInfo["QuietMode"]:
        MolName = RDKitUtil.GetMolName(Mol, MolNum)
        MiscUtil.PrintInfo("\nNumber of conformations generated for %s: %d" % (MolName, len(ConfIDs)))

    # Convert ConfIDs into a list...
    ConfIDsList = list(ConfIDs)

    # Setup conformation energies...
    ConfEnergies = None
    if OptionsInfo["EnergyOut"]:
        ConfEnergies = []
        for ConfID in ConfIDsList:
            EnergyStatus, Energy = GetConformerEnergy(Mol, ConfID)
            if EnergyStatus:
                ConfEnergies.append("%.2f" % Energy)
                continue

            ConfEnergies.append("NotAvailable")
            if not OptionsInfo["QuietMode"]:
                MolName = RDKitUtil.GetMolName(Mol, MolNum)
                MiscUtil.PrintWarning("Failed to retrieve calculated energy for conformation number %d of molecule %s. Try again after removing any salts or cleaning up the molecule...\n" % (ConfID, MolName))

    return [Mol, True, ConfIDsList, ConfEnergies]
def RetrieveReactantsMolecules():
    """Retrieve reactant molecules from each reactant file.

    Reactant file names are taken from OptionsInfo["ReactantFilesList"]. None
    entries are skipped silently and empty molecules with a warning. When
    OptionsInfo["UseReactantNames"] is set, a valid molecule without a name
    is assigned one derived from its reactant file number and its position
    in that file. Molecule counts are reported for each file.

    Returns:
        list : List containing a list of valid molecules for each reactant
            file.

    """

    MiscUtil.PrintInfo("\nProcessing reactant file(s)...")

    ReactantFilesList = OptionsInfo["ReactantFilesList"]
    UseReactantNames = OptionsInfo["UseReactantNames"]

    ReactantsMolsList = []
    for ReactantNum, ReactantFile in enumerate(ReactantFilesList, 1):
        MiscUtil.PrintInfo("\nProcessing reactant file: %s..." % ReactantFile)

        Mols = RDKitUtil.ReadMolecules(ReactantFile, **OptionsInfo["InfileParams"])

        ValidMols = []
        MolCount = 0
        for MolCount, Mol in enumerate(Mols, 1):
            if Mol is None:
                continue

            if RDKitUtil.IsMolEmpty(Mol):
                MiscUtil.PrintWarning("Ignoring empty molecule: %s" % RDKitUtil.GetMolName(Mol, MolCount))
                continue

            # Check and set mol name...
            if UseReactantNames and not len(RDKitUtil.GetMolName(Mol)):
                Mol.SetProp("_Name", "React%dMol%d" % (ReactantNum, MolCount))

            ValidMols.append(Mol)

        ReactantsMolsList.append(ValidMols)

        # Every valid molecule is appended, so the list size is the valid count...
        ValidMolCount = len(ValidMols)
        MiscUtil.PrintInfo("Total number of molecules: %d" % MolCount)
        MiscUtil.PrintInfo("Number of valid molecules: %d" % ValidMolCount)
        MiscUtil.PrintInfo("Number of ignored molecules: %d" % (MolCount - ValidMolCount))

    return ReactantsMolsList
def CalculateDescriptorValue(MolIndex, Mol, Name):
    """Calculate value for a specific descriptor, handling a calculation failure.

    The compute function is looked up by name in
    DescriptorNamesMap["ComputeFunction"].

    Arguments:
        MolIndex (int): Zero-based molecule index; reported as MolIndex + 1
            in the warning.
        Mol (object): Molecule handed to the compute function.
        Name (str): Descriptor name.

    Returns:
        object : Value returned by the compute function, or "NA" when it
            raised ValueError.

    """

    try:
        return DescriptorNamesMap["ComputeFunction"][Name](Mol)
    except ValueError as ErrMsg:
        MiscUtil.PrintWarning("Failed to calculate descriptor %s for molecule %s:\n%s\n" % (Name, (MolIndex + 1), ErrMsg))

    return "NA"
def CalculatePartialCharges():
    """Calculate partial atomic charges for molecules in the input file and
    write them to the output file.

    File names and reader/writer parameters are taken from OptionsInfo. None
    entries are skipped silently and empty molecules with a warning.
    Hydrogens are added before charges are calculated. A summary of molecule
    counts is reported at the end.

    """

    Infile = OptionsInfo["Infile"]
    Outfile = OptionsInfo["Outfile"]

    MiscUtil.PrintInfo("Calculating partial atomic charges...")

    # Setup a molecule reader...
    MiscUtil.PrintInfo("\nProcessing file %s..." % Infile)
    Mols = RDKitUtil.ReadMolecules(Infile, **OptionsInfo["InfileParams"])

    # Setup a writer...
    Compute2DCoords = OptionsInfo["OutfileParams"]["Compute2DCoords"]
    Writer = RDKitUtil.MoleculesWriter(Outfile, **OptionsInfo["OutfileParams"])
    if Writer is None:
        MiscUtil.PrintError("Failed to setup a writer for output file %s " % Outfile)

    MiscUtil.PrintInfo("Generating file %s..." % Outfile)

    # Process molecules...
    MolCount = 0
    ValidMolCount = 0
    CalcFailedCount = 0
    for MolCount, Mol in enumerate(Mols, 1):
        if Mol is None:
            continue

        if RDKitUtil.IsMolEmpty(Mol):
            MolName = RDKitUtil.GetMolName(Mol, MolCount)
            MiscUtil.PrintWarning("Ignoring empty molecule: %s" % MolName)
            continue

        ValidMolCount += 1

        MolWithHs = Chem.AddHs(Mol)

        # Retrieve charges...
        # NOTE(review): an empty result is treated as a calculation failure;
        # this presumes CalculateMolPartialCharges returns a plain sequence of
        # charges - confirm against its definition.
        PartialCharges = CalculateMolPartialCharges(MolWithHs, MolCount)
        if not len(PartialCharges):
            CalcFailedCount += 1
            continue

        # Write out charges...
        WriteMolPartialCharges(Writer, MolWithHs, PartialCharges, Compute2DCoords)

    if Writer is not None:
        Writer.close()

    MiscUtil.PrintInfo("\nTotal number of molecules: %d" % MolCount)
    MiscUtil.PrintInfo("Number of valid molecules: %d" % ValidMolCount)
    MiscUtil.PrintInfo("Number of molecules failed during calculation of partial charges: %d" % CalcFailedCount)
    MiscUtil.PrintInfo("Number of ignored molecules: %d" % (MolCount - ValidMolCount + CalcFailedCount))
def ReadMolecules(FileName, **KeyWordArgs):
    """Read molecules from an input file without performing any validation
    and creation of molecule objects.

    Arguments:
        FileName (str): Name of a file with complete path.
        **KeyWordArgs (dictionary) : Parameter name and value pairs for reading
            and processing molecules. Names not known to this function are
            ignored.

    Returns:
        list : Result of the file type specific reader; an empty list for a
            non supported file type.

    Notes:
        The file extension is used to determine type of the file and set up
        an appropriate file reader. A warning is issued for a non supported
        file type. The SMILES column numbers are specified starting from 1
        and converted into indices starting from 0 before reading.

    """

    # Start with default values and override them with specified values...
    ReaderArgs = {
        "Sanitize": True,
        "RemoveHydrogens": True,
        "StrictParsing": True,
        "SMILESDelimiter": ' ',
        "SMILESColumn": 1,
        "SMILESNameColumn": 2,
        "SMILESTitleLine": True,
    }
    ReaderArgs.update({Name: KeyWordArgs[Name] for Name in ReaderArgs if Name in KeyWordArgs})

    # Modify specific values for SMILES; boolean flags are passed as 1 or 0...
    if MiscUtil.CheckFileExt(FileName, "smi csv tsv txt"):
        for Flag in ("Sanitize", "SMILESTitleLine"):
            ReaderArgs[Flag] = 1 if ReaderArgs[Flag] is True else 0

    Sanitize = ReaderArgs["Sanitize"]
    RemoveHydrogens = ReaderArgs["RemoveHydrogens"]
    StrictParsing = ReaderArgs['StrictParsing']

    if MiscUtil.CheckFileExt(FileName, "sdf sd"):
        return ReadMoleculesFromSDFile(FileName, Sanitize, RemoveHydrogens, StrictParsing)

    if MiscUtil.CheckFileExt(FileName, "mol"):
        return ReadMoleculesFromMolFile(FileName, Sanitize, RemoveHydrogens, StrictParsing)

    if MiscUtil.CheckFileExt(FileName, "mol2"):
        return ReadMoleculesFromMol2File(FileName, Sanitize, RemoveHydrogens)

    if MiscUtil.CheckFileExt(FileName, "pdb"):
        return ReadMoleculesFromPDBFile(FileName, Sanitize, RemoveHydrogens)

    if MiscUtil.CheckFileExt(FileName, "smi txt csv tsv"):
        SMILESColumnIndex = ReaderArgs["SMILESColumn"] - 1
        SMILESNameColumnIndex = ReaderArgs["SMILESNameColumn"] - 1
        return ReadMoleculesFromSMILESFile(FileName, ReaderArgs["SMILESDelimiter"], SMILESColumnIndex, SMILESNameColumnIndex, ReaderArgs["SMILESTitleLine"], Sanitize)

    MiscUtil.PrintWarning("RDKitUtil.ReadMolecules: Non supported file type: %s" % FileName)

    return []
def SetupCoreScaffoldBySMARTS(RefMol, Mol, MolCount):
    """Setup a reference molecule core containing common scaffold atoms between
    a pair of molecules using specified SMARTS.

    The pattern molecule for OptionsInfo["ScaffoldSMARTS"] is created on first
    use and stored in OptionsInfo["ScaffoldPatternMol"]. A warning is issued,
    unless OptionsInfo["QuietMode"] is set, when the pattern is missing in
    the input molecule.

    Arguments:
        RefMol (object): Reference molecule.
        Mol (object): Input molecule.
        MolCount (int): Position of the input molecule, used for naming it in
            the warning.

    Returns:
        object : Result of GenerateCoreMol for the reference molecule and the
            scaffold SMARTS, or None when the pattern is missing in the input
            molecule.

    """

    ScaffoldSMARTS = OptionsInfo["ScaffoldSMARTS"]

    if OptionsInfo["ScaffoldPatternMol"] is None:
        OptionsInfo["ScaffoldPatternMol"] = Chem.MolFromSmarts(ScaffoldSMARTS)

    if Mol.HasSubstructMatch(OptionsInfo["ScaffoldPatternMol"]):
        return GenerateCoreMol(RefMol, ScaffoldSMARTS)

    if not OptionsInfo["QuietMode"]:
        MiscUtil.PrintWarning("The scaffold SMARTS pattern, %s, specified using \"-s, --scaffold\" option is missing in input molecule, %s." % (ScaffoldSMARTS, RDKitUtil.GetMolName(Mol, MolCount)))

    return None
def WriteAlignedInputObject(FileIndex):
    """Write out aligned input object.

    The output file name and the PyMOL object name are taken from
    OptionsInfo["InfilesInfo"]. A warning is issued when the output file
    doesn't exist after saving.

    Arguments:
        FileIndex (int): Index of the input file.

    """

    InfilesInfo = OptionsInfo["InfilesInfo"]
    Outfile = InfilesInfo["OutfilesNames"][FileIndex]
    InputName = InfilesInfo["PyMOLObjectNames"][FileIndex]

    MiscUtil.PrintInfo("Generating aligned output file %s..." % Outfile)

    pymol.cmd.save(Outfile, InputName)

    if os.path.exists(Outfile):
        return

    MiscUtil.PrintWarning("Failed to generate aligned output file, %s..." % (Outfile))
def WriteProductMolecule(Writer, ProdMol, Sanitize, Compute2DCoords):
    """Prepare and write out product molecule.

    Arguments:
        Writer (object): Writer providing a write method.
        ProdMol (object): Product molecule.
        Sanitize (bool): Sanitize the molecule before writing.
        Compute2DCoords (bool): Compute 2D coordinates before writing.

    Returns:
        bool : True when the molecule was written; False when sanitization or
            calculation of 2D coordinates raised RuntimeError or ValueError.

    """

    if Sanitize:
        try:
            Chem.SanitizeMol(ProdMol)
        except (RuntimeError, ValueError):
            MiscUtil.PrintWarning("Ignoring product molecule: Failed to sanitize...\n")
            return False

    if Compute2DCoords:
        try:
            AllChem.Compute2DCoords(ProdMol)
        except (RuntimeError, ValueError):
            MiscUtil.PrintWarning("Ignoring product molecule: Failed to compute 2D coordinates...\n")
            return False

    Writer.write(ProdMol)

    return True
def GetChainIDsPairsForInterfaceResidues(ChainIDs):
    """Get chain IDs pairs for identifying interface residues.

    When OptionsInfo["InterfaceResiduesChainsList"] is empty, the first two
    chain IDs are used as the only pair, provided at least two are
    available. Otherwise, consecutive entries in that list are treated as
    pairs. A pair containing a chain ID not present in ChainIDs is ignored
    with warnings.

    Arguments:
        ChainIDs (list): Valid chain IDs.

    Returns:
        list : List of chain IDs pairs; each pair is a two element list.

    """

    SpecifiedChainsList = OptionsInfo["InterfaceResiduesChainsList"]

    if not len(SpecifiedChainsList):
        # Use first two chain IDs...
        if len(ChainIDs) < 2:
            return []
        return [[ChainIDs[0], ChainIDs[1]]]

    # Validate specified pairwise chain IDs...
    ChainIDsPairsList = []
    for Index in range(0, len(SpecifiedChainsList), 2):
        ChainIDs1 = SpecifiedChainsList[Index]
        ChainIDs2 = SpecifiedChainsList[Index + 1]

        # Every unknown chain ID is reported, not just the first one...
        PairIsValid = True
        for ChainID in list(ChainIDs1) + list(ChainIDs2):
            if ChainID not in ChainIDs:
                PairIsValid = False
                MiscUtil.PrintWarning("The chain ID, %s, specified using \"--interfaceResiduesChains\" for a chain IDs pairs is not a valid chain ID." % (ChainID))

        if PairIsValid:
            ChainIDsPairsList.append([ChainIDs1, ChainIDs2])
        else:
            MiscUtil.PrintWarning("Ignoring chain IDs pair: %s, %s" % ("+".join(ChainIDs1), "+".join(ChainIDs2)))

    return ChainIDsPairsList