def main(argv=[__name__]):
    """Tag ROCS hits with their analytic color Tanimoto against a reference.

    ``argv`` must be ``[prog, reffile, rocs_hits_file, output.sdf]``. The
    first molecule read from ``reffile`` is the reference. Every molecule
    read from ``rocs_hits_file`` gets an "AnalyticColorTanimoto" SD field,
    is written to ``output.sdf`` and is reported on stdout.
    """
    if len(argv) != 4:
        oechem.OEThrow.Usage("%s <reffile> <rocs_hits_file> <output.sdf>" % argv[0])

    ref_stream = oechem.oemolistream(argv[1])
    hits_stream = oechem.oemolistream(argv[2])
    out_stream = oechem.oemolostream(argv[3])

    reference = oechem.OEGraphMol()
    oechem.OEReadMolecule(ref_stream, reference)

    # One prep object with default options (which add the required color
    # atoms) is applied to the reference and, below, to every hit.
    overlap_prep = oeshape.OEOverlapPrep()
    overlap_prep.Prep(reference)

    # Analytic color function, set up once against the prepared reference.
    analytic_color = oeshape.OEAnalyticColorFunc()
    analytic_color.SetupRef(reference)

    results = oeshape.OEOverlapResults()
    for hit in hits_stream.GetOEGraphMols():
        overlap_prep.Prep(hit)
        analytic_color.Overlap(hit, results)
        color_tanimoto = results.GetColorTanimoto()
        oechem.OESetSDData(hit, "AnalyticColorTanimoto", "%.2f" % color_tanimoto)
        oechem.OEWriteMolecule(out_stream, hit)
        print("Fit Title: %s Color Tanimoto: %.2f" % (hit.GetTitle(), color_tanimoto))
def main(argv=[__name__]):
    """Print combo, shape and color Tanimoto of each fit molecule vs. a reference.

    ``argv`` must be ``[prog, reffile, fitfile]``. The first molecule read
    from ``reffile`` is the reference; one report line is printed for every
    molecule read from ``fitfile``.
    """
    if len(argv) != 3:
        oechem.OEThrow.Usage("%s <reffile> <fitfile>" % argv[0])

    ref_stream = oechem.oemolistream(argv[1])
    fit_stream = oechem.oemolistream(argv[2])

    reference = oechem.OEGraphMol()
    oechem.OEReadMolecule(ref_stream, reference)

    # Default prep: removes any explicit hydrogens present and adds color
    # atoms. The same prep object is reused for every fit molecule.
    overlap_prep = oeshape.OEOverlapPrep()
    overlap_prep.Prep(reference)

    # OEOverlapFunc scores shape and color together; by default it combines
    # OEGridShapeFunc (shape) with OEExactColorFunc (color).
    overlap_func = oeshape.OEOverlapFunc()
    overlap_func.SetupRef(reference)

    results = oeshape.OEOverlapResults()
    report = "title: %s tanimoto combo = %.2f shape tanimoto = %.2f color tanimoto = %.2f"
    for candidate in fit_stream.GetOEGraphMols():
        overlap_prep.Prep(candidate)
        overlap_func.Overlap(candidate, results)
        fields = (
            candidate.GetTitle(),
            results.GetTanimotoCombo(),
            results.GetTanimoto(),
            results.GetColorTanimoto(),
        )
        print(report % fields)
def tanimotocombo(ref_mol, query_mol):
    """
    Compute the OpenEye TanimotoCombo score between two molecules.

    Both molecules are passed through ``OEOverlapPrep.Prep`` with default
    options (which removes any explicit hydrogens present and adds color
    atoms), so the arguments are modified in place. The score comes from
    ``OEOverlapFunc``, which by default pairs ``OEGridShapeFunc`` for shape
    with ``OEExactColorFunc`` for color.

    For the purposes of this code it is treated as irrelevant which molecule
    is the reference and which is the query.

    Args:
        ref_mol (oemol)
            An oemol that has already been read in; used as the reference.

        query_mol (oemol)
            Another oemol that has already been read in; overlapped onto
            the reference.

    Returns:
        res.GetTanimotoCombo() (float)
            The TanimotoCombo value.
    """
    overlap_prep = oeshape.OEOverlapPrep()
    overlap_func = oeshape.OEOverlapFunc()
    overlap_results = oeshape.OEOverlapResults()

    # Reference side: prepare, then register with the overlap function.
    overlap_prep.Prep(ref_mol)
    overlap_func.SetupRef(ref_mol)

    # Query side: prepare, then score against the registered reference.
    overlap_prep.Prep(query_mol)
    overlap_func.Overlap(query_mol, overlap_results)

    combo_score = overlap_results.GetTanimotoCombo()
    return combo_score
def main(argv=[__name__]):
    """Print the exact shape Tanimoto of each fit molecule against a reference.

    ``argv`` must be ``[prog, reffile, fitfile]``. The first molecule read
    from ``reffile`` is the reference; one line is printed for every
    molecule read from ``fitfile``.
    """
    if len(argv) != 3:
        oechem.OEThrow.Usage("%s <reffile> <fitfile>" % argv[0])

    ref_stream = oechem.oemolistream(argv[1])
    fit_stream = oechem.oemolistream(argv[2])

    reference = oechem.OEGraphMol()
    oechem.OEReadMolecule(ref_stream, reference)

    # Default prep removes any explicit hydrogens present; it is applied to
    # the reference here and to each fit molecule inside the loop.
    overlap_prep = oeshape.OEOverlapPrep()
    overlap_prep.Prep(reference)

    # Exact shape function, set up once against the prepared reference.
    exact_shape = oeshape.OEExactShapeFunc()
    exact_shape.SetupRef(reference)

    results = oeshape.OEOverlapResults()
    for candidate in fit_stream.GetOEGraphMols():
        overlap_prep.Prep(candidate)
        exact_shape.Overlap(candidate, results)
        title = candidate.GetTitle()
        shape_tanimoto = results.GetTanimoto()
        print("title: %s exact tanimoto = %.2f" % (title, shape_tanimoto))
def main(argv=[__name__]):
    """Print the exact color score of each fit molecule against a reference.

    ``argv`` must be ``[prog, reffile, fitfile]``. The first molecule read
    from ``reffile`` is the reference; one line is printed for every
    molecule read from ``fitfile``.
    """
    if len(argv) != 3:
        oechem.OEThrow.Usage("%s <reffile> <fitfile>" % argv[0])

    ref_stream = oechem.oemolistream(argv[1])
    fit_stream = oechem.oemolistream(argv[2])

    reference = oechem.OEGraphMol()
    oechem.OEReadMolecule(ref_stream, reference)

    # Default prep removes any explicit hydrogens present and adds the
    # required color atoms; reused for every fit molecule.
    overlap_prep = oeshape.OEOverlapPrep()
    overlap_prep.Prep(reference)

    # Exact color function, set up once against the prepared reference.
    exact_color = oeshape.OEExactColorFunc()
    exact_color.SetupRef(reference)

    results = oeshape.OEOverlapResults()
    # Walk the fit file one molecule at a time through the stream's generator.
    for candidate in fit_stream.GetOEGraphMols():
        overlap_prep.Prep(candidate)
        exact_color.Overlap(candidate, results)
        title = candidate.GetTitle()
        color_score = results.GetColorScore()
        print("title: %s color score = %.2f" % (title, color_score))
def main(argv=[__name__]):
    """Overlay database molecules on a query and rescore by exclusion volume.

    Command-line options are parsed with ``oechem.OEInterface`` from
    ``InterfaceData``:

    * ``-q``: query molecule file (first molecule is used)
    * ``-e``: exclusion-volume (protein) molecule file (first molecule is used)
    * ``-d``: database of molecules to overlay
    * ``-o``: output molecule file

    For every database molecule the best ROCS overlay on the query is taken,
    its exact shape overlap with the exclusion volume is computed, and

        rescore = TanimotoCombo - overlap / fit self-overlap

    is attached as SD data ("TanimotoCombo", "Exclusion Volume",
    "Fraction Overlap", "Rescore"), written to the output file and printed.

    Any file that cannot be opened, or a query/exclusion file from which no
    molecule can be read, is reported through ``oechem.OEThrow.Fatal``.
    """
    itf = oechem.OEInterface(InterfaceData, argv)

    # Set up best overlay to the query molecule
    query_path = itf.GetString("-q")
    qfs = oechem.oemolistream()
    if not qfs.open(query_path):
        oechem.OEThrow.Fatal("Unable to open %s" % query_path)
    qmol = oechem.OEMol()
    # An unreadable/empty query would otherwise be scored silently.
    if not oechem.OEReadMolecule(qfs, qmol):
        oechem.OEThrow.Fatal("Unable to read a molecule from %s" % query_path)

    # Set up overlap to protein exclusion volume
    exclusion_path = itf.GetString("-e")
    efs = oechem.oemolistream()
    if not efs.open(exclusion_path):
        oechem.OEThrow.Fatal("Unable to open %s" % exclusion_path)
    emol = oechem.OEMol()
    if not oechem.OEReadMolecule(efs, emol):
        oechem.OEThrow.Fatal("Unable to read a molecule from %s" % exclusion_path)

    evol = oeshape.OEExactShapeFunc()
    evol.SetupRef(emol)

    # open database and output streams
    database_path = itf.GetString("-d")
    ifs = oechem.oemolistream()
    if not ifs.open(database_path):
        oechem.OEThrow.Fatal("Unable to open %s" % database_path)

    output_path = itf.GetString("-o")
    ofs = oechem.oemolostream()
    if not ofs.open(output_path):
        oechem.OEThrow.Fatal("Unable to open %s" % output_path)

    print("Title Combo Rescore")
    for mol in ifs.GetOEMols():
        res = oeshape.OEROCSResult()
        oeshape.OEROCSOverlay(res, qmol, mol)
        outmol = res.GetOverlayConf()

        # calculate overlap with protein
        eres = oeshape.OEOverlapResults()
        evol.Overlap(outmol, eres)

        # Use the accessor for both reads of the overlap (the original mixed
        # eres.GetOverlap() with direct attribute access eres.overlap).
        exclusion_overlap = eres.GetOverlap()
        tanimoto_combo = res.GetTanimotoCombo()
        frac = exclusion_overlap / eres.GetFitSelfOverlap()
        rescore = tanimoto_combo - frac

        # attach data to molecule and write it
        oechem.OESetSDData(outmol, "TanimotoCombo", "%-.3f" % tanimoto_combo)
        oechem.OESetSDData(outmol, "Exclusion Volume", "%-.3f" % exclusion_overlap)
        oechem.OESetSDData(outmol, "Fraction Overlap", "%-.3f" % frac)
        oechem.OESetSDData(outmol, "Rescore", "%-.3f" % rescore)
        oechem.OEWriteMolecule(ofs, outmol)

        print("%-20s %.3f %.3f" % (outmol.GetTitle(), tanimoto_combo, rescore))
def main(argv=[__name__]):
    """Tag ROCS hits with their analytic shape Tanimoto against a reference.

    ``argv`` must be ``[prog, reffile, rocs_hits_file, output.sdf]``. The
    first molecule read from ``reffile`` is the reference. Every molecule
    read from ``rocs_hits_file`` gets an "AnalyticTanimoto" SD field and is
    written to ``output.sdf``. No overlap prep is applied in this variant.
    """
    if len(argv) != 4:
        oechem.OEThrow.Usage("%s <reffile> <rocs_hits_file> <output.sdf>" % argv[0])

    ref_stream = oechem.oemolistream(argv[1])
    hits_stream = oechem.oemolistream(argv[2])
    out_stream = oechem.oemolostream(argv[3])

    reference = oechem.OEGraphMol()
    oechem.OEReadMolecule(ref_stream, reference)

    # Analytic shape function, set up once against the reference.
    analytic_shape = oeshape.OEAnalyticShapeFunc()
    analytic_shape.SetupRef(reference)

    results = oeshape.OEOverlapResults()
    for hit in hits_stream.GetOEGraphMols():
        analytic_shape.Overlap(hit, results)
        shape_tag = "%.2f" % results.GetTanimoto()
        oechem.OESetSDData(hit, "AnalyticTanimoto", shape_tag)
        oechem.OEWriteMolecule(out_stream, hit)
def main(argv=[__name__]):
    """Print color Tanimoto values computed with a user-defined color force field.

    ``argv`` must be ``[prog, reffile, overlayfile]``. The first molecule
    read from ``reffile`` is the reference; one line is printed for every
    molecule read from ``overlayfile``.

    The force field starts from ImplicitMillsDean, has all of its
    interactions cleared, and then gets only donor-donor and
    acceptor-acceptor gaussian interactions.
    """
    if len(argv) != 3:
        oechem.OEThrow.Usage("%s <reffile> <overlayfile>" % argv[0])

    ref_stream = oechem.oemolistream(argv[1])
    fit_stream = oechem.oemolistream(argv[2])

    reference = oechem.OEGraphMol()
    oechem.OEReadMolecule(ref_stream, reference)

    # Build the custom color force field: ImplicitMillsDean types with the
    # stock interactions replaced by two user-defined ones.
    force_field = oeshape.OEColorForceField()
    force_field.Init(oeshape.OEColorFFType_ImplicitMillsDean)
    force_field.ClearInteractions()
    donor = force_field.GetType("donor")
    acceptor = force_field.GetType("acceptor")
    force_field.AddInteraction(donor, donor, "gaussian", -1.0, 1.0)
    force_field.AddInteraction(acceptor, acceptor, "gaussian", -1.0, 1.0)

    # The prep step must assign color atoms with the modified force field,
    # so it is handed the same object before preparing the reference.
    overlap_prep = oeshape.OEOverlapPrep()
    overlap_prep.SetColorForceField(force_field)
    overlap_prep.Prep(reference)

    # The exact color function is likewise configured with the custom
    # force field through its options object.
    color_options = oeshape.OEColorOptions()
    color_options.SetColorForceField(force_field)
    exact_color = oeshape.OEExactColorFunc(color_options)
    exact_color.SetupRef(reference)

    results = oeshape.OEOverlapResults()
    for candidate in fit_stream.GetOEGraphMols():
        overlap_prep.Prep(candidate)
        exact_color.Overlap(candidate, results)
        title = candidate.GetTitle()
        color_tanimoto = results.GetColorTanimoto()
        print("Fit Title: %s Color Tanimoto: %.2f" % (title, color_tanimoto))
def generate_restricted_conformers(receptor, refmol, mol, core_smarts=None):
    """
    Generate and select a conformer of the specified molecule using the reference molecule

    A core fragment of ``refmol`` is held fixed while Omega enumerates
    conformers of ``mol``. Each conformer is scored for clashes (``BumpCheck``),
    docking (Chemgauss4) and shape overlap with ``refmol`` (RefTversky from
    ``OEExactShapeFunc``). Of the conformers in the top 10% by overlap, the
    one with the lowest docking score is returned.

    Parameters
    ----------
    receptor : openeye.oechem.OEGraphMol
        Receptor (already prepped for docking) for identifying optimal pose
    refmol : openeye.oechem.OEGraphMol
        Reference molecule which shares some part in common with the proposed molecule.
        NOTE: modified in place (``OEPrepareSearch`` when ``core_smarts`` is
        given, and ``OEOverlapPrep.Prep``).
    mol : openeye.oechem.OEGraphMol
        Molecule whose conformers are to be enumerated.
        NOTE: ``OEGetReasonableProtomer`` is applied to this object in place.
    core_smarts : str, optional, default=None
        If core_smarts is specified, substructure will be extracted using SMARTS.
        Otherwise the core is obtained from ``GetCoreFragment(refmol, [mol])``.

    Returns
    -------
    openeye.oechem.OEGraphMol or None
        Single-conformer molecule carrying SD data ``clash_score``,
        ``docking_score`` and ``overlap_score``; ``None`` if ``core_smarts``
        does not match ``refmol``, the core has fewer than 6 atoms after
        hydrogen suppression, no reasonable protomer is found, or Omega fails.
    """
    from openeye import oechem, oedocking, oeomega, oequacpac, oeshape

    # Get core fragment.
    # (A leftover DEBUG override used to replace core_smarts with a
    # hard-coded benzotriazole pattern here, which ignored the caller's
    # argument and made the GetCoreFragment branch unreachable.)
    core_fragment = None
    if core_smarts:
        # Truncate refmol to the first SMARTS match
        ss = oechem.OESubSearch(core_smarts)
        oechem.OEPrepareSearch(refmol, ss)
        for match in ss.Match(refmol):
            core_fragment = oechem.OEGraphMol()
            oechem.OESubsetMol(core_fragment, match)
            break
        if core_fragment is None:
            # Previously this fell through to an unbound-variable error.
            print(f'No match for core_smarts {core_smarts} in reference molecule')
            return None
    else:
        core_fragment = GetCoreFragment(refmol, [mol])

    oechem.OESuppressHydrogens(core_fragment)
    MIN_CORE_ATOMS = 6
    if core_fragment.NumAtoms() < MIN_CORE_ATOMS:
        return None

    # Create an Omega instance
    omegaOpts = oeomega.OEOmegaOptions(oeomega.OEOmegaSampling_Dense)

    # Set the fixed reference molecule
    omegaFixOpts = oeomega.OEConfFixOptions()
    omegaFixOpts.SetFixMaxMatch(10)  # allow multiple MCSS matches
    omegaFixOpts.SetFixDeleteH(True)  # only use heavy atoms
    omegaFixOpts.SetFixMol(core_fragment)
    omegaFixOpts.SetFixRMS(0.5)

    atomexpr = oechem.OEExprOpts_Aromaticity | oechem.OEExprOpts_Hybridization
    bondexpr = oechem.OEExprOpts_BondOrder | oechem.OEExprOpts_Aromaticity
    omegaFixOpts.SetAtomExpr(atomexpr)
    omegaFixOpts.SetBondExpr(bondexpr)
    omegaOpts.SetConfFixOptions(omegaFixOpts)

    molBuilderOpts = oeomega.OEMolBuilderOptions()
    molBuilderOpts.SetStrictAtomTypes(False)  # don't give up if MMFF types are not found
    omegaOpts.SetMolBuilderOptions(molBuilderOpts)

    omegaOpts.SetWarts(False)  # warts disabled
    omegaOpts.SetStrictStereo(False)  # strict stereochemistry disabled
    omegaOpts.SetIncludeInput(False)  # don't include input
    omegaOpts.SetMaxConfs(1000)  # generate lots of conformers
    omega = oeomega.OEOmega(omegaOpts)

    if not oequacpac.OEGetReasonableProtomer(mol):
        print('No reasonable protomer found')
        return None

    mol = oechem.OEMol(mol)  # multi-conformer molecule

    ret_code = omega.Build(mol)
    if (mol.GetDimension() != 3) or (ret_code != oeomega.OEOmegaReturnCode_Success):
        print(f'Omega failure: {mol.GetDimension()} and {oeomega.OEGetOmegaError(ret_code)}')
        return None

    # Extract poses
    class Pose(object):
        """Per-conformer record of the three scores computed below."""

        def __init__(self, conformer):
            self.conformer = conformer
            self.clash_score = None
            self.docking_score = None
            self.overlap_score = None

    poses = [Pose(conf) for conf in mol.GetConfs()]

    # Score clashes
    bump_check = BumpCheck(receptor)
    for pose in poses:
        pose.clash_score = bump_check.count(pose.conformer)

    # Score docking poses
    score = oedocking.OEScore(oedocking.OEScoreType_Chemgauss4)
    score.Initialize(receptor)
    for pose in poses:
        pose.docking_score = score.ScoreLigand(pose.conformer)

    # Compute overlap scores
    overlap_prep = oeshape.OEOverlapPrep()
    overlap_prep.Prep(refmol)
    shapeFunc = oeshape.OEExactShapeFunc()
    shapeFunc.SetupRef(refmol)
    oeshape_result = oeshape.OEOverlapResults()
    for pose in poses:
        # Prep a throwaway copy so the conformer inside `mol` is untouched.
        tmpmol = oechem.OEGraphMol(pose.conformer)
        overlap_prep.Prep(tmpmol)
        shapeFunc.Overlap(tmpmol, oeshape_result)
        pose.overlap_score = oeshape_result.GetRefTversky()

    # Filter poses based on top 10% of overlap (ascending sort, keep the tail)
    poses = sorted(poses, key=lambda pose: pose.overlap_score)
    poses = poses[int(0.9 * len(poses)):]

    # Select the best (lowest) docking score; min() picks the same element
    # as a stable ascending sort followed by [0].
    pose = min(poses, key=lambda pose: pose.docking_score)
    mol.SetActive(pose.conformer)
    oechem.OESetSDData(mol, 'clash_score', str(pose.clash_score))
    oechem.OESetSDData(mol, 'docking_score', str(pose.docking_score))
    oechem.OESetSDData(mol, 'overlap_score', str(pose.overlap_score))

    # Convert to single-conformer molecule
    mol = oechem.OEGraphMol(mol)

    return mol
shapeFunc = oeshape.OEOverlapFunc()

# Compute distinct fragments: cluster the fragments on pairwise shape
# Tanimoto and keep one representative fragment per cluster.
compute_fragment_basis = True
n_clusters = 40
if compute_fragment_basis:
    # TODO: Use PCCA+ when we have time to implement this
    print('Extracting a fragment basis...')
    import numpy as np
    from sklearn.cluster import SpectralClustering

    fragment_names = list(fragments.keys())
    nfragments = len(fragment_names)

    # Symmetric affinity matrix; the diagonal keeps its initial value of 1
    # and only the upper triangle is computed, then mirrored.
    affinity_matrix = np.ones([nfragments, nfragments], np.float32)
    result = oeshape.OEOverlapResults()  # reused; overwritten by each Overlap call
    for i, ref_name in enumerate(fragment_names):
        shapeFunc.SetupRef(fragments[ref_name])
        for j in range(i + 1, nfragments):
            shapeFunc.Overlap(fragments[fragment_names[j]], result)
            overlap = result.GetTanimoto()
            affinity_matrix[i, j] = overlap
            affinity_matrix[j, i] = overlap

    clustering = SpectralClustering(
        n_clusters=n_clusters, affinity='precomputed').fit(affinity_matrix)

    # Representative of a cluster = the last fragment assigned to it.
    # Only labels that actually occur are recorded: the original pre-filled
    # a list with integer placeholders, so any label that was never
    # assigned left an int behind and the lookup below raised KeyError.
    cluster_representative = {}
    for fragment_index, cluster_index in enumerate(clustering.labels_):
        cluster_representative[int(cluster_index)] = fragment_names[fragment_index]

    # Ordered by cluster index, as before.
    unique_fragment_names = [
        cluster_representative[cluster_index]
        for cluster_index in sorted(cluster_representative)
    ]
    fragments = {
        fragment_name: fragments[fragment_name]
        for fragment_name in unique_fragment_names
    }
def generate_restricted_conformers(receptor, refmol, mol, core_smarts=None):
    """
    Generate and select a conformer of the specified molecule using the reference molecule

    A core fragment of ``refmol`` is held fixed while Omega enumerates
    conformers of ``mol``. Each conformer is scored for clashes (``BumpCheck``),
    docking (Chemgauss4) and shape overlap with ``refmol`` (RefTversky from
    ``OEExactShapeFunc``). Of the conformers in the top 10% by overlap, the
    one with the lowest docking score is returned.

    Parameters
    ----------
    receptor : openeye.oechem.OEGraphMol
        Receptor (already prepped for docking) for identifying optimal pose
    refmol : openeye.oechem.OEGraphMol
        Reference molecule which shares some part in common with the proposed molecule.
        NOTE: modified in place (``OEPrepareSearch`` when ``core_smarts`` is
        given, and ``OEOverlapPrep.Prep``).
    mol : openeye.oechem.OEGraphMol
        Molecule whose conformers are to be enumerated.
        NOTE: ``OEGetReasonableProtomer`` is applied to this object in place.
    core_smarts : str, optional, default=None
        If core_smarts is specified, substructure will be extracted using SMARTS.
        Otherwise the core is obtained from ``GetCoreFragment(refmol, [mol])``.

    Returns
    -------
    openeye.oechem.OEGraphMol or None
        Single-conformer molecule carrying SD data ``clash_score``,
        ``docking_score``, ``overlap_score``, ``MMFF_internal_energy`` and
        ``docked_smiles``; ``None`` if ``core_smarts`` does not match
        ``refmol``, the core has fewer than 6 atoms after hydrogen
        suppression, no reasonable protomer is found, or Omega fails.

    Notes
    -----
    Sets the global OpenEye error level to ``OEErrorLevel_Quiet``.
    """
    from openeye import oechem, oedocking, oeomega, oequacpac, oeshape

    logging.debug(
        f'mol: {oechem.OEMolToSmiles(mol)} | core_smarts: {core_smarts}')

    # Be quiet
    oechem.OEThrow.SetLevel(oechem.OEErrorLevel_Quiet)

    # Get core fragment
    core_fragment = None
    if core_smarts:
        # Truncate refmol to the first SMARTS match
        ss = oechem.OESubSearch(core_smarts)
        oechem.OEPrepareSearch(refmol, ss)
        for match in ss.Match(refmol):
            core_fragment = oechem.OEGraphMol()
            oechem.OESubsetMol(core_fragment, match)
            logging.debug(
                f'Truncated refmol to generate core_fragment: {oechem.OEMolToSmiles(core_fragment)}'
            )
            break
        if core_fragment is None:
            # Previously this fell through to an unbound-variable error.
            logging.warning(
                f'No match for core_smarts {core_smarts} in reference molecule')
            return None
    else:
        core_fragment = GetCoreFragment(refmol, [mol])

    oechem.OESuppressHydrogens(core_fragment)
    MIN_CORE_ATOMS = 6
    if core_fragment.NumAtoms() < MIN_CORE_ATOMS:
        return None

    # Create an Omega instance
    omegaOpts = oeomega.OEOmegaOptions(oeomega.OEOmegaSampling_Dense)

    # Set the fixed reference molecule
    omegaFixOpts = oeomega.OEConfFixOptions()
    omegaFixOpts.SetFixMaxMatch(10)  # allow multiple MCSS matches
    omegaFixOpts.SetFixDeleteH(True)  # only use heavy atoms
    omegaFixOpts.SetFixMol(core_fragment)
    omegaFixOpts.SetFixRMS(0.5)

    # OEExprOpts_Hybridization is deliberately not used here; it caused:
    #   Warning: OESubSearch::Match() is unable to match unset hybridization
    #   in the target ... for patterns with set hybridization, call
    #   OEPrepareSearch on the target first
    atomexpr = oechem.OEExprOpts_Aromaticity | oechem.OEExprOpts_AtomicNumber
    bondexpr = oechem.OEExprOpts_BondOrder | oechem.OEExprOpts_Aromaticity
    omegaFixOpts.SetAtomExpr(atomexpr)
    omegaFixOpts.SetBondExpr(bondexpr)
    omegaOpts.SetConfFixOptions(omegaFixOpts)

    molBuilderOpts = oeomega.OEMolBuilderOptions()
    molBuilderOpts.SetStrictAtomTypes(
        False)  # don't give up if MMFF types are not found
    omegaOpts.SetMolBuilderOptions(molBuilderOpts)

    omegaOpts.SetWarts(False)  # warts disabled
    omegaOpts.SetStrictStereo(True)  # set strict stereochemistry
    omegaOpts.SetIncludeInput(False)  # don't include input
    omegaOpts.SetMaxConfs(1000)  # generate lots of conformers
    omegaOpts.SetEnergyWindow(20.0)  # allow high energies
    omega = oeomega.OEOmega(omegaOpts)

    # TODO: Expand protonation states and tautomers
    if not oequacpac.OEGetReasonableProtomer(mol):
        logging.warning('No reasonable protomer found')
        return None

    mol = oechem.OEMol(mol)  # multi-conformer molecule

    ret_code = omega.Build(mol)
    if (mol.GetDimension() != 3) or (ret_code != oeomega.OEOmegaReturnCode_Success):
        msg = f'\nOmega failure for {mol.GetTitle()} : SMILES {oechem.OEMolToSmiles(mol)} : core_smarts {core_smarts} : {oeomega.OEGetOmegaError(ret_code)}\n'
        logging.warning(msg)
        return None

    # Extract poses
    class Pose(object):
        """Per-conformer record of the three scores computed below."""

        def __init__(self, conformer):
            self.conformer = conformer
            self.clash_score = None
            self.docking_score = None
            self.overlap_score = None

    poses = [Pose(conf) for conf in mol.GetConfs()]

    # Score clashes
    bump_check = BumpCheck(receptor)
    for pose in poses:
        pose.clash_score = bump_check.count(pose.conformer)

    # Score docking poses
    score = oedocking.OEScore(oedocking.OEScoreType_Chemgauss4)
    score.Initialize(receptor)
    for pose in poses:
        pose.docking_score = score.ScoreLigand(pose.conformer)

    # Compute overlap scores
    overlap_prep = oeshape.OEOverlapPrep()
    overlap_prep.Prep(refmol)
    shapeFunc = oeshape.OEExactShapeFunc()
    shapeFunc.SetupRef(refmol)
    oeshape_result = oeshape.OEOverlapResults()
    for pose in poses:
        # Prep a throwaway copy so the conformer inside `mol` is untouched.
        tmpmol = oechem.OEGraphMol(pose.conformer)
        overlap_prep.Prep(tmpmol)
        shapeFunc.Overlap(tmpmol, oeshape_result)
        pose.overlap_score = oeshape_result.GetRefTversky()

    # Filter poses based on top 10% of overlap (ascending sort, keep the tail)
    poses = sorted(poses, key=lambda pose: pose.overlap_score)
    poses = poses[int(0.9 * len(poses)):]

    # Select the best (lowest) docking score; min() picks the same element
    # as a stable ascending sort followed by [0].
    pose = min(poses, key=lambda pose: pose.docking_score)
    mol.SetActive(pose.conformer)
    oechem.OESetSDData(mol, 'clash_score', str(pose.clash_score))
    oechem.OESetSDData(mol, 'docking_score', str(pose.docking_score))
    oechem.OESetSDData(mol, 'overlap_score', str(pose.overlap_score))

    # Convert to single-conformer molecule
    mol = oechem.OEGraphMol(mol)

    # Compute MMFF energy
    energy = mmff_energy(mol)
    oechem.OESetSDData(mol, 'MMFF_internal_energy', str(energy))

    # Store SMILES
    docked_smiles = oechem.OEMolToSmiles(mol)
    oechem.OESetSDData(mol, 'docked_smiles', docked_smiles)

    return mol