def main(argv=[__name__]):
    """Overlay fit molecules onto a multi-conformer reference and write the top hits.

    Expected arguments: ``<prog> <reffile> <fitfile> <out.sdf> <keepsize>``.

    The first molecule of ``reffile`` is used as the reference.  For every
    molecule in ``fitfile`` all overlays are sorted by highest Tanimoto and,
    for each of the best ``keepsize`` overlays, the matching reference
    conformer followed by the transformed fit conformer is written to the
    output file.  The fit conformer carries the SD tags ``RefConfIdx`` and
    ``tanimoto combo``.

    Note: the loop stops when the number of written overlays equals
    ``keepsize``; a ``keepsize`` below 1 therefore never triggers the break
    and all overlays are written.
    """
    if len(argv) != 5:
        oechem.OEThrow.Usage("%s <reffile> <fitfile> <out.sdf> <keepsize>" % argv[0])

    # Parse the numeric argument first so a bad value fails before any
    # output file is created.
    keepsize = int(argv[4])

    # Open every stream explicitly and verify it, matching the error
    # handling used by the other command-line entry points in this file.
    reffs = oechem.oemolistream()
    if not reffs.open(argv[1]):
        oechem.OEThrow.Fatal("Unable to open '%s'" % argv[1])

    fitfs = oechem.oemolistream()
    if not fitfs.open(argv[2]):
        oechem.OEThrow.Fatal("Unable to open '%s'" % argv[2])

    outfs = oechem.oemolostream()
    if not outfs.open(argv[3]):
        oechem.OEThrow.Fatal("Unable to open '%s'" % argv[3])

    refmol = oechem.OEMol()
    if not oechem.OEReadMolecule(reffs, refmol):
        oechem.OEThrow.Fatal("Unable to read reference from '%s'" % argv[1])
    print("Ref. Title:", refmol.GetTitle(), "Num Confs:", refmol.NumConfs())

    # Prepare reference molecule for calculation
    # With default options this will remove any explicit
    # hydrogens present and add color atoms
    prep = oeshape.OEOverlapPrep()
    prep.Prep(refmol)

    overlay = oeshape.OEMultiRefOverlay()
    overlay.SetupRef(refmol)

    for fitmol in fitfs.GetOEMols():
        print("Fit Title:", fitmol.GetTitle(), "Num Confs:", fitmol.NumConfs())

        prep.Prep(fitmol)
        resCount = 0

        # Sort all scores according to highest tanimoto
        scoreiter = oeshape.OEBestOverlayScoreIter()
        oeshape.OESortOverlayScores(scoreiter, overlay.Overlay(fitmol),
                                    oeshape.OEHighestTanimoto())

        for score in scoreiter:
            # Extract the scored fit conformer and move it onto the reference.
            outmol = oechem.OEGraphMol(
                fitmol.GetConf(oechem.OEHasConfIdx(score.GetFitConfIdx())))
            score.Transform(outmol)

            oechem.OESetSDData(outmol, "RefConfIdx",
                               "%-d" % score.GetRefConfIdx())
            oechem.OESetSDData(outmol, "tanimoto combo",
                               "%-.3f" % score.GetTanimotoCombo())

            # Each hit is written as a (reference conformer, fit conformer) pair.
            oechem.OEWriteMolecule(
                outfs,
                refmol.GetConf(oechem.OEHasConfIdx(score.GetRefConfIdx())))
            oechem.OEWriteMolecule(outfs, outmol)

            resCount += 1

            # Break at the user specified size
            if resCount == keepsize:
                break

        print(resCount, "results returned")
def get_oe_mol_positions(molecule, conformer_idx=0):
    """Return the atom coordinates of an OpenEye molecule as a NumPy array.

    Parameters
    ----------
    molecule
        OpenEye molecule.  When ``conformer_idx`` is greater than 0 it must
        provide ``NumConfs()`` and ``GetConf()``.
    conformer_idx : int
        Index of the conformer whose coordinates are wanted.  For values
        that are not greater than 0 the molecule is used exactly as passed
        and no conformer is extracted.

    Returns
    -------
    numpy.ndarray
        Array of shape ``(molecule.NumAtoms(), 3)`` with one row per atom,
        in the order produced by ``molecule.GetAtoms()``.

    Raises
    ------
    ValueError
        If ``conformer_idx`` is greater than 0 and not smaller than the
        number of conformers.
    """
    # Validate up front and raise the final exception directly instead of
    # routing it through an unrelated UnboundLocalError.
    if conformer_idx > 0 and molecule.NumConfs() <= conformer_idx:
        raise ValueError(
            'conformer_idx {} out of range'.format(conformer_idx))

    from openeye import oechem

    # Extract correct conformer
    if conformer_idx > 0:
        molecule = oechem.OEGraphMol(
            molecule.GetConf(oechem.OEHasConfIdx(conformer_idx)))

    # Extract positions; the 3-float buffer is reused for every atom.
    oe_coords = oechem.OEFloatArray(3)
    molecule_pos = np.zeros((molecule.NumAtoms(), 3))
    for i, atom in enumerate(molecule.GetAtoms()):
        molecule.GetCoords(atom, oe_coords)
        molecule_pos[i] = oe_coords
    return molecule_pos
def overlay_molecules(
    reference_molecule: oechem.OEGraphMol,
    fit_molecule: oechem.OEMol,
    return_overlay: bool = True,
) -> (float, List[oechem.OEGraphMol]):
    """
    Overlay two molecules and calculate TanimotoCombo score.

    Both molecules are passed through ``OEOverlapPrep.Prep`` before the
    overlay is computed, so the objects handed in by the caller are modified
    by that preparation step.

    Parameters
    ----------
    reference_molecule: oechem.OEGraphMol
        An OpenEye molecule holding the reference molecule for overlay.
    fit_molecule: oechem.OEMol
        An OpenEye multi-conformer molecule holding the fit molecule for overlay.
    return_overlay: bool
        If the best scored overlay of molecules should be returned.

    Returns
    -------
    : float or float and list of oechem.OEGraphMol
        The TanimotoCombo score of the best overlay. If return_overlay is True,
        a tuple of that score and a two-element list is returned instead; the
        list holds the reference molecule followed by the best-scoring fit
        conformer transformed onto the reference.
    """
    from openeye import oechem, oeshape

    prep = oeshape.OEOverlapPrep()
    prep.Prep(reference_molecule)

    overlay = oeshape.OEOverlay()
    overlay.SetupRef(reference_molecule)

    prep.Prep(fit_molecule)
    score = oeshape.OEBestOverlayScore()
    overlay.BestOverlay(score, fit_molecule, oeshape.OEHighestTanimoto())
    tanimoto_combo = score.GetTanimotoCombo()

    if not return_overlay:
        return tanimoto_combo

    # Pull out the conformer that produced the best score and move it into
    # the reference frame; use a fresh name so the multi-conformer input
    # and the OEOverlay object are not shadowed.
    best_fit_conformer = oechem.OEGraphMol(
        fit_molecule.GetConf(oechem.OEHasConfIdx(score.GetFitConfIdx())))
    score.Transform(best_fit_conformer)

    return tanimoto_combo, [reference_molecule, best_fit_conformer]
def read_oe_molecule(file_path, conformer_idx=None):
    """Read a molecule file into a single OpenEye molecule.

    Every record yielded by the input stream is merged into one
    multi-conformer molecule: the first record creates the molecule and each
    later record is added to it with ``NewConf``.

    Parameters
    ----------
    file_path : str
        Path of the molecule file to read.
    conformer_idx : int, optional
        If given, only this conformer is returned (as an ``OEGraphMol``).
        If ``None`` the whole multi-conformer ``OEMol`` is returned.

    Returns
    -------
    oechem.OEMol or oechem.OEGraphMol
        The multi-conformer molecule, or the selected single conformer.

    Raises
    ------
    ValueError
        If no molecule could be read from the file, or if ``conformer_idx``
        is negative or not smaller than the number of conformers.
    """
    from openeye import oechem

    # Open input file stream
    ifs = oechem.oemolistream()
    if not ifs.open(file_path):
        oechem.OEThrow.Fatal('Unable to open {}'.format(file_path))

    # Read all conformations; an explicit None sentinel replaces the
    # previous reliance on UnboundLocalError for the first record.
    molecule = None
    for mol in ifs.GetOEMols():
        if molecule is None:
            molecule = oechem.OEMol(mol)
        else:
            molecule.NewConf(mol)
    ifs.close()

    # An empty or unparsable file used to surface as UnboundLocalError.
    if molecule is None:
        raise ValueError(
            'No molecule could be read from {}'.format(file_path))

    # Select conformation of interest
    if conformer_idx is not None:
        if not 0 <= conformer_idx < molecule.NumConfs():
            raise ValueError(
                'conformer_idx {} out of range'.format(conformer_idx))
        molecule = oechem.OEGraphMol(
            molecule.GetConf(oechem.OEHasConfIdx(conformer_idx)))

    return molecule
def main(argv=[__name__]):
    """Search a FastROCS shape database with every conformer of each query molecule.

    Options are parsed by ``argparse`` from the process command line; the
    ``argv`` parameter is kept for interface compatibility and is not used
    for parsing.

    For each molecule in each query file a result file named
    ``<title>_results.<ext>`` is written (the running molecule index is used
    when the title is empty).  Every conformer of the query molecule is
    searched against the database and each hit is written, transformed onto
    the query, with the SD tags ``QueryConfidx`` plus either the Tanimoto or
    the Tversky shape/color/combo scores, depending on ``--tversky``.

    Returns 0, including when no supported GPU is available.
    """
    parser = argparse.ArgumentParser()

    # positional arguments retaining backward compatibility
    parser.add_argument(
        'database',
        help='File containing the database molecules to be searched '
             '(format not restricted to *.oeb).')
    parser.add_argument(
        'query', default=[], nargs='+',
        help='File containing the query molecule(s) to be searched '
             '(format not restricted to *.oeb).')
    # The help text reports the real default instead of claiming it is the
    # database size.
    parser.add_argument(
        '--nHits', dest='nHits', type=int, default=100,
        help='Number of hits to return (default = %(default)s).')
    parser.add_argument(
        '--cutoff', dest='cutoff', type=float, default=argparse.SUPPRESS,
        help='Specify a cutoff criteria for scores.')
    parser.add_argument(
        '--tversky', dest='tversky', action='store_true',
        default=argparse.SUPPRESS,
        help='Switch to Tversky similarity scoring (default = Tanimoto).')

    args = parser.parse_args()

    dbname = args.database

    if not oefastrocs.OEFastROCSIsGPUReady():
        oechem.OEThrow.Info("No supported GPU available!")
        return 0

    # set options
    opts = oefastrocs.OEShapeDatabaseOptions()
    opts.SetLimit(args.nHits)
    print("Number of hits set to %u" % opts.GetLimit())

    # SUPPRESS defaults mean the attribute only exists when the option was given.
    if hasattr(args, 'cutoff'):
        opts.SetCutoff(args.cutoff)
        print("Cutoff set to %f" % args.cutoff)

    use_tversky = getattr(args, 'tversky', False)
    if use_tversky:
        # Pass the similarity-function constant, not the boolean flag.
        opts.SetSimFunc(oefastrocs.OEShapeSimFuncType_Tversky)
        print("Tversky similarity scoring set.")

    # read in database
    ifs = oechem.oemolistream()
    if not ifs.open(dbname):
        oechem.OEThrow.Fatal("Unable to open '%s'" % dbname)

    print("\nOpening database file %s ..." % dbname)
    timer = oechem.OEWallTimer()
    dbase = oefastrocs.OEShapeDatabase()
    moldb = oechem.OEMolDatabase()
    if not moldb.Open(ifs):
        oechem.OEThrow.Fatal("Unable to open '%s'" % dbname)

    dots = oechem.OEThreadedDots(10000, 200, "conformers")
    if not dbase.Open(moldb, dots):
        oechem.OEThrow.Fatal("Unable to initialize OEShapeDatabase on '%s'"
                             % dbname)

    dots.Total()
    print("%f seconds to load database\n" % timer.Elapsed())

    for qfname in args.query:
        # read in query
        qfs = oechem.oemolistream()
        if not qfs.open(qfname):
            oechem.OEThrow.Fatal("Unable to open '%s'" % qfname)

        # Probe for at least one readable molecule, then start over so the
        # loop below sees every molecule in the file.
        mcmol = oechem.OEMol()
        if not oechem.OEReadMolecule(qfs, mcmol):
            oechem.OEThrow.Fatal("Unable to read query from '%s'" % qfname)
        qfs.rewind()

        ext = oechem.OEGetFileExtension(qfname)

        qmolidx = 0
        while oechem.OEReadMolecule(qfs, mcmol):
            # write out to file name based on molecule title
            ofs = oechem.oemolostream()
            moltitle = mcmol.GetTitle()
            if len(moltitle) == 0:
                moltitle = str(qmolidx)
            ofname = moltitle + "_results." + ext
            if not ofs.open(ofname):
                # Report the file that actually failed to open.
                oechem.OEThrow.Fatal("Unable to open '%s'" % ofname)

            print("Searching for %s of %s (%s conformers)"
                  % (moltitle, qfname, mcmol.NumConfs()))

            qconfidx = 0
            for conf in mcmol.GetConfs():
                for score in dbase.GetSortedScores(conf, opts):
                    dbmol = oechem.OEMol()
                    dbmolidx = score.GetMolIdx()
                    if not moldb.GetMolecule(dbmol, dbmolidx):
                        print(
                            "Unable to retrieve molecule '%u' from the database"
                            % dbmolidx)
                        continue

                    mol = oechem.OEGraphMol(
                        dbmol.GetConf(oechem.OEHasConfIdx(score.GetConfIdx())))

                    oechem.OESetSDData(mol, "QueryConfidx", "%s" % qconfidx)
                    # Tag the hit with the scores of the similarity function
                    # that was actually used for the search.
                    if use_tversky:
                        oechem.OESetSDData(mol, "ShapeTversky",
                                           "%.4f" % score.GetShapeTversky())
                        oechem.OESetSDData(mol, "ColorTversky",
                                           "%.4f" % score.GetColorTversky())
                        oechem.OESetSDData(mol, "TverskyCombo",
                                           "%.4f" % score.GetTverskyCombo())
                    else:
                        oechem.OESetSDData(mol, "ShapeTanimoto",
                                           "%.4f" % score.GetShapeTanimoto())
                        oechem.OESetSDData(mol, "ColorTanimoto",
                                           "%.4f" % score.GetColorTanimoto())
                        oechem.OESetSDData(mol, "TanimotoCombo",
                                           "%.4f" % score.GetTanimotoCombo())
                    score.Transform(mol)

                    oechem.OEWriteMolecule(ofs, mol)
                qconfidx += 1

            ofs.close()
            print("%s conformers processed" % qconfidx)
            print("Wrote results to %s\n" % ofname)
            qmolidx += 1

    return 0
def main(argv=[__name__]):
    """Run a FastROCS shape search for each query file against one database.

    Expected arguments: ``<prog> <database> [<queries> ... ]``.

    For every query file the first molecule is read and searched against the
    database with a hit limit of 5.  The query followed by each retrievable
    hit (transformed onto the query and tagged with ``ShapeTanimoto``,
    ``ColorTanimoto`` and ``TanimotoCombo``) is written to
    ``<query base name>_results.<ext>``.

    Returns 0, including when the usage message is shown or no supported GPU
    is available.
    """
    if len(argv) < 3:
        oechem.OEThrow.Usage("%s <database> [<queries> ... ]" % argv[0])
        return 0

    # check system
    if not oefastrocs.OEFastROCSIsGPUReady():
        oechem.OEThrow.Info("No supported GPU available!")
        return 0

    # read in database
    dbname = argv[1]
    print("Opening database file %s ..." % dbname)
    dbase = oefastrocs.OEShapeDatabase()
    moldb = oechem.OEMolDatabase()

    if not moldb.Open(dbname):
        oechem.OEThrow.Fatal("Unable to open '%s'" % dbname)

    dots = oechem.OEThreadedDots(10000, 200, "conformers")
    if not dbase.Open(moldb, dots):
        oechem.OEThrow.Fatal("Unable to initialize OEShapeDatabase on '%s'"
                             % dbname)

    # customize search options
    opts = oefastrocs.OEShapeDatabaseOptions()
    opts.SetLimit(5)

    for qfname in argv[2:]:
        # read in query
        qfs = oechem.oemolistream()
        if not qfs.open(qfname):
            oechem.OEThrow.Fatal("Unable to open '%s'" % qfname)

        query = oechem.OEGraphMol()
        if not oechem.OEReadMolecule(qfs, query):
            oechem.OEThrow.Fatal("Unable to read query from '%s'" % qfname)

        # Strip ".<ext>" from the query name to build the result file name.
        ext = oechem.OEGetFileExtension(qfname)
        base = qfname[:-(len(ext) + 1)]

        # write out everything to a similarly named file
        ofs = oechem.oemolostream()
        ofname = base + "_results." + ext
        if not ofs.open(ofname):
            # Report the file that failed; argv[4] need not even exist.
            oechem.OEThrow.Fatal("Unable to open '%s'" % ofname)
        oechem.OEWriteMolecule(ofs, query)

        print("Searching for %s" % qfname)
        for score in dbase.GetSortedScores(query, opts):
            print("Score for mol %u(conf %u) %f shape %f color"
                  % (score.GetMolIdx(), score.GetConfIdx(),
                     score.GetShapeTanimoto(), score.GetColorTanimoto()))
            dbmol = oechem.OEMol()
            molidx = score.GetMolIdx()
            if not moldb.GetMolecule(dbmol, molidx):
                print("Unable to retrieve molecule '%u' from the database"
                      % molidx)
                continue

            # Keep only the conformer that was scored.
            mol = oechem.OEGraphMol(
                dbmol.GetConf(oechem.OEHasConfIdx(score.GetConfIdx())))

            oechem.OESetSDData(mol, "ShapeTanimoto",
                               "%.4f" % score.GetShapeTanimoto())
            oechem.OESetSDData(mol, "ColorTanimoto",
                               "%.4f" % score.GetColorTanimoto())
            oechem.OESetSDData(mol, "TanimotoCombo",
                               "%.4f" % score.GetTanimotoCombo())
            score.Transform(mol)

            oechem.OEWriteMolecule(ofs, mol)

        ofs.close()
        print("Wrote results to %s" % ofname)

    return 0
def GetBestOverlays(self, querymolstr, options, iformat, oformat):
    """
    Search the shape database with a query given as a string and return
    the overlaid hit conformers serialized in the 'oformat' format.

    querymolstr - a string containing the molecule to use as the query
    options - an instance of OEShapeDatabaseOptions
    iformat - a string representing the file extension used to parse
              querymolstr. Formats starting with '.sq' are read as shape
              queries. Old clients could be passing .sq data labelled as
              '.oeb', so when iformat == '.oeb' and no molecule can be
              read, the string is re-read as a shape query.
    oformat - file format to write the results as

    Raises ValueError when the query string cannot be opened or parsed,
    or when the output string stream cannot be opened.
    """
    stopwatch = oechem.OEWallTimer()

    # make sure to wait for the load to finish
    wait_for_load = True
    is_ready = self.IsLoaded(wait_for_load)
    assert is_ready

    if iformat.startswith(".sq"):
        query = ReadShapeQuery(querymolstr)
    else:
        # read in query
        instream = SetupStream(oechem.oemolistream(), iformat)
        if not instream.openstring(querymolstr):
            raise ValueError("Unable to open input molecule string")

        query = oechem.OEGraphMol()
        if not oechem.OEReadMolecule(instream, query):
            if iformat != ".oeb":
                raise ValueError(
                    "Unable to read a molecule from the string of format '%s'"
                    % iformat)
            # could be an old client trying to send a .sq file.
            query = ReadShapeQuery(querymolstr)

    outstream = SetupStream(oechem.oemolostream(), oformat)
    if not outstream.openstring():
        raise ValueError("Unable to openstring for output")

    # this is a "Write" lock to be paranoid and not overload the GPU
    self.rwlock.AcquireWriteLock()
    try:
        # do search
        hits = self.shapedb.GetSortedScores(query, options)
        sys.stderr.write("%f seconds to do search\n" % stopwatch.Elapsed())
    finally:
        self.rwlock.ReleaseWriteLock()

    stopwatch.Start()

    # write results
    for hit in hits:
        db_entry = oechem.OEMol()
        if not self.moldb.GetMolecule(db_entry, hit.GetMolIdx()):
            oechem.OEThrow.Warning(
                "Can't retrieve molecule %i from the OEMolDatabase, skipping..."
                % hit.GetMolIdx())
            continue

        # remove hydrogens to make output smaller, this also
        # ensures OEPrepareFastROCSMol will have the same output
        oechem.OESuppressHydrogens(db_entry)

        conf_selector = oechem.OEHasConfIdx(hit.GetConfIdx())
        hit_conf = oechem.OEGraphMol(db_entry.GetConf(conf_selector))
        oechem.OECopySDData(hit_conf, db_entry)

        # Pick the tag names and score accessors matching the similarity
        # function; accessors are invoked lazily, in tag order.
        if options.GetSimFunc() == oefastrocs.OEShapeSimFuncType_Tanimoto:
            score_fields = (
                ("ShapeTanimoto", hit.GetShapeTanimoto),
                ("ColorTanimoto", hit.GetColorTanimoto),
                ("TanimotoCombo", hit.GetTanimotoCombo),
            )
        else:
            score_fields = (
                ("ShapeTversky", hit.GetShapeTversky),
                ("ColorTversky", hit.GetColorTversky),
                ("TverskyCombo", hit.GetTverskyCombo),
            )
        for tag, read_score in score_fields:
            oechem.OESetSDData(hit_conf, tag, "%.4f" % read_score())

        # Record the starting orientation only when it is not the inertial one.
        if options.GetInitialOrientation() != oefastrocs.OEFastROCSOrientation_Inertial:
            oechem.OEAddSDData(
                hit_conf, "Opt. Starting Pos.",
                GetAltStartsString(options.GetInitialOrientation()))

        hit.Transform(hit_conf)

        oechem.OEWriteMolecule(outstream, hit_conf)

    result = outstream.GetString()
    sys.stderr.write("%f seconds to write hitlist\n" % stopwatch.Elapsed())
    sys.stderr.flush()
    outstream.close()

    return result
def main(argv=[__name__]):
    """Run a FastROCS search that uses a user-defined starting position.

    Expected arguments: ``<prog> <database> [<queries> ... ]``.

    The search options use the ``UserInertialStarts`` initial orientation
    with a hit limit of 5.  For every query file the first molecule is read
    and the coordinates of its atom with index 1 are registered as the single
    user start.  The query followed by each retrievable hit (transformed onto
    the query and tagged with ``ShapeTanimoto``, ``ColorTanimoto`` and
    ``TanimotoCombo``) is written to ``<query base name>_user_results.<ext>``.

    Returns 0, including when the usage message is shown or no supported GPU
    is available.
    """
    if len(argv) < 3:
        oechem.OEThrow.Usage("%s <database> [<queries> ... ]" % argv[0])
        return 0

    # check system
    if not oefastrocs.OEFastROCSIsGPUReady():
        oechem.OEThrow.Info("No supported GPU available!")
        return 0

    # read in database
    dbname = argv[1]
    print("Opening database file %s ..." % dbname)
    dbase = oefastrocs.OEShapeDatabase()
    moldb = oechem.OEMolDatabase()

    if not moldb.Open(dbname):
        oechem.OEThrow.Fatal("Unable to open '%s'" % dbname)

    dots = oechem.OEThreadedDots(10000, 200, "conformers")
    if not dbase.Open(moldb, dots):
        oechem.OEThrow.Fatal("Unable to initialize OEShapeDatabase on '%s'"
                             % dbname)

    # customize search options
    opts = oefastrocs.OEShapeDatabaseOptions()
    opts.SetInitialOrientation(
        oefastrocs.OEFastROCSOrientation_UserInertialStarts)
    opts.SetLimit(5)

    for qfname in argv[2:]:
        # read in query
        qfs = oechem.oemolistream()
        if not qfs.open(qfname):
            oechem.OEThrow.Fatal("Unable to open '%s'" % qfname)

        query = oechem.OEGraphMol()
        if not oechem.OEReadMolecule(qfs, query):
            oechem.OEThrow.Fatal("Unable to read query from '%s'" % qfname)

        # Strip ".<ext>" from the query name to build the result file name.
        ext = oechem.OEGetFileExtension(qfname)
        base = qfname[:-(len(ext) + 1)]

        # write out everything to a similarly named file
        ofs = oechem.oemolostream()
        ofname = base + "_user_results." + ext
        if not ofs.open(ofname):
            # Report the file that failed; argv[4] need not even exist.
            oechem.OEThrow.Fatal("Unable to open '%s'" % ofname)
        oechem.OEWriteMolecule(ofs, query)

        # Flatten the x, y, z of the chosen atom into the start-coordinate
        # vector expected by SetUserStarts.
        startsCoords = oechem.OEFloatVector()
        atomIdx = 1
        xyz = query.GetCoords()[atomIdx]
        for x in xyz:
            startsCoords.append(x)
        if len(startsCoords) % 3 != 0:
            oechem.OEThrow.Fatal(
                "Something went wrong whilst reading in user-starts coordinates"
            )

        opts.SetUserStarts(oechem.OEFloatVector(startsCoords),
                           len(startsCoords) // 3)

        opts.SetMaxOverlays(opts.GetNumInertialStarts()
                            * opts.GetNumUserStarts())

        if (opts.GetInitialOrientation()
                == oefastrocs.OEFastROCSOrientation_UserInertialStarts):
            numStarts = opts.GetNumUserStarts()
            print("This example will use %u starts" % numStarts)

        print("Searching for %s" % qfname)
        for score in dbase.GetSortedScores(query, opts):
            print("Score for mol %u(conf %u) %f shape %f color"
                  % (score.GetMolIdx(), score.GetConfIdx(),
                     score.GetShapeTanimoto(), score.GetColorTanimoto()))
            dbmol = oechem.OEMol()
            molidx = score.GetMolIdx()
            if not moldb.GetMolecule(dbmol, molidx):
                print("Unable to retrieve molecule '%u' from the database"
                      % molidx)
                continue

            # Keep only the conformer that was scored.
            mol = oechem.OEGraphMol(
                dbmol.GetConf(oechem.OEHasConfIdx(score.GetConfIdx())))

            oechem.OESetSDData(mol, "ShapeTanimoto",
                               "%.4f" % score.GetShapeTanimoto())
            oechem.OESetSDData(mol, "ColorTanimoto",
                               "%.4f" % score.GetColorTanimoto())
            oechem.OESetSDData(mol, "TanimotoCombo",
                               "%.4f" % score.GetTanimotoCombo())
            score.Transform(mol)

            oechem.OEWriteMolecule(ofs, mol)

        ofs.close()
        print("Wrote results to %s" % ofname)

    return 0
omega.SetSampleHydrogens( True ) # Word to the wise: skipping this step can lead to significantly different charges! omega.SetEnergyWindow(15.0) omega.SetRMSThreshold( 1.0 ) # Word to the wise: skipping this step can lead to significantly different charges! if omega(mol_multiconf): # generate conformation # Generate am1bcc partial charges oequacpac.OEAssignCharges(mol_multiconf, oequacpac.OEAM1BCCELF10Charges()) # Get total charge conf = mol_multiconf.GetConf(oechem.OEHasConfIdx(0)) absFCharge = 0 sumFCharge = 0 sumPCharge = 0.0 for atm in mol_multiconf.GetAtoms(): sumFCharge += atm.GetFormalCharge() absFCharge += abs(atm.GetFormalCharge()) sumPCharge += atm.GetPartialCharge() oechem.OEThrow.Info( "%s: %d formal charges give total charge %d ; Sum of Partial Charges %5.4f" % (mol_multiconf.GetTitle(), absFCharge, sumFCharge, sumPCharge)) # Output file ofs = oechem.oemolostream(charged_FN) ofs.SetFormat(oechem.OEFormat_MOL2H) oechem.OEWriteMolecule(ofs, conf)
# Convert to OEMol molecule mol_oemol = smiles_to_oemol(smiles_str) mol_oemol.SetTitle(ligand_name) # Use OpenEye Omega toolkit to generate lowest energy structure omega = oeomega.OEOmega() omega.SetCanonOrder(False) omega.SetMaxConfs(OMEGA_MAX_CONFS) omega(mol_oemol) # Generate protonation/tautomeric states with epik and charge molecule valid_structure = False with working_directory(temp_dir): for i in range(mol_oemol.GetMaxConfIdx()): try: confomer = mol_oemol.GetConf(oechem.OEHasConfIdx(i)) mk_single_conformer_epik(ligand_name, confomer, pH=PH) valid_structure = True break except ValueError: print "WARNING: Omega structure #{:d} discarded.".format(i) # Copy to output directory final file if valid_structure: mol2_file_name = ligand_name + '.mol2' src_path = os.path.join(temp_dir, mol2_file_name) dst_path = os.path.join(LIGANDS_DIR_PATH, mol2_file_name) shutil.copyfile(src_path, dst_path) smiles_file.close()
def save_charged_mol(mol, ofs):
    """Write the conformer with index 0 of `mol` to the output stream `ofs`."""
    first_conformer_selector = oechem.OEHasConfIdx(0)
    oechem.OEWriteMolecule(ofs, mol.GetConf(first_conformer_selector))
def main(argv=[__name__]):
    """Run a FastROCS Tversky search for each query file against one database.

    Expected arguments: ``<prog> <database> [<queries> ... ]``.

    The search uses the Tversky similarity function and a hit limit equal to
    the number of molecules in the database.  For every query file the first
    molecule is read and each retrievable hit (transformed onto the query and
    tagged with ``ShapeTversky``, ``ColorTversky`` and ``TverskyCombo``) is
    written to ``<query base name>_results.<ext>``.

    Returns 0.
    """
    if len(argv) < 3:
        oechem.OEThrow.Usage("%s <database> [<queries> ... ]" % argv[0])

    dbname = argv[1]
    # read in database
    ifs = oechem.oemolistream()
    if not ifs.open(dbname):
        oechem.OEThrow.Fatal("Unable to open '%s'" % dbname)

    print("Opening database file %s ..." % dbname)
    timer = oechem.OEWallTimer()
    dbase = oefastrocs.OEShapeDatabase()
    moldb = oechem.OEMolDatabase()
    if not moldb.Open(ifs):
        oechem.OEThrow.Fatal("Unable to open '%s'" % dbname)

    dots = oechem.OEThreadedDots(10000, 200, "conformers")
    if not dbase.Open(moldb, dots):
        oechem.OEThrow.Fatal("Unable to initialize OEShapeDatabase on '%s'"
                             % dbname)

    dots.Total()
    print("%s seconds to load database" % timer.Elapsed())

    opts = oefastrocs.OEShapeDatabaseOptions()
    opts.SetSimFunc(oefastrocs.OEShapeSimFuncType_Tversky)
    # Return a score for every molecule in the database.
    numHits = moldb.NumMols()
    opts.SetLimit(numHits)

    for qfname in argv[2:]:
        # read in query
        qfs = oechem.oemolistream()
        if not qfs.open(qfname):
            # Name the query file that failed, not the database.
            oechem.OEThrow.Fatal("Unable to open '%s'" % qfname)

        query = oechem.OEGraphMol()
        if not oechem.OEReadMolecule(qfs, query):
            oechem.OEThrow.Fatal("Unable to read query from '%s'" % qfname)

        # Strip ".<ext>" from the query name to build the result file name.
        ext = oechem.OEGetFileExtension(qfname)
        base = qfname[:-(len(ext) + 1)]

        # write out everything to a similarly named file
        ofs = oechem.oemolostream()
        ofname = base + "_results." + ext
        if not ofs.open(ofname):
            # Report the file that failed; argv[4] need not even exist.
            oechem.OEThrow.Fatal("Unable to open '%s'" % ofname)

        print("Searching for %s" % qfname)
        for score in dbase.GetSortedScores(query, opts):
            dbmol = oechem.OEMol()
            molidx = score.GetMolIdx()
            if not moldb.GetMolecule(dbmol, molidx):
                print("Unable to retrieve molecule '%u' from the database"
                      % molidx)
                continue

            # Keep only the conformer that was scored.
            mol = oechem.OEGraphMol(
                dbmol.GetConf(oechem.OEHasConfIdx(score.GetConfIdx())))

            oechem.OESetSDData(mol, "ShapeTversky",
                               "%.4f" % score.GetShapeTversky())
            oechem.OESetSDData(mol, "ColorTversky",
                               "%.4f" % score.GetColorTversky())
            oechem.OESetSDData(mol, "TverskyCombo",
                               "%.4f" % score.GetTverskyCombo())
            score.Transform(mol)

            oechem.OEWriteMolecule(ofs, mol)

        ofs.close()
        print("Wrote results to %s" % ofname)

    return 0