def createOeBinaryDatabaseAndIndex(self, oebMolFilePath, oeMolDbFilePath):
    """Create OE binary database file and associated index from the input serial binary data file.

    Args:
        oebMolFilePath (str): input OeMol stream binary file path
        oeMolDbFilePath (str): output OeMolDatabase file path

    Returns:
        int: number of molecules processed in the database (0 when the input
            cannot be opened or an exception is raised along the way).
    """
    molCount = 0
    try:
        startTime = time.time()
        moldb = oechem.OEMolDatabase()
        if not moldb.Open(oebMolFilePath):
            logger.error("Read fails for %r", oebMolFilePath)
            return molCount
        #
        logger.info(
            "Opened database in format %r num mols %d max index %d",
            moldb.GetFormat(),
            moldb.NumMols(),
            moldb.GetMaxMolIdx(),
        )
        # Report a failed save instead of silently continuing.
        if not moldb.Save(oeMolDbFilePath):
            logger.error("Save fails for %r", oeMolDbFilePath)
        tL = list(moldb.GetTitles())
        # An empty database has no first/last title; indexing it would raise
        # IndexError and be reported as a failure by the handler below.
        if tL:
            logger.info("First and last titles: %r %r", tL[0], tL[-1])
        else:
            logger.info("No titles found in %r", oebMolFilePath)
        molCount = moldb.NumMols()
        endTime = time.time()
        logger.info(
            "Completed operation at %s (%.4f seconds)",
            time.strftime("%Y %m %d %H:%M:%S", time.localtime()),
            endTime - startTime,
        )
    except Exception as e:
        logger.exception("Failing with %s", str(e))
    return molCount
def _sort_by_rotbond(ifs, outdir):
    """
    Bucket database records by their number of rotor bonds and write each
    bucket to its own ``nrotor_<n>.smi`` file in ``outdir``.

    Parameters
    ----------
    ifs: str
        absolute path to molecule database
    outdir: str
        absolute path to where output files should be written.
    """
    moldb = oechem.OEMolDatabase(ifs)
    scratch = oechem.OEGraphMol()

    # rotor count -> database indices of the records having that count
    indices_by_count = {}
    for mol_idx in range(moldb.GetMaxMolIdx()):
        if not moldb.GetMolecule(scratch, mol_idx):
            continue
        rotor_count = sum(bond.IsRotor() for bond in scratch.GetBonds())
        indices_by_count.setdefault(rotor_count, []).append(mol_idx)

    # Write out a separate database for each num rotors
    for rotor_count, members in indices_by_count.items():
        out_path = os.path.join(outdir, 'nrotor_{}.smi'.format(rotor_count))
        stream = new_output_stream(out_path)
        write_oedatabase(moldb, stream, members, len(members))
def initialize(self, dbdict):
    """Empty the ``db`` table and insert one row per entry of ``dbdict``.

    Args:
        dbdict (dict): maps a database name to the path of its molecule file.
            Each path is opened with ``oechem.OEMolDatabase`` to obtain the
            molecule count stored alongside the name and path.
    """
    self('DELETE FROM db', commit=True)
    insert_sql = 'INSERT INTO db (name, path, molcount) VALUES (?,?,?)'
    for name in dbdict:
        path = dbdict[name]
        molcount = oechem.OEMolDatabase(path).NumMols()
        self(insert_sql, (name, path, molcount), commit=True)
def SplitChunk(ifs, chunksize, outbase, ext):
    """Write the records of ``ifs`` to numbered output files of ``chunksize`` records each.

    A new output stream is requested from ``NewOutputStream(outbase, ext, n)``
    (n starting at 1) every time the current one has received ``chunksize``
    records.
    """
    database = oechem.OEMolDatabase(ifs)
    next_chunk = 1
    # Starting "full" forces a stream to be opened for the very first record.
    written = chunksize
    for mol_idx in range(database.GetMaxMolIdx()):
        if written == chunksize:
            ofs = NewOutputStream(outbase, ext, next_chunk)
            next_chunk += 1
            written = 0
        written += 1
        database.WriteMolecule(ofs, mol_idx)
def MolSort(ifs, ofs):
    """Reorder the molecule database read from ``ifs`` by title and save it to ``ofs``."""
    database = oechem.OEMolDatabase(ifs)
    all_titles = list(database.GetTitles())
    # sorted() is stable, so records with equal titles keep ascending index
    # order, exactly as sorting (title, index) pairs would.
    order = sorted(range(len(all_titles)), key=all_titles.__getitem__)
    database.Order(order)
    database.Save(ofs)
def MolCount(fname):
    """Print and return the number of molecules in the file ``fname``.

    Returns 0 (after emitting a warning) when the file cannot be opened.
    """
    stream = oechem.oemolistream()
    if stream.open(fname):
        database = oechem.OEMolDatabase(stream)
        total = database.NumMols()
        print("%s contains %d molecule(s)." % (fname, total))
        return total

    oechem.OEThrow.Warning("Unable to open %s for reading" % fname)
    return 0
def main():
    """Score every SMILES of an input CSV against a shape database.

    Reads the options from ``getargs()`` (``d``: database file, ``i``: input
    CSV with a ``smiles`` column, ``o``: output CSV). For each SMILES the
    TanimotoCombo of the first score returned by the shape database is stored
    in a new ``fastroc`` column; NaN is stored when no score is produced or
    the query fails.

    Returns:
        int: always 0 (also when no supported GPU is available).
    """
    if not oefastrocs.OEFastROCSIsGPUReady():
        oechem.OEThrow.Info("No supported GPU available!")
        return 0

    args = getargs()
    dbname = args.d

    # read in database
    ifs = oechem.oemolistream()
    if not ifs.open(dbname):
        oechem.OEThrow.Fatal("Unable to open '%s'" % dbname)

    print("Opening database file %s ..." % dbname)
    timer = oechem.OEWallTimer()
    # NOTE(review): opts is configured but the search below passes the limit
    # directly; kept as in the original.
    opts = oefastrocs.OEShapeDatabaseOptions()
    opts.SetLimit(1)
    dbase = oefastrocs.OEShapeDatabase()
    moldb = oechem.OEMolDatabase()
    if not moldb.Open(ifs):
        oechem.OEThrow.Fatal("Unable to open '%s'" % dbname)

    dots = oechem.OEThreadedDots(10000, 200, "conformers")
    if not dbase.Open(moldb, dots):
        oechem.OEThrow.Fatal("Unable to initialize OEShapeDatabase on '%s'" % dbname)

    dots.Total()
    print("%f seconds to load database" % timer.Elapsed())

    df = pd.read_csv(args.i)
    res = []
    for smile in tqdm(df.loc[:, 'smiles'].tolist()):
        best = np.nan  # stays NaN when the search yields nothing or fails
        try:
            q = FromString(smile)[0]
            for score in dbase.GetSortedScores(q, 1):
                best = score.GetTanimotoCombo()
                break
        except KeyboardInterrupt:
            print("caught")
            # Equivalent to exit() without depending on the site builtin.
            raise SystemExit
        except Exception:
            # Best effort per row: a failing query is recorded as NaN.
            best = np.nan
        res.append(best)

    df['fastroc'] = res
    # head must be called; printing the bound method shows no rows.
    print(df.head())
    df.to_csv(args.o, sep=',', index=False)
    return 0
def loadOeBinaryDatabaseAndIndex(self, oeMolDbFilePath):
    """Open an OE molecule database file.

    Args:
        oeMolDbFilePath (str): path of the OEMolDatabase file to open

    Returns:
        oechem.OEMolDatabase: the opened database, or None when the file
            cannot be opened (the failure is logged).
    """
    molDb = None
    try:
        candidate = oechem.OEMolDatabase()
        if candidate.Open(oeMolDbFilePath):
            # Only hand back a database that actually opened, so callers can
            # detect failure and no misleading "Loaded" message is logged.
            molDb = candidate
            logger.info("Loaded OE database file containing %d molecules", molDb.NumMols())
        else:
            logger.error("Unable to open %r", oeMolDbFilePath)
    except Exception as e:
        logger.exception("Loading %r failing with %s", oeMolDbFilePath, str(e))
    return molDb
def test_sort_nrotors(self):
    """Tests sorting fragments by nrotors"""
    ifs = get_fn('frags.smi')
    out_dir = self.get_writes_dir()
    fragment._sort_by_rotbond(ifs, outdir=out_dir)
    mol = oechem.OEGraphMol()
    # Every record written to nrotor_<n>.smi must have exactly n rotor bonds.
    for n in range(1, 6):
        db = get_fn('nrotor_{}.smi'.format(n), written=True)
        moldb = oechem.OEMolDatabase(db)
        for idx in range(moldb.GetMaxMolIdx()):
            if moldb.GetMolecule(mol, idx):
                nrotor = sum(bond.IsRotor() for bond in mol.GetBonds())
                # assertEquals is a deprecated alias removed in Python 3.12.
                self.assertEqual(n, nrotor)
def main(argv=[__name__]):
    """Generate a fast fingerprint database file for a molecule database file.

    Command-line options read from the interface: ``-in`` (molecule file) and
    ``-fpdb`` (output fingerprint file, must use the ``fpbin`` extension).
    A molecule index file is created when it does not exist yet.

    Returns:
        int: 1 when the command line cannot be parsed, otherwise 0.
    """
    itf = oechem.OEInterface()
    oechem.OEConfigure(itf, InterfaceData)
    default_fptype = oegraphsim.OEGetFPType(oegraphsim.OEFPType_Tree)
    oegraphsim.OEConfigureFingerPrint(itf, default_fptype)

    if not oechem.OEParseCommandLine(itf, argv):
        return 1

    mol_path = itf.GetString("-in")
    fp_path = itf.GetString("-fpdb")

    if oechem.OEGetFileExtension(fp_path) != "fpbin":
        oechem.OEThrow.Fatal(
            "Fingerprint database file should have '.fpbin' file extension!")

    # Create the molecule index only when it is not already on disk.
    index_path = oechem.OEGetMolDatabaseIdxFileName(mol_path)
    index_missing = not os.path.exists(index_path)
    if index_missing and not oechem.OECreateMolDatabaseIdx(mol_path):
        oechem.OEThrow.Warning("Unable to create %s molecule index file" % index_path)
    oechem.OEThrow.Info("Using %s index molecule file" % index_path)

    database = oechem.OEMolDatabase()
    if not database.Open(mol_path):
        oechem.OEThrow.Fatal("Cannot open molecule database file!")
    max_mol_idx = database.GetMaxMolIdx()

    fptype = oegraphsim.OESetupFingerPrint(itf)
    oechem.OEThrow.Info("Using fingerprint type %s" % fptype.GetFPTypeString())

    create_opts = oegraphsim.OECreateFastFPDatabaseOptions(fptype)
    create_opts.SetTracer(oechem.OEDots(100000, 1000, "fingerprints"))
    oechem.OEThrow.Info("Generating fingerprints with %d threads" % create_opts.GetNumProcessors())

    stopwatch = oechem.OEWallTimer()
    if not oegraphsim.OECreateFastFPDatabaseFile(fp_path, mol_path, create_opts):
        oechem.OEThrow.Fatal("Cannot create fingerprint database file!")

    oechem.OEThrow.Info("%5.2f secs to generate %d fingerprints" % (stopwatch.Elapsed(), max_mol_idx))
    return 0
def __init__(self, itf):
    """ Create a MCMolShapeDatabase from the parameters specified by the OEInterface.

    Reads ``-dbase`` and ``-shapeOnly`` from ``itf``, creates the molecule and
    shape database objects, and starts a background loader thread that is
    handed both databases, the database name and ``self.loadedEvent``.
    """
    self.rwlock = ReadWriteLock()
    self.loadedEvent = Event()

    self.dbname = itf.GetString("-dbase")
    self.moldb = oechem.OEMolDatabase()

    self.dbtype = GetDatabaseType(itf.GetBool("-shapeOnly"))
    self.shapedb = oefastrocs.OEShapeDatabase(*GetShapeDatabaseArgs(itf))

    # this thread is daemonic so a KeyboardInterrupt
    # during the load will cancel the process
    self.loaderThread = DatabaseLoaderThread(self.shapedb,
                                             self.moldb,
                                             self.dbname,
                                             self.loadedEvent)
    # Thread.setDaemon() is deprecated; the daemon attribute is the
    # supported spelling (assumes DatabaseLoaderThread is a threading.Thread
    # subclass, as the setDaemon/start usage implies).
    self.loaderThread.daemon = True
    self.loaderThread.start()
def SplitNParts(ifs, nparts, outbase, ext):
    """Distribute the records of ``ifs`` over numbered output files.

    The per-file capacity is the molecule count divided by ``nparts``,
    rounded up when there is a remainder; once the part whose number equals
    that remainder has been filled, the capacity drops by one for the
    remaining parts. Output streams come from
    ``NewOutputStream(outbase, ext, n)`` with n starting at 1.
    """
    database = oechem.OEMolDatabase(ifs)
    base_size, remainder = divmod(database.NumMols(), nparts)
    capacity = base_size + 1 if remainder != 0 else base_size

    part = 1
    in_part = 0
    ofs = NewOutputStream(outbase, ext, part)
    for mol_idx in range(database.GetMaxMolIdx()):
        in_part += 1
        if in_part > capacity:
            # The part just completed was the last one carrying an extra record.
            if part == remainder:
                capacity -= 1
            ofs.close()
            part += 1
            in_part = 1
            ofs = NewOutputStream(outbase, ext, part)
        database.WriteMolecule(ofs, mol_idx)
def main(argv=[__name__]):
    """Search a shape database with every conformer of every query molecule.

    Command-line arguments (parsed by argparse from the process command
    line): ``database``, one or more ``query`` files, and the options
    ``--nHits``, ``--cutoff`` and ``--tversky``. For each query molecule the
    hits of all its conformers are written to ``<title>_results.<ext>``,
    where the title falls back to the molecule's position in the query file
    when it is empty.

    Returns:
        int: always 0 (also when no supported GPU is available).
    """
    parser = argparse.ArgumentParser()

    # positional arguments retaining backward compatibility
    parser.add_argument(
        'database',
        help='File containing the database molecules to be search \
              (format not restricted to *.oeb).')
    parser.add_argument(
        'query', default=[], nargs='+',
        help='File containing the query molecule(s) to be search \
              (format not restricted to *.oeb).')
    parser.add_argument(
        '--nHits', dest='nHits', type=int, default=100,
        help='Number of hits to return (default = number of database mols).')
    parser.add_argument('--cutoff', dest='cutoff', type=float, default=argparse.SUPPRESS,
                        help='Specify a cutoff criteria for scores.')
    parser.add_argument(
        '--tversky', dest='tversky', action='store_true', default=argparse.SUPPRESS,
        help='Switch to Tversky similarity scoring (default = Tanimoto).')

    args = parser.parse_args()

    dbname = args.database

    if not oefastrocs.OEFastROCSIsGPUReady():
        oechem.OEThrow.Info("No supported GPU available!")
        return 0

    # set options
    opts = oefastrocs.OEShapeDatabaseOptions()
    opts.SetLimit(args.nHits)
    print("Number of hits set to %u" % opts.GetLimit())
    # default=argparse.SUPPRESS means the attribute exists only when the
    # option was given on the command line.
    if hasattr(args, 'cutoff'):
        opts.SetCutoff(args.cutoff)
        print("Cutoff set to %f" % args.cutoff)
    if hasattr(args, 'tversky'):
        # Pass the similarity-function constant rather than the bool flag.
        opts.SetSimFunc(oefastrocs.OEShapeSimFuncType_Tversky)
        print("Tversky similarity scoring set.")

    # read in database
    ifs = oechem.oemolistream()
    if not ifs.open(dbname):
        oechem.OEThrow.Fatal("Unable to open '%s'" % dbname)

    print("\nOpening database file %s ..." % dbname)
    timer = oechem.OEWallTimer()
    dbase = oefastrocs.OEShapeDatabase()
    moldb = oechem.OEMolDatabase()
    if not moldb.Open(ifs):
        oechem.OEThrow.Fatal("Unable to open '%s'" % dbname)

    dots = oechem.OEThreadedDots(10000, 200, "conformers")
    if not dbase.Open(moldb, dots):
        oechem.OEThrow.Fatal("Unable to initialize OEShapeDatabase on '%s'" % dbname)

    dots.Total()
    print("%f seconds to load database\n" % timer.Elapsed())

    for qfname in args.query:
        # read in query
        qfs = oechem.oemolistream()
        if not qfs.open(qfname):
            oechem.OEThrow.Fatal("Unable to open '%s'" % qfname)

        # Probe that at least one molecule is readable, then start over.
        mcmol = oechem.OEMol()
        if not oechem.OEReadMolecule(qfs, mcmol):
            oechem.OEThrow.Fatal("Unable to read query from '%s'" % qfname)
        qfs.rewind()

        ext = oechem.OEGetFileExtension(qfname)

        qmolidx = 0
        while oechem.OEReadMolecule(qfs, mcmol):
            # write out to file name based on molecule title
            ofs = oechem.oemolostream()
            moltitle = mcmol.GetTitle()
            if len(moltitle) == 0:
                moltitle = str(qmolidx)
            ofname = moltitle + "_results." + ext
            if not ofs.open(ofname):
                # Report the file that failed; argv[4] was unrelated and
                # could itself raise IndexError.
                oechem.OEThrow.Fatal("Unable to open '%s'" % ofname)

            print("Searching for %s of %s (%s conformers)" % (moltitle, qfname, mcmol.NumConfs()))

            qconfidx = 0
            for conf in mcmol.GetConfs():
                for score in dbase.GetSortedScores(conf, opts):
                    dbmol = oechem.OEMol()
                    dbmolidx = score.GetMolIdx()
                    if not moldb.GetMolecule(dbmol, dbmolidx):
                        print("Unable to retrieve molecule '%u' from the database" % dbmolidx)
                        continue

                    mol = oechem.OEGraphMol(
                        dbmol.GetConf(oechem.OEHasConfIdx(score.GetConfIdx())))

                    oechem.OESetSDData(mol, "QueryConfidx", "%s" % qconfidx)
                    oechem.OESetSDData(mol, "ShapeTanimoto", "%.4f" % score.GetShapeTanimoto())
                    oechem.OESetSDData(mol, "ColorTanimoto", "%.4f" % score.GetColorTanimoto())
                    oechem.OESetSDData(mol, "TanimotoCombo", "%.4f" % score.GetTanimotoCombo())
                    score.Transform(mol)

                    oechem.OEWriteMolecule(ofs, mol)
                qconfidx += 1

            # Close explicitly so each result file is complete before the
            # next one is opened.
            ofs.close()
            print("%s conformers processed" % qconfidx)
            print("Wrote results to %s\n" % ofname)
            qmolidx += 1
    return 0
def main(argv=[__name__]):
    """Search a shape database with each query file and save the top hits.

    Args:
        argv (list): ``[program, database, query, ...]``.

    For every query file the query itself followed by up to 5 hits is written
    to ``<query base name>_results.<ext>``; each hit carries its shape, color
    and combo Tanimoto scores as SD data.

    Returns:
        int: always 0 (also on a usage error or when no GPU is available).
    """
    if len(argv) < 3:
        oechem.OEThrow.Usage("%s <database> [<queries> ... ]" % argv[0])
        return 0

    # check system
    if not oefastrocs.OEFastROCSIsGPUReady():
        oechem.OEThrow.Info("No supported GPU available!")
        return 0

    # read in database
    dbname = argv[1]
    print("Opening database file %s ..." % dbname)

    dbase = oefastrocs.OEShapeDatabase()
    moldb = oechem.OEMolDatabase()

    if not moldb.Open(dbname):
        oechem.OEThrow.Fatal("Unable to open '%s'" % dbname)

    dots = oechem.OEThreadedDots(10000, 200, "conformers")
    if not dbase.Open(moldb, dots):
        oechem.OEThrow.Fatal("Unable to initialize OEShapeDatabase on '%s'" % dbname)

    # customize search options
    opts = oefastrocs.OEShapeDatabaseOptions()
    opts.SetLimit(5)

    for qfname in argv[2:]:
        # read in query
        qfs = oechem.oemolistream()
        if not qfs.open(qfname):
            oechem.OEThrow.Fatal("Unable to open '%s'" % qfname)

        query = oechem.OEGraphMol()
        if not oechem.OEReadMolecule(qfs, query):
            oechem.OEThrow.Fatal("Unable to read query from '%s'" % qfname)

        ext = oechem.OEGetFileExtension(qfname)
        base = qfname[:-(len(ext) + 1)]

        # write out everything to a similarly named file
        ofs = oechem.oemolostream()
        ofname = base + "_results." + ext
        if not ofs.open(ofname):
            # Report the file that failed; argv[4] was unrelated and could
            # itself raise IndexError.
            oechem.OEThrow.Fatal("Unable to open '%s'" % ofname)
        oechem.OEWriteMolecule(ofs, query)

        print("Searching for %s" % qfname)
        for score in dbase.GetSortedScores(query, opts):
            print("Score for mol %u(conf %u) %f shape %f color" % (
                score.GetMolIdx(), score.GetConfIdx(),
                score.GetShapeTanimoto(), score.GetColorTanimoto()))
            dbmol = oechem.OEMol()
            molidx = score.GetMolIdx()
            if not moldb.GetMolecule(dbmol, molidx):
                print("Unable to retrieve molecule '%u' from the database" % molidx)
                continue

            # Keep only the conformer the score refers to.
            mol = oechem.OEGraphMol(
                dbmol.GetConf(oechem.OEHasConfIdx(score.GetConfIdx())))

            oechem.OESetSDData(mol, "ShapeTanimoto", "%.4f" % score.GetShapeTanimoto())
            oechem.OESetSDData(mol, "ColorTanimoto", "%.4f" % score.GetColorTanimoto())
            oechem.OESetSDData(mol, "TanimotoCombo", "%.4f" % score.GetTanimotoCombo())
            score.Transform(mol)

            oechem.OEWriteMolecule(ofs, mol)
        print("Wrote results to %s" % ofname)

    return 0
# current license or subscription to the applicable OpenEye offering.
# THE SAMPLE CODE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
# EXPRESS OR IMPLIED. OPENEYE DISCLAIMS ALL WARRANTIES, INCLUDING, BUT
# NOT LIMITED TO, WARRANTIES OF MERCHANTABILITY, FITNESS FOR A
# PARTICULAR PURPOSE AND NONINFRINGEMENT. In no event shall OpenEye be
# liable for any damages or liability in connection with the Sample Code
# or its use.

# Copies the molecule stored at a given index of a molecule database file
# into an output file: <input> <output> <index>.

# @ <SNIPPET>
from openeye import oechem
import sys

if len(sys.argv) != 4:
    oechem.OEThrow.Usage("%s <input> <output> <index>" % sys.argv[0])

moldb = oechem.OEMolDatabase()
if not moldb.Open(sys.argv[1]):
    oechem.OEThrow.Fatal("Unable to open %s for reading" % sys.argv[1])

ofs = oechem.oemolostream()
if not ofs.open(sys.argv[2]):
    oechem.OEThrow.Fatal("Unable to open %s for writing" % sys.argv[2])

# Reject a non-numeric index with a regular error message instead of an
# uncaught ValueError traceback.
try:
    idx = int(sys.argv[3])
except ValueError:
    oechem.OEThrow.Fatal("Index must be an integer, got '%s'" % sys.argv[3])

mol = oechem.OEMol()
if not moldb.GetMolecule(mol, idx):
    oechem.OEThrow.Fatal("Unable to read a molecule from index %u" % idx)

oechem.OEWriteMolecule(ofs, mol)
# @ </SNIPPET>
def main(argv=[__name__]):
    """Search a fast fingerprint database with a query molecule and write the hits.

    Command-line options read from the interface: ``-query``, ``-molfname``,
    ``-fpdbfname`` and ``-out``. Each hit retrieved from the molecule
    database is tagged with its similarity score as SD data before being
    written.

    Returns:
        int: always 0 (also when the command line cannot be parsed).
    """
    itf = oechem.OEInterface()
    oechem.OEConfigure(itf, InterfaceData)
    default_opts = oegraphsim.OEFPDatabaseOptions(10, oegraphsim.OESimMeasure_Tanimoto)
    oegraphsim.OEConfigureFPDatabaseOptions(itf, default_opts)
    oegraphsim.OEConfigureFPDatabaseMemoryType(itf)

    if not oechem.OEParseCommandLine(itf, argv):
        return 0

    query_path = itf.GetString("-query")
    mol_path = itf.GetString("-molfname")
    fp_path = itf.GetString("-fpdbfname")
    out_path = itf.GetString("-out")

    # initialize databases
    stopwatch = oechem.OEWallTimer()
    stopwatch.Start()

    query_stream = oechem.oemolistream()
    if not query_stream.open(query_path):
        oechem.OEThrow.Fatal("Cannot open input file!")

    query = oechem.OEGraphMol()
    if not oechem.OEReadMolecule(query_stream, query):
        oechem.OEThrow.Fatal("Cannot read query molecule!")

    moldb = oechem.OEMolDatabase()
    if not moldb.Open(mol_path):
        oechem.OEThrow.Fatal("Cannot open molecule database!")

    memory_type = oegraphsim.OEGetFPDatabaseMemoryType(itf)
    fpdb = oegraphsim.OEFastFPDatabase(fp_path, memory_type)
    if not fpdb.IsValid():
        oechem.OEThrow.Fatal("Cannot open fingerprint database!")
    fp_total = fpdb.NumFingerPrints()
    memory_label = fpdb.GetMemoryTypeString()

    ofs = oechem.oemolostream()
    if not ofs.open(out_path):
        oechem.OEThrow.Fatal("Cannot open output file!")

    if not oegraphsim.OEAreCompatibleDatabases(moldb, fpdb):
        oechem.OEThrow.Fatal("Databases are not compatible!")

    oechem.OEThrow.Info("%5.2f sec to initialize databases" % stopwatch.Elapsed())

    fp_kind = fpdb.GetFPTypeBase()
    oechem.OEThrow.Info("Using fingerprint type %s" % fp_kind.GetFPTypeString())

    search_opts = oegraphsim.OEFPDatabaseOptions()
    oegraphsim.OESetupFPDatabaseOptions(search_opts, itf)

    # search fingerprint database
    stopwatch.Start()
    ranked = fpdb.GetSortedScores(query, search_opts)
    oechem.OEThrow.Info("%5.2f sec to search %d fingerprints %s"
                        % (stopwatch.Elapsed(), fp_total, memory_label))

    # write the hits that can be retrieved from the molecule database
    stopwatch.Start()
    written = 0
    hit = oechem.OEGraphMol()
    for entry in ranked:
        if not moldb.GetMolecule(hit, entry.GetIdx()):
            continue
        written += 1
        oechem.OESetSDData(hit, "Similarity score", "%.2f" % entry.GetScore())
        oechem.OEWriteMolecule(ofs, hit)
    oechem.OEThrow.Info("%5.2f sec to write %d hits" % (stopwatch.Elapsed(), written))

    return 0
def main(argv=[__name__]):
    """Search a shape database with Tversky scoring for each query file.

    Args:
        argv (list): ``[program, database, query, ...]``.

    The hit limit is set to the number of molecules in the database. For
    every query file the hits are written to
    ``<query base name>_results.<ext>`` with their shape, color and combo
    Tversky scores attached as SD data.

    Returns:
        int: always 0.
    """
    if len(argv) < 3:
        oechem.OEThrow.Usage("%s <database> [<queries> ... ]" % argv[0])
        # Same guard as the sibling entry points: never index argv[1] when
        # the arguments are missing.
        return 0

    dbname = argv[1]
    # read in database
    ifs = oechem.oemolistream()
    if not ifs.open(dbname):
        oechem.OEThrow.Fatal("Unable to open '%s'" % dbname)

    print("Opening database file %s ..." % dbname)
    timer = oechem.OEWallTimer()
    dbase = oefastrocs.OEShapeDatabase()
    moldb = oechem.OEMolDatabase()
    if not moldb.Open(ifs):
        oechem.OEThrow.Fatal("Unable to open '%s'" % dbname)

    dots = oechem.OEThreadedDots(10000, 200, "conformers")
    if not dbase.Open(moldb, dots):
        oechem.OEThrow.Fatal("Unable to initialize OEShapeDatabase on '%s'" % dbname)

    dots.Total()
    print("%s seconds to load database" % timer.Elapsed())

    opts = oefastrocs.OEShapeDatabaseOptions()
    opts.SetSimFunc(oefastrocs.OEShapeSimFuncType_Tversky)
    numHits = moldb.NumMols()
    opts.SetLimit(numHits)

    for qfname in argv[2:]:
        # read in query
        qfs = oechem.oemolistream()
        if not qfs.open(qfname):
            # Name the query file that failed, not the database (argv[1]).
            oechem.OEThrow.Fatal("Unable to open '%s'" % qfname)

        query = oechem.OEGraphMol()
        if not oechem.OEReadMolecule(qfs, query):
            oechem.OEThrow.Fatal("Unable to read query from '%s'" % qfname)

        ext = oechem.OEGetFileExtension(qfname)
        base = qfname[:-(len(ext) + 1)]

        # write out everything to a similarly named file
        ofs = oechem.oemolostream()
        ofname = base + "_results." + ext
        if not ofs.open(ofname):
            # Report the file that failed; argv[4] was unrelated and could
            # itself raise IndexError.
            oechem.OEThrow.Fatal("Unable to open '%s'" % ofname)

        print("Searching for %s" % qfname)
        for score in dbase.GetSortedScores(query, opts):
            dbmol = oechem.OEMol()
            molidx = score.GetMolIdx()
            if not moldb.GetMolecule(dbmol, molidx):
                print("Unable to retrieve molecule '%u' from the database" % molidx)
                continue

            # Keep only the conformer the score refers to.
            mol = oechem.OEGraphMol(
                dbmol.GetConf(oechem.OEHasConfIdx(score.GetConfIdx())))

            oechem.OESetSDData(mol, "ShapeTversky", "%.4f" % score.GetShapeTversky())
            oechem.OESetSDData(mol, "ColorTversky", "%.4f" % score.GetColorTversky())
            oechem.OESetSDData(mol, "TverskyCombo", "%.4f" % score.GetTverskyCombo())
            score.Transform(mol)

            oechem.OEWriteMolecule(ofs, mol)
        print("Wrote results to %s" % ofname)

    return 0
def main(argv=[__name__]):
    """Build an in-memory fingerprint database and search it with a query.

    Command-line options read from the interface: ``-query``, ``-molfname``
    and ``-out``. A fingerprint is added for every index of the molecule
    database (an empty fingerprint where the molecule cannot be read, which
    keeps fingerprint and molecule indices aligned); the hits are then
    written to the output file.

    Returns:
        int: always 0 (also when the command line cannot be parsed).
    """
    itf = oechem.OEInterface()
    oechem.OEConfigure(itf, InterfaceData)
    defopts = oegraphsim.OEFPDatabaseOptions(10, oegraphsim.OESimMeasure_Tanimoto)
    oegraphsim.OEConfigureFPDatabaseOptions(itf, defopts)
    oegraphsim.OEConfigureFingerPrint(
        itf, oegraphsim.OEGetFPType(oegraphsim.OEFPType_Tree))

    if not oechem.OEParseCommandLine(itf, argv):
        return 0

    qfname = itf.GetString("-query")
    mfname = itf.GetString("-molfname")
    ofname = itf.GetString("-out")

    # initialize databases
    timer = oechem.OEWallTimer()
    timer.Start()

    ifs = oechem.oemolistream()
    if not ifs.open(qfname):
        oechem.OEThrow.Fatal("Cannot open input file!")

    query = oechem.OEGraphMol()
    if not oechem.OEReadMolecule(ifs, query):
        oechem.OEThrow.Fatal("Cannot read query molecule!")

    moldb = oechem.OEMolDatabase()
    if not moldb.Open(mfname):
        oechem.OEThrow.Fatal("Cannot open molecule database!")

    ofs = oechem.oemolostream()
    if not ofs.open(ofname):
        oechem.OEThrow.Fatal("Cannot open output file!")

    fptype = oegraphsim.OESetupFingerPrint(itf)
    oechem.OEThrow.Info("Using fingerprint type %s" % fptype.GetFPTypeString())
    fpdb = oegraphsim.OEFPDatabase(fptype)

    emptyfp = oegraphsim.OEFingerPrint()
    emptyfp.SetFPTypeBase(fptype)

    nrmols = moldb.GetMaxMolIdx()

    mol = oechem.OEGraphMol()
    for idx in range(nrmols):
        if moldb.GetMolecule(mol, idx):
            fpdb.AddFP(mol)
        else:
            # Placeholder so fingerprint index idx still maps to record idx.
            fpdb.AddFP(emptyfp)

    nrfps = fpdb.NumFingerPrints()
    oechem.OEThrow.Info("%5.2f sec to initialize databases" % timer.Elapsed())

    opts = oegraphsim.OEFPDatabaseOptions()
    oegraphsim.OESetupFPDatabaseOptions(opts, itf)

    # search fingerprint database
    timer.Start()
    scores = fpdb.GetSortedScores(query, opts)
    oechem.OEThrow.Info("%5.2f sec to search %d fingerprints" % (timer.Elapsed(), nrfps))

    timer.Start()
    # Count what is actually written: the configured limit is only an upper
    # bound and says nothing about hits that could not be retrieved.
    nrhits = 0
    hit = oechem.OEGraphMol()
    for si in scores:
        if moldb.GetMolecule(hit, si.GetIdx()):
            nrhits += 1
            oechem.OEWriteMolecule(ofs, hit)
    oechem.OEThrow.Info("%5.2f sec to write %d hits" % (timer.Elapsed(), nrhits))

    return 0
def main(argv=[__name__]):
    """Search a shape database using user-defined inertial starts.

    Args:
        argv (list): ``[program, database, query, ...]``.

    For each query file the coordinates of the query atom at index 1 are used
    as the single user start. The query followed by up to 5 hits is written
    to ``<query base name>_user_results.<ext>``, each hit carrying its shape,
    color and combo Tanimoto scores as SD data.

    Returns:
        int: always 0 (also on a usage error or when no GPU is available).
    """
    if len(argv) < 3:
        oechem.OEThrow.Usage("%s <database> [<queries> ... ]" % argv[0])
        return 0

    # check system
    if not oefastrocs.OEFastROCSIsGPUReady():
        oechem.OEThrow.Info("No supported GPU available!")
        return 0

    # read in database
    dbname = argv[1]
    print("Opening database file %s ..." % dbname)

    dbase = oefastrocs.OEShapeDatabase()
    moldb = oechem.OEMolDatabase()

    if not moldb.Open(dbname):
        oechem.OEThrow.Fatal("Unable to open '%s'" % dbname)

    dots = oechem.OEThreadedDots(10000, 200, "conformers")
    if not dbase.Open(moldb, dots):
        oechem.OEThrow.Fatal("Unable to initialize OEShapeDatabase on '%s'" % dbname)

    # customize search options
    opts = oefastrocs.OEShapeDatabaseOptions()
    opts.SetInitialOrientation(
        oefastrocs.OEFastROCSOrientation_UserInertialStarts)
    opts.SetLimit(5)

    for qfname in argv[2:]:
        # read in query
        qfs = oechem.oemolistream()
        if not qfs.open(qfname):
            oechem.OEThrow.Fatal("Unable to open '%s'" % qfname)

        query = oechem.OEGraphMol()
        if not oechem.OEReadMolecule(qfs, query):
            oechem.OEThrow.Fatal("Unable to read query from '%s'" % qfname)

        ext = oechem.OEGetFileExtension(qfname)
        base = qfname[:-(len(ext) + 1)]

        # write out everything to a similarly named file
        ofs = oechem.oemolostream()
        ofname = base + "_user_results." + ext
        if not ofs.open(ofname):
            # Report the file that failed; argv[4] was unrelated and could
            # itself raise IndexError.
            oechem.OEThrow.Fatal("Unable to open '%s'" % ofname)
        oechem.OEWriteMolecule(ofs, query)

        # Flatten the coordinates of the chosen atom into the starts vector.
        startsCoords = oechem.OEFloatVector()
        atomIdx = 1
        xyz = query.GetCoords()[atomIdx]
        for x in xyz:
            startsCoords.append(x)
        if len(startsCoords) % 3 != 0:
            oechem.OEThrow.Fatal(
                "Something went wrong whilst reading in user-starts coordinates"
            )

        # Integer division: one start per (x, y, z) triple.
        opts.SetUserStarts(oechem.OEFloatVector(startsCoords), len(startsCoords) // 3)

        opts.SetMaxOverlays(opts.GetNumInertialStarts() * opts.GetNumUserStarts())

        if opts.GetInitialOrientation() == oefastrocs.OEFastROCSOrientation_UserInertialStarts:
            numStarts = opts.GetNumUserStarts()
            print("This example will use %u starts" % numStarts)

        print("Searching for %s" % qfname)
        for score in dbase.GetSortedScores(query, opts):
            print("Score for mol %u(conf %u) %f shape %f color" % (
                score.GetMolIdx(), score.GetConfIdx(),
                score.GetShapeTanimoto(), score.GetColorTanimoto()))
            dbmol = oechem.OEMol()
            molidx = score.GetMolIdx()
            if not moldb.GetMolecule(dbmol, molidx):
                print("Unable to retrieve molecule '%u' from the database" % molidx)
                continue

            # Keep only the conformer the score refers to.
            mol = oechem.OEGraphMol(
                dbmol.GetConf(oechem.OEHasConfIdx(score.GetConfIdx())))

            oechem.OESetSDData(mol, "ShapeTanimoto", "%.4f" % score.GetShapeTanimoto())
            oechem.OESetSDData(mol, "ColorTanimoto", "%.4f" % score.GetColorTanimoto())
            oechem.OESetSDData(mol, "TanimotoCombo", "%.4f" % score.GetTanimotoCombo())
            score.Transform(mol)

            oechem.OEWriteMolecule(ofs, mol)
        print("Wrote results to %s" % ofname)

    return 0
def get_molcount(path):
    """Return the molecule count reported by the molecule database at ``path``."""
    return oechem.OEMolDatabase(path).NumMols()