def main(cls, argv): if len(argv) < 2: print "Usage: BerkeleyDbIndexer <index dir> -create" return dbHome = argv[1] create = len(argv) > 2 and argv[2] == "-create" if not os.path.exists(dbHome): os.makedirs(dbHome) elif create: for name in os.listdir(dbHome): if name.startswith('__'): os.remove(os.path.join(dbHome, name)) env = DBEnv() env.set_flags(DB_LOG_INMEMORY, 1); if os.name == 'nt': env.set_cachesize(0, 0x4000000, 1) elif os.name == 'posix': from commands import getstatusoutput if getstatusoutput('uname') == (0, 'Linux'): env.set_cachesize(0, 0x4000000, 1) env.open(dbHome, (DB_CREATE | DB_THREAD | DB_INIT_MPOOL | DB_INIT_LOCK | DB_INIT_TXN), 0) index = DB(env) blocks = DB(env) txn = None try: txn = env.txn_begin(None) index.open(filename = '__index__', dbtype = DB_BTREE, flags = DB_CREATE | DB_THREAD, txn = txn) blocks.open(filename = '__blocks__', dbtype = DB_BTREE, flags = DB_CREATE | DB_THREAD, txn = txn) except: if txn is not None: txn.abort() txn = None raise else: txn.commit() txn = None try: txn = env.txn_begin(None) directory = DbDirectory(txn, index, blocks, 0) writer = IndexWriter(directory, StandardAnalyzer(), create, IndexWriter.MaxFieldLength.UNLIMITED) writer.setUseCompoundFile(False) doc = Document() doc.add(Field("contents", "The quick brown fox...", Field.Store.YES, Field.Index.ANALYZED)) writer.addDocument(doc) writer.commit() writer.close() except: if txn is not None: txn.abort() txn = None raise else: txn.commit() index.close() blocks.close() env.close() print "Indexing Complete"
def main(cls, argv): if len(argv) < 2: print "Usage: BerkeleyDbIndexer <index dir> -create" return dbHome = argv[1] create = len(argv) > 2 and argv[2] == "-create" if not os.path.exists(dbHome): os.makedirs(dbHome) elif create: for name in os.listdir(dbHome): if name.startswith('__'): os.remove(os.path.join(dbHome, name)) env = DBEnv() env.set_flags(DB_LOG_INMEMORY, 1) if os.name == 'nt': env.set_cachesize(0, 0x4000000, 1) elif os.name == 'posix': from commands import getstatusoutput if getstatusoutput('uname') == (0, 'Linux'): env.set_cachesize(0, 0x4000000, 1) env.open(dbHome, (DB_CREATE | DB_THREAD | DB_INIT_MPOOL | DB_INIT_LOCK | DB_INIT_TXN), 0) index = DB(env) blocks = DB(env) txn = None try: txn = env.txn_begin(None) index.open(filename='__index__', dbtype=DB_BTREE, flags=DB_CREATE | DB_THREAD, txn=txn) blocks.open(filename='__blocks__', dbtype=DB_BTREE, flags=DB_CREATE | DB_THREAD, txn=txn) except: if txn is not None: txn.abort() txn = None raise else: txn.commit() txn = None try: txn = env.txn_begin(None) directory = DbDirectory(txn, index, blocks, 0) writer = IndexWriter(directory, StandardAnalyzer(), create, IndexWriter.MaxFieldLength.UNLIMITED) writer.setUseCompoundFile(False) doc = Document() doc.add( Field("contents", "The quick brown fox...", Field.Store.YES, Field.Index.ANALYZED)) writer.addDocument(doc) writer.optimize() writer.close() except: if txn is not None: txn.abort() txn = None raise else: txn.commit() index.close() blocks.close() env.close() print "Indexing Complete"
def main(cls, argv): if len(argv) != 2: print "Usage: BerkeleyDbSearcher <index dir>" return dbHome = argv[1] env = DBEnv() env.set_flags(DB_LOG_INMEMORY, 1); if os.name == 'nt': env.set_cachesize(0, 0x4000000, 1) elif os.name == 'posix': from commands import getstatusoutput if getstatusoutput('uname') == (0, 'Linux'): env.set_cachesize(0, 0x4000000, 1) env.open(dbHome, (DB_THREAD | DB_INIT_MPOOL | DB_INIT_LOCK | DB_INIT_TXN), 0) index = DB(env) blocks = DB(env) txn = None try: txn = env.txn_begin(None) index.open(filename = '__index__', dbtype = DB_BTREE, flags = DB_THREAD, txn = txn) blocks.open(filename = '__blocks__', dbtype = DB_BTREE, flags = DB_THREAD, txn = txn) except: if txn is not None: txn.abort() txn = None raise else: txn.commit() txn = None try: txn = env.txn_begin(None) directory = DbDirectory(txn, index, blocks, 0) searcher = self.getSearcher() topDocs = searcher.search(TermQuery(Term("contents", "fox")), 50) print topDocs.totalHits, "document(s) found" del searcher except: if txn is not None: txn.abort() txn = None raise else: txn.abort() index.close() blocks.close() env.close()
def main(cls, argv): if len(argv) != 2: print "Usage: BerkeleyDbSearcher <index dir>" return dbHome = argv[1] env = DBEnv() env.set_flags(DB_LOG_INMEMORY, 1); if os.name == 'nt': env.set_cachesize(0, 0x4000000, 1) elif os.name == 'posix': from commands import getstatusoutput if getstatusoutput('uname') == (0, 'Linux'): env.set_cachesize(0, 0x4000000, 1) env.open(dbHome, (DB_THREAD | DB_INIT_MPOOL | DB_INIT_LOCK | DB_INIT_TXN), 0) index = DB(env) blocks = DB(env) txn = None try: txn = env.txn_begin(None) index.open(filename = '__index__', dbtype = DB_BTREE, flags = DB_THREAD, txn = txn) blocks.open(filename = '__blocks__', dbtype = DB_BTREE, flags = DB_THREAD, txn = txn) except: if txn is not None: txn.abort() txn = None raise else: txn.commit() txn = None try: txn = env.txn_begin(None) directory = DbDirectory(txn, index, blocks, 0) searcher = IndexSearcher(directory, True) topDocs = searcher.search(TermQuery(Term("contents", "fox")), 50) print topDocs.totalHits, "document(s) found" searcher.close() except: if txn is not None: txn.abort() txn = None raise else: txn.abort() index.close() blocks.close() env.close()