def __init__(self, options):
    """Open (or attach to) the RocksDB at ``options["path"]``.

    :param options:
        path: absolute local fs path
        create_if_missing: default true

    A class-level registry (``RocksdbAdapter.db_dict``) shares one DB handle
    per path across adapter instances; ``db_lock`` serializes registry access.
    NOTE(review): ``count_dict`` starts at 0 for the first opener and is only
    incremented by subsequent openers, so it counts *extra* attachments, not
    total — confirm against the close/refcount logic elsewhere in the class.
    """
    self.path = options["path"].strip()
    with RocksdbAdapter.db_lock:
        super().__init__(options)
        L.debug(f'initing db={self.path}, db_dict={RocksdbAdapter.db_dict}')
        self.is_closed = False
        if self.path not in RocksdbAdapter.db_dict:
            # First opener for this path: build a small-footprint option set
            # (tiny buffers, single level, no compression, no block cache).
            opts = rocksdb.Options()
            opts.create_if_missing = (str(options.get("create_if_missing", "True")).lower() == 'true')
            opts.compression = rocksdb.CompressionType.no_compression
            # todo:0: parameterize write_buffer_size
            opts.max_open_files = -1
            # opts.allow_concurrent_memtable_write = True
            opts.write_buffer_size = 128 * 1024
            opts.max_write_buffer_number = 1
            opts.allow_mmap_writes = False
            opts.allow_mmap_reads = False
            opts.arena_block_size = 1024
            opts.allow_concurrent_memtable_write = True
            opts.max_bytes_for_level_base = 1 << 20
            opts.target_file_size_base = 1 << 22
            opts.num_levels = 1
            opts.level0_slowdown_writes_trigger = 1
            opts.table_cache_numshardbits = 1
            opts.manifest_preallocation_size = 128 * 1024
            opts.table_factory = rocksdb.BlockBasedTableFactory(no_block_cache=True, block_size=128*1024)
            if opts.create_if_missing:
                # RocksDB needs the parent directory to exist.
                os.makedirs(self.path, exist_ok=True)
            self.db = None
            # todo:0: parameterize max_retry_cnt
            max_retry_cnt = 30
            retry_cnt = 0
            # Retry open: another process may still hold the DB lock; gc the
            # stale handle and wait a second between attempts.
            while not self.db:
                try:
                    self.db = rocksdb.DB(self.path, opts)
                except rocksdb.errors.RocksIOError as e:
                    if retry_cnt > max_retry_cnt:
                        L.exception(f'failed to open path={self.path} after retry.')
                        raise e
                    retry_cnt += 1
                    L.warn(f'fail to open db path={self.path}. retry_cnt={retry_cnt}. db_dict={RocksdbAdapter.db_dict}')
                    gc.collect()
                    time.sleep(1)
            L.trace(f'RocksdbAdapter.__init__: path not in dict db path={self.path}')
            RocksdbAdapter.db_dict[self.path] = self.db
            RocksdbAdapter.count_dict[self.path] = 0
        else:
            # Path already open: reuse the shared handle and bump the count.
            L.trace(f'RocksdbAdapter.__init__: path in dict={self.path}')
            self.db = RocksdbAdapter.db_dict[self.path]
            prev_count = RocksdbAdapter.count_dict[self.path]
            RocksdbAdapter.count_dict[self.path] = prev_count + 1
            L.trace(f"RocksdbAdapter.__init__: path={self.path}, prev_count={prev_count}, cur_count={prev_count + 1}")
def __init__(
    self,
    database_path,
    database_name,
):
    """Open (creating if needed) the per-name key store located at
    ``<database_path>/keys/<database_name>`` and compact it once so reads
    start from a fully compacted state."""
    store_path = f'{database_path}/keys/{database_name}'
    os.makedirs(name=store_path, exist_ok=True)

    options = rocksdb.Options()
    options.create_if_missing = True
    options.max_open_files = 300000
    options.write_buffer_size = 67108864
    options.max_write_buffer_number = 3
    options.target_file_size_base = 67108864
    options.compression = rocksdb.CompressionType.no_compression
    options.table_factory = rocksdb.BlockBasedTableFactory(
        filter_policy=rocksdb.BloomFilterPolicy(bits_per_key=10),
    )

    self.database_obj = rocksdb.DB(db_name=store_path, opts=options)
    # Full-range compaction up front.
    self.database_obj.compact_range(begin=None, end=None)
def open(self):
    """Open the RocksDB at ``self.db_path()`` tuned for bulk ingest.

    Starts from the "production ready" settings of the python-rocksdb
    tutorial (https://python-rocksdb.readthedocs.io/en/latest/tutorial/index.html),
    then applies the fast-ingest recipe from the RocksDB FAQ
    ("Q: What's the fastest way to load data into RocksDB?").  Those
    settings defer compaction, so a manual compaction is required once
    the ingest completes.

    Fix: the original set ``write_buffer_size`` (64MB) and
    ``max_write_buffer_number`` (3) and then immediately overwrote them
    with the fast-ingest values (32MB / 8); the first assignments were
    dead stores and are removed.  Effective values are unchanged.
    """
    opts = rocksdb.Options()
    opts.create_if_missing = True
    opts.max_open_files = 300000
    opts.target_file_size_base = 64 * 2**20  # 64MB
    opts.merge_operator = self.NgramMergeOperator()
    opts.compression = rocksdb.CompressionType.lz4_compression
    # Fast-ingest settings — require manual compaction after ingest.
    opts.max_background_flushes = 8
    opts.level0_file_num_compaction_trigger = -1   # never auto-compact L0
    opts.level0_slowdown_writes_trigger = -1       # never throttle writes
    opts.level0_stop_writes_trigger = 2**16  # default is 24 -- we want to avoid hitting this until it's done
    opts.write_buffer_size = 32 * 2**20      # default is 4 * 2 ** 20
    opts.max_write_buffer_number = 8         # default is 2
    opts.table_factory = rocksdb.BlockBasedTableFactory(
        filter_policy=rocksdb.BloomFilterPolicy(10),
        block_cache=rocksdb.LRUCache(2 * 2**30),               # 2GB
        block_cache_compressed=rocksdb.LRUCache(500 * 2**20))  # 500MB
    self._db = rocksdb.DB(self.db_path(), opts, read_only=self.read_only)
def __init__(self, db_name, q=None):
    """Open the RocksDB store *db_name*, unless a work queue is supplied.

    :param db_name: path of the RocksDB database directory.
    :param q: optional queue; when given, no database is opened here.

    Fix: the original assigned ``max_open_files = 300000`` and then
    immediately overwrote it with ``-1``; the first assignment was a dead
    store and is removed (effective value unchanged).
    """
    import rocksdb
    self._db_name = db_name
    self._q = q
    if self._q is None:
        opts = rocksdb.Options()
        opts.create_if_missing = True
        opts.max_open_files = -1  # Dangerous: disables the open-file limit
        opts.write_buffer_size = 2 * 512 * 1024**2  # 1 GiB memtable
        opts.max_write_buffer_number = 3
        opts.target_file_size_base = 512 * 1024**2  # MB
        #opts.compression = rocksdb.CompressionType.zlib_compression
        opts.table_factory = rocksdb.BlockBasedTableFactory(
            filter_policy=rocksdb.BloomFilterPolicy(10),
            #cache_index_and_filter_blocks=True,
            #optimize_filters_for_hits=True,
            block_cache=rocksdb.LRUCache(5 * (1024**3)),
            block_size=64 * 1024,
            block_cache_compressed=rocksdb.LRUCache(500 * (1024**2)))
        self._db = rocksdb.DB(self._db_name, opts)


# Base-code constants (presumably nucleotide encodings: A, methylated A,
# C, methylated C, G, T). NOTE(review): the mangled source places this
# right after __init__ — confirm the intended scope.
A, mA, C, mC, G, T = 0, 1, 2, 3, 4, 5
def main():
    """Fill database ``<argv[2]>.db`` with random KV pairs until the total
    payload reaches ``<argv[1]>`` (a size like "10G") bytes.

    Keys are 40-byte big-endian sequence numbers; generation resumes from
    the last key already present (0 for a fresh database).

    Fixes:
    - A freshly created DB has no last key, so ``it.get()`` returns None
      and ``int.from_bytes(None, "big")`` raised TypeError; start at 0.
    - Removed the dead ``opts.create_if_missing = True`` re-assignment
      that appeared *after* the DB was already opened.
    """
    kvTotalSize, dbName = sys.argv[1], sys.argv[2] + ".db"
    # kvcntleft = int(sys.argv[3])
    m = re.match(r'(\d+)G', kvTotalSize)
    if not m:
        print("invalid format for kv size")
        return
    kvTotalSize = int(m.group(1)) << 30
    if os.path.exists(dbName):
        print("db already exists")
        return
    opts = rocksdb.Options()
    opts.create_if_missing = True
    opts.max_open_files = 10000
    opts.table_factory = rocksdb.BlockBasedTableFactory(
        filter_policy=rocksdb.BloomFilterPolicy(10),
        block_cache=rocksdb.LRUCache(8 << 20))
    db = rocksdb.DB(dbName, opts)
    curkvSize = 0
    it = db.iterkeys()
    it.seek_to_last()
    last_key = it.get()
    # Empty (new) database: no last key to resume from.
    seqi = int.from_bytes(last_key, "big") if last_key else 0
    print(seqi)
    while curkvSize < kvTotalSize:  # and kvcntleft:
        seqi += 1
        key = seqi.to_bytes(40, "big")
        value = generateValue()
        db.put(key, value)
        curkvSize += len(key) + len(value)
        #kvcntleft -= 1
    print("total number of keys: {}, total size in bytes: {}".format(
        seqi, curkvSize))
def rocks_opts(**kwargs):
    """Build a rocksdb.Options from *kwargs*, with a Bloom-filtered,
    LRU-cached block-based table factory attached."""
    # pylint: disable=no-member
    options = rocksdb.Options(**kwargs)
    options.table_factory = rocksdb.BlockBasedTableFactory(
        filter_policy=rocksdb.BloomFilterPolicy(10),
        block_cache=rocksdb.LRUCache(2 * (1024**3)),               # 2 GiB
        block_cache_compressed=rocksdb.LRUCache(500 * (1024**2)),  # 500 MiB
    )
    return options
def default_opts():
    """Return baseline rocksdb.Options: create-if-missing plus a
    Bloom-filtered, LRU-cached block-based table factory."""
    options = rocksdb.Options()
    options.create_if_missing = True
    factory = rocksdb.BlockBasedTableFactory(
        filter_policy=rocksdb.BloomFilterPolicy(10),
        block_cache=rocksdb.LRUCache(2 * (1024**3)),               # 2 GiB
        block_cache_compressed=rocksdb.LRUCache(500 * (1024**2)),  # 500 MiB
    )
    options.table_factory = factory
    return options
def setup(self):
    """Lazily open ``self.db`` (no-op when it is already open)."""
    if self.db is not None:
        return
    cfg = self.opts  # alias; mutations apply to the shared options object
    cfg.create_if_missing = True
    cfg.max_open_files = 300000
    cfg.write_buffer_size = 67108864
    cfg.max_write_buffer_number = 3
    cfg.target_file_size_base = 67108864
    cfg.merge_operator = UpdateData()
    cfg.table_factory = rocksdb.BlockBasedTableFactory(
        filter_policy=rocksdb.BloomFilterPolicy(10),
        block_cache=rocksdb.LRUCache(2 * (1024**3)),
        block_cache_compressed=rocksdb.LRUCache(500 * (1024**2)))
    self.db = rocksdb.DB(f"{self.dbname}.db", cfg)
def __init__(
    self,
    database_path,
    database_name,
):
    """Open (creating if needed) the per-name queue store at
    ``<database_path>/queues/<database_name>``, compact it, and cache the
    current first/last keys for queue-style access.

    Side effects: creates the directory, opens the DB, runs a full-range
    compaction, and leaves ``self.database_iterator`` positioned at the
    first key (when one exists).
    """
    database_path = '{database_path}/queues/{database_name}'.format(
        database_path=database_path,
        database_name=database_name,
    )
    os.makedirs(
        name=database_path,
        exist_ok=True,
    )
    rocksdb_options = rocksdb.Options()
    rocksdb_options.create_if_missing = True
    rocksdb_options.max_open_files = 300000
    rocksdb_options.write_buffer_size = 67108864
    rocksdb_options.max_write_buffer_number = 3
    rocksdb_options.target_file_size_base = 67108864
    rocksdb_options.compression = rocksdb.CompressionType.no_compression
    rocksdb_options.table_factory = rocksdb.BlockBasedTableFactory(
        filter_policy=rocksdb.BloomFilterPolicy(bits_per_key=10, ),
    )
    self.database_obj = rocksdb.DB(
        db_name=database_path,
        opts=rocksdb_options,
    )
    # Full-range compaction up front.
    self.database_obj.compact_range(
        begin=None,
        end=None,
    )
    self.database_iterator = self.database_obj.iteritems()
    # Tail of the queue: empty bytes when the DB has no entries.
    self.database_iterator.seek_to_last()
    try:
        key, value = next(self.database_iterator)
        self.last_key = key
    except StopIteration:
        self.last_key = b''
    # Head of the queue; re-seek so the iterator is left positioned at
    # the first key after the probing `next` advanced it.
    self.database_iterator.seek_to_first()
    try:
        key, value = next(self.database_iterator)
        self.first_key = key
        self.database_iterator.seek(self.first_key)
    except StopIteration:
        self.first_key = b''
    self.db_was_changed_recently = False
def default_db_options(uid_length=parms.uid_length):
    """Standard rocksdb.Options with a fixed-length prefix extractor.

    :param uid_length: key-prefix length handed to ``StaticPrefix``.
    """
    options = rocksdb.Options()
    options.create_if_missing = True
    options.prefix_extractor = StaticPrefix(uid_length)
    options.max_open_files = 300000
    options.write_buffer_size = 67108864   # 64 MiB memtable
    options.max_write_buffer_number = 3
    options.target_file_size_base = 67108864
    options.table_factory = rocksdb.BlockBasedTableFactory(
        filter_policy=rocksdb.BloomFilterPolicy(10),
        block_cache=rocksdb.LRUCache(2 * (1024 ** 3)),
        block_cache_compressed=rocksdb.LRUCache(500 * (1024 ** 2)))
    return options
def __init__(self, path: str = './', with_index: bool = True,
             cache_capacity: Optional[int] = None,
             use_memory_indexes: bool = False):
    """Open the transaction RocksDB under ``<path>/<_DB_NAME>`` with its
    tx/meta/attr column families, creating whatever is missing.

    :param path: base directory holding the database directory.
    :param with_index: forwarded to the superclass initializer.
    :param cache_capacity: optional LRU block-cache size in bytes; when
        None, no block cache is configured.
    :param use_memory_indexes: stored on the instance for later use.
    """
    import rocksdb
    self.log = logger.new()
    self._path = path
    self._use_memory_indexes = use_memory_indexes
    tx_dir = os.path.join(path, _DB_NAME)
    # `cache_capacity and ...` yields None when no capacity was given.
    lru_cache = cache_capacity and rocksdb.LRUCache(cache_capacity)
    table_factory = rocksdb.BlockBasedTableFactory(block_cache=lru_cache)
    options = rocksdb.Options(
        table_factory=table_factory,
        write_buffer_size=83886080,  # 80MB (default is 4MB)
        compression=rocksdb.CompressionType.no_compression,
        allow_mmap_writes=True,  # default is False
        allow_mmap_reads=True,  # default is already True
    )
    cf_names: List[bytes]
    try:
        # get the list of existing column families
        cf_names = rocksdb.list_column_families(tx_dir, options)
    except rocksdb.errors.RocksIOError:
        # this means the db doesn't exist, a repair will create one
        rocksdb.repair_db(tx_dir, options)
        cf_names = []
    # we need to open all column families
    column_families = {cf: rocksdb.ColumnFamilyOptions() for cf in cf_names}
    # finally, open the database
    self._db = rocksdb.DB(tx_dir, options, column_families=column_families)
    self.log.debug('open db', cf_list=[
        cf.name.decode('ascii') for cf in self._db.column_families
    ])
    # Create (or fetch) the three column families this store uses.
    self._cf_tx = self._get_or_create_column_family(_CF_NAME_TX)
    self._cf_meta = self._get_or_create_column_family(_CF_NAME_META)
    self._cf_attr = self._get_or_create_column_family(_CF_NAME_ATTR)
    super().__init__(with_index=with_index)
def __apply_db_config_opts(self, opts):
    """Copy every non-None setting from ``self._db_config`` onto *opts*.

    Scalar options are transferred one-to-one; the block/cache settings
    are bundled into a BlockBasedTableFactory when any of them is set.
    """
    cfg = self._db_config
    if cfg is None:
        return
    # Scalar options map 1:1 from config keys to rocksdb.Options attributes
    # (logging, compaction and memtable related).
    for key in ('max_open_files', 'max_log_file_size', 'keep_log_file_num',
                'db_log_dir', 'target_file_size_base',
                'write_buffer_size', 'max_write_buffer_number'):
        value = cfg[key]
        if value is not None:
            setattr(opts, key, value)
    # Only build a table factory when at least one block/cache knob is set.
    block_keys = ('block_size', 'block_cache_size',
                  'block_cache_compressed_size', 'no_block_cache')
    if all(cfg[k] is None for k in block_keys):
        return
    cache = (rocksdb.LRUCache(cfg['block_cache_size'])
             if cfg['block_cache_size'] is not None else None)
    compressed_cache = (
        rocksdb.LRUCache(cfg['block_cache_compressed_size'])
        if cfg['block_cache_compressed_size'] is not None else None)
    opts.table_factory = rocksdb.BlockBasedTableFactory(
        block_size=cfg['block_size'],
        block_cache=cache,
        block_cache_compressed=compressed_cache,
        no_block_cache=cfg['no_block_cache'])
def as_options(self) -> Options:
    """Materialize this configuration as a ``rocksdb.Options`` instance.

    ``extra_options`` entries are passed through as additional keyword
    arguments (duplicates of the base keys raise, as before).
    """
    table_factory = rocksdb.BlockBasedTableFactory(
        filter_policy=rocksdb.BloomFilterPolicy(self.bloom_filter_size),
        block_cache=rocksdb.LRUCache(self.block_cache_size),
        block_cache_compressed=rocksdb.LRUCache(
            self.block_cache_compressed_size),
    )
    base_kwargs = dict(
        create_if_missing=True,
        max_open_files=self.max_open_files,
        write_buffer_size=self.write_buffer_size,
        max_write_buffer_number=self.max_write_buffer_number,
        target_file_size_base=self.target_file_size_base,
        table_factory=table_factory,
    )
    return rocksdb.Options(**base_kwargs, **self.extra_options)
def get_rocks_db():
    """Open (or create) the local ``rocks.db`` database with automatic
    compaction disabled and 128 MiB write buffers / target files."""
    buf_bytes = 128 * 1024 * 1024
    options = rocksdb.Options()
    options.create_if_missing = True
    options.write_buffer_size = buf_bytes
    options.max_write_buffer_number = 2
    options.target_file_size_base = buf_bytes
    options.compression = rocksdb.CompressionType.no_compression
    options.disable_auto_compactions = True
    options.table_factory = rocksdb.BlockBasedTableFactory(
        block_size=512,
        filter_policy=rocksdb.BloomFilterPolicy(10),
        block_cache=rocksdb.LRUCache(2 * (1024 ** 3)),
        block_cache_compressed=rocksdb.LRUCache(500 * (1024 ** 2)))
    return rocksdb.DB('rocks.db', options)
def initRocks(self, ind):
    """Open (creating if missing) the shard database
    ``<self.dirs>/<ind>.db`` tuned for heavy write throughput."""
    options = rocksdb.Options()
    options.create_if_missing = True
    options.max_open_files = 300000
    options.write_buffer_size = 1073741824  # 1 GiB memtable
    options.max_write_buffer_number = 20
    options.target_file_size_base = 67108864
    options.max_background_compactions = 8
    options.max_background_flushes = 4
    options.table_factory = rocksdb.BlockBasedTableFactory(
        filter_policy=rocksdb.BloomFilterPolicy(10),
        block_cache=rocksdb.LRUCache(2 * (1024 ** 3)),
        block_cache_compressed=rocksdb.LRUCache(500 * (1024 ** 2)))
    return rocksdb.DB(self.dirs + '/' + ind + '.db', options)
def init_store(config):
    """ Only called if using rocksdb for now. """
    # Binds the module-level handle used by the rest of this module.
    global hashes_db
    options = rocksdb.Options()
    options.create_if_missing = True
    options.max_open_files = 10000
    options.write_buffer_size = 67108864
    options.max_write_buffer_number = 3
    options.target_file_size_base = 67108864
    options.table_factory = rocksdb.BlockBasedTableFactory(
        filter_policy=rocksdb.BloomFilterPolicy(10),
        block_cache=rocksdb.LRUCache(2 * (1024 ** 3)),
        block_cache_compressed=rocksdb.LRUCache(500 * (1024 ** 2)))
    db_path = os.path.join(config.storage.folder.value, HASHES_STORAGE)
    hashes_db = rocksdb.DB(db_path, options)
def post_init(self):
    """Open the RocksDB at ``self.data_path``; entries in ``self.kwargs``
    are applied on top of (and override) the defaults below."""
    import rocksdb
    options = rocksdb.Options()
    options.create_if_missing = True
    options.max_open_files = 300000
    options.write_buffer_size = 67108864
    options.max_write_buffer_number = 3
    options.target_file_size_base = 67108864
    options.table_factory = rocksdb.BlockBasedTableFactory(
        filter_policy=rocksdb.BloomFilterPolicy(10),
        block_cache=rocksdb.LRUCache(2 * (1024**3)),
        block_cache_compressed=rocksdb.LRUCache(500 * (1024**2)))
    # Caller-supplied overrides win over the defaults above.
    for name, value in self.kwargs.items():
        setattr(options, name, value)
    self._db = rocksdb.DB(self.data_path, options, read_only=self.read_only)
def get_rocksdb_options():
    """Build rocksdb.Options with zstd compression, a string-append merge
    operator, bounded log retention and a 1 GiB block cache."""
    options = rocksdb.Options()
    options.create_if_missing = True
    options.merge_operator = StringAddOperator()
    options.compression = CompressionType.zstd_compression
    options.max_open_files = 300000
    options.write_buffer_size = 67 * 1024**2       # 67 MiB memtable
    options.max_write_buffer_number = 3
    options.target_file_size_base = 256 * 1024**2  # 256 MiB SST files
    options.max_log_file_size = 4 * 1024**2
    options.keep_log_file_num = 100
    # we want to set this option, but it's not included in the python client
    # options.optimize_filters_for_hits = True
    options.table_factory = rocksdb.BlockBasedTableFactory(
        block_cache=rocksdb.LRUCache(1 * 1024**3),
        block_size=16 * 1024,
        filter_policy=rocksdb.BloomFilterPolicy(10),
    )
    return options
def init():
    """Open the database named in ``config['rocksdb']``, with all numeric
    tunables read from that config section."""
    section = config['rocksdb']
    options = rocksdb.Options()
    options.create_if_missing = True
    options.max_open_files = int(section['max_open_files'])
    options.write_buffer_size = int(section['write_buffer_size'])
    options.max_write_buffer_number = int(section['max_write_buffer_number'])
    options.target_file_size_base = int(section['target_file_size_base'])
    options.table_factory = rocksdb.BlockBasedTableFactory(
        filter_policy=rocksdb.BloomFilterPolicy(
            int(section['filter_policy'])),
        block_cache=rocksdb.LRUCache(2 * (1024**3)),
        block_cache_compressed=rocksdb.LRUCache(500 * (1024**2)))
    # TODO find a way to add block_cache* in config, only this format is allowed for rocksdb
    # https://python-rocksdb.readthedocs.io/en/latest/tutorial/#prefixextractor
    options.prefix_extractor = StaticPrefix()
    return rocksdb.DB(section['db_file'], options)
def __init__(self, read_only=False, filepath=None, max_open_files=300000,
             write_buffer_size=67108864, max_write_buffer_number=3,
             target_file_size_base=67108864, **kwargs):
    """Open the RocksDB at *filepath* with an ``AssocCounter`` merge
    operator, then delegate to the superclass with *read_only*.

    NOTE(review): **kwargs is accepted but not used in this initializer —
    kept for signature compatibility.
    """
    options = rocksdb.Options()
    options.create_if_missing = True
    options.max_open_files = max_open_files
    options.write_buffer_size = write_buffer_size
    options.max_write_buffer_number = max_write_buffer_number
    options.target_file_size_base = target_file_size_base
    options.table_factory = rocksdb.BlockBasedTableFactory(
        filter_policy=rocksdb.BloomFilterPolicy(10),
        block_cache=rocksdb.LRUCache(2 * (1024**3)),
        block_cache_compressed=rocksdb.LRUCache(500 * (1024**2)))
    options.merge_operator = AssocCounter()
    self._db = rocksdb.DB(filepath, options)
    super().__init__(read_only)
def initdb(fn):
    """Initialize a database with a given name

    Arguments:
    fn - name of the database
    """
    # create database
    options = rocksdb.Options()
    options.create_if_missing = True
    options.max_open_files = 300000
    options.write_buffer_size = 67108864
    options.max_write_buffer_number = 3
    options.target_file_size_base = 67108864
    options.table_factory = rocksdb.BlockBasedTableFactory(
        filter_policy=rocksdb.BloomFilterPolicy(10),
        block_cache=rocksdb.LRUCache(2 * (1024**3)),
        block_cache_compressed=rocksdb.LRUCache(500 * (1024**2)))
    handle = rocksdb.DB(fn + '.db', options)
    print(
        "\x1b[6;30;42m[+]\x1b[0m setting up database {}.db in current directory..."
        .format(fn))
    return handle
def test_table_factory(self):
    """table_factory defaults to None and accepts both factory types."""
    options = rocksdb.Options()
    self.assertIsNone(options.table_factory)
    for factory in (rocksdb.BlockBasedTableFactory(),
                    rocksdb.PlainTableFactory()):
        options.table_factory = factory
def test_block_options(self):
    """BlockBasedTableFactory accepts block_size, a custom filter policy
    and an LRU block cache without raising."""
    factory_kwargs = dict(
        block_size=4096,
        filter_policy=TestFilterPolicy(),
        block_cache=rocksdb.LRUCache(100),
    )
    rocksdb.BlockBasedTableFactory(**factory_kwargs)
required=False) global args args = parser.parse_args() if not args.memory: import rocksdb # create database for nodes opts = rocksdb.Options() opts.create_if_missing = True opts.max_open_files = 300000 opts.write_buffer_size = 67108864 opts.max_write_buffer_number = 3 opts.target_file_size_base = 67108864 opts.table_factory = rocksdb.BlockBasedTableFactory( filter_policy=rocksdb.BloomFilterPolicy(10), block_cache=rocksdb.LRUCache(2 * (1024**3)), block_cache_compressed=rocksdb.LRUCache(500 * (1024**2))) db = rocksdb.DB('nodes.db', opts) if args.verbose: print( "\x1b[6;30;42m[+]\x1b[0m setting up database nodes.db in current directory..." ) else: db = dict() if args.size is None: # auto set base graph size in # of edges total = 0 with open(args.input) as f: for line in f:
# Script body: parse CLI args, then ingest an ImageFolder dataset into a
# RocksDB tuned for one-shot bulk loading (huge memtables, no compression).
# Candidate output sizes: 128,192,256,384,512,768,1024,1536 capped at 1024.
sizes = (64 * 2**i * (j + 2) for i in range(4) for j in range(2))
sizes = [s for s in sizes if s <= 1024]
parser.add_argument('--out', type=str, default="out.lmdb")
parser.add_argument('--size', type=str, default=','.join(map(str, sizes)))
parser.add_argument('--n_worker', type=int, default=12)
parser.add_argument('path', type=str)
args = parser.parse_args()
sizes = [int(s.strip()) for s in args.size.split(',')]
print(f'Make dataset of image sizes:', ', '.join(str(s) for s in sizes))
imgset = datasets.ImageFolder(args.path)
opts = rocksdb.Options()
opts.create_if_missing = True
opts.write_buffer_size = 2**34        # 16 GiB memtable
opts.max_write_buffer_number = 2**14
opts.max_open_files = -1
opts.compression = rocksdb.CompressionType.no_compression
opts.target_file_size_base = 2**26
opts.target_file_size_multiplier = 4
opts.max_bytes_for_level_base = 2**28
# NOTE(review): 2**36 preallocates a 64 GiB manifest — confirm intended.
opts.manifest_preallocation_size = 2**36
opts.table_factory = rocksdb.BlockBasedTableFactory(
    filter_policy=rocksdb.BloomFilterPolicy(32),
    block_cache=rocksdb.LRUCache(2**34, 32),
)
# NOTE(review): the --out argument is parsed but ignored; the database is
# hard-coded to "out.rdb" — confirm whether args.out should be used here.
db = rocksdb.DB("out.rdb", opts)
prepare(db, imgset, args.n_worker, sizes=sizes)
# max_jobs = cpus - 1 # print("Found " + str(cpus) + " CPU cores on your system. " + str(max_jobs) + " cores will be used.") opts = rocksdb.Options() # Create new instance if not already present opts.create_if_missing = True # We have A LOT of BTC-Transactions, so file open limit should be increased opts.max_open_files = 1000000 # Increase buffer size since I/O is the bottleneck, not RAM opts.write_buffer_size = db_memory * 0.3 opts.max_write_buffer_number = 3 opts.target_file_size_base = 67108864 # Bloom filters for faster lookup opts.table_factory = rocksdb.BlockBasedTableFactory( filter_policy=rocksdb.BloomFilterPolicy(12), block_cache=rocksdb.LRUCache(db_memory * 0.3), block_cache_compressed=rocksdb.LRUCache(db_memory * 0.4)) # Load RocksDB Database db = rocksdb.DB(DB_PATH, opts) # Load Blockchain blockchain = Blockchain(BLOCK_PATH) # Initialize iterator with respect to user specifications if END_BLOCK < 1: blockchain = blockchain.get_ordered_blocks(INDEX_PATH, start=START_BLOCK) TOTAL_BLOCKS = len(blockchain.blockIndexes) print("Processing the entire blockchain.") print("INFO: Depending on your system, this process may take up to a week. You can interrupt the process " + "at any time by pressing CTRL+C.") iterator = blockchain