def cleanup_cube(cube, store):
    """
    Delete all untracked keys belonging to a cube.

    .. important::
        Every untracked key that starts with the cube's `uuid_prefix` followed by
        the `KTK_CUBE_UUID_SEPERATOR` (e.g. `my_cube_uuid++seed...`) is removed by
        this routine. Such keys are typically leftovers from past overwrites or
        index updates.

    Parameters
    ----------
    cube: Cube
        Cube specification.
    store: Union[simplekv.KeyValueStore, Callable[[], simplekv.KeyValueStore]]
        KV store, or a factory that produces one.
    """
    # Accept either a ready store instance or a zero-argument factory.
    store_instance = store() if callable(store) else store
    discovered = discover_datasets_unchecked(
        uuid_prefix=cube.uuid_prefix, store=store_instance
    )
    stale_keys = get_keys_to_clean(cube.uuid_prefix, discovered, store_instance)
    # Sorted order keeps deletion deterministic.
    for key in sorted(stale_keys):
        store_instance.delete(key)
def cleanup_cube_bag(cube, store, blocksize=100):
    """
    Remove unused keys from cube datasets.

    .. important::
        All untracked keys which start with the cube's `uuid_prefix` followed by the
        `KTK_CUBE_UUID_SEPERATOR` (e.g. `my_cube_uuid++seed...`) will be deleted by
        this routine. These keys may be leftovers from past overwrites or index
        updates.

    Parameters
    ----------
    cube: Cube
        Cube specification.
    store: Callable[[], simplekv.KeyValueStore]
        KV store factory.
    blocksize: int
        Number of keys to delete at once.

    Returns
    -------
    bag: dask.bag.Bag
        A dask bag that performs the given operation. May contain multiple
        partitions.
    """
    check_store_factory(store)
    check_blocksize(blocksize)
    # Discovery and key collection happen eagerly on the driver, so use one
    # materialized store instance for both calls (previously the raw factory was
    # passed to ``discover_datasets_unchecked`` while the instance went to
    # ``get_keys_to_clean``; this also matches ``cleanup_cube``).
    store_obj = store()
    datasets = discover_datasets_unchecked(
        uuid_prefix=cube.uuid_prefix, store=store_obj
    )
    keys = get_keys_to_clean(cube.uuid_prefix, datasets, store_obj)
    # The *factory* (not the instance) is shipped to the workers so every
    # partition can open its own store connection.
    return db.from_sequence(seq=sorted(keys), partition_size=blocksize).map_partitions(
        _delete, store=store
    )