def test_lru_cache():
    """Check eviction, deletion, and flushing of an LRUCache over two in-memory stores.

    Every value is the same 11-byte payload and the cache is built with a
    limit argument of 30, so the assertions below expect the oldest key to
    leave the cache storage once a third key is written.
    """
    payload = "Hello World".encode("utf-8")
    cache = LRUCache(zarr.MemoryStore(), zarr.MemoryStore(), 30)

    for key in ("Aello", "Beta"):
        cache[key] = payload
    assert "Aello" in cache._cached_items
    assert "Beta" in cache._cached_items
    assert "Aello" in cache.cache_storage
    assert "Beta" in cache.cache_storage

    # Same key written twice in a row: the second write hits a key that the
    # first write has just stored.
    for _ in range(2):
        cache["Gamma"] = payload
    assert "Aello" not in cache._cached_items
    assert "Aello" not in cache.cache_storage
    assert "Gamma" in cache._cached_items
    assert "Gamma" in cache.cache_storage
    assert sorted(cache) == ["Aello", "Beta", "Gamma"]
    assert sorted(cache.cache_storage) == ["Beta", "Gamma"]
    assert sorted(cache.actual_storage) == ["Aello"]

    del cache["Gamma"]
    assert sorted(cache) == ["Aello", "Beta"]
    assert sorted(cache.cache_storage) == ["Beta"]

    # Plain reads; the results are intentionally discarded.
    cache["Aello"]
    cache["Beta"]

    # Deleting an unknown key is allowed to raise KeyError.
    try:
        del cache["KeyError"]
    except KeyError:
        pass

    # "Beta" is only expected in the actual storage after flush().
    assert sorted(cache.actual_storage) == ["Aello"]
    cache.flush()
    assert sorted(cache.actual_storage) == ["Aello", "Beta"]
    cache.commit()
def __init__(
    self,
    params,
    zarr_store=None,
    simulator=None,
    sync_path: Optional[PathType] = None,
):
    """Set up an iP3 store that lives in memory.

    Args:
        params: Forwarded unchanged to the parent initializer.
        zarr_store (zarr.MemoryStore, zarr.DirectoryStore): optional, used in
            loading. When None, a new empty ``zarr.MemoryStore`` is created.
        simulator: Forwarded unchanged to the parent initializer.
        sync_path: Forwarded unchanged to the parent initializer.
    """
    if zarr_store is not None:
        logging.debug("Creating MemoryStore from zarr_store.")
    else:
        zarr_store = zarr.MemoryStore()
        logging.debug("Creating new empty MemoryStore.")

    parent_kwargs = dict(
        params=params,
        zarr_store=zarr_store,
        simulator=simulator,
        sync_path=sync_path,
    )
    super().__init__(**parent_kwargs)
def copy(self, sync_path=None):
    """Return a new MemoryStore whose zarr data is copied from this one.

    Args:
        sync_path: Passed to the new MemoryStore as its ``sync_path``.

    Returns:
        A MemoryStore built from this object's ``params`` and ``_simulator``
        and from a fresh ``zarr.MemoryStore`` filled via ``copy_store``.
    """
    duplicate = zarr.MemoryStore()
    zarr.convenience.copy_store(source=self.zarr_store, dest=duplicate)
    init_kwargs = dict(
        params=self.params,
        zarr_store=duplicate,
        simulator=self._simulator,
        sync_path=sync_path,
    )
    return MemoryStore(**init_kwargs)
def load(cls, path: PathType):
    """Read a DirectoryStore at ``path`` into memory and wrap it in a MemoryStore.

    Args:
        path: Location handed to ``zarr.DirectoryStore``.

    Returns:
        A MemoryStore constructed with the ``zdim`` and ``xshape`` extracted
        from the copied zarr group.
    """
    in_memory = zarr.MemoryStore()
    on_disk = zarr.DirectoryStore(path)
    zarr.convenience.copy_store(source=on_disk, dest=in_memory)

    root = zarr.group(store=in_memory)
    shape_of_x = cls._extract_xshape_from_zarr_group(root)
    n_z = cls._extract_zdim_from_zarr_group(root)
    return MemoryStore(zdim=n_z, xshape=shape_of_x, store=in_memory)
def get_storage_map(fs, path, memcache=2 ** 26, lock=True, storage_cache=2 ** 28):
    """Build the store for ``path`` and optionally wrap it in LRU cache layers.

    Args:
        fs: Passed to ``_get_storage_map``.
        path: Passed to ``_get_storage_map`` and ``get_cache_path``.
        memcache: Size argument for an in-memory LRUCache layer; the layer is
            skipped when this is falsy or not greater than zero.
        lock: Passed as ``lock`` to ``zarr.LMDBStore``.
        storage_cache: Size argument for an LMDB-backed LRUCache layer; the
            layer is skipped when this is falsy or not greater than zero.

    Returns:
        The base store, wrapped first by the LMDB layer and then by the
        memory layer, each only when enabled.
    """

    def _enabled(size):
        # Same truthiness as the inline check: None/0 disable the layer.
        return size and size > 0

    layered = _get_storage_map(fs, path)
    cache_path = get_cache_path(path)

    if _enabled(storage_cache):
        os.makedirs(cache_path, exist_ok=True)
        disk_layer = zarr.LMDBStore(cache_path, buffers=True, lock=lock)
        layered = LRUCache(disk_layer, layered, storage_cache)

    if _enabled(memcache):
        layered = LRUCache(zarr.MemoryStore(), layered, memcache)

    return layered
def main():
    """Register the PNG codec, fill a uint8 zarr array of ones, and print its shape.

    The array is created in a ``zarr.MemoryStore`` with ``PngCodec`` as its
    compressor; the work runs inside a ``Timer("Compress")`` context.
    """
    numcodecs.register_codec(PngCodec, "png")
    full_shape = (10, 10, 1920, 1080, 7)

    with Timer("Compress"):
        codec = PngCodec(solo_channel=True)
        backing = zarr.MemoryStore()
        arr = zarr.create(
            shape=full_shape,
            dtype="uint8",
            compressor=codec,
            store=backing,
        )
        arr[:] = np.ones(full_shape, dtype="uint8")
        # Reading the whole array back before printing its shape.
        print(arr[:].shape)
def __init__(
    self,
    dataset: xr.Dataset,
    model: Model,
    zobject: Optional[Union[zarr.Group, MutableMapping, str]] = None,
    encoding: Optional[EncodingDict] = None,
    decoding: Optional[Dict] = None,
    batch_dim: Optional[str] = None,
    lock: Optional[Any] = None,
):
    """Bind a dataset and model to a zarr group used as the simulation store.

    Args:
        dataset: Dataset whose ``xsimlab`` accessor supplies output
            variables, save steps and clock information.
        model: Model stored on the instance and used to collect variable info.
        zobject: A ``zarr.Group`` is used as is; ``None`` creates a group on a
            new ``zarr.MemoryStore`` (and sets ``in_memory``); anything else
            is passed to ``zarr.group`` as the store.
        encoding: Passed to ``_get_var_info``; defaults to an empty dict.
        decoding: Stored on the instance; defaults to an empty dict.
        batch_dim: Stored on the instance and used to compute ``batch_size``.
        lock: Stored on the instance; a ``DummyLock`` is used when None.
    """
    self.dataset = dataset
    self.model = model
    self.consolidated = False

    # Only the "no zobject given" case counts as in-memory.
    self.in_memory = zobject is None
    if self.in_memory:
        self.zgroup = zarr.group(store=zarr.MemoryStore())
    elif isinstance(zobject, zarr.Group):
        self.zgroup = zobject
    else:
        self.zgroup = zarr.group(store=zobject)

    self.output_vars = dataset.xsimlab.output_vars_by_clock
    self.output_save_steps = dataset.xsimlab.get_output_save_steps()

    encoding = {} if encoding is None else encoding
    self.decoding = {} if decoding is None else decoding

    self.var_info = _get_var_info(dataset, model, encoding)

    self.batch_dim = batch_dim
    self.batch_size = get_batch_size(dataset, batch_dim)

    self.mclock_dim = dataset.xsimlab.main_clock_dim
    self.clock_sizes = dataset.xsimlab.clock_sizes

    # initialize clock incrementers
    self.clock_incs = self._init_clock_incrementers()

    # ensure no dataset conflict in zarr group
    taken_names = [info["name"] for info in self.var_info.values()]
    ensure_no_dataset_conflict(self.zgroup, taken_names)

    self.lock = DummyLock() if lock is None else lock
def test_nested_store():
    """Check item set/get/delete, length, and iteration of a NestedStore.

    The store wraps a ``zarr.MemoryStore`` and is built with ``"hello"`` as
    its second argument.
    """
    nested = NestedStore(zarr.MemoryStore(), "hello")

    nested["item"] = "Hello World".encode("utf-8")
    assert nested["item"] == "Hello World".encode("utf-8")

    del nested["item"]
    assert nested.get("item") is None

    nested["item1"] = "Hello World 1".encode("utf-8")
    nested["item2"] = "Hello World 2".encode("utf-8")
    assert len(nested) == 2
    assert tuple(nested) == ("item1", "item2")

    # close() may be unsupported; if it raises AttributeError, the message
    # must be the missing-attribute one.
    try:
        nested.close()
    except AttributeError as ex:
        assert "object has no attribute 'close'" in str(ex)
def load(cls, path: PathType):
    """Read a DirectoryStore at ``path`` into memory and wrap it in a MemoryStore.

    Args:
        path: Location handed to ``zarr.DirectoryStore``.

    Returns:
        A MemoryStore over the in-memory copy, with ``params`` set to the
        length of axis 1 of the array stored under ``cls._filesystem.pars``.
    """
    in_memory = zarr.MemoryStore()
    on_disk = zarr.DirectoryStore(path)
    zarr.convenience.copy_store(source=on_disk, dest=in_memory)

    root = zarr.group(store=in_memory)
    pars_array = root[cls._filesystem.pars]
    return MemoryStore(params=pars_array.shape[1], zarr_store=in_memory)
def __init__(self, params, zarr_store=None, simulator=None):
    """Set up an iP3 store that lives in memory.

    Args:
        params (list of strings or int): List of parameter names. If int
            use ['z0', 'z1', ...].
        zarr_store (zarr.MemoryStore, zarr.DirectoryStore): optional, used in
            loading. When None, a new empty ``zarr.MemoryStore`` is created.
        simulator: simulator object.
    """
    if zarr_store is not None:
        log.debug("Creating MemoryStore from zarr_store.")
    else:
        zarr_store = zarr.MemoryStore()
        log.debug("Creating new empty MemoryStore.")

    parent_kwargs = dict(params=params, zarr_store=zarr_store, simulator=simulator)
    super().__init__(**parent_kwargs)
def get_storage_map(fs, path, memcache=2**26, lock=True, storage_cache=2**28):
    """Build the store for ``path`` and optionally wrap it in a memory LRU cache.

    Args:
        fs: Passed to ``_get_storage_map``.
        path: Passed to ``_get_storage_map``.
        memcache: Size argument for the in-memory LRUCache layer; the layer is
            skipped when this is falsy or not greater than zero.
        lock: Accepted but not used by this implementation.
        storage_cache: Accepted but not used by this implementation.

    Returns:
        The base store, wrapped in an LRUCache when ``memcache`` enables it.
    """
    base = _get_storage_map(fs, path)
    if not (memcache and memcache > 0):
        return base
    return LRUCache(zarr.MemoryStore(), base, memcache)
class TestZarrSimulationStore:
    """Tests for ZarrSimulationStore: construction, writing, encoding, reopening."""

    @pytest.mark.parametrize(
        "zobj", [None, "dir", zarr.MemoryStore(), zarr.group()]
    )
    def test_constructor(self, in_ds, model, zobj, tmpdir):
        """The store can be built from no target, a directory, a store, or a group."""
        # "dir" is a placeholder that is swapped for the temporary directory.
        target = str(tmpdir) if zobj == "dir" else zobj

        sim_store = ZarrSimulationStore(in_ds, model, zobject=target)

        assert sim_store.zgroup.store is not None
        assert sim_store.batch_size == -1
        if target is None:
            assert sim_store.in_memory is True

    def test_constructor_batch(self, store_batch):
        """The batch fixture reports a batch size of two."""
        assert store_batch.batch_size == 2

    def test_constructor_conflict(self, in_ds, model):
        """A group that already holds an output dataset name is rejected."""
        occupied = zarr.group()
        occupied.create_dataset("profile__u", shape=(1, 1))

        with pytest.raises(ValueError, match=r".*already contains.*"):
            ZarrSimulationStore(in_ds, model, zobject=occupied)

    def test_write_input_xr_dataset(self, in_ds, store):
        """The written input dataset round-trips without its output variables."""
        store.write_input_xr_dataset()
        reloaded = xr.open_zarr(store.zgroup.store, chunks=None)

        # the output variable is dropped from the reference before comparing
        del in_ds["add__offset"]
        xr.testing.assert_equal(reloaded, in_ds)

        # output variable attrs must be gone from the saved input dataset
        assert not reloaded.xsimlab.output_vars

    def test_write_output_vars(self, in_ds, store):
        """Outputs are written per clock step, leaving unsaved slots as NaN."""
        u_values = [1.0, 2.0, 3.0]
        u_diff_values = [-1.0, 1.0, 0.0]

        model = store.model
        model.state[("profile", "u")] = np.array(u_values)
        model.state[("roll", "u_diff")] = np.array(u_diff_values)
        model.state[("add", "offset")] = 2.0

        store.write_output_vars(-1, 0)

        written = zarr.open_group(store.zgroup.store, mode="r")

        assert written["profile__u"].shape == (in_ds.clock.size, 3)
        np.testing.assert_array_equal(written["profile__u"][0], np.array(u_values))

        assert written["roll__u_diff"].shape == (in_ds.out.size, 3)
        np.testing.assert_array_equal(
            written["roll__u_diff"][0], np.array(u_diff_values)
        )

        assert written["add__u_diff"].shape == (in_ds.out.size,)
        np.testing.assert_array_equal(
            written["add__u_diff"], np.array([2.0, np.nan, np.nan])
        )

        # step saved on the main clock but not on the out clock
        store.write_output_vars(-1, 1)
        np.testing.assert_array_equal(written["profile__u"][1], np.array(u_values))
        np.testing.assert_array_equal(
            written["roll__u_diff"][1], np.array([np.nan, np.nan, np.nan])
        )

        # outputs that are not tied to any clock
        store.write_output_vars(-1, -1)
        np.testing.assert_array_equal(
            written["profile__u_opp"], np.array([-1.0, -2.0, -3.0])
        )
        assert written["add__offset"][()] == 2.0

    def test_write_output_vars_error(self, store):
        """A value with an unexpected number of dimensions raises ValueError."""
        model = store.model
        model.state[("profile", "u")] = np.array(
            [[1.0, 2.0, 3.0], [4.0, 5.0, 6.0]]
        )
        model.state[("roll", "u_diff")] = np.array([-1.0, 1.0, 0.0])
        model.state[("add", "offset")] = 2.0

        with pytest.raises(ValueError, match=r".*accepted dimension.*"):
            store.write_output_vars(-1, 0)

    def test_write_output_vars_batch(self, store_batch, model_batch1, model_batch2):
        """Each batch member is written to its own index along the first axis."""
        per_batch = [
            (model_batch1, [1.0, 2.0, 3.0], [-1.0, 1.0, 0.0], 2.0),
            (model_batch2, [4.0, 5.0, 6.0], [0.0, 1.0, -1.0], 3.0),
        ]
        for batch_model, u_values, u_diff_values, offset in per_batch:
            batch_model.state[("profile", "u")] = np.array(u_values)
            batch_model.state[("roll", "u_diff")] = np.array(u_diff_values)
            batch_model.state[("add", "offset")] = offset

        for batch_index, (batch_model, _, _, _) in enumerate(per_batch):
            store_batch.write_output_vars(batch_index, 0, model=batch_model)

        written = zarr.open_group(store_batch.zgroup.store, mode="r")

        assert written["profile__u"].ndim == 3
        np.testing.assert_array_equal(
            written["profile__u"][:, 0, :],
            np.array([[1.0, 2.0, 3.0], [4.0, 5.0, 6.0]]),
        )

        for batch_index, (batch_model, _, _, _) in enumerate(per_batch):
            store_batch.write_output_vars(batch_index, -1, model=batch_model)

        np.testing.assert_array_equal(
            written["add__offset"][:], np.array([2.0, 3.0])
        )

        # default chunk size along the batch dimension
        assert written["profile__u"].chunks[0] == 1

    def test_write_index_vars(self, store):
        """Index variables are written under their own name."""
        x_values = [1.0, 2.0, 3.0]
        store.model.state[("init_profile", "x")] = np.array(x_values)

        store.write_index_vars()

        written = zarr.open_group(store.zgroup.store, mode="r")
        np.testing.assert_array_equal(written["x"], np.array(x_values))

    def test_write_index_vars_batch(self, store_batch, model_batch1):
        """Index variables written from a batch model carry no batch dimension."""
        x_values = [1.0, 2.0, 3.0]
        model_batch1.state[("init_profile", "x")] = np.array(x_values)

        store_batch.write_index_vars(model=model_batch1)

        written = zarr.open_group(store_batch.zgroup.store, mode="r")
        np.testing.assert_array_equal(written["x"], np.array(x_values))

    def test_write_global_vars(self):
        """A global reference is written using the referenced variable's value."""

        # variable metadata (dims, etc.) must be reachable through the
        # global reference declared in Bar
        @xs.process
        class Foo:
            var = xs.variable(dims="x", global_name="global_var", intent="out")

        @xs.process
        class Bar:
            var = xs.global_ref("global_var")

        model = xs.Model({"foo": Foo, "bar": Bar})
        in_ds = xs.create_setup(
            model=model,
            clocks={"clock": [0, 1]},
            output_vars={"bar__var": None},
        )
        sim_store = ZarrSimulationStore(in_ds, model)

        model.state[("foo", "var")] = np.array([1, 2, 3])
        sim_store.write_output_vars(-1, -1)

        written = zarr.open_group(sim_store.zgroup.store, mode="r")
        np.testing.assert_array_equal(written["bar__var"], np.array([1, 2, 3]))

    def test_resize_zarr_dataset(self):
        """Arrays grow to the largest written size; shorter rows are NaN-padded."""

        @xs.process
        class P:
            arr = xs.variable(dims="x", intent="out")

        model = xs.Model({"p": P})
        in_ds = xs.create_setup(
            model=model,
            clocks={"clock": [0, 1, 2]},
            output_vars={"p__arr": "clock"},
        )
        sim_store = ZarrSimulationStore(in_ds, model)

        # steps 0, 1, 2 write arrays of length 1, 3, 2 respectively
        for step, size in enumerate([1, 3, 2]):
            model.state[("p", "arr")] = np.ones(size)
            sim_store.write_output_vars(-1, step)

        written = zarr.open_group(sim_store.zgroup.store, mode="r")
        expected = np.array(
            [
                [1.0, np.nan, np.nan],
                [1.0, 1.0, 1.0],
                [1.0, 1.0, np.nan],
            ]
        )
        np.testing.assert_array_equal(written["p__arr"], expected)

    def test_encoding(self):
        """Encoding from the store constructor overrides encoding set on variables."""

        @xs.process
        class P:
            v1 = xs.variable(dims="x", intent="out", encoding={"dtype": np.int32})
            v2 = xs.on_demand(dims="x", encoding={"fill_value": 0})
            v3 = xs.index(dims="x")
            v4 = xs.variable(
                dims="x",
                intent="out",
                encoding={"dtype": object, "object_codec": zarr.codecs.Pickle()},
            )

            @v2.compute
            def _get_v2(self):
                return [0]

        model = xs.Model({"p": P})
        in_ds = xs.create_setup(
            model=model,
            clocks={"clock": [0]},
            output_vars=dict.fromkeys(["p__v1", "p__v2", "p__v3", "p__v4"]),
        )
        store_encoding = {
            "p__v2": {"fill_value": -1},
            "p__v3": {"chunks": (10,)},
        }
        sim_store = ZarrSimulationStore(in_ds, model, encoding=store_encoding)

        model.state[("p", "v1")] = [0]
        model.state[("p", "v3")] = [0]
        model.state[("p", "v4")] = [{"foo": "bar"}]
        sim_store.write_output_vars(-1, -1)

        written = zarr.open_group(sim_store.zgroup.store, mode="r")

        assert written["p__v1"].dtype == np.int32
        # precedence: ZarrSimulationStore encoding > model variable encoding
        assert written["p__v2"].fill_value == -1
        assert written["p__v3"].chunks == (10,)
        assert written["p__v4"][0] == {"foo": "bar"}

    def test_fill_values(self):
        """The second value of each output comes back as NaN in the run result."""

        @xs.process
        class Foo:
            v_int64 = xs.variable(dims="x", intent="out")
            v_float64 = xs.variable(dims="x", intent="out")
            v_uint8 = xs.variable(dims="x", intent="out", encoding={"dtype": np.uint8})
            v_string = xs.variable(dims="x", intent="out")
            v_bool = xs.variable(dims="x", intent="out")

            def initialize(self):
                self.v_int64 = [0, np.iinfo("int64").max]
                self.v_float64 = [0.0, np.nan]
                self.v_uint8 = [0, 255]
                self.v_string = ["hello", ""]
                self.v_bool = [True, False]

        model = xs.Model({"foo": Foo})
        in_ds = xs.create_setup(
            model=model,
            clocks={"clock": [0, 1]},
            output_vars=dict.fromkeys(
                [
                    "foo__v_int64",
                    "foo__v_float64",
                    "foo__v_uint8",
                    "foo__v_string",
                    "foo__v_bool",
                ]
            ),
        )
        out_ds = in_ds.xsimlab.run(model=model)

        np.testing.assert_equal(out_ds["foo__v_int64"].data, [0, np.nan])
        np.testing.assert_equal(out_ds["foo__v_float64"].data, [0.0, np.nan])
        np.testing.assert_equal(out_ds["foo__v_uint8"].data, [0, np.nan])

        # np.testing.assert_equal does not work for "object" dtypes, so each
        # value is checked on its own
        string_data = out_ds["foo__v_string"].data
        assert string_data[0] == "hello"
        assert np.isnan(string_data[1])

        bool_data = out_ds["foo__v_bool"].data
        assert bool_data[0] == True
        assert np.isnan(bool_data[1])

    def test_open_as_xr_dataset(self, store):
        """Reopened data is chunked only when the store is not in memory."""
        model = store.model
        model.state[("profile", "u")] = np.array([1.0, 2.0, 3.0])
        model.state[("roll", "u_diff")] = np.array([-1.0, 1.0, 0.0])
        model.state[("add", "offset")] = 2.0

        store.write_output_vars(-1, 0)
        store.write_output_vars(-1, -1)

        reopened = store.open_as_xr_dataset()

        if not store.in_memory:
            assert reopened["profile__u"].chunks is not None
        else:
            assert reopened["profile__u"].chunks is None

        # scalars are still loaded in memory
        assert isinstance(reopened.variables["add__offset"]._data, np.ndarray)
class TestZarrSimulationStore:
    """Tests for ZarrSimulationStore: construction, writing, encoding, reopening."""

    @pytest.mark.parametrize(
        "zobj", [None, "dir", zarr.MemoryStore(), zarr.group()]
    )
    def test_constructor(self, in_ds, model, zobj, tmpdir):
        """The store can be built from no target, a directory, a store, or a group."""
        # "dir" is a placeholder that is swapped for the temporary directory.
        target = str(tmpdir) if zobj == "dir" else zobj

        sim_store = ZarrSimulationStore(in_ds, model, zobject=target)

        assert sim_store.zgroup.store is not None
        assert sim_store.batch_size == -1
        if target is None:
            assert sim_store.in_memory is True

    def test_constructor_batch(self, store_batch):
        """The batch fixture reports a batch size of two."""
        assert store_batch.batch_size == 2

    def test_constructor_conflict(self, in_ds, model):
        """A group that already holds an output dataset name is rejected."""
        occupied = zarr.group()
        occupied.create_dataset("profile__u", shape=(1, 1))

        with pytest.raises(ValueError, match=r".*already contains.*"):
            ZarrSimulationStore(in_ds, model, zobject=occupied)

    def test_write_input_xr_dataset(self, in_ds, store):
        """The written input dataset round-trips without its output variables."""
        store.write_input_xr_dataset()
        reloaded = xr.open_zarr(store.zgroup.store, chunks=None)

        # the output variable is dropped from the reference before comparing
        del in_ds["add__offset"]
        xr.testing.assert_equal(reloaded, in_ds)

        # output variable attrs must be gone from the saved input dataset
        assert not reloaded.xsimlab.output_vars

    def test_write_output_vars(self, in_ds, store):
        """Outputs are written per clock step, leaving unsaved slots as NaN."""
        u_values = [1.0, 2.0, 3.0]
        u_diff_values = [-1.0, 1.0, 0.0]

        model = store.model
        model.state[("profile", "u")] = np.array(u_values)
        model.state[("roll", "u_diff")] = np.array(u_diff_values)
        model.state[("add", "offset")] = 2.0

        store.write_output_vars(-1, 0)

        written = zarr.open_group(store.zgroup.store, mode="r")

        assert written["profile__u"].shape == (in_ds.clock.size, 3)
        np.testing.assert_array_equal(written["profile__u"][0], np.array(u_values))

        assert written["roll__u_diff"].shape == (in_ds.out.size, 3)
        np.testing.assert_array_equal(
            written["roll__u_diff"][0], np.array(u_diff_values)
        )

        assert written["add__u_diff"].shape == (in_ds.out.size,)
        np.testing.assert_array_equal(
            written["add__u_diff"], np.array([2.0, np.nan, np.nan])
        )

        # step saved on the main clock but not on the out clock
        store.write_output_vars(-1, 1)
        np.testing.assert_array_equal(written["profile__u"][1], np.array(u_values))
        np.testing.assert_array_equal(
            written["roll__u_diff"][1], np.array([np.nan, np.nan, np.nan])
        )

        # outputs that are not tied to any clock
        store.write_output_vars(-1, -1)
        np.testing.assert_array_equal(
            written["profile__u_opp"], np.array([-1.0, -2.0, -3.0])
        )
        assert written["add__offset"][()] == 2.0

    def test_write_output_vars_error(self, store):
        """A value with an unexpected number of dimensions raises ValueError."""
        model = store.model
        model.state[("profile", "u")] = np.array(
            [[1.0, 2.0, 3.0], [4.0, 5.0, 6.0]]
        )
        model.state[("roll", "u_diff")] = np.array([-1.0, 1.0, 0.0])
        model.state[("add", "offset")] = 2.0

        with pytest.raises(ValueError, match=r".*accepted dimension.*"):
            store.write_output_vars(-1, 0)

    def test_write_output_vars_batch(self, store_batch, model_batch1, model_batch2):
        """Each batch member is written to its own index along the first axis."""
        per_batch = [
            (model_batch1, [1.0, 2.0, 3.0], [-1.0, 1.0, 0.0], 2.0),
            (model_batch2, [4.0, 5.0, 6.0], [0.0, 1.0, -1.0], 3.0),
        ]
        for batch_model, u_values, u_diff_values, offset in per_batch:
            batch_model.state[("profile", "u")] = np.array(u_values)
            batch_model.state[("roll", "u_diff")] = np.array(u_diff_values)
            batch_model.state[("add", "offset")] = offset

        for batch_index, (batch_model, _, _, _) in enumerate(per_batch):
            store_batch.write_output_vars(batch_index, 0, model=batch_model)

        written = zarr.open_group(store_batch.zgroup.store, mode="r")

        assert written["profile__u"].ndim == 3
        np.testing.assert_array_equal(
            written["profile__u"][:, 0, :],
            np.array([[1.0, 2.0, 3.0], [4.0, 5.0, 6.0]]),
        )

        for batch_index, (batch_model, _, _, _) in enumerate(per_batch):
            store_batch.write_output_vars(batch_index, -1, model=batch_model)

        np.testing.assert_array_equal(
            written["add__offset"][:], np.array([2.0, 3.0])
        )

        # default chunk size along the batch dimension
        assert written["profile__u"].chunks[0] == 1

    def test_write_index_vars(self, store):
        """Index variables are written under their own name."""
        x_values = [1.0, 2.0, 3.0]
        store.model.state[("init_profile", "x")] = np.array(x_values)

        store.write_index_vars()

        written = zarr.open_group(store.zgroup.store, mode="r")
        np.testing.assert_array_equal(written["x"], np.array(x_values))

    def test_write_index_vars_batch(self, store_batch, model_batch1):
        """Index variables written from a batch model carry no batch dimension."""
        x_values = [1.0, 2.0, 3.0]
        model_batch1.state[("init_profile", "x")] = np.array(x_values)

        store_batch.write_index_vars(model=model_batch1)

        written = zarr.open_group(store_batch.zgroup.store, mode="r")
        np.testing.assert_array_equal(written["x"], np.array(x_values))

    def test_resize_zarr_dataset(self):
        """Arrays grow to the largest written size; shorter rows are NaN-padded."""

        @xs.process
        class P:
            arr = xs.variable(dims="x", intent="out")

        model = xs.Model({"p": P})
        in_ds = xs.create_setup(
            model=model,
            clocks={"clock": [0, 1, 2]},
            output_vars={"p__arr": "clock"},
        )
        sim_store = ZarrSimulationStore(in_ds, model)

        # steps 0, 1, 2 write arrays of length 1, 3, 2 respectively
        for step, size in enumerate([1, 3, 2]):
            model.state[("p", "arr")] = np.ones(size)
            sim_store.write_output_vars(-1, step)

        written = zarr.open_group(sim_store.zgroup.store, mode="r")
        expected = np.array(
            [
                [1.0, np.nan, np.nan],
                [1.0, 1.0, 1.0],
                [1.0, 1.0, np.nan],
            ]
        )
        np.testing.assert_array_equal(written["p__arr"], expected)

    def test_encoding(self):
        """Encoding from the store constructor overrides encoding set on variables."""

        @xs.process
        class P:
            v1 = xs.variable(dims="x", intent="out", encoding={"dtype": np.int32})
            v2 = xs.on_demand(dims="x", encoding={"fill_value": 0})
            v3 = xs.index(dims="x")

            @v2.compute
            def _get_v2(self):
                return [0]

        model = xs.Model({"p": P})
        in_ds = xs.create_setup(
            model=model,
            clocks={"clock": [0]},
            output_vars=dict.fromkeys(["p__v1", "p__v2", "p__v3"]),
        )
        store_encoding = {
            "p__v2": {"fill_value": -1},
            "p__v3": {"chunks": (10,)},
        }
        sim_store = ZarrSimulationStore(in_ds, model, encoding=store_encoding)

        model.state[("p", "v1")] = [0]
        model.state[("p", "v3")] = [0]
        sim_store.write_output_vars(-1, -1)

        written = zarr.open_group(sim_store.zgroup.store, mode="r")

        assert written["p__v1"].dtype == np.int32
        # precedence: ZarrSimulationStore encoding > model variable encoding
        assert written["p__v2"].fill_value == -1
        assert written["p__v3"].chunks == (10,)

    def test_open_as_xr_dataset(self, store):
        """Reopened data is chunked only when the store is not in memory."""
        model = store.model
        model.state[("profile", "u")] = np.array([1.0, 2.0, 3.0])
        model.state[("roll", "u_diff")] = np.array([-1.0, 1.0, 0.0])
        model.state[("add", "offset")] = 2.0

        store.write_output_vars(-1, 0)
        store.write_output_vars(-1, -1)

        reopened = store.open_as_xr_dataset()

        if not store.in_memory:
            assert reopened["profile__u"].chunks is not None
        else:
            assert reopened["profile__u"].chunks is None

        # scalars are still loaded in memory
        assert isinstance(reopened.variables["add__offset"]._data, np.ndarray)