def _load_metadata_nosync(self):
    """Read the array metadata from the store and cache it on the instance.

    The metadata entry is looked up under ``self._key_prefix +
    array_meta_key``. If that key is missing from the store,
    ``err_array_not_found`` is called with the array path and nothing is
    cached. Otherwise the decoded metadata is used to populate ``_meta``,
    ``_shape``, ``_chunks``, ``_dtype``, ``_fill_value``, ``_order``,
    ``_compressor`` and ``_filters``.

    This method takes no lock of its own.
    """
    try:
        meta_key = self._key_prefix + array_meta_key
        raw_meta = self._store[meta_key]
    except KeyError:
        err_array_not_found(self._path)
        return

    # decode once, keep the full document and mirror the plain fields
    decoded = decode_array_metadata(raw_meta)
    self._meta = decoded
    for field in ('shape', 'chunks', 'dtype', 'fill_value', 'order'):
        setattr(self, '_' + field, decoded[field])

    # primary compressor: a missing config means "no compression"
    compressor_config = decoded['compressor']
    self._compressor = (
        None if compressor_config is None else get_codec(compressor_config)
    )

    # filters: only build codecs when there is at least one config;
    # a falsy value is stored unchanged
    filter_configs = decoded['filters']
    if filter_configs:
        self._filters = [get_codec(cfg) for cfg in filter_configs]
    else:
        self._filters = filter_configs
def open_array(store, mode='a', shape=None, chunks=True, dtype=None,
               compressor='default', fill_value=0, order='C',
               synchronizer=None, filters=None, cache_metadata=True,
               cache_attrs=True, path=None, object_codec=None, **kwargs):
    """Open an array using file-mode-like semantics.

    Parameters
    ----------
    store : MutableMapping or string
        Store or path to directory in file system or name of zip file.
    mode : {'r', 'r+', 'a', 'w', 'w-'}, optional
        Persistence mode, following h5py:

        * 'r'  : read only; the array must already exist.
        * 'r+' : read/write; the array must already exist.
        * 'a'  : read/write; the array is created if it does not exist.
        * 'w'  : create; any existing array is overwritten.
        * 'w-' : create; fail if an array already exists ('x' is accepted
          as an alias).
    shape : int or tuple of ints, optional
        Array shape.
    chunks : int or tuple of ints, optional
        Chunk shape. If True, will be guessed from `shape` and `dtype`. If
        False, will be set to `shape`, i.e., single chunk for the whole
        array.
    dtype : string or dtype, optional
        NumPy dtype.
    compressor : Codec, optional
        Primary compressor.
    fill_value : object, optional
        Default value to use for uninitialized portions of the array.
    order : {'C', 'F'}, optional
        Memory layout to be used within each chunk.
    synchronizer : object, optional
        Array synchronizer.
    filters : sequence, optional
        Sequence of filters to use to encode chunk data prior to
        compression.
    cache_metadata : bool, optional
        If True, array configuration metadata will be cached for the
        lifetime of the object. If False, array metadata will be reloaded
        prior to all data access and modification operations (may incur
        overhead depending on storage and data access pattern).
    cache_attrs : bool, optional
        If True (default), user attributes will be cached for attribute
        read operations. If False, user attributes are reloaded from the
        store prior to all attribute read operations.
    path : string, optional
        Array path within store.
    object_codec : Codec, optional
        A codec to encode object arrays, only needed if dtype=object.

    Returns
    -------
    z : zarr.core.Array

    Examples
    --------
    >>> import numpy as np
    >>> import zarr
    >>> z1 = zarr.open_array('data/example.zarr', mode='w', shape=(10000, 10000),
    ...                      chunks=(1000, 1000), fill_value=0)
    >>> z1[:] = np.arange(100000000).reshape(10000, 10000)
    >>> z1
    <zarr.core.Array (10000, 10000) float64>
    >>> z2 = zarr.open_array('data/example.zarr', mode='r')
    >>> z2
    <zarr.core.Array (10000, 10000) float64 read-only>
    >>> np.all(z1[:] == z2[:])
    True

    Notes
    -----
    There is no need to close an array. Data are automatically flushed to the
    file system.

    """

    # resolve the polymorphic store argument; only mode 'w' asks for clobber
    store = normalize_store_arg(store, clobber=(mode == 'w'))
    path = normalize_storage_path(path)

    # h5py-style keyword compatibility
    compressor, fill_value = _kwargs_compat(compressor, fill_value, kwargs)

    # coerce fill_value to the requested dtype; indexing the 0-d array with
    # an empty tuple pulls the element back out
    if fill_value is not None:
        fill_value = np.array(fill_value, dtype=dtype)[()]

    # arguments shared by every branch that has to create the array
    creation_args = dict(
        shape=shape, chunks=chunks, dtype=dtype, compressor=compressor,
        fill_value=fill_value, order=order, filters=filters, path=path,
        object_codec=object_codec,
    )

    # make sure the store is in the state the requested mode expects;
    # any other mode value falls through without a check or initialisation
    if mode in ('r', 'r+'):
        # must already exist, and must be an array rather than a group
        if contains_group(store, path=path):
            err_contains_group(path)
        elif not contains_array(store, path=path):
            err_array_not_found(path)

    elif mode == 'w':
        # create, replacing whatever is there
        init_array(store, overwrite=True, **creation_args)

    elif mode == 'a':
        # reuse an existing array, otherwise create one
        if contains_group(store, path=path):
            err_contains_group(path)
        elif not contains_array(store, path=path):
            init_array(store, **creation_args)

    elif mode in ('w-', 'x'):
        # create only if nothing is there yet
        if contains_group(store, path=path):
            err_contains_group(path)
        elif contains_array(store, path=path):
            err_contains_array(path)
        else:
            init_array(store, **creation_args)

    # only plain 'r' yields a read-only array
    return Array(store, read_only=(mode == 'r'), synchronizer=synchronizer,
                 cache_metadata=cache_metadata, cache_attrs=cache_attrs,
                 path=path)
def open_array(store=None, mode='a', shape=None, chunks=None, dtype=None,
               compressor='default', fill_value=0, order='C',
               synchronizer=None, filters=None, cache_metadata=True,
               path=None, **kwargs):
    """Open array using mode-like semantics.

    Parameters
    ----------
    store : MutableMapping or string
        Store or path to directory in file system.
    mode : {'r', 'r+', 'a', 'w', 'w-'}
        Persistence mode, following h5py:

        * 'r'  : read only; the array must already exist.
        * 'r+' : read/write; the array must already exist.
        * 'a'  : read/write; the array is created if it does not exist.
        * 'w'  : create; any existing array is overwritten.
        * 'w-' : create; fail if an array already exists ('x' is accepted
          as an alias).
    shape : int or tuple of ints
        Array shape.
    chunks : int or tuple of ints, optional
        Chunk shape. If not provided, will be guessed from `shape` and
        `dtype`.
    dtype : string or dtype, optional
        NumPy dtype.
    compressor : Codec, optional
        Primary compressor.
    fill_value : object
        Default value to use for uninitialized portions of the array.
    order : {'C', 'F'}, optional
        Memory layout to be used within each chunk.
    synchronizer : object, optional
        Array synchronizer.
    filters : sequence, optional
        Sequence of filters to use to encode chunk data prior to
        compression.
    cache_metadata : bool, optional
        If True, array configuration metadata will be cached for the
        lifetime of the object. If False, array metadata will be reloaded
        prior to all data access and modification operations (may incur
        overhead depending on storage and data access pattern).
    path : string, optional
        Array path.

    Returns
    -------
    z : zarr.core.Array

    Examples
    --------
    >>> import numpy as np
    >>> import zarr
    >>> z1 = zarr.open_array('example.zarr', mode='w', shape=(10000, 10000),
    ...                      chunks=(1000, 1000), fill_value=0)
    >>> z1[:] = np.arange(100000000).reshape(10000, 10000)
    >>> z1
    Array((10000, 10000), float64, chunks=(1000, 1000), order=C)
      nbytes: 762.9M; nbytes_stored: 23.0M; ratio: 33.2; initialized: 100/100
      compressor: Blosc(cname='lz4', clevel=5, shuffle=1)
      store: DirectoryStore
    >>> z2 = zarr.open_array('example.zarr', mode='r')
    >>> z2
    Array((10000, 10000), float64, chunks=(1000, 1000), order=C)
      nbytes: 762.9M; nbytes_stored: 23.0M; ratio: 33.2; initialized: 100/100
      compressor: Blosc(cname='lz4', clevel=5, shuffle=1)
      store: DirectoryStore
    >>> np.all(z1[:] == z2[:])
    True

    Notes
    -----
    There is no need to close an array. Data are automatically flushed to the
    file system.

    """  # flake8: noqa

    # resolve the polymorphic store argument and normalise the path
    store = _handle_store_arg(store)
    path = normalize_storage_path(path)

    # keyword compatibility shim
    compressor, fill_value = _handle_kwargs(compressor, fill_value, kwargs)

    # arguments shared by every branch that has to create the array
    creation_args = dict(
        shape=shape, chunks=chunks, dtype=dtype, compressor=compressor,
        fill_value=fill_value, order=order, filters=filters, path=path,
    )

    # make sure the store is in the state the requested mode expects;
    # any other mode value falls through without a check or initialisation
    if mode in ('r', 'r+'):
        # must already exist, and must be an array rather than a group
        if contains_group(store, path=path):
            err_contains_group(path)
        elif not contains_array(store, path=path):
            err_array_not_found(path)

    elif mode == 'w':
        # create, replacing whatever is there
        init_array(store, overwrite=True, **creation_args)

    elif mode == 'a':
        # reuse an existing array, otherwise create one
        if contains_group(store, path=path):
            err_contains_group(path)
        elif not contains_array(store, path=path):
            init_array(store, **creation_args)

    elif mode in ('w-', 'x'):
        # create only if nothing is there yet
        if contains_group(store, path=path):
            err_contains_group(path)
        elif contains_array(store, path=path):
            err_contains_array(path)
        else:
            init_array(store, **creation_args)

    # only plain 'r' yields a read-only array
    return Array(store, read_only=(mode == 'r'), synchronizer=synchronizer,
                 cache_metadata=cache_metadata, path=path)