def _require_dataset_nosync(self, name, shape, dtype=None, exact=False,
                            **kwargs):

    path = self._item_path(name)

    if contains_array(self._store, path):
        # array already exists at path, validate that it is the right shape
        # and type
        synchronizer = kwargs.get('synchronizer', self._synchronizer)
        cache_metadata = kwargs.get('cache_metadata', True)
        a = Array(self._store, path=path, read_only=self._read_only,
                  chunk_store=self._chunk_store, synchronizer=synchronizer,
                  cache_metadata=cache_metadata)
        shape = normalize_shape(shape)
        if shape != a.shape:
            raise TypeError('shapes do not match')
        dtype = np.dtype(dtype)
        if exact:
            if dtype != a.dtype:
                raise TypeError('dtypes do not match exactly')
        else:
            if not np.can_cast(dtype, a.dtype):
                raise TypeError('dtypes cannot be safely cast')
        return a

    else:
        return self._create_dataset_nosync(name, shape=shape, dtype=dtype,
                                           **kwargs)
def _require_dataset_nosync(self, name, shape, dtype=None, exact=False,
                            **kwargs):

    path = self._item_path(name)

    if contains_array(self._store, path):
        # array already exists at path, validate that it is the right shape
        # and type
        synchronizer = kwargs.get('synchronizer', self._synchronizer)
        cache_metadata = kwargs.get('cache_metadata', True)
        cache_attrs = kwargs.get('cache_attrs', self.attrs.cache)
        a = Array(self._store, path=path, read_only=self._read_only,
                  chunk_store=self._chunk_store, synchronizer=synchronizer,
                  cache_metadata=cache_metadata, cache_attrs=cache_attrs)
        shape = normalize_shape(shape)
        if shape != a.shape:
            raise TypeError('shapes do not match existing array; '
                            'expected {}, got {}'.format(a.shape, shape))
        dtype = np.dtype(dtype)
        if exact:
            if dtype != a.dtype:
                raise TypeError('dtypes do not match exactly; '
                                'expected {}, got {}'.format(a.dtype, dtype))
        else:
            if not np.can_cast(dtype, a.dtype):
                raise TypeError('dtypes ({}, {}) cannot be safely cast'
                                .format(dtype, a.dtype))
        return a

    else:
        return self._create_dataset_nosync(name, shape=shape, dtype=dtype,
                                           **kwargs)
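# Usage sketch for the require_dataset contract implemented above: return the
# existing array when shape and dtype are compatible, create it otherwise.
# Minimal illustration only -- the in-memory group and the name 'foo' are
# assumptions, not taken from the source.
import numpy as np
import zarr

g = zarr.group()  # in-memory store, for illustration
a = g.require_dataset('foo', shape=(100,), dtype='i4')  # created

# same shape, and i2 casts safely to i4, so the existing array is returned
b = g.require_dataset('foo', shape=(100,), dtype='i2')
assert b.dtype == np.dtype('i4')

# exact=True demands an exact dtype match, so the same call now raises
try:
    g.require_dataset('foo', shape=(100,), dtype='i2', exact=True)
except TypeError as e:
    print(e)  # dtypes do not match exactly; ...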
def extract_zarray(da, encoding, dtype):
    """Helper function to extract zarr array metadata."""
    meta = {
        "compressor": encoding.get(
            "compressor", da.encoding.get("compressor", default_compressor)),
        "filters": encoding.get("filters", da.encoding.get("filters", None)),
        "chunks": encoding.get("chunks", None),
        "dtype": dtype.str,
        "fill_value": _extract_fill_value(da, dtype),
        "order": "C",
        "shape": list(normalize_shape(da.shape)),
        "zarr_format": zarr_format,
    }

    if meta["chunks"] is None:
        if da.chunks is not None:
            # dask-chunked variable: zarr needs uniform chunks, so take the
            # first block length along each dimension
            meta["chunks"] = [c[0] for c in da.chunks]
        else:
            # unchunked variable: use a single chunk spanning the full shape
            meta["chunks"] = list(da.shape)
    return meta
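# The chunk-inference step above relies on dask's convention that .chunks is
# a tuple of per-dimension block-size tuples. A self-contained sketch of that
# normalization, using plain tuples instead of a real dask array:
dask_chunks = ((30, 30, 30, 10), (25, 25, 10))  # a (100, 60) array in 30x25 blocks

# zarr requires one uniform chunk shape, so take the first (full-size) block
# length along each dimension
zarr_chunks = [c[0] for c in dask_chunks]
assert zarr_chunks == [30, 25]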
def test_normalize_shape():
    eq((100,), normalize_shape((100,)))
    eq((100,), normalize_shape([100]))
    eq((100,), normalize_shape(100))
    with assert_raises(TypeError):
        normalize_shape(None)
    with assert_raises(ValueError):
        normalize_shape('foo')
def test_normalize_shape():
    assert (100,) == normalize_shape((100,))
    assert (100,) == normalize_shape([100])
    assert (100,) == normalize_shape(100)
    with pytest.raises(TypeError):
        normalize_shape(None)
    with pytest.raises(ValueError):
        normalize_shape('foo')
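# The two test variants above (nose-style and pytest-style) pin down the same
# contract: ints and int sequences normalize to a tuple of ints, None raises
# TypeError, and non-numeric input raises ValueError. A minimal sketch of an
# implementation satisfying these tests -- an illustration, not necessarily
# the library's actual code:
import numbers

def normalize_shape(shape):
    """Normalize an int or sequence of ints to a tuple of ints."""
    if shape is None:
        raise TypeError('shape is None')
    # convenience: allow a bare int for 1-D arrays
    if isinstance(shape, numbers.Integral):
        shape = (int(shape),)
    # int('f') raises ValueError for non-numeric input such as 'foo'
    return tuple(int(s) for s in shape)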
def _init_array_metadata(store, shape, chunks=None, dtype=None,
                         compressor='default', fill_value=None, order='C',
                         overwrite=False, path=None, chunk_store=None,
                         filters=None):

    # guard conditions
    if overwrite:
        # attempt to delete any pre-existing items in store
        rmdir(store, path)
        if chunk_store is not None and chunk_store != store:
            rmdir(chunk_store, path)
    elif contains_array(store, path):
        err_contains_array(path)
    elif contains_group(store, path):
        err_contains_group(path)

    # normalize metadata
    shape = normalize_shape(shape)
    dtype = np.dtype(dtype)
    chunks = normalize_chunks(chunks, shape, dtype.itemsize)
    order = normalize_order(order)

    # obtain compressor config
    if compressor == 'none':
        # compatibility
        compressor = None
    elif compressor == 'default':
        compressor = default_compressor
    if compressor:
        try:
            compressor_config = compressor.get_config()
        except AttributeError:
            err_bad_compressor(compressor)
    else:
        compressor_config = None

    # obtain filters config
    if filters:
        filters_config = [f.get_config() for f in filters]
    else:
        filters_config = None

    # initialize metadata
    meta = dict(shape=shape, chunks=chunks, dtype=dtype,
                compressor=compressor_config, fill_value=fill_value,
                order=order, filters=filters_config)
    key = _path_to_prefix(path) + array_meta_key
    store[key] = encode_array_metadata(meta)

    # initialize attributes
    key = _path_to_prefix(path) + attrs_key
    store[key] = json.dumps(dict()).encode('ascii')
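# The compressor is persisted by configuration rather than by object; any
# numcodecs codec exposes get_config(), which is what the try/except above
# relies on. For illustration (codec choice and parameter values are
# assumptions):
from numcodecs import Blosc

compressor = Blosc(cname='zstd', clevel=3, shuffle=Blosc.BITSHUFFLE)
print(compressor.get_config())
# e.g. {'id': 'blosc', 'cname': 'zstd', 'clevel': 3, 'shuffle': 2,
#       'blocksize': 0}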
def _extract_zarray(da, encoding, dtype):
    """Helper function to extract zarr array metadata."""
    meta = {
        'compressor': encoding.get(
            'compressor', da.encoding.get('compressor', default_compressor)),
        'filters': encoding.get('filters', da.encoding.get('filters', None)),
        'chunks': encoding.get('chunks', None),
        'dtype': dtype.str,
        'fill_value': _extract_fill_value(da, dtype),
        'order': 'C',
        'shape': list(normalize_shape(da.shape)),
        'zarr_format': zarr_format,
    }

    if meta['chunks'] is None:
        meta['chunks'] = da.shape

    # validate chunks
    if isinstance(da.data, dask_array_type):
        var_chunks = tuple(c[0] for c in da.data.chunks)
    else:
        var_chunks = da.shape
    if var_chunks != tuple(meta['chunks']):
        raise ValueError('Encoding chunks do not match inferred chunks')

    meta['chunks'] = list(meta['chunks'])  # return chunks as a list
    return meta
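# The validation step in this later revision rejects an explicit chunks
# encoding that disagrees with the variable's actual dask chunking. The check
# in isolation, reproduced with plain tuples (names are illustrative):
encoding_chunks = (1000,)                      # requested via encoding
dask_chunks = ((500, 500),)                    # actual dask block sizes

var_chunks = tuple(c[0] for c in dask_chunks)  # inferred: (500,)
try:
    if var_chunks != tuple(encoding_chunks):
        raise ValueError('Encoding chunks do not match inferred chunks')
except ValueError as e:
    print(e)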
def view(self, shape=None, chunks=None, dtype=None, fill_value=None,
         filters=None, read_only=None, synchronizer=None):
    """Return an array sharing the same data.

    Parameters
    ----------
    shape : int or tuple of ints
        Array shape.
    chunks : int or tuple of ints, optional
        Chunk shape.
    dtype : string or dtype, optional
        NumPy dtype.
    fill_value : object
        Default value to use for uninitialized portions of the array.
    filters : sequence, optional
        Sequence of filters to use to encode chunk data prior to
        compression.
    read_only : bool, optional
        True if array should be protected against modification.
    synchronizer : object, optional
        Array synchronizer.

    Notes
    -----
    WARNING: This is an experimental feature and should be used with care.
    There are plenty of ways to generate errors and/or cause data
    corruption.

    Examples
    --------

    Bypass filters:

        >>> import zarr
        >>> import numpy as np
        >>> np.random.seed(42)
        >>> labels = [b'female', b'male']
        >>> data = np.random.choice(labels, size=10000)
        >>> filters = [zarr.Categorize(labels=labels,
        ...                            dtype=data.dtype,
        ...                            astype='u1')]
        >>> a = zarr.array(data, chunks=1000, filters=filters)
        >>> a[:]
        array([b'female', b'male', b'female', ..., b'male', b'male', b'female'],
              dtype='|S6')
        >>> v = a.view(dtype='u1', filters=[])
        >>> v.is_view
        True
        >>> v[:]
        array([1, 2, 1, ..., 2, 2, 1], dtype=uint8)

    Views can be used to modify data:

        >>> x = v[:]
        >>> x.sort()
        >>> v[:] = x
        >>> v[:]
        array([1, 1, 1, ..., 2, 2, 2], dtype=uint8)
        >>> a[:]
        array([b'female', b'female', b'female', ..., b'male', b'male', b'male'],
              dtype='|S6')

    View as a different dtype with the same itemsize:

        >>> data = np.random.randint(0, 2, size=10000, dtype='u1')
        >>> a = zarr.array(data, chunks=1000)
        >>> a[:]
        array([0, 0, 1, ..., 1, 0, 0], dtype=uint8)
        >>> v = a.view(dtype=bool)
        >>> v[:]
        array([False, False, True, ..., True, False, False], dtype=bool)
        >>> np.all(a[:].view(dtype=bool) == v[:])
        True

    An array can be viewed with a dtype with a different itemsize, however
    some care is needed to adjust the shape and chunk shape so that chunk
    data is interpreted correctly:

        >>> data = np.arange(10000, dtype='u2')
        >>> a = zarr.array(data, chunks=1000)
        >>> a[:10]
        array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9], dtype=uint16)
        >>> v = a.view(dtype='u1', shape=20000, chunks=2000)
        >>> v[:10]
        array([0, 0, 1, 0, 2, 0, 3, 0, 4, 0], dtype=uint8)
        >>> np.all(a[:].view('u1') == v[:])
        True

    Change fill value for uninitialized chunks:

        >>> a = zarr.full(10000, chunks=1000, fill_value=-1, dtype='i1')
        >>> a[:]
        array([-1, -1, -1, ..., -1, -1, -1], dtype=int8)
        >>> v = a.view(fill_value=42)
        >>> v[:]
        array([42, 42, 42, ..., 42, 42, 42], dtype=int8)

    Note that resizing or appending to views is not permitted:

        >>> a = zarr.empty(10000)
        >>> v = a.view()
        >>> try:
        ...     v.resize(20000)
        ... except PermissionError as e:
        ...     print(e)
        not permitted for views

    """  # flake8: noqa

    store = self._store
    chunk_store = self._chunk_store
    path = self._path
    if read_only is None:
        read_only = self._read_only
    if synchronizer is None:
        synchronizer = self._synchronizer
    a = Array(store=store, path=path, chunk_store=chunk_store,
              read_only=read_only, synchronizer=synchronizer,
              cache_metadata=True)
    a._is_view = True

    # allow override of some properties
    if dtype is None:
        dtype = self._dtype
    else:
        dtype = np.dtype(dtype)
        a._dtype = dtype
    if shape is None:
        shape = self._shape
    else:
        shape = normalize_shape(shape)
        a._shape = shape
    if chunks is not None:
        chunks = normalize_chunks(chunks, shape, dtype.itemsize)
        a._chunks = chunks
    if fill_value is not None:
        a._fill_value = fill_value
    if filters is not None:
        a._filters = filters

    return a
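# One parameter documented above but not exercised in the doctests is
# read_only: a view can expose the same data write-protected. A minimal
# sketch, assuming writes to a read-only array raise PermissionError as in
# the resize example above:
import zarr

a = zarr.zeros(100, chunks=10, dtype='i4')
v = a.view(read_only=True)
try:
    v[0] = 42
except PermissionError as e:
    print(e)
a[0] = 42  # the parent array remains writable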
def _init_array_metadata(store, shape, chunks=None, dtype=None,
                         compressor='default', fill_value=None, order='C',
                         overwrite=False, path=None, chunk_store=None,
                         filters=None):

    # guard conditions
    if overwrite:
        # attempt to delete any pre-existing items in store
        rmdir(store, path)
        if chunk_store is not None:
            rmdir(chunk_store, path)
    elif contains_array(store, path):
        err_contains_array(path)
    elif contains_group(store, path):
        err_contains_group(path)

    # normalize metadata
    shape = normalize_shape(shape)
    dtype = np.dtype(dtype)
    if dtype.kind in 'mM':
        raise ValueError(
            'datetime64 and timedelta64 dtypes are not currently supported; '
            'please store the data using int64 instead')
    chunks = normalize_chunks(chunks, shape, dtype.itemsize)
    order = normalize_order(order)
    fill_value = normalize_fill_value(fill_value, dtype)

    # compressor prep
    if shape == ():
        # no point in compressing a 0-dimensional array, only a single value
        compressor = None
    elif compressor == 'none':
        # compatibility
        compressor = None
    elif compressor == 'default':
        compressor = default_compressor

    # obtain compressor config
    compressor_config = None
    if compressor:
        try:
            compressor_config = compressor.get_config()
        except AttributeError:
            err_bad_compressor(compressor)

    # obtain filters config
    if filters:
        filters_config = [f.get_config() for f in filters]
    else:
        filters_config = None

    # initialize metadata
    meta = dict(shape=shape, chunks=chunks, dtype=dtype,
                compressor=compressor_config, fill_value=fill_value,
                order=order, filters=filters_config)
    key = _path_to_prefix(path) + array_meta_key
    store[key] = encode_array_metadata(meta)

    # initialize attributes
    key = _path_to_prefix(path) + attrs_key
    store[key] = json.dumps(dict()).encode('ascii')
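# This revision rejects datetime64/timedelta64 dtypes up front. A minimal
# sketch of the workaround the error message suggests -- storing the
# underlying int64 values and reinterpreting on read (array names are
# assumptions):
import numpy as np
import zarr

times = np.arange('2001-01-01', '2001-01-11', dtype='M8[D]')

# store the raw int64 representation instead of datetime64
z = zarr.array(times.view('i8'), chunks=5)

# round-trip: reinterpret the stored integers as datetime64 again
assert (z[:].view('M8[D]') == times).all()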