def write_multiscale(pyramid: List, group: zarr.Group) -> None:
    """Write a pyramid with multiscale metadata to disk.

    Parameters
    ----------
    pyramid:
        List of image arrays, largest resolution level first.
    group:
        The zarr group to write the datasets and "multiscales" metadata into.
    """
    paths = []
    for path, dataset in enumerate(pyramid):
        # Use the enumerated value directly instead of re-indexing the list.
        group.create_dataset(str(path), data=dataset)
        paths.append({"path": str(path)})
    multiscales = [{"version": "0.1", "datasets": paths}]
    group.attrs["multiscales"] = multiscales
def write_array(self, group: zarr.Group, name: str, array: np.ndarray) -> None:
    """Persist ``array`` under ``name`` inside ``group``.

    Structured (void-kind) arrays are delegated to ``write_dataframe``;
    all other arrays are written as a single compressed dataset.
    """
    if array.dtype.kind == 'V':
        # Structured record arrays go through the dataframe writer.
        self.write_dataframe(group, name, array)
        return
    # Object arrays are stored as variable-length strings.
    value_dtype = str if array.dtype.kind == 'O' else array.dtype
    group.create_dataset(
        name,
        data=array,
        shape=array.shape,
        chunks=calc_chunk(array.shape),
        dtype=value_dtype,
        compressor=COMPRESSOR,
        overwrite=True,
    )
def write_multiscale(
    pyramid: List,
    group: zarr.Group,
    chunks: Union[Tuple[Any, ...], int, None] = None,
) -> None:
    """Write a pyramid with multiscale metadata to disk.

    Parameters
    ----------
    pyramid:
        List of image arrays, largest resolution level first.
    group:
        The zarr group to write the datasets and "multiscales" metadata into.
    chunks:
        Chunk size used for every saved level; ``None`` lets zarr choose.
        (The annotation now admits the ``None`` default.)
    """
    paths = []
    for path, dataset in enumerate(pyramid):
        # TODO: chunks here could be different per layer
        group.create_dataset(str(path), data=dataset, chunks=chunks)
        paths.append({"path": str(path)})
    multiscales = [{"version": "0.2", "datasets": paths}]
    group.attrs["multiscales"] = multiscales
def create_zarr_obj_array(g: zarr.Group, name: str, data,
                          dtype: Union[str, Any] = None,
                          overwrite: bool = True) -> zarr.hierarchy:
    """
    Creates and returns a Zarr object array.

    A Zarr object array can contain any type of object.
    https://zarr.readthedocs.io/en/stable/tutorial.html#object-arrays

    Args:
        g (zarr.hierarchy): Group to create the dataset in.
        name (str): Name of the new dataset.
        data: Values to store; converted with ``np.array`` first.
        dtype (Union[str, Any]): Target dtype. ``None`` or ``object`` is
            replaced by a fixed-width unicode dtype sized to the longest
            element's string form.
        overwrite (bool): Whether to overwrite an existing dataset.

    Returns:
        A Zarr object Array.
    """
    data = np.array(data)
    if dtype is None or dtype == object:
        # max(..., default=1) keeps this safe for empty input, which
        # previously raised ValueError on max() of an empty sequence.
        dtype = 'U' + str(max((len(str(x)) for x in data), default=1))
    if np.issubdtype(data.dtype, np.dtype('S')):
        # Byte strings are stored as unicode.
        data = data.astype('U')
        dtype = data.dtype
    return g.create_dataset(name, data=data, chunks=(100000, ),
                            shape=len(data), dtype=dtype,
                            overwrite=overwrite)
def create_zarr_dataset(g: zarr.Group, name: str, chunks: tuple,
                        dtype: Any, shape: Tuple,
                        overwrite: bool = True) -> zarr.hierarchy:
    """
    Create and return an empty Zarr array compressed with LZ4 + bit-shuffle.

    Args:
        g (zarr.hierarchy): Group to create the dataset in.
        name (str): Name of the new dataset.
        chunks (tuple): Chunk shape for the array.
        dtype (Any): Element dtype.
        shape (Tuple): Overall array shape.
        overwrite (bool): Whether to overwrite an existing dataset.

    Returns:
        A Zarr Array.
    """
    # Imported lazily so the module loads even without numcodecs installed.
    from numcodecs import Blosc
    lz4_compressor = Blosc(cname='lz4', clevel=5, shuffle=Blosc.BITSHUFFLE)
    return g.create_dataset(
        name,
        chunks=chunks,
        dtype=dtype,
        shape=shape,
        compressor=lz4_compressor,
        overwrite=overwrite,
    )
def write_series(self, group: zarr.Group, name: str, array: np.ndarray, data_type: str) -> None:
    """Write one column/series into ``group`` under ``name``.

    For ``data_type == 'data_frame'``, string columns that compress well are
    converted to categoricals, and categoricals are written as two datasets:
    the category values (under the '_categories' subgroup) and the integer
    codes (under ``group`` itself, with an 'ordered' attribute).
    All other arrays are written as a single compressed dataset.
    """
    if data_type == 'data_frame':
        # Auto-categorize string columns (except the index) when the number
        # of distinct values is small relative to the column length.
        if not is_categorical_dtype(array) and name != '_index' and is_string_dtype(array):
            keywords = set(array)
            if len(keywords) <= array.size / 10.0:  # at least 10x reduction
                array = pd.Categorical(array, categories=natsorted(keywords))
        if is_categorical_dtype(array):
            # write category keys
            categories = group.require_group('_categories')
            values = array.categories.values
            if isinstance(values[0], bytes):
                # Decode byte-string categories so they round-trip as text.
                values = np.array([x.decode() for x in values], dtype=object)
            # Object arrays are stored as variable-length strings.
            dtype = str if values.dtype.kind == 'O' else values.dtype
            categories.create_dataset(name, data=values, shape=values.shape,
                                      chunks=calc_chunk(values.shape),
                                      dtype=dtype, compressor=COMPRESSOR,
                                      overwrite=True)
            # write codes
            codes_arr = group.create_dataset(name, data=array.codes,
                                             shape=array.codes.shape,
                                             chunks=calc_chunk(array.codes.shape),
                                             dtype=array.codes.dtype,
                                             compressor=COMPRESSOR,
                                             overwrite=True)
            # Preserve whether the categorical had a meaningful order.
            codes_arr.attrs['ordered'] = array.ordered
            return None
    # Fallback: plain (non-categorical) array write.
    dtype = str if array.dtype.kind == 'O' else array.dtype
    group.create_dataset(name, data=array, shape=array.shape,
                         chunks=calc_chunk(array.shape), dtype=dtype,
                         compressor=COMPRESSOR, overwrite=True)
def write_multiscale(
    pyramid: List,
    group: zarr.Group,
    chunks: Union[Tuple[Any, ...], int, None] = None,
    fmt: Format = CurrentFormat(),
    axes: Union[str, List[str], None] = None,
) -> None:
    """
    Write a pyramid with multiscale metadata to disk.

    Parameters
    ----------
    pyramid: List of np.ndarray
      the image data to save. Largest level first
      All image arrays MUST be up to 5-dimensional with dimensions
      ordered (t, c, z, y, x)
    group: zarr.Group
      the group within the zarr store to store the data in
    chunks: int or tuple of ints,
      size of the saved chunks to store the image; ``None`` lets zarr choose
    fmt: Format
      The format of the ome_zarr data which should be used.
      Defaults to the most current.
    axes: str or list of str
      the names of the axes. e.g. "tczyx".
      Not needed for v0.1 or v0.2 or for v0.3 if 2D or 5D. Otherwise
      this must be provided
    """
    dims = len(pyramid[0].shape)
    axes = _validate_axes_names(dims, axes, fmt)
    paths = []
    for path, dataset in enumerate(pyramid):
        # TODO: chunks here could be different per layer
        group.create_dataset(str(path), data=dataset, chunks=chunks)
        paths.append(str(path))
    write_multiscales_metadata(group, paths, fmt, axes)
def create_zarr_obj_array(
    g: zarr.Group,
    name: str,
    data,
    dtype: Union[str, Any] = None,
    overwrite: bool = True,
    chunk_size: int = 100000,
) -> zarr.hierarchy:
    """
    Creates and returns a Zarr object array.

    A Zarr object array can contain any type of object.
    https://zarr.readthedocs.io/en/stable/tutorial.html#object-arrays

    Args:
        g (zarr.hierarchy): Group to create the dataset in.
        name (str): Name of the new dataset.
        data: Values to store; converted with ``np.array`` first.
        dtype (Union[str, Any]): Target dtype. ``None`` or ``object`` is
            replaced by a fixed-width unicode dtype sized to the longest
            element's string form.
        overwrite (bool): Whether to overwrite an existing dataset.
        chunk_size (int): Chunk length; ``None``/``False`` disables chunking.

    Returns:
        A Zarr object Array.
    """
    # Imported lazily so the module loads even without numcodecs installed.
    from numcodecs import Blosc

    compressor = Blosc(cname="lz4", clevel=5, shuffle=Blosc.BITSHUFFLE)
    data = np.array(data)
    if dtype is None or dtype == object:
        # max(..., default=1) keeps this safe for empty input, which
        # previously raised ValueError on max() of an empty sequence.
        dtype = "U" + str(max((len(str(x)) for x in data), default=1))
    if np.issubdtype(data.dtype, np.dtype("S")):
        # Byte strings are stored as unicode.
        data = data.astype("U")
        dtype = data.dtype
    # `is False` deliberately distinguishes False from 0 and other falsy values.
    if chunk_size is None or chunk_size is False:
        chunks = False
    else:
        chunks = (chunk_size, )
    return g.create_dataset(
        name,
        data=data,
        chunks=chunks,
        shape=len(data),
        dtype=dtype,
        overwrite=overwrite,
        compressor=compressor,
    )