def write_zarr(
        dataset: xr.Dataset,
        store: Union[MutableMapping, str, pathlib.Path],
        preset: int = 9,
        dist: int = 4,
        mode: str = 'w',
        consolidated: bool = True) -> xr.backends.ZarrStore:
    """Write ``dataset`` to a Zarr store with delta + LZMA2 compression.

    The data variable (looked up via ``get_variable_name``) is encoded with a
    two-stage raw-LZMA filter chain: a delta filter of distance ``dist``
    followed by LZMA2 at compression level ``preset``.

    Parameters
    ----------
    dataset : xr.Dataset
        Dataset to serialize; must contain the variable ``get_variable_name``
        resolves.
    store : MutableMapping, str or pathlib.Path
        Target Zarr store, passed through to ``Dataset.to_zarr``.
    preset : int
        LZMA2 compression preset (default 9, maximum compression).
    dist : int
        Byte distance for the delta filter (default 4).
    mode : str
        Write mode forwarded to ``to_zarr`` (default ``'w'``).
    consolidated : bool
        Whether to consolidate Zarr metadata (default True).

    Returns
    -------
    xr.backends.ZarrStore
        The store object returned by ``Dataset.to_zarr``.
    """
    filter_chain = [
        {'id': lzma.FILTER_DELTA, 'dist': dist},
        {'id': lzma.FILTER_LZMA2, 'preset': preset},
    ]
    lzma_codec = numcodecs.LZMA(filters=filter_chain, format=lzma.FORMAT_RAW)
    variable = get_variable_name(dataset)
    return dataset.to_zarr(
        store,
        mode=mode,
        consolidated=consolidated,
        encoding={variable: {'compressor': lzma_codec}},
    )
def write_zarr_to_s3(dataset, dest_bucket, s3):
    """Write ``dataset`` as a Zarr store to S3 and return its full path.

    The destination path is derived from the dataset itself via
    ``get_zarr_path_and_filename`` and rooted under ``dest_bucket``.

    Parameters
    ----------
    dataset
        Dataset to serialize (whatever ``write_zarr`` accepts).
    dest_bucket : str
        Destination S3 bucket (or bucket/prefix) the Zarr path is joined onto.
    s3
        An ``s3fs``-style filesystem object (``exists``, ``makedirs``,
        and usable with ``s3fs.S3Map``).

    Returns
    -------
    str
        The full S3 path of the written Zarr store.

    Raises
    ------
    FileExistsError
        If the destination already exists; never overwrites.
    """
    zarr_path, base_zarr_filename = get_zarr_path_and_filename(dataset)
    zarr_path = os.path.join(dest_bucket, zarr_path)
    full_zarr_filename = os.path.join(zarr_path, base_zarr_filename)
    if s3.exists(full_zarr_filename):
        raise FileExistsError(
            'Destination already exists: {}'.format(full_zarr_filename))
    s3.makedirs(path=zarr_path)
    store = s3fs.S3Map(
        root=full_zarr_filename, s3=s3, check=False, create=True)
    # Delegate to write_zarr so the compression settings live in one place.
    # Its defaults (dist=4, preset=9, mode='w', consolidated=True) match the
    # values this function previously hard-coded inline.
    write_zarr(dataset, store)
    return full_zarr_filename
import zarr


def write_n5(path, shape, block_size, compressor):
    """Write a ramp of uint16 values (0..prod(shape)-1) to an N5 store.

    N5 stores data column-major, so both the array and its chunk sizes are
    reversed relative to the requested ``shape`` / ``block_size`` before
    writing.

    Parameters
    ----------
    path : str
        Filesystem path of the N5 store.
    shape : sequence of int
        Logical (row-major) shape of the array.
    block_size : sequence of int
        Logical (row-major) chunk shape; reversed for the N5 layout.
    compressor
        A numcodecs codec instance, or None for no compression.
    """
    store = zarr.N5Store(path)
    values = np.arange(np.prod(shape), dtype=np.uint16).reshape(shape)
    transposed = values.transpose()
    target = zarr.zeros(
        transposed.shape,
        chunks=block_size[::-1],
        store=store,
        dtype=values.dtype,
        overwrite=True,
        compressor=compressor,
    )
    target[...] = transposed


# Emit one store per supported compressor so each codec path is exercised.
write_n5(path='raw', shape=[5, 4], block_size=[3, 2], compressor=None)
write_n5(
    path='gzip', shape=[5, 4], block_size=[3, 2],
    compressor=numcodecs.GZip())
write_n5(
    path='bzip2', shape=[5, 4], block_size=[3, 2],
    compressor=numcodecs.BZ2())
write_n5(
    path='xz', shape=[5, 4], block_size=[3, 2],
    compressor=numcodecs.LZMA(preset=4))
write_n5(
    path='blosc', shape=[5, 4], block_size=[3, 2],
    compressor=numcodecs.Blosc())
from hypothesis.extra.numpy import arrays, scalar_dtypes
from binpickle.read import BinPickleFile, load
from binpickle.write import BinPickler, dump
from binpickle import codecs

# Writer constructors and codec strategies shared by the read/write tests.
# Optional codecs are only registered when their backing library is present.
RW_CTORS = [BinPickler, BinPickler.mappable, BinPickler.compressed]
RW_CODECS = [st.just(None), st.builds(codecs.GZ)]

if codecs.Blosc.AVAILABLE:
    RW_CTORS.append(
        lambda f: BinPickler.compressed(f, codecs.Blosc('zstd', 5)))
    RW_CODECS.extend([
        st.builds(codecs.Blosc),
        st.builds(codecs.Blosc, st.just('zstd')),
    ])

if codecs.NC.AVAILABLE:
    import numcodecs
    RW_CTORS.append(
        lambda f: BinPickler.compressed(f, numcodecs.LZMA()))
    RW_CODECS.append(st.builds(codecs.NC, st.just(numcodecs.LZMA())))
    # also build a chain test
    RW_CTORS.append(lambda f: BinPickler.compressed(
        f, codecs.Chain([numcodecs.MsgPack(), codecs.GZ()])))

# Every constructor crossed with direct=False/True.
RW_CONFIGS = it.product(RW_CTORS, [False, True])
RW_PARAMS = ['writer', 'direct']


@pytest.fixture
def rng():
    """Provide a fresh, independently-seeded NumPy random generator."""
    return np.random.default_rng()