def test_group_key_completions():
    """dir() should expose only direct children with valid-identifier names,
    while IPython key completion should expose every direct child name."""
    g = group()
    d = dir(g)
    k = g._ipython_key_completions_()

    names = ['foo', 'bar', 'baz', 'qux', 'xxx', 'yyy', 'zzz',
             '123', '456', 'asdf;']

    # before anything is created, none of the names appear as an
    # attribute or as an item
    for name in names:
        assert name not in d
        assert name not in k

    g.create_groups('foo', 'bar', 'baz/qux', '123')
    g.zeros('xxx', shape=100)
    g.zeros('yyy', shape=100)
    g.zeros('zzz', shape=100)
    g.zeros('456', shape=100)
    g.zeros('asdf;', shape=100)

    d = dir(g)
    k = g._ipython_key_completions_()

    # attribute completion: direct children that are valid identifiers
    for name in ['foo', 'bar', 'baz', 'xxx', 'yyy', 'zzz']:
        assert name in d
    # nested child ('qux') and non-identifier names never become attributes
    for name in ['qux', '123', '456', 'asdf;']:
        assert name not in d

    # key completion: every direct child, identifier or not
    for name in ['foo', 'bar', 'baz', 'xxx', 'yyy', 'zzz',
                 '123', '456', 'asdf;']:
        assert name in k
    assert 'qux' not in k  # nested under 'baz', not a direct child
def test_group_completions():
    """dir() on a group should list only direct children whose names are
    valid Python identifiers."""
    g = group()
    d = dir(g)
    candidates = ['foo', 'bar', 'baz', 'qux', 'xxx', 'yyy', 'zzz',
                  '123', '456']

    # nothing created yet, so none of the candidate names appear
    for name in candidates:
        assert name not in d

    g.create_groups('foo', 'bar', 'baz/qux', '123')
    g.zeros('xxx', shape=100)
    g.zeros('yyy', shape=100)
    g.zeros('zzz', shape=100)
    g.zeros('456', shape=100)

    d = dir(g)
    for name in ['foo', 'bar', 'baz', 'xxx', 'yyy', 'zzz']:
        assert name in d
    assert 'qux' not in d  # nested child, not a direct attribute
    assert '123' not in d  # not valid identifier
    assert '456' not in d  # not valid identifier
def source(self, request, tmpdir):
    """Yield-fixture providing a populated source hierarchy.

    When ``request.param`` is truthy the hierarchy is an HDF5 file
    (skipped if h5py is unavailable); otherwise it is an in-memory zarr
    group.  Both variants hold the same structure: a 'foo' group with an
    attribute, a 'bar/baz' dataset with an attribute, and a 'spam'
    dataset created with format-appropriate options.
    """
    def prep_source(source):
        foo = source.create_group('foo')
        foo.attrs['experiment'] = 'weird science'
        baz = foo.create_dataset('bar/baz', data=np.arange(100), chunks=(50, ))
        baz.attrs['units'] = 'metres'
        if request.param:
            # h5py-style dataset keyword arguments
            extra_kws = dict(compression='gzip', compression_opts=3, fillvalue=84,
                             shuffle=True, fletcher32=True)
        else:
            # zarr-style dataset keyword arguments
            extra_kws = dict(compressor=Zlib(3), order='F', fill_value=42,
                             filters=[Adler32()])
        source.create_dataset('spam', data=np.arange(100, 200).reshape(20, 5),
                              chunks=(10, 2), dtype='i2', **extra_kws)
        return source

    if request.param:
        h5py = pytest.importorskip('h5py')
        fn = tmpdir.join('source.h5')
        # keep the file open for the duration of the test, then close it
        with h5py.File(str(fn), mode='w') as h5f:
            yield prep_source(h5f)
    else:
        yield prep_source(group())
def dest(self, request, tmpdir):
    """Yield-fixture providing an empty destination hierarchy: an HDF5
    file when ``request.param`` is truthy (skipped without h5py),
    otherwise an in-memory zarr group."""
    if not request.param:
        yield group()
        return
    h5py = pytest.importorskip('h5py')
    dest_path = tmpdir.join('dest.h5')
    # keep the file open while the test runs, then close it
    with h5py.File(str(dest_path), mode='w') as h5f:
        yield h5f
def test_group():
    """Exercise the group() convenience function."""
    # defaults: an in-memory group rooted at '/'
    g = group()
    assert isinstance(g, Group)
    assert g.path == ''
    assert g.name == '/'

    # a user-supplied store is adopted as-is
    custom_store = dict()
    g = group(store=custom_store)
    assert isinstance(g, Group)
    assert g.store is custom_store

    # a store already holding an array is rejected unless overwrite=True
    occupied = dict()
    init_array(occupied, shape=100, chunks=10)
    with pytest.raises(ValueError):
        group(occupied)
    g = group(occupied, overwrite=True)
    assert isinstance(g, Group)
    assert g.store is occupied
def test_group():
    """Exercise the group() convenience function."""
    # basic usage: in-memory group rooted at '/'
    g = group()
    assert_is_instance(g, Group)
    eq('', g.path)
    eq('/', g.name)
    # usage with custom store
    store = dict()
    g = group(store=store)
    assert_is_instance(g, Group)
    assert_is(store, g.store)
    # overwrite behaviour: creating a group over a store that already
    # contains an array raises ValueError (not KeyError) unless
    # overwrite=True is given, consistent with the other test_group
    # variants in this suite
    store = dict()
    init_array(store, shape=100, chunks=10)
    with assert_raises(ValueError):
        group(store)
    g = group(store, overwrite=True)
    assert_is_instance(g, Group)
    assert_is(store, g.store)
def dest(self, request, tmpdir):
    """Yield-fixture providing a v3 destination hierarchy selected by
    ``request.param``: an HDF5 file, or a zarr v3 group at path 'group2'
    backed by one of several store implementations."""
    if request.param == 'hdf5':
        h5py = pytest.importorskip('h5py')
        fn = tmpdir.join('dest.h5')
        # keep the file open for the duration of the test, then close it
        with h5py.File(str(fn), mode='w') as h5f:
            yield h5f
    elif request.param == 'zarr':
        # default (in-memory) store
        yield group(path='group2', zarr_version=3)
    elif request.param == 'zarr_kvstore':
        store = KVStoreV3(dict())
        yield group(store, path='group2', zarr_version=3)
    elif request.param == 'zarr_fsstore':
        fn = tmpdir.join('dest.zr3')
        store = FSStoreV3(str(fn), auto_mkdir=True)
        yield group(store, path='group2', zarr_version=3)
    elif request.param == 'zarr_directorystore':
        fn = tmpdir.join('dest.zr3')
        store = DirectoryStoreV3(str(fn))
        yield group(store, path='group2', zarr_version=3)
    elif request.param == 'zarr_sqlitestore':
        fn = tmpdir.join('dest.db')
        store = SQLiteStoreV3(str(fn))
        yield group(store, path='group2', zarr_version=3)
def test_group():
    """Exercise the group() convenience function."""
    # with no arguments, an in-memory group rooted at '/' is returned
    g = group()
    assert_is_instance(g, Group)
    eq('', g.path)
    eq('/', g.name)

    # an explicitly supplied store is adopted unchanged
    custom_store = dict()
    g = group(store=custom_store)
    assert_is_instance(g, Group)
    assert_is(custom_store, g.store)

    # a store already holding an array is rejected unless overwrite=True
    occupied = dict()
    init_array(occupied, shape=100, chunks=10)
    with assert_raises(ValueError):
        group(occupied)
    g = group(occupied, overwrite=True)
    assert_is_instance(g, Group)
    assert_is(occupied, g.store)
def test_consolidated_with_chunk_store():
    """Metadata consolidation should work when chunk data lives in a
    separate chunk_store: after consolidating and deleting the original
    metadata keys, the hierarchy must still open and the data remain
    readable and writeable."""
    # setup initial data
    store = MemoryStore()
    chunk_store = MemoryStore()
    z = group(store, chunk_store=chunk_store)
    z.create_group('g1')
    g2 = z.create_group('g2')
    g2.attrs['hello'] = 'world'
    arr = g2.create_dataset('arr', shape=(20, 20), chunks=(5, 5), dtype='f8')
    assert 16 == arr.nchunks
    assert 0 == arr.nchunks_initialized
    arr.attrs['data'] = 1
    arr[:] = 1.0
    assert 16 == arr.nchunks_initialized

    # perform consolidation
    out = consolidate_metadata(store)
    assert isinstance(out, Group)
    assert '.zmetadata' in store
    # remove the unconsolidated metadata so a successful open proves the
    # consolidated copy is actually being used
    for key in ['.zgroup',
                'g1/.zgroup',
                'g2/.zgroup',
                'g2/.zattrs',
                'g2/arr/.zarray',
                'g2/arr/.zattrs']:
        del store[key]

    # open consolidated
    z2 = open_consolidated(store, chunk_store=chunk_store)
    assert ['g1', 'g2'] == list(z2)
    assert 'world' == z2.g2.attrs['hello']
    assert 1 == z2.g2.arr.attrs['data']
    assert (z2.g2.arr[:] == 1.0).all()
    assert 16 == z2.g2.arr.nchunks
    assert 16 == z2.g2.arr.nchunks_initialized

    # test the data are writeable
    z2.g2.arr[:] = 2
    assert (z2.g2.arr[:] == 2).all()

    # test invalid modes
    with pytest.raises(ValueError):
        open_consolidated(store, mode='a', chunk_store=chunk_store)
    with pytest.raises(ValueError):
        open_consolidated(store, mode='w', chunk_store=chunk_store)

    # make sure keyword arguments are passed through without error
    open_consolidated(store, cache_attrs=True, synchronizer=None,
                      chunk_store=chunk_store)
def test_tree():
    """Check the ASCII (bytes) and unicode text renderings of a group
    hierarchy, both from the root and from sub-groups."""
    # setup
    g1 = group()
    g2 = g1.create_group('foo')
    g3 = g1.create_group('bar')
    g3.create_group('baz')
    g5 = g3.create_group('quux')
    g5.create_dataset('baz', shape=100, chunks=10)

    # test root group
    expect_bytes = textwrap.dedent(u"""\
        /
         +-- bar
         |   +-- baz
         |   +-- quux
         |       +-- baz (100,) float64
         +-- foo""").encode()
    expect_text = textwrap.dedent(u"""\
        /
         ├── bar
         │   ├── baz
         │   └── quux
         │       └── baz (100,) float64
         └── foo""")
    _check_tree(g1, expect_bytes, expect_text)

    # test different group (leaf group: just its own name)
    expect_bytes = textwrap.dedent(u"""\
        foo""").encode()
    expect_text = textwrap.dedent(u"""\
        foo""")
    _check_tree(g2, expect_bytes, expect_text)

    # test different group (subtree rooted at 'bar')
    expect_bytes = textwrap.dedent(u"""\
        bar
         +-- baz
         +-- quux
             +-- baz (100,) float64""").encode()
    expect_text = textwrap.dedent(u"""\
        bar
         ├── baz
         └── quux
             └── baz (100,) float64""")
    _check_tree(g3, expect_bytes, expect_text)
def test_format_compatibility():
    """Round-trip a fixture of arrays over many dtypes, shapes, orders and
    compressors against data stored on disk by a previous release."""
    # This test is intended to catch any unintended changes that break the ability to
    # read data stored with a previous minor version (which should be format-compatible).

    # fixture data
    fixture = group(store=DirectoryStore('fixture'))

    # set seed to get consistent random data
    np.random.seed(42)

    # (array, chunks) pairs covering signed/unsigned ints, floats,
    # bytes/unicode strings, bools, and multi-dimensional C/F orderings
    arrays_chunks = [
        (np.arange(1111, dtype='i1'), 100),
        (np.arange(1111, dtype='i2'), 100),
        (np.arange(1111, dtype='i4'), 100),
        (np.arange(1111, dtype='i8'), 1000),
        (np.random.randint(0, 200, size=2222, dtype='u1'), 100),
        (np.random.randint(0, 2000, size=2222, dtype='u2'), 100),
        (np.random.randint(0, 2000, size=2222, dtype='u4'), 100),
        (np.random.randint(0, 2000, size=2222, dtype='u8'), 100),
        (np.linspace(0, 1, 3333, dtype='f2'), 100),
        (np.linspace(0, 1, 3333, dtype='f4'), 100),
        (np.linspace(0, 1, 3333, dtype='f8'), 100),
        (np.random.normal(loc=0, scale=1, size=4444).astype('f2'), 100),
        (np.random.normal(loc=0, scale=1, size=4444).astype('f4'), 100),
        (np.random.normal(loc=0, scale=1, size=4444).astype('f8'), 100),
        (np.random.choice([b'A', b'C', b'G', b'T'],
                          size=5555, replace=True).astype('S'), 100),
        (np.random.choice(['foo', 'bar', 'baz', 'quux'],
                          size=5555, replace=True).astype('U'), 100),
        (np.random.choice([0, 1 / 3, 1 / 7, 1 / 9, np.nan],
                          size=5555, replace=True).astype('f8'), 100),
        (np.random.randint(0, 2, size=5555, dtype=bool), 100),
        (np.arange(20000, dtype='i4').reshape(2000, 10, order='C'), (100, 3)),
        (np.arange(20000, dtype='i4').reshape(200, 100, order='F'), (100, 30)),
        (np.arange(20000, dtype='i4').reshape(200, 10, 10, order='C'), (100, 3, 3)),
        (np.arange(20000, dtype='i4').reshape(20, 100, 10, order='F'), (10, 30, 3)),
        (np.arange(20000, dtype='i4').reshape(20, 10, 10, 10, order='C'), (10, 3, 3, 3)),
        (np.arange(20000, dtype='i4').reshape(20, 10, 10, 10, order='F'), (10, 3, 3, 3)),
    ]

    compressors = [
        None,
        Zlib(level=1),
        BZ2(level=1),
        Blosc(cname='zstd', clevel=1, shuffle=0),
        Blosc(cname='zstd', clevel=1, shuffle=1),
        Blosc(cname='zstd', clevel=1, shuffle=2),
        Blosc(cname='lz4', clevel=1, shuffle=0),
    ]

    for i, (arr, chunks) in enumerate(arrays_chunks):
        # preserve the memory layout of the input array
        if arr.flags.f_contiguous:
            order = 'F'
        else:
            order = 'C'

        for j, compressor in enumerate(compressors):
            path = '{}/{}'.format(i, j)

            if path not in fixture:  # pragma: no cover
                # store the data - should be one-time operation
                fixture.array(path, data=arr, chunks=chunks, order=order,
                              compressor=compressor)

            # setup array
            z = fixture[path]

            # check contents
            if arr.dtype.kind == 'f':
                assert_array_almost_equal(arr, z[:])
            else:
                assert_array_equal(arr, z[:])

            # check dtype
            assert arr.dtype == z.dtype

            # check compressor
            if compressor is None:
                assert z.compressor is None
            else:
                assert compressor.codec_id == z.compressor.codec_id
                assert compressor.get_config() == z.compressor.get_config()
def test_format_compatibility():
    """Round-trip a fixture of arrays over many dtypes, shapes, orders and
    compressors against data stored on disk by a previous release."""
    # This test is intended to catch any unintended changes that break the ability to
    # read data stored with a previous minor version (which should be format-compatible).

    # fixture data
    fixture = group(store=DirectoryStore('fixture'))

    # set seed to get consistent random data
    np.random.seed(42)

    # (array, chunks) pairs covering signed/unsigned ints, floats,
    # bytes/unicode strings, bools, and multi-dimensional C/F orderings
    arrays_chunks = [
        (np.arange(1111, dtype='i1'), 100),
        (np.arange(1111, dtype='i2'), 100),
        (np.arange(1111, dtype='i4'), 100),
        (np.arange(1111, dtype='i8'), 1000),
        (np.random.randint(0, 200, size=2222, dtype='u1'), 100),
        (np.random.randint(0, 2000, size=2222, dtype='u2'), 100),
        (np.random.randint(0, 2000, size=2222, dtype='u4'), 100),
        (np.random.randint(0, 2000, size=2222, dtype='u8'), 100),
        (np.linspace(0, 1, 3333, dtype='f2'), 100),
        (np.linspace(0, 1, 3333, dtype='f4'), 100),
        (np.linspace(0, 1, 3333, dtype='f8'), 100),
        (np.random.normal(loc=0, scale=1, size=4444).astype('f2'), 100),
        (np.random.normal(loc=0, scale=1, size=4444).astype('f4'), 100),
        (np.random.normal(loc=0, scale=1, size=4444).astype('f8'), 100),
        (np.random.choice([b'A', b'C', b'G', b'T'],
                          size=5555, replace=True).astype('S'), 100),
        (np.random.choice(['foo', 'bar', 'baz', 'quux'],
                          size=5555, replace=True).astype('U'), 100),
        (np.random.choice([0, 1/3, 1/7, 1/9, np.nan],
                          size=5555, replace=True).astype('f8'), 100),
        (np.random.randint(0, 2, size=5555, dtype=bool), 100),
        (np.arange(20000, dtype='i4').reshape(2000, 10, order='C'), (100, 3)),
        (np.arange(20000, dtype='i4').reshape(200, 100, order='F'), (100, 30)),
        (np.arange(20000, dtype='i4').reshape(200, 10, 10, order='C'), (100, 3, 3)),
        (np.arange(20000, dtype='i4').reshape(20, 100, 10, order='F'), (10, 30, 3)),
        (np.arange(20000, dtype='i4').reshape(20, 10, 10, 10, order='C'), (10, 3, 3, 3)),
        (np.arange(20000, dtype='i4').reshape(20, 10, 10, 10, order='F'), (10, 3, 3, 3)),
    ]

    compressors = [
        None,
        Zlib(level=1),
        BZ2(level=1),
        Blosc(cname='zstd', clevel=1, shuffle=0),
        Blosc(cname='zstd', clevel=1, shuffle=1),
        Blosc(cname='zstd', clevel=1, shuffle=2),
        Blosc(cname='lz4', clevel=1, shuffle=0),
    ]

    for i, (arr, chunks) in enumerate(arrays_chunks):
        # preserve the memory layout of the input array
        if arr.flags.f_contiguous:
            order = 'F'
        else:
            order = 'C'

        for j, compressor in enumerate(compressors):
            path = '{}/{}'.format(i, j)

            if path not in fixture:  # pragma: no cover
                # store the data - should be one-time operation
                fixture.array(path, data=arr, chunks=chunks, order=order,
                              compressor=compressor)

            # setup array
            z = fixture[path]

            # check contents
            if arr.dtype.kind == 'f':
                assert_array_almost_equal(arr, z[:])
            else:
                assert_array_equal(arr, z[:])

            # check dtype
            eq(arr.dtype, z.dtype)

            # check compressor
            if compressor is None:
                assert_is_none(z.compressor)
            else:
                eq(compressor.codec_id, z.compressor.codec_id)
                eq(compressor.get_config(), z.compressor.get_config())
def test_group_key_completions():
    """dir() should expose only direct children with valid-identifier
    names, while IPython key completion should expose every direct
    child name."""
    g = group()
    d = dir(g)
    # noinspection PyProtectedMember
    k = g._ipython_key_completions_()

    # none of these names should be an attribute
    assert 'foo' not in d
    assert 'bar' not in d
    assert 'baz' not in d
    assert 'qux' not in d
    assert 'xxx' not in d
    assert 'yyy' not in d
    assert 'zzz' not in d
    assert '123' not in d
    assert '456' not in d
    assert 'asdf;' not in d

    # none of these names should be an item
    assert 'foo' not in k
    assert 'bar' not in k
    assert 'baz' not in k
    assert 'qux' not in k
    assert 'xxx' not in k
    assert 'yyy' not in k
    assert 'zzz' not in k
    assert '123' not in k
    assert '456' not in k
    assert 'asdf;' not in k

    # create a mix of sub-groups and arrays, including names that are
    # not valid Python identifiers
    g.create_groups('foo', 'bar', 'baz/qux', '123')
    g.zeros('xxx', shape=100)
    g.zeros('yyy', shape=100)
    g.zeros('zzz', shape=100)
    g.zeros('456', shape=100)
    g.zeros('asdf;', shape=100)

    d = dir(g)
    # noinspection PyProtectedMember
    k = g._ipython_key_completions_()

    # valid-identifier direct children become attributes
    assert 'foo' in d
    assert 'bar' in d
    assert 'baz' in d
    assert 'qux' not in d  # nested child, not a direct attribute
    assert 'xxx' in d
    assert 'yyy' in d
    assert 'zzz' in d
    assert '123' not in d  # not valid identifier
    assert '456' not in d  # not valid identifier
    assert 'asdf;' not in d  # not valid identifier

    # key completion includes every direct child, identifier or not
    assert 'foo' in k
    assert 'bar' in k
    assert 'baz' in k
    assert 'qux' not in k  # nested under 'baz', not a direct child
    assert 'xxx' in k
    assert 'yyy' in k
    assert 'zzz' in k
    assert '123' in k
    assert '456' in k
    assert 'asdf;' in k
def test_consolidate_metadata():
    """After consolidation, the hierarchy must open from the consolidated
    metadata alone (originals deleted), metadata must be read-only, and
    chunk data must stay writeable."""
    # setup initial data
    store = MemoryStore()
    z = group(store)
    z.create_group('g1')
    g2 = z.create_group('g2')
    g2.attrs['hello'] = 'world'
    arr = g2.create_dataset('arr', shape=(20, 20), chunks=(5, 5), dtype='f8')
    assert 16 == arr.nchunks
    assert 0 == arr.nchunks_initialized
    arr.attrs['data'] = 1
    arr[:] = 1.0
    assert 16 == arr.nchunks_initialized

    # perform consolidation
    out = consolidate_metadata(store)
    assert isinstance(out, Group)
    assert '.zmetadata' in store
    # remove the unconsolidated metadata so a successful open proves the
    # consolidated copy is actually being used
    for key in ['.zgroup',
                'g1/.zgroup',
                'g2/.zgroup',
                'g2/.zattrs',
                'g2/arr/.zarray',
                'g2/arr/.zattrs']:
        del store[key]

    # open consolidated
    z2 = open_consolidated(store)
    assert ['g1', 'g2'] == list(z2)
    assert 'world' == z2.g2.attrs['hello']
    assert 1 == z2.g2.arr.attrs['data']
    assert (z2.g2.arr[:] == 1.0).all()
    assert 16 == z2.g2.arr.nchunks
    assert 16 == z2.g2.arr.nchunks_initialized

    # tests del/write on the store
    cmd = ConsolidatedMetadataStore(store)
    with pytest.raises(PermissionError):
        del cmd['.zgroup']
    with pytest.raises(PermissionError):
        cmd['.zgroup'] = None

    # test getsize on the store
    assert isinstance(getsize(cmd), Integral)

    # test new metadata are not writeable
    with pytest.raises(PermissionError):
        z2.create_group('g3')
    with pytest.raises(PermissionError):
        z2.create_dataset('spam', shape=42, chunks=7, dtype='i4')
    with pytest.raises(PermissionError):
        del z2['g2']

    # test consolidated metadata are not writeable
    with pytest.raises(PermissionError):
        z2.g2.attrs['hello'] = 'universe'
    with pytest.raises(PermissionError):
        z2.g2.arr.attrs['foo'] = 'bar'

    # test the data are writeable
    z2.g2.arr[:] = 2
    assert (z2.g2.arr[:] == 2).all()

    # test invalid modes
    with pytest.raises(ValueError):
        open_consolidated(store, mode='a')
    with pytest.raises(ValueError):
        open_consolidated(store, mode='w')

    # make sure keyword arguments are passed through without error
    open_consolidated(store, cache_attrs=True, synchronizer=None)
def test_consolidate_metadata(with_chunk_store, zarr_version, listable,
                              monkeypatch, stores_from_path):
    """Parametrized consolidation test covering zarr v2 and v3, separate
    chunk stores, path-string stores, and unlistable (fsspec-backed)
    stores."""
    # setup initial data
    if stores_from_path:
        # stores given as filesystem path strings rather than BaseStore
        # instances; clean up the temp dirs at interpreter exit
        store = tempfile.mkdtemp()
        atexit.register(atexit_rmtree, store)
        if with_chunk_store:
            chunk_store = tempfile.mkdtemp()
            atexit.register(atexit_rmtree, chunk_store)
        else:
            chunk_store = None
        version_kwarg = {'zarr_version': zarr_version}
    else:
        if zarr_version == 2:
            store = MemoryStore()
            chunk_store = MemoryStore() if with_chunk_store else None
        elif zarr_version == 3:
            store = MemoryStoreV3()
            chunk_store = MemoryStoreV3() if with_chunk_store else None
        version_kwarg = {}
    # v3 requires an explicit path for the hierarchy root
    path = 'dataset' if zarr_version == 3 else None

    z = group(store, chunk_store=chunk_store, path=path, **version_kwarg)

    # Reload the actual store implementation in case str
    store_to_copy = z.store

    z.create_group('g1')
    g2 = z.create_group('g2')
    g2.attrs['hello'] = 'world'
    arr = g2.create_dataset('arr', shape=(20, 20), chunks=(5, 5), dtype='f8')
    assert 16 == arr.nchunks
    assert 0 == arr.nchunks_initialized
    arr.attrs['data'] = 1
    arr[:] = 1.0
    assert 16 == arr.nchunks_initialized

    if stores_from_path:
        # get the actual store class for use with consolidate_metadata
        store_class = z._store
    else:
        store_class = store

    if zarr_version == 3:
        # error on v3 if path not provided
        with pytest.raises(ValueError):
            consolidate_metadata(store_class, path=None)
        with pytest.raises(ValueError):
            consolidate_metadata(store_class, path='')

    # perform consolidation
    out = consolidate_metadata(store_class, path=path)
    assert isinstance(out, Group)
    assert ['g1', 'g2'] == list(out)

    if not stores_from_path:
        if zarr_version == 2:
            assert isinstance(out._store, ConsolidatedMetadataStore)
            assert '.zmetadata' in store
            meta_keys = ['.zgroup',
                         'g1/.zgroup',
                         'g2/.zgroup',
                         'g2/.zattrs',
                         'g2/arr/.zarray',
                         'g2/arr/.zattrs']
        else:
            assert isinstance(out._store, ConsolidatedMetadataStoreV3)
            assert 'meta/root/consolidated/.zmetadata' in store
            meta_keys = ['zarr.json',
                         meta_root + 'dataset.group.json',
                         meta_root + 'dataset/g1.group.json',
                         meta_root + 'dataset/g2.group.json',
                         meta_root + 'dataset/g2/arr.array.json',
                         'meta/root/consolidated.group.json']
        # remove the unconsolidated metadata so a successful open proves
        # the consolidated copy is actually being used
        for key in meta_keys:
            del store[key]

    # https://github.com/zarr-developers/zarr-python/issues/993
    # Make sure we can still open consolidated on an unlistable store:
    if not listable:
        fs_memory = pytest.importorskip("fsspec.implementations.memory")
        monkeypatch.setattr(fs_memory.MemoryFileSystem, "isdir", lambda x, y: False)
        monkeypatch.delattr(fs_memory.MemoryFileSystem, "ls")
        fs = fs_memory.MemoryFileSystem()
        if zarr_version == 2:
            store_to_open = FSStore("", fs=fs)
        else:
            store_to_open = FSStoreV3("", fs=fs)
        # copy original store to new unlistable store
        store_to_open.update(store_to_copy)
    else:
        store_to_open = store

    # open consolidated
    z2 = open_consolidated(store_to_open, chunk_store=chunk_store, path=path,
                           **version_kwarg)
    assert ['g1', 'g2'] == list(z2)
    assert 'world' == z2.g2.attrs['hello']
    assert 1 == z2.g2.arr.attrs['data']
    assert (z2.g2.arr[:] == 1.0).all()
    assert 16 == z2.g2.arr.nchunks
    if listable:
        assert 16 == z2.g2.arr.nchunks_initialized
    else:
        # counting initialized chunks requires listing, which this store
        # cannot do
        with pytest.raises(NotImplementedError):
            _ = z2.g2.arr.nchunks_initialized

    if stores_from_path:
        # path string is not a BaseStore subclass so cannot be used to
        # initialize a ConsolidatedMetadataStore.
        if zarr_version == 2:
            with pytest.raises(ValueError):
                cmd = ConsolidatedMetadataStore(store)
        elif zarr_version == 3:
            with pytest.raises(ValueError):
                cmd = ConsolidatedMetadataStoreV3(store)
    else:
        # tests del/write on the store
        if zarr_version == 2:
            cmd = ConsolidatedMetadataStore(store)
            with pytest.raises(PermissionError):
                del cmd['.zgroup']
            with pytest.raises(PermissionError):
                cmd['.zgroup'] = None
        else:
            cmd = ConsolidatedMetadataStoreV3(store)
            with pytest.raises(PermissionError):
                del cmd[meta_root + 'dataset.group.json']
            with pytest.raises(PermissionError):
                cmd[meta_root + 'dataset.group.json'] = None

        # test getsize on the store
        assert isinstance(getsize(cmd), Integral)

    # test new metadata are not writeable
    with pytest.raises(PermissionError):
        z2.create_group('g3')
    with pytest.raises(PermissionError):
        z2.create_dataset('spam', shape=42, chunks=7, dtype='i4')
    with pytest.raises(PermissionError):
        del z2['g2']

    # test consolidated metadata are not writeable
    with pytest.raises(PermissionError):
        z2.g2.attrs['hello'] = 'universe'
    with pytest.raises(PermissionError):
        z2.g2.arr.attrs['foo'] = 'bar'

    # test the data are writeable
    z2.g2.arr[:] = 2
    assert (z2.g2.arr[:] == 2).all()

    # test invalid modes
    with pytest.raises(ValueError):
        open_consolidated(store, chunk_store=chunk_store, mode='a', path=path)
    with pytest.raises(ValueError):
        open_consolidated(store, chunk_store=chunk_store, mode='w', path=path)
    with pytest.raises(ValueError):
        open_consolidated(store, chunk_store=chunk_store, mode='w-', path=path)

    # make sure keyword arguments are passed through without error
    open_consolidated(
        store, chunk_store=chunk_store, path=path, cache_attrs=True,
        synchronizer=None,
        **version_kwarg,
    )