def load_zarr_archive(zarr_path, result_shape, result_chunks, overwrite=False):
    """Open the zarr archive at ``zarr_path``, creating it when necessary.

    Args:
        zarr_path: Path-like object providing ``exists()``; it is passed to
            zarr as ``str(zarr_path)``.
        result_shape: Shape used when a new archive has to be created.
        result_chunks: Chunk layout used when a new archive has to be created.
        overwrite: When truthy, any existing archive at ``zarr_path`` is
            deleted and a fresh one is created.

    Returns:
        The object returned by ``zarr.open`` for an existing archive, or the
        newly created float64 array otherwise.
    """
    if overwrite:
        # Start from scratch: drop whatever is currently on disk.
        if zarr_path.exists():
            shutil.rmtree(zarr_path)
            print("zarr archive removed")
    elif zarr_path.exists():
        z = zarr.open(str(zarr_path))
        print("zarr archive loaded")
        return z

    # Single creation path shared by "overwrite" and "nothing on disk yet".
    z = zarr.create(
        result_shape,
        chunks=result_chunks,
        dtype=np.float64,
        fill_value=-np.inf,  # -np.inf indicates incomplete computation
        store=str(zarr_path),
    )
    print("zarr_archive created")
    return z
def get_auto_chunks(shape, dtype):
    """Return the chunk shape zarr picks by default for ``shape``/``dtype``.

    This is a hack: a throwaway array is created only so that its ``chunks``
    attribute can be read back.
    """
    # Object arrays cannot be created without an object codec.
    extra = {"object_codec": zarr.codecs.Pickle()} if dtype == object else {}
    probe = zarr.create(shape, dtype=dtype, **extra)
    return probe.chunks
def get_temp_filepath(self):
    """Populate ``self.storage_obj`` for the backend named by ``self.backend``.

    Supported backends are ``'POSIX'`` (fresh temporary directory), ``'GCS'``
    (a ``gcsfs`` mapping) and ``'FUSE'`` (a bucket mounted via ``GCSFUSE``
    into a temporary directory). For non-dask runs ``storage_obj`` is the
    store itself; for dask runs it is a zarr array created on that store.

    Raises:
        NotImplementedError: if the backend is unknown, or if the GCS / FUSE
            configuration attribute is falsy.
    """

    def _store_or_array(store):
        # Saving dask objects as Zarr requires more than just a filehandle,
        # so dask runs get a zarr array created (with overwrite) on the store.
        if not self.dask:
            return store
        return zarr.create(shape=self.shape, chunks=self.chunksize,
                           store=store, dtype=self.dtype, overwrite=True)

    backend = self.backend

    if backend == 'POSIX':
        self.temp_dir = tempfile.mkdtemp()
        self.dir_store = os.path.join(
            self.temp_dir, 'temp-%s%s' % (next(_counter), self.suffix))
        self.storage_obj = _store_or_array(self.dir_store)
        return

    if backend == 'GCS':
        if not self.gcs_zarr:
            raise NotImplementedError("Missing config for GCP test")

        # HACK in order to give worker pods read/write to storage
        cached_fs = gcsfs.GCSFileSystem(project=self.gcp_project_name,
                                        token='cache')
        credentials = cached_fs.session.credentials
        self.gcp_project = gcsfs.GCSFileSystem(project=self.gcp_project_name,
                                               token=credentials)
        self.gcsfsmap = gcsfs.mapping.GCSMap(self.gcs_zarr,
                                             gcs=self.gcp_project,
                                             check=True, create=False)
        if not self.dask:
            # Only the non-dask path removes the remote target via gsutil.
            gsutil_arg = "gs://%s" % self.gcs_zarr
            call(["gsutil", "-q", "-m", "rm", "-r", gsutil_arg])
        self.storage_obj = _store_or_array(self.gcsfsmap)
        return

    if backend == 'FUSE':
        if not self.gcs_zarr_fuse:
            raise NotImplementedError("Missing config for FUSE test")

        self.temp_dir = tempfile.mkdtemp()
        self.dir_store = self.temp_dir + self.gcs_zarr_fuse
        call([GCSFUSE, self.gcs_bucket, self.temp_dir])

        # Remove previous test runs
        if os.path.exists(self.dir_store):
            shutil.rmtree(self.dir_store)
        os.makedirs(self.dir_store)

        self.storage_obj = _store_or_array(self.dir_store)
        return

    raise NotImplementedError("Storage backend not implemented.")
def test_set_orthogonal_selection_1d():
    """Run `_test_set_orthogonal_selection` over a range of 1-D selections.

    Covers boolean masks and integer index arrays (unordered, ascending,
    descending) at several densities, then every entry of
    `basic_selections_1d`.
    """
    values = np.arange(1050, dtype=int)
    scratch = np.empty(values.shape, dtype=int)
    target = zarr.create(shape=scratch.shape, chunks=100, dtype=scratch.dtype)
    length = scratch.shape[0]

    # Fixed seed so the randomly drawn selections are reproducible.
    np.random.seed(42)
    for density in (0.5, 0.1, 0.01):
        # boolean mask
        mask = np.random.binomial(1, density, size=length).astype(bool)
        _test_set_orthogonal_selection(values, scratch, target, mask)

        # integer indices: as drawn, sorted ascending, then descending
        picks = np.random.choice(length, size=int(length * density),
                                 replace=True)
        _test_set_orthogonal_selection(values, scratch, target, picks)
        picks.sort()
        _test_set_orthogonal_selection(values, scratch, target, picks)
        _test_set_orthogonal_selection(values, scratch, target, picks[::-1])

    # basic selections
    for basic in basic_selections_1d:
        _test_set_orthogonal_selection(values, scratch, target, basic)
def test_set_orthogonal_selection_3d():
    """Run `_test_set_orthogonal_selection_3d` with per-axis random selectors.

    For each density the three axes are indexed with boolean masks, then with
    integer arrays as drawn, sorted increasing and sorted decreasing.
    """
    values = np.arange(100000, dtype=int).reshape(200, 50, 10)
    scratch = np.empty_like(values)
    target = zarr.create(shape=scratch.shape, chunks=(60, 20, 3),
                         dtype=scratch.dtype)
    dims = scratch.shape

    np.random.seed(42)
    for density in (0.5, 0.1, 0.01):
        # Only the first axis follows `density`; the others stay at one half.
        fractions = (density, .5, .5)

        # boolean arrays (drawn axis by axis, in order)
        masks = [
            np.random.binomial(1, frac, size=extent).astype(bool)
            for frac, extent in zip(fractions, dims)
        ]
        _test_set_orthogonal_selection_3d(values, scratch, target, *masks)

        # integer arrays (drawn axis by axis, in order)
        indices = [
            np.random.choice(extent, size=int(extent * frac), replace=True)
            for frac, extent in zip(fractions, dims)
        ]
        _test_set_orthogonal_selection_3d(values, scratch, target, *indices)

        # sorted increasing
        for axis_indices in indices:
            axis_indices.sort()
        _test_set_orthogonal_selection_3d(values, scratch, target, *indices)

        # sorted decreasing
        indices = [axis_indices[::-1] for axis_indices in indices]
        _test_set_orthogonal_selection_3d(values, scratch, target, *indices)
def test_set_coordinate_selection_2d():
    """Run `_test_set_coordinate_selection` on 2-D point selections.

    Uses scalar pairs, paired index arrays and array/int mixes at several
    densities, and finally a pair of 2-D index arrays.
    """
    values = np.arange(10000, dtype=int).reshape(1000, 10)
    scratch = np.empty_like(values)
    target = zarr.create(shape=scratch.shape, chunks=(300, 3),
                         dtype=scratch.dtype)
    n_rows, n_cols = scratch.shape

    np.random.seed(42)
    for density in (2, 0.5, 0.1, 0.01):
        count = int(scratch.size * density)
        rows = np.random.choice(n_rows, size=count, replace=True)
        cols = np.random.choice(n_cols, size=count, replace=True)

        cases = (
            (42, 4),
            (-1, -1),
            # index both axes with array
            (rows, cols),
            # mixed indexing with array / int
            (rows, 4),
            (42, cols),
        )
        for case in cases:
            _test_set_coordinate_selection(values, scratch, target, case)

    # multi-dimensional selection
    rows_2d = np.array([[1, 2, 3], [4, 5, 6]])
    cols_2d = np.array([[1, 3, 2], [2, 0, 5]])
    _test_set_coordinate_selection(values, scratch, target,
                                   (rows_2d, cols_2d))
def create(cls, path: Path, array_info: ArrayInfo) -> "ZarrArray":
    """Create a zarr array at ``path`` as described by ``array_info``.

    The array starts with shape ``(num_channels, 1, 1, 1)``, uses Fortran
    order, and is zstd/Blosc-compressed only when
    ``array_info.compression_mode`` is truthy. Returns a ``ZarrArray``
    wrapping ``path``.
    """
    assert array_info.data_format == cls.data_format
    assert array_info.chunks_per_shard == Vec3Int.full(
        1), "Zarr storage doesn't support sharding yet"

    if array_info.compression_mode:
        compressor = numcodecs.Blosc(
            cname="zstd", clevel=3, shuffle=numcodecs.Blosc.SHUFFLE)
    else:
        compressor = None

    channels = array_info.num_channels
    zarr.create(
        shape=(channels, 1, 1, 1),
        # One chunk holds all channels of a chunk-sized block.
        chunks=(channels, ) + array_info.chunk_size.to_tuple(),
        dtype=array_info.voxel_type,
        compressor=compressor,
        store=_fsstore_from_path(path),
        order="F",
    )
    return ZarrArray(path)
def prepare_zarr_storage(variations, out_path):
    """Create the zarr layout for ``variations`` and return a lazy store task.

    A directory store is (re)created at ``out_path``, one zarr array is
    created for the samples and one per non-``None`` field, and the result of
    ``da.store(..., compute=False)`` pairing each source array with its
    target is returned.
    """

    def _codec_for(dtype):
        # Object arrays get a variable-length UTF-8 codec; others need none.
        return numcodecs.VLenUTF8() if dtype == object else None

    store = zarr.DirectoryStore(str(out_path))
    root = zarr.group(store=store, overwrite=True)
    metadata = variations.metadata

    samples_array = variations.samples
    samples_dataset = zarr.create(shape=samples_array.shape, path='samples',
                                  store=store, dtype=samples_array.dtype,
                                  object_codec=_codec_for(samples_array.dtype))
    sources = [samples_array]
    targets = [samples_dataset]

    variants = root.create_group(ZARR_VARIANTS_GROUP_NAME, overwrite=True)
    calls = root.create_group(ZARR_CALL_GROUP_NAME, overwrite=True)

    for field, _ in variations.items():
        definition = ALLELE_ZARR_DEFINITION_MAPPINGS[field]
        field_metadata = metadata.get(field, None)
        array = variations[field]
        if array is None:
            continue

        array.compute_chunk_sizes()
        sources.append(array)

        in_calls = definition['group'] == ZARR_CALL_GROUP_NAME
        group = calls if in_calls else variants
        path = os.path.sep + os.path.join(group.path, definition['field'])

        dataset = zarr.create(shape=array.shape, path=path, store=store,
                              object_codec=_codec_for(array.dtype),
                              dtype=array.dtype)
        if field_metadata is not None:
            # Copy the field's metadata onto the array attributes.
            for key, value in field_metadata.items():
                dataset.attrs[key] = value
        targets.append(dataset)

    return da.store(sources, targets, compute=False, lock=SerializableLock())
def zarr_array(args):
    """Create (overwriting) a 50000x50000 int32 zarr array under ``args.path``.

    The array lives in a nested directory store at ``<args.path>/zarr`` and
    is chunked in 1000x1000 blocks.
    """
    store_dir = os.path.join(args.path, 'zarr')
    nested_store = zarr.storage.NestedDirectoryStore(store_dir)
    return zarr.create(
        shape=(50000, 50000),
        chunks=(1000, 1000),
        dtype='int32',
        store=nested_store,
        overwrite=True,
    )
def test_get_coordinate_selection_2d():
    """Run `_test_get_coordinate_selection` on 2-D point selections.

    Also checks that slices and Ellipsis are rejected with ``IndexError`` by
    ``get_coordinate_selection``.
    """
    data = np.arange(10000, dtype=int).reshape(1000, 10)
    arr = zarr.create(shape=data.shape, chunks=(300, 3), dtype=data.dtype)
    arr[:] = data
    n_rows, n_cols = data.shape

    np.random.seed(42)
    for density in (2, 0.5, 0.1, 0.01):
        count = int(data.size * density)
        rows = np.random.choice(n_rows, size=count, replace=True)
        cols = np.random.choice(n_cols, size=count, replace=True)
        cases = [
            # single value
            (42, 4),
            (-1, -1),
            # index both axes with array
            (rows, cols),
            # mixed indexing with array / int
            (rows, 4),
            (42, cols),
            (42, 4),
        ]
        for case in cases:
            _test_get_coordinate_selection(data, arr, case)

    fixed_cases = (
        # not monotonically increasing (first dim)
        ([3, 3, 4, 2, 5], [1, 3, 5, 7, 9]),
        # not monotonically increasing (second dim)
        ([1, 1, 2, 2, 5], [1, 3, 2, 1, 0]),
        # multi-dimensional selection
        (np.array([[1, 1, 2], [2, 2, 5]]), np.array([[1, 3, 2], [1, 0, 0]])),
    )
    for rows, cols in fixed_cases:
        _test_get_coordinate_selection(data, arr, (rows, cols))

    rejected = (
        (slice(5, 15), [1, 2, 3]),
        ([1, 2, 3], slice(5, 15)),
        (Ellipsis, [1, 2, 3]),
        Ellipsis,
    )
    for selection in rejected:
        with pytest.raises(IndexError):
            arr.get_coordinate_selection(selection)
def main():
    """Time writing an all-ones uint8 array through the PNG codec.

    Registers ``PngCodec`` under the name "png", then, inside a "Compress"
    timer, creates an in-memory zarr array, fills it with ones and prints the
    shape of the data read back.
    """
    numcodecs.register_codec(PngCodec, "png")

    shape = (10, 10, 1920, 1080, 7)
    with Timer("Compress"):
        arr = zarr.create(
            shape=shape,
            dtype="uint8",
            compressor=PngCodec(solo_channel=True),
            store=zarr.MemoryStore(),
        )
        arr[:] = np.ones(shape, dtype="uint8")
        print(arr[:].shape)
def test_set_mask_selection_2d():
    """Run `_test_set_mask_selection` with random 2-D boolean masks."""
    values = np.arange(10000, dtype=int).reshape(1000, 10)
    scratch = np.empty_like(values)
    target = zarr.create(shape=scratch.shape, chunks=(300, 3),
                         dtype=scratch.dtype)

    np.random.seed(42)
    # test with different degrees of sparseness
    for density in (0.5, 0.1, 0.01):
        flat = np.random.binomial(1, density, size=scratch.size).astype(bool)
        mask = flat.reshape(scratch.shape)
        _test_set_mask_selection(values, scratch, target, mask)
def time_zarr(dataset, batch_size=1, num_batches=1):
    """Open or create the ``<dataset>_zarr`` array and time batched access.

    When the store does not exist yet, an array of shape
    ``(batch_size * num_batches, 785)`` with one batch of rows per chunk is
    created. Timing itself is delegated to ``time_batches``.
    """
    store_path = dataset + "_zarr"
    if not os.path.exists(store_path):
        ds_zarr = zarr.create(
            shape=(batch_size * num_batches, 785),
            chunks=(batch_size, None),
            store=store_path,
        )
    else:
        ds_zarr = zarr.open(store_path)

    # Sanity check: the store must hold an array, not a group.
    assert type(ds_zarr) == zarr.core.Array
    time_batches(ds_zarr, batch_size, num_batches)
def test_orthogonal_indexing_edge_cases():
    """Compare ``z.oindex`` with the ``oindex`` reference on a (1, 2, 3) array.

    The last axis is selected once with an integer list and once with a
    boolean list, each covering the whole axis.
    """
    data = np.arange(6).reshape(1, 2, 3)
    arr = zarr.create(shape=data.shape, chunks=(1, 2, 3), dtype=data.dtype)
    arr[:] = data

    for last_axis in ([0, 1, 2], [True, True, True]):
        expected = oindex(data, (0, slice(None), last_axis))
        observed = arr.oindex[0, :, last_axis]
        assert_array_equal(expected, observed)
def test_get_basic_selection_0d():
    """Check basic selection on zero-dimensional arrays.

    Exercised for a plain scalar and for a structured scalar, including the
    ``out`` and ``fields`` parameters of ``get_basic_selection``.
    """
    # --- plain scalar ---
    scalar = np.array(42)
    arr = zarr.create(shape=scalar.shape, dtype=scalar.dtype, fill_value=None)
    arr[...] = scalar
    assert_array_equal(scalar, arr.get_basic_selection(Ellipsis))
    assert_array_equal(scalar, arr[...])
    assert 42 == arr.get_basic_selection(())
    assert 42 == arr[()]

    # test out param
    buf = np.zeros_like(scalar)
    arr.get_basic_selection(Ellipsis, out=buf)
    assert_array_equal(scalar, buf)

    # --- structured scalar ---
    value = (b'aaa', 1, 4.2)
    record_dtype = [('foo', 'S3'), ('bar', 'i4'), ('baz', 'f8')]
    record = np.array(value, dtype=record_dtype)
    arr = zarr.create(shape=record.shape, dtype=record.dtype, fill_value=None)
    arr[()] = value
    assert_array_equal(record, arr.get_basic_selection(Ellipsis))
    assert_array_equal(record, arr[...])
    assert record[()] == arr.get_basic_selection(())
    assert record[()] == arr[()]
    assert b'aaa' == arr.get_basic_selection((), fields='foo')
    assert b'aaa' == arr['foo']

    pair = ['foo', 'bar']
    assert record[pair] == arr.get_basic_selection((), fields=['foo', 'bar'])
    assert record[pair] == arr['foo', 'bar']

    # test out param
    buf = np.zeros_like(record)
    arr.get_basic_selection(Ellipsis, out=buf)
    assert_array_equal(record, buf)

    sub_buf = np.zeros_like(record[pair])
    arr.get_basic_selection(Ellipsis, out=sub_buf, fields=['foo', 'bar'])
    assert_array_equal(record[pair], sub_buf)
def _zarr_create_with_attrs(  # type: ignore[no-untyped-def]
    shape, chunks, dtype, store, path, overwrite, attrs, **kwargs
):
    """Create a zarr array and, if given, set its attributes in one step.

    Creation and the attribute update happen in this single function so that
    both run within the same task (thread). Extra keyword arguments are
    forwarded to ``zarr.create`` unchanged.
    """
    arr = zarr.create(
        shape=shape,
        chunks=chunks,
        dtype=dtype,
        store=store,
        path=path,
        overwrite=overwrite,
        **kwargs,
    )
    if attrs is None:
        return arr

    arr.attrs.update(attrs)
    return arr
def test_set_basic_selection_0d():
    """Check basic assignment on zero-dimensional arrays.

    Exercised for a plain scalar and for a structured scalar, including
    single-field assignment; assigning several fields at once must raise
    ``IndexError``.
    """
    # --- plain scalar ---
    filled = np.array(42)
    blank = np.zeros_like(filled)
    arr = zarr.zeros_like(filled)
    assert_array_equal(blank, arr)

    arr.set_basic_selection(Ellipsis, filled)
    assert_array_equal(filled, arr)
    arr[...] = 0
    assert_array_equal(blank, arr)
    arr[...] = filled
    assert_array_equal(filled, arr)

    # --- structured scalar ---
    value = (b'aaa', 1, 4.2)
    record_dtype = [('foo', 'S3'), ('bar', 'i4'), ('baz', 'f8')]
    filled = np.array(value, dtype=record_dtype)
    blank = np.zeros_like(filled)
    arr = zarr.create(shape=blank.shape, dtype=blank.dtype, fill_value=None)

    arr.set_basic_selection(Ellipsis, filled)
    assert_array_equal(filled, arr)
    arr.set_basic_selection(Ellipsis, blank)
    assert_array_equal(blank, arr)
    arr[...] = filled
    assert_array_equal(filled, arr)
    arr[...] = blank
    assert_array_equal(blank, arr)

    # with fields: only 'foo' is written, the rest stays zeroed
    arr.set_basic_selection(Ellipsis, filled['foo'], fields='foo')
    assert filled['foo'] == arr['foo']
    assert blank['bar'] == arr['bar']
    assert blank['baz'] == arr['baz']

    # then 'bar' as well, via item assignment
    arr['bar'] = filled['bar']
    assert filled['foo'] == arr['foo']
    assert filled['bar'] == arr['bar']
    assert blank['baz'] == arr['baz']

    # multiple field assignment not supported
    with pytest.raises(IndexError):
        arr.set_basic_selection(Ellipsis, filled[['foo', 'bar']],
                                fields=['foo', 'bar'])
    with pytest.raises(IndexError):
        arr[..., 'foo', 'bar'] = filled[['foo', 'bar']]
def test_get_basic_selection_1d():
    """Check 1-D basic selection against the shared selection fixtures.

    Valid selections go through `_test_get_basic_selection`; entries of
    `basic_selections_1d_bad` and the list ``[1, 0]`` must raise
    ``IndexError``.
    """
    data = np.arange(1050, dtype=int)
    arr = zarr.create(shape=data.shape, chunks=100, dtype=data.dtype)
    arr[:] = data

    for good in basic_selections_1d:
        _test_get_basic_selection(data, arr, good)

    for bad in basic_selections_1d_bad:
        with pytest.raises(IndexError):
            arr.get_basic_selection(bad)
        with pytest.raises(IndexError):
            arr[bad]

    with pytest.raises(IndexError):
        arr.get_basic_selection([1, 0])
def test_set_mask_selection_1d():
    """Run `_test_set_mask_selection` with random 1-D boolean masks.

    Entries of `mask_selections_1d_bad` must raise ``IndexError`` through
    both ``set_mask_selection`` and ``vindex`` assignment.
    """
    values = np.arange(1050, dtype=int)
    scratch = np.empty_like(values)
    target = zarr.create(shape=scratch.shape, chunks=100, dtype=scratch.dtype)

    np.random.seed(42)
    # test with different degrees of sparseness
    for density in (0.5, 0.1, 0.01):
        mask = np.random.binomial(1, density,
                                  size=scratch.shape[0]).astype(bool)
        _test_set_mask_selection(values, scratch, target, mask)

    for bad in mask_selections_1d_bad:
        with pytest.raises(IndexError):
            target.set_mask_selection(bad, 42)
        with pytest.raises(IndexError):
            target.vindex[bad] = 42
def test_get_basic_selection_1d():
    """Check 1-D basic selection against the shared selection fixtures.

    Valid selections go through `_test_get_basic_selection`; entries of
    `basic_selections_1d_bad` plus a fancy-indexing list must raise
    ``IndexError`` through both access paths.
    """
    data = np.arange(1050, dtype=int)
    arr = zarr.create(shape=data.shape, chunks=100, dtype=data.dtype)
    arr[:] = data

    for good in basic_selections_1d:
        _test_get_basic_selection(data, arr, good)

    rejected = basic_selections_1d_bad + [
        [0, 1],  # fancy indexing
    ]
    for bad in rejected:
        with pytest.raises(IndexError):
            arr.get_basic_selection(bad)
        with pytest.raises(IndexError):
            arr[bad]
def test_get_orthogonal_selection_2d():
    """Check 2-D orthogonal reads with random and fixture-based selections.

    Per density: boolean masks, mixed int/bool pairs, and integer arrays (as
    drawn, ascending, descending). Afterwards `basic_selections_2d` must work
    and `basic_selections_2d_bad` must raise ``IndexError``.
    """
    data = np.arange(10000, dtype=int).reshape(1000, 10)
    arr = zarr.create(shape=data.shape, chunks=(300, 3), dtype=data.dtype)
    arr[:] = data
    n_rows, n_cols = data.shape

    np.random.seed(42)
    for density in (0.5, 0.1, 0.01):
        # boolean arrays
        row_mask = np.random.binomial(1, density, size=n_rows).astype(bool)
        col_mask = np.random.binomial(1, 0.5, size=n_cols).astype(bool)
        _test_get_orthogonal_selection_2d(data, arr, row_mask, col_mask)

        # mixed int array / bool array
        mixed = (
            (row_mask, np.nonzero(col_mask)[0]),
            (np.nonzero(row_mask)[0], col_mask),
        )
        for pair in mixed:
            _test_get_orthogonal_selection(data, arr, pair)

        # integer arrays: as drawn, sorted ascending, then descending
        row_idx = np.random.choice(n_rows, size=int(n_rows * density),
                                   replace=True)
        col_idx = np.random.choice(n_cols, size=int(n_cols * .5),
                                   replace=True)
        _test_get_orthogonal_selection_2d(data, arr, row_idx, col_idx)
        row_idx.sort()
        col_idx.sort()
        _test_get_orthogonal_selection_2d(data, arr, row_idx, col_idx)
        _test_get_orthogonal_selection_2d(data, arr,
                                          row_idx[::-1], col_idx[::-1])

    for good in basic_selections_2d:
        _test_get_orthogonal_selection(data, arr, good)

    for bad in basic_selections_2d_bad:
        with pytest.raises(IndexError):
            arr.get_orthogonal_selection(bad)
        with pytest.raises(IndexError):
            arr.oindex[bad]
def test_get_coordinate_selection_1d():
    """Check 1-D coordinate reads with random, fixed and invalid selections.

    Invalid selections (`coordinate_selections_1d_bad` plus out-of-bounds
    indices) must raise ``IndexError`` through ``get_coordinate_selection``
    and ``vindex``.
    """
    data = np.arange(1050, dtype=int)
    arr = zarr.create(shape=data.shape, chunks=100, dtype=data.dtype)
    arr[:] = data
    length = data.shape[0]

    np.random.seed(42)
    # test with different degrees of sparseness
    for density in (2, 0.5, 0.1, 0.01):
        count = int(data.size * density)
        picks = np.random.choice(length, size=count, replace=True)
        _test_get_coordinate_selection(data, arr, picks)
        picks.sort()
        _test_get_coordinate_selection(data, arr, picks)
        _test_get_coordinate_selection(data, arr, picks[::-1])

    fixed = [
        # test single item
        42,
        -1,
        # test wraparound
        [0, 3, 10, -23, -12, -1],
        # test out of order
        [3, 105, 23, 127],  # not monotonically increasing
        # test multi-dimensional selection
        np.array([[2, 4], [6, 8]]),
    ]
    for good in fixed:
        _test_get_coordinate_selection(data, arr, good)

    # test errors
    rejected = coordinate_selections_1d_bad + [
        [length + 1],  # out of bounds
        [-(length + 1)],  # out of bounds
    ]
    for bad in rejected:
        with pytest.raises(IndexError):
            arr.get_coordinate_selection(bad)
        with pytest.raises(IndexError):
            arr.vindex[bad]
def test_get_mask_selection_2d():
    """Check 2-D mask reads; wrongly shaped masks must raise ``IndexError``."""
    data = np.arange(10000, dtype=int).reshape(1000, 10)
    arr = zarr.create(shape=data.shape, chunks=(300, 3), dtype=data.dtype)
    arr[:] = data

    np.random.seed(42)
    # test with different degrees of sparseness
    for density in (0.5, 0.1, 0.01):
        flat = np.random.binomial(1, density, size=data.size).astype(bool)
        _test_get_mask_selection(data, arr, flat.reshape(data.shape))

    # test errors
    with pytest.raises(IndexError):
        arr.vindex[np.zeros((1000, 5), dtype=bool)]  # too short
    with pytest.raises(IndexError):
        arr.vindex[np.zeros((2000, 10), dtype=bool)]  # too long
    with pytest.raises(IndexError):
        arr.vindex[[True, False]]  # wrong no. dimensions
def test_get_orthogonal_selection_1d_bool():
    """Check 1-D orthogonal reads with boolean masks.

    Masks of the wrong length or dimensionality must raise ``IndexError``.
    """
    data = np.arange(1050, dtype=int)
    arr = zarr.create(shape=data.shape, chunks=100, dtype=data.dtype)
    arr[:] = data

    np.random.seed(42)
    # test with different degrees of sparseness
    for density in (0.5, 0.1, 0.01):
        mask = np.random.binomial(1, density, size=data.shape[0]).astype(bool)
        _test_get_orthogonal_selection(data, arr, mask)

    # test errors
    with pytest.raises(IndexError):
        arr.oindex[np.zeros(50, dtype=bool)]  # too short
    with pytest.raises(IndexError):
        arr.oindex[np.zeros(2000, dtype=bool)]  # too long
    with pytest.raises(IndexError):
        arr.oindex[[[True, False], [False, True]]]  # too many dimensions
def test_get_basic_selection_2d():
    """Check 2-D basic selection against the shared selection fixtures.

    Entries of `basic_selections_2d_bad` and integer-array selections must
    raise ``IndexError`` through both access paths.
    """
    data = np.arange(10000, dtype=int).reshape(1000, 10)
    arr = zarr.create(shape=data.shape, chunks=(300, 3), dtype=data.dtype)
    arr[:] = data

    for good in basic_selections_2d:
        _test_get_basic_selection(data, arr, good)

    rejected = basic_selections_2d_bad + [
        # integer arrays
        [0, 1],
        ([0, 1], [0, 1]),
        (slice(None), [0, 1]),
    ]
    for bad in rejected:
        with pytest.raises(IndexError):
            arr.get_basic_selection(bad)
        with pytest.raises(IndexError):
            arr[bad]
def time_zarr(dataset, batch_size=1):
    """Mirror a hub dataset into a local zarr array and time batched reads.

    The store is named after the second "/"-separated component of
    ``dataset`` plus "_zarr". If it does not exist, it is created and filled
    batch by batch with each image flattened and its label appended as the
    last column. Every batch read is then timed and printed.
    """
    ds = hub.Dataset(dataset)
    store_path = dataset.split("/")[1] + "_zarr"

    def _batch_rows():
        # Row slice of each full batch; a trailing partial batch is skipped.
        for batch in range(ds.shape[0] // batch_size):
            yield slice(batch * batch_size, (batch + 1) * batch_size)

    if not os.path.exists(store_path):
        store = zarr.DirectoryStore(store_path)
        image_shape = ds["image"].shape
        shape = [
            image_shape[0],
            image_shape[1] * image_shape[2] * image_shape[3] + 1,
        ]
        ds_zarr = zarr.create((shape[0], shape[1]), store=store,
                              chunks=(batch_size, None))
        for rows in _batch_rows():
            images = ds["image", rows].compute().reshape(batch_size, -1)
            labels = ds["label", rows].compute().reshape(batch_size, -1)
            ds_zarr[rows] = np.concatenate((images, labels), axis=1)
    else:
        ds_zarr = zarr.open(store_path)

    assert type(ds_zarr) == zarr.core.Array

    with Timer("Time"):
        t0 = time()
        for counter, rows in enumerate(_batch_rows(), start=1):
            # The values are discarded; the reads themselves are being timed.
            x, y = ds_zarr[rows, :-1], ds_zarr[rows, -1]
            t1 = time()
            print("Batch", counter, f"dt: {t1 - t0}")
            t0 = t1
def test_get_orthogonal_selection_1d_int():
    """Check 1-D orthogonal reads with integer selections.

    Random index arrays are tried unordered, increasing and decreasing;
    `basic_selections_1d` plus wraparound/unsorted lists must work, and
    invalid selections must raise ``IndexError``.
    """
    data = np.arange(1050, dtype=int)
    arr = zarr.create(shape=data.shape, chunks=100, dtype=data.dtype)
    arr[:] = data
    length = data.shape[0]

    np.random.seed(42)
    # test with different degrees of sparseness
    for density in (2, 0.5, 0.1, 0.01):
        # unordered
        picks = np.random.choice(length, size=int(length * density),
                                 replace=True)
        _test_get_orthogonal_selection(data, arr, picks)
        # increasing
        picks.sort()
        _test_get_orthogonal_selection(data, arr, picks)
        # decreasing
        _test_get_orthogonal_selection(data, arr, picks[::-1])

    accepted = basic_selections_1d + [
        # test wraparound
        [0, 3, 10, -23, -12, -1],
        # explicit test not sorted
        [3, 105, 23, 127],
    ]
    for good in accepted:
        _test_get_orthogonal_selection(data, arr, good)

    rejected = basic_selections_1d_bad + [
        [length + 1],  # out of bounds
        [-(length + 1)],  # out of bounds
        [[2, 4], [6, 8]],  # too many dimensions
    ]
    for bad in rejected:
        with pytest.raises(IndexError):
            arr.get_orthogonal_selection(bad)
        with pytest.raises(IndexError):
            arr.oindex[bad]
def test_set_coordinate_selection_1d():
    """Run `_test_set_coordinate_selection` on 1-D point selections.

    Entries of `coordinate_selections_1d_bad` must raise ``IndexError``
    through ``set_coordinate_selection`` and ``vindex`` assignment.
    """
    values = np.arange(1050, dtype=int)
    scratch = np.empty(values.shape, dtype=values.dtype)
    target = zarr.create(shape=scratch.shape, chunks=100, dtype=scratch.dtype)

    np.random.seed(42)
    # test with different degrees of sparseness
    for density in (2, 0.5, 0.1, 0.01):
        count = int(scratch.size * density)
        picks = np.random.choice(scratch.shape[0], size=count, replace=True)
        _test_set_coordinate_selection(values, scratch, target, picks)

    # multi-dimensional selection
    picks_2d = np.array([[2, 4], [6, 8]])
    _test_set_coordinate_selection(values, scratch, target, picks_2d)

    for bad in coordinate_selections_1d_bad:
        with pytest.raises(IndexError):
            target.set_coordinate_selection(bad, 42)
        with pytest.raises(IndexError):
            target.vindex[bad] = 42
def test_get_basic_selection_2d():
    """Check 2-D basic selection, including the fancy-indexing fallback.

    Entries of `basic_selections_2d_bad` and selections containing a single
    integer list must raise ``IndexError``, while a pair of integer lists
    passed to ``z[...]`` is expected to return the matching points.
    """
    data = np.arange(10000, dtype=int).reshape(1000, 10)
    arr = zarr.create(shape=data.shape, chunks=(300, 3), dtype=data.dtype)
    arr[:] = data

    for good in basic_selections_2d:
        _test_get_basic_selection(data, arr, good)

    rejected = basic_selections_2d_bad + [
        # integer arrays
        [0, 1],
        (slice(None), [0, 1]),
    ]
    for bad in rejected:
        with pytest.raises(IndexError):
            arr.get_basic_selection(bad)
        with pytest.raises(IndexError):
            arr[bad]

    # check fallback on fancy indexing
    point_pairs = ([0, 1], [0, 1])
    np.testing.assert_array_equal(arr[point_pairs], [0, 11])
def test_get_mask_selection_1d():
    """Check 1-D mask reads with random masks and invalid selections.

    `mask_selections_1d_bad` plus masks of the wrong length or dimensionality
    must raise ``IndexError`` through ``get_mask_selection`` and ``vindex``.
    """
    data = np.arange(1050, dtype=int)
    arr = zarr.create(shape=data.shape, chunks=100, dtype=data.dtype)
    arr[:] = data

    np.random.seed(42)
    # test with different degrees of sparseness
    for density in (0.5, 0.1, 0.01):
        mask = np.random.binomial(1, density, size=data.shape[0]).astype(bool)
        _test_get_mask_selection(data, arr, mask)

    # test errors
    rejected = mask_selections_1d_bad + [
        np.zeros(50, dtype=bool),  # too short
        np.zeros(2000, dtype=bool),  # too long
        [[True, False], [False, True]],  # too many dimensions
    ]
    for bad in rejected:
        with pytest.raises(IndexError):
            arr.get_mask_selection(bad)
        with pytest.raises(IndexError):
            arr.vindex[bad]
def test_get_selection_out():
    """Check the ``out`` parameter of the basic/orthogonal/coordinate getters.

    Basic selections write into a zarr array; orthogonal and coordinate
    selections write into a NumPy buffer. A plain list as ``out`` must raise
    ``TypeError``.
    """

    def _fresh_2d():
        # Each 2-D section starts from its own freshly filled array.
        data_2d = np.arange(10000, dtype=int).reshape(1000, 10)
        arr_2d = zarr.create(shape=data_2d.shape, chunks=(300, 3),
                             dtype=data_2d.dtype)
        arr_2d[:] = data_2d
        return data_2d, arr_2d

    # --- basic selections ---
    data = np.arange(1050)
    arr = zarr.create(shape=1050, chunks=100, dtype=data.dtype)
    arr[:] = data
    for window in (slice(50, 150), slice(0, 1050), slice(1, 2)):
        expected = data[window]
        sink = zarr.create(shape=expected.shape, chunks=10,
                           dtype=expected.dtype, fill_value=0)
        arr.get_basic_selection(window, out=sink)
        assert_array_equal(expected, sink[:])

    with pytest.raises(TypeError):
        arr.get_basic_selection(Ellipsis, out=[])

    # --- orthogonal selections ---
    data, arr = _fresh_2d()
    np.random.seed(42)
    # test with different degrees of sparseness
    for density in (0.5, 0.1, 0.01):
        row_mask = np.random.binomial(1, density,
                                      size=data.shape[0]).astype(bool)
        col_mask = np.random.binomial(1, .5, size=data.shape[1]).astype(bool)
        cases = [
            # index both axes with array
            (row_mask, col_mask),
            # mixed indexing with array / slice
            (row_mask, slice(1, 5)),
            (slice(250, 350), col_mask),
            # mixed indexing with array / int
            (row_mask, 4),
            (42, col_mask),
            # mixed int array / bool array
            (row_mask, np.nonzero(col_mask)[0]),
            (np.nonzero(row_mask)[0], col_mask),
        ]
        for case in cases:
            expected = oindex(data, case)
            # A NumPy buffer (not a zarr array) serves as the out target here.
            sink = np.zeros(expected.shape, dtype=expected.dtype)
            arr.get_orthogonal_selection(case, out=sink)
            assert_array_equal(expected, sink[:])

    # --- coordinate selections ---
    data, arr = _fresh_2d()
    np.random.seed(42)
    # test with different degrees of sparseness
    for density in (0.5, 0.1, 0.01):
        count = int(data.size * density)
        rows = np.random.choice(data.shape[0], size=count, replace=True)
        cols = np.random.choice(data.shape[1], size=count, replace=True)
        cases = [
            # index both axes with array
            (rows, cols),
            # mixed indexing with array / int
            (rows, 4),
            (42, cols),
        ]
        for case in cases:
            expected = data[case]
            sink = np.zeros(expected.shape, dtype=expected.dtype)
            arr.get_coordinate_selection(case, out=sink)
            assert_array_equal(expected, sink[:])
def test_get_selections_with_fields():
    """Check every getter with the ``fields`` argument on a structured array.

    For each field spec the explicit ``get_*_selection(..., fields=...)``
    result is compared with NumPy. When the spec is a single name or a pair
    of names, the equivalent indexing syntax (field names appended to the
    index) is compared as well. Unknown or non-name field specs must raise
    ``IndexError``.
    """
    rows = [('aaa', 1, 4.2), ('bbb', 2, 8.4), ('ccc', 3, 12.6)]
    data = np.array(rows, dtype=[('foo', 'S3'), ('bar', 'i4'), ('baz', 'f8')])
    arr = zarr.create(shape=data.shape, chunks=2, dtype=data.dtype,
                      fill_value=None)
    arr[:] = data

    fields_fixture = [
        'foo',
        ['foo'],
        ['foo', 'bar'],
        ['foo', 'baz'],
        ['bar', 'baz'],
        ['foo', 'bar', 'baz'],
        ['bar', 'foo'],
        ['baz', 'bar', 'foo'],
    ]

    for fields in fields_fixture:
        # Field names usable in the indexing syntax: a lone string or exactly
        # two names; other specs are only checked via the explicit API.
        if isinstance(fields, str):
            name_suffix = (fields,)
        elif len(fields) == 2:
            name_suffix = (fields[0], fields[1])
        else:
            name_suffix = None

        def check_alternative(indexer, expected, *prefix):
            if name_suffix is None:
                return
            key = prefix + name_suffix
            if len(key) == 1:
                # A lone field name is passed as-is, not as a 1-tuple.
                key = key[0]
            assert_array_equal(expected, indexer[key])

        # total selection
        expect = data[fields]
        actual = arr.get_basic_selection(Ellipsis, fields=fields)
        assert_array_equal(expect, actual)
        check_alternative(arr, expect)
        check_alternative(arr, expect, Ellipsis)

        # basic selection with slice
        expect = data[fields][0:2]
        actual = arr.get_basic_selection(slice(0, 2), fields=fields)
        assert_array_equal(expect, actual)
        check_alternative(arr, expect, slice(0, 2))

        # basic selection with single item
        expect = data[fields][1]
        actual = arr.get_basic_selection(1, fields=fields)
        assert_array_equal(expect, actual)
        check_alternative(arr, expect, 1)

        # orthogonal selection
        ix = [0, 2]
        expect = data[fields][ix]
        actual = arr.get_orthogonal_selection(ix, fields=fields)
        assert_array_equal(expect, actual)
        check_alternative(arr.oindex, expect, ix)

        # coordinate selection
        ix = [0, 2]
        expect = data[fields][ix]
        actual = arr.get_coordinate_selection(ix, fields=fields)
        assert_array_equal(expect, actual)
        check_alternative(arr.vindex, expect, ix)

        # mask selection
        ix = [True, False, True]
        expect = data[fields][ix]
        actual = arr.get_mask_selection(ix, fields=fields)
        assert_array_equal(expect, actual)
        check_alternative(arr.vindex, expect, ix)

    # missing/bad fields
    with pytest.raises(IndexError):
        arr.get_basic_selection(Ellipsis, fields=['notafield'])
    with pytest.raises(IndexError):
        arr.get_basic_selection(Ellipsis, fields=slice(None))