def make_testdata_fillvalue():
    """Create the ``array_fv.zr`` test fixture.

    Opens a 100x100x100 float64 zarr array in write mode with 10x10x10
    chunks and a fill value of 42. No data is written by this function.
    """
    dims = [100, 100, 100]
    chunk_dims = [10, 10, 10]
    zarr.open_array(
        "array_fv.zr",
        mode='w',
        shape=dims,
        chunks=chunk_dims,
        dtype='f8',
        fill_value=42,
    )
def load_arrays_noncoding_and_centromeres(local_path,
                                          _set,
                                          chrom,
                                          coding_reg_df,
                                          sitefilter='gamb_colu',
                                          filter_centro=True):
    """
    Read a genotype array and restrict it to noncoding, noncentromeric sites.

    The genotype calls, the site-filter mask and the variant positions are
    read from zarr stores below ``local_path``. Sites are first restricted to
    those passing the chosen site filter, then (optionally) to the
    non-centromeric part of the chromosome, and finally to sites outside the
    ranges given by ``coding_reg_df``.

    Parameters
    ----------
    local_path : str
        Root directory holding ``snp_genotypes`` and ``site_filters``.
    _set : str
        Sample-set name used in the genotype path.
    chrom : str
        Chromosome arm. When ``filter_centro`` is True it must be one of
        '2L', '2R', '3L', '3R' or 'X'.
    coding_reg_df : object with ``start`` and ``end`` attributes
        Coding ranges passed to ``SortedIndex.locate_ranges``.
    sitefilter : str
        Site-filter name used in the filter path, e.g. 'gamb_colu' or 'arab'.
    filter_centro : bool
        Centromeric sites are removed only when this is exactly ``True``.

    Returns
    -------
    tuple
        ``(geno, positions)`` after all filters have been applied.

    Raises
    ------
    ValueError
        If ``filter_centro`` is True and ``chrom`` has no known boundary.
    """
    # Sites to KEEP per chromosome arm (the non-centromeric side of each
    # boundary). Chosen by eye based on ag1000g phase1 paper fig1.
    keep_rules = {
        '2L': lambda pos: pos > 3000000,
        '2R': lambda pos: pos < 57000000,
        '3L': lambda pos: pos > 2000000,
        '3R': lambda pos: pos < 50000000,
        'X': lambda pos: pos < 21000000,
    }
    # Only the literal True enables the centromere filter.
    apply_centro = filter_centro is True
    if apply_centro and chrom not in keep_rules:
        raise ValueError(
            f"No centromere boundary defined for chromosome {chrom!r}; "
            f"expected one of {sorted(keep_rules)}")

    Ag_array = zarr.open_array(
        f"{local_path}/snp_genotypes/all/{_set}/{chrom}/calldata/GT/",
        mode='r')
    filters = zarr.open(
        f"{local_path}/site_filters/dt_20200416/{sitefilter}/{chrom}/variants/filter_pass",
        mode="r")
    positions = zarr.open_array(
        f"{local_path}/snp_genotypes/all/sites/{chrom}/variants/POS/",
        mode='r')

    # Load the site-filter mask once and reuse it for positions and genotypes.
    site_pass = filters[:]
    positions = positions[:][site_pass]
    geno = allel.GenotypeDaskArray(Ag_array)
    geno = geno[site_pass]

    if apply_centro:
        keep_mask = keep_rules[chrom](positions)
        positions = allel.SortedIndex(positions[keep_mask])
        geno = geno.compress(keep_mask, axis=0)
    else:
        positions = allel.SortedIndex(positions)

    # Boolean array marking the remaining positions that fall in a coding range.
    coding = positions.locate_ranges(coding_reg_df.start,
                                     coding_reg_df.end,
                                     strict=False)

    # We want noncoding sites, so invert the mask with '~'.
    geno = geno.compress(~coding, axis=0)
    positions = positions[~coding]
    return (geno, positions)
def make_testdata():
    """Create two zarr test fixtures filled with the value 42.

    ``array.zr`` is opened without an explicit compressor argument, while
    ``array_raw.zr`` is opened with ``compressor=None``. Both are
    100x100x100 float64 arrays with 10x10x10 chunks, opened in write mode.
    """
    common = dict(mode='w',
                  shape=[100, 100, 100],
                  chunks=[10, 10, 10],
                  dtype='f8')

    compressed = zarr.open_array("array.zr", **common)
    compressed[:] = 42

    uncompressed = zarr.open_array("array_raw.zr", compressor=None, **common)
    uncompressed[:] = 42
def test_get_data_multi_binary(self):
    """Request multi-band binary data and verify the resulting zarr array.

    The request is loaded from ``REQUEST_MULTI_JSON``, fetched as
    ``application/octet-stream``, written through ``_write_zarr_array`` and
    then re-opened to check shape, chunking, dtype and the first and last
    ten values of band 0.
    """
    with open(REQUEST_MULTI_JSON, 'r') as fp:
        request = json.load(fp)

    sentinel_hub = SentinelHub()
    try:
        # TODO (forman): discuss with Primoz how to effectively do multi-bands request
        t1 = time.perf_counter()
        response = sentinel_hub.get_data(request,
                                         mime_type='application/octet-stream')
        t2 = time.perf_counter()
        print(f"test_get_data_multi_binary: took {t2 - t1} secs")

        _write_zarr_array(self.RESPONSE_MULTI_ZARR, response.content, 0,
                          (512, 512, 4), '<f4')
    finally:
        # Release the client even when the request or the write fails.
        sentinel_hub.close()

    zarr_array = zarr.open_array(self.RESPONSE_MULTI_ZARR)
    self.assertEqual((1, 512, 512, 4), zarr_array.shape)
    self.assertEqual((1, 512, 512, 4), zarr_array.chunks)

    np_array = np.array(zarr_array).astype(np.float32)
    self.assertEqual(np.float32, np_array.dtype)
    np.testing.assert_almost_equal(
        np.array([
            0.6425, 0.6676, 0.5922, 0.5822, 0.5735, 0.4921, 0.5902, 0.6518,
            0.5825, 0.5321
        ],
                 dtype=np.float32), np_array[0, 0, 0:10, 0])
    np.testing.assert_almost_equal(
        np.array([
            0.8605, 0.8528, 0.8495, 0.8378, 0.8143, 0.7959, 0.7816, 0.7407,
            0.7182, 0.7326
        ],
                 dtype=np.float32), np_array[0, 511, -10:, 0])
def _save_labels(self, frames):
    """
    Save the labels as a zarr file in the data directory.

    The output is written next to ``self.nd2_file`` as
    ``<stem>_labels.zarr`` and is also kept on ``self.labels_volume``.

    Parameters
    ----------
    frames : range or sized iterable of int
        Frame numbers to save. Each frame is looked up in ``self.labels``
        and written at index ``frame`` of the first axis of the volume.
    """
    # get file name and path
    name = Path(self.nd2_file).stem
    data_path = Path(self.nd2_file).parents[0]
    lab_path = os.path.join(data_path, name + '_labels.zarr')

    # get the shape of the first stored label frame (expected to be 3D)
    first_key = next(iter(self.labels.keys()))
    shape = self.labels[first_key].shape

    # get the number of frames
    if isinstance(frames, range):
        # NOTE(review): this yields one more slot than len(frames) for a
        # step-1 range (range(0, 193) -> 194); kept for compatibility,
        # confirm whether the extra slot is intended.
        t = frames.stop + 1 - frames.start
    else:
        t = len(frames)
    # Frames are written at their own frame number, so the first axis must
    # reach the largest frame; otherwise selections that do not start at 0
    # would index out of bounds.
    t = max(t, max(frames, default=-1) + 1)

    # instantiate zarr array, one chunk per frame
    self.labels_volume = zarr.open_array(
        lab_path,
        mode='w',
        shape=(t, shape[0], shape[1], shape[2]),
        chunks=(1, shape[0], shape[1], shape[2]),
        dtype='i4',
        fill_value=0)

    # add frames to volume
    for frame in frames:
        self.labels_volume[frame, ...] = self.labels[frame]
def build_gs_async():
    """Write the integers ``0..n-1`` one element at a time through GCSMapperAio.

    ``n`` is read from the enclosing scope. The array is opened in write
    mode with chunks of length 3, and the store is flushed once at the end.
    """
    logging.info("\n\n\nCalling build_gs_async\n\n\n:")
    mapper = GCSMapperAio("gs://vcm-ml-data/tmp/test.zarr", cache_size=n)
    target = zarr.open_array(mapper, shape=(n,), chunks=(3,), mode="w")
    # Elements are assigned individually rather than as one slice.
    for index in range(n):
        target[index] = index
    mapper.flush()
def ensure_size(self,
                new_shape: Vec3IntLike,
                align_with_shards: bool = True,
                warn: bool = False) -> None:
    """Grow the underlying zarr array so it covers at least ``new_shape``.

    Only axes 1-3 are resized; axis 0 keeps its current length. No axis is
    ever made smaller than it currently is.

    Args:
        new_shape: Requested minimum extent in x, y and z.
        align_with_shards: If True, the requested extent is rounded up to a
            multiple of ``self.info.shard_size`` before resizing.
        warn: If True, emit a warning describing the resize.
    """
    new_shape = Vec3Int(new_shape)
    zarray = self._zarray

    def _grown(requested):
        # Per-axis maximum of the current and requested extent, so a resize
        # can only grow the array and never discards existing data.
        return (zarray.shape[0], ) + tuple(
            max(current, wanted)
            for current, wanted in zip(zarray.shape[1:4], requested))

    new_shape_tuple = _grown((new_shape.x, new_shape.y, new_shape.z))
    if new_shape_tuple != zarray.shape:
        if align_with_shards:
            shard_size = self.info.shard_size
            new_shape = new_shape.ceildiv(shard_size) * shard_size
            # Re-apply the maximum after alignment: the aligned request may
            # still be smaller than the current extent on some axes.
            new_shape_tuple = _grown(new_shape.to_tuple())

        # Check on-disk for changes to shape
        current_zarray = zarr.open_array(store=_fsstore_from_path(self._path),
                                         mode="r")
        if zarray.shape != current_zarray.shape:
            warnings.warn(
                f"[WARNING] While resizing the Zarr array at {self._path}, a differing shape ({zarray.shape} != {current_zarray.shape}) was found in the currently persisted metadata."
                +
                "This is likely happening because multiple processes changed the metadata of this array."
            )

        if warn:
            warnings.warn(
                f"[WARNING] Resizing zarr array from `{zarray.shape}` to `{new_shape_tuple}`."
            )
        zarray.resize(new_shape_tuple)
def _open():
    """Open the zarr array at ``self.path`` and return it.

    ``self`` and ``mode`` are taken from the enclosing scope; shape, chunk
    shape and dtype come from ``self``, and the fill value is 0.
    """
    return zarr.open_array(
        self.path,
        mode=mode,
        shape=self.shape,
        chunks=self.chunk_shape,
        dtype=self.dtype,
        fill_value=0,
    )
def test_reader_return_callable(tmp_path):
    """Check that ``zarr_tensorstore`` returns a callable for a zarr folder."""
    zarr_dir = tmp_path / 'example.zarr'
    # Only the array needs to exist on disk; no data has to be written.
    zarr.open_array(
        zarr_dir,
        mode='w',
        shape=(10000, 10000),
        chunks=(1000, 1000),
        dtype='i4',
        fill_value=0,
    )

    reader = zarr_tensorstore(zarr_dir)

    assert callable(reader)
def write_hap_array(pop, chrom, p1, p2, name, samples, inaccessible=False):
    """
    Write a haplotype table for one population and genomic region.

    Genotypes and positions for ``chrom`` are read from one of two fixed
    zarr locations (selected by ``inaccessible``). Samples whose
    ``samples.population`` equals ``pop`` are kept, converted to haplotypes,
    restricted to positions in ``[p1, p2]`` and to segregating sites, and
    written tab-separated to ``../data/{pop}/{chrom}/sweep_hapl_{name}``
    with positions as the index. Currently used for iSAFE.
    """
    # Pick the source paths; the first branch is taken only for a literal False.
    if inaccessible is False:
        gt_path = f"/home/sanj/ag1000g/data/ag1000g.phase2.ar1.pass/{chrom}/calldata/GT/"
        pos_path = f"/home/sanj/ag1000g/data/ag1000g.phase2.ar1.pass/{chrom}/variants/POS"
    else:
        gt_path = f"/media/sanj/Sanj_HDD/Ag1000g/ag1000g.phase2.ar1/{chrom}/calldata/GT/"
        pos_path = f"/media/sanj/Sanj_HDD/Ag1000g/ag1000g.phase2.ar1/{chrom}/variants/POS"

    Ag_store = zarr.open_array(gt_path, mode='r')
    positions = zarr.open_array(pos_path, mode='r')[:]

    print("--------------------------------------------------")
    print(f"Zarrs loaded: {pop}, Chromosome {chrom}")

    # Select the population's samples and convert to haplotypes.
    genotypes = allel.GenotypeChunkedArray(Ag_store)
    in_pop = samples.population == pop
    print("Constructing HaplotypeArray")
    haplotypes = genotypes.compress(in_pop, axis=1).to_haplotypes()

    # Indices of variants inside the requested region (inclusive bounds).
    region_idx = np.where((positions >= p1) & (positions <= p2))[0]
    region_haps = haplotypes.take(region_idx, axis=0)

    # Keep only segregating sites, and apply the same filter to positions.
    segregating = region_haps.count_alleles().is_segregating()
    region_haps = region_haps.compress(segregating, axis=0)
    seg_positions = positions.take(region_idx[segregating])

    table = pd.DataFrame(data=region_haps)
    table.index = seg_positions
    table.to_csv(f'../data/{pop}/{chrom}/sweep_hapl_{name}',
                 index=True,
                 sep="\t")
    print(f"Writing Haplotype array for {name} region for iSAFE algorithm")
def _zarray(self) -> zarr.Array:
    """Return the zarr array for ``self._path``, opening it on first use.

    The array is opened in mode ``"a"`` and stored in
    ``self._cached_zarray``; later calls return the stored object.

    Raises:
        ArrayException: If opening the array fails for any reason; the
            original exception is attached as the cause.
    """
    if self._cached_zarray is not None:
        return self._cached_zarray

    try:
        store = _fsstore_from_path(self._path)
        self._cached_zarray = zarr.open_array(store=store, mode="a")
    except Exception as e:
        raise ArrayException(
            f"Exception while opening Zarr array for {self._path}") from e
    return self._cached_zarray
def __append_var(ds, store, name, dim, syncro=None):
    """Append variable ``name`` from dataset ``ds`` to the matching zarr array.

    Nothing is appended when ``dim`` is not one of the variable's
    dimensions. Otherwise the append axis is the position of ``dim`` in the
    target's ``_ARRAY_DIMENSIONS`` attribute, and the target is opened in
    ``'r+'`` mode with the optional synchronizer ``syncro``.
    """
    print("Appending " + name + " from " + ds)
    nc_file = __nc_open(ds)
    source_var = nc_file.variables[name]

    if dim not in source_var.dimensions:
        return

    append_axis = store[name].attrs['_ARRAY_DIMENSIONS'].index(dim)
    target = zarr.open_array(store=store[name],
                             mode='r+',
                             synchronizer=syncro)
    target.append(source_var, append_axis)
def whatsnpisit(locs,
                chrom,
                inaccessible=False,
                missense=True,
                provide_region=False):
    """
    Return the SnpEff annotations for the variants located at ``locs``.

    Variant positions for ``chrom`` are read from zarr and the ``ANN``
    annotations from an HDF5 callset; both come from one of two fixed
    locations selected by ``inaccessible``. Useful for the RNA_seq variant
    calling pipeline.

    Parameters
    ----------
    locs : array-like
        Positions to look up; passed to ``SortedIndex.locate_intersection``.
    chrom : str
        Chromosome used in the zarr path and as the HDF5 group name.
    inaccessible : bool
        The first set of paths is used only when this is exactly ``False``.
    missense, provide_region : bool
        Currently unused.

    Returns
    -------
    The ``ANN`` entries of the variants whose position is in ``locs``.
    """
    if inaccessible is False:
        pos_path = f"/home/sanj/ag1000g/data/ag1000g.phase2.ar1.pass/{chrom}/variants/POS"
        callset_fn = '/home/sanj/ag1000g/data/snp_eff/ag1000g.phase2.ar1.snpeff.AgamP4.2.pass.h5'
    else:
        pos_path = f"/media/sanj/Sanj_HDD/Ag1000g/ag1000g.phase2.ar1/{chrom}/variants/POS"
        callset_fn = '/home/sanj/ag1000g/data/all_snp_eff/ag1000g.phase2.ar1.snpeff.AgamP4.2.h5'

    positions = allel.SortedIndex(zarr.open_array(pos_path, mode='r')[:])

    # Read the annotations fully into memory, then close the file handle.
    with h5py.File(callset_fn, mode='r') as callset:
        snp_eff = callset[chrom]['variants']['ANN'][:]

    # Only the mask over the callset positions is needed; the mask over
    # ``locs`` is discarded.
    positions_bool, _ = positions.locate_intersection(locs)
    return snp_eff[positions_bool]
def test_nested_array():
    """Serve an array stored inside a zarr group and read it back over HTTP."""
    # Build a group holding a single dataset named "nested".
    expected = np.random.rand(1024, 1024)
    root = zarr.open()
    root.create_dataset("nested", data=expected)

    # Expose only the nested array through the app.
    nested_route = create_zarr_route(root.get("nested"))
    app = Starlette(routes=[nested_route])

    # Reading the whole remote array must reproduce the source data.
    remote = zarr.open_array(HTTPStore(TestClient(app)))
    np.testing.assert_allclose(remote[:], expected)
def make_coord(fss, z, accum_dim):
    """Concatenate the ``accum_dim`` coordinate of every input into ``z``.

    For each filesystem in ``fss`` the ``accum_dim`` array is opened and an
    attempt is made to decode it into datetimes with ``cftime.num2pydate``
    (using the array's ``units`` attribute and its ``calendar`` attribute,
    defaulting to ``'standard'``). If anything in that attempt fails, the
    raw values are used instead. The collected values are concatenated and
    written to ``z[accum_dim]`` (overwriting it), carrying over the
    attributes ``z[accum_dim]`` had just before the write; ``units`` and
    ``calendar`` are dropped when at least one input was decoded.

    Returns the length of the concatenated coordinate.
    """
    # a)
    accum = []
    logger.debug("accumulate coords array %s", accum_dim)
    times = False
    for fs in fss:
        zz = zarr.open_array(fs.get_mapper(accum_dim))
        try:
            # Imported here so a missing cftime falls back to raw values.
            import cftime

            if not isinstance(zz, cftime.real_datetime):
                # Use the array's calendar attribute when it has one,
                # otherwise assume a standard calendar.
                calendar = zz.attrs.get("calendar")
                if calendar is None:
                    calendar = 'standard'

                # Update attrs in z[accum_dim].
                # NOTE(review): this runs before the conversion is known to
                # succeed, so the attribute is also set on coordinates that
                # end up taking the raw-value fallback - confirm intended.
                zattr = dict(z[accum_dim].attrs)
                zattr['calendar'] = 'standard'
                z[accum_dim].attrs.put(zattr)

                zz = cftime.num2pydate(zz[...],
                                       units=zz.attrs["units"],
                                       calendar=calendar)
                times = True
                logger.debug("converted times")
            accum.append(zz)
        except Exception as e:
            # Best effort: anything that cannot be decoded as time is kept
            # as raw values. Record why instead of discarding the error.
            logger.debug("could not convert coords array %s to times: %r",
                         accum_dim, e)
            accum.append(zz[...].copy())

    attr = dict(z[accum_dim].attrs)
    if times:
        accum = [np.array(a, dtype="M8") for a in accum]
        # Time attributes no longer describe datetime64 values.
        attr.pop('units', None)
        attr.pop('calendar', None)
    acc = np.concatenate([np.atleast_1d(a) for a in accum]).squeeze()

    logger.debug("write coords array")
    arr = z.create_dataset(name=accum_dim, data=acc, overwrite=True)
    arr.attrs.update(attr)
    return len(acc)
def test_numpy_writeable():
    """Write through the HTTP store and compare with the served array."""
    # Source data wrapped in a zarr array that the route serves.
    source = np.random.rand(1024, 1024)
    served = zarr.array(source)

    # Build the app around the served array.
    app = Starlette(routes=[create_zarr_route(served)])

    # Write a block remotely, then compare the remote and served contents.
    remote = zarr.open_array(HTTPStore(TestClient(app)))
    remote[:50, :50] = 2
    np.testing.assert_allclose(remote[:], served[:])
def transform_weather():
    """Concatenate the ``sst`` variable of ``data/*.nc`` into one zarr array.

    Does nothing when ``sst.day.mean.v2.zarr`` already exists. Files are
    processed in sorted path order and stacked along the first axis of a
    ``(n, 720, 1440)`` float32 array; masked values are stored as NaN.
    """
    target = 'sst.day.mean.v2.zarr'
    if os.path.exists(target):
        return

    sst_vars = [Dataset(nc_path)['sst'] for nc_path in sorted(glob('data/*.nc'))]
    total = sum(var.shape[0] for var in sst_vars)

    out = zarr.open_array(target,
                          shape=(total, 720, 1440),
                          chunks=(72, 360, 360),
                          dtype='f4')

    # Copy each file into its slot along the first axis.
    offset = 0
    for var in sst_vars:
        count = var.shape[0]
        out[offset:offset + count] = var[:].filled(np.nan)
        offset += count
def test_save_ev(self):
    """Save ``self.ev`` with ``saveData`` and compare every stored field.

    All mismatches are collected and reported together in one assertion.
    """
    dir_name = saveData(self.ev, persist=False)
    main = os.path.join(dir_name, "main_data")
    meta = os.path.join(dir_name, "meta_data")

    def _has_visible_entries(directory):
        # True when the directory holds at least one non-hidden entry.
        return any(not f.startswith(".") for f in os.listdir(directory))

    def _read(store, path):
        return zarr.open_array(store=store, mode="r", path=path)[...]

    errors = []
    # The save is reported as missing only when both directories are empty.
    if not (_has_visible_entries(main) or _has_visible_entries(meta)):
        errors.append("EventList is not saved or does not exist")
    else:
        times = _read(main, "times")
        energy = _read(main, "energy")
        pi_channel = _read(main, "pi_channel")
        gti = _read(main, "gti")
        # Reshape the stored GTIs to two columns.
        gti = gti.reshape((gti.size // 2, 2))
        dt = _read(meta, "dt")
        ncounts = _read(meta, "ncounts")
        mjdref = _read(meta, "mjdref")
        notes = _read(meta, "notes")

        checks = [
            (np.array_equal(self.ev.time, times),
             "ev.time is not saved precisely"),
            (np.array_equal(self.ev.energy, energy),
             "ev.energy is not saved precisely"),
            (np.array_equal(self.ev.pi, pi_channel),
             "ev.pi is not saved precisely"),
            (np.array_equal(self.ev.gti, gti),
             "ev.gti is not saved precisely"),
            (np.isclose(self.ev.dt, dt),
             "ev.dt is not saved precisely"),
            (self.ev.ncounts == ncounts,
             "ev.ncounts is not saved precisely"),
            (np.isclose(self.ev.mjdref, mjdref),
             "ev.mjdref is not saved precisely"),
            (self.ev.notes == notes,
             "ev.notes is not saved precisely"),
        ]
        for passed, message in checks:
            if not passed:
                errors.append(message)

    assert not errors, "Errors encountered:\n{}".format("\n".join(errors))
def test_numpy_read_only():
    """A read-only served array can be read remotely but not written."""
    # Source data wrapped in a read-only zarr array.
    expected = np.random.rand(1024, 1024)
    served = zarr.array(expected, read_only=True)

    # Build the app around the served array.
    app = Starlette(routes=[create_zarr_route(served)])

    # Reading through the HTTP store returns the source data.
    remote = zarr.open_array(HTTPStore(TestClient(app)))
    np.testing.assert_allclose(remote[:], expected)

    # Writing must be rejected.
    with pytest.raises(ValueError):
        remote[:50, :50] = 10
def test_reader_can_read_and_write_to_file(tmp_path):
    """Write random data to a zarr folder and read it back via the reader.

    The reader must return a list containing exactly one tuple whose first
    element matches the written data.
    """
    zarr_dir = tmp_path / 'example.zarr'
    written = zarr.open_array(zarr_dir,
                              mode='w',
                              shape=(100, 100),
                              chunks=(100, 100))
    written[:] = np.random.rand(100, 100)

    read = zarr_tensorstore(zarr_dir)
    layers = read(zarr_dir)

    assert isinstance(layers, List)
    assert len(layers) == 1

    first_layer = layers[0]
    assert isinstance(first_layer, Tuple)
    np.testing.assert_allclose(np.asarray(first_layer[0]), written)
def test_save_fits_data(self):
    """Save a FITS event file with ``saveData`` and compare stored fields.

    Reference values are loaded from the same file with
    ``load_events_and_gtis`` and ``ref_mjd``. All mismatches are collected
    and reported together in one assertion.
    """
    fname = os.path.join(datadir, "monol_testA.evt")
    dir_name = saveData(fname, persist=False)

    # Reference values read directly from the FITS file.
    evtdata = load_events_and_gtis(fname, additional_columns=["PI"])
    mjdref_def = ref_mjd(fname, hdu=1)
    time_def = evtdata.ev_list
    pi_channel_def = evtdata.additional_data["PI"]
    gti_def = evtdata.gti_list
    tstart_def = evtdata.t_start
    tstop_def = evtdata.t_stop

    main = os.path.join(dir_name, "main_data")
    meta = os.path.join(dir_name, "meta_data")

    def _has_visible_entries(directory):
        # True when the directory holds at least one non-hidden entry.
        return any(not f.startswith(".") for f in os.listdir(directory))

    def _read(store, path):
        return zarr.open_array(store=store, mode="r", path=path)[...]

    errors = []
    # The save is reported as missing only when both directories are empty.
    if not (_has_visible_entries(main) or _has_visible_entries(meta)):
        errors.append("EventList is not saved or does not exist")
    else:
        times = _read(main, "times")
        pi_channel = _read(main, "pi_channel")
        gti = _read(main, "gti")
        # Reshape the stored GTIs to two columns.
        gti = gti.reshape((gti.size // 2, 2))
        tstart = _read(meta, "tstart")
        tstop = _read(meta, "tstop")
        mjdref = _read(meta, "mjdref")

        # Sort the stored events by time before comparing.
        order = np.argsort(times)
        times = times[order]
        pi_channel = pi_channel[order]

        checks = [
            (np.allclose(time_def, times),
             "fits.events.data.time is not saved precisely"),
            (np.array_equal(pi_channel_def, pi_channel),
             "fits.events.data.pi is not saved precisely"),
            (np.allclose(gti_def, gti),
             "fits.gti.data is not saved precisely"),
            (tstart == tstart_def,
             "fits.events.header.tstart is not saved precisely"),
            (tstop == tstop_def,
             "fits.events.header.tstop is not saved precisely"),
            (mjdref == mjdref_def,
             "fits.events.header.mjdref is not saved precisely"),
        ]
        for passed, message in checks:
            if not passed:
                errors.append(message)

    assert not errors, "Errors encountered:\n{}".format("\n".join(errors))
def scale(self, input_array: str, output_directory: str) -> None:
    """Perform downsampling to disk.

    The method named by ``self.method`` is looked up on this object and
    applied to the zarr array opened from ``input_array``. The resulting
    pyramid is written into a group in the store for ``output_directory``.
    When ``self.labeled`` is set the pyramid values are checked first, and
    when ``self.copy_metadata`` is set the input array's attributes are
    copied onto the group.

    Args:
        input_array: Location of the source zarr array.
        output_directory: Location of the output store.

    Raises:
        ValueError: If ``self.method`` does not name a callable attribute
            of this object.
    """
    func = getattr(self, self.method, None)
    if not callable(func):
        raise ValueError(f"Unknown downsampling method {self.method!r}")

    store = self.__check_store(output_directory)
    base = zarr.open_array(input_array)
    pyramid = func(base)

    if self.labeled:
        self.__assert_values(pyramid)

    grp = self.__create_group(store, base, pyramid)

    if self.copy_metadata:
        print(f"copying attribute keys: {list(base.attrs.keys())}")
        grp.attrs.update(base.attrs)
def test_read_zarr_execution(setup):
    """Read the same zarr dataset through every input form of ``fromzarr``.

    Covers a zarr array created through a group, an array opened by path,
    a store path with ``group``/``dataset`` arguments, and a full path
    string. Each result must equal the source data and be split into more
    than one chunk.
    """
    session = setup

    test_array = np.random.RandomState(0).rand(20, 10)
    group_name = "test_group"
    dataset_name = "test_dataset"

    def _assert_reads_back(r):
        fetched = r.execute().fetch()
        np.testing.assert_array_equal(fetched, test_array)
        assert len(session._session._tileable_to_fetch[r.data].chunks) > 1

    # Unsupported input types are rejected.
    with pytest.raises(TypeError):
        fromzarr(object())

    with tempfile.TemporaryDirectory() as d:
        path = os.path.join(d, f"test_read_{int(time.time())}.zarr")
        group = zarr.group(path)
        arr = group.array(group_name + "/" + dataset_name,
                          test_array,
                          chunks=(7, 4))

        # From the array object returned on creation.
        _assert_reads_back(fromzarr(arr))

        # From an array re-opened by its path.
        arr = zarr.open_array(f"{path}/{group_name}/{dataset_name}")
        _assert_reads_back(fromzarr(arr))

        # From the store path plus group and dataset names.
        _assert_reads_back(
            fromzarr(path, group=group_name, dataset=dataset_name))

        # From the full path string.
        _assert_reads_back(
            fromzarr(path + "/" + group_name + "/" + dataset_name))
def testReadZarrExecution(self):
    """Read the same zarr dataset through every input form of ``fromzarr``.

    Covers a zarr array created through a group, an array opened by path,
    a store path with ``group``/``dataset`` arguments, and a full path
    string. Each result must equal the source data and be tiled into more
    than one chunk.
    """
    test_array = np.random.RandomState(0).rand(20, 10)
    group_name = 'test_group'
    dataset_name = 'test_dataset'

    def _assert_reads_back(r):
        fetched = self.executor.execute_tensor(r, concat=True)[0]
        np.testing.assert_array_equal(fetched, test_array)
        self.assertGreater(len(get_tiled(r).chunks), 1)

    # Unsupported input types are rejected.
    with self.assertRaises(TypeError):
        fromzarr(object())

    with tempfile.TemporaryDirectory() as d:
        path = os.path.join(d,
                            'test_read_{}.zarr'.format(int(time.time())))
        group = zarr.group(path)
        arr = group.array(group_name + '/' + dataset_name,
                          test_array,
                          chunks=(7, 4))

        # From the array object returned on creation.
        _assert_reads_back(fromzarr(arr))

        # From an array re-opened by its path.
        arr = zarr.open_array('{}/{}/{}'.format(path, group_name,
                                                dataset_name))
        _assert_reads_back(fromzarr(arr))

        # From the store path plus group and dataset names.
        _assert_reads_back(
            fromzarr(path, group=group_name, dataset=dataset_name))

        # From the full path string.
        _assert_reads_back(
            fromzarr(path + '/' + group_name + '/' + dataset_name))
def test_save_lc(self):
    """Save a copy of ``self.lc`` with ``saveData`` and compare stored fields.

    All mismatches are collected and reported together in one assertion.
    """
    test_lc = copy.deepcopy(self.lc)
    # Make sure counts_err exists
    _ = test_lc.counts_err

    dir_name = saveData(test_lc, persist=False)
    main = os.path.join(dir_name, "main_data")
    meta = os.path.join(dir_name, "meta_data")

    def _has_visible_entries(directory):
        # True when the directory holds at least one non-hidden entry.
        return any(not f.startswith(".") for f in os.listdir(directory))

    def _read(store, path):
        return zarr.open_array(store=store, mode="r", path=path)[...]

    errors = []
    # The save is reported as missing only when both directories are empty.
    if not (_has_visible_entries(main) or _has_visible_entries(meta)):
        errors.append("Lightcurve is not saved or does not exist")
    else:
        times = _read(main, "times")
        counts = _read(main, "counts")
        count_err = _read(main, "count_err")
        gti = _read(main, "gti")
        # Reshape the stored GTIs to two columns.
        gti = gti.reshape((gti.size // 2, 2))
        dt = _read(meta, "dt")
        mjdref = _read(meta, "mjdref")
        err_dist = _read(meta, "err_dist")

        checks = [
            (np.array_equal(test_lc.time, times),
             "lc.time is not saved precisely"),
            (np.array_equal(test_lc.counts, counts),
             "lc.counts is not saved precisely"),
            (np.array_equal(test_lc.counts_err, count_err),
             "lc.counts_err is not saved precisely"),
            (np.array_equal(test_lc.gti, gti),
             "lc.gti is not saved precisely"),
            (test_lc.dt == dt,
             "lc.dt is not saved precisely"),
            (test_lc.mjdref == mjdref,
             "lc.mjdref is not saved precisely"),
            (test_lc.err_dist == err_dist,
             "lc.err_dist is not saved precisely"),
        ]
        for passed, message in checks:
            if not passed:
                errors.append(message)

    assert not errors, "Errors encountered:\n{}".format("\n".join(errors))
def testStoreZarrExecution(self):
    """Store a tensor with ``tozarr`` by store path, full path and array.

    After each store the dataset is re-opened and compared with the
    expected values; the first store also checks the compressor.
    """
    raw = np.random.RandomState(0).rand(10, 20)
    group_name = 'test_group'
    dataset_name = 'test_dataset'
    t = tensor(raw, chunk_size=6)

    # Unsupported targets are rejected.
    with self.assertRaises(TypeError):
        tozarr(object(), t)

    with tempfile.TemporaryDirectory() as d:
        filename = os.path.join(
            d, 'test_store_{}.zarr'.format(int(time.time())))
        path = '{}/{}/{}'.format(filename, group_name, dataset_name)

        # Store by store path plus group and dataset, with a compressor.
        store_op = tozarr(filename,
                          t,
                          group=group_name,
                          dataset=dataset_name,
                          compressor=Zstd(level=3))
        self.executor.execute_tensor(store_op)
        stored = zarr.open(path)
        np.testing.assert_array_equal(stored, raw)
        self.assertEqual(stored.compressor, Zstd(level=3))

        # Store by full dataset path.
        store_op = tozarr(path, t + 2)
        self.executor.execute_tensor(store_op)
        stored = zarr.open(path)
        np.testing.assert_array_equal(stored, raw + 2)

        # Store into an array object opened with a compressor and filters.
        filters = [Delta(dtype='i4')]
        compressor = Blosc(cname='zstd', clevel=1, shuffle=Blosc.SHUFFLE)
        target = zarr.open(path, compressor=compressor, filters=filters)
        store_op = tozarr(target, t + 1)
        self.executor.execute_tensor(store_op)
        np.testing.assert_array_equal(zarr.open_array(path), raw + 1)
def initialize_group(
    group_path: Pathlike,
    arrays: Sequence[NDArray[Any]],
    array_paths: Sequence[str],
    chunks: Sequence[int],
    group_attrs: Optional[Dict[str, Any]] = None,
    compressor: Codec = numcodecs.GZip(-1),
    array_attrs: Optional[Sequence[Dict[str, Any]]] = None,
    modes: Tuple[AccessMode, AccessMode] = ("w", "w"),
    group_kwargs: Optional[Dict[str, Any]] = None,
    array_kwargs: Optional[Dict[str, Any]] = None,
) -> zarr.hierarchy.Group:
    """Create a group and one empty zarr array per entry of ``arrays``.

    Each array is opened under the group with the shape and dtype of the
    corresponding input, a fill value of 0 and the given compressor; the
    input data itself is not written.

    Args:
        group_path: Location of the group, passed to ``access``.
        arrays: Arrays whose ``shape`` and ``dtype`` define the new arrays.
        array_paths: Name of each array relative to the group.
        chunks: Chunking for each array, one entry per array.
        group_attrs: Attributes for the group; defaults to an empty dict.
        compressor: Compressor applied to every array.
        array_attrs: Attributes for each array; defaults to no attributes.
        modes: Access modes for the group and for the arrays, in that order.
        group_kwargs: Extra keyword arguments passed to ``access``.
        array_kwargs: Extra keyword arguments passed to ``zarr.open_array``.

    Returns:
        The group returned by ``access``.
    """
    # Fresh dicts per call instead of mutable defaults in the signature.
    group_attrs = {} if group_attrs is None else group_attrs
    group_kwargs = {} if group_kwargs is None else group_kwargs
    array_kwargs = {} if array_kwargs is None else array_kwargs

    group_access_mode, array_access_mode = modes
    group = access(group_path,
                   mode=group_access_mode,
                   attrs=group_attrs,
                   **group_kwargs)

    if array_attrs is None:
        _array_attrs: Tuple[Dict[str, Any], ...] = ({}, ) * len(arrays)
    else:
        _array_attrs = array_attrs

    # zip stops at the shortest of the four sequences.
    for name, arr, attrs, chnks in zip(array_paths, arrays, _array_attrs,
                                       chunks):
        path = os.path.join(group.path, name)
        z_arr = zarr.open_array(store=group.store,
                                mode=array_access_mode,
                                fill_value=0,
                                path=path,
                                shape=arr.shape,
                                dtype=arr.dtype,
                                chunks=chnks,
                                compressor=compressor,
                                **array_kwargs)
        z_arr.attrs.update(attrs)

    return group
def test_store_zarr_execution(setup):
    """Store a tensor with ``tozarr`` by store path, full path and array.

    After each store the dataset is re-opened and compared with the
    expected values; the first store also checks the compressor.
    """
    raw = np.random.RandomState(0).rand(10, 20)
    group_name = 'test_group'
    dataset_name = 'test_dataset'
    t = tensor(raw, chunk_size=6)

    # Unsupported targets are rejected.
    with pytest.raises(TypeError):
        tozarr(object(), t)

    with tempfile.TemporaryDirectory() as d:
        filename = os.path.join(d, f'test_store_{int(time.time())}.zarr')
        path = f'{filename}/{group_name}/{dataset_name}'

        # Store by store path plus group and dataset, with a compressor.
        store_op = tozarr(filename,
                          t,
                          group=group_name,
                          dataset=dataset_name,
                          compressor=Zstd(level=3))
        store_op.execute()
        stored = zarr.open(path)
        np.testing.assert_array_equal(stored, raw)
        assert stored.compressor == Zstd(level=3)

        # Store by full dataset path.
        store_op = tozarr(path, t + 2)
        store_op.execute()
        stored = zarr.open(path)
        np.testing.assert_array_equal(stored, raw + 2)

        # Store into an array object opened with a compressor and filters.
        filters = [Delta(dtype='i4')]
        compressor = Blosc(cname='zstd', clevel=1, shuffle=Blosc.SHUFFLE)
        target = zarr.open(path, compressor=compressor, filters=filters)
        store_op = tozarr(target, t + 1)
        store_op.execute()
        np.testing.assert_array_equal(zarr.open_array(path), raw + 1)
def __init__(
    self,
    url: str,
    shape: typing.Tuple[int, ...] = None,
    dtype="float32",
    creds=None,
    memcache: float = None,
):
    """Create or open the zarr array stored at ``url``.

    When ``shape`` is given, a zero-filled array of that shape and dtype is
    created, overwriting whatever the store holds, with chunk sizes from
    ``self._determine_chunksizes``. Otherwise the existing array is opened.
    Shape, chunks and dtype are then cached from the resulting array.

    Args:
        url: Location passed to ``hub.areal.store.get_storage_map``.
        shape: Shape of the array to create, or None to open an existing one.
        dtype: Element type used when creating the array.
        creds: Credentials passed to ``get_storage_map``.
        memcache: Cache setting passed to ``get_storage_map`` and kept on
            the instance.
    """
    if shape is None:
        storage = hub.areal.store.get_storage_map(url, creds, memcache)
        self._zarr = zarr.open_array(storage)
    else:
        chunk_sizes = self._determine_chunksizes(shape, dtype)
        storage = hub.areal.store.get_storage_map(url, creds, memcache)
        self._zarr = zarr.zeros(
            shape,
            dtype=dtype,
            chunks=chunk_sizes,
            store=storage,
            overwrite=True,
        )

    self._shape = self._zarr.shape
    self._chunks = self._zarr.chunks
    self._dtype = self._zarr.dtype
    self._memcache = memcache
def labels_to_zarr(labels, out_path):
    """
    Persist a label array as a "sparse" zarr on disk.

    Only the nonzero entries of ``labels`` are written; all other entries
    are left untouched in the zarr array.

    Parameters
    ----------
    labels : np.ndarray
        Labelled images in a numpy array. The chunk specification used
        here has four entries.
    out_path : str or Path
        Destination on disk to save the persistent zarr array

    Returns
    -------
    label_zarr: zarr.Array
        Persistent zarr array holding the same data as labels
    """
    nonzero_coords = labels.nonzero()
    out = zarr.open_array(
        out_path,
        shape=labels.shape,
        dtype=labels.dtype,
        chunks=(1, 10, -1, -1),
    )
    # Write just the labelled coordinates and their values.
    out.set_coordinate_selection(nonzero_coords, labels[nonzero_coords])
    return out