Пример #1
0
def make_testdata_fillvalue():
    """Create the ``array_fv.zr`` fixture.

    The array is 100x100x100 float64 in 10x10x10 chunks with
    ``fill_value=42``. Nothing is written to it here.
    """
    side = 100
    chunk_side = 10
    zarr.open_array(
        "array_fv.zr",
        mode='w',
        shape=[side, side, side],
        chunks=[chunk_side, chunk_side, chunk_side],
        dtype='f8',
        fill_value=42,
    )
Пример #2
0
def load_arrays_noncoding_and_centromeres(local_path,
                                          _set,
                                          chrom,
                                          coding_reg_df,
                                          sitefilter='gamb_colu',
                                          filter_centro=True):
    """
    Read a genotype array and restrict it to filter-passing, noncoding sites,
    optionally also applying a per-chromosome-arm position cut-off.

    Parameters
    ----------
    local_path : str
        Root directory containing the ``snp_genotypes`` and ``site_filters``
        zarr trees.
    _set : str
        Sample-set directory under ``snp_genotypes/all``.
    chrom : str
        Chromosome arm. When ``filter_centro`` is True it must be one of
        '2L', '2R', '3L', '3R' or 'X'.
    coding_reg_df : object exposing ``start`` and ``end``
        Coding-region bounds handed to ``SortedIndex.locate_ranges``.
    sitefilter : str
        Site-filter directory name, e.g. 'gamb_colu' or 'arab'.
    filter_centro : bool
        Only the literal ``True`` enables the position cut-off.

    Returns
    -------
    tuple
        ``(geno, positions)``: the filtered genotype array and the matching
        ``allel.SortedIndex`` of positions.

    Raises
    ------
    ValueError
        If ``filter_centro`` is True and no cut-off is defined for ``chrom``.
    """
    Ag_array = zarr.open_array(
        f"{local_path}/snp_genotypes/all/{_set}/{chrom}/calldata/GT/",
        mode='r')
    filters = zarr.open(
        f"{local_path}/site_filters/dt_20200416/{sitefilter}/{chrom}/variants/filter_pass",
        mode="r")
    positions = zarr.open_array(
        f"{local_path}/snp_genotypes/all/sites/{chrom}/variants/POS/",
        mode='r')

    # Materialise the site-filter mask once and reuse it for both arrays.
    pass_mask = filters[:]
    positions = positions[:][pass_mask]
    geno = allel.GenotypeDaskArray(Ag_array)
    geno = geno[pass_mask]

    if filter_centro is True:
        # Cut-offs for regions of low recombination, chosen by eye based on
        # the ag1000g phase1 paper fig1.
        if chrom == '2L':
            centromere = (positions > 3000000)
        elif chrom == '2R':
            centromere = (positions < 57000000)
        elif chrom == '3L':
            centromere = (positions > 2000000)
        elif chrom == '3R':
            centromere = (positions < 50000000)
        elif chrom == 'X':
            centromere = (positions < 21000000)
        else:
            # Previously this fell through and failed later with an
            # UnboundLocalError on `centromere`.
            raise ValueError(
                f"No centromere cut-off defined for chromosome {chrom!r}; "
                "expected one of '2L', '2R', '3L', '3R', 'X' "
                "or pass filter_centro=False")

        positions = allel.SortedIndex(positions[centromere])
        geno = geno.compress(centromere, axis=0)
    else:
        positions = allel.SortedIndex(positions)

    # Boolean array marking the remaining positions that fall in coding ranges.
    coding = positions.locate_ranges(coding_reg_df.start,
                                     coding_reg_df.end,
                                     strict=False)

    # We want noncoding sites, hence the inverted mask.
    geno = geno.compress(~coding, axis=0)
    positions = positions[~coding]

    return (geno, positions)
Пример #3
0
def make_testdata():
    """Write two 100x100x100 float64 fixtures filled with the value 42.

    ``array.zr`` is created without a ``compressor`` argument, while
    ``array_raw.zr`` is created with ``compressor=None``.
    """
    layout = dict(mode='w',
                  shape=[100, 100, 100],
                  chunks=[10, 10, 10],
                  dtype='f8')

    default_array = zarr.open_array("array.zr", **layout)
    default_array[:] = 42

    raw_array = zarr.open_array("array_raw.zr", compressor=None, **layout)
    raw_array[:] = 42
Пример #4
0
    def test_get_data_multi_binary(self):
        """Fetch a multi-band request as raw bytes, store it as zarr and
        spot-check the first band of the first and last rows."""
        with open(REQUEST_MULTI_JSON, 'r') as fp:
            request = json.load(fp)

        sentinel_hub = SentinelHub()
        # Release the client even when the request or the write fails.
        try:
            # TODO (forman): discuss with Primoz how to effectively do multi-bands request
            t1 = time.perf_counter()
            response = sentinel_hub.get_data(
                request, mime_type='application/octet-stream')
            t2 = time.perf_counter()
            print(f"test_get_data_multi_binary: took {t2 - t1} secs")

            _write_zarr_array(self.RESPONSE_MULTI_ZARR, response.content, 0,
                              (512, 512, 4), '<f4')
        finally:
            sentinel_hub.close()

        zarr_array = zarr.open_array(self.RESPONSE_MULTI_ZARR)
        self.assertEqual((1, 512, 512, 4), zarr_array.shape)
        self.assertEqual((1, 512, 512, 4), zarr_array.chunks)
        np_array = np.array(zarr_array).astype(np.float32)
        self.assertEqual(np.float32, np_array.dtype)

        # First ten pixels of the first row, band 0.
        np.testing.assert_almost_equal(
            np.array([
                0.6425, 0.6676, 0.5922, 0.5822, 0.5735, 0.4921, 0.5902, 0.6518,
                0.5825, 0.5321
            ],
                     dtype=np.float32), np_array[0, 0, 0:10, 0])
        # Last ten pixels of the last row, band 0.
        np.testing.assert_almost_equal(
            np.array([
                0.8605, 0.8528, 0.8495, 0.8378, 0.8143, 0.7959, 0.7816, 0.7407,
                0.7182, 0.7326
            ],
                     dtype=np.float32), np_array[0, 511, -10:, 0])
Пример #5
0
    def _save_labels(self, frames):
        """
        Save the labels as a zarr file in the data directory.

        The array is written next to ``self.nd2_file`` as
        ``<stem>_labels.zarr`` and is also kept on ``self.labels_volume``.

        Parameters
        ----------
        frames : range or sized iterable of int
            Frame indices to store. Each frame is written at its own index
            along the first axis; indices not listed keep the fill value 0.
        """
        # get file name and path
        name = Path(self.nd2_file).stem
        data_path = Path(self.nd2_file).parents[0]
        lab_path = os.path.join(data_path, name + '_labels.zarr')

        # get the shape of the first stored label image
        shape = self.labels[list(self.labels.keys())[0]].shape

        # get the number of slots along the time axis
        if isinstance(frames, range):
            # kept from the existing behaviour: one slot more than the span,
            # e.g. range(0, 193) --> 194 slots (the range itself has 193 items)
            t = frames.stop + 1 - frames.start
        else:
            t = len(frames)

        # Frames are written at their absolute index below, so the time axis
        # must reach the largest index. Previously frames that did not start
        # near 0 (e.g. range(5, 10) or [3, 7]) ran past the end of the array.
        t = max(t, max(frames, default=-1) + 1)

        # instantiate zarr array, one chunk per frame
        self.labels_volume = zarr.open_array(lab_path,
                                             mode='w',
                                             shape=(t, shape[0], shape[1],
                                                    shape[2]),
                                             chunks=(1, shape[0], shape[1],
                                                     shape[2]),
                                             dtype='i4',
                                             fill_value=0)

        # add frames to volume
        for frame in frames:
            self.labels_volume[frame, ...] = self.labels[frame]
Пример #6
0
def build_gs_async():
    """Write ``0..n-1`` element by element into a 1-D zarr array backed by a
    ``GCSMapperAio`` store, then flush the store.

    ``n`` is read from the enclosing module scope.
    """
    logging.info("\n\n\nCalling build_gs_async\n\n\n:")
    mapper = GCSMapperAio("gs://vcm-ml-data/tmp/test.zarr", cache_size=n)
    target = zarr.open_array(mapper, shape=(n,), chunks=(3,), mode="w")
    # One assignment per element, so each write goes through the store.
    for index in range(n):
        target[index] = index
    mapper.flush()
Пример #7
0
    def ensure_size(self,
                    new_shape: Vec3IntLike,
                    align_with_shards: bool = True,
                    warn: bool = False) -> None:
        """Grow the Zarr array so that its last three axes cover ``new_shape``.

        The first axis is never changed, and no axis is ever made smaller
        than it currently is.

        Args:
            new_shape: Requested minimum extent of the x, y and z axes.
            align_with_shards: When True, the requested extent is rounded up
                to a multiple of ``self.info.shard_size`` before resizing.
            warn: When True, emit a warning describing the resize.
        """
        new_shape = Vec3Int(new_shape)
        zarray = self._zarray

        new_shape_tuple = (
            zarray.shape[0],
            max(zarray.shape[1], new_shape.x),
            max(zarray.shape[2], new_shape.y),
            max(zarray.shape[3], new_shape.z),
        )
        if new_shape_tuple != zarray.shape:
            if align_with_shards:
                shard_size = self.info.shard_size
                aligned = (new_shape.ceildiv(shard_size) *
                           shard_size).to_tuple()
                # Re-apply the per-axis maximum: using the aligned request
                # alone would shrink any axis where the array is already
                # larger than the request.
                new_shape_tuple = (zarray.shape[0], ) + tuple(
                    max(current, wanted)
                    for current, wanted in zip(zarray.shape[1:], aligned))

            # Check on-disk for changes to shape
            current_zarray = zarr.open_array(store=_fsstore_from_path(
                self._path),
                                             mode="r")
            if zarray.shape != current_zarray.shape:
                warnings.warn(
                    f"[WARNING] While resizing the Zarr array at {self._path}, a differing shape ({zarray.shape} != {current_zarray.shape}) was found in the currently persisted metadata. "
                    +
                    "This is likely happening because multiple processes changed the metadata of this array."
                )

            if warn:
                warnings.warn(
                    f"[WARNING] Resizing zarr array from `{zarray.shape}` to `{new_shape_tuple}`."
                )
            zarray.resize(new_shape_tuple)
Пример #8
0
 def _open():
     """Open the zarr array at ``self.path`` with the enclosing ``mode``.

     Shape, chunking and dtype come from ``self``; the fill value is 0.
     """
     return zarr.open_array(
         self.path,
         mode=mode,
         shape=self.shape,
         chunks=self.chunk_shape,
         dtype=self.dtype,
         fill_value=0,
     )
Пример #9
0
def test_reader_return_callable(tmp_path):
    """
    Test that the reader returns a callable when given a zarr directory.
    """
    zarr_dir = tmp_path / 'example.zarr'
    # Only the on-disk array matters here; the handle itself is not needed.
    zarr.open_array(zarr_dir,
                    mode='w',
                    shape=(10000, 10000),
                    chunks=(1000, 1000),
                    dtype='i4',
                    fill_value=0)
    reader = zarr_tensorstore(zarr_dir)
    assert callable(reader)
Пример #10
0
def write_hap_array(pop, chrom, p1, p2, name, samples, inaccessible=False):
    """Write a tab-separated haplotype table for one population and region.

    Haplotypes of the samples whose ``samples.population`` equals ``pop`` are
    restricted to positions in ``[p1, p2]`` on ``chrom``, reduced to
    segregating sites, and written to
    ``../data/{pop}/{chrom}/sweep_hapl_{name}`` with positions as the index.
    ``inaccessible`` selects which of two hard-coded data locations is read.
    Currently used for iSAFE.
    """
    # Pick the data location; the two callsets live under different roots.
    if inaccessible is False:
        gt_path = f"/home/sanj/ag1000g/data/ag1000g.phase2.ar1.pass/{chrom}/calldata/GT/"
        pos_path = f"/home/sanj/ag1000g/data/ag1000g.phase2.ar1.pass/{chrom}/variants/POS"
    else:
        gt_path = f"/media/sanj/Sanj_HDD/Ag1000g/ag1000g.phase2.ar1/{chrom}/calldata/GT/"
        pos_path = f"/media/sanj/Sanj_HDD/Ag1000g/ag1000g.phase2.ar1/{chrom}/variants/POS"

    Ag_store = zarr.open_array(gt_path, mode='r')
    positions = zarr.open_array(pos_path, mode='r')[:]

    print("--------------------------------------------------")
    print(f"Zarrs loaded: {pop}, Chromosome {chrom}")

    # Subset genotypes to the requested population and convert to haplotypes.
    genotypes = allel.GenotypeChunkedArray(Ag_store)
    in_population = samples.population == pop

    print("Constructing HaplotypeArray")
    haplotypes = genotypes.compress(in_population, axis=1).to_haplotypes()

    # Row indices of variants inside the requested window.
    region_idx = np.where((positions >= p1) & (positions <= p2))[0]
    region_haps = haplotypes.take(region_idx, axis=0)

    # Keep only segregating sites, and apply the same mask to the positions.
    segregating = region_haps.count_alleles().is_segregating()
    region_haps = region_haps.compress(segregating, axis=0)
    segregating_positions = positions.take(region_idx[segregating])

    table = pd.DataFrame(data=region_haps)
    table.index = segregating_positions
    table.to_csv(f'../data/{pop}/{chrom}/sweep_hapl_{name}', index=True, sep="\t")
    print(f"Writing Haplotype array for {name} region for iSAFE algorithm")
Пример #11
0
 def _zarray(self) -> zarr.Array:
     """Return the Zarr array for ``self._path``, opening it on first use.

     The array is opened in mode ``"a"`` and cached on
     ``self._cached_zarray``. Any failure while opening is re-raised as
     ``ArrayException`` with the original exception chained.
     """
     if self._cached_zarray is not None:
         return self._cached_zarray

     try:
         store = _fsstore_from_path(self._path)
         self._cached_zarray = zarr.open_array(store=store, mode="a")
     except Exception as e:
         raise ArrayException(
             f"Exception while opening Zarr array for {self._path}"
         ) from e
     return self._cached_zarray
Пример #12
0
def __append_var(ds, store, name, dim, syncro=None):
    """Append variable ``name`` from dataset ``ds`` to ``store[name]``.

    The append happens along the axis named ``dim``, looked up in the
    target's ``_ARRAY_DIMENSIONS`` attribute. Nothing is appended when the
    source variable does not have dimension ``dim``.
    """
    print("Appending " + name + " from " + ds)

    # Keep the dataset handle bound for as long as the variable is in use.
    nc_dataset = __nc_open(ds)
    source_var = nc_dataset.variables[name]

    if dim not in source_var.dimensions:
        return

    dimension_names = store[name].attrs['_ARRAY_DIMENSIONS']
    axis = dimension_names.index(dim)
    target = zarr.open_array(store=store[name],
                             mode='r+',
                             synchronizer=syncro)
    target.append(source_var, axis)
Пример #13
0
def whatsnpisit(locs,
                chrom,
                inaccessible=False,
                missense=True,
                provide_region=False):
    """Return the snpEff ``ANN`` entries for the given positions on ``chrom``.

    Useful for the RNA_seq variant calling pipeline.

    Parameters
    ----------
    locs : array-like
        Positions to look up; passed to ``SortedIndex.locate_intersection``.
    chrom : str
        Chromosome whose positions and annotations are read.
    inaccessible : bool
        Only the literal ``False`` selects the ``.pass`` callset; any other
        value selects the full callset and its annotation file.
    missense, provide_region : bool
        Accepted for compatibility; not used by this function.

    Returns
    -------
    The rows of ``callset[chrom]['variants']['ANN']`` at the callset
    positions that also appear in ``locs``.
    """
    if inaccessible is False:
        pos_path = f"/home/sanj/ag1000g/data/ag1000g.phase2.ar1.pass/{chrom}/variants/POS"
        callset_fn = '/home/sanj/ag1000g/data/snp_eff/ag1000g.phase2.ar1.snpeff.AgamP4.2.pass.h5'
    else:
        pos_path = f"/media/sanj/Sanj_HDD/Ag1000g/ag1000g.phase2.ar1/{chrom}/variants/POS"
        callset_fn = '/home/sanj/ag1000g/data/all_snp_eff/ag1000g.phase2.ar1.snpeff.AgamP4.2.h5'

    # The genotype array was previously opened here too but never used.
    positions = allel.SortedIndex(zarr.open_array(pos_path, mode='r')[:])

    # Read the annotations fully into memory so the HDF5 file can be closed
    # before returning (the handle used to be left open).
    with h5py.File(callset_fn, mode='r') as callset:
        snp_eff = callset[chrom]['variants']['ANN'][:]

    # Only the mask over the callset positions is needed, not the one over locs.
    positions_bool, _ = positions.locate_intersection(locs)
    return snp_eff[positions_bool]
def test_nested_array():
    """Serve an array that lives inside a zarr group and read it back
    through the HTTP store."""
    # Build a hierarchy holding a single dataset.
    expected = np.random.rand(1024, 1024)
    root = zarr.open()
    root.create_dataset("nested", data=expected)

    # Initialize the app with the nested array only.
    nested = root.get("nested")
    app = Starlette(routes=[create_zarr_route(nested)])

    # Reading through the remote store must return the original values.
    client_store = HTTPStore(TestClient(app))
    served = zarr.open_array(client_store)
    np.testing.assert_allclose(served[:], expected)
Пример #15
0
def make_coord(fss, z, accum_dim):
    """Concatenate the ``accum_dim`` coordinate of every input and store it in ``z``.

    For each filesystem in ``fss`` the array ``accum_dim`` is opened and, when
    possible, decoded with ``cftime.num2pydate`` using its ``units`` attribute
    and its ``calendar`` attribute ('standard' if absent). If anything in that
    decoding step raises, the raw values are used instead. The collected
    values are concatenated, squeezed and written to ``z[accum_dim]``
    (overwriting it) together with the attributes previously held by
    ``z[accum_dim]`` minus ``calendar`` (and minus ``units`` when any input
    was decoded as times).

    Returns the length of the concatenated coordinate.
    """
    # Per-input coordinate values, in the order of ``fss``.
    accum = []
    logger.debug("accumulate coords array %s", accum_dim)
    # Becomes True as soon as one input was successfully decoded as datetimes.
    times = False
    for fs in fss:
        zz = zarr.open_array(fs.get_mapper(accum_dim))

        try:
            import cftime
            if not isinstance(zz, cftime.real_datetime):

                # Try and get the calendar attribute from "calendar" attribute
                # If it doesn't exist, assume a standard calendar
                if zz.attrs.get("calendar") is not None:
                    calendar = zz.attrs.get("calendar", "standard")
                else:
                    calendar = 'standard'

                    # Update attrs in z[accum_dim]
                    zattr = dict(z[accum_dim].attrs)
                    zattr['calendar'] = 'standard'
                    z[accum_dim].attrs.put(zattr)            
                
                # A missing "units" attribute raises here and is handled by
                # the except branch below.
                zz = cftime.num2pydate(zz[...], units=zz.attrs["units"],
                                       calendar=calendar)
                times = True
                logger.debug("converted times")
                accum.append(zz)
            else:
                accum.append(zz)
        except Exception as e:
            # Best-effort fallback: any failure above (including cftime not
            # being importable) keeps the undecoded values for this input.
            ex = e
            accum.append(zz[...].copy())
    attr = dict(z[accum_dim].attrs)
    if times:
        # All collected inputs are cast to datetime64, not only the decoded ones.
        accum = [np.array(a, dtype="M8") for a in accum]
        attr.pop('units', None)
    
    attr.pop('calendar', None)

    # NOTE(review): squeeze() on a single-element result looks like it would
    # yield a 0-d array, for which len(acc) below raises - confirm.
    acc = np.concatenate([np.atleast_1d(a) for a in accum]).squeeze()

    logger.debug("write coords array")
    arr = z.create_dataset(name=accum_dim,
                           data=acc,
                           overwrite=True)
    arr.attrs.update(attr)
    return len(acc)
def test_numpy_writeable():
    """Writes made through the HTTP store must reach the served array."""
    # Source data wrapped in a writable zarr array.
    source = np.random.rand(1024, 1024)
    backing = zarr.array(source)

    # Serve the array.
    app = Starlette(routes=[create_zarr_route(backing)])

    # Write a block through the remote handle.
    remote = zarr.open_array(HTTPStore(TestClient(app)))
    remote[:50, :50] = 2

    # Remote and backing views must agree after the write.
    np.testing.assert_allclose(remote[:], backing[:])
Пример #17
0
def transform_weather():
    """Concatenate the ``sst`` variable of every ``data/*.nc`` file into
    ``sst.day.mean.v2.zarr``.

    Does nothing when the target path already exists. Files are processed in
    sorted path order and stacked along the first axis; masked values are
    stored as NaN.
    """
    target_path = 'sst.day.mean.v2.zarr'
    if os.path.exists(target_path):
        return

    datasets = [Dataset(path)['sst'] for path in sorted(glob('data/*.nc'))]
    total_steps = sum(d.shape[0] for d in datasets)
    out = zarr.open_array(target_path,
                          shape=(total_steps, 720, 1440),
                          chunks=(72, 360, 360),
                          dtype='f4')

    # Copy each file into its slice of the first axis.
    start = 0
    for d in datasets:
        stop = start + d.shape[0]
        out[start:stop] = d[:].filled(np.nan)
        start = stop
Пример #18
0
    def test_save_ev(self):
        """Save ``self.ev`` and check every stored array against the event list."""
        dir_name = saveData(self.ev, persist=False)

        main = os.path.join(dir_name, "main_data")
        meta = os.path.join(dir_name, "meta_data")

        def visible(folder):
            # Directory entries that are not dot-files.
            return [f for f in os.listdir(folder) if not f.startswith(".")]

        def read(store, path):
            # Load one stored array fully into memory.
            return zarr.open_array(store=store, mode="r", path=path)[...]

        errors = []

        # Reported only when both directories are empty.
        if not visible(main) and not visible(meta):
            errors.append("EventList is not saved or does not exist")
        else:
            times = read(main, "times")
            energy = read(main, "energy")
            pi_channel = read(main, "pi_channel")
            gti = read(main, "gti")
            gti = gti.reshape((gti.size // 2, 2))
            dt = read(meta, "dt")
            ncounts = read(meta, "ncounts")
            mjdref = read(meta, "mjdref")
            notes = read(meta, "notes")

            # (mismatch?, message) pairs, evaluated in this order.
            checks = [
                (not np.array_equal(self.ev.time, times),
                 "ev.time is not saved precisely"),
                (not np.array_equal(self.ev.energy, energy),
                 "ev.energy is not saved precisely"),
                (not np.array_equal(self.ev.pi, pi_channel),
                 "ev.pi is not saved precisely"),
                (not np.array_equal(self.ev.gti, gti),
                 "ev.gti is not saved precisely"),
                (not np.isclose(self.ev.dt, dt),
                 "ev.dt is not saved precisely"),
                (not self.ev.ncounts == ncounts,
                 "ev.ncounts is not saved precisely"),
                (not np.isclose(self.ev.mjdref, mjdref),
                 "ev.mjdref is not saved precisely"),
                (not self.ev.notes == notes,
                 "ev.notes is not saved precisely"),
            ]
            errors.extend(message for failed, message in checks if failed)

        assert not errors, "Errors encountered:\n{}".format("\n".join(errors))
def test_numpy_read_only():
    """A read-only served array can be read remotely but rejects writes."""
    # Source data wrapped in a read-only zarr array.
    source = np.random.rand(1024, 1024)
    locked = zarr.array(source, read_only=True)

    # Serve the array.
    app = Starlette(routes=[create_zarr_route(locked)])

    # Reading through the remote store returns the source values.
    remote = zarr.open_array(HTTPStore(TestClient(app)))
    np.testing.assert_allclose(remote[:], source)

    # Writing through the remote store must fail.
    with pytest.raises(ValueError):
        remote[:50, :50] = 10
Пример #20
0
def test_reader_can_read_and_write_to_file(tmp_path):
    """
    Write random data to a new zarr directory, read it back through the
    reader function and compare the loaded layer with what was written.
    """
    zarr_dir = tmp_path / 'example.zarr'
    written = zarr.open_array(zarr_dir,
                              mode='w',
                              shape=(100, 100),
                              chunks=(100, 100))
    written[:] = np.random.rand(100, 100)

    # The reader hook returns the function that actually loads the layers.
    load_layers = zarr_tensorstore(zarr_dir)
    loaded = load_layers(zarr_dir)

    assert isinstance(loaded, List)
    assert len(loaded) == 1

    first_layer = loaded[0]
    assert isinstance(first_layer, Tuple)
    np.testing.assert_allclose(np.asarray(first_layer[0]), written)
Пример #21
0
    def test_save_fits_data(self):
        """Save a FITS event file and compare the stored arrays with the
        values loaded directly from that file."""
        fname = os.path.join(datadir, "monol_testA.evt")
        dir_name = saveData(fname, persist=False)

        # Reference values read straight from the FITS file.
        evtdata = load_events_and_gtis(fname, additional_columns=["PI"])
        mjdref_def = ref_mjd(fname, hdu=1)
        time_def = evtdata.ev_list
        pi_channel_def = evtdata.additional_data["PI"]
        gti_def = evtdata.gti_list
        tstart_def = evtdata.t_start
        tstop_def = evtdata.t_stop

        main = os.path.join(dir_name, "main_data")
        meta = os.path.join(dir_name, "meta_data")

        def visible(folder):
            # Directory entries that are not dot-files.
            return [f for f in os.listdir(folder) if not f.startswith(".")]

        def read(store, path):
            # Load one stored array fully into memory.
            return zarr.open_array(store=store, mode="r", path=path)[...]

        errors = []

        # Reported only when both directories are empty.
        if not visible(main) and not visible(meta):
            errors.append("EventList is not saved or does not exist")
        else:
            times = read(main, "times")
            pi_channel = read(main, "pi_channel")
            gti = read(main, "gti")
            gti = gti.reshape((gti.size // 2, 2))
            tstart = read(meta, "tstart")
            tstop = read(meta, "tstop")
            mjdref = read(meta, "mjdref")

            # Compare in time order; the PI column is reordered the same way.
            order = np.argsort(times)
            times = times[order]
            pi_channel = pi_channel[order]

            # (mismatch?, message) pairs, evaluated in this order.
            checks = [
                (not np.allclose(time_def, times),
                 "fits.events.data.time is not saved precisely"),
                (not np.array_equal(pi_channel_def, pi_channel),
                 "fits.events.data.pi is not saved precisely"),
                (not np.allclose(gti_def, gti),
                 "fits.gti.data is not saved precisely"),
                (not (tstart == tstart_def),
                 "fits.events.header.tstart is not saved precisely"),
                (not (tstop == tstop_def),
                 "fits.events.header.tstop is not saved precisely"),
                (not (mjdref == mjdref_def),
                 "fits.events.header.mjdref is not saved precisely"),
            ]
            errors.extend(message for failed, message in checks if failed)

        assert not errors, "Errors encountered:\n{}".format("\n".join(errors))
Пример #22
0
    def scale(self, input_array: str, output_directory: str) -> None:
        """Perform downsampling to disk.

        The method named by ``self.method`` is looked up on this object and
        applied to the array opened from ``input_array``; the resulting
        pyramid is written as a group into the store for
        ``output_directory``.

        Args:
            input_array: Location of the zarr array to downsample.
            output_directory: Location handed to the store check/creation.

        Raises:
            ValueError: If ``self.method`` does not name an attribute of
                this object.
        """
        func = getattr(self, self.method, None)
        if not func:
            # Say which method was requested instead of raising a bare,
            # message-less Exception.
            raise ValueError(
                f"Unknown downsampling method: {self.method!r}")

        store = self.__check_store(output_directory)
        base = zarr.open_array(input_array)
        pyramid = func(base)

        if self.labeled:
            self.__assert_values(pyramid)

        grp = self.__create_group(store, base, pyramid)

        if self.copy_metadata:
            print(f"copying attribute keys: {list(base.attrs.keys())}")
            grp.attrs.update(base.attrs)
Пример #23
0
def test_read_zarr_execution(setup):
    """``fromzarr`` must load the same chunked data from an array object, a
    reopened array, a path with group/dataset names, and a full array path."""
    session = setup

    test_array = np.random.RandomState(0).rand(20, 10)
    group_name = "test_group"
    dataset_name = "test_dataset"

    def check(r):
        # The fetched result equals the source and was split into >1 chunk.
        result = r.execute().fetch()
        np.testing.assert_array_equal(result, test_array)
        assert len(session._session._tileable_to_fetch[r.data].chunks) > 1

    with pytest.raises(TypeError):
        fromzarr(object())

    with tempfile.TemporaryDirectory() as d:
        path = os.path.join(d, f"test_read_{int(time.time())}.zarr")
        array_path = f"{path}/{group_name}/{dataset_name}"

        group = zarr.group(path)
        arr = group.array(f"{group_name}/{dataset_name}",
                          test_array,
                          chunks=(7, 4))

        # 1. from the freshly created array object
        check(fromzarr(arr))

        # 2. from the array reopened by its full path
        arr = zarr.open_array(array_path)
        check(fromzarr(arr))

        # 3. from the root path plus group and dataset names
        check(fromzarr(path, group=group_name, dataset=dataset_name))

        # 4. from the full path given as a string
        check(fromzarr(array_path))
Пример #24
0
    def testReadZarrExecution(self):
        """``fromzarr`` must load the same chunked data from an array object,
        a reopened array, a path with group/dataset names, and a full path."""
        test_array = np.random.RandomState(0).rand(20, 10)
        group_name = 'test_group'
        dataset_name = 'test_dataset'

        def check(r):
            # The executed result equals the source and has >1 chunk.
            result = self.executor.execute_tensor(r, concat=True)[0]
            np.testing.assert_array_equal(result, test_array)
            self.assertGreater(len(get_tiled(r).chunks), 1)

        with self.assertRaises(TypeError):
            fromzarr(object())

        with tempfile.TemporaryDirectory() as d:
            path = os.path.join(d, f'test_read_{int(time.time())}.zarr')
            array_path = f'{path}/{group_name}/{dataset_name}'

            group = zarr.group(path)
            arr = group.array(f'{group_name}/{dataset_name}',
                              test_array,
                              chunks=(7, 4))

            # 1. from the freshly created array object
            check(fromzarr(arr))

            # 2. from the array reopened by its full path
            arr = zarr.open_array(array_path)
            check(fromzarr(arr))

            # 3. from the root path plus group and dataset names
            check(fromzarr(path, group=group_name, dataset=dataset_name))

            # 4. from the full path given as a string
            check(fromzarr(array_path))
Пример #25
0
    def test_save_lc(self):
        """Save a copy of ``self.lc`` and check every stored array against it."""
        test_lc = copy.deepcopy(self.lc)
        # Make sure counts_err exists
        _ = test_lc.counts_err

        dir_name = saveData(test_lc, persist=False)

        main = os.path.join(dir_name, "main_data")
        meta = os.path.join(dir_name, "meta_data")

        def visible(folder):
            # Directory entries that are not dot-files.
            return [f for f in os.listdir(folder) if not f.startswith(".")]

        def read(store, path):
            # Load one stored array fully into memory.
            return zarr.open_array(store=store, mode="r", path=path)[...]

        errors = []

        # Reported only when both directories are empty.
        if not visible(main) and not visible(meta):
            errors.append("Lightcurve is not saved or does not exist")
        else:
            times = read(main, "times")
            counts = read(main, "counts")
            count_err = read(main, "count_err")
            gti = read(main, "gti")
            gti = gti.reshape((gti.size // 2, 2))

            dt = read(meta, "dt")
            mjdref = read(meta, "mjdref")
            err_dist = read(meta, "err_dist")

            # (mismatch?, message) pairs, evaluated in this order.
            checks = [
                (not np.array_equal(test_lc.time, times),
                 "lc.time is not saved precisely"),
                (not np.array_equal(test_lc.counts, counts),
                 "lc.counts is not saved precisely"),
                (not np.array_equal(test_lc.counts_err, count_err),
                 "lc.counts_err is not saved precisely"),
                (not np.array_equal(test_lc.gti, gti),
                 "lc.gti is not saved precisely"),
                (not (test_lc.dt == dt),
                 "lc.dt is not saved precisely"),
                (not (test_lc.mjdref == mjdref),
                 "lc.mjdref is not saved precisely"),
                (not (test_lc.err_dist == err_dist),
                 "lc.err_dist is not saved precisely"),
            ]
            errors.extend(message for failed, message in checks if failed)

        assert not errors, "Errors encountered:\n{}".format("\n".join(errors))
Пример #26
0
    def testStoreZarrExecution(self):
        """``tozarr`` must store a tensor by file name plus group/dataset, by
        full array path, and into an already opened zarr array."""
        group_name = 'test_group'
        dataset_name = 'test_dataset'

        raw = np.random.RandomState(0).rand(10, 20)
        t = tensor(raw, chunk_size=6)

        with self.assertRaises(TypeError):
            tozarr(object(), t)

        with tempfile.TemporaryDirectory() as tmp_dir:
            filename = os.path.join(
                tmp_dir, f'test_store_{int(time.time())}.zarr')
            path = f'{filename}/{group_name}/{dataset_name}'

            # 1. store by file name with explicit group/dataset and compressor
            store_op = tozarr(filename,
                              t,
                              group=group_name,
                              dataset=dataset_name,
                              compressor=Zstd(level=3))
            self.executor.execute_tensor(store_op)

            stored = zarr.open(path)
            np.testing.assert_array_equal(stored, raw)
            self.assertEqual(stored.compressor, Zstd(level=3))

            # 2. store by the full array path
            store_op = tozarr(path, t + 2)
            self.executor.execute_tensor(store_op)

            stored = zarr.open(path)
            np.testing.assert_array_equal(stored, raw + 2)

            # 3. store into an array object opened with filters/compressor
            delta_filters = [Delta(dtype='i4')]
            blosc = Blosc(cname='zstd', clevel=1, shuffle=Blosc.SHUFFLE)
            target = zarr.open(path, compressor=blosc, filters=delta_filters)

            store_op = tozarr(target, t + 1)
            self.executor.execute_tensor(store_op)
            reread = zarr.open_array(path)
            np.testing.assert_array_equal(reread, raw + 1)
Пример #27
0
def initialize_group(
    group_path: Pathlike,
    arrays: Sequence[NDArray[Any]],
    array_paths: Sequence[str],
    chunks: Sequence[int],
    group_attrs: Optional[Dict[str, Any]] = None,
    compressor: Codec = numcodecs.GZip(-1),
    array_attrs: Optional[Sequence[Dict[str, Any]]] = None,
    modes: Tuple[AccessMode, AccessMode] = ("w", "w"),
    group_kwargs: Optional[Dict[str, Any]] = None,
    array_kwargs: Optional[Dict[str, Any]] = None,
) -> zarr.hierarchy.Group:
    """Create a group and one array inside it per entry of ``arrays``.

    Only the array metadata is created (shape, dtype, chunks, compressor,
    fill value 0 and attributes); the contents of ``arrays`` are not written.

    Parameters
    ----------
    group_path :
        Location of the group, passed to ``access``.
    arrays :
        Arrays whose ``shape`` and ``dtype`` define the created arrays.
    array_paths :
        Name of each array, relative to the group.
    chunks :
        One chunking entry per array, taken in step with ``arrays``.
    group_attrs :
        Attributes for the group; ``None`` means no attributes.
    compressor :
        Compressor used for every array.
    array_attrs :
        One attribute dict per array; ``None`` means empty attributes.
    modes :
        Access modes for the group and for the arrays, in that order.
    group_kwargs, array_kwargs :
        Extra keyword arguments for ``access`` and ``zarr.open_array``;
        ``None`` means none.

    Returns
    -------
    The group returned by ``access``.
    """
    # None sentinels replace the former `{}` defaults, which were single dict
    # objects shared between all calls.
    group_attrs = {} if group_attrs is None else group_attrs
    group_kwargs = {} if group_kwargs is None else group_kwargs
    array_kwargs = {} if array_kwargs is None else array_kwargs

    group_access_mode, array_access_mode = modes
    group = access(group_path,
                   mode=group_access_mode,
                   attrs=group_attrs,
                   **group_kwargs)

    if array_attrs is None:
        _array_attrs: Tuple[Dict[str, Any], ...] = ({},) * len(arrays)
    else:
        _array_attrs = array_attrs

    for name, arr, attrs, chnks in zip(array_paths,
                                       arrays,
                                       _array_attrs,
                                       chunks):
        path = os.path.join(group.path, name)
        z_arr = zarr.open_array(
            store=group.store,
            mode=array_access_mode,
            fill_value=0,
            path=path,
            shape=arr.shape,
            dtype=arr.dtype,
            chunks=chnks,
            compressor=compressor,
            **array_kwargs)
        z_arr.attrs.update(attrs)

    return group
Пример #28
0
def test_store_zarr_execution(setup):
    """``tozarr`` must store a tensor by file name plus group/dataset, by
    full array path, and into an already opened zarr array."""
    group_name = 'test_group'
    dataset_name = 'test_dataset'

    raw = np.random.RandomState(0).rand(10, 20)
    t = tensor(raw, chunk_size=6)

    with pytest.raises(TypeError):
        tozarr(object(), t)

    with tempfile.TemporaryDirectory() as tmp_dir:
        filename = os.path.join(tmp_dir, f'test_store_{int(time.time())}.zarr')
        path = f'{filename}/{group_name}/{dataset_name}'

        # 1. store by file name with explicit group/dataset and compressor
        store_op = tozarr(filename,
                          t,
                          group=group_name,
                          dataset=dataset_name,
                          compressor=Zstd(level=3))
        store_op.execute()

        stored = zarr.open(path)
        np.testing.assert_array_equal(stored, raw)
        assert stored.compressor == Zstd(level=3)

        # 2. store by the full array path
        store_op = tozarr(path, t + 2)
        store_op.execute()

        stored = zarr.open(path)
        np.testing.assert_array_equal(stored, raw + 2)

        # 3. store into an array object opened with filters/compressor
        delta_filters = [Delta(dtype='i4')]
        blosc = Blosc(cname='zstd', clevel=1, shuffle=Blosc.SHUFFLE)
        target = zarr.open(path, compressor=blosc, filters=delta_filters)

        store_op = tozarr(target, t + 1)
        store_op.execute()
        reread = zarr.open_array(path)
        np.testing.assert_array_equal(reread, raw + 1)
Пример #29
0
 def __init__(
     self,
     url: str,
     shape: typing.Tuple[int, ...] = None,
     dtype="float32",
     creds=None,
     memcache: float = None,
 ):
     """Open the array stored at ``url``, or create a zero-filled one.

     With ``shape`` given, a new array of that shape and ``dtype`` is
     created with ``overwrite=True``, chunked as chosen by
     ``_determine_chunksizes``. Without it, the existing array is opened.
     ``creds`` and ``memcache`` are passed on to the storage-map factory.
     """
     if shape is None:
         store = hub.areal.store.get_storage_map(url, creds, memcache)
         self._zarr = zarr.open_array(store)
     else:
         chunks = self._determine_chunksizes(shape, dtype)
         store = hub.areal.store.get_storage_map(url, creds, memcache)
         self._zarr = zarr.zeros(
             shape,
             dtype=dtype,
             chunks=chunks,
             store=store,
             overwrite=True,
         )

     # Mirror the array's metadata on the wrapper.
     backing = self._zarr
     self._shape = backing.shape
     self._chunks = backing.chunks
     self._dtype = backing.dtype
     self._memcache = memcache
Пример #30
0
def labels_to_zarr(labels, out_path):
    """
    Store a label array on disk as zarr, writing only its nonzero entries.

    Parameters
    ----------
    labels : np.ndarray
        Labelled images in a numpy array
    out_path : str or Path
        Location on disk of the zarr array

    Returns
    -------
    label_zarr: zarr.Array
        The zarr array opened at ``out_path`` after the nonzero labels have
        been written into it

    Notes
    -----
    No ``mode`` is passed to ``zarr.open_array``.
    NOTE(review): with zarr's default mode an array already present at
    ``out_path`` would presumably be reused rather than recreated - confirm.
    """
    # Locate and collect the nonzero labels before touching the disk.
    nonzero_coords = labels.nonzero()
    nonzero_values = labels[nonzero_coords]

    label_zarr = zarr.open_array(
        out_path,
        shape=labels.shape,
        dtype=labels.dtype,
        chunks=(1, 10, -1, -1),
    )
    label_zarr.set_coordinate_selection(nonzero_coords, nonzero_values)
    return label_zarr