Example 1
 def __init__(self, file, signal, expg, **kwargs):
     super().__init__(file, signal, expg, **kwargs)
     self.Dataset = zarr.Array
     self.unicode_kwds = {"dtype": object, "object_codec": numcodecs.JSON()}
     self.ragged_kwds = {
         "dtype": object,
         "object_codec": numcodecs.VLenArray(int),
         "exact": True
     }
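
Example 1 stores the zarr dataset class and the keyword arguments used for writing unicode and ragged (variable-length) data: both use an object dtype, with numcodecs.JSON() encoding strings and numcodecs.VLenArray(int) encoding ragged integer arrays (the "exact" flag is a Group.require_dataset option). Below is a minimal sketch, not taken from the example, of how such kwargs translate into zarr calls (zarr v2 API, illustrative names and data):

import numpy as np
import zarr
import numcodecs

# Illustrative kwargs mirroring Example 1 (the "exact" flag is dropped here
# because it belongs to require_dataset, not create_dataset).
unicode_kwds = {"dtype": object, "object_codec": numcodecs.JSON()}
ragged_kwds = {"dtype": object, "object_codec": numcodecs.VLenArray(int)}

group = zarr.group()  # in-memory store for the sketch

names = group.create_dataset("names", shape=(3,), **unicode_kwds)
names[:] = np.array(["alpha", "beta", "gamma"], dtype=object)

ragged = group.create_dataset("ragged", shape=(2,), **ragged_kwds)
ragged[0] = np.array([1, 2, 3])  # rows may have different lengths
ragged[1] = np.array([4])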
Example 2
 def store(self, synced_collection, data):
     dataset = synced_collection.group.require_dataset(
         "test",
         overwrite=True,
         shape=1,
         dtype="object",
         object_codec=numcodecs.JSON(),
     )
     dataset[0] = data
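
Example 2 writes arbitrary JSON-serializable data into a one-element object dataset, recreating it on each call via overwrite=True. A minimal round-trip sketch, assuming synced_collection.group is an ordinary zarr group (zarr v2 API):

import zarr
import numcodecs

group = zarr.group()  # stands in for synced_collection.group
dataset = group.require_dataset(
    "test",
    overwrite=True,
    shape=1,
    dtype="object",
    object_codec=numcodecs.JSON(),
)
dataset[0] = {"a": 1, "b": [2, 3]}   # any JSON-serializable value
assert dataset[0] == {"a": 1, "b": [2, 3]}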
Example 3
    def __init__(self, group=None, name=None, codec=None, *args, **kwargs):
        if not ZARR:
            raise RuntimeError(
                "The Zarr package must be installed to use the ZarrCollection."
            )

        super().__init__(**kwargs)
        self._group = group
        self._name = name
        self._object_codec = numcodecs.JSON() if codec is None else codec
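
Example 3 guards construction behind a module-level ZARR flag and defaults the object codec to numcodecs.JSON(). The flag itself is not part of the snippet; a common way to define such an optional-dependency flag (an assumption, not the project's actual code) is:

# Hypothetical definition of the ZARR availability flag checked above.
try:
    import zarr       # noqa: F401
    import numcodecs  # noqa: F401

    ZARR = True
except ImportError:
    ZARR = False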
Example 4
import warnings
from collections.abc import Mapping

import numpy as np
from scipy.sparse import issparse


def _write_key_value_to_zarr(f, key, value, **kwargs):
    if isinstance(value, Mapping):
        for k, v in value.items():
            if not isinstance(k, str):
                warnings.warn('dict key {} transformed to str upon writing to zarr, '
                              'using string keys is recommended'
                              .format(k))
            _write_key_value_to_zarr(f, key + '/' + str(k), v, **kwargs)
        return

    def preprocess_writing(value):
        if value is None:
            return value
        elif issparse(value):
            return value
        elif isinstance(value, dict):
            # old hack for storing dicts, is never reached
            # in the current implementation, can be removed in the future
            value = np.array([str(value)])
        else:
            # make sure value is an array
            value = np.array(value)
            # wrap zero-dimensional (scalar) values in a 1-element array
            if value.ndim == 0:
                value = np.array([value])
        # make sure the string format is chosen correctly (unicode -> bytes)
        if value.dtype.kind == 'U':
            value = value.astype(np.bytes_)
        return value

    value = preprocess_writing(value)

    # any existing entry must be deleted first; otherwise writing string arrays fails
    if key in f.keys() and value is not None:
        del f[key]

    # ignore arrays with empty dtypes
    if value is None or not value.dtype.descr:
        return
    try:
        if key in set(f.keys()):
            import zarr
            is_valid_group = isinstance(f[key], zarr.hierarchy.Group) \
                             and f[key].shape == value.shape \
                             and f[key].dtype == value.dtype
            if not is_valid_group and not issparse(value):
                f[key][()] = value
                return
            else:
                del f[key]
        #f.create_dataset(key, data=value, **kwargs)
        if key != 'X' and 'chunks' in kwargs:  # TODO: make this more explicit
            del kwargs['chunks']
        import numcodecs  # TODO: only set object_codec for objects
        ds = f.create_dataset(key, shape=value.shape, dtype=value.dtype,
                              object_codec=numcodecs.JSON(), **kwargs)
        _write_in_zarr_chunks(ds, key, value)
    except TypeError:
        # try writing as byte strings
        try:
            if value.dtype.names is None:
                if key in set(f.keys()):
                    if (f[key].shape == value.shape
                            and f[key].dtype == value.dtype):
                        f[key][()] = value.astype('S')
                        return
                    else:
                        del f[key]
                #f.create_dataset(key, data=value.astype('S'), **kwargs)
                ds = f.create_dataset(key, shape=value.astype('S').shape,
                                      dtype=value.astype('S').dtype, **kwargs)
                _write_in_zarr_chunks(ds, key, value.astype('S'))
            else:
                new_dtype = [(dt[0], 'S{}'.format(int(dt[1][2:])*4))
                             for dt in value.dtype.descr]
                if key in set(f.keys()):
                    if (f[key].shape == value.shape
                            and f[key].dtype == value.dtype):
                        f[key][()] = value.astype(new_dtype)
                        return
                    else:
                        del f[key]
                #f.create_dataset(
                #    key, data=value.astype(new_dtype), **kwargs)
                ds = f.create_dataset(key, shape=value.astype(new_dtype).shape,
                                      dtype=value.astype(new_dtype).dtype, **kwargs)
                _write_in_zarr_chunks(ds, key, value.astype(new_dtype))
        except Exception as e:
            warnings.warn('Could not save field with key = "{}" '
                          'to zarr file: {}'.format(key, e))
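
Example 4 recursively flattens mappings into parent/child keys, normalizes values to numpy arrays, and retries with byte-string dtypes when zarr rejects the original dtype. The chunk-wise writer _write_in_zarr_chunks is a sibling helper that is not shown above; the sketch below substitutes a hypothetical stand-in that writes the whole array at once, only to make the call runnable (zarr v2 API and the imports added at the top of the example assumed):

import zarr


def _write_in_zarr_chunks(ds, key, value):
    # Hypothetical stand-in for the helper referenced above: writes the
    # whole array in one go rather than chunk by chunk.
    ds[...] = value


root = zarr.open_group("demo.zarr", mode="w")
_write_key_value_to_zarr(root, "metadata", {"n_cells": 100, "batch": "A"})
print(root["metadata/n_cells"][...], root["metadata/batch"][...])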