示例#1
0
    def __init__(self, store, path=None, read_only=False, chunk_store=None,
                 cache_attrs=True, synchronizer=None):
        """Bind this object to an existing group stored under `path`.

        Parameters
        ----------
        store : mapping-like
            Object indexed by key to read the group metadata; it is also
            handed to the attributes wrapper.
        path : string, optional
            Group path within the store. It is passed through
            `normalize_storage_path` before being used.
        read_only : bool, optional
            Recorded on the instance and forwarded to the attributes wrapper.
        chunk_store : object, optional
            Recorded on the instance; not otherwise used by this constructor.
        cache_attrs : bool, optional
            Forwarded to the attributes wrapper as its `cache` argument.
        synchronizer : object, optional
            Recorded on the instance and forwarded to the attributes wrapper.

        Raises
        ------
        ContainsArrayError
            If `contains_array` reports an array at the normalized path.
        GroupNotFoundError
            If the group metadata key is missing from `store`.

        """
        self._store = store
        self._chunk_store = chunk_store

        # every key of this group is built as "<path>/" + name; an empty
        # normalized path means no prefix at all
        normalized = normalize_storage_path(path)
        self._path = normalized
        self._key_prefix = (normalized + '/') if normalized else ''

        self._read_only = read_only
        self._synchronizer = synchronizer

        # refuse to wrap a location that holds an array
        if contains_array(store, path=normalized):
            raise ContainsArrayError(path)

        # fetch the raw group metadata; a missing key means there is no group
        metadata_key = self._key_prefix + group_meta_key
        try:
            encoded_meta = store[metadata_key]
        except KeyError:
            raise GroupNotFoundError(path)
        self._meta = decode_group_metadata(encoded_meta)

        # user attributes are stored under their own key with the same prefix
        attributes_key = self._key_prefix + attrs_key
        self._attrs = Attributes(store,
                                 key=attributes_key,
                                 read_only=read_only,
                                 cache=cache_attrs,
                                 synchronizer=synchronizer)

        # reporter object built from this instance
        self._info = InfoReporter(self)
示例#2
0
def open_group(store=None,
               mode='a',
               cache_attrs=True,
               synchronizer=None,
               path=None,
               chunk_store=None,
               storage_options=None):
    """Open a group, validating or creating it according to `mode`.

    Parameters
    ----------
    store : MutableMapping or string, optional
        Store or path to directory in file system or name of zip file.
    mode : {'r', 'r+', 'a', 'w', 'w-'}, optional
        Persistence mode, using file-open conventions: 'r' is read only
        (group must exist); 'r+' is read/write (must exist); 'a' is
        read/write (created if it doesn't exist); 'w' creates the group
        (overwriting anything already there); 'w-' creates the group and
        fails if one exists. 'x' is accepted as a synonym for 'w-'.
    cache_attrs : bool, optional
        If True (default), user attributes will be cached for attribute read
        operations. If False, user attributes are reloaded from the store prior
        to all attribute read operations.
    synchronizer : object, optional
        Array synchronizer.
    path : string, optional
        Group path within store.
    chunk_store : MutableMapping or string, optional
        Store or path to directory in file system or name of zip file.
    storage_options : dict
        If using an fsspec URL to create the store, these will be passed to
        the backend implementation. Ignored otherwise.

    Returns
    -------
    g : zarr.hierarchy.Group

    Raises
    ------
    ContainsArrayError
        In modes 'r', 'r+', 'a', 'w-' and 'x', if an array is found at `path`.
    GroupNotFoundError
        In modes 'r' and 'r+', if no group is found at `path`.
    ContainsGroupError
        In modes 'w-' and 'x', if a group is already present at `path`.

    Examples
    --------
    >>> import zarr
    >>> root = zarr.open_group('data/example.zarr', mode='w')
    >>> foo = root.create_group('foo')
    >>> bar = root.create_group('bar')
    >>> root
    <zarr.hierarchy.Group '/'>
    >>> root2 = zarr.open_group('data/example.zarr', mode='a')
    >>> root2
    <zarr.hierarchy.Group '/'>
    >>> root == root2
    True

    """

    # turn whatever was passed as store / chunk_store into store objects
    clobber = mode != 'r'
    store = _normalize_store_arg(store,
                                 clobber=clobber,
                                 storage_options=storage_options)
    if chunk_store is not None:
        chunk_store = _normalize_store_arg(chunk_store,
                                           clobber=clobber,
                                           storage_options=storage_options)
    path = normalize_storage_path(path)

    # bring the store into the state required by the requested mode; any
    # other mode value performs no check or initialization here
    if mode == 'w':
        # unconditional (re)creation
        init_group(store, overwrite=True, path=path, chunk_store=chunk_store)

    elif mode in ('r', 'r+', 'a', 'w-', 'x'):
        # all of these modes reject a path occupied by an array, and then
        # branch on whether a group is already there
        if contains_array(store, path=path):
            raise ContainsArrayError(path)
        group_exists = contains_group(store, path=path)

        if mode in ('r', 'r+'):
            if not group_exists:
                raise GroupNotFoundError(path)
        elif mode == 'a':
            if not group_exists:
                init_group(store, path=path, chunk_store=chunk_store)
        else:
            # 'w-' / 'x': creation only, never reuse an existing group
            if group_exists:
                raise ContainsGroupError(path)
            init_group(store, path=path, chunk_store=chunk_store)

    # only plain 'r' yields a read-only group
    return Group(store,
                 read_only=(mode == 'r'),
                 cache_attrs=cache_attrs,
                 synchronizer=synchronizer,
                 path=path,
                 chunk_store=chunk_store)
示例#3
0
def open_array(store=None,
               mode="a",
               shape=None,
               chunks=True,
               dtype=None,
               compressor="default",
               fill_value=0,
               order="C",
               synchronizer=None,
               filters=None,
               cache_metadata=True,
               cache_attrs=True,
               path=None,
               object_codec=None,
               chunk_store=None,
               storage_options=None,
               partial_decompress=False,
               **kwargs):
    """Open an array using file-mode-like semantics.

    Parameters
    ----------
    store : MutableMapping or string, optional
        Store or path to directory in file system or name of zip file.
    mode : {'r', 'r+', 'a', 'w', 'w-'}, optional
        Persistence mode: 'r' means read only (must exist); 'r+' means
        read/write (must exist); 'a' means read/write (create if doesn't
        exist); 'w' means create (overwrite if exists); 'w-' means create
        (fail if exists). 'x' is accepted as a synonym for 'w-'.
    shape : int or tuple of ints, optional
        Array shape.
    chunks : int or tuple of ints, optional
        Chunk shape. If True, will be guessed from `shape` and `dtype`. If
        False, will be set to `shape`, i.e., single chunk for the whole array.
        If an int, the chunk size in each dimension will be given by the value
        of `chunks`. Default is True.
    dtype : string or dtype, optional
        NumPy dtype.
    compressor : Codec, optional
        Primary compressor.
    fill_value : object, optional
        Default value to use for uninitialized portions of the array.
    order : {'C', 'F'}, optional
        Memory layout to be used within each chunk.
    synchronizer : object, optional
        Array synchronizer.
    filters : sequence, optional
        Sequence of filters to use to encode chunk data prior to compression.
    cache_metadata : bool, optional
        If True, array configuration metadata will be cached for the
        lifetime of the object. If False, array metadata will be reloaded
        prior to all data access and modification operations (may incur
        overhead depending on storage and data access pattern).
    cache_attrs : bool, optional
        If True (default), user attributes will be cached for attribute read
        operations. If False, user attributes are reloaded from the store prior
        to all attribute read operations.
    path : string, optional
        Array path within store.
    object_codec : Codec, optional
        A codec to encode object arrays, only needed if dtype=object.
    chunk_store : MutableMapping or string, optional
        Store or path to directory in file system or name of zip file.
    storage_options : dict
        If using an fsspec URL to create the store, these will be passed to
        the backend implementation. Ignored otherwise.
    partial_decompress : bool, optional
        If True and while the chunk_store is a FSStore and the compression used
        is Blosc, when getting data from the array chunks will be partially
        read and decompressed when possible. The value is forwarded to the
        returned array.

        .. versionadded:: 2.7

    Returns
    -------
    z : zarr.core.Array

    Raises
    ------
    ContainsGroupError
        In modes 'r', 'r+', 'a', 'w-' and 'x', if a group is found at `path`.
    ArrayNotFoundError
        In modes 'r' and 'r+', if no array is found at `path`.
    ContainsArrayError
        In modes 'w-' and 'x', if an array is already present at `path`.

    Examples
    --------
    >>> import numpy as np
    >>> import zarr
    >>> z1 = zarr.open_array('data/example.zarr', mode='w', shape=(10000, 10000),
    ...                      chunks=(1000, 1000), fill_value=0)
    >>> z1[:] = np.arange(100000000).reshape(10000, 10000)
    >>> z1
    <zarr.core.Array (10000, 10000) float64>
    >>> z2 = zarr.open_array('data/example.zarr', mode='r')
    >>> z2
    <zarr.core.Array (10000, 10000) float64 read-only>
    >>> np.all(z1[:] == z2[:])
    True

    Notes
    -----
    There is no need to close an array. Data are automatically flushed to the
    file system.

    """

    # use same mode semantics as h5py
    # r : read only, must exist
    # r+ : read/write, must exist
    # w : create, delete if exists
    # w- or x : create, fail if exists
    # a : read/write if exists, create otherwise (default)

    # handle polymorphic store arg
    clobber = (mode == 'w')
    store = normalize_store_arg(store,
                                clobber=clobber,
                                storage_options=storage_options,
                                mode=mode)
    if chunk_store is not None:
        chunk_store = normalize_store_arg(chunk_store,
                                          clobber=clobber,
                                          storage_options=storage_options)
    path = normalize_storage_path(path)

    # API compatibility with h5py
    compressor, fill_value = _kwargs_compat(compressor, fill_value, kwargs)

    # ensure fill_value of correct type; indexing with () unwraps the 0-d
    # array produced for a scalar fill value
    if fill_value is not None:
        fill_value = np.array(fill_value, dtype=dtype)[()]

    # arguments shared by every branch that has to create the array
    init_kwargs = dict(shape=shape,
                       chunks=chunks,
                       dtype=dtype,
                       compressor=compressor,
                       fill_value=fill_value,
                       order=order,
                       filters=filters,
                       path=path,
                       object_codec=object_codec,
                       chunk_store=chunk_store)

    # ensure store is initialized

    if mode in ['r', 'r+']:
        if contains_group(store, path=path):
            raise ContainsGroupError(path)
        elif not contains_array(store, path=path):
            raise ArrayNotFoundError(path)

    elif mode == 'w':
        init_array(store, overwrite=True, **init_kwargs)

    elif mode == 'a':
        if contains_group(store, path=path):
            raise ContainsGroupError(path)
        elif not contains_array(store, path=path):
            init_array(store, **init_kwargs)

    elif mode in ['w-', 'x']:
        if contains_group(store, path=path):
            raise ContainsGroupError(path)
        elif contains_array(store, path=path):
            raise ContainsArrayError(path)
        else:
            init_array(store, **init_kwargs)

    # determine read only status
    read_only = mode == 'r'

    # instantiate array; partial_decompress must be handed over explicitly,
    # otherwise the caller's setting is silently ignored
    z = Array(store,
              read_only=read_only,
              synchronizer=synchronizer,
              cache_metadata=cache_metadata,
              cache_attrs=cache_attrs,
              path=path,
              chunk_store=chunk_store,
              partial_decompress=partial_decompress)

    return z
示例#4
0
    def __init__(self,
                 store,
                 path=None,
                 read_only=False,
                 chunk_store=None,
                 cache_attrs=True,
                 synchronizer=None,
                 zarr_version=None):
        """Bind this object to a group stored under `path`.

        Parameters
        ----------
        store : object
            Passed through `_normalize_store_arg`; the result is the store
            used for all metadata lookups.
        path : string, optional
            Group path within the store. It is passed through
            `normalize_storage_path` before being used.
        read_only : bool, optional
            Recorded on the instance and forwarded to the attributes wrapper.
        chunk_store : object, optional
            If given, passed through `_normalize_store_arg` and recorded on
            the instance.
        cache_attrs : bool, optional
            Forwarded to the attributes wrapper as its `cache` argument.
        synchronizer : object, optional
            Recorded on the instance and forwarded to the attributes wrapper.
        zarr_version : optional
            Protocol version. If None, it is read from the normalized store's
            `_store_version` attribute, falling back to
            `DEFAULT_ZARR_VERSION`.

        Raises
        ------
        ContainsArrayError
            If `contains_array` reports an array at the normalized path.
        GroupNotFoundError
            If the group metadata key is missing and either the version is 2
            or the store lists nothing under the group's metadata prefix.

        """
        store: BaseStore = _normalize_store_arg(store,
                                                zarr_version=zarr_version)
        if zarr_version is None:
            # fall back to whatever version the store itself advertises
            zarr_version = getattr(store, '_store_version',
                                   DEFAULT_ZARR_VERSION)
        if chunk_store is not None:
            chunk_store: BaseStore = _normalize_store_arg(
                chunk_store, zarr_version=zarr_version)

        self._store = store
        self._chunk_store = chunk_store

        # every key of this group is built as "<path>/" + name; an empty
        # normalized path means no prefix at all
        normalized = normalize_storage_path(path)
        self._path = normalized
        self._key_prefix = (normalized + '/') if normalized else ''

        self._read_only = read_only
        self._synchronizer = synchronizer
        self._version = zarr_version

        if self._version == 3:
            # extra bookkeeping that is only set up for version 3
            self._data_key_prefix = data_root + self._key_prefix
            self._data_path = data_root + self._path
            self._hierarchy_metadata = _get_hierarchy_metadata(
                store=self._store)
            self._metadata_key_suffix = _get_metadata_suffix(store=self._store)

        # refuse to wrap a location that holds an array
        if contains_array(store, path=self._path):
            raise ContainsArrayError(path)

        # load the group metadata
        try:
            group_key = _prefix_to_group_key(self._store, self._key_prefix)
            assert not group_key.endswith("root/.group")
            encoded_meta = store[group_key]
        except KeyError:
            # a missing metadata document is always an error for version 2
            if self._version == 2:
                raise GroupNotFoundError(path)
            # otherwise the group may be implicit: it counts as present when
            # the store lists anything under its metadata prefix
            implicit_prefix = meta_root + self._key_prefix
            if not self._store.list_prefix(implicit_prefix):
                raise GroupNotFoundError(path)
            # implicit group does not have any metadata
            self._meta = None
        else:
            self._meta = self._store._metadata_class.decode_group_metadata(
                encoded_meta)

        # Version 2 keeps attributes under a dedicated key; otherwise the
        # group key itself is used. Note: that key doesn't actually exist for
        # implicit groups, but the object can still be created.
        attributes_key = (
            (self._key_prefix + attrs_key) if self._version == 2 else group_key
        )
        self._attrs = Attributes(store,
                                 key=attributes_key,
                                 read_only=read_only,
                                 cache=cache_attrs,
                                 synchronizer=synchronizer)

        # reporter object built from this instance
        self._info = InfoReporter(self)
示例#5
0
def open_array(store=None,
               mode="a",
               shape=None,
               chunks=True,
               dtype=None,
               compressor="default",
               fill_value=0,
               order="C",
               synchronizer=None,
               filters=None,
               cache_metadata=True,
               cache_attrs=True,
               path=None,
               object_codec=None,
               chunk_store=None,
               storage_options=None,
               partial_decompress=False,
               write_empty_chunks=True,
               *,
               zarr_version=None,
               dimension_separator=None,
               **kwargs):
    """Open an array using file-mode-like semantics.

    Parameters
    ----------
    store : MutableMapping or string, optional
        Store or path to directory in file system or name of zip file.
    mode : {'r', 'r+', 'a', 'w', 'w-'}, optional
        Persistence mode: 'r' means read only (must exist); 'r+' means
        read/write (must exist); 'a' means read/write (create if doesn't
        exist); 'w' means create (overwrite if exists); 'w-' means create
        (fail if exists). 'x' is accepted as a synonym for 'w-'.
    shape : int or tuple of ints, optional
        Array shape.
    chunks : int or tuple of ints, optional
        Chunk shape. If True, will be guessed from `shape` and `dtype`. If
        False, will be set to `shape`, i.e., single chunk for the whole array.
        If an int, the chunk size in each dimension will be given by the value
        of `chunks`. Default is True.
    dtype : string or dtype, optional
        NumPy dtype.
    compressor : Codec, optional
        Primary compressor.
    fill_value : object, optional
        Default value to use for uninitialized portions of the array.
    order : {'C', 'F'}, optional
        Memory layout to be used within each chunk.
    synchronizer : object, optional
        Array synchronizer.
    filters : sequence, optional
        Sequence of filters to use to encode chunk data prior to compression.
    cache_metadata : bool, optional
        If True, array configuration metadata will be cached for the
        lifetime of the object. If False, array metadata will be reloaded
        prior to all data access and modification operations (may incur
        overhead depending on storage and data access pattern).
    cache_attrs : bool, optional
        If True (default), user attributes will be cached for attribute read
        operations. If False, user attributes are reloaded from the store prior
        to all attribute read operations.
    path : string, optional
        Array path within store.
    object_codec : Codec, optional
        A codec to encode object arrays, only needed if dtype=object.
    chunk_store : MutableMapping or string, optional
        Store or path to directory in file system or name of zip file.
    storage_options : dict
        If using an fsspec URL to create the store, these will be passed to
        the backend implementation. Ignored otherwise.
    partial_decompress : bool, optional
        If True and while the chunk_store is a FSStore and the compression used
        is Blosc, when getting data from the array chunks will be partially
        read and decompressed when possible. The value is forwarded to the
        returned array.
    write_empty_chunks : bool, optional
        If True (default), all chunks will be stored regardless of their
        contents. If False, each chunk is compared to the array's fill value
        prior to storing. If a chunk is uniformly equal to the fill value, then
        that chunk is not stored, and the store entry for that chunk's key
        is deleted. This setting enables sparser storage, as only chunks with
        non-fill-value data are stored, at the expense of overhead associated
        with checking the data of each chunk.

        .. versionadded:: 2.11

    zarr_version : {None, 2, 3}, optional
        The zarr protocol version of the array to be opened. If None, it will
        be inferred from ``store`` or ``chunk_store`` if they are provided,
        otherwise defaulting to 2.
    dimension_separator : {None, '.', '/'}, optional
        Can be used to specify whether the array is in a flat ('.') or nested
        ('/') format. If None, the appropriate value will be read from `store`
        when present. Otherwise, defaults to '.' when ``zarr_version == 2``
        and `/` otherwise.

    Returns
    -------
    z : zarr.core.Array

    Raises
    ------
    ContainsGroupError
        In modes 'r', 'r+' and 'a', if there is no array at `path` but a
        group is found there; in modes 'w-' and 'x', if a group is found at
        `path`.
    ArrayNotFoundError
        In modes 'r' and 'r+', if neither an array nor a group is found at
        `path`.
    ContainsArrayError
        In modes 'w-' and 'x', if an array is already present at `path`.

    Examples
    --------
    >>> import numpy as np
    >>> import zarr
    >>> z1 = zarr.open_array('data/example.zarr', mode='w', shape=(10000, 10000),
    ...                      chunks=(1000, 1000), fill_value=0)
    >>> z1[:] = np.arange(100000000).reshape(10000, 10000)
    >>> z1
    <zarr.core.Array (10000, 10000) float64>
    >>> z2 = zarr.open_array('data/example.zarr', mode='r')
    >>> z2
    <zarr.core.Array (10000, 10000) float64 read-only>
    >>> np.all(z1[:] == z2[:])
    True

    Notes
    -----
    There is no need to close an array. Data are automatically flushed to the
    file system.

    """

    # use same mode semantics as h5py
    # r : read only, must exist
    # r+ : read/write, must exist
    # w : create, delete if exists
    # w- or x : create, fail if exists
    # a : read/write if exists, create otherwise (default)

    # with no store to inspect, take the version from chunk_store if it
    # advertises one
    if zarr_version is None and store is None:
        zarr_version = getattr(chunk_store, '_store_version',
                               DEFAULT_ZARR_VERSION)

    # handle polymorphic store arg
    store = normalize_store_arg(store,
                                storage_options=storage_options,
                                mode=mode,
                                zarr_version=zarr_version)
    # from here on the version reported by the normalized store is used
    zarr_version = getattr(store, '_store_version', DEFAULT_ZARR_VERSION)
    if chunk_store is not None:
        chunk_store = normalize_store_arg(chunk_store,
                                          storage_options=storage_options,
                                          mode=mode,
                                          zarr_version=zarr_version)

    # respect the dimension separator specified in a store, if present
    if dimension_separator is None:
        if hasattr(store, '_dimension_separator'):
            dimension_separator = store._dimension_separator
        else:
            dimension_separator = '.' if zarr_version == 2 else '/'

    if zarr_version == 3 and path is None:
        path = 'array'  # TODO: raise ValueError instead?

    path = normalize_storage_path(path)

    # API compatibility with h5py
    compressor, fill_value = _kwargs_compat(compressor, fill_value, kwargs)

    # ensure fill_value of correct type; indexing with () unwraps the 0-d
    # array produced for a scalar fill value
    if fill_value is not None:
        fill_value = np.array(fill_value, dtype=dtype)[()]

    # arguments shared by every branch that has to create the array
    init_kwargs = dict(shape=shape,
                       chunks=chunks,
                       dtype=dtype,
                       compressor=compressor,
                       fill_value=fill_value,
                       order=order,
                       filters=filters,
                       path=path,
                       object_codec=object_codec,
                       chunk_store=chunk_store,
                       dimension_separator=dimension_separator)

    # ensure store is initialized

    if mode in ['r', 'r+']:
        if not contains_array(store, path=path):
            if contains_group(store, path=path):
                raise ContainsGroupError(path)
            raise ArrayNotFoundError(path)

    elif mode == 'w':
        init_array(store, overwrite=True, **init_kwargs)

    elif mode == 'a':
        if not contains_array(store, path=path):
            if contains_group(store, path=path):
                raise ContainsGroupError(path)
            init_array(store, **init_kwargs)

    elif mode in ['w-', 'x']:
        if contains_group(store, path=path):
            raise ContainsGroupError(path)
        elif contains_array(store, path=path):
            raise ContainsArrayError(path)
        else:
            init_array(store, **init_kwargs)

    # determine read only status
    read_only = mode == 'r'

    # instantiate array; partial_decompress must be handed over explicitly,
    # otherwise the caller's setting is silently ignored
    z = Array(store,
              read_only=read_only,
              synchronizer=synchronizer,
              cache_metadata=cache_metadata,
              cache_attrs=cache_attrs,
              path=path,
              chunk_store=chunk_store,
              partial_decompress=partial_decompress,
              write_empty_chunks=write_empty_chunks)

    return z