Пример #1
0
    def rename(self, src_path, dst_path, metadata_key_suffix='.json'):
        """Rename the node at *src_path* to *dst_path* on the file system.

        Moves the node's directories under both the ``meta/root`` and
        ``data/root`` trees, then moves any sibling ``.array<suffix>`` /
        ``.group<suffix>`` metadata files.

        Raises
        ------
        FileNotFoundError
            If nothing exists at *src_path* in either tree.
        """
        store_src_path = normalize_storage_path(src_path)
        store_dst_path = normalize_storage_path(dst_path)

        dir_path = self.path
        any_existed = False
        for root_prefix in ['meta', 'data']:
            # NOTE: reuses the src_path/dst_path parameter names for the
            # absolute file-system paths; the normalized copies above are
            # the ones used from here on.
            src_path = os.path.join(dir_path, root_prefix, 'root',
                                    store_src_path)
            if os.path.exists(src_path):
                any_existed = True
                dst_path = os.path.join(dir_path, root_prefix, 'root',
                                        store_dst_path)
                # os.renames creates missing intermediate directories and
                # prunes emptied source directories
                os.renames(src_path, dst_path)

        for suffix in [
                '.array' + metadata_key_suffix, '.group' + metadata_key_suffix
        ]:
            src_meta = os.path.join(dir_path, 'meta', 'root',
                                    store_src_path + suffix)
            if os.path.exists(src_meta):
                any_existed = True
                dst_meta = os.path.join(dir_path, 'meta', 'root',
                                        store_dst_path + suffix)
                dst_dir = os.path.dirname(dst_meta)
                if not os.path.exists(dst_dir):
                    os.makedirs(dst_dir)
                os.rename(src_meta, dst_meta)
        if not any_existed:
            raise FileNotFoundError("nothing found at src_path")
Пример #2
0
    def rename(self, src_path: Path, dst_path: Path):
        """Rename the node at *src_path* to *dst_path* (v3 nested layout).

        Moves the node's sub-tree under both the meta and data roots, moves
        its ``.array``/``.group`` metadata documents, and finally lets
        ``_rename_metadata_v3`` handle any remaining flat metadata keys.

        Raises
        ------
        ValueError
            If nothing exists at *src_path*.
        """
        src_path = normalize_storage_path(src_path)
        dst_path = normalize_storage_path(dst_path)

        any_renamed = False
        for base in [meta_root, data_root]:
            if self.list_prefix(base + src_path):
                src_parent, src_key = self._get_parent(base + src_path)
                # _require_parent presumably creates missing destination
                # parents — TODO confirm against the helper's definition
                dst_parent, dst_key = self._require_parent(base + dst_path)

                if src_key in src_parent:
                    dst_parent[dst_key] = src_parent.pop(src_key)

                if base == meta_root:
                    # check for and move corresponding metadata
                    sfx = _get_metadata_suffix(self)
                    src_meta = src_key + '.array' + sfx
                    if src_meta in src_parent:
                        dst_meta = dst_key + '.array' + sfx
                        dst_parent[dst_meta] = src_parent.pop(src_meta)
                    src_meta = src_key + '.group' + sfx
                    if src_meta in src_parent:
                        dst_meta = dst_key + '.group' + sfx
                        dst_parent[dst_meta] = src_parent.pop(src_meta)
                any_renamed = True
        # also rename metadata keys tracked outside the nested tree
        any_renamed = _rename_metadata_v3(self, src_path,
                                          dst_path) or any_renamed
        if not any_renamed:
            raise ValueError(f"no item {src_path} found to rename")
Пример #3
0
 def _strip_prefix_from_path(path, prefix):
     """Return *path* with the leading *prefix* component removed.

     Both arguments are normalized first, so neither carries leading or
     trailing slashes when compared.
     """
     path_norm = normalize_storage_path(path)
     prefix_norm = normalize_storage_path(prefix)
     # test the *normalized* prefix: a prefix such as '/' is truthy but
     # normalizes to '', and must not cause a character to be stripped
     if prefix_norm:
         return path_norm[(len(prefix_norm) + 1):]
     else:
         return path_norm
Пример #4
0
 def _item_path(self, item):
     """Map *item* to a store path, honoring absolute ('/'-prefixed) items."""
     absolute = bool(item) and item[0] == '/'
     path = normalize_storage_path(item)
     if not absolute and self._path:
         # relative items are resolved against this node's key prefix
         path = self._key_prefix + path
     return path
Пример #5
0
 def _item_path(self, item):
     """Resolve *item* relative to this node unless it is an absolute path."""
     if item and item[0] == '/':
         # absolute path: the current prefix is ignored
         return normalize_storage_path(item)
     # relative path: prepend the key prefix when this node is not the root
     path = normalize_storage_path(item)
     return (self._key_prefix + path) if self._path else path
Пример #6
0
    def __init__(self, store, path=None, read_only=False, chunk_store=None,
                 synchronizer=None, cache_metadata=True):
        """Initialize internal state for a node at *path* within *store*."""
        # N.B., expect at this point store is fully initialized with all
        # configuration metadata fully specified and normalized

        self._store = store
        self._path = normalize_storage_path(path)
        # key prefix is the normalized path plus a trailing '/', or the
        # empty string for the root node
        if self._path:
            self._key_prefix = self._path + '/'
        else:
            self._key_prefix = ''
        self._read_only = read_only
        # fall back to the main store for chunk data when no separate
        # chunk store is supplied
        if chunk_store is None:
            self._chunk_store = store
        else:
            self._chunk_store = chunk_store
        self._synchronizer = synchronizer
        self._cache_metadata = cache_metadata
        self._is_view = False

        # initialize metadata
        self._load_metadata()

        # initialize attributes
        akey = self._key_prefix + attrs_key
        self._attrs = Attributes(store, key=akey, read_only=read_only,
                                 synchronizer=synchronizer)
Пример #7
0
    def getsize(self, path=None):
        """Return the number of bytes stored at *path*.

        A directory-like value reports the summed size of its non-directory
        children; -1 is returned when any value's size cannot be computed.
        """
        path = normalize_storage_path(path)

        # obtain value to return size of
        if path:
            try:
                parent, key = self._get_parent(path)
                value = parent[key]
            except KeyError:
                # err_path_not_found is expected to raise here
                err_path_not_found(path)
        else:
            value = self.root

        # obtain size of value
        if isinstance(value, self.cls):
            # total size for directory
            size = 0
            for v in value.values():
                if not isinstance(v, self.cls):
                    try:
                        size += buffer_size(v)
                    except TypeError:
                        # size cannot be determined for this value type
                        return -1
            return size
        else:
            try:
                return buffer_size(value)
            except TypeError:
                return -1
Пример #8
0
    def __init__(self,
                 store,
                 path=None,
                 read_only=False,
                 chunk_store=None,
                 synchronizer=None,
                 cache_metadata=True):
        """Set up internal state for a node rooted at *path* in *store*."""
        # N.B., expect at this point store is fully initialized with all
        # configuration metadata fully specified and normalized

        self._store = store
        self._path = normalize_storage_path(path)
        # key prefix is the normalized path plus '/' (empty for the root)
        if self._path:
            self._key_prefix = self._path + '/'
        else:
            self._key_prefix = ''
        self._read_only = read_only
        # default to the main store for chunk data as well
        if chunk_store is None:
            self._chunk_store = store
        else:
            self._chunk_store = chunk_store
        self._synchronizer = synchronizer
        self._cache_metadata = cache_metadata
        self._is_view = False

        # initialize metadata
        self._load_metadata()

        # initialize attributes
        akey = self._key_prefix + attrs_key
        self._attrs = Attributes(store,
                                 key=akey,
                                 read_only=read_only,
                                 synchronizer=synchronizer)
Пример #9
0
    def __init__(self, store, path=None, read_only=False, chunk_store=None,
                 cache_attrs=True, synchronizer=None):
        """Open the group stored at *path* within *store*.

        Raises
        ------
        ContainsArrayError
            If an array (not a group) occupies *path*.
        GroupNotFoundError
            If no group metadata exists at *path*.
        """
        self._store = store
        self._chunk_store = chunk_store
        self._path = normalize_storage_path(path)
        # key prefix is the normalized path plus '/' (empty for the root)
        if self._path:
            self._key_prefix = self._path + '/'
        else:
            self._key_prefix = ''
        self._read_only = read_only
        self._synchronizer = synchronizer

        # guard conditions
        if contains_array(store, path=self._path):
            raise ContainsArrayError(path)

        # initialize metadata
        try:
            mkey = self._key_prefix + group_meta_key
            meta_bytes = store[mkey]
        except KeyError:
            raise GroupNotFoundError(path)
        else:
            meta = decode_group_metadata(meta_bytes)
            self._meta = meta

        # setup attributes
        akey = self._key_prefix + attrs_key
        self._attrs = Attributes(store, key=akey, read_only=read_only,
                                 cache=cache_attrs, synchronizer=synchronizer)

        # setup info
        self._info = InfoReporter(self)
Пример #10
0
 def rmdir(self, path=None):
     """Remove the directory backing *path* (or the whole store) if present."""
     target = self.path
     store_path = normalize_storage_path(path)
     if store_path:
         target = os.path.join(target, store_path)
     if os.path.isdir(target):
         shutil.rmtree(target)
Пример #11
0
 def rmdir(self, path: str = "") -> None:
     """Erase every key under *path*; only valid on erasable stores."""
     if not self.is_erasable():
         raise NotImplementedError(
             f'{type(self)} is not erasable, cannot call "rmdir"'
         )  # pragma: no cover
     _rmdir_from_keys(self, normalize_storage_path(path))
Пример #12
0
    def __init__(
        self,
        container=None,
        prefix='',
        account_name=None,
        account_key=None,
        blob_service_kwargs=None,
        dimension_separator=None,
        client=None,
    ):
        """Initialize an Azure Blob Storage backed store.

        Prefer passing a pre-built ``azure.storage.blob.ContainerClient``
        via *client*; the container/account/key parameters are a deprecated
        way to have the client constructed on the caller's behalf.
        """
        self._dimension_separator = dimension_separator
        self.prefix = normalize_storage_path(prefix)
        if client is None:
            # deprecated option, try to construct the client for them
            # (message fixed: previously the concatenated lines ran the
            # words together and read "Provide and instance")
            msg = (
                "Providing 'container', 'account_name', 'account_key', and 'blob_service_kwargs' "
                "is deprecated. Provide an instance of 'azure.storage.blob.ContainerClient' "
                "'client' instead.")
            warnings.warn(msg, FutureWarning, stacklevel=2)
            from azure.storage.blob import ContainerClient
            blob_service_kwargs = blob_service_kwargs or {}
            client = ContainerClient(
                "https://{}.blob.core.windows.net/".format(account_name),
                container,
                credential=account_key,
                **blob_service_kwargs)

        self.client = client
        self._container = container
        self._account_name = account_name
        self._account_key = account_key
Пример #13
0
    def getsize(self, path=None):
        """Return total byte size of the value(s) stored at *path*.

        Directory nodes report the sum of their immediate non-directory
        children's sizes; -1 is returned when any size cannot be determined.
        """
        path = normalize_storage_path(path)

        # locate the value whose size is requested (root when path is empty)
        if not path:
            value = self.root
        else:
            try:
                parent, key = self._get_parent(path)
                value = parent[key]
            except KeyError:
                err_path_not_found(path)

        if not isinstance(value, self.cls):
            # leaf value: size of its buffer, or -1 when unsized
            try:
                return buffer_size(value)
            except TypeError:
                return -1

        # directory: accumulate the sizes of non-directory children
        total = 0
        for child in value.values():
            if isinstance(child, self.cls):
                continue
            try:
                total += buffer_size(child)
            except TypeError:
                return -1
        return total
Пример #14
0
 def rmdir(self, path=None):
     """Delete the directory tree mapped to *path*, ignoring absence."""
     store_path = normalize_storage_path(path)
     dir_path = (os.path.join(self.path, store_path)
                 if store_path else self.path)
     if os.path.isdir(dir_path):
         shutil.rmtree(dir_path)
Пример #15
0
def group(store=None,
          overwrite=False,
          chunk_store=None,
          synchronizer=None,
          path=None):
    """Create a group.

    Parameters
    ----------
    store : MutableMapping or string, optional
        Store or path to directory in file system.
    overwrite : bool, optional
        If True, delete any pre-existing data in `store` at `path` before
        creating the group.
    chunk_store : MutableMapping, optional
        Separate storage for chunks. If not provided, `store` will be used
        for storage of both chunks and metadata.
    synchronizer : object, optional
        Array synchronizer.
    path : string, optional
        Group path within store.

    Returns
    -------
    g : zarr.hierarchy.Group

    Examples
    --------
    Create a group in memory::

        >>> import zarr
        >>> g = zarr.group()
        >>> g
        <zarr.hierarchy.Group '/'>

    Create a group with a different store::

        >>> store = zarr.DirectoryStore('data/example.zarr')
        >>> g = zarr.group(store=store, overwrite=True)
        >>> g
        <zarr.hierarchy.Group '/'>

    """

    # handle polymorphic store arg
    store = _normalize_store_arg(store)
    path = normalize_storage_path(path)

    # require group at `path` (not merely at the store root); otherwise a
    # group existing at the root would suppress initialization of `path`
    # and the Group constructor below would fail
    if overwrite or not contains_group(store, path=path):
        init_group(store,
                   overwrite=overwrite,
                   chunk_store=chunk_store,
                   path=path)

    return Group(store,
                 read_only=False,
                 chunk_store=chunk_store,
                 synchronizer=synchronizer,
                 path=path)
Пример #16
0
 def getsize(self, path=None):
     """Compressed size of the zip entry at *path*, or the summed
     compressed size of its immediate children when *path* has any."""
     path = normalize_storage_path(path)
     children = self.listdir(path)
     if children:
         total = 0
         for child in children:
             name = path + '/' + child if path else child
             try:
                 info = self.zf.getinfo(name)
             except KeyError:
                 # child is a sub-directory rather than a stored entry
                 pass
             else:
                 total += info.compress_size
         return total
     if path:
         try:
             return self.zf.getinfo(path).compress_size
         except KeyError:
             err_path_not_found(path)
     else:
         return 0
Пример #17
0
    def __init__(self, store, path=None, read_only=False, chunk_store=None,
                 synchronizer=None):
        """Open the group stored at *path* within *store*.

        Signals errors via err_contains_array / err_group_not_found, which
        are presumably raising helpers — TODO confirm at their definitions.
        """
        self._store = store
        self._path = normalize_storage_path(path)
        # key prefix is the normalized path plus '/' (empty for the root)
        if self._path:
            self._key_prefix = self._path + '/'
        else:
            self._key_prefix = ''
        self._read_only = read_only
        # default to the main store for chunk data as well
        if chunk_store is None:
            self._chunk_store = store
        else:
            self._chunk_store = chunk_store
        self._synchronizer = synchronizer

        # guard conditions
        if contains_array(store, path=self._path):
            err_contains_array(path)

        # initialize metadata
        try:
            mkey = self._key_prefix + group_meta_key
            meta_bytes = store[mkey]
        except KeyError:
            err_group_not_found(path)
        else:
            meta = decode_group_metadata(meta_bytes)
            self._meta = meta

        # setup attributes
        akey = self._key_prefix + attrs_key
        self._attrs = Attributes(store, key=akey, read_only=read_only,
                                 synchronizer=synchronizer)
Пример #18
0
 def getsize(self, path=None):
     """Return the compressed size of the entry at *path*.

     For a path with children, sums the compressed sizes of the immediate
     child entries; calls err_path_not_found for a non-empty path that
     matches nothing, and returns 0 for an empty root.
     """
     path = normalize_storage_path(path)
     children = self.listdir(path)
     if children:
         size = 0
         for child in children:
             if path:
                 name = path + '/' + child
             else:
                 name = child
             try:
                 info = self.zf.getinfo(name)
             except KeyError:
                 # child is itself a directory, not a zip entry
                 pass
             else:
                 size += info.compress_size
         return size
     elif path:
         try:
             info = self.zf.getinfo(path)
             return info.compress_size
         except KeyError:
             err_path_not_found(path)
     else:
         return 0
Пример #19
0
def init_group(store, overwrite=False, path=None, chunk_store=None):
    """initialize a group store.

    Parameters
    ----------
    store : MutableMapping
        A mapping that supports string keys and byte sequence values.
    overwrite : bool, optional
        If True, erase all data in `store` prior to initialisation.
    path : string, optional
        Path under which array is stored.
    chunk_store : MutableMapping, optional
        Separate storage for chunks. If not provided, `store` will be used
        for storage of both chunks and metadata.

    """
    # normalize the target path, make sure every ancestor group exists,
    # then write this group's own metadata
    path = normalize_storage_path(path)
    _require_parent_group(path, store=store, chunk_store=chunk_store,
                          overwrite=overwrite)
    _init_group_metadata(store=store, overwrite=overwrite,
                         path=path, chunk_store=chunk_store)
Пример #20
0
def init_group(store, overwrite=False, path=None, chunk_store=None):
    """initialize a group store.

    Parameters
    ----------
    store : MutableMapping
        A mapping that supports string keys and byte sequence values.
    overwrite : bool, optional
        If True, erase all data in `store` prior to initialisation.
    path : string, optional
        Path under which array is stored.
    chunk_store : MutableMapping, optional
        Separate storage for chunks. If not provided, `store` will be used
        for storage of both chunks and metadata.

    """
    norm_path = normalize_storage_path(path)

    # ancestor groups must exist before this group's metadata is written
    _require_parent_group(norm_path,
                          store=store,
                          chunk_store=chunk_store,
                          overwrite=overwrite)

    _init_group_metadata(store=store,
                         overwrite=overwrite,
                         path=norm_path,
                         chunk_store=chunk_store)
Пример #21
0
 def listdir(self, path=None):
     """List the immediate children of *path* in the blob container."""
     dir_path = normalize_storage_path(self._append_path_to_prefix(path))
     if dir_path:
         dir_path += '/'
     blobs = self.client.walk_blobs(name_starts_with=dir_path, delimiter='/')
     return [self._strip_prefix_from_path(b.name, dir_path) for b in blobs]
Пример #22
0
def rmdir(store, path=None):
    """Remove all items under the given path."""
    path = normalize_storage_path(path)
    if hasattr(store, 'rmdir'):
        store.rmdir(path)  # store provides a fast native implementation
    else:
        _rmdir_from_keys(store, path)  # fallback: delete key by key
Пример #23
0
def listdir(store, path=None):
    """Obtain a directory listing for the given path."""
    path = normalize_storage_path(path)
    if not hasattr(store, 'listdir'):
        # slow version: derive the listing by scanning every key
        return _listdir_from_keys(store, path)
    # fast path: the store implements listdir natively
    return store.listdir(path)
Пример #24
0
 def listdir(self, path=None):
     """Sorted names of the entries under *path*; [] when no directory."""
     store_path = normalize_storage_path(path)
     dir_path = self.path
     if store_path:
         dir_path = os.path.join(dir_path, store_path)
     return sorted(os.listdir(dir_path)) if os.path.isdir(dir_path) else []
Пример #25
0
 def listdir(self, path=None):
     """Return a sorted directory listing for *path*, or [] if absent."""
     store_path = normalize_storage_path(path)
     target = (os.path.join(self.path, store_path)
               if store_path else self.path)
     if not os.path.isdir(target):
         return []
     return sorted(os.listdir(target))
Пример #26
0
def rmdir(store, path=None):
    """Remove all items under the given path."""
    norm = normalize_storage_path(path)
    native = getattr(store, 'rmdir', None)
    if native is not None:
        # delegate to the store's own implementation
        native(norm)
    else:
        # generic fallback: delete matching keys one at a time
        _rmdir_from_keys(store, norm)
Пример #27
0
def listdir(store, path=None):
    """Obtain a directory listing for the given path."""
    norm = normalize_storage_path(path)
    native = getattr(store, 'listdir', None)
    if native is not None:
        # delegate to the store's native implementation
        return native(norm)
    # generic fallback: derive the listing from the full key set
    return _listdir_from_keys(store, norm)
Пример #28
0
def group(store=None, overwrite=False, chunk_store=None,
          cache_attrs=True, synchronizer=None, path=None):
    """Create a group.

    Parameters
    ----------
    store : MutableMapping or string, optional
        Store or path to directory in file system.
    overwrite : bool, optional
        If True, delete any pre-existing data in `store` at `path` before
        creating the group.
    chunk_store : MutableMapping, optional
        Separate storage for chunks. If not provided, `store` will be used
        for storage of both chunks and metadata.
    cache_attrs : bool, optional
        If True (default), user attributes will be cached for attribute read
        operations. If False, user attributes are reloaded from the store prior
        to all attribute read operations.
    synchronizer : object, optional
        Array synchronizer.
    path : string, optional
        Group path within store.

    Returns
    -------
    g : zarr.hierarchy.Group

    Examples
    --------
    Create a group in memory::

        >>> import zarr
        >>> g = zarr.group()
        >>> g
        <zarr.hierarchy.Group '/'>

    Create a group with a different store::

        >>> store = zarr.DirectoryStore('data/example.zarr')
        >>> g = zarr.group(store=store, overwrite=True)
        >>> g
        <zarr.hierarchy.Group '/'>

    """

    # handle polymorphic store arg
    store = _normalize_store_arg(store)
    path = normalize_storage_path(path)

    # require group at `path` (not merely the store root); otherwise a group
    # existing at the root would suppress initialization of `path` and the
    # Group constructor below would fail
    if overwrite or not contains_group(store, path=path):
        init_group(store, overwrite=overwrite, chunk_store=chunk_store,
                   path=path)

    return Group(store, read_only=False, chunk_store=chunk_store,
                 cache_attrs=cache_attrs, synchronizer=synchronizer, path=path)
Пример #29
0
def group(store=None, overwrite=False, chunk_store=None, synchronizer=None,
          path=None):
    """Create a group.

    Parameters
    ----------
    store : MutableMapping or string
        Store or path to directory in file system.
    overwrite : bool, optional
        If True, delete any pre-existing data in `store` at `path` before
        creating the group.
    chunk_store : MutableMapping, optional
        Separate storage for chunks. If not provided, `store` will be used
        for storage of both chunks and metadata.
    synchronizer : object, optional
        Array synchronizer.
    path : string, optional
        Group path.

    Returns
    -------
    g : zarr.hierarchy.Group

    Examples
    --------

    Create a group in memory::

        >>> import zarr
        >>> g = zarr.group()
        >>> g
        Group(/, 0)
          store: DictStore

    Create a group with a different store::

        >>> store = zarr.DirectoryStore('example')
        >>> g = zarr.group(store=store, overwrite=True)
        >>> g
        Group(/, 0)
          store: DirectoryStore

    """

    # handle polymorphic store arg
    store = _handle_store_arg(store)
    path = normalize_storage_path(path)

    # require group at `path` (not merely the store root); otherwise a group
    # existing at the root would suppress initialization of `path` and the
    # Group constructor below would fail
    if overwrite or not contains_group(store, path=path):
        init_group(store, overwrite=overwrite, chunk_store=chunk_store,
                   path=path)

    return Group(store, read_only=False, chunk_store=chunk_store,
                 synchronizer=synchronizer, path=path)
Пример #30
0
 def rmdir(self, path=None):
     """Remove the directory node at *path*; clear everything when empty."""
     path = normalize_storage_path(path)
     if not path:
         # clear out root
         self.root = self.cls()
         return
     try:
         parent, key = self._get_parent(path)
         value = parent[key]
     except KeyError:
         return
     # only directory-like nodes are removed; leaves are left untouched
     if isinstance(value, self.cls):
         del parent[key]
Пример #31
0
 def listdir(self, path=None):
     """Sorted child names under *path*; [] for leaves or missing paths."""
     path = normalize_storage_path(path)
     if path:
         try:
             parent, key = self._get_parent(path)
             value = parent[key]
         except KeyError:
             return []
     else:
         value = self.root
     return sorted(value.keys()) if isinstance(value, self.cls) else []
Пример #32
0
 def listdir(self, path=None):
     """List the keys of the directory node at *path* in sorted order."""
     path = normalize_storage_path(path)
     if not path:
         node = self.root
     else:
         try:
             parent, key = self._get_parent(path)
             node = parent[key]
         except KeyError:
             return []
     if not isinstance(node, self.cls):
         # leaf values have no children
         return []
     return sorted(node.keys())
Пример #33
0
 def rmdir(self, path=None):
     """Delete the sub-tree at *path*, or reset the store for an empty path."""
     norm = normalize_storage_path(path)
     if norm:
         try:
             parent, key = self._get_parent(norm)
             node = parent[key]
         except KeyError:
             pass
         else:
             # only directory-like nodes are removed
             if isinstance(node, self.cls):
                 del parent[key]
     else:
         # clear out root
         self.root = self.cls()
Пример #34
0
 def getsize(self, path=None):
     """Return the summed compressed size of all entries under *path*.

     Falls back to the size of the single entry at *path*, or 0 when
     nothing matches.  (A leftover debug ``print`` was removed.)
     """
     path = normalize_storage_path(path)
     with self.mutex:
         children = self.list_prefix(data_root + path)
         children += self.list_prefix(meta_root + path)
         if children:
             size = 0
             for name in children:
                 info = self.zf.getinfo(name)
                 size += info.compress_size
             return size
         elif path in self:
             info = self.zf.getinfo(path)
             return info.compress_size
         else:
             return 0
Пример #35
0
 def rmdir(self, path=None):
     """Delete all rows under *path* (v3 layout), or clear the whole store.

     Removes every key beneath both the meta and data roots, then drops
     the node's '.array'/'.group' metadata documents.
     """
     path = normalize_storage_path(path)
     if path:
         for base in [meta_root, data_root]:
             with self.lock:
                 # delete every key of the form '<base><path>/...'
                 self.cursor.execute(
                     'DELETE FROM zarr WHERE k LIKE (? || "/%")',
                     (base + path, ))
         # remove any associated metadata files
         sfx = _get_metadata_suffix(self)
         meta_dir = (meta_root + path).rstrip('/')
         array_meta_file = meta_dir + '.array' + sfx
         self.pop(array_meta_file, None)
         group_meta_file = meta_dir + '.group' + sfx
         self.pop(group_meta_file, None)
     else:
         self.clear()
Пример #36
0
 def getsize(self, path=None):
     """Byte size of the file at *path*, or the summed size of the
     immediate regular files in the directory at *path*."""
     store_path = normalize_storage_path(path)
     fs_path = self.path
     if store_path:
         fs_path = os.path.join(fs_path, store_path)
     if os.path.isfile(fs_path):
         return os.path.getsize(fs_path)
     if os.path.isdir(fs_path):
         # only immediate regular files count; sub-directories are skipped
         return sum(os.path.getsize(os.path.join(fs_path, child))
                    for child in os.listdir(fs_path)
                    if os.path.isfile(os.path.join(fs_path, child)))
     err_path_not_found(path)
Пример #37
0
 def getsize(self, path=None):
     """Return the byte size of the file or directory mapped to *path*."""
     store_path = normalize_storage_path(path)
     target = self.path
     if store_path:
         target = os.path.join(target, store_path)
     if os.path.isfile(target):
         return os.path.getsize(target)
     if os.path.isdir(target):
         total = 0
         # sum only the regular files directly inside the directory
         for entry in os.listdir(target):
             full = os.path.join(target, entry)
             if os.path.isfile(full):
                 total += os.path.getsize(full)
         return total
     err_path_not_found(path)
Пример #38
0
    def rmdir(self, path=None):
        """Remove the store entries under *path* (v3 layout).

        Deletes the matching directories under both the meta and data roots,
        along with the node's '.array'/'.group' metadata files.  When *path*
        is empty, the entire store directory is removed.
        """
        store_path = normalize_storage_path(path)
        dir_path = self.path
        if store_path:
            for base in [meta_root, data_root]:
                # build each candidate from the store root; the previous
                # version mutated dir_path in the loop, so the second
                # iteration joined onto the first iteration's result
                sub_path = os.path.join(dir_path, base + store_path)
                if os.path.isdir(sub_path):
                    shutil.rmtree(sub_path)

            # remove any associated metadata files (use the *normalized*
            # path, matching the directory removal above)
            sfx = _get_metadata_suffix(self)
            meta_dir = (meta_root + store_path).rstrip('/')
            array_meta_file = meta_dir + '.array' + sfx
            self.pop(array_meta_file, None)
            group_meta_file = meta_dir + '.group' + sfx
            self.pop(group_meta_file, None)

        elif os.path.isdir(dir_path):
            shutil.rmtree(dir_path)
Пример #39
0
 def _list_container(self,
                     path=None,
                     delimiter=None,
                     strip_prefix=False,
                     treat_path_as_dir=True):
     """List the entries of the Swift container under *path*.

     Parameters
     ----------
     path : str, optional
         Container path to list; defaults to this store's prefix.
     delimiter : str, optional
         Passed through to the Swift API to group results (e.g. '/' for a
         directory-style listing).
     strip_prefix : bool, optional
         If True, rewrite each entry's 'name' relative to *path*.
     treat_path_as_dir : bool, optional
         If True, append '/' so only entries below *path* are returned.
     """
     path = self.prefix if path is None else self._add_prefix(path)
     if path and treat_path_as_dir:
         path += "/"
     _, contents = self.conn.get_container(self.container,
                                           prefix=path,
                                           delimiter=delimiter)
     if strip_prefix:
         prefix_size = len(path)
         for entry in contents:
             # 'subdir' entries are presumably pseudo-directories returned
             # by Swift delimiter listings — TODO confirm against the API
             name = entry.get('name', entry.get('subdir', ''))
             entry["name"] = normalize_storage_path(name[prefix_size:])
     for entry in contents:
         # ensure every entry reports a byte count (subdir entries lack one)
         entry["bytes"] = entry.get("bytes", 0)
     return contents
Пример #40
0
    def getsize(self, path=None):
        """Return the size in bytes of the blob at *path*, or the summed
        sizes of the blobs directly under *path* when it is a "directory".
        """
        store_path = normalize_storage_path(path)
        fs_path = self._append_path_to_prefix(store_path)
        if fs_path:
            blob_client = self.client.get_blob_client(fs_path)
        else:
            blob_client = None

        if blob_client and blob_client.exists():
            # exact blob: report its stored size
            return blob_client.get_blob_properties().size
        else:
            size = 0
            if fs_path == '':
                fs_path = None
            elif not fs_path.endswith('/'):
                fs_path += '/'
            # walk only the immediate children (delimiter='/')
            for blob in self.client.walk_blobs(name_starts_with=fs_path, delimiter='/'):
                blob_client = self.client.get_blob_client(blob)
                if blob_client.exists():
                    size += blob_client.get_blob_properties().size
            return size
Пример #41
0
def _get_files_and_dirs_from_path(store, path):
    """Collect the file-system files and directories that back *path*."""
    path = normalize_storage_path(path)

    files = []
    # metadata files (array first, then group) that exist for this node
    for key in (_prefix_to_array_key(store, path),
                _prefix_to_group_key(store, path)):
        if key in store:
            files.append(os.path.join(store.path, key))

    dirs = []
    # data and meta directories, when present on disk
    for d in [data_root + path, meta_root + path]:
        dir_path = os.path.join(store.path, d)
        if os.path.exists(dir_path):
            dirs.append(dir_path)
    return files, dirs
Пример #42
0
    def __init__(self,
                 store,
                 path=None,
                 read_only=False,
                 chunk_store=None,
                 synchronizer=None):
        """Open the group stored at *path* within *store*.

        Signals errors via err_contains_array / err_group_not_found, which
        are presumably raising helpers — TODO confirm at their definitions.
        """
        self._store = store
        self._path = normalize_storage_path(path)
        # key prefix is the normalized path plus '/' (empty for the root)
        if self._path:
            self._key_prefix = self._path + '/'
        else:
            self._key_prefix = ''
        self._read_only = read_only
        # default to the main store for chunk data as well
        if chunk_store is None:
            self._chunk_store = store
        else:
            self._chunk_store = chunk_store
        self._synchronizer = synchronizer

        # guard conditions
        if contains_array(store, path=self._path):
            err_contains_array(path)

        # initialize metadata
        try:
            mkey = self._key_prefix + group_meta_key
            meta_bytes = store[mkey]
        except KeyError:
            err_group_not_found(path)
        else:
            meta = decode_group_metadata(meta_bytes)
            self._meta = meta

        # setup attributes
        akey = self._key_prefix + attrs_key
        self._attrs = Attributes(store,
                                 key=akey,
                                 read_only=read_only,
                                 synchronizer=synchronizer)
Пример #43
0
def getsize(store, path=None):
    """Compute size of stored items for a given path."""
    path = normalize_storage_path(path)
    if hasattr(store, 'getsize'):
        # the store knows how to measure itself
        return store.getsize(path)
    if not isinstance(store, dict):
        # arbitrary store types: size cannot be determined
        return -1
    # plain dict: add up the buffer sizes of the values under path
    prefix = _path_to_prefix(path)
    total = 0
    for name in listdir(store, path):
        try:
            value = store[prefix + name]
        except KeyError:
            continue
        try:
            total += buffer_size(value)
        except TypeError:
            return -1
    return total
Пример #44
0
def getsize(store, path=None):
    """Compute size of stored items for a given path.

    Returns -1 when the size cannot be determined.
    """
    path = normalize_storage_path(path)
    # prefer a store-provided implementation
    if hasattr(store, 'getsize'):
        return store.getsize(path)
    if isinstance(store, dict):
        # dict store: accumulate the buffer sizes of the listed children
        prefix = _path_to_prefix(path)
        nbytes = 0
        for child in listdir(store, path):
            try:
                item = store[prefix + child]
            except KeyError:
                pass
            else:
                try:
                    nbytes += buffer_size(item)
                except TypeError:
                    # value has no determinable size
                    return -1
        return nbytes
    # store type is opaque; size unknown
    return -1
Пример #45
0
    def rmdir(self, path: Path = None):
        """Remove the hierarchy node at *path* together with its metadata
        documents, or reset the whole store when *path* is empty."""
        path = normalize_storage_path(path)
        if not path:
            # no path given: clear out the root entirely
            self.root = self.cls()
            return

        # delete the directory node under both the meta and data spaces
        for base in (meta_root, data_root):
            try:
                parent, key = self._get_parent(base + path)
                value = parent[key]
            except KeyError:
                continue
            if isinstance(value, self.cls):
                del parent[key]

        # drop any associated array/group metadata files
        sfx = _get_metadata_suffix(self)
        meta_dir = (meta_root + path).rstrip('/')
        self.pop(meta_dir + '.array' + sfx, None)
        self.pop(meta_dir + '.group' + sfx, None)
Пример #46
0
def open_group(store=None, mode='a', synchronizer=None, path=None):
    """Open a group using mode-like semantics.

    Parameters
    ----------
    store : MutableMapping or string
        Store or path to directory in file system.
    mode : {'r', 'r+', 'a', 'w', 'w-'}
        Persistence mode: 'r' means read only (must exist); 'r+' means
        read/write (must exist); 'a' means read/write (create if doesn't
        exist); 'w' means create (overwrite if exists); 'w-' means create
        (fail if exists).
    synchronizer : object, optional
        Array synchronizer.
    path : string, optional
        Group path.

    Returns
    -------
    g : zarr.hierarchy.Group

    Examples
    --------
    >>> import zarr
    >>> root = zarr.open_group('example', mode='w')
    >>> foo = root.create_group('foo')
    >>> bar = root.create_group('bar')
    >>> root
    Group(/, 2)
      groups: 2; bar, foo
      store: DirectoryStore
    >>> root2 = zarr.open_group('example', mode='a')
    >>> root2
    Group(/, 2)
      groups: 2; bar, foo
      store: DirectoryStore
    >>> root == root2
    True

    """

    # resolve the polymorphic store argument and normalize the group path
    store = _handle_store_arg(store)
    path = normalize_storage_path(path)

    # ensure store is initialized consistently with the requested mode
    if mode in ('r', 'r+'):
        # must already exist as a group
        if contains_array(store, path=path):
            err_contains_array(path)
        elif not contains_group(store, path=path):
            err_group_not_found(path)
    elif mode == 'w':
        # create, clobbering anything already there
        init_group(store, overwrite=True, path=path)
    elif mode == 'a':
        # create only if nothing suitable exists yet
        if contains_array(store, path=path):
            err_contains_array(path)
        if not contains_group(store, path=path):
            init_group(store, path=path)
    elif mode in ('w-', 'x'):
        # create, failing if anything already exists at path
        if contains_array(store, path=path):
            err_contains_array(path)
        elif contains_group(store, path=path):
            err_contains_group(path)
        else:
            init_group(store, path=path)

    # only plain 'r' yields a read-only group
    return Group(store, read_only=(mode == 'r'), synchronizer=synchronizer,
                 path=path)
Пример #47
0
 def _item_path(self, item):
     """Translate *item* into a normalized store path, prefixing relative
     items with this object's own key prefix; absolute ('/'-rooted)
     items are resolved from the store root."""
     is_absolute = isinstance(item, str) and item.startswith('/')
     normalized = normalize_storage_path(item)
     if self._path and not is_absolute:
         normalized = self._key_prefix + normalized
     return normalized
Пример #48
0
def open_group(store, mode='a', cache_attrs=True, synchronizer=None, path=None):
    """Open a group using file-mode-like semantics.

    Parameters
    ----------
    store : MutableMapping or string
        Store or path to directory in file system or name of zip file.
    mode : {'r', 'r+', 'a', 'w', 'w-'}, optional
        Persistence mode: 'r' means read only (must exist); 'r+' means
        read/write (must exist); 'a' means read/write (create if doesn't
        exist); 'w' means create (overwrite if exists); 'w-' means create
        (fail if exists).
    cache_attrs : bool, optional
        If True (default), user attributes will be cached for attribute read
        operations. If False, user attributes are reloaded from the store prior
        to all attribute read operations.
    synchronizer : object, optional
        Array synchronizer.
    path : string, optional
        Group path within store.

    Returns
    -------
    g : zarr.hierarchy.Group

    Examples
    --------
    >>> import zarr
    >>> root = zarr.open_group('data/example.zarr', mode='w')
    >>> foo = root.create_group('foo')
    >>> bar = root.create_group('bar')
    >>> root
    <zarr.hierarchy.Group '/'>
    >>> root2 = zarr.open_group('data/example.zarr', mode='a')
    >>> root2
    <zarr.hierarchy.Group '/'>
    >>> root == root2
    True

    """

    # resolve the polymorphic store argument and normalize the group path
    store = _normalize_store_arg(store)
    path = normalize_storage_path(path)

    # ensure store is initialized consistently with the requested mode
    if mode in ('r', 'r+'):
        # must already exist as a group
        if contains_array(store, path=path):
            err_contains_array(path)
        elif not contains_group(store, path=path):
            err_group_not_found(path)
    elif mode == 'w':
        # create, clobbering anything already there
        init_group(store, overwrite=True, path=path)
    elif mode == 'a':
        # create only if nothing suitable exists yet
        if contains_array(store, path=path):
            err_contains_array(path)
        if not contains_group(store, path=path):
            init_group(store, path=path)
    elif mode in ('w-', 'x'):
        # create, failing if anything already exists at path
        if contains_array(store, path=path):
            err_contains_array(path)
        elif contains_group(store, path=path):
            err_contains_group(path)
        else:
            init_group(store, path=path)

    # only plain 'r' yields a read-only group
    return Group(store, read_only=(mode == 'r'), cache_attrs=cache_attrs,
                 synchronizer=synchronizer, path=path)
Пример #49
0
 def listdir(self, path=None):
     """List the immediate children under *path* by scanning store keys."""
     return _listdir_from_keys(self, normalize_storage_path(path))
Пример #50
0
def contains_group(store, path=None):
    """Return True if the store contains a group at the given logical path."""
    # a group exists exactly when its metadata key is present in the store
    prefix = _path_to_prefix(normalize_storage_path(path))
    return (prefix + group_meta_key) in store
Пример #51
0
def init_array(store, shape, chunks=None, dtype=None, compressor='default',
               fill_value=None, order='C', overwrite=False, path=None,
               chunk_store=None, filters=None):
    """Initialize an array store with the given configuration.

    Parameters
    ----------
    store : MutableMapping
        A mapping that supports string keys and bytes-like values.
    shape : int or tuple of ints
        Array shape.
    chunks : int or tuple of ints, optional
        Chunk shape. If not provided, will be guessed from `shape` and `dtype`.
    dtype : string or dtype, optional
        NumPy dtype.
    compressor : Codec, optional
        Primary compressor.
    fill_value : object
        Default value to use for uninitialized portions of the array.
    order : {'C', 'F'}, optional
        Memory layout to be used within each chunk.
    overwrite : bool, optional
        If True, erase all data in `store` prior to initialisation.
    path : string, optional
        Path under which array is stored.
    chunk_store : MutableMapping, optional
        Separate storage for chunks. If not provided, `store` will be used
        for storage of both chunks and metadata.
    filters : sequence, optional
        Sequence of filters to use to encode chunk data prior to compression.

    Examples
    --------
    Initialize an array store::

        >>> from zarr.storage import init_array
        >>> store = dict()
        >>> init_array(store, shape=(10000, 10000), chunks=(1000, 1000))
        >>> sorted(store.keys())
        ['.zarray', '.zattrs']

    Array metadata is stored as JSON::

        >>> print(str(store['.zarray'], 'ascii'))
        {
            "chunks": [
                1000,
                1000
            ],
            "compressor": {
                "clevel": 5,
                "cname": "lz4",
                "id": "blosc",
                "shuffle": 1
            },
            "dtype": "<f8",
            "fill_value": null,
            "filters": null,
            "order": "C",
            "shape": [
                10000,
                10000
            ],
            "zarr_format": 2
        }

    User-defined attributes are also stored as JSON, initially empty::

        >>> print(str(store['.zattrs'], 'ascii'))
        {}

    Initialize an array using a storage path::

        >>> store = dict()
        >>> init_array(store, shape=100000000, chunks=1000000, dtype='i1',
        ...            path='foo')
        >>> sorted(store.keys())
        ['.zattrs', '.zgroup', 'foo/.zarray', 'foo/.zattrs']
        >>> print(str(store['foo/.zarray'], 'ascii'))
        {
            "chunks": [
                1000000
            ],
            "compressor": {
                "clevel": 5,
                "cname": "lz4",
                "id": "blosc",
                "shuffle": 1
            },
            "dtype": "|i1",
            "fill_value": null,
            "filters": null,
            "order": "C",
            "shape": [
                100000000
            ],
            "zarr_format": 2
        }

    Notes
    -----
    The initialisation process involves normalising all array metadata,
    encoding as JSON and storing under the '.zarray' key. User attributes are
    also initialized and stored as JSON under the '.zattrs' key.

    """

    # normalize the logical path first so parent resolution sees it
    path = normalize_storage_path(path)

    # make sure every ancestor group exists before writing array metadata
    _require_parent_group(path, store=store, chunk_store=chunk_store,
                          overwrite=overwrite)

    # delegate the actual metadata normalisation and writing
    _init_array_metadata(store, shape=shape, chunks=chunks, dtype=dtype,
                         compressor=compressor, fill_value=fill_value,
                         order=order, overwrite=overwrite, path=path,
                         chunk_store=chunk_store, filters=filters)
Пример #52
0
def open(store, mode='a', **kwargs):
    """Convenience function to open a group or array using file-mode-like semantics.

    Parameters
    ----------
    store : MutableMapping or string
        Store or path to directory in file system or name of zip file.
    mode : {'r', 'r+', 'a', 'w', 'w-'}, optional
        Persistence mode: 'r' means read only (must exist); 'r+' means
        read/write (must exist); 'a' means read/write (create if doesn't
        exist); 'w' means create (overwrite if exists); 'w-' means create
        (fail if exists).
    **kwargs
        Additional parameters are passed through to :func:`zarr.open_array` or
        :func:`zarr.open_group`.

    See Also
    --------
    zarr.open_array, zarr.open_group

    Examples
    --------

    Storing data in a directory 'data/example.zarr' on the local file system::

        >>> import zarr
        >>> store = 'data/example.zarr'
        >>> zw = zarr.open(store, mode='w', shape=100, dtype='i4')  # open new array
        >>> zw
        <zarr.core.Array (100,) int32>
        >>> za = zarr.open(store, mode='a')  # open existing array for reading and writing
        >>> za
        <zarr.core.Array (100,) int32>
        >>> zr = zarr.open(store, mode='r')  # open existing array read-only
        >>> zr
        <zarr.core.Array (100,) int32 read-only>
        >>> gw = zarr.open(store, mode='w')  # open new group, overwriting previous data
        >>> gw
        <zarr.hierarchy.Group '/'>
        >>> ga = zarr.open(store, mode='a')  # open existing group for reading and writing
        >>> ga
        <zarr.hierarchy.Group '/'>
        >>> gr = zarr.open(store, mode='r')  # open existing group read-only
        >>> gr
        <zarr.hierarchy.Group '/' read-only>

    """

    # resolve the polymorphic store argument; 'w' mode may clobber
    path = kwargs.get('path', None)
    store = normalize_store_arg(store, clobber=(mode == 'w'))
    path = normalize_storage_path(path)

    if mode in {'w', 'w-', 'x'}:
        # creating fresh: the presence of 'shape' selects array vs group
        opener = open_array if 'shape' in kwargs else open_group
    elif mode == 'a':
        # prefer whatever already exists; otherwise fall back on 'shape'
        if contains_array(store, path):
            opener = open_array
        elif contains_group(store, path):
            opener = open_group
        else:
            opener = open_array if 'shape' in kwargs else open_group
    else:
        # read modes require something to already exist at path
        if contains_array(store, path):
            opener = open_array
        elif contains_group(store, path):
            opener = open_group
        else:
            err_path_not_found(path)
            return None

    return opener(store, mode=mode, **kwargs)
Пример #53
0
def copy_store(source, dest, source_path='', dest_path='', excludes=None,
               includes=None, flags=0, if_exists='raise', dry_run=False,
               log=None):
    """Copy data directly from the `source` store to the `dest` store. Use this
    function when you want to copy a group or array in the most efficient way,
    preserving all configuration and attributes. This function is more efficient
    than the copy() or copy_all() functions because it avoids de-compressing and
    re-compressing data, rather the compressed chunk data for each array are
    copied directly between stores.

    Parameters
    ----------
    source : Mapping
        Store to copy data from.
    dest : MutableMapping
        Store to copy data into.
    source_path : str, optional
        Only copy data from under this path in the source store.
    dest_path : str, optional
        Copy data into this path in the destination store.
    excludes : sequence of str, optional
        One or more regular expressions which will be matched against keys in
        the source store. Any matching key will not be copied.
    includes : sequence of str, optional
        One or more regular expressions which will be matched against keys in
        the source store and will override any excludes also matching.
    flags : int, optional
        Regular expression flags used for matching excludes and includes.
    if_exists : {'raise', 'replace', 'skip'}, optional
        How to handle keys that already exist in the destination store. If
        'raise' then a CopyError is raised on the first key already present
        in the destination store. If 'replace' then any data will be replaced in
        the destination. If 'skip' then any existing keys will not be copied.
    dry_run : bool, optional
        If True, don't actually copy anything, just log what would have
        happened.
    log : callable, file path or file-like object, optional
        If provided, will be used to log progress information.

    Returns
    -------
    n_copied : int
        Number of items copied.
    n_skipped : int
        Number of items skipped.
    n_bytes_copied : int
        Number of bytes of data that were actually copied.

    Examples
    --------

    >>> import zarr
    >>> store1 = zarr.DirectoryStore('data/example.zarr')
    >>> root = zarr.group(store1, overwrite=True)
    >>> foo = root.create_group('foo')
    >>> bar = foo.create_group('bar')
    >>> baz = bar.create_dataset('baz', shape=100, chunks=50, dtype='i8')
    >>> import numpy as np
    >>> baz[:] = np.arange(100)
    >>> root.tree()
    /
     └── foo
         └── bar
             └── baz (100,) int64
    >>> from sys import stdout
    >>> store2 = zarr.ZipStore('data/example.zip', mode='w')
    >>> zarr.copy_store(store1, store2, log=stdout)
    copy .zgroup
    copy foo/.zgroup
    copy foo/bar/.zgroup
    copy foo/bar/baz/.zarray
    copy foo/bar/baz/0
    copy foo/bar/baz/1
    all done: 6 copied, 0 skipped, 566 bytes copied
    (6, 0, 566)
    >>> new_root = zarr.group(store2)
    >>> new_root.tree()
    /
     └── foo
         └── bar
             └── baz (100,) int64
    >>> new_root['foo/bar/baz'][:]
    array([ 0,  1,  2,  ..., 97, 98, 99])
    >>> store2.close()  # zip stores need to be closed

    Notes
    -----
    Please note that this is an experimental feature. The behaviour of this
    function is still evolving and the default behaviour and/or parameters may change
    in future versions.

    """

    # normalize paths; non-empty paths gain a trailing '/' so they can be
    # used directly as key prefixes below
    source_path = normalize_storage_path(source_path)
    dest_path = normalize_storage_path(dest_path)
    if source_path:
        source_path = source_path + '/'
    if dest_path:
        dest_path = dest_path + '/'

    # normalize excludes and includes: accept a single pattern string or a
    # sequence of patterns, then pre-compile for the key loop
    if excludes is None:
        excludes = []
    elif isinstance(excludes, str):
        excludes = [excludes]
    if includes is None:
        includes = []
    elif isinstance(includes, str):
        includes = [includes]
    excludes = [re.compile(e, flags) for e in excludes]
    includes = [re.compile(i, flags) for i in includes]

    # check if_exists parameter
    valid_if_exists = ['raise', 'replace', 'skip']
    if if_exists not in valid_if_exists:
        raise ValueError('if_exists must be one of {!r}; found {!r}'
                         .format(valid_if_exists, if_exists))

    # setup counting variables
    n_copied = n_skipped = n_bytes_copied = 0

    # setup logging
    with _LogWriter(log) as log:

        # iterate over source keys
        for source_key in sorted(source.keys()):

            # filter to keys under source path
            # (an empty source_path matches every key)
            if source_key.startswith(source_path):

                # process excludes and includes; a matching include
                # overrides any matching exclude
                exclude = False
                for prog in excludes:
                    if prog.search(source_key):
                        exclude = True
                        break
                if exclude:
                    for prog in includes:
                        if prog.search(source_key):
                            exclude = False
                            break
                if exclude:
                    continue

                # map key to destination path by swapping the prefix
                key_suffix = source_key[len(source_path):]
                dest_key = dest_path + key_suffix

                # create a descriptive label for this operation
                descr = source_key
                if dest_key != source_key:
                    descr = descr + ' -> ' + dest_key

                # decide what to do
                do_copy = True
                if if_exists != 'replace':
                    if dest_key in dest:
                        if if_exists == 'raise':
                            raise CopyError('key {!r} exists in destination'
                                            .format(dest_key))
                        elif if_exists == 'skip':
                            do_copy = False

                # take action; note that dry runs still count items but
                # copy no bytes
                if do_copy:
                    log('copy {}'.format(descr))
                    if not dry_run:
                        data = source[source_key]
                        n_bytes_copied += buffer_size(data)
                        dest[dest_key] = data
                    n_copied += 1
                else:
                    log('skip {}'.format(descr))
                    n_skipped += 1

        # log a final message with a summary of what happened
        _log_copy_summary(log, dry_run, n_copied, n_skipped, n_bytes_copied)

    return n_copied, n_skipped, n_bytes_copied
Пример #54
0
 def listdir(self, path=None):
     """Return the names directly below *path*, derived from store keys."""
     normalized = normalize_storage_path(path)
     return _listdir_from_keys(self, normalized)