def getsize(self, path=None):
    path = normalize_storage_path(path)

    # obtain value to return size of
    if path:
        try:
            parent, key = self._get_parent(path)
            value = parent[key]
        except KeyError:
            err_path_not_found(path)
    else:
        value = self.root

    # obtain size of value
    if isinstance(value, self.cls):
        # total size for directory
        size = 0
        for v in value.values():
            if not isinstance(v, self.cls):
                try:
                    size += buffer_size(v)
                except TypeError:
                    return -1
        return size
    else:
        try:
            return buffer_size(value)
        except TypeError:
            return -1

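# A minimal sketch of the directory-size logic above, assuming a store whose
# container class (`self.cls`) is dict and whose leaf values are bytes-like
# buffers; `buffer_size` is assumed to report byte length, as in zarr.util.
# The data here is hypothetical, for illustration only. Note the size of a
# "directory" counts only its immediate non-dict children; it does not recurse.
root = {'a': b'xxxx', 'sub': {'b': b'yy'}}
# size of the 'sub' "directory": only its leaf buffers are counted
assert sum(len(v) for v in root['sub'].values() if not isinstance(v, dict)) == 2
# size of the root "directory": the nested dict 'sub' is skipped, not recursed into
assert sum(len(v) for v in root.values() if not isinstance(v, dict)) == 4
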
def test_nbytes_stored(self):
    # dict as store
    z = self.create_array(shape=1000, chunks=100)
    expect_nbytes_stored = sum(buffer_size(v) for v in z.store.values())
    eq(expect_nbytes_stored, z.nbytes_stored)
    z[:] = 42
    expect_nbytes_stored = sum(buffer_size(v) for v in z.store.values())
    eq(expect_nbytes_stored, z.nbytes_stored)

def test_nbytes_stored(self):
    z = self.create_array(shape=1000, chunks=100)
    expect_nbytes_stored = sum(buffer_size(v) for v in z.store.values())
    expect_nbytes_stored += sum(buffer_size(v) for v in z.chunk_store.values())
    eq(expect_nbytes_stored, z.nbytes_stored)
    z[:] = 42
    expect_nbytes_stored = sum(buffer_size(v) for v in z.store.values())
    expect_nbytes_stored += sum(buffer_size(v) for v in z.chunk_store.values())
    eq(expect_nbytes_stored, z.nbytes_stored)

    # mess with store
    z.chunk_store[z._key_prefix + 'foo'] = list(range(10))
    eq(-1, z.nbytes_stored)

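# The test above exercises a chunk store held separately from the metadata
# store. A hedged sketch of that split, assuming the zarr v2 create() API,
# which accepts both a `store` and a `chunk_store` argument:
import zarr

store, chunk_store = dict(), dict()
z = zarr.create(shape=1000, chunks=100, store=store, chunk_store=chunk_store)
z[:] = 42
# array metadata lives in `store`, compressed chunk data in `chunk_store`
assert any(k.endswith('.zarray') for k in store)
assert all(not k.endswith('.zarray') for k in chunk_store)
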
def test_nbytes_stored(self):
    # dict as store
    z = self.create_array(shape=1000, chunks=100)
    expect_nbytes_stored = sum(buffer_size(v) for v in z.store.values())
    eq(expect_nbytes_stored, z.nbytes_stored)
    z[:] = 42
    expect_nbytes_stored = sum(buffer_size(v) for v in z.store.values())
    eq(expect_nbytes_stored, z.nbytes_stored)

    # mess with store
    try:
        z.store[z._key_prefix + 'foo'] = list(range(10))
        eq(-1, z.nbytes_stored)
    except TypeError:
        pass

def test_nbytes_stored(self):
    # dict as store
    z = self.create_array(shape=1000, chunks=100)
    expect_nbytes_stored = sum(buffer_size(v)
                               for k, v in z.store.items()
                               if k.startswith('foo/bar/'))
    eq(expect_nbytes_stored, z.nbytes_stored)
    z[:] = 42
    expect_nbytes_stored = sum(buffer_size(v)
                               for k, v in z.store.items()
                               if k.startswith('foo/bar/'))
    eq(expect_nbytes_stored, z.nbytes_stored)

    # mess with store
    z.store[z._key_prefix + 'foo'] = list(range(10))
    eq(-1, z.nbytes_stored)

def getsize(self, path=None):
    size = 0
    if path is None or path == '':
        # size of both the data and meta subdirs
        dirs = []
        for d in ['data/root', 'meta/root']:
            dir_path = os.path.join(self.path, d)
            if os.path.exists(dir_path):
                dirs.append(dir_path)
    elif path in self:
        # access individual element by full path
        return buffer_size(self[path])
    else:
        files, dirs = _get_files_and_dirs_from_path(self, path)
        for file in files:
            size += os.path.getsize(file)
    for d in dirs:
        size += self.fs.du(d, total=True, maxdepth=None)
    return size

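# A runnable sketch of the fsspec call used in the directory branches above:
# with total=True, `fs.du` returns a single aggregate byte count for all files
# under a directory rather than a per-file mapping. Assumes fsspec is
# installed; the temporary path and file are hypothetical.
import os
import tempfile
import fsspec

tmp = tempfile.mkdtemp()
with open(os.path.join(tmp, 'chunk'), 'wb') as f:
    f.write(b'\x00' * 10)
fs = fsspec.filesystem('file')
assert fs.du(tmp, total=True, maxdepth=None) == 10
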
def getsize(store, path=None):
    """Compute size of stored items for a given path."""
    path = normalize_storage_path(path)
    if hasattr(store, 'getsize'):
        # pass through
        return store.getsize(path)
    elif isinstance(store, dict):
        # compute from size of values
        prefix = _path_to_prefix(path)
        size = 0
        for k in listdir(store, path):
            try:
                v = store[prefix + k]
            except KeyError:
                pass
            else:
                try:
                    size += buffer_size(v)
                except TypeError:
                    return -1
        return size
    else:
        return -1

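# Hedged usage sketch for the module-level getsize above, assuming the zarr
# v2 API where zarr.storage.getsize is public and a plain dict is accepted as
# a store.
import zarr
from zarr.storage import getsize

store = dict()
z = zarr.zeros(100, chunks=10, store=store, dtype='i8')
z[:] = 42
# total size of metadata plus chunk data held in the dict
print(getsize(store))
# a store that neither implements getsize nor is a dict reports -1 by convention
print(getsize(object()))  # -1
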
def copy_store(source, dest, source_path='', dest_path='', excludes=None,
               includes=None, flags=0, if_exists='raise', dry_run=False,
               log=None):
    """Copy data directly from the `source` store to the `dest` store.

    Use this function when you want to copy a group or array in the most
    efficient way, preserving all configuration and attributes. This function
    is more efficient than the copy() or copy_all() functions because it
    avoids de-compressing and re-compressing data; instead, the compressed
    chunk data for each array are copied directly between stores.

    Parameters
    ----------
    source : Mapping
        Store to copy data from.
    dest : MutableMapping
        Store to copy data into.
    source_path : str, optional
        Only copy data from under this path in the source store.
    dest_path : str, optional
        Copy data into this path in the destination store.
    excludes : sequence of str, optional
        One or more regular expressions which will be matched against keys in
        the source store. Any matching key will not be copied.
    includes : sequence of str, optional
        One or more regular expressions which will be matched against keys in
        the source store and will override any excludes also matching.
    flags : int, optional
        Regular expression flags used for matching excludes and includes.
    if_exists : {'raise', 'replace', 'skip'}, optional
        How to handle keys that already exist in the destination store. If
        'raise' then a CopyError is raised on the first key already present
        in the destination store. If 'replace' then any data will be replaced
        in the destination. If 'skip' then any existing keys will not be
        copied.
    dry_run : bool, optional
        If True, don't actually copy anything, just log what would have
        happened.
    log : callable, file path or file-like object, optional
        If provided, will be used to log progress information.

    Returns
    -------
    n_copied : int
        Number of items copied.
    n_skipped : int
        Number of items skipped.
    n_bytes_copied : int
        Number of bytes of data that were actually copied.

    Examples
    --------
    >>> import zarr
    >>> store1 = zarr.DirectoryStore('data/example.zarr')
    >>> root = zarr.group(store1, overwrite=True)
    >>> foo = root.create_group('foo')
    >>> bar = foo.create_group('bar')
    >>> baz = bar.create_dataset('baz', shape=100, chunks=50, dtype='i8')
    >>> import numpy as np
    >>> baz[:] = np.arange(100)
    >>> root.tree()
    /
     └── foo
         └── bar
             └── baz (100,) int64
    >>> from sys import stdout
    >>> store2 = zarr.ZipStore('data/example.zip', mode='w')
    >>> zarr.copy_store(store1, store2, log=stdout)
    copy .zgroup
    copy foo/.zgroup
    copy foo/bar/.zgroup
    copy foo/bar/baz/.zarray
    copy foo/bar/baz/0
    copy foo/bar/baz/1
    all done: 6 copied, 0 skipped, 566 bytes copied
    (6, 0, 566)
    >>> new_root = zarr.group(store2)
    >>> new_root.tree()
    /
     └── foo
         └── bar
             └── baz (100,) int64
    >>> new_root['foo/bar/baz'][:]
    array([ 0,  1,  2, ..., 97, 98, 99])
    >>> store2.close()  # zip stores need to be closed

    Notes
    -----
    Please note that this is an experimental feature. The behaviour of this
    function is still evolving and the default behaviour and/or parameters
    may change in future versions.
""" # normalize paths source_path = normalize_storage_path(source_path) dest_path = normalize_storage_path(dest_path) if source_path: source_path = source_path + '/' if dest_path: dest_path = dest_path + '/' # normalize excludes and includes if excludes is None: excludes = [] elif isinstance(excludes, str): excludes = [excludes] if includes is None: includes = [] elif isinstance(includes, str): includes = [includes] excludes = [re.compile(e, flags) for e in excludes] includes = [re.compile(i, flags) for i in includes] # check if_exists parameter valid_if_exists = ['raise', 'replace', 'skip'] if if_exists not in valid_if_exists: raise ValueError('if_exists must be one of {!r}; found {!r}'.format( valid_if_exists, if_exists)) # setup counting variables n_copied = n_skipped = n_bytes_copied = 0 # setup logging with _LogWriter(log) as log: # iterate over source keys for source_key in sorted(source.keys()): # filter to keys under source path if source_key.startswith(source_path): # process excludes and includes exclude = False for prog in excludes: if prog.search(source_key): exclude = True break if exclude: for prog in includes: if prog.search(source_key): exclude = False break if exclude: continue # map key to destination path key_suffix = source_key[len(source_path):] dest_key = dest_path + key_suffix # create a descriptive label for this operation descr = source_key if dest_key != source_key: descr = descr + ' -> ' + dest_key # decide what to do do_copy = True if if_exists != 'replace': if dest_key in dest: if if_exists == 'raise': raise CopyError( 'key {!r} exists in destination'.format( dest_key)) elif if_exists == 'skip': do_copy = False # take action if do_copy: log('copy {}'.format(descr)) if not dry_run: data = source[source_key] n_bytes_copied += buffer_size(data) dest[dest_key] = data n_copied += 1 else: log('skip {}'.format(descr)) n_skipped += 1 # log a final message with a summary of what happened _log_copy_summary(log, dry_run, n_copied, n_skipped, n_bytes_copied) return n_copied, n_skipped, n_bytes_copied