def test_write_dataset(h5file):
    slices1 = write_dataset(h5file, 'test_data',
                            np.ones((2 * DEFAULT_CHUNK_SIZE,)))
    slices2 = write_dataset(
        h5file, 'test_data',
        np.concatenate((
            2 * np.ones((DEFAULT_CHUNK_SIZE,)),
            2 * np.ones((DEFAULT_CHUNK_SIZE,)),
            3 * np.ones((DEFAULT_CHUNK_SIZE,)),
        )))

    assert slices1 == {
        (Slice(0 * DEFAULT_CHUNK_SIZE, 1 * DEFAULT_CHUNK_SIZE, 1),):
            slice(0 * DEFAULT_CHUNK_SIZE, 1 * DEFAULT_CHUNK_SIZE),
        (Slice(1 * DEFAULT_CHUNK_SIZE, 2 * DEFAULT_CHUNK_SIZE, 1),):
            slice(0 * DEFAULT_CHUNK_SIZE, 1 * DEFAULT_CHUNK_SIZE),
    }
    assert slices2 == {
        (Slice(0 * DEFAULT_CHUNK_SIZE, 1 * DEFAULT_CHUNK_SIZE, 1),):
            slice(1 * DEFAULT_CHUNK_SIZE, 2 * DEFAULT_CHUNK_SIZE),
        (Slice(1 * DEFAULT_CHUNK_SIZE, 2 * DEFAULT_CHUNK_SIZE, 1),):
            slice(1 * DEFAULT_CHUNK_SIZE, 2 * DEFAULT_CHUNK_SIZE),
        (Slice(2 * DEFAULT_CHUNK_SIZE, 3 * DEFAULT_CHUNK_SIZE, 1),):
            slice(2 * DEFAULT_CHUNK_SIZE, 3 * DEFAULT_CHUNK_SIZE),
    }

    ds = h5file['/_version_data/test_data/raw_data']
    assert ds.shape == (3 * DEFAULT_CHUNK_SIZE,)
    assert_equal(ds[0:1 * DEFAULT_CHUNK_SIZE], 1.0)
    assert_equal(ds[1 * DEFAULT_CHUNK_SIZE:2 * DEFAULT_CHUNK_SIZE], 2.0)
    assert_equal(ds[2 * DEFAULT_CHUNK_SIZE:3 * DEFAULT_CHUNK_SIZE], 3.0)
    assert_equal(ds[3 * DEFAULT_CHUNK_SIZE:4 * DEFAULT_CHUNK_SIZE], 0.0)
    assert ds.dtype == np.float64
def test_write_dataset_compression(h5file):
    slices1 = write_dataset(h5file, 'test_data',
                            np.ones((2 * DEFAULT_CHUNK_SIZE,)),
                            compression='gzip', compression_opts=3)
    raises(ValueError, lambda: write_dataset(
        h5file, 'test_data', np.ones((DEFAULT_CHUNK_SIZE,)),
        compression='lzf'))
    raises(ValueError, lambda: write_dataset(
        h5file, 'test_data', np.ones((DEFAULT_CHUNK_SIZE,)),
        compression='gzip', compression_opts=4))

    assert slices1 == {
        (Slice(0 * DEFAULT_CHUNK_SIZE, 1 * DEFAULT_CHUNK_SIZE, 1),):
            slice(0 * DEFAULT_CHUNK_SIZE, 1 * DEFAULT_CHUNK_SIZE),
        (Slice(1 * DEFAULT_CHUNK_SIZE, 2 * DEFAULT_CHUNK_SIZE, 1),):
            slice(0 * DEFAULT_CHUNK_SIZE, 1 * DEFAULT_CHUNK_SIZE),
    }

    ds = h5file['/_version_data/test_data/raw_data']
    assert ds.shape == (1 * DEFAULT_CHUNK_SIZE,)
    assert_equal(ds[0:1 * DEFAULT_CHUNK_SIZE], 1.0)
    assert ds.dtype == np.float64
    assert ds.compression == 'gzip'
    assert ds.compression_opts == 3
def resize(self, size, axis=None):
    self.parent._check_committed()
    if axis is not None:
        if not (axis >= 0 and axis < self.ndim):
            raise ValueError("Invalid axis (0 to %s allowed)" % (self.ndim - 1))
        try:
            newlen = int(size)
        except TypeError:
            raise TypeError("Argument must be a single int if axis is specified")
        size = list(self.shape)
        size[axis] = newlen

    old_shape = self.shape
    size = tuple(size)
    if all(new <= old for new, old in zip(size, old_shape)):
        # Don't create a new array if the old one can just be sliced in
        # memory.
        idx = tuple(slice(0, i) for i in size)
        self.array = self.array[idx]
    else:
        old_shape_idx = Tuple(*[Slice(0, i) for i in old_shape])
        new_shape_idx = Tuple(*[Slice(0, i) for i in size])
        new_array = np.full(size, self.fillvalue, dtype=self.dtype)
        new_array[old_shape_idx.as_subindex(new_shape_idx).raw] = \
            self.array[new_shape_idx.as_subindex(old_shape_idx).raw]
        self.array = new_array
def spaceid_to_slice(space):
    """
    Convert an h5py spaceid object into an ndindex index

    The resulting index is always a Tuple index.
    """
    from h5py import h5s

    sel_type = space.get_select_type()

    if sel_type == h5s.SEL_ALL:
        return Tuple()
    elif sel_type == h5s.SEL_HYPERSLABS:
        slices = []
        starts, strides, counts, blocks = space.get_regular_hyperslab()
        for _start, _stride, count, block in zip(starts, strides, counts, blocks):
            start = _start
            if not (block == 1 or count == 1):
                raise NotImplementedError("Nontrivial blocks are not yet supported")
            end = _start + (_stride*(count - 1) + 1)*block
            stride = _stride if block == 1 else 1
            slices.append(Slice(start, end, stride))
        return Tuple(*slices)
    elif sel_type == h5s.SEL_NONE:
        return Tuple(Slice(0, 0),)
    else:
        raise NotImplementedError("Point selections are not yet supported")
def test_create_virtual_dataset(h5file):
    with h5file as f:
        slices1 = write_dataset(f, 'test_data',
                                np.ones((2 * DEFAULT_CHUNK_SIZE,)))
        slices2 = write_dataset(
            f, 'test_data',
            np.concatenate((2 * np.ones((DEFAULT_CHUNK_SIZE,)),
                            3 * np.ones((DEFAULT_CHUNK_SIZE,)))))

        virtual_data = create_virtual_dataset(
            f, 'test_version', 'test_data', (3 * DEFAULT_CHUNK_SIZE,),
            {
                **slices1,
                Tuple(Slice(2 * DEFAULT_CHUNK_SIZE, 3 * DEFAULT_CHUNK_SIZE, 1)):
                    slices2[(Slice(1 * DEFAULT_CHUNK_SIZE, 2 * DEFAULT_CHUNK_SIZE, 1),)],
            })

        assert virtual_data.shape == (3 * DEFAULT_CHUNK_SIZE,)
        assert_equal(virtual_data[0:2 * DEFAULT_CHUNK_SIZE], 1.0)
        assert_equal(virtual_data[2 * DEFAULT_CHUNK_SIZE:3 * DEFAULT_CHUNK_SIZE], 3.0)
        assert virtual_data.dtype == np.float64
def test_write_dataset_offset_chunk_size(h5file):
    chunk_size = 2**10
    chunks = (chunk_size,)
    slices1 = write_dataset(h5file, 'test_data',
                            1 * np.ones((2 * chunk_size,)), chunks=chunks)
    slices2 = write_dataset(
        h5file, 'test_data',
        np.concatenate((2 * np.ones(chunks), 2 * np.ones(chunks),
                        3 * np.ones((chunk_size - 2,)))))

    assert slices1 == {
        Tuple(Slice(0 * chunk_size, 1 * chunk_size, 1)):
            slice(0 * chunk_size, 1 * chunk_size),
        Tuple(Slice(1 * chunk_size, 2 * chunk_size, 1)):
            slice(0 * chunk_size, 1 * chunk_size),
    }
    assert slices2 == {
        Tuple(Slice(0 * chunk_size, 1 * chunk_size, 1)):
            slice(1 * chunk_size, 2 * chunk_size),
        Tuple(Slice(1 * chunk_size, 2 * chunk_size, 1)):
            slice(1 * chunk_size, 2 * chunk_size),
        Tuple(Slice(2 * chunk_size, 3 * chunk_size - 2, 1)):
            slice(2 * chunk_size, 3 * chunk_size - 2),
    }

    ds = h5file['/_version_data/test_data/raw_data']
    assert ds.shape == (3 * chunk_size,)
    assert_equal(ds[0 * chunk_size:1 * chunk_size], 1.0)
    assert_equal(ds[1 * chunk_size:2 * chunk_size], 2.0)
    assert_equal(ds[2 * chunk_size:3 * chunk_size - 2], 3.0)
    assert_equal(ds[3 * chunk_size - 2:4 * chunk_size], 0.0)
def test_create_virtual_dataset_attrs(h5file):
    with h5file as f:
        slices1 = write_dataset(f, 'test_data',
                                np.ones((2 * DEFAULT_CHUNK_SIZE,)))
        slices2 = write_dataset(
            f, 'test_data',
            np.concatenate((2 * np.ones((DEFAULT_CHUNK_SIZE,)),
                            3 * np.ones((DEFAULT_CHUNK_SIZE,)))))

        attrs = {"attribute": "value"}
        virtual_data = create_virtual_dataset(
            f, 'test_version', 'test_data', (3 * DEFAULT_CHUNK_SIZE,),
            {
                **slices1,
                Tuple(Slice(2 * DEFAULT_CHUNK_SIZE, 3 * DEFAULT_CHUNK_SIZE, 1)):
                    slices2[(Slice(1 * DEFAULT_CHUNK_SIZE, 2 * DEFAULT_CHUNK_SIZE, 1),)],
            },
            attrs=attrs)

        assert dict(virtual_data.attrs) == {
            **attrs,
            "raw_data": '/_version_data/test_data/raw_data',
            "chunks": np.array([DEFAULT_CHUNK_SIZE]),
        }
def split_chunks(shape, chunks):
    """
    Yield a set of ndindex indices for chunks over shape

    If the shape is not a multiple of the chunk size, some chunks will be
    truncated. For example, if a has shape (10, 19) and is chunked into
    chunks of shape (5, 5):

    >>> from versioned_hdf5.slicetools import split_chunks
    >>> for i in split_chunks((10, 19), (5, 5)):
    ...     print(i)
    Tuple(slice(0, 5, 1), slice(0, 5, 1))
    Tuple(slice(0, 5, 1), slice(5, 10, 1))
    Tuple(slice(0, 5, 1), slice(10, 15, 1))
    Tuple(slice(0, 5, 1), slice(15, 19, 1))
    Tuple(slice(5, 10, 1), slice(0, 5, 1))
    Tuple(slice(5, 10, 1), slice(5, 10, 1))
    Tuple(slice(5, 10, 1), slice(10, 15, 1))
    Tuple(slice(5, 10, 1), slice(15, 19, 1))
    """
    if len(shape) != len(chunks):
        raise ValueError("chunks dimensions must equal the array dimensions")
    if len(shape) == 0:
        raise NotImplementedError("Scalar datasets")

    # Number of chunks along each axis, rounding up for truncated chunks.
    d = [math.ceil(i/c) for i, c in zip(shape, chunks)]
    for c in product(*[range(i) for i in d]):
        # c = (0, 0, 0), (0, 0, 1), ...
        yield Tuple(*[Slice(chunk_size*i, min(chunk_size*(i + 1), n), 1)
                      for n, chunk_size, i in zip(shape, chunks, c)])
def inverse(self):
    r"""
    Return a dictionary mapping Slice: array_of_hash. The Slices are all
    `reduce()`\d.
    """
    return {Slice(*s).reduce(): h for h, s in self.hash_table}
def __setitem__(self, key, value):
    if not isinstance(key, bytes):
        raise TypeError("key must be bytes")
    if len(key) != self.hash_size:
        raise ValueError("key must be %d bytes" % self.hash_size)
    if isinstance(value, Tuple):
        if len(value.args) > 1:
            raise NotImplementedError(
                "Chunking in dimensions other than the first is not supported")
        value = value.args[0]
    if not isinstance(value, (slice, Slice)):
        raise TypeError("value must be a slice object")
    if value.step not in [1, None]:
        raise ValueError("only step-1 slices are supported")

    kv = (list(key), (value.start, value.stop))
    if key in self._d:
        # Compare against the hash field of the stored record.
        if bytes(self.hash_table[self._indices[key]][0]) != key:
            raise ValueError(
                "The key %s is already in the hashtable under another index."
                % key)
        self.hash_table[self._indices[key]] = kv
    else:
        self.hash_table[self.largest_index] = kv
        self._indices[key] = self.largest_index
        self.largest_index += 1
        if self.largest_index >= self.hash_table.shape[0]:
            self.hash_table.resize(
                (self.hash_table.shape[0] + self.chunk_size,))
    self._d[key] = Slice(value)
def write_dataset(f, name, data, chunks=None, compression=None,
                  compression_opts=None, fillvalue=None):
    if name not in f['_version_data']:
        return create_base_dataset(f, name, data=data, chunks=chunks,
                                   compression=compression,
                                   compression_opts=compression_opts,
                                   fillvalue=fillvalue)

    ds = f['_version_data'][name]['raw_data']
    if isinstance(chunks, int) and not isinstance(chunks, bool):
        chunks = (chunks,)
    if chunks is None:
        chunks = tuple(ds.attrs['chunks'])
    elif chunks != tuple(ds.attrs['chunks']):
        raise ValueError(
            "Chunk size specified but doesn't match already existing chunk size")
    if compression or compression_opts:
        raise ValueError(
            "Compression options can only be specified for the first version of a dataset")
    if fillvalue is not None and fillvalue != ds.fillvalue:
        raise ValueError(f"fillvalues do not match ({fillvalue} != {ds.fillvalue})")
    if data.dtype != ds.dtype:
        raise ValueError(f"dtypes do not match ({data.dtype} != {ds.dtype})")

    # TODO: Handle more than one dimension
    old_shape = ds.shape
    hashtable = Hashtable(f, name)
    slices = {}
    slices_to_write = {}
    chunk_size = chunks[0]
    for s in split_chunks(data.shape, chunks):
        idx = hashtable.largest_index
        data_s = data[s.raw]
        raw_slice = Slice(idx*chunk_size, idx*chunk_size + data_s.shape[0])
        data_hash = hashtable.hash(data_s)
        raw_slice2 = hashtable.setdefault(data_hash, raw_slice)
        if raw_slice2 == raw_slice:
            slices_to_write[raw_slice] = s
        slices[s] = raw_slice2

    ds.resize((old_shape[0] + len(slices_to_write)*chunk_size,) + chunks[1:])
    for raw_slice, s in slices_to_write.items():
        data_s = data[s.raw]
        idx = Tuple(raw_slice, *[slice(0, i) for i in data_s.shape[1:]])
        ds[idx.raw] = data[s.raw]
    return slices
def write_dataset(f, name, data, chunks=None, dtype=None, compression=None,
                  compression_opts=None, fillvalue=None):
    if name not in f['_version_data']:
        return create_base_dataset(f, name, data=data, dtype=dtype,
                                   chunks=chunks, compression=compression,
                                   compression_opts=compression_opts,
                                   fillvalue=fillvalue)

    ds = f['_version_data'][name]['raw_data']
    if isinstance(chunks, int) and not isinstance(chunks, bool):
        chunks = (chunks,)
    if chunks is None:
        chunks = tuple(ds.attrs['chunks'])
    elif chunks != tuple(ds.attrs['chunks']):
        raise ValueError("Chunk size specified but doesn't match already existing chunk size")
    if dtype is not None and dtype != ds.dtype:
        raise ValueError("dtype specified but doesn't match already existing dtype")
    if (compression and compression != ds.compression
            or compression_opts and compression_opts != ds.compression_opts):
        raise ValueError("Compression options can only be specified for the first version of a dataset")
    if fillvalue is not None and fillvalue != ds.fillvalue:
        dtype = ds.dtype
        if dtype.metadata and ('vlen' in dtype.metadata
                               or 'h5py_encoding' in dtype.metadata):
            # Variable length string dtype. The ds.fillvalue will be None in
            # this case (see create_virtual_dataset() below)
            pass
        else:
            raise ValueError(f"fillvalues do not match ({fillvalue} != {ds.fillvalue})")
    if data.dtype != ds.dtype:
        raise ValueError(f"dtypes do not match ({data.dtype} != {ds.dtype})")

    # TODO: Handle more than one dimension
    old_shape = ds.shape
    slices = {}
    slices_to_write = {}
    chunk_size = chunks[0]
    with Hashtable(f, name) as hashtable:
        if len(data.shape) != 0:
            for s in ChunkSize(chunks).indices(data.shape):
                idx = hashtable.largest_index
                data_s = data[s.raw]
                raw_slice = Slice(idx*chunk_size,
                                  idx*chunk_size + data_s.shape[0])
                data_hash = hashtable.hash(data_s)
                raw_slice2 = hashtable.setdefault(data_hash, raw_slice)
                if raw_slice2 == raw_slice:
                    # New chunk content: it will be appended to the raw data.
                    slices_to_write[raw_slice] = s
                slices[s] = raw_slice2

            ds.resize((old_shape[0] + len(slices_to_write)*chunk_size,)
                      + chunks[1:])
            for raw_slice, s in slices_to_write.items():
                # idx = raw_slice.expand(ds.shape[:1] + s.newshape(data.shape)[1:])
                data_s = data[s.raw]
                idx = Tuple(raw_slice, *[slice(0, i) for i in data_s.shape[1:]])
                ds[idx.raw] = data[s.raw]
    return slices
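# The core of write_dataset above is content-addressed deduplication: each
# chunk is hashed, and hashtable.setdefault() returns the existing raw slice
# when that hash was seen before, so identical chunks are stored only once.
# A minimal self-contained model of that idea (illustrative sketch, added for
# clarity; uses a plain dict and hashlib in place of the on-disk Hashtable):
import hashlib

import numpy as np

def dedup_chunks(data, chunk_size):
    """Return {(start, stop): (raw_start, raw_stop)} plus the deduplicated raw store."""
    table = {}    # chunk hash -> (start, stop) into the raw store
    raw = []      # simplified stand-in for the raw_data dataset
    offset = 0    # next free position in the raw store
    slices = {}
    for start in range(0, len(data), chunk_size):
        chunk = data[start:start + chunk_size]
        h = hashlib.sha256(chunk.tobytes()).digest()
        if h not in table:
            # New content: append it to the raw store.
            table[h] = (offset, offset + len(chunk))
            raw.append(chunk)
            offset += len(chunk)
        slices[(start, start + len(chunk))] = table[h]
    return slices, np.concatenate(raw)

slices, raw = dedup_chunks(np.array([1., 1., 2., 2., 1., 1.]), 2)
assert len(raw) == 4                     # only two distinct chunks are stored
assert slices[(0, 2)] == slices[(4, 6)]  # identical chunks share raw storage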
def split_slice(s, chunk):
    """
    Split a slice into multiple slices along 0:chunk, chunk:2*chunk, etc.

    Yields tuples, (i, slice), where i is the chunk that should be sliced.
    """
    start, stop, step = s.args
    for i in range(math.floor(start/chunk), math.ceil(stop/chunk)):
        yield i, s.as_subindex(Slice(i*chunk, (i + 1)*chunk))
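# Illustrative usage of split_slice above (added for clarity; not part of the
# original source). Splitting Slice(2, 25, 3) over chunks of size 10 touches
# chunks 0, 1, and 2; each yielded slice is relative to the start of its chunk.
from ndindex import Slice

for i, sub in split_slice(Slice(2, 25, 3), 10):
    print(i, sub)
# Expected output (the selection hits 2, 5, 8 | 11, 14, 17 | 20, 23):
# 0 Slice(2, 9, 3)
# 1 Slice(1, 8, 3)
# 2 Slice(0, 4, 3)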
def setup(self):
    from ndindex import (Slice, Tuple, Integer, ellipsis, Newaxis,
                         IntegerArray, BooleanArray)
    self.slice = Slice(0, 4, 2)
    self.integer = Integer(1)
    self.tuple = Tuple(self.slice, ..., 0)
    self.ellipsis = ellipsis()
    self.newaxis = Newaxis()
    self.integer_array = IntegerArray([[1, 2], [-1, 2]])
    self.boolean_array = BooleanArray([[True, False], [False, False]])
def write_dataset_chunks(f, name, data_dict):
    """
    data_dict should be a dictionary mapping chunk index to either an array
    for that chunk, or a slice into the raw data for that chunk
    """
    if name not in f['_version_data']:
        raise NotImplementedError(
            "Use write_dataset() if the dataset does not yet exist")

    ds = f['_version_data'][name]['raw_data']
    chunks = tuple(ds.attrs['chunks'])
    chunk_size = chunks[0]

    shape = tuple(max(c.args[i].stop for c in data_dict)
                  for i in range(len(chunks)))
    all_chunks = list(split_chunks(shape, chunks))
    for c in all_chunks:
        if c not in data_dict:
            raise ValueError(f"data_dict does not include all chunks ({c})")
    for c in data_dict:
        if c not in all_chunks:
            raise ValueError(f"data_dict contains extra chunks ({c})")

    hashtable = Hashtable(f, name)
    slices = {i: None for i in data_dict}
    data_to_write = {}
    for chunk, data_s in data_dict.items():
        if (not isinstance(data_s, (slice, tuple, Tuple, Slice))
                and data_s.dtype != ds.dtype):
            raise ValueError(f"dtypes do not match ({data_s.dtype} != {ds.dtype})")

        idx = hashtable.largest_index
        if isinstance(data_s, (slice, tuple, Tuple, Slice)):
            # The chunk is already a slice into the raw data: reuse it as-is.
            slices[chunk] = ndindex(data_s)
        else:
            raw_slice = Slice(idx*chunk_size,
                              idx*chunk_size + data_s.shape[0])
            data_hash = hashtable.hash(data_s)
            raw_slice2 = hashtable.setdefault(data_hash, raw_slice)
            if raw_slice2 == raw_slice:
                data_to_write[raw_slice] = data_s
            slices[chunk] = raw_slice2

    assert None not in slices.values()
    old_shape = ds.shape
    ds.resize((old_shape[0] + len(data_to_write)*chunk_size,) + chunks[1:])
    for raw_slice, data_s in data_to_write.items():
        c = (raw_slice.raw,) + tuple(slice(0, i) for i in data_s.shape[1:])
        ds[c] = data_s
    return slices
def _load_hashtable(self):
    hash_table = self.f['_version_data'][self.name]['hash_table']
    largest_index = hash_table.attrs['largest_index']
    hash_table_arr = hash_table[:largest_index]
    hashes = bytes(hash_table_arr['hash'])
    shapes = hash_table_arr['shape']
    self._d = {
        hashes[i*self.hash_size:(i + 1)*self.hash_size]: Slice(*shapes[i])
        for i in range(largest_index)
    }
    self._indices = {k: i for i, k in enumerate(self._d)}
def __setitem__(self, key, value):
    if isinstance(key, np.ndarray):
        key = key.tobytes()
    if not isinstance(key, bytes):
        raise TypeError(f"key must be bytes, got {type(key)}")
    if len(key) != self.hash_size:
        raise ValueError("key must be %d bytes" % self.hash_size)
    if isinstance(value, Tuple):
        if len(value.args) > 1:
            raise NotImplementedError(
                "Chunking in dimensions other than the first is not supported")
        value = value.args[0]
    if not isinstance(value, (slice, Slice)):
        raise TypeError("value must be a slice object")
    value = Slice(value)
    if value.isempty():
        return
    if value.step not in [1, None]:
        raise ValueError("only step-1 slices are supported")

    kv = (list(key), (value.start, value.stop))
    if key in self._indices:
        # Compare against the hash field of the stored record.
        if bytes(self.hash_table[self._indices[key]][0]) != key:
            raise ValueError(
                "The key %s is already in the hashtable under another index."
                % key)
        self.hash_table[self._indices[key]] = kv
    else:
        if self.largest_index >= self.hash_table.shape[0]:
            # Grow the in-memory table by one chunk before appending.
            newshape = (self.hash_table.shape[0] + self.chunk_size,)
            new_hash_table = np.zeros(newshape, dtype=self.hash_table.dtype)
            new_hash_table[:self.hash_table.shape[0]] = self.hash_table
            self.hash_table = new_hash_table
        self.hash_table[self.largest_index] = kv
        self._indices[key] = self.largest_index
        self.largest_index += 1
def resize(self, size, axis=None):
    """
    Resize the dataset, or the specified axis.

    The rank of the dataset cannot be changed.

    "Size" should be a shape tuple, or if an axis is specified, an integer.

    BEWARE: This functions differently than the NumPy resize() method!
    The data is not "reshuffled" to fit in the new shape; each axis is
    grown or shrunk independently. The coordinates of existing data are
    fixed.
    """
    self.parent._check_committed()
    # This boilerplate code is based on h5py.Dataset.resize
    if axis is not None:
        if not (axis >= 0 and axis < self.id.rank):
            raise ValueError("Invalid axis (0 to %s allowed)" % (self.id.rank - 1))
        try:
            newlen = int(size)
        except TypeError:
            raise TypeError("Argument must be a single int if axis is specified")
        size = list(self.shape)
        size[axis] = newlen

    size = tuple(size)
    # === END CODE FROM h5py.Dataset.resize ===

    old_shape = self.shape
    data_dict = self.id.data_dict
    chunks = self.chunks
    old_shape_idx = Tuple(*[Slice(0, i) for i in old_shape])
    new_data_dict = {}
    for c in set(split_chunks(size, chunks)):
        if c in data_dict:
            new_data_dict[c] = data_dict[c]
        else:
            a = self[c.raw]
            data = np.full(c.newshape(size), self.fillvalue, dtype=self.dtype)
            data[old_shape_idx.as_subindex(c).raw] = a
            new_data_dict[c] = data

    self.id.data_dict = new_data_dict
    self.id.shape = size
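# To make the BEWARE above concrete (illustrative NumPy-only sketch, added for
# clarity; it does not use the class above): np.resize reads the data back in
# flat order, while this resize keeps every element at its coordinates and
# fills new space with the fillvalue.
import numpy as np

a = np.arange(6).reshape(2, 3)       # [[0, 1, 2], [3, 4, 5]]

print(np.resize(a, (3, 3)))          # NumPy "reshuffles" in flat order:
# [[0 1 2]
#  [3 4 5]
#  [0 1 2]]

fillvalue = 0
grown = np.full((3, 3), fillvalue, dtype=a.dtype)
grown[:2, :3] = a                    # coordinates of existing data are fixed
print(grown)
# [[0 1 2]
#  [3 4 5]
#  [0 0 0]]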
def test_split_slice():
    chunk = 10
    for start in range(20):
        for stop in range(30):
            for step in range(1, 10):
                s = Slice(start, stop, step)
                slices = list(split_slice(s, chunk))
                base = list(range(100)[s.raw])
                assert sum([len(s_) for i, s_ in slices]) == len(s), (s, slices)
                pieces = [list(range(i*chunk, (i + 1)*chunk)[s_.raw])
                          for i, s_ in slices]
                extended = []
                for p in pieces:
                    extended.extend(p)
                assert base == extended, (s, slices)
def spaceid_to_slice(space):
    """
    Convert an h5py spaceid object into an ndindex index

    The resulting index is always a Tuple index.
    """
    from h5py import h5s

    sel_type = space.get_select_type()

    if sel_type == h5s.SEL_ALL:
        return Tuple()
    elif sel_type == h5s.SEL_HYPERSLABS:
        slices = []
        starts, strides, counts, blocks = space.get_regular_hyperslab()
        for start, stride, count, block in zip(starts, strides, counts, blocks):
            slices.append(hyperslab_to_slice(start, stride, count, block))
        return Tuple(*slices)
    elif sel_type == h5s.SEL_NONE:
        return Tuple(Slice(0, 0),)
    else:
        raise NotImplementedError("Point selections are not yet supported")
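# Quick check of the conversion (illustrative, added for clarity; assumes the
# spaceid_to_slice above is in scope and uses h5py's low-level dataspace API).
from h5py import h5s

# A 1-D dataspace of 20 elements; select every 3rd element starting at 2,
# four times: positions 2, 5, 8, 11.
space = h5s.create_simple((20,))
space.select_hyperslab(start=(2,), count=(4,), stride=(3,), block=(1,))

print(spaceid_to_slice(space))  # expected: Tuple(slice(2, 12, 3))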
def setup(self):
    self.s1 = Slice(0, 30)
    self.s2 = Slice(0, 1, 10)
def test_write_dataset_chunk_size(h5file):
    chunk_size = 2**10
    chunks = (chunk_size,)
    slices1 = write_dataset(h5file, 'test_data',
                            np.ones((2 * chunk_size,)), chunks=chunks)
    raises(ValueError, lambda: write_dataset(
        h5file, 'test_data', np.ones(chunks), chunks=(2**9,)))
    slices2 = write_dataset_chunks(
        h5file, 'test_data', {
            Tuple(Slice(0 * chunk_size, 1 * chunk_size, 1)):
                slices1[Tuple(Slice(0 * chunk_size, 1 * chunk_size, 1))],
            Tuple(Slice(1 * chunk_size, 2 * chunk_size, 1)):
                2 * np.ones((chunk_size,)),
            Tuple(Slice(2 * chunk_size, 3 * chunk_size, 1)):
                2 * np.ones((chunk_size,)),
            Tuple(Slice(3 * chunk_size, 4 * chunk_size, 1)):
                3 * np.ones((chunk_size,)),
        })

    assert slices1 == {
        Tuple(Slice(0 * chunk_size, 1 * chunk_size, 1)):
            slice(0 * chunk_size, 1 * chunk_size),
        Tuple(Slice(1 * chunk_size, 2 * chunk_size, 1)):
            slice(0 * chunk_size, 1 * chunk_size),
    }
    assert slices2 == {
        Tuple(Slice(0 * chunk_size, 1 * chunk_size, 1)):
            slice(0 * chunk_size, 1 * chunk_size),
        Tuple(Slice(1 * chunk_size, 2 * chunk_size, 1)):
            slice(1 * chunk_size, 2 * chunk_size),
        Tuple(Slice(2 * chunk_size, 3 * chunk_size, 1)):
            slice(1 * chunk_size, 2 * chunk_size),
        Tuple(Slice(3 * chunk_size, 4 * chunk_size, 1)):
            slice(2 * chunk_size, 3 * chunk_size),
    }

    ds = h5file['/_version_data/test_data/raw_data']
    assert ds.shape == (3 * chunk_size,)
    assert_equal(ds[0:1 * chunk_size], 1.0)
    assert_equal(ds[1 * chunk_size:2 * chunk_size], 2.0)
    assert_equal(ds[2 * chunk_size:3 * chunk_size], 3.0)
    assert_equal(ds[3 * chunk_size:4 * chunk_size], 0.0)
    assert ds.dtype == np.float64
def time_constructor_ints(self):
    Slice(0, 1, 10)
def test_write_dataset_offset_multidimension(h5file):
    chunks = ChunkSize(3 * (CHUNK_SIZE_3D,))
    shape = (2 * CHUNK_SIZE_3D, 2 * CHUNK_SIZE_3D, 2 * CHUNK_SIZE_3D)
    data = np.zeros(shape)
    slices1 = write_dataset(h5file, 'test_data', data, chunks=chunks)

    shape2 = (2 * CHUNK_SIZE_3D - 2, 2 * CHUNK_SIZE_3D - 2, 2 * CHUNK_SIZE_3D - 2)
    data2 = np.empty(shape2)
    for n, c in enumerate(chunks.indices(shape)):
        data2[c.raw] = n
    slices2 = write_dataset(h5file, 'test_data', data2, chunks=chunks)

    assert slices1 == {
        (Slice(0 * CHUNK_SIZE_3D, 1 * CHUNK_SIZE_3D, 1),
         Slice(0 * CHUNK_SIZE_3D, 1 * CHUNK_SIZE_3D, 1),
         Slice(0 * CHUNK_SIZE_3D, 1 * CHUNK_SIZE_3D, 1)):
            slice(0 * CHUNK_SIZE_3D, 1 * CHUNK_SIZE_3D),
        (Slice(0 * CHUNK_SIZE_3D, 1 * CHUNK_SIZE_3D, 1),
         Slice(0 * CHUNK_SIZE_3D, 1 * CHUNK_SIZE_3D, 1),
         Slice(1 * CHUNK_SIZE_3D, 2 * CHUNK_SIZE_3D, 1)):
            slice(0 * CHUNK_SIZE_3D, 1 * CHUNK_SIZE_3D),
        (Slice(0 * CHUNK_SIZE_3D, 1 * CHUNK_SIZE_3D, 1),
         Slice(1 * CHUNK_SIZE_3D, 2 * CHUNK_SIZE_3D, 1),
         Slice(0 * CHUNK_SIZE_3D, 1 * CHUNK_SIZE_3D, 1)):
            slice(0 * CHUNK_SIZE_3D, 1 * CHUNK_SIZE_3D),
        (Slice(0 * CHUNK_SIZE_3D, 1 * CHUNK_SIZE_3D, 1),
         Slice(1 * CHUNK_SIZE_3D, 2 * CHUNK_SIZE_3D, 1),
         Slice(1 * CHUNK_SIZE_3D, 2 * CHUNK_SIZE_3D, 1)):
            slice(0 * CHUNK_SIZE_3D, 1 * CHUNK_SIZE_3D),
        (Slice(1 * CHUNK_SIZE_3D, 2 * CHUNK_SIZE_3D, 1),
         Slice(0 * CHUNK_SIZE_3D, 1 * CHUNK_SIZE_3D, 1),
         Slice(0 * CHUNK_SIZE_3D, 1 * CHUNK_SIZE_3D, 1)):
            slice(0 * CHUNK_SIZE_3D, 1 * CHUNK_SIZE_3D),
        (Slice(1 * CHUNK_SIZE_3D, 2 * CHUNK_SIZE_3D, 1),
         Slice(0 * CHUNK_SIZE_3D, 1 * CHUNK_SIZE_3D, 1),
         Slice(1 * CHUNK_SIZE_3D, 2 * CHUNK_SIZE_3D, 1)):
            slice(0 * CHUNK_SIZE_3D, 1 * CHUNK_SIZE_3D),
        (Slice(1 * CHUNK_SIZE_3D, 2 * CHUNK_SIZE_3D, 1),
         Slice(1 * CHUNK_SIZE_3D, 2 * CHUNK_SIZE_3D, 1),
         Slice(0 * CHUNK_SIZE_3D, 1 * CHUNK_SIZE_3D, 1)):
            slice(0 * CHUNK_SIZE_3D, 1 * CHUNK_SIZE_3D),
        (Slice(1 * CHUNK_SIZE_3D, 2 * CHUNK_SIZE_3D, 1),
         Slice(1 * CHUNK_SIZE_3D, 2 * CHUNK_SIZE_3D, 1),
         Slice(1 * CHUNK_SIZE_3D, 2 * CHUNK_SIZE_3D, 1)):
            slice(0 * CHUNK_SIZE_3D, 1 * CHUNK_SIZE_3D),
    }
    assert slices2 == {
        (Slice(0 * CHUNK_SIZE_3D, 1 * CHUNK_SIZE_3D, 1),
         Slice(0 * CHUNK_SIZE_3D, 1 * CHUNK_SIZE_3D, 1),
         Slice(0 * CHUNK_SIZE_3D, 1 * CHUNK_SIZE_3D, 1)):
            slice(0 * CHUNK_SIZE_3D, 1 * CHUNK_SIZE_3D),
        (Slice(0 * CHUNK_SIZE_3D, 1 * CHUNK_SIZE_3D, 1),
         Slice(0 * CHUNK_SIZE_3D, 1 * CHUNK_SIZE_3D, 1),
         Slice(1 * CHUNK_SIZE_3D, 2 * CHUNK_SIZE_3D - 2, 1)):
            slice(1 * CHUNK_SIZE_3D, 2 * CHUNK_SIZE_3D),
        (Slice(0 * CHUNK_SIZE_3D, 1 * CHUNK_SIZE_3D, 1),
         Slice(1 * CHUNK_SIZE_3D, 2 * CHUNK_SIZE_3D - 2, 1),
         Slice(0 * CHUNK_SIZE_3D, 1 * CHUNK_SIZE_3D, 1)):
            slice(2 * CHUNK_SIZE_3D, 3 * CHUNK_SIZE_3D),
        (Slice(0 * CHUNK_SIZE_3D, 1 * CHUNK_SIZE_3D, 1),
         Slice(1 * CHUNK_SIZE_3D, 2 * CHUNK_SIZE_3D - 2, 1),
         Slice(1 * CHUNK_SIZE_3D, 2 * CHUNK_SIZE_3D - 2, 1)):
            slice(3 * CHUNK_SIZE_3D, 4 * CHUNK_SIZE_3D),
        (Slice(1 * CHUNK_SIZE_3D, 2 * CHUNK_SIZE_3D - 2, 1),
         Slice(0 * CHUNK_SIZE_3D, 1 * CHUNK_SIZE_3D, 1),
         Slice(0 * CHUNK_SIZE_3D, 1 * CHUNK_SIZE_3D, 1)):
            slice(4 * CHUNK_SIZE_3D, 5 * CHUNK_SIZE_3D - 2),
        (Slice(1 * CHUNK_SIZE_3D, 2 * CHUNK_SIZE_3D - 2, 1),
         Slice(0 * CHUNK_SIZE_3D, 1 * CHUNK_SIZE_3D, 1),
         Slice(1 * CHUNK_SIZE_3D, 2 * CHUNK_SIZE_3D - 2, 1)):
            slice(5 * CHUNK_SIZE_3D, 6 * CHUNK_SIZE_3D - 2),
        (Slice(1 * CHUNK_SIZE_3D, 2 * CHUNK_SIZE_3D - 2, 1),
         Slice(1 * CHUNK_SIZE_3D, 2 * CHUNK_SIZE_3D - 2, 1),
         Slice(0 * CHUNK_SIZE_3D, 1 * CHUNK_SIZE_3D, 1)):
            slice(6 * CHUNK_SIZE_3D, 7 * CHUNK_SIZE_3D - 2),
        (Slice(1 * CHUNK_SIZE_3D, 2 * CHUNK_SIZE_3D - 2, 1),
         Slice(1 * CHUNK_SIZE_3D, 2 * CHUNK_SIZE_3D - 2, 1),
         Slice(1 * CHUNK_SIZE_3D, 2 * CHUNK_SIZE_3D - 2, 1)):
            slice(7 * CHUNK_SIZE_3D, 8 * CHUNK_SIZE_3D - 2),
    }

    ds = h5file['/_version_data/test_data/raw_data']
    assert ds.shape == (8 * CHUNK_SIZE_3D, CHUNK_SIZE_3D, CHUNK_SIZE_3D)
    for n, c in enumerate(chunks.indices(shape2)):
        a = np.zeros(chunks)
        a[Tuple(*[slice(0, i) for i in shape2]).as_subindex(c).raw] = n
        assert_equal(ds[n * CHUNK_SIZE_3D:(n + 1) * CHUNK_SIZE_3D], a)
    assert ds.dtype == np.float64
def test_write_dataset_chunks(h5file):
    slices1 = write_dataset(h5file, 'test_data',
                            np.ones((2 * DEFAULT_CHUNK_SIZE,)))
    slices2 = write_dataset_chunks(
        h5file, 'test_data', {
            Tuple(Slice(0 * DEFAULT_CHUNK_SIZE, 1 * DEFAULT_CHUNK_SIZE, 1)):
                slices1[Tuple(Slice(0 * DEFAULT_CHUNK_SIZE, 1 * DEFAULT_CHUNK_SIZE, 1))],
            Tuple(Slice(1 * DEFAULT_CHUNK_SIZE, 2 * DEFAULT_CHUNK_SIZE, 1)):
                2 * np.ones((DEFAULT_CHUNK_SIZE,)),
            Tuple(Slice(2 * DEFAULT_CHUNK_SIZE, 3 * DEFAULT_CHUNK_SIZE, 1)):
                2 * np.ones((DEFAULT_CHUNK_SIZE,)),
            Tuple(Slice(3 * DEFAULT_CHUNK_SIZE, 4 * DEFAULT_CHUNK_SIZE, 1)):
                3 * np.ones((DEFAULT_CHUNK_SIZE,)),
        })

    assert slices1 == {
        Tuple(Slice(0 * DEFAULT_CHUNK_SIZE, 1 * DEFAULT_CHUNK_SIZE, 1)):
            slice(0 * DEFAULT_CHUNK_SIZE, 1 * DEFAULT_CHUNK_SIZE),
        Tuple(Slice(1 * DEFAULT_CHUNK_SIZE, 2 * DEFAULT_CHUNK_SIZE, 1)):
            slice(0 * DEFAULT_CHUNK_SIZE, 1 * DEFAULT_CHUNK_SIZE),
    }
    assert slices2 == {
        Tuple(Slice(0 * DEFAULT_CHUNK_SIZE, 1 * DEFAULT_CHUNK_SIZE, 1)):
            slice(0 * DEFAULT_CHUNK_SIZE, 1 * DEFAULT_CHUNK_SIZE),
        Tuple(Slice(1 * DEFAULT_CHUNK_SIZE, 2 * DEFAULT_CHUNK_SIZE, 1)):
            slice(1 * DEFAULT_CHUNK_SIZE, 2 * DEFAULT_CHUNK_SIZE),
        Tuple(Slice(2 * DEFAULT_CHUNK_SIZE, 3 * DEFAULT_CHUNK_SIZE, 1)):
            slice(1 * DEFAULT_CHUNK_SIZE, 2 * DEFAULT_CHUNK_SIZE),
        Tuple(Slice(3 * DEFAULT_CHUNK_SIZE, 4 * DEFAULT_CHUNK_SIZE, 1)):
            slice(2 * DEFAULT_CHUNK_SIZE, 3 * DEFAULT_CHUNK_SIZE),
    }

    ds = h5file['/_version_data/test_data/raw_data']
    assert ds.shape == (3 * DEFAULT_CHUNK_SIZE,)
    assert_equal(ds[0:1 * DEFAULT_CHUNK_SIZE], 1.0)
    assert_equal(ds[1 * DEFAULT_CHUNK_SIZE:2 * DEFAULT_CHUNK_SIZE], 2.0)
    assert_equal(ds[2 * DEFAULT_CHUNK_SIZE:3 * DEFAULT_CHUNK_SIZE], 3.0)
    assert_equal(ds[3 * DEFAULT_CHUNK_SIZE:4 * DEFAULT_CHUNK_SIZE], 0.0)
    assert ds.dtype == np.float64
def time_constructor_invalid(self):
    try:
        Slice(0.5)
    except TypeError:
        pass
class TimeSlice:
    def setup(self):
        self.s1 = Slice(0, 30)
        self.s2 = Slice(0, 1, 10)

    def time_constructor_slice(self):
        Slice(slice(0, 30))

    def time_constructor_ints(self):
        Slice(0, 1, 10)

    def time_constructor_invalid(self):
        try:
            Slice(0.5)
        except TypeError:
            pass

    def time_reduce(self):
        self.s1.reduce()
        self.s2.reduce()

    def time_reduce_shape(self):
        self.s1.reduce(10)
        self.s2.reduce(10)

    def time_newshape(self):
        self.s1.newshape((10, 5))
        self.s2.newshape((10, 5))

    def time_isempty(self):
        self.s1.isempty()

    def time_isempty_shape(self):
        self.s1.isempty((10, 5))
def hyperslab_to_slice(start, stride, count, block):
    if not (block == 1 or count == 1):
        raise NotImplementedError("Nontrivial blocks are not yet supported")
    end = start + (stride*(count - 1) + 1)*block
    stride = stride if block == 1 else 1
    return Slice(start, end, stride)
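# Worked examples of the end formula above (illustrative, added for clarity).
# With block == 1 the hyperslab is `count` evenly spaced points; with
# count == 1 it is a single contiguous run of `block` elements.
assert hyperslab_to_slice(2, 3, 4, 1) == Slice(2, 12, 3)
# points 2, 5, 8, 11: end = 2 + (3*(4 - 1) + 1)*1 = 12

assert hyperslab_to_slice(4, 3, 1, 5) == Slice(4, 9, 1)
# one block of 5 starting at 4: end = 4 + (3*0 + 1)*5 = 9; stride collapses to 1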
def test_write_dataset_multidimension(h5file):
    chunks = 3 * (CHUNK_SIZE_3D,)
    data = np.zeros((2 * CHUNK_SIZE_3D, 2 * CHUNK_SIZE_3D, 2 * CHUNK_SIZE_3D))
    slices1 = write_dataset(h5file, 'test_data', data, chunks=chunks)
    data2 = data.copy()
    for n, (i, j, k) in enumerate(itertools.product([0, 1], repeat=3)):
        data2[i * CHUNK_SIZE_3D:(i + 1) * CHUNK_SIZE_3D,
              j * CHUNK_SIZE_3D:(j + 1) * CHUNK_SIZE_3D,
              k * CHUNK_SIZE_3D:(k + 1) * CHUNK_SIZE_3D] = n
    slices2 = write_dataset(h5file, 'test_data', data2, chunks=chunks)

    assert slices1 == {
        (Slice(0 * CHUNK_SIZE_3D, 1 * CHUNK_SIZE_3D, 1),
         Slice(0 * CHUNK_SIZE_3D, 1 * CHUNK_SIZE_3D, 1),
         Slice(0 * CHUNK_SIZE_3D, 1 * CHUNK_SIZE_3D, 1)):
            slice(0 * CHUNK_SIZE_3D, 1 * CHUNK_SIZE_3D),
        (Slice(0 * CHUNK_SIZE_3D, 1 * CHUNK_SIZE_3D, 1),
         Slice(0 * CHUNK_SIZE_3D, 1 * CHUNK_SIZE_3D, 1),
         Slice(1 * CHUNK_SIZE_3D, 2 * CHUNK_SIZE_3D, 1)):
            slice(0 * CHUNK_SIZE_3D, 1 * CHUNK_SIZE_3D),
        (Slice(0 * CHUNK_SIZE_3D, 1 * CHUNK_SIZE_3D, 1),
         Slice(1 * CHUNK_SIZE_3D, 2 * CHUNK_SIZE_3D, 1),
         Slice(0 * CHUNK_SIZE_3D, 1 * CHUNK_SIZE_3D, 1)):
            slice(0 * CHUNK_SIZE_3D, 1 * CHUNK_SIZE_3D),
        (Slice(0 * CHUNK_SIZE_3D, 1 * CHUNK_SIZE_3D, 1),
         Slice(1 * CHUNK_SIZE_3D, 2 * CHUNK_SIZE_3D, 1),
         Slice(1 * CHUNK_SIZE_3D, 2 * CHUNK_SIZE_3D, 1)):
            slice(0 * CHUNK_SIZE_3D, 1 * CHUNK_SIZE_3D),
        (Slice(1 * CHUNK_SIZE_3D, 2 * CHUNK_SIZE_3D, 1),
         Slice(0 * CHUNK_SIZE_3D, 1 * CHUNK_SIZE_3D, 1),
         Slice(0 * CHUNK_SIZE_3D, 1 * CHUNK_SIZE_3D, 1)):
            slice(0 * CHUNK_SIZE_3D, 1 * CHUNK_SIZE_3D),
        (Slice(1 * CHUNK_SIZE_3D, 2 * CHUNK_SIZE_3D, 1),
         Slice(0 * CHUNK_SIZE_3D, 1 * CHUNK_SIZE_3D, 1),
         Slice(1 * CHUNK_SIZE_3D, 2 * CHUNK_SIZE_3D, 1)):
            slice(0 * CHUNK_SIZE_3D, 1 * CHUNK_SIZE_3D),
        (Slice(1 * CHUNK_SIZE_3D, 2 * CHUNK_SIZE_3D, 1),
         Slice(1 * CHUNK_SIZE_3D, 2 * CHUNK_SIZE_3D, 1),
         Slice(0 * CHUNK_SIZE_3D, 1 * CHUNK_SIZE_3D, 1)):
            slice(0 * CHUNK_SIZE_3D, 1 * CHUNK_SIZE_3D),
        (Slice(1 * CHUNK_SIZE_3D, 2 * CHUNK_SIZE_3D, 1),
         Slice(1 * CHUNK_SIZE_3D, 2 * CHUNK_SIZE_3D, 1),
         Slice(1 * CHUNK_SIZE_3D, 2 * CHUNK_SIZE_3D, 1)):
            slice(0 * CHUNK_SIZE_3D, 1 * CHUNK_SIZE_3D),
    }
    assert slices2 == {
        (Slice(0 * CHUNK_SIZE_3D, 1 * CHUNK_SIZE_3D, 1),
         Slice(0 * CHUNK_SIZE_3D, 1 * CHUNK_SIZE_3D, 1),
         Slice(0 * CHUNK_SIZE_3D, 1 * CHUNK_SIZE_3D, 1)):
            slice(0 * CHUNK_SIZE_3D, 1 * CHUNK_SIZE_3D),
        (Slice(0 * CHUNK_SIZE_3D, 1 * CHUNK_SIZE_3D, 1),
         Slice(0 * CHUNK_SIZE_3D, 1 * CHUNK_SIZE_3D, 1),
         Slice(1 * CHUNK_SIZE_3D, 2 * CHUNK_SIZE_3D, 1)):
            slice(1 * CHUNK_SIZE_3D, 2 * CHUNK_SIZE_3D),
        (Slice(0 * CHUNK_SIZE_3D, 1 * CHUNK_SIZE_3D, 1),
         Slice(1 * CHUNK_SIZE_3D, 2 * CHUNK_SIZE_3D, 1),
         Slice(0 * CHUNK_SIZE_3D, 1 * CHUNK_SIZE_3D, 1)):
            slice(2 * CHUNK_SIZE_3D, 3 * CHUNK_SIZE_3D),
        (Slice(0 * CHUNK_SIZE_3D, 1 * CHUNK_SIZE_3D, 1),
         Slice(1 * CHUNK_SIZE_3D, 2 * CHUNK_SIZE_3D, 1),
         Slice(1 * CHUNK_SIZE_3D, 2 * CHUNK_SIZE_3D, 1)):
            slice(3 * CHUNK_SIZE_3D, 4 * CHUNK_SIZE_3D),
        (Slice(1 * CHUNK_SIZE_3D, 2 * CHUNK_SIZE_3D, 1),
         Slice(0 * CHUNK_SIZE_3D, 1 * CHUNK_SIZE_3D, 1),
         Slice(0 * CHUNK_SIZE_3D, 1 * CHUNK_SIZE_3D, 1)):
            slice(4 * CHUNK_SIZE_3D, 5 * CHUNK_SIZE_3D),
        (Slice(1 * CHUNK_SIZE_3D, 2 * CHUNK_SIZE_3D, 1),
         Slice(0 * CHUNK_SIZE_3D, 1 * CHUNK_SIZE_3D, 1),
         Slice(1 * CHUNK_SIZE_3D, 2 * CHUNK_SIZE_3D, 1)):
            slice(5 * CHUNK_SIZE_3D, 6 * CHUNK_SIZE_3D),
        (Slice(1 * CHUNK_SIZE_3D, 2 * CHUNK_SIZE_3D, 1),
         Slice(1 * CHUNK_SIZE_3D, 2 * CHUNK_SIZE_3D, 1),
         Slice(0 * CHUNK_SIZE_3D, 1 * CHUNK_SIZE_3D, 1)):
            slice(6 * CHUNK_SIZE_3D, 7 * CHUNK_SIZE_3D),
        (Slice(1 * CHUNK_SIZE_3D, 2 * CHUNK_SIZE_3D, 1),
         Slice(1 * CHUNK_SIZE_3D, 2 * CHUNK_SIZE_3D, 1),
         Slice(1 * CHUNK_SIZE_3D, 2 * CHUNK_SIZE_3D, 1)):
            slice(7 * CHUNK_SIZE_3D, 8 * CHUNK_SIZE_3D),
    }

    ds = h5file['/_version_data/test_data/raw_data']
    assert ds.shape == (8 * CHUNK_SIZE_3D, CHUNK_SIZE_3D, CHUNK_SIZE_3D)
    for n in range(8):
        assert_equal(ds[n * CHUNK_SIZE_3D:(n + 1) * CHUNK_SIZE_3D], n)
    assert ds.dtype == np.float64
def time_constructor_slice(self):
    Slice(slice(0, 30))