def test_compressor_as_filter():
    """Using a compressor as the last filter should produce storage identical
    to using it as the array's primary compressor."""
    for compressor in compressors:
        if compressor is None:
            # nothing to compare when there is no compressor
            continue

        # filter chain: delta transform followed by the compressor itself
        dtype = 'i8'
        filters = [Delta(dtype=dtype), compressor]

        # one array compresses via the filter chain, the other via `compressor`
        data = np.arange(10000, dtype=dtype)
        a1 = array(data, chunks=1000, compressor=None, filters=filters)
        a2 = array(data, chunks=1000, compressor=compressor,
                   filters=filters[:1])

        # stored chunk bytes must match exactly
        for i in range(10):
            b1 = bytes(a1.store[str(i)])
            b2 = bytes(a2.store[str(i)])
            eq(b1, b2)

        # both arrays round-trip the data
        assert_array_equal(data, a1[:])
        assert_array_equal(a1[:], a2[:])
def test_array_with_quantize_filter():
    """Quantize filter round-trips to the requested precision and each stored
    chunk matches a manual encoding."""
    # setup
    dtype = 'f8'
    digits = 3
    flt = Quantize(digits=digits, dtype=dtype)
    filters = [flt]
    data = np.linspace(0, 1, 34, dtype=dtype)
    for compressor in compressors:
        a = array(data, chunks=5, compressor=compressor, filters=filters)
        # round-trip at the quantized precision
        assert_array_almost_equal(data, a[:], decimal=digits)
        # compare every stored chunk against encoding the slice by hand
        for i in range(6):
            cdata = a.store[str(i)]
            chunk = compressor.decode(cdata) if compressor else cdata
            actual = np.frombuffer(chunk, dtype=dtype)
            expect = flt.encode(data[i * 5:(i * 5) + 5])
            assert_array_equal(expect, actual)
def test_array_with_categorize_filter():
    """Categorize filter on bytes data round-trips and each stored chunk
    matches a manual encoding."""
    # setup
    data = np.random.choice([b'foo', b'bar', b'baz'], size=100)
    flt = Categorize(dtype=data.dtype, labels=['foo', 'bar', 'baz'])
    filters = [flt]
    for compressor in compressors:
        # NOTE: removed leftover debug print(repr(compressor)) — the sibling
        # unicode variant of this test has no print; keep test output quiet.
        a = array(data, chunks=5, compressor=compressor, filters=filters)
        # check round-trip
        assert_array_equal(data, a[:])
        # check each stored chunk against encoding the slice by hand
        for i in range(20):
            cdata = a.store[str(i)]
            if a.compressor:
                chunk = a.compressor.decode(cdata)
            else:
                chunk = cdata
            actual = np.frombuffer(chunk, dtype='u1')
            expect = flt.encode(data[i*5:(i*5)+5])
            assert_array_equal(expect, actual)
def test_array_with_astype_filter():
    """AsType filter: chunks are stored in the narrow encode dtype while the
    array presents the wide decode dtype."""
    # setup
    encode_dtype = 'i1'
    decode_dtype = 'i8'
    filters = [AsType(encode_dtype=encode_dtype, decode_dtype=decode_dtype)]
    chunks = 10
    chunk_size = 10
    shape = chunks * chunk_size
    data = np.arange(shape, dtype=decode_dtype)
    for compressor in compressors:
        a = array(data, chunks=chunks, compressor=compressor, filters=filters)
        # round-trip preserves the logical dtype and values
        assert data.dtype == a.dtype
        assert_array_equal(data, a[:])
        # each stored chunk carries the encode dtype
        for i in range(chunks):
            cdata = a.store[str(i)]
            chunk = compressor.decode(cdata) if compressor else cdata
            actual = np.frombuffer(chunk, dtype=encode_dtype)
            expect = data.astype(encode_dtype)[i * chunk_size:
                                               (i + 1) * chunk_size]
            assert_array_equal(expect, actual)
def test_array_with_scaleoffset_filter():
    """FixedScaleOffset filter round-trips to ~1 decimal and each stored chunk
    matches a manual encoding."""
    # setup
    astype = 'u1'
    dtype = 'f8'
    flt = FixedScaleOffset(scale=10, offset=1000, astype=astype, dtype=dtype)
    filters = [flt]
    data = np.linspace(1000, 1001, 34, dtype='f8')
    for compressor in compressors:
        a = array(data, chunks=5, compressor=compressor, filters=filters)
        # round-trip within the precision the scale permits
        assert_array_almost_equal(data, a[:], decimal=1)
        # compare every stored chunk against encoding the slice by hand
        for i in range(6):
            cdata = a.store[str(i)]
            chunk = compressor.decode(cdata) if compressor else cdata
            actual = np.frombuffer(chunk, dtype=astype)
            expect = flt.encode(data[i * 5:(i * 5) + 5])
            assert_array_equal(expect, actual)
def test_array_with_delta_filter():
    """Delta filter round-trips exactly; each stored chunk is the first value
    followed by unit differences."""
    # setup
    astype = 'u1'
    dtype = 'i8'
    filters = [Delta(astype=astype, dtype=dtype)]
    data = np.arange(100, dtype=dtype)
    for compressor in compressors:
        a = array(data, chunks=10, compressor=compressor, filters=filters)
        # check round-trip
        assert_array_equal(data, a[:])
        # chunk i of arange data encodes as [i*10, 1, 1, ..., 1]
        for i in range(10):
            cdata = a.store[str(i)]
            chunk = compressor.decode(cdata) if compressor else cdata
            actual = np.frombuffer(chunk, dtype=astype)
            expect = np.array([i * 10] + [1] * 9, dtype=astype)
            assert_array_equal(expect, actual)
def test_array_with_packbits_filter():
    """PackBits filter on boolean data round-trips and each stored chunk
    matches a manual encoding."""
    # setup
    flt = PackBits()
    filters = [flt]
    data = np.random.randint(0, 2, size=100, dtype=bool)
    for compressor in compressors:
        # NOTE: removed leftover debug print(repr(compressor)) — the other
        # copy of this test has no print; keep test output quiet.
        a = array(data, chunks=5, compressor=compressor, filters=filters)
        # check round-trip
        assert_array_equal(data, a[:])
        # check each stored chunk against encoding the slice by hand
        for i in range(20):
            cdata = a.store[str(i)]
            if compressor:
                chunk = compressor.decode(cdata)
            else:
                chunk = cdata
            actual = np.frombuffer(chunk, dtype='u1')
            expect = flt.encode(data[i*5:(i*5)+5])
            assert_array_equal(expect, actual)
def test_array():
    """array() accepts numpy arrays, array-likes, zarr arrays, and chunky
    array-likes, and honours the dtype argument."""
    # from a numpy array
    a = np.arange(100)
    z = array(a, chunks=10)
    eq(a.shape, z.shape)
    eq(a.dtype, z.dtype)
    assert_array_equal(a, z[:])

    # from a generic array-like (list)
    a = list(range(100))
    z = array(a, chunks=10)
    eq((100,), z.shape)
    eq(np.asarray(a).dtype, z.dtype)
    assert_array_equal(np.asarray(a), z[:])

    # from another zarr array (shape/chunks/dtype are inherited)
    z2 = array(z)
    eq(z.shape, z2.shape)
    eq(z.chunks, z2.chunks)
    eq(z.dtype, z2.dtype)
    assert_array_equal(z[:], z2[:])

    # bcolz-style chunklen becomes the first chunk dimension
    b = np.arange(1000).reshape(100, 10)
    c = MockBcolzArray(b, 10)
    z3 = array(c)
    eq(c.shape, z3.shape)
    eq((10, 10), z3.chunks)

    # h5py-style chunks tuple is taken as-is
    b = np.arange(1000).reshape(100, 10)
    c = MockH5pyDataset(b, chunks=(10, 2))
    z4 = array(c)
    eq(c.shape, z4.shape)
    eq((10, 2), z4.chunks)

    # h5py dataset without chunks still yields some tuple of chunks
    c = MockH5pyDataset(b, chunks=None)
    z5 = array(c)
    eq(c.shape, z5.shape)
    assert_is_instance(z5.chunks, tuple)

    # dtype=None keeps the source dtype
    a = np.arange(100, dtype='i4')
    z = array(a, dtype=None)
    assert_array_equal(a[:], z[:])
    eq(a.dtype, z.dtype)

    # an explicit dtype overrides the source dtype
    a = np.arange(100, dtype='i4')
    z = array(a, dtype='i8')
    assert_array_equal(a[:], z[:])
    eq(np.dtype('i8'), z.dtype)
def test_array():
    """array() accepts numpy arrays, array-likes, zarr arrays, and chunky
    array-likes, and honours the dtype argument."""
    # from a numpy array
    src = np.arange(100)
    z = array(src, chunks=10)
    assert src.shape == z.shape
    assert src.dtype == z.dtype
    assert_array_equal(src, z[:])

    # from a generic array-like (list)
    src = list(range(100))
    z = array(src, chunks=10)
    assert (100,) == z.shape
    assert np.asarray(src).dtype == z.dtype
    assert_array_equal(np.asarray(src), z[:])

    # from another zarr array (shape/chunks/dtype are inherited)
    z2 = array(z)
    assert z.shape == z2.shape
    assert z.chunks == z2.chunks
    assert z.dtype == z2.dtype
    assert_array_equal(z[:], z2[:])

    # bcolz-style chunklen becomes the first chunk dimension
    base = np.arange(1000).reshape(100, 10)
    chunky = MockBcolzArray(base, 10)
    z3 = array(chunky)
    assert chunky.shape == z3.shape
    assert (10, 10) == z3.chunks

    # h5py-style chunks tuple is taken as-is
    base = np.arange(1000).reshape(100, 10)
    chunky = MockH5pyDataset(base, chunks=(10, 2))
    z4 = array(chunky)
    assert chunky.shape == z4.shape
    assert (10, 2) == z4.chunks

    # h5py dataset without chunks still yields some tuple of chunks
    chunky = MockH5pyDataset(base, chunks=None)
    z5 = array(chunky)
    assert chunky.shape == z5.shape
    assert isinstance(z5.chunks, tuple)

    # dtype=None keeps the source dtype
    src = np.arange(100, dtype='i4')
    z = array(src, dtype=None)
    assert_array_equal(src[:], z[:])
    assert src.dtype == z.dtype

    # an explicit dtype overrides the source dtype
    src = np.arange(100, dtype='i4')
    z = array(src, dtype='i8')
    assert_array_equal(src[:], z[:])
    assert np.dtype('i8') == z.dtype
def test_array():
    """array() accepts numpy arrays, array-likes, zarr arrays, and chunky
    array-likes, and honours the dtype argument."""
    # from a numpy array
    data = np.arange(100)
    z = array(data, chunks=10)
    assert data.shape == z.shape
    assert data.dtype == z.dtype
    assert_array_equal(data, z[:])

    # from a generic array-like (list)
    data = list(range(100))
    z = array(data, chunks=10)
    assert (100,) == z.shape
    assert np.asarray(data).dtype == z.dtype
    assert_array_equal(np.asarray(data), z[:])

    # from another zarr array (shape/chunks/dtype are inherited)
    z2 = array(z)
    assert z.shape == z2.shape
    assert z.chunks == z2.chunks
    assert z.dtype == z2.dtype
    assert_array_equal(z[:], z2[:])

    # bcolz-style chunklen becomes the first chunk dimension
    raw = np.arange(1000).reshape(100, 10)
    src = MockBcolzArray(raw, 10)
    z3 = array(src)
    assert src.shape == z3.shape
    assert (10, 10) == z3.chunks

    # h5py-style chunks tuple is taken as-is
    raw = np.arange(1000).reshape(100, 10)
    src = MockH5pyDataset(raw, chunks=(10, 2))
    z4 = array(src)
    assert src.shape == z4.shape
    assert (10, 2) == z4.chunks

    # h5py dataset without chunks still yields some tuple of chunks
    src = MockH5pyDataset(raw, chunks=None)
    z5 = array(src)
    assert src.shape == z5.shape
    assert isinstance(z5.chunks, tuple)

    # dtype=None keeps the source dtype
    data = np.arange(100, dtype='i4')
    z = array(data, dtype=None)
    assert_array_equal(data[:], z[:])
    assert data.dtype == z.dtype

    # an explicit dtype overrides the source dtype
    data = np.arange(100, dtype='i4')
    z = array(data, dtype='i8')
    assert_array_equal(data[:], z[:])
    assert np.dtype('i8') == z.dtype
def test_array():
    """array() accepts numpy arrays, array-likes, zarr arrays, and chunky
    array-likes, and honours the dtype argument."""
    # from a numpy array
    a = np.arange(100)
    z = array(a, chunks=10)
    eq(a.shape, z.shape)
    eq(a.dtype, z.dtype)
    assert_array_equal(a, z[:])

    # from a generic array-like (list)
    a = list(range(100))
    z = array(a, chunks=10)
    eq((100,), z.shape)
    eq(np.asarray(a).dtype, z.dtype)
    assert_array_equal(np.asarray(a), z[:])

    # from another zarr array (shape/chunks/dtype are inherited)
    z2 = array(z)
    eq(z.shape, z2.shape)
    eq(z.chunks, z2.chunks)
    eq(z.dtype, z2.dtype)
    assert_array_equal(z[:], z2[:])

    # bcolz-style chunklen becomes the first chunk dimension
    b = np.arange(1000).reshape(100, 10)
    c = MockBcolzArray(b, 10)
    z3 = array(c)
    eq(c.shape, z3.shape)
    eq((10, 10), z3.chunks)

    # h5py-style chunks tuple is taken as-is
    b = np.arange(1000).reshape(100, 10)
    c = MockH5pyDataset(b, chunks=(10, 2))
    z4 = array(c)
    eq(c.shape, z4.shape)
    eq((10, 2), z4.chunks)

    # h5py dataset without chunks still yields some tuple of chunks
    c = MockH5pyDataset(b, chunks=None)
    z5 = array(c)
    eq(c.shape, z5.shape)
    assert_is_instance(z5.chunks, tuple)

    # dtype=None keeps the source dtype
    a = np.arange(100, dtype='i4')
    z = array(a, dtype=None)
    assert_array_equal(a[:], z[:])
    eq(a.dtype, z.dtype)

    # an explicit dtype overrides the source dtype
    a = np.arange(100, dtype='i4')
    z = array(a, dtype='i8')
    assert_array_equal(a[:], z[:])
    eq(np.dtype('i8'), z.dtype)
def _create_dataset_nosync(self, name, data=None, **kwargs):
    """Create a dataset under this group without synchronization.

    With no `data`, an empty array is created; otherwise the array is
    initialised from `data`. Remaining kwargs pass through to the array
    constructor.
    """
    path = self._item_path(name)
    # inherit the group's synchronizer unless the caller supplied one
    kwargs.setdefault('synchronizer', self._synchronizer)
    # storage arguments shared by both construction paths
    common = dict(store=self._store, path=path,
                  chunk_store=self._chunk_store)
    if data is None:
        return create(**common, **kwargs)
    return array(data, **common, **kwargs)
def _create_dataset_nosync(self, name, data=None, **kwargs):
    """Create a dataset under this group without synchronization.

    With no `data`, an empty array is created; otherwise the array is
    initialised from `data`. Remaining kwargs pass through to the array
    constructor.
    """
    path = self._item_path(name)
    # inherit the group's synchronizer and attribute-cache setting
    # unless the caller supplied their own
    kwargs.setdefault('synchronizer', self._synchronizer)
    kwargs.setdefault('cache_attrs', self.attrs.cache)
    # storage arguments shared by both construction paths
    common = dict(store=self._store, path=path,
                  chunk_store=self._chunk_store)
    if data is None:
        return create(**common, **kwargs)
    return array(data, **common, **kwargs)
def test_create_read_only():
    """The read_only flag blocks writes and can be toggled at runtime.

    See https://github.com/alimanfoo/zarr/issues/151.
    """
    # created read-only: writes fail until the flag is cleared
    z = create(100, read_only=True)
    assert z.read_only
    with pytest.raises(PermissionError):
        z[:] = 42
    z.read_only = False
    z[:] = 42
    assert np.all(z[...] == 42)
    z.read_only = True
    with pytest.raises(PermissionError):
        z[:] = 0

    # subtly different: the array is created *with* data, then is read-only
    a = np.arange(100)
    z = array(a, read_only=True)
    assert_array_equal(a, z[...])
    assert z.read_only
    with pytest.raises(PermissionError):
        z[:] = 42
def test_create_read_only():
    """The read_only flag blocks writes and can be toggled at runtime.

    See https://github.com/alimanfoo/zarr/issues/151.
    """
    # created read-only: writes fail until the flag is cleared
    z = create(100, read_only=True)
    assert z.read_only
    with assert_raises(PermissionError):
        z[:] = 42
    z.read_only = False
    z[:] = 42
    assert np.all(z[...] == 42)
    z.read_only = True
    with assert_raises(PermissionError):
        z[:] = 0

    # subtly different: the array is created *with* data, then is read-only
    a = np.arange(100)
    z = array(a, read_only=True)
    assert_array_equal(a, z[...])
    assert z.read_only
    with assert_raises(PermissionError):
        z[:] = 42
def test_array_with_categorize_filter():
    """Categorize filter on unicode data round-trips and each stored chunk
    matches a manual encoding."""
    # setup
    data = np.random.choice([u'foo', u'bar', u'baz'], size=100)
    flt = Categorize(dtype=data.dtype, labels=[u'foo', u'bar', u'baz'])
    filters = [flt]
    for compressor in compressors:
        a = array(data, chunks=5, compressor=compressor, filters=filters)
        # check round-trip
        assert_array_equal(data, a[:])
        # compare every stored chunk against encoding the slice by hand
        for i in range(20):
            cdata = a.store[str(i)]
            chunk = a.compressor.decode(cdata) if a.compressor else cdata
            actual = np.frombuffer(chunk, dtype='u1')
            expect = flt.encode(data[i * 5:(i * 5) + 5])
            assert_array_equal(expect, actual)
def test_array_with_packbits_filter():
    """PackBits filter on boolean data round-trips and each stored chunk
    matches a manual encoding."""
    # setup
    flt = PackBits()
    filters = [flt]
    data = np.random.randint(0, 2, size=100, dtype=bool)
    for compressor in compressors:
        a = array(data, chunks=5, compressor=compressor, filters=filters)
        # check round-trip
        assert_array_equal(data, a[:])
        # compare every stored chunk against encoding the slice by hand
        for i in range(20):
            cdata = a.store[str(i)]
            chunk = compressor.decode(cdata) if compressor else cdata
            actual = np.frombuffer(chunk, dtype='u1')
            expect = flt.encode(data[i * 5:(i * 5) + 5])
            assert_array_equal(expect, actual)
def _array_nosync(self, name, data, **kwargs):
    """Create an array initialised from `data` under this group, without
    synchronization; kwargs pass through to the array constructor."""
    path = self._item_path(name)
    # inherit the group's synchronizer unless the caller supplied one
    kwargs.setdefault('synchronizer', self._synchronizer)
    result = array(data, store=self._store, path=path,
                   chunk_store=self._chunk_store, **kwargs)
    return result
def _array_nosync(self, name, data, **kwargs):
    """Create an array initialised from `data` under this group, without
    synchronization; kwargs pass through to the array constructor."""
    path = self._item_path(name)
    # inherit the group's synchronizer and attribute-cache setting
    # unless the caller supplied their own
    kwargs.setdefault('synchronizer', self._synchronizer)
    kwargs.setdefault('cache_attrs', self.attrs.cache)
    result = array(data, store=self._store, path=path,
                   chunk_store=self._chunk_store, **kwargs)
    return result