def test_ensure_bytes_invalid_inputs():
    # object array not allowed
    a = np.array([u'Xin chào thế giới'], dtype=object)
    for e in [a, memoryview(a)]:
        with pytest.raises(TypeError):
            ensure_bytes(e)
def __init__(self, labels, dtype, astype='u1'):
    self.dtype = np.dtype(dtype)
    if self.dtype.kind == 'S':
        self.labels = [ensure_bytes(l) for l in labels]
    elif self.dtype.kind == 'U':
        self.labels = [ensure_text(l) for l in labels]
    else:
        self.labels = labels
    self.astype = np.dtype(astype)
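# Hedged illustration (not from the original source) of the label normalisation
# performed in the __init__ above: for an 'S' (bytes) dtype the labels are
# coerced with ensure_bytes, for a 'U' (text) dtype with ensure_text, so a
# mixed str/bytes label list becomes homogeneous before encoding.
from numcodecs.compat import ensure_bytes, ensure_text

labels = ['foo', b'bar']
bytes_labels = [ensure_bytes(l) for l in labels]   # [b'foo', b'bar']
text_labels = [ensure_text(l) for l in labels]     # ['foo', 'bar']
assert all(isinstance(l, bytes) for l in bytes_labels)
assert all(isinstance(l, str) for l in text_labels)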
def test_ensure_bytes():
    bufs = [
        b'adsdasdas',
        bytes(20),
        np.arange(100),
        array.array('l', b'qwertyuiqwertyui')
    ]
    for buf in bufs:
        b = ensure_bytes(buf)
        assert isinstance(b, bytes)
def check_encode_decode(arr, codec, precision=None):

    # N.B., watch out here with the blosc compressor: if the itemsize of
    # the source buffer is different, then the results of encoding
    # (i.e., compression) may be different. Hence we *do not* require that
    # the results of encoding be identical for all possible inputs, rather
    # we just require that the results of the encode/decode round-trip can
    # be compared to the original array.

    # encoding should support any object exporting the buffer protocol

    # test encoding of numpy array
    enc = codec.encode(arr)
    dec = codec.decode(enc)
    compare_arrays(arr, dec, precision=precision)

    # test encoding of bytes
    buf = arr.tobytes(order='A')
    enc = codec.encode(buf)
    dec = codec.decode(enc)
    compare_arrays(arr, dec, precision=precision)

    # test encoding of bytearray
    buf = bytearray(arr.tobytes(order='A'))
    enc = codec.encode(buf)
    dec = codec.decode(enc)
    compare_arrays(arr, dec, precision=precision)

    # test encoding of array.array
    buf = array.array('b', arr.tobytes(order='A'))
    enc = codec.encode(buf)
    dec = codec.decode(enc)
    compare_arrays(arr, dec, precision=precision)

    # decoding should support any object exporting the buffer protocol

    # setup
    enc_bytes = ensure_bytes(enc)

    # test decoding of raw bytes
    dec = codec.decode(enc_bytes)
    compare_arrays(arr, dec, precision=precision)

    # test decoding of bytearray
    dec = codec.decode(bytearray(enc_bytes))
    compare_arrays(arr, dec, precision=precision)

    # test decoding of array.array
    buf = array.array('b', enc_bytes)
    dec = codec.decode(buf)
    compare_arrays(arr, dec, precision=precision)

    # test decoding of numpy array
    buf = np.frombuffer(enc_bytes, dtype='u1')
    dec = codec.decode(buf)
    compare_arrays(arr, dec, precision=precision)

    # test decoding directly into numpy array
    out = np.empty_like(arr)
    codec.decode(enc_bytes, out=out)
    compare_arrays(arr, out, precision=precision)

    # test decoding directly into bytearray
    out = bytearray(arr.nbytes)
    codec.decode(enc_bytes, out=out)
    # noinspection PyTypeChecker
    compare_arrays(arr, out, precision=precision)
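# Hedged usage sketch (not from the original source): check_encode_decode is
# intended to be driven from codec-specific tests, alongside helpers such as
# compare_arrays from the same test module. As an example, with numcodecs.Zlib
# the helper round-trips the array through every supported buffer type.
import numpy as np
from numcodecs import Zlib

check_encode_decode(np.arange(1000, dtype='<i4'), Zlib(level=1))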
def check_backwards_compatibility(codec_id, arrays, codecs, precision=None,
                                  prefix=None):

    # setup directory to hold data fixture
    if prefix:
        fixture_dir = os.path.join('fixture', codec_id, prefix)
    else:
        fixture_dir = os.path.join('fixture', codec_id)
    if not os.path.exists(fixture_dir):  # pragma: no cover
        os.makedirs(fixture_dir)

    # save fixture data
    for i, arr in enumerate(arrays):
        arr_fn = os.path.join(fixture_dir, 'array.{:02d}.npy'.format(i))
        if not os.path.exists(arr_fn):  # pragma: no cover
            np.save(arr_fn, arr)

    # load fixture data
    for arr_fn in glob(os.path.join(fixture_dir, 'array.*.npy')):

        # setup
        i = int(arr_fn.split('.')[-2])
        arr = np.load(arr_fn, allow_pickle=True)
        arr_bytes = arr.tobytes(order='A')
        if arr.flags.f_contiguous:
            order = 'F'
        else:
            order = 'C'

        for j, codec in enumerate(codecs):

            # setup a directory to hold encoded data
            codec_dir = os.path.join(fixture_dir, 'codec.{:02d}'.format(j))
            if not os.path.exists(codec_dir):  # pragma: no cover
                os.makedirs(codec_dir)

            # file with codec configuration information
            codec_fn = os.path.join(codec_dir, 'config.json')

            # one time save config
            if not os.path.exists(codec_fn):  # pragma: no cover
                with open(codec_fn, mode='w') as cf:
                    _json.dump(codec.get_config(), cf, sort_keys=True,
                               indent=4)

            # load config and compare with expectation
            with open(codec_fn, mode='r') as cf:
                config = _json.load(cf)
            assert codec == get_codec(config)

            enc_fn = os.path.join(codec_dir, 'encoded.{:02d}.dat'.format(i))

            # one time encode and save array
            if not os.path.exists(enc_fn):  # pragma: no cover
                enc = codec.encode(arr)
                enc = ensure_bytes(enc)
                with open(enc_fn, mode='wb') as ef:
                    ef.write(enc)

            # load and decode data
            with open(enc_fn, mode='rb') as ef:
                enc = ef.read()
            dec = codec.decode(enc)
            dec_arr = ensure_ndarray(dec).reshape(-1, order='A')
            dec_arr = dec_arr.view(dtype=arr.dtype).reshape(arr.shape,
                                                            order=order)
            if precision and precision[j] is not None:
                assert_array_almost_equal(arr, dec_arr, decimal=precision[j])
            elif arr.dtype == 'object':
                assert_array_items_equal(arr, dec_arr)
            else:
                assert_array_equal(arr, dec_arr)
                assert arr_bytes == ensure_bytes(dec)
def ensure_str(s):
    if not isinstance(s, str):
        s = ensure_bytes(s)
        if not PY2:  # pragma: py2 no cover
            s = s.decode('ascii')
    return s
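# Hedged illustration (not from the original source): on Python 3, ensure_str
# above normalises any bytes-like value to a native str by routing it through
# ensure_bytes and decoding as ASCII, while leaving existing str values alone.
assert ensure_str('abc') == 'abc'
assert ensure_str(b'abc') == 'abc'
assert ensure_str(bytearray(b'abc')) == 'abc'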
def __setitem__(self, key, value):
    value = ensure_bytes(value)
    blob_name = self._append_path_to_prefix(key)
    self.client.upload_blob(blob_name, value, overwrite=True)
def __setitem__(self, name, value):
    name = self._add_prefix(name)
    value = ensure_bytes(value)
    self.conn.put_object(self.container, name, value)
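# Hedged illustration (not from the original source): calling ensure_bytes in
# the store __setitem__ methods above lets callers pass any bytes-like value
# (bytes, bytearray, memoryview, or a contiguous numpy array) while the remote
# client always receives plain bytes.
import numpy as np
from numcodecs.compat import ensure_bytes

chunk = np.arange(8, dtype='<u2')
payload = ensure_bytes(chunk)          # raw little-endian chunk bytes
assert payload == chunk.tobytes()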
def __setitem__(self, key, value):
    """
    In v2 both metadata and data are mixed, so we'll need to convert
    things that end with .z to the metadata path.
    """
    # TODO convert to bytes if needed
    from numcodecs.compat import ensure_bytes

    parts = key.split("/")
    v3key = self._convert_2_to_3_keys(key)  # convert chunk separator from ``.`` to ``/``
    if key.endswith(".zarray"):
        data = json.loads(value.decode())
        for source, target in RENAMED_MAP.items():
            try:
                tmp = data[source]
            except KeyError:
                raise KeyError(f"{source} not found in {value}")
            del data[source]
            data[target] = tmp

        data["chunk_grid"] = {}
        data["chunk_grid"]["chunk_shape"] = data["chunks"]
        data["chunk_grid"]["type"] = "rectangular"
        data["chunk_grid"]["separator"] = "/"

        assert data["zarr_format"] == 2
        del data["zarr_format"]

        assert data["filters"] in ([], None), f"found filters: {data['filters']}"
        del data["filters"]

        data["extensions"] = []
        try:
            attrs = json.loads(self._v3store.get(v3key).decode())["attributes"]
        except KeyError:
            attrs = []
        data["attributes"] = attrs
        data = json.dumps(data, indent=4).encode()
    elif key.endswith(".zattrs"):
        try:
            # try zarray first...
            data = json.loads(self._v3store.get(v3key).decode())
        except KeyError:
            try:
                v3key = v3key.replace(".array", ".group")
                data = json.loads(self._v3store.get(v3key).decode())
            except KeyError:
                data = {}
        data["attributes"] = json.loads(value.decode())
        self._v3store.set(v3key, json.dumps(data, indent=4).encode())
        return
        # todo: we want to keep the .zattrs which is stored in the group/array
        # file, so to set, we need to get from the store, assign, update.
    elif v3key == "meta/root.group":
        # todo: this is wrong, the top md document is zarr.json.
        data = json.loads(value.decode())
        data["zarr_format"] = "https://purl.org/zarr/spec/protocol/core/3.0"
        data = json.dumps(data, indent=4).encode()
    elif v3key.endswith("/.group"):
        data = json.loads(value.decode())
        del data["zarr_format"]
        if "attributes" not in data:
            data["attributes"] = {}
        data = json.dumps(data).encode()
    else:
        data = value

    assert not isinstance(data, dict)
    self._v3store.set(v3key, ensure_bytes(data))