def test_non_numpy_inputs():
    """Round-trip plain Python containers through MsgPack.

    Every sample is encoded and decoded, and the decoded result must match
    ``np.array(sample)`` in both shape and contents.
    """
    codec = MsgPack()
    # numpy infers a variety of shapes and dtypes for these inputs; the
    # encode/decode round trip has to preserve whatever it infers.
    samples = [
        [0, 1],
        [[0, 1], [2, 3]],
        [[0], [1], [2, 3]],
        [[[0, 0]], [[1, 1]], [[2, 3]]],
        ["1"],
        ["11", "11"],
        ["11", "1", "1"],
        [{}],
        [{"key": "value"}, ["list", "of", "strings"]],
        [b"1"],
        [b"11", b"11"],
        [b"11", b"1", b"1"],
        [{b"key": b"value"}, [b"list", b"of", b"strings"]],
    ]
    for sample in samples:
        encoded = codec.encode(sample)
        decoded = codec.decode(encoded)
        expected = np.array(sample)
        assert expected.shape == decoded.shape
        assert np.array_equal(expected, decoded)
def test_legacy_codec_broken():
    """Show the shape mismatch LegacyMsgPack produces and MsgPack avoids."""
    # Simplest demonstration of why the MsgPack codec needed to be changed.
    # The LegacyMsgPack codec didn't include shape information in the
    # serialised bytes, which gave different shapes in the input and output
    # under certain circumstances.
    original = np.empty(2, dtype=object)
    for index, pair in enumerate(([0, 1], [2, 3])):
        # Element-wise assignment keeps each list as a single object entry.
        original[index] = pair

    legacy = LegacyMsgPack()
    restored = legacy.decode(legacy.encode(original))
    assert original.shape == (2,)
    assert restored.shape == (2, 2)
    assert not np.array_equal(original, restored)

    # Now show that the MsgPack codec handles this case properly.
    current = MsgPack()
    restored = current.decode(current.encode(original))
    assert np.array_equal(original, restored)
# -*- coding: utf-8 -*- from __future__ import absolute_import, print_function, division import unittest import itertools import numpy as np try: from numcodecs.msgpacks import LegacyMsgPack, MsgPack codecs = [LegacyMsgPack(), MsgPack()] except ImportError: # pragma: no cover raise unittest.SkipTest("msgpack not available") from numcodecs.tests.common import (check_config, check_repr, check_encode_decode_array, check_backwards_compatibility, greetings) # object array with strings # object array with mix strings / nans # object array with mix of string, int, float # ... arrays = [ np.array(['foo', 'bar', 'baz'] * 300, dtype=object), np.array([['foo', 'bar', np.nan]] * 300, dtype=object), np.array(['foo', 1.0, 2] * 300, dtype=object), np.arange(1000, dtype='i4'), np.array(['foo', 'bar', 'baz'] * 300), np.array(['foo', ['bar', 1.0, 2], { 'a': 'b', 'c': 42 }] * 300, dtype=object),
def test_bytes():
    """Check how each msgpack codec round-trips bytes and unicode objects."""

    def round_trip(codec, arr):
        # Encode then decode `arr` with the given codec.
        return codec.decode(codec.encode(arr))

    raw_items = np.array([b'foo', b'bar', b'baz'], dtype=object)
    text_items = np.array([u'foo', u'bar', u'baz'], dtype=object)

    # --- raw=False (default) ---
    default = MsgPack()

    # bytes array: bytes come back as bytes
    out = round_trip(default, raw_items)
    assert np.array_equal(raw_items, out)
    assert isinstance(out[0], binary_type)
    assert out[0] == b'foo'

    # unicode array: unicode comes back as unicode
    out = round_trip(default, text_items)
    assert np.array_equal(text_items, out)
    assert isinstance(out[0], text_type)
    assert out[0] == u'foo'

    # --- raw=True ---
    raw_codec = MsgPack(raw=True)

    # bytes array: bytes come back as bytes
    out = round_trip(raw_codec, raw_items)
    assert np.array_equal(raw_items, out)
    assert isinstance(out[0], binary_type)
    assert out[0] == b'foo'

    # unicode array is broken here: unicode comes back as bytes
    out = round_trip(raw_codec, text_items)
    if PY2:  # pragma: py3 no cover
        # PY2 considers b'foo' and u'foo' to be equal
        assert np.array_equal(text_items, out)
    else:  # pragma: py2 no cover
        assert not np.array_equal(text_items, out)
    assert isinstance(out[0], binary_type)
    assert out[0] == b'foo'

    # --- legacy codec ---
    legacy = LegacyMsgPack()
    with warnings.catch_warnings():
        # PendingDeprecationWarning is silenced for the legacy round trips.
        warnings.simplefilter('ignore', PendingDeprecationWarning)

        # bytes array is broken here: bytes come back as unicode
        out = round_trip(legacy, raw_items)
        if PY2:  # pragma: py3 no cover
            # PY2 considers b'foo' and u'foo' to be equal
            assert np.array_equal(text_items, out)
        else:  # pragma: py2 no cover
            assert not np.array_equal(raw_items, out)
        assert isinstance(out[0], text_type)
        assert out[0] == u'foo'

        # unicode array: unicode comes back as unicode
        out = round_trip(legacy, text_items)
        assert np.array_equal(text_items, out)
        assert isinstance(out[0], text_type)
        assert out[0] == u'foo'
# -*- coding: utf-8 -*- from __future__ import absolute_import, print_function, division import unittest import warnings import numpy as np try: from numcodecs.msgpacks import LegacyMsgPack, MsgPack default_codec = MsgPack() # N.B., legacy codec is broken, see tests below. Also legacy code generates # PendingDeprecationWarning due to use of encoding argument, which we ignore here # as not relevant. legacy_codec = LegacyMsgPack() except ImportError: # pragma: no cover raise unittest.SkipTest("msgpack not available") from numcodecs.tests.common import (check_config, check_repr, check_encode_decode_array, check_backwards_compatibility, greetings) from numcodecs.compat import text_type, binary_type, PY2 # object array with strings # object array with mix strings / nans # object array with mix of string, int, float # ... arrays = [ np.array([u'foo', u'bar', u'baz'] * 300, dtype=object), np.array([[u'foo', u'bar', np.nan]] * 300, dtype=object), np.array([u'foo', 1.0, 2] * 300, dtype=object), np.arange(1000, dtype='i4'),
def test_bytes():
    """Check how MsgPack round-trips bytes and str, with and without raw."""

    def round_trip(codec, arr):
        # Encode then decode `arr` with the given codec.
        return codec.decode(codec.encode(arr))

    raw_items = np.array([b'foo', b'bar', b'baz'], dtype=object)
    text_items = np.array(['foo', 'bar', 'baz'], dtype=object)

    # --- raw=False (default) ---
    default = MsgPack()

    # bytes array: bytes come back as bytes
    out = round_trip(default, raw_items)
    assert np.array_equal(raw_items, out)
    assert isinstance(out[0], bytes)
    assert out[0] == b'foo'

    # unicode array: str comes back as str
    out = round_trip(default, text_items)
    assert np.array_equal(text_items, out)
    assert isinstance(out[0], str)
    assert out[0] == 'foo'

    # --- raw=True ---
    raw_codec = MsgPack(raw=True)

    # bytes array: bytes come back as bytes
    out = round_trip(raw_codec, raw_items)
    assert np.array_equal(raw_items, out)
    assert isinstance(out[0], bytes)
    assert out[0] == b'foo'

    # unicode array is broken here: str comes back as bytes
    out = round_trip(raw_codec, text_items)
    assert not np.array_equal(text_items, out)
    assert isinstance(out[0], bytes)
    assert out[0] == b'foo'
def test_encode_decode_shape_dtype_preserved():
    """Verify a MsgPack round trip keeps the shape and dtype of each array."""
    codec = MsgPack()
    for original in arrays:
        encoded = codec.encode(original)
        restored = codec.decode(encoded)
        assert original.shape == restored.shape
        assert original.dtype == restored.dtype
def test_backwards_compatibility():
    """Run the shared backwards-compatibility check for a MsgPack codec."""
    msgpack_codec = MsgPack()
    # The codec id is read from the instance that is also being checked.
    codec_id = msgpack_codec.codec_id
    candidates = [msgpack_codec]
    check_backwards_compatibility(codec_id, arrays, candidates)
def test_config():
    """Run the shared config check against a default MsgPack codec."""
    default_msgpack = MsgPack()
    check_config(default_msgpack)
def test_encode_decode():
    """Run the shared encode/decode check for every array in ``arrays``."""
    for sample in arrays:
        # A fresh codec instance is built for each array.
        fresh_codec = MsgPack()
        check_encode_decode_array(sample, fresh_codec)
def test_backwards_compatibility():
    """Run the shared backwards-compatibility check for MsgPack."""
    # The codec id is read from the class, not from an instance.
    codec_id = MsgPack.codec_id
    candidates = [MsgPack()]
    check_backwards_compatibility(codec_id, arrays, candidates)
def test_config():
    """Run the shared config check against a default MsgPack codec."""
    check_config(MsgPack())