示例#1
0
def test_non_numpy_inputs():
    """Check that MsgPack round-trips plain Python (non-numpy) inputs.

    Each input is encoded and decoded, and the result is compared with the
    array numpy builds from the same input: shape and contents must agree.
    """
    codec = MsgPack()
    # numpy will infer a range of different shapes and dtypes for these inputs.
    # Make sure that round-tripping through encode preserves this.
    data = [
        [0, 1],
        [[0, 1], [2, 3]],
        [[0], [1], [2, 3]],
        [[[0, 0]], [[1, 1]], [[2, 3]]],
        ["1"],
        ["11", "11"],
        ["11", "1", "1"],
        [{}],
        [{
            "key": "value"
        }, ["list", "of", "strings"]],
        [b"1"],
        [b"11", b"11"],
        [b"11", b"1", b"1"],
        [{
            b"key": b"value"
        }, [b"list", b"of", b"strings"]],
    ]
    for input_data in data:
        actual = codec.decode(codec.encode(input_data))
        try:
            expect = np.array(input_data)
        except ValueError:
            # Newer NumPy releases refuse to build an array from ragged
            # (inhomogeneous) nested sequences unless dtype=object is given
            # explicitly; older releases produced an object array implicitly.
            # Ask for the object array so the expectation is the same on both.
            expect = np.array(input_data, dtype=object)
        assert expect.shape == actual.shape
        assert np.array_equal(expect, actual)
示例#2
0
def test_legacy_codec_broken():
    """Show the shape bug in LegacyMsgPack and that MsgPack does not have it.

    The LegacyMsgPack codec didn't include shape information in the serialised
    bytes, so under certain circumstances the decoded array has a different
    shape from the encoded one. This is the simplest demonstration of why the
    MsgPack codec needed to be changed.
    """
    # 1-d object array whose two elements are themselves lists.
    a = np.empty(2, dtype=object)
    for idx, row in enumerate(([0, 1], [2, 3])):
        a[idx] = row

    legacy = LegacyMsgPack()
    legacy_result = legacy.decode(legacy.encode(a))
    assert a.shape == (2, )
    # The legacy round trip yields a 2-d array instead of the original 1-d one.
    assert legacy_result.shape == (2, 2)
    assert not np.array_equal(a, legacy_result)

    # Now show that the MsgPack codec handles this case properly.
    current = MsgPack()
    current_result = current.decode(current.encode(a))
    assert np.array_equal(a, current_result)
示例#3
0
# -*- coding: utf-8 -*-
from __future__ import absolute_import, print_function, division
import unittest
import itertools

import numpy as np

# Import the codecs under test and build one instance of each. If the import
# raises ImportError the whole module is skipped (reported as "msgpack not
# available") instead of failing.
try:
    from numcodecs.msgpacks import LegacyMsgPack, MsgPack
    codecs = [LegacyMsgPack(), MsgPack()]
except ImportError:  # pragma: no cover
    raise unittest.SkipTest("msgpack not available")

from numcodecs.tests.common import (check_config, check_repr,
                                    check_encode_decode_array,
                                    check_backwards_compatibility, greetings)

# object array with strings
# object array with mix strings / nans
# object array with mix of string, int, float
# ...
arrays = [
    np.array(['foo', 'bar', 'baz'] * 300, dtype=object),
    np.array([['foo', 'bar', np.nan]] * 300, dtype=object),
    np.array(['foo', 1.0, 2] * 300, dtype=object),
    np.arange(1000, dtype='i4'),
    np.array(['foo', 'bar', 'baz'] * 300),
    np.array(['foo', ['bar', 1.0, 2], {
        'a': 'b',
        'c': 42
    }] * 300, dtype=object),
示例#4
0
def test_bytes():
    """Check how each msgpack codec round-trips bytes and unicode strings.

    Covers ``MsgPack()`` (raw=False), ``MsgPack(raw=True)`` and the legacy
    codec, asserting both the value and the Python type of the first decoded
    element.
    """

    def round_trip(codec, arr):
        # encode and immediately decode with the same codec instance
        return codec.decode(codec.encode(arr))

    bytes_arr = np.array([b'foo', b'bar', b'baz'], dtype=object)
    unicode_arr = np.array([u'foo', u'bar', u'baz'], dtype=object)

    # --- raw=False (default) ---
    default = MsgPack()
    # bytes in, bytes out
    out = round_trip(default, bytes_arr)
    assert np.array_equal(bytes_arr, out)
    first = out[0]
    assert isinstance(first, binary_type)
    assert first == b'foo'
    # unicode in, unicode out
    out = round_trip(default, unicode_arr)
    assert np.array_equal(unicode_arr, out)
    first = out[0]
    assert isinstance(first, text_type)
    assert first == u'foo'

    # --- raw=True ---
    raw = MsgPack(raw=True)
    # bytes in, bytes out
    out = round_trip(raw, bytes_arr)
    assert np.array_equal(bytes_arr, out)
    first = out[0]
    assert isinstance(first, binary_type)
    assert first == b'foo'
    # broken for unicode: unicode in, bytes out
    out = round_trip(raw, unicode_arr)
    if not PY2:  # pragma: py2 no cover
        assert not np.array_equal(unicode_arr, out)
    else:  # pragma: py3 no cover
        # PY2 considers b'foo' and u'foo' to be equal
        assert np.array_equal(unicode_arr, out)
    first = out[0]
    assert isinstance(first, binary_type)
    assert first == b'foo'

    # --- legacy codec ---
    legacy = LegacyMsgPack()
    with warnings.catch_warnings():
        # the PendingDeprecationWarning is silenced for the legacy round trips
        warnings.simplefilter('ignore', PendingDeprecationWarning)
        # broken for bytes: bytes in, unicode out
        out = round_trip(legacy, bytes_arr)
        if not PY2:  # pragma: py2 no cover
            assert not np.array_equal(bytes_arr, out)
        else:  # pragma: py3 no cover
            # PY2 considers b'foo' and u'foo' to be equal
            assert np.array_equal(unicode_arr, out)
        first = out[0]
        assert isinstance(first, text_type)
        assert first == u'foo'
        # unicode in, unicode out
        out = round_trip(legacy, unicode_arr)
        assert np.array_equal(unicode_arr, out)
        first = out[0]
        assert isinstance(first, text_type)
        assert first == u'foo'
示例#5
0
# -*- coding: utf-8 -*-
from __future__ import absolute_import, print_function, division
import unittest
import warnings

import numpy as np

# Import the codecs under test and build the module-level instances. If the
# import raises ImportError the whole module is skipped (reported as "msgpack
# not available") instead of failing.
try:
    from numcodecs.msgpacks import LegacyMsgPack, MsgPack
    default_codec = MsgPack()
    # N.B., the legacy codec is broken, see tests below. The legacy codec also
    # generates a PendingDeprecationWarning due to its use of the encoding
    # argument, which we ignore here as not relevant.
    legacy_codec = LegacyMsgPack()
except ImportError:  # pragma: no cover
    raise unittest.SkipTest("msgpack not available")

from numcodecs.tests.common import (check_config, check_repr,
                                    check_encode_decode_array,
                                    check_backwards_compatibility, greetings)
from numcodecs.compat import text_type, binary_type, PY2

# object array with strings
# object array with mix strings / nans
# object array with mix of string, int, float
# ...
arrays = [
    np.array([u'foo', u'bar', u'baz'] * 300, dtype=object),
    np.array([[u'foo', u'bar', np.nan]] * 300, dtype=object),
    np.array([u'foo', 1.0, 2] * 300, dtype=object),
    np.arange(1000, dtype='i4'),
示例#6
0
def test_bytes():
    """Check how MsgPack round-trips bytes and str with raw=False and raw=True.

    For each case both the value and the Python type of the first decoded
    element are asserted.
    """

    def round_trip(codec, arr):
        # encode and immediately decode with the same codec instance
        return codec.decode(codec.encode(arr))

    bytes_arr = np.array([b'foo', b'bar', b'baz'], dtype=object)
    unicode_arr = np.array(['foo', 'bar', 'baz'], dtype=object)

    # --- raw=False (default) ---
    default = MsgPack()
    # bytes in, bytes out
    out = round_trip(default, bytes_arr)
    assert np.array_equal(bytes_arr, out)
    first = out[0]
    assert isinstance(first, bytes)
    assert first == b'foo'
    # str in, str out
    out = round_trip(default, unicode_arr)
    assert np.array_equal(unicode_arr, out)
    first = out[0]
    assert isinstance(first, str)
    assert first == 'foo'

    # --- raw=True ---
    raw = MsgPack(raw=True)
    # bytes in, bytes out
    out = round_trip(raw, bytes_arr)
    assert np.array_equal(bytes_arr, out)
    first = out[0]
    assert isinstance(first, bytes)
    assert first == b'foo'
    # broken for str: str in, bytes out
    out = round_trip(raw, unicode_arr)
    assert not np.array_equal(unicode_arr, out)
    first = out[0]
    assert isinstance(first, bytes)
    assert first == b'foo'
示例#7
0
def test_encode_decode_shape_dtype_preserved():
    """Every fixture array keeps its shape and dtype through a MsgPack round trip."""
    codec = MsgPack()
    for original in arrays:
        encoded = codec.encode(original)
        restored = codec.decode(encoded)
        assert original.shape == restored.shape
        assert original.dtype == restored.dtype
示例#8
0
def test_backwards_compatibility():
    """Run the shared backwards-compatibility check for the MsgPack codec."""
    msgpack_codec = MsgPack()
    codec_id = msgpack_codec.codec_id
    check_backwards_compatibility(codec_id, arrays, [msgpack_codec])
示例#9
0
def test_config():
    """Run the shared config check against a default MsgPack codec."""
    codec = MsgPack()
    check_config(codec)
示例#10
0
def test_encode_decode():
    """Run the shared encode/decode check on every fixture array."""
    for fixture in arrays:
        # a fresh codec instance is used for each array
        fresh_codec = MsgPack()
        check_encode_decode_array(fixture, fresh_codec)
示例#11
0
def test_backwards_compatibility():
    """Run the shared backwards-compatibility check for the MsgPack codec."""
    # the codec id is read from the class, not from an instance
    codec_id = MsgPack.codec_id
    codecs_under_test = [MsgPack()]
    check_backwards_compatibility(codec_id, arrays, codecs_under_test)
示例#12
0
def test_config():
    """Run the shared config check against a default MsgPack codec."""
    check_config(MsgPack())