Пример #1
0
def test_vlen_dtype():
    """Exercise vlen dtype creation and the dtype inspection helpers.

    Covers a unicode element type, a bytes element type, and a plain
    object dtype.
    """
    # Unicode element type: recorded in the dtype metadata and recognised
    # only by the unicode predicate.
    unicode_vlen = strings.create_vlen_dtype(unicode_type)
    assert unicode_vlen.metadata['element_type'] == unicode_type
    assert strings.is_unicode_dtype(unicode_vlen)
    assert not strings.is_bytes_dtype(unicode_vlen)
    assert strings.check_vlen_dtype(unicode_vlen) is unicode_type

    # Bytes element type: the mirror image of the unicode case.
    bytes_vlen = strings.create_vlen_dtype(bytes_type)
    assert bytes_vlen.metadata['element_type'] == bytes_type
    assert not strings.is_unicode_dtype(bytes_vlen)
    assert strings.is_bytes_dtype(bytes_vlen)
    assert strings.check_vlen_dtype(bytes_vlen) is bytes_type

    # A bare object dtype is expected to report no element type.
    plain_object_dtype = np.dtype(object)
    assert strings.check_vlen_dtype(plain_object_dtype) is None
Пример #2
0
def test_vlen_dtype():
    """Check vlen dtypes for unicode and bytes, plus a plain object dtype."""
    # Each case pairs an element type with whether the resulting dtype
    # should be treated as unicode (True) or as bytes (False).
    cases = (
        (unicode_type, True),
        (bytes_type, False),
    )
    for element_type, expect_unicode in cases:
        vlen = strings.create_vlen_dtype(element_type)
        assert vlen.metadata['element_type'] == element_type
        assert bool(strings.is_unicode_dtype(vlen)) == expect_unicode
        assert bool(strings.is_bytes_dtype(vlen)) != expect_unicode
        assert strings.check_vlen_dtype(vlen) is element_type

    # An object dtype should yield no element type.
    assert strings.check_vlen_dtype(np.dtype(object)) is None
Пример #3
0
def test_vlen_dtype():
    """Verify vlen dtype helpers for ``str``, ``bytes`` and object dtypes."""
    # --- str-backed vlen dtype ---
    str_vlen = strings.create_vlen_dtype(str)
    recorded_type = str_vlen.metadata["element_type"]
    assert recorded_type == str
    assert strings.is_unicode_dtype(str_vlen)
    assert not strings.is_bytes_dtype(str_vlen)
    detected_type = strings.check_vlen_dtype(str_vlen)
    assert detected_type is str

    # --- bytes-backed vlen dtype ---
    bytes_vlen = strings.create_vlen_dtype(bytes)
    recorded_type = bytes_vlen.metadata["element_type"]
    assert recorded_type == bytes
    assert not strings.is_unicode_dtype(bytes_vlen)
    assert strings.is_bytes_dtype(bytes_vlen)
    detected_type = strings.check_vlen_dtype(bytes_vlen)
    assert detected_type is bytes

    # --- plain object dtype: no element type expected ---
    detected_type = strings.check_vlen_dtype(np.dtype(object))
    assert detected_type is None
Пример #4
0
def test_EncodedStringCoder_encode():
    """Check when ``EncodedStringCoder.encode`` produces UTF-8 bytes."""
    str_vlen = strings.create_vlen_dtype(str)
    text_values = np.array(["abc", "ß∂µ∆"], dtype=str_vlen)
    utf8_values = np.array(
        [text.encode("utf-8") for text in text_values], dtype=object
    )

    # A unicode-capable coder is expected to encode when the variable
    # carries an explicit "S1" encoding dtype.
    unicode_coder = strings.EncodedStringCoder(allows_unicode=True)
    with_encoding = Variable(("x",), text_values, encoding={"dtype": "S1"})
    encoded = unicode_coder.encode(with_encoding)
    encoded_expectation = Variable(
        ("x",), utf8_values, attrs={"_Encoding": "utf-8"}
    )
    assert_identical(encoded, encoded_expectation)

    # Without an encoding dtype the same coder should pass data through.
    without_encoding = Variable(("x",), text_values)
    assert_identical(unicode_coder.encode(without_encoding), without_encoding)

    # A coder that disallows unicode should encode even without it.
    bytes_only_coder = strings.EncodedStringCoder(allows_unicode=False)
    assert_identical(
        bytes_only_coder.encode(without_encoding), encoded_expectation
    )
Пример #5
0
def test_EncodedStringCoder_encode():
    """Verify the encode behaviour of ``EncodedStringCoder``.

    Three situations are checked: unicode allowed with an 'S1' encoding
    dtype, unicode allowed with no encoding, and unicode disallowed.
    """
    vlen_unicode = strings.create_vlen_dtype(unicode_type)
    text_values = np.array([u'abc', u'ß∂µ∆'], dtype=vlen_unicode)

    # Build the expected UTF-8 payload element by element.
    utf8_items = []
    for item in text_values:
        utf8_items.append(item.encode('utf-8'))
    bytes_values = np.array(utf8_items, dtype=object)

    lenient = strings.EncodedStringCoder(allows_unicode=True)
    tagged = Variable(('x', ), text_values, encoding={'dtype': 'S1'})
    result = lenient.encode(tagged)
    wanted = Variable(('x', ), bytes_values, attrs={'_Encoding': 'utf-8'})
    assert_identical(result, wanted)

    # No encoding dtype: the lenient coder should return the input as is.
    untagged = Variable(('x', ), text_values)
    assert_identical(lenient.encode(untagged), untagged)

    # Unicode not allowed: encoding should happen regardless.
    strict = strings.EncodedStringCoder(allows_unicode=False)
    assert_identical(strict.encode(untagged), wanted)
Пример #6
0
def test_EncodedStringCoder_encode():
    """Test ``EncodedStringCoder.encode`` with and without unicode support."""
    dims = ('x',)
    unicode_vlen = strings.create_vlen_dtype(unicode_type)
    source = np.array([u'abc', u'ß∂µ∆'], dtype=unicode_vlen)
    source_as_utf8 = np.array(
        [value.encode('utf-8') for value in source], dtype=object)

    # Case 1: unicode permitted, but the variable requests dtype 'S1'.
    permissive = strings.EncodedStringCoder(allows_unicode=True)
    requested_bytes = Variable(dims, source, encoding={'dtype': 'S1'})
    got = permissive.encode(requested_bytes)
    want = Variable(dims, source_as_utf8, attrs={'_Encoding': 'utf-8'})
    assert_identical(got, want)

    # Case 2: unicode permitted and nothing requested -> unchanged.
    no_request = Variable(dims, source)
    assert_identical(permissive.encode(no_request), no_request)

    # Case 3: unicode not permitted -> encoded output expected.
    restrictive = strings.EncodedStringCoder(allows_unicode=False)
    assert_identical(restrictive.encode(no_request), want)
Пример #7
0
@pytest.mark.parametrize(
    'original',
    [
        Variable(('x', ), [b'ab', b'cdef']),
        Variable((), b'ab'),
        Variable(('x', ), [b'a', b'b']),
        Variable((), b'a'),
    ],
)
def test_CharacterArrayCoder_roundtrip(original):
    """Encoding then decoding with CharacterArrayCoder must be lossless."""
    char_coder = strings.CharacterArrayCoder()
    encoded = char_coder.encode(original)
    decoded = char_coder.decode(encoded)
    assert_identical(original, decoded)


@pytest.mark.parametrize(
    'data',
    [
        np.array([b'a', b'bc']),
        np.array([b'a', b'bc'], dtype=strings.create_vlen_dtype(bytes_type)),
    ],
)
def test_CharacterArrayCoder_encode(data):
    """Check the character-array layout produced by CharacterArrayCoder.

    The expected result has an extra 'string2' dimension holding one
    byte per position, with the shorter entry padded by an empty byte.
    """
    char_coder = strings.CharacterArrayCoder()
    encoded = char_coder.encode(Variable(('x', ), data))
    char_matrix = np.array([[b'a', b''], [b'b', b'c']])
    wanted = Variable(('x', 'string2'), char_matrix)
    assert_identical(encoded, wanted)


def test_StackedBytesArray():
    """Check that StackedBytesArray reports the dtype of the joined strings.

    Only the dtype is compared in the visible part of this test.
    """
    char_rows = [[b'a', b'b', b'c'], [b'd', b'e', b'f']]
    chars = np.array(char_rows, dtype='S')
    stacked = strings.StackedBytesArray(chars)
    joined = np.array([b'abc', b'def'], dtype='S')
    assert stacked.dtype == joined.dtype
Пример #8
0
@pytest.mark.parametrize(
    'original',
    [
        Variable(('x',), [b'ab', b'cdef']),
        Variable((), b'ab'),
        Variable(('x',), [b'a', b'b']),
        Variable((), b'a'),
    ],
)
def test_CharacterArrayCoder_roundtrip(original):
    """A CharacterArrayCoder encode/decode cycle should return the input."""
    codec = strings.CharacterArrayCoder()
    # Encode first, then feed the result straight back through decode.
    assert_identical(original, codec.decode(codec.encode(original)))


@pytest.mark.parametrize(
    'data',
    [
        np.array([b'a', b'bc']),
        np.array([b'a', b'bc'], dtype=strings.create_vlen_dtype(bytes_type)),
    ],
)
def test_CharacterArrayCoder_encode(data):
    """Encoding byte strings should yield a 2-D array of single bytes.

    Both the fixed-width and the vlen-bytes input are expected to give
    the same ('x', 'string2') result.
    """
    codec = strings.CharacterArrayCoder()
    source = Variable(('x',), data)
    produced = codec.encode(source)
    single_bytes = np.array([[b'a', b''], [b'b', b'c']])
    assert_identical(produced, Variable(('x', 'string2'), single_bytes))


def test_StackedBytesArray():
    """StackedBytesArray over single-byte columns has the joined dtype."""
    # Two rows of three single-byte entries each.
    single_bytes = np.array(
        [[b'a', b'b', b'c'], [b'd', b'e', b'f']],
        dtype='S',
    )
    wrapped = strings.StackedBytesArray(single_bytes)
    # Reference array with each row concatenated into one byte string.
    reference = np.array([b'abc', b'def'], dtype='S')
    assert wrapped.dtype == reference.dtype
Пример #9
0
        Variable((), b"ab"),
        Variable(("x", ), [b"a", b"b"]),
        Variable((), b"a"),
    ],
)
def test_CharacterArrayCoder_roundtrip(original):
    """Decoding an encoded variable should reproduce ``original``."""
    char_coder = strings.CharacterArrayCoder()
    as_characters = char_coder.encode(original)
    restored = char_coder.decode(as_characters)
    assert_identical(original, restored)


@pytest.mark.parametrize(
    "data",
    [
        np.array([b"a", b"bc"]),
        np.array([b"a", b"bc"], dtype=strings.create_vlen_dtype(bytes)),
    ],
)
def test_CharacterArrayCoder_encode(data):
    """Check that encode splits byte strings along a "string2" dimension.

    The one-byte entry is expected to be padded with an empty byte so
    both rows have length two.
    """
    char_coder = strings.CharacterArrayCoder()
    encoded = char_coder.encode(Variable(("x", ), data))
    padded_chars = np.array([[b"a", b""], [b"b", b"c"]])
    target = Variable(("x", "string2"), padded_chars)
    assert_identical(encoded, target)


@pytest.mark.parametrize(
    ["original", "expected_char_dim_name"],
    [
        (Variable(("x", ), [b"ab", b"cdef"]), "string4"),
Пример #10
0
def test_numpy_subclass_handling(numpy_str_type) -> None:
    """``create_vlen_dtype`` must raise TypeError for ``numpy_str_type``.

    ``numpy_str_type`` is supplied from outside this snippet (presumably
    a fixture or parametrization -- confirm against the full test module).
    """
    expected_message = "unsupported type for vlen_dtype"
    with pytest.raises(TypeError, match=expected_message):
        strings.create_vlen_dtype(numpy_str_type)