def test_EncodedStringCoder_encode():
    """Exercise ``EncodedStringCoder.encode`` for both ``allows_unicode`` settings."""
    vlen_str = strings.create_vlen_dtype(str)
    text = np.array(["abc", "ß∂µ∆"], dtype=vlen_str)
    utf8_bytes = np.array(
        [item.encode("utf-8") for item in text],
        dtype=object,
    )

    # Unicode-capable coder, but the variable requests an "S1" dtype:
    # the result must match the utf-8 bytes tagged with ``_Encoding``.
    unicode_coder = strings.EncodedStringCoder(allows_unicode=True)
    with_dtype = Variable(("x",), text, encoding={"dtype": "S1"})
    encoded = unicode_coder.encode(with_dtype)
    expected = Variable(("x",), utf8_bytes, attrs={"_Encoding": "utf-8"})
    assert_identical(encoded, expected)

    # Same coder, no dtype in the encoding: the variable comes back as-is.
    plain = Variable(("x",), text)
    assert_identical(unicode_coder.encode(plain), plain)

    # A coder that does not allow unicode yields the byte form.
    bytes_only_coder = strings.EncodedStringCoder(allows_unicode=False)
    assert_identical(bytes_only_coder.encode(plain), expected)
def test_EncodedStringCoder_encode():
    """Exercise ``EncodedStringCoder.encode`` for both ``allows_unicode`` settings."""
    vlen_unicode = strings.create_vlen_dtype(unicode_type)
    text = np.array([u'abc', u'ß∂µ∆'], dtype=vlen_unicode)
    utf8_bytes = np.array(
        [item.encode('utf-8') for item in text],
        dtype=object,
    )

    # Unicode-capable coder, but the variable requests an 'S1' dtype:
    # the result must match the utf-8 bytes tagged with ``_Encoding``.
    unicode_coder = strings.EncodedStringCoder(allows_unicode=True)
    with_dtype = Variable(('x', ), text, encoding={'dtype': 'S1'})
    encoded = unicode_coder.encode(with_dtype)
    expected = Variable(('x', ), utf8_bytes, attrs={'_Encoding': 'utf-8'})
    assert_identical(encoded, expected)

    # Same coder, no dtype in the encoding: the variable comes back as-is.
    plain = Variable(('x', ), text)
    assert_identical(unicode_coder.encode(plain), plain)

    # A coder that does not allow unicode yields the byte form.
    bytes_only_coder = strings.EncodedStringCoder(allows_unicode=False)
    assert_identical(bytes_only_coder.encode(plain), expected)
def test_EncodedStringCoder_decode():
    """Check that ``EncodedStringCoder.decode`` turns tagged utf-8 bytes into text."""
    decoder = strings.EncodedStringCoder()

    byte_values = np.array([b'abc', u'ß∂µ∆'.encode('utf-8')])
    encoded_var = Variable(('x', ), byte_values, {'_Encoding': 'utf-8'})
    decoded = decoder.decode(encoded_var)

    text_values = np.array([u'abc', u'ß∂µ∆'], dtype=object)
    expected = Variable(('x', ), text_values)
    assert_identical(decoded, expected)

    # Running decode on an element of the decoded result must give the
    # corresponding element of the expectation.
    first_decoded = decoder.decode(decoded[0])
    assert_identical(first_decoded, expected[0])
def test_EncodedStringCoder_decode():
    """Check that ``EncodedStringCoder.decode`` turns tagged utf-8 bytes into text."""
    decoder = strings.EncodedStringCoder()

    byte_values = np.array([b"abc", "ß∂µ∆".encode()])
    encoded_var = Variable(("x", ), byte_values, {"_Encoding": "utf-8"})
    decoded = decoder.decode(encoded_var)

    text_values = np.array(["abc", "ß∂µ∆"], dtype=object)
    expected = Variable(("x", ), text_values)
    assert_identical(decoded, expected)

    # Running decode on an element of the decoded result must give the
    # corresponding element of the expectation.
    first_decoded = decoder.decode(decoded[0])
    assert_identical(first_decoded, expected[0])
def test_EncodedStringCoder_decode_dask():
    """Check ``EncodedStringCoder.decode`` on a chunked variable.

    The decoded data is asserted to be a ``da.Array`` both for the whole
    variable and for an indexed element, and to equal the expected text.
    """
    decoder = strings.EncodedStringCoder()

    byte_values = np.array([b'abc', u'ß∂µ∆'.encode('utf-8')])
    unchunked = Variable(('x', ), byte_values, {'_Encoding': 'utf-8'})
    chunked = unchunked.chunk()

    decoded = decoder.decode(chunked)
    assert isinstance(decoded.data, da.Array)

    text_values = np.array([u'abc', u'ß∂µ∆'], dtype=object)
    expected = Variable(('x', ), text_values)
    assert_identical(decoded, expected)

    # Decoding an indexed element of the decoded result must still be
    # backed by a dask array and match the expected element.
    first_decoded = decoder.decode(decoded[0])
    assert isinstance(first_decoded.data, da.Array)
    assert_identical(first_decoded, expected[0])
def test_EncodedStringCoder_decode_dask():
    """Check ``EncodedStringCoder.decode`` on a chunked variable.

    The decoded data is asserted to be a ``da.Array`` both for the whole
    variable and for an indexed element, and to equal the expected text.
    """
    decoder = strings.EncodedStringCoder()

    byte_values = np.array([b"abc", "ß∂µ∆".encode()])
    unchunked = Variable(("x", ), byte_values, {"_Encoding": "utf-8"})
    chunked = unchunked.chunk()

    decoded = decoder.decode(chunked)
    assert isinstance(decoded.data, da.Array)

    text_values = np.array(["abc", "ß∂µ∆"], dtype=object)
    expected = Variable(("x", ), text_values)
    assert_identical(decoded, expected)

    # Decoding an indexed element of the decoded result must still be
    # backed by a dask array and match the expected element.
    first_decoded = decoder.decode(decoded[0])
    assert isinstance(first_decoded.data, da.Array)
    assert_identical(first_decoded, expected[0])