def test_array_encode_utf8():
    """Exercise ``array_encode_utf8`` on valid and invalid inputs.

    Valid inputs are an object-dtype NumPy array and a pandas Series built
    from it; for both, the result must have object dtype and its elements
    must equal the UTF-8 encoding of each entry of ``strings``.

    Invalid inputs must raise: a ``'U'``-dtype array (TypeError or
    ValueError), an object array holding a lone surrogate
    (UnicodeEncodeError, only checked when ``PY3`` is truthy), and an object
    array holding ``bytes`` (TypeError).
    """
    object_dtype = np.dtype('object')
    utf8_expected = [text.encode('utf-8') for text in strings]

    def assert_encoded(container):
        # Shared check for every accepted container type.
        encoded = array_encode_utf8(container)
        assert encoded.dtype == object_dtype
        assert list(encoded) == utf8_expected

    object_array = np.array(strings, dtype='object')
    assert_encoded(object_array)
    assert_encoded(pd.Series(object_array))

    # A fixed-width unicode array is not an accepted input type.
    unicode_array = np.array(strings, dtype='U')
    with pytest.raises((TypeError, ValueError)):
        array_encode_utf8(unicode_array)

    # The lone-surrogate case is only exercised when PY3 is truthy; the
    # original note states that on py2 the encode works anyway.
    if PY3:
        lone_surrogate = u"\uDE80"
        with_surrogate = np.array(strings + [lone_surrogate], dtype='object')
        with pytest.raises(UnicodeEncodeError):
            array_encode_utf8(with_surrogate)

    # NOTE(review): the flattened source does not show whether this check
    # was nested under ``if PY3``; it is kept unconditional here -- confirm.
    # Object arrays whose elements are bytes rather than text are rejected.
    bytes_array = np.array([b"foo"], dtype='object')
    with pytest.raises(TypeError):
        array_encode_utf8(bytes_array)
def test_array_encode_utf8():
    """Check ``array_encode_utf8`` results and its error reporting.

    For an object-dtype NumPy array and for a pandas Series wrapping it, the
    result must have object dtype and hold the UTF-8 encoding of each entry
    of ``strings``. An object array containing a lone surrogate must raise
    UnicodeEncodeError, and one containing ``bytes`` must raise TypeError.
    """
    wanted_dtype = np.dtype('object')
    wanted_bytes = [item.encode('utf-8') for item in strings]

    def verify(values):
        # Encode once, then check both the dtype and the payload.
        result = array_encode_utf8(values)
        assert result.dtype == wanted_dtype
        assert list(result) == wanted_bytes

    text_array = np.array(strings, dtype='object')
    verify(text_array)
    verify(pd.Series(text_array))

    # A lone surrogate cannot be encoded as UTF-8.
    surrogate = u"\uDE80"
    unencodable = np.array(strings + [surrogate], dtype='object')
    with pytest.raises(UnicodeEncodeError):
        array_encode_utf8(unencodable)

    # Elements that are bytes instead of text are the wrong object type.
    already_bytes = np.array([b"foo"], dtype='object')
    with pytest.raises(TypeError):
        array_encode_utf8(already_bytes)