def test_pyarrow_not_installed_raises():
    """
    Check that each pyarrow-backed entry point raises ImportError.

    The dtype constructor, the array constructor and ``_from_sequence`` are
    all expected to fail with the same "pyarrow>=1.0.0 is required" message.
    """
    msg = re.escape("pyarrow>=1.0.0 is required for PyArrow backed")

    # Deferred calls, exercised in the same order as they are listed.
    constructors = (
        lambda: StringDtype(storage="pyarrow"),
        lambda: ArrowStringArray([]),
        lambda: ArrowStringArray._from_sequence(["a", None, "b"]),
    )

    for construct in constructors:
        with pytest.raises(ImportError, match=msg):
            construct()
def __from_arrow__(
    self, array: pyarrow.Array | pyarrow.ChunkedArray
) -> BaseStringArray:
    """
    Construct a string array from a pyarrow Array/ChunkedArray.

    Parameters
    ----------
    array : pyarrow.Array or pyarrow.ChunkedArray
        The arrow data to convert.

    Returns
    -------
    BaseStringArray
        An ``ArrowStringArray`` wrapping ``array`` when ``self.storage`` is
        ``"pyarrow"``; otherwise a ``StringArray`` built from the chunks of
        ``array`` (empty, object-dtype backed, when there are no chunks).
    """
    if self.storage == "pyarrow":
        from pandas.core.arrays.string_arrow import ArrowStringArray

        return ArrowStringArray(array)

    import pyarrow

    # A plain Array is treated as a single chunk; anything else is taken
    # to be a pyarrow.ChunkedArray.
    chunks = [array] if isinstance(array, pyarrow.Array) else array.chunks

    # Go through _from_sequence so that None is converted to NA.
    converted = [StringArray._from_sequence(np.array(chunk)) for chunk in chunks]

    if not converted:
        return StringArray(np.array([], dtype="object"))
    return StringArray._concat_same_type(converted)
def test_setitem(multiple_chunks, key, value, expected):
    """
    Check ``ArrowStringArray.__setitem__`` against an expected array.

    Starts from the values ``"a".."e"``, assigns ``value`` at ``key`` and
    compares with ``expected``. When ``multiple_chunks`` is true both sides
    are split into two chunks (first three elements, then the rest), and the
    chunk count must match after the assignment.
    """
    import pyarrow as pa

    initial_data = pa.array(list("abcde"))
    expected_data = pa.array(expected)

    if multiple_chunks:
        initial_data = pa.chunked_array([initial_data[:3], initial_data[3:]])
        expected_data = pa.chunked_array([expected_data[:3], expected_data[3:]])

    actual = ArrowStringArray(initial_data)
    wanted = ArrowStringArray(expected_data)

    actual[key] = value

    tm.assert_equal(actual, wanted)
    # Setting values must not change how the data is chunked.
    assert actual._data.num_chunks == wanted._data.num_chunks
def test_constructor_not_string_type_raises(array, chunked):
    """
    Check that ``ArrowStringArray`` rejects non-string input with ValueError.

    ``array`` is the namespace whose ``.array`` factory builds the integer
    input (compared against ``np`` below; presumably the other option is
    pyarrow -- confirm against the parametrization). ``chunked`` wraps the
    input in a pyarrow chunked array and is skipped for numpy.
    """
    is_numpy = array is np
    arr = array.array([1, 2, 3])

    if chunked and is_numpy:
        pytest.skip("chunked not applicable to numpy array")
    if chunked:
        arr = pa.chunked_array(arr)

    # The numpy message is used as a raw regex; only the pyarrow one is escaped.
    msg = (
        "Unsupported type '<class 'numpy.ndarray'>' for ArrowStringArray"
        if is_numpy
        else re.escape(
            "ArrowStringArray requires a PyArrow (chunked) array of string type"
        )
    )

    with pytest.raises(ValueError, match=msg):
        ArrowStringArray(arr)
def test_setitem_invalid_indexer_raises():
    """
    Check that invalid keys and mismatched values are rejected on setitem.

    On a five-element array, positions 5 and -6 (alone or inside a list) and
    a three-element boolean mask must raise IndexError; assigning three
    values to two positions must raise ValueError.
    """
    import pyarrow as pa

    arr = ArrowStringArray(pa.array(list("abcde")))

    invalid_keys = (
        5,
        -6,
        [0, 5],
        [0, -6],
        [True, True, False],
    )
    for bad_key in invalid_keys:
        with pytest.raises(IndexError, match=None):
            arr[bad_key] = "foo"

    # Valid positions, but the number of values does not match.
    with pytest.raises(ValueError, match=None):
        arr[[0, 1]] = ["foo", "bar", "baz"]
def test_from_sequence_wrong_dtype_raises():
    """
    Check which dtype specifications ``_from_sequence`` accepts per array type.

    For both ``ArrowStringArray`` and ``StringArray`` the plain ``"string"``
    alias is accepted under either ``string_storage`` option, while a dtype
    whose storage is that of the other implementation (given explicitly or
    picked up by ``StringDtype()`` from the active option) must raise
    AssertionError.
    """
    arrow_from_sequence = ArrowStringArray._from_sequence
    python_from_sequence = StringArray._from_sequence

    # --- ArrowStringArray: plain "string" alias under either option ---
    with pd.option_context("string_storage", "python"):
        arrow_from_sequence(["a", None, "c"], dtype="string")
    with pd.option_context("string_storage", "pyarrow"):
        arrow_from_sequence(["a", None, "c"], dtype="string")

    # --- ArrowStringArray: explicit storage in the string alias ---
    with pytest.raises(AssertionError, match=None):
        arrow_from_sequence(["a", None, "c"], dtype="string[python]")
    arrow_from_sequence(["a", None, "c"], dtype="string[pyarrow]")

    # --- ArrowStringArray: StringDtype() created while the option is active ---
    with pytest.raises(AssertionError, match=None), pd.option_context(
        "string_storage", "python"
    ):
        arrow_from_sequence(["a", None, "c"], dtype=StringDtype())
    with pd.option_context("string_storage", "pyarrow"):
        arrow_from_sequence(["a", None, "c"], dtype=StringDtype())

    # --- ArrowStringArray: StringDtype with explicit storage ---
    with pytest.raises(AssertionError, match=None):
        arrow_from_sequence(["a", None, "c"], dtype=StringDtype("python"))
    arrow_from_sequence(["a", None, "c"], dtype=StringDtype("pyarrow"))

    # --- StringArray: plain "string" alias under either option ---
    with pd.option_context("string_storage", "python"):
        python_from_sequence(["a", None, "c"], dtype="string")
    with pd.option_context("string_storage", "pyarrow"):
        python_from_sequence(["a", None, "c"], dtype="string")

    # --- StringArray: explicit storage in the string alias ---
    python_from_sequence(["a", None, "c"], dtype="string[python]")
    with pytest.raises(AssertionError, match=None):
        python_from_sequence(["a", None, "c"], dtype="string[pyarrow]")

    # --- StringArray: StringDtype() created while the option is active ---
    with pd.option_context("string_storage", "python"):
        python_from_sequence(["a", None, "c"], dtype=StringDtype())
    with pytest.raises(AssertionError, match=None), pd.option_context(
        "string_storage", "pyarrow"
    ):
        python_from_sequence(["a", None, "c"], dtype=StringDtype())

    # --- StringArray: StringDtype with explicit storage ---
    python_from_sequence(["a", None, "c"], dtype=StringDtype("python"))
    with pytest.raises(AssertionError, match=None):
        python_from_sequence(["a", None, "c"], dtype=StringDtype("pyarrow"))