def _str_repeat(self, repeats):
    """
    Repeat every element, either by one shared count or element-wise.

    Parameters
    ----------
    repeats : scalar or sequence
        When ``is_scalar(repeats)`` is true the same count is applied to
        every element through ``self._str_map``. Otherwise ``repeats`` is
        converted to an object ndarray and paired with the elements of
        ``np.asarray(self)`` via ``libops.vec_binop``.

    Returns
    -------
    object
        Whatever ``self._str_map`` returns on the scalar path. On the
        sequence path, the ``vec_binop`` result, re-wrapped with
        ``StringArray._from_sequence`` when ``self`` is a StringArray.
    """

    def _multiply(value, count):
        # Try the bytes implementation first; anything that is not bytes
        # raises TypeError there and is handled as str instead.
        try:
            return bytes.__mul__(value, count)
        except TypeError:
            return str.__mul__(value, count)

    if is_scalar(repeats):
        return self._str_map(lambda x: _multiply(x, repeats), dtype=str)

    from pandas.core.arrays.string_ import StringArray

    def _multiply_unless_na(value, count):
        # NA entries are passed through untouched.
        if value is libmissing.NA:
            return value
        return _multiply(value, count)

    counts = np.asarray(repeats, dtype=object)
    repeated = libops.vec_binop(np.asarray(self), counts, _multiply_unless_na)

    if not isinstance(self, StringArray):
        return repeated
    # Not going through map, so we have to do this here.
    return StringArray._from_sequence(repeated)
def astype(self, dtype, copy: bool = True) -> ArrayLike:
    """
    Cast to a NumPy array or ExtensionArray with 'dtype'.

    Parameters
    ----------
    dtype : str or dtype
        Typecode or data-type to which the array is cast.
    copy : bool, default True
        Whether to copy the data, even if not necessary. If False,
        a copy is made only if the old dtype does not match the
        new dtype.

    Returns
    -------
    ndarray or ExtensionArray
        NumPy ndarray, or BooleanArray, IntegerArray, FloatingArray or
        string array with 'dtype' for its dtype.

    Raises
    ------
    TypeError
        if incompatible type with a FloatingDtype, equivalent of same_kind
        casting
    """
    from pandas.core.arrays.string_ import StringDtype

    dtype = pandas_dtype(dtype)

    # if the dtype is exactly the same, we can fastpath
    if self.dtype == dtype:
        # return the same object for copy=False
        return self.copy() if copy else self

    # if we are astyping to another nullable masked dtype, we can fastpath
    if isinstance(dtype, BaseMaskedDtype):
        # TODO deal with NaNs
        data = self._data.astype(dtype.numpy_dtype, copy=copy)
        # mask is copied depending on whether the data was copied, and
        # not directly depending on the `copy` keyword
        mask = self._mask if data is self._data else self._mask.copy()
        return dtype.construct_array_type()(data, mask, copy=False)

    if isinstance(dtype, StringDtype):
        # Let the requested dtype pick its own array class and forward the
        # dtype itself, so the result carries exactly the dtype that was
        # asked for instead of always being a StringArray.
        string_array_cls = dtype.construct_array_type()
        return string_array_cls._from_sequence(self, dtype=dtype, copy=False)

    # coerce
    if is_float_dtype(dtype):
        # In astype, we consider dtype=float to also mean na_value=np.nan
        kwargs = {"na_value": np.nan}
    elif is_datetime64_dtype(dtype):
        kwargs = {"na_value": np.datetime64("NaT")}
    else:
        kwargs = {}

    data = self.to_numpy(dtype=dtype, **kwargs)
    return astype_nansafe(data, dtype, copy=False)
def data_for_grouping():
    """Return a StringArray built from ``[B, B, NA, NA, A, A, B, C]``."""
    values = ["B", "B", pd.NA, pd.NA, "A", "A", "B", "C"]
    return StringArray._from_sequence(values)
def data_missing_for_sorting():
    """Return a length-3 StringArray built from ``[B, NA, A]``."""
    values = ["B", pd.NA, "A"]
    return StringArray._from_sequence(values)
def data_for_sorting():
    """Return a length-3 StringArray built from ``[B, C, A]``."""
    values = ["B", "C", "A"]
    return StringArray._from_sequence(values)
def data_missing():
    """Return a length-2 StringArray laid out as ``[NA, valid]``."""
    values = [pd.NA, "A"]
    return StringArray._from_sequence(values)
def data():
    """
    Return a StringArray of 100 randomly chosen ASCII letters.

    The sample is redrawn until its first two entries differ.
    """
    alphabet = list(string.ascii_letters)
    while True:
        letters = np.random.choice(alphabet, size=100)
        # Keep drawing until the two leading values are distinct.
        if letters[0] != letters[1]:
            break
    return StringArray._from_sequence(letters)
def test_from_sequence_wrong_dtype_raises():
    """
    Call ``_from_sequence`` on ArrowStringArray and StringArray with each
    spelling of the string dtype and check which ones raise.

    Expected to raise ``AssertionError``:

    * ArrowStringArray with ``"string[python]"``, ``StringDtype("python")``,
      or ``StringDtype()`` created while ``string_storage`` is "python".
    * StringArray with ``"string[pyarrow]"``, ``StringDtype("pyarrow")``,
      or ``StringDtype()`` created while ``string_storage`` is "pyarrow".

    Every other combination below is expected to succeed, including the
    plain ``"string"`` alias under either ``string_storage`` setting.
    """

    def build(array_cls, dtype):
        # A fresh list literal is created for every call.
        return array_cls._from_sequence(["a", None, "c"], dtype=dtype)

    def storage(name):
        return pd.option_context("string_storage", name)

    def raises_assertion():
        return pytest.raises(AssertionError, match=None)

    # ---- ArrowStringArray ----
    with storage("python"):
        build(ArrowStringArray, "string")

    with storage("pyarrow"):
        build(ArrowStringArray, "string")

    with raises_assertion():
        build(ArrowStringArray, "string[python]")

    build(ArrowStringArray, "string[pyarrow]")

    with raises_assertion(), storage("python"):
        build(ArrowStringArray, StringDtype())

    with storage("pyarrow"):
        build(ArrowStringArray, StringDtype())

    with raises_assertion():
        build(ArrowStringArray, StringDtype("python"))

    build(ArrowStringArray, StringDtype("pyarrow"))

    # ---- StringArray ----
    with storage("python"):
        build(StringArray, "string")

    with storage("pyarrow"):
        build(StringArray, "string")

    build(StringArray, "string[python]")

    with raises_assertion():
        build(StringArray, "string[pyarrow]")

    with storage("python"):
        build(StringArray, StringDtype())

    with raises_assertion(), storage("pyarrow"):
        build(StringArray, StringDtype())

    build(StringArray, StringDtype("python"))

    with raises_assertion():
        build(StringArray, StringDtype("pyarrow"))
def data_for_grouping():
    """Return a StringArray built from ``[B, B, nan, nan, A, A, B, C]``."""
    values = ["B", "B", np.nan, np.nan, "A", "A", "B", "C"]
    return StringArray._from_sequence(values)