def _cmp_method(self, other, op):
    """
    Apply the binary operator ``op`` between this array and ``other``.

    Positions where either operand is missing (per ``isna``) are never
    passed to ``op``; they are reported as missing in the result instead.

    Parameters
    ----------
    other : scalar, StringArray or list-like
        Right-hand operand. A ``StringArray`` is unwrapped to its
        underlying ``_ndarray`` before anything else happens.
    op : callable
        Binary operator. Its ``__name__`` decides the result container.

    Returns
    -------
    StringArray
        When ``op.__name__`` is listed in ``ops.ARITHMETIC_BINOPS``; missing
        positions hold ``StringDtype.na_value``.
    BooleanArray
        Otherwise; missing positions are flagged through the mask.

    Raises
    ------
    ValueError
        If ``other`` is not a scalar and its length differs from ``len(self)``.
    """
    from pandas.arrays import BooleanArray

    if isinstance(other, StringArray):
        other = other._ndarray

    # A slot is skipped as soon as one side of the operation is missing.
    missing = isna(self) | isna(other)
    present = ~missing

    if not lib.is_scalar(other):
        if len(other) != len(self):
            # prevent improper broadcasting when other is 2D
            raise ValueError(
                f"Lengths of operands do not match: {len(self)} != {len(other)}"
            )
        # Keep only the entries that line up with the valid slots of self.
        other = np.asarray(other)[present]

    is_arithmetic = op.__name__ in ops.ARITHMETIC_BINOPS
    values = self._ndarray

    if not is_arithmetic:
        # logical: missing slots stay False in the data and True in the mask
        flags = np.zeros(len(values), dtype="bool")
        flags[present] = op(values[present], other)
        return BooleanArray(flags, missing)

    combined = np.empty_like(values, dtype="object")
    combined[missing] = StringDtype.na_value
    combined[present] = op(values[present], other)
    return StringArray(combined)
def _str_map(self, f, na_value=None, dtype: Optional[Dtype] = None):
    """
    Map ``f`` over the array through ``lib.map_infer_mask``.

    Parameters
    ----------
    f : callable
        Function handed to ``lib.map_infer_mask`` together with the values.
    na_value : object, optional
        Value to use for missing entries. Defaults to ``self.dtype.na_value``.
    dtype : Dtype, optional
        Requested result dtype. Defaults to ``StringDtype()``.

    Returns
    -------
    IntegerArray or BooleanArray
        When ``dtype`` is an integer or boolean dtype.
    StringArray
        When ``dtype`` is a string dtype that is not plain object.
    object
        Otherwise, whatever ``lib.map_infer_mask`` returns with default
        conversion.
    """
    from pandas.arrays import (
        BooleanArray,
        IntegerArray,
        StringArray,
    )
    from pandas.core.arrays.string_ import StringDtype

    if dtype is None:
        dtype = StringDtype()
    if na_value is None:
        na_value = self.dtype.na_value

    mask = isna(self)
    arr = np.asarray(self)

    wants_integer = is_integer_dtype(dtype)
    if wants_integer or is_bool_dtype(dtype):
        constructor: Union[Type[IntegerArray], Type[BooleanArray]] = (
            IntegerArray if wants_integer else BooleanArray
        )

        # A missing fill value cannot be stored in the raw int/bool buffer, so
        # a placeholder of 1 is written and the mask keeps marking the slot.
        keep_mask = isna(na_value)
        fill_value = 1 if keep_mask else na_value

        result = lib.map_infer_mask(
            arr,
            f,
            mask.view("uint8"),
            convert=False,
            na_value=fill_value,
            # mypy rejects this call with:
            #   Value of type variable "_DTypeScalar" of "dtype" cannot be
            #   "object"
            #   Argument 1 to "dtype" has incompatible type
            #   "Union[ExtensionDtype, str, dtype[Any], Type[object]]"; expected
            #   "Type[object]"
            dtype=np.dtype(dtype),  # type: ignore[type-var,arg-type]
        )

        if not keep_mask:
            # A concrete fill value was supplied, so no slot is missing anymore.
            mask[:] = False

        return constructor(result, mask)

    if is_string_dtype(dtype) and not is_object_dtype(dtype):
        # i.e. StringDtype
        result = lib.map_infer_mask(
            arr, f, mask.view("uint8"), convert=False, na_value=na_value
        )
        return StringArray(result)

    # Object result. We get here when
    # -> the result type is known to be object (e.g. .encode returns bytes
    #    or .findall returns a list), or
    # -> the result type is unknown, e.g. `.get` can return anything.
    return lib.map_infer_mask(arr, f, mask.view("uint8"))
def _str_map(self, f, na_value=None, dtype: Optional[Dtype] = None):
    """
    Apply ``f`` to the array's values via ``lib.map_infer_mask``.

    The requested ``dtype`` selects the container of the result:

    * integer dtype -> ``IntegerArray``
    * boolean dtype -> ``BooleanArray``
    * string dtype that is not object -> ``StringArray``
    * anything else -> the raw return value of ``lib.map_infer_mask``

    Parameters
    ----------
    f : callable
        Function forwarded to ``lib.map_infer_mask``.
    na_value : object, optional
        Fill value for missing entries; ``self.dtype.na_value`` when omitted.
    dtype : Dtype, optional
        Target dtype; ``StringDtype()`` when omitted.
    """
    from pandas.arrays import (
        BooleanArray,
        IntegerArray,
        StringArray,
    )
    from pandas.core.arrays.string_ import StringDtype

    if dtype is None:
        dtype = StringDtype()
    if na_value is None:
        na_value = self.dtype.na_value

    mask = isna(self)
    arr = np.asarray(self)
    # uint8 view of the boolean mask; it shares memory with ``mask``.
    mask_u8 = mask.view("uint8")

    to_integer = is_integer_dtype(dtype)
    if to_integer or is_bool_dtype(dtype):
        constructor: Union[Type[IntegerArray], Type[BooleanArray]]
        constructor = IntegerArray if to_integer else BooleanArray

        na_value_is_na = isna(na_value)
        if na_value_is_na:
            # Placeholder written into the raw buffer; the mask still marks
            # these positions as missing in the returned array.
            na_value = 1

        result = lib.map_infer_mask(
            arr,
            f,
            mask_u8,
            convert=False,
            na_value=na_value,
            dtype=np.dtype(dtype),
        )

        if not na_value_is_na:
            # The caller provided a real fill value: nothing is missing now.
            mask[:] = False

        return constructor(result, mask)

    if is_string_dtype(dtype) and not is_object_dtype(dtype):
        # i.e. StringDtype
        return StringArray(
            lib.map_infer_mask(arr, f, mask_u8, convert=False, na_value=na_value)
        )

    # The result type is object. This happens when
    # -> the result is truly object (e.g. .encode returns bytes or .findall
    #    returns a list), or
    # -> the result type cannot be known, e.g. `.get` can return anything.
    return lib.map_infer_mask(arr, f, mask_u8)
def astype(self, dtype: Any, copy: bool = True, casting: str = "unsafe") -> Any:
    """
    Cast the array to ``dtype``.

    Parameters
    ----------
    dtype : Any
        Target dtype. ``RLEDtype`` and ``pd.StringDtype`` instances are
        handled specially; anything else is passed on to NumPy.
    copy : bool, default True
        When False, a copy is made only if it is required. For an
        ``RLEDtype`` equal to ``self.dtype`` this returns ``self`` itself.
    casting : str, default "unsafe"
        Casting rule forwarded to the underlying ``astype`` calls.

    Returns
    -------
    RLEArray
        For an ``RLEDtype`` target (positions are always copied).
    StringArray
        For a ``pd.StringDtype`` target, built from ``str(x)`` of every element.
    numpy.ndarray
        For every other target.
    """
    _logger.debug("RLEArray.astype(dtype=%r, copy=%r)", dtype, copy)

    if isinstance(dtype, RLEDtype):
        if (not copy) and (dtype == self.dtype):
            return self
        return RLEArray(
            data=self._data.astype(dtype._dtype, casting=casting),
            positions=self._positions.copy(),
        )

    if isinstance(dtype, pd.StringDtype):
        # TODO: fast-path
        return StringArray._from_sequence([str(x) for x in self])

    if casting != "unsafe":
        # ``ndarray.astype`` copies by default, so the result is always a fresh
        # array whatever ``copy`` says; going through ``np.asarray`` avoids an
        # extra intermediate copy and the NumPy >= 2 ``copy=False`` error below.
        return np.asarray(self).astype(dtype=dtype, casting=casting)

    if copy:
        return np.array(self, dtype=dtype)

    # NumPy >= 2 treats ``np.array(..., copy=False)`` as "never copy" and raises
    # ValueError when a copy cannot be avoided. ``np.asarray`` keeps the
    # "copy only if needed" behaviour on both NumPy 1.x and 2.x.
    return np.asarray(self, dtype=dtype)
pd.CategoricalDtype(None, ordered=True), pd.Categorical(["a", "b"], ordered=True), ), # Interval ( [pd.Interval(1, 2), pd.Interval(3, 4)], "interval", IntervalArray.from_tuples([(1, 2), (3, 4)]), ), # Sparse ([0, 1], "Sparse[int64]", SparseArray([0, 1], dtype="int64")), # IntegerNA ([1, None], "Int16", integer_array([1, None], dtype="Int16")), (pd.Series([1, 2]), None, PandasArray(np.array([1, 2], dtype=np.int64))), # String (["a", None], "string", StringArray._from_sequence(["a", None])), (["a", None], pd.StringDtype(), StringArray._from_sequence(["a", None])), # Boolean ([True, None], "boolean", BooleanArray._from_sequence([True, None])), ([True, None], pd.BooleanDtype(), BooleanArray._from_sequence([True, None])), # Index (pd.Index([1, 2]), None, PandasArray(np.array([1, 2], dtype=np.int64))), # Series[EA] returns the EA ( pd.Series(pd.Categorical(["a", "b"], categories=["a", "b", "c"])), None, pd.Categorical(["a", "b"], categories=["a", "b", "c"]), ), # "3rd party" EAs work ([decimal.Decimal(0), decimal.Decimal(1)], "decimal", to_decimal([0, 1])), # pass an ExtensionArray, but a different dtype
def peakmem_stringarray_construction(self):
    """
    Construct a ``StringArray`` from ``self.series_arr`` and discard it.

    NOTE(review): presumably an asv peak-memory benchmark (``peakmem_``
    prefix) -- confirm against the benchmark harness.
    """
    source = self.series_arr
    StringArray(source)
def time_string_array_with_nan_construction(self):
    """
    Construct a ``StringArray`` from ``self.series_arr_nan`` and discard it.

    NOTE(review): presumably an asv timing benchmark (``time_`` prefix) whose
    input contains missing values -- confirm against the benchmark setup.
    """
    source = self.series_arr_nan
    StringArray(source)
def time_string_array_construction(self):
    """
    Construct a ``StringArray`` from ``self.series_arr`` and discard it.

    NOTE(review): presumably an asv timing benchmark (``time_`` prefix) --
    confirm against the benchmark harness.
    """
    source = self.series_arr
    StringArray(source)
na_value = 1 result = lib.map_infer_mask( arr, f, mask.view("uint8"), convert=False, na_value=na_value, dtype=np.dtype(dtype), ) if not na_value_is_na: mask[:] = False return constructor(result, mask) elif is_string_dtype(dtype) and not is_object_dtype(dtype): # i.e. StringDtype result = lib.map_infer_mask( arr, f, mask.view("uint8"), convert=False, na_value=na_value ) return StringArray(result) else: # This is when the result type is object. We reach this when # -> We know the result type is truly object (e.g. .encode returns bytes # or .findall returns a list). # -> We don't know the result type. E.g. `.get` can return anything. return lib.map_infer_mask(arr, f, mask.view("uint8")) StringArray._add_arithmetic_ops()
f, mask.view("uint8"), convert=False, na_value=na_value, dtype=np.dtype(dtype), ) if not na_value_is_na: mask[:] = False return constructor(result, mask) elif is_string_dtype(dtype) and not is_object_dtype(dtype): # i.e. StringDtype result = lib.map_infer_mask(arr, f, mask.view("uint8"), convert=False, na_value=na_value) return StringArray(result) else: # This is when the result type is object. We reach this when # -> We know the result type is truly object (e.g. .encode returns bytes # or .findall returns a list). # -> We don't know the result type. E.g. `.get` can return anything. return lib.map_infer_mask(arr, f, mask.view("uint8")) StringArray._add_arithmetic_ops() StringArray._add_comparison_ops()