def method(self, other): if isinstance(other, (ABCIndexClass, ABCSeries, ABCDataFrame)): return NotImplemented elif isinstance(other, cls): other = other._ndarray mask = isna(self) | isna(other) valid = ~mask if not lib.is_scalar(other): if len(other) != len(self): # prevent improper broadcasting when other is 2D raise ValueError( f"Lengths of operands do not match: {len(self)} != {len(other)}" ) other = np.asarray(other) other = other[valid] result = np.empty_like(self._ndarray, dtype="object") result[mask] = StringDtype.na_value result[valid] = op(self._ndarray[valid], other) if op.__name__ in {"add", "radd", "mul", "rmul"}: return StringArray(result) else: dtype = "object" if mask.any() else "bool" return np.asarray(result, dtype=dtype)
def _cmp_method(self, other, op): from pandas.arrays import BooleanArray if isinstance(other, StringArray): other = other._ndarray mask = isna(self) | isna(other) valid = ~mask if not lib.is_scalar(other): if len(other) != len(self): # prevent improper broadcasting when other is 2D raise ValueError( f"Lengths of operands do not match: {len(self)} != {len(other)}" ) other = np.asarray(other) other = other[valid] if op.__name__ in ops.ARITHMETIC_BINOPS: result = np.empty_like(self._ndarray, dtype="object") result[mask] = StringDtype.na_value result[valid] = op(self._ndarray[valid], other) return StringArray(result) else: # logical result = np.zeros(len(self._ndarray), dtype="bool") result[valid] = op(self._ndarray[valid], other) return BooleanArray(result, mask)
def _str_map(self, f, na_value=None, dtype: Optional[Dtype] = None): from pandas.arrays import ( BooleanArray, IntegerArray, StringArray, ) from pandas.core.arrays.string_ import StringDtype if dtype is None: dtype = StringDtype() if na_value is None: na_value = self.dtype.na_value mask = isna(self) arr = np.asarray(self) if is_integer_dtype(dtype) or is_bool_dtype(dtype): constructor: Union[Type[IntegerArray], Type[BooleanArray]] if is_integer_dtype(dtype): constructor = IntegerArray else: constructor = BooleanArray na_value_is_na = isna(na_value) if na_value_is_na: na_value = 1 result = lib.map_infer_mask( arr, f, mask.view("uint8"), convert=False, na_value=na_value, # error: Value of type variable "_DTypeScalar" of "dtype" cannot be # "object" # error: Argument 1 to "dtype" has incompatible type # "Union[ExtensionDtype, str, dtype[Any], Type[object]]"; expected # "Type[object]" dtype=np.dtype(dtype), # type: ignore[type-var,arg-type] ) if not na_value_is_na: mask[:] = False return constructor(result, mask) elif is_string_dtype(dtype) and not is_object_dtype(dtype): # i.e. StringDtype result = lib.map_infer_mask(arr, f, mask.view("uint8"), convert=False, na_value=na_value) return StringArray(result) else: # This is when the result type is object. We reach this when # -> We know the result type is truly object (e.g. .encode returns bytes # or .findall returns a list). # -> We don't know the result type. E.g. `.get` can return anything. return lib.map_infer_mask(arr, f, mask.view("uint8"))
def _str_map(self, f, na_value=None, dtype: Dtype | None = None, convert: bool = True): from pandas.arrays import BooleanArray if dtype is None: dtype = StringDtype(storage="python") if na_value is None: na_value = self.dtype.na_value mask = isna(self) arr = np.asarray(self) if is_integer_dtype(dtype) or is_bool_dtype(dtype): constructor: type[IntegerArray] | type[BooleanArray] if is_integer_dtype(dtype): constructor = IntegerArray else: constructor = BooleanArray na_value_is_na = isna(na_value) if na_value_is_na: na_value = 1 result = lib.map_infer_mask( arr, f, mask.view("uint8"), convert=False, na_value=na_value, # error: Argument 1 to "dtype" has incompatible type # "Union[ExtensionDtype, str, dtype[Any], Type[object]]"; expected # "Type[object]" dtype=np.dtype(dtype), # type: ignore[arg-type] ) if not na_value_is_na: mask[:] = False return constructor(result, mask) elif is_string_dtype(dtype) and not is_object_dtype(dtype): # i.e. StringDtype result = lib.map_infer_mask(arr, f, mask.view("uint8"), convert=False, na_value=na_value) return StringArray(result) else: # This is when the result type is object. We reach this when # -> We know the result type is truly object (e.g. .encode returns bytes # or .findall returns a list). # -> We don't know the result type. E.g. `.get` can return anything. return lib.map_infer_mask(arr, f, mask.view("uint8"))
def _str_map(self, f, na_value=None, dtype: Optional[Dtype] = None): from pandas.arrays import ( BooleanArray, IntegerArray, StringArray, ) from pandas.core.arrays.string_ import StringDtype if dtype is None: dtype = StringDtype() if na_value is None: na_value = self.dtype.na_value mask = isna(self) arr = np.asarray(self) if is_integer_dtype(dtype) or is_bool_dtype(dtype): constructor: Union[Type[IntegerArray], Type[BooleanArray]] if is_integer_dtype(dtype): constructor = IntegerArray else: constructor = BooleanArray na_value_is_na = isna(na_value) if na_value_is_na: na_value = 1 result = lib.map_infer_mask( arr, f, mask.view("uint8"), convert=False, na_value=na_value, dtype=np.dtype(dtype), ) if not na_value_is_na: mask[:] = False return constructor(result, mask) elif is_string_dtype(dtype) and not is_object_dtype(dtype): # i.e. StringDtype result = lib.map_infer_mask( arr, f, mask.view("uint8"), convert=False, na_value=na_value ) return StringArray(result) else: # This is when the result type is object. We reach this when # -> We know the result type is truly object (e.g. .encode returns bytes # or .findall returns a list). # -> We don't know the result type. E.g. `.get` can return anything. return lib.map_infer_mask(arr, f, mask.view("uint8"))
def __setitem__(self, key, value): value = extract_array(value, extract_numpy=True) if isinstance(value, type(self)): # extract_array doesn't extract PandasArray subclasses value = value._ndarray key = check_array_indexer(self, key) scalar_key = lib.is_scalar(key) scalar_value = lib.is_scalar(value) if scalar_key and not scalar_value: raise ValueError("setting an array element with a sequence.") # validate new items if scalar_value: if isna(value): value = StringDtype.na_value elif not isinstance(value, str): raise ValueError( f"Cannot set non-string value '{value}' into a StringArray." ) else: if not is_array_like(value): value = np.asarray(value, dtype=object) if len(value) and not lib.is_string_array(value, skipna=True): raise ValueError("Must provide strings.") super().__setitem__(key, value)
def get_value(self, series, key): """ Fast lookup of value from 1-dimensional ndarray. Only use this if you know what you're doing """ s = com.values_from_object(series) try: return com.maybe_box(self, super(PeriodIndex, self).get_value(s, key), series, key) except (KeyError, IndexError): try: asdt, parsed, reso = parse_time_string(key, self.freq) grp = resolution.Resolution.get_freq_group(reso) freqn = resolution.get_freq_group(self.freq) vals = self._ndarray_values # if our data is higher resolution than requested key, slice if grp < freqn: iv = Period(asdt, freq=(grp, 1)) ord1 = iv.asfreq(self.freq, how='S').ordinal ord2 = iv.asfreq(self.freq, how='E').ordinal if ord2 < vals[0] or ord1 > vals[-1]: raise KeyError(key) pos = np.searchsorted(self._ndarray_values, [ord1, ord2]) key = slice(pos[0], pos[1] + 1) return series[key] elif grp == freqn: key = Period(asdt, freq=self.freq).ordinal return com.maybe_box(self, self._engine.get_value(s, key), series, key) else: raise KeyError(key) except TypeError: pass period = Period(key, self.freq) key = period.value if isna(period) else period.ordinal return com.maybe_box(self, self._engine.get_value(s, key), series, key)
def _from_sequence(cls, scalars, dtype=None, copy=False): if dtype: assert dtype == "string" result = np.asarray(scalars, dtype="object") if copy and result is scalars: result = result.copy() # Standardize all missing-like values to NA # TODO: it would be nice to do this in _validate / lib.is_string_array # We are already doing a scan over the values there. na_values = isna(result) if na_values.any(): if result is scalars: # force a copy now, if we haven't already result = result.copy() result[na_values] = StringDtype.na_value return cls(result)