def astype_array(values: ArrayLike, dtype: DtypeObj, copy: bool = False) -> ArrayLike:
    """
    Convert an ndarray or ExtensionArray to ``dtype``.

    Parameters
    ----------
    values : ndarray or ExtensionArray
        Array to convert.
    dtype : dtype object
        Target dtype.
    copy : bool, default False
        Whether a copy is requested; forwarded to the underlying casts and
        honoured directly when the dtype is already equal.

    Returns
    -------
    ndarray or ExtensionArray

    Raises
    ------
    TypeError
        When a datetimelike array is cast to a numpy integer dtype whose
        itemsize is not 8 bytes.
    """
    source_dtype = values.dtype

    if (
        source_dtype.kind in ("m", "M")
        and dtype.kind in ("i", "u")
        and isinstance(dtype, np.dtype)
        and dtype.itemsize != 8
    ):
        # TODO(2.0) remove special case once deprecation on DTA/TDA is enforced
        raise TypeError(
            rf"cannot astype a datetimelike from [{values.dtype}] to [{dtype}]"
        )

    # naive datetime64 -> tz-aware goes through the dedicated helper
    if is_datetime64tz_dtype(dtype) and is_datetime64_dtype(values.dtype):
        return astype_dt64_to_dt64tz(values, dtype, copy, via_utc=True)

    # nothing to convert; only honour the copy request
    if is_dtype_equal(values.dtype, dtype):
        return values.copy() if copy else values

    if isinstance(values, np.ndarray):
        converted = astype_nansafe(values, dtype, copy=copy)
    else:
        # i.e. ExtensionArray
        converted = values.astype(dtype, copy=copy)

    # in pandas we don't store numpy str dtypes, so convert to object
    if isinstance(dtype, np.dtype) and issubclass(converted.dtype.type, str):
        converted = np.array(converted, dtype=object)

    return converted
def na_accum_func(values: ArrayLike, accum_func, skipna: bool) -> ArrayLike:
    """
    Cumulative function with skipna support.

    Parameters
    ----------
    values : np.ndarray or ExtensionArray
        Data to accumulate along axis 0.
    accum_func : {np.cumprod, np.maximum.accumulate, np.cumsum, np.minimum.accumulate}
        The accumulation to apply; any other callable raises ``KeyError``
        from the fill-value lookup.
    skipna : bool
        If True, missing entries do not affect the running result and are
        reported as missing in the output.

    Returns
    -------
    np.ndarray or ExtensionArray

    Notes
    -----
    For datetimelike input with ``np.minimum.accumulate`` the NaT positions
    of ``values`` are temporarily overwritten in place with the largest
    int64. They are restored before returning, and also when ``accum_func``
    raises, so the caller's data is never left modified.
    """
    # (fill used in place of NA before accumulating, value written back after)
    mask_a, mask_b = {
        np.cumprod: (1.0, np.nan),
        np.maximum.accumulate: (-np.inf, np.nan),
        np.cumsum: (0.0, np.nan),
        np.minimum.accumulate: (np.inf, np.nan),
    }[accum_func]

    # We will be applying this function to block values
    if values.dtype.kind in ["m", "M"]:
        # GH#30460, GH#29058
        # numpy 1.18 started sorting NaTs at the end instead of beginning,
        # so we need to work around to maintain backwards-consistency.
        orig_dtype = values.dtype

        # We need to define mask before masking NaTs
        mask = isna(values)

        # Note: the accum_func comparison fails as an "is" comparison
        is_cummin = accum_func == np.minimum.accumulate

        if is_cummin:
            # NaT is the smallest int64, so push it to the largest value to
            # keep it from dominating the running minimum
            y = values.view("i8")
            y[mask] = np.iinfo(np.int64).max
        else:
            y = values

        try:
            result = accum_func(y.view("i8"), axis=0)
        finally:
            if is_cummin:
                # restore NaT elements even if accum_func raised
                y[mask] = iNaT

        if skipna:
            result[mask] = iNaT
        elif is_cummin:
            # Restore NaTs that we masked previously
            nz = (~np.asarray(mask)).nonzero()[0]
            if len(nz):
                # everything up to the first non-na entry stays NaT
                result[: nz[0]] = iNaT

        if isinstance(values, np.ndarray):
            result = result.view(orig_dtype)
        else:
            # DatetimeArray
            result = type(values)._from_sequence(result, dtype=orig_dtype)

    elif skipna and not issubclass(values.dtype.type, (np.integer, np.bool_)):
        # work on a copy so the NA fill never touches the caller's data
        vals = values.copy()
        mask = isna(vals)
        vals[mask] = mask_a
        result = accum_func(vals, axis=0)
        result[mask] = mask_b
    else:
        result = accum_func(values, axis=0)

    return result
def astype_dt64_to_dt64tz(
    values: ArrayLike, dtype: DtypeObj, copy: bool, via_utc: bool = False
) -> DatetimeArray:
    """
    Cast datetime64 data between timezone-naive and timezone-aware dtypes.

    Parameters
    ----------
    values : ndarray or DatetimeArray
        Wrapped via ``ensure_wrapped_if_datetimelike`` before use.
    dtype : dtype object
        Target dtype; counts as timezone-aware when it is a DatetimeTZDtype.
    copy : bool
        Whether to copy.
    via_utc : bool, default False
        If True, use the Series.astype behavior (localize to UTC, then
        convert to the target timezone). Otherwise use the
        DatetimeArray/DatetimeIndex.astype behavior.

    Returns
    -------
    DatetimeArray

    Raises
    ------
    NotImplementedError
        When neither the values nor the target dtype are timezone-aware.
    """
    # GH#33401 we have inconsistent behaviors between
    # Datetimeindex[naive].astype(tzaware)
    # Series[dt64].astype(tzaware)
    # This collects them in one place to prevent further fragmentation.
    from pandas.core.construction import ensure_wrapped_if_datetimelike

    dta = cast("DatetimeArray", ensure_wrapped_if_datetimelike(values))
    target_aware = isinstance(dtype, DatetimeTZDtype)

    if via_utc:
        # Series.astype behavior

        # caller is responsible for checking this
        assert dta.tz is None and target_aware
        dtype = cast(DatetimeTZDtype, dtype)

        if copy:
            # this should be the only copy
            dta = dta.copy()

        warnings.warn(
            "Using .astype to convert from timezone-naive dtype to "
            "timezone-aware dtype is deprecated and will raise in a "
            "future version. Use ser.dt.tz_localize instead.",
            FutureWarning,
            stacklevel=find_stack_level(),
        )

        # GH#33401 this doesn't match DatetimeArray.astype, which
        # goes through the `not via_utc` path
        as_utc = dta.tz_localize("UTC")
        return as_utc.tz_convert(dtype.tz)

    # DatetimeArray/DatetimeIndex.astype behavior
    source_naive = dta.tz is None

    if source_naive and target_aware:
        dtype = cast(DatetimeTZDtype, dtype)
        warnings.warn(
            "Using .astype to convert from timezone-naive dtype to "
            "timezone-aware dtype is deprecated and will raise in a "
            "future version. Use obj.tz_localize instead.",
            FutureWarning,
            stacklevel=find_stack_level(),
        )
        return dta.tz_localize(dtype.tz)

    if target_aware:
        # GH#18951: datetime64_tz dtype but not equal means different tz
        dtype = cast(DatetimeTZDtype, dtype)
        converted = dta.tz_convert(dtype.tz)
        return converted.copy() if copy else converted

    if not source_naive:
        warnings.warn(
            "Using .astype to convert from timezone-aware dtype to "
            "timezone-naive dtype is deprecated and will raise in a "
            "future version. Use obj.tz_localize(None) or "
            "obj.tz_convert('UTC').tz_localize(None) instead",
            FutureWarning,
            stacklevel=find_stack_level(),
        )
        stripped = dta.tz_convert("UTC").tz_localize(None)
        return stripped.copy() if copy else stripped

    raise NotImplementedError("dtype_equal case should be handled elsewhere")
def astype_array_safe(
    values: ArrayLike, dtype, copy: bool = False, errors: IgnoreRaise = "raise"
) -> ArrayLike:
    """
    Convert an ndarray or ExtensionArray to ``dtype``, with optional
    error suppression.

    This is the implementation behind DataFrame/Series.astype: it validates
    the arguments, normalizes ``dtype`` through ``pandas_dtype`` and then
    delegates the actual cast to ``astype_array``.

    Parameters
    ----------
    values : ndarray or ExtensionArray
    dtype : str, dtype convertible
        Must be an instance (or something convertible to one), not an
        ExtensionDtype class.
    copy : bool, default False
        copy if indicated
    errors : str, {'raise', 'ignore'}, default 'raise'
        - ``raise`` : allow exceptions to be raised
        - ``ignore`` : suppress ValueError/TypeError raised by the cast and
          return the original object

    Returns
    -------
    ndarray or ExtensionArray

    Raises
    ------
    ValueError
        If ``errors`` is not one of the allowed values.
    TypeError
        If ``dtype`` is an ExtensionDtype class rather than an instance.
    """
    allowed_errors = ("raise", "ignore")
    if errors not in allowed_errors:
        raise ValueError(
            "Expected value of kwarg 'errors' to be one of "
            f"{list(allowed_errors)}. Supplied value is '{errors}'"
        )

    if inspect.isclass(dtype) and issubclass(dtype, ExtensionDtype):
        raise TypeError(
            f"Expected an instance of {dtype.__name__}, "
            "but got the class instead. Try instantiating 'dtype'."
        )

    target = pandas_dtype(dtype)
    if isinstance(target, PandasDtype):
        # Ensure we don't end up with a PandasArray
        target = target.numpy_dtype

    # need to do np.dtype check instead of is_datetime64_dtype
    # otherwise pyright complains
    is_dt64_unit_cast = (
        is_datetime64_dtype(values.dtype)
        and isinstance(target, np.dtype)
        and target.kind == "M"
        and not is_unitless(target)
        and not is_dtype_equal(target, values.dtype)
    )
    if is_dt64_unit_cast:
        # unit conversion, we would re-cast to nanosecond, so this is
        # effectively just a copy (regardless of copy kwd)
        # TODO(2.0): remove special-case
        return values.copy()

    try:
        return astype_array(values, target, copy=copy)
    except (ValueError, TypeError):
        # e.g. astype_nansafe can fail on object-dtype of strings
        # trying to convert to float
        if errors != "ignore":
            raise
        return values