def _from_sequence(cls, scalars, *, dtype=None, copy=False): if dtype: assert dtype == "string" from pandas.core.arrays.masked import BaseMaskedArray if isinstance(scalars, BaseMaskedArray): # avoid costly conversion to object dtype na_values = scalars._mask result = scalars._data result = lib.ensure_string_array(result, copy=copy, convert_na_value=False) result[na_values] = StringDtype.na_value else: # convert non-na-likes to str, and nan-likes to StringDtype.na_value result = lib.ensure_string_array(scalars, na_value=StringDtype.na_value, copy=copy) # Manually creating new array avoids the validation step in the __init__, so is # faster. Refactor need for validation? new_string_array = object.__new__(cls) new_string_array._dtype = StringDtype() new_string_array._ndarray = result return new_string_array
def _from_sequence(cls, scalars, *, dtype: Dtype | None = None, copy=False): if dtype and not (isinstance(dtype, str) and dtype == "string"): dtype = pandas_dtype(dtype) assert isinstance(dtype, StringDtype) and dtype.storage == "python" from pandas.core.arrays.masked import BaseMaskedArray if isinstance(scalars, BaseMaskedArray): # avoid costly conversion to object dtype na_values = scalars._mask result = scalars._data result = lib.ensure_string_array(result, copy=copy, convert_na_value=False) result[na_values] = StringDtype.na_value else: # convert non-na-likes to str, and nan-likes to StringDtype.na_value result = lib.ensure_string_array(scalars, na_value=StringDtype.na_value, copy=copy) # Manually creating new array avoids the validation step in the __init__, so is # faster. Refactor need for validation? new_string_array = cls.__new__(cls) NDArrayBacked.__init__(new_string_array, result, StringDtype(storage="python")) return new_string_array
def _from_sequence(cls, scalars, dtype: Dtype | None = None, copy: bool = False): from pandas.core.arrays.masked import BaseMaskedArray _chk_pyarrow_available() if dtype and not (isinstance(dtype, str) and dtype == "string"): dtype = pandas_dtype(dtype) assert isinstance(dtype, StringDtype) and dtype.storage == "pyarrow" if isinstance(scalars, BaseMaskedArray): # avoid costly conversion to object dtype in ensure_string_array and # numerical issues with Float32Dtype na_values = scalars._mask result = scalars._data result = lib.ensure_string_array(result, copy=copy, convert_na_value=False) return cls(pa.array(result, mask=na_values, type=pa.string())) # convert non-na-likes to str result = lib.ensure_string_array(scalars, copy=copy) return cls(pa.array(result, type=pa.string(), from_pandas=True))
def _from_sequence(cls, scalars, dtype: Optional[Dtype] = None, copy: bool = False): cls._chk_pyarrow_available() # convert non-na-likes to str, and nan-likes to ArrowStringDtype.na_value scalars = lib.ensure_string_array(scalars, copy=False) return cls(pa.array(scalars, type=pa.string(), from_pandas=True))
def _from_sequence(cls, scalars, dtype=None, copy=False): if dtype: assert dtype == "string" # convert non-na-likes to str, and nan-likes to StringDtype.na_value result = lib.ensure_string_array(scalars, na_value=StringDtype.na_value, copy=copy) return cls(result)
def _from_sequence(cls, scalars, dtype=None, copy=False): if dtype: assert dtype == "string" # convert non-na-likes to str, and nan-likes to StringDtype.na_value result = lib.ensure_string_array( scalars, na_value=StringDtype.na_value, copy=copy ) # Manually creating new array avoids the validation step in the __init__, so is # faster. Refactor need for validation? new_string_array = object.__new__(cls) new_string_array._dtype = StringDtype() new_string_array._ndarray = result return new_string_array
def astype_nansafe( arr: np.ndarray, dtype: DtypeObj, copy: bool = True, skipna: bool = False ) -> ArrayLike: """ Cast the elements of an array to a given dtype a nan-safe manner. Parameters ---------- arr : ndarray dtype : np.dtype or ExtensionDtype copy : bool, default True If False, a view will be attempted but may fail, if e.g. the item sizes don't align. skipna: bool, default False Whether or not we should skip NaN when casting as a string-type. Raises ------ ValueError The dtype was a datetime64/timedelta64 dtype, but it had no unit. """ # We get here with 0-dim from sparse arr = np.atleast_1d(arr) # dispatch on extension dtype if needed if isinstance(dtype, ExtensionDtype): return dtype.construct_array_type()._from_sequence(arr, dtype=dtype, copy=copy) elif not isinstance(dtype, np.dtype): # pragma: no cover raise ValueError("dtype must be np.dtype or ExtensionDtype") if arr.dtype.kind in ["m", "M"] and ( issubclass(dtype.type, str) or dtype == _dtype_obj ): from pandas.core.construction import ensure_wrapped_if_datetimelike arr = ensure_wrapped_if_datetimelike(arr) return arr.astype(dtype, copy=copy) if issubclass(dtype.type, str): shape = arr.shape if arr.ndim > 1: arr = arr.ravel() return lib.ensure_string_array( arr, skipna=skipna, convert_na_value=False ).reshape(shape) elif is_datetime64_dtype(arr.dtype): if dtype == np.int64: if isna(arr).any(): raise ValueError("Cannot convert NaT values to integer") return arr.view(dtype) # allow frequency conversions if dtype.kind == "M": return arr.astype(dtype) raise TypeError(f"cannot astype a datetimelike from [{arr.dtype}] to [{dtype}]") elif is_timedelta64_dtype(arr.dtype): if dtype == np.int64: if isna(arr).any(): raise ValueError("Cannot convert NaT values to integer") return arr.view(dtype) elif dtype.kind == "m": return astype_td64_unit_conversion(arr, dtype, copy=copy) raise TypeError(f"cannot astype a timedelta from [{arr.dtype}] to [{dtype}]") elif np.issubdtype(arr.dtype, np.floating) and is_integer_dtype(dtype): return _astype_float_to_int_nansafe(arr, dtype, copy) elif is_object_dtype(arr.dtype): # if we have a datetime/timedelta array of objects # then coerce to a proper dtype and recall astype_nansafe if is_datetime64_dtype(dtype): from pandas import to_datetime return astype_nansafe( to_datetime(arr.ravel()).values.reshape(arr.shape), dtype, copy=copy, ) elif is_timedelta64_dtype(dtype): # bc we know arr.dtype == object, this is equivalent to # `np.asarray(to_timedelta(arr))`, but using a lower-level API that # does not require a circular import. return array_to_timedelta64(arr).view("m8[ns]").astype(dtype, copy=False) if dtype.name in ("datetime64", "timedelta64"): msg = ( f"The '{dtype.name}' dtype has no unit. Please pass in " f"'{dtype.name}[ns]' instead." ) raise ValueError(msg) if copy or is_object_dtype(arr.dtype) or is_object_dtype(dtype): # Explicit copy, or required since NumPy can't view from / to object. return arr.astype(dtype, copy=True) return arr.astype(dtype, copy=copy)
def _try_cast( arr: list | np.ndarray, dtype: DtypeObj | None, copy: bool, raise_cast_failure: bool, ) -> ArrayLike: """ Convert input to numpy ndarray and optionally cast to a given dtype. Parameters ---------- arr : ndarray or list Excludes: ExtensionArray, Series, Index. dtype : np.dtype, ExtensionDtype or None copy : bool If False, don't copy the data if not needed. raise_cast_failure : bool If True, and if a dtype is specified, raise errors during casting. Otherwise an object array is returned. Returns ------- np.ndarray or ExtensionArray """ is_ndarray = isinstance(arr, np.ndarray) if dtype is None: # perf shortcut as this is the most common case if is_ndarray: arr = cast(np.ndarray, arr) if arr.dtype != object: return sanitize_to_nanoseconds(arr, copy=copy) out = maybe_infer_to_datetimelike(arr) if out is arr and copy: out = out.copy() return out else: # i.e. list varr = np.array(arr, copy=False) # filter out cases that we _dont_ want to go through # maybe_infer_to_datetimelike if varr.dtype != object or varr.size == 0: return varr return maybe_infer_to_datetimelike(varr) elif isinstance(dtype, ExtensionDtype): # create an extension array from its dtype if isinstance(dtype, DatetimeTZDtype): # We can't go through _from_sequence because it handles dt64naive # data differently; _from_sequence treats naive as wall times, # while maybe_cast_to_datetime treats it as UTC # see test_maybe_promote_any_numpy_dtype_with_datetimetz # TODO(2.0): with deprecations enforced, should be able to remove # special case. return maybe_cast_to_datetime(arr, dtype) # TODO: copy? array_type = dtype.construct_array_type()._from_sequence subarr = array_type(arr, dtype=dtype, copy=copy) return subarr elif is_object_dtype(dtype): if not is_ndarray: subarr = construct_1d_object_array_from_listlike(arr) return subarr return ensure_wrapped_if_datetimelike(arr).astype(dtype, copy=copy) elif dtype.kind == "U": # TODO: test cases with arr.dtype.kind in ["m", "M"] return lib.ensure_string_array(arr, convert_na_value=False, copy=copy) elif dtype.kind in ["m", "M"]: return maybe_cast_to_datetime(arr, dtype) try: # GH#15832: Check if we are requesting a numeric dtype and # that we can convert the data to the requested dtype. if is_integer_dtype(dtype): # this will raise if we have e.g. floats subarr = maybe_cast_to_integer_array(arr, dtype) else: # 4 tests fail if we move this to a try/except/else; see # test_constructor_compound_dtypes, test_constructor_cast_failure # test_constructor_dict_cast2, test_loc_setitem_dtype subarr = np.array(arr, dtype=dtype, copy=copy) except (ValueError, TypeError): if raise_cast_failure: raise else: # we only get here with raise_cast_failure False, which means # called via the DataFrame constructor # GH#24435 warnings.warn( f"Could not cast to {dtype}, falling back to object. This " "behavior is deprecated. In a future version, when a dtype is " "passed to 'DataFrame', either all columns will be cast to that " "dtype, or a TypeError will be raised.", FutureWarning, stacklevel=find_stack_level(), ) subarr = np.array(arr, dtype=object, copy=copy) return subarr
def astype_nansafe(arr: np.ndarray, dtype: DtypeObj, copy: bool = True, skipna: bool = False) -> ArrayLike: """ Cast the elements of an array to a given dtype a nan-safe manner. Parameters ---------- arr : ndarray dtype : np.dtype or ExtensionDtype copy : bool, default True If False, a view will be attempted but may fail, if e.g. the item sizes don't align. skipna: bool, default False Whether or not we should skip NaN when casting as a string-type. Raises ------ ValueError The dtype was a datetime64/timedelta64 dtype, but it had no unit. """ if arr.ndim > 1: flat = arr.ravel() result = astype_nansafe(flat, dtype, copy=copy, skipna=skipna) # error: Item "ExtensionArray" of "Union[ExtensionArray, ndarray]" has no # attribute "reshape" return result.reshape(arr.shape) # type: ignore[union-attr] # We get here with 0-dim from sparse arr = np.atleast_1d(arr) # dispatch on extension dtype if needed if isinstance(dtype, ExtensionDtype): return dtype.construct_array_type()._from_sequence(arr, dtype=dtype, copy=copy) elif not isinstance(dtype, np.dtype): # pragma: no cover raise ValueError("dtype must be np.dtype or ExtensionDtype") if arr.dtype.kind in ["m", "M"] and (issubclass(dtype.type, str) or dtype == _dtype_obj): from pandas.core.construction import ensure_wrapped_if_datetimelike arr = ensure_wrapped_if_datetimelike(arr) return arr.astype(dtype, copy=copy) if issubclass(dtype.type, str): return lib.ensure_string_array(arr, skipna=skipna, convert_na_value=False) elif is_datetime64_dtype(arr.dtype): if dtype == np.int64: warnings.warn( f"casting {arr.dtype} values to int64 with .astype(...) " "is deprecated and will raise in a future version. " "Use .view(...) instead.", FutureWarning, stacklevel=find_stack_level(), ) if isna(arr).any(): raise ValueError("Cannot convert NaT values to integer") return arr.view(dtype) # allow frequency conversions if dtype.kind == "M": return arr.astype(dtype) raise TypeError( f"cannot astype a datetimelike from [{arr.dtype}] to [{dtype}]") elif is_timedelta64_dtype(arr.dtype): if dtype == np.int64: warnings.warn( f"casting {arr.dtype} values to int64 with .astype(...) " "is deprecated and will raise in a future version. " "Use .view(...) instead.", FutureWarning, stacklevel=find_stack_level(), ) if isna(arr).any(): raise ValueError("Cannot convert NaT values to integer") return arr.view(dtype) elif dtype.kind == "m": return astype_td64_unit_conversion(arr, dtype, copy=copy) raise TypeError( f"cannot astype a timedelta from [{arr.dtype}] to [{dtype}]") elif np.issubdtype(arr.dtype, np.floating) and np.issubdtype( dtype, np.integer): return _astype_float_to_int_nansafe(arr, dtype, copy) elif is_object_dtype(arr.dtype): # work around NumPy brokenness, #1987 if np.issubdtype(dtype.type, np.integer): return lib.astype_intsafe(arr, dtype) # if we have a datetime/timedelta array of objects # then coerce to a proper dtype and recall astype_nansafe elif is_datetime64_dtype(dtype): from pandas import to_datetime return astype_nansafe( to_datetime(arr).values, dtype, copy=copy, ) elif is_timedelta64_dtype(dtype): from pandas import to_timedelta return astype_nansafe(to_timedelta(arr)._values, dtype, copy=copy) if dtype.name in ("datetime64", "timedelta64"): msg = (f"The '{dtype.name}' dtype has no unit. Please pass in " f"'{dtype.name}[ns]' instead.") raise ValueError(msg) if copy or is_object_dtype(arr.dtype) or is_object_dtype(dtype): # Explicit copy, or required since NumPy can't view from / to object. return arr.astype(dtype, copy=True) return arr.astype(dtype, copy=copy)