def sum(self, axis: int = 0, min_count: int = 0, *args, **kwargs) -> Scalar:
    """
    Add up the non-NA/null values of the array.

    Parameters
    ----------
    axis : int, default 0
        Not used. Accepted for NumPy compatibility.
    min_count : int, default 0
        Number of valid values required to perform the summation. When fewer
        than ``min_count`` valid values are present, the missing value
        indicator for the subarray type is returned instead.
    *args, **kwargs
        Not used. Accepted for NumPy compatibility and checked with
        ``nv.validate_sum``.

    Returns
    -------
    scalar
    """
    nv.validate_sum(args, kwargs)
    stored = self._valid_sp_values
    total = stored.sum()

    fill_is_na = self._null_fill_value
    # Gaps holding a non-NA fill value count as valid observations, so they
    # reduce how many stored values are still required.
    required = min_count if fill_is_na else min_count - self.sp_index.ngaps
    if check_below_min_count(stored.shape, None, required):
        return na_value_for_dtype(self.dtype.subtype, compat=False)

    if fill_is_na:
        return total
    # Each gap contributes one copy of the fill value to the total.
    return total + self.fill_value * self.sp_index.ngaps
def _na_for_min_count(
    values: np.ndarray, axis: Optional[int]
) -> Union[Scalar, np.ndarray]:
    """
    Return the missing value for `values`.

    Parameters
    ----------
    values : ndarray
    axis : int or None
        Axis for the reduction. ``None`` means the reduction runs over every
        axis, so a scalar is returned whatever the dimensionality.

    Returns
    -------
    result : scalar or ndarray
        For 1-D values, or when ``axis`` is None, a scalar of the correct
        missing type. Otherwise an array with ``axis`` removed from the
        shape, where each element is missing.
    """
    # we either return np.nan or pd.NaT
    if is_numeric_dtype(values):
        values = values.astype("float64")
    fill_value = na_value_for_dtype(values.dtype)

    if values.ndim == 1 or axis is None:
        # A full reduction collapses to a single missing value; previously
        # axis=None on N-D input tripped an assertion instead.
        return fill_value

    result_shape = values.shape[:axis] + values.shape[axis + 1 :]
    # Calling np.full with the dtype parameter raises a ValueError when
    # dtype is np.datetime64 and fill_value is pd.NaT, so retry without it.
    try:
        return np.full(result_shape, fill_value, dtype=values.dtype)
    except ValueError:
        return np.full(result_shape, fill_value)
def to_array(self, dtype: DtypeObj) -> ArrayLike:
    """
    Build the actual all-NA array that this proxy object stands in for.

    Parameters
    ----------
    dtype : np.dtype or ExtensionDtype
        The dtype for the resulting array.

    Returns
    -------
    np.ndarray or ExtensionArray
        An array of length ``self.n`` filled with missing values.
    """
    if not isinstance(dtype, ExtensionDtype):
        # when introducing missing values, int becomes float, bool becomes object
        dtype = ensure_dtype_can_hold_na(dtype)
        na_value = na_value_for_dtype(dtype)
        filled = np.empty(self.n, dtype=dtype)
        filled.fill(na_value)
        return ensure_wrapped_if_datetimelike(filled)

    # Taking -1 everywhere from an empty array, with filling allowed, yields
    # an array made up solely of the dtype's missing value.
    array_type = dtype.construct_array_type()
    empty = array_type._from_sequence([], dtype=dtype)
    all_missing = np.full(self.n, -1, dtype=np.intp)
    return empty.take(all_missing, allow_fill=True)
def _na_for_min_count(values, axis):
    """Return the missing value for `values`

    Parameters
    ----------
    values : ndarray
    axis : int or None
        axis for the reduction. ``None`` means the reduction runs over every
        axis, so a scalar is returned whatever the dimensionality.

    Returns
    -------
    result : scalar or ndarray
        For 1-D values, or when ``axis`` is None, returns a scalar of the
        correct missing type. Otherwise returns an array with ``axis``
        removed from the shape, where each element is missing.
    """
    # we either return np.nan or pd.NaT
    if is_numeric_dtype(values):
        values = values.astype('float64')
    fill_value = na_value_for_dtype(values.dtype)

    if values.ndim == 1 or axis is None:
        # Slicing the shape with axis=None used to fail with a TypeError
        # (``None + 1``); a full reduction is a single missing value.
        return fill_value

    result_shape = values.shape[:axis] + values.shape[axis + 1:]
    result = np.empty(result_shape, dtype=values.dtype)
    result.fill(fill_value)
    return result
def _na_for_min_count(values: np.ndarray, axis: Optional[int]) -> Union[Scalar, np.ndarray]:
    """
    Produce the missing-value result matching `values`.

    Parameters
    ----------
    values : ndarray
    axis : int or None
        Axis of the reduction; needed to build an array result when
        ``values.ndim > 1``.

    Returns
    -------
    result : scalar or ndarray
        A scalar of the correct missing type for 1-D values or when ``axis``
        is None. Otherwise an array with ``axis`` dropped from the shape in
        which every element is missing.
    """
    # Numeric input reports NaN as float64; everything else keeps its dtype.
    if is_numeric_dtype(values):
        values = values.astype("float64")

    missing = na_value_for_dtype(values.dtype)
    if missing is NaT:
        # Swap pd.NaT for the NumPy NaT scalar of the array's own dtype.
        missing = values.dtype.type("NaT", "ns")

    if values.ndim == 1 or axis is None:
        return missing

    reduced_shape = values.shape[:axis] + values.shape[axis + 1:]
    return np.full(reduced_shape, missing, dtype=values.dtype)
def quantile_compat(values: ArrayLike, qs: npt.NDArray[np.float64], interpolation: str) -> ArrayLike:
    """
    Compute the quantiles of `values` for each quantile in `qs`.

    Parameters
    ----------
    values : np.ndarray or ExtensionArray
    qs : np.ndarray[float64]
    interpolation : str

    Returns
    -------
    np.ndarray or ExtensionArray
    """
    if isinstance(values, np.ndarray):
        na_value = na_value_for_dtype(values.dtype, compat=False)
        missing = isna(values)
        return _quantile_with_mask(values, missing, na_value, qs, interpolation)

    # In general we don't want to import from arrays here;
    # this is temporary pending discussion in GH#41428
    from pandas.core.arrays import BaseMaskedArray

    if not isinstance(values, BaseMaskedArray):
        return _quantile_ea_compat(values, qs, interpolation)

    # e.g. IntegerArray, does not implement _from_factorized
    return _quantile_ea_fallback(values, qs, interpolation)
def _na_for_min_count(values, axis):
    """Return the missing value for `values`

    Parameters
    ----------
    values : ndarray
    axis : int or None
        axis for the reduction. ``None`` means the reduction runs over every
        axis, so a scalar is returned whatever the dimensionality.

    Returns
    -------
    result : scalar or ndarray
        For 1-D values, or when ``axis`` is None, returns a scalar of the
        correct missing type. Otherwise returns an array with ``axis``
        removed from the shape, where each element is missing.
    """
    # we either return np.nan or pd.NaT
    if is_numeric_dtype(values):
        values = values.astype('float64')
    fill_value = na_value_for_dtype(values.dtype)

    if values.ndim == 1 or axis is None:
        # Slicing the shape with axis=None used to fail with a TypeError
        # (``None + 1``); a full reduction is a single missing value.
        return fill_value

    result_shape = values.shape[:axis] + values.shape[axis + 1:]
    result = np.empty(result_shape, dtype=values.dtype)
    result.fill(fill_value)
    return result
def _na_for_min_count(values, axis: Optional[int]):
    """
    Return the missing value for `values`.

    Parameters
    ----------
    values : ndarray
    axis : int or None
        Axis for the reduction. ``None`` means the reduction runs over every
        axis, so a scalar is returned whatever the dimensionality.

    Returns
    -------
    result : scalar or ndarray
        For 1-D values, or when ``axis`` is None, a scalar of the correct
        missing type. Otherwise an array with ``axis`` removed from the
        shape, where each element is missing.
    """
    # we either return np.nan or pd.NaT
    if is_numeric_dtype(values):
        values = values.astype("float64")
    fill_value = na_value_for_dtype(values.dtype)

    if values.ndim == 1 or axis is None:
        # A full reduction collapses to a single missing value; previously
        # axis=None on N-D input tripped an assertion instead. The explicit
        # None check also narrows ``axis`` to int for the type checker.
        return fill_value

    result_shape = values.shape[:axis] + values.shape[axis + 1 :]
    result = np.empty(result_shape, dtype=values.dtype)
    result.fill(fill_value)
    return result
def _simple_new(cls, data, sp_index, fill_value):
    """
    View `data` as `cls` and attach the sparse index and fill value to it.

    Parameters
    ----------
    data : ndarray
        The stored values.
    sp_index : SparseIndex
    fill_value : scalar or None
        When None, NaN is used if the index has gaps; otherwise the value
        returned by ``na_value_for_dtype`` for ``data.dtype`` is used.

    Returns
    -------
    cls
        A view of `data` (cast to float first when integer data has to share
        a dense representation with a float fill value).

    Raises
    ------
    ValueError
        If `sp_index` is not a SparseIndex.
    """
    # The index is validated once up front. It is never rebound afterwards,
    # so the second identical check the function used to do was unreachable.
    if not isinstance(sp_index, SparseIndex):
        # caller must pass SparseIndex
        raise ValueError('sp_index must be a SparseIndex')

    has_gaps = sp_index.ngaps > 0
    if fill_value is None:
        # has missing hole -> NaN, otherwise the dtype's own NA value
        fill_value = np.nan if has_gaps else na_value_for_dtype(data.dtype)

    if has_gaps and is_integer_dtype(data) and is_float(fill_value):
        # if float fill_value is being included in dense repr,
        # convert values to float
        data = data.astype(float)

    result = data.view(cls)
    result.sp_index = sp_index
    result._fill_value = fill_value
    return result
def _simple_new(cls, data, sp_index, fill_value):
    """
    View `data` as `cls` and attach the sparse index and fill value to it.

    Parameters
    ----------
    data : ndarray
        The stored values.
    sp_index : SparseIndex
    fill_value : scalar or None
        When None, NaN is used if the index has gaps; otherwise the value
        returned by ``na_value_for_dtype`` for ``data.dtype`` is used.

    Returns
    -------
    cls
        A view of `data` (cast to float first when integer data has to share
        a dense representation with a float fill value).

    Raises
    ------
    ValueError
        If `sp_index` is not a SparseIndex.
    """
    # The index is validated once up front. It is never rebound afterwards,
    # so the second identical check the function used to do was unreachable.
    if not isinstance(sp_index, SparseIndex):
        # caller must pass SparseIndex
        raise ValueError('sp_index must be a SparseIndex')

    has_gaps = sp_index.ngaps > 0
    if fill_value is None:
        # has missing hole -> NaN, otherwise the dtype's own NA value
        fill_value = np.nan if has_gaps else na_value_for_dtype(data.dtype)

    if has_gaps and is_integer_dtype(data) and is_float(fill_value):
        # if float fill_value is being included in dense repr,
        # convert values to float
        data = data.astype(float)

    result = data.view(cls)
    result.sp_index = sp_index
    result._fill_value = fill_value
    return result
def test_na_value_for_dtype():
    """Check the NA value ``na_value_for_dtype`` reports per dtype family."""
    # datetime-like dtypes (naive, timedelta, tz-aware) report NaT
    datetimelike = [np.dtype('M8[ns]'), np.dtype('m8[ns]'),
                    DatetimeTZDtype('datetime64[ns, US/Eastern]')]
    for dtype in datetimelike:
        assert na_value_for_dtype(dtype) is NaT

    # integer dtypes cannot hold NaN, so the reported value is 0
    integer_codes = ['u1', 'u2', 'u4', 'u8', 'i1', 'i2', 'i4', 'i8']
    for code in integer_codes:
        assert na_value_for_dtype(np.dtype(code)) == 0

    assert na_value_for_dtype(np.dtype('bool')) is False

    # floats and object both report NaN
    for code in ['f2', 'f4', 'f8', 'O']:
        assert np.isnan(na_value_for_dtype(np.dtype(code)))
def test_na_value_for_dtype():
    """Check the NA value ``na_value_for_dtype`` reports per dtype family."""
    # datetime-like dtypes (naive, timedelta, tz-aware) report NaT
    datetimelike = [np.dtype('M8[ns]'), np.dtype('m8[ns]'),
                    DatetimeTZDtype('datetime64[ns, US/Eastern]')]
    for dtype in datetimelike:
        assert na_value_for_dtype(dtype) is NaT

    # integer dtypes cannot hold NaN, so the reported value is 0
    integer_codes = ['u1', 'u2', 'u4', 'u8', 'i1', 'i2', 'i4', 'i8']
    for code in integer_codes:
        assert na_value_for_dtype(np.dtype(code)) == 0

    assert na_value_for_dtype(np.dtype('bool')) is False

    # floats and object both report NaN
    for code in ['f2', 'f4', 'f8', 'O']:
        assert np.isnan(na_value_for_dtype(np.dtype(code)))
def make_sparse(arr: np.ndarray, kind="block", fill_value=None, dtype=None, copy=False):
    """
    Split a 1-D ndarray into its non-fill values and a sparse index.

    Parameters
    ----------
    arr : ndarray
    kind : {'block', 'integer'}
    fill_value : NaN or another value
        When None, the NA value for ``arr.dtype`` is used.
    dtype : np.dtype, optional
        When given, the stored values are cast to this dtype.
    copy : bool, default False
        Currently not used by the implementation.

    Returns
    -------
    (sparse_values, index, fill_value) : (ndarray, SparseIndex, Scalar)

    Raises
    ------
    TypeError
        If `arr` has more than one dimension.
    """
    assert isinstance(arr, np.ndarray)

    if arr.ndim > 1:
        raise TypeError("expected dimension <= 1 data")

    if fill_value is None:
        fill_value = na_value_for_dtype(arr.dtype)

    if isna(fill_value):
        keep = notna(arr)
    else:
        # cast to object comparison to be safe
        if is_string_dtype(arr):
            arr = arr.astype(object)

        if not is_object_dtype(arr.dtype):
            keep = arr != fill_value
        else:
            # element-wise equality check method in numpy doesn't treat
            # each element type, eg. 0, 0.0, and False are treated as
            # same. So we have to check the both of its type and value.
            keep = splib.make_mask_object_ndarray(arr, fill_value)

    n_total = len(arr)
    if n_total == len(keep):
        positions = keep.nonzero()[0].astype(np.int32)
    else:
        # the arr is a SparseArray
        positions = keep.sp_index.indices

    index = _make_index(n_total, positions, kind)
    stored = arr[keep]
    if dtype is not None:
        stored = astype_nansafe(stored, dtype=dtype)
    # TODO: copy
    return stored, index, fill_value
def take_nd(
    arr: ArrayLike,
    indexer,
    axis: int = 0,
    fill_value=lib.no_default,
    allow_fill: bool = True,
) -> ArrayLike:
    """
    Specialized Cython take which sets NaN values in one pass.

    This dispatches to ``take`` defined on ExtensionArrays. It does not
    currently dispatch to ``SparseArray.take`` for sparse ``arr``.

    Note: this function assumes that the indexer is a valid(ated) indexer with
    no out of bound indices.

    Parameters
    ----------
    arr : np.ndarray or ExtensionArray
        Input array.
    indexer : ndarray
        1-D array of indices to take; subarrays corresponding to -1 value
        indices are filled with fill_value.
    axis : int, default 0
        Axis to take from.
    fill_value : any, default np.nan
        Fill value to replace -1 values with. When left at its sentinel
        default, the NA value for ``arr.dtype`` is used.
    allow_fill : bool, default True
        If False, indexer is assumed to contain no -1 values so no filling
        will be done. This short-circuits computation of a mask. Result is
        undefined if allow_fill == False and -1 is present in indexer.

    Returns
    -------
    subarray : np.ndarray or ExtensionArray
        May be the same type as the input, or cast to an ndarray.
    """
    if fill_value is lib.no_default:
        fill_value = na_value_for_dtype(arr.dtype, compat=False)

    if isinstance(arr, np.ndarray):
        arr = np.asarray(arr)
        return _take_nd_ndarray(arr, indexer, axis, fill_value, allow_fill)

    # i.e. ExtensionArray,
    # includes for EA to catch DatetimeArray, TimedeltaArray
    if is_1d_only_ea_obj(arr):
        return arr.take(indexer, fill_value=fill_value, allow_fill=allow_fill)

    # i.e. DatetimeArray, TimedeltaArray: these also accept an axis
    arr = cast("NDArrayBackedExtensionArray", arr)
    return arr.take(indexer, fill_value=fill_value, allow_fill=allow_fill, axis=axis)
def make_sparse(arr, kind='block', fill_value=None, dtype=None, copy=False):
    """
    Split array data into its non-fill values and a sparse index.

    Parameters
    ----------
    arr : ndarray
    kind : {'block', 'integer'}
    fill_value : NaN or another value
        When None, the NA value for the array's dtype is used.
    dtype : np.dtype, optional
        When given, the stored values are cast to this dtype.
    copy : bool, default False
        Currently not used by the implementation.

    Returns
    -------
    (sparse_values, index, fill_value) : (ndarray, SparseIndex, Scalar)

    Raises
    ------
    TypeError
        If the sanitized input has more than one dimension.
    """
    arr = _sanitize_values(arr)

    if arr.ndim > 1:
        raise TypeError("expected dimension <= 1 data")

    if fill_value is None:
        fill_value = na_value_for_dtype(arr.dtype)

    if isna(fill_value):
        keep = notna(arr)
    else:
        # For str arrays in NumPy 1.12.0, operator!= below isn't
        # element-wise but just returns False if fill_value is not str,
        # so cast to object comparison to be safe
        if is_string_dtype(arr):
            arr = arr.astype(object)

        if not is_object_dtype(arr.dtype):
            keep = arr != fill_value
        else:
            # element-wise equality check method in numpy doesn't treat
            # each element type, eg. 0, 0.0, and False are treated as
            # same. So we have to check the both of its type and value.
            keep = splib.make_mask_object_ndarray(arr, fill_value)

    n_total = len(arr)
    if n_total == len(keep):
        positions = keep.nonzero()[0].astype(np.int32)
    else:
        # the arr is a SparseArray
        positions = keep.sp_index.indices

    index = _make_index(n_total, positions, kind)
    stored = arr[keep]
    if dtype is not None:
        stored = astype_nansafe(stored, dtype=dtype)
    # TODO: copy
    return stored, index, fill_value
def make_sparse(arr, kind='block', fill_value=None):
    """
    Split array data into its non-fill values and a sparse index.

    Parameters
    ----------
    arr : ndarray
    kind : {'block', 'integer'}
    fill_value : NaN or another value
        When None, the NA value for the array's dtype is used.

    Returns
    -------
    (sparse_values, index, fill_value) : (ndarray, SparseIndex, Scalar)

    Raises
    ------
    TypeError
        If the sanitized input has more than one dimension.
    """
    arr = _sanitize_values(arr)

    if arr.ndim > 1:
        raise TypeError("expected dimension <= 1 data")

    if fill_value is None:
        fill_value = na_value_for_dtype(arr.dtype)

    if isna(fill_value):
        keep = notna(arr)
    else:
        # For str arrays in NumPy 1.12.0, operator!= below isn't
        # element-wise but just returns False if fill_value is not str,
        # so cast to object comparison to be safe
        if is_string_dtype(arr):
            arr = arr.astype(object)

        if not is_object_dtype(arr.dtype):
            keep = arr != fill_value
        else:
            # element-wise equality check method in numpy doesn't treat
            # each element type, eg. 0, 0.0, and False are treated as
            # same. So we have to check the both of its type and value.
            keep = splib.make_mask_object_ndarray(arr, fill_value)

    n_total = len(arr)
    if n_total == keep.size:
        positions = keep.nonzero()[0].astype(np.int32)
    else:
        # the arr is a SparseArray
        positions = keep.sp_index.indices

    index = _make_index(n_total, positions, kind)
    stored = arr[keep]
    return stored, index, fill_value
def take_nd(
    arr: ArrayLike,
    indexer,
    axis: int = 0,
    out: Optional[np.ndarray] = None,
    fill_value=lib.no_default,
    allow_fill: bool = True,
) -> ArrayLike:
    """
    Specialized Cython take which sets NaN values in one pass.

    This dispatches to ``take`` defined on ExtensionArrays. It does not
    currently dispatch to ``SparseArray.take`` for sparse ``arr``.

    Parameters
    ----------
    arr : np.ndarray or ExtensionArray
        Input array.
    indexer : ndarray
        1-D array of indices to take; subarrays corresponding to -1 value
        indices are filled with fill_value.
    axis : int, default 0
        Axis to take from. Only forwarded on the ndarray path.
    out : ndarray or None, default None
        Optional output array, must be appropriate type to hold input and
        fill_value together, if indexer has any -1 value entries; call
        maybe_promote to determine this type for any fill_value. Only
        forwarded on the ndarray path.
    fill_value : any, default np.nan
        Fill value to replace -1 values with. When left at its sentinel
        default, the NA value for ``arr.dtype`` is used.
    allow_fill : boolean, default True
        If False, indexer is assumed to contain no -1 values so no filling
        will be done. This short-circuits computation of a mask. Result is
        undefined if allow_fill == False and -1 is present in indexer.

    Returns
    -------
    subarray : np.ndarray or ExtensionArray
        May be the same type as the input, or cast to an ndarray.
    """
    if fill_value is lib.no_default:
        fill_value = na_value_for_dtype(arr.dtype, compat=False)

    if isinstance(arr, np.ndarray):
        arr = np.asarray(arr)
        return _take_nd_ndarray(arr, indexer, axis, out, fill_value, allow_fill)

    # i.e. ExtensionArray,
    # includes for EA to catch DatetimeArray, TimedeltaArray
    return arr.take(indexer, fill_value=fill_value, allow_fill=allow_fill)
def f(values, axis=None, skipna=True, **kwds):
    """
    Run a reduction on `values`, preferring the fast implementation.

    The names ``self``, ``bn_name``, ``bn_func`` and ``alt`` are not defined
    here; presumably they come from an enclosing scope that is not visible
    in this block (TODO confirm).

    Parameters
    ----------
    values : ndarray
    axis : int or None, default None
    skipna : bool, default True
    **kwds
        Forwarded to the reduction. Entries of ``self.kwargs`` are added for
        any key the caller did not pass.

    Returns
    -------
    scalar or ndarray
        The missing value (or an array of it) for empty input, otherwise the
        result of ``bn_func`` or ``alt``.

    Raises
    ------
    TypeError
        If the ``alt`` retry raises ValueError and `values` is object dtype.
    """
    # Fill in stored default keyword arguments without overriding the
    # caller's explicit ones.
    if len(self.kwargs) > 0:
        for k, v in compat.iteritems(self.kwargs):
            if k not in kwds:
                kwds[k] = v
    try:
        if values.size == 0:

            # we either return np.nan or pd.NaT
            if is_numeric_dtype(values):
                values = values.astype('float64')
            fill_value = na_value_for_dtype(values.dtype)

            if values.ndim == 1:
                return fill_value
            else:
                # drop the reduced axis from the shape; any error raised
                # here is caught below and retried through ``alt``
                result_shape = (values.shape[:axis] +
                                values.shape[axis + 1:])
                result = np.empty(result_shape, dtype=values.dtype)
                result.fill(fill_value)
                return result

        if (_USE_BOTTLENECK and skipna and
                _bn_ok_dtype(values.dtype, bn_name)):
            result = bn_func(values, axis=axis, **kwds)

            # prefer to treat inf/-inf as NA, but must compute the func
            # twice :(
            if _has_infs(result):
                result = alt(values, axis=axis, skipna=skipna, **kwds)
        else:
            result = alt(values, axis=axis, skipna=skipna, **kwds)
    except Exception:
        # Anything that failed above gets one retry through ``alt``.
        try:
            result = alt(values, axis=axis, skipna=skipna, **kwds)
        except ValueError as e:
            # we want to transform an object array
            # ValueError message to the more typical TypeError
            # e.g. this is normally a disallowed function on
            # object arrays that contain strings

            if is_object_dtype(values):
                raise TypeError(e)
            raise

    return result
def f(values, axis=None, skipna=True, **kwds):
    """
    Run a reduction on `values`, preferring the fast implementation.

    The names ``self``, ``bn_name``, ``bn_func`` and ``alt`` are not defined
    here; presumably they come from an enclosing scope that is not visible
    in this block (TODO confirm).

    Parameters
    ----------
    values : ndarray
    axis : int or None, default None
    skipna : bool, default True
    **kwds
        Forwarded to the reduction. Entries of ``self.kwargs`` are added for
        any key the caller did not pass.

    Returns
    -------
    scalar or ndarray
        The missing value (or an array of it) for empty input, otherwise the
        result of ``bn_func`` or ``alt``.

    Raises
    ------
    TypeError
        If the ``alt`` retry raises ValueError and `values` is object dtype.
    """
    # Fill in stored default keyword arguments without overriding the
    # caller's explicit ones.
    if len(self.kwargs) > 0:
        for k, v in compat.iteritems(self.kwargs):
            if k not in kwds:
                kwds[k] = v
    try:
        if values.size == 0:

            # we either return np.nan or pd.NaT
            if is_numeric_dtype(values):
                values = values.astype('float64')
            fill_value = na_value_for_dtype(values.dtype)

            if values.ndim == 1:
                return fill_value
            else:
                # drop the reduced axis from the shape; any error raised
                # here is caught below and retried through ``alt``
                result_shape = (values.shape[:axis] +
                                values.shape[axis + 1:])
                result = np.empty(result_shape, dtype=values.dtype)
                result.fill(fill_value)
                return result

        if (_USE_BOTTLENECK and skipna and
                _bn_ok_dtype(values.dtype, bn_name)):
            result = bn_func(values, axis=axis, **kwds)

            # prefer to treat inf/-inf as NA, but must compute the func
            # twice :(
            if _has_infs(result):
                result = alt(values, axis=axis, skipna=skipna, **kwds)
        else:
            result = alt(values, axis=axis, skipna=skipna, **kwds)
    except Exception:
        # Anything that failed above gets one retry through ``alt``.
        try:
            result = alt(values, axis=axis, skipna=skipna, **kwds)
        except ValueError as e:
            # we want to transform an object array
            # ValueError message to the more typical TypeError
            # e.g. this is normally a disallowed function on
            # object arrays that contain strings

            if is_object_dtype(values):
                raise TypeError(e)
            raise

    return result
def __init__(self, dtype: Dtype = np.float64, fill_value: Any = None):
    """
    Set up the subtype and fill value of the sparse dtype.

    Parameters
    ----------
    dtype : str, np.dtype, ExtensionDtype or type, default np.float64
        The subtype. An instance of this same class is unwrapped to its own
        ``subtype``.
    fill_value : scalar, optional
        Defaults to the fill value of the unwrapped dtype if there was one,
        otherwise to the NA value for the subtype.
    """
    subtype, fill = dtype, fill_value

    if isinstance(subtype, type(self)):
        # Unwrap a same-class dtype, inheriting its fill value unless one
        # was passed explicitly.
        fill = subtype.fill_value if fill is None else fill
        subtype = subtype.subtype

    subtype = pandas_dtype(subtype)
    if is_string_dtype(subtype):
        subtype = np.dtype("object")

    if fill is None:
        fill = na_value_for_dtype(subtype)

    self._dtype = subtype
    self._fill_value = fill
    self._check_fill_value()
def __init__(self, dtype: Dtype = np.float64, fill_value: Any = None) -> None:
    """
    Set up the subtype and fill value of the sparse dtype.

    Parameters
    ----------
    dtype : str, np.dtype, ExtensionDtype or type, default np.float64
        The subtype. An instance of this same class is unwrapped to its own
        ``subtype``.
    fill_value : scalar, optional
        Defaults to the fill value of the unwrapped dtype if there was one,
        otherwise to the NA value for the subtype.

    Raises
    ------
    ValueError
        If the resolved fill value is not a scalar.
    """
    subtype, fill = dtype, fill_value

    if isinstance(subtype, type(self)):
        # Unwrap a same-class dtype, inheriting its fill value unless one
        # was passed explicitly.
        fill = subtype.fill_value if fill is None else fill
        subtype = subtype.subtype

    subtype = pandas_dtype(subtype)
    if is_string_dtype(subtype):
        subtype = np.dtype("object")

    if fill is None:
        fill = na_value_for_dtype(subtype)
    if not is_scalar(fill):
        raise ValueError(f"fill_value must be a scalar. Got {fill} instead")

    self._dtype = subtype
    self._fill_value = fill
def test_value_counts_with_normalize(self, data):
    """Normalized value counts give each distinct valid value an equal share."""
    # GH 33172
    data = data[:10].unique()
    valid = np.array(data[~data.isna()])
    ser = pd.Series(data, dtype=data.dtype)

    result = ser.value_counts(normalize=True).sort_index()

    share = 1 / len(valid)
    if isinstance(data, pd.Categorical):
        # Categories that never occur are reported with a share of zero.
        expected = pd.Series(0.0, index=result.index)
        expected[result > 0] = share
    else:
        expected = pd.Series([share] * len(valid), index=result.index)

    if na_value_for_dtype(data.dtype) is pd.NA:
        # TODO(GH#44692): avoid special-casing
        expected = expected.astype("Float64")

    self.assert_series_equal(result, expected)
def make_sparse(arr, kind='block', fill_value=None):
    """
    Split array data into its non-fill values and a sparse index.

    Parameters
    ----------
    arr : ndarray
    kind : {'block', 'integer'}
    fill_value : NaN or another value
        When None, the NA value for the array's dtype is used.

    Returns
    -------
    (sparse_values, index, fill_value) : (ndarray, SparseIndex, Scalar)

    Raises
    ------
    TypeError
        If the sanitized input has more than one dimension.
    """
    arr = _sanitize_values(arr)

    if arr.ndim > 1:
        raise TypeError("expected dimension <= 1 data")

    if fill_value is None:
        fill_value = na_value_for_dtype(arr.dtype)

    if isna(fill_value):
        keep = notna(arr)
    else:
        # For str arrays in NumPy 1.12.0, operator!= below isn't
        # element-wise but just returns False if fill_value is not str,
        # so cast to object comparison to be safe
        if is_string_dtype(arr):
            arr = arr.astype(object)
        keep = arr != fill_value

    n_total = len(arr)
    if n_total == keep.size:
        positions = keep.nonzero()[0].astype(np.int32)
    else:
        # the arr is a SparseArray
        positions = keep.sp_index.indices

    index = _make_index(n_total, positions, kind)
    stored = arr[keep]
    return stored, index, fill_value
def interpolate_2d_with_fill(
    data: np.ndarray,  # floating dtype
    index: Index,
    axis: int,
    method: str = "linear",
    limit: int | None = None,
    limit_direction: str = "forward",
    limit_area: str | None = None,
    fill_value: Any | None = None,
    **kwargs,
) -> np.ndarray:
    """
    Apply interpolate_1d to every 1-D slice of `data` along `axis`.

    Returns
    -------
    np.ndarray
        The result of ``np.apply_along_axis`` over the interpolated slices.

    Notes
    -----
    The signature differs from interpolate_1d because it only includes what
    is needed for Block.interpolate.
    """
    # validate the interp method
    clean_interp_method(method, index, **kwargs)

    # Normalize any NA-like fill value to the NA value of the data's dtype.
    if is_valid_na_for_dtype(fill_value, data.dtype):
        fill_value = na_value_for_dtype(data.dtype, compat=False)

    def _interp_slice(yvalues: np.ndarray) -> np.ndarray:
        # process 1-d slices in the axis direction, returning it
        # should the axis argument be handled below in apply_along_axis?
        # i.e. not an arg to interpolate_1d
        return interpolate_1d(
            xvalues=index,
            yvalues=yvalues,
            method=method,
            limit=limit,
            limit_direction=limit_direction,
            limit_area=limit_area,
            fill_value=fill_value,
            bounds_error=False,
            **kwargs,
        )

    # interp each column independently
    return np.apply_along_axis(_interp_slice, axis, data)
def make_sparse(arr, kind='block', fill_value=None):
    """
    Split array data into its non-fill values and a sparse index.

    Parameters
    ----------
    arr : ndarray
    kind : {'block', 'integer'}
    fill_value : NaN or another value
        When None, the NA value for the array's dtype is used.

    Returns
    -------
    (sparse_values, index, fill_value) : (ndarray, SparseIndex, Scalar)

    Raises
    ------
    TypeError
        If the sanitized input has more than one dimension.
    """
    arr = _sanitize_values(arr)

    if arr.ndim > 1:
        raise TypeError("expected dimension <= 1 data")

    if fill_value is None:
        fill_value = na_value_for_dtype(arr.dtype)

    if isnull(fill_value):
        keep = notnull(arr)
    else:
        # For str arrays in NumPy 1.12.0, operator!= below isn't
        # element-wise but just returns False if fill_value is not str,
        # so cast to object comparison to be safe
        if is_string_dtype(arr):
            arr = arr.astype(object)
        keep = arr != fill_value

    n_total = len(arr)
    if n_total == keep.size:
        positions = keep.nonzero()[0].astype(np.int32)
    else:
        # the arr is a SparseArray
        positions = keep.sp_index.indices

    index = _make_index(n_total, positions, kind)
    stored = arr[keep]
    return stored, index, fill_value
def quantile_compat(values: ArrayLike, qs: np.ndarray, interpolation: str) -> ArrayLike:
    """
    Compute the quantiles of `values` for each quantile in `qs`.

    Parameters
    ----------
    values : np.ndarray or ExtensionArray
    qs : np.ndarray[float64]
    interpolation : str

    Returns
    -------
    np.ndarray or ExtensionArray
    """
    if not isinstance(values, np.ndarray):
        return _quantile_ea_compat(values, qs, interpolation)

    # ndarray path: missing entries are masked out and the NA value for the
    # dtype is handed over as the fill value.
    na_value = na_value_for_dtype(values.dtype, compat=False)
    missing = isna(values)
    return _quantile_with_mask(values, missing, na_value, qs, interpolation)
def quantile_compat(values: ArrayLike, qs, interpolation: str, axis: int) -> ArrayLike:
    """
    Compute the quantiles of `values` for each quantile in `qs`.

    Parameters
    ----------
    values : np.ndarray or ExtensionArray
    qs : a scalar or list of the quantiles to be computed
    interpolation : str
    axis : int

    Returns
    -------
    np.ndarray or ExtensionArray
    """
    if not isinstance(values, np.ndarray):
        return quantile_ea_compat(values, qs, interpolation, axis)

    # ndarray path: missing entries are masked out and the NA value for the
    # dtype is handed over as the fill value.
    na_value = na_value_for_dtype(values.dtype, compat=False)
    missing = isna(values)
    return quantile_with_mask(values, missing, na_value, qs, interpolation, axis)
def __init__(self, dtype=np.float64, fill_value=None):
    # type: (Union[str, np.dtype, 'ExtensionDtype', type], Any) -> None
    """
    Set up the subtype and fill value of the sparse dtype.

    Parameters
    ----------
    dtype : str, np.dtype, ExtensionDtype or type, default np.float64
        The subtype. An instance of this same class is unwrapped to its own
        ``subtype``.
    fill_value : scalar, optional
        Defaults to the fill value of the unwrapped dtype if there was one,
        otherwise to the NA value for the subtype.

    Raises
    ------
    ValueError
        If the resolved fill value is not a scalar.
    """
    from pandas.core.dtypes.missing import na_value_for_dtype
    from pandas.core.dtypes.common import (pandas_dtype, is_string_dtype,
                                           is_scalar)

    subtype, fill = dtype, fill_value

    if isinstance(subtype, type(self)):
        # Unwrap a same-class dtype, inheriting its fill value unless one
        # was passed explicitly.
        fill = subtype.fill_value if fill is None else fill
        subtype = subtype.subtype

    subtype = pandas_dtype(subtype)
    if is_string_dtype(subtype):
        subtype = np.dtype('object')

    if fill is None:
        fill = na_value_for_dtype(subtype)
    if not is_scalar(fill):
        raise ValueError("fill_value must be a scalar. Got {} "
                         "instead".format(fill))

    self._dtype = subtype
    self._fill_value = fill
def __init__(self, dtype=np.float64, fill_value=None):
    # type: (Union[str, np.dtype, 'ExtensionDtype', type], Any) -> None
    """
    Set up the subtype and fill value of the sparse dtype.

    Parameters
    ----------
    dtype : str, np.dtype, ExtensionDtype or type, default np.float64
        The subtype. An instance of this same class is unwrapped to its own
        ``subtype``.
    fill_value : scalar, optional
        Defaults to the fill value of the unwrapped dtype if there was one,
        otherwise to the NA value for the subtype.

    Raises
    ------
    ValueError
        If the resolved fill value is not a scalar.
    """
    from pandas.core.dtypes.missing import na_value_for_dtype
    from pandas.core.dtypes.common import (
        pandas_dtype, is_string_dtype, is_scalar
    )

    subtype, fill = dtype, fill_value

    if isinstance(subtype, type(self)):
        # Unwrap a same-class dtype, inheriting its fill value unless one
        # was passed explicitly.
        fill = subtype.fill_value if fill is None else fill
        subtype = subtype.subtype

    subtype = pandas_dtype(subtype)
    if is_string_dtype(subtype):
        subtype = np.dtype('object')

    if fill is None:
        fill = na_value_for_dtype(subtype)
    if not is_scalar(fill):
        raise ValueError("fill_value must be a scalar. Got {} "
                         "instead".format(fill))

    self._dtype = subtype
    self._fill_value = fill
def __init__(self, data, sparse_index=None, index=None, fill_value=None,
             kind='integer', dtype=None, copy=False):
    """
    Build the sparse values, sparse index and dtype from `data`.

    Parameters
    ----------
    data : SingleBlockManager, sparse array/series, array-like, scalar or None
        A SingleBlockManager is unwrapped through ``internal_values()``; an
        instance of this class or an ABCSparseSeries supplies defaults for
        `sparse_index`, `fill_value` and `dtype` and is then replaced by its
        ``sp_values``. A scalar is expanded to a 1-D array. None becomes an
        empty array.
    sparse_index : optional
        When None, it is taken from `data` or computed by ``make_sparse``.
        NOTE(review): presumably a SparseIndex - confirm against callers.
    index : optional
        Only allowed together with scalar `data`; its length sets the number
        of points the scalar is expanded to.
    fill_value : optional
        Resolved in order from the argument, a SparseDtype passed as `dtype`,
        sparse `data`, and finally ``na_value_for_dtype`` (or NaN).
    kind : default 'integer'
        Forwarded to ``make_sparse``.
    dtype : str, SparseDtype or anything ``pandas_dtype`` accepts, optional
    copy : bool, default False
        When true, ``data.copy()`` is taken before sparsifying.

    Raises
    ------
    Exception
        If `index` is given and `data` is not a scalar.
    ValueError
        Re-raised from ``_sanitize_array`` when a `dtype` was supplied.
    AssertionError
        If `sparse_index` is given and its ``npoints`` differs from the
        number of values.
    """
    from pandas.core.internals import SingleBlockManager

    if isinstance(data, SingleBlockManager):
        data = data.internal_values()

    if fill_value is None and isinstance(dtype, SparseDtype):
        fill_value = dtype.fill_value

    if isinstance(data, (type(self), ABCSparseSeries)):
        # disable normal inference on dtype, sparse_index, & fill_value
        if sparse_index is None:
            sparse_index = data.sp_index
        if fill_value is None:
            fill_value = data.fill_value
        if dtype is None:
            dtype = data.dtype
        # TODO: make kind=None, and use data.kind?
        data = data.sp_values

    # Handle use-provided dtype
    if isinstance(dtype, compat.string_types):
        # Two options: dtype='int', regular numpy dtype
        # or dtype='Sparse[int]', a sparse dtype
        try:
            dtype = SparseDtype.construct_from_string(dtype)
        except TypeError:
            dtype = pandas_dtype(dtype)

    # From here on ``dtype`` is the subtype, not the SparseDtype itself.
    if isinstance(dtype, SparseDtype):
        if fill_value is None:
            fill_value = dtype.fill_value
        dtype = dtype.subtype

    if index is not None and not is_scalar(data):
        raise Exception("must only pass scalars with an index ")

    if is_scalar(data):
        if index is not None:
            if data is None:
                data = np.nan

        # Number of points the scalar is broadcast to.
        if index is not None:
            npoints = len(index)
        elif sparse_index is None:
            npoints = 1
        else:
            npoints = sparse_index.length

        # Note: this overwrites any dtype passed by the caller.
        dtype = infer_dtype_from_scalar(data)[0]
        data = construct_1d_arraylike_from_scalar(
            data, npoints, dtype
        )

    if dtype is not None:
        dtype = pandas_dtype(dtype)

    # TODO: disentangle the fill_value dtype inference from
    # dtype inference
    if data is None:
        # XXX: What should the empty dtype be? Object or float?
        data = np.array([], dtype=dtype)

    if not is_array_like(data):
        try:
            # probably shared code in sanitize_series
            from pandas.core.series import _sanitize_array
            data = _sanitize_array(data, index=None)
        except ValueError:
            # NumPy may raise a ValueError on data like [1, []]
            # we retry with object dtype here.
            if dtype is None:
                dtype = object
                data = np.atleast_1d(np.asarray(data, dtype=dtype))
            else:
                raise

    if copy:
        # TODO: avoid double copy when dtype forces cast.
        data = data.copy()

    if fill_value is None:
        fill_value_dtype = data.dtype if dtype is None else dtype
        if fill_value_dtype is None:
            fill_value = np.nan
        else:
            fill_value = na_value_for_dtype(fill_value_dtype)

    if isinstance(data, type(self)) and sparse_index is None:
        sparse_index = data._sparse_index
        sparse_values = np.asarray(data.sp_values, dtype=dtype)
    elif sparse_index is None:
        # Dense input: make_sparse computes values, index and fill value.
        sparse_values, sparse_index, fill_value = make_sparse(
            data, kind=kind, fill_value=fill_value, dtype=dtype
        )
    else:
        # Caller supplied the index, so data is taken as the stored values
        # and must line up with the index.
        sparse_values = np.asarray(data, dtype=dtype)
        if len(sparse_values) != sparse_index.npoints:
            raise AssertionError(
                "Non array-like type {type} must "
                "have the same length as the index".format(
                    type=type(sparse_values)))
    self._sparse_index = sparse_index
    self._sparse_values = sparse_values
    self._dtype = SparseDtype(sparse_values.dtype, fill_value)
def _interpolate_2d_with_fill(
    data: np.ndarray,  # floating dtype
    index: Index,
    axis: int,
    method: str = "linear",
    limit: int | None = None,
    limit_direction: str = "forward",
    limit_area: str | None = None,
    fill_value: Any | None = None,
    **kwargs,
) -> None:
    """
    Apply _interpolate_1d to every 1-D slice of `data` along `axis`.

    Raises
    ------
    ValueError
        If `method` is "time" and the index is not datetime-like, or if
        `limit_direction` / `limit_area` is not one of the accepted values.

    Notes
    -----
    Alters 'data' in-place.

    The signature differs from _interpolate_1d because it only includes what
    is needed for Block.interpolate.
    """
    # validate the interp method
    clean_interp_method(method, index, **kwargs)

    # Normalize any NA-like fill value to the NA value of the data's dtype.
    if is_valid_na_for_dtype(fill_value, data.dtype):
        fill_value = na_value_for_dtype(data.dtype, compat=False)

    if method == "time":
        if not needs_i8_conversion(index.dtype):
            raise ValueError(
                "time-weighted interpolation only works "
                "on Series or DataFrames with a "
                "DatetimeIndex"
            )
        # "time" is handled as value-based interpolation from here on.
        method = "values"

    allowed_directions = ["forward", "backward", "both"]
    limit_direction = limit_direction.lower()
    if limit_direction not in allowed_directions:
        raise ValueError(
            "Invalid limit_direction: expecting one of "
            f"{allowed_directions}, got '{limit_direction}'."
        )

    if limit_area is not None:
        allowed_areas = ["inside", "outside"]
        limit_area = limit_area.lower()
        if limit_area not in allowed_areas:
            raise ValueError(
                f"Invalid limit_area: expecting one of {allowed_areas}, got "
                f"{limit_area}."
            )

    # default limit is unlimited GH #16282
    limit = algos.validate_limit(nobs=None, limit=limit)

    indices = _index_to_interp_indices(index, method)

    def _interp_slice(yvalues: np.ndarray) -> None:
        # process 1-d slices in the axis direction
        _interpolate_1d(
            indices=indices,
            yvalues=yvalues,
            method=method,
            limit=limit,
            limit_direction=limit_direction,
            limit_area=limit_area,
            fill_value=fill_value,
            bounds_error=False,
            **kwargs,
        )

    # interp each column independently
    np.apply_along_axis(_interp_slice, axis, data)
    return
def __init__(self, data, sparse_index=None, index=None, fill_value=None,
             kind='integer', dtype=None, copy=False):
    """
    Populate ``_sparse_index``, ``_sparse_values`` and ``_dtype`` from data.

    Parameters
    ----------
    data : scalar, array-like, SingleBlockManager or sparse container
        Values to store. A SingleBlockManager is unwrapped through
        ``internal_values()``; an instance of this class or an
        ABCSparseSeries contributes its ``sp_values`` plus any of
        ``sp_index`` / ``fill_value`` / ``dtype`` not given explicitly.
    sparse_index : optional
        Pre-built sparse index. When given together with array data, the
        data must contain exactly ``sparse_index.npoints`` values.
    index : optional
        Only allowed with scalar ``data``; ``len(index)`` then sets the
        number of points the scalar is broadcast to.
    fill_value : optional
        Fill value. When omitted it is taken from a SparseDtype ``dtype``,
        from sparse ``data``, or from ``na_value_for_dtype``.
    kind : str, default 'integer'
        Forwarded to ``make_sparse`` when the sparse index has to be built.
    dtype : str, SparseDtype or dtype-like, optional
        Requested dtype. For scalar ``data`` the dtype is re-inferred from
        the scalar itself.
    copy : bool, default False
        Copy ``data`` before it is converted.

    Raises
    ------
    Exception
        If ``index`` is passed with non-scalar ``data``.
    AssertionError
        If array data and ``sparse_index`` disagree on the number of points.
    """
    from pandas.core.internals import SingleBlockManager

    if isinstance(data, SingleBlockManager):
        data = data.internal_values()

    if isinstance(dtype, SparseDtype) and fill_value is None:
        fill_value = dtype.fill_value

    if isinstance(data, (type(self), ABCSparseSeries)):
        # Sparse input: reuse its pieces instead of inferring dtype,
        # sparse_index and fill_value from scratch.
        if sparse_index is None:
            sparse_index = data.sp_index
        if fill_value is None:
            fill_value = data.fill_value
        if dtype is None:
            dtype = data.dtype
        # TODO: make kind=None, and use data.kind?
        data = data.sp_values

    # A user-provided string is either a sparse dtype ('Sparse[int]') or a
    # regular dtype name ('int').
    if isinstance(dtype, compat.string_types):
        try:
            dtype = SparseDtype.construct_from_string(dtype)
        except TypeError:
            dtype = pandas_dtype(dtype)

    if isinstance(dtype, SparseDtype):
        if fill_value is None:
            fill_value = dtype.fill_value
        dtype = dtype.subtype

    data_is_scalar = is_scalar(data)
    if index is not None and not data_is_scalar:
        raise Exception("must only pass scalars with an index ")

    if data_is_scalar:
        if data is None and index is not None:
            data = np.nan

        # Number of points the scalar is broadcast to.
        if index is not None:
            n_values = len(index)
        elif sparse_index is not None:
            n_values = sparse_index.length
        else:
            n_values = 1

        dtype = infer_dtype_from_scalar(data)[0]
        data = construct_1d_arraylike_from_scalar(data, n_values, dtype)

    if dtype is not None:
        dtype = pandas_dtype(dtype)

    # TODO: disentangle the fill_value dtype inference from
    # dtype inference
    if data is None:
        # XXX: What should the empty dtype be? Object or float?
        data = np.array([], dtype=dtype)

    if not is_array_like(data):
        try:
            # probably shared code in sanitize_series
            from pandas.core.series import _sanitize_array
            data = _sanitize_array(data, index=None)
        except ValueError:
            # NumPy may raise a ValueError on data like [1, []]; retry with
            # object dtype, but only when no dtype was requested.
            if dtype is not None:
                raise
            dtype = object
            data = np.atleast_1d(np.asarray(data, dtype=dtype))

    if copy:
        # TODO: avoid double copy when dtype forces cast.
        data = data.copy()

    if fill_value is None:
        na_source_dtype = data.dtype if dtype is None else dtype
        if na_source_dtype is None:
            fill_value = np.nan
        else:
            fill_value = na_value_for_dtype(na_source_dtype)

    if sparse_index is not None:
        # The caller supplied the layout; data must already be the sparse
        # values for it.
        sparse_values = np.asarray(data, dtype=dtype)
        if len(sparse_values) != sparse_index.npoints:
            raise AssertionError("Non array-like type {type} must "
                                 "have the same length as the index"
                                 .format(type=type(sparse_values)))
    elif isinstance(data, type(self)):
        sparse_index = data._sparse_index
        sparse_values = np.asarray(data.sp_values, dtype=dtype)
    else:
        sparse_values, sparse_index, fill_value = make_sparse(
            data, kind=kind, fill_value=fill_value, dtype=dtype
        )

    self._sparse_index = sparse_index
    self._sparse_values = sparse_values
    self._dtype = SparseDtype(sparse_values.dtype, fill_value)
def take_nd(
    arr,
    indexer,
    axis: int = 0,
    out: Optional[np.ndarray] = None,
    fill_value=lib.no_default,
    allow_fill: bool = True,
):
    """
    Take elements of ``arr`` along ``axis``, filling ``-1`` slots in one pass.

    ExtensionArray inputs are dispatched to their own ``take``. This does
    not currently dispatch to ``SparseArray.take`` for sparse ``arr``.

    Parameters
    ----------
    arr : array-like
        Input array.
    indexer : ndarray
        1-D array of indices to take; subarrays corresponding to -1 value
        indices are filled with ``fill_value``.
    axis : int, default 0
        Axis to take from.
    out : ndarray or None, default None
        Optional output array. It must be of a type able to hold both the
        input and ``fill_value`` if ``indexer`` has any -1 entries; call
        maybe_promote to determine this type for any fill_value.
    fill_value : any, optional
        Value written where ``indexer`` is -1. When not passed, the NA
        value for ``arr.dtype`` is used.
    allow_fill : bool, default True
        If False, ``indexer`` is assumed to contain no -1 values so no
        filling is done, which short-circuits computation of a mask. The
        result is undefined if ``allow_fill`` is False and -1 is present
        in ``indexer``.

    Returns
    -------
    subarray : array-like
        May be the same type as the input, or cast to an ndarray.
    """
    if fill_value is lib.no_default:
        fill_value = na_value_for_dtype(arr.dtype, compat=False)

    arr = extract_array(arr, extract_numpy=True)
    if not isinstance(arr, np.ndarray):
        # i.e. ExtensionArray,
        # includes for EA to catch DatetimeArray, TimedeltaArray
        return arr.take(indexer, fill_value=fill_value, allow_fill=allow_fill)

    arr = np.asarray(arr)

    indexer, dtype, fill_value, mask_info = _take_preprocess_indexer_and_fill_value(
        arr, indexer, axis, out, fill_value, allow_fill
    )

    # 2-D F-contiguous input is processed through its transpose and the
    # result is transposed back at the end.
    transposed = arr.ndim == 2 and arr.flags.f_contiguous
    if transposed:
        arr = arr.T
        axis = arr.ndim - axis - 1
        if out is not None:
            out = out.T

    # at this point, it's guaranteed that dtype can hold both the arr values
    # and the fill_value
    if out is None:
        result_shape = list(arr.shape)
        result_shape[axis] = len(indexer)
        # minor tweak that can make an order-of-magnitude difference
        # for dataframes initialized directly from 2-d ndarrays
        # (s.t. df.values is c-contiguous and df._mgr.blocks[0] is its
        # f-contiguous transpose)
        if arr.flags.f_contiguous and axis == arr.ndim - 1:
            layout = "F"
        else:
            layout = "C"
        out = np.empty(tuple(result_shape), dtype=dtype, order=layout)

    take_impl = _get_take_nd_function(
        arr.ndim, arr.dtype, out.dtype, axis=axis, mask_info=mask_info
    )
    take_impl(arr, indexer, out, fill_value)

    return out.T if transposed else out
def test_na_value_for_dtype(dtype, na_value):
    """
    Check that ``na_value_for_dtype(dtype)`` yields the expected NA value.

    Parameters
    ----------
    dtype : dtype-like
        Dtype handed to ``na_value_for_dtype``.
    na_value : object
        NA value expected for ``dtype``.
    """
    # Imported locally so the fallback comparison below always has ``isna``
    # available, whatever the module-level imports are.
    from pandas import isna

    result = na_value_for_dtype(dtype)
    # An identity check alone is too strict: datetime64/timedelta64("NaT")
    # are not singletons, so two equal NaT scalars are distinct objects.
    # Fall back to "both are NA and of exactly the same type".
    assert result is na_value or (
        isna(result) and isna(na_value) and type(result) is type(na_value)
    )
def test_na_value_for_dtype(dtype, na_value):
    """
    Check that ``na_value_for_dtype(dtype)`` yields the expected NA value.

    The result must either be the very same object as ``na_value`` or be an
    NA value of exactly the same type.
    """
    actual = na_value_for_dtype(dtype)
    if actual is na_value:
        return

    # The identity check doesn't work for datetime64/timedelta64("NaT")
    # because they are not singletons, so compare NA-ness and exact type.
    assert isna(actual) and isna(na_value) and type(actual) is type(na_value)
def __init__(
    self,
    data,
    sparse_index=None,
    index=None,
    fill_value=None,
    kind="integer",
    dtype=None,
    copy=False,
):
    """
    Populate ``_sparse_index``, ``_sparse_values`` and ``_dtype`` from data.

    Parameters
    ----------
    data : scalar, array-like or instance of this class
        Values to store. An instance of this class contributes its
        ``sp_values`` plus any of ``sp_index`` / ``fill_value`` / ``dtype``
        not given explicitly.
    sparse_index : optional
        Pre-built sparse index. When given together with array data, the
        data must contain exactly ``sparse_index.npoints`` values.
    index : optional
        Only allowed with scalar ``data``; ``len(index)`` then sets the
        number of points the scalar is broadcast to.
    fill_value : optional
        Fill value. When omitted it is taken from a SparseDtype ``dtype``,
        from sparse ``data``, or from ``na_value_for_dtype``.
    kind : str, default "integer"
        Forwarded to ``make_sparse`` when the sparse index has to be built.
    dtype : str, SparseDtype or dtype-like, optional
        Requested dtype. For scalar ``data`` the dtype is re-inferred from
        the scalar itself.
    copy : bool, default False
        Copy ``data`` before it is converted.

    Raises
    ------
    Exception
        If ``index`` is passed with non-scalar ``data``.
    AssertionError
        If array data and ``sparse_index`` disagree on the number of points.

    Warns
    -----
    UserWarning
        If the sparse index has to be built from timezone-aware datetime
        data; the values are converted to ``datetime64[ns]`` and the
        timezone is dropped.
    """
    if isinstance(dtype, SparseDtype) and fill_value is None:
        fill_value = dtype.fill_value

    if isinstance(data, type(self)):
        # Sparse input: reuse its pieces instead of inferring dtype,
        # sparse_index and fill_value from scratch.
        if sparse_index is None:
            sparse_index = data.sp_index
        if fill_value is None:
            fill_value = data.fill_value
        if dtype is None:
            dtype = data.dtype
        # TODO: make kind=None, and use data.kind?
        data = data.sp_values

    # A user-provided string is either a sparse dtype ('Sparse[int]') or a
    # regular dtype name ('int').
    if isinstance(dtype, str):
        try:
            dtype = SparseDtype.construct_from_string(dtype)
        except TypeError:
            dtype = pandas_dtype(dtype)

    if isinstance(dtype, SparseDtype):
        if fill_value is None:
            fill_value = dtype.fill_value
        dtype = dtype.subtype

    data_is_scalar = is_scalar(data)
    if index is not None and not data_is_scalar:
        raise Exception("must only pass scalars with an index")

    if data_is_scalar:
        if data is None and index is not None:
            data = np.nan

        # Number of points the scalar is broadcast to.
        if index is not None:
            n_values = len(index)
        elif sparse_index is not None:
            n_values = sparse_index.length
        else:
            n_values = 1

        dtype = infer_dtype_from_scalar(data)[0]
        data = construct_1d_arraylike_from_scalar(data, n_values, dtype)

    if dtype is not None:
        dtype = pandas_dtype(dtype)

    # TODO: disentangle the fill_value dtype inference from
    # dtype inference
    if data is None:
        # TODO: What should the empty dtype be? Object or float?
        data = np.array([], dtype=dtype)

    if not is_array_like(data):
        try:
            # probably shared code in sanitize_series
            data = sanitize_array(data, index=None)
        except ValueError:
            # NumPy may raise a ValueError on data like [1, []]; retry with
            # object dtype, but only when no dtype was requested.
            if dtype is not None:
                raise
            dtype = object
            data = np.atleast_1d(np.asarray(data, dtype=dtype))

    if copy:
        # TODO: avoid double copy when dtype forces cast.
        data = data.copy()

    if fill_value is None:
        na_source_dtype = data.dtype if dtype is None else dtype
        if na_source_dtype is None:
            fill_value = np.nan
        else:
            fill_value = na_value_for_dtype(na_source_dtype)

    if sparse_index is not None:
        # The caller supplied the layout; data must already be the sparse
        # values for it.
        sparse_values = np.asarray(data, dtype=dtype)
        if len(sparse_values) != sparse_index.npoints:
            raise AssertionError(
                f"Non array-like type {type(sparse_values)} must "
                "have the same length as the index"
            )
    elif isinstance(data, type(self)):
        sparse_index = data._sparse_index
        sparse_values = np.asarray(data.sp_values, dtype=dtype)
    else:
        data = extract_array(data, extract_numpy=True)
        if not isinstance(data, np.ndarray):
            # ExtensionArray input has to become a plain ndarray first.
            if is_datetime64tz_dtype(data.dtype):
                warnings.warn(
                    f"Creating SparseArray from {data.dtype} data "
                    "loses timezone information. Cast to object before "
                    "sparse to retain timezone information.",
                    UserWarning,
                    stacklevel=2,
                )
                data = np.asarray(data, dtype="datetime64[ns]")
            data = np.asarray(data)
        sparse_values, sparse_index, fill_value = make_sparse(
            data, kind=kind, fill_value=fill_value, dtype=dtype
        )

    self._sparse_index = sparse_index
    self._sparse_values = sparse_values
    self._dtype = SparseDtype(sparse_values.dtype, fill_value)
def test_na_value_for_dtype(dtype, na_value):
    """
    Check that ``na_value_for_dtype(dtype)`` yields the expected NA value.

    Parameters
    ----------
    dtype : dtype-like
        Dtype handed to ``na_value_for_dtype``.
    na_value : object
        NA value expected for ``dtype``.
    """
    # Imported locally so the fallback comparison below always has ``isna``
    # available, whatever the module-level imports are.
    from pandas import isna

    result = na_value_for_dtype(dtype)
    # An identity check alone is too strict: datetime64/timedelta64("NaT")
    # are not singletons, so two equal NaT scalars are distinct objects.
    # Fall back to "both are NA and of exactly the same type".
    assert result is na_value or (
        isna(result) and isna(na_value) and type(result) is type(na_value)
    )