def test_bounds_check_large(self):
    """Positions past the end raise ``IndexError`` in both fill modes."""
    values = np.array([1, 2])
    # Both requested positions lie beyond the two-element array.
    for fill_mode in (True, False):
        with pytest.raises(IndexError):
            algos.take(values, [2, 3], allow_fill=fill_mode)
def take(self, indices, allow_fill=False, fill_value=None, axis=None, **kwargs):
    """
    Take elements from the IntervalArray.

    Parameters
    ----------
    indices : sequence of integers
        Indices to be taken.
    allow_fill : bool, default False
        How to handle negative values in `indices`.

        * False: negative values in `indices` indicate positional indices
          from the right (the default). This is similar to
          :func:`numpy.take`.
        * True: negative values in `indices` indicate missing values.
          These values are set to `fill_value`. Any other negative
          values raise a ``ValueError``.
    fill_value : Interval or NA, optional
        Fill value to use for NA-indices when `allow_fill` is True.
        It is the user-facing ("boxed") value; when `allow_fill` is True
        it is passed through ``self._validate_fill_value``, which yields
        the separate left and right fill values used for the two sides.
    axis : any, default None
        Present for compat with IntervalIndex; does nothing.
    **kwargs
        Checked by ``nv.validate_take``.

    Returns
    -------
    IntervalArray

    Raises
    ------
    IndexError
        When the indices are out of bounds for the array.
    ValueError
        When `indices` contains negative values other than ``-1``
        and `allow_fill` is True.
    """
    nv.validate_take(tuple(), kwargs)

    if allow_fill:
        # One boxed fill value becomes one physical value per endpoint.
        left_fill, right_fill = self._validate_fill_value(fill_value)
    else:
        left_fill = right_fill = fill_value

    new_left = take(
        self._left, indices, allow_fill=allow_fill, fill_value=left_fill
    )
    new_right = take(
        self._right, indices, allow_fill=allow_fill, fill_value=right_fill
    )
    return self._shallow_copy(new_left, new_right)
def take(
    self: BaseMaskedArrayT,
    indexer,
    *,
    allow_fill: bool = False,
    fill_value: Scalar | None = None,
) -> BaseMaskedArrayT:
    """
    Take elements from both the data and the mask of this masked array.

    Parameters
    ----------
    indexer : sequence of integers
        Positions to take; compared against ``-1`` when filling.
    allow_fill : bool, default False
        Forwarded to the underlying ``take`` calls.
    fill_value : scalar, optional
        Value for ``-1`` positions. When it is NA, the data is filled with
        ``self._internal_fill_value`` and those positions stay masked.

    Returns
    -------
    Same type as ``self``, built from the taken data and mask without copying.
    """
    # A NA fill value is swapped for the internal placeholder so the data
    # array does not have to be upcast.
    if isna(fill_value):
        data_fill_value = self._internal_fill_value
    else:
        data_fill_value = fill_value

    values = take(
        self._data, indexer, fill_value=data_fill_value, allow_fill=allow_fill
    )
    new_mask = take(self._mask, indexer, fill_value=True, allow_fill=allow_fill)

    # With a real (non-NA) fill value, only the slots introduced by the
    # indexer are filled and unmasked; pre-existing missing values are kept.
    # TODO(jreback) what if we have a non-na float as a fill value?
    if allow_fill and notna(fill_value):
        introduced = np.asarray(indexer) == -1
        values[introduced] = fill_value
        new_mask = new_mask ^ introduced

    return type(self)(values, new_mask, copy=False)
def test_take_empty(self, allow_fill):
    """An empty take from an empty array works; a non-empty one raises."""
    empty = np.array([], dtype=np.int64)

    # empty take is ok
    taken = algos.take(empty, [], allow_fill=allow_fill)
    tm.assert_numpy_array_equal(empty, taken)

    # Asking for position 0 of an empty array must fail.
    with pytest.raises(IndexError):
        algos.take(empty, [0], allow_fill=allow_fill)
def test_bounds_check_small(self):
    """``-2`` is rejected when filling but wraps from the end otherwise."""
    values = np.array([1, 2, 3], dtype=np.int64)
    positions = [0, -1, -2]

    # With allow_fill=True the -2 entry makes the call raise.
    with pytest.raises(ValueError):
        algos.take(values, positions, allow_fill=True)

    # Without filling, negative positions count from the right.
    tm.assert_numpy_array_equal(
        algos.take(values, positions),
        np.array([1, 3, 2], dtype=np.int64),
    )
def test_take_axis_0(self):
    """Taking along the default axis selects whole rows."""
    grid = np.arange(12).reshape(4, 3)

    # Without filling, -1 selects the last row.
    wrapped = algos.take(grid, [0, -1])
    tm.assert_numpy_array_equal(wrapped, np.array([[0, 1, 2], [9, 10, 11]]))

    # allow_fill=True: -1 yields a row made of the fill value.
    filled = algos.take(grid, [0, -1], allow_fill=True, fill_value=0)
    tm.assert_numpy_array_equal(filled, np.array([[0, 1, 2], [0, 0, 0]]))
def test_take_empty(self, allow_fill):
    """An empty take from an empty array works; a non-empty one raises."""
    empty = np.array([], dtype=np.int64)

    # empty take is ok
    taken = algos.take(empty, [], allow_fill=allow_fill)
    tm.assert_numpy_array_equal(empty, taken)

    # Either of the two alternatives in the pattern is accepted.
    expected_error = ("cannot do a non-empty take from an empty axes.|"
                      "indices are out-of-bounds")
    with pytest.raises(IndexError, match=expected_error):
        algos.take(empty, [0], allow_fill=allow_fill)
def test_bounds_check_large(self):
    """Positions past the end raise ``IndexError`` with a mode-specific message."""
    values = np.array([1, 2])
    too_large = [2, 3]

    # Message expected when allow_fill=True.
    fill_msg = "indices are out-of-bounds"
    with pytest.raises(IndexError, match=fill_msg):
        algos.take(values, too_large, allow_fill=True)

    # Message expected when allow_fill=False; the axis part is optional.
    no_fill_msg = "index 2 is out of bounds for( axis 0 with)? size 2"
    with pytest.raises(IndexError, match=no_fill_msg):
        algos.take(values, too_large, allow_fill=False)
def test_bounds_check_small(self):
    """``-2`` is rejected when filling but wraps from the end otherwise."""
    values = np.array([1, 2, 3], dtype=np.int64)
    positions = [0, -1, -2]

    # With allow_fill=True the -2 entry makes the call raise.
    expected_error = r"'indices' contains values less than allowed \(-2 < -1\)"
    with pytest.raises(ValueError, match=expected_error):
        algos.take(values, positions, allow_fill=True)

    # Without filling, negative positions count from the right.
    tm.assert_numpy_array_equal(
        algos.take(values, positions),
        np.array([1, 3, 2], dtype=np.int64),
    )
def test_take_non_hashable_fill_value(self):
    """A list fill value is rejected for numeric data but kept for object data."""
    positions = np.array([1, -1])

    numeric = np.array([1, 2, 3])
    with pytest.raises(ValueError, match="fill_value must be a scalar"):
        algos.take(numeric, positions, allow_fill=True, fill_value=[1])

    # with object dtype it is allowed
    boxed = np.array([1, 2, 3], dtype=object)
    taken = algos.take(boxed, positions, allow_fill=True, fill_value=[1])
    tm.assert_numpy_array_equal(taken, np.array([2, [1]], dtype=object))
def test_take_axis_1(self):
    """Taking along ``axis=1`` selects columns and validates against that axis."""
    grid = np.arange(12).reshape(4, 3)

    # Without filling, -1 selects the last column.
    wrapped = algos.take(grid, [0, -1], axis=1)
    tm.assert_numpy_array_equal(
        wrapped, np.array([[0, 2], [3, 5], [6, 8], [9, 11]])
    )

    # allow_fill=True: -1 yields a column made of the fill value.
    filled = algos.take(grid, [0, -1], axis=1, allow_fill=True, fill_value=0)
    tm.assert_numpy_array_equal(
        filled, np.array([[0, 0], [3, 0], [6, 0], [9, 0]])
    )

    # GH#26976 make sure we validate along the correct axis
    with pytest.raises(IndexError, match="indices are out-of-bounds"):
        algos.take(grid, [0, 3], axis=1, allow_fill=True, fill_value=0)
def take(self, indices, allow_fill=False, fill_value=None):
    """
    Take elements from the array.

    Parameters
    ----------
    indices : sequence of integers
        Indices to be taken.
    allow_fill : bool, default False
        How to handle negative values in `indices`.

        * False: negative values in `indices` indicate positional indices
          from the right (the default). This is similar to
          :func:`numpy.take`.
        * True: negative values in `indices` indicate missing values.
          These values are set to `fill_value`. Any other negative
          values raise a ``ValueError``.
    fill_value : any, optional
        Fill value to use for NA-indices when `allow_fill` is True.
        This may be ``None``, in which case ``self.dtype.na_value`` is
        used. A quantity is converted to ``self.units`` and reduced to its
        magnitude before the take.

    Returns
    -------
    PintArray

    Raises
    ------
    IndexError
        When the indices are out of bounds for the array.
    ValueError
        When `indices` contains negative values other than ``-1``
        and `allow_fill` is True.

    Notes
    -----
    PintArray.take is called by ``Series.__getitem__``, ``.loc``,
    ``iloc``, when `indices` is a sequence of values. Additionally,
    it's called by :meth:`Series.reindex`, or any other method
    that causes realignment, with a `fill_value`.

    See Also
    --------
    numpy.take
    pandas.api.extensions.take
    """
    from pandas.core.algorithms import take as take_positions

    magnitudes = self._data

    if allow_fill and fill_value is None:
        fill_value = self.dtype.na_value
    if isinstance(fill_value, _Quantity):
        # The backing data holds bare magnitudes, so express the fill value
        # in this array's units and strip the unit.
        fill_value = fill_value.to(self.units).magnitude

    taken = take_positions(
        magnitudes, indices, fill_value=fill_value, allow_fill=allow_fill
    )
    return PintArray(taken, dtype=self.dtype)
def take(self, indices, allow_fill=False, fill_value=None):
    """
    Take elements by position, optionally filling ``-1`` slots.

    Parameters
    ----------
    indices : sequence of integers
        Positions to take.
    allow_fill : bool, default False
        When exactly ``False`` (and the array is non-empty) the result is
        built from ``self[indices]``.
    fill_value : any, optional
        Value for filled slots. ``None`` or a value equal to the dtype's
        ``na_value`` is replaced by ``self.dtype.na_value``.

    Returns
    -------
    A new instance of ``type(self)`` with the same ``_dtype``.
    """
    # NOTE(review): this shortcut is also used when allow_fill is truthy and
    # fill_value is the dtype's na_value; whether ``self[indices]`` treats -1
    # as missing in that case is not visible here — confirm.
    index_directly = allow_fill is False or (
        allow_fill and fill_value is self.dtype.na_value
    )
    if index_directly and len(self) > 0:
        return type(self)(self[indices], dtype=self._dtype)

    if self._use_arrow:
        array = self._arrow_array.to_pandas().to_numpy()
    else:
        array = self._ndarray

    restore_none = False
    if allow_fill and (
        fill_value is None or fill_value == self._dtype.na_value
    ):
        fill_value = self.dtype.na_value
        restore_none = True

    result = take(array, indices, fill_value=fill_value, allow_fill=allow_fill)
    del array

    if restore_none and pa is not None:
        # pyarrow cannot recognize pa.NULL
        result[result == self.dtype.na_value] = None

    return type(self)(result, dtype=self._dtype)
def unstack(self, unstacker, fill_value) -> ArrayManager:
    """
    Return a new manager of this type with every array unstacked.

    Parameters
    ----------
    unstacker : reshape._Unstacker
    fill_value : Any
        fill_value for newly introduced missing values.

    Returns
    -------
    unstacked : same type as ``self``
        Holds one new array per (input array, output column) pair, in that
        order, on the axes ``[unstacker.new_index, new columns]``.
    """
    positions, _ = unstacker._indexer_and_to_sort

    # Start from "missing everywhere" (-1) and write the source positions
    # into the slots selected by the unstacker's mask.
    flat_indexer = np.full(unstacker.mask.shape, -1)
    flat_indexer[unstacker.mask] = positions
    indexer_2d = flat_indexer.reshape(*unstacker.full_shape)

    # Each column of the 2D indexer picks the rows for one output array;
    # the remaining -1 entries are filled with ``fill_value``.
    unstacked_arrays = [
        algos.take(
            arr, indexer_2d[:, col], allow_fill=True, fill_value=fill_value
        )
        for arr in self.arrays
        for col in range(unstacker.full_shape[1])
    ]

    result_axes = [
        unstacker.new_index,
        unstacker.get_new_columns(self._axes[1]),
    ]
    return type(self)(unstacked_arrays, result_axes, do_integrity_check=False)
def take(self, indices, allow_fill=False, fill_value=None):
    """
    Take elements from the backing ``_ndarray`` and re-wrap them.

    Parameters
    ----------
    indices : sequence of integers
        Positions to take.
    allow_fill : bool, default False
        Forwarded unchanged to ``pandas.core.algorithms.take``.
    fill_value : any, optional
        Forwarded unchanged to ``pandas.core.algorithms.take``.

    Returns
    -------
    A new instance of ``type(self)`` wrapping the taken values.
    """
    from pandas.core.algorithms import take as take_positions

    taken = take_positions(
        self._ndarray, indices, allow_fill=allow_fill, fill_value=fill_value
    )
    return type(self)(taken)
def take(self, indices, allow_fill=False, fill_value=None):
    """
    Take elements from the backing ``_ndarray`` and re-wrap them.

    Parameters
    ----------
    indices : sequence of integers
        Positions to take.
    allow_fill : bool, default False
        Forwarded to the underlying ``take``.
    fill_value : any, optional
        Forwarded to the underlying ``take``; ``None`` is replaced by
        ``self.dtype.na_value`` regardless of `allow_fill`.

    Returns
    -------
    A new instance of ``type(self)`` wrapping the taken values.
    """
    # Primarily for subclasses
    effective_fill = self.dtype.na_value if fill_value is None else fill_value

    taken = take(
        self._ndarray,
        indices,
        allow_fill=allow_fill,
        fill_value=effective_fill,
    )
    return type(self)(taken)
def take(self, indices, allow_fill=False, fill_value=None):
    """
    Take elements from ``self.asi8`` and rebuild an array of the same dtype.

    Parameters
    ----------
    indices : sequence of integers
        Positions to take.
    allow_fill : bool, default False
        Forwarded to the underlying ``take``.
    fill_value : any, optional
        Passed through ``self._validate_fill_value`` only when `allow_fill`
        is true; otherwise forwarded untouched.

    Returns
    -------
    A new instance of ``type(self)`` built with ``dtype=self.dtype``.
    """
    if allow_fill:
        physical_fill = self._validate_fill_value(fill_value)
    else:
        physical_fill = fill_value

    taken = take(
        self.asi8, indices, allow_fill=allow_fill, fill_value=physical_fill
    )
    return type(self)(taken, dtype=self.dtype)
def take(
    self: _T,
    indices: Sequence[int],
    allow_fill: bool = False,
    fill_value: Any = None,
) -> _T:
    """
    Take elements from the backing ``_ndarray``.

    Parameters
    ----------
    indices : sequence of integers
        Positions to take.
    allow_fill : bool, default False
        Forwarded to the underlying ``take``.
    fill_value : any, optional
        Passed through ``self._validate_fill_value`` only when `allow_fill`
        is true; otherwise forwarded untouched.

    Returns
    -------
    The result of ``self._from_backing_data`` applied to the taken values.
    """
    physical_fill = (
        self._validate_fill_value(fill_value) if allow_fill else fill_value
    )
    taken = take(
        self._ndarray,
        indices,
        allow_fill=allow_fill,
        fill_value=physical_fill,
    )
    return self._from_backing_data(taken)
def take(self, indices, allow_fill=False, fill_value=None):
    """
    Take elements from ``self.data`` and rebuild via ``_from_sequence``.

    Parameters
    ----------
    indices : sequence of integers
        Positions to take.
    allow_fill : bool, default False
        Forwarded to ``pandas.core.algorithms.take``.
    fill_value : any, optional
        Value for filled slots; ``None`` becomes ``self.dtype.na_value``
        when `allow_fill` is true.

    Returns
    -------
    Whatever ``self._from_sequence`` builds from the taken values.
    """
    from pandas.core.algorithms import take as take_positions

    # ``self.data`` is handed over as-is rather than being coerced to object
    # first, on the premise that it is already a suitable backing array.
    fill_needs_default = allow_fill and fill_value is None
    if fill_needs_default:
        # A fill value should be translated from the array's scalar type to
        # its physical storage type before the take; here the dtype's NA
        # value is used directly.
        fill_value = self.dtype.na_value

    taken = take_positions(
        self.data, indices, fill_value=fill_value, allow_fill=allow_fill
    )
    return self._from_sequence(taken)
def take(self, indices, allow_fill=False, fill_value=None):
    """
    Take elements from ``self.data``, filling ``-1`` slots with ``None``
    or with a value of the dtype's scalar type.

    Parameters
    ----------
    indices : sequence of integers
        Positions to take.
    allow_fill : bool, default False
        Forwarded to ``pandas.core.algorithms.take``.
    fill_value : instance of ``self.dtype.type`` or NA, optional
        Only validated when `allow_fill` is true. Any NA-like value is
        normalised to ``None``.

    Returns
    -------
    A new instance of ``self.__class__`` wrapping the taken values.

    Raises
    ------
    TypeError
        If `allow_fill` is true and `fill_value` is neither NA-like nor an
        instance of ``self.dtype.type``.
    """
    from pandas.core.algorithms import take as take_positions

    fill_with_missing = False
    if allow_fill:
        if fill_value is None or pd.isna(fill_value):
            fill_value = None
            fill_with_missing = True
        elif not isinstance(fill_value, self.dtype.type):
            raise TypeError('Provide geometry or None as fill value')

    taken = take_positions(
        self.data, indices, allow_fill=allow_fill, fill_value=fill_value
    )

    if fill_with_missing:
        # Whatever NA marker the take produced is normalised to None.
        taken[pd.isna(taken)] = None

    return self.__class__(taken)
def take(
    self: NDArrayBackedExtensionArrayT,
    indices: TakeIndexer,
    *,
    allow_fill: bool = False,
    fill_value: Any = None,
    axis: int = 0,
) -> NDArrayBackedExtensionArrayT:
    """
    Take elements from the backing ``_ndarray`` along `axis`.

    Parameters
    ----------
    indices : TakeIndexer
        Positions to take.
    allow_fill : bool, default False
        Forwarded to the underlying ``take``.
    fill_value : any, optional
        Passed through ``self._validate_scalar`` only when `allow_fill` is
        true; otherwise forwarded untouched.
    axis : int, default 0
        Forwarded to the underlying ``take``.

    Returns
    -------
    The result of ``self._from_backing_data`` applied to the taken values.
    """
    if allow_fill:
        physical_fill = self._validate_scalar(fill_value)
    else:
        physical_fill = fill_value

    taken = take(
        self._ndarray,
        indices,
        allow_fill=allow_fill,
        fill_value=physical_fill,
        axis=axis,
    )
    return self._from_backing_data(taken)
def take(self, indices, allow_fill=False, fill_value=None):
    """
    Take elements and return them as an ``ArrowStringArray``.

    Parameters
    ----------
    indices : sequence of integers
        Positions to take.
    allow_fill : bool, default False
        When exactly ``False`` the result is built from ``self[indices]``.
        Otherwise the arrow data is converted to a NumPy array and taken
        with filling.
    fill_value : any, optional
        Value for filled slots; ``None`` becomes ``self.dtype.na_value``.

    Returns
    -------
    ArrowStringArray
    """
    if allow_fill is False:
        return ArrowStringArray(self[indices])

    as_numpy = self._arrow_array.to_pandas().to_numpy()

    restore_none = False
    if allow_fill and fill_value is None:
        fill_value = self.dtype.na_value
        restore_none = True

    taken = take(
        as_numpy, indices, fill_value=fill_value, allow_fill=allow_fill
    )

    if restore_none:
        # pyarrow cannot recognize pa.NULL
        # NOTE(review): relies on ``==`` against na_value producing a usable
        # boolean mask — confirm for the dtype's actual NA value.
        taken[taken == self.dtype.na_value] = None

    return ArrowStringArray(taken)
def take(self, indices, allow_fill=False, fill_value=None):
    """
    Take elements and return them as a new array of the same type and dtype.

    Parameters
    ----------
    indices : sequence of integers
        Positions to take.
    allow_fill : bool, default False
        When exactly ``False`` the result is built from ``self[indices]``.
        Otherwise the arrow data is converted to a NumPy array and taken
        with filling.
    fill_value : any, optional
        Value for filled slots; ``None`` becomes ``self.dtype.na_value``.

    Returns
    -------
    A new instance of ``type(self)`` created with ``dtype=self._dtype``.
    """
    if allow_fill is False:
        return type(self)(self[indices], dtype=self._dtype)

    array = self._arrow_array.to_pandas().to_numpy()

    restore_none = False
    if allow_fill and fill_value is None:
        fill_value = self.dtype.na_value
        restore_none = True

    taken = take(array, indices, fill_value=fill_value, allow_fill=allow_fill)
    # Drop the reference to the converted copy once it is no longer needed.
    del array

    if restore_none:
        # pyarrow cannot recognize pa.NULL
        # NOTE(review): relies on ``==`` against na_value producing a usable
        # boolean mask — confirm for the dtype's actual NA value.
        taken[taken == self.dtype.na_value] = None

    return type(self)(taken, dtype=self._dtype)
def take(
    self: NDArrayBackedExtensionArrayT,
    indices: Sequence[int],
    *,
    allow_fill: bool = False,
    fill_value: Any = None,
    axis: int = 0,
) -> NDArrayBackedExtensionArrayT:
    """
    Take elements from the backing ``_ndarray`` along `axis`.

    Parameters
    ----------
    indices : sequence of integers
        Positions to take.
    allow_fill : bool, default False
        Forwarded to the underlying ``take``.
    fill_value : any, optional
        Passed through ``self._validate_fill_value`` only when `allow_fill`
        is true; otherwise forwarded untouched.
    axis : int, default 0
        Forwarded to the underlying ``take``.

    Returns
    -------
    The result of ``self._from_backing_data`` applied to the taken values.
    """
    physical_fill = (
        self._validate_fill_value(fill_value) if allow_fill else fill_value
    )

    taken = take(
        self._ndarray,
        # error: Argument 2 to "take" has incompatible type "Sequence[int]";
        # expected "ndarray"
        indices,  # type: ignore[arg-type]
        allow_fill=allow_fill,
        fill_value=physical_fill,
        axis=axis,
    )
    return self._from_backing_data(taken)
def take(self, indices, allow_fill=False, fill_value=None):
    """
    Take elements from ``self._data``, filling ``-1`` slots when requested.

    Parameters
    ----------
    indices : sequence of integers
        Positions to take.
    allow_fill : bool, default False
        Forwarded to ``algos.take``. `fill_value` is only validated when
        this is true.
    fill_value : Period or NA, optional
        NA-like values are stored as ``iNaT``; a ``Period`` is stored as its
        ``ordinal``.

    Returns
    -------
    A new instance of ``type(self)`` built from the taken values and
    ``self.freq``.

    Raises
    ------
    IncompatibleFrequency
        If `fill_value` is a ``Period`` whose ``freq`` differs from
        ``self.freq``.
    ValueError
        If `fill_value` is neither NA-like nor a ``Period``.
    """
    if allow_fill:
        if isna(fill_value):
            fill_value = iNaT
        elif not isinstance(fill_value, Period):
            template = "'fill_value' should be a Period. Got '{}'."
            raise ValueError(template.format(fill_value))
        elif self.freq != fill_value.freq:
            mismatch = DIFFERENT_FREQ_INDEX.format(self.freq.freqstr,
                                                   fill_value.freqstr)
            raise IncompatibleFrequency(mismatch)
        else:
            fill_value = fill_value.ordinal

    taken = algos.take(self._data,
                       indices,
                       allow_fill=allow_fill,
                       fill_value=fill_value)
    return type(self)(taken, self.freq)
def take(self, indices, allow_fill=False, fill_value=None):
    """
    Take elements from ``self.data``, writing the fill value into ``-1``
    slots afterwards.

    Parameters
    ----------
    indices : sequence of integers
        Positions to take; converted with ``np.asarray``.
    allow_fill : bool, default False
        When true, ``-1`` marks a slot to be filled and any smaller value
        is rejected.
    fill_value : any, optional
        ``None`` becomes ``self.dtype.na_value`` when filling, and the
        dtype's ``na_value`` is in turn translated to
        ``self.dtype._record_na_value``.

    Returns
    -------
    ``self._from_ndarray(...)`` for an all-missing take from an empty array,
    otherwise ``self._from_sequence(...)`` with this array's dtype and
    ``default_money_code``.

    Raises
    ------
    IndexError
        If the array is empty, `allow_fill` is true and `indices` is not
        all ``-1``.
    ValueError
        If `allow_fill` is true and `indices` contains a value below ``-1``.
    """
    from pandas.core.algorithms import take as take_positions

    if allow_fill and fill_value is None:
        fill_value = self.dtype.na_value
    # Fill value should always be translated from the scalar type for the
    # array to the physical storage type before the take.
    if fill_value is self.dtype.na_value:
        fill_value = self.dtype._record_na_value

    indices = np.asarray(indices)

    if allow_fill:
        mask = indices == -1

        if not len(self):
            if not mask.all():
                msg = "Invalid take for empty array. Must be all -1."
                raise IndexError(msg)
            # All-NA take from an empty array: nothing to read, so build the
            # result directly from the fill value.
            all_missing = np.zeros(len(indices), dtype=self.dtype._record_type)
            all_missing.fill(fill_value)
            return self._from_ndarray(all_missing)

        if (indices < -1).any():
            msg = ("Invalid value in 'indices'. Must be all >= -1 "
                   "for 'allow_fill=True'")
            raise ValueError(msg)

    # The take itself never fills; slots marked -1 are overwritten below.
    taken = take_positions(self.data, indices, allow_fill=False)
    if allow_fill:
        taken[mask] = fill_value

    return self._from_sequence(
        taken,
        dtype=self.dtype,
        default_money_code=self.default_money_code,
    )
def take(self, indices, allow_fill=False, fill_value=None):
    """
    Take elements from ``self._data``, filling ``-1`` slots when requested.

    Parameters
    ----------
    indices : sequence of integers
        Positions to take.
    allow_fill : bool, default False
        Forwarded to ``algos.take``. `fill_value` is only validated when
        this is true.
    fill_value : Period or NA, optional
        NA-like values are stored as ``iNaT``; a ``Period`` is stored as its
        ``ordinal``.

    Returns
    -------
    A new instance of ``type(self)`` built from the taken values and
    ``self.freq``.

    Raises
    ------
    IncompatibleFrequency
        If `fill_value` is a ``Period`` whose ``freq`` differs from
        ``self.freq``.
    ValueError
        If `fill_value` is neither NA-like nor a ``Period``.
    """
    if allow_fill:
        if isna(fill_value):
            fill_value = iNaT
        elif not isinstance(fill_value, Period):
            template = "'fill_value' should be a Period. Got '{}'."
            raise ValueError(template.format(fill_value))
        elif self.freq != fill_value.freq:
            mismatch = DIFFERENT_FREQ_INDEX.format(
                self.freq.freqstr, fill_value.freqstr
            )
            raise IncompatibleFrequency(mismatch)
        else:
            # The backing data stores ordinals, not Period objects.
            fill_value = fill_value.ordinal

    taken = algos.take(
        self._data, indices, allow_fill=allow_fill, fill_value=fill_value
    )
    return type(self)(taken, self.freq)
def test_take_na_empty(self):
    """An all ``-1`` take from an empty array yields only fill values."""
    filled = algos.take(
        np.array([]), [-1, -1], allow_fill=True, fill_value=0.0
    )
    tm.assert_numpy_array_equal(filled, np.array([0., 0.]))
def test_take_coerces_list(self):
    """A plain list input is accepted and the result is a NumPy array."""
    as_list = [1, 2, 3]
    taken = algos.take(as_list, [0, 0])
    tm.assert_numpy_array_equal(taken, np.array([1, 1]))
def take(self, indices, allow_fill=False, fill_value=None, axis=None, **kwargs):
    """
    Take elements from the IntervalArray.

    Parameters
    ----------
    indices : sequence of integers
        Indices to be taken.
    allow_fill : bool, default False
        How to handle negative values in `indices`.

        * False: negative values in `indices` indicate positional indices
          from the right (the default). This is similar to
          :func:`numpy.take`.
        * True: negative values in `indices` indicate missing values.
          These values are set to `fill_value`. Any other negative
          values raise a ``ValueError``.
    fill_value : Interval or NA, optional
        Fill value to use for NA-indices when `allow_fill` is True.
        ``None`` selects ``self.left._na_value`` for both sides; an
        Interval contributes its ``left`` and ``right`` separately after
        its ``closed`` side has been checked; any other value is used
        as-is for both sides.
    axis : any, default None
        Present for compat with IntervalIndex; does nothing.
    **kwargs
        Checked by ``nv.validate_take``.

    Returns
    -------
    IntervalArray

    Raises
    ------
    IndexError
        When the indices are out of bounds for the array.
    ValueError
        When `indices` contains negative values other than ``-1``
        and `allow_fill` is True, or when `fill_value` is a non-scalar,
        non-NA value.
    """
    from pandas.core.algorithms import take as take_positions

    nv.validate_take(tuple(), kwargs)

    if not allow_fill:
        fill_left = fill_right = fill_value
    elif fill_value is None:
        fill_left = fill_right = self.left._na_value
    elif is_interval(fill_value):
        self._check_closed_matches(fill_value, name='fill_value')
        fill_left, fill_right = fill_value.left, fill_value.right
    else:
        if not is_scalar(fill_value) and notna(fill_value):
            # NOTE(review): the message names 'fillna' although this is
            # ``take``; kept verbatim because callers may match on it.
            msg = ("'IntervalArray.fillna' only supports filling with a "
                   "'scalar pandas.Interval or NA'. Got a '{}' instead."
                   .format(type(fill_value).__name__))
            raise ValueError(msg)
        fill_left = fill_right = fill_value

    new_left = take_positions(self.left, indices,
                              allow_fill=allow_fill, fill_value=fill_left)
    new_right = take_positions(self.right, indices,
                               allow_fill=allow_fill, fill_value=fill_right)
    return self._shallow_copy(new_left, new_right)
def take(self, indices, allow_fill=False, fill_value=None):
    """
    Take elements from an array.

    Parameters
    ----------
    indices : sequence of integers
        Indices to be taken.
    allow_fill : bool, default False
        How to handle negative values in `indices`.

        * False: negative values in `indices` indicate positional indices
          from the right (the default). This is similar to
          :func:`numpy.take`.
        * True: negative values in `indices` indicate missing values.
          These values are set to `fill_value`. Any other negative
          values raise a ``ValueError``.
    fill_value : any, optional
        Fill value to use for NA-indices when `allow_fill` is True.
        This may be ``None``, in which case the default NA value for
        the type, ``self.dtype.na_value``, is used.

        For many ExtensionArrays, there will be two representations of
        `fill_value`: a user-facing "boxed" scalar, and a low-level
        physical NA value. `fill_value` should be the user-facing version,
        and the implementation should handle translating that to the
        physical version for processing the take if necessary.

    Returns
    -------
    ExtensionArray

    Raises
    ------
    IndexError
        When the indices are out of bounds for the array.
    ValueError
        When `indices` contains negative values other than ``-1``
        and `allow_fill` is True.

    Notes
    -----
    ExtensionArray.take is called by ``Series.__getitem__``, ``.loc``,
    ``iloc``, when `indices` is a sequence of values. Additionally,
    it's called by :meth:`Series.reindex`, or any other method
    that causes realignment, with a `fill_value`.

    See Also
    --------
    numpy.take
    pandas.api.extensions.take
    """
    from pandas.core.algorithms import take as take_positions

    # This generic version works on an object-dtype copy of the array.
    boxed = self.astype(object)

    if allow_fill and fill_value is None:
        fill_value = self.dtype.na_value

    # Fill value should always be translated from the scalar type for the
    # array to the physical storage type before the take; with object
    # storage the boxed scalar is used directly.
    taken = take_positions(
        boxed, indices, fill_value=fill_value, allow_fill=allow_fill
    )
    return self._from_sequence(taken)
def take(self, indices, allow_fill=False, fill_value=None, axis=None, **kwargs):
    """
    Take elements from the IntervalArray.

    Parameters
    ----------
    indices : sequence of integers
        Indices to be taken.
    allow_fill : bool, default False
        How to handle negative values in `indices`.

        * False: negative values in `indices` indicate positional indices
          from the right (the default). This is similar to
          :func:`numpy.take`.
        * True: negative values in `indices` indicate missing values.
          These values are set to `fill_value`. Any other negative
          values raise a ``ValueError``.
    fill_value : Interval or NA, optional
        Fill value to use for NA-indices when `allow_fill` is True.
        ``None`` selects ``self.left._na_value`` for both sides; an
        Interval contributes its ``left`` and ``right`` separately after
        its ``closed`` side has been checked; any other value is used
        as-is for both sides.
    axis : any, default None
        Present for compat with IntervalIndex; does nothing.
    **kwargs
        Checked by ``nv.validate_take``.

    Returns
    -------
    IntervalArray

    Raises
    ------
    IndexError
        When the indices are out of bounds for the array.
    ValueError
        When `indices` contains negative values other than ``-1``
        and `allow_fill` is True, or when `fill_value` is a non-scalar,
        non-NA value.
    """
    from pandas.core.algorithms import take as take_positions

    nv.validate_take(tuple(), kwargs)

    if not allow_fill:
        fill_left = fill_right = fill_value
    elif fill_value is None:
        fill_left = fill_right = self.left._na_value
    elif is_interval(fill_value):
        self._check_closed_matches(fill_value, name="fill_value")
        fill_left, fill_right = fill_value.left, fill_value.right
    else:
        if not is_scalar(fill_value) and notna(fill_value):
            # NOTE(review): the message names 'fillna' although this is
            # ``take``; kept verbatim because callers may match on it.
            msg = ("'IntervalArray.fillna' only supports filling with a "
                   "'scalar qq_pandas.Interval or NA'. Got a '{}' instead."
                   .format(type(fill_value).__name__))
            raise ValueError(msg)
        fill_left = fill_right = fill_value

    new_left = take_positions(self.left, indices,
                              allow_fill=allow_fill, fill_value=fill_left)
    new_right = take_positions(self.right, indices,
                               allow_fill=allow_fill, fill_value=fill_right)
    return self._shallow_copy(new_left, new_right)