def __getitem__(self, value):
    """Index into the interval array: scalar keys yield an Interval (or the
    fill value for NA), array-like keys yield a shallow copy."""
    value = check_array_indexer(self, value)
    left, right = self.left[value], self.right[value]

    if isinstance(left, ABCIndexClass):
        # array-like result: rewrap both endpoint arrays
        return self._shallow_copy(left, right)

    # scalar path
    if is_scalar(left) and isna(left):
        return self._fill_value
    if np.ndim(left) > 1:
        # GH#30588 multi-dimensional indexer disallowed
        raise ValueError("multi-dimensional indexing not allowed")
    return Interval(left, right, self.closed)
def __getitem__(self, key):
    """Index into the backing ndarray, boxing scalar results."""
    if lib.is_integer(key):
        # fast-path for a plain integer position
        item = self._ndarray[key]
        if self.ndim != 1:
            return self._from_backing_data(item)
        return self._box_func(item)

    key = check_array_indexer(self, extract_array(key, extract_numpy=True))
    item = self._ndarray[key]
    return self._box_func(item) if lib.is_scalar(item) else self._from_backing_data(item)
def __setitem__(self, key, value) -> None:
    """Set positions in the paired data/mask arrays, treating NA specially."""
    key = check_array_indexer(self, key)

    if not is_scalar(value):
        # list-like: coerce to parallel (values, mask) arrays in one shot
        arr, mask = self._coerce_to_array(value, dtype=self.dtype)
        self._data[key] = arr
        self._mask[key] = mask
        return

    if is_valid_na_for_dtype(value, self.dtype):
        # an NA scalar only flips the mask; the backing data stays untouched
        self._mask[key] = True
    else:
        self._data[key] = self._validate_setitem_value(value)
        self._mask[key] = False
def __getitem__(self, item) -> Union[TokenSpan, "TokenSpanArray"]:
    """
    See docstring in `ExtensionArray` class in `pandas/core/arrays/base.py`
    for information about this method.
    """
    if isinstance(item, int):
        # scalar access -> a single TokenSpan built from the raw begin/end
        begin = int(self._begin_tokens[item])
        end = int(self._end_tokens[item])
        return TokenSpan(self.tokens[item], begin, end)

    # anything else is treated as a numpy-compatible indexer
    item = check_array_indexer(self, item)
    return TokenSpanArray(
        self.tokens[item], self.begin_token[item], self.end_token[item]
    )
def __getitem__(self, item) -> Union["TensorArray", "TensorElement"]:
    """
    See docstring in `Extension Array` class in `pandas/core/arrays/base.py`
    for information about this method.
    """
    if not isinstance(item, int):
        # slice / array indexer: wrap the selected sub-tensor
        item = check_array_indexer(self, item)
        return TensorArray(self._tensor[item])

    value = self._tensor[item]
    # plain scalars pass through unwrapped; array elements get wrapped
    return value if np.isscalar(value) else TensorElement(value)
def __getitem__(self, key):
    """Index the combined (left, right) endpoint array; integer keys yield a
    scalar Interval (or the fill value for NA rows)."""
    key = check_array_indexer(self, key)
    result = self._combined[key]

    if is_integer(key):
        left, right = result[0], result[1]
        return self._fill_value if isna(left) else Interval(left, right, self.closed)

    # TODO: need to watch out for incorrectly-reducing getitem
    if np.ndim(result) > 2:
        # GH#30588 multi-dimensional indexer disallowed
        raise ValueError("multi-dimensional indexing not allowed")
    return type(self)._simple_new(result, closed=self.closed)
def __setitem__(self, key: int | slice | np.ndarray, value: Any) -> None:
    """Set one or more values inplace.

    Parameters
    ----------
    key : int, ndarray, or slice
        When called from, e.g. ``Series.__setitem__``, ``key`` will be one of

        * scalar int
        * ndarray of integers.
        * boolean ndarray
        * slice object

    value : ExtensionDtype.type, Sequence[ExtensionDtype.type], or object
        value or values to be set of ``key``.

    Returns
    -------
    None
    """
    key = check_array_indexer(self, key)
    indices = self._key_to_indices(key)

    if is_scalar(value):
        # normalize NA to None, reject non-string scalars, then repeat
        if isna(value):
            value = None
        elif not isinstance(value, str):
            raise ValueError("Scalar must be NA or str")
        value = np.broadcast_to(value, len(indices))
    else:
        value = np.array(value, dtype=object, copy=True)
        for idx, element in enumerate(value):
            if isna(element):
                value[idx] = None
            elif not isinstance(element, str):
                raise ValueError("Scalar must be NA or str")
        if len(indices) != len(value):
            raise ValueError("Length of indexer and values mismatch")

    # chunk-wise assignment requires ascending target positions
    order = np.argsort(indices)
    self._data = self._set_via_chunk_iteration(
        indices=indices[order], value=value[order]
    )
def __getitem__(self, item: Any) -> Any:
    """Select a subset of self.

    Parameters
    ----------
    item : int, slice, or ndarray
        Integer position, slice, or a 1-d integer/boolean ndarray.

    Returns
    -------
    scalar or ExtensionArray
        A scalar for integer ``item``; otherwise a new array of this type.
    """
    item = check_array_indexer(self, item)

    if not isinstance(item, np.ndarray):
        # not an array indexer (slice / integer): dispatch to pyarrow
        value = self._data[item]
        if isinstance(value, pa.ChunkedArray):
            return type(self)(value)
        return self._as_pandas_scalar(value)

    if not len(item):
        # empty selection: preserve the string dtype
        return type(self)(pa.chunked_array([], type=pa.string()))
    if is_integer_dtype(item.dtype):
        # error: Argument 1 to "take" of "ArrowStringArray" has incompatible
        # type "ndarray"; expected "Sequence[int]"
        return self.take(item)  # type: ignore[arg-type]
    if is_bool_dtype(item.dtype):
        return type(self)(self._data.filter(item))
    raise IndexError(
        "Only integers, slices and integer or boolean arrays are valid indices."
    )
def __setitem__(self, key, value):
    """Set one or more values inplace, converting Quantity values to raw
    magnitudes in this array's units first.

    Parameters
    ----------
    key : int, slice, or array-like indexer
    value : scalar, Quantity, or list-like of either

    Raises
    ------
    IndexError
        If the (boolean) indexer has the wrong length.
    """
    # need to not use `not value` on numpy arrays
    if isinstance(value, (list, tuple)) and len(value) == 0:
        # doing nothing here seems to be ok
        return

    if isinstance(value, _Quantity):
        value = value.to(self.units).magnitude
    elif is_list_like(value) and len(value) > 0 and isinstance(value[0], _Quantity):
        # BUG FIX: guard with len(value) > 0 — the early return above only
        # covers list/tuple, so an empty ndarray/Series used to raise
        # IndexError on value[0] here.
        value = [item.to(self.units).magnitude for item in value]

    key = check_array_indexer(self, key)
    try:
        self._data[key] = value
    except IndexError as e:
        msg = "Mask is wrong length. {}".format(e)
        raise IndexError(msg)
def __setitem__(self, key: Union[int, np.ndarray], value: Any) -> None: """ See docstring in `ExtensionArray` class in `pandas/core/arrays/base.py` for information about this method. """ # Subroutine of the if-else sequence below def _is_sequence_of_spans(seq: Any): if isinstance(seq, SpanArray): return True if not isinstance(seq, (collections.abc.Sequence, np.ndarray)): return False else: # For other sequences, check for everything being Span or None return all(elem is None or isinstance(elem, Span) for elem in seq) key = check_array_indexer(self, key) if isinstance(value, ABCSeries) and isinstance(value.dtype, SpanDtype): value = value.values if value is None or (isinstance(value, collections.abc.Sequence) and len(value) == 0): self._begins[key] = Span.NULL_OFFSET_VALUE self._ends[key] = Span.NULL_OFFSET_VALUE self._text_ids[key] = StringTable.NONE_ID elif isinstance(value, Span): self._begins[key] = value.begin self._ends[key] = value.end self._text_ids[key] = self._string_table.maybe_add_thing(value.target_text) elif ((isinstance(key, slice) or (isinstance(key, np.ndarray) and is_bool_dtype(key.dtype))) and isinstance(value, SpanArray)): self._begins[key] = value.begin self._ends[key] = value.end self._text_ids[key] = self._string_table.maybe_add_things(value.target_text) elif (isinstance(key, np.ndarray) and len(value) > 0 and len(value) == len(key) and _is_sequence_of_spans(value)): for k, v in zip(key, value): self._begins[k] = v.begin self._ends[k] = v.end self._text_ids[k] = self._string_table.maybe_add_thing(v.target_text) else: raise ValueError( f"Attempted to set element {key} (type {type(key)}) of a SpanArray with " f"an object of type {type(value)}") # We just changed the contents of this array, so invalidate any cached # results computed from those contents. self.increment_version()
def __setitem__(self, key, value):
    """Set one or more intervals inplace; ``value`` may be NA, a scalar
    Interval, or a list-like coercible to IntervalArray."""
    # na value: need special casing to set directly on numpy arrays
    needs_float_conversion = False
    if is_scalar(value) and isna(value):
        if is_integer_dtype(self.dtype.subtype):
            # can't set NaN on a numpy integer array
            needs_float_conversion = True
        elif is_datetime64_any_dtype(self.dtype.subtype):
            # need proper NaT to set directly on the numpy array
            value = np.datetime64("NaT")
        elif is_timedelta64_dtype(self.dtype.subtype):
            # need proper NaT to set directly on the numpy array
            value = np.timedelta64("NaT")
        # NA fills both endpoints with the same sentinel
        value_left, value_right = value, value
    # scalar interval
    elif is_interval_dtype(value) or isinstance(value, Interval):
        self._check_closed_matches(value, name="value")
        value_left, value_right = value.left, value.right
    else:
        # list-like of intervals
        try:
            array = IntervalArray(value)
            value_left, value_right = array.left, array.right
        except TypeError as err:
            # wrong type: not interval or NA
            msg = f"'value' should be an interval type, got {type(value)} instead."
            raise TypeError(msg) from err
    if needs_float_conversion:
        # deferred until after value validation so type errors win over this
        raise ValueError(
            "Cannot set float NaN to integer-backed IntervalArray")
    key = check_array_indexer(self, key)
    # Need to ensure that left and right are updated atomically, so we're
    # forced to copy, update the copy, and swap in the new values.
    left = self.left.copy(deep=True)
    left._values[key] = value_left
    self._left = left
    right = self.right.copy(deep=True)
    right._values[key] = value_right
    self._right = right
def __getitem__(
    self: NDArrayBackedExtensionArrayT, key: Union[int, slice, np.ndarray]
) -> Union[NDArrayBackedExtensionArrayT, Any]:
    """Select positions from the backing ndarray, boxing scalar results."""
    if lib.is_integer(key):
        # fast-path: plain integer position
        res = self._ndarray[key]
        if self.ndim != 1:
            return self._from_backing_data(res)
        return self._box_func(res)

    key = extract_array(key, extract_numpy=True)
    key = check_array_indexer(self, key)
    res = self._ndarray[key]
    return self._box_func(res) if lib.is_scalar(res) else self._from_backing_data(res)
def __getitem__(self, key):
    """Select from the sparse array: scalar, copy, or a new sparse array
    built via ``take``/densification depending on the key type."""
    # avoid mypy issues when importing at the top-level
    from pandas.core.indexing import check_bool_indexer
    if isinstance(key, tuple):
        # unwrap a 1-tuple key; more than one dimension is rejected
        if len(key) > 1:
            raise IndexError("too many indices for array.")
        key = key[0]
    if is_integer(key):
        return self._get_val_at(key)
    elif isinstance(key, tuple):
        # key[0] was itself a tuple: densify and index the dense array
        data_slice = self.to_dense()[key]
    elif isinstance(key, slice):
        # special case to preserve dtypes
        if key == slice(None):
            return self.copy()
        # TODO: this logic is surely elsewhere
        # TODO: this could be more efficient
        indices = np.arange(len(self), dtype=np.int32)[key]
        return self.take(indices)
    else:
        # TODO: I think we can avoid densifying when masking a
        # boolean SparseArray with another. Need to look at the
        # key's fill_value for True / False, and then do an intersection
        # on the indicies of the sp_values.
        if isinstance(key, SparseArray):
            if is_bool_dtype(key):
                key = key.to_dense()
            else:
                key = np.asarray(key)
        key = check_array_indexer(self, key)
        if com.is_bool_indexer(key):
            # boolean mask -> positions of True entries, then take
            key = check_bool_indexer(self, key)
            return self.take(np.arange(len(key), dtype=np.int32)[key])
        elif hasattr(key, "__len__"):
            return self.take(key)
        else:
            raise ValueError(f"Cannot slice with '{key}'")
    # only reached via the tuple branch above
    return type(self)(data_slice, kind=self.kind)
def __setitem__(self, key: Union[int, np.ndarray], value: Any) -> None:
    """
    Set one or more values inplace.

    This method is not required to satisfy the pandas extension array
    interface.

    Parameters
    ----------
    key : int, ndarray, or slice
        Scalar int, integer ndarray, boolean ndarray, or slice.
    value : ExtensionDtype.type, Sequence[ExtensionDtype.type], or object
        value or values to be set of ``key``.

    Returns
    -------
    None
    """
    key = check_array_indexer(self, key)

    # Normalize wrapped elements / scalars / Series down to raw ndarrays.
    if isinstance(value, TensorArrayElement) or np.isscalar(value):
        value = np.asarray(value)
    if isinstance(value, list):
        value = [
            np.asarray(v) if isinstance(v, TensorArrayElement) else v for v in value
        ]
    if isinstance(value, ABCSeries) and isinstance(value.dtype, TensorDtype):
        value = value.values

    is_empty = value is None or (isinstance(value, Sequence) and len(value) == 0)
    if is_empty:
        # None / empty assignment blanks the selected region with NaN
        self._tensor[key] = np.full_like(self._tensor[key], np.nan)
        return
    if not isinstance(key, (int, slice, np.ndarray)):
        raise NotImplementedError(
            f"__setitem__ with key type '{type(key)}' not implemented")
    self._tensor[key] = value
def __getitem__(self, item):
    # type (Any) -> Any
    """Select a subset of self.

    Parameters
    ----------
    item : int, slice, or ndarray
        Integer position, slice, or 1-d boolean ndarray the length of self.

    Returns
    -------
    scalar Quantity or PintArray
    """
    if is_integer(item):
        # scalar: reattach the units to the raw magnitude
        return self._data[item] * self.units

    item = check_array_indexer(self, item)
    return self.__class__(self._data[item], self.dtype)
def __setitem__(self, key: Union[int, np.ndarray], value: Any) -> None:
    """
    See docstring in `ExtensionArray` class in `pandas/core/arrays/base.py`
    for information about this method.
    """
    key = check_array_indexer(self, key)

    # Coerce wrapped elements / scalars / Series to raw ndarray-backed values.
    if isinstance(value, TensorElement) or np.isscalar(value):
        value = np.asarray(value)
    if isinstance(value, list):
        coerced = []
        for v in value:
            coerced.append(np.asarray(v) if isinstance(v, TensorElement) else v)
        value = coerced
    if isinstance(value, ABCSeries) and isinstance(value.dtype, TensorDtype):
        value = value.values

    if value is None or isinstance(value, Sequence) and len(value) == 0:
        # nothing to assign: overwrite the selection with NaN placeholders
        nan_fill = np.full_like(self._tensor[key], np.nan)
        self._tensor[key] = nan_fill
    elif isinstance(key, (int, slice, np.ndarray)):
        self._tensor[key] = value
    else:
        raise NotImplementedError(f"__setitem__ with key type '{type(key)}' "
                                  f"not implemented")
def __getitem__(
    self: NDArrayBackedExtensionArrayT,
    key: PositionalIndexer2D,
) -> NDArrayBackedExtensionArrayT | Any:
    """Index the backing ndarray; scalars are boxed, arrays re-wrapped."""
    if lib.is_integer(key):
        # fast-path for a single integer position
        raw = self._ndarray[key]
        return self._box_func(raw) if self.ndim == 1 else self._from_backing_data(raw)

    # error: Incompatible types in assignment (expression has type "ExtensionArray",
    # variable has type "Union[int, slice, ndarray]")
    key = extract_array(key, extract_numpy=True)  # type: ignore[assignment]
    key = check_array_indexer(self, key)
    raw = self._ndarray[key]
    if lib.is_scalar(raw):
        return self._box_func(raw)
    return self._from_backing_data(raw)
def __getitem__(self, item):
    # type (Any) -> Any
    """Select a subset of self.

    Parameters
    ----------
    item : int, slice, or ndarray
        Integer position, slice, or 1-d boolean ndarray the length of self.

    Returns
    -------
    scalar or FletcherArray
        A Python scalar for integer ``item``; otherwise a FletcherArray,
        even for length-0/1 slices and masks.
    """
    if PANDAS_GE_0_26_0:
        item = check_array_indexer(self, item)
    if is_integer(item):
        # scalar: unwrap the Arrow scalar to a plain Python object
        return self.data[int(item)].as_py()

    if isinstance(item, slice):
        if item.step is None or item.step == 1:
            # contiguous slice: zero-copy through Arrow
            return FletcherArray(self.data[item])
        # strided slice: materialize positions and fall through to take
        item = np.arange(len(self), dtype=self._indices_dtype)[item]
    elif len(item) > 0 and np.asarray(item[:1]).dtype.kind == "b":
        # boolean mask -> positions of True entries
        item = np.argwhere(item).flatten()

    return self.take(item)
def __getitem__(
    self: NDArrayBackedExtensionArrayT, key: int | slice | np.ndarray
) -> NDArrayBackedExtensionArrayT | Any:
    """Positional indexing over the backing ndarray."""
    if lib.is_integer(key):
        # fast-path
        scalar = self._ndarray[key]
        if self.ndim == 1:
            return self._box_func(scalar)
        return self._from_backing_data(scalar)

    # error: Value of type variable "AnyArrayLike" of "extract_array" cannot be
    # "Union[int, slice, ndarray]"
    # error: Incompatible types in assignment (expression has type "ExtensionArray",
    # variable has type "Union[int, slice, ndarray]")
    key = extract_array(  # type: ignore[type-var,assignment]
        key, extract_numpy=True)
    key = check_array_indexer(self, key)
    out = self._ndarray[key]
    if not lib.is_scalar(out):
        return self._from_backing_data(out)
    return self._box_func(out)
def __setitem__(self, key: int | slice | np.ndarray, value: Any) -> None:
    """Set one or more values inplace.

    Parameters
    ----------
    key : int, ndarray, or slice
        When called from, e.g. ``Series.__setitem__``, ``key`` will be one of

        * scalar int
        * ndarray of integers.
        * boolean ndarray
        * slice object

    value : ExtensionDtype.type, Sequence[ExtensionDtype.type], or object
        value or values to be set of ``key``.

    Returns
    -------
    None
    """
    key = check_array_indexer(self, key)
    indices = self._indexing_key_to_indices(key)
    value = self._maybe_convert_setitem_value(value)

    # chunk iteration needs the target positions in ascending order
    order = np.argsort(indices)
    indices = indices[order]

    if is_scalar(value):
        value = np.broadcast_to(value, len(self))
    elif len(indices) != len(value):
        raise ValueError("Length of indexer and values mismatch")
    else:
        # reorder values to match the sorted target positions
        value = np.asarray(value)[order]

    self._data = self._set_via_chunk_iteration(indices=indices, value=value)
def __setitem__(self, key, value):
    # type: (Union[int, np.ndarray], Any) -> None
    """Set one or more values inplace.

    Parameters
    ----------
    key : int, ndarray, or slice
        When called from, e.g. ``Series.__setitem__``, ``key`` will be one of

        * scalar int
        * ndarray of integers.
        * boolean ndarray
        * slice object

    value : FletcherDtype.type, Sequence[FletcherDtype.type], or object
        value or values to be set of ``key``.

    Returns
    -------
    None
    """
    if PANDAS_GE_0_26_0:
        key = check_array_indexer(self, key)

    # Convert all possible input key types to an array of integers
    if is_bool_dtype(key):
        key = np.argwhere(key).flatten()
    elif isinstance(key, slice):
        key = np.array(range(len(self))[key])
    elif is_integer(key):
        key = np.array([key])
    else:
        key = np.asanyarray(key)

    if pd.api.types.is_scalar(value):
        value = np.broadcast_to(value, len(key))
    else:
        value = np.asarray(value)

    if len(key) != len(value):
        raise ValueError("Length mismatch between index and value.")

    affected_chunks_index = self._get_chunk_indexer(key)
    affected_chunks_unique = np.unique(affected_chunks_index)

    all_chunks = list(self.data.iterchunks())

    for ix, offset in zip(affected_chunks_unique, self.offsets[affected_chunks_unique]):
        chunk = all_chunks[ix]

        # Translate the array-wide indices to indices of the chunk
        key_chunk_indices = np.argwhere(affected_chunks_index == ix).flatten()
        array_chunk_indices = key[key_chunk_indices] - offset

        arr = chunk.to_pandas().values
        # In the case where we zero-copy Arrow to Pandas conversion, the
        # resulting arrays are read-only.
        if not arr.flags.writeable:
            arr = arr.copy()
        arr[array_chunk_indices] = value[key_chunk_indices]

        mask = None
        # ARROW-2806: Inconsistent handling of np.nan requires adding a mask
        if (pa.types.is_integer(self.dtype.arrow_dtype)
                or pa.types.is_date(self.dtype.arrow_dtype)
                or pa.types.is_floating(self.dtype.arrow_dtype)
                or pa.types.is_boolean(self.dtype.arrow_dtype)):
            nan_values = pd.isna(value[key_chunk_indices])
            if any(nan_values):
                # BUG FIX: the NaN positions must be expressed in chunk-local
                # coordinates. The previous code computed
                # ``key_chunk_indices & nan_values`` — a bitwise AND of
                # integer key-space indices with a boolean mask — which both
                # used the wrong coordinate space and spuriously touched
                # index 0.
                nan_index = array_chunk_indices[nan_values]
                mask = np.ones_like(arr, dtype=bool)
                mask[nan_index] = False
        pa_arr = pa.array(arr, self.dtype.arrow_dtype, mask=mask)
        all_chunks[ix] = pa_arr

    # data pointer has been changed !
    self.data = pa.chunked_array(all_chunks)
def __setitem__(self, key: int | slice | np.ndarray, value: Any) -> None:
    """Set one or more values inplace.

    Parameters
    ----------
    key : int, ndarray, or slice
        When called from, e.g. ``Series.__setitem__``, ``key`` will be one of

        * scalar int
        * ndarray of integers.
        * boolean ndarray
        * slice object

    value : ExtensionDtype.type, Sequence[ExtensionDtype.type], or object
        value or values to be set of ``key``.

    Returns
    -------
    None
    """
    key = check_array_indexer(self, key)

    if is_integer(key):
        key = cast(int, key)

        if not is_scalar(value):
            raise ValueError("Must pass scalars with scalar indexer")
        elif isna(value):
            value = None
        elif not isinstance(value, str):
            raise ValueError("Scalar must be NA or str")

        # BUG FIX: normalize negative positions before the slice arithmetic
        # below; with e.g. key=-1, ``self._data[(key + 1):]`` would be the
        # whole array and the result would be silently corrupted.
        if key < 0:
            key += len(self)
        if not 0 <= key < len(self):
            raise IndexError("index out of bounds")

        # Slice data and insert in-between
        new_data = [
            *self._data[0:key].chunks,
            pa.array([value], type=pa.string()),
            *self._data[(key + 1):].chunks,
        ]
        self._data = pa.chunked_array(new_data)
    else:
        # Convert to integer indices and iteratively assign.
        # TODO: Make a faster variant of this in Arrow upstream.
        #       This is probably extremely slow.

        # Convert all possible input key types to an array of integers
        if isinstance(key, slice):
            key_array = np.array(range(len(self))[key])
        elif is_bool_dtype(key):
            # TODO(ARROW-9430): Directly support setitem(booleans)
            key_array = np.argwhere(key).flatten()
        else:
            # TODO(ARROW-9431): Directly support setitem(integers)
            key_array = np.asanyarray(key)

        if is_scalar(value):
            value = np.broadcast_to(value, len(key_array))
        else:
            value = np.asarray(value)

        if len(key_array) != len(value):
            raise ValueError("Length of indexer and values mismatch")

        for k, v in zip(key_array, value):
            self[k] = v
def __setitem__(self, key, value) -> None:
    """Write ``value`` at ``key`` in the backing ndarray after validating both."""
    idx = check_array_indexer(self, key)
    self._ndarray[idx] = self._validate_setitem_value(value)
def __getitem__(self, item: PositionalIndexer):
    """Select a subset of self.

    Parameters
    ----------
    item : int, slice, or ndarray
        * int: The position in 'self' to get.
        * slice: A slice object, where 'start', 'stop', and 'step' are
          integers or None
        * ndarray: A 1-d boolean NumPy ndarray the same length as 'self'

    Returns
    -------
    item : scalar or ExtensionArray

    Notes
    -----
    For scalar ``item``, return a scalar value suitable for the array's
    type. This should be an instance of ``self.dtype.type``.
    For slice ``key``, return an instance of ``ExtensionArray``, even if
    the slice is length 0 or 1.
    For a boolean mask, return an instance of ``ExtensionArray``, filtered
    to the values where ``item`` is True.
    """
    item = check_array_indexer(self, item)
    if isinstance(item, np.ndarray):
        if not len(item):
            # empty selection: build a length-0 array with the right dtype.
            # Removable once we migrate StringDtype[pyarrow] to ArrowDtype[string]
            if self._dtype.name == "string" and self._dtype.storage == "pyarrow":
                pa_dtype = pa.string()
            else:
                pa_dtype = self._dtype.pyarrow_dtype
            return type(self)(pa.chunked_array([], type=pa_dtype))
        elif is_integer_dtype(item.dtype):
            return self.take(item)
        elif is_bool_dtype(item.dtype):
            return type(self)(self._data.filter(item))
        else:
            raise IndexError("Only integers, slices and integer or "
                             "boolean arrays are valid indices.")
    elif isinstance(item, tuple):
        # e.g. arr[..., 0] or arr[0, ...]: reduce to a single indexer
        item = unpack_tuple_and_ellipses(item)
    # error: Non-overlapping identity check (left operand type:
    # "Union[Union[int, integer[Any]], Union[slice, List[int],
    # ndarray[Any, Any]]]", right operand type: "ellipsis")
    if item is Ellipsis:  # type: ignore[comparison-overlap]
        # TODO: should be handled by pyarrow?
        item = slice(None)
    if is_scalar(item) and not is_integer(item):
        # e.g. "foo" or 2.5
        # exception message copied from numpy
        raise IndexError(
            r"only integers, slices (`:`), ellipsis (`...`), numpy.newaxis "
            r"(`None`) and integer or boolean arrays are valid indices")
    # We are not an array indexer, so maybe e.g. a slice or integer
    # indexer. We dispatch to pyarrow.
    value = self._data[item]
    if isinstance(value, pa.ChunkedArray):
        return type(self)(value)
    else:
        # pyarrow scalar: unwrap to Python, mapping null to the dtype's NA
        scalar = value.as_py()
        if scalar is None:
            return self._dtype.na_value
        else:
            return scalar
def __setitem__(self, key, value):
    """Write an interval value by updating both endpoint arrays in lockstep."""
    left_val, right_val = self._validate_setitem_value(value)
    idx = check_array_indexer(self, key)
    self._left[idx] = left_val
    self._right[idx] = right_val
def _validate_getitem_key(self, key):
    """Unwrap pandas containers, then validate the result as an array indexer."""
    unwrapped = extract_array(key, extract_numpy=True)
    return check_array_indexer(self, unwrapped)
def _validate_setitem_key(self, key):
    """Normalize/validate ``key`` for use in ``__setitem__``."""
    validated = check_array_indexer(self, key)
    return validated