def __sub__(self, other):
    other = lib.item_from_zerodim(other)
    if isinstance(other, (ABCSeries, ABCDataFrame)):
        return NotImplemented

    # scalar others
    elif other is NaT:
        result = self._sub_nat()
    elif isinstance(other, (Tick, timedelta, np.timedelta64)):
        result = self._add_delta(-other)
    elif isinstance(other, DateOffset):
        # specifically _not_ a Tick
        result = self._add_offset(-other)
    elif isinstance(other, (datetime, np.datetime64)):
        result = self._sub_datetimelike_scalar(other)
    elif lib.is_integer(other):
        # This check must come after the check for np.timedelta64
        # as is_integer returns True for these
        maybe_integer_op_deprecated(self)
        result = self._time_shift(-other)
    elif isinstance(other, Period):
        result = self._sub_period(other)

    # array-like others
    elif is_timedelta64_dtype(other):
        # TimedeltaIndex, ndarray[timedelta64]
        result = self._add_delta(-other)
    elif is_offsetlike(other):
        # Array/Index of DateOffset objects
        result = self._addsub_offset_array(other, operator.sub)
    elif is_datetime64_dtype(other) or is_datetime64tz_dtype(other):
        # DatetimeIndex, ndarray[datetime64]
        result = self._sub_datetime_arraylike(other)
    elif is_period_dtype(other):
        # PeriodIndex
        result = self._sub_period_array(other)
    elif is_integer_dtype(other):
        maybe_integer_op_deprecated(self)
        result = self._addsub_int_array(other, operator.sub)
    elif isinstance(other, ABCIndexClass):
        raise TypeError("cannot subtract {cls} and {typ}"
                        .format(cls=type(self).__name__,
                                typ=type(other).__name__))
    elif is_float_dtype(other):
        # Explicitly catch invalid dtypes
        raise TypeError("cannot subtract {dtype}-dtype from {cls}"
                        .format(dtype=other.dtype,
                                cls=type(self).__name__))
    elif is_extension_array_dtype(other):
        # Categorical op will raise; defer explicitly
        return NotImplemented
    else:  # pragma: no cover
        return NotImplemented

    if is_timedelta64_dtype(result) and isinstance(result, np.ndarray):
        from pandas.core.arrays import TimedeltaArrayMixin
        # TODO: infer freq?
        return TimedeltaArrayMixin(result)
    return result
def validate_periods(periods):
    """
    If a `periods` argument is passed to the Datetime/Timedelta Array/Index
    constructor, cast it to an integer.

    Parameters
    ----------
    periods : None, float, int

    Returns
    -------
    periods : None or int

    Raises
    ------
    TypeError
        if periods is not None, float, or int
    """
    if periods is not None:
        if lib.is_float(periods):
            periods = int(periods)
        elif not lib.is_integer(periods):
            raise TypeError('periods must be a number, got {periods}'
                            .format(periods=periods))
    return periods
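# A minimal standalone sketch of the same validation rule, using plain
# isinstance checks instead of pandas' cython helpers lib.is_float /
# lib.is_integer (assumption: the helpers behave like these checks for plain
# Python scalars). `_validate_periods_sketch` is a hypothetical name, not a
# pandas function.
def _validate_periods_sketch(periods):
    if periods is not None:
        if isinstance(periods, float):
            periods = int(periods)          # float is cast to int
        elif not isinstance(periods, int):
            raise TypeError('periods must be a number, got {periods}'
                            .format(periods=periods))
    return periods


assert _validate_periods_sketch(None) is None   # None passes through untouched
assert _validate_periods_sketch(5.0) == 5       # float cast to int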
def __new__(cls, values, freq=None, start=None, end=None, periods=None,
            closed=None):
    if (freq is not None and not isinstance(freq, DateOffset) and
            freq != 'infer'):
        freq = to_offset(freq)

    if periods is not None:
        if lib.is_float(periods):
            periods = int(periods)
        elif not lib.is_integer(periods):
            raise TypeError('`periods` must be a number, got {periods}'
                            .format(periods=periods))

    if values is None:
        if freq is None and com._any_none(periods, start, end):
            raise ValueError('Must provide freq argument if no data is '
                             'supplied')
        else:
            return cls._generate(start, end, periods, freq,
                                 closed=closed)

    result = cls._simple_new(values, freq=freq)
    if freq == 'infer':
        inferred = result.inferred_freq
        if inferred:
            result._freq = to_offset(inferred)

    return result
def __add__(self, other):
    other = lib.item_from_zerodim(other)
    if isinstance(other, (ABCSeries, ABCDataFrame)):
        return NotImplemented

    # scalar others
    elif other is NaT:
        result = self._add_nat()
    elif isinstance(other, (Tick, timedelta, np.timedelta64)):
        result = self._add_delta(other)
    elif isinstance(other, DateOffset):
        # specifically _not_ a Tick
        result = self._add_offset(other)
    elif isinstance(other, (datetime, np.datetime64)):
        result = self._add_datetimelike_scalar(other)
    elif lib.is_integer(other):
        # This check must come after the check for np.timedelta64
        # as is_integer returns True for these
        maybe_integer_op_deprecated(self)
        result = self._time_shift(other)

    # array-like others
    elif is_timedelta64_dtype(other):
        # TimedeltaIndex, ndarray[timedelta64]
        result = self._add_delta(other)
    elif is_offsetlike(other):
        # Array/Index of DateOffset objects
        result = self._addsub_offset_array(other, operator.add)
    elif is_datetime64_dtype(other) or is_datetime64tz_dtype(other):
        # DatetimeIndex, ndarray[datetime64]
        return self._add_datetime_arraylike(other)
    elif is_integer_dtype(other):
        maybe_integer_op_deprecated(self)
        result = self._addsub_int_array(other, operator.add)
    elif is_float_dtype(other):
        # Explicitly catch invalid dtypes
        raise TypeError("cannot add {dtype}-dtype to {cls}"
                        .format(dtype=other.dtype,
                                cls=type(self).__name__))
    elif is_period_dtype(other):
        # if self is a TimedeltaArray and other is a PeriodArray with
        #  a timedelta-like (i.e. Tick) freq, this operation is valid.
        #  Defer to the PeriodArray implementation.
        # In remaining cases, this will end up raising TypeError.
        return NotImplemented
    elif is_extension_array_dtype(other):
        # Categorical op will raise; defer explicitly
        return NotImplemented
    else:  # pragma: no cover
        return NotImplemented

    if is_timedelta64_dtype(result) and isinstance(result, np.ndarray):
        from pandas.core.arrays import TimedeltaArrayMixin
        # TODO: infer freq?
        return TimedeltaArrayMixin(result)
    return result
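# Public-API illustration of the dispatch above (assumes `import pandas as pd`
# is available); which private branch handles each case is an inference from
# the code, not something this snippet verifies.
import pandas as pd

dti = pd.date_range("2018-01-01", periods=3, freq="D")
print(dti + pd.Timedelta(hours=12))   # timedelta-like scalar -> _add_delta branch
print(dti + pd.offsets.MonthEnd())    # non-Tick DateOffset -> _add_offset branch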
def __getitem__(self, key):
    """
    This getitem defers to the underlying array, which by-definition can
    only handle list-likes, slices, and integer scalars
    """

    is_int = lib.is_integer(key)
    if lib.is_scalar(key) and not is_int:
        raise IndexError("only integers, slices (`:`), ellipsis (`...`), "
                         "numpy.newaxis (`None`) and integer or boolean "
                         "arrays are valid indices")

    getitem = self._data.__getitem__
    if is_int:
        val = getitem(key)
        return self._box_func(val)

    if com.is_bool_indexer(key):
        key = np.asarray(key, dtype=bool)
        if key.all():
            key = slice(0, None, None)
        else:
            key = lib.maybe_booleans_to_slice(key.view(np.uint8))

    attribs = self._get_attributes_dict()

    is_period = is_period_dtype(self)
    if is_period:
        freq = self.freq
    else:
        freq = None
        if isinstance(key, slice):
            if self.freq is not None and key.step is not None:
                freq = key.step * self.freq
            else:
                freq = self.freq

        elif key is Ellipsis:
            # GH#21282 indexing with Ellipsis is similar to a full slice,
            # should preserve `freq` attribute
            freq = self.freq

    attribs['freq'] = freq

    result = getitem(key)
    if result.ndim > 1:
        # To support MPL which performs slicing with 2 dim
        # even though it only has 1 dim by definition
        if is_period:
            return self._simple_new(result, **attribs)
        return result

    return self._simple_new(result, **attribs)
def validate_argsort_with_ascending(ascending, args, kwargs):
    """
    If 'Categorical.argsort' is called via the 'numpy' library, the first
    parameter in its signature is 'axis', which takes either an integer or
    'None', so check if the 'ascending' parameter has either integer type or
    is None, since 'ascending' itself should be a boolean
    """
    if is_integer(ascending) or ascending is None:
        args = (ascending,) + args
        ascending = True

    validate_argsort_kind(args, kwargs, max_fname_arg_count=3)
    return ascending
def _validate_setitem_value(self, value):
    """
    Check if we have a scalar that we can cast losslessly.

    Raises
    ------
    TypeError
    """
    kind = self.dtype.kind
    # TODO: get this all from np_can_hold_element?

    if kind == "b":
        if lib.is_bool(value):
            return value

    elif kind == "f":
        if lib.is_integer(value) or lib.is_float(value):
            return value

    else:
        if lib.is_integer(value) or (lib.is_float(value) and value.is_integer()):
            return value
        # TODO: unsigned checks

    raise TypeError(f"Invalid value '{value}' for dtype {self.dtype}")
def __getitem__(self, key):
    if lib.is_integer(key):
        # fast-path
        result = self._ndarray[key]
        if self.ndim == 1:
            return self._box_func(result)
        return self._from_backing_data(result)

    key = self._validate_getitem_key(key)
    result = self._ndarray[key]
    if lib.is_scalar(result):
        return self._box_func(result)

    result = self._from_backing_data(result)
    return result
def validate_argsort_with_ascending(ascending, args, kwargs):
    """
    If 'Categorical.argsort' is called via the 'numpy' library, the first
    parameter in its signature is 'axis', which takes either an integer or
    'None', so check if the 'ascending' parameter has either integer type or
    is None, since 'ascending' itself should be a boolean
    """
    if is_integer(ascending) or ascending is None:
        args = (ascending,) + args
        ascending = True

    validate_argsort_kind(args, kwargs, max_fname_arg_count=3)
    return ascending
def __getitem__(self, key):
    """
    This getitem defers to the underlying array, which by-definition can
    only handle list-likes, slices, and integer scalars
    """

    is_int = lib.is_integer(key)
    if lib.is_scalar(key) and not is_int:
        raise IndexError("only integers, slices (`:`), ellipsis (`...`), "
                         "numpy.newaxis (`None`) and integer or boolean "
                         "arrays are valid indices")

    getitem = self._data.__getitem__
    if is_int:
        val = getitem(key)
        return self._box_func(val)

    if com.is_bool_indexer(key):
        key = np.asarray(key, dtype=bool)
        if key.all():
            key = slice(0, None, None)
        else:
            key = lib.maybe_booleans_to_slice(key.view(np.uint8))

    attribs = self._get_attributes_dict()

    is_period = is_period_dtype(self)
    if is_period:
        freq = self.freq
    else:
        freq = None
        if isinstance(key, slice):
            if self.freq is not None and key.step is not None:
                freq = key.step * self.freq
            else:
                freq = self.freq

    attribs['freq'] = freq

    result = getitem(key)
    if result.ndim > 1:
        # To support MPL which performs slicing with 2 dim
        # even though it only has 1 dim by definition
        if is_period:
            return self._simple_new(result, **attribs)
        return result

    return self._simple_new(result, **attribs)
def __getitem__(self, key):
    if lib.is_integer(key):
        # fast-path
        result = self._ndarray[key]
        if self.ndim == 1:
            return self._box_func(result)
        return self._from_backing_data(result)

    key = extract_array(key, extract_numpy=True)
    key = check_array_indexer(self, key)
    result = self._ndarray[key]
    if lib.is_scalar(result):
        return self._box_func(result)

    result = self._from_backing_data(result)
    return result
def weighted_qcut(values, weights, q, **kwargs):
    """Return weighted quantile cuts from a given series, values."""
    if is_integer(q):
        quantiles = np.linspace(0, 1, q + 1)
        kwargs['labels'] = quantiles[:-1]
    else:
        quantiles = q

    # order observations by value and accumulate their weights, then bin the
    # cumulative weight fraction instead of the raw values
    order = weights.iloc[values.argsort()].cumsum()
    bins = pd.cut(order / order.iloc[-1], quantiles, **kwargs)
    if kwargs.get('retbins'):
        # pd.cut returned a (categorical, bin_edges) tuple
        return bins[0].sort_index(), bins[1]
    return bins.sort_index()
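# Hypothetical usage of weighted_qcut above; assumes `numpy as np`,
# `pandas as pd` and `is_integer` (e.g. from pandas.api.types) are already
# imported for the function definition. The element with weight 2 pulls the
# weighted median toward it.
values = pd.Series([10, 50, 20, 40, 30])
weights = pd.Series([1, 1, 2, 1, 1])
buckets = weighted_qcut(values, weights, 2)   # two weighted halves
print(buckets)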
def wrapper(self, other):
    is_self_int_dtype = is_integer_dtype(self.dtype)

    self, other = _align_method_SERIES(self, other, align_asobject=True)
    res_name = get_op_result_name(self, other)

    # TODO: shouldn't we be applying finalize whenever
    #  not isinstance(other, ABCSeries)?
    finalizer = (
        lambda x: x.__finalize__(self)
        if not isinstance(other, (ABCSeries, ABCIndexClass))
        else x
    )

    if isinstance(other, ABCDataFrame):
        # Defer to DataFrame implementation; fail early
        return NotImplemented

    other = lib.item_from_zerodim(other)
    if is_list_like(other) and not hasattr(other, "dtype"):
        # e.g. list, tuple
        other = construct_1d_object_array_from_listlike(other)

    lvalues = extract_array(self, extract_numpy=True)
    rvalues = extract_array(other, extract_numpy=True)

    if should_extension_dispatch(self, rvalues):
        res_values = dispatch_to_extension_op(op, lvalues, rvalues)

    else:
        if isinstance(rvalues, (ABCSeries, ABCIndexClass, np.ndarray)):
            is_other_int_dtype = is_integer_dtype(rvalues.dtype)
            rvalues = rvalues if is_other_int_dtype else fill_bool(rvalues, lvalues)

        else:
            # i.e. scalar
            is_other_int_dtype = lib.is_integer(rvalues)

        # For int vs int `^`, `|`, `&` are bitwise operators and return
        #   integer dtypes.  Otherwise these are boolean ops
        filler = fill_int if is_self_int_dtype and is_other_int_dtype else fill_bool

        res_values = na_op(lvalues, rvalues)
        res_values = filler(res_values)

    result = self._constructor(res_values, index=self.index, name=res_name)
    return finalizer(result)
def _maybe_convert_timedelta(self, other):
    """
    Convert timedelta-like input to an integer multiple of self.freq

    Parameters
    ----------
    other : timedelta, np.timedelta64, DateOffset, int, np.ndarray

    Returns
    -------
    converted : int, np.ndarray[int64]

    Raises
    ------
    IncompatibleFrequency : if the input cannot be written as a multiple
        of self.freq.  Note IncompatibleFrequency subclasses ValueError.
    """
    if isinstance(other, (timedelta, np.timedelta64, Tick, np.ndarray)):
        offset = frequencies.to_offset(self.freq.rule_code)
        if isinstance(offset, Tick):
            if isinstance(other, np.ndarray):
                nanos = np.vectorize(delta_to_nanoseconds)(other)
            else:
                nanos = delta_to_nanoseconds(other)
            offset_nanos = delta_to_nanoseconds(offset)
            check = np.all(nanos % offset_nanos == 0)
            if check:
                return nanos // offset_nanos
    elif isinstance(other, DateOffset):
        freqstr = other.rule_code
        base = frequencies.get_base_alias(freqstr)
        if base == self.freq.rule_code:
            return other.n
        msg = DIFFERENT_FREQ_INDEX.format(self.freqstr, other.freqstr)
        raise IncompatibleFrequency(msg)
    elif lib.is_integer(other):
        # integer is passed to .shift via
        # _add_datetimelike_methods basically
        # but ufunc may pass integer to _add_delta
        return other

    # raise when input doesn't have freq
    msg = "Input has different freq from {cls}(freq={freqstr})"
    raise IncompatibleFrequency(msg.format(cls=type(self).__name__,
                                           freqstr=self.freqstr))
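# Illustrative numpy-only sketch (not the pandas implementation above):
# converting a timedelta to an integer multiple of a Tick-like frequency by
# comparing nanosecond counts. `as_freq_multiple` is a hypothetical helper name.
import numpy as np


def as_freq_multiple(delta: np.timedelta64, freq: np.timedelta64) -> int:
    nanos = delta.astype("timedelta64[ns]").astype(np.int64)
    freq_nanos = freq.astype("timedelta64[ns]").astype(np.int64)
    if nanos % freq_nanos:
        # analogous to raising IncompatibleFrequency above
        raise ValueError("delta is not an exact multiple of freq")
    return int(nanos // freq_nanos)


# two hours is exactly four 30-minute ticks
assert as_freq_multiple(np.timedelta64(2, "h"), np.timedelta64(30, "m")) == 4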
def __getitem__(
    self: NDArrayBackedExtensionArrayT, key: Union[int, slice, np.ndarray]
) -> Union[NDArrayBackedExtensionArrayT, Any]:
    if lib.is_integer(key):
        # fast-path
        result = self._ndarray[key]
        if self.ndim == 1:
            return self._box_func(result)
        return self._from_backing_data(result)

    key = extract_array(key, extract_numpy=True)
    key = check_array_indexer(self, key)
    result = self._ndarray[key]
    if lib.is_scalar(result):
        return self._box_func(result)

    result = self._from_backing_data(result)
    return result
def __add__(self, other):
    other = lib.item_from_zerodim(other)
    if isinstance(other, (ABCSeries, ABCDataFrame)):
        return NotImplemented

    # scalar others
    elif other is NaT:
        result = self._add_nat()
    elif isinstance(other, (Tick, timedelta, np.timedelta64)):
        result = self._add_delta(other)
    elif isinstance(other, DateOffset):
        # specifically _not_ a Tick
        result = self._add_offset(other)
    elif isinstance(other, (datetime, np.datetime64)):
        result = self._add_datelike(other)
    elif lib.is_integer(other):
        # This check must come after the check for np.timedelta64
        # as is_integer returns True for these
        result = self.shift(other)

    # array-like others
    elif is_timedelta64_dtype(other):
        # TimedeltaIndex, ndarray[timedelta64]
        result = self._add_delta(other)
    elif is_offsetlike(other):
        # Array/Index of DateOffset objects
        result = self._addsub_offset_array(other, operator.add)
    elif is_datetime64_dtype(other) or is_datetime64tz_dtype(other):
        # DatetimeIndex, ndarray[datetime64]
        return self._add_datelike(other)
    elif is_integer_dtype(other):
        result = self._addsub_int_array(other, operator.add)
    elif is_float_dtype(other) or is_period_dtype(other):
        # Explicitly catch invalid dtypes
        raise TypeError("cannot add {dtype}-dtype to {cls}"
                        .format(dtype=other.dtype,
                                cls=type(self).__name__))
    elif is_extension_array_dtype(other):
        # Categorical op will raise; defer explicitly
        return NotImplemented
    else:  # pragma: no cover
        return NotImplemented

    return result
def iset(self, loc: Union[int, slice, np.ndarray], value):
    """
    Set new item in-place. Does not consolidate. Adds new Block if not
    contained in the current set of items
    """
    if lib.is_integer(loc):
        # TODO normalize array -> this should in theory not be needed?
        value = extract_array(value, extract_numpy=True)
        if isinstance(value, np.ndarray) and value.ndim == 2:
            value = value[0, :]
        assert isinstance(value, (np.ndarray, ExtensionArray))
        # value = np.asarray(value)
        # assert isinstance(value, np.ndarray)
        assert len(value) == len(self._axes[0])
        self.arrays[loc] = value
        return

    # TODO
    raise Exception
def __getitem__(
    self: NDArrayBackedExtensionArrayT,
    key: PositionalIndexer2D,
) -> NDArrayBackedExtensionArrayT | Any:
    if lib.is_integer(key):
        # fast-path
        result = self._ndarray[key]
        if self.ndim == 1:
            return self._box_func(result)
        return self._from_backing_data(result)

    # error: Incompatible types in assignment (expression has type "ExtensionArray",
    # variable has type "Union[int, slice, ndarray]")
    key = extract_array(key, extract_numpy=True)  # type: ignore[assignment]
    key = check_array_indexer(self, key)
    result = self._ndarray[key]
    if lib.is_scalar(result):
        return self._box_func(result)

    result = self._from_backing_data(result)
    return result
def __getitem__(self, key):
    """
    Conserve RangeIndex type for scalar and slice keys.
    """
    if is_scalar(key):
        if not lib.is_integer(key):
            raise IndexError("only integers, slices (`:`), "
                             "ellipsis (`...`), numpy.newaxis (`None`) "
                             "and integer or boolean "
                             "arrays are valid indices")
        new_key = int(key)
        try:
            return self._range[new_key]
        except IndexError:
            raise IndexError("index {key} is out of bounds for axis 0 "
                             "with size {size}".format(key=key,
                                                       size=len(self)))
    if isinstance(key, slice):
        new_range = self._range[key]
        return self.from_range(new_range, name=self.name)

    # fall back to Int64Index
    return super().__getitem__(key)
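# Quick public-API check of the behaviour implemented above (assumes
# `import pandas as pd`): slicing keeps the RangeIndex type, while list
# indexing falls back to a plain integer index.
import pandas as pd

ri = pd.RangeIndex(0, 10, 2)
print(type(ri[1:3]).__name__)     # RangeIndex
print(type(ri[[0, 2]]).__name__)  # Int64Index on older pandas, Index on newer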
def __getitem__(
    self: NDArrayBackedExtensionArrayT, key: int | slice | np.ndarray
) -> NDArrayBackedExtensionArrayT | Any:
    if lib.is_integer(key):
        # fast-path
        result = self._ndarray[key]
        if self.ndim == 1:
            return self._box_func(result)
        return self._from_backing_data(result)

    # error: Value of type variable "AnyArrayLike" of "extract_array" cannot be
    # "Union[int, slice, ndarray]"
    # error: Incompatible types in assignment (expression has type "ExtensionArray",
    # variable has type "Union[int, slice, ndarray]")
    key = extract_array(  # type: ignore[type-var,assignment]
        key, extract_numpy=True
    )
    key = check_array_indexer(self, key)
    result = self._ndarray[key]
    if lib.is_scalar(result):
        return self._box_func(result)

    result = self._from_backing_data(result)
    return result
def __getitem__(self, key):
    """
    Conserve RangeIndex type for scalar and slice keys.
    """
    super_getitem = super(RangeIndex, self).__getitem__

    if is_scalar(key):
        if not lib.is_integer(key):
            raise IndexError("only integers, slices (`:`), "
                             "ellipsis (`...`), numpy.newaxis (`None`) "
                             "and integer or boolean "
                             "arrays are valid indices")
        n = com.cast_scalar_indexer(key)
        if n != key:
            return super_getitem(key)
        if n < 0:
            n = len(self) + key
        if n < 0 or n > len(self) - 1:
            raise IndexError("index {key} is out of bounds for axis 0 "
                             "with size {size}".format(key=key,
                                                       size=len(self)))
        return self._start + n * self._step

    if isinstance(key, slice):

        # This is basically PySlice_GetIndicesEx, but delegation to our
        # super routines if we don't have integers

        length = len(self)

        # complete missing slice information
        step = 1 if key.step is None else key.step
        if key.start is None:
            start = length - 1 if step < 0 else 0
        else:
            start = key.start

            if start < 0:
                start += length
            if start < 0:
                start = -1 if step < 0 else 0
            if start >= length:
                start = length - 1 if step < 0 else length

        if key.stop is None:
            stop = -1 if step < 0 else length
        else:
            stop = key.stop

            if stop < 0:
                stop += length
            if stop < 0:
                stop = -1
            if stop > length:
                stop = length

        # delegate non-integer slices
        if (start != int(start) or
                stop != int(stop) or
                step != int(step)):
            return super_getitem(key)

        # convert indexes to values
        start = self._start + self._step * start
        stop = self._start + self._step * stop
        step = self._step * step

        return RangeIndex._simple_new(start, stop, step, name=self.name)

    # fall back to Int64Index
    return super_getitem(key)
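# Illustrative check (an assumption, not pandas code): the manual
# start/stop/step normalisation above mirrors what slice.indices / built-in
# range slicing does, which is why the result can be rebuilt as another range.
r = range(0, 20, 2)            # analogous to a RangeIndex(start=0, stop=20, step=2)
key = slice(None, None, -3)
start, stop, step = key.indices(len(r))
assert list(r[key]) == [r[i] for i in range(start, stop, step)]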
def logical_op(
    left: Union[np.ndarray, ABCExtensionArray], right: Any, op
) -> Union[np.ndarray, ABCExtensionArray]:
    """
    Evaluate a logical operation `|`, `&`, or `^`.

    Parameters
    ----------
    left : np.ndarray or ExtensionArray
    right : object
        Cannot be a DataFrame, Series, or Index.
    op : {operator.and_, operator.or_, operator.xor}
        Or one of the reversed variants from roperator.

    Returns
    -------
    ndarray or ExtensionArray
    """
    fill_int = lambda x: x

    def fill_bool(x, left=None):
        # if `left` is specifically not-boolean, we do not cast to bool
        if x.dtype.kind in ["c", "f", "O"]:
            # dtypes that can hold NA
            mask = isna(x)
            if mask.any():
                x = x.astype(object)
                x[mask] = False

        if left is None or is_bool_dtype(left.dtype):
            x = x.astype(bool)
        return x

    is_self_int_dtype = is_integer_dtype(left.dtype)

    right = lib.item_from_zerodim(right)
    if is_list_like(right) and not hasattr(right, "dtype"):
        # e.g. list, tuple
        right = construct_1d_object_array_from_listlike(right)

    # NB: We assume extract_array has already been called on left and right
    lvalues = left
    rvalues = right

    if should_extension_dispatch(lvalues, rvalues):
        res_values = dispatch_to_extension_op(op, lvalues, rvalues)

    else:
        if isinstance(rvalues, np.ndarray):
            is_other_int_dtype = is_integer_dtype(rvalues.dtype)
            rvalues = rvalues if is_other_int_dtype else fill_bool(rvalues, lvalues)

        else:
            # i.e. scalar
            is_other_int_dtype = lib.is_integer(rvalues)

        # For int vs int `^`, `|`, `&` are bitwise operators and return
        #   integer dtypes.  Otherwise these are boolean ops
        filler = fill_int if is_self_int_dtype and is_other_int_dtype else fill_bool

        res_values = na_logical_op(lvalues, rvalues, op)
        res_values = filler(res_values)  # type: ignore

    return res_values
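# Small numpy-only sketch of the int-vs-int rule the filler comment above
# describes (assumption: this mirrors the dtype outcome, it is not the pandas
# code path itself).
import numpy as np

print(np.array([1, 2]) | np.array([4, 8]))                # [ 5 10], bitwise, integer dtype
print(np.array([True, False]) | np.array([True, True]))   # [ True  True], boolean dtype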
def __sub__(self, other):
    other = lib.item_from_zerodim(other)
    if isinstance(other, (ABCSeries, ABCDataFrame)):
        return NotImplemented

    # scalar others
    elif other is NaT:
        result = self._sub_nat()
    elif isinstance(other, (Tick, timedelta, np.timedelta64)):
        result = self._add_delta(-other)
    elif isinstance(other, DateOffset):
        # specifically _not_ a Tick
        result = self._add_offset(-other)
    elif isinstance(other, (datetime, np.datetime64)):
        result = self._sub_datetimelike_scalar(other)
    elif lib.is_integer(other):
        # This check must come after the check for np.timedelta64
        # as is_integer returns True for these
        if not is_period_dtype(self):
            maybe_integer_op_deprecated(self)
        result = self._time_shift(-other)
    elif isinstance(other, Period):
        result = self._sub_period(other)

    # array-like others
    elif is_timedelta64_dtype(other):
        # TimedeltaIndex, ndarray[timedelta64]
        result = self._add_delta(-other)
    elif is_offsetlike(other):
        # Array/Index of DateOffset objects
        result = self._addsub_offset_array(other, operator.sub)
    elif is_datetime64_dtype(other) or is_datetime64tz_dtype(other):
        # DatetimeIndex, ndarray[datetime64]
        result = self._sub_datetime_arraylike(other)
    elif is_period_dtype(other):
        # PeriodIndex
        result = self._sub_period_array(other)
    elif is_integer_dtype(other):
        if not is_period_dtype(self):
            maybe_integer_op_deprecated(self)
        result = self._addsub_int_array(other, operator.sub)
    elif isinstance(other, ABCIndexClass):
        raise TypeError("cannot subtract {cls} and {typ}"
                        .format(cls=type(self).__name__,
                                typ=type(other).__name__))
    elif is_float_dtype(other):
        # Explicitly catch invalid dtypes
        raise TypeError("cannot subtract {dtype}-dtype from {cls}"
                        .format(dtype=other.dtype,
                                cls=type(self).__name__))
    elif is_extension_array_dtype(other):
        # Categorical op will raise; defer explicitly
        return NotImplemented
    else:  # pragma: no cover
        return NotImplemented

    if is_timedelta64_dtype(result) and isinstance(result, np.ndarray):
        from pandas.core.arrays import TimedeltaArrayMixin
        # TODO: infer freq?
        return TimedeltaArrayMixin(result)
    return result
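# Public-API illustration (assumes `import pandas as pd`): subtracting a
# Timestamp from a DatetimeIndex takes the datetime-scalar branch above and
# yields timedelta64 data; which private method runs is an inference from the
# code, not verified by this snippet.
import pandas as pd

dti = pd.date_range("2018-01-01", periods=3, freq="D")
print(dti - pd.Timestamp("2018-01-01"))   # TimedeltaIndex: 0, 1 and 2 days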