def _aggregate_series_pure_python(self, obj: Series, func: F):
    """
    Aggregate ``obj`` one group at a time with a plain Python loop.

    Parameters
    ----------
    obj : Series
    func : function called once per group; its return value is passed
        through ``libreduction.extract_result`` before being stored

    Returns
    -------
    out
        The per-group results after ``lib.maybe_convert_objects`` and
        ``maybe_cast_pointwise_result`` (called with ``obj.dtype``).
    counts : np.ndarray
        Integer array with ``group.shape[0]`` for every group visited.
    """
    ids, _, ngroups = self.group_info

    group_sizes = np.zeros(ngroups, dtype=int)
    collected = np.empty(ngroups, dtype="O")

    chunks = get_splitter(obj, ids, ngroups, axis=0)
    for position, chunk in enumerate(chunks):
        # Each pass of this loop corresponds to
        #  libreduction._BaseGrouper._apply_to_group
        value = libreduction.extract_result(func(chunk))

        if position == 0:
            # The result is validated on the first pass only
            libreduction.check_result_array(value, 0)

        group_sizes[position] = chunk.shape[0]
        collected[position] = value

    converted = lib.maybe_convert_objects(collected, try_float=False)
    out = maybe_cast_pointwise_result(converted, obj.dtype, numeric_only=True)
    return out, group_sizes
def agg_series(self, obj: Series, func: F) -> ArrayLike:
    """
    Aggregate ``obj`` group-wise with ``func``, choosing between the fast
    and the pure-Python implementation.

    Parameters
    ----------
    obj : Series
    func : function forwarded to the selected aggregation helper

    Returns
    -------
    ArrayLike
        The output of ``lib.maybe_convert_objects``; when the pure-Python
        helper was used it is additionally passed through
        ``maybe_cast_pointwise_result`` with ``obj.dtype``.
    """
    # Ruling out ngroups == 0 is the caller's job
    assert self.ngroups != 0

    # The pure-Python helper is required when any of these holds
    # (checked in this order, stopping at the first hit):
    #  * obj is empty: SeriesGrouper would raise if we were to call
    #    _aggregate_series_fast
    #  * the values are not an ndarray: _aggregate_series_fast would raise
    #    TypeError when calling libreduction.Slider, and in the
    #    datetime64tz case it would incorrectly cast to tz-naive
    #    TODO: can we get a performant workaround for EAs backed by ndarray?
    #  * the index has complex internals: preempt TypeError in
    #    _aggregate_series_fast
    use_python_path = (
        len(obj) == 0
        or not isinstance(obj._values, np.ndarray)
        or obj.index._has_complex_internals
    )

    if use_python_path:
        raw = self._aggregate_series_pure_python(obj, func)
    else:
        raw = self._aggregate_series_fast(obj, func)

    npvalues = lib.maybe_convert_objects(raw, try_float=False)

    if not use_python_path:
        # Fast-path output is returned without casting back
        return npvalues

    # TODO: Is there a documented reason why we dont always cast_back?
    return maybe_cast_pointwise_result(npvalues, obj.dtype, numeric_only=True)
def agg_series(
    self, obj: Series, func: F, preserve_dtype: bool = False
) -> ArrayLike:
    """
    Aggregate ``obj`` group-wise with ``func``.

    Parameters
    ----------
    obj : Series
    func : function taking a Series and returning a scalar-like
    preserve_dtype : bool
        Whether the aggregation is known to be dtype-preserving.
        Forced to True when ``obj`` is non-empty and ``obj._values`` is
        not an ndarray.

    Returns
    -------
    np.ndarray or ExtensionArray
    """
    # test_groupby_empty_with_category gets here with self.ngroups == 0
    # and len(obj) > 0

    if len(obj) == 0:
        # SeriesGrouper would raise if we were to call _aggregate_series_fast
        fast_path_ok = False
    elif not isinstance(obj._values, np.ndarray):
        # _aggregate_series_fast would raise TypeError when
        #  calling libreduction.Slider
        # In the datetime64tz case it would incorrectly cast to tz-naive
        # TODO: can we get a performant workaround for EAs backed by ndarray?
        fast_path_ok = False

        # we can preserve a little bit more aggressively with EA dtype
        #  because maybe_cast_pointwise_result will do a try/except
        #  with _from_sequence.  NB we are assuming here that _from_sequence
        #  is sufficiently strict that it casts appropriately.
        preserve_dtype = True
    else:
        # Two further cases still need the pure-Python helper:
        #  * index with complex internals (preempt TypeError in
        #    _aggregate_series_fast)
        #  * BinGrouper: not yet able to remove the BaseGrouper
        #    aggregate_series_fast, as test_crosstab.test_categorical
        #    breaks without it
        fast_path_ok = not (
            obj.index._has_complex_internals or isinstance(self, BinGrouper)
        )

    if fast_path_ok:
        aggregated = self._aggregate_series_fast(obj, func)
    else:
        aggregated = self._aggregate_series_pure_python(obj, func)

    npvalues = lib.maybe_convert_objects(aggregated, try_float=False)
    if not preserve_dtype:
        return npvalues
    return maybe_cast_pointwise_result(npvalues, obj.dtype, numeric_only=True)
def agg_series(self, obj: Series, func: F, preserve_dtype: bool = False) -> ArrayLike:
    """
    Aggregate ``obj`` group-wise with ``func`` via the pure-Python helper.

    Parameters
    ----------
    obj : Series
    func : function taking a Series and returning a scalar-like
    preserve_dtype : bool
        Whether the aggregation is known to be dtype-preserving.
        Forced to True when ``obj`` is non-empty and ``obj._values`` is
        not an ndarray.

    Returns
    -------
    np.ndarray or ExtensionArray
    """
    # test_groupby_empty_with_category gets here with self.ngroups == 0
    # and len(obj) > 0

    if len(obj) > 0 and not isinstance(obj._values, np.ndarray):
        # we can preserve a little bit more aggressively with EA dtype
        #  because maybe_cast_pointwise_result will do a try/except
        #  with _from_sequence.  NB we are assuming here that _from_sequence
        #  is sufficiently strict that it casts appropriately.
        preserve_dtype = True

    # Every input goes through the same helper; only the casting differs
    aggregated = self._aggregate_series_pure_python(obj, func)
    npvalues = lib.maybe_convert_objects(aggregated, try_float=False)

    if not preserve_dtype:
        return npvalues
    return maybe_cast_pointwise_result(npvalues, obj.dtype, numeric_only=True)