def _aggregate_series_pure_python(
    self,
    obj: Series,
    func: F,
    *args,
    engine: str = "cython",
    engine_kwargs=None,
    **kwargs,
):
    """
    Apply ``func`` to each group of ``obj`` one group at a time in Python.

    Parameters
    ----------
    obj : Series
        Data to aggregate; grouping comes from ``self.group_info``.
    func : callable
        Per-group aggregation; must reduce each group to a scalar.
    *args
        Positional arguments forwarded to ``func``.
    engine : str, default "cython"
        When this resolves to numba (per ``maybe_use_numba``), ``func`` is
        JIT-compiled and called on raw (values, index) arrays instead of
        the group Series.
    engine_kwargs : dict, optional
        Forwarded to the numba function generator.
    **kwargs
        Keyword arguments forwarded to ``func`` (baked into the jitted
        function on the numba path).

    Returns
    -------
    result : np.ndarray
        Per-group results, object dtype possibly converted to a better
        dtype by ``maybe_convert_objects``.
    counts : np.ndarray
        Number of rows in each group.

    Raises
    ------
    ValueError
        If ``func`` does not reduce the first group to a scalar.
    """
    if maybe_use_numba(engine):
        # Build (or later fetch from cache) the jitted version of ``func``;
        # it is inserted into the cache only after the first successful call.
        numba_func, cache_key = generate_numba_func(
            func, engine_kwargs, kwargs, "groupby_agg"
        )

    group_index, _, ngroups = self.group_info

    counts = np.zeros(ngroups, dtype=int)
    result = None

    splitter = get_splitter(obj, group_index, ngroups, axis=0)

    for label, group in splitter:
        if maybe_use_numba(engine):
            # numba kernels operate on raw ndarrays, not Series objects
            values, index = split_for_numba(group)
            res = numba_func(values, index, *args)
            if cache_key not in NUMBA_FUNC_CACHE:
                NUMBA_FUNC_CACHE[cache_key] = numba_func
        else:
            res = func(group, *args, **kwargs)

        if result is None:
            # First group: check that ``func`` actually reduces, then
            # allocate the output array. (Only the first result is checked,
            # matching the long-standing behavior here.)
            if isinstance(res, (Series, Index, np.ndarray)):
                if len(res) == 1:
                    # e.g. test_agg_lambda_with_timezone lambda e: e.head(1)
                    # FIXME: are we potentially losing important res.index info?
                    res = res.item()
                else:
                    raise ValueError("Function does not reduce")
            result = np.empty(ngroups, dtype="O")

        counts[label] = group.shape[0]
        result[label] = res

    assert result is not None
    # Use a real bool for the flag rather than the legacy integer 0.
    result = lib.maybe_convert_objects(result, try_float=False)
    # TODO: maybe_cast_to_extension_array?

    return result, counts
def mean(self, *args, engine=None, engine_kwargs=None, **kwargs):
    """
    Compute the exponentially weighted moving average.

    Dispatches to a JIT-compiled numba kernel when ``engine`` resolves to
    numba, otherwise to the cython ``window_aggregations.ewma``
    implementation. Raises ``ValueError`` for an unknown engine or when
    ``engine_kwargs`` is supplied with the cython engine.
    """
    if maybe_use_numba(engine):
        # Select the single-column or table-wise kernel together with its
        # corresponding cache key.
        if self.method == "single":
            kernel = generate_numba_ewma_func(
                engine_kwargs, self._com, self.adjust, self.ignore_na, self._deltas
            )
            cache_key = (lambda x: x, "ewma")
        else:
            kernel = generate_ewma_numba_table_func(
                engine_kwargs, self._com, self.adjust, self.ignore_na, self._deltas
            )
            cache_key = (lambda x: x, "ewma_table")
        return self._apply(kernel, numba_cache_key=cache_key)

    if engine in ("cython", None):
        if engine_kwargs is not None:
            raise ValueError("cython engine does not accept engine_kwargs")
        nv.validate_window_func("mean", args, kwargs)
        window_func = partial(
            window_aggregations.ewma,
            com=self._com,
            adjust=self.adjust,
            ignore_na=self.ignore_na,
            deltas=self._deltas,
        )
        return self._apply(window_func)

    raise ValueError("engine must be either 'numba' or 'cython'")
def sum(self, *args, engine=None, engine_kwargs=None, **kwargs):
    """
    Compute the exponentially weighted moving sum.

    Raises
    ------
    NotImplementedError
        When ``adjust=False``; the unadjusted recurrence is not supported
        for sums.
    ValueError
        For an unrecognized engine, or when ``engine_kwargs`` is supplied
        with the cython engine.
    """
    if not self.adjust:
        raise NotImplementedError("sum is not implemented with adjust=False")

    if maybe_use_numba(engine):
        generator = (
            generate_numba_ewm_func
            if self.method == "single"
            else generate_numba_ewm_table_func
        )
        kernel = generator(
            **get_jit_arguments(engine_kwargs),
            com=self._com,
            adjust=self.adjust,
            ignore_na=self.ignore_na,
            deltas=tuple(self._deltas),
            normalize=False,
        )
        return self._apply(kernel)

    if engine in ("cython", None):
        if engine_kwargs is not None:
            raise ValueError("cython engine does not accept engine_kwargs")
        nv.validate_window_func("sum", args, kwargs)
        # Deltas only matter when irregular times were supplied.
        deltas = self._deltas if self.times is not None else None
        window_func = partial(
            window_aggregations.ewm,
            com=self._com,
            adjust=self.adjust,
            ignore_na=self.ignore_na,
            deltas=deltas,
            normalize=False,
        )
        return self._apply(window_func)

    raise ValueError("engine must be either 'numba' or 'cython'")
def mean(self, engine=None, engine_kwargs=None):
    """
    Parameters
    ----------
    engine : str, default None
        * ``'cython'`` : Runs mean through C-extensions from cython.
        * ``'numba'`` : Runs mean through JIT compiled code from numba.
          Only available when ``raw`` is set to ``True``.
        * ``None`` : Defaults to ``'cython'`` or globally setting
          ``compute.use_numba``

          .. versionadded:: 1.2.0

    engine_kwargs : dict, default None
        * For ``'cython'`` engine, there are no accepted ``engine_kwargs``
        * For ``'numba'`` engine, the engine can accept ``nopython``,
          ``nogil`` and ``parallel`` dictionary keys. The values must
          either be ``True`` or ``False``. The default ``engine_kwargs``
          for the ``'numba'`` engine is
          ``{'nopython': True, 'nogil': False, 'parallel': False}``.

          .. versionadded:: 1.2.0

    Returns
    -------
    Series or DataFrame
        Return type is determined by the caller.
    """
    if maybe_use_numba(engine):
        kernel = generate_numba_groupby_ewma_func(
            engine_kwargs,
            self.com,
            self.adjust,
            self.ignore_na,
        )
        return self._apply(
            kernel,
            numba_cache_key=(lambda x: x, "groupby_ewma"),
        )

    if engine in ("cython", None):
        if engine_kwargs is not None:
            raise ValueError("cython engine does not accept engine_kwargs")

        def mean_func(x):
            # Re-wrap this piece of the groupby as an EWM of the same
            # flavor, then reduce it with the plain ewm mean.
            wrapped = self._shallow_copy(x, groupby=self._groupby)
            return wrapped.mean()

        return self._groupby.apply(mean_func)

    raise ValueError("engine must be either 'numba' or 'cython'")
def agg_series(
    self,
    obj: Series,
    func: F,
    *args,
    engine: str = "cython",
    engine_kwargs=None,
    **kwargs,
):
    """
    Aggregate ``obj`` group-wise with ``func``, preferring the fast
    libreduction path and falling back to the pure-python implementation
    whenever the fast path is known (or discovered) to be unusable.
    """
    # Caller is responsible for checking ngroups != 0
    assert self.ngroups != 0

    if maybe_use_numba(engine):
        # numba does its own splitting; forward engine details and the
        # extra arguments straight to the python path.
        return self._aggregate_series_pure_python(
            obj, func, *args, engine=engine, engine_kwargs=engine_kwargs, **kwargs
        )

    # Conditions under which the fast path is known to fail, checked in
    # the same order as before (short-circuiting):
    # - empty input: SeriesGrouper would raise in _aggregate_series_fast
    # - extension dtypes: _aggregate_series_fast would raise TypeError when
    #   calling libreduction.Slider; in the datetime64tz case it would
    #   incorrectly cast to tz-naive
    #   TODO: can we get a performant workaround for EAs backed by ndarray?
    # - complex index internals: preempt TypeError in _aggregate_series_fast
    fast_path_unusable = (
        len(obj) == 0
        or is_extension_array_dtype(obj.dtype)
        or obj.index._has_complex_internals
    )
    if fast_path_unusable:
        return self._aggregate_series_pure_python(obj, func)

    try:
        return self._aggregate_series_fast(obj, func)
    except ValueError as err:
        if "Function does not reduce" not in str(err):
            raise
        # raised in libreduction; fall through to the python implementation

    return self._aggregate_series_pure_python(obj, func)
def __init__(
    self,
    obj: NDFrame,
    com: float | None = None,
    span: float | None = None,
    halflife: float | TimedeltaConvertibleTypes | None = None,
    alpha: float | None = None,
    min_periods: int | None = 0,
    adjust: bool = True,
    ignore_na: bool = False,
    axis: Axis = 0,
    times: str | np.ndarray | NDFrame | None = None,
    engine: str = "numba",
    engine_kwargs: dict[str, bool] | None = None,
    *,
    selection=None,
):
    """
    Initialize the online EWM object and its running mean state.

    Parameters mirror the parent EWM constructor; ``engine`` /
    ``engine_kwargs`` select the numba execution engine.

    Raises
    ------
    NotImplementedError
        If ``times`` is supplied — not supported for online operations.
    ValueError
        If ``engine`` does not resolve to numba (the only supported
        engine here).
    """
    if times is not None:
        raise NotImplementedError(
            "times is not implemented with online operations."
        )
    # Fail fast on an unsupported engine BEFORE calling the parent
    # constructor or allocating the mean state — previously an invalid
    # engine did all that work and left a partially initialized object
    # before raising.
    if not maybe_use_numba(engine):
        raise ValueError("'numba' is the only supported engine")

    super().__init__(
        obj=obj,
        com=com,
        span=span,
        halflife=halflife,
        alpha=alpha,
        min_periods=min_periods,
        adjust=adjust,
        ignore_na=ignore_na,
        axis=axis,
        times=times,
        selection=selection,
    )
    # Running state for the online exponentially weighted mean.
    self._mean = EWMMeanState(
        self._com, self.adjust, self.ignore_na, self.axis, obj.shape
    )
    self.engine = engine
    self.engine_kwargs = engine_kwargs
def mean(
    self,
    numeric_only: bool = False,
    *args,
    engine=None,
    engine_kwargs=None,
    **kwargs,
):
    """
    Compute the exponentially weighted moving average.

    Dispatches to a numba kernel when ``engine`` resolves to numba,
    otherwise to the cython ``window_aggregations.ewm`` implementation.
    Raises ``ValueError`` for an unknown engine or when ``engine_kwargs``
    is supplied with the cython engine.
    """
    maybe_warn_args_and_kwargs(type(self), "mean", args, kwargs)

    if maybe_use_numba(engine):
        generator = (
            generate_numba_ewm_func
            if self.method == "single"
            else generate_numba_ewm_table_func
        )
        kernel = generator(
            **get_jit_arguments(engine_kwargs),
            com=self._com,
            adjust=self.adjust,
            ignore_na=self.ignore_na,
            deltas=tuple(self._deltas),
            normalize=True,
        )
        # NOTE(review): numeric_only is not forwarded on this path, unlike
        # the cython branch below — confirm whether _apply handles it
        # elsewhere for numba or whether this is an oversight.
        return self._apply(kernel, name="mean")

    if engine in ("cython", None):
        if engine_kwargs is not None:
            raise ValueError("cython engine does not accept engine_kwargs")
        nv.validate_window_func("mean", args, kwargs)
        # Deltas only matter when irregular times were supplied.
        deltas = self._deltas if self.times is not None else None
        window_func = partial(
            window_aggregations.ewm,
            com=self._com,
            adjust=self.adjust,
            ignore_na=self.ignore_na,
            deltas=deltas,
            normalize=True,
        )
        return self._apply(window_func, name="mean", numeric_only=numeric_only)

    raise ValueError("engine must be either 'numba' or 'cython'")