class ExponentialMovingWindow(BaseWindow):
    r"""
    Provide exponential weighted (EW) functions.

    Available EW functions: ``mean()``, ``var()``, ``std()``, ``corr()``, ``cov()``.

    Exactly one parameter: ``com``, ``span``, ``halflife``, or ``alpha`` must be
    provided.

    Parameters
    ----------
    com : float, optional
        Specify decay in terms of center of mass,
        :math:`\alpha = 1 / (1 + com)`, for :math:`com \geq 0`.
    span : float, optional
        Specify decay in terms of span,
        :math:`\alpha = 2 / (span + 1)`, for :math:`span \geq 1`.
    halflife : float, str, timedelta, optional
        Specify decay in terms of half-life,
        :math:`\alpha = 1 - \exp\left(-\ln(2) / halflife\right)`, for
        :math:`halflife > 0`.

        If ``times`` is specified, the time unit (str or timedelta) over which an
        observation decays to half its value. Only applicable to ``mean()``
        and halflife value will not apply to the other functions.

        .. versionadded:: 1.1.0

    alpha : float, optional
        Specify smoothing factor :math:`\alpha` directly,
        :math:`0 < \alpha \leq 1`.
    min_periods : int, default 0
        Minimum number of observations in window required to have a value
        (otherwise result is NA).
    adjust : bool, default True
        Divide by decaying adjustment factor in beginning periods to account
        for imbalance in relative weightings (viewing EWMA as a moving average).

        - When ``adjust=True`` (default), the EW function is calculated using weights
          :math:`w_i = (1 - \alpha)^i`. For example, the EW moving average of the series
          [:math:`x_0, x_1, ..., x_t`] would be:

        .. math::
            y_t = \frac{x_t + (1 - \alpha)x_{t-1} + (1 - \alpha)^2 x_{t-2} + ... + (1 -
            \alpha)^t x_0}{1 + (1 - \alpha) + (1 - \alpha)^2 + ... + (1 - \alpha)^t}

        - When ``adjust=False``, the exponentially weighted function is calculated
          recursively:

        .. math::
            \begin{split}
                y_0 &= x_0\\
                y_t &= (1 - \alpha) y_{t-1} + \alpha x_t,
            \end{split}
    ignore_na : bool, default False
        Ignore missing values when calculating weights; specify ``True`` to reproduce
        pre-0.15.0 behavior.

        - When ``ignore_na=False`` (default), weights are based on absolute positions.
          For example, the weights of :math:`x_0` and :math:`x_2` used in calculating
          the final weighted average of [:math:`x_0`, None, :math:`x_2`] are
          :math:`(1-\alpha)^2` and :math:`1` if ``adjust=True``, and
          :math:`(1-\alpha)^2` and :math:`\alpha` if ``adjust=False``.

        - When ``ignore_na=True`` (reproducing pre-0.15.0 behavior), weights are based
          on relative positions. For example, the weights of :math:`x_0` and :math:`x_2`
          used in calculating the final weighted average of
          [:math:`x_0`, None, :math:`x_2`] are :math:`1-\alpha` and :math:`1` if
          ``adjust=True``, and :math:`1-\alpha` and :math:`\alpha` if ``adjust=False``.
    axis : {0, 1}, default 0
        The axis to use. The value 0 identifies the rows, and 1
        identifies the columns.
    times : str, np.ndarray, Series, default None

        .. versionadded:: 1.1.0

        Times corresponding to the observations. Must be monotonically increasing and
        ``datetime64[ns]`` dtype.

        If str, the name of the column in the DataFrame representing the times.

        If 1-D array like, a sequence with the same shape as the observations.

        Only applicable to ``mean()``.
    method : str {'single', 'table'}, default 'single'
        Execute the rolling operation per single column or row (``'single'``)
        or over the entire object (``'table'``).

        This argument is only implemented when specifying ``engine='numba'``
        in the method call.

        Only applicable to ``mean()``

        .. versionadded:: 1.4.0

    Returns
    -------
    DataFrame
        A Window sub-classed for the particular operation.

    See Also
    --------
    rolling : Provides rolling window calculations.
    expanding : Provides expanding transformations.

    Notes
    -----

    More details can be found at:
    :ref:`Exponentially weighted windows <window.exponentially_weighted>`.

    Examples
    --------
    >>> df = pd.DataFrame({'B': [0, 1, 2, np.nan, 4]})
    >>> df
         B
    0  0.0
    1  1.0
    2  2.0
    3  NaN
    4  4.0

    >>> df.ewm(com=0.5).mean()
              B
    0  0.000000
    1  0.750000
    2  1.615385
    3  1.615385
    4  3.670213

    Specifying ``times`` with a timedelta ``halflife`` when computing mean.

    >>> times = ['2020-01-01', '2020-01-03', '2020-01-10', '2020-01-15', '2020-01-17']
    >>> df.ewm(halflife='4 days', times=pd.DatetimeIndex(times)).mean()
              B
    0  0.000000
    1  0.585786
    2  1.523889
    3  1.523889
    4  3.233686
    """

    _attributes = [
        "com",
        "span",
        "halflife",
        "alpha",
        "min_periods",
        "adjust",
        "ignore_na",
        "axis",
        "times",
        "method",
    ]

    def __init__(
        self,
        obj: FrameOrSeries,
        com: float | None = None,
        span: float | None = None,
        halflife: float | TimedeltaConvertibleTypes | None = None,
        alpha: float | None = None,
        min_periods: int | None = 0,
        adjust: bool = True,
        ignore_na: bool = False,
        axis: Axis = 0,
        times: str | np.ndarray | FrameOrSeries | None = None,
        method: str = "single",
        *,
        selection=None,
    ):
        super().__init__(
            obj=obj,
            # The stored min_periods is always an int >= 1: ``None`` maps to 1 and
            # any smaller value (including the default 0) is clamped up to 1.
            min_periods=1 if min_periods is None else max(int(min_periods), 1),
            on=None,
            center=False,
            closed=None,
            method=method,
            axis=axis,
            selection=selection,
        )
        self.com = com
        self.span = span
        self.halflife = halflife
        self.alpha = alpha
        self.adjust = adjust
        self.ignore_na = ignore_na
        self.times = times
        if self.times is not None:
            if not self.adjust:
                raise NotImplementedError("times is not supported with adjust=False.")
            if isinstance(self.times, str):
                # A string names the column of the selected object holding the times.
                self.times = self._selected_obj[self.times]
            if not is_datetime64_ns_dtype(self.times):
                raise ValueError("times must be datetime64[ns] dtype.")
            # error: Argument 1 to "len" has incompatible type "Union[str, ndarray,
            # FrameOrSeries, None]"; expected "Sized"
            if len(self.times) != len(obj):  # type: ignore[arg-type]
                raise ValueError("times must be the same length as the object.")
            if not isinstance(self.halflife, (str, datetime.timedelta)):
                raise ValueError(
                    "halflife must be a string or datetime.timedelta object"
                )
            if isna(self.times).any():
                raise ValueError("Cannot convert NaT values to integer")
            self._deltas = _calculate_deltas(self.times, self.halflife)
            # Halflife is no longer applicable when calculating COM
            # But allow COM to still be calculated if the user passes other decay args
            if common.count_not_none(self.com, self.span, self.alpha) > 0:
                self._com = get_center_of_mass(self.com, self.span, None, self.alpha)
            else:
                self._com = 1.0
        else:
            # isinstance() is already False for None, so no separate None check
            # is needed before rejecting timedelta-like halflife values.
            if isinstance(self.halflife, (str, datetime.timedelta)):
                raise ValueError(
                    "halflife can only be a timedelta convertible argument if "
                    "times is not None."
                )
            # Without times, points are equally spaced
            self._deltas = np.ones(max(len(self.obj) - 1, 0), dtype=np.float64)
            self._com = get_center_of_mass(
                # error: Argument 3 to "get_center_of_mass" has incompatible type
                # "Union[float, Any, None, timedelta64, signedinteger[_64Bit]]";
                # expected "Optional[float]"
                self.com,
                self.span,
                self.halflife,  # type: ignore[arg-type]
                self.alpha,
            )

    def _get_window_indexer(self) -> BaseIndexer:
        """
        Return an indexer class that will compute the window start and end bounds
        """
        return ExponentialMovingWindowIndexer()

    def online(self, engine="numba", engine_kwargs=None):
        """
        Return an ``OnlineExponentialMovingWindow`` object to calculate
        exponentially moving window aggregations in an online method.

        .. versionadded:: 1.3.0

        Parameters
        ----------
        engine: str, default ``'numba'``
            Execution engine to calculate online aggregations.
            Applies to all supported aggregation methods.

        engine_kwargs : dict, default None
            Applies to all supported aggregation methods.

            * For ``'numba'`` engine, the engine can accept ``nopython``, ``nogil``
              and ``parallel`` dictionary keys. The values must either be ``True`` or
              ``False``. The default ``engine_kwargs`` for the ``'numba'`` engine is
              ``{{'nopython': True, 'nogil': False, 'parallel': False}}`` and will be
              applied to the function

        Returns
        -------
        OnlineExponentialMovingWindow
        """
        # Forward the user-supplied decay parameters and configuration stored on
        # this window, unchanged, to the online variant.
        return OnlineExponentialMovingWindow(
            obj=self.obj,
            com=self.com,
            span=self.span,
            halflife=self.halflife,
            alpha=self.alpha,
            min_periods=self.min_periods,
            adjust=self.adjust,
            ignore_na=self.ignore_na,
            axis=self.axis,
            times=self.times,
            engine=engine,
            engine_kwargs=engine_kwargs,
            selection=self._selection,
        )

    @doc(
        _shared_docs["aggregate"],
        see_also=dedent("""
        See Also
        --------
        pandas.DataFrame.rolling.aggregate
        """),
        examples=dedent("""
        Examples
        --------
        >>> df = pd.DataFrame({"A": [1, 2, 3], "B": [4, 5, 6], "C": [7, 8, 9]})
        >>> df
           A  B  C
        0  1  4  7
        1  2  5  8
        2  3  6  9

        >>> df.ewm(alpha=0.5).mean()
                  A         B         C
        0  1.000000  4.000000  7.000000
        1  1.666667  4.666667  7.666667
        2  2.428571  5.428571  8.428571
        """),
        klass="Series/Dataframe",
        axis="",
    )
    def aggregate(self, func, *args, **kwargs):
        return super().aggregate(func, *args, **kwargs)

    agg = aggregate

    @doc(
        template_header,
        create_section_header("Parameters"),
        args_compat,
        window_agg_numba_parameters,
        kwargs_compat,
        create_section_header("Returns"),
        template_returns,
        create_section_header("See Also"),
        template_see_also,
        create_section_header("Notes"),
        numba_notes.replace("\n", "", 1),
        window_method="ewm",
        aggregation_description="(exponential weighted moment) mean",
        agg_method="mean",
    )
    def mean(self, *args, engine=None, engine_kwargs=None, **kwargs):
        if maybe_use_numba(engine):
            # The numba kernel depends on ``method``: per-column ("single") or
            # whole-object ("table"); each gets its own cache key label.
            if self.method == "single":
                ewma_func = generate_numba_ewma_func(
                    engine_kwargs,
                    self._com,
                    self.adjust,
                    self.ignore_na,
                    self._deltas,
                )
                numba_cache_key = (lambda x: x, "ewma")
            else:
                ewma_func = generate_ewma_numba_table_func(
                    engine_kwargs,
                    self._com,
                    self.adjust,
                    self.ignore_na,
                    self._deltas,
                )
                numba_cache_key = (lambda x: x, "ewma_table")
            return self._apply(
                ewma_func,
                numba_cache_key=numba_cache_key,
            )
        elif engine in ("cython", None):
            if engine_kwargs is not None:
                raise ValueError("cython engine does not accept engine_kwargs")
            nv.validate_window_func("mean", args, kwargs)
            window_func = partial(
                window_aggregations.ewma,
                com=self._com,
                adjust=self.adjust,
                ignore_na=self.ignore_na,
                deltas=self._deltas,
            )
            return self._apply(window_func)
        else:
            raise ValueError("engine must be either 'numba' or 'cython'")

    @doc(
        template_header,
        create_section_header("Parameters"),
        dedent("""
        bias : bool, default False
            Use a standard estimation bias correction.
        """).replace("\n", "", 1),
        args_compat,
        kwargs_compat,
        create_section_header("Returns"),
        template_returns,
        create_section_header("See Also"),
        template_see_also[:-1],
        window_method="ewm",
        aggregation_description="(exponential weighted moment) standard deviation",
        agg_method="std",
    )
    def std(self, bias: bool = False, *args, **kwargs):
        nv.validate_window_func("std", args, kwargs)
        # Standard deviation is the square root of the EW variance.
        return zsqrt(self.var(bias=bias, **kwargs))

    def vol(self, bias: bool = False, *args, **kwargs):
        """
        Deprecated alias of :meth:`std`.

        Emits a ``FutureWarning`` and forwards all arguments to ``std``.
        """
        warnings.warn(
            (
                "vol is deprecated will be removed in a future version. "
                "Use std instead."
            ),
            FutureWarning,
            stacklevel=2,
        )
        return self.std(bias, *args, **kwargs)

    @doc(
        template_header,
        create_section_header("Parameters"),
        dedent("""
        bias : bool, default False
            Use a standard estimation bias correction.
        """).replace("\n", "", 1),
        args_compat,
        kwargs_compat,
        create_section_header("Returns"),
        template_returns,
        create_section_header("See Also"),
        template_see_also[:-1],
        window_method="ewm",
        aggregation_description="(exponential weighted moment) variance",
        agg_method="var",
    )
    def var(self, bias: bool = False, *args, **kwargs):
        nv.validate_window_func("var", args, kwargs)
        window_func = window_aggregations.ewmcov
        wfunc = partial(
            window_func,
            com=self._com,
            adjust=self.adjust,
            ignore_na=self.ignore_na,
            bias=bias,
        )

        def var_func(values, begin, end, min_periods):
            # Variance is the covariance of the values with themselves.
            return wfunc(values, begin, end, min_periods, values)

        return self._apply(var_func)

    @doc(
        template_header,
        create_section_header("Parameters"),
        dedent("""
        other : Series or DataFrame , optional
            If not supplied then will default to self and produce pairwise
            output.
        pairwise : bool, default None
            If False then only matching columns between self and other will be
            used and the output will be a DataFrame.
            If True then all pairwise combinations will be calculated and the
            output will be a MultiIndex DataFrame in the case of DataFrame
            inputs. In the case of missing elements, only complete pairwise
            observations will be used.
        bias : bool, default False
            Use a standard estimation bias correction.
        """).replace("\n", "", 1),
        kwargs_compat,
        create_section_header("Returns"),
        template_returns,
        create_section_header("See Also"),
        template_see_also[:-1],
        window_method="ewm",
        aggregation_description="(exponential weighted moment) sample covariance",
        agg_method="cov",
    )
    def cov(
        self,
        other: DataFrame | Series | None = None,
        pairwise: bool | None = None,
        bias: bool = False,
        **kwargs,
    ):
        from pandas import Series

        def cov_func(x, y):
            x_array = self._prep_values(x)
            y_array = self._prep_values(y)
            window_indexer = self._get_window_indexer()
            min_periods = (
                self.min_periods
                if self.min_periods is not None
                else window_indexer.window_size
            )
            start, end = window_indexer.get_window_bounds(
                num_values=len(x_array),
                min_periods=min_periods,
                center=self.center,
                closed=self.closed,
            )
            result = window_aggregations.ewmcov(
                x_array,
                start,
                end,
                # Pass the resolved local (never None) rather than
                # ``self.min_periods``, matching what ``corr`` does.
                min_periods,
                y_array,
                self._com,
                self.adjust,
                self.ignore_na,
                bias,
            )
            return Series(result, index=x.index, name=x.name)

        return self._apply_pairwise(self._selected_obj, other, pairwise, cov_func)

    @doc(
        template_header,
        create_section_header("Parameters"),
        dedent("""
        other : Series or DataFrame, optional
            If not supplied then will default to self and produce pairwise
            output.
        pairwise : bool, default None
            If False then only matching columns between self and other will be
            used and the output will be a DataFrame.
            If True then all pairwise combinations will be calculated and the
            output will be a MultiIndex DataFrame in the case of DataFrame
            inputs. In the case of missing elements, only complete pairwise
            observations will be used.
        """).replace("\n", "", 1),
        kwargs_compat,
        create_section_header("Returns"),
        template_returns,
        create_section_header("See Also"),
        template_see_also[:-1],
        window_method="ewm",
        aggregation_description="(exponential weighted moment) sample correlation",
        agg_method="corr",
    )
    def corr(
        self,
        other: DataFrame | Series | None = None,
        pairwise: bool | None = None,
        **kwargs,
    ):
        from pandas import Series

        def cov_func(x, y):
            x_array = self._prep_values(x)
            y_array = self._prep_values(y)
            window_indexer = self._get_window_indexer()
            min_periods = (
                self.min_periods
                if self.min_periods is not None
                else window_indexer.window_size
            )
            start, end = window_indexer.get_window_bounds(
                num_values=len(x_array),
                min_periods=min_periods,
                center=self.center,
                closed=self.closed,
            )

            def _cov(X, Y):
                # The final positional argument (bias) is fixed to True for all
                # three terms of the correlation ratio.
                return window_aggregations.ewmcov(
                    X,
                    start,
                    end,
                    min_periods,
                    Y,
                    self._com,
                    self.adjust,
                    self.ignore_na,
                    True,
                )

            # Suppress floating-point warnings (e.g. 0/0) raised while forming
            # the ratio cov / sqrt(var_x * var_y).
            with np.errstate(all="ignore"):
                cov = _cov(x_array, y_array)
                x_var = _cov(x_array, x_array)
                y_var = _cov(y_array, y_array)
                result = cov / zsqrt(x_var * y_var)
            return Series(result, index=x.index, name=x.name)

        return self._apply_pairwise(self._selected_obj, other, pairwise, cov_func)
class Expanding(RollingAndExpandingMixin):
    """
    Provide expanding transformations.

    Parameters
    ----------
    min_periods : int, default 1
        Minimum number of observations in window required to have a value
        (otherwise result is NA).
    center : bool, default False
        Set the labels at the center of the window.
    axis : int or str, default 0
    method : str {'single', 'table'}, default 'single'
        Execute the rolling operation per single column or row (``'single'``)
        or over the entire object (``'table'``).

        This argument is only implemented when specifying ``engine='numba'``
        in the method call.

        .. versionadded:: 1.3.0

    Returns
    -------
    a Window sub-classed for the particular operation

    See Also
    --------
    rolling : Provides rolling window calculations.
    ewm : Provides exponential weighted functions.

    Notes
    -----
    By default, the result is set to the right edge of the window. This can be
    changed to the center of the window by setting ``center=True``.

    Examples
    --------
    >>> df = pd.DataFrame({"B": [0, 1, 2, np.nan, 4]})
    >>> df
         B
    0  0.0
    1  1.0
    2  2.0
    3  NaN
    4  4.0

    >>> df.expanding(2).sum()
         B
    0  NaN
    1  1.0
    2  3.0
    3  3.0
    4  7.0
    """

    _attributes = ["min_periods", "center", "axis", "method"]

    def __init__(
        self,
        obj: FrameOrSeries,
        min_periods: int = 1,
        center=None,
        axis: Axis = 0,
        method: str = "single",
        selection=None,
    ):
        super().__init__(
            obj=obj,
            min_periods=min_periods,
            center=center,
            axis=axis,
            method=method,
            selection=selection,
        )

    def _get_window_indexer(self) -> BaseIndexer:
        """
        Return an indexer class that will compute the window start and end bounds
        """
        return ExpandingIndexer()

    # The example below demonstrates the expanding API; it previously showed
    # ``df.ewm(alpha=0.5).mean()``, which belongs to the EWM window instead.
    @doc(
        _shared_docs["aggregate"],
        see_also=dedent("""
        See Also
        --------
        pandas.DataFrame.aggregate : Similar DataFrame method.
        pandas.Series.aggregate : Similar Series method.
        """),
        examples=dedent("""
        Examples
        --------
        >>> df = pd.DataFrame({"A": [1, 2, 3], "B": [4, 5, 6], "C": [7, 8, 9]})
        >>> df
           A  B  C
        0  1  4  7
        1  2  5  8
        2  3  6  9

        >>> df.expanding().mean()
             A    B    C
        0  1.0  4.0  7.0
        1  1.5  4.5  7.5
        2  2.0  5.0  8.0
        """),
        klass="Series/Dataframe",
        axis="",
    )
    def aggregate(self, func, *args, **kwargs):
        return super().aggregate(func, *args, **kwargs)

    agg = aggregate

    @doc(
        template_header,
        create_section_header("Returns"),
        template_returns,
        create_section_header("See Also"),
        template_see_also[:-1],
        window_method="expanding",
        aggregation_description="count of non NaN observations",
        agg_method="count",
    )
    def count(self):
        return super().count()

    @doc(
        template_header,
        create_section_header("Parameters"),
        window_apply_parameters,
        create_section_header("Returns"),
        template_returns,
        create_section_header("See Also"),
        template_see_also[:-1],
        window_method="expanding",
        aggregation_description="custom aggregation function",
        agg_method="apply",
    )
    def apply(
        self,
        func: Callable[..., Any],
        raw: bool = False,
        engine: str | None = None,
        engine_kwargs: dict[str, bool] | None = None,
        args: tuple[Any, ...] | None = None,
        kwargs: dict[str, Any] | None = None,
    ):
        return super().apply(
            func,
            raw=raw,
            engine=engine,
            engine_kwargs=engine_kwargs,
            args=args,
            kwargs=kwargs,
        )

    @doc(
        template_header,
        create_section_header("Parameters"),
        args_compat,
        window_agg_numba_parameters,
        kwargs_compat,
        create_section_header("Returns"),
        template_returns,
        create_section_header("See Also"),
        template_see_also,
        create_section_header("Notes"),
        numba_notes[:-1],
        window_method="expanding",
        aggregation_description="sum",
        agg_method="sum",
    )
    def sum(
        self,
        *args,
        engine: str | None = None,
        engine_kwargs: dict[str, bool] | None = None,
        **kwargs,
    ):
        nv.validate_expanding_func("sum", args, kwargs)
        return super().sum(*args, engine=engine, engine_kwargs=engine_kwargs, **kwargs)

    @doc(
        template_header,
        create_section_header("Parameters"),
        args_compat,
        window_agg_numba_parameters,
        kwargs_compat,
        create_section_header("Returns"),
        template_returns,
        create_section_header("See Also"),
        template_see_also,
        create_section_header("Notes"),
        numba_notes[:-1],
        window_method="expanding",
        aggregation_description="maximum",
        agg_method="max",
    )
    def max(
        self,
        *args,
        engine: str | None = None,
        engine_kwargs: dict[str, bool] | None = None,
        **kwargs,
    ):
        nv.validate_expanding_func("max", args, kwargs)
        return super().max(*args, engine=engine, engine_kwargs=engine_kwargs, **kwargs)

    @doc(
        template_header,
        create_section_header("Parameters"),
        args_compat,
        window_agg_numba_parameters,
        kwargs_compat,
        create_section_header("Returns"),
        template_returns,
        create_section_header("See Also"),
        template_see_also,
        create_section_header("Notes"),
        numba_notes[:-1],
        window_method="expanding",
        aggregation_description="minimum",
        agg_method="min",
    )
    def min(
        self,
        *args,
        engine: str | None = None,
        engine_kwargs: dict[str, bool] | None = None,
        **kwargs,
    ):
        nv.validate_expanding_func("min", args, kwargs)
        return super().min(*args, engine=engine, engine_kwargs=engine_kwargs, **kwargs)

    @doc(
        template_header,
        create_section_header("Parameters"),
        args_compat,
        window_agg_numba_parameters,
        kwargs_compat,
        create_section_header("Returns"),
        template_returns,
        create_section_header("See Also"),
        template_see_also,
        create_section_header("Notes"),
        numba_notes[:-1],
        window_method="expanding",
        aggregation_description="mean",
        agg_method="mean",
    )
    def mean(
        self,
        *args,
        engine: str | None = None,
        engine_kwargs: dict[str, bool] | None = None,
        **kwargs,
    ):
        nv.validate_expanding_func("mean", args, kwargs)
        return super().mean(*args, engine=engine, engine_kwargs=engine_kwargs, **kwargs)

    @doc(
        template_header,
        create_section_header("Parameters"),
        window_agg_numba_parameters,
        kwargs_compat,
        create_section_header("Returns"),
        template_returns,
        create_section_header("See Also"),
        template_see_also,
        create_section_header("Notes"),
        numba_notes[:-1],
        window_method="expanding",
        aggregation_description="median",
        agg_method="median",
    )
    def median(
        self,
        engine: str | None = None,
        engine_kwargs: dict[str, bool] | None = None,
        **kwargs,
    ):
        return super().median(engine=engine, engine_kwargs=engine_kwargs, **kwargs)

    @doc(
        template_header,
        create_section_header("Parameters"),
        dedent("""
        ddof : int, default 1
            Delta Degrees of Freedom.  The divisor used in calculations
            is ``N - ddof``, where ``N`` represents the number of elements.\n
        """).replace("\n", "", 1),
        args_compat,
        kwargs_compat,
        create_section_header("Returns"),
        template_returns,
        create_section_header("See Also"),
        "numpy.std : Equivalent method for NumPy array.\n",
        template_see_also,
        create_section_header("Notes"),
        dedent("""
        The default ``ddof`` of 1 used in :meth:`Series.std` is different
        than the default ``ddof`` of 0 in :func:`numpy.std`.

        A minimum of one period is required for the rolling calculation.\n
        """).replace("\n", "", 1),
        create_section_header("Examples"),
        dedent("""
        >>> s = pd.Series([5, 5, 6, 7, 5, 5, 5])

        >>> s.expanding(3).std()
        0         NaN
        1         NaN
        2    0.577350
        3    0.957427
        4    0.894427
        5    0.836660
        6    0.786796
        dtype: float64
        """).replace("\n", "", 1),
        window_method="expanding",
        aggregation_description="standard deviation",
        agg_method="std",
    )
    def std(self, ddof: int = 1, *args, **kwargs):
        # Extra positional args are only validated here, not forwarded.
        nv.validate_expanding_func("std", args, kwargs)
        return super().std(ddof=ddof, **kwargs)

    @doc(
        template_header,
        create_section_header("Parameters"),
        dedent("""
        ddof : int, default 1
            Delta Degrees of Freedom.  The divisor used in calculations
            is ``N - ddof``, where ``N`` represents the number of elements.\n
        """).replace("\n", "", 1),
        args_compat,
        kwargs_compat,
        create_section_header("Returns"),
        template_returns,
        create_section_header("See Also"),
        "numpy.var : Equivalent method for NumPy array.\n",
        template_see_also,
        create_section_header("Notes"),
        dedent("""
        The default ``ddof`` of 1 used in :meth:`Series.var` is different
        than the default ``ddof`` of 0 in :func:`numpy.var`.

        A minimum of one period is required for the rolling calculation.\n
        """).replace("\n", "", 1),
        create_section_header("Examples"),
        dedent("""
        >>> s = pd.Series([5, 5, 6, 7, 5, 5, 5])

        >>> s.expanding(3).var()
        0         NaN
        1         NaN
        2    0.333333
        3    0.916667
        4    0.800000
        5    0.700000
        6    0.619048
        dtype: float64
        """).replace("\n", "", 1),
        window_method="expanding",
        aggregation_description="variance",
        agg_method="var",
    )
    def var(self, ddof: int = 1, *args, **kwargs):
        # Extra positional args are only validated here, not forwarded.
        nv.validate_expanding_func("var", args, kwargs)
        return super().var(ddof=ddof, **kwargs)

    @doc(
        template_header,
        create_section_header("Parameters"),
        dedent("""
        ddof : int, default 1
            Delta Degrees of Freedom.  The divisor used in calculations
            is ``N - ddof``, where ``N`` represents the number of elements.\n
        """).replace("\n", "", 1),
        args_compat,
        kwargs_compat,
        create_section_header("Returns"),
        template_returns,
        create_section_header("See Also"),
        template_see_also,
        create_section_header("Notes"),
        "A minimum of one period is required for the calculation.\n\n",
        create_section_header("Examples"),
        dedent("""
        >>> s = pd.Series([0, 1, 2, 3])

        >>> s.expanding().sem()
        0         NaN
        1    0.707107
        2    0.707107
        3    0.745356
        dtype: float64
        """).replace("\n", "", 1),
        window_method="expanding",
        aggregation_description="standard error of mean",
        agg_method="sem",
    )
    def sem(self, ddof: int = 1, *args, **kwargs):
        # Positional extras are accepted for signature compatibility but unused.
        return super().sem(ddof=ddof, **kwargs)

    @doc(
        template_header,
        create_section_header("Parameters"),
        kwargs_compat,
        create_section_header("Returns"),
        template_returns,
        create_section_header("See Also"),
        "scipy.stats.skew : Third moment of a probability density.\n",
        template_see_also,
        create_section_header("Notes"),
        "A minimum of three periods is required for the rolling calculation.\n",
        window_method="expanding",
        aggregation_description="unbiased skewness",
        agg_method="skew",
    )
    def skew(self, **kwargs):
        return super().skew(**kwargs)

    @doc(
        template_header,
        create_section_header("Parameters"),
        kwargs_compat,
        create_section_header("Returns"),
        template_returns,
        create_section_header("See Also"),
        "scipy.stats.kurtosis : Reference SciPy method.\n",
        template_see_also,
        create_section_header("Notes"),
        "A minimum of four periods is required for the calculation.\n\n",
        create_section_header("Examples"),
        dedent("""
        The example below will show a rolling calculation with a window size of
        four matching the equivalent function call using `scipy.stats`.

        >>> arr = [1, 2, 3, 4, 999]
        >>> import scipy.stats
        >>> print(f"{{scipy.stats.kurtosis(arr[:-1], bias=False):.6f}}")
        -1.200000
        >>> print(f"{{scipy.stats.kurtosis(arr, bias=False):.6f}}")
        4.999874
        >>> s = pd.Series(arr)
        >>> s.expanding(4).kurt()
        0         NaN
        1         NaN
        2         NaN
        3   -1.200000
        4    4.999874
        dtype: float64
        """).replace("\n", "", 1),
        window_method="expanding",
        aggregation_description="Fisher's definition of kurtosis without bias",
        agg_method="kurt",
    )
    def kurt(self, **kwargs):
        return super().kurt(**kwargs)

    @doc(
        template_header,
        create_section_header("Parameters"),
        dedent("""
        quantile : float
            Quantile to compute. 0 <= quantile <= 1.
        interpolation : {{'linear', 'lower', 'higher', 'midpoint', 'nearest'}}
            This optional parameter specifies the interpolation method to use,
            when the desired quantile lies between two data points `i` and `j`:

                * linear: `i + (j - i) * fraction`, where `fraction` is the
                  fractional part of the index surrounded by `i` and `j`.
                * lower: `i`.
                * higher: `j`.
                * nearest: `i` or `j` whichever is nearest.
                * midpoint: (`i` + `j`) / 2.
        """).replace("\n", "", 1),
        kwargs_compat,
        create_section_header("Returns"),
        template_returns,
        create_section_header("See Also"),
        template_see_also[:-1],
        window_method="expanding",
        aggregation_description="quantile",
        agg_method="quantile",
    )
    def quantile(
        self,
        quantile: float,
        interpolation: str = "linear",
        **kwargs,
    ):
        return super().quantile(
            quantile=quantile,
            interpolation=interpolation,
            **kwargs,
        )

    @doc(
        template_header,
        create_section_header("Parameters"),
        dedent("""
        other : Series or DataFrame, optional
            If not supplied then will default to self and produce pairwise
            output.
        pairwise : bool, default None
            If False then only matching columns between self and other will be
            used and the output will be a DataFrame.
            If True then all pairwise combinations will be calculated and the
            output will be a MultiIndexed DataFrame in the case of DataFrame
            inputs. In the case of missing elements, only complete pairwise
            observations will be used.
        ddof : int, default 1
            Delta Degrees of Freedom.  The divisor used in calculations
            is ``N - ddof``, where ``N`` represents the number of elements.
        """).replace("\n", "", 1),
        kwargs_compat,
        create_section_header("Returns"),
        template_returns,
        create_section_header("See Also"),
        template_see_also[:-1],
        window_method="expanding",
        aggregation_description="sample covariance",
        agg_method="cov",
    )
    def cov(
        self,
        other: FrameOrSeriesUnion | None = None,
        pairwise: bool | None = None,
        ddof: int = 1,
        **kwargs,
    ):
        return super().cov(other=other, pairwise=pairwise, ddof=ddof, **kwargs)

    @doc(
        template_header,
        create_section_header("Parameters"),
        dedent("""
        other : Series or DataFrame, optional
            If not supplied then will default to self and produce pairwise
            output.
        pairwise : bool, default None
            If False then only matching columns between self and other will be
            used and the output will be a DataFrame.
            If True then all pairwise combinations will be calculated and the
            output will be a MultiIndexed DataFrame in the case of DataFrame
            inputs. In the case of missing elements, only complete pairwise
            observations will be used.
        """).replace("\n", "", 1),
        kwargs_compat,
        create_section_header("Returns"),
        template_returns,
        create_section_header("See Also"),
        dedent("""
        cov : Similar method to calculate covariance.
        numpy.corrcoef : NumPy Pearson's correlation calculation.
        """).replace("\n", "", 1),
        template_see_also,
        create_section_header("Notes"),
        dedent("""
        This function uses Pearson's definition of correlation
        (https://en.wikipedia.org/wiki/Pearson_correlation_coefficient).

        When `other` is not specified, the output will be self correlation (e.g.
        all 1's), except for :class:`~pandas.DataFrame` inputs with `pairwise`
        set to `True`.

        Function will return ``NaN`` for correlations of equal valued sequences;
        this is the result of a 0/0 division error.

        When `pairwise` is set to `False`, only matching columns between `self` and
        `other` will be used.

        When `pairwise` is set to `True`, the output will be a MultiIndex DataFrame
        with the original index on the first level, and the `other` DataFrame
        columns on the second level.

        In the case of missing elements, only complete pairwise observations
        will be used.
        """).replace("\n", "", 1),
        window_method="expanding",
        aggregation_description="correlation",
        agg_method="corr",
    )
    def corr(
        self,
        other: FrameOrSeriesUnion | None = None,
        pairwise: bool | None = None,
        ddof: int = 1,
        **kwargs,
    ):
        return super().corr(other=other, pairwise=pairwise, ddof=ddof, **kwargs)
class ExponentialMovingWindow(BaseWindow):
    r"""
    Provide exponential weighted (EW) functions.

    Available EW functions: ``mean()``, ``var()``, ``std()``, ``corr()``, ``cov()``.

    Exactly one parameter: ``com``, ``span``, ``halflife``, or ``alpha`` must be
    provided.

    Parameters
    ----------
    com : float, optional
        Specify decay in terms of center of mass,
        :math:`\alpha = 1 / (1 + com)`, for :math:`com \geq 0`.
    span : float, optional
        Specify decay in terms of span,
        :math:`\alpha = 2 / (span + 1)`, for :math:`span \geq 1`.
    halflife : float, str, timedelta, optional
        Specify decay in terms of half-life,
        :math:`\alpha = 1 - \exp\left(-\ln(2) / halflife\right)`, for
        :math:`halflife > 0`.

        If ``times`` is specified, the time unit (str or timedelta) over which an
        observation decays to half its value. Only applicable to ``mean()``
        and halflife value will not apply to the other functions.

        .. versionadded:: 1.1.0

    alpha : float, optional
        Specify smoothing factor :math:`\alpha` directly,
        :math:`0 < \alpha \leq 1`.
    min_periods : int, default 0
        Minimum number of observations in window required to have a value
        (otherwise result is NA).
    adjust : bool, default True
        Divide by decaying adjustment factor in beginning periods to account
        for imbalance in relative weightings (viewing EWMA as a moving average).

        - When ``adjust=True`` (default), the EW function is calculated using weights
          :math:`w_i = (1 - \alpha)^i`. For example, the EW moving average of the series
          [:math:`x_0, x_1, ..., x_t`] would be:

        .. math::
            y_t = \frac{x_t + (1 - \alpha)x_{t-1} + (1 - \alpha)^2 x_{t-2} + ... + (1 -
            \alpha)^t x_0}{1 + (1 - \alpha) + (1 - \alpha)^2 + ... + (1 - \alpha)^t}

        - When ``adjust=False``, the exponentially weighted function is calculated
          recursively:

        .. math::
            \begin{split}
                y_0 &= x_0\\
                y_t &= (1 - \alpha) y_{t-1} + \alpha x_t,
            \end{split}
    ignore_na : bool, default False
        Ignore missing values when calculating weights; specify ``True`` to reproduce
        pre-0.15.0 behavior.

        - When ``ignore_na=False`` (default), weights are based on absolute positions.
          For example, the weights of :math:`x_0` and :math:`x_2` used in calculating
          the final weighted average of [:math:`x_0`, None, :math:`x_2`] are
          :math:`(1-\alpha)^2` and :math:`1` if ``adjust=True``, and
          :math:`(1-\alpha)^2` and :math:`\alpha` if ``adjust=False``.

        - When ``ignore_na=True`` (reproducing pre-0.15.0 behavior), weights are based
          on relative positions. For example, the weights of :math:`x_0` and :math:`x_2`
          used in calculating the final weighted average of
          [:math:`x_0`, None, :math:`x_2`] are :math:`1-\alpha` and :math:`1` if
          ``adjust=True``, and :math:`1-\alpha` and :math:`\alpha` if ``adjust=False``.
    axis : {0, 1}, default 0
        The axis to use. The value 0 identifies the rows, and 1
        identifies the columns.
    times : str, np.ndarray, Series, default None

        .. versionadded:: 1.1.0

        Times corresponding to the observations. Must be monotonically increasing and
        ``datetime64[ns]`` dtype.

        If str, the name of the column in the DataFrame representing the times.

        If 1-D array like, a sequence with the same shape as the observations.

        Only applicable to ``mean()``.

    Returns
    -------
    DataFrame
        A Window sub-classed for the particular operation.

    See Also
    --------
    rolling : Provides rolling window calculations.
    expanding : Provides expanding transformations.

    Notes
    -----

    More details can be found at:
    :ref:`Exponentially weighted windows <window.exponentially_weighted>`.

    Examples
    --------
    >>> df = pd.DataFrame({'B': [0, 1, 2, np.nan, 4]})
    >>> df
         B
    0  0.0
    1  1.0
    2  2.0
    3  NaN
    4  4.0

    >>> df.ewm(com=0.5).mean()
              B
    0  0.000000
    1  0.750000
    2  1.615385
    3  1.615385
    4  3.670213

    Specifying ``times`` with a timedelta ``halflife`` when computing mean.

    >>> times = ['2020-01-01', '2020-01-03', '2020-01-10', '2020-01-15', '2020-01-17']
    >>> df.ewm(halflife='4 days', times=pd.DatetimeIndex(times)).mean()
              B
    0  0.000000
    1  0.585786
    2  1.523889
    3  1.523889
    4  3.233686
    """

    _attributes = ["com", "min_periods", "adjust", "ignore_na", "axis"]

    # NOTE(review): the public methods below get their documentation from the
    # ``@doc`` decorator. They deliberately carry no docstring of their own,
    # since the decorator may fold an existing docstring into the rendered
    # text -- confirm before adding one.

    def __init__(
        self,
        obj,
        com: Optional[float] = None,
        span: Optional[float] = None,
        halflife: Optional[Union[float, TimedeltaConvertibleTypes]] = None,
        alpha: Optional[float] = None,
        min_periods: int = 0,
        adjust: bool = True,
        ignore_na: bool = False,
        axis: int = 0,
        times: Optional[Union[str, np.ndarray, FrameOrSeries]] = None,
        **kwargs,
    ):
        # NOTE(review): ``_selected_obj`` (used below to resolve a column name
        # given as ``times``) comes from the base class and presumably reads
        # ``self.obj`` -- keep this assignment first.
        self.obj = obj
        # At least one observation is always required, whatever was passed.
        self.min_periods = max(int(min_periods), 1)
        self.adjust = adjust
        self.ignore_na = ignore_na
        self.axis = axis
        # Fixed values for this window type; presumably read by the shared
        # base-class machinery -- TODO confirm.
        self.on = None
        self.center = False
        self.closed = None
        self.method = "single"

        if times is None:
            # Without ``times`` the half-life must be a plain number; a
            # timedelta-like value only has meaning relative to timestamps.
            if isinstance(halflife, (str, datetime.timedelta)):
                raise ValueError(
                    "halflife can only be a timedelta convertible argument if "
                    "times is not None."
                )
            self.times = None
            self.halflife = None
            self.com = get_center_of_mass(com, span, halflife, alpha)
            return

        # A string names the column that holds the timestamps.
        if isinstance(times, str):
            times = self._selected_obj[times]
        if not is_datetime64_ns_dtype(times):
            raise ValueError("times must be datetime64[ns] dtype.")
        if len(times) != len(obj):
            raise ValueError("times must be the same length as the object.")
        if not isinstance(halflife, (str, datetime.timedelta)):
            raise ValueError(
                "halflife must be a string or datetime.timedelta object"
            )
        if isna(times).any():
            raise ValueError("Cannot convert NaT values to integer")

        # Timestamps and half-life are both stored as integers.
        self.times = np.asarray(times.view(np.int64))
        self.halflife = Timedelta(halflife).value
        # The half-life no longer feeds the centre-of-mass calculation, but a
        # centre of mass is still derived when another decay argument is given.
        self.com = (
            get_center_of_mass(com, span, None, alpha)
            if common.count_not_none(com, span, alpha) > 0
            else 0.0
        )

    def _get_window_indexer(self) -> BaseIndexer:
        """
        Return an indexer class that will compute the window start and end bounds
        """
        return ExponentialMovingWindowIndexer()

    @doc(
        _shared_docs["aggregate"],
        see_also=dedent(
            """
        See Also
        --------
        pandas.DataFrame.rolling.aggregate
        """
        ),
        examples=dedent(
            """
        Examples
        --------
        >>> df = pd.DataFrame({"A": [1, 2, 3], "B": [4, 5, 6], "C": [7, 8, 9]})
        >>> df
           A  B  C
        0  1  4  7
        1  2  5  8
        2  3  6  9

        >>> df.ewm(alpha=0.5).mean()
                  A         B         C
        0  1.000000  4.000000  7.000000
        1  1.666667  4.666667  7.666667
        2  2.428571  5.428571  8.428571
        """
        ),
        klass="Series/Dataframe",
        axis="",
    )
    def aggregate(self, func, *args, **kwargs):
        # Plain delegation; the override exists to attach EW-specific docs.
        return super().aggregate(func, *args, **kwargs)

    agg = aggregate

    @doc(
        template_header,
        create_section_header("Parameters"),
        args_compat,
        kwargs_compat,
        create_section_header("Returns"),
        template_returns,
        create_section_header("See Also"),
        template_see_also[:-1],
        window_method="ewm",
        aggregation_description="(exponential weighted moment) mean",
        agg_method="mean",
    )
    def mean(self, *args, **kwargs):
        nv.validate_window_func("mean", args, kwargs)
        if self.times is None:
            # Position-based weighting driven by the centre of mass.
            kernel = partial(
                window_aggregations.ewma,
                com=self.com,
                adjust=self.adjust,
                ignore_na=self.ignore_na,
            )
        else:
            # Time-based weighting driven by the timestamps and half-life.
            kernel = partial(
                window_aggregations.ewma_time,
                times=self.times,
                halflife=self.halflife,
            )
        return self._apply(kernel)

    @doc(
        template_header,
        create_section_header("Parameters"),
        dedent(
            """
        bias : bool, default False
            Use a standard estimation bias correction.
        """
        ).replace("\n", "", 1),
        args_compat,
        kwargs_compat,
        create_section_header("Returns"),
        template_returns,
        create_section_header("See Also"),
        template_see_also[:-1],
        window_method="ewm",
        aggregation_description="(exponential weighted moment) standard deviation",
        agg_method="std",
    )
    def std(self, bias: bool = False, *args, **kwargs):
        nv.validate_window_func("std", args, kwargs)
        # Standard deviation is the square root of the EW variance.
        variance = self.var(bias=bias, **kwargs)
        return zsqrt(variance)

    def vol(self, bias: bool = False, *args, **kwargs):
        """
        Deprecated alias of ``std``; emits a ``FutureWarning`` and forwards.
        """
        warnings.warn(
            (
                "vol is deprecated will be removed in a future version. "
                "Use std instead."
            ),
            FutureWarning,
            stacklevel=2,
        )
        return self.std(bias, *args, **kwargs)

    @doc(
        template_header,
        create_section_header("Parameters"),
        dedent(
            """
        bias : bool, default False
            Use a standard estimation bias correction.
        """
        ).replace("\n", "", 1),
        args_compat,
        kwargs_compat,
        create_section_header("Returns"),
        template_returns,
        create_section_header("See Also"),
        template_see_also[:-1],
        window_method="ewm",
        aggregation_description="(exponential weighted moment) variance",
        agg_method="var",
    )
    def var(self, bias: bool = False, *args, **kwargs):
        nv.validate_window_func("var", args, kwargs)
        ewm_cov = partial(
            window_aggregations.ewmcov,
            com=self.com,
            adjust=self.adjust,
            ignore_na=self.ignore_na,
            bias=bias,
        )

        def _self_cov(values, begin, end, min_periods):
            # Variance is the covariance of the values with themselves.
            return ewm_cov(values, begin, end, min_periods, values)

        return self._apply(_self_cov)

    @doc(
        template_header,
        create_section_header("Parameters"),
        dedent(
            """
        other : Series, DataFrame, or ndarray, optional
            If not supplied then will default to self and produce pairwise
            output.
        pairwise : bool, default None
            If False then only matching columns between self and other will be
            used and the output will be a DataFrame.
            If True then all pairwise combinations will be calculated and the
            output will be a MultiIndex DataFrame in the case of DataFrame
            inputs. In the case of missing elements, only complete pairwise
            observations will be used.
        bias : bool, default False
            Use a standard estimation bias correction.
        """
        ).replace("\n", "", 1),
        kwargs_compat,
        create_section_header("Returns"),
        template_returns,
        create_section_header("See Also"),
        template_see_also[:-1],
        window_method="ewm",
        aggregation_description="(exponential weighted moment) sample covariance",
        agg_method="cov",
    )
    def cov(
        self,
        other: Optional[Union[np.ndarray, FrameOrSeries]] = None,
        pairwise: Optional[bool] = None,
        bias: bool = False,
        **kwargs,
    ):
        if other is None:
            other = self._selected_obj
            # ``pairwise`` defaults to True only when ``other`` was omitted.
            if pairwise is None:
                pairwise = True
        other = self._shallow_copy(other)

        def _get_cov(X, Y):
            lhs = self._shallow_copy(X)
            rhs = self._shallow_copy(Y)
            lhs_values = lhs._prep_values()
            rhs_values = rhs._prep_values()
            begin = np.array([0], dtype=np.int64)
            end = np.array([0], dtype=np.int64)
            result = window_aggregations.ewmcov(
                lhs_values,
                begin,
                end,
                self.min_periods,
                rhs_values,
                self.com,
                self.adjust,
                self.ignore_na,
                bias,
            )
            return wrap_result(lhs, result)

        return flex_binary_moment(
            self._selected_obj,
            other._selected_obj,
            _get_cov,
            pairwise=bool(pairwise),
        )

    @doc(
        template_header,
        create_section_header("Parameters"),
        dedent(
            """
        other : Series, DataFrame, or ndarray, optional
            If not supplied then will default to self and produce pairwise
            output.
        pairwise : bool, default None
            If False then only matching columns between self and other will be
            used and the output will be a DataFrame.
            If True then all pairwise combinations will be calculated and the
            output will be a MultiIndex DataFrame in the case of DataFrame
            inputs. In the case of missing elements, only complete pairwise
            observations will be used.
        """
        ).replace("\n", "", 1),
        kwargs_compat,
        create_section_header("Returns"),
        template_returns,
        create_section_header("See Also"),
        template_see_also[:-1],
        window_method="ewm",
        aggregation_description="(exponential weighted moment) sample correlation",
        agg_method="corr",
    )
    def corr(
        self,
        other: Optional[Union[np.ndarray, FrameOrSeries]] = None,
        pairwise: Optional[bool] = None,
        **kwargs,
    ):
        if other is None:
            other = self._selected_obj
            # ``pairwise`` defaults to True only when ``other`` was omitted.
            if pairwise is None:
                pairwise = True
        other = self._shallow_copy(other)

        def _get_corr(X, Y):
            lhs = self._shallow_copy(X)
            rhs = self._shallow_copy(Y)

            def _biased_moment(a, b):
                # All three moments below are computed with the bias flag set.
                begin = np.array([0], dtype=np.int64)
                end = np.array([0], dtype=np.int64)
                return window_aggregations.ewmcov(
                    a,
                    begin,
                    end,
                    self.min_periods,
                    b,
                    self.com,
                    self.adjust,
                    self.ignore_na,
                    True,
                )

            lhs_values = lhs._prep_values()
            rhs_values = rhs._prep_values()
            # Floating-point warnings are silenced for the moment
            # calculations and the division.
            with np.errstate(all="ignore"):
                cross = _biased_moment(lhs_values, rhs_values)
                lhs_var = _biased_moment(lhs_values, lhs_values)
                rhs_var = _biased_moment(rhs_values, rhs_values)
                result = cross / zsqrt(lhs_var * rhs_var)
            return wrap_result(lhs, result)

        return flex_binary_moment(
            self._selected_obj,
            other._selected_obj,
            _get_corr,
            pairwise=bool(pairwise),
        )