def test_freq_to_period():
    """Check ``tools.freq_to_period`` for string aliases and offset objects."""
    from pandas.tseries.frequencies import to_offset

    # (frequency alias, expected return value of freq_to_period)
    cases = (
        ('A', 1),
        ('AS-MAR', 1),
        ('Q', 4),
        ('QS', 4),
        ('QS-APR', 4),
        ('W', 52),
        ('W-MON', 52),
        ('B', 52),
    )
    for alias, wanted in cases:
        # The helper must accept both the raw string and the parsed offset.
        assert_equal(tools.freq_to_period(alias), wanted)
        assert_equal(tools.freq_to_period(to_offset(alias)), wanted)
def test_freq_to_period():
    """Check ``tools.freq_to_period`` for string aliases and offset objects."""
    from pandas.tseries.frequencies import to_offset

    # (frequency alias, expected return value of freq_to_period)
    cases = (
        ('A', 1),
        ('AS-MAR', 1),
        ('Q', 4),
        ('QS', 4),
        ('QS-APR', 4),
        ('W', 52),
        ('W-MON', 52),
        ('B', 5),
        ('D', 7),
        ('H', 24),
    )
    for alias, wanted in cases:
        # The helper must accept both the raw string and the parsed offset.
        assert_equal(tools.freq_to_period(alias), wanted)
        assert_equal(tools.freq_to_period(to_offset(alias)), wanted)
def test_freq_to_period():
    """Check ``tools.freq_to_period`` for string aliases and offset objects."""
    from pandas.tseries.frequencies import to_offset

    # (frequency alias, expected return value of freq_to_period)
    cases = (
        ("A", 1),
        ("AS-MAR", 1),
        ("Q", 4),
        ("QS", 4),
        ("QS-APR", 4),
        ("W", 52),
        ("W-MON", 52),
        ("B", 5),
        ("D", 7),
        ("H", 24),
    )
    for alias, wanted in cases:
        # The helper must accept both the raw string and the parsed offset.
        assert_equal(tools.freq_to_period(alias), wanted)
        assert_equal(tools.freq_to_period(to_offset(alias)), wanted)
def __init__(self, endog, trend=None, damped=False, seasonal=None,
             seasonal_periods=None, dates=None, freq=None, missing='none'):
    """
    Configure the exponential smoothing model.

    Parameters
    ----------
    endog : array_like
        Time series to smooth.
    trend : {'add', 'mul', 'additive', 'multiplicative', None}, optional
        Type of trend component. The long names are mapped to the short
        ones; any other value means no trend component is used.
    damped : bool, optional
        Whether the trend component is damped. Requires a trend.
    seasonal : {'add', 'mul', 'additive', 'multiplicative', None}, optional
        Type of seasonal component, normalized like ``trend``.
    seasonal_periods : int, optional
        Length of a seasonal cycle. When a seasonal component is requested
        and this is None, it is derived from ``self._index_freq`` using
        ``freq_to_period``.
    dates, freq, missing
        Forwarded unchanged to the parent class constructor.

    Raises
    ------
    ValueError
        If a multiplicative component is requested and ``endog`` contains
        non-positive values, if ``damped`` is set without a trend, or if
        the seasonal period is not larger than 1.
    """
    super(ExponentialSmoothing, self).__init__(endog, None, dates, freq,
                                               missing=missing)
    self.endog = self.endog.astype(np.double)

    long_to_short = {'additive': 'add', 'multiplicative': 'mul'}
    trend = long_to_short.get(trend, trend) if isinstance(trend, str) \
        else trend
    self.trend = trend
    self.damped = damped
    seasonal = long_to_short.get(seasonal, seasonal) \
        if isinstance(seasonal, str) else seasonal
    self.seasonal = seasonal
    self.trending = trend in ['mul', 'add']
    self.seasoning = seasonal in ['mul', 'add']

    # Coerce to an array before comparing: a plain list does not support
    # ``<=`` against a float, and a DataFrame comparison does not reduce
    # to a single boolean.
    if (self.trend == 'mul' or self.seasonal == 'mul') and \
            np.any(np.asarray(endog) <= 0.0):
        raise ValueError('endog must be strictly positive when using '
                         'multiplicative trend or seasonal components.')
    if self.damped and not self.trending:
        raise ValueError('Can only dampen the trend component')

    if self.seasoning:
        self.seasonal_periods = seasonal_periods
        if seasonal_periods is None:
            # Fall back to the period implied by the index frequency.
            self.seasonal_periods = freq_to_period(self._index_freq)
        if self.seasonal_periods <= 1:
            raise ValueError('seasonal_periods must be larger than 1.')
    else:
        self.seasonal_periods = 0
    self.nobs = len(self.endog)
def __init__(self, endog, trend=None, damped=False, seasonal=None,
             seasonal_periods=None, dates=None, freq=None, missing='none'):
    """
    Configure the exponential smoothing model.

    Parameters
    ----------
    endog : array_like
        Time series to smooth.
    trend : {'add', 'mul', 'additive', 'multiplicative', None}, optional
        Type of trend component. The long names are mapped to the short
        ones; any other value means no trend component is used.
    damped : bool, optional
        Whether the trend component is damped. Requires a trend.
    seasonal : {'add', 'mul', 'additive', 'multiplicative', None}, optional
        Type of seasonal component, normalized like ``trend``.
    seasonal_periods : int, optional
        Length of a seasonal cycle. When a seasonal component is requested
        and this is None, it is derived from ``self._index_freq`` using
        ``freq_to_period``.
    dates, freq, missing
        Forwarded unchanged to the parent class constructor.

    Raises
    ------
    ValueError
        If a multiplicative component is requested and ``endog`` contains
        non-positive values, if ``damped`` is set without a trend, or if
        the seasonal period is not larger than 1.
    """
    super(ExponentialSmoothing, self).__init__(
        endog, None, dates, freq, missing=missing)

    long_to_short = {'additive': 'add', 'multiplicative': 'mul'}
    trend = long_to_short.get(trend, trend) if isinstance(trend, str) \
        else trend
    self.trend = trend
    self.damped = damped
    seasonal = long_to_short.get(seasonal, seasonal) \
        if isinstance(seasonal, str) else seasonal
    self.seasonal = seasonal
    self.trending = trend in ['mul', 'add']
    self.seasoning = seasonal in ['mul', 'add']

    # Coerce to an array before comparing: a plain list does not support
    # ``<=`` against a float, and a DataFrame comparison does not reduce
    # to a single boolean.
    if (self.trend == 'mul' or self.seasonal == 'mul') and \
            np.any(np.asarray(endog) <= 0.0):
        raise ValueError('endog must be strictly positive when using '
                         'multiplicative trend or seasonal components.')
    if self.damped and not self.trending:
        raise ValueError('Can only dampen the trend component')

    if self.seasoning:
        self.seasonal_periods = seasonal_periods
        if seasonal_periods is None:
            # Fall back to the period implied by the index frequency.
            self.seasonal_periods = freq_to_period(self._index_freq)
        if self.seasonal_periods <= 1:
            raise ValueError('seasonal_periods must be larger than 1.')
    else:
        self.seasonal_periods = 0
    self.nobs = len(self.endog)
def from_index(
    cls, index: Union[Sequence[Hashable], pd.DatetimeIndex, pd.PeriodIndex]
) -> "Seasonality":
    """
    Build a seasonality whose period is taken from an index's frequency.

    Parameters
    ----------
    index : {DatetimeIndex, PeriodIndex}
        An index with its frequency (`freq`) set. For a DatetimeIndex
        without an explicit `freq`, the inferred frequency is used.

    Returns
    -------
    Seasonality
        An instance created with the period that ``freq_to_period``
        returns for the index frequency.

    Raises
    ------
    TypeError
        If the index is neither a DatetimeIndex nor a PeriodIndex.
    ValueError
        If no frequency is set and none can be inferred.
    """
    idx = cls._index_like(index)
    if not isinstance(idx, (pd.PeriodIndex, pd.DatetimeIndex)):
        raise TypeError("index must be a DatetimeIndex or PeriodIndex")

    if isinstance(idx, pd.PeriodIndex):
        index_freq = idx.freq
    else:
        # Prefer the explicit frequency; otherwise try to infer one.
        index_freq = idx.freq if idx.freq else idx.inferred_freq

    if index_freq is None:
        raise ValueError("index must have a freq or inferred_freq set")
    return cls(period=freq_to_period(index_freq))
def __init__(
    self,
    index: Union[Sequence[Hashable], pd.Index],
    *,
    period: Optional[Union[float, int]] = None,
    constant: bool = False,
    order: int = 0,
    seasonal: bool = False,
    fourier: int = 0,
    additional_terms: Sequence[DeterministicTerm] = (),
    drop: bool = False,
):
    """
    Assemble the deterministic terms requested for an index.

    Parameters
    ----------
    index : {Sequence[Hashable], pd.Index}
        Index of the process. Converted to a ``pd.Index`` if needed.
    period : {float, int}, optional
        Period used by the seasonal or Fourier terms. When one of those
        is requested and ``period`` is None, it is obtained from
        ``freq_to_period`` applied to the index frequency.
    constant : bool, optional
        Whether to include a constant.
    order : int, optional
        Order of the time trend.
    seasonal : bool, optional
        Whether to add a ``Seasonality`` term. Requires an integer period.
    fourier : int, optional
        Order of the ``Fourier`` term to add; 0 adds none.
    additional_terms : Sequence[DeterministicTerm], optional
        Extra terms to append. Each must be unique with respect to the
        terms created from the other arguments.
    drop : bool, optional
        Stored as ``self._drop`` after validation.

    Raises
    ------
    ValueError
        If both ``seasonal`` and ``fourier`` are requested, or if an
        additional term duplicates a term already added.
    TypeError
        If an additional term is not a ``DeterministicTerm``.
    """
    if not isinstance(index, pd.Index):
        index = pd.Index(index)
    self._index = index
    self._deterministic_terms: List[DeterministicTerm] = []
    self._extendable = False
    self._index_freq = None
    self._validate_index()
    period = float_like(period, "period", optional=True)
    self._constant = constant = bool_like(constant, "constant")
    self._order = required_int_like(order, "order")
    self._seasonal = seasonal = bool_like(seasonal, "seasonal")
    self._fourier = required_int_like(fourier, "fourier")
    additional_terms = tuple(additional_terms)
    self._cached_in_sample = None
    self._drop = bool_like(drop, "drop")
    self._additional_terms = additional_terms
    if constant or order:
        self._deterministic_terms.append(TimeTrend(constant, order))
    if seasonal and fourier:
        raise ValueError(
            "seasonal and fourier cannot both be initialized through the "
            "constructor since these will be necessarily perfectly "
            "collinear. Instead, you can pass additional components using "
            "the additional_terms input."
        )
    if (seasonal or fourier) and period is None:
        # No explicit period: fall back to the one implied by the index.
        period = freq_to_period(self._index_freq)
    if seasonal:
        period = required_int_like(period, "period")
        self._deterministic_terms.append(Seasonality(period))
    elif fourier:
        period = float_like(period, "period")
        # Narrowing only: period was filled in above when it was None.
        assert period is not None
        self._deterministic_terms.append(Fourier(period, order=fourier))
    for term in additional_terms:
        if not isinstance(term, DeterministicTerm):
            raise TypeError(
                "All additional terms must be instances of subclasses "
                "of DeterministicTerm"
            )
        if term in self._deterministic_terms:
            raise ValueError(
                "One or more terms in additional_terms has been added "
                "through the parameters of the constructor. Terms must "
                "be unique."
            )
        self._deterministic_terms.append(term)
    # Stored last so it reflects any conversion applied above.
    self._period = period
    self._retain_cols: Optional[List[Hashable]] = None
def _infer_period(self) -> int:
    """
    Determine the period from the inferred frequency of endog's index.

    Returns
    -------
    int
        The value ``freq_to_period`` returns for the inferred frequency.

    Raises
    ------
    ValueError
        If endog is not a pandas Series/DataFrame or its index has no
        inferable frequency.
    """
    endog = self.endog
    is_pandas = isinstance(endog, (pd.Series, pd.DataFrame))
    # Only pandas containers carry an index that can supply a frequency.
    inferred = getattr(endog.index, "inferred_freq", None) if is_pandas else None
    if inferred is None:
        raise ValueError("Unable to determine period from endog")
    return freq_to_period(inferred)
def new_func(X, *args, **kwargs):
    """
    Call the wrapped ``func``, injecting a period inferred from ``X``.

    When ``X`` is not a pandas object the call is forwarded untouched.
    Otherwise the inferred frequency of ``X.index`` is converted with
    ``freq_to_period``, stored in ``kwargs`` under ``freq_kw``, and the
    result of ``func`` is passed through the pandas wrapper built for ``X``.
    """
    # Quick pass-through for the do-nothing case.
    if not _is_using_pandas(X, None):
        return func(X, *args, **kwargs)

    # Build the wrapper first, before the frequency is looked up.
    wrapper_func = _get_pandas_wrapper(X, trim_head, trim_tail, columns)
    inferred = X.index.inferred_freq
    kwargs[freq_kw] = freq_to_period(inferred)
    return wrapper_func(func(X, *args, **kwargs))
def __init__(self, endog, trend=None, damped=False, seasonal=None,
             seasonal_periods=None, dates=None, freq=None, missing='none'):
    """
    Validate the inputs and configure the exponential smoothing model.

    Parameters
    ----------
    endog : array_like
        Time series to smooth.
    trend : {'add', 'mul', 'additive', 'multiplicative', None}, optional
        Type of trend component. The long names are mapped to the short
        ones.
    damped : bool, optional
        Whether the trend component is damped. Requires a trend.
    seasonal : {'add', 'mul', 'additive', 'multiplicative', None}, optional
        Type of seasonal component, normalized like ``trend``.
    seasonal_periods : int, optional
        Length of a seasonal cycle. When a seasonal component is requested
        and this is None, it is derived from ``self._index_freq`` using
        ``freq_to_period``.
    dates, freq, missing
        Forwarded unchanged to the parent class constructor.

    Raises
    ------
    ValueError
        If a multiplicative component is requested and the data are not
        all strictly positive, if ``damped`` is set without a trend, or if
        the seasonal period is not larger than 1.
    """
    super(ExponentialSmoothing, self).__init__(endog, None, dates, freq,
                                               missing=missing)
    self._y = self._data = array_like(endog, 'endog', contiguous=True,
                                      order='C')
    options = ("add", "mul", "additive", "multiplicative")
    long_to_short = {'additive': 'add', 'multiplicative': 'mul'}

    trend = string_like(trend, 'trend', options=options, optional=True)
    if trend in long_to_short:
        trend = long_to_short[trend]
    self.trend = trend
    self.damped = bool_like(damped, 'damped')

    seasonal = string_like(seasonal, 'seasonal', options=options,
                           optional=True)
    if seasonal in long_to_short:
        seasonal = long_to_short[seasonal]
    self.seasonal = seasonal

    self.trending = trend in ['mul', 'add']
    self.seasoning = seasonal in ['mul', 'add']
    if (self.trend == 'mul' or self.seasonal == 'mul') and \
            not np.all(self._data > 0.0):
        # The two literals are concatenated, so the trailing space matters.
        raise ValueError('endog must be strictly positive when using '
                         'multiplicative trend or seasonal components.')
    if self.damped and not self.trending:
        raise ValueError('Can only dampen the trend component')

    if self.seasoning:
        self.seasonal_periods = int_like(seasonal_periods,
                                         'seasonal_periods', optional=True)
        if seasonal_periods is None:
            # Fall back to the period implied by the index frequency.
            self.seasonal_periods = freq_to_period(self._index_freq)
        if self.seasonal_periods <= 1:
            raise ValueError('seasonal_periods must be larger than 1.')
    else:
        self.seasonal_periods = 0
    self.nobs = len(self.endog)
def __init__(
    self,
    endog,
    *,
    period: Optional[int] = None,
    deseasonalize: bool = True,
    use_test: bool = True,
    method: str = "auto",
    difference: bool = False
) -> None:
    """
    Validate the inputs and store the model configuration.

    Parameters
    ----------
    endog : array_like
        The series to model. If a DataFrame, its first column is kept as
        ``endog_orig``.
    period : int, optional
        Seasonal period. When None and ``deseasonalize`` is True, it is
        derived with ``freq_to_period`` from the ``freq`` (or, failing
        that, the ``inferred_freq``) of ``endog.index``.
    deseasonalize : bool, optional
        Whether the data should be deseasonalized.
    use_test : bool, optional
        Only effective when ``deseasonalize`` is also True.
    method : {"auto", "additive", "multiplicative", "mul", "add"}, optional
        Validated against the listed options.
    difference : bool, optional
        Stored as ``self._diff`` after validation.

    Raises
    ------
    ValueError
        If a period is needed but neither given nor obtainable from the
        index of ``endog``.
    """
    self._y = array_like(endog, "endog", ndim=1)
    if isinstance(endog, pd.DataFrame):
        self.endog_orig = endog.iloc[:, 0]
    else:
        self.endog_orig = endog
    self._period = int_like(period, "period", optional=True)
    self._deseasonalize = bool_like(deseasonalize, "deseasonalize")
    self._use_test = (
        bool_like(use_test, "use_test") and self._deseasonalize
    )
    self._diff = bool_like(difference, "difference")
    # Report validation errors under the argument's real name, ``method``,
    # in line with the other validator calls above.
    self._method = string_like(
        method,
        "method",
        options=("auto", "additive", "multiplicative", "mul", "add"),
    )
    if self._period is None and self._deseasonalize:
        idx = getattr(endog, "index", None)
        pfreq = None
        if idx is not None:
            # Prefer an explicit frequency; otherwise try the inferred one.
            pfreq = getattr(idx, "freq", None)
            if pfreq is None:
                pfreq = getattr(idx, "inferred_freq", None)
        if pfreq is None:
            raise ValueError(
                "You must specify a period or endog must be a "
                "pandas object with a DatetimeIndex with "
                "a freq not set to None"
            )
        self._period = freq_to_period(pfreq)
    self._has_seasonality = self._deseasonalize
def seasonal_decompose(x, model="additive", filt=None, period=None,
                       two_sided=True, extrapolate_trend=0):
    """
    Seasonal decomposition using moving averages.

    Parameters
    ----------
    x : array_like
        Time series. If 2d, individual series are in columns. x must
        contain 2 complete cycles.
    model : {"additive", "multiplicative"}, optional
        Type of seasonal component. Abbreviations are accepted.
    filt : array_like, optional
        The filter coefficients for filtering out the seasonal component.
        The concrete moving average method used in filtering is determined
        by two_sided.
    period : int, optional
        Period of the series. Must be used if x is not a pandas object or
        if the index of x does not have a frequency. Overrides default
        periodicity of x if x is a pandas object with a timeseries index.
    two_sided : bool, optional
        The moving average method used in filtering. If True (default), a
        centered moving average is computed using the filt. If False, the
        filter coefficients are for past values only.
    extrapolate_trend : int or 'freq', optional
        If set to > 0, the trend resulting from the convolution is linear
        least-squares extrapolated on both ends (or the single one if
        two_sided is False) considering this many (+1) closest points. If
        set to 'freq', use `freq` closest points. Setting this parameter
        results in no NaN values in trend or resid components.

    Returns
    -------
    DecomposeResult
        A object with seasonal, trend, and resid attributes.

    See Also
    --------
    statsmodels.tsa.filters.bk_filter.bkfilter
    statsmodels.tsa.filters.cf_filter.cffilter
    statsmodels.tsa.filters.hp_filter.hpfilter
    statsmodels.tsa.filters.convolution_filter
    statsmodels.tsa.seasonal.STL

    Notes
    -----
    This is a naive decomposition. More sophisticated methods should be
    preferred.

    The additive model is Y[t] = T[t] + S[t] + e[t]

    The multiplicative model is Y[t] = T[t] * S[t] * e[t]

    The seasonal component is first removed by applying a convolution
    filter to the data. The average of this smoothed series for each
    period is the returned seasonal component.
    """
    wrapper = PandasWrapper(x)
    # The index frequency must be read before x is converted to an array.
    index_freq = None
    if period is None:
        index_freq = getattr(getattr(x, 'index', None), 'inferred_freq', None)

    x = array_like(x, 'x', maxdim=2)
    nobs = len(x)

    if not np.all(np.isfinite(x)):
        raise ValueError("This function does not handle missing values")
    multiplicative = model.startswith('m')
    if multiplicative and np.any(x <= 0):
        raise ValueError("Multiplicative seasonality is not appropriate "
                         "for zero and negative values")

    if period is None:
        if index_freq is None:
            raise ValueError("You must specify a period or x must be a "
                             "pandas object with a DatetimeIndex with "
                             "a freq not set to None")
        period = freq_to_period(index_freq)

    required = 2 * period
    if x.shape[0] < required:
        raise ValueError('x must have 2 complete cycles requires {0} '
                         'observations. x only has {1} '
                         'observation(s)'.format(required, x.shape[0]))

    if filt is None:
        if period % 2 == 0:
            # Even period: split weights at ends.
            filt = np.array([.5] + [1] * (period - 1) + [.5]) / period
        else:
            filt = np.repeat(1. / period, period)

    trend = convolution_filter(x, filt, int(two_sided) + 1)

    if extrapolate_trend == 'freq':
        extrapolate_trend = period - 1
    if extrapolate_trend > 0:
        trend = _extrapolate_trend(trend, extrapolate_trend + 1)

    detrended = x / trend if multiplicative else x - trend

    period_averages = seasonal_mean(detrended, period)
    if multiplicative:
        period_averages /= np.mean(period_averages, axis=0)
    else:
        period_averages -= np.mean(period_averages, axis=0)

    # Repeat the per-period pattern enough times, then trim to nobs.
    seasonal = np.tile(period_averages.T, nobs // period + 1).T[:nobs]

    resid = x / seasonal / trend if multiplicative else detrended - seasonal

    components = (seasonal, trend, resid, x)
    labels = ('seasonal', 'trend', 'resid', None)
    wrapped = [wrapper.wrap(values.squeeze(), columns=label)
               for values, label in zip(components, labels)]
    seasonal_out, trend_out, resid_out, observed_out = wrapped
    return DecomposeResult(seasonal=seasonal_out, trend=trend_out,
                           resid=resid_out, observed=observed_out)
def seasonal_decompose(x, model="additive", filt=None, freq=None):
    """
    Seasonal decomposition using moving averages.

    Parameters
    ----------
    x : array-like
        Time series
    model : str {"additive", "multiplicative"}
        Type of seasonal component. Abbreviations are accepted.
    filt : array-like
        The filter coefficients for filtering out the seasonal component.
        The default is a symmetric moving average.
    freq : int, optional
        Frequency of the series. Must be used if x is not a pandas
        object with a timeseries index.

    Returns
    -------
    results : obj
        A object with seasonal, trend, and resid attributes.

    Notes
    -----
    This is a naive decomposition. More sophisticated methods should
    be preferred.

    The additive model is Y[t] = T[t] + S[t] + e[t]

    The multiplicative model is Y[t] = T[t] * S[t] * e[t]

    The seasonal component is first removed by applying a convolution
    filter to the data. The average of this smoothed series for each
    period is the returned seasonal component.

    See Also
    --------
    statsmodels.tsa.filters.convolution_filter
    """
    wrap, index_freq = _maybe_get_pandas_wrapper_freq(x)
    x = np.asanyarray(x).squeeze()
    nobs = len(x)

    if not np.all(np.isfinite(x)):
        raise ValueError("This function does not handle missing values")
    multiplicative = model.startswith('m')
    if multiplicative and np.any(x <= 0):
        raise ValueError("Multiplicative seasonality is not appropriate "
                         "for zero and negative values")

    if index_freq is None:
        if freq is None:
            raise ValueError("You must specify a freq or x must be a "
                             "pandas object with a timeseries index")
    else:
        index_period = freq_to_period(index_freq)
        # A user-supplied freq must agree with the one from the index.
        if freq and index_period != freq:
            raise ValueError("Inferred frequency of index and frequency "
                             "don't match. This function does not re-sample")
        freq = index_period

    if filt is None:
        if freq % 2 == 0:
            # Even frequency: split weights at ends.
            filt = np.array([.5] + [1] * (freq - 1) + [.5]) / freq
        else:
            filt = np.repeat(1. / freq, freq)

    trend = convolution_filter(x, filt)

    # nan pad for conformability - convolve doesn't do it
    detrended = x / trend if multiplicative else x - trend

    period_averages = seasonal_mean(detrended, freq)
    if multiplicative:
        period_averages /= np.mean(period_averages)
    else:
        period_averages -= np.mean(period_averages)

    # Repeat the per-period pattern enough times, then trim to nobs.
    seasonal = np.tile(period_averages, nobs // freq + 1)[:nobs]

    resid = x / seasonal / trend if multiplicative else detrended - seasonal

    seasonal_out, trend_out, resid_out, observed_out = lmap(
        wrap, [seasonal, trend, resid, x])
    return DecomposeResult(seasonal=seasonal_out, trend=trend_out,
                           resid=resid_out, observed=observed_out)
def seasonal_decompose(x, model="additive", filt=None, freq=None):
    """
    Seasonal decomposition using moving averages.

    Parameters
    ----------
    x : array-like
        Time series
    model : str {"additive", "multiplicative"}
        Type of seasonal component. Abbreviations are accepted.
    filt : array-like
        The filter coefficients for filtering out the seasonal component.
        The default is a symmetric moving average.
    freq : int, optional
        Frequency of the series. Must be used if x is not a pandas
        object with a timeseries index.

    Returns
    -------
    results : obj
        A object with seasonal, trend, and resid attributes.

    Notes
    -----
    This is a naive decomposition. More sophisticated methods should
    be preferred.

    The additive model is Y[t] = T[t] + S[t] + e[t]

    The multiplicative model is Y[t] = T[t] * S[t] * e[t]

    The seasonal component is first removed by applying a convolution
    filter to the data. The average of this smoothed series for each
    period is the returned seasonal component.

    See Also
    --------
    statsmodels.tsa.filters.convolution_filter
    """
    wrap, index_freq = _maybe_get_pandas_wrapper_freq(x)
    x = np.asanyarray(x).squeeze()
    nobs = len(x)

    if not np.all(np.isfinite(x)):
        raise ValueError("This function does not handle missing values")
    multiplicative = model.startswith('m')
    if multiplicative and np.any(x <= 0):
        raise ValueError("Multiplicative seasonality is not appropriate "
                         "for zero and negative values")

    if index_freq is None:
        if freq is None:
            raise ValueError("You must specify a freq or x must be a "
                             "pandas object with a timeseries index")
    else:
        index_period = freq_to_period(index_freq)
        # A user-supplied freq must agree with the one from the index.
        if freq and index_period != freq:
            raise ValueError("Inferred frequency of index and frequency "
                             "don't match. This function does not re-sample")
        freq = index_period

    if filt is None:
        if freq % 2 == 0:
            # Even frequency: split weights at ends.
            filt = np.array([.5] + [1] * (freq - 1) + [.5]) / freq
        else:
            filt = np.repeat(1. / freq, freq)

    trend = convolution_filter(x, filt)

    # nan pad for conformability - convolve doesn't do it
    detrended = x / trend if multiplicative else x - trend

    period_averages = seasonal_mean(detrended, freq)
    if multiplicative:
        period_averages /= np.mean(period_averages)
    else:
        period_averages -= np.mean(period_averages)

    # Repeat the per-period pattern enough times, then trim to nobs.
    seasonal = np.tile(period_averages, nobs // freq + 1)[:nobs]

    resid = x / seasonal / trend if multiplicative else detrended - seasonal

    seasonal_out, trend_out, resid_out, observed_out = lmap(
        wrap, [seasonal, trend, resid, x])
    return DecomposeResult(seasonal=seasonal_out, trend=trend_out,
                           resid=resid_out, observed=observed_out)
def seasonal_decompose(x, model="additive", filt=None, freq=None,
                       two_sided=True, extrapolate_trend=0):
    """
    Seasonal decomposition using moving averages

    Parameters
    ----------
    x : array-like
        Time series. If 2d, individual series are in columns.
    model : str {"additive", "multiplicative"}
        Type of seasonal component. Abbreviations are accepted.
    filt : array-like
        The filter coefficients for filtering out the seasonal component.
        The concrete moving average method used in filtering is determined
        by two_sided.
    freq : int, optional
        Frequency of the series. Must be used if x is not a pandas object.
        Overrides default periodicity of x if x is a pandas object with a
        timeseries index.
    two_sided : bool
        The moving average method used in filtering. If True (default), a
        centered moving average is computed using the filt. If False, the
        filter coefficients are for past values only.
    extrapolate_trend : int or 'freq', optional
        If set to > 0, the trend resulting from the convolution is linear
        least-squares extrapolated on both ends (or the single one if
        two_sided is False) considering this many (+1) closest points. If
        set to 'freq', use `freq` closest points. Setting this parameter
        results in no NaN values in trend or resid components.

    Returns
    -------
    results : obj
        A object with seasonal, trend, and resid attributes.

    Notes
    -----
    This is a naive decomposition. More sophisticated methods should
    be preferred.

    The additive model is Y[t] = T[t] + S[t] + e[t]

    The multiplicative model is Y[t] = T[t] * S[t] * e[t]

    The seasonal component is first removed by applying a convolution
    filter to the data. The average of this smoothed series for each
    period is the returned seasonal component.

    See Also
    --------
    statsmodels.tsa.filters.bk_filter.bkfilter
    statsmodels.tsa.filters.cf_filter.cffilter
    statsmodels.tsa.filters.hp_filter.hpfilter
    statsmodels.tsa.filters.convolution_filter
    """
    # The index frequency is only looked up when the caller gave no freq.
    if freq is None:
        wrap, index_freq = _maybe_get_pandas_wrapper_freq(x)
    else:
        wrap = _maybe_get_pandas_wrapper(x)
        index_freq = None

    x = np.asanyarray(x).squeeze()
    nobs = len(x)

    if not np.all(np.isfinite(x)):
        raise ValueError("This function does not handle missing values")
    multiplicative = model.startswith('m')
    if multiplicative and np.any(x <= 0):
        raise ValueError("Multiplicative seasonality is not appropriate "
                         "for zero and negative values")

    if freq is None:
        if index_freq is None:
            raise ValueError("You must specify a freq or x must be a "
                             "pandas object with a timeseries index with "
                             "a freq not set to None")
        freq = freq_to_period(index_freq)

    if filt is None:
        if freq % 2 == 0:
            # Even frequency: split weights at ends.
            filt = np.array([.5] + [1] * (freq - 1) + [.5]) / freq
        else:
            filt = np.repeat(1. / freq, freq)

    trend = convolution_filter(x, filt, int(two_sided) + 1)

    if extrapolate_trend == 'freq':
        extrapolate_trend = freq - 1
    if extrapolate_trend > 0:
        trend = _extrapolate_trend(trend, extrapolate_trend + 1)

    detrended = x / trend if multiplicative else x - trend

    period_averages = seasonal_mean(detrended, freq)
    if multiplicative:
        period_averages /= np.mean(period_averages, axis=0)
    else:
        period_averages -= np.mean(period_averages, axis=0)

    # Repeat the per-period pattern enough times, then trim to nobs.
    seasonal = np.tile(period_averages.T, nobs // freq + 1).T[:nobs]

    resid = x / seasonal / trend if multiplicative else detrended - seasonal

    seasonal_out, trend_out, resid_out, observed_out = lmap(
        wrap, [seasonal, trend, resid, x])
    return DecomposeResult(seasonal=seasonal_out, trend=trend_out,
                           resid=resid_out, observed=observed_out)
def seasonal_decompose(x, model="additive", filt=None, freq=None,
                       two_sided=True, extrapolate_trend=0):
    """
    Seasonal decomposition using moving averages

    Parameters
    ----------
    x : array-like
        Time series. If 2d, individual series are in columns.
    model : str {"additive", "multiplicative"}
        Type of seasonal component. Abbreviations are accepted.
    filt : array-like
        The filter coefficients for filtering out the seasonal component.
        The concrete moving average method used in filtering is determined
        by two_sided.
    freq : int, optional
        Frequency of the series. Must be used if x is not a pandas object.
        Overrides default periodicity of x if x is a pandas object with a
        timeseries index.
    two_sided : bool
        The moving average method used in filtering. If True (default), a
        centered moving average is computed using the filt. If False, the
        filter coefficients are for past values only.
    extrapolate_trend : int or 'freq', optional
        If set to > 0, the trend resulting from the convolution is linear
        least-squares extrapolated on both ends (or the single one if
        two_sided is False) considering this many (+1) closest points. If
        set to 'freq', use `freq` closest points. Setting this parameter
        results in no NaN values in trend or resid components.

    Returns
    -------
    results : obj
        A object with seasonal, trend, and resid attributes.

    Notes
    -----
    This is a naive decomposition. More sophisticated methods should
    be preferred.

    The additive model is Y[t] = T[t] + S[t] + e[t]

    The multiplicative model is Y[t] = T[t] * S[t] * e[t]

    The seasonal component is first removed by applying a convolution
    filter to the data. The average of this smoothed series for each
    period is the returned seasonal component.

    See Also
    --------
    statsmodels.tsa.filters.bk_filter.bkfilter
    statsmodels.tsa.filters.cf_filter.cffilter
    statsmodels.tsa.filters.hp_filter.hpfilter
    statsmodels.tsa.filters.convolution_filter
    """
    # The index frequency is only looked up when the caller gave no freq.
    if freq is None:
        wrap, index_freq = _maybe_get_pandas_wrapper_freq(x)
    else:
        wrap = _maybe_get_pandas_wrapper(x)
        index_freq = None

    x = np.asanyarray(x).squeeze()
    nobs = len(x)

    if not np.all(np.isfinite(x)):
        raise ValueError("This function does not handle missing values")
    multiplicative = model.startswith('m')
    if multiplicative and np.any(x <= 0):
        raise ValueError("Multiplicative seasonality is not appropriate "
                         "for zero and negative values")

    if freq is None:
        if index_freq is None:
            raise ValueError("You must specify a freq or x must be a "
                             "pandas object with a timeseries index with "
                             "a freq not set to None")
        freq = freq_to_period(index_freq)

    if filt is None:
        if freq % 2 == 0:
            # Even frequency: split weights at ends.
            filt = np.array([.5] + [1] * (freq - 1) + [.5]) / freq
        else:
            filt = np.repeat(1./freq, freq)

    trend = convolution_filter(x, filt, int(two_sided) + 1)

    if extrapolate_trend == 'freq':
        extrapolate_trend = freq - 1
    if extrapolate_trend > 0:
        trend = _extrapolate_trend(trend, extrapolate_trend + 1)

    detrended = x / trend if multiplicative else x - trend

    period_averages = seasonal_mean(detrended, freq)
    if multiplicative:
        period_averages /= np.mean(period_averages, axis=0)
    else:
        period_averages -= np.mean(period_averages, axis=0)

    # Repeat the per-period pattern enough times, then trim to nobs.
    seasonal = np.tile(period_averages.T, nobs // freq + 1).T[:nobs]

    resid = x / seasonal / trend if multiplicative else detrended - seasonal

    seasonal_out, trend_out, resid_out, observed_out = lmap(
        wrap, [seasonal, trend, resid, x])
    return DecomposeResult(seasonal=seasonal_out, trend=trend_out,
                           resid=resid_out, observed=observed_out)