Пример #1
0
    def test_odd_length_filter(self):
        """Check odd-length filters against the stored reference values.

        Runs ``convolution_filter`` (default ``nsides`` and ``nsides=1``) and
        ``recursive_filter`` on ``self.data[0]`` with a five-term filter, and
        verifies both the values and the first/last index entries.
        """
        first_date = datetime(1951, 3, 31)
        last_date = datetime(1958, 12, 31)
        series = self.data[0]
        coefs = [.75, .5, .3, .2, .1]

        def check_span(result):
            # the filtered output must keep the input's first and last dates
            np.testing.assert_(result.index[0] == first_date)
            np.testing.assert_(result.index[-1] == last_date)

        # convolution with the default nsides
        filtered = convolution_filter(series, coefs)
        reference = self.expected.conv2_odd
        np.testing.assert_almost_equal(filtered.values.squeeze(), reference)
        check_span(filtered)

        # convolution with nsides=1
        filtered = convolution_filter(series, coefs, nsides=1)
        reference = self.expected.conv1_odd
        np.testing.assert_almost_equal(filtered.values.squeeze(), reference)
        check_span(filtered)

        # recursive filter with explicit initial values; not a stable filter
        filtered = recursive_filter(series, coefs,
                                    init=[150, 100, 125, 135, 145])
        reference = self.expected.recurse_odd
        # the R reference output only has 12 characters and the values grow
        # large, so compare to 4 decimals only
        np.testing.assert_almost_equal(filtered.values.squeeze(), reference, 4)
        check_span(filtered)
Пример #2
0
 def test_pandas2d(self):
     """Check that filtering a two-column pandas object keeps its index ends."""
     first_date = datetime(1951, 3, 31)
     last_date = datetime(1958, 12, 31)
     # same column twice, side by side
     two_columns = concat((self.data[0], self.data[0]), axis=1)
     filt_2d = [[.75, .75], [.25, .25]]
     filtered = convolution_filter(two_columns, filt_2d)
     assert_(filtered.index[0] == first_date)
     assert_(filtered.index[-1] == last_date)
Пример #3
0
    def test_convolution(self):
        """Compare 1-d convolution results with the stored references.

        The filter is run with the default ``nsides`` and with ``nsides=1``,
        first on ``self.data`` and then on ``self.datana``.
        """
        weights = [.75, .25]

        values = self.data.values.squeeze()
        np.testing.assert_almost_equal(
            convolution_filter(values, weights), self.expected.conv2)
        np.testing.assert_almost_equal(
            convolution_filter(values, weights, nsides=1),
            self.expected.conv1)

        values_na = self.datana.values.squeeze()
        np.testing.assert_almost_equal(
            convolution_filter(values_na, weights), self.expected.conv2_na)
        np.testing.assert_almost_equal(
            convolution_filter(values_na, weights, nsides=1),
            self.expected.conv1_na)
Пример #4
0
def _moving_average(table, input_cols, weights_array=None, window_size=1, weights='uniform_weights', mode='past_values_only'):
    """Append a normalised moving-average column for each input column.

    Parameters
    ----------
    table : object with ``copy()`` and column indexing
        Input table; it is copied, the copy receives the new columns.
    input_cols : iterable of str
        Names of the columns to smooth.
    weights_array : array-like, optional
        Filter weights. Replaced by ``np.ones(window_size)`` when ``weights``
        is ``'uniform_weights'``.
    window_size : int
        Length of the uniform weight vector.
    weights : str
        ``'uniform_weights'`` selects uniform weights; any other value uses
        ``weights_array`` as given.
    mode : str
        ``'centered_moving_average'`` passes ``nsides=2`` to the filter; any
        other value passes ``nsides=1``.

    Returns
    -------
    dict
        ``{'out_table': <copied table with '<column>_MA' columns added>}``.
    """
    result = table.copy()
    nsides = 2 if mode == 'centered_moving_average' else 1
    if weights == 'uniform_weights':
        weights_array = np.ones(window_size)
    for col in input_cols:
        filtered = sm.convolution_filter(result[col], weights_array, nsides)
        # divide by the weight total so the weights act as an average
        result[col + '_MA'] = filtered / sum(weights_array)
    return {'out_table': result}
Пример #5
0
    def test_convolution2d(self):
        """Compare 2-d convolution results with the stored references.

        Uses a column-shaped filter on ``self.data.values`` and
        ``self.datana.values``, plus a two-column input built with ``np.c_``.
        """
        column_filt = [[.75], [.25]]
        check = np.testing.assert_almost_equal

        values = self.data.values
        out = convolution_filter(values, column_filt)
        reference = self.expected.conv2
        check(out, reference[:, None])
        # two identical columns must give two identical reference columns
        out = convolution_filter(np.c_[values, values],
                                 [[.75, .75], [.25, .25]])
        check(out, np.c_[reference, reference])

        out = convolution_filter(values, column_filt, nsides=1)
        reference = self.expected.conv1
        check(out, reference[:, None])

        values_na = self.datana.values
        out = convolution_filter(values_na, column_filt)
        reference = self.expected.conv2_na
        check(out, reference[:, None])

        out = convolution_filter(values_na, column_filt, nsides=1)
        reference = self.expected.conv1_na
        check(out, reference[:, None])
Пример #6
0
    def test_pandas(self):
        """Check that pandas inputs keep their first and last index entries.

        Covers ``convolution_filter`` (default ``nsides`` and ``nsides=1``)
        and ``recursive_filter`` on ``self.data[0]``, then
        ``recursive_filter`` on ``self.datana``.
        """
        first_date = datetime(1951, 3, 31)
        last_date = datetime(1958, 12, 31)
        weights = [.75, .25]

        def check_span(result):
            assert_(result.index[0] == first_date)
            assert_(result.index[-1] == last_date)

        series = self.data[0]
        check_span(convolution_filter(series, weights))

        # NOTE(review): the original comment here read "with no nan-padding
        # q1 if not" -- presumably the end date only matches because the
        # one-sided output is nan-padded; confirm.
        check_span(convolution_filter(series, weights, nsides=1))

        check_span(recursive_filter(series, weights))

        series_na = self.datana
        check_span(recursive_filter(series_na, weights))
Пример #7
0
def seasonal_decompose(x, model="additive", filt=None, freq=None):
    """
    Split a series into seasonal, trend and residual parts.

    Parameters
    ----------
    x : array-like
        Time series.
    model : str {"additive", "multiplicative"}
        Type of seasonal component. Only the first letter is inspected, so
        abbreviations are accepted.
    filt : array-like, optional
        Filter coefficients used to estimate the trend. When omitted, a
        moving average of length ``freq`` is built (with half weights at
        both ends when ``freq`` is even).
    freq : int, optional
        Frequency of the series. Required when no frequency can be obtained
        from ``x``; if both are available they must agree.

    Returns
    -------
    results : obj
        A ``DecomposeResult`` built with seasonal, trend, resid and observed
        components.

    Raises
    ------
    ValueError
        If ``x`` contains non-finite values, if a multiplicative model is
        requested for data containing zero or negative values, if ``freq``
        disagrees with the frequency obtained from ``x``, or if no frequency
        is available at all.

    Notes
    -----
    This is a naive decomposition. More sophisticated methods should
    be preferred.

    The additive model is Y[t] = T[t] + S[t] + e[t]

    The multiplicative model is Y[t] = T[t] * S[t] * e[t]

    The trend is estimated with a convolution filter and removed from the
    data. The per-period averages of the de-trended series, normalised by
    their mean, form the seasonal component.

    See Also
    --------
    statsmodels.tsa.filters.convolution_filter
    """
    _pandas_wrapper, pfreq = _maybe_get_pandas_wrapper_freq(x)
    x = np.asanyarray(x).squeeze()
    nobs = len(x)

    if not np.isfinite(x).all():
        raise ValueError("This function does not handle missing values")
    multiplicative = model.startswith('m')
    if multiplicative and np.any(x <= 0):
        raise ValueError("Multiplicative seasonality is not appropriate "
                         "for zero and negative values")

    # Settle on one frequency: a frequency obtained from x wins, but an
    # explicit freq must not contradict it.
    if pfreq is not None:
        pfreq = ym_freq_to_period.freq_to_period(pfreq)
        if freq and pfreq != freq:
            raise ValueError("Inferred frequency of index and frequency "
                             "don't match. This function does not re-sample")
        freq = pfreq
    elif freq is None:
        raise ValueError("You must specify a freq or x must be a "
                         "pandas object with a timeseries index")

    if filt is None:
        if freq % 2 == 0:  # split weights at ends
            filt = np.array([.5] + [1] * (freq - 1) + [.5]) / freq
        else:
            filt = np.repeat(1. / freq, freq)

    trend = convolution_filter(x, filt)
    detrended = x / trend if multiplicative else x - trend

    period_averages = seasonal_mean(detrended, freq)
    # normalise in place: mean 1 for multiplicative, mean 0 for additive
    if multiplicative:
        period_averages /= np.mean(period_averages)
    else:
        period_averages -= np.mean(period_averages)

    # repeat the per-period pattern and cut it to the series length
    repeats = nobs // freq + 1
    seasonal = np.tile(period_averages, repeats)[:nobs]

    if multiplicative:
        resid = x / seasonal / trend
    else:
        resid = detrended - seasonal

    wrapped_seasonal, wrapped_trend, wrapped_resid, wrapped_obs = lmap(
        _pandas_wrapper, [seasonal, trend, resid, x])
    return DecomposeResult(seasonal=wrapped_seasonal,
                           trend=wrapped_trend,
                           resid=wrapped_resid,
                           observed=wrapped_obs)
Пример #8
0
def seasonal_decompose(
    x,
    model="additive",
    filt=None,
    period=None,
    two_sided=True,
    extrapolate_trend=0,
):
    """
    Decompose a series into trend, seasonal and residual parts.

    Parameters
    ----------
    x : array_like
        Time series. If 2d, individual series are in columns. x must contain
        2 complete cycles.
    model : {"additive", "multiplicative"}, optional
        Type of seasonal component. Only the first letter is inspected, so
        abbreviations are accepted.
    filt : array_like, optional
        Filter coefficients used to estimate the trend. How they are applied
        depends on two_sided. When omitted, a moving average of length
        ``period`` is built (with half weights at both ends when ``period``
        is even).
    period : int, optional
        Period of the series. Must be given if x is not a pandas object or
        if the index of x does not have a frequency. When given, it is used
        instead of the frequency of x's index.
    two_sided : bool, optional
        The moving average method used in filtering.
        If True (default), a centered moving average is computed using the
        filt. If False, the filter coefficients are for past values only.
    extrapolate_trend : int or 'freq', optional
        If set to > 0, the trend resulting from the convolution is
        linear least-squares extrapolated on both ends (or the single one
        if two_sided is False) considering this many (+1) closest points.
        If set to 'freq', ``period`` closest points are used. Setting this
        parameter results in no NaN values in trend or resid components.

    Returns
    -------
    DecomposeResult
        An object built with seasonal, trend, resid and observed components.

    Raises
    ------
    ValueError
        If x contains non-finite values, if a multiplicative model is
        requested for data containing zero or negative values, if no period
        is available, or if x has fewer than two full periods of rows.

    See Also
    --------
    statsmodels.tsa.filters.bk_filter.bkfilter
        Baxter-King filter.
    statsmodels.tsa.filters.cf_filter.cffilter
        Christiano-Fitzgerald asymmetric, random walk filter.
    statsmodels.tsa.filters.hp_filter.hpfilter
        Hodrick-Prescott filter.
    statsmodels.tsa.filters.convolution_filter
        Linear filtering via convolution.
    statsmodels.tsa.seasonal.STL
        Season-Trend decomposition using LOESS.

    Notes
    -----
    This is a naive decomposition. More sophisticated methods should
    be preferred.

    The additive model is Y[t] = T[t] + S[t] + e[t]

    The multiplicative model is Y[t] = T[t] * S[t] * e[t]

    The trend is estimated first by applying a convolution filter to the
    data. It is then removed from the series, and the average of the
    de-trended series for each period is the returned seasonal component.
    """
    pw = PandasWrapper(x)
    pfreq = period
    if period is None:
        # fall back to whatever frequency x's index reports, if it has one
        pfreq = getattr(getattr(x, "index", None), "inferred_freq", None)

    x = array_like(x, "x", maxdim=2)
    nobs = len(x)

    if not np.isfinite(x).all():
        raise ValueError("This function does not handle missing values")
    multiplicative = model.startswith("m")
    if multiplicative and np.any(x <= 0):
        raise ValueError("Multiplicative seasonality is not appropriate "
                         "for zero and negative values")

    if period is None:
        if pfreq is None:
            raise ValueError(
                "You must specify a period or x must be a pandas object with "
                "a PeriodIndex or a DatetimeIndex with a freq not set to None")
        pfreq = freq_to_period(pfreq)
        period = pfreq
    if x.shape[0] < 2 * pfreq:
        raise ValueError(
            f"x must have 2 complete cycles requires {2 * pfreq} "
            f"observations. x only has {x.shape[0]} observation(s)")

    if filt is None:
        if period % 2 == 0:  # split weights at ends
            filt = np.array([0.5] + [1] * (period - 1) + [0.5]) / period
        else:
            filt = np.repeat(1.0 / period, period)

    nsides = int(two_sided) + 1
    trend = convolution_filter(x, filt, nsides)

    if extrapolate_trend == "freq":
        extrapolate_trend = period - 1
    if extrapolate_trend > 0:
        trend = _extrapolate_trend(trend, extrapolate_trend + 1)

    detrended = x / trend if multiplicative else x - trend

    period_averages = seasonal_mean(detrended, period)
    # normalise each column in place: mean 1 (multiplicative) or 0 (additive)
    if multiplicative:
        period_averages /= np.mean(period_averages, axis=0)
    else:
        period_averages -= np.mean(period_averages, axis=0)

    # repeat the per-period pattern along the rows and cut to series length
    repeats = nobs // period + 1
    seasonal = np.tile(period_averages.T, repeats).T[:nobs]

    if multiplicative:
        resid = x / seasonal / trend
    else:
        resid = detrended - seasonal

    components = (seasonal, trend, resid, x)
    labels = ("seasonal", "trend", "resid", None)
    results = [
        pw.wrap(component.squeeze(), columns=label)
        for component, label in zip(components, labels)
    ]
    return DecomposeResult(
        seasonal=results[0],
        trend=results[1],
        resid=results[2],
        observed=results[3],
    )
def seasonal_decompose(x, model="additive", filt=None, freq=None, two_sided=True,
                       extrapolate_trend=0):
    """
    Seasonal decomposition using moving averages.

    Parameters
    ----------
    x : array-like
        Time series. If 2d, individual series are in columns.
    model : str {"additive", "multiplicative"}
        Type of seasonal component. Only the first letter is inspected, so
        abbreviations are accepted.
    filt : array-like, optional
        Filter coefficients used to estimate the trend. How they are applied
        depends on two_sided. When omitted, a moving average of length
        ``freq`` is built (with half weights at both ends when ``freq`` is
        even).
    freq : int, optional
        Frequency of the series. Must be given if x is not a pandas object.
        When given, it is used instead of the frequency of x's index.
    two_sided : bool
        The moving average method used in filtering.
        If True (default), a centered moving average is computed using the
        filt. If False, the filter coefficients are for past values only.
    extrapolate_trend : int or 'freq', optional
        If set to > 0, the trend resulting from the convolution is
        linear least-squares extrapolated on both ends (or the single one
        if two_sided is False) considering this many (+1) closest points.
        If set to 'freq', use `freq` closest points. Setting this parameter
        results in no NaN values in trend or resid components.

    Returns
    -------
    results : obj
        A ``DecomposeResult`` built with seasonal, trend, resid and observed
        components.

    Raises
    ------
    ValueError
        If x contains non-finite values, if a multiplicative model is
        requested for data containing zero or negative values, or if no
        freq is given and none can be obtained from x.

    Notes
    -----
    This is a naive decomposition. More sophisticated methods should
    be preferred.

    The additive model is Y[t] = T[t] + S[t] + e[t]

    The multiplicative model is Y[t] = T[t] * S[t] * e[t]

    The trend is estimated first by applying a convolution filter to the
    data. It is then removed from the series, and the average of the
    de-trended series for each period is the returned seasonal component.

    See Also
    --------
    statsmodels.tsa.filters.bk_filter.bkfilter
    statsmodels.tsa.filters.cf_filter.cffilter
    statsmodels.tsa.filters.hp_filter.hpfilter
    statsmodels.tsa.filters.convolution_filter
    """
    # Only ask x for a frequency when the caller did not supply one.
    if freq is None:
        _pandas_wrapper, pfreq = _maybe_get_pandas_wrapper_freq(x)
    else:
        _pandas_wrapper = _maybe_get_pandas_wrapper(x)
        pfreq = None
    x = np.asanyarray(x).squeeze()
    nobs = len(x)

    if not np.all(np.isfinite(x)):
        raise ValueError("This function does not handle missing values")
    multiplicative = model.startswith('m')
    if multiplicative and np.any(x <= 0):
        raise ValueError("Multiplicative seasonality is not appropriate "
                         "for zero and negative values")

    if freq is None:
        if pfreq is not None:
            pfreq = freq_to_period(pfreq)
            freq = pfreq
        else:
            # The original literals were joined without a space
            # ("...index witha freq..."); the trailing space fixes that.
            raise ValueError("You must specify a freq or x must be a "
                             "pandas object with a timeseries index with "
                             "a freq not set to None")

    if filt is None:
        if freq % 2 == 0:  # split weights at ends
            filt = np.array([.5] + [1] * (freq - 1) + [.5]) / freq
        else:
            filt = np.repeat(1./freq, freq)

    nsides = int(two_sided) + 1
    trend = convolution_filter(x, filt, nsides)

    if extrapolate_trend == 'freq':
        extrapolate_trend = freq - 1

    if extrapolate_trend > 0:
        trend = _extrapolate_trend(trend, extrapolate_trend + 1)

    if multiplicative:
        detrended = x / trend
    else:
        detrended = x - trend

    period_averages = seasonal_mean(detrended, freq)

    # normalise each column in place: mean 1 (multiplicative) or 0 (additive)
    if multiplicative:
        period_averages /= np.mean(period_averages, axis=0)
    else:
        period_averages -= np.mean(period_averages, axis=0)

    # repeat the per-period pattern along the rows and cut to series length
    seasonal = np.tile(period_averages.T, nobs // freq + 1).T[:nobs]

    if multiplicative:
        resid = x / seasonal / trend
    else:
        resid = detrended - seasonal

    results = lmap(_pandas_wrapper, [seasonal, trend, resid, x])
    return DecomposeResult(seasonal=results[0], trend=results[1],
                           resid=results[2], observed=results[3])