Пример #1
0
def test_autoreg_append_deterministic(append_data):
    """Check ``append`` on AutoReg results that use a deterministic process.

    With ``refit=True`` the appended results are compared against a model
    fit directly on the combined sample (with and without ``exog``).
    For NumPy input appended without ``refit``, the parameters are asserted
    to be unchanged.
    """
    endog, endog_oos, endog_full = (
        append_data.y,
        append_data.y_oos,
        append_data.y_both,
    )
    exog, exog_oos, exog_full = (
        append_data.x,
        append_data.x_oos,
        append_data.x_both,
    )

    det_proc = DeterministicProcess(
        endog.index,
        additional_terms=[TimeTrend(constant=True, order=1), Seasonality(12)],
    )
    # Options shared by every model built in this test.
    ar_opts = {"lags": 3, "trend": "n"}

    # pandas input, no exog: append + refit vs. direct fit on the full data.
    fitted = AutoReg(endog, deterministic=det_proc, **ar_opts).fit()
    appended = fitted.append(endog_oos, refit=True)
    direct = AutoReg(
        endog_full, deterministic=det_proc.apply(endog_full.index), **ar_opts
    ).fit()
    assert_allclose(appended.params, direct.params)

    # NumPy input, appended without refit: parameters must not change.
    fitted_np = AutoReg(
        np.asarray(endog), deterministic=det_proc, **ar_opts
    ).fit()
    appended_np = fitted_np.append(np.asarray(endog_oos))
    assert_allclose(fitted_np.params, appended_np.params)

    # pandas input with exog: append + refit vs. direct fit on the full data.
    fitted = AutoReg(
        endog, exog=exog, deterministic=det_proc, **ar_opts
    ).fit()
    appended = fitted.append(endog_oos, exog=exog_oos, refit=True)
    direct = AutoReg(
        endog_full,
        exog=exog_full,
        deterministic=det_proc.apply(endog_full.index),
        **ar_opts,
    ).fit()
    assert_allclose(appended.params, direct.params)
Пример #2
0
def test_range_error():
    """``range`` is expected to raise ``TypeError`` for the index built here."""
    # Integer index that is irregular and contains a repeated value.
    irregular_index = pd.Index([0, 1, 1, 2, 3, 5, 8, 13])
    det_proc = DeterministicProcess(
        irregular_index, constant=True, order=2, seasonal=True, period=2
    )
    expected_message = "The index in the deterministic"
    with pytest.raises(TypeError, match=expected_message):
        det_proc.range(0, 12)
Пример #3
0
    def check(self, dp, X):
        """Validate the learner's ``DeterministicProcess`` and feature matrix.

        Parameters
        ----------
        dp
            The process created by the learner. Its private configuration
            attributes (``_index``, ``_constant``, ``_order``, ``_seasonal``,
            ``_additional_terms``, ``_drop``) are inspected.
        X
            The learner's feature matrix, compared with the reference
            ``in_sample()`` output through ``assert_equal``.

        Raises
        ------
        AssertionError
            When any inspected setting differs from the reference
            configuration.
        """
        import pandas as pd
        from statsmodels.tsa.deterministic import (CalendarFourier,
                                                   DeterministicProcess)

        y = load_average_sales()['2017']
        fourier = CalendarFourier(freq='M', order=4)
        # The reference process gets its own name. Rebinding ``dp`` here would
        # discard the learner's object and make every assertion below inspect
        # the reference instead, so the checks could never fail.
        dp_true = DeterministicProcess(
            index=y.index,
            constant=True,
            order=1,
            seasonal=True,
            additional_terms=[fourier],
            drop=True,
        )
        X_true = dp_true.in_sample()

        # ``Index.equals`` returns False for indexes of different length,
        # whereas an elementwise ``==`` would raise before the assertion
        # message could be shown.
        assert dp._index.equals(
            y.index
        ), f"`index` argument to `DeterministicProcess` should be `y.index`. You gave {dp._index}."
        assert dp._constant, f"`constant` argument to `DeterministicProcess` should be `True`. You gave {dp._constant}."
        assert dp._order == 1, f"`order` argument to `DeterministicProcess` should be `1`. You gave {dp._order}."
        assert dp._seasonal, f"`seasonal` argument to `DeterministicProcess` should be `True`. You gave {dp._seasonal}."
        assert len(
            dp._additional_terms
        ) == 1, f"`additional_terms` argument to `DeterministicProcess` should be `[fourier]`. You gave {dp._additional_terms}."
        assert isinstance(
            dp._additional_terms[0], CalendarFourier
        ), f"`additional_terms` argument to `DeterministicProcess` should be `[fourier]`. You gave {dp._additional_terms}."
        assert dp._additional_terms[
            0]._order == 4, f"`order` argument to `CalendarFourier` should be `4`. You gave {dp._additional_terms[0]._order}."
        assert isinstance(
            dp._additional_terms[0]._freq, pd.offsets.MonthEnd
        ), f"`freq` argument to `CalendarFourier` should be `'M'`."
        # The message names the argument actually being checked (`drop`).
        assert dp._drop, f"`drop` argument to `DeterministicProcess` should be `True`. You gave {dp._drop}."
        assert_equal(X, X_true, 'X')
Пример #4
0
def test_drop():
    """Check shapes and column names produced with ``drop=True``.

    Both the in-sample and out-of-sample frames must have four columns with
    identical labels, and each expected name fragment must appear in exactly
    one column label.
    """
    index = pd.RangeIndex(0, 200)
    dummy_term = DummyTerm()
    # Exercise string conversion; the resulting text is not inspected.
    str(dummy_term)
    assert dummy_term != TimeTrend()

    det_proc = DeterministicProcess(
        index, additional_terms=[dummy_term], drop=True
    )
    in_sample = det_proc.in_sample()
    assert in_sample.shape == (200, 4)

    forecast = det_proc.out_of_sample(37)
    assert forecast.shape == (37, 4)
    assert list(forecast.columns) == list(in_sample.columns)

    for fragment in ("const", "trend", "dummy", "normal"):
        matching = [label for label in forecast if fragment in label]
        assert len(matching) == 1
Пример #5
0
class Q3(EqualityCheckProblem):  # Create trend feature
    # Exercise definition. The reference values below are computed in the
    # class body, i.e. once, when this class statement is executed.
    from statsmodels.tsa.deterministic import DeterministicProcess

    # Reference answer: a deterministic process over the index of the average
    # sales data, with `order=3` passed as the trend order.
    y = load_average_sales()
    dp = DeterministicProcess(index=y.index, order=3)
    # Features for the observed index, and for the 90 steps that follow it.
    X = dp.in_sample()
    X_fore = dp.out_of_sample(steps=90)

    # Learner variable names and their reference values, in matching order.
    # NOTE(review): presumably consumed by EqualityCheckProblem -- confirm
    # against the base class.
    _vars = ['X', 'X_fore']
    _expected = [X, X_fore]
    # Fill-in-the-blank template shown as a hint.
    _hint = """Your answer should look like:
```python
from statsmodels.tsa.deterministic import DeterministicProcess

y = average_sales.copy()

dp = DeterministicProcess(index=____, order=____)
X = dp.in_sample()
X_fore = dp.out_of_sample(steps=____)
```
"""
    # Full solution code; it uses the same arguments as the reference above.
    _solution = CS("""
from statsmodels.tsa.deterministic import DeterministicProcess

y = average_sales.copy()

dp = DeterministicProcess(index=y.index, order=3)
X = dp.in_sample()
X_fore = dp.out_of_sample(steps=90)
""")
Пример #6
0
def test_deterministic_process(time_index, constant, order, seasonal, fourier,
                               period, drop):
    """Smoke-test construction, ``in_sample`` and ``out_of_sample``.

    Parameter combinations that enable both ``seasonal`` and ``fourier`` are
    not exercised; the test returns immediately for them.
    """
    if seasonal and fourier:
        return

    settings = {
        "constant": constant,
        "order": order,
        "seasonal": seasonal,
        "fourier": fourier,
        "period": period,
        "drop": drop,
    }
    det_proc = DeterministicProcess(time_index, **settings)

    # In-sample terms must carry the original index.
    in_sample = det_proc.in_sample()
    pd.testing.assert_index_equal(in_sample.index, time_index)

    # Out-of-sample terms must come back as a DataFrame.
    forecast = det_proc.out_of_sample(23)
    assert isinstance(forecast, pd.DataFrame)
Пример #7
0
def test_range_casting():
    """An ``np.arange`` index must give the same terms as a ``RangeIndex``."""
    settings = {"constant": True, "order": 1, "seasonal": True, "period": 12}
    from_array = DeterministicProcess(np.arange(120), **settings)
    from_range = DeterministicProcess(pd.RangeIndex(0, 120), **settings)

    pd.testing.assert_frame_equal(
        from_array.in_sample(), from_range.in_sample()
    )
    pd.testing.assert_frame_equal(
        from_array.range(100, 150), from_range.range(100, 150)
    )
Пример #8
0
def test_drop_two_consants(time_index):
    """A constant requested twice must collapse to one column with ``drop``.

    The process asking for a constant both directly and through the trend
    term must yield two columns, and must equal the process that only gets
    the constant from the trend term.
    """
    trend = TimeTrend(constant=True, order=1)

    doubled = DeterministicProcess(
        time_index, constant=True, additional_terms=[trend], drop=True
    )
    n_columns = doubled.in_sample().shape[1]
    assert n_columns == 2

    single = DeterministicProcess(
        time_index, additional_terms=[trend], drop=True
    )
    pd.testing.assert_frame_equal(doubled.in_sample(), single.in_sample())
Пример #9
0
def test_deterministic(reset_randomstate):
    """Compare a custom deterministic process with the built-in options.

    A model using a ``DeterministicProcess`` (time trend with constant plus a
    12-period seasonality) must produce the same parameters as a model using
    ``trend="ct"``, ``seasonal=True`` and ``period=12``.  Passing a trend
    together with ``deterministic`` must emit a ``RuntimeWarning``.
    """
    endog = pd.Series(np.random.normal(size=200))
    det_proc = DeterministicProcess(
        endog.index,
        additional_terms=[TimeTrend(constant=True, order=1), Seasonality(12)],
    )

    custom_fit = AutoReg(
        endog, trend="n", seasonal=False, lags=2, deterministic=det_proc
    ).fit()
    builtin_fit = AutoReg(
        endog, trend="ct", seasonal=True, lags=2, period=12
    ).fit()
    assert_almost_equal(
        np.asarray(custom_fit.params), np.asarray(builtin_fit.params)
    )

    warning_text = "When using deterministic, trend"
    with pytest.warns(RuntimeWarning, match=warning_text):
        AutoReg(
            endog, trend="ct", seasonal=False, lags=2, deterministic=det_proc
        )
Пример #10
0
def test_additional_terms(time_index):
    """Check ``additional_terms`` equivalence and duplicate detection.

    A trend supplied through ``additional_terms`` must match the same trend
    requested with ``constant``/``order``.  Supplying the term twice, or
    alongside the equivalent keyword settings, must raise ``ValueError``.
    """
    trend_terms = [TimeTrend(True, order=1)]
    duplicate_message = "One or more terms in additional_terms"

    via_terms = DeterministicProcess(time_index, additional_terms=trend_terms)
    via_keywords = DeterministicProcess(time_index, constant=True, order=1)
    pd.testing.assert_frame_equal(
        via_terms.in_sample(), via_keywords.in_sample()
    )

    # The same term object listed twice.
    with pytest.raises(ValueError, match=duplicate_message):
        DeterministicProcess(
            time_index, additional_terms=[*trend_terms, *trend_terms]
        )

    # The term combined with the equivalent keyword configuration.
    with pytest.raises(ValueError, match=duplicate_message):
        DeterministicProcess(
            time_index, constant=True, order=1, additional_terms=trend_terms
        )
Пример #11
0
def test_non_unit_range():
    """``range`` must raise ``ValueError`` for a ``RangeIndex`` with step 7."""
    stepped_index = pd.RangeIndex(0, 700, 7)
    det_proc = DeterministicProcess(stepped_index, constant=True)
    expected_message = "The step of the index is not 1"
    with pytest.raises(ValueError, match=expected_message):
        det_proc.range(11, 900)
Пример #12
0
class Q4(EqualityCheckProblem):  # Create time series features
    # Exercise definition. Everything below runs in the class body, i.e. once,
    # when this class statement is executed -- including the model fit.
    import pandas as pd
    from sklearn.linear_model import LinearRegression
    from statsmodels.tsa.deterministic import (CalendarFourier,
                                               DeterministicProcess)
    family_sales = load_family_sales()
    # Select the sales and promotion columns for one product family and give
    # the resulting series readable names.
    y = family_sales.loc[:,
                         ('sales',
                          'SCHOOL AND OFFICE SUPPLIES')].rename("Supply Sales")
    onpromotion = family_sales.loc[:, (
        'onpromotion', 'SCHOOL AND OFFICE SUPPLIES')].rename("onpromotion")
    # Deterministic time features: constant, order-1 trend, seasonal terms and
    # an additional calendar Fourier term (freq 'M', order 4).
    fourier = CalendarFourier(freq='M', order=4)
    dp = DeterministicProcess(
        constant=True,
        index=y.index,
        order=1,
        seasonal=True,
        drop=True,
        additional_terms=[fourier],
    )
    X_time = dp.in_sample()
    # Boolean flag for index entries that fall on the first day of a year.
    X_time['NewYearsDay'] = (X_time.index.dayofyear == 1)
    # No intercept is fitted by the regression itself; the process above was
    # already asked for a constant column.
    model = LinearRegression(fit_intercept=False)
    model.fit(X_time, y)
    # Residual of the time-feature regression, used as the series to lag.
    y_deseason = y - model.predict(X_time)
    y_deseason.name = 'sales_deseasoned'
    # NOTE(review): make_lags / make_leads are defined outside this view;
    # presumably they return shifted copies of the input -- confirm.
    X_lags = make_lags(y_deseason, lags=1)
    X_promo = pd.concat([
        make_lags(onpromotion, lags=1),
        onpromotion,
        make_leads(onpromotion, leads=1),
    ],
                        axis=1)

    # Learner variable names and their reference values, in matching order.
    # NOTE(review): presumably consumed by EqualityCheckProblem -- confirm
    # against the base class.
    _vars = ['X_lags', 'X_promo']
    _expected = [X_lags, X_promo]

    # Fill-in-the-blank template shown as a hint.
    _hint = """Your solution should look like:
```python
X_lags = make_lags(y_deseason, lags=____)

X_promo = pd.concat([
    make_lags(onpromotion, lags=____),
    onpromotion,
    make_leads(onpromotion, leads=____),
], axis=1)

X = pd.concat([X_time, X_lags, X_promo], axis=1).dropna()
y, X = y.align(X, join='inner')
```
"""
    # Full solution code; it uses the same arguments as the reference above.
    _solution = CS("""
X_lags = make_lags(y_deseason, lags=1)

X_promo = pd.concat([
    make_lags(onpromotion, lags=1),
    onpromotion,
    make_leads(onpromotion, leads=1),
], axis=1)

X = pd.concat([X_time, X_lags, X_promo], axis=1).dropna()
y, X = y.align(X, join='inner')
""")
Пример #13
0
# `DeterministicProcess`. These can include a constant, a time trend of any
# order, and either a seasonal or a Fourier component.
#
# The process requires an index, which is the index of the full-sample (or
# in-sample).
#
# First, we initialize a deterministic process with a constant, a linear
# time trend, and a 5-period seasonal term. The `in_sample` method returns
# the full set of values that match the index.

from statsmodels.tsa.deterministic import DeterministicProcess

# Integer index covering 0 through 99 (100 observations).
index = pd.RangeIndex(0, 100)
det_proc = DeterministicProcess(index,
                                constant=True,
                                order=1,
                                seasonal=True,
                                period=5)
# Bare expression: the returned terms are not stored in a variable.
det_proc.in_sample()

# The `out_of_sample` method returns the next `steps` values after the end
# of the in-sample period.

det_proc.out_of_sample(15)

# `range(start, stop)` can also be used to produce the deterministic terms
# over any range including in- and out-of-sample.
#
# ### Notes
#
# * When the index is a pandas `DatetimeIndex` or a `PeriodIndex`, then
Пример #14
0
def test_deterministic_process_errors(time_index):
    """Invalid constructor arguments must raise the expected exceptions."""
    # Seasonal dummies and Fourier terms requested together.
    both_message = "seasonal and fourier"
    with pytest.raises(ValueError, match=both_message):
        DeterministicProcess(time_index, seasonal=True, fourier=2, period=5)

    # An entry in ``additional_terms`` that is a plain integer.
    type_message = "All additional terms"
    with pytest.raises(TypeError, match=type_message):
        DeterministicProcess(time_index, seasonal=True, additional_terms=[1])
Пример #15
0
# includes them as exogenous regressors.

from statsmodels.tsa.api import SARIMAX

# The AR part of `order` is given as a tuple of lags (1, 5, 12, 13) rather
# than a single integer; see the SARIMAX documentation for how an iterable
# is interpreted.
sarimax_mod = SARIMAX(ind_prod, order=((1, 5, 12, 13), 0, 0), trend="c")
sarimax_res = sarimax_mod.fit()
print(sarimax_res.summary())

# Drop the last SARIMAX parameter so the remaining ones can be relabelled
# with the AutoReg parameter names and placed side by side.
# NOTE(review): presumably the dropped entry is the innovation variance,
# which AutoReg does not report as a parameter -- confirm.
sarimax_params = sarimax_res.params.iloc[:-1].copy()
sarimax_params.index = res_glob.params.index
params = pd.concat([res_glob.params, sarimax_params], axis=1, sort=False)
params.columns = ["AutoReg", "SARIMAX"]
params

# ## Custom Deterministic Processes
#
# The `deterministic` parameter allows a custom `DeterministicProcess` to
# be used. This allows for more complex deterministic terms to be
# constructed, for example one that includes seasonal components with two
# periods, or, as the next example shows, one that uses a Fourier series
# rather than seasonal dummies.

from statsmodels.tsa.deterministic import DeterministicProcess

# Constant plus Fourier terms (`fourier=2`, `period=12`); the model's own
# trend and seasonal options are switched off because the deterministic
# terms come from `dp`.
dp = DeterministicProcess(housing.index, constant=True, period=12, fourier=2)
mod = AutoReg(housing, 2, trend="n", seasonal=False, deterministic=dp)
res = mod.fit()
print(res.summary())

# Plot predictions for positions 720 through 840.
# NOTE(review): whether this range extends past the sample depends on the
# length of `housing`, which is not visible here.
fig = res.plot_predict(720, 840)
Пример #16
0
def test_range_index_basic():
    """Exercise ``range`` on date, period and integer range indexes.

    The valid calls only need to complete without raising; the invalid
    ones must raise ``ValueError``.
    """
    date_spans = [
        ("2001-1-1", "2008-1-1"),
        ("2001-1-1", "2015-1-1"),
        ("2013-1-1", "2008-1-1"),
    ]
    position_spans = [(0, 100), (100, 150), (130, 150)]

    # DatetimeIndex: date-string spans first, then integer spans.
    date_index = pd.date_range("2000-1-1", freq="M", periods=120)
    det_proc = DeterministicProcess(
        date_index, constant=True, order=1, seasonal=True
    )
    for start, stop in date_spans + position_spans:
        det_proc.range(start, stop)
    with pytest.raises(ValueError):
        det_proc.range("1990-1-1", "2010-1-1")

    # PeriodIndex: same spans, with a negative start rejected in between.
    period_index = pd.period_range("2000-1-1", freq="M", periods=120)
    det_proc = DeterministicProcess(
        period_index, constant=True, order=1, seasonal=True
    )
    for start, stop in date_spans:
        det_proc.range(start, stop)
    with pytest.raises(ValueError, match="start must be non-negative"):
        det_proc.range(-7, 200)

    for start, stop in position_spans:
        det_proc.range(start, stop)

    # RangeIndex: integer spans only; the period is passed explicitly.
    range_index = pd.RangeIndex(0, 120)
    det_proc = DeterministicProcess(
        range_index, constant=True, order=1, seasonal=True, period=12
    )
    for start, stop in [(0, 100), (100, 150), (120, 150), (130, 150)]:
        det_proc.range(start, stop)
    with pytest.raises(ValueError):
        det_proc.range(-10, 0)
Пример #17
0
def test_determintic_term_equiv(index):
    """Terms from ``index`` must equal those from a 200-element RangeIndex.

    The comparison covers in-sample values, a 37-step forecast and several
    ``range`` windows.
    """
    settings = {"constant": True, "order": 2}
    reference = DeterministicProcess(pd.RangeIndex(0, 200), **settings)
    candidate = DeterministicProcess(index, **settings)

    np.testing.assert_array_equal(
        reference.in_sample(), candidate.in_sample()
    )
    np.testing.assert_array_equal(
        reference.out_of_sample(37), candidate.out_of_sample(37)
    )
    for start, stop in [(200, 237), (50, 150), (50, 250)]:
        np.testing.assert_array_equal(
            reference.range(start, stop), candidate.range(start, stop)
        )