def get_seasonal_dummies(df):
    """Accepts a time-indexed df of hourly data, returns hourly and weekday
    dummies as a df to be passed as exogenous variables in a SARIMAX model.

    Only the index of ``df`` drives the dummies; the first column is merely
    handed to the date featurizer as its ``y`` argument, so ``df`` must have
    at least one column (an ``IndexError`` is raised otherwise).

    The returned frame holds the weekday dummy columns followed by the hourly
    dummy columns, aligned on the index of ``df``. The index does not need to
    carry any particular name.
    """
    columns = df.columns
    new_df = df.copy()
    # the featurizer reads dates from a column, so expose the index as one
    new_df['time'] = new_df.index

    # create weekday dummy generator
    wday_dumgen = ppc.DateFeaturizer(column_name='time',
                                     with_day_of_month=False)

    # since all have the same index, we can use any column in the df to generate the day_dums
    _, wday_dums = wday_dumgen.fit_transform(new_df[columns[0]], new_df)

    # drop the columns that aren't dummies
    # NOTE(review): assumes the featurizer appends exactly seven weekday
    # columns at the end of the frame -- confirm against the featurizer docs
    wday_dums = wday_dums[wday_dums.columns[-7:]]

    # set the index for easy merging (re-assign rather than mutating a
    # column-sliced frame in place)
    wday_dums = wday_dums.set_index(new_df.index)

    # create hourly dummy generator and generate the dummies
    hourly_dumgen = CalendarSeasonality('H', 'D')
    hourly_dums = hourly_dumgen.in_sample(new_df.index)

    # merge results on the shared index; merging on the literal key 'time'
    # only worked when the caller's index happened to be named 'time'
    full_dums = wday_dums.merge(hourly_dums, left_index=True, right_index=True)

    return full_dums
示例#2
0
def test_calendar_seasonal_period_q():
    """Check the in-sample dummies for month-frequency data with a "Q" period.

    Each row must sum to one, and the active column is expected to cycle
    through positions 0, 1, 2 as the row number increases.
    """
    monthly_index = pd.date_range("2000-01-01", freq="M", periods=600)
    seasonality = CalendarSeasonality("M", period="Q")
    dummies = seasonality.in_sample(monthly_index)

    # Exactly one unit of weight per row.
    row_totals = dummies.sum(1)
    assert np.all(row_totals == 1.0)

    # The active column is expected at position ``row % 3``.
    n_rows = monthly_index.shape[0]
    for row in range(n_rows):
        assert dummies.iloc[row, row % 3] == 1.0
示例#3
0
def test_calendar_seasonality(time_index, forecast_index, freq_period):
    """Smoke-test CalendarSeasonality construction, term generation and dunders.

    The in-sample and out-of-sample results are discarded; the test only
    verifies that the calls succeed, that ``period`` and ``freq`` are exposed
    as strings, that ``str``/``repr``/``hash`` work, and that two instances
    built from the same arguments compare equal.
    """
    # NOTE(review): the second element of the fixture tuple is passed as the
    # constructor's first positional argument, and the first element as the
    # second -- confirm this matches the ordering the fixture produces.
    first, second = freq_period
    seasonality = CalendarSeasonality(second, first)
    seasonality.in_sample(time_index)

    # Without an explicit forecast index, fall back to a fixed 83-step horizon.
    if forecast_index is None:
        horizon = 83
    else:
        horizon = len(forecast_index)
    seasonality.out_of_sample(horizon, time_index, forecast_index)

    assert isinstance(seasonality.period, str)
    assert isinstance(seasonality.freq, str)

    # These only need to run without raising.
    str(seasonality)
    repr(seasonality)
    hash(seasonality)

    twin = CalendarSeasonality(second, first)
    assert seasonality == twin
示例#4
0
def test_calendar_seasonal_period_w():
    """Check the in-sample dummies for a "W" period at several frequencies.

    For each frequency, 600 observations starting at 2000-01-03 are
    generated. Every row must sum to one, and the active column is expected
    to cycle with the listed length (168 for "H", 5 for "B", 7 for "D").
    """
    weekly = "W"
    # (frequency alias, expected number of positions in one cycle)
    cases = (("H", 168), ("B", 5), ("D", 7))

    for alias, cycle_length in cases:
        stamps = pd.date_range("2000-01-03", freq=alias, periods=600)
        dummies = CalendarSeasonality(alias, period=weekly).in_sample(stamps)

        # Exactly one unit of weight per row.
        assert np.all(dummies.sum(1) == 1.0)

        # The active column is expected at position ``pos % cycle_length``.
        for pos in range(stamps.shape[0]):
            assert dummies.iloc[pos, pos % cycle_length] == 1.0
示例#5
0
def test_invalid_freq_period(time_index):
    """Check that invalid freq/period usage raises ``ValueError``.

    Two cases are covered: constructing with the ("H", "A") combination, and
    calling ``in_sample`` on a "B"-frequency instance with a "D"-frequency
    index. The ``time_index`` fixture is requested but not used in the body.
    """
    # The ("H", "A") pairing is expected to be rejected at construction time.
    with pytest.raises(ValueError, match="The combination of freq="):
        CalendarSeasonality("H", "A")

    # A valid instance must still reject an index whose frequency differs.
    business_weekly = CalendarSeasonality("B", "W")
    with pytest.raises(ValueError, match="freq is B but index contains"):
        daily_stamps = pd.date_range("2000-1-1", periods=10, freq="D")
        business_weekly.in_sample(daily_stamps)