def get_seasonal_dummies(df):
    """Build weekday and hour-of-day dummy variables for hourly data.

    Accepts a time-indexed frame of hourly data and returns weekday and
    hourly dummies as a frame, to be passed as exogenous variables in a
    SARIMAX model.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame whose index holds the observation timestamps. It must have at
        least one column: the first column is handed to the featurizer as
        the ``y`` argument.

    Returns
    -------
    pandas.DataFrame
        The last seven columns produced by ``ppc.DateFeaturizer`` (the
        weekday dummies) merged with the terms produced by
        ``CalendarSeasonality('H', 'D')``, aligned on ``df``'s index.
    """
    columns = df.columns
    new_df = df.copy()
    # The featurizer reads timestamps from a column, so expose the index as one.
    new_df['time'] = new_df.index

    # Weekday dummies. All columns share the same index, so any column can
    # serve as the ``y`` argument of fit_transform.
    wday_dumgen = ppc.DateFeaturizer(column_name='time', with_day_of_month=False)
    _, wday_dums = wday_dumgen.fit_transform(new_df[columns[0]], new_df)
    # Keep only the trailing dummy columns and re-attach the original index.
    # The non-inplace set_index avoids mutating a column-sliced frame.
    wday_dums = wday_dums[wday_dums.columns[-7:]].set_index(new_df.index)

    # Hour-of-day dummies for the same timestamps.
    hourly_dumgen = CalendarSeasonality('H', 'D')
    hourly_dums = hourly_dumgen.in_sample(new_df.index)

    # Merge on the index itself rather than on a level called 'time': after
    # the slice above no 'time' column is expected to remain, so on='time'
    # only resolved when the caller's index happened to carry that name.
    return wday_dums.merge(hourly_dums, left_index=True, right_index=True)
def test_calendar_seasonal_period_q():
    """Monthly terms within a quarterly period are one-hot and cycle every 3 rows."""
    monthly_index = pd.date_range("2000-01-01", freq="M", periods=600)
    seasonal = CalendarSeasonality("M", period="Q")
    terms = seasonal.in_sample(monthly_index)

    # Exactly one indicator is active in every row.
    row_totals = terms.sum(1)
    assert np.all(row_totals == 1.0)

    # The active column advances by one each month and wraps after three.
    for pos in range(len(monthly_index)):
        assert terms.iloc[pos, pos % 3] == 1.0
def test_calendar_seasonality(time_index, forecast_index, freq_period):
    """Smoke-test term generation, attribute types, and dunder methods."""
    # NOTE(review): the tuple is unpacked as (freq, period) but passed to the
    # constructor as (period, freq); the order is kept exactly as written —
    # confirm against the ``freq_period`` fixture.
    freq, period = freq_period
    seasonal = CalendarSeasonality(period, freq)
    seasonal.in_sample(time_index)

    # Use a fixed horizon when no explicit forecast index is supplied.
    if forecast_index is None:
        steps = 83
    else:
        steps = len(forecast_index)
    seasonal.out_of_sample(steps, time_index, forecast_index)

    assert isinstance(seasonal.period, str)
    assert isinstance(seasonal.freq, str)

    # These only need to run without raising.
    str(seasonal)
    repr(seasonal)
    hash(seasonal)

    # Two instances built from the same arguments compare equal.
    twin = CalendarSeasonality(period, freq)
    assert seasonal == twin
def test_calendar_seasonal_period_w():
    """Weekly-period terms are one-hot and cycle with the expected length.

    Checked for hourly ("H"), business-day ("B"), and daily ("D") indexes,
    each starting on 2000-01-03.
    """
    period = "W"
    # (index / seasonality frequency, number of rows before the pattern wraps)
    cases = (("H", 168), ("B", 5), ("D", 7))

    for freq, cycle_len in cases:
        index = pd.date_range("2000-01-03", freq=freq, periods=600)
        seasonal = CalendarSeasonality(freq, period=period)
        terms = seasonal.in_sample(index)

        # Exactly one indicator is active in every row.
        assert np.all(terms.sum(1) == 1.0)

        # The active column advances by one per row and wraps at cycle_len.
        for pos in range(len(index)):
            assert terms.iloc[pos, pos % cycle_len] == 1.0
def test_invalid_freq_period(time_index):
    """Invalid freq/period pairs and mismatched indexes raise ``ValueError``."""
    # An hourly period with an annual freq is rejected at construction time.
    with pytest.raises(ValueError, match="The combination of freq="):
        CalendarSeasonality("H", "A")

    # A business-day seasonality rejects an index built with daily frequency.
    business_weekly = CalendarSeasonality("B", "W")
    daily_index = pd.date_range("2000-1-1", periods=10, freq="D")
    with pytest.raises(ValueError, match="freq is B but index contains"):
        business_weekly.in_sample(daily_index)