def test_dlt_map_univariate(synthetic_data):
    """Fit a univariate DLTMAP, then check its initializer and prediction frame.

    NOTE(review): a second function with this exact name is defined later in
    this file. The later definition rebinds the name at import time, so this
    test is presumably never collected by pytest -- confirm and rename one.
    """
    train_df, test_df, _ = synthetic_data

    model_kwargs = dict(
        response_col='response',
        date_col='week',
        seasonality=52,
        num_warmup=50,
        verbose=False,
    )
    model = DLTMAP(**model_kwargs)
    model.fit(train_df)

    # The initializer object is inspected first, then called to produce values.
    initializer = model.get_init_values()
    assert isinstance(initializer, DLTInitializer)
    assert initializer.s == 52
    assert initializer()['init_sea'].shape == (51, )

    forecast = model.predict(test_df)

    wanted_columns = ['week', 'prediction_5', 'prediction', 'prediction_95']
    assert forecast.shape == (51, len(wanted_columns))
    assert forecast.columns.tolist() == wanted_columns
    assert len(model._posterior_samples) == 12
def test_dlt_predict_mixed_regular_positive(iclaims_training_data):
    """Check that reordering regressors (with their signs) leaves predictions unchanged.

    Two DLTMAP models are fit on the same frame with the same seed. The second
    lists 'trend.job' and 'trend.filling' in swapped positions, each column
    keeping the sign it had in the first model.
    """
    data = iclaims_training_data
    common = dict(response_col='claims', date_col='week', seasonality=52, seed=8888)

    baseline = DLTMAP(
        regressor_col=['trend.unemploy', 'trend.filling', 'trend.job'],
        regressor_sign=['=', '+', '='],
        **common,
    )
    baseline.fit(data)
    baseline_pred = baseline.predict(data)

    reordered = DLTMAP(
        regressor_col=['trend.unemploy', 'trend.job', 'trend.filling'],
        regressor_sign=['=', '=', '+'],
        **common,
    )
    reordered.fit(data)
    reordered_pred = reordered.predict(data)

    assert np.allclose(
        baseline_pred['prediction'].values,
        reordered_pred['prediction'].values,
    )
def test_dlt_fixed_sm_input(synthetic_data, level_sm_input, seasonality_sm_input, slope_sm_input):
    """Fit DLTMAP with caller-supplied smoothing inputs and check output shapes.

    Every column of the training frame from the third onward is used as a
    regressor. The test checks the prediction frame layout and the shape of
    the regression-coefficient table.
    """
    train_df, test_df, _ = synthetic_data
    regressor_names = train_df.columns.tolist()[2:]

    model = DLTMAP(
        response_col='response',
        date_col='week',
        regressor_col=regressor_names,
        level_sm_input=level_sm_input,
        seasonality_sm_input=seasonality_sm_input,
        slope_sm_input=slope_sm_input,
        seasonality=52,
        num_warmup=50,
        verbose=False,
    )
    model.fit(train_df)

    forecast = model.predict(test_df)
    coef_table = model.get_regression_coefs()
    coef_rows = coef_table.shape[0]

    wanted_columns = ['week', 'prediction_5', 'prediction', 'prediction_95']
    assert forecast.shape == (51, len(wanted_columns))
    assert forecast.columns.tolist() == wanted_columns
    assert coef_table.shape == (6, 3)
    # One coefficient row per regressor column of the training frame.
    assert coef_rows == len(train_df.columns.tolist()[2:])
def test_dlt_predict_all_positive_reg(iclaims_training_data):
    """Fit DLTMAP with all regressors signed '+' and check the regression component.

    The decomposed prediction must contain at least one truthy (non-zero)
    value in its 'regression' column.
    """
    data = iclaims_training_data

    model = DLTMAP(
        response_col='claims',
        date_col='week',
        regressor_col=['trend.unemploy', 'trend.filling', 'trend.job'],
        regressor_sign=['+', '+', '+'],
        seasonality=52,
        seed=8888,
    )
    model.fit(data)

    decomposed = model.predict(data, decompose=True)
    regression_component = decomposed['regression'].values
    assert any(regression_component)
def test_dlt_map_global_trend(synthetic_data, global_trend_option):
    """Fit DLTMAP with the given global trend option and check the prediction frame.

    The prediction is expected to hold 51 rows and exactly the columns
    'week' and 'prediction'.
    """
    train_df, test_df, _ = synthetic_data

    model = DLTMAP(
        response_col='response',
        date_col='week',
        seasonality=52,
        global_trend_option=global_trend_option,
    )
    model.fit(train_df)
    forecast = model.predict(test_df)

    wanted_columns = ['week', 'prediction']
    assert forecast.shape == (51, len(wanted_columns))
    assert forecast.columns.tolist() == wanted_columns
def test_dlt_mixed_signs_and_order(iclaims_training_data, regressor_signs):
    """Check predictions are invariant to regressor ordering.

    Two orderings are exercised:
      * the column order of the DataFrame passed to ``predict``;
      * the order of ``regressor_col`` / ``regressor_sign`` given to the model.

    All four resulting predictions must agree within ``atol=1e-3``.

    Args:
        iclaims_training_data: fixture providing the training DataFrame; it
            must contain 'claims', 'week' and the three 'trend.*' columns
            referenced below.
        regressor_signs: sequence of three sign strings, one per regressor.
    """
    # Work on a copy: the log transform below would otherwise overwrite the
    # 'claims' column on the very object the fixture handed in, which leaks
    # into other tests if the fixture object is shared.
    df = iclaims_training_data.copy()
    df['claims'] = np.log(df['claims'])

    raw_regressor_col = ['trend.unemploy', 'trend.filling', 'trend.job']
    permutation = [1, 2, 0]
    new_regressor_col = [raw_regressor_col[idx] for idx in permutation]
    new_regressor_signs = [regressor_signs[idx] for idx in permutation]

    # mixing the ordering of columns in the DataFrame used for prediction
    new_df = df[['claims', 'week'] + new_regressor_col]

    dlt = DLTMAP(
        response_col='claims',
        date_col='week',
        regressor_col=raw_regressor_col,
        regressor_sign=regressor_signs,
        seasonality=52,
        seed=8888,
    )
    dlt.fit(df)
    predicted_df_v1 = dlt.predict(df)
    predicted_df_v2 = dlt.predict(new_df)

    # mixing the ordering of regressors and signs given to the model
    dlt_new = DLTMAP(
        response_col='claims',
        date_col='week',
        regressor_col=new_regressor_col,
        regressor_sign=new_regressor_signs,
        seasonality=52,
        seed=8888,
    )
    dlt_new.fit(df)
    predicted_df_v3 = dlt_new.predict(df)
    predicted_df_v4 = dlt_new.predict(new_df)

    pred_v1 = predicted_df_v1['prediction'].values
    pred_v2 = predicted_df_v2['prediction'].values
    pred_v3 = predicted_df_v3['prediction'].values
    pred_v4 = predicted_df_v4['prediction'].values

    # they should all be identical; the ordering of signs or of columns at
    # prediction time should not matter
    assert np.allclose(pred_v1, pred_v2, atol=1e-3)
    assert np.allclose(pred_v1, pred_v3, atol=1e-3)
    assert np.allclose(pred_v1, pred_v4, atol=1e-3)
def test_dlt_map_univariate(synthetic_data):
    """Fit a univariate DLTMAP and check prediction layout and parameter count.

    NOTE(review): an earlier function in this file carries the same name; this
    definition rebinds it, so presumably only this one is collected by pytest
    -- confirm and rename one of the two.
    """
    train_df, test_df, _ = synthetic_data

    model = DLTMAP(
        response_col='response',
        date_col='week',
        seasonality=52,
        num_warmup=50,
        verbose=False,
    )
    model.fit(train_df)
    forecast = model.predict(test_df)

    wanted_columns = ['week', 'prediction']
    # no `lp__` parameter in optimizing()
    wanted_param_count = 12

    assert forecast.shape == (51, len(wanted_columns))
    assert forecast.columns.tolist() == wanted_columns
    assert len(model._posterior_samples) == wanted_param_count
def test_dlt_map_single_regressor(iclaims_training_data):
    """Fit DLTMAP with a single regressor and check the prediction output.

    The response column is log-transformed before fitting. The prediction
    must have one row per input row, the expected column layout, and the
    model must hold the expected number of posterior parameters.

    Args:
        iclaims_training_data: fixture providing the training DataFrame; it
            must contain 'claims', 'week' and 'trend.unemploy'.
    """
    # Work on a copy: the log transform below would otherwise overwrite the
    # 'claims' column on the very object the fixture handed in, which leaks
    # into other tests if the fixture object is shared.
    df = iclaims_training_data.copy()
    df['claims'] = np.log(df['claims'])

    regressor_col = ['trend.unemploy']

    dlt = DLTMAP(
        response_col='claims',
        date_col='week',
        regressor_col=regressor_col,
        seasonality=52,
        seed=8888,
    )
    dlt.fit(df)
    predicted_df = dlt.predict(df)

    expected_num_parameters = 13
    expected_columns = ['week', 'prediction_5', 'prediction', 'prediction_95']

    assert predicted_df.shape[0] == df.shape[0]
    assert predicted_df.columns.tolist() == expected_columns
    assert len(dlt._posterior_samples) == expected_num_parameters
def test_dlt_map_reproducibility(synthetic_data, seasonality):
    """Check that two identically configured DLTMAP runs give identical results.

    Each run builds a fresh model, fits it, snapshots the 'map' aggregate
    posteriors and predicts. Posterior keys, posterior values and predictions
    must all match between the two runs.
    """
    train_df, test_df, _ = synthetic_data

    def _fit_and_predict():
        # note a new instance must be created to reset the seed
        # note both fit and predict contain random generation processes
        model = DLTMAP(
            response_col='response',
            date_col='week',
            prediction_percentiles=[5, 95],
            seasonality=seasonality,
        )
        model.fit(train_df)
        snapshot = copy(model._aggregate_posteriors['map'])
        return snapshot, model.predict(test_df)

    # first fit and predict
    posteriors1, prediction1 = _fit_and_predict()
    # second fit and predict
    posteriors2, prediction2 = _fit_and_predict()

    # assert same posterior keys
    assert set(posteriors1.keys()) == set(posteriors2.keys())

    # assert posterior draws are reproducible
    for key, draws in posteriors1.items():
        assert np.allclose(draws, posteriors2[key])

    # assert prediction is reproducible
    assert np.allclose(
        prediction1['prediction'].values,
        prediction2['prediction'].values,
    )