def test_train_eval_test(self):
    """Smoke-test the full train / validate / test workflow.

    Fits a small AR model on a truncated series, training with
    per-epoch validation, then evaluates on a held-out test split.
    """
    log.info("testing: Train Eval Test")
    m = NeuralProphet(
        n_lags=10,
        n_forecasts=3,
        ar_sparsity=0.1,
        epochs=3,
        batch_size=32,
    )
    df = pd.read_csv(PEYTON_FILE, nrows=95)
    df = df_utils.check_dataframe(df, check_y=False)
    df = m._handle_missing_data(df, freq="D", predicting=False)
    df_train, df_test = m.split_df(df, freq="D", valid_p=0.1, inputs_overbleed=True)
    # Fit once, validating on a fraction of the training data each epoch.
    # (The original code called fit a second time without validation,
    # silently discarding these metrics and re-fitting a fitted model.)
    metrics = m.fit(df_train, freq="D", validate_each_epoch=True, valid_p=0.1)
    val_metrics = m.test(df_test)
    log.debug("Metrics: train/eval: \n {}".format(metrics.to_string(float_format=lambda x: "{:6.3f}".format(x))))
    log.debug("Metrics: test: \n {}".format(val_metrics.to_string(float_format=lambda x: "{:6.3f}".format(x))))
def test_time_dataset(self):
    """Tabularize a univariate time series and log the input shapes."""
    # Manually load any file that stores a time series, for example:
    df_in = pd.read_csv(AIR_FILE, index_col=False)
    log.debug("Infile shape: {}".format(df_in.shape))
    n_lags, n_forecasts, valid_p = 3, 1, 0.2
    df_train, df_val = df_utils.split_df(df_in, n_lags, n_forecasts, valid_p, inputs_overbleed=True)
    # Create a tabularized dataset from the checked and normalized series.
    df = df_utils.check_dataframe(df_train)
    data_params = df_utils.init_data_params(df, normalize="minmax")
    df = df_utils.normalize(df, data_params)
    inputs, targets = time_dataset.tabularize_univariate_datetime(
        df,
        n_lags=n_lags,
        n_forecasts=n_forecasts,
    )
    shape_summary = "; ".join(
        "{}: {}".format(name, arr.shape) for name, arr in inputs.items()
    )
    log.debug("tabularized inputs: {}".format(shape_summary))
def test_df_utils_func():
    """Exercise df_utils helpers: time threshold, timestamp split, data params."""
    log.info("testing: df_utils Test")
    df = pd.read_csv(PEYTON_FILE, nrows=95)
    df = df_utils.check_dataframe(df, check_y=False)
    # test find_time_threshold
    df_dict, _ = df_utils.prep_copy_df_dict(df)
    time_threshold = df_utils.find_time_threshold(df_dict, n_lags=2, valid_p=0.2, inputs_overbleed=True)
    df_train, df_val = df_utils.split_considering_timestamp(
        df_dict,
        n_lags=2,
        n_forecasts=2,
        inputs_overbleed=True,
        threshold_time_stamp=time_threshold,
    )
    # Smoke-test init_data_params with each normalization mode in turn
    # (each call only needs to run without raising).
    for mode in ("soft", "soft1", "standardize"):
        global_data_params = df_utils.init_data_params(df_dict, normalize=mode)
    log.debug("Time Threshold: \n {}".format(time_threshold))
    log.debug("Df_train: \n {}".format(type(df_train)))
    log.debug("Df_val: \n {}".format(type(df_val)))
def check_split(df_in, df_len_expected, n_lags, n_forecasts, freq, p=0.1):
    """Assert that NeuralProphet.split_df partitions samples as expected.

    Args:
        df_in: input dataframe with ``ds`` and ``y`` columns.
        df_len_expected: expected row count after missing-data handling.
        n_lags: AR lag count configured on the model.
        n_forecasts: forecast horizon configured on the model.
        freq: pandas frequency string of the series.
        p: validation fraction to split off (default 0.1).

    Raises:
        AssertionError: if the train/test sample counts do not match
            the expected split for fraction ``p``.
    """
    m = NeuralProphet(
        n_lags=n_lags,
        n_forecasts=n_forecasts,
    )
    df_in = df_utils.check_dataframe(df_in, check_y=False)
    df_in = m._handle_missing_data(df_in, freq=freq, predicting=False)
    assert df_len_expected == len(df_in)
    total_samples = len(df_in) - n_lags - 2 * n_forecasts + 2
    # BUG FIX: split with the requested fraction `p` rather than a
    # hard-coded 0.1, so the expectations below hold for any `p`.
    df_train, df_test = m.split_df(df_in, freq=freq, valid_p=p, inputs_overbleed=True)
    n_train = len(df_train) - n_lags - n_forecasts + 1
    n_test = len(df_test) - n_lags - n_forecasts + 1
    assert total_samples == n_train + n_test
    n_test_expected = max(1, int(total_samples * p))
    n_train_expected = total_samples - n_test_expected
    assert n_train == n_train_expected
    assert n_test == n_test_expected