def train(file_path, P, frac):
    """Build a multivariate training set, rolling test sets and a GP-Var estimator.

    Parameters
    ----------
    file_path: path passed to ``create_dataset`` to load targets and frame.
    P: rolling-window step / prediction length (business days).
    frac: fraction of the time axis used for training.

    Returns
    -------
    (train_ds, rolling_test, estimator, train_size)
    """
    target, df = create_dataset(file_path)
    rolling_test = []
    train_size = int(frac * df.shape[0])
    # One start timestamp per series; every series starts at the frame's first index.
    starts = [pd.Timestamp(df.index[0]) for _ in range(len(target))]
    # NOTE(review): groupers use df.shape[0] while the estimator uses
    # df.shape[1] as the target dimension — confirm which axis holds series.
    grouper_train = MultivariateGrouper(max_target_dim=df.shape[0])
    grouper_test = MultivariateGrouper(max_target_dim=df.shape[0])
    train_ds = ListDataset([{
        FieldName.TARGET: targets,
        FieldName.START: start
    } for (targets, start) in zip(target[:, 0:train_size - P], starts)],
                           freq='1B')
    train_ds = grouper_train(train_ds)
    i = 0
    delay = 0
    # Each iteration extends the test window by P business days.
    while train_size + delay < df.shape[0]:
        delay = int(P) * i
        test_ds = ListDataset([{
            FieldName.TARGET: targets,
            FieldName.START: start
        } for (targets, start) in zip(target[:, 0:train_size + delay], starts)],
                              freq='1B')
        test_ds = grouper_test(test_ds)
        rolling_test.append(test_ds)
        i += 1
    # BUG FIX: the original referenced an undefined global ``pred_len``; the
    # prediction window used everywhere else in this function is ``P``.
    estimator = GPVAREstimator(prediction_length=P,
                               context_length=6,
                               freq='1B',
                               target_dim=df.shape[1],
                               trainer=Trainer(ctx="cpu", epochs=200))
    return train_ds, rolling_test, estimator, train_size
def make_predictions(predictor, region_df_dict, test_date, regions_list, target_col, feat_dynamic_cols=None, num_eval_samples=100):
    """Run evaluation predictions for every region up to ``test_date`` plus the
    configured prediction horizon.

    Each region contributes one test entry; dynamic real features are attached
    only when ``feat_dynamic_cols`` is provided.

    Returns a ``(forecasts, time_series)`` pair of lists.
    """
    # Targets (and features) cover history up to test_date + prediction horizon.
    cutoff = test_date + timedelta(hours=md.NB_HOURS_PRED)
    entries = []
    for region in regions_list:
        frame = region_df_dict[region]
        entry = {
            "item_id": region,
            "start": frame.index[0],
            "target": frame[target_col][:cutoff],
        }
        if feat_dynamic_cols is not None:
            entry["feat_dynamic_real"] = [
                frame[col][:cutoff] for col in feat_dynamic_cols
            ]
        entries.append(entry)
    test_data = ListDataset(entries, freq=md.FREQ)
    forecast_it, ts_it = make_evaluation_predictions(
        test_data, predictor=predictor, num_eval_samples=num_eval_samples)
    return list(forecast_it), list(ts_it)
def test_estimator_with_features(estimator_constructor):
    """Train with static/dynamic features, round-trip through (de)serialization,
    then sample a few forecasts."""
    freq = "1h"
    prediction_length = 12

    def _entry(start, length, cats, reals, feat_len):
        # One data entry with 3 dynamic real features of feat_len steps each.
        return {
            "start": start,
            "target": [1.0] * length,
            "feat_static_cat": cats,
            "feat_static_real": reals,
            "feat_dynamic_real": [[1.0] * feat_len] * 3,
        }

    training_dataset = ListDataset(
        [
            _entry("2021-01-01 00:00:00", 200, [0, 1], [42.0], 200),
            _entry("2021-02-01 00:00:00", 100, [1, 0], [1.0], 100),
        ],
        freq=freq,
    )
    # Prediction entries carry dynamic features extended by prediction_length.
    prediction_dataset = ListDataset(
        [
            _entry("2021-01-01 00:00:00", 200, [0, 1], [42.0],
                   200 + prediction_length),
            _entry("2021-02-01 00:00:00", 100, [1, 0], [1.0],
                   100 + prediction_length),
        ],
        freq=freq,
    )
    estimator = estimator_constructor(freq, prediction_length)
    predictor = estimator.train(
        training_data=training_dataset,
        validation_data=training_dataset,
        shuffle_buffer_length=5,
    )
    with tempfile.TemporaryDirectory() as td:
        predictor.serialize(Path(td))
        predictor_copy = Predictor.deserialize(Path(td))
    forecasts = predictor_copy.predict(prediction_dataset)
    for f in islice(forecasts, 5):
        f.mean
def test_from_gluonts_list_dataset():
    """TimeSeriesDataFrame must accept gluonts ListDatasets and reject empty ones."""
    number_of_ts = 10  # series count
    ts_length = 100  # steps per series
    prediction_length = 24
    freq = "D"
    values = np.random.normal(size=(number_of_ts, ts_length))
    start = pd.Timestamp("01-01-2019", freq=freq)  # type: ignore
    gluonts_list_dataset = ListDataset(
        [{"target": row, "start": start}
         for row in values[:, :-prediction_length]],
        freq=freq,
    )
    TimeSeriesDataFrame(gluonts_list_dataset)
    # Round-trip the reference sample and compare against the expected frame.
    ts_df = TimeSeriesDataFrame(ListDataset(SAMPLE_ITERABLE, freq=freq))
    pd.testing.assert_frame_equal(ts_df, SAMPLE_TS_DATAFRAME, check_dtype=False)
    with pytest.raises(ValueError):
        TimeSeriesDataFrame(ListDataset([], freq=freq))
def train_predictor(df_dict, end_train_date, regions_list, target_col, feat_dynamic_cols=None):
    """Fit a DeepAR predictor on per-region series truncated at ``end_train_date``.

    Dynamic real features are included only when ``feat_dynamic_cols`` is given;
    the estimator is configured to expect them in that case.
    """
    estimator = DeepAREstimator(freq=data_freq,
                                prediction_length=nb_hours_pred,
                                trainer=Trainer(epochs=max_epochs,
                                                learning_rate=learning_rate,
                                                learning_rate_decay_factor=0.01,
                                                patience=patience),
                                use_feat_dynamic_real=feat_dynamic_cols is not None)
    entries = []
    for region in regions_list:
        frame = df_dict[region]
        entry = {
            "item_id": region,
            "start": frame.index[0],
            "target": frame[target_col][:end_train_date],
        }
        if feat_dynamic_cols is not None:
            entry["feat_dynamic_real"] = [
                frame[col][:end_train_date] for col in feat_dynamic_cols
            ]
        entries.append(entry)
    training_data = ListDataset(entries, freq=data_freq)
    predictor = estimator.train(training_data=training_data)
    return predictor
def preprocess_by_single_team(self, data):
    """Build train/test ListDatasets for one team's goal series.

    Filters ``data`` to rows whose index contains the team's initials, then
    reshapes the goal values into a single (1, T) series.  The train set drops
    the last ``prediction_length`` steps; the test set keeps the full series.

    Returns (train_ds, test_ds).
    """
    self.__log.info(
        "Starting preprocessing time series by a single team before training routine starts"
    )
    team_initials = self.team_initials
    # BUG FIX: the filtered frame was previously discarded — the next line read
    # ``data.goals`` instead of the filtered result, so every team trained on
    # the whole dataset.
    team_data = data[data.index.str.contains(team_initials)]
    custom_dataset = team_data.goals.values
    custom_dataset = custom_dataset.reshape(1, -1)
    prediction_length = self.prediction_length
    start_date = self.start_date
    freq = self.freq
    start = pd.Timestamp(start_date, freq)
    # NOTE(review): datasets are built with a hard-coded '1H' frequency even
    # though ``self.freq`` is read above — confirm whether ``freq`` was meant.
    # Train dataset: cut the last "prediction_length" steps.
    train_ds = ListDataset([{
        'target': x,
        'start': start
    } for x in custom_dataset[:, :-prediction_length]],
                           freq='1H')
    # Test dataset: use the whole series.
    test_ds = ListDataset([{
        'target': x,
        'start': start
    } for x in custom_dataset],
                          freq='1H')
    self.__log.info("Finished preprocessing time series by single team")
    return train_ds, test_ds
def train_and_test(training_data, test_data, freq, num_test_windows, model, require_train=False):
    """Optionally train ``model``, persist it locally, and forecast ``test_data``.

    Returns the forecasts and the corresponding ground-truth series as lists.
    """
    train_ds = ListDataset(training_data, freq=freq)
    test_ds = ListDataset(test_data, freq=freq)
    predictor = model.train(training_data=train_ds) if require_train else model
    # Persist the predictor so it can be deployed later.
    model_path = Path(f"models/{model.__class__.__name__}")
    os.makedirs(model_path, exist_ok=True)
    predictor.serialize(model_path)
    # Forecast over the test set with 100 sample paths.
    forecast_it, ts_it = make_evaluation_predictions(test_ds,
                                                     predictor=predictor,
                                                     num_samples=100)
    forecasts = list(forecast_it)
    tss = list(ts_it)
    return forecasts, tss
def GlounTS():
    """Fit DeepAR on the module-level 5-minute series and return mean/median
    forecasts plus the raw prediction object."""
    # gluonts imported lazily; the pts equivalents crashed on this system.
    from gluonts.dataset.common import ListDataset
    from gluonts.model.deepar import DeepAREstimator
    from gluonts.trainer import Trainer

    def _dataset():
        # Same window is used for both training and prediction.
        return ListDataset([{
            "start": df.index[0],
            "target": df.value[:"2015-03-08 23:22:53"]
        }], freq="5min")

    estimator = DeepAREstimator(freq="5min",
                                prediction_length=forecast_size,
                                trainer=Trainer(epochs=20))
    predictor = estimator.train(training_data=_dataset())
    GluonTS_prediction = next(predictor.predict(_dataset()))
    mean_yhat = GluonTS_prediction.mean
    median_yhat = GluonTS_prediction.median
    return mean_yhat.tolist(), median_yhat.tolist(), GluonTS_prediction
def prepare(df, P, frac, ep):
    """Build a train window, rolling test windows, and a DeepAR estimator from
    the sentiment/volume dataframe.

    Returns (train_ds, rolling_test, estimator, train_size, i).
    """
    # Dynamic real feature columns; 'suprise' matches the frame's own spelling.
    feature_cols = [
        'fear', 'anger', 'anticipation', 'trust', 'suprise', 'positive',
        'negative', 'sadness', 'disgust', 'joy', 'Volume_of_tweets',
        'Retweet', 'Replies', 'Likes'
    ]

    def _window(end):
        # One entry covering rows [0, end) for target and all features.
        return {
            'start': pd.Timestamp(df.index[0]),
            'target': df.Diff[0:end],
            'feat_dynamic_real': [df[col][0:end] for col in feature_cols],
        }

    train_size = int(frac * df.shape[0])
    train_ds = ListDataset([_window(train_size - P)], freq='1B')
    rolling_test = []
    i = 0
    delay = 0
    # Grow the test window by P business days per iteration.
    while train_size + delay < df.shape[0]:
        delay = int(P) * i
        rolling_test.append(ListDataset([_window(train_size + delay)], freq='1B'))
        i += 1
    print("We have 1 training set of", train_size, "days and then ",
          len(rolling_test), "testing sets of ", delay, " days total")
    estimator = DeepAREstimator(prediction_length=P,
                                context_length=5,
                                freq='1B',
                                use_feat_dynamic_real=True,
                                trainer=Trainer(ctx="cpu", epochs=ep))
    return train_ds, rolling_test, estimator, train_size, i
def get_custom_dataset(name, horizon):
    """Load an electricity CSV and build train/test ListDatasets.

    Parameters
    ----------
    name: "electricity" or "europe_power_system" (any other value raises
        NameError below, since ``df`` would be unbound).
    horizon: rolling-evaluation step in hours for the test set.

    Returns (train_ds, test_ds).
    """
    if name == "electricity":
        csv_path = r'/scratch/project_2002244/DeepAR/data/elect/electricity.csv'
        df = pd.read_csv(csv_path,
                         sep=",",
                         index_col=0,
                         parse_dates=True,
                         decimal='.').astype(float)
        df.fillna(0, inplace=True)
        train_start = '2012-01-01 00:00:00'
        train_end = '2014-05-26 23:00:00'
        test_start = '2014-05-27 00:00:00'
        test_end = '2014-12-31 23:00:00'
    elif name == "europe_power_system":
        csv_path = r'/scratch/project_2002244/DeepAR/data/elect/europe_power_system.csv'
        df = pd.read_csv(csv_path,
                         sep=",",
                         index_col=0,
                         parse_dates=True,
                         decimal='.').astype(float)
        df.fillna(0, inplace=True)
        train_start = '2015-01-01 00:00:00'
        train_end = '2017-06-23 23:00:00'
        test_start = '2017-06-24 00:00:00'
        test_end = '2017-11-30 23:00:00'
    # Series are rows after transposing: one row per column of the CSV.
    train_target_values = df[:train_end].T.values
    # NOTE(review): test_target_values and train_start are computed but never
    # used below — confirm whether they were meant to feed the test set.
    test_target_values = df[:(pd.Timestamp(test_start) -
                              timedelta(hours=1))].T.values
    start_dates = np.array([
        pd.Timestamp(df.index[0], freq='1H')
        for _ in range(train_target_values.shape[0])
    ])
    train_ds = ListDataset([{
        FieldName.TARGET: target,
        FieldName.START: start
    } for (target, start) in zip(train_target_values, start_dates)],
                           freq="1H")
    # Rolling test set: for every evaluation date (stepping by `horizon` hours)
    # each series is re-emitted truncated at that date, so the dataset contains
    # n_series entries per evaluation window.
    test_ds = ListDataset(
        [{
            FieldName.TARGET: target,
            FieldName.START: start
        }
         for index in pd.date_range(start=(pd.Timestamp(test_start) -
                                           timedelta(hours=1) +
                                           timedelta(hours=horizon)),
                                    end=pd.Timestamp(test_end),
                                    freq='{}H'.format(horizon))
         for (target, start) in zip(df[:index].T.values, start_dates)],
        freq="1H")
    return train_ds, test_ds
def anomaly_gluonts(lista_datos, num_fut, desv_mse=0, train=True, name='model-name'):
    """Detect anomalies on a series with DeepAR, then forecast ``num_fut`` steps.

    Phase 1 trains on the train split and predicts the test split to generate
    alerts/metrics; phase 2 retrains on the full series to produce the forecast.

    Note: ``desv_mse``, ``train`` and ``name`` are accepted but never used in
    this body.

    Returns ``engine.engine_output``.
    """
    lista_puntos = np.arange(0, len(lista_datos), 1)
    df, df_train, df_test = create_train_test(lista_puntos, lista_datos)
    # NOTE(review): the start timestamp is hard-coded; confirm it is irrelevant
    # for downstream consumers of the forecast.
    data_list = [{
        "start": "01-01-2012 04:05:00",
        "target": df_train['valores'].values
    }]
    dataset = ListDataset(data_iter=data_list, freq="5min")
    trainer = Trainer(epochs=15)
    # Predict exactly the length of the held-out test split.
    estimator = deepar.DeepAREstimator(freq="5min",
                                       prediction_length=len(
                                           df_test['valores']),
                                       trainer=trainer)
    predictor = estimator.train(training_data=dataset)
    prediction = next(predictor.predict(dataset))
    # Feed the engine: alerts, debug info and metrics from mean forecast vs test.
    engine = engine_output_creation('gluonts')
    engine.alerts_creation(prediction.mean.tolist(), df_test)
    engine.debug_creation(prediction.mean.tolist(), df_test)
    print('longitud del test' + str(df_test.shape) +
          'frente a la prediccion' + str(len(prediction.mean.tolist())))
    engine.metrics_generation(df_test['valores'].values,
                              prediction.mean.tolist())
    ############## ANOMALY FINISHED
    print("Anomaly finished. Start forecasting")
    ############## FORECAST START
    # Retrain on the full series to forecast num_fut future steps.
    data_list = [{
        "start": "01-01-2012 04:05:00",
        "target": df['valores'].values
    }]
    dataset = ListDataset(data_iter=data_list, freq="5min")
    trainer = Trainer(epochs=15)
    estimator = deepar.DeepAREstimator(freq="5min",
                                       prediction_length=num_fut,
                                       trainer=trainer)
    predictor = estimator.train(training_data=dataset)
    prediction = next(predictor.predict(dataset))
    engine.forecast_creation(prediction.mean.tolist(), len(lista_datos),
                             num_fut)
    return (engine.engine_output)
def train(epochs, prediction_length, num_layers, dropout_rate):
    """SageMaker training entry: fit DeepAR on the train channel, evaluate on
    the test channel, print MSE, and serialize the model.

    Returns the trained predictor.
    """
    def _channel_dataset(env_var, filename):
        # Load a single-series CSV from a SageMaker channel directory.
        frame = pd.read_csv(filepath_or_buffer=os.environ[env_var] + filename,
                            header=0,
                            index_col=0)
        return ListDataset([{
            "start": frame.index[0],
            "target": frame.value[:]
        }], freq="5min")

    training_data = _channel_dataset('SM_CHANNEL_TRAIN', "/train.csv")
    deepar_estimator = DeepAREstimator(freq="5min",
                                       prediction_length=prediction_length,
                                       dropout_rate=dropout_rate,
                                       num_layers=num_layers,
                                       trainer=Trainer(epochs=epochs))
    deepar_predictor = deepar_estimator.train(training_data=training_data)
    test_data = _channel_dataset('SM_CHANNEL_TEST', "/test.csv")
    forecast_it, ts_it = make_evaluation_predictions(test_data,
                                                     deepar_predictor,
                                                     num_samples=100)
    forecasts = list(forecast_it)
    tss = list(ts_it)
    evaluator = Evaluator(quantiles=[0.1, 0.5, 0.9])
    agg_metrics, item_metrics = evaluator(iter(tss), iter(forecasts),
                                          num_series=len(test_data))
    print("MSE:", agg_metrics["MSE"])
    deepar_predictor.serialize(pathlib.Path(os.environ['SM_MODEL_DIR']))
    return deepar_predictor
def predict(self, test_frame, pred_intervals):
    """Make in-sample and/or out-of-sample predictions from ``test_frame`` for
    every timestep covered by ``pred_intervals``.

    Without group columns a single series is processed; with group columns each
    group in ``test_frame`` is paired with its interval and processed in order.
    Accumulated per-series data is forecast in one batch at the end.
    """
    if not self.train_dataset.has_group_cols():
        # Single-series case: only the first interval applies.
        interval = pred_intervals[0]
        feat_df = self.train_dataset.get_features(self.train_frame)
        targets = self.train_dataset.get_targets(self.train_frame)
        min_interval = np.min(interval)
        max_interval = np.max(interval)
        self.max_intervals.append(max_interval)
        # Interval reaching past training history => append test features
        # for the out-of-sample portion.
        if np.max(interval) >= targets.shape[0]:
            feat_df = pd.concat((feat_df, test_frame))
        self._iterate_over_series(
            0,
            feat_df,
            targets,
            min_interval,
            max_interval,
        )
    else:
        group_cols = self.train_dataset.get_group_names()
        # Groups in test_frame are assumed to line up with pred_intervals.
        for series_idx, ((group, test_df), interval) in enumerate(
                zip(test_frame.groupby(group_cols, sort=False),
                    pred_intervals)):
            if len(group_cols) == 1:
                group = [group]
            # Select this group's training rows via a query over group columns.
            query_list = [
                f'{grp_col}=="{grp}"'
                for grp_col, grp in zip(group_cols, group)
            ]
            train_df = self.train_frame.query(' & '.join(query_list))
            min_interval = np.min(interval)
            max_interval = np.max(interval)
            self.max_intervals.append(max_interval)
            if not train_df.shape[0]:
                # No training history for this group: record sentinel values
                # (-1 series index) so downstream padding can skip it.
                self.series_idxs.append(-1)
                self.pre_pad_lens.append(0)
                self.total_in_samples.append(0)
            else:
                feat_df = self.train_dataset.get_features(train_df)
                targets = self.train_dataset.get_targets(train_df)
                if np.max(interval) >= targets.shape[0]:
                    feat_df = pd.concat((feat_df, test_df))
                self._iterate_over_series(series_idx, feat_df, targets,
                                          min_interval, max_interval)
    self.series_idxs = np.array(self.series_idxs)
    # self.data is presumably populated by _iterate_over_series — the batch
    # forecast below consumes it as one ListDataset.
    self.data = ListDataset(self.data, freq=self.train_dataset.get_freq())
    forecasts = self._forecast()
    forecasts = self._pad(forecasts)
    return forecasts  # Num Series, Quantiles, Horizon
def test_multivariate_grouper_train(univariate_ts, multivariate_ts, train_fill_rule) -> None:
    """Grouping the univariate series must reproduce the multivariate dataset."""
    uni_ds = ListDataset(univariate_ts, freq="1D")
    multi_ds = ListDataset(multivariate_ts, freq="1D", one_dim_target=False)
    grouper = MultivariateGrouper(train_fill_rule=train_fill_rule)
    expected = list(multi_ds)[0]
    assert (list(grouper(uni_ds))[0]["target"] == expected["target"]).all()
    assert list(grouper(uni_ds))[0]["start"] == expected["start"]
def apply(self):
    """Max-normalize the training series and rescale the test series with the
    same per-series factors; rebuilds ``self.datasets`` in place.

    Returns self for chaining.
    """
    # Each _max_normalize call yields (scaled_entry, scale); split the pairs.
    pairs = [self._max_normalize(entry) for entry in iter(self.datasets.train)]
    scaled_entries, scales = zip(*pairs)
    train = ListDataset(scaled_entries, freq=self.freq)
    test = None
    if self.datasets.test is not None:
        # Reuse the training scales, pairing them positionally with test entries.
        test = ListDataset(
            [self._max_normalize(entry, scale)[0]
             for entry, scale in zip(iter(self.datasets.test), scales)],
            freq=self.freq,
        )
    self.datasets = TrainDatasets(self.datasets.metadata, train, test)
    return self
def constant_dataset() -> Tuple[DatasetInfo, Dataset, Dataset]:
    """Ten constant series (value i for series i) with static features; train
    series are 24 steps, test series 30."""
    metadata = MetaData(
        freq='1H',
        feat_static_cat=[
            CategoricalFeatureInfo(
                name='feat_static_cat_000', cardinality='10'
            )
        ],
        feat_static_real=[BasicFeatureInfo(name='feat_static_real_000')],
    )
    start_date = '2000-01-01 00:00:00'

    def _build(length):
        # Series i is the constant float(i) repeated `length` times.
        return ListDataset(
            data_iter=[
                {
                    'item': str(i),
                    'start': start_date,
                    'target': [float(i)] * length,
                    'feat_static_cat': [i],
                    'feat_static_real': [float(i)],
                }
                for i in range(10)
            ],
            freq=metadata.freq,
        )

    train_ds = _build(24)
    test_ds = _build(30)
    info = DatasetInfo(
        name='constant_dataset',
        metadata=metadata,
        prediction_length=2,
        train_statistics=calculate_dataset_statistics(train_ds),
        test_statistics=calculate_dataset_statistics(test_ds),
    )
    return info, train_ds, test_ds
def make_dummy_datasets_with_features(
    num_ts: int = 5,
    start: str = "2018-01-01",
    freq: str = "D",
    min_length: int = 5,
    max_length: int = 10,
    prediction_length: int = 3,
    cardinality: List[int] = [],
    num_feat_dynamic_real: int = 0,
    num_past_feat_dynamic_real: int = 0,
) -> Tuple[ListDataset, ListDataset]:
    """Generate paired train/test dummy datasets with optional static and
    dynamic features; test dynamic features extend past the target by
    ``prediction_length``."""
    train_entries = []
    test_entries = []
    for _ in range(num_ts):
        ts_length = randint(min_length, max_length)
        entry = {
            FieldName.START: start,
            FieldName.TARGET: [0.0] * ts_length,
        }
        if len(cardinality) > 0:
            entry[FieldName.FEAT_STATIC_CAT] = [
                randint(0, c) for c in cardinality
            ]
        if num_past_feat_dynamic_real > 0:
            entry[FieldName.PAST_FEAT_DYNAMIC_REAL] = [
                [float(1 + j)] * ts_length
                for j in range(num_past_feat_dynamic_real)
            ]
        # Used directly in predict (not make_evaluation_predictions, which
        # would chop the target), so train and test targets share one length.
        test_entry = entry.copy()
        if num_feat_dynamic_real > 0:
            entry[FieldName.FEAT_DYNAMIC_REAL] = [
                [float(1 + j)] * ts_length
                for j in range(num_feat_dynamic_real)
            ]
            test_entry[FieldName.FEAT_DYNAMIC_REAL] = [
                [float(1 + j)] * (ts_length + prediction_length)
                for j in range(num_feat_dynamic_real)
            ]
        train_entries.append(entry)
        test_entries.append(test_entry)
    return (
        ListDataset(data_iter=train_entries, freq=freq),
        ListDataset(data_iter=test_entries, freq=freq),
    )
def constant_dataset() -> Tuple[DatasetInfo, Dataset, Dataset]:
    """Ten constant series (value i for series i) with static features; 24-step
    train targets and 30-step test targets."""
    metadata = MetaData(
        freq="1H",
        feat_static_cat=[
            CategoricalFeatureInfo(
                name="feat_static_cat_000", cardinality="10"
            )
        ],
        feat_static_real=[BasicFeatureInfo(name="feat_static_real_000")],
    )
    start_date = "2000-01-01 00:00:00"

    def _build(length):
        # Series i repeats float(i) `length` times with matching static feats.
        return ListDataset(
            data_iter=[
                {
                    "item": str(i),
                    "start": start_date,
                    "target": [float(i)] * length,
                    "feat_static_cat": [i],
                    "feat_static_real": [float(i)],
                }
                for i in range(10)
            ],
            freq=metadata.freq,
        )

    train_ds = _build(24)
    test_ds = _build(30)
    info = DatasetInfo(
        name="constant_dataset",
        metadata=metadata,
        prediction_length=2,
        train_statistics=calculate_dataset_statistics(train_ds),
        test_statistics=calculate_dataset_statistics(test_ds),
    )
    return info, train_ds, test_ds
def constant_dataset() -> Tuple[DatasetInfo, Dataset, Dataset]:
    """Ten constant series keyed by FieldName constants; 24-step train targets
    and 30-step test targets."""
    metadata = MetaData(
        freq="1H",
        feat_static_cat=[
            CategoricalFeatureInfo(
                name="feat_static_cat_000", cardinality="10"
            )
        ],
        feat_static_real=[BasicFeatureInfo(name="feat_static_real_000")],
    )
    start_date = "2000-01-01 00:00:00"

    def _build(length):
        # Series i repeats float(i) `length` times with matching static feats.
        return ListDataset(
            data_iter=[
                {
                    FieldName.ITEM_ID: str(i),
                    FieldName.START: start_date,
                    FieldName.TARGET: [float(i)] * length,
                    FieldName.FEAT_STATIC_CAT: [i],
                    FieldName.FEAT_STATIC_REAL: [float(i)],
                }
                for i in range(10)
            ],
            freq=metadata.freq,
        )

    train_ds = _build(24)
    test_ds = _build(30)
    info = DatasetInfo(
        name="constant_dataset",
        metadata=metadata,
        prediction_length=2,
        train_statistics=calculate_dataset_statistics(train_ds),
        test_statistics=calculate_dataset_statistics(test_ds),
    )
    return info, train_ds, test_ds
def easy_train():
    """Quick DeepAR demo: train on a downsampled price CSV, forecast, and plot
    the forecast mean against the full series."""
    import pandas as pd
    # Keep every 5th row of columns 0 (index) and 2.
    df = pd.read_csv("optiver_hacktheburgh/sp.csv",
                     header=0,
                     index_col=0,
                     usecols=[0, 2],
                     skiprows=lambda x: x % 5 != 0)
    # df[:100].plot(linewidth=2)
    print("Showing")
    # plt.show()
    from gluonts.dataset.common import ListDataset
    # NOTE(review): training data uses freq "1s", the estimator "1min" and the
    # test data "10s" — these look inconsistent; confirm the intended frequency.
    training_data = ListDataset([{
        "start": df.index[0],
        "target": df.values.flatten()
    }], freq="1s")
    #from gluonts.model.simple_feedforward import SimpleFeedForwardEstimator
    from gluonts.model.deepar import DeepAREstimator
    from gluonts.trainer import Trainer
    estimator = DeepAREstimator(freq="1min",
                                prediction_length=100,
                                trainer=Trainer(epochs=20))
    predictor = estimator.train(training_data=training_data)
    # Conditioning window: first 1000 points only.
    test_data = ListDataset([{
        "start": df.index[0],
        "target": df.values.flatten()[:1000]
    }], freq="10s")
    # Full series, plotted alongside the forecast for comparison.
    full_test_data = ListDataset([{
        "start": df.index[0],
        "target": df.values.flatten()
    }], freq="10s")
    means = []
    for i, (test_entry, forecast) in enumerate(
            zip(full_test_data, predictor.predict(test_data))):
        # if i > 0:
        #     break
        print(forecast.dim())
        plt.plot(test_entry["target"])
        #forecast.plot(color='g', prediction_intervals=[], output_file="test.png")
        means.extend(list(forecast.mean))
        print(forecast.mean)
        l = len(test_entry["target"])
        # Horizontal line at the first forecast mean, vertical marker at x=5000.
        plt.axhline(y=means[0], xmin=0, xmax=l, linewidth=2, color='r')
        plt.axvline(x=5000, color='b')
        plt.grid(which='both')
    plt.show()
def format_input(df, freq, target=None):
    """Wrap the series in a single-entry ListDataset, truncated at ``target``
    when a truthy cut-off is supplied."""
    series = df.to_frame().Target
    if target:
        series = series[:target]
    return ListDataset(
        [{
            "start": df.index[0],
            "target": series
        }],
        freq=freq,
    )
def get_dataset():
    """Three short 2-row (value, category) series sharing the same start/end."""
    start = pd.Timestamp("2011-01-01 00:00:00", freq="H")
    end = pd.Timestamp("2011-01-01 03:00:00", freq="H")

    def _entry(values, categories):
        # Stack values and categories as two rows of a (2, T) target.
        return {
            "target": np.c_[np.array(values), np.array(categories)].T,
            "start": start,
            "end": end,
        }

    data_entry_list = [
        _entry([0.2, 0.7, 0.2, 0.5, 0.3, 0.3, 0.2, 0.1],
               [0, 1, 2, 0, 1, 2, 2, 2]),
        _entry([0.2, 0.1, 0.2, 0.5, 0.4], [0, 1, 2, 1, 1]),
        _entry([0.2, 0.7, 0.2, 0.5, 0.1, 0.2, 0.1],
               [0, 1, 2, 0, 1, 0, 2]),
    ]
    return ListDataset(data_entry_list, freq="H", one_dim_target=False)
def test_related_time_series_fail():
    """Prophet must reject a dynamic feature shorter than target + horizon."""
    params = dict(freq="1D", prediction_length=3, prophet={})
    # 6 feature steps vs required 4 (target) + 3 (horizon) = 7.
    short_feature = np.array(
        [
            [1.0, 2.0, 3.0, 4.0, 5.0, 6.0],
            [1.0, 2.0, 3.0, 4.0, 5.0, 6.0],
        ]
    )
    dataset = ListDataset(
        data_iter=[
            {
                'start': '2017-01-01',
                'target': np.array([1.0, 2.0, 3.0, 4.0]),
                'feat_dynamic_real': short_feature,
            }
        ],
        freq=params['freq'],
    )
    with pytest.raises(AssertionError) as excinfo:
        predictor = ProphetPredictor(**params)
        list(predictor.predict(dataset))
    assert str(excinfo.value) == (
        'Length mismatch for dynamic real-valued feature #0: '
        'expected 7, got 6'
    )
def test_parallelized_predictor():
    """The parallel wrapper must yield exactly the base predictor's forecasts."""
    entries = [{
        "start": "2012-01-01",
        "target": (np.zeros(20) + i).tolist()
    } for i in range(300)]
    dataset = ListDataset(data_iter=entries, freq="1H")
    base_predictor = IdentityPredictor(freq="1H",
                                       prediction_length=10,
                                       num_samples=100)
    parallel_predictor = ParallelizedPredictor(base_predictor=base_predictor,
                                               num_workers=10,
                                               chunk_size=2)
    serial = list(base_predictor.predict(dataset))
    parallel = list(parallel_predictor.predict(dataset))
    assert len(serial) == len(parallel)
    for expected, actual in zip(serial, parallel):
        assert np.all(expected.samples == actual.samples)
        assert np.all(expected.index == actual.index)
def invocations() -> Response:
    """Flask handler: JSON-lines request in, one forecast JSON per line out."""
    payload = request.data.decode("utf8").strip()
    instances = [json.loads(line) for line in payload.splitlines()]
    predictions = []
    # The first forecast is produced before the loop body runs inside
    # predictor.predict, so timing starts here.
    start = time.time()
    forecast_iter = predictor.predict(
        ListDataset(instances, predictor.freq),
        num_samples=configuration.num_samples,
    )
    for forecast in forecast_iter:
        end = time.time()
        prediction = forecast.as_json_dict(configuration)
        if DEBUG:
            prediction["debug"] = {"timing": end - start}
        predictions.append(prediction)
        start = time.time()
    lines = [json.dumps(jsonify_floats(p)) for p in predictions]
    return Response("\n".join(lines), mimetype="application/jsonlines")
def predict_fn(input_data, model):
    """Inference entry point: parse request options, run the model, and return
    the requested quantile per series.

    ``input_data`` may be a dict (optionally carrying 'instances', 'freq',
    'target_quantile', 'use_log1p') or a bare list of instances.  When
    'use_log1p' is set, predictions are mapped back through expm1.
    """
    print('[DEBUG] input_data type:', type(input_data), input_data)
    # `in` works for both dicts (key test) and lists (membership test).
    if 'freq' in input_data:
        freq = input_data['freq']
    else:
        freq = '1H'
    if 'target_quantile' in input_data:
        target_quantile = float(input_data['target_quantile'])
    else:
        target_quantile = 0.5
    if 'use_log1p' in input_data:
        use_log1p = input_data['use_log1p']
    else:
        use_log1p = False
    if 'instances' in input_data:
        instances = input_data['instances']
    elif isinstance(input_data, list):
        instances = input_data
    # BUG FIX: this branch referenced the undefined name ``data`` and raised
    # NameError for any dict payload without an 'instances' key.
    elif isinstance(input_data, dict):
        instances = [input_data]
    ds = ListDataset(parse_data(instances), freq=freq)
    inference_result = model.predict(ds)
    if use_log1p:
        # Undo the log1p transform applied at training time.
        result = [
            np.expm1(resulti.quantile(target_quantile)).tolist()
            for resulti in inference_result
        ]
    else:
        result = [
            resulti.quantile(target_quantile).tolist()
            for resulti in inference_result
        ]
    return result
def test_feat_dynamic_real_success():
    """Prophet with a correctly sized dynamic feature should extrapolate the
    linear trend at every checked quantile."""
    params = dict(
        freq="1D", prediction_length=3, prophet_params=dict(n_changepoints=20)
    )
    # 7 feature steps = 4 (target) + 3 (prediction horizon).
    feature_row = [1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0]
    dataset = ListDataset(
        data_iter=[
            {
                "start": "2017-01-01",
                "target": np.array([1.0, 2.0, 3.0, 4.0]),
                "feat_dynamic_real": np.array([feature_row, feature_row]),
            }
        ],
        freq=params["freq"],
    )
    act_fcst = next(ProphetPredictor(**params).predict(dataset))
    exp_fcst = np.arange(5.0, 5.0 + params["prediction_length"])
    for quantile in (0.1, 0.5, 0.9):
        assert np.all(np.isclose(act_fcst.quantile(quantile), exp_fcst, atol=0.02))
def load(self, frequency: str, subset_filter: str, training: bool) -> ListDataset:
    """Load the electricity dataset for a date window.

    :param frequency: target frequency string for the resulting dataset.
    :param subset_filter: "from_date:to_date" in "YYYY-mm-dd H" format.
    :param training: when False, to_date is extended by 7 days for evaluation.
    :return: ListDataset with one entry per series row.
    """
    dates = subset_filter.split(':')
    from_date = pd.to_datetime(dates[0])
    to_date = pd.to_datetime(dates[1])
    if not training:
        # Push the window one week into the future for out-of-sample targets.
        to_date = to_date + relativedelta(hours=24 * 7)
    window = self.values[:, self._dates_to_index(from_date, to_date)]
    items_all = [{
        'item_id': series_id,
        'start': from_date,
        'horizon': 24,
        'target': series
    } for series_id, series in enumerate(window)]
    return ListDataset(items_all, freq=frequency)
def initialize_model() -> nn.HybridBlock:
    """Train a tiny feed-forward model on zero-valued dummy data and return its
    prediction network."""
    num_series = 10
    num_steps = 100
    prediction_length = 24
    freq = "1H"
    dummy_values = np.zeros(shape=(num_series, num_steps))
    # All series share one start; it could differ per series in real data.
    start = pd.Timestamp("01-01-2019", freq=freq)
    train_ds = ListDataset(
        [{
            "target": row,
            "start": start
        } for row in dummy_values[:, :-prediction_length]],
        freq=freq,
    )
    estimator = SimpleFeedForwardEstimator(
        num_hidden_dimensions=[10],
        prediction_length=prediction_length,
        context_length=num_steps,
        freq=freq,
        trainer=Trainer(
            ctx="cpu",
            epochs=1,
            learning_rate=1e-3,
            num_batches_per_epoch=1,
        ),
    )
    predictor = estimator.train(train_ds)
    return predictor.prediction_net
def test_feat_dynamic_real_bad_size():
    """Prophet must raise when the dynamic feature is one step too short."""
    params = dict(freq="1D", prediction_length=3, prophet_params={})
    # 6 feature steps vs required 4 (target) + 3 (horizon) = 7.
    short_feature = np.array(
        [
            [1.0, 2.0, 3.0, 4.0, 5.0, 6.0],
            [1.0, 2.0, 3.0, 4.0, 5.0, 6.0],
        ]
    )
    dataset = ListDataset(
        data_iter=[
            {
                "start": "2017-01-01",
                "target": np.array([1.0, 2.0, 3.0, 4.0]),
                "feat_dynamic_real": short_feature,
            }
        ],
        freq=params["freq"],
    )
    with pytest.raises(AssertionError) as excinfo:
        predictor = ProphetPredictor(**params)
        list(predictor.predict(dataset))
    assert str(excinfo.value) == (
        "Length mismatch for dynamic real-valued feature #0: "
        "expected 7, got 6"
    )