# Shared imports assumed by the snippets below (GluonTS's Prophet wrapper):
import numpy as np
import pytest

from gluonts.core import serde
from gluonts.dataset.common import ListDataset
from gluonts.model.prophet import ProphetPredictor


def test_related_time_series_fail():
    # Note: this variant passes the Prophet kwargs as `prophet=...`; other
    # snippets in this section spell the same constructor argument
    # `prophet_params=...` (the name differs across versions).
    params = dict(freq="1D", prediction_length=3, prophet={})

    dataset = ListDataset(
        data_iter=[
            {
                "start": "2017-01-01",
                "target": np.array([1.0, 2.0, 3.0, 4.0]),
                "feat_dynamic_real": np.array(
                    [
                        [1.0, 2.0, 3.0, 4.0, 5.0, 6.0],
                        [1.0, 2.0, 3.0, 4.0, 5.0, 6.0],
                    ]
                ),
            }
        ],
        freq=params["freq"],
    )

    # Each dynamic feature must cover history plus horizon (4 + 3 = 7 steps);
    # the features above have only 6, so prediction must fail.
    with pytest.raises(AssertionError) as excinfo:
        predictor = ProphetPredictor(**params)
        list(predictor.predict(dataset))

    assert str(excinfo.value) == (
        "Length mismatch for dynamic real-valued feature #0: "
        "expected 7, got 6"
    )
def test_feat_dynamic_real_success():
    params = dict(
        freq="1D",
        prediction_length=3,
        prophet_params=dict(n_changepoints=20),
    )

    dataset = ListDataset(
        data_iter=[
            {
                "start": "2017-01-01",
                "target": np.array([1.0, 2.0, 3.0, 4.0]),
                "feat_dynamic_real": np.array(
                    [
                        [1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0],
                        [1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0],
                    ]
                ),
            }
        ],
        freq=params["freq"],
    )

    predictor = ProphetPredictor(**params)
    act_fcst = next(predictor.predict(dataset))
    exp_fcst = np.arange(5.0, 5.0 + params["prediction_length"])

    assert np.all(np.isclose(act_fcst.quantile(0.1), exp_fcst, atol=0.02))
    assert np.all(np.isclose(act_fcst.quantile(0.5), exp_fcst, atol=0.02))
    assert np.all(np.isclose(act_fcst.quantile(0.9), exp_fcst, atol=0.02))
def test_feat_dynamic_real_bad_size():
    params = dict(freq="1D", prediction_length=3, prophet_params={})

    dataset = ListDataset(
        data_iter=[
            {
                "start": "2017-01-01",
                "target": np.array([1.0, 2.0, 3.0, 4.0]),
                "feat_dynamic_real": np.array(
                    [
                        [1.0, 2.0, 3.0, 4.0, 5.0, 6.0],
                        [1.0, 2.0, 3.0, 4.0, 5.0, 6.0],
                    ]
                ),
            }
        ],
        freq=params["freq"],
    )

    with pytest.raises(AssertionError) as excinfo:
        predictor = ProphetPredictor(**params)
        list(predictor.predict(dataset))

    assert str(excinfo.value) == (
        "Length mismatch for dynamic real-valued feature #0: "
        "expected 7, got 6"
    )
def gluonts_prophet(dataset, freq, pred_length, prophet_params=None):
    """Fit-and-predict helper: wraps ProphetPredictor and returns the
    forecasts as a list (one Forecast object per series in `dataset`)."""
    # Avoid a mutable default argument; fall back to an empty dict.
    if prophet_params is None:
        prophet_params = {}
    predictor = ProphetPredictor(
        freq=freq,
        prediction_length=pred_length,
        prophet_params=prophet_params,
    )
    return list(predictor.predict(dataset))
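# A minimal usage sketch for the helper above (not part of the original
# snippet); the data values and the weekly_seasonality setting are
# illustrative assumptions passed straight through to Prophet.
toy_ds = ListDataset(
    data_iter=[{"start": "2021-01-01", "target": np.arange(60.0)}],
    freq="D",
)
forecasts = gluonts_prophet(
    toy_ds,
    freq="D",
    pred_length=14,
    prophet_params={"weekly_seasonality": True},
)
print(forecasts[0].mean)  # 14 mean forecast values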
def test_min_obs():
    # This variant targets a Prophet wrapper that falls back to a constant
    # forecast (exposed through Prophet-style yhat/yhat_lower/yhat_upper
    # fields) when the series is too short to fit a model.
    params = dict(freq="1D", prediction_length=10, prophet={})

    dataset = ListDataset(
        data_iter=[{"start": "2017-01-01", "target": np.array([1.0])}],
        freq=params["freq"],
    )

    predictor = ProphetPredictor(**params)
    act_forecast = next(predictor.predict(dataset))
    exp_forecast = np.ones(params["prediction_length"])

    assert np.array_equal(act_forecast.yhat, exp_forecast)
    assert np.array_equal(act_forecast.yhat_lower, exp_forecast)
    assert np.array_equal(act_forecast.yhat_upper, exp_forecast)
def test_min_obs_error():
    params = dict(freq="1D", prediction_length=10, prophet_params={})

    dataset = ListDataset(
        data_iter=[{"start": "2017-01-01", "target": np.array([1.0])}],
        freq=params["freq"],
    )

    # Prophet itself refuses to fit on fewer than two non-NaN observations.
    with pytest.raises(ValueError) as excinfo:
        predictor = ProphetPredictor(**params)
        list(predictor.predict(dataset))

    act_error_msg = str(excinfo.value)
    exp_error_msg = "Dataframe has less than 2 non-NaN rows."
    assert act_error_msg == exp_error_msg
def test_related_time_series_success():
    params = dict(
        freq="1D",
        prediction_length=3,
        prophet=dict(n_changepoints=20),
    )

    dataset = ListDataset(
        data_iter=[
            {
                "start": "2017-01-01",
                "target": np.array([1.0, 2.0, 3.0, 4.0]),
                "feat_dynamic_real": np.array(
                    [
                        [1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0],
                        [1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0],
                    ]
                ),
            }
        ],
        freq=params["freq"],
    )

    predictor = ProphetPredictor(**params)
    list(predictor.predict(dataset))
def test_mean_forecast():
    params = dict(
        freq="1D",
        prediction_length=10,
        min_nonnan_obs=3,
        prophet=dict(n_changepoints=20),
    )

    # The trailing "nan" strings are parsed to NaN by ListDataset; with fewer
    # than min_nonnan_obs valid points, this wrapper falls back to forecasting
    # the historical mean (here (2.0 + 3.0) / 2 = 2.5).
    dataset = ListDataset(
        data_iter=[
            {"start": "2017-01-01", "target": [2.0, 3.0, "nan", "nan"]}
        ],
        freq=params["freq"],
    )

    predictor = ProphetPredictor(**params)
    act_forecast = next(predictor.predict(dataset))
    exp_forecast = 2.5 * np.ones(params["prediction_length"])

    assert np.array_equal(act_forecast.yhat, exp_forecast)
    assert np.array_equal(act_forecast.yhat_lower, exp_forecast)
    assert np.array_equal(act_forecast.yhat_upper, exp_forecast)
def train(args):
    freq = args.freq.replace('"', '')
    prediction_length = args.prediction_length
    context_length = args.context_length
    use_feat_dynamic_real = args.use_feat_dynamic_real
    use_past_feat_dynamic_real = args.use_past_feat_dynamic_real
    use_feat_static_cat = args.use_feat_static_cat
    use_log1p = args.use_log1p

    print('freq:', freq)
    print('prediction_length:', prediction_length)
    print('context_length:', context_length)
    print('use_feat_dynamic_real:', use_feat_dynamic_real)
    print('use_past_feat_dynamic_real:', use_past_feat_dynamic_real)
    print('use_feat_static_cat:', use_feat_static_cat)
    print('use_log1p:', use_log1p)

    batch_size = args.batch_size
    print('batch_size:', batch_size)

    train = load_json(os.path.join(args.train, 'train_' + freq + '.json'))
    test = load_json(os.path.join(args.test, 'test_' + freq + '.json'))

    num_timeseries = len(train)
    print('num_timeseries:', num_timeseries)

    train_ds = ListDataset(parse_data(train, use_log1p=use_log1p), freq=freq)
    test_ds = ListDataset(parse_data(test, use_log1p=use_log1p), freq=freq)

    predictor = None

    trainer = Trainer(
        ctx="cpu",
        epochs=args.epochs,
        num_batches_per_epoch=args.num_batches_per_epoch,
        learning_rate=args.learning_rate,
        learning_rate_decay_factor=args.learning_rate_decay_factor,
        patience=args.patience,
        minimum_learning_rate=args.minimum_learning_rate,
        clip_gradient=args.clip_gradient,
        weight_decay=args.weight_decay,
        init=args.init.replace('"', ''),
        hybridize=args.hybridize,
    )
    print('trainer:', trainer)

    # Cardinality arrives as a string such as '[5, 7]'; strip quotes,
    # spaces, and brackets, then parse it into a list of ints.
    cardinality = None
    if args.cardinality != '':
        cardinality = [
            int(c)
            for c in args.cardinality.replace('"', '')
            .replace(' ', '')
            .replace('[', '')
            .replace(']', '')
            .split(',')
        ]
    print('cardinality:', cardinality)

    embedding_dimension = (
        [min(50, (cat + 1) // 2) for cat in cardinality]
        if cardinality is not None
        else None
    )
    print('embedding_dimension:', embedding_dimension)

    algo_name = args.algo_name.replace('"', '')
    print('algo_name:', algo_name)

    if algo_name == 'CanonicalRNN':
        estimator = CanonicalRNNEstimator(
            freq=freq,
            prediction_length=prediction_length,
            context_length=context_length,
            trainer=trainer,
            batch_size=batch_size,
            num_layers=5,
            num_cells=50,
            cell_type='lstm',
            num_parallel_samples=100,
            cardinality=cardinality,
            embedding_dimension=10,
        )
    elif algo_name == 'DeepFactor':
        estimator = DeepFactorEstimator(
            freq=freq,
            prediction_length=prediction_length,
            context_length=context_length,
            trainer=trainer,
            batch_size=batch_size,
            cardinality=cardinality,
            embedding_dimension=10,
        )
    elif algo_name == 'DeepAR':
        estimator = DeepAREstimator(
            freq=freq,  # Frequency of the data to train on and predict
            prediction_length=prediction_length,  # Length of the prediction horizon
            trainer=trainer,  # Trainer object to be used (default: Trainer())
            context_length=context_length,  # Number of steps to unroll the RNN for before computing predictions (default: None, in which case context_length = prediction_length)
            num_layers=2,  # Number of RNN layers (default: 2)
            num_cells=40,  # Number of RNN cells for each layer (default: 40)
            cell_type='lstm',  # Type of recurrent cells to use (available: 'lstm' or 'gru'; default: 'lstm')
            dropoutcell_type='ZoneoutCell',  # Type of dropout cells to use (available: 'ZoneoutCell', 'RNNZoneoutCell', 'VariationalDropoutCell' or 'VariationalZoneoutCell'; default: 'ZoneoutCell')
            dropout_rate=0.1,  # Dropout regularization parameter (default: 0.1)
            use_feat_dynamic_real=use_feat_dynamic_real,  # Whether to use the feat_dynamic_real field from the data (default: False)
            use_feat_static_cat=use_feat_static_cat,  # Whether to use the feat_static_cat field from the data (default: False)
            use_feat_static_real=False,  # Whether to use the feat_static_real field from the data (default: False)
            cardinality=cardinality,  # Number of values of each categorical feature. This must be set if use_feat_static_cat == True (default: None)
            embedding_dimension=embedding_dimension,  # Dimension of the embeddings for categorical features (default: [min(50, (cat+1)//2) for cat in cardinality])
            # distr_output=StudentTOutput(),  # Distribution to use to evaluate observations and sample predictions (default: StudentTOutput())
            # scaling=True,  # Whether to automatically scale the target values (default: True)
            # lags_seq=None,  # Indices of the lagged target values to use as inputs of the RNN (default: None, in which case these are automatically determined based on freq)
            # time_features=None,  # Time features to use as inputs of the RNN (default: None, in which case these are automatically determined based on freq)
            # num_parallel_samples=100,  # Number of evaluation samples per time series to increase parallelism during inference. This is a model optimization that does not affect the accuracy (default: 100)
            # imputation_method=None,  # One of the methods from ImputationStrategy
            # train_sampler=None,  # Controls the sampling of windows during training
            # validation_sampler=None,  # Controls the sampling of windows during validation
            # alpha=None,  # The scaling coefficient of the activation regularization
            # beta=None,  # The scaling coefficient of the temporal activation regularization
            batch_size=batch_size,  # The size of the batches to be used during training and prediction
            # minimum_scale=None,  # The minimum scale that is returned by the MeanScaler
            # default_scale=None,  # Default scale that is applied if the context length window is completely unobserved. If not set, the scale in this case will be the mean scale in the batch
            # impute_missing_values=None,  # Whether to impute the missing values during training by using the current model parameters. Recommended if the dataset contains many missing values. However, this is a lot slower than the default mode
            # num_imputation_samples=None,  # How many samples to use to impute values when impute_missing_values=True
        )
    elif algo_name == 'DeepState':
        estimator = DeepStateEstimator(
            freq=freq,
            prediction_length=prediction_length,
            trainer=trainer,
            batch_size=batch_size,
            use_feat_dynamic_real=use_feat_dynamic_real,
            use_feat_static_cat=use_feat_static_cat,
            cardinality=cardinality,
        )
    elif algo_name == 'DeepVAR':  # use multi
        estimator = DeepVAREstimator(
            freq=freq,
            prediction_length=prediction_length,
            context_length=context_length,
            trainer=trainer,
            batch_size=batch_size,
            target_dim=96,
        )
    elif algo_name == 'GaussianProcess':
        # TODO
        # estimator = GaussianProcessEstimator(
        #     freq=freq,
        #     prediction_length=prediction_length,
        #     context_length=context_length,
        #     trainer=trainer,
        #     batch_size=batch_size,
        #     cardinality=num_timeseries,
        # )
        pass
    elif algo_name == 'GPVAR':  # use multi
        estimator = GPVAREstimator(
            freq=freq,
            prediction_length=prediction_length,
            context_length=context_length,
            trainer=trainer,
            batch_size=batch_size,
            target_dim=96,
        )
    elif algo_name == 'LSTNet':  # use multi
        estimator = LSTNetEstimator(
            freq=freq,
            prediction_length=prediction_length,
            context_length=context_length,
            num_series=96,
            skip_size=4,
            ar_window=4,
            channels=72,
            trainer=trainer,
            batch_size=batch_size,
        )
    elif algo_name == 'NBEATS':
        estimator = NBEATSEstimator(
            freq=freq,
            prediction_length=prediction_length,
            context_length=context_length,
            trainer=trainer,
            batch_size=batch_size,
        )
    elif algo_name == 'DeepRenewalProcess':
        estimator = DeepRenewalProcessEstimator(
            freq=freq,
            prediction_length=prediction_length,
            context_length=context_length,
            trainer=trainer,
            batch_size=batch_size,
            num_cells=40,
            num_layers=2,
        )
    elif algo_name == 'Tree':
        estimator = TreePredictor(
            freq=freq,
            prediction_length=prediction_length,
            context_length=context_length,
            n_ignore_last=0,
            lead_time=0,
            max_n_datapts=1000000,
            min_bin_size=100,  # Used only for the "QRX" method
            use_feat_static_real=False,
            use_feat_dynamic_cat=False,
            use_feat_dynamic_real=use_feat_dynamic_real,
            cardinality=cardinality,
            one_hot_encode=False,
            model_params={
                'eta': 0.1,
                'max_depth': 6,
                'silent': 0,
                'nthread': -1,
                'n_jobs': -1,
                'gamma': 1,
                'subsample': 0.9,
                'min_child_weight': 1,
                'colsample_bytree': 0.9,
                'lambda': 1,
                'booster': 'gbtree',
            },
            max_workers=4,  # default: None
            method="QRX",  # "QRX", "QuantileRegression", "QRF"
            quantiles=None,  # Used only for the "QuantileRegression" method
            model=None,
            seed=None,
        )
    elif algo_name == 'SelfAttention':
        # TODO
        # estimator = SelfAttentionEstimator(
        #     freq=freq,
        #     prediction_length=prediction_length,
        #     context_length=context_length,
        #     trainer=trainer,
        #     batch_size=batch_size,
        # )
        pass
    elif algo_name == 'MQCNN':
        estimator = MQCNNEstimator(
            freq=freq,
            prediction_length=prediction_length,
            context_length=context_length,
            trainer=trainer,
            batch_size=batch_size,
            use_past_feat_dynamic_real=use_past_feat_dynamic_real,
            use_feat_dynamic_real=use_feat_dynamic_real,
            use_feat_static_cat=use_feat_static_cat,
            cardinality=cardinality,
            embedding_dimension=embedding_dimension,
            add_time_feature=True,
            add_age_feature=False,
            enable_encoder_dynamic_feature=True,
            enable_decoder_dynamic_feature=True,
            seed=None,
            decoder_mlp_dim_seq=None,
            channels_seq=None,
            dilation_seq=None,
            kernel_size_seq=None,
            use_residual=True,
            quantiles=None,
            distr_output=None,
            scaling=None,
            scaling_decoder_dynamic_feature=False,
            num_forking=None,
            max_ts_len=None,
        )
    elif algo_name == 'MQRNN':
        estimator = MQRNNEstimator(
            freq=freq,
            prediction_length=prediction_length,
            context_length=context_length,
            trainer=trainer,
            batch_size=batch_size,
        )
    elif algo_name == 'Seq2Seq':
        # TODO
        # estimator = Seq2SeqEstimator(
        #     freq=freq,
        #     prediction_length=prediction_length,
        #     context_length=context_length,
        #     trainer=trainer,
        #     cardinality=cardinality,
        #     embedding_dimension=4,
        #     encoder=Seq2SeqEncoder(),
        #     decoder_mlp_layer=[4],
        #     decoder_mlp_static_dim=4,
        # )
        pass
    elif algo_name == 'SimpleFeedForward':
        estimator = SimpleFeedForwardEstimator(
            num_hidden_dimensions=[40, 40],
            prediction_length=prediction_length,
            context_length=context_length,
            freq=freq,
            trainer=trainer,
            batch_size=batch_size,
        )
    elif algo_name == 'TemporalFusionTransformer':
        estimator = TemporalFusionTransformerEstimator(
            prediction_length=prediction_length,
            context_length=context_length,
            freq=freq,
            trainer=trainer,
            batch_size=batch_size,
            hidden_dim=32,
            variable_dim=None,
            num_heads=4,
            num_outputs=3,
            num_instance_per_series=100,
            dropout_rate=0.1,
            # time_features=[],
            # static_cardinalities={},
            # dynamic_cardinalities={},
            # static_feature_dims={},
            # dynamic_feature_dims={},
            # past_dynamic_features=[],
        )
    elif algo_name == 'DeepTPP':
        # TODO
        # estimator = DeepTPPEstimator(
        #     prediction_interval_length=prediction_length,
        #     context_interval_length=context_length,
        #     freq=freq,
        #     trainer=trainer,
        #     batch_size=batch_size,
        #     num_marks=len(cardinality) if cardinality is not None else 0,
        # )
        pass
    elif algo_name == 'Transformer':
        estimator = TransformerEstimator(
            freq=freq,
            prediction_length=prediction_length,
            trainer=trainer,
            batch_size=batch_size,
            cardinality=cardinality,
        )
    elif algo_name == 'WaveNet':
        estimator = WaveNetEstimator(
            freq=freq,
            prediction_length=prediction_length,
            trainer=trainer,
            batch_size=batch_size,
            cardinality=cardinality,
        )
    elif algo_name == 'Naive2':
        # TODO Multiplicative seasonality is not appropriate for zero and negative values
        predictor = Naive2Predictor(
            freq=freq,
            prediction_length=prediction_length,
            season_length=context_length,
        )
    elif algo_name == 'NPTS':
        predictor = NPTSPredictor(
            freq=freq,
            prediction_length=prediction_length,
            context_length=context_length,
        )
    elif algo_name == 'Prophet':
        def configure_model(model):
            model.add_seasonality(
                name='weekly', period=7, fourier_order=3, prior_scale=0.1
            )
            return model

        predictor = ProphetPredictor(
            freq=freq,
            prediction_length=prediction_length,
            init_model=configure_model,
        )
    elif algo_name == 'ARIMA':
        predictor = RForecastPredictor(
            freq=freq,
            prediction_length=prediction_length,
            method_name='arima',
            period=context_length,
            trunc_length=len(train[0]['target']),
        )
    elif algo_name == 'ETS':
        predictor = RForecastPredictor(
            freq=freq,
            prediction_length=prediction_length,
            method_name='ets',
            period=context_length,
            trunc_length=len(train[0]['target']),
        )
    elif algo_name == 'TBATS':
        predictor = RForecastPredictor(
            freq=freq,
            prediction_length=prediction_length,
            method_name='tbats',
            period=context_length,
            trunc_length=len(train[0]['target']),
        )
    elif algo_name == 'CROSTON':
        predictor = RForecastPredictor(
            freq=freq,
            prediction_length=prediction_length,
            method_name='croston',
            period=context_length,
            trunc_length=len(train[0]['target']),
        )
    elif algo_name == 'MLP':
        predictor = RForecastPredictor(
            freq=freq,
            prediction_length=prediction_length,
            method_name='mlp',
            period=context_length,
            trunc_length=len(train[0]['target']),
        )
    elif algo_name == 'SeasonalNaive':
        predictor = SeasonalNaivePredictor(
            freq=freq,
            prediction_length=prediction_length,
        )
    else:
        print('[ERROR]:', algo_name, 'not supported')
        return

    if predictor is None:
        # Univariate estimators train directly; the multivariate ones
        # (DeepVAR, GPVAR, LSTNet) need the dataset grouped first, so
        # fall back to MultivariateGrouper if the first attempt fails.
        try:
            predictor = estimator.train(train_ds, test_ds)
        except Exception as e:
            print(e)
            try:
                grouper_train = MultivariateGrouper(
                    max_target_dim=num_timeseries
                )
                train_ds_multi = grouper_train(train_ds)
                test_ds_multi = grouper_train(test_ds)
                predictor = estimator.train(train_ds_multi, test_ds_multi)
            except Exception as e:
                print(e)

    forecast_it, ts_it = make_evaluation_predictions(
        dataset=test_ds,  # test dataset
        predictor=predictor,  # predictor
        num_samples=100,  # number of sample paths we want for evaluation
    )

    forecasts = list(forecast_it)
    tss = list(ts_it)
    # print(len(forecasts), len(tss))

    evaluator = Evaluator(quantiles=[0.1, 0.5, 0.9])
    agg_metrics, item_metrics = evaluator(
        iter(tss), iter(forecasts), num_series=len(test_ds)
    )
    print(json.dumps(agg_metrics, indent=4))

    model_dir = os.path.join(args.model_dir, algo_name)
    if not os.path.exists(model_dir):
        os.makedirs(model_dir)
    predictor.serialize(Path(model_dir))
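# A sketch (not part of the original script) of restoring the artifact that
# train() writes via predictor.serialize(), using GluonTS's generic
# Predictor.deserialize. The model_dir path is an assumed example.
from pathlib import Path

from gluonts.model.predictor import Predictor

restored = Predictor.deserialize(Path("/opt/ml/model/DeepAR"))
forecasts = list(restored.predict(test_ds))  # test_ds as built in train()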
def load_predictor(self, path: Path) -> Predictor:
    # Note: the file is named "metadata.pickle" but is read as JSON here,
    # so the matching save step must have written JSON to that path.
    file = path / "metadata.pickle"
    with file.open("r") as f:
        meta = json.load(f)
    return ProphetPredictor(
        freq=meta["freq"], prediction_length=meta["prediction_length"]
    )
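# A hypothetical counterpart to load_predictor above (not in the original):
# it writes the same two metadata fields as JSON so load_predictor can read
# them back. The method name, file layout, and the predictor exposing
# .freq / .prediction_length attributes are assumptions.
def save_predictor(self, predictor: ProphetPredictor, path: Path) -> None:
    meta = {
        "freq": predictor.freq,
        "prediction_length": predictor.prediction_length,
    }
    with (path / "metadata.pickle").open("w") as f:
        json.dump(meta, f)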
class Predictor_sales(object):
    # Note: this class relies on module-level globals (algorithm, freq,
    # min_date, prediction_length, list_products, OUTPUT_FOLDER, ...) that
    # are defined elsewhere, e.g. in the surrounding notebook.
    def __init__(self, freq="D", prediction_length=30, epochs=50,
                 batch_size=16, num_batches_per_epoch=100, num_layers=4,
                 list_products=list_products):
        self.predictor = DeepAREstimator(
            freq=freq,
            prediction_length=prediction_length,
            trainer=Trainer(ctx="cpu", epochs=epochs, batch_size=batch_size,
                            num_batches_per_epoch=num_batches_per_epoch),
            num_layers=num_layers)
        self.algorithm = algorithm
        self.list_products_names = TransactionsData.get_list_names(
            list_products)

    # DeepAR instance, to be explicitly trained before predicting
    def define_DeepAR_predictor(self, freq, prediction_length, epochs,
                                num_layers, batch_size):
        self.predictor = DeepAREstimator(
            freq=freq,
            prediction_length=prediction_length,
            context_length=prediction_length,
            trainer=Trainer(ctx="cpu", epochs=epochs, batch_size=batch_size,
                            num_batches_per_epoch=100),
            num_layers=num_layers,
            use_feat_dynamic_real=True)

    # Prophet instance; no explicit training step is needed (the model is
    # fitted when predict is called)
    def define_Prophet_predictor(self, freq, prediction_length,
                                 prophet_params):
        self.predictor = ProphetPredictor(
            freq=freq,
            prediction_length=prediction_length,
            prophet_params=prophet_params)

    # ARIMA instance, trained implicitly at definition time (auto_arima
    # fits as soon as it is called)
    def train_ARIMA_predictor(self, eval_ds, p):
        return auto_arima(
            eval_ds.list_data[p]['target'][:-prediction_length],
            error_action='ignore', suppress_warnings=True, n_jobs=-1)

    def train_predictor(self, train_ds):
        self.predictor = self.predictor.train(training_data=train_ds)
        return self.predictor

    # Making predictions depends on the algo instance
    def make_predictions(self, eval_ds):
        if self.algorithm == 'DeepAR':
            forecast_it, ts_it = make_evaluation_predictions(
                eval_ds, predictor=self.predictor, num_samples=100)
        elif self.algorithm == 'Prophet':
            train_ds = copy.deepcopy(eval_ds)
            for p in range(len(list_products)):
                train_ds.list_data[p]['target'] = train_ds.list_data[p][
                    'target'][:-prediction_length]
            forecast_it = self.predictor.predict(train_ds)
            ts_it = []
            for p in range(len(list_products)):
                ts_it.append(
                    pd.DataFrame({0: eval_ds.list_data[p]['target']},
                                 index=pd.date_range(
                                     min_date,
                                     periods=len(
                                         eval_ds.list_data[p]['target']),
                                     freq=freq,
                                     tz=None)))
        elif self.algorithm == 'ARIMA':
            ts_it = []
            period_list_pred = pd.date_range(
                min_date,
                periods=len(eval_ds.list_data[0]['target']),
                freq=freq,
                tz=None)[-prediction_length:]
            for p in range(len(list_products)):
                arima_predictor = self.train_ARIMA_predictor(eval_ds, p)
                pred = arima_predictor.predict(n_periods=prediction_length)
                if p == 0:
                    forecast_it = pd.DataFrame({
                        'OrderDate': period_list_pred,
                        'Product': pred
                    })
                else:
                    temp = pd.DataFrame({
                        'OrderDate': period_list_pred,
                        'Product': pred
                    })
                    forecast_it = forecast_it.merge(temp, on='OrderDate',
                                                    how='left')
                forecast_it = forecast_it.rename(
                    columns={'Product': self.list_products_names[p]})
                ts_it.append(
                    pd.DataFrame({0: eval_ds.list_data[p]['target']},
                                 index=pd.date_range(
                                     min_date,
                                     periods=len(
                                         eval_ds.list_data[p]['target']),
                                     freq=freq,
                                     tz=None)))
            return forecast_it, ts_it
        return list(forecast_it), list(ts_it)

    # Plotting depends on the prediction output structure
    def plot_prob_forecasts(self, forecast_plot, ts_plot):
        if len(list_products) != 1:
            print('Which product no?')
            p = int(
                input({
                    key: value
                    for (key, value) in enumerate(self.list_products_names)
                }))
        else:
            p = 0
        if self.algorithm not in ['ARIMA']:
            ts_entry = ts_plot[p]  # plot only the selected time series
            forecast_entry = forecast_plot[p]
            plot_length = 70
            prediction_intervals = (50.0, 90.0)
            legend = ["observations", "median prediction"] + [
                f"{k}% prediction interval" for k in prediction_intervals
            ][::-1]
            _, ax = plt.subplots(1, 1, figsize=(10, 7))
            pd.plotting.register_matplotlib_converters()
            ts_entry[-plot_length:].plot(ax=ax)  # plot the time series
            forecast_entry.plot(prediction_intervals=prediction_intervals,
                                color='b')
            plt.grid(which="both")
            plt.legend(legend, loc="upper left")
            plt.show()
        else:
            history_plot_length = min(prediction_length * 5, len(ts_plot[0]))
            ts_plot = ts_plot[p][-history_plot_length:].set_index(
                pd.DatetimeIndex(ts_plot[p][-history_plot_length:].index))
            forecast_plot = forecast_plot.set_index(
                pd.DatetimeIndex(forecast_plot['OrderDate'])).drop(
                    columns=['OrderDate']).iloc[:, p]
            plt.figure(figsize=(10, 6))
            plt.plot(ts_plot, color='C0', label='Observations')
            plt.plot(forecast_plot, color='b', label='Predictions')
            plt.legend()
            plt.show()

    # Run the saving function before plotting anything
    def save_csv(self, name, forecast_it, ts_it, scaler):
        ts_name = "ts " + name + ".csv"
        forecast_name = "forecast " + name + ".csv"
        # ts_name = "ts" + "_" + str(data) + "_" + str(min_date) + "_" + str(max_date) + "_" + str(algorithm) + "_" + str(freq) + "_" + name + "_" + str(list_products[0]) + ".csv"
        # forecast_name = "forecast" + "_" + str(data) + "_" + str(min_date) + "_" + str(max_date) + "_" + str(algorithm) + "_" + str(freq) + "_" + name + "_" + str(list_products[0]) + ".csv"
        if self.algorithm not in ['ARIMA']:
            if len(list_products) != 1:
                forecast_entry = []
                for p in range(len(list_products)):
                    forecast_entry.append(forecast_it[p].mean)
                start_dt = pd.date_range(min_date,
                                         periods=len(ts_it[0]),
                                         freq=freq,
                                         tz=None)[-prediction_length]
                # print(start_dt)
                forecast_csv = pd.DataFrame(
                    data=scaler.inverse_transform(
                        np.array(forecast_entry).transpose()),
                    columns=self.list_products_names,
                    index=pd.date_range(start_dt,
                                        periods=prediction_length,
                                        freq=freq))
                forecast_csv = forecast_csv.rename_axis(
                    'OrderDate').reset_index()
                forecast_csv.to_csv(os.path.join(OUTPUT_FOLDER,
                                                 forecast_name),
                                    index=False)
                for p in range(len(list_products)):
                    if p == 0:
                        ts_csv = ts_it[0]
                    else:
                        ts_csv = ts_csv.join(ts_it[p], rsuffix=p)
                idx_ts = ts_csv.index
                ts_csv = scaler.inverse_transform(ts_csv)
                ts_csv = pd.DataFrame(ts_csv,
                                      columns=self.list_products_names,
                                      index=idx_ts)
                ts_csv = ts_csv.rename_axis('OrderDate').reset_index()
                ts_csv.to_csv(os.path.join(OUTPUT_FOLDER, ts_name),
                              index=False)
            else:
                forecast_entry = forecast_it[0]
                ts_entry = ts_it[0]
                forecast_csv = pd.Series(
                    scaler.inverse_transform(
                        np.array(forecast_entry.mean).reshape(-1, 1)
                    ).reshape(-1),
                    index=pd.date_range(forecast_entry.start_date,
                                        periods=prediction_length,
                                        freq=freq),
                    name=self.list_products_names[0])
                forecast_csv = forecast_csv.rename_axis(
                    'OrderDate').reset_index()
                forecast_csv.to_csv(os.path.join(OUTPUT_FOLDER,
                                                 forecast_name),
                                    index=False)
                idx_ts = ts_entry.index
                ts_csv = scaler.inverse_transform(
                    np.array(ts_entry).reshape(-1, 1)).reshape(-1)
                ts_csv = pd.DataFrame(ts_csv,
                                      columns=self.list_products_names,
                                      index=idx_ts)
                ts_csv = ts_csv.rename_axis('OrderDate').reset_index()
                ts_csv.to_csv(os.path.join(OUTPUT_FOLDER, ts_name),
                              index=False)
        else:  # For ARIMA
            idx_fs = forecast_it.set_index('OrderDate').index
            forecast_csv = pd.DataFrame(
                data=scaler.inverse_transform(
                    np.array(forecast_it.set_index('OrderDate'))),
                columns=self.list_products_names,
                index=idx_fs)
            forecast_csv.rename_axis('OrderDate').reset_index().to_csv(
                os.path.join(OUTPUT_FOLDER, forecast_name), index=False)
            if len(list_products) != 1:
                for p in range(len(list_products)):
                    if p == 0:
                        ts_csv = ts_it[0]
                    else:
                        ts_csv = ts_csv.join(ts_it[p], rsuffix=p)
                idx_ts = ts_csv.index
                ts_csv = scaler.inverse_transform(ts_csv)
                ts_csv = pd.DataFrame(ts_csv,
                                      columns=self.list_products_names,
                                      index=idx_ts)
                ts_csv = ts_csv.rename_axis('OrderDate').reset_index()
                ts_csv.to_csv(os.path.join(OUTPUT_FOLDER, ts_name),
                              index=False)
            else:
                ts_entry = ts_it[0]
                idx_ts = ts_entry.index
                ts_csv = pd.DataFrame(
                    scaler.inverse_transform(
                        np.array(ts_entry).reshape(-1, 1)).reshape(-1),
                    columns=self.list_products_names,
                    index=idx_ts)
                ts_csv = ts_csv.rename_axis('OrderDate').reset_index()
                ts_csv.to_csv(os.path.join(OUTPUT_FOLDER, ts_name),
                              index=False)

    # MSE computation on test data
    def mse_compute(self, forecast_txt, ts_txt, scaler=None):
        ts_csv = ts_txt.copy()
        forecast_csv = forecast_txt.copy()
        ts_csv = ts_csv.loc[ts_csv['OrderDate'].isin(
            forecast_csv['OrderDate'])]
        ts_csv.set_index('OrderDate', inplace=True)
        forecast_csv.set_index('OrderDate', inplace=True)
        # Scale once, before the per-product loop (the original transformed
        # inside the loop, which re-scaled the data from the second product
        # onwards).
        if scaler is not None:
            ts_csv = scaler.transform(ts_csv)
            forecast_csv = scaler.transform(forecast_csv)
        mse_products = []
        for p in range(len(list_products)):
            if scaler is not None:
                mse_products.append(
                    mean_squared_error(ts_csv[:, p], forecast_csv[:, p]))
            else:
                mse_products.append(
                    mean_squared_error(ts_csv.iloc[:, p],
                                       forecast_csv.iloc[:, p]))
        mse_df = pd.DataFrame({
            'Granulcolname': self.list_products_names,
            'MSE': mse_products
        })
        if scaler is not None:
            print(">> Rescaled MSE:")
        else:
            print(">> Actual MSE, no rescaling:")
        print(mse_df)
        return mse_df

    def dtw_compute(self, forecast_txt, ts_txt, scaler=None):
        import dtw
        ts_csv = ts_txt.copy()
        forecast_csv = forecast_txt.copy()
        ts_csv = ts_csv.loc[ts_csv['OrderDate'].isin(
            forecast_csv['OrderDate'])]
        ts_csv.set_index('OrderDate', inplace=True)
        forecast_csv.set_index('OrderDate', inplace=True)
        # Same fix as in mse_compute: transform once, outside the loop.
        if scaler is not None:
            ts_csv = scaler.transform(ts_csv)
            forecast_csv = scaler.transform(forecast_csv)
        dtw_products = []
        for p in range(len(list_products)):
            if scaler is not None:
                distance = dtw.dtw(ts_csv[:, p],
                                   forecast_csv[:, p],
                                   distance_only=True).distance
            else:
                distance = dtw.dtw(np.array(ts_csv.iloc[:, p]),
                                   np.array(forecast_csv.iloc[:, p]),
                                   distance_only=True).distance
            dtw_products.append(distance)
        dtw_df = pd.DataFrame({
            'Granulcolname': self.list_products_names,
            'DTW': dtw_products
        })
        if scaler is not None:
            print(">> Rescaled DTW:")
        else:
            print(">> Actual DTW, no rescaling:")
        print(dtw_df)
        return dtw_df
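# A usage sketch for Predictor_sales (not in the original; assumes the
# module-level globals noted above plus a ListDataset `eval_ds` and a fitted
# sklearn-style `scaler` already exist):
sales = Predictor_sales(freq="D", prediction_length=30)
sales.define_Prophet_predictor(freq="D", prediction_length=30,
                               prophet_params={"n_changepoints": 20})
forecast_it, ts_it = sales.make_predictions(eval_ds)
sales.save_csv("prophet_run", forecast_it, ts_it, scaler)
sales.plot_prob_forecasts(forecast_it, ts_it)

# mse_compute expects the CSV-style frames written by save_csv:
forecast_df = pd.read_csv(os.path.join(OUTPUT_FOLDER,
                                       "forecast prophet_run.csv"))
ts_df = pd.read_csv(os.path.join(OUTPUT_FOLDER, "ts prophet_run.csv"))
sales.mse_compute(forecast_df, ts_df)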
def test_prophet_serialization():
    predictor = ProphetPredictor(freq="1D", prediction_length=3)
    assert predictor == serde.decode(serde.encode(predictor))
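# A minimal end-to-end sketch complementing the tests above (the data values
# here are illustrative assumptions): Prophet is fitted at predict time, and
# quantiles are read off the resulting forecast object.
toy = ListDataset(
    data_iter=[{"start": "2017-01-01", "target": np.arange(60.0)}],
    freq="1D",
)
fcst = next(ProphetPredictor(freq="1D", prediction_length=7).predict(toy))
print(fcst.mean)           # per-step mean of the sampled paths
print(fcst.quantile(0.9))  # per-step 90th percentile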