Example #1
def test_inference_quantile_prediction(quantiles, inference_quantiles):
    hps = {
        "seed": 42,
        "freq": "D",
        "prediction_length": 3,
        "quantiles": quantiles,
        "epochs": 3,
        "num_batches_per_epoch": 3,
        "is_iqf": True,
    }

    dataset_train, dataset_test = make_dummy_datasets_with_features(
        cardinality=[3, 10],
        num_feat_dynamic_real=2,
        freq=hps["freq"],
        prediction_length=hps["prediction_length"],
    )

    estimator = MQCNNEstimator.from_inputs(dataset_train, **hps)

    predictor = estimator.train(dataset_train, num_workers=None)
    forecasts = list(predictor.predict(dataset_test))
    assert len(forecasts) == len(dataset_test)
    item_id = 0
    for inference_quantile in inference_quantiles:
        assert (len(forecasts[item_id].quantile(inference_quantile)) ==
                hps["prediction_length"])
    sorted_quantiles = sorted(inference_quantiles)
    previous_quantile_prediction = forecasts[item_id].quantile(
        sorted_quantiles[0])
    for inference_quantile in sorted_quantiles[1:]:
        quantile_prediction = forecasts[item_id].quantile(inference_quantile)
        assert all(
            prev <= pred
            for (prev, pred) in zip(previous_quantile_prediction,
                                    quantile_prediction)
        ), "quantile-crossing occurred"
        previous_quantile_prediction = quantile_prediction
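Note: the is_iqf flag enables MQCNN's Incremental Quantile Function (IQF) output, which yields monotonically non-decreasing quantile predictions by construction; that property is what the final assertion checks.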
Example #2
def create_estimator(
    self,
    freq: str,
    prediction_length: int,
    time_features: bool,
    training_time: float,
    validation_milestones: List[float],
    callbacks: List[Callback],
) -> Estimator:
    return MQCNNEstimator(
        freq=freq,
        prediction_length=prediction_length,
        channels_seq=[self.num_filters] * 3,
        kernel_size_seq=[
            self.kernel_size_first,
            self.kernel_size_hidden,
            self.kernel_size_last,
        ],
        add_time_feature=time_features,
        trainer=self._create_trainer(
            training_time,
            validation_milestones,
            callbacks,  # type: ignore
        ),
        context_length=self.context_length_multiple * prediction_length,
    )
Example #3
def test_mqcnn_covariate_smoke_test(
    use_feat_dynamic_real,
    add_time_feature,
    add_age_feature,
    enable_decoder_dynamic_feature,
    hybridize,
):
    hps = {
        "seed": 42,
        "freq": "D",
        "prediction_length": 3,
        "quantiles": [0.5, 0.1],
        "epochs": 3,
        "num_batches_per_epoch": 3,
        "use_feat_dynamic_real": use_feat_dynamic_real,
        "add_time_feature": add_time_feature,
        "add_age_feature": add_age_feature,
        "enable_decoder_dynamic_feature": enable_decoder_dynamic_feature,
        "hybridize": hybridize,
    }

    dataset_train, dataset_test = make_dummy_datasets_with_features(
        cardinality=[3, 10],
        num_feat_dynamic_real=2,
        freq=hps["freq"],
        prediction_length=hps["prediction_length"],
    )

    estimator = MQCNNEstimator.from_hyperparameters(**hps)

    predictor = estimator.train(dataset_train, num_workers=0)
    forecasts = list(predictor.predict(dataset_test))
    assert len(forecasts) == len(dataset_test)
Example #4
def test_mqcnn_covariate_smoke_test(
    use_past_feat_dynamic_real,
    use_feat_dynamic_real,
    add_time_feature,
    add_age_feature,
    enable_encoder_dynamic_feature,
    enable_decoder_dynamic_feature,
    hybridize,
    quantiles,
    distr_output,
    is_iqf,
):
    hps = {
        "seed": 42,
        "freq": "Y",
        "context_length": 5,
        "prediction_length": 3,
        "quantiles": quantiles,
        "distr_output": distr_output,
        "epochs": 3,
        "num_batches_per_epoch": 3,
        "use_past_feat_dynamic_real": use_past_feat_dynamic_real,
        "use_feat_dynamic_real": use_feat_dynamic_real,
        "add_time_feature": add_time_feature,
        "add_age_feature": add_age_feature,
        "enable_encoder_dynamic_feature": enable_encoder_dynamic_feature,
        "enable_decoder_dynamic_feature": enable_decoder_dynamic_feature,
        "hybridize": hybridize,
        "is_iqf": is_iqf,
    }

    dataset_train, dataset_test = make_dummy_datasets_with_features(
        cardinality=[3, 10],
        num_feat_dynamic_real=2,
        num_past_feat_dynamic_real=4,
        freq=hps["freq"],
        prediction_length=hps["prediction_length"],
    )

    estimator = MQCNNEstimator.from_hyperparameters(**hps)

    predictor = estimator.train(dataset_train, num_workers=None)
    forecasts = list(predictor.predict(dataset_test))
    assert len(forecasts) == len(dataset_test)
Example #5
def test_backwards_compatibility():
    hps = {
        "freq": "D",
        "context_length": 5,
        "num_forking": 4,
        "prediction_length": 3,
        "quantiles": [0.5, 0.1],
        "epochs": 3,
        "num_batches_per_epoch": 3,
        "use_feat_dynamic_real": True,
        "use_past_feat_dynamic_real": True,
        "enable_encoder_dynamic_feature": True,
        "enable_decoder_dynamic_feature": True,
        "scaling": True,
        "scaling_decoder_dynamic_feature": True,
    }

    dataset_train, dataset_test = make_dummy_datasets_with_features(
        cardinality=[3, 10],
        num_feat_dynamic_real=2,
        num_past_feat_dynamic_real=4,
        freq=hps["freq"],
        prediction_length=hps["prediction_length"],
    )

    # Move the features to the legacy "dynamic_feat" field name.
    for entry in dataset_train.list_data:
        entry["dynamic_feat"] = entry.pop("feat_dynamic_real")

    for entry in dataset_test.list_data:
        entry["dynamic_feat"] = entry.pop("feat_dynamic_real")

    estimator = MQCNNEstimator.from_inputs(dataset_train, **hps)

    predictor = estimator.train(dataset_train, num_workers=None)
    forecasts = list(predictor.predict(dataset_test))
    assert len(forecasts) == len(dataset_test)
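Note: dynamic_feat is the field name older GluonTS datasets used for what is now feat_dynamic_real; the test feeds data in the old layout to confirm that from_inputs still picks the feature up.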
Example #6
def test_feat_static_cat_smoke_test(use_feat_static_cat, cardinality):
    hps = {
        "seed": 42,
        "freq": "D",
        "prediction_length": 3,
        "quantiles": [0.5, 0.1],
        "epochs": 3,
        "num_batches_per_epoch": 3,
        "use_feat_static_cat": use_feat_static_cat,
    }

    dataset_train, dataset_test = make_dummy_datasets_with_features(
        cardinality=cardinality,
        num_feat_dynamic_real=2,
        freq=hps["freq"],
        prediction_length=hps["prediction_length"],
    )
    estimator = MQCNNEstimator.from_inputs(dataset_train, **hps)

    predictor = estimator.train(dataset_train, num_workers=None)
    forecasts = list(predictor.predict(dataset_test))
    assert len(forecasts) == len(dataset_test)
Example #7
def test_mqcnn_scaling_smoke_test(scaling):
    hps = {
        "seed": 42,
        "freq": "D",
        "prediction_length": 3,
        "quantiles": [0.5, 0.1],
        "epochs": 3,
        "num_batches_per_epoch": 3,
        "scaling": scaling,
    }

    dataset_train, dataset_test = make_dummy_datasets_with_features(
        cardinality=[3, 10],
        num_feat_dynamic_real=2,
        freq=hps["freq"],
        prediction_length=hps["prediction_length"],
    )

    estimator = MQCNNEstimator.from_inputs(dataset_train, **hps)

    predictor = estimator.train(dataset_train, num_workers=0)
    forecasts = list(predictor.predict(dataset_test))
    assert len(forecasts) == len(dataset_test)
Example #8
def train(args):
    freq = args.freq.replace('"', '')
    prediction_length = args.prediction_length
    context_length = args.context_length
    use_feat_dynamic_real = args.use_feat_dynamic_real
    use_past_feat_dynamic_real = args.use_past_feat_dynamic_real
    use_feat_static_cat = args.use_feat_static_cat
    use_log1p = args.use_log1p
    
    print('freq:', freq)
    print('prediction_length:', prediction_length)
    print('context_length:', context_length)
    print('use_feat_dynamic_real:', use_feat_dynamic_real)
    print('use_past_feat_dynamic_real:', use_past_feat_dynamic_real)
    print('use_feat_static_cat:', use_feat_static_cat)
    print('use_log1p:', use_log1p)
    
    batch_size = args.batch_size
    print('batch_size:', batch_size)

    train = load_json(os.path.join(args.train, 'train_'+freq+'.json'))
    test = load_json(os.path.join(args.test, 'test_'+freq+'.json'))
    
    num_timeseries = len(train)
    print('num_timeseries:', num_timeseries)

    train_ds = ListDataset(parse_data(train, use_log1p=use_log1p), freq=freq)
    test_ds = ListDataset(parse_data(test, use_log1p=use_log1p), freq=freq)
    
    predictor = None
    
    trainer = Trainer(ctx="cpu",
                      epochs=args.epochs,
                      num_batches_per_epoch=args.num_batches_per_epoch,
                      learning_rate=args.learning_rate,
                      learning_rate_decay_factor=args.learning_rate_decay_factor,
                      patience=args.patience,
                      minimum_learning_rate=args.minimum_learning_rate,
                      clip_gradient=args.clip_gradient,
                      weight_decay=args.weight_decay,
                      init=args.init.replace('"', ''),
                      hybridize=args.hybridize)
    print('trainer:', trainer)
    
    cardinality = None
    if args.cardinality != '':
        cleaned = (args.cardinality.replace('"', '').replace(' ', '')
                   .replace('[', '').replace(']', ''))
        cardinality = [int(c) for c in cleaned.split(',')]
    print('cardinality:', cardinality)
    
    embedding_dimension = [min(50, (cat+1)//2) for cat in cardinality] if cardinality is not None else None
    print('embedding_dimension:', embedding_dimension)
    
    algo_name = args.algo_name.replace('"', '')
    print('algo_name:', algo_name)
    
    if algo_name == 'CanonicalRNN':
        estimator = CanonicalRNNEstimator(
            freq=freq,
            prediction_length=prediction_length,
            context_length=context_length,
            trainer=trainer,
            batch_size=batch_size,
            num_layers=5, 
            num_cells=50, 
            cell_type='lstm', 
            num_parallel_samples=100,
            cardinality=cardinality,
            embedding_dimension=10,
        )
    elif algo_name == 'DeepFactor':
        estimator = DeepFactorEstimator(
            freq=freq,
            prediction_length=prediction_length,
            context_length=context_length,
            trainer=trainer,
            batch_size=batch_size,
            cardinality=cardinality,
            embedding_dimension=10,
        )
    elif algo_name == 'DeepAR':
        estimator = DeepAREstimator(
            freq=freq,  # Frequency of the data to train on and predict
            prediction_length=prediction_length,  # Length of the prediction horizon
            trainer=trainer,  # Trainer object to be used (default: Trainer())
            context_length=context_length,  # Number of steps to unroll the RNN for before computing predictions (default: None, in which case context_length = prediction_length)
            num_layers=2,  # Number of RNN layers (default: 2)
            num_cells=40,  # Number of RNN cells for each layer (default: 40)
            cell_type='lstm',  # Type of recurrent cells to use (available: 'lstm' or 'gru'; default: 'lstm')
            dropoutcell_type='ZoneoutCell',  # Type of dropout cells to use (available: 'ZoneoutCell', 'RNNZoneoutCell', 'VariationalDropoutCell' or 'VariationalZoneoutCell'; default: 'ZoneoutCell')
            dropout_rate=0.1,  # Dropout regularization parameter (default: 0.1)
            use_feat_dynamic_real=use_feat_dynamic_real,  # Whether to use the feat_dynamic_real field from the data (default: False)
            use_feat_static_cat=use_feat_static_cat,  # Whether to use the feat_static_cat field from the data (default: False)
            use_feat_static_real=False,  # Whether to use the feat_static_real field from the data (default: False)
            cardinality=cardinality,  # Number of values of each categorical feature; must be set if use_feat_static_cat == True (default: None)
            embedding_dimension=embedding_dimension,  # Dimension of the embeddings for categorical features (default: [min(50, (cat+1)//2) for cat in cardinality])
            batch_size=batch_size,  # The size of the batches to be used during training and prediction.
            # Other options left at their defaults:
            #     distr_output=StudentTOutput(),  # Distribution to use to evaluate observations and sample predictions
            #     scaling=True,  # Whether to automatically scale the target values
            #     lags_seq=None,  # Indices of the lagged target values to use as inputs of the RNN (if None, determined automatically based on freq)
            #     time_features=None,  # Time features to use as inputs of the RNN (if None, determined automatically based on freq)
            #     num_parallel_samples=100,  # Number of evaluation samples per time series to increase parallelism during inference; does not affect accuracy (default: 100)
            #     imputation_method=None,  # One of the methods from ImputationStrategy
            #     train_sampler=None,  # Controls the sampling of windows during training.
            #     validation_sampler=None,  # Controls the sampling of windows during validation.
            #     alpha=None,  # The scaling coefficient of the activation regularization
            #     beta=None,  # The scaling coefficient of the temporal activation regularization
            #     minimum_scale=None,  # The minimum scale that is returned by the MeanScaler
            #     default_scale=None,  # Default scale applied if the context window is completely unobserved; otherwise the mean scale in the batch is used.
            #     impute_missing_values=None,  # Whether to impute missing values during training using the current model parameters; recommended for datasets with many missing values, but much slower.
            #     num_imputation_samples=None,  # How many samples to use to impute values when impute_missing_values=True
        )
    elif algo_name == 'DeepState':
        estimator = DeepStateEstimator(
            freq=freq,
            prediction_length=prediction_length,
            trainer=trainer,
            batch_size=batch_size,
            use_feat_dynamic_real=use_feat_dynamic_real,
            use_feat_static_cat=use_feat_static_cat,
            cardinality=cardinality,
        )
    elif algo_name == 'DeepVAR':
        estimator = DeepVAREstimator(  # use multi
            freq=freq,
            prediction_length=prediction_length,
            context_length=context_length,
            trainer=trainer,
            batch_size=batch_size,
            target_dim=96,
        )
    elif algo_name == 'GaussianProcess':
#         # TODO
#         estimator = GaussianProcessEstimator(
#             freq=freq,
#             prediction_length=prediction_length,
#             context_length=context_length,
#             trainer=trainer,
#             batch_size=batch_size,
#             cardinality=num_timeseries,
#         )
        pass
    elif algo_name == 'GPVAR':
        estimator = GPVAREstimator(  # use multi
            freq=freq,
            prediction_length=prediction_length,
            context_length=context_length,
            trainer=trainer,
            batch_size=batch_size,
            target_dim=96,
        )
    elif algo_name == 'LSTNet':
        estimator = LSTNetEstimator(  # use multi
            freq=freq,
            prediction_length=prediction_length,
            context_length=context_length,
            num_series=96,
            skip_size=4,
            ar_window=4,
            channels=72,
            trainer=trainer,
            batch_size=batch_size,
        )
    elif algo_name == 'NBEATS':
        estimator = NBEATSEstimator(
            freq=freq,
            prediction_length=prediction_length,
            context_length=context_length,
            trainer=trainer,
            batch_size=batch_size,
        )
    elif algo_name == 'DeepRenewalProcess':
        estimator = DeepRenewalProcessEstimator(
            freq=freq,
            prediction_length=prediction_length,
            context_length=context_length,
            trainer=trainer,
            batch_size=batch_size,
            num_cells=40,
            num_layers=2,
        )
    elif algo_name == 'Tree':
        estimator = TreePredictor(
            freq=freq,
            prediction_length=prediction_length,
            context_length=context_length,
            n_ignore_last=0,
            lead_time=0,
            max_n_datapts=1000000,
            min_bin_size=100,  # Used only for the "QRX" method.
            use_feat_static_real=False,
            use_feat_dynamic_cat=False,
            use_feat_dynamic_real=use_feat_dynamic_real,
            cardinality=cardinality,
            one_hot_encode=False,
            model_params={'eta': 0.1, 'max_depth': 6, 'silent': 0,
                          'nthread': -1, 'n_jobs': -1, 'gamma': 1,
                          'subsample': 0.9, 'min_child_weight': 1,
                          'colsample_bytree': 0.9, 'lambda': 1,
                          'booster': 'gbtree'},
            max_workers=4,  # default: None
            method="QRX",  # one of "QRX", "QuantileRegression", "QRF"
            quantiles=None,  # Used only for the "QuantileRegression" method.
            model=None,
            seed=None,
        )
    elif algo_name == 'SelfAttention':
#         # TODO
#         estimator = SelfAttentionEstimator(
#             freq=freq,
#             prediction_length=prediction_length,
#             context_length=context_length,
#             trainer=trainer,
#             batch_size=batch_size,
#         )
        pass
    elif algo_name == 'MQCNN':
        estimator = MQCNNEstimator(
            freq=freq,
            prediction_length=prediction_length,
            context_length=context_length,
            trainer=trainer,
            batch_size=batch_size,
            use_past_feat_dynamic_real=use_past_feat_dynamic_real,
            use_feat_dynamic_real=use_feat_dynamic_real,
            use_feat_static_cat=use_feat_static_cat,
            cardinality=cardinality,
            embedding_dimension=embedding_dimension,
            add_time_feature=True,
            add_age_feature=False,
            enable_encoder_dynamic_feature=True,
            enable_decoder_dynamic_feature=True,
            seed=None,
            decoder_mlp_dim_seq=None,
            channels_seq=None,
            dilation_seq=None,
            kernel_size_seq=None,
            use_residual=True,
            quantiles=None,
            distr_output=None,
            scaling=None,
            scaling_decoder_dynamic_feature=False,
            num_forking=None,
            max_ts_len=None,
        )
    elif algo_name == 'MQRNN':
        estimator = MQRNNEstimator(
            freq=freq,
            prediction_length=prediction_length,
            context_length=context_length,
            trainer=trainer,
            batch_size=batch_size,
        )
    elif algo_name == 'Seq2Seq':
        # # TODO
        # estimator = Seq2SeqEstimator(
        #     freq=freq,
        #     prediction_length=prediction_length,
        #     context_length=context_length,
        #     trainer=trainer,
        #     cardinality=cardinality,
        #     embedding_dimension=4,
        #     encoder=Seq2SeqEncoder(),
        #     decoder_mlp_layer=[4],
        #     decoder_mlp_static_dim=4
        # )
        pass
    elif algo_name == 'SimpleFeedForward':
        estimator = SimpleFeedForwardEstimator(
            num_hidden_dimensions=[40, 40],
            prediction_length=prediction_length,
            context_length=context_length,
            freq=freq,
            trainer=trainer,
            batch_size=batch_size,
        )
    elif algo_name == 'TemporalFusionTransformer':
        estimator = TemporalFusionTransformerEstimator(
            prediction_length=prediction_length,
            context_length=context_length,
            freq=freq,
            trainer=trainer,
            batch_size=batch_size,
            hidden_dim=32,
            variable_dim=None,
            num_heads=4,
            num_outputs=3,
            num_instance_per_series=100,
            dropout_rate=0.1,
            # Other options left at their defaults:
            #     time_features=[],
            #     static_cardinalities={},
            #     dynamic_cardinalities={},
            #     static_feature_dims={},
            #     dynamic_feature_dims={},
            #     past_dynamic_features=[],
        )
    elif algo_name == 'DeepTPP':
#         # TODO
#         estimator = DeepTPPEstimator(
#             prediction_interval_length=prediction_length,
#             context_interval_length=context_length,
#             freq=freq,
#             trainer=trainer,
#             batch_size=batch_size,
#             num_marks=len(cardinality) if cardinality is not None else 0,
#         )
        pass
    elif algo_name == 'Transformer':
        estimator = TransformerEstimator(
            freq=freq,
            prediction_length=prediction_length,
            trainer=trainer,
            batch_size=batch_size,
            cardinality=cardinality,
        )
    elif algo_name == 'WaveNet':
        estimator = WaveNetEstimator(
            freq=freq,
            prediction_length=prediction_length,
            trainer=trainer,
            batch_size=batch_size,
            cardinality=cardinality,
        )
    elif algo_name == 'Naive2':
        # TODO Multiplicative seasonality is not appropriate for zero and negative values
        predictor = Naive2Predictor(freq=freq, prediction_length=prediction_length, season_length=context_length)
    elif algo_name == 'NPTS':
        predictor = NPTSPredictor(freq=freq, prediction_length=prediction_length, context_length=context_length)
    elif algo_name == 'Prophet':
        def configure_model(model):
            model.add_seasonality(
                name='weekly', period=7, fourier_order=3, prior_scale=0.1
            )
            return model
        predictor = ProphetPredictor(freq=freq, prediction_length=prediction_length, init_model=configure_model)
    elif algo_name == 'ARIMA':
        predictor = RForecastPredictor(freq=freq,
                                       prediction_length=prediction_length,
                                       method_name='arima',
                                       period=context_length,
                                       trunc_length=len(train[0]['target']))
    elif algo_name == 'ETS':
        predictor = RForecastPredictor(freq=freq,
                                       prediction_length=prediction_length,
                                       method_name='ets',
                                       period=context_length,
                                       trunc_length=len(train[0]['target']))
    elif algo_name == 'TBATS':
        predictor = RForecastPredictor(freq=freq,
                                       prediction_length=prediction_length,
                                       method_name='tbats',
                                       period=context_length,
                                       trunc_length=len(train[0]['target']))
    elif algo_name == 'CROSTON':
        predictor = RForecastPredictor(freq=freq,
                                       prediction_length=prediction_length,
                                       method_name='croston',
                                       period=context_length,
                                       trunc_length=len(train[0]['target']))
    elif algo_name == 'MLP':
        predictor = RForecastPredictor(freq=freq,
                                       prediction_length=prediction_length,
                                       method_name='mlp',
                                       period=context_length,
                                       trunc_length=len(train[0]['target']))
    elif algo_name == 'SeasonalNaive':
        predictor = SeasonalNaivePredictor(freq=freq, prediction_length=prediction_length)
    else:
        print('[ERROR]:', algo_name, 'not supported')
        return
    
    if predictor is None:
        try:
            predictor = estimator.train(train_ds, test_ds)
        except Exception as e:
            print(e)
            try:
                grouper_train = MultivariateGrouper(max_target_dim=num_timeseries)
                train_ds_multi = grouper_train(train_ds)
                test_ds_multi = grouper_train(test_ds)
                predictor = estimator.train(train_ds_multi, test_ds_multi)
            except Exception as e:
                print(e)

    forecast_it, ts_it = make_evaluation_predictions(
        dataset=test_ds,  # test dataset
        predictor=predictor,  # predictor
        num_samples=100,  # number of sample paths we want for evaluation
    )

    forecasts = list(forecast_it)
    tss = list(ts_it)
#     print(len(forecasts), len(tss))
    
    evaluator = Evaluator(quantiles=[0.1, 0.5, 0.9])
    agg_metrics, item_metrics = evaluator(iter(tss), iter(forecasts), num_series=len(test_ds))

    print(json.dumps(agg_metrics, indent=4))
    
    model_dir = os.path.join(args.model_dir, algo_name)
    os.makedirs(model_dir, exist_ok=True)
    predictor.serialize(Path(model_dir))
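# load_json and parse_data are referenced above but not shown; below is a
# minimal sketch of what they are assumed to do, inferred only from the call
# sites (hypothetical helpers: one JSON object per line, one series per object).
import json
import numpy as np

def load_json(path):
    with open(path) as f:
        return [json.loads(line) for line in f if line.strip()]

def parse_data(dataset, use_log1p=False):
    # Map raw records to GluonTS-style dictionaries, optionally
    # compressing the target with log1p.
    data = []
    for entry in dataset:
        target = np.asarray(entry['target'], dtype=float)
        if use_log1p:
            target = np.log1p(target)
        record = {'start': entry['start'], 'target': target}
        for key in ('feat_dynamic_real', 'past_feat_dynamic_real',
                    'feat_static_cat'):
            if key in entry:
                record[key] = entry[key]
        data.append(record)
    return data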
Example #9
import numpy as np

datasets = [
    'm4_daily', 'm4_hourly', 'm4_weekly', 'solar-energy', 'electricity'
]
for ds in datasets:
    dataset = get_dataset(ds)
    freq = dataset.metadata.freq
    prediction_length = dataset.metadata.prediction_length
    deepar = DeepAREstimator(
        freq=freq,
        prediction_length=prediction_length,
    )

    mqcnn = MQCNNEstimator(
        freq=freq,
        prediction_length=prediction_length,
    )

    sff = SimpleFeedForwardEstimator(
        num_hidden_dimensions=[10],
        context_length=100,
        freq=freq,
        prediction_length=prediction_length,
    )

    wn = WaveNetEstimator(
        freq=freq,
        prediction_length=prediction_length,
    )

    num_batches_per_epoch = 10
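    # The snippet ends after constructing the estimators; a plausible
    # continuation (sketch) of the loop body, following the train/evaluate
    # flow used in the other examples. Assumes:
    #     from gluonts.evaluation import Evaluator, make_evaluation_predictions
    for estimator in [deepar, mqcnn, sff, wn]:
        predictor = estimator.train(dataset.train)
        forecast_it, ts_it = make_evaluation_predictions(
            dataset=dataset.test, predictor=predictor, num_samples=100
        )
        agg_metrics, _ = Evaluator(quantiles=[0.1, 0.5, 0.9])(
            ts_it, forecast_it, num_series=len(dataset.test)
        )
        print(ds, type(estimator).__name__, agg_metrics["mean_wQuantileLoss"])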
Example #10
    def fit(self, df, future_regressor=None):
        """Train algorithm given data supplied.

        Args:
            df (pandas.DataFrame): Datetime Indexed
        """
        df = self.basic_profile(df)

        try:
            from mxnet.random import seed as mxnet_seed

            mxnet_seed(self.random_seed)
        except Exception:
            pass

        gluon_train = df.transpose()
        self.train_index = gluon_train.index

        gluon_freq = str(self.frequency).split('-')[0]
        if gluon_freq in ["MS", "1MS"]:
            gluon_freq = "M"

        if int(self.verbose) > 1:
            print(f"Gluon Frequency is {gluon_freq}")

        if str(self.context_length).replace('.', '').isdigit():
            self.gluon_context_length = int(float(self.context_length))
        elif 'forecastlength' in str(self.context_length).lower():
            len_int = int(''.join(
                x for x in str(self.context_length) if x.isdigit()))
            self.gluon_context_length = int(len_int * self.forecast_length)
        else:
            self.gluon_context_length = 2 * self.forecast_length
            self.context_length = '2ForecastLength'
        ts_metadata = {
            'num_series': len(gluon_train.index),
            'freq': gluon_freq,
            'gluon_start': [gluon_train.columns[0]
                            for _ in range(len(gluon_train.index))],
            'context_length': self.gluon_context_length,
            'forecast_length': self.forecast_length,
        }
        self.test_ds = ListDataset(
            [
                {FieldName.TARGET: target, FieldName.START: start}
                for (target, start) in zip(gluon_train.values,
                                           ts_metadata['gluon_start'])
            ],
            freq=ts_metadata['freq'],
        )
        if self.gluon_model == 'DeepAR':
            from gluonts.model.deepar import DeepAREstimator

            estimator = DeepAREstimator(
                freq=ts_metadata['freq'],
                context_length=ts_metadata['context_length'],
                prediction_length=ts_metadata['forecast_length'],
                trainer=Trainer(epochs=self.epochs,
                                learning_rate=self.learning_rate),
            )
        elif self.gluon_model == 'NPTS':
            from gluonts.model.npts import NPTSEstimator

            estimator = NPTSEstimator(
                freq=ts_metadata['freq'],
                context_length=ts_metadata['context_length'],
                prediction_length=ts_metadata['forecast_length'],
            )

        elif self.gluon_model == 'MQCNN':
            from gluonts.model.seq2seq import MQCNNEstimator

            estimator = MQCNNEstimator(
                freq=ts_metadata['freq'],
                context_length=ts_metadata['context_length'],
                prediction_length=ts_metadata['forecast_length'],
                trainer=Trainer(epochs=self.epochs,
                                learning_rate=self.learning_rate),
            )

        elif self.gluon_model == 'SFF':
            from gluonts.model.simple_feedforward import SimpleFeedForwardEstimator

            estimator = SimpleFeedForwardEstimator(
                prediction_length=ts_metadata['forecast_length'],
                context_length=ts_metadata['context_length'],
                freq=ts_metadata['freq'],
                trainer=Trainer(
                    epochs=self.epochs,
                    learning_rate=self.learning_rate,
                    hybridize=False,
                    num_batches_per_epoch=100,
                ),
            )

        elif self.gluon_model == 'Transformer':
            from gluonts.model.transformer import TransformerEstimator

            estimator = TransformerEstimator(
                prediction_length=ts_metadata['forecast_length'],
                context_length=ts_metadata['context_length'],
                freq=ts_metadata['freq'],
                trainer=Trainer(epochs=self.epochs,
                                learning_rate=self.learning_rate),
            )

        elif self.gluon_model == 'DeepState':
            from gluonts.model.deepstate import DeepStateEstimator

            estimator = DeepStateEstimator(
                prediction_length=ts_metadata['forecast_length'],
                past_length=ts_metadata['context_length'],
                freq=ts_metadata['freq'],
                use_feat_static_cat=False,
                cardinality=[1],
                trainer=Trainer(ctx='cpu',
                                epochs=self.epochs,
                                learning_rate=self.learning_rate),
            )

        elif self.gluon_model == 'DeepFactor':
            from gluonts.model.deep_factor import DeepFactorEstimator

            estimator = DeepFactorEstimator(
                freq=ts_metadata['freq'],
                context_length=ts_metadata['context_length'],
                prediction_length=ts_metadata['forecast_length'],
                trainer=Trainer(epochs=self.epochs,
                                learning_rate=self.learning_rate),
            )

        elif self.gluon_model == 'WaveNet':
            # Usually needs more epochs/training iterations than other models do
            from gluonts.model.wavenet import WaveNetEstimator

            estimator = WaveNetEstimator(
                freq=ts_metadata['freq'],
                prediction_length=ts_metadata['forecast_length'],
                trainer=Trainer(epochs=self.epochs,
                                learning_rate=self.learning_rate),
            )
        else:
            raise ValueError("'gluon_model' not recognized.")

        self.GluonPredictor = estimator.train(self.test_ds)
        self.ts_metadata = ts_metadata
        self.fit_runtime = datetime.datetime.now() - self.startTime
        return self
Example #11
def train(bucket, seq, algo, freq, prediction_length, epochs, learning_rate,
          hybridize, num_batches_per_epoch):

    # create train dataset
    df = pd.read_csv(filepath_or_buffer=os.environ['SM_CHANNEL_TRAIN'] +
                     "/train.csv",
                     header=0,
                     index_col=0)

    training_data = ListDataset(
        [{
            "start": df.index[0],
            "target": df.usage[:],
            "item_id": df.client[:]
        }],
        freq=freq)

    # create test dataset
    df = pd.read_csv(filepath_or_buffer=os.environ['SM_CHANNEL_TEST'] +
                     "/test.csv",
                     header=0,
                     index_col=0)

    test_data = ListDataset(
        [{
            "start": df.index[0],
            "target": df.usage[:],
            "item_id": 'client_12'
        }],
        freq=freq)

    hook = Hook.create_from_json_file()
    #determine estimators##################################
    if algo == "DeepAR":
        estimator = DeepAREstimator(
            freq=freq,
            prediction_length=prediction_length,
            context_length=1,
            trainer=Trainer(ctx="cpu",
                            epochs=epochs,
                            learning_rate=learning_rate,
                            hybridize=hybridize,
                            num_batches_per_epoch=num_batches_per_epoch))

        #train the model
        predictor = estimator.train(training_data=training_data)
        print("DeepAR training is complete SUCCESS")
    elif algo == "SFeedFwd":
        estimator = SimpleFeedForwardEstimator(
            freq=freq,
            prediction_length=prediction_length,
            trainer=Trainer(ctx="cpu",
                            epochs=epochs,
                            learning_rate=learning_rate,
                            hybridize=hybridize,
                            num_batches_per_epoch=num_batches_per_epoch))

        #train the model
        predictor = estimator.train(training_data=training_data)
        print("training is complete SUCCESS")
    elif algo == "lstnet":
        # Needed for LSTNet ONLY
        grouper = MultivariateGrouper(max_target_dim=6)
        training_data = grouper(training_data)
        test_data = grouper(test_data)
        context_length = prediction_length
        num_series = 1
        skip_size = 1
        ar_window = 1
        channels = 4

        estimator = LSTNetEstimator(
            freq=freq,
            prediction_length=prediction_length,
            context_length=context_length,
            num_series=num_series,
            skip_size=skip_size,
            ar_window=ar_window,
            channels=channels,
            trainer=Trainer(ctx="cpu",
                            epochs=epochs,
                            learning_rate=learning_rate,
                            hybridize=hybridize,
                            num_batches_per_epoch=num_batches_per_epoch))

        #train the model
        predictor = estimator.train(training_data=training_data)
        print("training is complete SUCCESS")
    elif algo == "seq2seq":
        estimator = MQCNNEstimator(
            freq=freq,
            prediction_length=prediction_length,
            trainer=Trainer(ctx="cpu",
                            epochs=epochs,
                            learning_rate=learning_rate,
                            hybridize=hybridize,
                            num_batches_per_epoch=num_batches_per_epoch))

        #train the model
        predictor = estimator.train(training_data=training_data)
        print("training is complete SUCCESS")
    else:
        estimator = TransformerEstimator(
            freq=freq,
            prediction_length=prediction_length,
            trainer=Trainer(ctx="cpu",
                            epochs=epochs,
                            learning_rate=learning_rate,
                            hybridize=hybridize,
                            num_batches_per_epoch=num_batches_per_epoch))

        #train the model
        predictor = estimator.train(training_data=training_data)
        print("training is complete SUCCESS")

    ###################################################

    #evaluate trained model on test data
    forecast_it, ts_it = make_evaluation_predictions(test_data,
                                                     predictor,
                                                     num_samples=100)
    print("EVALUATION is complete SUCCESS")
    forecasts = list(forecast_it)
    tss = list(ts_it)
    evaluator = Evaluator(quantiles=[0.1, 0.5, 0.9])
    agg_metrics, item_metrics = evaluator(iter(tss),
                                          iter(forecasts),
                                          num_series=len(test_data))
    print("METRICS retrieved SUCCESS")
    #bucket = "bwp-sandbox"

    mainpref = "gluonts/blog-models/"
    prefix = mainpref + str(seq) + "/"
    agg_df = pd.DataFrame(agg_metrics, index=[0])
    file = "metrics" + str(seq) + ".csv"
    os.makedirs('metrics', exist_ok=True)
    cspath = os.path.join('metrics', file)
    agg_df.to_csv(cspath)
    s3.upload_file(cspath, bucket, mainpref + "metrics/" + file)

    hook.save_scalar("MAPE", agg_metrics["MAPE"], sm_metric=True)
    hook.save_scalar("RMSE", agg_metrics["RMSE"], sm_metric=True)
    hook.save_scalar("MASE", agg_metrics["MASE"], sm_metric=True)
    hook.save_scalar("MSE", agg_metrics["MSE"], sm_metric=True)

    print("MAPE:", agg_metrics["MAPE"])

    #save the model
    predictor.serialize(pathlib.Path(os.environ['SM_MODEL_DIR']))

    uploadDirectory(os.environ['SM_MODEL_DIR'], prefix, bucket)

    return predictor
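# uploadDirectory and the s3 client are used above but not defined in this
# snippet; a minimal sketch of the assumed helper (hypothetical, boto3-based).
import boto3

s3 = boto3.client('s3')

def uploadDirectory(path, prefix, bucket):
    # Walk the local directory tree and upload every file under the prefix.
    for root, _, files in os.walk(path):
        for name in files:
            local_path = os.path.join(root, name)
            key = os.path.join(prefix, os.path.relpath(local_path, path))
            s3.upload_file(local_path, bucket, key)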
Example #12
    def fit(self, df, future_regressor=None):
        """Train algorithm given data supplied.

        Args:
            df (pandas.DataFrame): Datetime Indexed
        """
        if not _has_gluonts:
            raise ImportError(
                "GluonTS installation not found or installed version is incompatible with AutoTS."
            )

        df = self.basic_profile(df)

        try:
            from mxnet.random import seed as mxnet_seed

            mxnet_seed(self.random_seed)
        except Exception:
            pass

        gluon_train = df.to_numpy().T
        self.train_index = df.columns
        self.train_columns = df.index

        gluon_freq = str(self.frequency).split('-')[0]
        if self.regression_type == "User":
            if future_regressor is None:
                raise ValueError(
                    "regression_type='User' but no future_regressor supplied")
        if gluon_freq in ["MS", "1MS"]:
            gluon_freq = "M"

        if int(self.verbose) > 1:
            print(f"Gluon Frequency is {gluon_freq}")
        if int(self.verbose) < 1:
            try:
                logging.getLogger().disabled = True
                logging.getLogger("mxnet").addFilter(lambda record: False)
            except Exception:
                pass

        if str(self.context_length).replace('.', '').isdigit():
            self.gluon_context_length = int(float(self.context_length))
        elif 'forecastlength' in str(self.context_length).lower():
            len_int = int(''.join(
                x for x in str(self.context_length) if x.isdigit()))
            self.gluon_context_length = int(len_int * self.forecast_length)
        else:
            self.gluon_context_length = 20
            self.context_length = '20'
        ts_metadata = {
            'num_series': len(self.train_index),
            'freq': gluon_freq,
            'start_ts': df.index[0],
            'gluon_start': [self.train_columns[0]
                            for _ in range(len(self.train_index))],
            'context_length': self.gluon_context_length,
            'forecast_length': self.forecast_length,
        }
        if self.gluon_model in self.multivariate_mods:
            if self.regression_type == "User":
                regr = future_regressor.to_numpy().T
                self.regr_train = regr
                self.test_ds = ListDataset(
                    [{
                        "start": df.index[0],
                        "target": gluon_train,
                        "feat_dynamic_real": regr,
                    }],
                    freq=ts_metadata['freq'],
                    one_dim_target=False,
                )
            else:
                self.test_ds = ListDataset(
                    [{
                        "start": df.index[0],
                        "target": gluon_train
                    }],
                    freq=ts_metadata['freq'],
                    one_dim_target=False,
                )
        else:
            if self.regression_type == "User":
                self.gluon_train = gluon_train
                regr = future_regressor.to_numpy().T
                self.regr_train = regr
                self.test_ds = ListDataset(
                    [{
                        FieldName.TARGET: target,
                        FieldName.START: ts_metadata['start_ts'],
                        FieldName.FEAT_DYNAMIC_REAL: regr,
                    } for target in gluon_train],
                    freq=ts_metadata['freq'],
                )
            else:
                # use the actual start date, if NaN given (semi-hidden)
                # ts_metadata['gluon_start'] = df.notna().idxmax().tolist()
                self.test_ds = ListDataset(
                    [
                        {FieldName.TARGET: target, FieldName.START: start}
                        for (target, start) in zip(gluon_train,
                                                   ts_metadata['gluon_start'])
                    ],
                    freq=ts_metadata['freq'],
                )
        if self.gluon_model == 'DeepAR':
            from gluonts.model.deepar import DeepAREstimator

            estimator = DeepAREstimator(
                freq=ts_metadata['freq'],
                context_length=ts_metadata['context_length'],
                prediction_length=ts_metadata['forecast_length'],
                trainer=Trainer(epochs=self.epochs,
                                learning_rate=self.learning_rate),
            )
        elif self.gluon_model == 'NPTS':
            from gluonts.model.npts import NPTSEstimator

            estimator = NPTSEstimator(
                freq=ts_metadata['freq'],
                context_length=ts_metadata['context_length'],
                prediction_length=ts_metadata['forecast_length'],
            )

        elif self.gluon_model == 'MQCNN':
            from gluonts.model.seq2seq import MQCNNEstimator

            estimator = MQCNNEstimator(
                freq=ts_metadata['freq'],
                context_length=ts_metadata['context_length'],
                prediction_length=ts_metadata['forecast_length'],
                trainer=Trainer(epochs=self.epochs,
                                learning_rate=self.learning_rate),
            )

        elif self.gluon_model == 'SFF':
            from gluonts.model.simple_feedforward import SimpleFeedForwardEstimator

            estimator = SimpleFeedForwardEstimator(
                prediction_length=ts_metadata['forecast_length'],
                context_length=ts_metadata['context_length'],
                freq=ts_metadata['freq'],
                trainer=Trainer(
                    epochs=self.epochs,
                    learning_rate=self.learning_rate,
                    hybridize=False,
                    num_batches_per_epoch=100,
                ),
            )

        elif self.gluon_model == 'Transformer':
            from gluonts.model.transformer import TransformerEstimator

            estimator = TransformerEstimator(
                prediction_length=ts_metadata['forecast_length'],
                context_length=ts_metadata['context_length'],
                freq=ts_metadata['freq'],
                trainer=Trainer(epochs=self.epochs,
                                learning_rate=self.learning_rate),
            )

        elif self.gluon_model == 'DeepState':
            from gluonts.model.deepstate import DeepStateEstimator

            estimator = DeepStateEstimator(
                prediction_length=ts_metadata['forecast_length'],
                past_length=ts_metadata['context_length'],
                freq=ts_metadata['freq'],
                use_feat_static_cat=False,
                cardinality=[1],
                trainer=Trainer(ctx='cpu',
                                epochs=self.epochs,
                                learning_rate=self.learning_rate),
            )

        elif self.gluon_model == 'DeepFactor':
            from gluonts.model.deep_factor import DeepFactorEstimator

            estimator = DeepFactorEstimator(
                freq=ts_metadata['freq'],
                context_length=ts_metadata['context_length'],
                prediction_length=ts_metadata['forecast_length'],
                trainer=Trainer(epochs=self.epochs,
                                learning_rate=self.learning_rate),
            )

        elif self.gluon_model == 'WaveNet':
            # Usually needs more epochs/training iterations than other models do
            from gluonts.model.wavenet import WaveNetEstimator

            estimator = WaveNetEstimator(
                freq=ts_metadata['freq'],
                prediction_length=ts_metadata['forecast_length'],
                trainer=Trainer(epochs=self.epochs,
                                learning_rate=self.learning_rate),
            )
        elif self.gluon_model == 'DeepVAR':
            from gluonts.model.deepvar import DeepVAREstimator

            estimator = DeepVAREstimator(
                target_dim=gluon_train.shape[0],
                freq=ts_metadata['freq'],
                context_length=ts_metadata['context_length'],
                prediction_length=ts_metadata['forecast_length'],
                trainer=Trainer(epochs=self.epochs,
                                learning_rate=self.learning_rate),
            )
        elif self.gluon_model == 'GPVAR':
            from gluonts.model.gpvar import GPVAREstimator

            estimator = GPVAREstimator(
                target_dim=gluon_train.shape[0],
                freq=ts_metadata['freq'],
                context_length=ts_metadata['context_length'],
                prediction_length=ts_metadata['forecast_length'],
                trainer=Trainer(epochs=self.epochs,
                                learning_rate=self.learning_rate),
            )
        elif self.gluon_model == 'LSTNet':
            from gluonts.model.lstnet import LSTNetEstimator

            estimator = LSTNetEstimator(
                freq=ts_metadata['freq'],
                num_series=len(self.train_index),
                skip_size=0,
                ar_window=1,
                channels=2,
                context_length=ts_metadata['context_length'],
                prediction_length=ts_metadata['forecast_length'],
                trainer=Trainer(epochs=self.epochs,
                                learning_rate=self.learning_rate),
            )
        elif self.gluon_model == 'NBEATS':
            from gluonts.model.n_beats import NBEATSEstimator

            estimator = NBEATSEstimator(
                freq=ts_metadata['freq'],
                context_length=ts_metadata['context_length'],
                prediction_length=ts_metadata['forecast_length'],
                trainer=Trainer(epochs=self.epochs,
                                learning_rate=self.learning_rate),
            )
        elif self.gluon_model == 'Rotbaum':
            from gluonts.model.rotbaum import TreeEstimator

            estimator = TreeEstimator(
                freq=ts_metadata['freq'],
                context_length=ts_metadata['context_length'],
                prediction_length=ts_metadata['forecast_length'],
                # trainer=Trainer(epochs=self.epochs, learning_rate=self.learning_rate),
            )
        elif self.gluon_model == 'DeepRenewalProcess':
            from gluonts.model.renewal import DeepRenewalProcessEstimator

            estimator = DeepRenewalProcessEstimator(
                prediction_length=ts_metadata['forecast_length'],
                context_length=ts_metadata['context_length'],
                num_layers=1,  # original paper used 1 layer, 10 cells
                num_cells=10,
                freq=ts_metadata['freq'],
                trainer=Trainer(epochs=self.epochs,
                                learning_rate=self.learning_rate),
            )
        elif self.gluon_model == 'SelfAttention':
            from gluonts.model.san import SelfAttentionEstimator

            estimator = SelfAttentionEstimator(
                prediction_length=ts_metadata['forecast_length'],
                context_length=ts_metadata['context_length'],
                freq=ts_metadata['freq'],
                trainer=Trainer(
                    epochs=self.epochs,
                    learning_rate=self.learning_rate,
                ),
            )
        elif self.gluon_model == 'TemporalFusionTransformer':
            from gluonts.model.tft import TemporalFusionTransformerEstimator

            estimator = TemporalFusionTransformerEstimator(
                prediction_length=ts_metadata['forecast_length'],
                context_length=ts_metadata['context_length'],
                freq=ts_metadata['freq'],
                trainer=Trainer(epochs=self.epochs,
                                learning_rate=self.learning_rate),
            )
        elif self.gluon_model == 'DeepTPP':
            from gluonts.model.tpp.deeptpp import DeepTPPEstimator

            estimator = DeepTPPEstimator(
                prediction_interval_length=ts_metadata['forecast_length'],
                context_interval_length=ts_metadata['context_length'],
                num_marks=1,  # cardinality
                freq=ts_metadata['freq'],
                trainer=Trainer(
                    epochs=self.epochs,
                    learning_rate=self.learning_rate,
                    hybridize=False,
                ),
            )
        else:
            raise ValueError("'gluon_model' not recognized.")

        self.GluonPredictor = estimator.train(self.test_ds)
        self.ts_metadata = ts_metadata
        self.fit_runtime = datetime.datetime.now() - self.startTime
        return self