Example #1
from gluonts.model.npts import NPTSPredictor


def gluonts_npts(dataset, freq, pred_length, context_length=None,
                 kernel_type="exponential", exp_kernel_weights=1.0,
                 use_seasonal_model=True):
    # Build a Non-Parametric Time Series (NPTS) predictor and materialize
    # its lazy forecast iterator into a list.
    predictor = NPTSPredictor(freq=freq, prediction_length=pred_length,
                              context_length=context_length,
                              kernel_type=kernel_type,
                              exp_kernel_weights=exp_kernel_weights,
                              use_seasonal_model=use_seasonal_model)
    return list(predictor.predict(dataset))
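A minimal usage sketch for the helper above, assuming GluonTS's `ListDataset`; the hourly toy series and its values are made up for illustration:

import numpy as np
from gluonts.dataset.common import ListDataset

# Hypothetical toy dataset: one series of 100 hourly observations.
toy_ds = ListDataset(
    [{"start": "2021-01-01 00:00:00", "target": np.random.rand(100)}],
    freq="H",
)
forecasts = gluonts_npts(toy_ds, freq="H", pred_length=24)
print(forecasts[0].mean)  # per-step mean over the sampled paths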
Example #2
def npts_test():
    import ast

    from gluonts.dataset.repository.datasets import get_dataset
    from gluonts.model.npts import NPTSPredictor

    dataset = get_dataset("exchange_rate", regenerate=False)

    prediction_length = dataset.metadata.prediction_length
    freq = dataset.metadata.freq
    # Cardinality of the static categorical feature (parsed here but not
    # needed by NPTS itself).
    cardinality = ast.literal_eval(
        dataset.metadata.feat_static_cat[0].cardinality
    )
    train_ds = dataset.train
    test_ds = dataset.test

    npts_predictor = NPTSPredictor(
        freq=freq,
        prediction_length=prediction_length,
        context_length=300,
        kernel_type="uniform",
        use_seasonal_model=False,
    )

    npts_forecast = list(npts_predictor.predict(train_ds))
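The objects collected above are GluonTS `SampleForecast` instances; a short sketch of how one might inspect the first forecast inside `npts_test` (the quantile levels are arbitrary):

forecast = npts_forecast[0]
print(forecast.start_date)     # first timestamp of the forecast horizon
print(forecast.mean)           # per-step mean over the sample paths
print(forecast.quantile(0.5))  # per-step median
print(forecast.quantile(0.9))  # per-step 90th percentile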
Example #3
def fit_estimators(
    all_estimators: List[Model],
    training_datasets: Tuple[ListDataset, ListDataset, ListDataset],
    dataset_name: str,
    pred_length: int,
) -> List[Predictor]:

    train_data, train_deepf, train_interpolate = training_datasets

    predictors = []
    for estimator in all_estimators:
        print(f"Fitting {type(estimator)}")
        # Each estimator family expects a differently prepared dataset.
        if isinstance(estimator, DeepFactorEstimator):
            predictor = estimator.train(train_deepf)
        elif isinstance(estimator, WaveNetEstimator):
            predictor = estimator.train(train_data)
        else:
            predictor = estimator.train(train_interpolate)
        predictors.append(predictor)

    # NPTS needs no training, so it is appended directly; `freqs` is assumed
    # to be a module-level mapping from dataset name to frequency string.
    predictors += [
        NPTSPredictor(
            freq=freqs[dataset_name],
            prediction_length=pred_length,
        ),
        # ProphetPredictor(
        #     freq=freqs[dataset_name],
        #     prediction_length=pred_length,
        # )
    ]

    return predictors
Example #4
def _test_nans_in_target(predictor: NPTSPredictor, dataset: Dataset) -> None:
    """
    Test that the model behaves as expected when the target time series
    contains NaN values.

    Parameters
    ----------
    predictor
        the predictor instance to test
    dataset
        a dataset (with targets without NaNs) to use as a base for the test
    """

    # a copy of dataset with 90% of the target entries NaNs
    ds_090pct_nans = ListDataset(
        data_iter=[
            _inject_nans_in_target(data_entry, p=0.9) for data_entry in dataset
        ],
        freq=predictor.freq,
    )

    # a copy of dataset with 100% of the target entries NaNs
    ds_100pct_nans = ListDataset(
        data_iter=[
            _inject_nans_in_target(data_entry, p=1.0) for data_entry in dataset
        ],
        freq=predictor.freq,
    )

    # assert that we can tolerate a high percentage of NaNs
    for forecast in predictor.predict(ds_090pct_nans):
        assert np.all(np.isfinite(forecast.samples)), "Forecast contains NaNs."

    # assert that an exception is thrown if 100% of the values are NaN
    with pytest.raises(GluonTSDataError) as excinfo:
        for _ in predictor.predict(ds_100pct_nans):
            pass
    assert (
        f"The last {predictor.context_length} positions of the target time "
        f"series are all NaN. Please increase the `context_length` "
        f"parameter of your NPTS model so the last "
        f"{predictor.context_length} positions of each target contain at "
        f"least one non-NaN value.") in str(excinfo.value)
Example #5
def train(args):
    freq = args.freq.replace('"', '')
    prediction_length = args.prediction_length
    context_length = args.context_length
    use_feat_dynamic_real = args.use_feat_dynamic_real
    use_past_feat_dynamic_real = args.use_past_feat_dynamic_real
    use_feat_static_cat = args.use_feat_static_cat
    use_log1p = args.use_log1p
    
    print('freq:', freq)
    print('prediction_length:', prediction_length)
    print('context_length:', context_length)
    print('use_feat_dynamic_real:', use_feat_dynamic_real)
    print('use_past_feat_dynamic_real:', use_past_feat_dynamic_real)
    print('use_feat_static_cat:', use_feat_static_cat)
    print('use_log1p:', use_log1p)
    
    batch_size = args.batch_size
    print('batch_size:', batch_size)

    train = load_json(os.path.join(args.train, 'train_'+freq+'.json'))
    test = load_json(os.path.join(args.test, 'test_'+freq+'.json'))
    
    num_timeseries = len(train)
    print('num_timeseries:', num_timeseries)

    train_ds = ListDataset(parse_data(train, use_log1p=use_log1p), freq=freq)
    test_ds = ListDataset(parse_data(test, use_log1p=use_log1p), freq=freq)
    
    predictor = None
    
    trainer= Trainer(ctx="cpu", 
                    epochs=args.epochs, 
                    num_batches_per_epoch=args.num_batches_per_epoch,
                    learning_rate=args.learning_rate, 
                    learning_rate_decay_factor=args.learning_rate_decay_factor,
                    patience=args.patience,
                    minimum_learning_rate=args.minimum_learning_rate,
                    clip_gradient=args.clip_gradient,
                    weight_decay=args.weight_decay,
                    init=args.init.replace('"', ''),
                    hybridize=args.hybridize)
    print('trainer:', trainer)
    
    cardinality = None
    if args.cardinality != '':
        # Parse a string such as '"[96, 7]"' into a list of ints.
        cardinality = [
            int(c) for c in args.cardinality.replace('"', '').replace(' ', '')
            .replace('[', '').replace(']', '').split(',')
        ]
    print('cardinality:', cardinality)
    
    embedding_dimension = [min(50, (cat+1)//2) for cat in cardinality] if cardinality is not None else None
    print('embedding_dimension:', embedding_dimension)
    
    algo_name = args.algo_name.replace('"', '')
    print('algo_name:', algo_name)
    
    if algo_name == 'CanonicalRNN':
        estimator = CanonicalRNNEstimator(
            freq=freq,
            prediction_length=prediction_length,
            context_length=context_length,
            trainer=trainer,
            batch_size=batch_size,
            num_layers=5, 
            num_cells=50, 
            cell_type='lstm', 
            num_parallel_samples=100,
            cardinality=cardinality,
            embedding_dimension=10,
        )
    elif algo_name == 'DeepFactor':
        estimator = DeepFactorEstimator(
            freq=freq,
            prediction_length=prediction_length,
            context_length=context_length,
            trainer=trainer,
            batch_size=batch_size,
            cardinality=cardinality,
            embedding_dimension=10,
        )
    elif algo_name == 'DeepAR':
        estimator = DeepAREstimator(
            freq=freq,  # frequency of the data to train on and predict
            prediction_length=prediction_length,  # length of the prediction horizon
            trainer=trainer,  # Trainer object to be used (default: Trainer())
            context_length=context_length,  # number of steps to unroll the RNN for before computing predictions (default: None, in which case context_length = prediction_length)
            num_layers=2,  # number of RNN layers (default: 2)
            num_cells=40,  # number of RNN cells for each layer (default: 40)
            cell_type='lstm',  # type of recurrent cells to use (available: 'lstm' or 'gru'; default: 'lstm')
            dropoutcell_type='ZoneoutCell',  # type of dropout cells to use (available: 'ZoneoutCell', 'RNNZoneoutCell', 'VariationalDropoutCell' or 'VariationalZoneoutCell'; default: 'ZoneoutCell')
            dropout_rate=0.1,  # dropout regularization parameter (default: 0.1)
            use_feat_dynamic_real=use_feat_dynamic_real,  # whether to use the feat_dynamic_real field from the data (default: False)
            use_feat_static_cat=use_feat_static_cat,  # whether to use the feat_static_cat field from the data (default: False)
            use_feat_static_real=False,  # whether to use the feat_static_real field from the data (default: False)
            cardinality=cardinality,  # number of values of each categorical feature; must be set if use_feat_static_cat == True (default: None)
            embedding_dimension=embedding_dimension,  # dimension of the embeddings for categorical features (default: [min(50, (cat+1)//2) for cat in cardinality])
            # distr_output=StudentTOutput(),  # distribution used to evaluate observations and sample predictions (default: StudentTOutput())
            # scaling=True,  # whether to automatically scale the target values (default: True)
            # lags_seq=None,  # indices of the lagged target values to use as inputs of the RNN (default: None, in which case these are determined automatically based on freq)
            # time_features=None,  # time features to use as inputs of the RNN (default: None, in which case these are determined automatically based on freq)
            # num_parallel_samples=100,  # number of evaluation samples per time series to increase parallelism during inference; does not affect accuracy (default: 100)
            # imputation_method=None,  # one of the methods from ImputationStrategy
            # train_sampler=None,  # controls the sampling of windows during training
            # validation_sampler=None,  # controls the sampling of windows during validation
            # alpha=None,  # scaling coefficient of the activation regularization
            # beta=None,  # scaling coefficient of the temporal activation regularization
            batch_size=batch_size,  # size of the batches used for training and prediction
            # minimum_scale=None,  # minimum scale returned by the MeanScaler
            # default_scale=None,  # default scale applied if the context window is completely unobserved; if not set, the mean scale of the batch is used
            # impute_missing_values=None,  # whether to impute missing values during training using the current model parameters; recommended for datasets with many missing values, but much slower than the default mode
            # num_imputation_samples=None,  # how many samples to use to impute values when impute_missing_values=True
        )
    elif algo_name == 'DeepState':
        estimator = DeepStateEstimator(
            freq=freq,
            prediction_length=prediction_length,
            trainer=trainer,
            batch_size=batch_size,
            use_feat_dynamic_real=use_feat_dynamic_real,
            use_feat_static_cat=use_feat_static_cat,
            cardinality=cardinality,
        )
    elif algo_name == 'DeepVAR':
        estimator = DeepVAREstimator(  # use multi
            freq=freq,
            prediction_length=prediction_length,
            context_length=context_length,
            trainer=trainer,
            batch_size=batch_size,
            target_dim=96,
        )
    elif algo_name == 'GaussianProcess':
        # TODO
        # estimator = GaussianProcessEstimator(
        #     freq=freq,
        #     prediction_length=prediction_length,
        #     context_length=context_length,
        #     trainer=trainer,
        #     batch_size=batch_size,
        #     cardinality=num_timeseries,
        # )
        pass
    elif algo_name == 'GPVAR':
        estimator = GPVAREstimator(  # use multi
            freq=freq,
            prediction_length=prediction_length,
            context_length=context_length,
            trainer=trainer,
            batch_size=batch_size,
            target_dim=96,
        )
    elif algo_name == 'LSTNet':
        estimator = LSTNetEstimator(  # use multi
            freq=freq,
            prediction_length=prediction_length,
            context_length=context_length,
            num_series=96,
            skip_size=4,
            ar_window=4,
            channels=72,
            trainer=trainer,
            batch_size=batch_size,
        )
    elif algo_name == 'NBEATS':
        estimator = NBEATSEstimator(
            freq=freq,
            prediction_length=prediction_length,
            context_length=context_length,
            trainer=trainer,
            batch_size=batch_size,
        )
    elif algo_name == 'DeepRenewalProcess':
        estimator = DeepRenewalProcessEstimator(
            freq=freq,
            prediction_length=prediction_length,
            context_length=context_length,
            trainer=trainer,
            batch_size=batch_size,
            num_cells=40,
            num_layers=2,
        )
    elif algo_name == 'Tree':
        estimator = TreePredictor(
            freq=freq,
            prediction_length=prediction_length,
            context_length=context_length,
            n_ignore_last=0,
            lead_time=0,
            max_n_datapts=1000000,
            min_bin_size=100,  # used only for the "QRX" method
            use_feat_static_real=False,
            use_feat_dynamic_cat=False,
            use_feat_dynamic_real=use_feat_dynamic_real,
            cardinality=cardinality,
            one_hot_encode=False,
            model_params={'eta': 0.1, 'max_depth': 6, 'silent': 0,
                          'nthread': -1, 'n_jobs': -1, 'gamma': 1,
                          'subsample': 0.9, 'min_child_weight': 1,
                          'colsample_bytree': 0.9, 'lambda': 1,
                          'booster': 'gbtree'},
            max_workers=4,  # default: None
            method="QRX",  # one of "QRX", "QuantileRegression", "QRF"
            quantiles=None,  # used only for the "QuantileRegression" method
            model=None,
            seed=None,
        )
    elif algo_name == 'SelfAttention':
        # TODO
        # estimator = SelfAttentionEstimator(
        #     freq=freq,
        #     prediction_length=prediction_length,
        #     context_length=context_length,
        #     trainer=trainer,
        #     batch_size=batch_size,
        # )
        pass
    elif algo_name == 'MQCNN':
        estimator = MQCNNEstimator(
            freq=freq,
            prediction_length=prediction_length,
            context_length=context_length,
            trainer=trainer,
            batch_size=batch_size,
            use_past_feat_dynamic_real=use_past_feat_dynamic_real,
            use_feat_dynamic_real=use_feat_dynamic_real,
            use_feat_static_cat=use_feat_static_cat,
            cardinality=cardinality,
            embedding_dimension=embedding_dimension,
            add_time_feature=True,
            add_age_feature=False,
            enable_encoder_dynamic_feature=True,
            enable_decoder_dynamic_feature=True,
            seed=None,
            decoder_mlp_dim_seq=None,
            channels_seq=None,
            dilation_seq=None,
            kernel_size_seq=None,
            use_residual=True,
            quantiles=None,
            distr_output=None,
            scaling=None,
            scaling_decoder_dynamic_feature=False,
            num_forking=None,
            max_ts_len=None,
        )
    elif algo_name == 'MQRNN':
        estimator = MQRNNEstimator(
            freq=freq,
            prediction_length=prediction_length,
            context_length=context_length,
            trainer=trainer,
            batch_size=batch_size,
        )
    elif algo_name == 'Seq2Seq':
        # TODO
        # estimator = Seq2SeqEstimator(
        #     freq=freq,
        #     prediction_length=prediction_length,
        #     context_length=context_length,
        #     trainer=trainer,
        #     cardinality=cardinality,
        #     embedding_dimension=4,
        #     encoder=Seq2SeqEncoder(),
        #     decoder_mlp_layer=[4],
        #     decoder_mlp_static_dim=4
        # )
        pass
    elif algo_name == 'SimpleFeedForward':
        estimator = SimpleFeedForwardEstimator(
            num_hidden_dimensions=[40, 40],
            prediction_length=prediction_length,
            context_length=context_length,
            freq=freq,
            trainer=trainer,
            batch_size=batch_size,
        )
    elif algo_name == 'TemporalFusionTransformer':
        estimator = TemporalFusionTransformerEstimator(
            prediction_length=prediction_length,
            context_length=context_length,
            freq=freq,
            trainer=trainer,
            batch_size=batch_size,
            hidden_dim=32,
            variable_dim=None,
            num_heads=4,
            num_outputs=3,
            num_instance_per_series=100,
            dropout_rate=0.1,
            # time_features=[],
            # static_cardinalities={},
            # dynamic_cardinalities={},
            # static_feature_dims={},
            # dynamic_feature_dims={},
            # past_dynamic_features=[]
        )
    elif algo_name == 'DeepTPP':
        # TODO
        # estimator = DeepTPPEstimator(
        #     prediction_interval_length=prediction_length,
        #     context_interval_length=context_length,
        #     freq=freq,
        #     trainer=trainer,
        #     batch_size=batch_size,
        #     num_marks=len(cardinality) if cardinality is not None else 0,
        # )
        pass
    elif algo_name == 'Transformer':
        estimator = TransformerEstimator(
            freq=freq,
            prediction_length=prediction_length,
            trainer=trainer,
            batch_size=batch_size,
            cardinality=cardinality,
        )
    elif algo_name == 'WaveNet':
        estimator = WaveNetEstimator(
            freq=freq,
            prediction_length=prediction_length,
            trainer=trainer,
            batch_size=batch_size,
            cardinality=cardinality,
        )
    elif algo_name == 'Naive2':
        # TODO Multiplicative seasonality is not appropriate for zero and negative values
        predictor = Naive2Predictor(freq=freq, prediction_length=prediction_length, season_length=context_length)
    elif algo_name == 'NPTS':
        predictor = NPTSPredictor(freq=freq, prediction_length=prediction_length, context_length=context_length)
    elif algo_name == 'Prophet':
        def configure_model(model):
            model.add_seasonality(
                name='weekly', period=7, fourier_order=3, prior_scale=0.1
            )
            return model
        predictor = ProphetPredictor(freq=freq, prediction_length=prediction_length, init_model=configure_model)
    elif algo_name in ('ARIMA', 'ETS', 'TBATS', 'CROSTON', 'MLP'):
        # These all map onto RForecastPredictor; the R method name is the
        # lower-cased algorithm name.
        predictor = RForecastPredictor(freq=freq,
                                       prediction_length=prediction_length,
                                       method_name=algo_name.lower(),
                                       period=context_length,
                                       trunc_length=len(train[0]['target']))
    elif algo_name == 'SeasonalNaive':
        predictor = SeasonalNaivePredictor(freq=freq, prediction_length=prediction_length)
    else:
        print('[ERROR]:', algo_name, 'not supported')
        return
    
    if predictor is None:
        try:
            predictor = estimator.train(train_ds, test_ds)
        except Exception as e:
            print(e)
            # Multivariate estimators (e.g. DeepVAR, GPVAR, LSTNet) expect
            # grouped targets, so retry after grouping the series.
            try:
                grouper_train = MultivariateGrouper(max_target_dim=num_timeseries)
                train_ds_multi = grouper_train(train_ds)
                test_ds_multi = grouper_train(test_ds)
                predictor = estimator.train(train_ds_multi, test_ds_multi)
            except Exception as e:
                print(e)

    forecast_it, ts_it = make_evaluation_predictions(
        dataset=test_ds,  # test dataset
        predictor=predictor,  # predictor
        num_samples=100,  # number of sample paths we want for evaluation
    )

    forecasts = list(forecast_it)
    tss = list(ts_it)
    # print(len(forecasts), len(tss))
    
    evaluator = Evaluator(quantiles=[0.1, 0.5, 0.9])
    agg_metrics, item_metrics = evaluator(iter(tss), iter(forecasts), num_series=len(test_ds))

    print(json.dumps(agg_metrics, indent=4))
    
    model_dir = os.path.join(args.model_dir, algo_name)
    os.makedirs(model_dir, exist_ok=True)
    predictor.serialize(Path(model_dir))
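As a follow-up to the `serialize` call above, the saved model can be restored with GluonTS's generic `Predictor.deserialize`; a minimal sketch:

from pathlib import Path

from gluonts.model.predictor import Predictor

# Load the predictor back from the directory it was serialized into.
restored = Predictor.deserialize(Path(model_dir))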
Example #6
def test_climatological_forecaster(
    freq: str,
    history_length: int,
    num_seasons: int,
    use_seasonal_model: bool,
    context_length_frac: float,
) -> None:
    """
    Here we test both the seasonal and non-seasonal variants of the
    Climatological forecaster for various frequencies.

    We further parametrize the test with the `context_length_frac` parameter,
    which indicates what fraction of the history should actually be used,
    including the case where `context_length` is larger than the history
    length.

    In particular, we check that:

    1. predictions are uniformly sampled over the entire past for the
       non-seasonal variant and only over the past seasons for the seasonal
       variant;
    2. predictions do not come from targets outside of the `context_length`,
       if `context_length` is present.

    Parameters
    ----------
    freq
        frequency of the time series
    history_length
        length of the time series to be generated for tests
    num_seasons
        number of seasons present in a given frequency
    use_seasonal_model
        use the seasonal variant?
    context_length_frac
        fraction of history length that should be used as context length
    """

    train_ts = get_test_data(history_length=history_length, freq=freq)

    # For seasonal variant we check the predictions of Climatological
    # forecaster for all seasons.
    # Non-seasonal variant is modeled in the tests as `num_seasons = 1`, and
    # the predictions are checked for only one step.
    num_seasons = num_seasons if use_seasonal_model else 1
    pred_length = num_seasons

    context_length = (int(context_length_frac * history_length)
                      if context_length_frac is not None else None)
    predictor = NPTSPredictor(
        prediction_length=pred_length,
        context_length=context_length,
        freq=freq,
        use_seasonal_model=use_seasonal_model,
        kernel_type=KernelType.uniform,
    )

    dataset = ListDataset(
        [{
            "start": str(train_ts.index[0]),
            "target": train_ts.values
        }],
        freq=freq,
    )

    # validate that the predictor works with targets with NaNs
    _test_nans_in_target(predictor, dataset)

    forecast = next(predictor.predict(dataset, num_samples=2000))

    train_targets = (train_ts.values if context_length is None else
                     train_ts.values[-min(history_length, context_length):])
    # Targets strictly before the context window; predictions must never be
    # drawn from these.
    targets_outside_context = (None if context_length is None
                               or context_length >= history_length else
                               train_ts.values[:-context_length])
    targets_str = "seasons" if use_seasonal_model else "targets"
    seasonal_str = "seasonal" if use_seasonal_model else ""
    for t in range(pred_length):
        targets_prev_seasons = train_targets[range(t, len(train_targets),
                                                   num_seasons)]

        # Predictions must have come from the past seasons only
        assert set(forecast.samples[:, t]).issubset(targets_prev_seasons), (
            f"Predictions for {seasonal_str} Climatological forecaster are "
            f"not generated from the target values of past {targets_str}.\n"
            f"Past {targets_str}: {targets_prev_seasons}\n"
            f"Predictions: {set(forecast.samples[:, t])}")

        # Predictions must have been uniformly sampled from the targets in
        # the previous seasons
        prediction_dist, _ = np.histogram(
            forecast.samples[:, t], np.append(targets_prev_seasons, np.inf))
        prediction_dist = prediction_dist / sum(prediction_dist)

        expected_dist = np.ones_like(targets_prev_seasons) / len(
            targets_prev_seasons)

        np.testing.assert_almost_equal(
            prediction_dist,
            expected_dist,
            1,
            f"Predictions of {seasonal_str} Climatological forecaster are not "
            f"uniformly sampled from past {targets_str}\n"
            f"Expected distribution over the past "
            f"{targets_str}: {expected_dist}\n"
            f"Prediction distribution: {prediction_dist}\n",
        )

        if targets_outside_context is not None:
            # Predictions should never be from the targets outside the
            # context length
            assert not set.intersection(
                set(forecast.samples[:, t]), set(targets_outside_context)
            ), (f"Predictions of Climatological forecaster are sampled from "
                f"targets outside the context length.\n"
                f"Targets outside the context length: "
                f"{targets_outside_context}\n"
                f"Predictions: {set(forecast.samples[:, t])}\n")
Example #7
def test_npts_custom_features(
    use_seasonal_model: bool,
    feature_scale: float,
    min_frac_samples_from_seasons: float,
    context_length_frac: Optional[float],
) -> None:
    """
    Same as `test_npts_forecaster` except that we use the weekly frequency and
    a dummy custom feature to define seasonality. The dummy feature defines 52
    weeks as one cycle.

    We explicitly disable `use_default_time_features` so that the seasonality
    is defined based only on the custom feature.

    Parameters
    ----------
    use_seasonal_model
        use the seasonal variant?
    feature_scale
        scale for the seasonal features to enforce strict sampling over the
        past seasons
    min_frac_samples_from_seasons
        the minimum threshold for the fraction of times the predictions
        should come exclusively from past seasons
    context_length_frac
        fraction of history length that should be used as context length
    """
    freq = "W"
    history_length = 52 * 8  # approx. 8 years (seasons)
    train_ts = get_test_data(history_length=history_length, freq=freq)
    context_length = (int(context_length_frac * history_length)
                      if context_length_frac is not None else None)

    num_seasons = 52 if use_seasonal_model else 1
    pred_length = num_seasons

    # Custom features should be defined both in training and prediction ranges
    full_time_index = pd.date_range(
        train_ts.index.min(),
        periods=len(train_ts) + pred_length,
        freq=train_ts.index.freq,
    )
    # Dummy feature defining 52 seasons
    feat_dynamic_real = [[(ix % 52) / 51.0 - 0.5
                          for ix, timestamp in enumerate(full_time_index)]]

    predictor = NPTSPredictor(
        prediction_length=pred_length,
        freq=freq,
        context_length=context_length,
        kernel_type=KernelType.exponential,
        feature_scale=feature_scale,
        use_seasonal_model=use_seasonal_model,
        use_default_time_features=False,  # disable default time features
    )

    dataset = ListDataset(
        [{
            "start": str(train_ts.index[0]),
            "target": train_ts.values,
            "feat_dynamic_real": feat_dynamic_real,
        }],
        freq=freq,
    )

    # validate that the predictor works with targets with NaNs
    _test_nans_in_target(predictor, dataset)

    forecast = next(predictor.predict(dataset, num_samples=2000))

    train_targets = (train_ts.values if context_length is None else
                     train_ts.values[-min(history_length, context_length):])
    # Targets strictly before the context window; predictions must never be
    # drawn from these.
    targets_outside_context = (None if context_length is None
                               or context_length >= history_length else
                               train_ts.values[:-context_length])
    targets_str = "seasons" if use_seasonal_model else "targets"
    seasonal_str = "seasonal" if use_seasonal_model else ""
    for t in range(pred_length):
        prev_seasons_ix = range(t, len(train_targets), num_seasons)

        # Prediction distribution over all the training targets
        prediction_dist, _ = np.histogram(forecast.samples[:, t],
                                          np.append(train_targets, np.inf))
        prediction_dist = prediction_dist / sum(prediction_dist)

        # The fraction of times the targets from past seasons are sampled
        # exclusively should be above some threshold.
        assert (
            sum(prediction_dist[prev_seasons_ix]) >
            min_frac_samples_from_seasons
        ), (f"Predictions of {seasonal_str} NPTS are not sampled from past "
            f"{targets_str} enough number of times.\n"
            f"Expected frequency over past {targets_str}: "
            f"{min_frac_samples_from_seasons}\n"
            f"Sampled frequency: {sum(prediction_dist[prev_seasons_ix])}")

        # Here we use large value of `feature_scale`, so we sample mostly
        # from past seasons. In this case, the past seasons must be sampled
        # with exponentially decaying weights which depend only
        # on the time index feature: f(t) = t / (train_length + pred_length)
        distance_to_prev_seasons = np.arange(
            len(train_targets) + 1, 1,
            -num_seasons) / (len(train_targets) + pred_length)
        expected_dist_seasons = np.exp(-distance_to_prev_seasons)
        expected_dist_seasons /= sum(expected_dist_seasons)

        prediction_dist_seasons = prediction_dist[prev_seasons_ix]
        prediction_dist_seasons /= sum(prediction_dist_seasons)

        np.testing.assert_almost_equal(
            expected_dist_seasons,
            prediction_dist_seasons,
            1,
            f"Predictions of {seasonal_str} NPTS are not sampled with "
            f"exponentially decaying weights over the "
            f"past {targets_str}.\nExpected distribution over the past "
            f"{targets_str}: {expected_dist_seasons}\n"
            f"Prediction_dist: {prediction_dist_seasons}",
        )

        if targets_outside_context is not None:
            # Predictions should never be from the targets outside the context
            # length
            assert not set.intersection(set(
                forecast.samples[:, t]), set(targets_outside_context)), (
                    f"Predictions of NPTS forecaster are sampled from targets "
                    f"outside the context length.\n"
                    f"Targets outside the context length: "
                    f"{targets_outside_context}\n"
                    f"Predictions: {set(forecast.samples[:, t])}")
Example #8
def test_npts_forecaster(
    freq: str,
    history_length: int,
    num_seasons: int,
    use_seasonal_model: bool,
    feature_scale: float,
    min_frac_samples_from_seasons: float,
    context_length_frac: Optional[float],
) -> None:
    """
    Here we test both the seasonal (num_seasons=24) and non-seasonal
    (num_seasons=1) variants of NPTS for various frequencies.

    We further parametrize the test with the `context_length_frac` parameter,
    which indicates what fraction of the history should actually be used.

    In particular, we check that:

    1. predictions come from past seasons exclusively with high probability
       for large values of `feature_scale`;

    2. predictions are sampled according to exponentially decaying weights
       over the targets from past seasons or the whole training history,
       depending on the `use_seasonal_model` flag;

    3. predictions are also sampled from time points that are not seasons
       for small values of `feature_scale`;

    4. predictions do not come from targets outside of the context length,
       if `context_length` is present.

    Parameters
    ----------
    freq
        frequency of the time series
    history_length
        length of the time series to be generated for tests
    num_seasons
        number of seasons present in a given frequency
    use_seasonal_model
        use the seasonal variant?
    feature_scale
        scale for the seasonal features to enforce strict sampling over the
        past seasons
    min_frac_samples_from_seasons
        the minimum threshold for the fraction of times the predictions
        should come exclusively from past seasons
    context_length_frac
        fraction of history length that should be used as context length
    """

    train_ts = get_test_data(history_length=history_length, freq=freq)

    # For seasonal variant we check the predictions of NPTS forecaster for
    # all seasons.
    # Non-seasonal variant is modeled in the tests as `num_seasons = 1`, and
    # the predictions are checked for only one step.
    num_seasons = num_seasons if use_seasonal_model else 1
    pred_length = num_seasons

    context_length = (int(context_length_frac * history_length)
                      if context_length_frac is not None else None)
    predictor = NPTSPredictor(
        prediction_length=pred_length,
        context_length=context_length,
        freq=freq,
        kernel_type=KernelType.exponential,
        feature_scale=feature_scale,
        use_seasonal_model=use_seasonal_model,
    )

    dataset = ListDataset(
        [{
            "start": str(train_ts.index[0]),
            "target": train_ts.values
        }],
        freq=freq,
    )

    # validate that the predictor works with targets with NaNs
    _test_nans_in_target(predictor, dataset)

    forecast = next(predictor.predict(dataset, num_samples=2000))

    train_targets = (train_ts.values if context_length is None else
                     train_ts.values[-min(history_length, context_length):])
    # Targets strictly before the context window; predictions must never be
    # drawn from these.
    targets_outside_context = (None if context_length is None
                               or context_length >= history_length else
                               train_ts.values[:-context_length])
    targets_str = "seasons" if use_seasonal_model else "targets"
    seasonal_str = "seasonal" if use_seasonal_model else ""
    for t in range(pred_length):
        prev_seasons_ix = range(t, len(train_targets), num_seasons)

        # Prediction distribution over all the training targets
        prediction_dist, _ = np.histogram(forecast.samples[:, t],
                                          np.append(train_targets, np.inf))
        prediction_dist = prediction_dist / sum(prediction_dist)

        # The fraction of times the targets from past seasons are sampled
        # exclusively should be above some threshold.
        assert (
            sum(prediction_dist[prev_seasons_ix]) >
            min_frac_samples_from_seasons
        ), (f"Predictions of {seasonal_str} NPTS are not sampled from past "
            f"{targets_str} enough number of times.\n"
            f"Expected frequency over past {targets_str}: "
            f"{min_frac_samples_from_seasons}\n"
            f"Sampled frequency: {sum(prediction_dist[prev_seasons_ix])}")

        if feature_scale == 1.0:
            # Time index feature and seasonal features are given equal
            # importance so we expect to see some predictions
            # coming outside of past seasons.
            non_seasons_ix = list(
                set(range(len(train_targets))) - set(prev_seasons_ix))
            if non_seasons_ix:
                assert sum(prediction_dist[non_seasons_ix]) > 0.0, (
                    f"Predictions of {seasonal_str} NPTS are expected to come "
                    f"from targets not in the previous seasons "
                    f"for small value of feature_scale: {feature_scale}")

        if feature_scale == 1000.0:
            # Here we sample mostly from past seasons. In this case, the past
            # seasons must be sampled with exponentially
            # decaying weights which depend only on the time index
            # feature: f(t) = t / (train_length + pred_length)
            distance_to_prev_seasons = np.arange(
                len(train_targets) + 1, 1,
                -num_seasons) / (len(train_targets) + pred_length)
            expected_dist_seasons = np.exp(-distance_to_prev_seasons)
            expected_dist_seasons /= sum(expected_dist_seasons)

            prediction_dist_seasons = prediction_dist[prev_seasons_ix]
            prediction_dist_seasons /= sum(prediction_dist_seasons)

            np.testing.assert_almost_equal(
                prediction_dist_seasons,
                expected_dist_seasons,
                1,
                f"Predictions of {seasonal_str} NPTS are not sampled with "
                f"exponentially decaying weights over the past "
                f"{targets_str}.\nExpected distribution over the past "
                f"{targets_str}: {expected_dist_seasons}\n"
                f"Prediction_dist: {prediction_dist_seasons}",
            )

        if targets_outside_context is not None:
            # Predictions should never be from the targets outside the context
            # length
            assert not set.intersection(set(
                forecast.samples[:, t]), set(targets_outside_context)), (
                    "Predictions of NPTS forecaster are sampled from targets "
                    "outside the context length.\n"
                    f"Targets outside the context length: "
                    f"{targets_outside_context}\n"
                    f"Predictions: {set(forecast.samples[:, t])}")
Example #9
# sba
sba_predictor = CrostonForecastPredictor(freq=freq,
                                         prediction_length=prediction_length,
                                         variant='sba',
                                         no_of_params=2)
sba_forecast = list(sba_predictor.predict(train_ds))

# sbj
sbj_predictor = CrostonForecastPredictor(freq=freq,
                                         prediction_length=prediction_length,
                                         variant='sbj',
                                         no_of_params=2)
sbj_forecast = list(sbj_predictor.predict(train_ds))

# npts predictor
npts_predictor = NPTSPredictor(freq=freq,
                               prediction_length=prediction_length,
                               context_length=300, kernel_type='uniform',
                               use_seasonal_model=False)
npts_forecast = list(npts_predictor.predict(train_ds))


# deep ar
distr = PiecewiseLinearOutput(7)
deep_ar_trainer = Trainer(
    ctx=mx.context.gpu() if is_gpu and args.use_cuda else mx.context.cpu(),
    batch_size=128,
    learning_rate=1e-2,
    epochs=20,
    num_batches_per_epoch=args.number_of_batches_per_epoch,
    clip_gradient=5.48481845049343,
    weight_decay=0.001,
    hybridize=False)
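The snippet ends after building the trainer; a hedged sketch of how `distr` and `deep_ar_trainer` might be wired into a `DeepAREstimator` (the wiring is illustrative, not taken from the original script):

from gluonts.model.deepar import DeepAREstimator

deep_ar_estimator = DeepAREstimator(
    freq=freq,
    prediction_length=prediction_length,
    distr_output=distr,       # the PiecewiseLinearOutput(7) defined above
    trainer=deep_ar_trainer,
)
deep_ar_predictor = deep_ar_estimator.train(train_ds)
deep_ar_forecast = list(deep_ar_predictor.predict(train_ds))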