Example No. 1
def train_models(train,
                 models,
                 forecast_len,
                 full_df=None,
                 seasonality="infer_from_data",
                 in_sample=None):

    seasons = select_seasonality(train, seasonality)

    models_dict = {}
    for m in models:
        if m == "ARIMA":
            models_dict["ARIMA"] = pm.auto_arima(train,
                                                 seasonal=True,
                                                 m=seasons)
        if m == "Prophet":
            model = Prophet()
            models_dict["Prophet"] = model.fit(prophet_dataframe(train))
        if m == "HWAAS":
            models_dict["HWAAS"] = ExponentialSmoothing(
                train,
                seasonal_periods=seasons,
                trend='add',
                seasonal='add',
                damped=True).fit(use_boxcox=True)
        if m == "HWAMS":
            models_dict["HWAMS"] = ExponentialSmoothing(
                train,
                seasonal_periods=seasons,
                trend='add',
                seasonal='mul',
                damped=True).fit(use_boxcox=True)
        # if m=="HOLT":
        #   models_dict["HOLT"] = Holt(train,exponential=True).fit()
        if m == "PYAF":
            model = autof.cForecastEngine()
            model.train(iInputDS=train.reset_index(),
                        iTime='Date',
                        iSignal='Target',
                        iHorizon=len(train))  # note: using len(train) as the horizon here is questionable
            models_dict["PYAF"] = model.forecast(iInputDS=train.reset_index(),
                                                 iHorizon=forecast_len)
        if m == "Gluonts":
            freqed = pd.infer_freq(train.index)
            if freqed == "MS":
                freq = "M"
            else:
                freq = freqed
            estimator = DeepAREstimator(
                freq=freq,
                prediction_length=forecast_len,
                trainer=Trainer(epochs=2))  #use_feat_dynamic_real=True
            print(train)
            print(type(train))
            print(gluonts_dataframe(train))
            models_dict["Gluonts"] = estimator.train(
                training_data=gluonts_dataframe(train))
        if m == "NBEATS":

            device = torch.device('cpu')
            seasons = select_seasonality(train, seasonality)

            if os.path.isfile(CHECKPOINT_NAME):
                os.remove(CHECKPOINT_NAME)
            stepped = 5
            batch_size = 10
            if in_sample:
                x_train, y_train, x_test, y_test, net, norm_constant = nbeats_dataframe(
                    full_df, forecast_len, in_sample=True)
                optimiser = optim.Adam(net.parameters())
                data = data_generator(x_train, y_train, batch_size)
                #test_losses = []
                for r in range(stepped):

                    train_100_grad_steps(data, device, net,
                                         optimiser)  #test_losses
                models_dict["NBEATS"] = {}
                models_dict["NBEATS"]["model"] = net
                models_dict["NBEATS"]["x_test"] = x_test
                models_dict["NBEATS"]["y_test"] = y_test
                models_dict["NBEATS"]["constant"] = norm_constant

            else:  # if out_sample train is df

                x_train, y_train, net, norm_constant = nbeats_dataframe(
                    full_df, forecast_len, in_sample=False)

                batch_size = 10  # greater than 4 for viz
                optimiser = optim.Adam(net.parameters())
                data = data_generator(x_train, y_train, batch_size)
                stepped = 5
                #test_losses = []
                for r in range(stepped):
                    _, forecast = net(torch.tensor(
                        x_train, dtype=torch.float))  ### Not Used
                    p = forecast.detach().numpy()  ### Not Used
                    train_100_grad_steps(data, device, net,
                                         optimiser)  #test_losses
                models_dict["NBEATS"] = {}
                models_dict["NBEATS"]["model"] = net
                models_dict["NBEATS"]["tuple"] = (x_train, y_train, net,
                                                  norm_constant)

        # if m=="ProphetGluonts":
        #   freqed = pd.infer_freq(train.index)
        #   if freqed=="MS":
        #     freq= "M"
        #   else:
        #     freq= freqed
        #   models_dict["ProphetGluonts"] = ProphetPredictor(freq=freq, prediction_length=forecast_len) #use_feat_dynamic_real=True
        #   models_dict["ProphetGluonts"] = list(models_dict["ProphetGluonts"])


    return models_dict
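
A minimal usage sketch for train_models (hypothetical data; assumes the helper functions used above, such as select_seasonality and prophet_dataframe, are importable from the same module):

import pandas as pd

monthly = pd.Series(range(1, 49),
                    index=pd.date_range("2016-01-31", periods=48, freq="M"),
                    name="Target")
trained = train_models(monthly, ["ARIMA", "HWAAS"], forecast_len=12)
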
def prep_estimators(
    pred_length: int,
    dataset_name: str,
    num_series: int,
    cardinalities: List[int],
    epochs: int,
) -> List[Model]:

    trainer = Trainer(epochs=epochs)

    models = [
        # DeepAREstimator(
        #     freq = freqs[dataset_name],
        #     prediction_length = pred_length,
        #     trainer = trainer,
        #     use_feat_static_cat = True,
        #     cardinality = cardinalities,
        #     distr_output=NegativeBinomialOutput()
        # ),
        # DeepFactorEstimator(
        #     freq = freqs[dataset_name],
        #     prediction_length = pred_length,
        #     trainer = trainer,
        #     cardinality = [num_series],
        #     distr_output=NegativeBinomialOutput(),
        # ),
        # DeepStateEstimator(
        #     freq = freqs[dataset_name],
        #     prediction_length = pred_length,
        #     trainer = trainer,
        #     cardinality = cardinalities,
        # ),
        # NBEATSEstimator(
        #     freq = freqs[dataset_name],
        #     prediction_length = pred_length,
        #     trainer = trainer,
        #     # TODO: is this the loss function/evaluation metric we want to use?
        #     loss_function = 'MAPE',
        # ),
        NBEATSEnsembleEstimator(freq=freqs[dataset_name],
                                prediction_length=pred_length,
                                trainer=trainer,
                                meta_bagging_size=1),
        NBEATSEnsembleEstimator(freq=freqs[dataset_name],
                                prediction_length=pred_length,
                                trainer=trainer,
                                num_stacks=2,
                                num_blocks=[3],
                                widths=[256, 2048],
                                sharing=[True],
                                expansion_coefficient_lengths=[3],
                                stack_types=["T", "S"],
                                meta_bagging_size=1),
        # MQCNNEstimator(
        #     freq = freqs[dataset_name],
        #     prediction_length = pred_length,
        #     trainer = trainer,
        # ),
        # MQRNNEstimator(
        #     freq = freqs[dataset_name],
        #     prediction_length = pred_length,
        #     trainer = trainer,
        # ),
        # WaveNetEstimator(
        #     freq = freqs[dataset_name],
        #     prediction_length = pred_length,
        #     trainer = trainer,
        #     cardinality = cardinalities
        # ),
    ]
    return models
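
A hedged usage sketch for the estimator list returned above (hypothetical values; assumes the module-level freqs mapping referenced in the function and a GluonTS dataset object with a train split):

estimators = prep_estimators(pred_length=24,
                             dataset_name="m4_hourly",
                             num_series=414,
                             cardinalities=[414],
                             epochs=10)
predictors = [est.train(dataset.train) for est in estimators]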
Example No. 3
from dataset import dataset
from estimator import estimator
from gluonts.dataset.loader import TrainDataLoader
from gluonts.trainer import Trainer
import numpy as np
from estimator import net
from gluonts.gluonts_tqdm import tqdm

training_data = dataset.train
transformation = estimator.create_transformation()
dtype = np.float32
num_workers = None
num_prefetch = None
shuffle_buffer_length = None
trainer = Trainer(ctx="cpu",
                  epochs=1,
                  learning_rate=0.01,
                  num_batches_per_epoch=100)
training_data_loader = TrainDataLoader(
    dataset=training_data,
    transform=transformation,
    batch_size=trainer.batch_size,
    num_batches_per_epoch=trainer.num_batches_per_epoch,
    ctx=trainer.ctx,
    dtype=dtype,
    num_workers=num_workers,
    num_prefetch=num_prefetch,
)
input_names = ['past_target', 'future_target']
with tqdm(training_data_loader) as it:
    for batch_no, data_entry in enumerate(it, start=1):
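        # The loop body is cut off in the original example. A minimal, hypothetical
        # body in the style of the GluonTS extended tutorial pulls out the arrays
        # named in `input_names` and stops after one epoch's worth of batches:
        inputs = [data_entry[k] for k in input_names]
        if batch_no >= trainer.num_batches_per_epoch:
            break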
Example No. 4
    def __init__(
        self,
        freq: str,
        prediction_length: int,
        context_length: Optional[int] = None,
        trainer: Trainer = Trainer(),
        num_stacks: int = 30,
        widths: Optional[List[int]] = None,
        num_blocks: Optional[List[int]] = None,
        num_block_layers: Optional[List[int]] = None,
        expansion_coefficient_lengths: Optional[List[int]] = None,
        sharing: Optional[List[bool]] = None,
        stack_types: Optional[List[str]] = None,
        loss_function: Optional[str] = "MAPE",
        **kwargs,
    ) -> None:
        """
        Defines an estimator. All parameters should be serializable.
        """
        super().__init__(trainer=trainer, **kwargs)

        assert (prediction_length >
                0), "The value of `prediction_length` should be > 0"
        assert (context_length is None or context_length > 0
                ), "The value of `context_length` should be > 0"
        assert (num_stacks is None
                or num_stacks > 0), "The value of `num_stacks` should be > 0"
        assert (
            loss_function is None or loss_function in VALID_LOSS_FUNCTIONS
        ), f"The loss function has to be one of the following: {VALID_LOSS_FUNCTIONS}."

        self.freq = freq
        self.prediction_length = prediction_length
        self.context_length = (context_length if context_length is not None
                               else 2 * prediction_length)
        # num_stacks has to be handled separately because other arguments have to match its length
        self.num_stacks = num_stacks
        self.loss_function = loss_function

        self.widths = self._validate_nbeats_argument(
            argument_value=widths,
            argument_name="widths",
            default_value=[512],
            validation_condition=lambda val: val > 0,
            invalidation_message="Values of 'widths' should be > 0",
        )
        self.num_blocks = self._validate_nbeats_argument(
            argument_value=num_blocks,
            argument_name="num_blocks",
            default_value=[1],
            validation_condition=lambda val: val > 0,
            invalidation_message="Values of 'num_blocks' should be > 0",
        )
        self.num_block_layers = self._validate_nbeats_argument(
            argument_value=num_block_layers,
            argument_name="num_block_layers",
            default_value=[4],
            validation_condition=lambda val: val > 0,
            invalidation_message="Values of 'num_block_layers' should be > 0",
        )
        self.sharing = self._validate_nbeats_argument(
            argument_value=sharing,
            argument_name="sharing",
            default_value=[False],
            validation_condition=lambda val: True,
            invalidation_message="",
        )
        self.expansion_coefficient_lengths = self._validate_nbeats_argument(
            argument_value=expansion_coefficient_lengths,
            argument_name="expansion_coefficient_lengths",
            default_value=[2],
            validation_condition=lambda val: val > 0,
            invalidation_message=
            "Values of 'expansion_coefficient_lengths' should be > 0",
        )
        self.stack_types = self._validate_nbeats_argument(
            argument_value=stack_types,
            argument_name="stack_types",
            default_value=["G"],
            validation_condition=lambda val: val in VALID_N_BEATS_STACK_TYPES,
            invalidation_message=
            f"Values of 'stack_types' should be one of {VALID_N_BEATS_STACK_TYPES}",
        )
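
A brief, hedged instantiation sketch for the constructor above (hypothetical values; it appears to be the N-BEATS estimator, and every argument not shown falls back to the defaults validated above):

estimator = NBEATSEstimator(freq="H",
                            prediction_length=24,
                            trainer=Trainer(epochs=10),
                            num_stacks=30,
                            loss_function="MAPE")
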
import pandas as pd
import matplotlib.pyplot as plt
# imports needed by the rest of the snippet (GluonTS 0.x, MXNet-based):
from pathlib import Path
from gluonts.dataset import common
from gluonts.dataset.util import to_pandas
from gluonts.model import deepar
from gluonts.trainer import Trainer
csv_path = '/Users/seenli/Documents/workspace/code/pytorch_learn2/time_series_DL/Twitter_volume_AMZN.csv'
df = pd.read_csv(csv_path, header=0, sep=',')
df['timestamp'] = pd.to_datetime(df['timestamp'])
df.set_index(['timestamp'], inplace=True)

# print(df.value[:"2015-04-22 20:47:53"]) # the final timestamp [2015-04-22 20:47:53] is included in the slice
# print(df.value[:"2015-04-23 20:47:53"]) # if the given timestamp lies beyond the data range, only the available data is returned
# print("start timestamp", df.index[0]) # 'start' is the first timestamp; 'target' holds the series values for those timestamps
data = common.ListDataset([{'start': df.index[0], 'target': df.value[:"2015-04-22 21:00:00"]}], freq='H')  # this data format is fixed (required by GluonTS)
# here df.index holds the timestamps and df.value the corresponding values

estimator = deepar.DeepAREstimator(
    freq='H',
    prediction_length=24,
    trainer=Trainer(epochs=50)
)

predictor = estimator.train(training_data=data)

predictor.serialize(Path("/Users/seenli/Documents/workspace/code/pytorch_learn2/time_series_DL/model_save"))
for train_entry, predict_result in zip(data, predictor.predict(data)):
    to_pandas(train_entry)[-60:].plot(linewidth=2)
    predict_result.plot(color='g', prediction_intervals=[50.0, 90.0])
plt.grid(which='both')
plt.show()
## print the forecast results
prediction = next(predictor.predict(data))
print(prediction.mean)
prediction.plot(output_file='graph.png')
    def create_predictor(self, transformation: Transformation,
                         trained_network: HybridBlock) -> Predictor:
        prediction_network = MyPredNetwork1(
            prediction_length=self.prediction_length, num_cells=self.num_cells)

        copy_parameters(trained_network, prediction_network)

        return RepresentableBlockPredictor(
            input_transform=transformation,
            prediction_net=prediction_network,
            batch_size=self.trainer.batch_size,
            freq=self.freq,
            prediction_length=self.prediction_length,
            ctx=self.trainer.ctx)


import mxnet as mx
pred_length = 6

estimator1 = MyEstimator1(prediction_length=pred_length,
                          context_length=2 * pred_length,
                          freq="1H",
                          num_cells=10,
                          trainer=Trainer(ctx="cpu",
                                          epochs=5,
                                          learning_rate=1e-3,
                                          hybridize=False,
                                          num_batches_per_epoch=20))

predictor1 = estimator1.train(morningtrain)
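
A short, hedged follow-up (assuming morningtrain is a GluonTS dataset, as in the training call above): the trained predictor generates forecasts through the standard predict interface:

forecasts = list(predictor1.predict(morningtrain))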
Example No. 7
    def fit(self,
            *,
            timeout: float = None,
            iterations: int = None) -> CallResult[None]:
        """ Fits NBEATS model using training data from set_training_data and hyperparameters
            
            Keyword Arguments:
                timeout {float} -- timeout, considered (default: {None})
                iterations {int} -- iterations, considered (default: {None})
            
            Returns:
                CallResult[None]
        """

        if iterations is None:
            iterations = self.hyperparams["epochs"]
            has_finished = True
        else:
            has_finished = False

        if self.hyperparams['interpretable']:
            num_stacks = 2
            num_blocks = [1]
            widths = [256, 2048]
            sharing = [True]
            expansion_coefficient_lengths = [3]
            stack_types = ["T", "S"]
            estimator_class = NBEATSEnsembleEstimatorHook
        else:
            num_stacks = 30
            num_blocks = [3]
            widths = [512]
            sharing = [False]
            expansion_coefficient_lengths = [32]
            stack_types = ["G"]
            estimator_class = NBEATSEnsembleEstimator

        estimator = estimator_class(
            freq=self._freq,
            prediction_length=self.hyperparams['prediction_length'],
            meta_context_length=[
                i
                for i in range(2, self.hyperparams['num_context_lengths'] + 2)
            ],
            meta_loss_function=['sMAPE', 'MASE', 'MAPE'],
            meta_bagging_size=self.hyperparams['num_estimators'],
            num_stacks=num_stacks,
            num_blocks=num_blocks,
            widths=widths,
            sharing=sharing,
            expansion_coefficient_lengths=expansion_coefficient_lengths,
            stack_types=stack_types,
            trainer=Trainer(
                epochs=iterations,
                learning_rate=self.hyperparams['learning_rate'],
                batch_size=self.hyperparams['training_batch_size'],
                num_batches_per_epoch=self.hyperparams['steps_per_epoch']),
        )

        logger.info(f"Fitting for {iterations} iterations")
        start_time = time.time()
        predictor = estimator.train(self._train_data)
        predictor.batch_size = self.hyperparams['inference_batch_size']
        predictor.set_aggregation_method('none')
        self._is_fit = True
        logger.info(
            f"Fit for {iterations} epochs, took {time.time() - start_time}s")

        if not os.path.isdir(self.hyperparams['weights_dir']):
            os.mkdir(self.hyperparams['weights_dir'])
        predictor.serialize(Path(self.hyperparams['weights_dir']))

        return CallResult(None, has_finished=has_finished)
Example No. 8
    def __init__(
        self,
        freq: str,
        prediction_length: int,
        meta_context_length: Optional[List[int]] = None,
        meta_loss_function: Optional[List[str]] = None,
        meta_bagging_size: int = 10,
        trainer: Trainer = Trainer(),
        num_stacks: int = 30,
        widths: Optional[List[int]] = None,
        num_blocks: Optional[List[int]] = None,
        num_block_layers: Optional[List[int]] = None,
        expansion_coefficient_lengths: Optional[List[int]] = None,
        sharing: Optional[List[bool]] = None,
        stack_types: Optional[List[str]] = None,
        **kwargs,
    ) -> None:
        super().__init__()

        assert (
            prediction_length > 0
        ), "The value of `prediction_length` should be > 0"

        self.freq = freq
        self.prediction_length = prediction_length

        assert meta_loss_function is None or all(
            [
                loss_function in VALID_LOSS_FUNCTIONS
                for loss_function in meta_loss_function
            ]
        ), f"Each loss function has to be one of the following: {VALID_LOSS_FUNCTIONS}."
        assert meta_context_length is None or all(
            [context_length > 0 for context_length in meta_context_length]
        ), "The value of each `context_length` should be > 0"
        assert (
            meta_bagging_size is None or meta_bagging_size > 0
        ), "The value of each `context_length` should be > 0"

        self.meta_context_length = (
            meta_context_length
            if meta_context_length is not None
            else [multiplier * prediction_length for multiplier in range(2, 8)]
        )
        self.meta_loss_function = (
            meta_loss_function
            if meta_loss_function is not None
            else VALID_LOSS_FUNCTIONS
        )
        self.meta_bagging_size = meta_bagging_size

        # The following arguments are validated in the NBEATSEstimator:
        self.trainer = trainer
        print(f"TRAINER:{str(trainer)}")
        self.num_stacks = num_stacks
        self.widths = widths
        self.num_blocks = num_blocks
        self.num_block_layers = num_block_layers
        self.expansion_coefficient_lengths = expansion_coefficient_lengths
        self.sharing = sharing
        self.stack_types = stack_types

        # Actually instantiate the different models
        self.estimators = self._estimator_factory(**kwargs)
Example No. 9
    def __init__(
        self,
        freq: str,
        prediction_length: int,
        context_length: Optional[int] = None,
        trainer: Trainer = Trainer(),
        dropout_rate: float = 0.1,
        cardinality: Optional[List[int]] = None,
        embedding_dimension: int = 20,
        distr_output: DistributionOutput = StudentTOutput(),
        model_dim: int = 32,
        inner_ff_dim_scale: int = 4,
        pre_seq: str = "dn",
        post_seq: str = "drn",
        act_type: str = "softrelu",
        num_heads: int = 8,
        scaling: bool = True,
        lags_seq: Optional[List[int]] = None,
        time_features: Optional[List[TimeFeature]] = None,
        use_feat_dynamic_real: bool = False,
        use_feat_static_cat: bool = False,
        num_parallel_samples: int = 100,
    ) -> None:
        super().__init__(trainer=trainer)

        assert (
            prediction_length > 0
        ), "The value of `prediction_length` should be > 0"
        assert (
            context_length is None or context_length > 0
        ), "The value of `context_length` should be > 0"
        assert dropout_rate >= 0, "The value of `dropout_rate` should be >= 0"
        assert (
            cardinality is not None or not use_feat_static_cat
        ), "You must set `cardinality` if `use_feat_static_cat=True`"
        assert cardinality is None or all(
            c > 0 for c in cardinality
        ), "Elements of `cardinality` should be > 0"
        assert (
            embedding_dimension > 0
        ), "The value of `embedding_dimension` should be > 0"
        assert (
            num_parallel_samples > 0
        ), "The value of `num_parallel_samples` should be > 0"

        self.freq = freq
        self.prediction_length = prediction_length
        self.context_length = (
            context_length if context_length is not None else prediction_length
        )
        self.distr_output = distr_output
        self.dropout_rate = dropout_rate
        self.use_feat_dynamic_real = use_feat_dynamic_real
        self.use_feat_static_cat = use_feat_static_cat
        self.cardinality = cardinality if use_feat_static_cat else [1]
        self.embedding_dimension = embedding_dimension
        self.num_parallel_samples = num_parallel_samples
        self.lags_seq = (
            lags_seq
            if lags_seq is not None
            else get_lags_for_frequency(freq_str=freq)
        )
        self.time_features = (
            time_features
            if time_features is not None
            else time_features_from_frequency_str(self.freq)
        )
        self.history_length = self.context_length + max(self.lags_seq)
        self.scaling = scaling

        self.config = {
            "model_dim": model_dim,
            "pre_seq": pre_seq,
            "post_seq": post_seq,
            "dropout_rate": dropout_rate,
            "inner_ff_dim_scale": inner_ff_dim_scale,
            "act_type": act_type,
            "num_heads": num_heads,
        }

        self.encoder = TransformerEncoder(
            self.context_length, self.config, prefix="enc_"
        )
        self.decoder = TransformerDecoder(
            self.prediction_length, self.config, prefix="dec_"
        )
Example No. 10
    def __init__(
        self,
        freq: str,
        prediction_length: int,
        trainer: Trainer = Trainer(),
        context_length: Optional[int] = None,
        num_layers: int = 2,
        num_cells: int = 40,
        cell_type: str = "lstm",
        dropout_rate: float = 0.1,
        use_feat_dynamic_real: bool = False,
        use_feat_static_cat: bool = False,
        use_feat_static_real: bool = False,
        cardinality: Optional[List[int]] = None,
        embedding_dimension: int = 20,
        distr_output: DistributionOutput = StudentTOutput(),
        scaling: bool = True,
        lags_seq: Optional[List[int]] = None,
        time_features: Optional[List[TimeFeature]] = None,
        num_parallel_samples: int = 100,
    ) -> None:
        super().__init__(trainer=trainer)

        assert (prediction_length >
                0), "The value of `prediction_length` should be > 0"
        assert (context_length is None or context_length > 0
                ), "The value of `context_length` should be > 0"
        assert num_layers > 0, "The value of `num_layers` should be > 0"
        assert num_cells > 0, "The value of `num_cells` should be > 0"
        assert dropout_rate >= 0, "The value of `dropout_rate` should be >= 0"
        assert (cardinality is not None and use_feat_static_cat) or (
            cardinality is None and not use_feat_static_cat
        ), "You should set `cardinality` if and only if `use_feat_static_cat=True`"
        assert cardinality is None or all(
            c > 0 for c in cardinality
        ), "Elements of `cardinality` should be > 0"
        assert (embedding_dimension >
                0), "The value of `embedding_dimension` should be > 0"
        assert (num_parallel_samples >
                0), "The value of `num_parallel_samples` should be > 0"

        self.freq = freq
        self.context_length = (context_length if context_length is not None
                               else prediction_length)
        self.prediction_length = prediction_length
        self.distr_output = distr_output
        self.num_layers = num_layers
        self.num_cells = num_cells
        self.cell_type = cell_type
        self.dropout_rate = dropout_rate
        self.use_feat_dynamic_real = use_feat_dynamic_real
        self.use_feat_static_cat = use_feat_static_cat
        self.use_feat_static_real = use_feat_static_real
        self.cardinality = cardinality if use_feat_static_cat else [1]
        self.embedding_dimension = embedding_dimension
        self.scaling = scaling
        self.lags_seq = (lags_seq if lags_seq is not None else
                         get_lags_for_frequency(freq_str=freq))
        self.time_features = (time_features if time_features is not None else
                              time_features_from_frequency_str(self.freq))

        self.history_length = self.context_length + max(self.lags_seq)

        self.num_parallel_samples = num_parallel_samples
Example No. 11
    def __init__(
        self,
        freq: str,
        prediction_length: int,
        target_dim: int,
        trainer: Trainer = Trainer(),
        # number of dimension to sample at training time
        context_length: Optional[int] = None,
        num_layers: int = 2,
        num_cells: int = 40,
        cell_type: str = "lstm",
        num_parallel_samples: int = 100,
        dropout_rate: float = 0.1,
        target_dim_sample: Optional[int] = None,
        distr_output: Optional[DistributionOutput] = None,
        rank: Optional[int] = 2,
        scaling: bool = True,
        pick_incomplete: bool = False,
        lags_seq: Optional[List[int]] = None,
        shuffle_target_dim: bool = True,
        time_features: Optional[List[TimeFeature]] = None,
        conditioning_length: int = 100,
        use_marginal_transformation: bool = False,
    ) -> None:
        super().__init__(trainer=trainer)

        assert (prediction_length >
                0), "The value of `prediction_length` should be > 0"
        assert (context_length is None or context_length > 0
                ), "The value of `context_length` should be > 0"
        assert num_layers > 0, "The value of `num_layers` should be > 0"
        assert num_cells > 0, "The value of `num_cells` should be > 0"
        assert (num_parallel_samples >
                0), "The value of `num_eval_samples` should be > 0"
        assert dropout_rate >= 0, "The value of `dropout_rate` should be >= 0"

        if distr_output is not None:
            self.distr_output = distr_output
        else:
            self.distr_output = LowrankGPOutput(rank=rank)
        self.freq = freq
        self.context_length = (context_length if context_length is not None
                               else prediction_length)
        self.prediction_length = prediction_length
        self.target_dim = target_dim
        self.target_dim_sample = (target_dim if target_dim_sample is None else
                                  min(target_dim_sample, target_dim))
        self.shuffle_target_dim = shuffle_target_dim
        self.num_layers = num_layers
        self.num_cells = num_cells
        self.cell_type = cell_type
        self.num_parallel_samples = num_parallel_samples
        self.dropout_rate = dropout_rate

        self.lags_seq = (lags_seq if lags_seq is not None else
                         get_lags_for_frequency(freq_str=freq))
        self.time_features = (time_features if time_features is not None else
                              time_features_from_frequency_str(self.freq))

        self.history_length = self.context_length + max(self.lags_seq)
        self.pick_incomplete = pick_incomplete
        self.scaling = scaling
        self.conditioning_length = conditioning_length
        self.use_marginal_transformation = use_marginal_transformation
        if self.use_marginal_transformation:
            self.output_transform = cdf_to_gaussian_forward_transform
        else:
            self.output_transform = None
Example No. 12
dataset_name = "m4_hourly"
dataset = get_dataset(dataset_name)
results = []
# If you want to use GPU, please set ctx="gpu(0)"
estimators = [
    # partial(
    #     DeepAREstimator,
    #     trainer=Trainer(
    #         ctx="cpu",
    #         epochs=epochs,
    #         num_batches_per_epoch=num_batches_per_epoch
    #     )
    # ),
    partial(MQCNNEstimator,
            trainer=Trainer(ctx="cpu",
                            epochs=epochs,
                            num_batches_per_epoch=num_batches_per_epoch)),
]

for estimator in estimators:
    estimator = estimator(prediction_length=dataset.metadata.prediction_length,
                          freq=dataset.metadata.freq)
    predictor = estimator.train(dataset.train)
    forecast_it, ts_it = make_evaluation_predictions(dataset.test,
                                                     predictor=predictor,
                                                     num_eval_samples=100)

    agg_metrics, item_metrics = Evaluator()(ts_it,
                                            forecast_it,
                                            num_series=len(dataset.test))
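    # The `results` list created earlier is never filled in this snippet; a hedged
    # continuation (assuming the usual aggregate-metric keys returned by GluonTS's
    # Evaluator, such as "MASE" and "sMAPE") collects one entry per estimator:
    results.append(agg_metrics)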
Example No. 13
def train_predictor(region_df_dict,
                    end_train_date,
                    regions_list,
                    max_epochs,
                    learning_rate,
                    target_col,
                    feat_dynamic_cols=None,
                    fixed_seeds=False):
    if fixed_seeds:
        # Seeds setting taken from
        # https://gluon-ts.mxnet.io/examples/extended_forecasting_tutorial/extended_tutorial.html
        mx.random.seed(0)
        np.random.seed(0)

    estimator = DeepAREstimator(
        freq=md.FREQ,
        prediction_length=md.NB_HOURS_PRED,
        trainer=Trainer(epochs=max_epochs,
                        learning_rate=learning_rate,
                        learning_rate_decay_factor=md.LR_DECAY_FACTOR),
        use_feat_dynamic_real=feat_dynamic_cols is not None)
    if feat_dynamic_cols is not None:

        training_data = ListDataset([{
            "item_id":
            region,
            "start":
            region_df_dict[region].index[0],
            "target":
            region_df_dict[region][target_col][:end_train_date],
            "feat_dynamic_real": [
                region_df_dict[region][feat_dynamic_col][:end_train_date]
                for feat_dynamic_col in feat_dynamic_cols
            ]
        } for region in regions_list],
                                    freq=md.FREQ)
    else:
        training_data = ListDataset(
            [{
                "item_id": region,
                "start": region_df_dict[region].index[0],
                "target": region_df_dict[region][target_col][:end_train_date]
            } for region in regions_list],
            freq=md.FREQ)
    model_path = predictor_path(region_df_dict,
                                regions_list,
                                max_epochs,
                                learning_rate,
                                feat_dynamic_cols,
                                fixed_seeds=fixed_seeds)
    model_dir, model_name = os.path.split(model_path)
    logging.info("Training deepar model {}".format(model_name))
    logging.getLogger().setLevel(logging.WARNING)
    predictor = estimator.train(training_data=training_data)
    logging.getLogger().setLevel(logging.INFO)

    logging.info("Saving model with {} epochs and learning rate of {}".format(
        max_epochs, learning_rate))
    with open(model_path, "wb") as file:
        pickle.dump(predictor, file)

    return predictor
Example No. 14
    def fit(self, df, future_regressor = []):
        """Train algorithm given data supplied.
        
        Args:
            df (pandas.DataFrame): Datetime Indexed
        """
        df = self.basic_profile(df)
        
        try:
            from mxnet.random import seed as mxnet_seed
            mxnet_seed(self.random_seed)
        except Exception:
            pass

        gluon_train = df.transpose()
        self.train_index = gluon_train.index

        gluon_freq = str(self.frequency).split('-')[0]
        if gluon_freq in ["MS", "1MS"]:
            gluon_freq = "M"

        if int(self.verbose) > 1:
            print(f"Gluon Frequency is {gluon_freq}")

        if str(self.context_length).replace('.', '').isdigit():
            self.gluon_context_length = int(float(self.context_length))
        elif 'forecastlength' in str(self.context_length).lower():
            len_int = int([x for x in str(self.context_length) if x.isdigit()][0])
            self.gluon_context_length = int(len_int * self.forecast_length)
        else:
            self.gluon_context_length = 2 * self.forecast_length
            self.context_length = '2ForecastLength'
        ts_metadata = {'num_series': len(gluon_train.index),
                              'freq': gluon_freq,
                              'gluon_start': [gluon_train.columns[0] for _ in range(len(gluon_train.index))],
                              'context_length': self.gluon_context_length,
                              'forecast_length': self.forecast_length
                             }
        self.test_ds = ListDataset([{FieldName.TARGET: target, 
                                 FieldName.START: start
                                 }
                                for (target, start) in zip(
                                        gluon_train.values, 
                                        ts_metadata['gluon_start']
                                        )],
                                freq=ts_metadata['freq']
                                )
        if self.gluon_model == 'DeepAR':
            from gluonts.model.deepar import DeepAREstimator
            estimator = DeepAREstimator(freq=ts_metadata['freq'],
                                        context_length=ts_metadata['context_length'],
                                        prediction_length=ts_metadata['forecast_length'] 
                                        ,trainer=Trainer(epochs=self.epochs,
                                                         learning_rate=self.learning_rate)
                                        )
        elif self.gluon_model == 'NPTS':
            from gluonts.model.npts import NPTSEstimator
            estimator = NPTSEstimator(freq=ts_metadata['freq'],
                                      context_length=ts_metadata['context_length'],
                                      prediction_length=ts_metadata['forecast_length'])
        
        elif self.gluon_model == 'MQCNN':
            from gluonts.model.seq2seq import MQCNNEstimator
            estimator = MQCNNEstimator(freq=ts_metadata['freq'],
                                        context_length=ts_metadata['context_length'],
                                        prediction_length=ts_metadata['forecast_length'] 
                                        ,trainer=Trainer(epochs=self.epochs,
                                                         learning_rate=self.learning_rate)
                                        )
        
        elif self.gluon_model == 'SFF':
            from gluonts.model.simple_feedforward import SimpleFeedForwardEstimator
            estimator = SimpleFeedForwardEstimator(
                prediction_length=ts_metadata['forecast_length'],
                context_length=ts_metadata['context_length'],
                freq=ts_metadata['freq'],
                trainer=Trainer(epochs=self.epochs,
                                learning_rate=self.learning_rate, 
                                hybridize=False, num_batches_per_epoch=100
                               ))
        
        elif self.gluon_model == 'Transformer':
            from gluonts.model.transformer import TransformerEstimator
            estimator = TransformerEstimator(
                prediction_length=ts_metadata['forecast_length'],
                context_length=ts_metadata['context_length'],
                freq=ts_metadata['freq'],
                trainer=Trainer(epochs=self.epochs,
                                learning_rate=self.learning_rate))
        
        elif self.gluon_model == 'DeepState':
            from gluonts.model.deepstate import DeepStateEstimator
            estimator = DeepStateEstimator(
                prediction_length=ts_metadata['forecast_length'],
                past_length=ts_metadata['context_length'],
                freq=ts_metadata['freq'],
                use_feat_static_cat=False,
                cardinality=[1],
                trainer=Trainer(ctx='cpu', epochs=self.epochs,
                                learning_rate=self.learning_rate))

        elif self.gluon_model == 'DeepFactor':
            from gluonts.model.deep_factor import DeepFactorEstimator
            estimator = DeepFactorEstimator(
                freq=ts_metadata['freq'],
                context_length=ts_metadata['context_length'],
                prediction_length=ts_metadata['forecast_length'],
                trainer=Trainer(epochs=self.epochs,
                                learning_rate=self.learning_rate))
        
        elif self.gluon_model == 'WaveNet':
            # Usually needs more epochs/training iterations than other models do
            from gluonts.model.wavenet import WaveNetEstimator
            estimator = WaveNetEstimator(freq=ts_metadata['freq'],
                                        prediction_length=ts_metadata['forecast_length'] 
                                        ,trainer=Trainer(epochs=self.epochs,
                                                 learning_rate=self.learning_rate)
                                        )
        else:
            raise ValueError("'gluon_model' not recognized.")
        
        self.GluonPredictor = estimator.train(self.test_ds)
        self.ts_metadata = ts_metadata
        self.fit_runtime = datetime.datetime.now() - self.startTime
        return self
Example No. 15
# npts predictor
npts_predictor = NPTSPredictor(freq=freq,
                               prediction_length=prediction_length,
                               context_length=300, kernel_type='uniform',
                               use_seasonal_model=False)
npts_forecast = list(npts_predictor.predict(train_ds))


# deep ar
distr = PiecewiseLinearOutput(7)
deep_ar_trainer = Trainer(
    ctx=mx.context.gpu() if is_gpu and args.use_cuda else mx.context.cpu(),
    batch_size=128,
    learning_rate=1e-2,
    epochs=20,
    num_batches_per_epoch=args.number_of_batches_per_epoch,
    clip_gradient=5.48481845049343,
    weight_decay=0.001,
    hybridize=False)

deep_ar_estimator = DeepAREstimator(
    prediction_length=prediction_length,
    context_length=prediction_length*2,
    num_layers=2,
    num_cells=128,
    cell_type='gru',
    dropout_rate=0.1,
    scaling=True,
    lags_seq=np.arange(1, 1+1).tolist(),
    freq=freq,
    distr_output=distr,
    trainer=deep_ar_trainer)  # closing arguments assumed; the original example is cut off here

def train_models(train,
                 models,
                 forecast_len,
                 full_df=None,
                 seasonality="infer_from_data",
                 in_sample=None,
                 freq=None,
                 GPU=None):

    seasons = select_seasonality(train, seasonality)

    periods = select_seasonality(train, 'periodocity')

    models_dict = {}
    for m in models:
        if in_sample:
            print(
                "Model {} is being trained for in sample prediction".format(m))
        else:
            print("Model {} is being trained for out of sample prediction".
                  format(m))
        if m == "ARIMA":
            models_dict[m] = pm.auto_arima(train, seasonal=True, m=seasons)
        if m == "Prophet":
            if freq == "D":
                model = Prophet(daily_seasonality=True)
            else:
                model = Prophet()
            models_dict[m] = model.fit(prophet_dataframe(train))
        if m == "HWAAS":
            try:
                models_dict[m] = ExponentialSmoothing(
                    train,
                    seasonal_periods=seasons,
                    trend='add',
                    seasonal='add',
                    damped=True).fit(use_boxcox=True)
            except Exception:
                models_dict[m] = ExponentialSmoothing(
                    train,
                    seasonal_periods=seasons,
                    trend='add',
                    seasonal='add',
                    damped=True).fit(use_boxcox=False)
        if m == "HWAMS":
            try:
                models_dict[m] = ExponentialSmoothing(
                    train,
                    seasonal_periods=seasons,
                    trend='add',
                    seasonal='mul',
                    damped=True).fit(use_boxcox=True)
            except Exception:
                try:
                    models_dict[m] = ExponentialSmoothing(
                        train,
                        seasonal_periods=seasons,
                        trend='add',
                        seasonal='mul',
                        damped=True).fit(use_boxcox=False)
                except Exception:
                    models_dict[m] = ExponentialSmoothing(
                        train,
                        seasonal_periods=seasons,
                        trend=None,
                        seasonal='add').fit(use_boxcox=False)

        # if m=="HOLT":
        #   models_dict["HOLT"] = Holt(train,exponential=True).fit()
        if m == "PYAF":
            model = autof()
            model.train(iInputDS=train.reset_index(),
                        iTime='Date',
                        iSignal='Target',
                        iHorizon=len(train))  # note: using len(train) as the horizon here is questionable
            models_dict[m] = model.forecast(iInputDS=train.reset_index(),
                                            iHorizon=forecast_len)
        if m == "Gluonts":
            freqed = pd.infer_freq(train.index)
            if freqed == "MS":
                freq = "M"
            else:
                freq = freqed
            # request a GPU context only when the GPU flag is set
            if GPU:
                trainer = Trainer(epochs=6, ctx='gpu')
            else:
                trainer = Trainer(epochs=6, ctx='cpu')
            estimator = DeepAREstimator(
                freq=freq,
                prediction_length=forecast_len,
                trainer=trainer)  #use_feat_dynamic_real=True
            models_dict[m] = estimator.train(
                training_data=gluonts_dataframe(train))
        if m == "NBEATS":

            if GPU:
                device = torch.device('cuda')
            else:
                device = torch.device('cpu')

            if os.path.isfile(CHECKPOINT_NAME):
                os.remove(CHECKPOINT_NAME)
            stepped = 35
            batch_size = 10
            if in_sample:
                x_train, y_train, x_test, y_test, net, norm_constant = nbeats_dataframe(
                    full_df, forecast_len, in_sample=True, device=device)
                optimiser = optim.Adam(net.parameters())
                data = data_generator(x_train, y_train, batch_size)
                #test_losses = []
                for r in range(stepped):

                    train_100_grad_steps(data, device, net,
                                         optimiser)  #test_losses
                models_dict[m] = {}
                models_dict[m]["model"] = net
                models_dict[m]["x_test"] = x_test
                models_dict[m]["y_test"] = y_test
                models_dict[m]["constant"] = norm_constant

            else:  # if out_sample train is df

                x_train, y_train, net, norm_constant = nbeats_dataframe(
                    full_df, forecast_len, in_sample=False, device=device)

                batch_size = 10  # greater than 4 for viz
                optimiser = optim.Adam(net.parameters())
                data = data_generator(x_train, y_train, batch_size)
                stepped = 5
                #test_losses = []
                for r in range(stepped):
                    # _, forecast = net(torch.tensor(x_train, dtype=torch.float)) ### Not Used
                    # if GPU:
                    #   p = forecast.detach().numpy()                               ### Not Used
                    # else:
                    #   p = forecast.detach().numpy()                               ### Not Used
                    train_100_grad_steps(data, device, net,
                                         optimiser)  #test_losses
                models_dict[m] = {}
                models_dict[m]["model"] = net
                models_dict[m]["tuple"] = (x_train, y_train, net,
                                           norm_constant)

        # if m=="TBA":
        #   bat = TBATS(use_arma_errors=False,use_box_cox=True)
        #   models_dict[m] = bat.fit(train)
        if m == "TATS":
            bat = TBATS(seasonal_periods=list(
                get_unique_N(season_list(train), 1)),
                        use_arma_errors=False,
                        use_trend=True)
            models_dict[m] = bat.fit(train)
        if m == "TBAT":
            bat = TBATS(use_arma_errors=False,
                        use_box_cox=True,
                        use_trend=True)
            models_dict[m] = bat.fit(train)
        if m == "TBATS1":
            bat = TBATS(seasonal_periods=[seasons],
                        use_arma_errors=False,
                        use_box_cox=True,
                        use_trend=True)
            models_dict[m] = bat.fit(train)
        if m == "TBATP1":
            bat = TBATS(seasonal_periods=[periods],
                        use_arma_errors=False,
                        use_box_cox=True,
                        use_trend=True)
            models_dict[m] = bat.fit(train)
        if m == "TBATS2":
            bat = TBATS(seasonal_periods=list(
                get_unique_N(season_list(train), 2)),
                        use_arma_errors=False,
                        use_box_cox=True,
                        use_trend=True)
            models_dict[m] = bat.fit(train)

        # if m=="ProphetGluonts":
        #   freqed = pd.infer_freq(train.index)
        #   if freqed=="MS":
        #     freq= "M"
        #   else:
        #     freq= freqed
        #   models_dict["ProphetGluonts"] = ProphetPredictor(freq=freq, prediction_length=forecast_len) #use_feat_dynamic_real=True
        #   models_dict["ProphetGluonts"] = list(models_dict["ProphetGluonts"])

    return models_dict, seasons
import pyximport; pyximport.install()
import sys
import pandas as pd
from GTS_new_data import load_dataset
from pathlib import Path
from gluonts.model.predictor import Predictor
from gluonts.model.deepar import DeepAREstimator
from gluonts.trainer import Trainer
from gluonts.dataset.common import ListDataset

if __name__ == "__main__":
    filename = sys.argv[1]
    df = load_dataset(filename)

    training_data = ListDataset(
        [{"start": df.index[1], "target": df.iloc[:-12].values[:, 1]}],
        freq = "1min"
    )

    estimator = DeepAREstimator(freq="1min", prediction_length=12, trainer=Trainer(epochs=100))
    predictor = estimator.train(training_data=training_data)
    predictor.serialize(Path("."))
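    # The serialized predictor can be restored later without retraining; a hedged
    # sketch using the Predictor class already imported above:
    # predictor = Predictor.deserialize(Path("."))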

Example No. 18
    def __init__(
        self,
        freq: str,
        prediction_length: int,
        trainer: Trainer = Trainer(),
        context_length: Optional[int] = None,
        num_layers: int = 2,
        num_cells: int = 40,
        cell_type: str = "lstm",
        dropout_rate: float = 0.1,
        use_feat_dynamic_real: bool = False,
        use_feat_static_cat: bool = False,
        use_feat_static_real: bool = False,
        cardinality: Optional[List[int]] = None,
        embedding_dimension: Optional[List[int]] = None,
        distr_output: DistributionOutput = StudentTOutput(),
        scaling: bool = True,
        lags_seq: Optional[List[int]] = None,
        time_features: Optional[List[TimeFeature]] = None,
        num_parallel_samples: int = 100,
        imputation_method: Optional[MissingValueImputation] = None,
        dtype: DType = np.float32,
    ) -> None:
        super().__init__(trainer=trainer, dtype=dtype)

        assert (
            prediction_length > 0
        ), "The value of `prediction_length` should be > 0"
        assert (
            context_length is None or context_length > 0
        ), "The value of `context_length` should be > 0"
        assert num_layers > 0, "The value of `num_layers` should be > 0"
        assert num_cells > 0, "The value of `num_cells` should be > 0"
        assert dropout_rate >= 0, "The value of `dropout_rate` should be >= 0"
        assert (cardinality and use_feat_static_cat) or (
            not (cardinality or use_feat_static_cat)
        ), "You should set `cardinality` if and only if `use_feat_static_cat=True`"
        assert cardinality is None or all(
            [c > 0 for c in cardinality]
        ), "Elements of `cardinality` should be > 0"
        assert embedding_dimension is None or all(
            [e > 0 for e in embedding_dimension]
        ), "Elements of `embedding_dimension` should be > 0"
        assert (
            num_parallel_samples > 0
        ), "The value of `num_parallel_samples` should be > 0"

        self.freq = freq
        self.context_length = (
            context_length if context_length is not None else prediction_length
        )
        self.prediction_length = prediction_length
        self.distr_output = distr_output
        self.distr_output.dtype = dtype
        self.num_layers = num_layers
        self.num_cells = num_cells
        self.cell_type = cell_type
        self.dropout_rate = dropout_rate
        self.use_feat_dynamic_real = use_feat_dynamic_real
        self.use_feat_static_cat = use_feat_static_cat
        self.use_feat_static_real = use_feat_static_real
        self.cardinality = (
            cardinality if cardinality and use_feat_static_cat else [1]
        )
        self.embedding_dimension = (
            embedding_dimension
            if embedding_dimension is not None
            else [min(50, (cat + 1) // 2) for cat in self.cardinality]
        )
        self.scaling = scaling
        self.lags_seq = (
            lags_seq
            if lags_seq is not None
            else get_lags_for_frequency(freq_str=freq)
        )
        self.time_features = (
            time_features
            if time_features is not None
            else time_features_from_frequency_str(self.freq)
        )

        self.history_length = self.context_length + max(self.lags_seq)

        self.num_parallel_samples = num_parallel_samples

        self.imputation_method = (
            imputation_method
            if imputation_method is not None
            else DummyValueImputation(self.distr_output.value_in_support)
        )
Example No. 19
# Standard library imports
from functools import partial

# Third-party imports
import pytest

# First-party imports
from gluonts.model.deepstate import DeepStateEstimator
from gluonts.testutil.dummy_datasets import make_dummy_datasets_with_features
from gluonts.trainer import Trainer

common_estimator_hps = dict(
    freq="D",
    prediction_length=3,
    trainer=Trainer(epochs=3, num_batches_per_epoch=2, batch_size=1),
    past_length=10,
)


@pytest.mark.parametrize(
    "estimator, datasets",
    [
        # No features
        (
            partial(
                DeepStateEstimator,
                **common_estimator_hps,
                cardinality=[1],
                use_feat_static_cat=False,
            ),
Example No. 20
    def __init__(
            self,
            freq: str,
            prediction_length: int,
            context_length: Optional[int] = None,
            use_feat_dynamic_real: bool = False,
            use_feat_static_cat: bool = False,
            cardinality: List[int] = None,
            embedding_dimension: List[int] = None,
            add_time_feature: bool = False,
            add_age_feature: bool = False,
            enable_decoder_dynamic_feature: bool = False,
            seed: Optional[int] = None,
            decoder_mlp_dim_seq: Optional[List[int]] = None,
            channels_seq: Optional[List[int]] = None,
            dilation_seq: Optional[List[int]] = None,
            kernel_size_seq: Optional[List[int]] = None,
            use_residual: bool = True,
            quantiles: Optional[List[float]] = None,
            trainer: Trainer = Trainer(),
            scaling: bool = False,
    ) -> None:

        assert (prediction_length >
                0), f"Invalid prediction length: {prediction_length}."
        assert decoder_mlp_dim_seq is None or all(
            d > 0 for d in decoder_mlp_dim_seq
        ), "Elements of `mlp_hidden_dimension_seq` should be > 0"
        assert channels_seq is None or all(
            d > 0
            for d in channels_seq), "Elements of `channels_seq` should be > 0"
        assert dilation_seq is None or all(
            d > 0
            for d in dilation_seq), "Elements of `dilation_seq` should be > 0"
        # TODO: add support for kernel size=1
        assert kernel_size_seq is None or all(
            d > 1 for d in
            kernel_size_seq), "Elements of `kernel_size_seq` should be > 0"
        assert quantiles is None or all(
            0 <= d <= 1 for d in
            quantiles), "Elements of `quantiles` should be >= 0 and <= 1"

        self.decoder_mlp_dim_seq = (decoder_mlp_dim_seq if decoder_mlp_dim_seq
                                    is not None else [30])
        self.channels_seq = (channels_seq
                             if channels_seq is not None else [30, 30, 30])
        self.dilation_seq = (dilation_seq
                             if dilation_seq is not None else [1, 3, 5])
        self.kernel_size_seq = (kernel_size_seq
                                if kernel_size_seq is not None else [7, 3, 3])
        self.quantiles = (quantiles if quantiles is not None else
                          [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9])

        assert (len(self.channels_seq) == len(self.dilation_seq) == len(
            self.kernel_size_seq)), (
                f"mismatch CNN configurations: {len(self.channels_seq)} vs. "
                f"{len(self.dilation_seq)} vs. {len(self.kernel_size_seq)}")

        if seed:
            np.random.seed(seed)
            mx.random.seed(seed)

        # `use_static_feat` and `use_dynamic_feat` always True because network
        # always receives input; either from the input data or constants
        encoder = HierarchicalCausalConv1DEncoder(
            dilation_seq=self.dilation_seq,
            kernel_size_seq=self.kernel_size_seq,
            channels_seq=self.channels_seq,
            use_residual=use_residual,
            use_static_feat=True,
            use_dynamic_feat=True,
            prefix="encoder_",
        )

        decoder = ForkingMLPDecoder(
            dec_len=prediction_length,
            final_dim=self.decoder_mlp_dim_seq[-1],
            hidden_dimension_sequence=self.decoder_mlp_dim_seq[:-1],
            prefix="decoder_",
        )

        quantile_output = QuantileOutput(self.quantiles)

        super().__init__(
            encoder=encoder,
            decoder=decoder,
            quantile_output=quantile_output,
            freq=freq,
            prediction_length=prediction_length,
            context_length=context_length,
            use_feat_dynamic_real=use_feat_dynamic_real,
            use_feat_static_cat=use_feat_static_cat,
            enable_decoder_dynamic_feature=enable_decoder_dynamic_feature,
            cardinality=cardinality,
            embedding_dimension=embedding_dimension,
            add_time_feature=add_time_feature,
            add_age_feature=add_age_feature,
            trainer=trainer,
            scaling=scaling,
        )
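
# Hedged usage sketch: the constructor above matches gluonts's MQCNNEstimator
# (conv encoder + forking MLP decoder), so that class is assumed here; `train_ds`
# is a hypothetical hourly gluonts ListDataset, and the hyperparameters are only
# illustrative.
from gluonts.model.seq2seq import MQCNNEstimator
from gluonts.trainer import Trainer

mqcnn = MQCNNEstimator(freq="H",
                       prediction_length=24,
                       trainer=Trainer(epochs=5, num_batches_per_epoch=50))
# predictor = mqcnn.train(train_ds)  # yields quantile forecasts at the levels set above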
Exemplo n.º 21
0
def forecast_dataset(dataset,
                     epochs=100,
                     learning_rate=1e-3,
                     num_samples=100,
                     model="SimpleFeedForward",
                     r_method="ets",
                     alpha=0,
                     distrib="Gaussian"):
    if distrib == "Gaussian":
        distr_output = GaussianOutput()
    elif distrib == "Laplace":
        distr_output = LaplaceOutput()
    elif distrib == "PiecewiseLinear":
        distr_output = PiecewiseLinearOutput(num_pieces=2)
    elif distrib == "Uniform":
        distr_output = UniformOutput()
    elif distrib == "Student":
        distr_output = StudentTOutput()
    else:
        distr_output = None

    if model != "GaussianProcess":
        ctx = mx.Context("gpu")
    else:
        ctx = mx.Context("cpu")

    # Trainer
    trainer = Trainer(epochs=epochs,
                      learning_rate=learning_rate,
                      num_batches_per_epoch=100,
                      ctx=ctx,
                      hybridize=(model[0] != "c"))  # custom ("c"-prefixed) estimators are not hybridized

    # Estimator (if machine learning model)
    if model == "SimpleFeedForward":  # 10s / epochs for context 60*24
        estimator = SimpleFeedForwardEstimator(
            num_hidden_dimensions=[10],
            prediction_length=dataset.prediction_length,
            context_length=dataset.context_length,
            freq=dataset.freq,
            trainer=trainer,
            distr_output=distr_output)
    elif model == "cSimpleFeedForward":  # 10s / epochs for context 60*24
        estimator = CustomSimpleFeedForwardEstimator(
            prediction_length=dataset.prediction_length,
            context_length=dataset.context_length,
            freq=dataset.freq,
            trainer=trainer,
            num_cells=40,
            alpha=alpha,
            distr_output=distr_output,
            distr_output_type=distrib)
    elif model == "CanonicalRNN":  # 80s /epochs for context 60*24, idem for 60*1
        estimator = canonical.CanonicalRNNEstimator(
            freq=dataset.freq,
            context_length=dataset.context_length,
            prediction_length=dataset.prediction_length,
            trainer=trainer,
            distr_output=distr_output,
        )
    elif model == "DeepAr":
        estimator = deepar.DeepAREstimator(
            freq=dataset.freq,
            context_length=dataset.context_length,
            prediction_length=dataset.prediction_length,
            trainer=trainer,
            distr_output=distr_output,
        )
    elif model == "DeepFactor":  # 120 s/epochs if one big time serie, 1.5s if 183 time series
        estimator = deep_factor.DeepFactorEstimator(
            freq=dataset.freq,
            context_length=dataset.context_length,
            prediction_length=dataset.prediction_length,
            trainer=trainer,
            distr_output=distr_output,
        )
    elif model == "DeepState":  # Very slow on cpu
        estimator = deepstate.DeepStateEstimator(
            freq=dataset.freq,
            prediction_length=dataset.prediction_length,
            trainer=trainer,
            cardinality=list([1]),
            use_feat_static_cat=False)
    elif model == "GaussianProcess":  # CPU / GPU problem
        estimator = gp_forecaster.GaussianProcessEstimator(
            freq=dataset.freq,
            prediction_length=dataset.prediction_length,
            trainer=trainer,
            cardinality=1)
    elif model == "NPTS":
        estimator = npts.NPTSEstimator(
            freq=dataset.freq, prediction_length=dataset.prediction_length)
    elif model == "MQCNN":
        estimator = seq2seq.MQCNNEstimator(
            prediction_length=dataset.prediction_length,
            freq=dataset.freq,
            context_length=dataset.context_length,
            trainer=trainer,
            quantiles=list([0.005, 0.05, 0.25, 0.5, 0.75, 0.95, 0.995]))
    elif model == "MQRNN":
        estimator = seq2seq.MQRNNEstimator(
            prediction_length=dataset.prediction_length,
            freq=dataset.freq,
            context_length=dataset.context_length,
            trainer=trainer,
            quantiles=list([0.005, 0.05, 0.25, 0.5, 0.75, 0.95, 0.995]))
    elif model == "RNN2QR":  # Must be investigated
        estimator = seq2seq.RNN2QRForecaster(
            prediction_length=dataset.prediction_length,
            freq=dataset.freq,
            context_length=dataset.context_length,
            trainer=trainer,
            cardinality=dataset.cardinality,
            embedding_dimension=1,
            encoder_rnn_layer=1,
            encoder_rnn_num_hidden=1,
            decoder_mlp_layer=[1],
            decoder_mlp_static_dim=1)
    elif model == "SeqToSeq":  # Must be investigated
        estimator = seq2seq.Seq2SeqEstimator(
            prediction_length=dataset.prediction_length,
            freq=dataset.freq,
            context_length=dataset.context_length,
            trainer=trainer,
            cardinality=[1],
            embedding_dimension=1,
            decoder_mlp_layer=[1],
            decoder_mlp_static_dim=1,
            encoder=Seq2SeqEncoder())
    elif model == "Transformer":  # Make the computer lag the first time
        estimator = transformer.TransformerEstimator(
            prediction_length=dataset.prediction_length,
            freq=dataset.freq,
            context_length=dataset.context_length,
            trainer=trainer)

    else:
        estimator = None

    # Predictor: built directly for non-ML models, otherwise obtained by training the estimator
    if model == "Prophet":
        predictor = prophet.ProphetPredictor(
            freq=dataset.freq,
            prediction_length=dataset.prediction_length,
        )
    elif model == "R":
        predictor = r_forecast.RForecastPredictor(
            freq=dataset.freq,
            prediction_length=dataset.prediction_length,
            method_name=r_method)
    elif model == "SeasonalNaive":
        predictor = seasonal_naive.SeasonalNaivePredictor(
            freq=dataset.freq,
            prediction_length=dataset.prediction_length,
            season_length=24)
    else:
        predictor = estimator.train(dataset.train_ds)
        if model[0] != "c":
            predictor.serialize(Path("temp"))
            predictor = Predictor.deserialize(
                Path("temp"), ctx=mx.cpu(0))  # fix for deepstate

    # Evaluate
    forecast_it, ts_it = make_evaluation_predictions(
        dataset=dataset.test_ds,  # test dataset
        predictor=predictor,  # predictor
        num_samples=num_samples,  # num of sample paths we want for evaluation
    )

    return list(forecast_it), list(ts_it)
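
# Hedged usage sketch for forecast_dataset() above. `dataset` is a hypothetical
# wrapper exposing train_ds, test_ds, freq, prediction_length, context_length and
# cardinality, as the function assumes; note that most models request a GPU context.
from gluonts.evaluation import Evaluator

forecasts, tss = forecast_dataset(dataset, epochs=10, model="SimpleFeedForward")
evaluator = Evaluator(quantiles=[0.1, 0.5, 0.9])
agg_metrics, item_metrics = evaluator(iter(tss), iter(forecasts),
                                      num_series=len(forecasts))
print(agg_metrics["MASE"], agg_metrics["mean_wQuantileLoss"])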
Exemplo n.º 22
0
model2 = module.load(load_pars={'path': out_pars['path'] + "/model/"})

from gluonts.model.deepar import DeepAREstimator
from gluonts.distribution.neg_binomial import NegativeBinomialOutput
from gluonts.trainer import Trainer

estimator = DeepAREstimator(
    prediction_length=12,
    freq="D",
    distr_output=NegativeBinomialOutput(),
    use_feat_static_cat=True,
    use_feat_dynamic_real=True,
    cardinality=[3049, 7, 3, 10, 3],
    trainer=Trainer(
        learning_rate=1e-3,
        epochs=1,
        num_batches_per_epoch=10,
        batch_size=10,
    ),
)

predictor = estimator.train(TD.train)

from gluonts.evaluation.backtest import make_evaluation_predictions
from gluonts.evaluation import Evaluator

forecast_it, ts_it = make_evaluation_predictions(
    dataset=TD.test,
    predictor=predictor,
    num_samples=100
)
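
# Sketch of the evaluation step that the Evaluator import above suggests;
# the quantile choice is an assumption, TD.test is the same test dataset used above.
forecasts = list(forecast_it)
tss = list(ts_it)
evaluator = Evaluator(quantiles=[0.1, 0.5, 0.9])
agg_metrics, item_metrics = evaluator(iter(tss), iter(forecasts),
                                      num_series=len(forecasts))
print(agg_metrics["RMSE"], agg_metrics["mean_wQuantileLoss"])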
Exemplo n.º 23
0
def assert_invalid_param(param_name: str, param_values: List[Any],
                         exp_msg: str) -> None:
    for x in param_values:
        with pytest.raises(AssertionError) as excinfo:
            Trainer(**{param_name: x})
        # the assertion must sit outside the `with` block, otherwise it is never reached
        assert exp_msg in str(excinfo.value)
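
# Hedged usage sketch for the helper above; the expected-message substring is an
# assumption about gluonts's Trainer assertions, not taken from its source.
def test_invalid_epochs() -> None:
    assert_invalid_param(param_name="epochs",
                         param_values=[-1],
                         exp_msg="epochs")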
Exemplo n.º 24
0
    estimator = NBEATSEnsembleEstimator(
        prediction_length=prediction_length,
        #context_length=7*prediction_length,
        meta_bagging_size=3,  # 3, ## Change back to 10 after testing??
        meta_context_length=[prediction_length * mlp for mlp in [3, 5, 7]
                             ],  ## Change back to (2,7) // 3,5,7
        meta_loss_function=['sMAPE'
                            ],  ## Change back to all three MAPE, MASE ...
        num_stacks=30,
        widths=[512],
        freq="D",
        trainer=Trainer(
            learning_rate=6e-4,
            #clip_gradient=1.0,
            epochs=12,  #10
            num_batches_per_epoch=1000,
            batch_size=16
            #ctx=mx.context.gpu()
        ))

# In[ ]:

if SUBMISSION:
    predictor = estimator.train(train_ds)
else:
    predictor = estimator.train(train_ds, test_ds)

# # Analyze forecasts - Errors and visual inspection
#

# In[ ]:
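
# Hedged sketch of the analysis announced by the heading above; `predictor` and
# `test_ds` come from the cells above, the metric and quantile choices are assumptions.
from gluonts.evaluation import Evaluator
from gluonts.evaluation.backtest import make_evaluation_predictions

forecast_it, ts_it = make_evaluation_predictions(dataset=test_ds,
                                                 predictor=predictor,
                                                 num_samples=100)
forecasts, tss = list(forecast_it), list(ts_it)
evaluator = Evaluator(quantiles=(0.1, 0.5, 0.9))
agg_metrics, item_metrics = evaluator(iter(tss), iter(forecasts),
                                      num_series=len(forecasts))
print(agg_metrics["sMAPE"], agg_metrics["MASE"])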
Exemplo n.º 25
0
train_data = common.FileDataset(
    "/home/root/mxnetTS/GluonTS-Learning-in-Action/chapter-2/data/train",
    freq="H")
test_data = common.FileDataset(
    "/home/root/mxnetTS/GluonTS-Learning-in-Action/chapter-2/data/val",
    freq="H")

estimator = deepar.DeepAREstimator(prediction_length=24,
                                   context_length=100,
                                   use_feat_static_cat=True,
                                   use_feat_dynamic_real=True,
                                   num_parallel_samples=100,
                                   cardinality=[2, 1],
                                   freq="H",
                                   trainer=Trainer(ctx="cpu",
                                                   epochs=200,
                                                   learning_rate=1e-3))
predictor = estimator.train(training_data=train_data)

for test_entry, forecast in zip(test_data, predictor.predict(test_data)):
    to_pandas(test_entry)[-100:].plot(figsize=(12, 5), linewidth=2)
    forecast.plot(color='g', prediction_intervals=[50.0, 90.0])
plt.grid(which='both')
plt.legend([
    "past observations", "median prediction", "90% prediction interval",
    "50% prediction interval"
])
plt.show()

prediction = next(predictor.predict(test_data))
print(prediction.mean)
Exemplo n.º 26
0
    def __init__(
        self,
        freq: str,
        prediction_length: int,
        trainer: Trainer = Trainer(
            learning_rate=0.01,
            epochs=200,
            num_batches_per_epoch=50,
            hybridize=False,
        ),
        cardinality: List[int] = [1],
        seasonality: Optional[int] = None,
        embedding_dimension: int = 5,
        num_bins: int = 1024,
        hybridize_prediction_net: bool = False,
        n_residue=24,
        n_skip=32,
        dilation_depth: Optional[int] = None,
        n_stacks: int = 1,
        train_window_length: Optional[int] = None,
        temperature: float = 1.0,
        act_type: str = "elu",
        num_parallel_samples: int = 200,
    ) -> None:
        """
        Model with Wavenet architecture and quantized target.

        :param freq:
        :param prediction_length:
        :param trainer:
        :param num_parallel_samples:
        :param cardinality:
        :param embedding_dimension:
        :param num_bins: Number of bins used for quantization of signal
        :param hybridize_prediction_net:
        :param n_residue: Number of residual channels in wavenet architecture
        :param n_skip: Number of skip channels in wavenet architecture
        :param dilation_depth: Number of dilation layers in the wavenet architecture.
          If set to None, dilation_depth is chosen such that the receptive field is at
          least as long as 2 * seasonality for the frequency and at least
          2 * prediction_length.
        :param n_stacks: Number of dilation stacks in wavenet architecture
        :param train_window_length: Length of windows used for training. This should be
          longer than prediction length. Larger values result in more efficient
          reuse of computations for convolutions.
        :param temperature: Temperature used for sampling from the softmax distribution.
          For temperature = 1.0 sampling follows the estimated probabilities.
        :param act_type: Activation type used before the output layer.
          Can be any of
              'elu', 'relu', 'sigmoid', 'tanh', 'softrelu', 'softsign'
        """

        super().__init__(trainer=trainer)

        self.freq = freq
        self.prediction_length = prediction_length
        self.cardinality = cardinality
        self.embedding_dimension = embedding_dimension
        self.num_bins = num_bins
        self.hybridize_prediction_net = hybridize_prediction_net

        self.n_residue = n_residue
        self.n_skip = n_skip
        self.n_stacks = n_stacks
        self.train_window_length = (train_window_length if train_window_length
                                    is not None else prediction_length)
        self.temperature = temperature
        self.act_type = act_type
        self.num_parallel_samples = num_parallel_samples

        seasonality = (_get_seasonality(
            self.freq,
            {
                "H": 7 * 24,
                "D": 7,
                "W": 52,
                "M": 12,
                "B": 7 * 5,
                "min": 24 * 60,
            },
        ) if seasonality is None else seasonality)

        goal_receptive_length = max(2 * seasonality,
                                    2 * self.prediction_length)
        if dilation_depth is None:
            d = 1
            while (WaveNet.get_receptive_field(dilation_depth=d,
                                               n_stacks=n_stacks) <
                   goal_receptive_length):
                d += 1
            self.dilation_depth = d
        else:
            self.dilation_depth = dilation_depth
        self.context_length = WaveNet.get_receptive_field(
            dilation_depth=self.dilation_depth, n_stacks=n_stacks)
        self.logger = logging.getLogger(__name__)
        self.logger.info(
            f"Using dilation depth {self.dilation_depth} and receptive field length {self.context_length}"
        )
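
# Hedged usage sketch, assuming the __init__ above belongs to gluonts's
# WaveNetEstimator. With freq="H" and prediction_length=24 the defaults above give
# seasonality 168, so dilation_depth is grown until the receptive field covers
# max(2 * 168, 2 * 24) = 336 time steps.
from gluonts.model.wavenet import WaveNetEstimator
from gluonts.trainer import Trainer

wavenet = WaveNetEstimator(
    freq="H",
    prediction_length=24,
    trainer=Trainer(epochs=5, num_batches_per_epoch=50),
)
# predictor = wavenet.train(train_ds)  # train_ds: hypothetical hourly ListDataset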
Exemplo n.º 27
0
    def __init__(
        self,
        freq: str,
        prediction_length: int,
        add_trend: bool = False,
        past_length: Optional[int] = None,
        num_periods_to_train: int = 4,
        trainer: Trainer = Trainer(epochs=25, hybridize=False),
        num_layers: int = 2,
        num_cells: int = 40,
        cell_type: str = "lstm",
        num_eval_samples: int = 100,
        dropout_rate: float = 0.1,
        use_feat_dynamic_real: bool = False,
        use_feat_static_cat: bool = False,
        cardinality: Optional[List[int]] = None,
        embedding_dimension: Optional[List[int]] = None,
        issm: Optional[ISSM] = None,
        scaling: bool = True,
        time_features: Optional[List[TimeFeature]] = None,
    ) -> None:
        super().__init__(trainer=trainer)

        assert (
            prediction_length > 0
        ), "The value of `prediction_length` should be > 0"
        assert (
            past_length is None or past_length > 0
        ), "The value of `past_length` should be > 0"
        assert num_layers > 0, "The value of `num_layers` should be > 0"
        assert num_cells > 0, "The value of `num_cells` should be > 0"
        assert (
            num_eval_samples > 0
        ), "The value of `num_eval_samples` should be > 0"
        assert dropout_rate >= 0, "The value of `dropout_rate` should be >= 0"
        assert (cardinality is not None and use_feat_static_cat) or (
            cardinality is None and not use_feat_static_cat
        ), "You should set `cardinality` if and only if `use_feat_static_cat=True`"
        assert cardinality is None or all(
            c > 0 for c in cardinality
        ), "Elements of `cardinality` should be > 0"
        assert embedding_dimension is None or all(
            e > 0 for e in embedding_dimension
        ), "Elements of `embedding_dimension` should be > 0"

        self.freq = freq
        self.past_length = (
            past_length
            if past_length is not None
            else num_periods_to_train * longest_period_from_frequency_str(freq)
        )
        self.prediction_length = prediction_length
        self.add_trend = add_trend
        self.num_layers = num_layers
        self.num_cells = num_cells
        self.cell_type = cell_type
        self.num_sample_paths = num_eval_samples
        self.scaling = scaling
        self.dropout_rate = dropout_rate
        self.use_feat_dynamic_real = use_feat_dynamic_real
        self.use_feat_static_cat = use_feat_static_cat
        self.cardinality = (
            cardinality if cardinality and use_feat_static_cat else [1]
        )
        self.embedding_dimension = (
            embedding_dimension
            if embedding_dimension is not None
            else [min(50, (cat + 1) // 2) for cat in self.cardinality]
        )

        self.issm = (
            issm
            if issm is not None
            else CompositeISSM.get_from_freq(freq, add_trend)
        )

        self.time_features = (
            time_features
            if time_features is not None
            else time_features_from_frequency_str(self.freq)
        )
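
# Hedged usage sketch, assuming the constructor above is gluonts's DeepStateEstimator
# (the installed library's signature may differ); `train_ds` is a hypothetical daily
# ListDataset, and hybridize=False keeps the Kalman-filtering loop imperative, as in
# the default trainer above.
from gluonts.model.deepstate import DeepStateEstimator
from gluonts.trainer import Trainer

deepstate = DeepStateEstimator(
    freq="D",
    prediction_length=14,
    trainer=Trainer(epochs=3, hybridize=False),
)
# predictor = deepstate.train(train_ds)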
Exemplo n.º 28
0
import mxnet as mx

from gluonts.dataset.loader import TrainDataLoader
from gluonts.model.deepar import DeepAREstimator
from gluonts.support.util import get_hybrid_forward_input_names
from gluonts.trainer import Trainer
from gluonts.dataset.repository.datasets import get_dataset

if __name__ == "__main__":

    dataset = get_dataset(dataset_name="electricity")

    estimator = DeepAREstimator(
        prediction_length=dataset.metadata.prediction_length,
        freq=dataset.metadata.freq,
        trainer=Trainer(learning_rate=1e-3,
                        epochs=50,
                        num_batches_per_epoch=100),
    )

    # instead of calling the `train` method, we call `train_model`, which also returns the trained network and the transformation
    train_output = estimator.train_model(dataset.train)

    # we construct a data loader that yields batches of 500 random training windows
    batch_size = 500
    num_samples = 100
    training_data_loader = TrainDataLoader(
        dataset=dataset.train,
        transform=train_output.transformation,
        batch_size=batch_size,
        num_batches_per_epoch=estimator.trainer.num_batches_per_epoch,
        ctx=mx.cpu(),
    )
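
    # Hedged continuation sketch (the original example is truncated here): pull one
    # batch from the loader and evaluate the trained network's loss on it;
    # get_hybrid_forward_input_names lists the fields hybrid_forward expects.
    input_names = get_hybrid_forward_input_names(train_output.trained_net)
    batch = next(iter(training_data_loader))
    loss = train_output.trained_net(*[batch[k] for k in input_names])
    print("mean training loss on one batch:", loss.mean().asscalar())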
Exemplo n.º 29
0
def test_appendix_c():
    """
    Test GluonTS paper examples from arxiv paper:
    https://arxiv.org/abs/1906.05264

    Appendix C
    """
    from typing import List
    from mxnet import gluon
    from gluonts.model.estimator import GluonEstimator
    from gluonts.model.predictor import Predictor, RepresentableBlockPredictor
    from gluonts.trainer import Trainer
    from gluonts.transform import (
        InstanceSplitter,
        Transformation,
        ExpectedNumInstanceSampler,
    )
    from gluonts.core.component import validated
    from gluonts.support.util import copy_parameters

    class MyTrainNetwork(gluon.HybridBlock):
        def __init__(self, prediction_length, cells, act_type, **kwargs):
            super().__init__(**kwargs)
            self.prediction_length = prediction_length
            with self.name_scope():
                # Set up a network that predicts the target
                self.nn = gluon.nn.HybridSequential()
                for c in cells:
                    self.nn.add(gluon.nn.Dense(units=c, activation=act_type))
                # output layer maps to the prediction horizon (added once, after the loop)
                self.nn.add(
                    gluon.nn.Dense(units=self.prediction_length,
                                   activation=act_type))

        def hybrid_forward(self, F, past_target, future_target):
            prediction = self.nn(past_target)
            # calculate L1 loss to learn the median
            return (prediction - future_target).abs().mean(axis=-1)

    class MyPredNetwork(MyTrainNetwork):
        # The prediction network only receives
        # past target and returns predictions
        def hybrid_forward(self, F, past_target):
            prediction = self.nn(past_target)
            return prediction.expand_dims(axis=1)

    class MyEstimator(GluonEstimator):
        @validated()
        def __init__(
                self,
                freq: str,
                prediction_length: int,
                act_type: str = "relu",
                context_length: int = 30,
                cells: List[int] = [40, 40, 40],
                trainer: Trainer = Trainer(epochs=10),
        ) -> None:
            super().__init__(trainer=trainer)
            self.freq = freq
            self.prediction_length = prediction_length
            self.act_type = act_type
            self.context_length = context_length
            self.cells = cells

        def create_training_network(self) -> MyTrainNetwork:
            return MyTrainNetwork(
                prediction_length=self.prediction_length,
                cells=self.cells,
                act_type=self.act_type,
            )

        def create_predictor(
            self,
            transformation: Transformation,
            trained_network: gluon.HybridBlock,
        ) -> Predictor:
            prediction_network = MyPredNetwork(
                prediction_length=self.prediction_length,
                cells=self.cells,
                act_type=self.act_type,
            )

            copy_parameters(trained_network, prediction_network)

            return RepresentableBlockPredictor(
                input_transform=transformation,
                prediction_net=prediction_network,
                batch_size=self.trainer.batch_size,
                freq=self.freq,
                prediction_length=self.prediction_length,
                ctx=self.trainer.ctx,
            )

        def create_transformation(self):
            # Model specific input transform
            # Here we use a transformation that randomly
            # selects training samples from all series.
            return InstanceSplitter(
                target_field=FieldName.TARGET,
                is_pad_field=FieldName.IS_PAD,
                start_field=FieldName.START,
                forecast_start_field=FieldName.FORECAST_START,
                train_sampler=ExpectedNumInstanceSampler(num_instances=1),
                past_length=self.context_length,
                future_length=self.prediction_length,
            )

    from gluonts.trainer import Trainer
    from gluonts.evaluation import Evaluator
    from gluonts.evaluation.backtest import backtest_metrics

    dataset_info, train_ds, test_ds = constant_dataset()

    meta = dataset_info.metadata
    estimator = MyEstimator(
        freq=meta.freq,
        prediction_length=1,
        trainer=Trainer(epochs=1, batch_size=32),
    )
    predictor = estimator.train(train_ds)

    evaluator = Evaluator(quantiles=(0.1, 0.5, 0.9))
    agg_metrics, item_metrics = backtest_metrics(
        train_dataset=train_ds,
        test_dataset=test_ds,
        forecaster=predictor,
        evaluator=evaluator,
    )
Exemplo n.º 30
0
from gluonts.model.simple_feedforward import SimpleFeedForwardEstimator
from gluonts.trainer import Trainer
from gluonts.dataset.loader import TrainDataLoader
import numpy as np
from gluonts.support.util import get_hybrid_forward_input_names
from gluonts.evaluation.backtest import make_evaluation_predictions

from gluonts.evaluation import Evaluator
from dataset import dataset
estimator = SimpleFeedForwardEstimator(
    num_hidden_dimensions=[10],
    prediction_length=dataset.metadata.prediction_length,
    context_length=100,
    freq=dataset.metadata.freq,
    trainer=Trainer(ctx="cpu",
                    epochs=5,
                    learning_rate=1e-3,
                    num_batches_per_epoch=100))

net = estimator.create_training_network()
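
# Hedged continuation sketch: the imports above (make_evaluation_predictions,
# Evaluator) suggest the example goes on to train and backtest the estimator;
# this is an assumed continuation, not the original code.
predictor = estimator.train(dataset.train)
forecast_it, ts_it = make_evaluation_predictions(dataset=dataset.test,
                                                 predictor=predictor,
                                                 num_samples=100)
evaluator = Evaluator(quantiles=[0.1, 0.5, 0.9])
agg_metrics, item_metrics = evaluator(ts_it, forecast_it,
                                      num_series=len(dataset.test))
print(agg_metrics)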