Example #1
    def _evaluate(self, loader):
        """Evaluates model performance.

        Args:
            loader (torch DataLoader): instantiated validation DataLoader (with TimeDataset)
        Returns:
            df with evaluation metrics
        """
        test_metrics = metrics.MetricsCollection([m.new() for m in self.metrics.batch_metrics])
        if self.highlight_forecast_step_n is not None:
            test_metrics.add_specific_target(target_pos=self.highlight_forecast_step_n - 1)
        ## Run

        self.test_metrics = test_metrics
        self.trainer.test(self.model, test_dataloaders=loader, ckpt_path=None, verbose=False)

        test_metrics_dict = self.test_metrics.compute(save=True)

        log.info("Validation metrics: {}".format(utils.print_epoch_metrics(test_metrics_dict)))
        val_metrics_df = self.test_metrics.get_stored_as_df()
        return val_metrics_df
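A minimal usage sketch for the method above, assuming forecaster is an already-fitted instance of the wrapper class (the class itself is not shown here) and that it exposes the _init_val_loader helper that appears in Example #5; forecaster and df_val are hypothetical names introduced only for illustration.

# hypothetical: build a validation loader from a hold-out 'ds'/'y' DataFrame and evaluate on it
val_loader = forecaster._init_val_loader(df_val)
val_metrics_df = forecaster._evaluate(val_loader)
print(val_metrics_df.tail(1))  # metrics stored for the evaluation run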
Example #2
    def __init__(
        self,
        n_lags=60,
        n_forecasts=20,
        batch_size=None,
        epochs=100,
        patience_early_stopping=10,
        early_stop=True,
        learning_rate=3e-2,
        auto_lr_find=True,
        num_workers=3,
        loss_func="QuantileLoss",
        hidden_size=32,
        attention_head_size=1,
        hidden_continuous_size=8,
        dropout=0.1,
    ):
        """
        Args:
            n_lags: int, number of time units that condition the predictions. Also known as the 'lookback period'.
                Should be between 1 and 10 times the prediction length. Can be seen as the equivalent of n_lags in NeuralProphet.
            n_forecasts: int, number of time units that the model predicts
            batch_size: int, batch size. If set to None, the batch size is determined automatically
            epochs: int, number of epochs for training. Will be overridden if EarlyStopping is applied
            patience_early_stopping: int, patience parameter of the EarlyStopping callback
            early_stop: bool, whether to use the EarlyStopping callback
            learning_rate: float, learning rate for the model. Will be overridden if auto_lr_find is used
            auto_lr_find: bool, whether to use the automatic learning rate finder
            num_workers: int, number of workers for the DataLoaders
            loss_func: str, loss function taking prediction and targets; should be from the MultiHorizonMetric class,
                defaults to QuantileLoss.
            hidden_size: int, hidden size of the network, which is its main hyperparameter and can range from 8 to 512
            attention_head_size: int, number of attention heads; larger values (up to 8) for large amounts of data
            hidden_continuous_size: int, hidden size for processing continuous variables
            dropout: float, dropout in RNN layers, should be between 0 and 1.
        """

        self.batch_size = batch_size

        self.epochs = epochs
        self.patience_early_stopping = patience_early_stopping
        self.early_stop = early_stop
        self.learning_rate = learning_rate
        self.auto_lr_find = auto_lr_find
        if self.learning_rate is not None:
            self.auto_lr_find = False
        self.num_workers = num_workers

        self.context_length = n_lags
        self.prediction_length = n_forecasts

        self.hidden_size = hidden_size
        self.attention_head_size = attention_head_size
        self.hidden_continuous_size = hidden_continuous_size
        self.dropout = dropout
        self.loss_func = loss_func

        self.fitted = False
        self.freq = None

        if isinstance(self.loss_func, str):
            if self.loss_func.lower() in ["huber", "smoothl1", "smoothl1loss"]:
                self.loss_func = torch.nn.SmoothL1Loss()
            elif self.loss_func.lower() in ["mae", "l1", "l1loss"]:
                self.loss_func = torch.nn.L1Loss()
            elif self.loss_func.lower() in ["mse", "mseloss", "l2", "l2loss"]:
                self.loss_func = torch.nn.MSELoss()
            elif self.loss_func.lower() in ["quantileloss"]:
                self.loss_func = QuantileLoss()
            else:
                raise NotImplementedError("Loss function {} name not defined".format(self.loss_func))
        elif callable(self.loss_func):
            pass
        elif hasattr(torch.nn.modules.loss, self.loss_func.__class__.__name__):
            pass
        else:
            raise NotImplementedError("Loss function {} not found".format(self.loss_func))

        self.metrics = metrics.MetricsCollection(
            metrics=[
                metrics.LossMetric(torch.nn.SmoothL1Loss()),
                metrics.MAE(),
                metrics.MSE(),
            ],
            value_metrics=[
                # metrics.ValueMetric("Loss"),
            ],
        )

        self.val_metrics = metrics.MetricsCollection([m.new() for m in self.metrics.batch_metrics])
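A short instantiation sketch for the constructor above. The class name TFTWrapper is hypothetical (only __init__ is shown); note that, per the code above, any non-None learning_rate forces auto_lr_find to False, so pass learning_rate=None to keep the automatic learning rate finder active.

# TFTWrapper is a hypothetical name for the class this __init__ belongs to
m = TFTWrapper(
    n_lags=60,
    n_forecasts=20,
    learning_rate=None,        # None keeps auto_lr_find enabled; any float disables it
    loss_func="QuantileLoss",  # resolved to a QuantileLoss() instance by the mapping above
    hidden_size=32,
)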
Example #3
    def __init__(
        self,
        n_lags=60,
        n_forecasts=20,
        batch_size=None,
        epochs=100,
        weight_decay=1e-2,
        patience_early_stopping=10,
        early_stop=True,
        learning_rate=3e-2,
        auto_lr_find=False,
        num_workers=3,
        loss_func="huber",
    ):
        """
        Args:
            n_lags: int, number of time units that condition the predictions. Also known as the 'lookback period'.
                Should be between 1 and 10 times the prediction length. Can be seen as the equivalent of n_lags in NeuralProphet.
            n_forecasts: int, number of time units that the model predicts
            batch_size: int, batch size. If set to None, the batch size is determined automatically
            epochs: int, number of epochs for training. Will be overridden if EarlyStopping is applied
            patience_early_stopping: int, patience parameter of the EarlyStopping callback
            early_stop: bool, whether to use the EarlyStopping callback
            weight_decay: float, weight_decay parameter for the NBeats model
            learning_rate: float, learning rate for the model. Will be overridden if auto_lr_find is used
            auto_lr_find: bool, whether to use the automatic learning rate finder
            num_workers: int, number of workers for the DataLoaders
            loss_func: str, one of ['huber', 'MSE']; which loss function will be used
        """

        self.batch_size = batch_size
        self.weight_decay = weight_decay

        self.epochs = epochs
        self.patience_early_stopping = patience_early_stopping
        self.early_stop = early_stop
        self.learning_rate = learning_rate
        self.auto_lr_find = auto_lr_find
        self.num_workers = num_workers

        self.context_length = n_lags
        self.prediction_length = n_forecasts

        self.loss_func = loss_func

        self.fitted = False
        self.freq = None

        if isinstance(self.loss_func, str):
            if self.loss_func.lower() in ["huber", "smoothl1", "smoothl1loss"]:
                self.loss_func = torch.nn.SmoothL1Loss()
            elif self.loss_func.lower() in ["mae", "l1", "l1loss"]:
                self.loss_func = torch.nn.L1Loss()
            elif self.loss_func.lower() in ["mse", "mseloss", "l2", "l2loss"]:
                self.loss_func = torch.nn.MSELoss()
            else:
                raise NotImplementedError(
                    "Loss function {} name not defined".format(self.loss_func))
        elif callable(self.loss_func):
            pass
        elif hasattr(torch.nn.modules.loss, self.loss_func.__class__.__name__):
            pass
        else:
            raise NotImplementedError("Loss function {} not found".format(
                self.loss_func))

        self.metrics = metrics.MetricsCollection(
            metrics=[
                metrics.LossMetric(torch.nn.SmoothL1Loss()),
                metrics.MAE(),
                metrics.MSE(),
            ],
            value_metrics=[
                # metrics.ValueMetric("Loss"),
            ],
        )

        self.val_metrics = metrics.MetricsCollection(
            [m.new() for m in self.metrics.batch_metrics])
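The string-to-loss mapping above recurs in Examples #2, #3 and #6. Below is a standalone restatement of that pattern as a small helper; a sketch for illustration, not library code.

import torch

def resolve_loss(loss_func):
    # Map a loss name to a torch loss module; callables pass through unchanged.
    if isinstance(loss_func, str):
        name = loss_func.lower()
        if name in ("huber", "smoothl1", "smoothl1loss"):
            return torch.nn.SmoothL1Loss()
        if name in ("mae", "l1", "l1loss"):
            return torch.nn.L1Loss()
        if name in ("mse", "mseloss", "l2", "l2loss"):
            return torch.nn.MSELoss()
        raise NotImplementedError("Loss function {} name not defined".format(loss_func))
    if callable(loss_func):
        return loss_func
    raise NotImplementedError("Loss function {} not found".format(loss_func))

print(resolve_loss("huber"))  # SmoothL1Loss()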
Example #4
    def __init__(
        self,
        n_lags=10,
        n_forecasts=1,
        num_hidden_layers=1,
        d_hidden=10,
        learning_rate=None,
        epochs=None,
        batch_size=None,
        loss_func="Huber",
        optimizer="AdamW",
        train_speed=None,
        normalize="auto",
        impute_missing=True,
        lstm_bias=True,
        lstm_bidirectional=False,
    ):
        """
        Args:

            ## Model Config
            n_lags (int): Number of previous time series steps to include in the input (lookback window).
            n_forecasts (int): Number of steps ahead of prediction time step to forecast.
            num_hidden_layers (int): number of hidden layers to include in AR-Net. Defaults to 1.
            d_hidden (int): dimension of hidden layers of the AR-Net. Ignored if num_hidden_layers == 0.

            ## Train Config
            learning_rate (float): Maximum learning rate setting for 1cycle policy scheduler.
                default: None: Automatically sets the learning_rate based on a learning rate range test.
                For manual values, try values ~0.001-10.
            epochs (int): Number of epochs (complete iterations over dataset) to train model.
                default: None: Automatically sets the number of epochs based on dataset size.
                    For best results also leave batch_size to None.
                For manual values, try ~5-500.
            batch_size (int): Number of samples per mini-batch.
                default: None: Automatically sets the batch_size based on dataset size.
                    For best results also leave epochs to None.
                For manual values, try ~1-512.
            loss_func (str, torch.nn.modules.loss._Loss, 'typing.Callable'):
                Type of loss to use: str ['Huber', 'MSE'],
                or torch loss or callable for custom loss, e.g. asymmetric Huber loss

            ## Data config
            normalize (str): Type of normalization to apply to the time series.
                options: ['auto', 'soft', 'off', 'minmax', 'standardize']
                default: 'auto' uses 'minmax' if variable is binary, else 'soft'
                'soft' scales minimum to 0.1 and the 90th quantile to 0.9
            impute_missing (bool): whether to automatically impute missing dates/values;
                imputation follows a linear method up to 10 missing values, more are filled with the trend.

            ## LSTM specific
            lstm_bias (bool): If False, then the layer does not use bias weights b_ih and b_hh. Default: True
            lstm_bidirectional (bool): If True, becomes a bidirectional LSTM. Default: False

        """

        kwargs = locals()

        # General
        self.name = "LSTM"
        self.n_forecasts = n_forecasts
        self.n_lags = n_lags

        # Data Preprocessing
        self.normalize = normalize
        self.impute_missing = impute_missing
        self.impute_limit_linear = 5
        self.impute_rolling = 20

        # Training
        self.config_train = configure.from_kwargs(configure.Train, kwargs)

        self.metrics = metrics.MetricsCollection(
            metrics=[
                metrics.LossMetric(self.config_train.loss_func),
                metrics.MAE(),
                metrics.MSE(),
            ],
            value_metrics=[
                # metrics.ValueMetric("Loss"),
            ],
        )

        # Model
        self.config_model = configure.from_kwargs(configure.Model, kwargs)

        # LSTM specific
        self.lstm_bias = lstm_bias
        self.lstm_bidirectional = lstm_bidirectional

        # set during fit()
        self.data_freq = None

        # Set during _train()
        self.fitted = False
        self.data_params = None
        self.optimizer = None
        self.scheduler = None
        self.model = None

        # set during prediction
        self.future_periods = None
        # later set by user (optional)
        self.highlight_forecast_step_n = None
        self.true_ar_weights = None
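The normalize docstring above says 'soft' scales the series minimum to 0.1 and the 90th quantile to 0.9. The function below is a rough illustration of that linear map, not NeuralProphet's exact implementation (which also handles binary series and degenerate ranges).

import numpy as np

def soft_scale(y):
    # Linear map: min(y) -> 0.1, 90th percentile of y -> 0.9.
    y = np.asarray(y, dtype=float)
    y_min = y.min()
    q90 = np.quantile(y, 0.9)
    return 0.1 + 0.8 * (y - y_min) / (q90 - y_min)

print(soft_scale([0, 1, 2, 3, 4, 5, 6, 7, 8, 9]))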
Example #5
    def _train(self, df, df_val=None, progress_bar=True, plot_live_loss=False, hyperparameter_optim=False):
        """Execute model training procedure for a configured number of epochs.

        Args:
            df (pd.DataFrame): containing columns 'ds', 'y' with training data
            df_val (pd.DataFrame): containing columns 'ds', 'y' with validation data
            progress_bar (bool): display updating progress bar
            plot_live_loss (bool): plot live training loss;
                requires the [live] install option or the livelossplot package.
            hyperparameter_optim (bool): if True, skip training and return the train loader,
                the validation loader, and the model instead.
        Returns:
            df with metrics
        """
        if plot_live_loss:
            try:
                from livelossplot import PlotLosses
            except ImportError:
                plot_live_loss = False
                log.warning(
                    "To plot live loss, please install neuralprophet[live]. "
                    "Using pip: 'pip install neuralprophet[live]' "
                    "or install the missing package manually: 'pip install livelossplot'",
                    exc_info=True,
                )

        loader = self._init_train_loader(df)
        val = df_val is not None
        ## Metrics
        if self.highlight_forecast_step_n is not None:
            self.metrics.add_specific_target(target_pos=self.highlight_forecast_step_n - 1)
        if self.normalize != "off":
            self.metrics.set_shift_scale((self.data_params["y"].shift, self.data_params["y"].scale))
        if val:
            val_loader = self._init_val_loader(df_val)
            val_metrics = metrics.MetricsCollection([m.new() for m in self.metrics.batch_metrics])

            self.val_metrics = val_metrics

        ## Run
        start = time.time()
        if progress_bar:
            training_loop = tqdm(
                range(self.config_train.epochs), total=self.config_train.epochs, leave=log.getEffectiveLevel() <= 20
            )
        else:
            training_loop = range(self.config_train.epochs)
        if plot_live_loss:
            live_out = ["MatplotlibPlot"]
            if not progress_bar:
                live_out.append("ExtremaPrinter")
            live_loss = PlotLosses(outputs=live_out)

        self.metrics.reset()
        if val:
            self.val_metrics.reset()

        self.trainer = Trainer(
            max_epochs=self.config_train.epochs,
            checkpoint_callback=False,
            logger=False
            # logger = log
        )

        if hyperparameter_optim:
            return loader, val_loader, self.model
        else:
            if val:
                self.trainer.fit(self.model, train_dataloader=loader, val_dataloaders=val_loader)
            else:
                self.trainer.fit(self.model, train_dataloader=loader)

            ## Metrics
            log.debug("Train Time: {:8.3f}".format(time.time() - start))
            log.debug("Total Batches: {}".format(self.metrics.total_updates))

            metrics_df = self.metrics.get_stored_as_df()

            if val:
                metrics_df_val = self.val_metrics.get_stored_as_df()
                for col in metrics_df_val.columns:
                    metrics_df["{}_val".format(col)] = metrics_df_val[col]
            return metrics_df
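A minimal call sketch for the method above. forecaster is a hypothetical, already-initialized instance of the wrapper class (in the surrounding library, _train would typically be called internally by a public fit method); the DataFrame is made up for illustration.

import pandas as pd

# toy 'ds'/'y' frame, split into training and validation parts
df = pd.DataFrame({
    "ds": pd.date_range("2021-01-01", periods=200, freq="D"),
    "y": range(200),
})
split = int(len(df) * 0.8)
metrics_df = forecaster._train(df.iloc[:split], df_val=df.iloc[split:], progress_bar=True)
print(metrics_df.tail(1))  # last-epoch training metrics plus the *_val columns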
Example #6
    def __init__(
        self,
        n_lags=60,
        n_forecasts=20,
        batch_size=None,
        epochs=100,
        patience_early_stopping=10,
        early_stop=True,
        learning_rate=3e-2,
        auto_lr_find=False,
        num_workers=3,
        loss_func="normaldistributionloss",
        hidden_size=32,
        rnn_layers=2,
        dropout=0.1,
    ):
        """
        Args:
            n_lags: int, number of time units that condition the predictions. Also known as the 'lookback period'.
                Should be between 1 and 10 times the prediction length. Can be seen as the equivalent of n_lags in NeuralProphet.
            n_forecasts: int, number of time units that the model predicts
            batch_size: int, batch size. If set to None, the batch size is determined automatically
            epochs: int, number of epochs for training. Will be overridden if EarlyStopping is applied
            patience_early_stopping: int, patience parameter of the EarlyStopping callback
            early_stop: bool, whether to use the EarlyStopping callback
            learning_rate: float, learning rate for the model. Will be overridden if auto_lr_find is used
            auto_lr_find: bool, whether to use the automatic learning rate finder
            num_workers: int, number of workers for the DataLoaders
            loss_func: str, distribution loss function. Keep in mind that each distribution loss function might
                have specific requirements for target normalization. Defaults to NormalDistributionLoss.
            hidden_size: int, hidden recurrent size; the most important hyperparameter along with rnn_layers.
            rnn_layers: int, number of RNN layers; an important hyperparameter.
            dropout: float, dropout in RNN layers, should be between 0 and 1.
        """

        self.batch_size = batch_size

        self.epochs = epochs
        self.patience_early_stopping = patience_early_stopping
        self.early_stop = early_stop
        self.learning_rate = learning_rate
        self.auto_lr_find = auto_lr_find
        if self.learning_rate is not None:
            self.auto_lr_find = False
        self.num_workers = num_workers

        self.context_length = n_lags
        self.prediction_length = n_forecasts

        self.hidden_size = hidden_size
        self.rnn_layers = rnn_layers
        self.dropout = dropout
        self.loss_func = loss_func

        self.fitted = False
        self.freq = None

        if isinstance(self.loss_func, str):
            if self.loss_func.lower() in ["huber", "smoothl1", "smoothl1loss"]:
                self.loss_func = torch.nn.SmoothL1Loss()
            elif self.loss_func.lower() in ["mae", "l1", "l1loss"]:
                self.loss_func = torch.nn.L1Loss()
            elif self.loss_func.lower() in ["mse", "mseloss", "l2", "l2loss"]:
                self.loss_func = torch.nn.MSELoss()
            elif self.loss_func.lower() in [
                    "normaldistloss", "ndl", "normaldistributionloss"
            ]:
                self.loss_func = NormalDistributionLoss()
            else:
                raise NotImplementedError(
                    "Loss function {} name not defined".format(self.loss_func))
        elif callable(self.loss_func):
            pass
        elif hasattr(torch.nn.modules.loss, self.loss_func.__class__.__name__):
            pass
        else:
            raise NotImplementedError("Loss function {} not found".format(
                self.loss_func))

        self.metrics = metrics.MetricsCollection(
            metrics=[
                metrics.LossMetric(torch.nn.SmoothL1Loss()),
                metrics.MAE(),
                metrics.MSE(),
            ],
            value_metrics=[
                # metrics.ValueMetric("Loss"),
            ],
        )

        self.val_metrics = metrics.MetricsCollection(
            [m.new() for m in self.metrics.batch_metrics])
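The early_stop and patience_early_stopping arguments above describe an EarlyStopping callback, but the Trainer wiring is outside this snippet. Below is a sketch of how such a callback is typically attached in PyTorch Lightning; the monitored metric name "val_loss" is an assumption about what the underlying model logs.

from pytorch_lightning import Trainer
from pytorch_lightning.callbacks import EarlyStopping

early_stop = True              # mirrors the early_stop flag above
patience_early_stopping = 10   # mirrors the patience_early_stopping parameter

callbacks = []
if early_stop:
    # stop training once the monitored validation metric stops improving
    callbacks.append(EarlyStopping(monitor="val_loss", patience=patience_early_stopping, mode="min"))
trainer = Trainer(max_epochs=100, callbacks=callbacks)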