示例#1
0
    def __init__(
            self,
            freq: str,
            prediction_length: int,
            context_length: Optional[int] = None,
            num_layers: int = 2,
            hidden_size: int = 40,
            dropout_rate: float = 0.1,
            num_feat_dynamic_real: int = 0,
            num_feat_static_cat: int = 0,
            num_feat_static_real: int = 0,
            cardinality: Optional[List[int]] = None,
            embedding_dimension: Optional[List[int]] = None,
            distr_output: DistributionOutput = StudentTOutput(),
            loss: DistributionLoss = NegativeLogLikelihood(),
            scaling: bool = True,
            lags_seq: Optional[List[int]] = None,
            time_features: Optional[List[TimeFeature]] = None,
            num_parallel_samples: int = 100,
            batch_size: int = 32,
            num_batches_per_epoch: int = 50,
            trainer_kwargs: Optional[Dict[str, Any]] = dict(),
    ) -> None:
        trainer_kwargs = {
            "max_epochs": 100,
            "gradient_clip_val": 10.0,
            **trainer_kwargs,
        }
        super().__init__(trainer_kwargs=trainer_kwargs)

        self.freq = freq
        self.context_length = (context_length if context_length is not None
                               else prediction_length)
        self.prediction_length = prediction_length
        self.distr_output = distr_output
        self.loss = loss
        self.num_layers = num_layers
        self.hidden_size = hidden_size
        self.dropout_rate = dropout_rate
        self.num_feat_dynamic_real = num_feat_dynamic_real
        self.num_feat_static_cat = num_feat_static_cat
        self.num_feat_static_real = num_feat_static_real
        self.cardinality = (cardinality if cardinality
                            and num_feat_static_cat > 0 else [1])
        self.embedding_dimension = embedding_dimension
        self.scaling = scaling
        self.lags_seq = lags_seq
        self.time_features = (time_features if time_features is not None else
                              time_features_from_frequency_str(self.freq))

        self.num_parallel_samples = num_parallel_samples
        self.batch_size = batch_size
        self.num_batches_per_epoch = num_batches_per_epoch

        self.train_sampler = ExpectedNumInstanceSampler(
            num_instances=1.0, min_future=prediction_length)
        self.validation_sampler = ValidationSplitSampler(
            min_future=prediction_length)
示例#2
0
 def __init__(
     self,
     freq: str,
     prediction_length: int,
     context_length: int,
     num_series: int,
     skip_size: int,
     ar_window: int,
     channels: int,
     lead_time: int = 0,
     kernel_size: int = 6,
     trainer: Trainer = Trainer(),
     dropout_rate: Optional[float] = 0.2,
     output_activation: Optional[str] = None,
     rnn_cell_type: str = "gru",
     rnn_num_cells: int = 100,
     rnn_num_layers: int = 3,
     skip_rnn_cell_type: str = "gru",
     skip_rnn_num_layers: int = 1,
     skip_rnn_num_cells: int = 10,
     scaling: bool = True,
     train_sampler: Optional[InstanceSampler] = None,
     validation_sampler: Optional[InstanceSampler] = None,
     batch_size: int = 32,
     dtype: DType = np.float32,
 ) -> None:
     super().__init__(
         trainer=trainer,
         lead_time=lead_time,
         batch_size=batch_size,
         dtype=dtype,
     )
     self.freq = freq
     self.num_series = num_series
     self.skip_size = skip_size
     self.ar_window = ar_window
     self.prediction_length = prediction_length
     self.context_length = context_length
     self.channels = channels
     self.kernel_size = kernel_size
     self.dropout_rate = dropout_rate
     self.output_activation = output_activation
     self.rnn_cell_type = rnn_cell_type
     self.rnn_num_layers = rnn_num_layers
     self.rnn_num_cells = rnn_num_cells
     self.skip_rnn_cell_type = skip_rnn_cell_type
     self.skip_rnn_num_layers = skip_rnn_num_layers
     self.skip_rnn_num_cells = skip_rnn_num_cells
     self.scaling = scaling
     self.train_sampler = (train_sampler if train_sampler is not None else
                           ExpectedNumInstanceSampler(
                               num_instances=1.0,
                               min_future=prediction_length + lead_time))
     self.validation_sampler = (validation_sampler if validation_sampler
                                is not None else ValidationSplitSampler(
                                    min_future=prediction_length +
                                    lead_time))
     self.dtype = dtype
示例#3
0
    def __init__(
        self,
        freq: str,
        prediction_length: int,
        cardinalities: List[int] = [],
        context_length: Optional[int] = None,
        trainer: Trainer = Trainer(),
        model_dim: int = 64,
        ffn_dim_multiplier: int = 2,
        num_heads: int = 4,
        num_layers: int = 3,
        num_outputs: int = 3,
        kernel_sizes: List[int] = [3, 5, 7, 9],
        distance_encoding: Optional[str] = "dot",
        pre_layer_norm: bool = False,
        dropout: float = 0.1,
        temperature: float = 1.0,
        time_features: Optional[List[TimeFeature]] = None,
        use_feat_dynamic_real: bool = True,
        use_feat_dynamic_cat: bool = False,
        use_feat_static_real: bool = False,
        use_feat_static_cat: bool = True,
        train_sampler: Optional[InstanceSampler] = None,
        validation_sampler: Optional[InstanceSampler] = None,
        batch_size: int = 32,
    ):
        super().__init__(trainer=trainer, batch_size=batch_size)
        self.freq = freq
        self.prediction_length = prediction_length
        self.context_length = context_length or prediction_length
        self.model_dim = model_dim
        self.ffn_dim_multiplier = ffn_dim_multiplier
        self.num_heads = num_heads
        self.num_layers = num_layers
        self.num_outputs = num_outputs
        self.cardinalities = cardinalities
        self.kernel_sizes = kernel_sizes
        self.distance_encoding = distance_encoding
        self.pre_layer_norm = pre_layer_norm
        self.dropout = dropout
        self.temperature = temperature

        self.time_features = time_features or time_features_from_frequency_str(
            self.freq)
        self.use_feat_dynamic_cat = use_feat_dynamic_cat
        self.use_feat_dynamic_real = use_feat_dynamic_real
        self.use_feat_static_cat = use_feat_static_cat
        self.use_feat_static_real = use_feat_static_real
        self.train_sampler = (train_sampler if train_sampler is not None else
                              ExpectedNumInstanceSampler(
                                  num_instances=1.0,
                                  min_future=prediction_length))
        self.validation_sampler = (validation_sampler if validation_sampler
                                   is not None else ValidationSplitSampler(
                                       min_future=prediction_length))
示例#4
0
    def __init__(
        self,
        freq: str,
        prediction_length: int,
        sampling: bool = True,
        trainer: Trainer = Trainer(),
        num_hidden_dimensions: Optional[List[int]] = None,
        context_length: Optional[int] = None,
        distr_output: DistributionOutput = StudentTOutput(),
        imputation_method: Optional[MissingValueImputation] = None,
        batch_normalization: bool = False,
        mean_scaling: bool = True,
        num_parallel_samples: int = 100,
        train_sampler: Optional[InstanceSampler] = None,
        validation_sampler: Optional[InstanceSampler] = None,
        batch_size: int = 32,
    ) -> None:
        """
        Defines an estimator. All parameters should be serializable.
        """
        super().__init__(trainer=trainer, batch_size=batch_size)

        assert (prediction_length >
                0), "The value of `prediction_length` should be > 0"
        assert (context_length is None or context_length > 0
                ), "The value of `context_length` should be > 0"
        assert num_hidden_dimensions is None or ([
            d > 0 for d in num_hidden_dimensions
        ]), "Elements of `num_hidden_dimensions` should be > 0"
        assert (num_parallel_samples >
                0), "The value of `num_parallel_samples` should be > 0"

        self.num_hidden_dimensions = (num_hidden_dimensions
                                      if num_hidden_dimensions is not None else
                                      list([40, 40]))
        self.prediction_length = prediction_length
        self.context_length = (context_length if context_length is not None
                               else prediction_length)
        self.freq = freq
        self.distr_output = distr_output
        self.batch_normalization = batch_normalization
        self.mean_scaling = mean_scaling
        self.num_parallel_samples = num_parallel_samples
        self.sampling = sampling
        self.imputation_method = (imputation_method if imputation_method
                                  is not None else DummyValueImputation(
                                      self.distr_output.value_in_support))
        self.train_sampler = (train_sampler if train_sampler is not None else
                              ExpectedNumInstanceSampler(
                                  num_instances=1.0,
                                  min_future=prediction_length))
        self.validation_sampler = (validation_sampler if validation_sampler
                                   is not None else ValidationSplitSampler(
                                       min_future=prediction_length))
示例#5
0
    def _create_instance_splitter(self, mode: str):
        assert mode in ["training", "validation", "test"]

        instance_sampler = {
            "training":
            ValidationSplitSampler(min_future=self.prediction_length),
            "validation":
            ValidationSplitSampler(min_future=self.prediction_length),
            "test": TestSplitSampler(),
        }[mode]

        return InstanceSplitter(
            target_field=FieldName.TARGET,
            is_pad_field=FieldName.IS_PAD,
            start_field=FieldName.START,
            forecast_start_field=FieldName.FORECAST_START,
            instance_sampler=instance_sampler,
            time_series_fields=[FieldName.FEAT_TIME],
            past_length=self.context_length,
            future_length=self.prediction_length,
        )
    def __init__(
        self,
        freq: str,
        prediction_length: int,
        cardinality: List[int],
        embedding_dimension: int,
        encoder: Seq2SeqEncoder,
        decoder_mlp_layer: List[int],
        decoder_mlp_static_dim: int,
        scaler: Scaler = NOPScaler(),
        context_length: Optional[int] = None,
        quantiles: Optional[List[float]] = None,
        trainer: Trainer = Trainer(),
        train_sampler: Optional[InstanceSampler] = None,
        validation_sampler: Optional[InstanceSampler] = None,
        num_parallel_samples: int = 100,
        batch_size: int = 32,
    ) -> None:
        assert (prediction_length >
                0), "The value of `prediction_length` should be > 0"
        assert (context_length is None or context_length > 0
                ), "The value of `context_length` should be > 0"
        assert quantiles is None or all(
            0 <= d <= 1 for d in
            quantiles), "Elements of `quantiles` should be >= 0 and <= 1"

        super().__init__(trainer=trainer, batch_size=batch_size)

        self.context_length = (context_length if context_length is not None
                               else prediction_length)
        self.prediction_length = prediction_length
        self.freq = freq
        self.quantiles = (quantiles
                          if quantiles is not None else [0.1, 0.5, 0.9])
        self.encoder = encoder
        self.decoder_mlp_layer = decoder_mlp_layer
        self.decoder_mlp_static_dim = decoder_mlp_static_dim
        self.scaler = scaler
        self.embedder = FeatureEmbedder(
            cardinalities=cardinality,
            embedding_dims=[embedding_dimension for _ in cardinality],
        )
        self.train_sampler = (train_sampler if train_sampler is not None else
                              ExpectedNumInstanceSampler(
                                  num_instances=1.0,
                                  min_future=prediction_length))
        self.validation_sampler = (validation_sampler if validation_sampler
                                   is not None else ValidationSplitSampler(
                                       min_future=prediction_length))
        self.num_parallel_samples = num_parallel_samples
示例#7
0
    def __init__(
        self,
        freq: str,
        prediction_length: int,
        context_length: int,
        num_cells: int,
        num_layers: int,
        dropout_rate: float = 0.1,
        interval_distr_output: DistributionOutput = NegativeBinomialOutput(),
        size_distr_output: DistributionOutput = NegativeBinomialOutput(),
        train_sampler: Optional[InstanceSampler] = None,
        validation_sampler: Optional[InstanceSampler] = None,
        trainer: Trainer = Trainer(hybridize=False),
        batch_size: int = 32,
        num_parallel_samples: int = 100,
        **kwargs,
    ):
        super().__init__(trainer=trainer, batch_size=batch_size, **kwargs)

        assert (
            prediction_length > 0
        ), "The value of `prediction_length` should be > 0"
        assert (
            context_length is None or context_length > 0
        ), "The value of `context_length` should be > 0"
        assert dropout_rate >= 0, "The value of `dropout_rate` should be >= 0"

        self.freq = freq
        self.context_length = context_length
        self.prediction_length = prediction_length
        self.num_cells = num_cells
        self.num_layers = num_layers
        self.dropout_rate = dropout_rate
        self.interval_distr_output = interval_distr_output
        self.size_distr_output = size_distr_output
        self.num_parallel_samples = num_parallel_samples

        self.train_sampler = (
            train_sampler
            if train_sampler is not None
            else ExpectedNumInstanceSampler(
                num_instances=5, min_future=prediction_length
            )
        )
        self.validation_sampler = (
            validation_sampler
            if validation_sampler is not None
            else ValidationSplitSampler(min_future=prediction_length)
        )
示例#8
0
    def __init__(
        self,
        freq: str,
        prediction_length: Optional[int],
        context_length: int,
        num_series: int,
        ar_window: int = 24,
        skip_size: int = 24,
        channels: int = 100,
        kernel_size: int = 6,
        horizon: Optional[int] = None,
        trainer: Trainer = Trainer(),
        dropout_rate: Optional[float] = 0.2,
        output_activation: Optional[str] = None,
        rnn_cell_type: str = "GRU",
        rnn_num_cells: int = 100,
        skip_rnn_cell_type: str = "GRU",
        skip_rnn_num_cells: int = 5,
        scaling: bool = True,
        dtype: np.dtype = np.float32,
    ):
        super().__init__(trainer, dtype=dtype)

        self.freq = freq
        self.num_series = num_series
        self.skip_size = skip_size
        self.ar_window = ar_window
        self.horizon = horizon
        self.prediction_length = prediction_length

        self.future_length = horizon if horizon is not None else prediction_length
        self.context_length = context_length
        self.channels = channels
        self.kernel_size = kernel_size
        self.dropout_rate = dropout_rate
        self.output_activation = output_activation
        self.rnn_cell_type = rnn_cell_type
        self.rnn_num_cells = rnn_num_cells
        self.skip_rnn_cell_type = skip_rnn_cell_type
        self.skip_rnn_num_cells = skip_rnn_num_cells
        self.scaling = scaling

        self.train_sampler = ExpectedNumInstanceSampler(
            num_instances=1.0, min_future=self.future_length
        )
        self.validation_sampler = ValidationSplitSampler(min_future=self.future_length)

        self.dtype = dtype
def test_forking_sequence_splitter() -> None:
    len_ts = 20
    ds = make_dataset(1, len_ts)
    enc_len = 5
    dec_len = 3

    trans = transform.Chain(
        [
            transform.AddAgeFeature(
                target_field=FieldName.TARGET,
                output_field="age",
                pred_length=dec_len,
            ),
            ForkingSequenceSplitter(
                instance_sampler=ValidationSplitSampler(min_future=dec_len),
                enc_len=enc_len,
                dec_len=dec_len,
                encoder_series_fields=["age"],
            ),
        ]
    )

    out = trans(ds, is_train=True)
    transformed_data = next(iter(out))

    future_target = np.array(
        [
            [13.0, 14.0, 15.0],
            [14.0, 15.0, 16.0],
            [15.0, 16.0, 17.0],
            [16.0, 17.0, 18.0],
            [17.0, 18.0, 19.0],
        ]
    )
    assert (
        np.linalg.norm(future_target - transformed_data["future_target"])
        < 1e-5
    ), "the forking sequence target should be computed correctly."

    age = np.log10(2.0 + np.arange(len_ts))
    assert (
        np.linalg.norm(
            age[-(enc_len + dec_len) : -dec_len]
            - transformed_data["past_age"].flatten()
        )
        < 1e-5
    ), "the forking sequence past feature should be computed correctly."
示例#10
0
    def __init__(
        self,
        freq: str,
        prediction_length: int,
        context_length: Optional[int] = None,
        hidden_dimensions: Optional[List[int]] = None,
        distr_output: DistributionOutput = StudentTOutput(),
        loss: DistributionLoss = NegativeLogLikelihood(),
        batch_norm: bool = False,
        batch_size: int = 32,
        num_batches_per_epoch: int = 50,
        trainer_kwargs: Optional[Dict[str, Any]] = None,
        train_sampler: Optional[InstanceSampler] = None,
        validation_sampler: Optional[InstanceSampler] = None,
    ) -> None:
        default_trainer_kwargs = {
            "max_epochs": 100,
            "gradient_clip_val": 10.0,
        }
        if trainer_kwargs is not None:
            default_trainer_kwargs.update(trainer_kwargs)
        super().__init__(trainer_kwargs=default_trainer_kwargs)

        self.freq = freq
        self.prediction_length = prediction_length
        self.context_length = context_length or 10 * prediction_length
        # TODO find way to enforce same defaults to network and estimator
        # somehow
        self.hidden_dimensions = hidden_dimensions or [20, 20]
        self.distr_output = distr_output
        self.loss = loss
        self.batch_norm = batch_norm
        self.batch_size = batch_size
        self.num_batches_per_epoch = num_batches_per_epoch

        self.train_sampler = train_sampler or ExpectedNumInstanceSampler(
            num_instances=1.0, min_future=prediction_length)
        self.validation_sampler = validation_sampler or ValidationSplitSampler(
            min_future=prediction_length)
示例#11
0
    def __init__(
        self,
        freq: str,
        prediction_length: int,
        target_dim: int,
        trainer: Trainer = Trainer(),
        context_length: Optional[int] = None,
        num_layers: int = 2,
        num_cells: int = 40,
        cell_type: str = "lstm",
        num_parallel_samples: int = 100,
        dropout_rate: float = 0.1,
        cardinality: List[int] = [1],
        embedding_dimension: int = 5,
        distr_output: Optional[DistributionOutput] = None,
        rank: Optional[int] = 5,
        scaling: bool = True,
        pick_incomplete: bool = False,
        lags_seq: Optional[List[int]] = None,
        time_features: Optional[List[TimeFeature]] = None,
        conditioning_length: int = 200,
        use_marginal_transformation=False,
        train_sampler: Optional[InstanceSampler] = None,
        validation_sampler: Optional[InstanceSampler] = None,
        batch_size: int = 32,
        **kwargs,
    ) -> None:
        super().__init__(trainer=trainer, batch_size=batch_size, **kwargs)

        assert (prediction_length >
                0), "The value of `prediction_length` should be > 0"
        assert (context_length is None or context_length > 0
                ), "The value of `context_length` should be > 0"
        assert num_layers > 0, "The value of `num_layers` should be > 0"
        assert num_cells > 0, "The value of `num_cells` should be > 0"
        assert (num_parallel_samples >
                0), "The value of `num_eval_samples` should be > 0"
        assert dropout_rate >= 0, "The value of `dropout_rate` should be >= 0"
        assert all([c > 0 for c in cardinality
                    ]), "Elements of `cardinality` should be > 0"
        assert (embedding_dimension >
                0), "The value of `embedding_dimension` should be > 0"

        self.freq = freq
        self.context_length = (context_length if context_length is not None
                               else prediction_length)

        if distr_output is not None:
            self.distr_output = distr_output
        else:
            self.distr_output = LowrankMultivariateGaussianOutput(
                dim=target_dim, rank=rank)

        self.prediction_length = prediction_length
        self.target_dim = target_dim
        self.num_layers = num_layers
        self.num_cells = num_cells
        self.cell_type = cell_type
        self.num_parallel_samples = num_parallel_samples
        self.dropout_rate = dropout_rate
        self.cardinality = cardinality
        self.embedding_dimension = embedding_dimension
        self.conditioning_length = conditioning_length
        self.use_marginal_transformation = use_marginal_transformation

        self.lags_seq = (lags_seq if lags_seq is not None else
                         get_lags_for_frequency(freq_str=freq))

        self.time_features = (time_features if time_features is not None else
                              time_features_from_frequency_str(self.freq))

        self.history_length = self.context_length + max(self.lags_seq)
        self.pick_incomplete = pick_incomplete
        self.scaling = scaling

        if self.use_marginal_transformation:
            self.output_transform: Optional[
                Callable] = cdf_to_gaussian_forward_transform
        else:
            self.output_transform = None

        self.train_sampler = (
            train_sampler
            if train_sampler is not None else ExpectedNumInstanceSampler(
                num_instances=1.0,
                min_past=0 if pick_incomplete else self.history_length,
                min_future=prediction_length,
            ))
        self.validation_sampler = (
            validation_sampler
            if validation_sampler is not None else ValidationSplitSampler(
                min_past=0 if pick_incomplete else self.history_length,
                min_future=prediction_length,
            ))
示例#12
0
    def __init__(
        self,
        freq: str,
        prediction_length: int,
        target_dim: int,
        trainer: Trainer = Trainer(),
        # number of dimension to sample at training time
        context_length: Optional[int] = None,
        num_layers: int = 2,
        num_cells: int = 40,
        cell_type: str = "lstm",
        num_parallel_samples: int = 100,
        dropout_rate: float = 0.1,
        target_dim_sample: Optional[int] = None,
        distr_output: Optional[DistributionOutput] = None,
        rank: Optional[int] = 2,
        scaling: bool = True,
        pick_incomplete: bool = False,
        lags_seq: Optional[List[int]] = None,
        shuffle_target_dim: bool = True,
        time_features: Optional[List[TimeFeature]] = None,
        conditioning_length: int = 100,
        use_marginal_transformation: bool = False,
        train_sampler: Optional[InstanceSampler] = None,
        validation_sampler: Optional[InstanceSampler] = None,
        batch_size: int = 32,
    ) -> None:
        super().__init__(trainer=trainer, batch_size=batch_size)

        assert (
            prediction_length > 0
        ), "The value of `prediction_length` should be > 0"
        assert (
            context_length is None or context_length > 0
        ), "The value of `context_length` should be > 0"
        assert num_layers > 0, "The value of `num_layers` should be > 0"
        assert num_cells > 0, "The value of `num_cells` should be > 0"
        assert (
            num_parallel_samples > 0
        ), "The value of `num_eval_samples` should be > 0"
        assert dropout_rate >= 0, "The value of `dropout_rate` should be >= 0"

        if distr_output is not None:
            self.distr_output = distr_output
        else:
            self.distr_output = LowrankGPOutput(rank=rank)
        self.freq = freq
        self.context_length = (
            context_length if context_length is not None else prediction_length
        )
        self.prediction_length = prediction_length
        self.target_dim = target_dim
        self.target_dim_sample = (
            target_dim
            if target_dim_sample is None
            else min(target_dim_sample, target_dim)
        )
        self.shuffle_target_dim = shuffle_target_dim
        self.num_layers = num_layers
        self.num_cells = num_cells
        self.cell_type = cell_type
        self.num_parallel_samples = num_parallel_samples
        self.dropout_rate = dropout_rate

        self.lags_seq = (
            lags_seq
            if lags_seq is not None
            else get_lags_for_frequency(freq_str=freq)
        )
        self.time_features = (
            time_features
            if time_features is not None
            else time_features_from_frequency_str(self.freq)
        )

        self.history_length = self.context_length + max(self.lags_seq)
        self.pick_incomplete = pick_incomplete
        self.scaling = scaling
        self.conditioning_length = conditioning_length
        self.use_marginal_transformation = use_marginal_transformation
        self.output_transform = (
            cdf_to_gaussian_forward_transform
            if self.use_marginal_transformation
            else None
        )

        self.train_sampler = (
            train_sampler
            if train_sampler is not None
            else ExpectedNumInstanceSampler(
                num_instances=1.0,
                min_past=0 if pick_incomplete else self.history_length,
                min_future=prediction_length,
            )
        )
        self.validation_sampler = (
            validation_sampler
            if validation_sampler is not None
            else ValidationSplitSampler(
                min_past=0 if pick_incomplete else self.history_length,
                min_future=prediction_length,
            )
        )
示例#13
0
    def __init__(
        self,
        freq: str,
        prediction_length: int,
        context_length: Optional[int] = None,
        trainer: Trainer = Trainer(),
        hidden_dim: int = 32,
        variable_dim: Optional[int] = None,
        num_heads: int = 4,
        num_outputs: int = 3,
        num_instance_per_series: int = 100,
        dropout_rate: float = 0.1,
        time_features: List[TimeFeature] = [],
        static_cardinalities: Dict[str, int] = {},
        dynamic_cardinalities: Dict[str, int] = {},
        static_feature_dims: Dict[str, int] = {},
        dynamic_feature_dims: Dict[str, int] = {},
        past_dynamic_features: List[str] = [],
        train_sampler: Optional[InstanceSampler] = None,
        validation_sampler: Optional[InstanceSampler] = None,
        batch_size: int = 32,
    ) -> None:
        super(TemporalFusionTransformerEstimator,
              self).__init__(trainer=trainer, batch_size=batch_size)
        assert (prediction_length >
                0), "The value of `prediction_length` should be > 0"
        assert (context_length is None or context_length > 0
                ), "The value of `context_length` should be > 0"
        assert dropout_rate >= 0, "The value of `dropout_rate` should be >= 0"

        self.freq = freq
        self.prediction_length = prediction_length
        self.context_length = context_length or prediction_length
        self.dropout_rate = dropout_rate
        self.hidden_dim = hidden_dim
        self.variable_dim = variable_dim or hidden_dim
        self.num_heads = num_heads
        self.num_outputs = num_outputs
        self.num_instance_per_series = num_instance_per_series

        if not time_features:
            self.time_features = time_features_from_frequency_str(self.freq)
            if not self.time_features:
                # If time features are empty (as for yearly data), we add a
                # constant feature of 0
                self.time_features = [Constant()]
        else:
            self.time_features = time_features
        self.static_cardinalities = static_cardinalities
        self.dynamic_cardinalities = dynamic_cardinalities
        self.static_feature_dims = static_feature_dims
        self.dynamic_feature_dims = dynamic_feature_dims
        self.past_dynamic_features = past_dynamic_features

        self.past_dynamic_cardinalities = {}
        self.past_dynamic_feature_dims = {}
        for name in self.past_dynamic_features:
            if name in self.dynamic_cardinalities:
                self.past_dynamic_cardinalities[
                    name] = self.dynamic_cardinalities.pop(name)
            elif name in self.dynamic_feature_dims:
                self.past_dynamic_feature_dims[
                    name] = self.dynamic_feature_dims.pop(name)
            else:
                raise ValueError(
                    f"Feature name {name} is not provided in feature dicts")

        self.train_sampler = (train_sampler if train_sampler is not None else
                              ExpectedNumInstanceSampler(
                                  num_instances=1.0,
                                  min_future=prediction_length))
        self.validation_sampler = (validation_sampler if validation_sampler
                                   is not None else ValidationSplitSampler(
                                       min_future=prediction_length))
示例#14
0
    def __init__(
        self,
        freq: str,
        prediction_length: int,
        context_length: Optional[int] = None,
        trainer: Trainer = Trainer(),
        num_stacks: int = 30,
        widths: Optional[List[int]] = None,
        num_blocks: Optional[List[int]] = None,
        num_block_layers: Optional[List[int]] = None,
        expansion_coefficient_lengths: Optional[List[int]] = None,
        sharing: Optional[List[bool]] = None,
        stack_types: Optional[List[str]] = None,
        loss_function: Optional[str] = "MAPE",
        train_sampler: Optional[InstanceSampler] = None,
        validation_sampler: Optional[InstanceSampler] = None,
        batch_size: int = 32,
        scale: bool = False,
        **kwargs,
    ) -> None:
        super().__init__(trainer=trainer, batch_size=batch_size, **kwargs)

        assert (prediction_length >
                0), "The value of `prediction_length` should be > 0"
        assert (context_length is None or context_length > 0
                ), "The value of `context_length` should be > 0"
        assert (num_stacks is None
                or num_stacks > 0), "The value of `num_stacks` should be > 0"
        assert (loss_function is None
                or loss_function in VALID_LOSS_FUNCTIONS), (
                    "The loss function has to be one of the following:"
                    f" {VALID_LOSS_FUNCTIONS}.")

        self.freq = freq
        self.scale = scale
        self.prediction_length = prediction_length
        self.context_length = (context_length if context_length is not None
                               else 2 * prediction_length)
        # num_stacks has to be handled separately because other arguments have
        # to match its length
        self.num_stacks = num_stacks
        self.loss_function = loss_function

        self.widths = self._validate_nbeats_argument(
            argument_value=widths,
            argument_name="widths",
            default_value=[512],
            validation_condition=lambda val: val > 0,
            invalidation_message="Values of 'widths' should be > 0",
        )
        self.num_blocks = self._validate_nbeats_argument(
            argument_value=num_blocks,
            argument_name="num_blocks",
            default_value=[1],
            validation_condition=lambda val: val > 0,
            invalidation_message="Values of 'num_blocks' should be > 0",
        )
        self.num_block_layers = self._validate_nbeats_argument(
            argument_value=num_block_layers,
            argument_name="num_block_layers",
            default_value=[4],
            validation_condition=lambda val: val > 0,
            invalidation_message="Values of 'block_layers' should be > 0",
        )
        self.sharing = self._validate_nbeats_argument(
            argument_value=sharing,
            argument_name="sharing",
            default_value=[False],
            validation_condition=lambda val: True,
            invalidation_message="",
        )
        self.expansion_coefficient_lengths = self._validate_nbeats_argument(
            argument_value=expansion_coefficient_lengths,
            argument_name="expansion_coefficient_lengths",
            default_value=[32],
            validation_condition=lambda val: val > 0,
            invalidation_message=(
                "Values of 'expansion_coefficient_lengths' should be > 0"),
        )
        self.stack_types = self._validate_nbeats_argument(
            argument_value=stack_types,
            argument_name="stack_types",
            default_value=["G"],
            validation_condition=lambda val: val in VALID_N_BEATS_STACK_TYPES,
            invalidation_message=("Values of 'stack_types' should be one of"
                                  f" {VALID_N_BEATS_STACK_TYPES}"),
        )
        self.train_sampler = (train_sampler if train_sampler is not None else
                              ExpectedNumInstanceSampler(
                                  num_instances=1.0,
                                  min_future=prediction_length))
        self.validation_sampler = (validation_sampler if validation_sampler
                                   is not None else ValidationSplitSampler(
                                       min_future=prediction_length))
    def __init__(
        self,
        encoder: Seq2SeqEncoder,
        decoder: Seq2SeqDecoder,
        freq: str,
        prediction_length: int,
        quantile_output: Optional[QuantileOutput] = None,
        distr_output: Optional[DistributionOutput] = None,
        context_length: Optional[int] = None,
        use_past_feat_dynamic_real: bool = False,
        use_feat_dynamic_real: bool = False,
        use_feat_static_cat: bool = False,
        cardinality: List[int] = None,
        embedding_dimension: List[int] = None,
        add_time_feature: bool = True,
        add_age_feature: bool = False,
        enable_encoder_dynamic_feature: bool = True,
        enable_decoder_dynamic_feature: bool = True,
        trainer: Trainer = Trainer(),
        scaling: Optional[bool] = None,
        scaling_decoder_dynamic_feature: bool = False,
        dtype: DType = np.float32,
        num_forking: Optional[int] = None,
        max_ts_len: Optional[int] = None,
        train_sampler: Optional[InstanceSampler] = None,
        validation_sampler: Optional[InstanceSampler] = None,
        batch_size: int = 32,
    ) -> None:
        super().__init__(trainer=trainer, batch_size=batch_size)

        assert (distr_output is None) != (quantile_output is None)
        assert (
            context_length is None or context_length > 0
        ), "The value of `context_length` should be > 0"
        assert (
            prediction_length > 0
        ), "The value of `prediction_length` should be > 0"
        assert (
            use_feat_static_cat or not cardinality
        ), "You should set `cardinality` if and only if `use_feat_static_cat=True`"
        assert cardinality is None or all(
            c > 0 for c in cardinality
        ), "Elements of `cardinality` should be > 0"
        assert embedding_dimension is None or all(
            e > 0 for e in embedding_dimension
        ), "Elements of `embedding_dimension` should be > 0"

        self.encoder = encoder
        self.decoder = decoder
        self.freq = freq
        self.prediction_length = prediction_length
        self.quantile_output = quantile_output
        self.distr_output = distr_output
        self.context_length = (
            context_length
            if context_length is not None
            else 4 * self.prediction_length
        )
        if max_ts_len is not None:
            max_pad_len = max(max_ts_len - self.prediction_length, 0)
            # Don't allow context_length to be longer than the max pad length
            self.context_length = (
                min(max_pad_len, self.context_length)
                if max_pad_len > 0
                else self.context_length
            )
        self.num_forking = (
            min(num_forking, self.context_length)
            if num_forking is not None
            else self.context_length
        )
        self.use_past_feat_dynamic_real = use_past_feat_dynamic_real
        self.use_feat_dynamic_real = use_feat_dynamic_real
        self.use_feat_static_cat = use_feat_static_cat
        self.cardinality = (
            cardinality if cardinality and use_feat_static_cat else [1]
        )
        self.embedding_dimension = (
            embedding_dimension
            if embedding_dimension is not None
            else [min(50, (cat + 1) // 2) for cat in self.cardinality]
        )
        self.add_time_feature = add_time_feature
        self.add_age_feature = add_age_feature
        self.use_dynamic_feat = (
            use_feat_dynamic_real or add_age_feature or add_time_feature
        )
        self.enable_encoder_dynamic_feature = enable_encoder_dynamic_feature
        self.enable_decoder_dynamic_feature = enable_decoder_dynamic_feature
        self.scaling = (
            scaling if scaling is not None else (quantile_output is None)
        )
        self.scaling_decoder_dynamic_feature = scaling_decoder_dynamic_feature
        self.dtype = dtype

        self.train_sampler = (
            train_sampler
            if train_sampler is not None
            else ValidationSplitSampler(min_future=prediction_length)
        )
        self.validation_sampler = (
            validation_sampler
            if validation_sampler is not None
            else ValidationSplitSampler(min_future=prediction_length)
        )
示例#16
0
    def __init__(
        self,
        input_size: int,
        freq: str,
        prediction_length: int,
        target_dim: int,
        trainer: Trainer = Trainer(),
        context_length: Optional[int] = None,
        d_model: int = 32,
        dim_feedforward_scale: int = 4,
        act_type: str = "gelu",
        num_heads: int = 8,
        num_encoder_layers: int = 3,
        num_decoder_layers: int = 3,
        num_parallel_samples: int = 100,
        dropout_rate: float = 0.1,
        use_feat_dynamic_real: bool = False,
        flow_type="RealNVP",
        n_blocks=3,
        hidden_size=100,
        n_hidden=2,
        conditioning_length: int = 200,
        dequantize: bool = False,
        scaling: bool = True,
        pick_incomplete: bool = False,
        lags_seq: Optional[List[int]] = None,
        time_features: Optional[List[TimeFeature]] = None,
        **kwargs,
    ) -> None:
        super().__init__(trainer=trainer, **kwargs)

        self.freq = freq
        self.context_length = (context_length if context_length is not None
                               else prediction_length)

        self.input_size = input_size
        self.prediction_length = prediction_length
        self.target_dim = target_dim

        self.d_model = d_model
        self.num_heads = num_heads
        self.act_type = act_type
        self.dim_feedforward_scale = dim_feedforward_scale
        self.num_encoder_layers = num_encoder_layers
        self.num_decoder_layers = num_decoder_layers

        self.num_parallel_samples = num_parallel_samples
        self.dropout_rate = dropout_rate

        self.use_feat_dynamic_real = use_feat_dynamic_real

        self.flow_type = flow_type
        self.n_blocks = n_blocks
        self.hidden_size = hidden_size
        self.n_hidden = n_hidden
        self.conditioning_length = conditioning_length
        self.dequantize = dequantize

        self.lags_seq = (lags_seq if lags_seq is not None else
                         lags_for_fourier_time_features_from_frequency(
                             freq_str=freq))

        self.time_features = (time_features if time_features is not None else
                              fourier_time_features_from_frequency(self.freq))

        self.history_length = self.context_length + max(self.lags_seq)
        self.pick_incomplete = pick_incomplete
        self.scaling = scaling

        self.train_sampler = ExpectedNumInstanceSampler(
            num_instances=1.0,
            min_past=0 if pick_incomplete else self.history_length,
            min_future=prediction_length,
        )

        self.validation_sampler = ValidationSplitSampler(
            min_past=0 if pick_incomplete else self.history_length,
            min_future=prediction_length,
        )
    def __init__(
        self,
        input_size: int,
        freq: str,
        prediction_length: int,
        target_dim: int,
        trainer: Trainer = Trainer(),
        context_length: Optional[int] = None,
        num_layers: int = 2,
        num_cells: int = 40,
        cell_type: str = "LSTM",
        num_parallel_samples: int = 100,
        dropout_rate: float = 0.1,
        cardinality: List[int] = [1],
        embedding_dimension: int = 5,
        flow_type="RealNVP",
        n_blocks=3,
        hidden_size=100,
        n_hidden=2,
        conditioning_length: int = 200,
        dequantize: bool = False,
        scaling: bool = True,
        pick_incomplete: bool = False,
        lags_seq: Optional[List[int]] = None,
        time_features: Optional[List[TimeFeature]] = None,
        **kwargs,
    ) -> None:
        super().__init__(trainer=trainer, **kwargs)

        self.freq = freq
        self.context_length = (context_length if context_length is not None
                               else prediction_length)

        self.input_size = input_size
        self.prediction_length = prediction_length
        self.target_dim = target_dim
        self.num_layers = num_layers
        self.num_cells = num_cells
        self.cell_type = cell_type
        self.num_parallel_samples = num_parallel_samples
        self.dropout_rate = dropout_rate
        self.cardinality = cardinality
        self.embedding_dimension = embedding_dimension

        self.flow_type = flow_type
        self.n_blocks = n_blocks
        self.hidden_size = hidden_size
        self.n_hidden = n_hidden
        self.conditioning_length = conditioning_length
        self.dequantize = dequantize

        self.lags_seq = (lags_seq if lags_seq is not None else
                         lags_for_fourier_time_features_from_frequency(
                             freq_str=freq))

        self.time_features = (time_features if time_features is not None else
                              fourier_time_features_from_frequency(self.freq))

        self.history_length = self.context_length + max(self.lags_seq)
        self.pick_incomplete = pick_incomplete
        self.scaling = scaling

        self.train_sampler = ExpectedNumInstanceSampler(
            num_instances=1.0,
            min_past=0 if pick_incomplete else self.history_length,
            min_future=prediction_length,
        )

        self.validation_sampler = ValidationSplitSampler(
            min_past=0 if pick_incomplete else self.history_length,
            min_future=prediction_length,
        )
    def __init__(
        self,
        input_size: int,
        freq: str,
        prediction_length: int,
        target_dim: int,
        trainer: Trainer = Trainer(),
        context_length: Optional[int] = None,
        num_layers: int = 2,
        num_cells: int = 40,
        cell_type: str = "LSTM",
        num_parallel_samples: int = 100,
        dropout_rate: float = 0.1,
        cardinality: List[int] = [1],
        embedding_dimension: int = 5,
        conditioning_length: int = 100,
        diff_steps: int = 100,
        loss_type: str = "l2",
        beta_end=0.1,
        beta_schedule="linear",
        residual_layers=8,
        residual_channels=8,
        dilation_cycle_length=2,
        scaling: bool = True,
        pick_incomplete: bool = False,
        lags_seq: Optional[List[int]] = None,
        time_features: Optional[List[TimeFeature]] = None,
        **kwargs,
    ) -> None:
        super().__init__(trainer=trainer, **kwargs)

        self.freq = freq
        self.context_length = (context_length if context_length is not None
                               else prediction_length)

        self.input_size = input_size
        self.prediction_length = prediction_length
        self.target_dim = target_dim
        self.num_layers = num_layers
        self.num_cells = num_cells
        self.cell_type = cell_type
        self.num_parallel_samples = num_parallel_samples
        self.dropout_rate = dropout_rate
        self.cardinality = cardinality
        self.embedding_dimension = embedding_dimension

        self.conditioning_length = conditioning_length
        self.diff_steps = diff_steps
        self.loss_type = loss_type
        self.beta_end = beta_end
        self.beta_schedule = beta_schedule
        self.residual_layers = residual_layers
        self.residual_channels = residual_channels
        self.dilation_cycle_length = dilation_cycle_length

        self.lags_seq = (lags_seq if lags_seq is not None else
                         lags_for_fourier_time_features_from_frequency(
                             freq_str=freq))

        self.time_features = (time_features if time_features is not None else
                              fourier_time_features_from_frequency(self.freq))

        self.history_length = self.context_length + max(self.lags_seq)
        self.pick_incomplete = pick_incomplete
        self.scaling = scaling

        self.train_sampler = ExpectedNumInstanceSampler(
            num_instances=1.0,
            min_past=0 if pick_incomplete else self.history_length,
            min_future=prediction_length,
        )

        self.validation_sampler = ValidationSplitSampler(
            min_past=0 if pick_incomplete else self.history_length,
            min_future=prediction_length,
        )
示例#19
0
    def __init__(
        self,
        freq: str,
        prediction_length: int,
        context_length: Optional[int] = None,
        trainer: Trainer = Trainer(),
        num_stacks: int = 30,
        widths: Optional[List[int]] = None,
        num_blocks: Optional[List[int]] = None,
        num_block_layers: Optional[List[int]] = None,
        expansion_coefficient_lengths: Optional[List[int]] = None,
        sharing: Optional[List[bool]] = None,
        stack_types: Optional[List[str]] = None,
        loss_function: Optional[str] = "MAPE",
        **kwargs,
    ) -> None:
        super().__init__(trainer=trainer, **kwargs)

        self.freq = freq
        self.prediction_length = prediction_length
        self.context_length = (context_length if context_length is not None
                               else 2 * prediction_length)
        # num_stacks has to be handled separately because other arguments have to match its length
        self.num_stacks = num_stacks
        self.loss_function = loss_function

        self.widths = self._validate_nbeats_argument(
            argument_value=widths,
            argument_name="widths",
            default_value=[512],
            validation_condition=lambda val: val > 0,
            invalidation_message="Values of 'widths' should be > 0",
        )
        self.num_blocks = self._validate_nbeats_argument(
            argument_value=num_blocks,
            argument_name="num_blocks",
            default_value=[1],
            validation_condition=lambda val: val > 0,
            invalidation_message="Values of 'num_blocks' should be > 0",
        )
        self.num_block_layers = self._validate_nbeats_argument(
            argument_value=num_block_layers,
            argument_name="num_block_layers",
            default_value=[4],
            validation_condition=lambda val: val > 0,
            invalidation_message="Values of 'block_layers' should be > 0",
        )
        self.sharing = self._validate_nbeats_argument(
            argument_value=sharing,
            argument_name="sharing",
            default_value=[False],
            validation_condition=lambda val: True,
            invalidation_message="",
        )
        self.expansion_coefficient_lengths = self._validate_nbeats_argument(
            argument_value=expansion_coefficient_lengths,
            argument_name="expansion_coefficient_lengths",
            default_value=[32],
            validation_condition=lambda val: val > 0,
            invalidation_message=
            "Values of 'expansion_coefficient_lengths' should be > 0",
        )
        self.stack_types = self._validate_nbeats_argument(
            argument_value=stack_types,
            argument_name="stack_types",
            default_value=["G"],
            validation_condition=lambda val: val in VALID_N_BEATS_STACK_TYPES,
            invalidation_message=
            f"Values of 'stack_types' should be one of {VALID_N_BEATS_STACK_TYPES}",
        )

        self.train_sampler = ExpectedNumInstanceSampler(
            num_instances=1.0, min_future=prediction_length)
        self.validation_sampler = ValidationSplitSampler(
            min_future=prediction_length)
示例#20
0
    def __init__(
        self,
        freq: str,
        prediction_length: int,
        trainer: Trainer = Trainer(
            learning_rate=0.01,
            epochs=200,
            num_batches_per_epoch=50,
            hybridize=False,
        ),
        cardinality: List[int] = [1],
        seasonality: Optional[int] = None,
        embedding_dimension: int = 5,
        num_bins: int = 1024,
        hybridize_prediction_net: bool = False,
        n_residue=24,
        n_skip=32,
        dilation_depth: Optional[int] = None,
        n_stacks: int = 1,
        train_window_length: Optional[int] = None,
        temperature: float = 1.0,
        act_type: str = "elu",
        num_parallel_samples: int = 200,
        train_sampler: Optional[InstanceSampler] = None,
        validation_sampler: Optional[InstanceSampler] = None,
        batch_size: int = 32,
        negative_data: bool = False,
    ) -> None:
        super().__init__(trainer=trainer, batch_size=batch_size)

        self.freq = freq
        self.prediction_length = prediction_length
        self.cardinality = cardinality
        self.embedding_dimension = embedding_dimension
        self.num_bins = num_bins
        self.hybridize_prediction_net = hybridize_prediction_net

        self.n_residue = n_residue
        self.n_skip = n_skip
        self.n_stacks = n_stacks
        self.train_window_length = (
            train_window_length
            if train_window_length is not None
            else prediction_length
        )
        self.temperature = temperature
        self.act_type = act_type
        self.num_parallel_samples = num_parallel_samples
        self.train_sampler = (
            train_sampler
            if train_sampler is not None
            else ExpectedNumInstanceSampler(
                num_instances=1.0, min_future=self.train_window_length
            )
        )
        self.validation_sampler = (
            validation_sampler
            if validation_sampler is not None
            else ValidationSplitSampler(min_future=self.train_window_length)
        )
        self.negative_data = negative_data

        low = -10.0 if self.negative_data else 0
        high = 10.0
        bin_centers = np.linspace(low, high, self.num_bins)
        bin_edges = np.concatenate(
            [[-1e20], (bin_centers[1:] + bin_centers[:-1]) / 2.0, [1e20]]
        )
        self.bin_centers = bin_centers.tolist()
        self.bin_edges = bin_edges.tolist()

        seasonality = (
            get_seasonality(
                self.freq,
                {
                    "H": 7 * 24,
                    "D": 7,
                    "W": 52,
                    "M": 12,
                    "B": 7 * 5,
                    "min": 24 * 60,
                },
            )
            if seasonality is None
            else seasonality
        )

        goal_receptive_length = max(
            2 * seasonality, 2 * self.prediction_length
        )
        if dilation_depth is None:
            d = 1
            while (
                WaveNet.get_receptive_field(
                    dilation_depth=d, n_stacks=n_stacks
                )
                < goal_receptive_length
            ):
                d += 1
            self.dilation_depth = d
        else:
            self.dilation_depth = dilation_depth
        self.context_length = WaveNet.get_receptive_field(
            dilation_depth=self.dilation_depth, n_stacks=n_stacks
        )
        self.logger = logging.getLogger(__name__)
        self.logger.info(
            f"Using dilation depth {self.dilation_depth} and receptive field length {self.context_length}"
        )
    def __init__(
        self,
        freq: str,
        prediction_length: int,
        input_size: int,
        trainer: Trainer = Trainer(),
        context_length: Optional[int] = None,
        num_layers: int = 2,
        num_cells: int = 40,
        cell_type: str = "LSTM",
        dropout_rate: float = 0.1,
        use_feat_dynamic_real: bool = False,
        use_feat_dynamic_cat: bool = False,
        use_feat_static_cat: bool = False,
        use_feat_static_real: bool = False,
        cardinality: Optional[List[int]] = None,
        embedding_dimension: Optional[List[int]] = None,
        distr_output: DistributionOutput = StudentTOutput(),
        scaling: bool = True,
        lags_seq: Optional[List[int]] = None,
        time_features: Optional[List[TimeFeature]] = None,
        num_parallel_samples: int = 100,
        dtype: np.dtype = np.float32,
    ) -> None:
        super().__init__(trainer=trainer)

        self.freq = freq
        self.context_length = (context_length if context_length is not None
                               else prediction_length)
        self.prediction_length = prediction_length
        self.distr_output = distr_output
        self.distr_output.dtype = dtype
        self.input_size = input_size
        self.num_layers = num_layers
        self.num_cells = num_cells
        self.cell_type = cell_type
        self.dropout_rate = dropout_rate
        self.use_feat_dynamic_real = use_feat_dynamic_real
        self.use_feat_dynamic_cat = use_feat_dynamic_cat
        self.use_feat_static_cat = use_feat_static_cat
        self.use_feat_static_real = use_feat_static_real
        self.cardinality = cardinality if cardinality and use_feat_static_cat else [
            1
        ]
        self.embedding_dimension = (
            embedding_dimension if embedding_dimension is not None else
            [min(50, (cat + 1) // 2) for cat in self.cardinality])
        self.scaling = scaling
        self.lags_seq = (lags_seq if lags_seq is not None else
                         get_lags_for_frequency(freq_str=freq))
        self.time_features = (time_features if time_features is not None else
                              time_features_from_frequency_str(self.freq))

        self.history_length = self.context_length + max(self.lags_seq)

        self.num_parallel_samples = num_parallel_samples

        self.train_sampler = ExpectedNumInstanceSampler(
            num_instances=1.0, min_future=prediction_length)
        self.validation_sampler = ValidationSplitSampler(
            min_future=prediction_length)
    def __init__(
        self,
        input_size: int,
        freq: str,
        prediction_length: int,
        target_dim: int,
        trainer: Trainer = Trainer(),
        context_length: Optional[int] = None,
        num_layers: int = 2,
        num_cells: int = 40,
        cell_type: str = "LSTM",
        num_parallel_samples: int = 100,
        dropout_rate: float = 0.1,
        use_feat_dynamic_real: bool = False,
        use_feat_static_cat: bool = False,
        use_feat_static_real: bool = False,
        cardinality: Optional[List[int]] = None,
        embedding_dimension: Optional[List[int]] = None,
        distr_output: Optional[DistributionOutput] = None,
        rank: Optional[int] = 5,
        scaling: bool = True,
        pick_incomplete: bool = False,
        lags_seq: Optional[List[int]] = None,
        time_features: Optional[List[TimeFeature]] = None,
        conditioning_length: int = 200,
        use_marginal_transformation=False,
        **kwargs,
    ) -> None:
        super().__init__(trainer=trainer, **kwargs)

        self.freq = freq
        self.context_length = (context_length if context_length is not None
                               else prediction_length)

        if distr_output is not None:
            self.distr_output = distr_output
        else:
            self.distr_output = LowRankMultivariateNormalOutput(dim=target_dim,
                                                                rank=rank)

        self.input_size = input_size
        self.prediction_length = prediction_length
        self.target_dim = target_dim
        self.num_layers = num_layers
        self.num_cells = num_cells
        self.cell_type = cell_type
        self.num_parallel_samples = num_parallel_samples
        self.dropout_rate = dropout_rate
        self.use_feat_dynamic_real = use_feat_dynamic_real
        self.use_feat_static_cat = use_feat_static_cat
        self.use_feat_static_real = use_feat_static_real
        self.cardinality = cardinality if cardinality and use_feat_static_cat else [
            1
        ]
        self.embedding_dimension = (
            embedding_dimension if embedding_dimension is not None else
            [min(50, (cat + 1) // 2) for cat in self.cardinality])
        self.conditioning_length = conditioning_length
        self.use_marginal_transformation = use_marginal_transformation

        self.lags_seq = (lags_seq if lags_seq is not None else
                         lags_for_fourier_time_features_from_frequency(
                             freq_str=freq))

        self.time_features = (time_features if time_features is not None else
                              fourier_time_features_from_frequency(self.freq))

        self.history_length = self.context_length + max(self.lags_seq)
        self.pick_incomplete = pick_incomplete
        self.scaling = scaling

        if self.use_marginal_transformation:
            self.output_transform: Optional[
                Callable] = cdf_to_gaussian_forward_transform
        else:
            self.output_transform = None

        self.train_sampler = ExpectedNumInstanceSampler(
            num_instances=1.0,
            min_past=0 if pick_incomplete else self.history_length,
            min_future=prediction_length,
        )

        self.validation_sampler = ValidationSplitSampler(
            min_past=0 if pick_incomplete else self.history_length,
            min_future=prediction_length,
        )
示例#23
0
def test_forking_sequence_with_features(is_train) -> None:
    def make_dataset(N, train_length):
        # generates 2 ** N - 1 timeseries with constant increasing values
        n = 2**N - 1

        targets = np.arange(n * train_length).reshape((n, train_length))

        return ListDataset(
            [{
                "start": "2012-01-01",
                "target": targets[i, :]
            } for i in range(n)],
            freq="D",
        )

    ds = make_dataset(1, 20)
    enc_len = 5
    dec_len = 3
    num_forking = 1
    num_time_feat_daily_freq = 3
    num_age_feat = 1

    trans = transform.Chain(trans=[
        transform.AddAgeFeature(
            target_field=FieldName.TARGET,
            output_field=FieldName.FEAT_AGE,
            pred_length=10,
        ),
        transform.AddTimeFeatures(
            start_field=FieldName.START,
            target_field=FieldName.TARGET,
            output_field=FieldName.FEAT_TIME,
            time_features=time_features_from_frequency_str("D"),
            pred_length=10,
        ),
        ForkingSequenceSplitter(
            instance_sampler=ValidationSplitSampler(
                min_future=dec_len) if is_train else TSplitSampler(),
            enc_len=enc_len,
            dec_len=dec_len,
            num_forking=num_forking,
            encoder_series_fields=[
                FieldName.FEAT_AGE,
                FieldName.FEAT_TIME,
            ],
            decoder_series_fields=[FieldName.FEAT_TIME],
        ),
    ])

    out = trans(iter(ds), is_train=is_train)
    transformed_data = next(iter(out))

    assert transformed_data["past_target"].shape == (enc_len, 1)
    assert transformed_data["past_feat_dynamic_age"].shape == (
        enc_len,
        num_age_feat,
    )
    assert transformed_data["past_time_feat"].shape == (
        enc_len,
        num_time_feat_daily_freq,
    )
    assert transformed_data["future_time_feat"].shape == (
        num_forking,
        dec_len,
        num_time_feat_daily_freq,
    )

    if is_train:
        assert transformed_data["future_target"].shape == (
            num_forking,
            dec_len,
        )
示例#24
0
    def __init__(
        self,
        freq: str,
        prediction_length: int,
        trainer: Trainer = Trainer(),
        context_length: Optional[int] = None,
        num_layers: int = 2,
        num_cells: int = 40,
        cell_type: str = "lstm",
        dropoutcell_type: str = "ZoneoutCell",
        dropout_rate: float = 0.1,
        use_feat_dynamic_real: bool = False,
        use_feat_static_cat: bool = False,
        use_feat_static_real: bool = False,
        cardinality: Optional[List[int]] = None,
        embedding_dimension: Optional[List[int]] = None,
        distr_output: DistributionOutput = StudentTOutput(),
        scaling: bool = True,
        lags_seq: Optional[List[int]] = None,
        time_features: Optional[List[TimeFeature]] = None,
        num_parallel_samples: int = 100,
        imputation_method: Optional[MissingValueImputation] = None,
        train_sampler: Optional[InstanceSampler] = None,
        validation_sampler: Optional[InstanceSampler] = None,
        dtype: DType = np.float32,
        alpha: float = 0.0,
        beta: float = 0.0,
        batch_size: int = 32,
        default_scale: Optional[float] = None,
        minimum_scale: float = 1e-10,
        impute_missing_values: bool = False,
        num_imputation_samples: int = 1,
    ) -> None:
        super().__init__(trainer=trainer, batch_size=batch_size, dtype=dtype)

        assert (prediction_length >
                0), "The value of `prediction_length` should be > 0"
        assert (context_length is None or context_length > 0
                ), "The value of `context_length` should be > 0"
        assert num_layers > 0, "The value of `num_layers` should be > 0"
        assert num_cells > 0, "The value of `num_cells` should be > 0"
        supported_dropoutcell_types = [
            "ZoneoutCell",
            "RNNZoneoutCell",
            "VariationalDropoutCell",
            "VariationalZoneoutCell",
        ]
        assert (
            dropoutcell_type in supported_dropoutcell_types
        ), f"`dropoutcell_type` should be one of {supported_dropoutcell_types}"
        assert dropout_rate >= 0, "The value of `dropout_rate` should be >= 0"
        assert (cardinality and use_feat_static_cat) or (
            not (cardinality or use_feat_static_cat)
        ), "You should set `cardinality` if and only if `use_feat_static_cat=True`"
        assert cardinality is None or all(
            [c > 0
             for c in cardinality]), "Elements of `cardinality` should be > 0"
        assert embedding_dimension is None or all([
            e > 0 for e in embedding_dimension
        ]), "Elements of `embedding_dimension` should be > 0"
        assert (num_parallel_samples >
                0), "The value of `num_parallel_samples` should be > 0"
        assert alpha >= 0, "The value of `alpha` should be >= 0"
        assert beta >= 0, "The value of `beta` should be >= 0"

        self.freq = freq
        self.context_length = (context_length if context_length is not None
                               else prediction_length)
        self.prediction_length = prediction_length
        self.distr_output = distr_output
        self.distr_output.dtype = dtype
        self.num_layers = num_layers
        self.num_cells = num_cells
        self.cell_type = cell_type
        self.dropoutcell_type = dropoutcell_type
        self.dropout_rate = dropout_rate
        self.use_feat_dynamic_real = use_feat_dynamic_real
        self.use_feat_static_cat = use_feat_static_cat
        self.use_feat_static_real = use_feat_static_real
        self.cardinality = (cardinality
                            if cardinality and use_feat_static_cat else [1])
        self.embedding_dimension = (
            embedding_dimension if embedding_dimension is not None else
            [min(50, (cat + 1) // 2) for cat in self.cardinality])
        self.scaling = scaling
        self.lags_seq = (lags_seq if lags_seq is not None else
                         get_lags_for_frequency(freq_str=freq))
        self.time_features = (time_features if time_features is not None else
                              time_features_from_frequency_str(self.freq))

        self.history_length = self.context_length + max(self.lags_seq)

        self.num_parallel_samples = num_parallel_samples

        self.imputation_method = (imputation_method if imputation_method
                                  is not None else DummyValueImputation(
                                      self.distr_output.value_in_support))

        self.train_sampler = (train_sampler if train_sampler is not None else
                              ExpectedNumInstanceSampler(
                                  num_instances=1.0,
                                  min_future=prediction_length))
        self.validation_sampler = (validation_sampler if validation_sampler
                                   is not None else ValidationSplitSampler(
                                       min_future=prediction_length))

        self.alpha = alpha
        self.beta = beta
        self.num_imputation_samples = num_imputation_samples
        self.default_scale = default_scale
        self.minimum_scale = minimum_scale
        self.impute_missing_values = impute_missing_values
示例#25
0
    def __init__(
        self,
        freq: str,
        prediction_length: int,
        context_length: Optional[int] = None,
        trainer: Trainer = Trainer(),
        dropout_rate: float = 0.1,
        cardinality: Optional[List[int]] = None,
        embedding_dimension: int = 20,
        distr_output: DistributionOutput = StudentTOutput(),
        model_dim: int = 32,
        inner_ff_dim_scale: int = 4,
        pre_seq: str = "dn",
        post_seq: str = "drn",
        act_type: str = "softrelu",
        num_heads: int = 8,
        scaling: bool = True,
        lags_seq: Optional[List[int]] = None,
        time_features: Optional[List[TimeFeature]] = None,
        use_feat_dynamic_real: bool = False,
        use_feat_static_cat: bool = False,
        num_parallel_samples: int = 100,
        train_sampler: Optional[InstanceSampler] = None,
        validation_sampler: Optional[InstanceSampler] = None,
        batch_size: int = 32,
    ) -> None:
        super().__init__(trainer=trainer, batch_size=batch_size)

        assert (prediction_length >
                0), "The value of `prediction_length` should be > 0"
        assert (context_length is None or context_length > 0
                ), "The value of `context_length` should be > 0"
        assert dropout_rate >= 0, "The value of `dropout_rate` should be >= 0"
        assert (cardinality is not None or not use_feat_static_cat
                ), "You must set `cardinality` if `use_feat_static_cat=True`"
        assert cardinality is None or all(
            [c > 0
             for c in cardinality]), "Elements of `cardinality` should be > 0"
        assert (embedding_dimension >
                0), "The value of `embedding_dimension` should be > 0"
        assert (num_parallel_samples >
                0), "The value of `num_parallel_samples` should be > 0"

        self.freq = freq
        self.prediction_length = prediction_length
        self.context_length = (context_length if context_length is not None
                               else prediction_length)
        self.distr_output = distr_output
        self.dropout_rate = dropout_rate
        self.use_feat_dynamic_real = use_feat_dynamic_real
        self.use_feat_static_cat = use_feat_static_cat
        self.cardinality = cardinality if use_feat_static_cat else [1]
        self.embedding_dimension = embedding_dimension
        self.num_parallel_samples = num_parallel_samples
        self.lags_seq = (lags_seq if lags_seq is not None else
                         get_lags_for_frequency(freq_str=freq))
        self.time_features = (time_features if time_features is not None else
                              time_features_from_frequency_str(self.freq))
        self.history_length = self.context_length + max(self.lags_seq)
        self.scaling = scaling

        self.config = {
            "model_dim": model_dim,
            "pre_seq": pre_seq,
            "post_seq": post_seq,
            "dropout_rate": dropout_rate,
            "inner_ff_dim_scale": inner_ff_dim_scale,
            "act_type": act_type,
            "num_heads": num_heads,
        }

        self.encoder = TransformerEncoder(self.context_length,
                                          self.config,
                                          prefix="enc_")
        self.decoder = TransformerDecoder(self.prediction_length,
                                          self.config,
                                          prefix="dec_")
        self.train_sampler = (train_sampler if train_sampler is not None else
                              ExpectedNumInstanceSampler(
                                  num_instances=1.0,
                                  min_future=prediction_length))
        self.validation_sampler = (validation_sampler if validation_sampler
                                   is not None else ValidationSplitSampler(
                                       min_future=prediction_length))