示例#1
0
 def __init__(
     self,
     freq: str,
     context_length: int,
     prediction_length: int,
     num_feat_dynamic_real: int,
     num_feat_static_real: int,
     num_feat_static_cat: int,
     cardinality: List[int],
     embedding_dimension: Optional[List[int]] = None,
     num_layers: int = 2,
     hidden_size: int = 40,
     dropout_rate: float = 0.1,
     distr_output: DistributionOutput = StudentTOutput(),
     lags_seq: Optional[List[int]] = None,
     scaling: bool = True,
     num_parallel_samples: int = 100,
 ) -> None:
     """Store the configuration and build the network's sub-components.

     Derived values:

     * ``embedding_dimension`` falls back to ``min(50, (c + 1) // 2)`` for
       each entry ``c`` of ``cardinality`` when it is not given and
       ``cardinality`` is not ``None``.
     * ``lags_seq`` falls back to ``get_lags_for_frequency(freq_str=freq)``
       when it is ``None`` or empty.
     * ``history_length`` is ``context_length`` plus the largest lag.

     Components created here: the distribution argument projection, the
     feature embedder, the scaler (``MeanScaler`` when ``scaling`` is true,
     ``NOPScaler`` otherwise) and the lagged LSTM.
     """
     super().__init__()

     # Plain configuration values.
     self.context_length = context_length
     self.prediction_length = prediction_length
     self.num_feat_dynamic_real = num_feat_dynamic_real
     self.num_feat_static_cat = num_feat_static_cat
     self.num_feat_static_real = num_feat_static_real
     self.num_parallel_samples = num_parallel_samples

     # Output distribution and the projection producing its arguments.
     self.distr_output = distr_output
     self.param_proj = distr_output.get_args_proj(hidden_size)
     self.target_shape = distr_output.event_shape

     # Derive embedding sizes only when none were given and the
     # cardinalities are known.
     if embedding_dimension is None and cardinality is not None:
         embedding_dimension = [
             min(50, (size + 1) // 2) for size in cardinality
         ]
     self.embedding_dimension = embedding_dimension

     # A missing or empty lag list is replaced by the frequency defaults.
     if not lags_seq:
         lags_seq = get_lags_for_frequency(freq_str=freq)
     self.lags_seq = lags_seq
     self.history_length = self.context_length + max(self.lags_seq)

     self.embedder = FeatureEmbedder(
         cardinalities=cardinality,
         embedding_dims=self.embedding_dimension,
     )

     scaler_cls = MeanScaler if scaling else NOPScaler
     self.scaler = scaler_cls(dim=1, keepdim=True)

     # The recurrent block receives every lag shifted down by one.
     shifted_lags = [lag - 1 for lag in self.lags_seq]
     self.lagged_rnn = LaggedLSTM(
         input_size=1,  # TODO: hard-coded input size still needs fixing
         features_size=self._number_of_features,
         num_layers=num_layers,
         hidden_size=hidden_size,
         dropout_rate=dropout_rate,
         lags_seq=shifted_lags,
     )
示例#2
0
    def __init__(
        self,
        freq: str,
        prediction_length: int,
        lag_indices: Optional[List[int]] = None,
        time_features: Optional[List[TimeFeature]] = None,
        scaling: Callable[
            [pd.Series], Tuple[pd.Series, float]
        ] = mean_abs_scaling,
        batch_size: Optional[int] = 32,
        disable_auto_regression: bool = False,
        last_k_for_val: Optional[int] = None,
        quantiles_to_predict: Optional[List[float]] = None,
        eval_metric: str = "mean_absolute_error",
        **kwargs,
    ) -> None:
        """Record the estimator settings and assemble the training kwargs.

        ``lag_indices`` and ``time_features`` default to the values derived
        from ``freq`` when they are ``None``. With
        ``disable_auto_regression`` set, only lags that are at least
        ``prediction_length`` are kept.

        Extra keyword arguments are merged over a set of defaults
        (``time_limit``, ``presets``, ``auto_stack``) and stored in
        ``self.kwargs``; caller-supplied keys win.
        """
        super().__init__()

        self.freq = freq
        self.prediction_length = prediction_length

        # Resolve the lags first, then optionally drop the short ones.
        if lag_indices is None:
            lag_indices = get_lags_for_frequency(self.freq)
        if disable_auto_regression:
            lag_indices = [
                lag for lag in lag_indices if lag >= self.prediction_length
            ]
        self.lag_indices = lag_indices

        if time_features is None:
            time_features = time_features_from_frequency_str(self.freq)
        self.time_features = time_features

        self.batch_size = batch_size
        self.disable_auto_regression = disable_auto_regression
        self.scaling = scaling
        self.last_k_for_val = last_k_for_val
        self.eval_metric = eval_metric
        self.quantiles_to_predict = quantiles_to_predict

        # Defaults; an "excluded_model_types" entry (["KNN", "XT", "RF"])
        # is intentionally left disabled.
        merged_kwargs = {
            "time_limit": 60,
            "presets": [
                "high_quality_fast_inference_only_refit",
                "optimize_for_deployment",
            ],
            "auto_stack": True,
        }
        merged_kwargs.update(kwargs)
        self.kwargs = merged_kwargs
示例#3
0
    def __init__(
        self,
        freq: str,
        prediction_length: int,
        trainer: Trainer = Trainer(),
        context_length: Optional[int] = None,
        num_layers: int = 2,
        num_cells: int = 40,
        cell_type: str = "lstm",
        dropoutcell_type: str = "ZoneoutCell",
        dropout_rate: float = 0.1,
        use_feat_dynamic_real: bool = False,
        use_feat_static_cat: bool = False,
        use_feat_static_real: bool = False,
        cardinality: Optional[List[int]] = None,
        embedding_dimension: Optional[List[int]] = None,
        distr_output: DistributionOutput = StudentTOutput(),
        scaling: bool = True,
        lags_seq: Optional[List[int]] = None,
        time_features: Optional[List[TimeFeature]] = None,
        num_parallel_samples: int = 100,
        imputation_method: Optional[MissingValueImputation] = None,
        train_sampler: Optional[InstanceSampler] = None,
        validation_sampler: Optional[InstanceSampler] = None,
        dtype: DType = np.float32,
        alpha: float = 0.0,
        beta: float = 0.0,
        batch_size: int = 32,
        default_scale: Optional[float] = None,
        minimum_scale: float = 1e-10,
        impute_missing_values: bool = False,
        num_imputation_samples: int = 1,
    ) -> None:
        """Validate the hyper-parameters and store them on the estimator.

        Arguments are checked with ``assert`` statements, so an invalid
        value raises ``AssertionError`` (unless assertions are disabled).

        Defaults resolved here:

        * ``context_length`` falls back to ``prediction_length``.
        * ``cardinality`` becomes ``[1]`` unless it is non-empty and
          ``use_feat_static_cat`` is true.
        * ``embedding_dimension`` falls back to ``min(50, (c + 1) // 2)``
          per resolved cardinality ``c``.
        * ``lags_seq`` and ``time_features`` are derived from ``freq``.
        * ``imputation_method`` falls back to ``DummyValueImputation`` built
          from ``distr_output.value_in_support``.
        * ``train_sampler`` / ``validation_sampler`` fall back to
          ``ExpectedNumInstanceSampler`` / ``ValidationSplitSampler`` with
          ``min_future=prediction_length``.

        Note that ``dtype`` is written onto the ``distr_output`` object that
        was passed in.
        """
        super().__init__(trainer=trainer, batch_size=batch_size, dtype=dtype)

        # ---- argument validation -------------------------------------
        assert (
            prediction_length > 0
        ), "The value of `prediction_length` should be > 0"
        assert (
            context_length is None or context_length > 0
        ), "The value of `context_length` should be > 0"
        assert num_layers > 0, "The value of `num_layers` should be > 0"
        assert num_cells > 0, "The value of `num_cells` should be > 0"
        supported_dropoutcell_types = [
            "ZoneoutCell",
            "RNNZoneoutCell",
            "VariationalDropoutCell",
            "VariationalZoneoutCell",
        ]
        assert (
            dropoutcell_type in supported_dropoutcell_types
        ), f"`dropoutcell_type` should be one of {supported_dropoutcell_types}"
        assert dropout_rate >= 0, "The value of `dropout_rate` should be >= 0"
        # Both must be "set" or both must be "unset".
        assert bool(cardinality) == bool(
            use_feat_static_cat
        ), "You should set `cardinality` if and only if `use_feat_static_cat=True`"
        assert cardinality is None or all(
            [size > 0 for size in cardinality]
        ), "Elements of `cardinality` should be > 0"
        assert embedding_dimension is None or all(
            [dim > 0 for dim in embedding_dimension]
        ), "Elements of `embedding_dimension` should be > 0"
        assert (
            num_parallel_samples > 0
        ), "The value of `num_parallel_samples` should be > 0"
        assert alpha >= 0, "The value of `alpha` should be >= 0"
        assert beta >= 0, "The value of `beta` should be >= 0"

        # ---- values stored verbatim ----------------------------------
        self.freq = freq
        self.prediction_length = prediction_length
        self.num_layers = num_layers
        self.num_cells = num_cells
        self.cell_type = cell_type
        self.dropoutcell_type = dropoutcell_type
        self.dropout_rate = dropout_rate
        self.use_feat_dynamic_real = use_feat_dynamic_real
        self.use_feat_static_cat = use_feat_static_cat
        self.use_feat_static_real = use_feat_static_real
        self.scaling = scaling
        self.num_parallel_samples = num_parallel_samples
        self.alpha = alpha
        self.beta = beta
        self.num_imputation_samples = num_imputation_samples
        self.default_scale = default_scale
        self.minimum_scale = minimum_scale
        self.impute_missing_values = impute_missing_values

        # ---- values with fallbacks -----------------------------------
        if context_length is None:
            context_length = prediction_length
        self.context_length = context_length

        self.distr_output = distr_output
        self.distr_output.dtype = dtype

        if cardinality and use_feat_static_cat:
            self.cardinality = cardinality
        else:
            self.cardinality = [1]

        if embedding_dimension is None:
            embedding_dimension = [
                min(50, (size + 1) // 2) for size in self.cardinality
            ]
        self.embedding_dimension = embedding_dimension

        if lags_seq is None:
            lags_seq = get_lags_for_frequency(freq_str=freq)
        self.lags_seq = lags_seq

        if time_features is None:
            time_features = time_features_from_frequency_str(self.freq)
        self.time_features = time_features

        # Enough history to cover the context window plus the largest lag.
        self.history_length = self.context_length + max(self.lags_seq)

        if imputation_method is None:
            imputation_method = DummyValueImputation(
                self.distr_output.value_in_support
            )
        self.imputation_method = imputation_method

        if train_sampler is None:
            train_sampler = ExpectedNumInstanceSampler(
                num_instances=1.0, min_future=prediction_length
            )
        self.train_sampler = train_sampler

        if validation_sampler is None:
            validation_sampler = ValidationSplitSampler(
                min_future=prediction_length
            )
        self.validation_sampler = validation_sampler
示例#4
0
    def __init__(
        self,
        freq: str,
        prediction_length: int,
        context_length: Optional[int] = None,
        trainer: Trainer = Trainer(),
        dropout_rate: float = 0.1,
        cardinality: Optional[List[int]] = None,
        embedding_dimension: int = 20,
        distr_output: DistributionOutput = StudentTOutput(),
        model_dim: int = 32,
        inner_ff_dim_scale: int = 4,
        pre_seq: str = "dn",
        post_seq: str = "drn",
        act_type: str = "softrelu",
        num_heads: int = 8,
        scaling: bool = True,
        lags_seq: Optional[List[int]] = None,
        time_features: Optional[List[TimeFeature]] = None,
        use_feat_dynamic_real: bool = False,
        use_feat_static_cat: bool = False,
        num_parallel_samples: int = 100,
        train_sampler: Optional[InstanceSampler] = None,
        validation_sampler: Optional[InstanceSampler] = None,
        batch_size: int = 32,
    ) -> None:
        """Validate the settings, store them, and build encoder and decoder.

        Arguments are checked with ``assert`` statements. ``context_length``
        falls back to ``prediction_length``; ``cardinality`` is replaced by
        ``[1]`` when ``use_feat_static_cat`` is false; ``lags_seq`` and
        ``time_features`` are derived from ``freq`` when not given.

        The transformer settings are gathered in ``self.config`` and passed
        to a ``TransformerEncoder`` (sized by the context length) and a
        ``TransformerDecoder`` (sized by the prediction length). Missing
        samplers default to ``ExpectedNumInstanceSampler`` /
        ``ValidationSplitSampler`` with ``min_future=prediction_length``.
        """
        super().__init__(trainer=trainer, batch_size=batch_size)

        # ---- argument validation -------------------------------------
        assert (
            prediction_length > 0
        ), "The value of `prediction_length` should be > 0"
        assert (
            context_length is None or context_length > 0
        ), "The value of `context_length` should be > 0"
        assert dropout_rate >= 0, "The value of `dropout_rate` should be >= 0"
        # Static categorical features cannot be used without cardinalities.
        assert not (
            cardinality is None and use_feat_static_cat
        ), "You must set `cardinality` if `use_feat_static_cat=True`"
        assert cardinality is None or all(
            [size > 0 for size in cardinality]
        ), "Elements of `cardinality` should be > 0"
        assert (
            embedding_dimension > 0
        ), "The value of `embedding_dimension` should be > 0"
        assert (
            num_parallel_samples > 0
        ), "The value of `num_parallel_samples` should be > 0"

        # ---- values stored verbatim ----------------------------------
        self.freq = freq
        self.prediction_length = prediction_length
        self.distr_output = distr_output
        self.dropout_rate = dropout_rate
        self.use_feat_dynamic_real = use_feat_dynamic_real
        self.use_feat_static_cat = use_feat_static_cat
        self.embedding_dimension = embedding_dimension
        self.num_parallel_samples = num_parallel_samples
        self.scaling = scaling

        # ---- values with fallbacks -----------------------------------
        if context_length is None:
            context_length = prediction_length
        self.context_length = context_length

        if use_feat_static_cat:
            self.cardinality = cardinality
        else:
            self.cardinality = [1]

        if lags_seq is None:
            lags_seq = get_lags_for_frequency(freq_str=freq)
        self.lags_seq = lags_seq

        if time_features is None:
            time_features = time_features_from_frequency_str(self.freq)
        self.time_features = time_features

        # Enough history to cover the context window plus the largest lag.
        self.history_length = self.context_length + max(self.lags_seq)

        # ---- transformer components ----------------------------------
        self.config = {
            "model_dim": model_dim,
            "pre_seq": pre_seq,
            "post_seq": post_seq,
            "dropout_rate": dropout_rate,
            "inner_ff_dim_scale": inner_ff_dim_scale,
            "act_type": act_type,
            "num_heads": num_heads,
        }
        self.encoder = TransformerEncoder(
            self.context_length, self.config, prefix="enc_"
        )
        self.decoder = TransformerDecoder(
            self.prediction_length, self.config, prefix="dec_"
        )

        # ---- samplers ------------------------------------------------
        if train_sampler is None:
            train_sampler = ExpectedNumInstanceSampler(
                num_instances=1.0, min_future=prediction_length
            )
        self.train_sampler = train_sampler

        if validation_sampler is None:
            validation_sampler = ValidationSplitSampler(
                min_future=prediction_length
            )
        self.validation_sampler = validation_sampler
示例#5
0
    def __init__(
        self,
        freq: str,
        prediction_length: int,
        trainer: Optional[Trainer] = Trainer(),
        context_length: Optional[int] = None,
        num_layers: Optional[int] = 2,
        num_cells: Optional[int] = 40,
        cell_type: Optional[str] = "lstm",
        dropout_rate: Optional[float] = 0.1,
        use_feat_dynamic_real: Optional[bool] = False,
        use_feat_static_cat: Optional[bool] = False,
        use_feat_static_real: Optional[bool] = False,
        cardinality: Optional[List[int]] = None,
        embedding_dimension: Optional[List[int]] = None,
        scaling: Optional[bool] = True,
        lags_seq: Optional[List[int]] = None,
        time_features: Optional[List[TimeFeature]] = None,
        num_parallel_samples: Optional[int] = 100,
        forecast_type: Optional[str] = "flat",
        dtype: Optional[DType] = np.float32,
    ) -> None:
        """Validate the hyper-parameters and store them on the estimator.

        Arguments are checked with ``assert`` statements. Two separate
        ``NegativeBinomialOutput`` instances are created (``distr_output_m``
        and ``distr_output_q``) and both receive ``dtype``.

        ``context_length`` falls back to ``prediction_length``;
        ``cardinality`` becomes ``[1]`` unless it is non-empty and
        ``use_feat_static_cat`` is true; ``embedding_dimension`` falls back
        to ``min(50, (c + 1) // 2)`` per resolved cardinality ``c``;
        ``lags_seq`` and ``time_features`` are derived from ``freq`` when
        not given.
        """
        super().__init__(trainer=trainer, dtype=dtype)

        # ---- argument validation -------------------------------------
        assert (
            prediction_length > 0
        ), "The value of `prediction_length` should be > 0"
        assert (
            context_length is None or context_length > 0
        ), "The value of `context_length` should be > 0"
        assert num_layers > 0, "The value of `num_layers` should be > 0"
        assert num_cells > 0, "The value of `num_cells` should be > 0"
        assert dropout_rate >= 0, "The value of `dropout_rate` should be >= 0"
        # `cardinality` must be given exactly when static categorical
        # features are enabled.
        assert (cardinality is not None) == bool(
            use_feat_static_cat
        ), "You should set `cardinality` if and only if `use_feat_static_cat=True`"
        assert cardinality is None or all(
            [size > 0 for size in cardinality]
        ), "Elements of `cardinality` should be > 0"
        assert embedding_dimension is None or all(
            [dim > 0 for dim in embedding_dimension]
        ), "Elements of `embedding_dimension` should be > 0"
        assert (
            num_parallel_samples > 0
        ), "The value of `num_parallel_samples` should be > 0"

        # ---- values stored verbatim ----------------------------------
        self.freq = freq
        self.prediction_length = prediction_length
        self.num_layers = num_layers
        self.num_cells = num_cells
        self.cell_type = cell_type
        self.dropout_rate = dropout_rate
        self.use_feat_dynamic_real = use_feat_dynamic_real
        self.use_feat_static_cat = use_feat_static_cat
        self.use_feat_static_real = use_feat_static_real
        self.scaling = scaling
        self.num_parallel_samples = num_parallel_samples
        self.forecast_type = forecast_type

        # ---- output distributions ------------------------------------
        self.distr_output_m = NegativeBinomialOutput()
        self.distr_output_q = NegativeBinomialOutput()
        self.distr_output_m.dtype = dtype
        self.distr_output_q.dtype = dtype

        # ---- values with fallbacks -----------------------------------
        if context_length is None:
            context_length = prediction_length
        self.context_length = context_length

        if cardinality and use_feat_static_cat:
            self.cardinality = cardinality
        else:
            self.cardinality = [1]

        if embedding_dimension is None:
            embedding_dimension = [
                min(50, (size + 1) // 2) for size in self.cardinality
            ]
        self.embedding_dimension = embedding_dimension

        if lags_seq is None:
            lags_seq = get_lags_for_frequency(freq_str=freq)
        self.lags_seq = lags_seq

        if time_features is None:
            time_features = time_features_from_frequency_str(self.freq)
        self.time_features = time_features

        # Enough history to cover the context window plus the largest lag.
        self.history_length = self.context_length + max(self.lags_seq)
    def __init__(
        self,
        freq: str,
        prediction_length: int,
        input_size: int,
        trainer: Trainer = Trainer(),
        context_length: Optional[int] = None,
        num_layers: int = 2,
        num_cells: int = 40,
        cell_type: str = "LSTM",
        dropout_rate: float = 0.1,
        use_feat_dynamic_real: bool = False,
        use_feat_dynamic_cat: bool = False,
        use_feat_static_cat: bool = False,
        use_feat_static_real: bool = False,
        cardinality: Optional[List[int]] = None,
        embedding_dimension: Optional[List[int]] = None,
        distr_output: DistributionOutput = StudentTOutput(),
        scaling: bool = True,
        lags_seq: Optional[List[int]] = None,
        time_features: Optional[List[TimeFeature]] = None,
        num_parallel_samples: int = 100,
        dtype: np.dtype = np.float32,
    ) -> None:
        """Store the hyper-parameters and create the default samplers.

        No argument validation is performed here. ``dtype`` is written onto
        the ``distr_output`` object that was passed in; it is not forwarded
        to the base-class constructor.

        ``context_length`` falls back to ``prediction_length``;
        ``cardinality`` becomes ``[1]`` unless it is non-empty and
        ``use_feat_static_cat`` is true; ``embedding_dimension`` falls back
        to ``min(50, (c + 1) // 2)`` per resolved cardinality ``c``;
        ``lags_seq`` and ``time_features`` are derived from ``freq`` when
        not given. The train and validation samplers are always created
        here with ``min_future=prediction_length``.
        """
        super().__init__(trainer=trainer)

        # ---- values stored verbatim ----------------------------------
        self.freq = freq
        self.prediction_length = prediction_length
        self.input_size = input_size
        self.num_layers = num_layers
        self.num_cells = num_cells
        self.cell_type = cell_type
        self.dropout_rate = dropout_rate
        self.use_feat_dynamic_real = use_feat_dynamic_real
        self.use_feat_dynamic_cat = use_feat_dynamic_cat
        self.use_feat_static_cat = use_feat_static_cat
        self.use_feat_static_real = use_feat_static_real
        self.scaling = scaling
        self.num_parallel_samples = num_parallel_samples

        # ---- values with fallbacks -----------------------------------
        if context_length is None:
            context_length = prediction_length
        self.context_length = context_length

        self.distr_output = distr_output
        self.distr_output.dtype = dtype

        if cardinality and use_feat_static_cat:
            self.cardinality = cardinality
        else:
            self.cardinality = [1]

        if embedding_dimension is None:
            embedding_dimension = [
                min(50, (size + 1) // 2) for size in self.cardinality
            ]
        self.embedding_dimension = embedding_dimension

        if lags_seq is None:
            lags_seq = get_lags_for_frequency(freq_str=freq)
        self.lags_seq = lags_seq

        if time_features is None:
            time_features = time_features_from_frequency_str(self.freq)
        self.time_features = time_features

        # Enough history to cover the context window plus the largest lag.
        self.history_length = self.context_length + max(self.lags_seq)

        # ---- samplers ------------------------------------------------
        self.train_sampler = ExpectedNumInstanceSampler(
            num_instances=1.0,
            min_future=prediction_length,
        )
        self.validation_sampler = ValidationSplitSampler(
            min_future=prediction_length,
        )
示例#7
0
    def __init__(
        self,
        freq: str,
        prediction_length: int,
        trainer: Trainer = Trainer(),
        context_length: Optional[int] = None,
        num_layers: int = 2,
        num_cells: int = 40,
        cell_type: str = "lstm",
        dropout_rate: float = 0.1,
        use_feat_dynamic_real: bool = False,
        use_feat_static_cat: bool = False,
        use_feat_static_real: bool = False,
        cardinality: Optional[List[int]] = None,
        embedding_dimension: Optional[List[int]] = None,
        distr_output: DistributionOutput = StudentTOutput(),
        scaling: bool = True,
        lags_seq: Optional[List[int]] = None,
        time_features: Optional[List[TimeFeature]] = None,
        num_parallel_samples: int = 100,
        imputation_method: Optional[MissingValueImputation] = None,
        dtype: DType = np.float32,
    ) -> None:
        """Validate the hyper-parameters and store them on the estimator.

        Arguments are checked with ``assert`` statements. ``dtype`` is
        written onto the ``distr_output`` object that was passed in.

        ``context_length`` falls back to ``prediction_length``;
        ``cardinality`` becomes ``[1]`` unless it is non-empty and
        ``use_feat_static_cat`` is true; ``embedding_dimension`` falls back
        to ``min(50, (c + 1) // 2)`` per resolved cardinality ``c``;
        ``lags_seq`` and ``time_features`` are derived from ``freq``;
        ``imputation_method`` falls back to ``DummyValueImputation`` built
        from ``distr_output.value_in_support``.
        """
        super().__init__(trainer=trainer, dtype=dtype)

        # ---- argument validation -------------------------------------
        assert (
            prediction_length > 0
        ), "The value of `prediction_length` should be > 0"
        assert (
            context_length is None or context_length > 0
        ), "The value of `context_length` should be > 0"
        assert num_layers > 0, "The value of `num_layers` should be > 0"
        assert num_cells > 0, "The value of `num_cells` should be > 0"
        assert dropout_rate >= 0, "The value of `dropout_rate` should be >= 0"
        # Both must be "set" or both must be "unset".
        assert bool(cardinality) == bool(
            use_feat_static_cat
        ), "You should set `cardinality` if and only if `use_feat_static_cat=True`"
        assert cardinality is None or all(
            [size > 0 for size in cardinality]
        ), "Elements of `cardinality` should be > 0"
        assert embedding_dimension is None or all(
            [dim > 0 for dim in embedding_dimension]
        ), "Elements of `embedding_dimension` should be > 0"
        assert (
            num_parallel_samples > 0
        ), "The value of `num_parallel_samples` should be > 0"

        # ---- values stored verbatim ----------------------------------
        self.freq = freq
        self.prediction_length = prediction_length
        self.num_layers = num_layers
        self.num_cells = num_cells
        self.cell_type = cell_type
        self.dropout_rate = dropout_rate
        self.use_feat_dynamic_real = use_feat_dynamic_real
        self.use_feat_static_cat = use_feat_static_cat
        self.use_feat_static_real = use_feat_static_real
        self.scaling = scaling
        self.num_parallel_samples = num_parallel_samples

        # ---- values with fallbacks -----------------------------------
        if context_length is None:
            context_length = prediction_length
        self.context_length = context_length

        self.distr_output = distr_output
        self.distr_output.dtype = dtype

        if cardinality and use_feat_static_cat:
            self.cardinality = cardinality
        else:
            self.cardinality = [1]

        if embedding_dimension is None:
            embedding_dimension = [
                min(50, (size + 1) // 2) for size in self.cardinality
            ]
        self.embedding_dimension = embedding_dimension

        if lags_seq is None:
            lags_seq = get_lags_for_frequency(freq_str=freq)
        self.lags_seq = lags_seq

        if time_features is None:
            time_features = time_features_from_frequency_str(self.freq)
        self.time_features = time_features

        # Enough history to cover the context window plus the largest lag.
        self.history_length = self.context_length + max(self.lags_seq)

        if imputation_method is None:
            imputation_method = DummyValueImputation(
                self.distr_output.value_in_support
            )
        self.imputation_method = imputation_method
示例#8
0
    def __init__(
        self,
        freq: str,
        prediction_length: int,
        trainer: Trainer = Trainer(),
        context_length: Optional[int] = None,
        num_layers: int = 2,
        num_cells: int = 40,
        cell_type: str = "lstm",
        dropout_rate: float = 0.1,
        use_feat_dynamic_real: bool = False,
        use_feat_static_cat: bool = False,
        use_feat_static_real: bool = False,
        cardinality: Optional[List[int]] = None,
        embedding_dimension: int = 20,
        distr_output: DistributionOutput = StudentTOutput(),
        scaling: bool = True,
        lags_seq: Optional[List[int]] = None,
        time_features: Optional[List[TimeFeature]] = None,
        num_parallel_samples: int = 100,
    ) -> None:
        """Validate the hyper-parameters and store them on the estimator.

        Arguments are checked with ``assert`` statements, so an invalid
        value raises ``AssertionError`` (unless assertions are disabled).

        ``context_length`` falls back to ``prediction_length``;
        ``cardinality`` is replaced by ``[1]`` when ``use_feat_static_cat``
        is false; ``lags_seq`` and ``time_features`` are derived from
        ``freq`` when not given. ``history_length`` is the context length
        plus the largest lag. ``embedding_dimension`` is a single integer
        here and is stored unchanged.
        """
        super().__init__(trainer=trainer)

        assert (prediction_length >
                0), "The value of `prediction_length` should be > 0"
        assert (context_length is None or context_length > 0
                ), "The value of `context_length` should be > 0"
        assert num_layers > 0, "The value of `num_layers` should be > 0"
        assert num_cells > 0, "The value of `num_cells` should be > 0"
        assert dropout_rate >= 0, "The value of `dropout_rate` should be >= 0"
        assert (cardinality is not None and use_feat_static_cat) or (
            cardinality is None and not use_feat_static_cat
        ), "You should set `cardinality` if and only if `use_feat_static_cat=True`"
        # The per-element results must be reduced with all(): a bare
        # non-empty list is always truthy, so without it non-positive
        # cardinalities would never be rejected.
        assert cardinality is None or all(
            c > 0 for c in cardinality
        ), "Elements of `cardinality` should be > 0"
        assert (embedding_dimension >
                0), "The value of `embedding_dimension` should be > 0"
        assert (num_parallel_samples >
                0), "The value of `num_parallel_samples` should be > 0"

        self.freq = freq
        self.context_length = (context_length if context_length is not None
                               else prediction_length)
        self.prediction_length = prediction_length
        self.distr_output = distr_output
        self.num_layers = num_layers
        self.num_cells = num_cells
        self.cell_type = cell_type
        self.dropout_rate = dropout_rate
        self.use_feat_dynamic_real = use_feat_dynamic_real
        self.use_feat_static_cat = use_feat_static_cat
        self.use_feat_static_real = use_feat_static_real
        # Without static categorical features a single cardinality of 1 is
        # used in place of whatever was passed.
        self.cardinality = cardinality if use_feat_static_cat else [1]
        self.embedding_dimension = embedding_dimension
        self.scaling = scaling
        self.lags_seq = (lags_seq if lags_seq is not None else
                         get_lags_for_frequency(freq_str=freq))
        self.time_features = (time_features if time_features is not None else
                              time_features_from_frequency_str(self.freq))

        # Enough history to cover the context window plus the largest lag.
        self.history_length = self.context_length + max(self.lags_seq)

        self.num_parallel_samples = num_parallel_samples