def __init__(
    self,
    freq: str,
    prediction_length: int,
    trainer: Trainer = Trainer(),
    num_hidden_dimensions: Optional[List[int]] = None,
    context_length: Optional[int] = None,
    distr_output: DistributionOutput = StudentTOutput(),
    batch_normalization: bool = False,
    mean_scaling: bool = True,
    num_parallel_samples: int = 100,
) -> None:
    """
    Store the estimator's hyper-parameters and build its samplers.

    All parameters should be serializable.

    ``num_hidden_dimensions`` falls back to ``[40, 40]`` and
    ``context_length`` falls back to ``prediction_length`` when they are
    left as ``None``. The training and validation samplers are both
    created with ``min_future=prediction_length``.
    """
    super().__init__(trainer=trainer)

    # Resolve the optional arguments up front.
    if num_hidden_dimensions is None:
        num_hidden_dimensions = [40, 40]
    if context_length is None:
        context_length = prediction_length

    self.num_hidden_dimensions = num_hidden_dimensions
    self.prediction_length = prediction_length
    self.context_length = context_length
    self.freq = freq
    self.distr_output = distr_output
    self.batch_normalization = batch_normalization
    self.mean_scaling = mean_scaling
    self.num_parallel_samples = num_parallel_samples

    self.train_sampler = ExpectedNumInstanceSampler(
        num_instances=1,
        min_future=prediction_length,
    )
    self.validation_sampler = ValidationSplitSampler(
        min_future=prediction_length,
    )
def __init__(
    self,
    freq: str,
    prediction_length: int,
    context_length: Optional[int] = None,
    dropout_rate: float = 0.1,
    embed_dim: int = 32,
    num_heads: int = 4,
    num_outputs: int = 3,
    variable_dim: Optional[int] = None,
    time_features: List[TimeFeature] = [],
    static_cardinalities: Dict[str, int] = {},
    dynamic_cardinalities: Dict[str, int] = {},
    static_feature_dims: Dict[str, int] = {},
    dynamic_feature_dims: Dict[str, int] = {},
    past_dynamic_features: List[str] = [],
    trainer: Trainer = Trainer(),
) -> None:
    """
    Store the estimator's hyper-parameters and split off past-only features.

    Parameters
    ----------
    freq
        Frequency string; used to derive time features when none are given.
    prediction_length
        Forecast horizon; also used as ``min_future`` for both samplers.
    context_length
        Falls back to ``prediction_length`` when falsy.
    dropout_rate, embed_dim, num_heads, num_outputs
        Stored unchanged on the instance.
    variable_dim
        Falls back to ``embed_dim`` when falsy.
    time_features
        Used as given when non-empty; otherwise derived from ``freq``.
    static_cardinalities, static_feature_dims
        Stored unchanged on the instance.
    dynamic_cardinalities, dynamic_feature_dims
        Per-feature dicts. Entries named in ``past_dynamic_features`` are
        moved into ``past_dynamic_cardinalities`` /
        ``past_dynamic_feature_dims``. The dicts are copied first, so the
        caller's objects (and the shared defaults) are never modified.
    past_dynamic_features
        Names of dynamic features to move into the ``past_*`` dicts.
    trainer
        Forwarded to the base-class constructor.

    Raises
    ------
    ValueError
        If a name in ``past_dynamic_features`` is present in neither
        ``dynamic_cardinalities`` nor ``dynamic_feature_dims``.
    """
    super().__init__(trainer=trainer)
    self.freq = freq
    self.prediction_length = prediction_length
    self.context_length = context_length or prediction_length
    # MultiheadAttention
    self.dropout_rate = dropout_rate
    self.embed_dim = embed_dim
    self.num_heads = num_heads
    self.num_outputs = num_outputs
    self.variable_dim = variable_dim or embed_dim
    if not time_features:
        self.time_features = time_features_from_frequency_str(self.freq)
    else:
        self.time_features = time_features
    self.static_cardinalities = static_cardinalities
    # Copy the two dicts that are popped from below: mutating them in place
    # would alter the caller's arguments and the shared default `{}`.
    self.dynamic_cardinalities = dict(dynamic_cardinalities)
    self.static_feature_dims = static_feature_dims
    self.dynamic_feature_dims = dict(dynamic_feature_dims)
    self.past_dynamic_features = past_dynamic_features

    self.past_dynamic_cardinalities = {}
    self.past_dynamic_feature_dims = {}
    for name in self.past_dynamic_features:
        if name in self.dynamic_cardinalities:
            self.past_dynamic_cardinalities[
                name
            ] = self.dynamic_cardinalities.pop(name)
        elif name in self.dynamic_feature_dims:
            self.past_dynamic_feature_dims[
                name
            ] = self.dynamic_feature_dims.pop(name)
        else:
            raise ValueError(
                f"Feature name {name} is not provided in feature dicts"
            )

    self.train_sampler = ExpectedNumInstanceSampler(
        num_instances=1.0, min_future=prediction_length
    )
    self.validation_sampler = ValidationSplitSampler(
        min_future=prediction_length
    )
def __init__(
    self,
    freq: str,
    prediction_length: int,
    cardinality: List[int],
    embedding_dimension: int,
    encoder: Seq2SeqEncoder,
    decoder_mlp_layer: List[int],
    decoder_mlp_static_dim: int,
    scaler: Scaler = NOPScaler(),
    context_length: Optional[int] = None,
    quantiles: Optional[List[float]] = None,
    trainer: Trainer = Trainer(),
    train_sampler: Optional[InstanceSampler] = None,
    validation_sampler: Optional[InstanceSampler] = None,
    num_parallel_samples: int = 100,
    batch_size: int = 32,
) -> None:
    """
    Validate the arguments, then store them and build derived components.

    The assertions run before the base-class constructor is called.
    ``context_length`` falls back to ``prediction_length`` and ``quantiles``
    to ``[0.1, 0.5, 0.9]`` when left as ``None``. A ``FeatureEmbedder`` is
    built with one embedding of size ``embedding_dimension`` per entry of
    ``cardinality``. Samplers that are not supplied are created with
    ``min_future=prediction_length``.
    """
    assert (
        prediction_length > 0
    ), "The value of `prediction_length` should be > 0"
    assert (
        context_length is None or context_length > 0
    ), "The value of `context_length` should be > 0"
    assert quantiles is None or all(
        0 <= d <= 1 for d in quantiles
    ), "Elements of `quantiles` should be >= 0 and <= 1"

    super().__init__(trainer=trainer, batch_size=batch_size)

    if context_length is None:
        context_length = prediction_length
    if quantiles is None:
        quantiles = [0.1, 0.5, 0.9]

    self.context_length = context_length
    self.prediction_length = prediction_length
    self.freq = freq
    self.quantiles = quantiles
    self.encoder = encoder
    self.decoder_mlp_layer = decoder_mlp_layer
    self.decoder_mlp_static_dim = decoder_mlp_static_dim
    self.scaler = scaler

    # Every categorical feature gets the same embedding size.
    embedding_dims = [embedding_dimension for _ in cardinality]
    self.embedder = FeatureEmbedder(
        cardinalities=cardinality,
        embedding_dims=embedding_dims,
    )

    if train_sampler is None:
        train_sampler = ExpectedNumInstanceSampler(
            num_instances=1.0, min_future=prediction_length
        )
    self.train_sampler = train_sampler

    if validation_sampler is None:
        validation_sampler = ValidationSplitSampler(
            min_future=prediction_length
        )
    self.validation_sampler = validation_sampler

    self.num_parallel_samples = num_parallel_samples
def __init__(
    self,
    input_size: int,
    freq: str,
    prediction_length: int,
    context_length: Optional[int] = None,
    trainer: Trainer = Trainer(),
    dropout_rate: float = 0.1,
    cardinality: Optional[List[int]] = None,
    embedding_dimension: List[int] = [20],
    distr_output: DistributionOutput = StudentTOutput(),
    d_model: int = 32,
    dim_feedforward_scale: int = 4,
    act_type: str = "gelu",
    num_heads: int = 8,
    num_encoder_layers: int = 3,
    num_decoder_layers: int = 3,
    scaling: bool = True,
    lags_seq: Optional[List[int]] = None,
    time_features: Optional[List[TimeFeature]] = None,
    use_feat_dynamic_real: bool = False,
    use_feat_static_cat: bool = False,
    use_feat_static_real: bool = False,
    num_parallel_samples: int = 100,
) -> None:
    """
    Store the estimator's hyper-parameters and derive dependent values.

    ``context_length`` falls back to ``prediction_length``. ``cardinality``
    is replaced by ``[1]`` unless ``use_feat_static_cat`` is set. Missing
    ``lags_seq`` / ``time_features`` are derived from the frequency via the
    Fourier helper functions. ``history_length`` is the context length plus
    the largest lag. Both samplers use ``min_future=prediction_length``.
    """
    super().__init__(trainer=trainer)

    self.input_size = input_size
    self.freq = freq
    self.prediction_length = prediction_length

    if context_length is None:
        context_length = prediction_length
    self.context_length = context_length

    self.distr_output = distr_output
    self.dropout_rate = dropout_rate
    self.use_feat_dynamic_real = use_feat_dynamic_real
    self.use_feat_static_cat = use_feat_static_cat
    self.use_feat_static_real = use_feat_static_real

    if use_feat_static_cat:
        self.cardinality = cardinality
    else:
        self.cardinality = [1]
    self.embedding_dimension = embedding_dimension
    self.num_parallel_samples = num_parallel_samples

    if lags_seq is None:
        lags_seq = lags_for_fourier_time_features_from_frequency(
            freq_str=freq
        )
    self.lags_seq = lags_seq

    if time_features is None:
        time_features = fourier_time_features_from_frequency(self.freq)
    self.time_features = time_features

    # Enough history to cover the context window and the largest lag.
    self.history_length = self.context_length + max(self.lags_seq)

    self.scaling = scaling
    self.d_model = d_model
    self.num_heads = num_heads
    self.act_type = act_type
    self.dim_feedforward_scale = dim_feedforward_scale
    self.num_encoder_layers = num_encoder_layers
    self.num_decoder_layers = num_decoder_layers

    self.train_sampler = ExpectedNumInstanceSampler(
        num_instances=1.0,
        min_future=prediction_length,
    )
    self.validation_sampler = ValidationSplitSampler(
        min_future=prediction_length,
    )
def __init__(
    self,
    freq: str,
    prediction_length: int,
    context_length: Optional[int] = None,
    num_layers: int = 2,
    hidden_size: int = 40,
    dropout_rate: float = 0.1,
    num_feat_dynamic_real: int = 0,
    num_feat_static_cat: int = 0,
    num_feat_static_real: int = 0,
    cardinality: Optional[List[int]] = None,
    embedding_dimension: Optional[List[int]] = None,
    distr_output: DistributionOutput = StudentTOutput(),
    loss: DistributionLoss = NegativeLogLikelihood(),
    scaling: bool = True,
    lags_seq: Optional[List[int]] = None,
    time_features: Optional[List[TimeFeature]] = None,
    num_parallel_samples: int = 100,
    batch_size: int = 32,
    num_batches_per_epoch: int = 50,
    trainer_kwargs: Optional[Dict[str, Any]] = dict(),
) -> None:
    """
    Store the estimator's hyper-parameters and build its samplers.

    Parameters
    ----------
    freq
        Frequency string; used to derive time features when none are given.
    prediction_length
        Forecast horizon; also used as ``min_future`` for both samplers.
    context_length
        Falls back to ``prediction_length`` when ``None``.
    num_layers, hidden_size, dropout_rate
        Stored unchanged on the instance.
    num_feat_dynamic_real, num_feat_static_cat, num_feat_static_real
        Stored unchanged on the instance.
    cardinality
        Used only when it is non-empty and ``num_feat_static_cat > 0``;
        otherwise ``[1]`` is stored.
    embedding_dimension, distr_output, loss, scaling, lags_seq
        Stored unchanged on the instance.
    time_features
        Derived from ``freq`` when ``None``.
    num_parallel_samples, batch_size, num_batches_per_epoch
        Stored unchanged on the instance.
    trainer_kwargs
        Overrides for the defaults ``max_epochs=100`` and
        ``gradient_clip_val=10.0``. ``None`` is accepted and means "no
        overrides".
    """
    # The annotation allows None, but `**None` raises TypeError, so
    # normalise it to an empty mapping before merging with the defaults.
    overrides = trainer_kwargs if trainer_kwargs is not None else {}
    trainer_kwargs = {
        "max_epochs": 100,
        "gradient_clip_val": 10.0,
        **overrides,
    }
    super().__init__(trainer_kwargs=trainer_kwargs)

    self.freq = freq
    self.context_length = (
        context_length if context_length is not None else prediction_length
    )
    self.prediction_length = prediction_length
    self.distr_output = distr_output
    self.loss = loss
    self.num_layers = num_layers
    self.hidden_size = hidden_size
    self.dropout_rate = dropout_rate
    self.num_feat_dynamic_real = num_feat_dynamic_real
    self.num_feat_static_cat = num_feat_static_cat
    self.num_feat_static_real = num_feat_static_real
    self.cardinality = (
        cardinality if cardinality and num_feat_static_cat > 0 else [1]
    )
    self.embedding_dimension = embedding_dimension
    self.scaling = scaling
    self.lags_seq = lags_seq
    self.time_features = (
        time_features
        if time_features is not None
        else time_features_from_frequency_str(self.freq)
    )
    self.num_parallel_samples = num_parallel_samples
    self.batch_size = batch_size
    self.num_batches_per_epoch = num_batches_per_epoch

    self.train_sampler = ExpectedNumInstanceSampler(
        num_instances=1.0, min_future=prediction_length
    )
    self.validation_sampler = ValidationSplitSampler(
        min_future=prediction_length
    )
def __init__(
    self,
    freq: str,
    prediction_length: int,
    context_length: int,
    num_series: int,
    skip_size: int,
    ar_window: int,
    channels: int,
    lead_time: int = 0,
    kernel_size: int = 6,
    trainer: Trainer = Trainer(),
    dropout_rate: Optional[float] = 0.2,
    output_activation: Optional[str] = None,
    rnn_cell_type: str = "gru",
    rnn_num_cells: int = 100,
    rnn_num_layers: int = 3,
    skip_rnn_cell_type: str = "gru",
    skip_rnn_num_layers: int = 1,
    skip_rnn_num_cells: int = 10,
    scaling: bool = True,
    train_sampler: Optional[InstanceSampler] = None,
    validation_sampler: Optional[InstanceSampler] = None,
    batch_size: int = 32,
    dtype: DType = np.float32,
) -> None:
    """
    Store the estimator's hyper-parameters and build its samplers.

    ``trainer``, ``lead_time``, ``batch_size`` and ``dtype`` are forwarded
    to the base-class constructor. Samplers that are not supplied are
    created with ``min_future=prediction_length + lead_time``.
    """
    super().__init__(
        trainer=trainer,
        lead_time=lead_time,
        batch_size=batch_size,
        dtype=dtype,
    )

    self.freq = freq
    self.num_series = num_series
    self.skip_size = skip_size
    self.ar_window = ar_window
    self.prediction_length = prediction_length
    self.context_length = context_length
    self.channels = channels
    self.kernel_size = kernel_size
    self.dropout_rate = dropout_rate
    self.output_activation = output_activation

    # Main recurrent component.
    self.rnn_cell_type = rnn_cell_type
    self.rnn_num_layers = rnn_num_layers
    self.rnn_num_cells = rnn_num_cells

    # Skip recurrent component.
    self.skip_rnn_cell_type = skip_rnn_cell_type
    self.skip_rnn_num_layers = skip_rnn_num_layers
    self.skip_rnn_num_cells = skip_rnn_num_cells

    self.scaling = scaling

    if train_sampler is None:
        train_sampler = ExpectedNumInstanceSampler(
            num_instances=1.0,
            min_future=prediction_length + lead_time,
        )
    self.train_sampler = train_sampler

    if validation_sampler is None:
        validation_sampler = ValidationSplitSampler(
            min_future=prediction_length + lead_time,
        )
    self.validation_sampler = validation_sampler

    self.dtype = dtype
def __init__(
    self,
    freq: str,
    prediction_length: int,
    cardinalities: List[int] = [],
    context_length: Optional[int] = None,
    trainer: Trainer = Trainer(),
    model_dim: int = 64,
    ffn_dim_multiplier: int = 2,
    num_heads: int = 4,
    num_layers: int = 3,
    num_outputs: int = 3,
    kernel_sizes: List[int] = [3, 5, 7, 9],
    distance_encoding: Optional[str] = "dot",
    pre_layer_norm: bool = False,
    dropout: float = 0.1,
    temperature: float = 1.0,
    time_features: Optional[List[TimeFeature]] = None,
    use_feat_dynamic_real: bool = True,
    use_feat_dynamic_cat: bool = False,
    use_feat_static_real: bool = False,
    use_feat_static_cat: bool = True,
    train_sampler: Optional[InstanceSampler] = None,
    validation_sampler: Optional[InstanceSampler] = None,
    batch_size: int = 32,
):
    """
    Store the estimator's hyper-parameters and build its samplers.

    A falsy ``context_length`` is replaced by ``prediction_length``, and
    falsy ``time_features`` are replaced by the features derived from the
    frequency string. Samplers that are not supplied are created with
    ``min_future=prediction_length``.
    """
    super().__init__(trainer=trainer, batch_size=batch_size)

    self.freq = freq
    self.prediction_length = prediction_length

    if not context_length:
        context_length = prediction_length
    self.context_length = context_length

    self.model_dim = model_dim
    self.ffn_dim_multiplier = ffn_dim_multiplier
    self.num_heads = num_heads
    self.num_layers = num_layers
    self.num_outputs = num_outputs
    self.cardinalities = cardinalities
    self.kernel_sizes = kernel_sizes
    self.distance_encoding = distance_encoding
    self.pre_layer_norm = pre_layer_norm
    self.dropout = dropout
    self.temperature = temperature

    if not time_features:
        time_features = time_features_from_frequency_str(self.freq)
    self.time_features = time_features

    self.use_feat_dynamic_cat = use_feat_dynamic_cat
    self.use_feat_dynamic_real = use_feat_dynamic_real
    self.use_feat_static_cat = use_feat_static_cat
    self.use_feat_static_real = use_feat_static_real

    if train_sampler is None:
        train_sampler = ExpectedNumInstanceSampler(
            num_instances=1.0,
            min_future=prediction_length,
        )
    self.train_sampler = train_sampler

    if validation_sampler is None:
        validation_sampler = ValidationSplitSampler(
            min_future=prediction_length,
        )
    self.validation_sampler = validation_sampler
def __init__(
    self,
    freq: str,
    prediction_length: int,
    sampling: bool = True,
    trainer: Trainer = Trainer(),
    num_hidden_dimensions: Optional[List[int]] = None,
    context_length: Optional[int] = None,
    distr_output: DistributionOutput = StudentTOutput(),
    imputation_method: Optional[MissingValueImputation] = None,
    batch_normalization: bool = False,
    mean_scaling: bool = True,
    num_parallel_samples: int = 100,
    train_sampler: Optional[InstanceSampler] = None,
    validation_sampler: Optional[InstanceSampler] = None,
    batch_size: int = 32,
) -> None:
    """
    Defines an estimator. All parameters should be serializable.

    Parameters
    ----------
    freq
        Frequency string, stored unchanged.
    prediction_length
        Forecast horizon; must be > 0. Also used as ``min_future`` for the
        default samplers.
    sampling
        Stored unchanged on the instance.
    trainer
        Forwarded to the base-class constructor.
    num_hidden_dimensions
        Layer sizes; every element must be > 0. Defaults to ``[40, 40]``.
    context_length
        Must be > 0 when given; falls back to ``prediction_length``.
    distr_output
        Stored unchanged; its ``value_in_support`` seeds the default
        imputation method.
    imputation_method
        Defaults to ``DummyValueImputation(distr_output.value_in_support)``.
    batch_normalization, mean_scaling
        Stored unchanged on the instance.
    num_parallel_samples
        Must be > 0.
    train_sampler, validation_sampler
        Default samplers are created when these are ``None``.
    batch_size
        Forwarded to the base-class constructor.

    Raises
    ------
    AssertionError
        If any of the argument checks above fails.
    """
    super().__init__(trainer=trainer, batch_size=batch_size)

    assert (
        prediction_length > 0
    ), "The value of `prediction_length` should be > 0"
    assert (
        context_length is None or context_length > 0
    ), "The value of `context_length` should be > 0"
    # The previous check tested the truthiness of a list of booleans, which
    # is True for any non-empty list; `all` actually inspects the elements.
    assert num_hidden_dimensions is None or all(
        d > 0 for d in num_hidden_dimensions
    ), "Elements of `num_hidden_dimensions` should be > 0"
    assert (
        num_parallel_samples > 0
    ), "The value of `num_parallel_samples` should be > 0"

    self.num_hidden_dimensions = (
        num_hidden_dimensions
        if num_hidden_dimensions is not None
        else [40, 40]
    )
    self.prediction_length = prediction_length
    self.context_length = (
        context_length if context_length is not None else prediction_length
    )
    self.freq = freq
    self.distr_output = distr_output
    self.batch_normalization = batch_normalization
    self.mean_scaling = mean_scaling
    self.num_parallel_samples = num_parallel_samples
    self.sampling = sampling
    self.imputation_method = (
        imputation_method
        if imputation_method is not None
        else DummyValueImputation(self.distr_output.value_in_support)
    )
    self.train_sampler = (
        train_sampler
        if train_sampler is not None
        else ExpectedNumInstanceSampler(
            num_instances=1.0, min_future=prediction_length
        )
    )
    self.validation_sampler = (
        validation_sampler
        if validation_sampler is not None
        else ValidationSplitSampler(min_future=prediction_length)
    )
def _create_instance_splitter(self, mode: str):
    """
    Build the ``InstanceSplitter`` for the given mode.

    ``mode`` must be one of ``"training"``, ``"validation"`` or ``"test"``.
    Training and validation each get a ``ValidationSplitSampler`` with
    ``min_future=self.prediction_length``; test gets a ``TestSplitSampler``.
    The splitter uses ``self.context_length`` as the past length and
    ``self.prediction_length`` as the future length.
    """
    assert mode in ("training", "validation", "test")

    samplers_by_mode = dict(
        training=ValidationSplitSampler(min_future=self.prediction_length),
        validation=ValidationSplitSampler(min_future=self.prediction_length),
        test=TestSplitSampler(),
    )
    chosen_sampler = samplers_by_mode[mode]

    splitter = InstanceSplitter(
        target_field=FieldName.TARGET,
        is_pad_field=FieldName.IS_PAD,
        start_field=FieldName.START,
        forecast_start_field=FieldName.FORECAST_START,
        instance_sampler=chosen_sampler,
        time_series_fields=[FieldName.FEAT_TIME],
        past_length=self.context_length,
        future_length=self.prediction_length,
    )
    return splitter
def __init__(
    self,
    freq: str,
    prediction_length: int,
    context_length: int,
    num_cells: int,
    num_layers: int,
    dropout_rate: float = 0.1,
    interval_distr_output: DistributionOutput = NegativeBinomialOutput(),
    size_distr_output: DistributionOutput = NegativeBinomialOutput(),
    train_sampler: Optional[InstanceSampler] = None,
    validation_sampler: Optional[InstanceSampler] = None,
    trainer: Trainer = Trainer(hybridize=False),
    batch_size: int = 32,
    num_parallel_samples: int = 100,
    **kwargs,
):
    """
    Validate and store the estimator's hyper-parameters.

    ``trainer``, ``batch_size`` and any extra keyword arguments are
    forwarded to the base-class constructor, after which the argument
    checks run. When no training sampler is supplied, one is created with
    ``num_instances=5``; the default validation sampler uses
    ``min_future=prediction_length``.
    """
    super().__init__(trainer=trainer, batch_size=batch_size, **kwargs)

    assert (
        prediction_length > 0
    ), "The value of `prediction_length` should be > 0"
    assert (
        context_length is None or context_length > 0
    ), "The value of `context_length` should be > 0"
    assert dropout_rate >= 0, "The value of `dropout_rate` should be >= 0"

    self.freq = freq
    self.context_length = context_length
    self.prediction_length = prediction_length
    self.num_cells = num_cells
    self.num_layers = num_layers
    self.dropout_rate = dropout_rate
    self.interval_distr_output = interval_distr_output
    self.size_distr_output = size_distr_output
    self.num_parallel_samples = num_parallel_samples

    if train_sampler is None:
        train_sampler = ExpectedNumInstanceSampler(
            num_instances=5, min_future=prediction_length
        )
    self.train_sampler = train_sampler

    if validation_sampler is None:
        validation_sampler = ValidationSplitSampler(
            min_future=prediction_length
        )
    self.validation_sampler = validation_sampler
def __init__(
    self,
    freq: str,
    prediction_length: Optional[int],
    context_length: int,
    num_series: int,
    ar_window: int = 24,
    skip_size: int = 24,
    channels: int = 100,
    kernel_size: int = 6,
    horizon: Optional[int] = None,
    trainer: Trainer = Trainer(),
    dropout_rate: Optional[float] = 0.2,
    output_activation: Optional[str] = None,
    rnn_cell_type: str = "GRU",
    rnn_num_cells: int = 100,
    skip_rnn_cell_type: str = "GRU",
    skip_rnn_num_cells: int = 5,
    scaling: bool = True,
    dtype: np.dtype = np.float32,
):
    """
    Store the estimator's hyper-parameters and build its samplers.

    ``future_length`` is ``horizon`` when that is given and
    ``prediction_length`` otherwise; both samplers are created with
    ``min_future=self.future_length``.
    """
    super().__init__(trainer, dtype=dtype)

    self.freq = freq
    self.num_series = num_series
    self.skip_size = skip_size
    self.ar_window = ar_window
    self.horizon = horizon
    self.prediction_length = prediction_length

    # An explicit horizon takes precedence over prediction_length.
    if horizon is not None:
        self.future_length = horizon
    else:
        self.future_length = prediction_length

    self.context_length = context_length
    self.channels = channels
    self.kernel_size = kernel_size
    self.dropout_rate = dropout_rate
    self.output_activation = output_activation
    self.rnn_cell_type = rnn_cell_type
    self.rnn_num_cells = rnn_num_cells
    self.skip_rnn_cell_type = skip_rnn_cell_type
    self.skip_rnn_num_cells = skip_rnn_num_cells
    self.scaling = scaling

    self.train_sampler = ExpectedNumInstanceSampler(
        num_instances=1.0,
        min_future=self.future_length,
    )
    self.validation_sampler = ValidationSplitSampler(
        min_future=self.future_length,
    )
    self.dtype = dtype
def test_forking_sequence_splitter() -> None:
    """
    Check the target and past-feature arrays produced by the splitter.

    A single series of length 20 is passed through ``AddAgeFeature`` and a
    ``ForkingSequenceSplitter`` (encoder length 5, decoder length 3); the
    first transformed entry is compared against hand-written expectations.
    """
    len_ts = 20
    enc_len = 5
    dec_len = 3
    ds = make_dataset(1, len_ts)

    age_step = transform.AddAgeFeature(
        target_field=FieldName.TARGET,
        output_field="age",
        pred_length=dec_len,
    )
    split_step = ForkingSequenceSplitter(
        instance_sampler=ValidationSplitSampler(min_future=dec_len),
        enc_len=enc_len,
        dec_len=dec_len,
        encoder_series_fields=["age"],
    )
    trans = transform.Chain([age_step, split_step])

    out = trans(ds, is_train=True)
    transformed_data = next(iter(out))

    # One row of dec_len future values per encoder position.
    future_target = np.array(
        [
            [13.0, 14.0, 15.0],
            [14.0, 15.0, 16.0],
            [15.0, 16.0, 17.0],
            [16.0, 17.0, 18.0],
            [17.0, 18.0, 19.0],
        ]
    )
    target_error = np.linalg.norm(
        future_target - transformed_data["future_target"]
    )
    assert (
        target_error < 1e-5
    ), "the forking sequence target should be computed correctly."

    age = np.log10(2.0 + np.arange(len_ts))
    expected_past_age = age[-(enc_len + dec_len) : -dec_len]
    age_error = np.linalg.norm(
        expected_past_age - transformed_data["past_age"].flatten()
    )
    assert (
        age_error < 1e-5
    ), "the forking sequence past feature should be computed correctly."
def __init__(
    self,
    freq: str,
    prediction_length: int,
    context_length: Optional[int] = None,
    hidden_dimensions: Optional[List[int]] = None,
    distr_output: DistributionOutput = StudentTOutput(),
    loss: DistributionLoss = NegativeLogLikelihood(),
    batch_norm: bool = False,
    batch_size: int = 32,
    num_batches_per_epoch: int = 50,
    trainer_kwargs: Optional[Dict[str, Any]] = None,
    train_sampler: Optional[InstanceSampler] = None,
    validation_sampler: Optional[InstanceSampler] = None,
) -> None:
    """
    Store the estimator's hyper-parameters and build its samplers.

    User-supplied ``trainer_kwargs`` are layered over the defaults
    ``max_epochs=100`` and ``gradient_clip_val=10.0``. A falsy
    ``context_length`` becomes ``10 * prediction_length`` and falsy
    ``hidden_dimensions`` become ``[20, 20]``. Falsy samplers are replaced
    by defaults created with ``min_future=prediction_length``.
    """
    default_trainer_kwargs = dict(max_epochs=100, gradient_clip_val=10.0)
    default_trainer_kwargs.update(trainer_kwargs or {})
    super().__init__(trainer_kwargs=default_trainer_kwargs)

    self.freq = freq
    self.prediction_length = prediction_length

    if not context_length:
        context_length = 10 * prediction_length
    self.context_length = context_length

    # TODO find way to enforce same defaults to network and estimator
    # somehow
    if not hidden_dimensions:
        hidden_dimensions = [20, 20]
    self.hidden_dimensions = hidden_dimensions

    self.distr_output = distr_output
    self.loss = loss
    self.batch_norm = batch_norm
    self.batch_size = batch_size
    self.num_batches_per_epoch = num_batches_per_epoch

    if not train_sampler:
        train_sampler = ExpectedNumInstanceSampler(
            num_instances=1.0,
            min_future=prediction_length,
        )
    self.train_sampler = train_sampler

    if not validation_sampler:
        validation_sampler = ValidationSplitSampler(
            min_future=prediction_length,
        )
    self.validation_sampler = validation_sampler
def __init__(
    self,
    freq: str,
    prediction_length: int,
    context_length: Optional[int] = None,
    trainer: Trainer = Trainer(),
    dropout_rate: float = 0.1,
    cardinality: Optional[List[int]] = None,
    embedding_dimension: int = 20,
    distr_output: DistributionOutput = StudentTOutput(),
    model_dim: int = 32,
    inner_ff_dim_scale: int = 4,
    pre_seq: str = "dn",
    post_seq: str = "drn",
    act_type: str = "softrelu",
    num_heads: int = 8,
    scaling: bool = True,
    lags_seq: Optional[List[int]] = None,
    time_features: Optional[List[TimeFeature]] = None,
    use_feat_dynamic_real: bool = False,
    use_feat_static_cat: bool = False,
    num_parallel_samples: int = 100,
    train_sampler: Optional[InstanceSampler] = None,
    validation_sampler: Optional[InstanceSampler] = None,
    batch_size: int = 32,
) -> None:
    """
    Validate the arguments and build the encoder/decoder pair.

    After the base-class constructor and the argument checks, the
    hyper-parameters are stored. Missing ``lags_seq`` / ``time_features``
    are derived from the frequency, and ``history_length`` is the context
    length plus the largest lag. The network settings are gathered into
    ``self.config`` and passed to both ``TransformerEncoder`` and
    ``TransformerDecoder``. Samplers that are not supplied are created
    with ``min_future=prediction_length``.
    """
    super().__init__(trainer=trainer, batch_size=batch_size)

    assert (
        prediction_length > 0
    ), "The value of `prediction_length` should be > 0"
    assert (
        context_length is None or context_length > 0
    ), "The value of `context_length` should be > 0"
    assert dropout_rate >= 0, "The value of `dropout_rate` should be >= 0"
    assert (
        cardinality is not None or not use_feat_static_cat
    ), "You must set `cardinality` if `use_feat_static_cat=True`"
    assert cardinality is None or all(
        c > 0 for c in cardinality
    ), "Elements of `cardinality` should be > 0"
    assert (
        embedding_dimension > 0
    ), "The value of `embedding_dimension` should be > 0"
    assert (
        num_parallel_samples > 0
    ), "The value of `num_parallel_samples` should be > 0"

    self.freq = freq
    self.prediction_length = prediction_length

    if context_length is None:
        context_length = prediction_length
    self.context_length = context_length

    self.distr_output = distr_output
    self.dropout_rate = dropout_rate
    self.use_feat_dynamic_real = use_feat_dynamic_real
    self.use_feat_static_cat = use_feat_static_cat

    if use_feat_static_cat:
        self.cardinality = cardinality
    else:
        self.cardinality = [1]
    self.embedding_dimension = embedding_dimension
    self.num_parallel_samples = num_parallel_samples

    if lags_seq is None:
        lags_seq = get_lags_for_frequency(freq_str=freq)
    self.lags_seq = lags_seq

    if time_features is None:
        time_features = time_features_from_frequency_str(self.freq)
    self.time_features = time_features

    # Enough history to cover the context window and the largest lag.
    self.history_length = self.context_length + max(self.lags_seq)
    self.scaling = scaling

    # Settings shared by the encoder and the decoder.
    self.config = {
        "model_dim": model_dim,
        "pre_seq": pre_seq,
        "post_seq": post_seq,
        "dropout_rate": dropout_rate,
        "inner_ff_dim_scale": inner_ff_dim_scale,
        "act_type": act_type,
        "num_heads": num_heads,
    }
    self.encoder = TransformerEncoder(
        self.context_length, self.config, prefix="enc_"
    )
    self.decoder = TransformerDecoder(
        self.prediction_length, self.config, prefix="dec_"
    )

    if train_sampler is None:
        train_sampler = ExpectedNumInstanceSampler(
            num_instances=1.0,
            min_future=prediction_length,
        )
    self.train_sampler = train_sampler

    if validation_sampler is None:
        validation_sampler = ValidationSplitSampler(
            min_future=prediction_length,
        )
    self.validation_sampler = validation_sampler
def __init__(
    self,
    freq: str,
    prediction_length: int,
    target_dim: int,
    trainer: Trainer = Trainer(),
    context_length: Optional[int] = None,
    num_layers: int = 2,
    num_cells: int = 40,
    cell_type: str = "lstm",
    num_parallel_samples: int = 100,
    dropout_rate: float = 0.1,
    # number of dimension to sample at training time
    target_dim_sample: Optional[int] = None,
    distr_output: Optional[DistributionOutput] = None,
    rank: Optional[int] = 2,
    scaling: bool = True,
    pick_incomplete: bool = False,
    lags_seq: Optional[List[int]] = None,
    shuffle_target_dim: bool = True,
    time_features: Optional[List[TimeFeature]] = None,
    conditioning_length: int = 100,
    use_marginal_transformation: bool = False,
    train_sampler: Optional[InstanceSampler] = None,
    validation_sampler: Optional[InstanceSampler] = None,
    batch_size: int = 32,
) -> None:
    """
    Validate and store the estimator's hyper-parameters.

    Parameters
    ----------
    freq
        Frequency string; used to derive lags and time features when those
        are not given.
    prediction_length
        Forecast horizon; must be > 0.
    target_dim
        Number of target dimensions.
    trainer, batch_size
        Forwarded to the base-class constructor.
    context_length
        Must be > 0 when given; falls back to ``prediction_length``.
    num_layers, num_cells
        Must be > 0.
    cell_type
        Stored unchanged on the instance.
    num_parallel_samples
        Must be > 0.
    dropout_rate
        Must be >= 0.
    target_dim_sample
        Capped at ``target_dim``; defaults to ``target_dim``.
    distr_output
        Defaults to ``LowrankGPOutput(rank=rank)``.
    rank
        Only used to build the default ``distr_output``.
    scaling, shuffle_target_dim, conditioning_length
        Stored unchanged on the instance.
    pick_incomplete
        When true, the default samplers use ``min_past=0`` instead of
        ``history_length``.
    lags_seq, time_features
        Derived from ``freq`` when ``None``.
    use_marginal_transformation
        When true, ``output_transform`` is set to
        ``cdf_to_gaussian_forward_transform``; otherwise to ``None``.
    train_sampler, validation_sampler
        Default samplers are created when these are ``None``.

    Raises
    ------
    AssertionError
        If any of the argument checks above fails.
    """
    super().__init__(trainer=trainer, batch_size=batch_size)

    assert (
        prediction_length > 0
    ), "The value of `prediction_length` should be > 0"
    assert (
        context_length is None or context_length > 0
    ), "The value of `context_length` should be > 0"
    assert num_layers > 0, "The value of `num_layers` should be > 0"
    assert num_cells > 0, "The value of `num_cells` should be > 0"
    # The message previously named `num_eval_samples`, which is not a
    # parameter of this constructor.
    assert (
        num_parallel_samples > 0
    ), "The value of `num_parallel_samples` should be > 0"
    assert dropout_rate >= 0, "The value of `dropout_rate` should be >= 0"

    if distr_output is not None:
        self.distr_output = distr_output
    else:
        self.distr_output = LowrankGPOutput(rank=rank)

    self.freq = freq
    self.context_length = (
        context_length if context_length is not None else prediction_length
    )
    self.prediction_length = prediction_length
    self.target_dim = target_dim
    # Never sample more dimensions than the target actually has.
    self.target_dim_sample = (
        target_dim
        if target_dim_sample is None
        else min(target_dim_sample, target_dim)
    )
    self.shuffle_target_dim = shuffle_target_dim
    self.num_layers = num_layers
    self.num_cells = num_cells
    self.cell_type = cell_type
    self.num_parallel_samples = num_parallel_samples
    self.dropout_rate = dropout_rate

    self.lags_seq = (
        lags_seq
        if lags_seq is not None
        else get_lags_for_frequency(freq_str=freq)
    )
    self.time_features = (
        time_features
        if time_features is not None
        else time_features_from_frequency_str(self.freq)
    )

    # Enough history to cover the context window and the largest lag.
    self.history_length = self.context_length + max(self.lags_seq)
    self.pick_incomplete = pick_incomplete
    self.scaling = scaling
    self.conditioning_length = conditioning_length
    self.use_marginal_transformation = use_marginal_transformation

    self.output_transform = (
        cdf_to_gaussian_forward_transform
        if self.use_marginal_transformation
        else None
    )

    self.train_sampler = (
        train_sampler
        if train_sampler is not None
        else ExpectedNumInstanceSampler(
            num_instances=1.0,
            min_past=0 if pick_incomplete else self.history_length,
            min_future=prediction_length,
        )
    )
    self.validation_sampler = (
        validation_sampler
        if validation_sampler is not None
        else ValidationSplitSampler(
            min_past=0 if pick_incomplete else self.history_length,
            min_future=prediction_length,
        )
    )
def __init__(
    self,
    freq: str,
    prediction_length: int,
    context_length: Optional[int] = None,
    trainer: Trainer = Trainer(),
    hidden_dim: int = 32,
    variable_dim: Optional[int] = None,
    num_heads: int = 4,
    num_outputs: int = 3,
    num_instance_per_series: int = 100,
    dropout_rate: float = 0.1,
    time_features: List[TimeFeature] = [],
    static_cardinalities: Dict[str, int] = {},
    dynamic_cardinalities: Dict[str, int] = {},
    static_feature_dims: Dict[str, int] = {},
    dynamic_feature_dims: Dict[str, int] = {},
    past_dynamic_features: List[str] = [],
    train_sampler: Optional[InstanceSampler] = None,
    validation_sampler: Optional[InstanceSampler] = None,
    batch_size: int = 32,
) -> None:
    """
    Validate and store the hyper-parameters; split off past-only features.

    Parameters
    ----------
    freq
        Frequency string; used to derive time features when none are given.
    prediction_length
        Forecast horizon; must be > 0.
    context_length
        Must be > 0 when given; falls back to ``prediction_length`` when
        falsy.
    trainer, batch_size
        Forwarded to the base-class constructor.
    hidden_dim, num_heads, num_outputs, num_instance_per_series
        Stored unchanged on the instance.
    variable_dim
        Falls back to ``hidden_dim`` when falsy.
    dropout_rate
        Must be >= 0.
    time_features
        Used as given when non-empty. Otherwise derived from ``freq``; if
        that yields nothing, a single ``Constant()`` feature is used.
    static_cardinalities, static_feature_dims
        Stored unchanged on the instance.
    dynamic_cardinalities, dynamic_feature_dims
        Per-feature dicts. Entries named in ``past_dynamic_features`` are
        moved into ``past_dynamic_cardinalities`` /
        ``past_dynamic_feature_dims``. The dicts are copied first, so the
        caller's objects (and the shared defaults) are never modified.
    past_dynamic_features
        Names of dynamic features to move into the ``past_*`` dicts.
    train_sampler, validation_sampler
        Default samplers (``min_future=prediction_length``) are created
        when these are ``None``.

    Raises
    ------
    AssertionError
        If ``prediction_length``, ``context_length`` or ``dropout_rate``
        fails its check.
    ValueError
        If a name in ``past_dynamic_features`` is present in neither
        ``dynamic_cardinalities`` nor ``dynamic_feature_dims``.
    """
    super(TemporalFusionTransformerEstimator, self).__init__(
        trainer=trainer, batch_size=batch_size
    )
    assert (
        prediction_length > 0
    ), "The value of `prediction_length` should be > 0"
    assert (
        context_length is None or context_length > 0
    ), "The value of `context_length` should be > 0"
    assert dropout_rate >= 0, "The value of `dropout_rate` should be >= 0"

    self.freq = freq
    self.prediction_length = prediction_length
    self.context_length = context_length or prediction_length
    self.dropout_rate = dropout_rate
    self.hidden_dim = hidden_dim
    self.variable_dim = variable_dim or hidden_dim
    self.num_heads = num_heads
    self.num_outputs = num_outputs
    self.num_instance_per_series = num_instance_per_series

    if not time_features:
        self.time_features = time_features_from_frequency_str(self.freq)
        if not self.time_features:
            # If time features are empty (as for yearly data), we add a
            # constant feature of 0
            self.time_features = [Constant()]
    else:
        self.time_features = time_features

    self.static_cardinalities = static_cardinalities
    # Copy the two dicts that are popped from below: mutating them in place
    # would alter the caller's arguments and the shared default `{}`.
    self.dynamic_cardinalities = dict(dynamic_cardinalities)
    self.static_feature_dims = static_feature_dims
    self.dynamic_feature_dims = dict(dynamic_feature_dims)
    self.past_dynamic_features = past_dynamic_features

    self.past_dynamic_cardinalities = {}
    self.past_dynamic_feature_dims = {}
    for name in self.past_dynamic_features:
        if name in self.dynamic_cardinalities:
            self.past_dynamic_cardinalities[
                name
            ] = self.dynamic_cardinalities.pop(name)
        elif name in self.dynamic_feature_dims:
            self.past_dynamic_feature_dims[
                name
            ] = self.dynamic_feature_dims.pop(name)
        else:
            raise ValueError(
                f"Feature name {name} is not provided in feature dicts"
            )

    self.train_sampler = (
        train_sampler
        if train_sampler is not None
        else ExpectedNumInstanceSampler(
            num_instances=1.0, min_future=prediction_length
        )
    )
    self.validation_sampler = (
        validation_sampler
        if validation_sampler is not None
        else ValidationSplitSampler(min_future=prediction_length)
    )
def __init__(
    self,
    freq: str,
    prediction_length: int,
    context_length: Optional[int] = None,
    trainer: Trainer = Trainer(),
    num_stacks: int = 30,
    widths: Optional[List[int]] = None,
    num_blocks: Optional[List[int]] = None,
    num_block_layers: Optional[List[int]] = None,
    expansion_coefficient_lengths: Optional[List[int]] = None,
    sharing: Optional[List[bool]] = None,
    stack_types: Optional[List[str]] = None,
    loss_function: Optional[str] = "MAPE",
    train_sampler: Optional[InstanceSampler] = None,
    validation_sampler: Optional[InstanceSampler] = None,
    batch_size: int = 32,
    scale: bool = False,
    **kwargs,
) -> None:
    """
    Validate and store the estimator's hyper-parameters.

    Parameters
    ----------
    freq
        Frequency string, stored unchanged.
    prediction_length
        Forecast horizon; must be > 0.
    context_length
        Must be > 0 when given; defaults to ``2 * prediction_length``.
    trainer, batch_size, **kwargs
        Forwarded to the base-class constructor.
    num_stacks
        Must be > 0 when not ``None``. It is stored before the per-stack
        arguments are validated because they have to match its length.
    widths, num_blocks, num_block_layers, expansion_coefficient_lengths
        Per-stack lists whose values must be > 0; defaults are ``[512]``,
        ``[1]``, ``[4]`` and ``[32]`` respectively.
    sharing
        Per-stack list with no value constraint; default ``[False]``.
    stack_types
        Per-stack list whose values must be in
        ``VALID_N_BEATS_STACK_TYPES``; default ``["G"]``.
    loss_function
        Must be ``None`` or one of ``VALID_LOSS_FUNCTIONS``.
    train_sampler, validation_sampler
        Default samplers (``min_future=prediction_length``) are created
        when these are ``None``.
    scale
        Stored unchanged on the instance.

    Raises
    ------
    AssertionError
        If one of the scalar argument checks fails. How invalid per-stack
        arguments are reported depends on ``_validate_nbeats_argument``.
    """
    super().__init__(trainer=trainer, batch_size=batch_size, **kwargs)

    assert (
        prediction_length > 0
    ), "The value of `prediction_length` should be > 0"
    assert (
        context_length is None or context_length > 0
    ), "The value of `context_length` should be > 0"
    assert (
        num_stacks is None or num_stacks > 0
    ), "The value of `num_stacks` should be > 0"
    assert (
        loss_function is None or loss_function in VALID_LOSS_FUNCTIONS
    ), (
        "The loss function has to be one of the following:"
        f" {VALID_LOSS_FUNCTIONS}."
    )

    self.freq = freq
    self.scale = scale
    self.prediction_length = prediction_length
    self.context_length = (
        context_length
        if context_length is not None
        else 2 * prediction_length
    )
    # num_stacks has to be handled separately because other arguments have
    # to match its length
    self.num_stacks = num_stacks
    self.loss_function = loss_function

    self.widths = self._validate_nbeats_argument(
        argument_value=widths,
        argument_name="widths",
        default_value=[512],
        validation_condition=lambda val: val > 0,
        invalidation_message="Values of 'widths' should be > 0",
    )
    self.num_blocks = self._validate_nbeats_argument(
        argument_value=num_blocks,
        argument_name="num_blocks",
        default_value=[1],
        validation_condition=lambda val: val > 0,
        invalidation_message="Values of 'num_blocks' should be > 0",
    )
    self.num_block_layers = self._validate_nbeats_argument(
        argument_value=num_block_layers,
        argument_name="num_block_layers",
        default_value=[4],
        validation_condition=lambda val: val > 0,
        # The message previously named 'block_layers', which is not an
        # argument of this constructor.
        invalidation_message="Values of 'num_block_layers' should be > 0",
    )
    self.sharing = self._validate_nbeats_argument(
        argument_value=sharing,
        argument_name="sharing",
        default_value=[False],
        validation_condition=lambda val: True,
        invalidation_message="",
    )
    self.expansion_coefficient_lengths = self._validate_nbeats_argument(
        argument_value=expansion_coefficient_lengths,
        argument_name="expansion_coefficient_lengths",
        default_value=[32],
        validation_condition=lambda val: val > 0,
        invalidation_message=(
            "Values of 'expansion_coefficient_lengths' should be > 0"
        ),
    )
    self.stack_types = self._validate_nbeats_argument(
        argument_value=stack_types,
        argument_name="stack_types",
        default_value=["G"],
        validation_condition=lambda val: val in VALID_N_BEATS_STACK_TYPES,
        invalidation_message=(
            "Values of 'stack_types' should be one of"
            f" {VALID_N_BEATS_STACK_TYPES}"
        ),
    )

    self.train_sampler = (
        train_sampler
        if train_sampler is not None
        else ExpectedNumInstanceSampler(
            num_instances=1.0, min_future=prediction_length
        )
    )
    self.validation_sampler = (
        validation_sampler
        if validation_sampler is not None
        else ValidationSplitSampler(min_future=prediction_length)
    )
def __init__(
    self,
    encoder: Seq2SeqEncoder,
    decoder: Seq2SeqDecoder,
    freq: str,
    prediction_length: int,
    quantile_output: Optional[QuantileOutput] = None,
    distr_output: Optional[DistributionOutput] = None,
    context_length: Optional[int] = None,
    use_past_feat_dynamic_real: bool = False,
    use_feat_dynamic_real: bool = False,
    use_feat_static_cat: bool = False,
    cardinality: List[int] = None,
    embedding_dimension: List[int] = None,
    add_time_feature: bool = True,
    add_age_feature: bool = False,
    enable_encoder_dynamic_feature: bool = True,
    enable_decoder_dynamic_feature: bool = True,
    trainer: Trainer = Trainer(),
    scaling: Optional[bool] = None,
    scaling_decoder_dynamic_feature: bool = False,
    dtype: DType = np.float32,
    num_forking: Optional[int] = None,
    max_ts_len: Optional[int] = None,
    train_sampler: Optional[InstanceSampler] = None,
    validation_sampler: Optional[InstanceSampler] = None,
    batch_size: int = 32,
) -> None:
    """
    Check the arguments and record the estimator configuration.

    Exactly one of ``quantile_output`` / ``distr_output`` must be given.
    Derived values:

    * ``context_length`` defaults to ``4 * prediction_length`` and, when
      ``max_ts_len`` is given, is capped at
      ``max_ts_len - prediction_length`` if that difference is positive.
    * ``num_forking`` is capped at the resulting context length and
      defaults to it.
    * ``cardinality`` falls back to ``[1]`` unless it is non-empty and
      ``use_feat_static_cat`` is set.
    * ``embedding_dimension`` defaults to ``min(50, (cat + 1) // 2)`` per
      cardinality entry.
    * ``scaling`` defaults to ``quantile_output is None``.
    * Both samplers default to
      ``ValidationSplitSampler(min_future=prediction_length)``.
    """
    super().__init__(trainer=trainer, batch_size=batch_size)

    # ---- argument checks (order kept so the same assertion fires first)
    assert (quantile_output is None) != (distr_output is None)
    assert (
        context_length is None or context_length > 0
    ), "The value of `context_length` should be > 0"
    assert (
        prediction_length > 0
    ), "The value of `prediction_length` should be > 0"
    assert (
        use_feat_static_cat or not cardinality
    ), "You should set `cardinality` if and only if `use_feat_static_cat=True`"
    assert cardinality is None or all(
        c > 0 for c in cardinality
    ), "Elements of `cardinality` should be > 0"
    assert embedding_dimension is None or all(
        e > 0 for e in embedding_dimension
    ), "Elements of `embedding_dimension` should be > 0"

    # ---- values stored as given
    self.encoder = encoder
    self.decoder = decoder
    self.freq = freq
    self.prediction_length = prediction_length
    self.quantile_output = quantile_output
    self.distr_output = distr_output
    self.use_past_feat_dynamic_real = use_past_feat_dynamic_real
    self.use_feat_dynamic_real = use_feat_dynamic_real
    self.use_feat_static_cat = use_feat_static_cat
    self.add_time_feature = add_time_feature
    self.add_age_feature = add_age_feature
    self.enable_encoder_dynamic_feature = enable_encoder_dynamic_feature
    self.enable_decoder_dynamic_feature = enable_decoder_dynamic_feature
    self.scaling_decoder_dynamic_feature = scaling_decoder_dynamic_feature
    self.dtype = dtype

    # ---- context length and number of forks
    effective_context = context_length
    if effective_context is None:
        effective_context = 4 * prediction_length
    if max_ts_len is not None:
        # Don't allow context_length to be longer than the max pad length
        max_pad_len = max(max_ts_len - prediction_length, 0)
        if max_pad_len > 0:
            effective_context = min(max_pad_len, effective_context)
    self.context_length = effective_context

    if num_forking is None:
        self.num_forking = effective_context
    else:
        self.num_forking = min(num_forking, effective_context)

    # ---- categorical feature handling
    if cardinality and use_feat_static_cat:
        self.cardinality = cardinality
    else:
        self.cardinality = [1]

    if embedding_dimension is None:
        self.embedding_dimension = [
            min(50, (cat + 1) // 2) for cat in self.cardinality
        ]
    else:
        self.embedding_dimension = embedding_dimension

    self.use_dynamic_feat = (
        use_feat_dynamic_real or add_age_feature or add_time_feature
    )

    if scaling is None:
        self.scaling = quantile_output is None
    else:
        self.scaling = scaling

    # ---- samplers
    if train_sampler is None:
        self.train_sampler = ValidationSplitSampler(
            min_future=prediction_length
        )
    else:
        self.train_sampler = train_sampler

    if validation_sampler is None:
        self.validation_sampler = ValidationSplitSampler(
            min_future=prediction_length
        )
    else:
        self.validation_sampler = validation_sampler
def __init__(
    self,
    input_size: int,
    freq: str,
    prediction_length: int,
    target_dim: int,
    trainer: Trainer = Trainer(),
    context_length: Optional[int] = None,
    d_model: int = 32,
    dim_feedforward_scale: int = 4,
    act_type: str = "gelu",
    num_heads: int = 8,
    num_encoder_layers: int = 3,
    num_decoder_layers: int = 3,
    num_parallel_samples: int = 100,
    dropout_rate: float = 0.1,
    use_feat_dynamic_real: bool = False,
    flow_type="RealNVP",
    n_blocks=3,
    hidden_size=100,
    n_hidden=2,
    conditioning_length: int = 200,
    dequantize: bool = False,
    scaling: bool = True,
    pick_incomplete: bool = False,
    lags_seq: Optional[List[int]] = None,
    time_features: Optional[List[TimeFeature]] = None,
    **kwargs,
) -> None:
    """
    Record the estimator configuration on the instance.

    ``trainer`` and ``**kwargs`` go to the parent constructor. Defaults
    filled in here:

    * ``context_length`` -> ``prediction_length``
    * ``lags_seq`` -> ``lags_for_fourier_time_features_from_frequency(freq)``
    * ``time_features`` -> ``fourier_time_features_from_frequency(freq)``

    ``history_length`` is ``context_length + max(lags_seq)``. Both samplers
    require ``prediction_length`` future steps and, unless
    ``pick_incomplete`` is set, ``history_length`` past steps.
    """
    super().__init__(trainer=trainer, **kwargs)

    # Plain pass-through settings.
    self.freq = freq
    self.input_size = input_size
    self.prediction_length = prediction_length
    self.target_dim = target_dim
    self.d_model = d_model
    self.num_heads = num_heads
    self.act_type = act_type
    self.dim_feedforward_scale = dim_feedforward_scale
    self.num_encoder_layers = num_encoder_layers
    self.num_decoder_layers = num_decoder_layers
    self.num_parallel_samples = num_parallel_samples
    self.dropout_rate = dropout_rate
    self.use_feat_dynamic_real = use_feat_dynamic_real
    self.flow_type = flow_type
    self.n_blocks = n_blocks
    self.hidden_size = hidden_size
    self.n_hidden = n_hidden
    self.conditioning_length = conditioning_length
    self.dequantize = dequantize
    self.pick_incomplete = pick_incomplete
    self.scaling = scaling

    # Settings with computed fallbacks.
    if context_length is None:
        self.context_length = prediction_length
    else:
        self.context_length = context_length

    if lags_seq is None:
        self.lags_seq = lags_for_fourier_time_features_from_frequency(
            freq_str=freq
        )
    else:
        self.lags_seq = lags_seq

    if time_features is None:
        self.time_features = fourier_time_features_from_frequency(self.freq)
    else:
        self.time_features = time_features

    self.history_length = self.context_length + max(self.lags_seq)

    # Both samplers share the same past/future window requirements.
    required_past = 0 if pick_incomplete else self.history_length
    self.train_sampler = ExpectedNumInstanceSampler(
        num_instances=1.0,
        min_past=required_past,
        min_future=prediction_length,
    )
    self.validation_sampler = ValidationSplitSampler(
        min_past=required_past,
        min_future=prediction_length,
    )
def __init__(
    self,
    input_size: int,
    freq: str,
    prediction_length: int,
    target_dim: int,
    trainer: Trainer = Trainer(),
    context_length: Optional[int] = None,
    num_layers: int = 2,
    num_cells: int = 40,
    cell_type: str = "LSTM",
    num_parallel_samples: int = 100,
    dropout_rate: float = 0.1,
    cardinality: List[int] = [1],
    embedding_dimension: int = 5,
    flow_type="RealNVP",
    n_blocks=3,
    hidden_size=100,
    n_hidden=2,
    conditioning_length: int = 200,
    dequantize: bool = False,
    scaling: bool = True,
    pick_incomplete: bool = False,
    lags_seq: Optional[List[int]] = None,
    time_features: Optional[List[TimeFeature]] = None,
    **kwargs,
) -> None:
    """
    Store the estimator settings and build the instance samplers.

    ``context_length`` falls back to ``prediction_length``; ``lags_seq``
    and ``time_features`` fall back to the Fourier-feature helpers
    evaluated for ``freq``. ``history_length`` is
    ``context_length + max(lags_seq)`` and is the minimum past window of
    both samplers unless ``pick_incomplete`` is true (then it is 0).
    """
    super().__init__(trainer=trainer, **kwargs)

    # Resolve the optional arguments first.
    resolved_context = (
        prediction_length if context_length is None else context_length
    )
    resolved_lags = (
        lags_for_fourier_time_features_from_frequency(freq_str=freq)
        if lags_seq is None
        else lags_seq
    )
    resolved_time_features = (
        fourier_time_features_from_frequency(freq)
        if time_features is None
        else time_features
    )

    self.freq = freq
    self.context_length = resolved_context
    self.input_size = input_size
    self.prediction_length = prediction_length
    self.target_dim = target_dim

    # Recurrent-network settings.
    self.num_layers = num_layers
    self.num_cells = num_cells
    self.cell_type = cell_type
    self.num_parallel_samples = num_parallel_samples
    self.dropout_rate = dropout_rate
    self.cardinality = cardinality
    self.embedding_dimension = embedding_dimension

    # Flow settings.
    self.flow_type = flow_type
    self.n_blocks = n_blocks
    self.hidden_size = hidden_size
    self.n_hidden = n_hidden
    self.conditioning_length = conditioning_length
    self.dequantize = dequantize

    self.lags_seq = resolved_lags
    self.time_features = resolved_time_features
    self.history_length = resolved_context + max(resolved_lags)
    self.pick_incomplete = pick_incomplete
    self.scaling = scaling

    if pick_incomplete:
        min_past = 0
    else:
        min_past = self.history_length

    self.train_sampler = ExpectedNumInstanceSampler(
        num_instances=1.0,
        min_past=min_past,
        min_future=prediction_length,
    )
    self.validation_sampler = ValidationSplitSampler(
        min_past=min_past,
        min_future=prediction_length,
    )
def __init__(
    self,
    freq: str,
    prediction_length: int,
    context_length: Optional[int] = None,
    trainer: Trainer = Trainer(),
    num_stacks: int = 30,
    widths: Optional[List[int]] = None,
    num_blocks: Optional[List[int]] = None,
    num_block_layers: Optional[List[int]] = None,
    expansion_coefficient_lengths: Optional[List[int]] = None,
    sharing: Optional[List[bool]] = None,
    stack_types: Optional[List[str]] = None,
    loss_function: Optional[str] = "MAPE",
    **kwargs,
) -> None:
    """
    Store the estimator's hyper-parameters.

    Parameters
    ----------
    freq, prediction_length, num_stacks, loss_function
        Stored unchanged on the instance.
    context_length
        Defaults to ``2 * prediction_length``.
    trainer, **kwargs
        Forwarded to the parent constructor.
    widths, num_blocks, num_block_layers, expansion_coefficient_lengths,
    sharing, stack_types
        Each is passed through ``self._validate_nbeats_argument`` together
        with a default value and a per-value condition; the returned value
        is stored under the same attribute name.

    The train and validation samplers are always created here with
    ``min_future=prediction_length``.
    """
    super().__init__(trainer=trainer, **kwargs)

    self.freq = freq
    self.prediction_length = prediction_length
    self.context_length = (
        context_length if context_length is not None
        else 2 * prediction_length
    )
    # num_stacks has to be handled separately because other arguments have
    # to match its length
    self.num_stacks = num_stacks
    self.loss_function = loss_function

    self.widths = self._validate_nbeats_argument(
        argument_value=widths,
        argument_name="widths",
        default_value=[512],
        validation_condition=lambda val: val > 0,
        invalidation_message="Values of 'widths' should be > 0",
    )
    self.num_blocks = self._validate_nbeats_argument(
        argument_value=num_blocks,
        argument_name="num_blocks",
        default_value=[1],
        validation_condition=lambda val: val > 0,
        invalidation_message="Values of 'num_blocks' should be > 0",
    )
    self.num_block_layers = self._validate_nbeats_argument(
        argument_value=num_block_layers,
        argument_name="num_block_layers",
        default_value=[4],
        validation_condition=lambda val: val > 0,
        # The message names the real argument so users can find it.
        invalidation_message="Values of 'num_block_layers' should be > 0",
    )
    self.sharing = self._validate_nbeats_argument(
        argument_value=sharing,
        argument_name="sharing",
        default_value=[False],
        # Every value is accepted for this argument.
        validation_condition=lambda val: True,
        invalidation_message="",
    )
    self.expansion_coefficient_lengths = self._validate_nbeats_argument(
        argument_value=expansion_coefficient_lengths,
        argument_name="expansion_coefficient_lengths",
        default_value=[32],
        validation_condition=lambda val: val > 0,
        invalidation_message=(
            "Values of 'expansion_coefficient_lengths' should be > 0"
        ),
    )
    self.stack_types = self._validate_nbeats_argument(
        argument_value=stack_types,
        argument_name="stack_types",
        default_value=["G"],
        validation_condition=lambda val: val in VALID_N_BEATS_STACK_TYPES,
        invalidation_message=(
            f"Values of 'stack_types' should be one of {VALID_N_BEATS_STACK_TYPES}"
        ),
    )

    self.train_sampler = ExpectedNumInstanceSampler(
        num_instances=1.0, min_future=prediction_length
    )
    self.validation_sampler = ValidationSplitSampler(
        min_future=prediction_length
    )
def test_forking_sequence_with_features(is_train) -> None:
    """
    Run age/time feature transforms followed by ``ForkingSequenceSplitter``
    on a small daily dataset and check the shapes of the first output entry.
    """

    def build_dataset(exponent, series_length):
        # generates 2 ** exponent - 1 timeseries with constant increasing
        # values
        num_series = 2**exponent - 1
        values = np.arange(num_series * series_length).reshape(
            (num_series, series_length)
        )
        entries = [
            {"start": "2012-01-01", "target": values[idx, :]}
            for idx in range(num_series)
        ]
        return ListDataset(entries, freq="D")

    ds = build_dataset(1, 20)

    enc_len = 5
    dec_len = 3
    num_forking = 1
    num_time_feat_daily_freq = 3
    num_age_feat = 1

    age_step = transform.AddAgeFeature(
        target_field=FieldName.TARGET,
        output_field=FieldName.FEAT_AGE,
        pred_length=10,
    )
    time_step = transform.AddTimeFeatures(
        start_field=FieldName.START,
        target_field=FieldName.TARGET,
        output_field=FieldName.FEAT_TIME,
        time_features=time_features_from_frequency_str("D"),
        pred_length=10,
    )
    # The sampler depends on whether the pipeline runs in training mode.
    if is_train:
        sampler = ValidationSplitSampler(min_future=dec_len)
    else:
        sampler = TSplitSampler()
    split_step = ForkingSequenceSplitter(
        instance_sampler=sampler,
        enc_len=enc_len,
        dec_len=dec_len,
        num_forking=num_forking,
        encoder_series_fields=[
            FieldName.FEAT_AGE,
            FieldName.FEAT_TIME,
        ],
        decoder_series_fields=[FieldName.FEAT_TIME],
    )
    pipeline = transform.Chain(trans=[age_step, time_step, split_step])

    transformed_data = next(iter(pipeline(iter(ds), is_train=is_train)))

    # Field name -> expected shape, checked in insertion order.
    expected_shapes = {
        "past_target": (enc_len, 1),
        "past_feat_dynamic_age": (enc_len, num_age_feat),
        "past_time_feat": (enc_len, num_time_feat_daily_freq),
        "future_time_feat": (
            num_forking,
            dec_len,
            num_time_feat_daily_freq,
        ),
    }
    if is_train:
        expected_shapes["future_target"] = (num_forking, dec_len)

    for field, shape in expected_shapes.items():
        assert transformed_data[field].shape == shape
def __init__(
    self,
    input_size: int,
    freq: str,
    prediction_length: int,
    target_dim: int,
    trainer: Trainer = Trainer(),
    context_length: Optional[int] = None,
    num_layers: int = 2,
    num_cells: int = 40,
    cell_type: str = "LSTM",
    num_parallel_samples: int = 100,
    dropout_rate: float = 0.1,
    use_feat_dynamic_real: bool = False,
    use_feat_static_cat: bool = False,
    use_feat_static_real: bool = False,
    cardinality: Optional[List[int]] = None,
    embedding_dimension: Optional[List[int]] = None,
    distr_output: Optional[DistributionOutput] = None,
    rank: Optional[int] = 5,
    scaling: bool = True,
    pick_incomplete: bool = False,
    lags_seq: Optional[List[int]] = None,
    time_features: Optional[List[TimeFeature]] = None,
    conditioning_length: int = 200,
    use_marginal_transformation=False,
    **kwargs,
) -> None:
    """
    Record the estimator configuration on the instance.

    Fallbacks applied here:

    * ``context_length`` -> ``prediction_length``
    * ``distr_output`` ->
      ``LowRankMultivariateNormalOutput(dim=target_dim, rank=rank)``
    * ``cardinality`` -> ``[1]`` unless it is non-empty and
      ``use_feat_static_cat`` is set
    * ``embedding_dimension`` -> ``min(50, (cat + 1) // 2)`` per
      cardinality entry
    * ``lags_seq`` / ``time_features`` -> Fourier-feature helpers for
      ``freq``

    ``output_transform`` is ``cdf_to_gaussian_forward_transform`` when
    ``use_marginal_transformation`` is truthy, otherwise ``None``.
    """
    super().__init__(trainer=trainer, **kwargs)

    self.freq = freq
    if context_length is None:
        self.context_length = prediction_length
    else:
        self.context_length = context_length

    if distr_output is None:
        self.distr_output = LowRankMultivariateNormalOutput(
            dim=target_dim, rank=rank
        )
    else:
        self.distr_output = distr_output

    # Values stored as given.
    self.input_size = input_size
    self.prediction_length = prediction_length
    self.target_dim = target_dim
    self.num_layers = num_layers
    self.num_cells = num_cells
    self.cell_type = cell_type
    self.num_parallel_samples = num_parallel_samples
    self.dropout_rate = dropout_rate
    self.use_feat_dynamic_real = use_feat_dynamic_real
    self.use_feat_static_cat = use_feat_static_cat
    self.use_feat_static_real = use_feat_static_real
    self.conditioning_length = conditioning_length
    self.use_marginal_transformation = use_marginal_transformation
    self.pick_incomplete = pick_incomplete
    self.scaling = scaling

    # Categorical feature handling.
    if cardinality and use_feat_static_cat:
        self.cardinality = cardinality
    else:
        self.cardinality = [1]

    if embedding_dimension is None:
        self.embedding_dimension = [
            min(50, (cat + 1) // 2) for cat in self.cardinality
        ]
    else:
        self.embedding_dimension = embedding_dimension

    # Lags and time features.
    if lags_seq is None:
        self.lags_seq = lags_for_fourier_time_features_from_frequency(
            freq_str=freq
        )
    else:
        self.lags_seq = lags_seq

    if time_features is None:
        self.time_features = fourier_time_features_from_frequency(self.freq)
    else:
        self.time_features = time_features

    self.history_length = self.context_length + max(self.lags_seq)

    self.output_transform: Optional[Callable] = (
        cdf_to_gaussian_forward_transform
        if self.use_marginal_transformation
        else None
    )

    # Both samplers share the same past/future window requirements.
    required_past = 0 if pick_incomplete else self.history_length
    self.train_sampler = ExpectedNumInstanceSampler(
        num_instances=1.0,
        min_past=required_past,
        min_future=prediction_length,
    )
    self.validation_sampler = ValidationSplitSampler(
        min_past=required_past,
        min_future=prediction_length,
    )
def __init__(
    self,
    freq: str,
    prediction_length: int,
    target_dim: int,
    trainer: Trainer = Trainer(),
    context_length: Optional[int] = None,
    num_layers: int = 2,
    num_cells: int = 40,
    cell_type: str = "lstm",
    num_parallel_samples: int = 100,
    dropout_rate: float = 0.1,
    cardinality: List[int] = [1],
    embedding_dimension: int = 5,
    distr_output: Optional[DistributionOutput] = None,
    rank: Optional[int] = 5,
    scaling: bool = True,
    pick_incomplete: bool = False,
    lags_seq: Optional[List[int]] = None,
    time_features: Optional[List[TimeFeature]] = None,
    conditioning_length: int = 200,
    use_marginal_transformation=False,
    train_sampler: Optional[InstanceSampler] = None,
    validation_sampler: Optional[InstanceSampler] = None,
    batch_size: int = 32,
    **kwargs,
) -> None:
    """
    Validate and store the estimator's hyper-parameters.

    Parameters
    ----------
    freq, target_dim, cell_type, scaling, pick_incomplete,
    conditioning_length, use_marginal_transformation
        Stored unchanged on the instance.
    prediction_length, num_layers, num_cells, num_parallel_samples,
    embedding_dimension
        Must be > 0.
    context_length
        Must be > 0 when given; defaults to ``prediction_length``.
    dropout_rate
        Must be >= 0.
    cardinality
        Every element must be > 0.
    distr_output, rank
        When ``distr_output`` is ``None``, a
        ``LowrankMultivariateGaussianOutput(dim=target_dim, rank=rank)``
        is created.
    lags_seq, time_features
        Default to ``get_lags_for_frequency(freq)`` and
        ``time_features_from_frequency_str(freq)``.
    train_sampler, validation_sampler
        When ``None``, samplers requiring ``prediction_length`` future
        steps and (unless ``pick_incomplete``) ``history_length`` past
        steps are created.
    trainer, batch_size, **kwargs
        Forwarded to the parent constructor.

    Raises
    ------
    AssertionError
        If one of the checks above fails.
    """
    super().__init__(trainer=trainer, batch_size=batch_size, **kwargs)

    assert (
        prediction_length > 0
    ), "The value of `prediction_length` should be > 0"
    assert (
        context_length is None or context_length > 0
    ), "The value of `context_length` should be > 0"
    assert num_layers > 0, "The value of `num_layers` should be > 0"
    assert num_cells > 0, "The value of `num_cells` should be > 0"
    # The message names the argument that is actually being checked.
    assert (
        num_parallel_samples > 0
    ), "The value of `num_parallel_samples` should be > 0"
    assert dropout_rate >= 0, "The value of `dropout_rate` should be >= 0"
    assert all(
        c > 0 for c in cardinality
    ), "Elements of `cardinality` should be > 0"
    assert (
        embedding_dimension > 0
    ), "The value of `embedding_dimension` should be > 0"

    self.freq = freq
    self.context_length = (
        context_length if context_length is not None else prediction_length
    )

    if distr_output is not None:
        self.distr_output = distr_output
    else:
        self.distr_output = LowrankMultivariateGaussianOutput(
            dim=target_dim, rank=rank
        )

    self.prediction_length = prediction_length
    self.target_dim = target_dim
    self.num_layers = num_layers
    self.num_cells = num_cells
    self.cell_type = cell_type
    self.num_parallel_samples = num_parallel_samples
    self.dropout_rate = dropout_rate
    self.cardinality = cardinality
    self.embedding_dimension = embedding_dimension
    self.conditioning_length = conditioning_length
    self.use_marginal_transformation = use_marginal_transformation

    self.lags_seq = (
        lags_seq
        if lags_seq is not None
        else get_lags_for_frequency(freq_str=freq)
    )
    self.time_features = (
        time_features
        if time_features is not None
        else time_features_from_frequency_str(self.freq)
    )

    self.history_length = self.context_length + max(self.lags_seq)
    self.pick_incomplete = pick_incomplete
    self.scaling = scaling

    if self.use_marginal_transformation:
        self.output_transform: Optional[
            Callable
        ] = cdf_to_gaussian_forward_transform
    else:
        self.output_transform = None

    self.train_sampler = (
        train_sampler
        if train_sampler is not None
        else ExpectedNumInstanceSampler(
            num_instances=1.0,
            min_past=0 if pick_incomplete else self.history_length,
            min_future=prediction_length,
        )
    )
    self.validation_sampler = (
        validation_sampler
        if validation_sampler is not None
        else ValidationSplitSampler(
            min_past=0 if pick_incomplete else self.history_length,
            min_future=prediction_length,
        )
    )
def __init__(
    self,
    freq: str,
    prediction_length: int,
    trainer: Trainer = Trainer(
        learning_rate=0.01,
        epochs=200,
        num_batches_per_epoch=50,
        hybridize=False,
    ),
    cardinality: List[int] = [1],
    seasonality: Optional[int] = None,
    embedding_dimension: int = 5,
    num_bins: int = 1024,
    hybridize_prediction_net: bool = False,
    n_residue=24,
    n_skip=32,
    dilation_depth: Optional[int] = None,
    n_stacks: int = 1,
    train_window_length: Optional[int] = None,
    temperature: float = 1.0,
    act_type: str = "elu",
    num_parallel_samples: int = 200,
    train_sampler: Optional[InstanceSampler] = None,
    validation_sampler: Optional[InstanceSampler] = None,
    batch_size: int = 32,
    negative_data: bool = False,
) -> None:
    """
    Record the estimator configuration and derive bins and receptive field.

    Derived values:

    * ``train_window_length`` defaults to ``prediction_length`` and is the
      ``min_future`` of the default samplers.
    * ``bin_centers`` are ``num_bins`` evenly spaced points on ``[0, 10]``
      (``[-10, 10]`` when ``negative_data`` is set); ``bin_edges`` are the
      midpoints between neighbouring centers, bracketed by ``-1e20`` and
      ``1e20``. Both are stored as plain lists.
    * ``seasonality`` defaults to ``get_seasonality`` for ``freq``, called
      with the frequency table given below.
    * ``dilation_depth``, when ``None``, is the smallest depth (starting at
      1) whose ``WaveNet.get_receptive_field`` reaches
      ``max(2 * seasonality, 2 * prediction_length)``.
    * ``context_length`` is the receptive field for the chosen depth.
    """
    super().__init__(trainer=trainer, batch_size=batch_size)

    self.freq = freq
    self.prediction_length = prediction_length
    self.cardinality = cardinality
    self.embedding_dimension = embedding_dimension
    self.num_bins = num_bins
    self.hybridize_prediction_net = hybridize_prediction_net
    self.n_residue = n_residue
    self.n_skip = n_skip
    self.n_stacks = n_stacks
    self.temperature = temperature
    self.act_type = act_type
    self.num_parallel_samples = num_parallel_samples
    self.negative_data = negative_data

    if train_window_length is None:
        self.train_window_length = prediction_length
    else:
        self.train_window_length = train_window_length

    # Samplers: caller-provided ones win, otherwise build the defaults.
    if train_sampler is None:
        self.train_sampler = ExpectedNumInstanceSampler(
            num_instances=1.0, min_future=self.train_window_length
        )
    else:
        self.train_sampler = train_sampler

    if validation_sampler is None:
        self.validation_sampler = ValidationSplitSampler(
            min_future=self.train_window_length
        )
    else:
        self.validation_sampler = validation_sampler

    # Bin centers and the edges between them.
    lower = -10.0 if self.negative_data else 0
    upper = 10.0
    centers = np.linspace(lower, upper, self.num_bins)
    midpoints = (centers[1:] + centers[:-1]) / 2.0
    edges = np.concatenate([[-1e20], midpoints, [1e20]])
    self.bin_centers = centers.tolist()
    self.bin_edges = edges.tolist()

    if seasonality is None:
        seasonality = get_seasonality(
            self.freq,
            {
                "H": 7 * 24,
                "D": 7,
                "W": 52,
                "M": 12,
                "B": 7 * 5,
                "min": 24 * 60,
            },
        )

    goal_receptive_length = max(
        2 * seasonality, 2 * self.prediction_length
    )

    if dilation_depth is not None:
        self.dilation_depth = dilation_depth
    else:
        # Grow the depth until the receptive field reaches the goal.
        depth = 1
        while True:
            field = WaveNet.get_receptive_field(
                dilation_depth=depth, n_stacks=n_stacks
            )
            if not field < goal_receptive_length:
                break
            depth += 1
        self.dilation_depth = depth

    self.context_length = WaveNet.get_receptive_field(
        dilation_depth=self.dilation_depth, n_stacks=n_stacks
    )

    self.logger = logging.getLogger(__name__)
    self.logger.info(
        f"Using dilation depth {self.dilation_depth} and receptive field length {self.context_length}"
    )
def __init__(
    self,
    freq: str,
    prediction_length: int,
    input_size: int,
    trainer: Trainer = Trainer(),
    context_length: Optional[int] = None,
    num_layers: int = 2,
    num_cells: int = 40,
    cell_type: str = "LSTM",
    dropout_rate: float = 0.1,
    use_feat_dynamic_real: bool = False,
    use_feat_dynamic_cat: bool = False,
    use_feat_static_cat: bool = False,
    use_feat_static_real: bool = False,
    cardinality: Optional[List[int]] = None,
    embedding_dimension: Optional[List[int]] = None,
    distr_output: DistributionOutput = StudentTOutput(),
    scaling: bool = True,
    lags_seq: Optional[List[int]] = None,
    time_features: Optional[List[TimeFeature]] = None,
    num_parallel_samples: int = 100,
    dtype: np.dtype = np.float32,
) -> None:
    """
    Store the estimator's hyper-parameters.

    Parameters
    ----------
    freq, prediction_length, input_size, num_layers, num_cells, cell_type,
    dropout_rate, use_feat_dynamic_real, use_feat_dynamic_cat,
    use_feat_static_cat, use_feat_static_real, scaling,
    num_parallel_samples
        Stored unchanged on the instance.
    trainer
        Forwarded to the parent constructor.
    context_length
        Defaults to ``prediction_length``.
    cardinality
        Falls back to ``[1]`` unless it is non-empty and
        ``use_feat_static_cat`` is set.
    embedding_dimension
        Defaults to ``min(50, (cat + 1) // 2)`` per cardinality entry.
    distr_output, dtype
        A shallow copy of ``distr_output`` is stored and its ``dtype``
        attribute is set to ``dtype``; the object passed in is left
        untouched.
    lags_seq, time_features
        Default to ``get_lags_for_frequency(freq)`` and
        ``time_features_from_frequency_str(freq)``.

    The train and validation samplers are always created here with
    ``min_future=prediction_length``.
    """
    import copy

    super().__init__(trainer=trainer)

    self.freq = freq
    self.context_length = (
        context_length if context_length is not None else prediction_length
    )
    self.prediction_length = prediction_length

    # The default ``StudentTOutput()`` is created once, when the function
    # is defined, and is therefore shared by every call that relies on it.
    # Setting ``dtype`` on that shared object (or on an object owned by the
    # caller) would silently change other estimators, so a private copy is
    # mutated instead.
    self.distr_output = copy.copy(distr_output)
    self.distr_output.dtype = dtype

    self.input_size = input_size
    self.num_layers = num_layers
    self.num_cells = num_cells
    self.cell_type = cell_type
    self.dropout_rate = dropout_rate
    self.use_feat_dynamic_real = use_feat_dynamic_real
    self.use_feat_dynamic_cat = use_feat_dynamic_cat
    self.use_feat_static_cat = use_feat_static_cat
    self.use_feat_static_real = use_feat_static_real

    self.cardinality = (
        cardinality if cardinality and use_feat_static_cat else [1]
    )
    self.embedding_dimension = (
        embedding_dimension
        if embedding_dimension is not None
        else [min(50, (cat + 1) // 2) for cat in self.cardinality]
    )
    self.scaling = scaling

    self.lags_seq = (
        lags_seq
        if lags_seq is not None
        else get_lags_for_frequency(freq_str=freq)
    )
    self.time_features = (
        time_features
        if time_features is not None
        else time_features_from_frequency_str(self.freq)
    )

    self.history_length = self.context_length + max(self.lags_seq)
    self.num_parallel_samples = num_parallel_samples

    self.train_sampler = ExpectedNumInstanceSampler(
        num_instances=1.0, min_future=prediction_length
    )
    self.validation_sampler = ValidationSplitSampler(
        min_future=prediction_length
    )
def __init__(
    self,
    freq: str,
    prediction_length: int,
    trainer: Trainer = Trainer(),
    context_length: Optional[int] = None,
    num_layers: int = 2,
    num_cells: int = 40,
    cell_type: str = "lstm",
    dropoutcell_type: str = "ZoneoutCell",
    dropout_rate: float = 0.1,
    use_feat_dynamic_real: bool = False,
    use_feat_static_cat: bool = False,
    use_feat_static_real: bool = False,
    cardinality: Optional[List[int]] = None,
    embedding_dimension: Optional[List[int]] = None,
    distr_output: DistributionOutput = StudentTOutput(),
    scaling: bool = True,
    lags_seq: Optional[List[int]] = None,
    time_features: Optional[List[TimeFeature]] = None,
    num_parallel_samples: int = 100,
    imputation_method: Optional[MissingValueImputation] = None,
    train_sampler: Optional[InstanceSampler] = None,
    validation_sampler: Optional[InstanceSampler] = None,
    dtype: DType = np.float32,
    alpha: float = 0.0,
    beta: float = 0.0,
    batch_size: int = 32,
    default_scale: Optional[float] = None,
    minimum_scale: float = 1e-10,
    impute_missing_values: bool = False,
    num_imputation_samples: int = 1,
) -> None:
    """
    Validate and store the estimator's hyper-parameters.

    Parameters
    ----------
    freq, cell_type, use_feat_dynamic_real, use_feat_static_real, scaling,
    default_scale, minimum_scale, impute_missing_values,
    num_imputation_samples
        Stored unchanged on the instance.
    prediction_length, num_layers, num_cells, num_parallel_samples
        Must be > 0.
    context_length
        Must be > 0 when given; defaults to ``prediction_length``.
    dropoutcell_type
        Must be one of ``"ZoneoutCell"``, ``"RNNZoneoutCell"``,
        ``"VariationalDropoutCell"``, ``"VariationalZoneoutCell"``.
    dropout_rate, alpha, beta
        Must be >= 0.
    cardinality, use_feat_static_cat
        Must be given together or not at all; elements of ``cardinality``
        must be > 0. Without them, ``cardinality`` is stored as ``[1]``.
    embedding_dimension
        Elements must be > 0; defaults to ``min(50, (cat + 1) // 2)`` per
        cardinality entry.
    distr_output, dtype
        A shallow copy of ``distr_output`` is stored and its ``dtype``
        attribute is set to ``dtype``; the object passed in is left
        untouched.
    lags_seq, time_features
        Default to ``get_lags_for_frequency(freq)`` and
        ``time_features_from_frequency_str(freq)``.
    imputation_method
        Defaults to ``DummyValueImputation`` built from
        ``distr_output.value_in_support``.
    train_sampler, validation_sampler
        When ``None``, samplers with ``min_future=prediction_length`` are
        created.
    trainer, batch_size, dtype
        Forwarded to the parent constructor.

    Raises
    ------
    AssertionError
        If one of the checks above fails.
    """
    import copy

    super().__init__(trainer=trainer, batch_size=batch_size, dtype=dtype)

    assert (
        prediction_length > 0
    ), "The value of `prediction_length` should be > 0"
    assert (
        context_length is None or context_length > 0
    ), "The value of `context_length` should be > 0"
    assert num_layers > 0, "The value of `num_layers` should be > 0"
    assert num_cells > 0, "The value of `num_cells` should be > 0"
    supported_dropoutcell_types = [
        "ZoneoutCell",
        "RNNZoneoutCell",
        "VariationalDropoutCell",
        "VariationalZoneoutCell",
    ]
    assert (
        dropoutcell_type in supported_dropoutcell_types
    ), f"`dropoutcell_type` should be one of {supported_dropoutcell_types}"
    assert dropout_rate >= 0, "The value of `dropout_rate` should be >= 0"
    assert (cardinality and use_feat_static_cat) or (
        not (cardinality or use_feat_static_cat)
    ), "You should set `cardinality` if and only if `use_feat_static_cat=True`"
    assert cardinality is None or all(
        c > 0 for c in cardinality
    ), "Elements of `cardinality` should be > 0"
    assert embedding_dimension is None or all(
        e > 0 for e in embedding_dimension
    ), "Elements of `embedding_dimension` should be > 0"
    assert (
        num_parallel_samples > 0
    ), "The value of `num_parallel_samples` should be > 0"
    assert alpha >= 0, "The value of `alpha` should be >= 0"
    assert beta >= 0, "The value of `beta` should be >= 0"

    self.freq = freq
    self.context_length = (
        context_length if context_length is not None else prediction_length
    )
    self.prediction_length = prediction_length

    # The default ``StudentTOutput()`` is created once, when the function
    # is defined, and is therefore shared by every call that relies on it.
    # Setting ``dtype`` on that shared object (or on an object owned by the
    # caller) would silently change other estimators, so a private copy is
    # mutated instead.
    self.distr_output = copy.copy(distr_output)
    self.distr_output.dtype = dtype

    self.num_layers = num_layers
    self.num_cells = num_cells
    self.cell_type = cell_type
    self.dropoutcell_type = dropoutcell_type
    self.dropout_rate = dropout_rate
    self.use_feat_dynamic_real = use_feat_dynamic_real
    self.use_feat_static_cat = use_feat_static_cat
    self.use_feat_static_real = use_feat_static_real

    self.cardinality = (
        cardinality if cardinality and use_feat_static_cat else [1]
    )
    self.embedding_dimension = (
        embedding_dimension
        if embedding_dimension is not None
        else [min(50, (cat + 1) // 2) for cat in self.cardinality]
    )
    self.scaling = scaling

    self.lags_seq = (
        lags_seq
        if lags_seq is not None
        else get_lags_for_frequency(freq_str=freq)
    )
    self.time_features = (
        time_features
        if time_features is not None
        else time_features_from_frequency_str(self.freq)
    )

    self.history_length = self.context_length + max(self.lags_seq)
    self.num_parallel_samples = num_parallel_samples

    self.imputation_method = (
        imputation_method
        if imputation_method is not None
        else DummyValueImputation(self.distr_output.value_in_support)
    )

    self.train_sampler = (
        train_sampler
        if train_sampler is not None
        else ExpectedNumInstanceSampler(
            num_instances=1.0, min_future=prediction_length
        )
    )
    self.validation_sampler = (
        validation_sampler
        if validation_sampler is not None
        else ValidationSplitSampler(min_future=prediction_length)
    )

    self.alpha = alpha
    self.beta = beta
    self.num_imputation_samples = num_imputation_samples
    self.default_scale = default_scale
    self.minimum_scale = minimum_scale
    self.impute_missing_values = impute_missing_values
def __init__(
    self,
    input_size: int,
    freq: str,
    prediction_length: int,
    target_dim: int,
    trainer: Trainer = Trainer(),
    context_length: Optional[int] = None,
    num_layers: int = 2,
    num_cells: int = 40,
    cell_type: str = "LSTM",
    num_parallel_samples: int = 100,
    dropout_rate: float = 0.1,
    cardinality: List[int] = [1],
    embedding_dimension: int = 5,
    conditioning_length: int = 100,
    diff_steps: int = 100,
    loss_type: str = "l2",
    beta_end=0.1,
    beta_schedule="linear",
    residual_layers=8,
    residual_channels=8,
    dilation_cycle_length=2,
    scaling: bool = True,
    pick_incomplete: bool = False,
    lags_seq: Optional[List[int]] = None,
    time_features: Optional[List[TimeFeature]] = None,
    **kwargs,
) -> None:
    """
    Store the estimator settings and build the instance samplers.

    ``trainer`` and ``**kwargs`` go to the parent constructor.
    ``context_length`` falls back to ``prediction_length``; ``lags_seq``
    and ``time_features`` fall back to the Fourier-feature helpers
    evaluated for ``freq``. ``history_length`` is
    ``context_length + max(lags_seq)`` and is the minimum past window of
    both samplers unless ``pick_incomplete`` is true (then it is 0).
    """
    super().__init__(trainer=trainer, **kwargs)

    # Values stored exactly as passed in.
    self.freq = freq
    self.input_size = input_size
    self.prediction_length = prediction_length
    self.target_dim = target_dim
    self.num_layers = num_layers
    self.num_cells = num_cells
    self.cell_type = cell_type
    self.num_parallel_samples = num_parallel_samples
    self.dropout_rate = dropout_rate
    self.cardinality = cardinality
    self.embedding_dimension = embedding_dimension
    self.conditioning_length = conditioning_length
    self.diff_steps = diff_steps
    self.loss_type = loss_type
    self.beta_end = beta_end
    self.beta_schedule = beta_schedule
    self.residual_layers = residual_layers
    self.residual_channels = residual_channels
    self.dilation_cycle_length = dilation_cycle_length
    self.pick_incomplete = pick_incomplete
    self.scaling = scaling

    # Values with computed fallbacks.
    if context_length is None:
        self.context_length = prediction_length
    else:
        self.context_length = context_length

    if lags_seq is None:
        self.lags_seq = lags_for_fourier_time_features_from_frequency(
            freq_str=freq
        )
    else:
        self.lags_seq = lags_seq

    if time_features is None:
        self.time_features = fourier_time_features_from_frequency(self.freq)
    else:
        self.time_features = time_features

    self.history_length = self.context_length + max(self.lags_seq)

    # Shared keyword arguments for the two samplers.
    window_kwargs = dict(
        min_past=0 if pick_incomplete else self.history_length,
        min_future=prediction_length,
    )
    self.train_sampler = ExpectedNumInstanceSampler(
        num_instances=1.0, **window_kwargs
    )
    self.validation_sampler = ValidationSplitSampler(**window_kwargs)