def _index_tensor(x: Tensor, item: Any) -> Tensor:
    """
    Index a tensor NumPy-style using only ``slice_axis`` and ``squeeze``.

    Parameters
    ----------
    x
        Tensor to index; it must provide ``slice_axis`` and ``squeeze``.
    item
        A single index or a tuple of indices. Each entry may be an integer
        (that axis is removed from the result), a slice without a step, or
        ``Ellipsis``. Entries that follow an ``Ellipsis`` are aligned with
        the trailing axes of ``x``.

    Returns
    -------
    Tensor
        The selected sub-tensor.

    Raises
    ------
    RuntimeError
        If an entry of ``item`` is neither an integer, a slice nor
        ``Ellipsis``.
    """
    squeeze: List[int] = []
    if not isinstance(item, tuple):
        item = (item,)

    saw_ellipsis = False

    for i, item_i in enumerate(item):
        # entries after the ellipsis are counted from the last axis
        axis = i - len(item) if saw_ellipsis else i

        # Identity and type checks come first, so that arbitrary objects
        # (e.g. array-likes with an element-wise ``==``) are never compared
        # and end up in the ``RuntimeError`` branch below.
        if item_i is Ellipsis:
            saw_ellipsis = True
        elif isinstance(item_i, int):
            if item_i != -1:
                x = x.slice_axis(axis=axis, begin=item_i, end=item_i + 1)
            else:
                # ``end=item_i + 1`` would be 0 here, i.e. an empty slice
                x = x.slice_axis(axis=axis, begin=-1, end=None)
            squeeze.append(axis)
        elif isinstance(item_i, slice):
            if item_i == slice(None):
                # full slice: nothing to do for this axis
                continue
            assert item_i.step is None
            start = item_i.start if item_i.start is not None else 0
            x = x.slice_axis(axis=axis, begin=start, end=item_i.stop)
        else:
            raise RuntimeError(f"invalid indexing item: {item}")

    # integer-indexed axes were kept with size one so far; drop them now
    if squeeze:
        x = x.squeeze(axis=tuple(squeeze))
    return x
def _compute_edges(F, bin_centers: Tensor) -> Tensor:
    r"""
    Derive bin edges from bin centers.

    Interior edges are the midpoints between neighbouring centers. The first
    and last edge are set to :math:`-10^{10}` and :math:`10^{10}`,
    respectively.

    Parameters
    ----------
    F
    bin_centers
        Tensor of shape `(*batch_shape, num_bins)`.

    Returns
    -------
    Tensor
        Tensor of shape `(*batch_shape, num_bins + 1)`.
    """
    # zeros shaped like a single bin along the last axis
    single_bin_zeros = F.zeros_like(
        bin_centers.slice_axis(axis=-1, begin=0, end=1)
    )
    lowest_edge = single_bin_zeros - 1.0e10
    highest_edge = single_bin_zeros + 1.0e10

    # all centers but the first / all centers but the last
    right_centers = bin_centers.slice_axis(axis=-1, begin=1, end=None)
    left_centers = bin_centers.slice_axis(axis=-1, begin=0, end=-1)
    midpoints = F.broadcast_add(right_centers, left_centers) / 2.0

    return F.concat(lowest_edge, midpoints, highest_edge, dim=-1)
def get_issm_coeff(
    self, seasonal_indicators: Tensor  # (batch_size, time_length)
) -> Tuple[Tensor, Tensor, Tensor]:
    """
    Collect and combine the coefficients of all component ISSMs.

    The non-seasonal ISSM receives ``seasonal_indicators`` unchanged, while
    the ``ix``-th seasonal ISSM receives only entry ``ix`` of the last axis.
    Emission and innovation coefficients are concatenated along the last
    axis; the transition coefficients are combined by
    ``_make_block_diagonal``.

    Returns
    -------
    Tuple[Tensor, Tensor, Tensor]
        Emission, transition and innovation coefficients, in this order.
    """
    F = getF(seasonal_indicators)

    # one (emission, transition, innovation) triple per component ISSM
    coeff_triples = [
        self.nonseasonal_issm.get_issm_coeff(seasonal_indicators)
    ]
    for ix, issm in enumerate(self.seasonal_issms):
        indicator_ix = seasonal_indicators.slice_axis(
            axis=-1, begin=ix, end=ix + 1
        )
        coeff_triples.append(issm.get_issm_coeff(indicator_ix))

    emission_coeff_ls, transition_coeff_ls, innovation_coeff_ls = zip(
        *coeff_triples
    )

    # transition coefficient is block diagonal!
    transition_coeff = _make_block_diagonal(transition_coeff_ls)

    # stack emission and innovation coefficients
    innovation_coeff = F.concat(*innovation_coeff_ls, dim=-1)
    emission_coeff = F.concat(*emission_coeff_ls, dim=-1)

    return emission_coeff, transition_coeff, innovation_coeff
def reconciliation_error(A: Tensor, samples: Tensor) -> float:
    r"""
    Computes the maximum relative reconciliation error among all the
    aggregated time series

    .. math::

        \max_i \frac{|y_i - s_i|} {|y_i|},

    where :math:`i` refers to the aggregated time series index, :math:`y_i`
    is the (direct) forecast obtained for the :math:`i^{th}` time series and
    :math:`s_i` is its aggregated forecast obtained by summing the
    corresponding bottom-level forecasts. If :math:`y_i` is zero, then the
    absolute difference, :math:`|s_i|`, is used instead.

    This can be computed as follows given the constraint matrix A:

    .. math::

        \max \frac{|A \times samples|} {|samples[:r]|},

    where :math:`r` is the number of aggregated time series.

    Parameters
    ----------
    A
        The constraint matrix A in the equation: Ay = 0 (y being the
        values/forecasts of all time series in the hierarchy).
    samples
        Samples. Shape: `(*batch_shape, target_dim)`.

    Returns
    -------
    float
        Reconciliation error.
    """
    num_agg_ts = A.shape[0]
    forecasts_agg_ts = samples.slice_axis(
        axis=-1, begin=0, end=num_agg_ts
    ).asnumpy()

    abs_err = mx.nd.abs(mx.nd.dot(samples, A, transpose_b=True)).asnumpy()

    # ``np.where`` evaluates both alternatives eagerly, so the division is
    # also carried out where the forecast is zero. Those quotients are
    # discarded, hence the warnings they would trigger are silenced.
    with np.errstate(divide="ignore", invalid="ignore"):
        rel_err = np.where(
            forecasts_agg_ts == 0,
            abs_err,
            abs_err / np.abs(forecasts_agg_ts),
        )

    # convert the NumPy scalar to the built-in float promised by the
    # signature
    return float(np.max(rel_err))
def hybrid_forward(
    self,
    F,
    feat_static_cat: Tensor,
    past_observed_values: Tensor,
    past_seasonal_indicators: Tensor,
    past_time_feat: Tensor,
    past_target: Tensor,
) -> Tensor:
    """
    Compute the training loss: the negative log-likelihood returned by the
    LDS, averaged along axis 1 with the observed-value indicators as weights.

    Only the last ``self.past_length`` steps (axis 1) of the past tensors are
    used, except for the scaler, which receives the full ``past_target`` and
    ``past_observed_values``.
    """
    seasonal_context = past_seasonal_indicators.slice_axis(
        axis=1, begin=-self.past_length, end=None
    )
    time_feat_context = past_time_feat.slice_axis(
        axis=1, begin=-self.past_length, end=None
    )

    lds, _ = self.compute_lds(
        F,
        feat_static_cat=feat_static_cat,
        seasonal_indicators=seasonal_context,
        time_feat=time_feat_context,
        length=self.past_length,
    )

    _, scale = self.scaler(past_target, past_observed_values)

    observed_context = past_observed_values.slice_axis(
        axis=1, begin=-self.past_length, end=None
    )
    target_context = past_target.slice_axis(
        axis=1, begin=-self.past_length, end=None
    )

    # a time step counts as observed only if all entries of its last axis are
    log_likelihood, _, _ = lds.log_prob(
        x=target_context,
        observed=observed_context.min(axis=-1, keepdims=False),
        scale=scale,
    )

    loss_weights = observed_context.squeeze(axis=-1)
    return weighted_average(
        F=F, x=-log_likelihood, axis=1, weights=loss_weights
    )
def _make_2_block_diagonal(F, left: Tensor, right: Tensor) -> Tensor:
    """
    Creates a block diagonal matrix of shape
    (batch_size, seq_length, m+n, m+n) where m and n are the sizes of the
    last two axes of left and right respectively.

    Parameters
    ----------
    F
    left
        Tensor of shape (batch_size, seq_length, m, m)
    right
        Tensor of shape (batch_size, seq_length, n, n)

    Returns
    -------
    Tensor
        Block diagonal matrix of shape (batch_size, seq_length, m+n, m+n)
    """
    # shape (batch_size, seq_length, m, 1)
    zero_column = left.slice_axis(axis=-1, begin=0, end=1).zeros_like()
    # shape (batch_size, seq_length, 1, n)
    zero_row = right.slice_axis(axis=-2, begin=0, end=1).zeros_like()

    # broadcasting the two yields shape (batch_size, seq_length, m, n)
    upper_right_zeros = F.broadcast_add(zero_column, zero_row)
    # shape (batch_size, seq_length, n, m)
    lower_left_zeros = upper_right_zeros.swapaxes(2, 3)

    # block rows, each of width m+n
    upper_rows = F.concat(left, upper_right_zeros, dim=3)
    lower_rows = F.concat(lower_left_zeros, right, dim=3)

    # block diagonal: shape (batch_size, seq_length, m+n, m+n)
    return F.concat(upper_rows, lower_rows, dim=2)
def emission_coeff(
    self, seasonal_indicators: Tensor  # (batch_size, time_length)
) -> Tensor:
    """
    Return an all-ones emission coefficient, broadcast to the leading axes
    of ``seasonal_indicators``.

    ``seasonal_indicators`` only serves as a shape template; its values are
    not used.
    """
    F = getF(seasonal_indicators)
    latent_dim = self.latent_dim()

    # zeros shaped like the indicators without their last axis
    template = F.zeros_like(
        seasonal_indicators.slice_axis(axis=-1, begin=0, end=1).squeeze(
            axis=-1
        )
    )

    # get the right shape: (batch_size, seq_length, obs_dim, latent_dim)
    shape_reference = _broadcast_param(
        template,
        axes=[2, 3],
        sizes=[1, latent_dim],
    )

    ones = F.ones(shape=(1, 1, 1, latent_dim))
    return ones.broadcast_like(shape_reference)
def transition_coeff(
    self, seasonal_indicators: Tensor  # (batch_size, time_length)
) -> Tensor:
    """
    Return an identity transition coefficient, broadcast to the leading axes
    of ``seasonal_indicators``.

    ``seasonal_indicators`` only serves as a shape template; its values are
    not used.
    """
    F = getF(seasonal_indicators)
    latent_dim = self.latent_dim()

    # zeros shaped like the indicators without their last axis
    template = F.zeros_like(
        seasonal_indicators.slice_axis(axis=-1, begin=0, end=1).squeeze(
            axis=-1
        )
    )

    # get the right shape: (batch_size, seq_length, latent_dim, latent_dim)
    shape_reference = _broadcast_param(
        template,
        axes=[2, 3],
        sizes=[latent_dim, latent_dim],
    )

    # identity matrix with two leading singleton axes
    identity = F.eye(latent_dim).expand_dims(axis=0).expand_dims(axis=0)
    return identity.broadcast_like(shape_reference)
def create_network_input(
    self,
    F,
    feat_static_cat: Tensor,  # (batch_size, num_features)
    past_time_feat: Tensor,  # (batch_size, num_features, history_length)
    past_target: Tensor,  # (batch_size, history_length, 1)
    past_observed_values: Tensor,  # (batch_size, history_length)
    future_time_feat: Optional[
        Tensor
    ],  # (batch_size, num_features, prediction_length)
    future_target: Optional[Tensor],  # (batch_size, prediction_length)
) -> Tuple[Tensor, Tensor, Tensor]:
    """
    Creates inputs for the transformer network.

    All tensor arguments should have NTC layout. If either
    ``future_time_feat`` or ``future_target`` is ``None``, only the context
    window is covered; otherwise the context window is extended by the
    prediction range.

    NOTE(review): the shape comments in the signature list the feature axis
    before the time axis for the time features, but the code slices and
    concatenates time along axis 1 -- confirm which one is intended.

    Returns
    -------
    Tuple[Tensor, Tensor, Tensor]
        The network inputs, the scale and the static features.
    """
    # time features restricted to the context window
    context_time_feat = past_time_feat.slice_axis(
        axis=1,
        begin=self.history_length - self.context_length,
        end=None,
    )

    with_future = future_time_feat is not None and future_target is not None
    if with_future:
        time_feat = F.concat(context_time_feat, future_time_feat, dim=1)
        sequence = F.concat(past_target, future_target, dim=1)
        sequence_length = self.history_length + self.prediction_length
        subsequences_length = self.context_length + self.prediction_length
    else:
        time_feat = context_time_feat
        sequence = past_target
        sequence_length = self.history_length
        subsequences_length = self.context_length

    # (batch_size, sub_seq_len, *target_shape, num_lags)
    lags = self.get_lagged_subsequences(
        F=F,
        sequence=sequence,
        sequence_length=sequence_length,
        indices=self.lags_seq,
        subsequences_length=subsequences_length,
    )

    # scale is computed on the context length last units of the past target
    # scale shape is (batch_size, 1, *target_shape)
    context_target = past_target.slice_axis(
        axis=1, begin=-self.context_length, end=None
    )
    context_observed = past_observed_values.slice_axis(
        axis=1, begin=-self.context_length, end=None
    )
    _, scale = self.scaler(context_target, context_observed)

    embedded_cat = self.embedder(feat_static_cat)

    # in addition to embedding features, use the log scale as it can help
    # prediction too
    if len(self.target_shape) == 0:
        log_scale = F.log(scale)
    else:
        log_scale = F.log(scale.squeeze(axis=1))

    # (batch_size, num_features + prod(target_shape))
    static_feat = F.concat(embedded_cat, log_scale, dim=1)

    # same static features at every step of the subsequence
    repeated_static_feat = static_feat.expand_dims(axis=1).repeat(
        axis=1, repeats=subsequences_length
    )

    # (batch_size, sub_seq_len, *target_shape, num_lags)
    lags_scaled = F.broadcast_div(lags, scale.expand_dims(axis=-1))

    # from (batch_size, sub_seq_len, *target_shape, num_lags)
    # to (batch_size, sub_seq_len, prod(target_shape) * num_lags)
    flat_lag_shape = (
        -1,
        subsequences_length,
        len(self.lags_seq) * prod(self.target_shape),
    )
    input_lags = F.reshape(data=lags_scaled, shape=flat_lag_shape)

    # (batch_size, sub_seq_len, input_dim)
    inputs = F.concat(input_lags, time_feat, repeated_static_feat, dim=-1)

    return inputs, scale, static_feat
def train_hybrid_forward(
    self,
    F,
    target_dimension_indicator: Tensor,
    past_time_feat: Tensor,
    past_target_cdf: Tensor,
    past_observed_values: Tensor,
    past_is_pad: Tensor,
    future_time_feat: Tensor,
    future_target_cdf: Tensor,
    future_observed_values: Tensor,
) -> Tuple[Tensor, ...]:
    """
    Computes the loss for training DeepVAR, all inputs tensors representing
    time series have NTC layout.

    As a side effect, the distribution that was built is stored in
    ``self.distribution``.

    Parameters
    ----------
    F
    target_dimension_indicator
        Indices of the target dimension (batch_size, target_dim)
    past_time_feat
        Dynamic features of past time series
        (batch_size, history_length, num_features)
    past_target_cdf
        Past marginal CDF transformed target values
        (batch_size, history_length, target_dim)
    past_observed_values
        Indicator whether or not the values were observed
        (batch_size, history_length, target_dim)
    past_is_pad
        Indicator whether the past target values have been padded
        (batch_size, history_length)
    future_time_feat
        Future time features (batch_size, prediction_length, num_features)
    future_target_cdf
        Future marginal CDF transformed target values
        (batch_size, prediction_length, target_dim)
    future_observed_values
        Indicator whether or not the future values were observed
        (batch_size, prediction_length, target_dim)

    Returns
    -------
    loss
        Loss with shape (batch_size, 1)
    likelihoods
        Likelihoods for each time step
        (batch_size, context + prediction_length, 1)
    distr_args
        Distribution arguments (context + prediction_length,
        number_of_arguments)
    """
    seq_len = self.context_length + self.prediction_length

    # unroll the decoder in "training mode", i.e. by providing future data
    # as well
    rnn_outputs, _, scale, lags_scaled, inputs = self.unroll_encoder(
        F=F,
        past_time_feat=past_time_feat,
        past_target_cdf=past_target_cdf,
        past_observed_values=past_observed_values,
        past_is_pad=past_is_pad,
        future_time_feat=future_time_feat,
        future_target_cdf=future_target_cdf,
        target_dimension_indicator=target_dimension_indicator,
    )

    # put together target sequence
    # (batch_size, seq_len, target_dim)
    context_target = past_target_cdf.slice_axis(
        axis=1, begin=-self.context_length, end=None
    )
    target = F.concat(context_target, future_target_cdf, dim=1)

    distr, distr_args = self.distr(
        time_features=inputs,
        rnn_outputs=rnn_outputs,
        scale=scale,
        lags_scaled=lags_scaled,
        target_dimension_indicator=target_dimension_indicator,
        seq_len=seq_len,
    )

    # we sum the last axis to have the same shape for all likelihoods
    # (batch_size, subseq_length, 1)
    likelihoods = -distr.log_prob(target).expand_dims(axis=-1)
    assert_shape(likelihoods, (-1, seq_len, 1))

    # padded time steps are treated as unobserved
    not_padded = 1 - past_is_pad.expand_dims(axis=-1)
    past_observed_values = F.broadcast_minimum(
        past_observed_values, not_padded
    )

    # (batch_size, subseq_length, target_dim)
    context_observed = past_observed_values.slice_axis(
        axis=1, begin=-self.context_length, end=None
    )
    observed_values = F.concat(
        context_observed, future_observed_values, dim=1
    )

    # mask the loss at one time step if one or more observations is missing
    # in the target dimensions (batch_size, subseq_length, 1)
    loss_weights = observed_values.min(axis=-1, keepdims=True)
    assert_shape(loss_weights, (-1, seq_len, 1))

    loss = weighted_average(
        F=F, x=likelihoods, weights=loss_weights, axis=1
    )
    assert_shape(loss, (-1, -1, 1))

    self.distribution = distr

    return (loss, likelihoods) + distr_args
def unroll_encoder(
    self,
    F,
    past_time_feat: Tensor,
    past_target_cdf: Tensor,
    past_observed_values: Tensor,
    past_is_pad: Tensor,
    future_time_feat: Optional[Tensor],
    future_target_cdf: Optional[Tensor],
    target_dimension_indicator: Tensor,
) -> Tuple[Tensor, List[Tensor], Tensor, Tensor, Tensor]:
    """
    Unrolls the RNN encoder over past and, if present, future data.

    Returns outputs and state of the encoder, plus the scale of
    past_target_cdf, the scaled lags and the inputs that were fed to the
    encoder. All tensor arguments should have NTC layout.

    Parameters
    ----------
    F
    past_time_feat
        Past time features (batch_size, history_length, num_features)
    past_target_cdf
        Past marginal CDF transformed target values
        (batch_size, history_length, target_dim)
    past_observed_values
        Indicator whether or not the values were observed
        (batch_size, history_length, target_dim)
    past_is_pad
        Indicator whether the past target values have been padded
        (batch_size, history_length)
    future_time_feat
        Future time features (batch_size, prediction_length, num_features)
    future_target_cdf
        Future marginal CDF transformed target values
        (batch_size, prediction_length, target_dim)
    target_dimension_indicator
        Dimensionality of the time series (batch_size, target_dim)

    Returns
    -------
    outputs
        RNN outputs (batch_size, seq_len, num_cells)
    states
        RNN states. Nested list with (batch_size, num_cells) tensors with
        dimensions target_dim x num_layers x (batch_size, num_cells)
    scale
        Mean scales for the time series (batch_size, 1, target_dim)
    lags_scaled
        Scaled lags (batch_size, sub_seq_len, target_dim, num_lags)
    inputs
        inputs to the RNN
    """
    # padded time steps are treated as unobserved
    not_padded = 1 - past_is_pad.expand_dims(axis=-1)
    past_observed_values = F.broadcast_minimum(
        past_observed_values, not_padded
    )

    # time features restricted to the context window
    context_time_feat = past_time_feat.slice_axis(
        axis=1, begin=-self.context_length, end=None
    )

    with_future = (
        future_time_feat is not None and future_target_cdf is not None
    )
    if with_future:
        time_feat = F.concat(context_time_feat, future_time_feat, dim=1)
        sequence = F.concat(past_target_cdf, future_target_cdf, dim=1)
        sequence_length = self.history_length + self.prediction_length
        subsequences_length = self.context_length + self.prediction_length
    else:
        time_feat = context_time_feat
        sequence = past_target_cdf
        sequence_length = self.history_length
        subsequences_length = self.context_length

    # (batch_size, sub_seq_len, target_dim, num_lags)
    lags = self.get_lagged_subsequences(
        F=F,
        sequence=sequence,
        sequence_length=sequence_length,
        indices=self.lags_seq,
        subsequences_length=subsequences_length,
    )

    # scale is computed on the context length last units of the past target
    # scale shape is (batch_size, 1, target_dim)
    context_target = past_target_cdf.slice_axis(
        axis=1, begin=-self.context_length, end=None
    )
    context_observed = past_observed_values.slice_axis(
        axis=1, begin=-self.context_length, end=None
    )
    _, scale = self.scaler(context_target, context_observed)

    outputs, states, lags_scaled, inputs = self.unroll(
        F=F,
        lags=lags,
        scale=scale,
        time_feat=time_feat,
        target_dimension_indicator=target_dimension_indicator,
        unroll_length=subsequences_length,
        begin_state=None,
    )

    return outputs, states, scale, lags_scaled, inputs
def hybrid_forward(
    self,
    F,
    feat_static_cat: Tensor,
    feat_static_real: Tensor,
    past_time_feat: Tensor,
    past_target: Tensor,
    past_observed_values: Tensor,
    past_is_pad: Optional[Tensor],
    future_time_feat: Tensor,
    future_target: Tensor,
    future_observed_values: Tensor,
) -> Tensor:
    """
    Computes the loss for training DeepAR, all inputs tensors representing
    time series have NTC layout.

    Parameters
    ----------
    F
    feat_static_cat : (batch_size, num_features)
    feat_static_real : (batch_size, num_features)
    past_time_feat : (batch_size, history_length, num_features)
    past_target : (batch_size, history_length, *target_shape)
    past_observed_values : (batch_size, history_length, *target_shape)
    past_is_pad
        Forwarded unchanged to ``self.distribution``.
    future_time_feat : (batch_size, prediction_length, num_features)
    future_target : (batch_size, prediction_length, *target_shape)
    future_observed_values : (batch_size, prediction_length, *target_shape)

    Returns
    -------
    A pair ``(weighted_loss, loss)``: the weighted average of the loss along
    axis 1 (plus the regularization terms enabled via ``self.alpha`` and
    ``self.beta``), and the per-step loss with entries set to zero wherever
    the loss weights are zero.

    NOTE(review): the return annotation says ``Tensor`` although a tuple is
    returned.
    """
    outputs = self.distribution(
        feat_static_cat=feat_static_cat,
        feat_static_real=feat_static_real,
        past_time_feat=past_time_feat,
        past_target=past_target,
        past_observed_values=past_observed_values,
        past_is_pad=past_is_pad,
        future_time_feat=future_time_feat,
        future_target=future_target,
        future_observed_values=future_observed_values,
        return_rnn_outputs=True,
    )
    # since return_rnn_outputs=True, assert:
    assert isinstance(outputs, tuple)
    distr, rnn_outputs = outputs

    # first time index of the context window within the history
    context_begin = self.history_length - self.context_length

    # put together target sequence
    # (batch_size, seq_len, *target_shape)
    context_target = past_target.slice_axis(
        axis=1, begin=context_begin, end=None
    )
    target = F.concat(context_target, future_target, dim=1)

    # (batch_size, seq_len)
    loss = distr.loss(target)

    # (batch_size, seq_len, *target_shape)
    context_observed = past_observed_values.slice_axis(
        axis=1, begin=context_begin, end=self.history_length
    )
    observed_values = F.concat(
        context_observed, future_observed_values, dim=1
    )

    # mask the loss at one time step iff one or more observations is missing
    # in the target dimensions
    # (batch_size, seq_len)
    if len(self.target_shape) == 0:
        loss_weights = observed_values
    else:
        loss_weights = observed_values.min(axis=-1, keepdims=False)

    weighted_loss = weighted_average(
        F=F,
        x=loss,
        weights=loss_weights,
        axis=1,
        include_zeros_in_denominator=self.include_zeros_in_denominator,
    )

    # need to mask possible nans and -inf
    loss = F.where(condition=loss_weights, x=loss, y=F.zeros_like(loss))

    # rnn_outputs is already merged into a single tensor
    assert not isinstance(rnn_outputs, list)

    # it seems that the trainer only uses the first return value for
    # backward so we only add regularization to weighted_loss
    if self.alpha:
        weighted_loss = weighted_loss + self.ar_loss(rnn_outputs)
    if self.beta:
        weighted_loss = weighted_loss + self.tar_loss(rnn_outputs)

    return weighted_loss, loss
def unroll_encoder_default(
    self,
    F,
    feat_static_cat: Tensor,  # (batch_size, num_features)
    feat_static_real: Tensor,  # (batch_size, num_features)
    past_time_feat: Tensor,  # (batch_size, history_length, num_features)
    past_target: Tensor,  # (batch_size, history_length, *target_shape)
    past_observed_values: Tensor,  # (batch_size, history_length, *target_shape)
    past_is_pad: Tensor,
    future_observed_values: Optional[Tensor],
    future_time_feat: Optional[
        Tensor
    ],  # (batch_size, prediction_length, num_features)
    future_target: Optional[
        Tensor
    ],  # (batch_size, prediction_length, *target_shape)
) -> Tuple[Tensor, List, Tensor, Tensor, Tensor]:
    """
    Unrolls the LSTM encoder over past and, if present, future data.

    Returns outputs and state of the encoder, plus the scale of past_target,
    a vector of static features that was constructed and fed as input to the
    encoder, and the target sequence the lags were taken from. All tensor
    arguments should have NTC layout.
    """
    # first time index of the context window within the history
    context_begin = self.history_length - self.context_length

    context_time_feat = past_time_feat.slice_axis(
        axis=1, begin=context_begin, end=None
    )
    context_is_pad = past_is_pad.slice_axis(
        axis=1, begin=context_begin, end=None
    )

    with_future = future_time_feat is not None and future_target is not None
    if with_future:
        time_feat = F.concat(context_time_feat, future_time_feat, dim=1)
        # the future range is never marked as padded
        is_padded_indicator = F.concat(
            context_is_pad,
            F.zeros_like(future_observed_values),
            dim=1,
        )
        sequence = F.concat(past_target, future_target, dim=1)
        sequence_length = self.history_length + self.prediction_length
        subsequences_length = self.context_length + self.prediction_length
    else:
        time_feat = context_time_feat
        is_padded_indicator = context_is_pad
        sequence = past_target
        sequence_length = self.history_length
        subsequences_length = self.context_length

    # (batch_size, sub_seq_len, *target_shape, num_lags)
    lags = self.get_lagged_subsequences(
        F=F,
        sequence=sequence,
        sequence_length=sequence_length,
        indices=self.lags_seq,
        subsequences_length=subsequences_length,
    )

    # scale is computed on the context length last units of the past target
    # scale shape is (batch_size, 1, *target_shape)
    context_target = past_target.slice_axis(
        axis=1, begin=-self.context_length, end=None
    )
    context_observed = past_observed_values.slice_axis(
        axis=1, begin=-self.context_length, end=None
    )
    _, scale = self.scaler(context_target, context_observed)

    # (batch_size, num_features)
    embedded_cat = self.embedder(feat_static_cat)

    # in addition to embedding features, use the log scale as it can help
    # prediction too
    if len(self.target_shape) == 0:
        log_scale = F.log(scale)
    else:
        log_scale = F.log(scale.squeeze(axis=1))

    # (batch_size, num_features + prod(target_shape))
    static_feat = F.concat(embedded_cat, feat_static_real, log_scale, dim=1)

    # (batch_size, subsequences_length, num_features + 1)
    repeated_static_feat = static_feat.expand_dims(axis=1).repeat(
        axis=1, repeats=subsequences_length
    )

    # (batch_size, sub_seq_len, *target_shape, num_lags)
    lags_scaled = F.broadcast_div(lags, scale.expand_dims(axis=-1))

    # from (batch_size, sub_seq_len, *target_shape, num_lags)
    # to (batch_size, sub_seq_len, prod(target_shape) * num_lags)
    flat_lag_shape = (
        -1,
        subsequences_length,
        len(self.lags_seq) * prod(self.target_shape),
    )
    input_lags = F.reshape(data=lags_scaled, shape=flat_lag_shape)

    # (batch_size, sub_seq_len, input_dim)
    inputs = F.concat(input_lags, time_feat, repeated_static_feat, dim=-1)

    # the batch size is only known for NDArray inputs
    batch_size = inputs.shape[0] if isinstance(inputs, mx.nd.NDArray) else 0
    begin_state = self.rnn.begin_state(
        func=F.zeros,
        dtype=self.dtype,
        batch_size=batch_size,
    )

    # This is a dummy computation to avoid deferred initialization error
    # when past_is_pad is not used in the computation graph in default
    # unrolling mode. Both alternatives of ``F.where`` are the begin state,
    # so the resulting state equals the begin state.
    first_step_is_pad = is_padded_indicator.slice_axis(
        axis=1, begin=0, end=1
    ).repeat(repeats=self.num_cells, axis=1)
    state = [F.where(first_step_is_pad, s, s) for s in begin_state]

    # unroll encoder
    outputs, state = self.rnn.unroll(
        inputs=inputs,
        length=subsequences_length,
        layout="NTC",
        merge_outputs=True,
        begin_state=state,
    )

    # outputs: (batch_size, seq_len, num_cells)
    # state: list of (batch_size, num_cells) tensors
    # scale: (batch_size, 1, *target_shape)
    # static_feat: (batch_size, num_features + prod(target_shape))
    return outputs, state, scale, static_feat, sequence
def unroll_encoder_imputation(
    self,
    F,
    feat_static_cat: Tensor,  # (batch_size, num_features)
    feat_static_real: Tensor,  # (batch_size, num_features)
    past_time_feat: Tensor,  # (batch_size, history_length, num_features)
    past_target: Tensor,  # (batch_size, history_length, *target_shape)
    past_observed_values: Tensor,  # (batch_size, history_length, *target_shape)
    past_is_pad: Tensor,  # (batch_size, history_length, *target_shape)
    future_observed_values: Optional[
        Tensor
    ],  # (batch_size, history_length, *target_shape)
    future_time_feat: Optional[
        Tensor
    ],  # (batch_size, prediction_length, num_features)
    future_target: Optional[
        Tensor
    ],  # (batch_size, prediction_length, *target_shape)
) -> Tuple[Tensor, List, Tensor, Tensor, Tensor]:
    """
    Unrolls the RNN encoder in "imputation mode" which will fill imputed
    values with samples from the DeepAR model.

    Returns the concatenated encoder outputs, the encoder state, the scale
    of past_target, the static features and the imputed sequence produced by
    ``self.imputation_rnn_unroll``.
    """
    # first time index of the context window within the history
    context_begin = self.history_length - self.context_length

    # context-window views of the past tensors
    context_time_feat = past_time_feat.slice_axis(
        axis=1, begin=context_begin, end=None
    )
    context_is_pad = past_is_pad.slice_axis(
        axis=1, begin=context_begin, end=None
    )
    context_target = past_target.slice_axis(
        axis=1, begin=context_begin, end=None
    )
    context_observed = past_observed_values.slice_axis(
        axis=1, begin=context_begin, end=None
    )

    with_future = future_time_feat is not None and future_target is not None
    if with_future:
        time_feat = F.concat(context_time_feat, future_time_feat, dim=1)
        # the future range is never marked as padded
        is_padded_indicator = F.concat(
            context_is_pad,
            F.zeros_like(future_observed_values),
            dim=1,
        )
        target = F.concat(context_target, future_target, dim=1)
        target_observed_values = F.concat(
            context_observed, future_observed_values, dim=1
        )
        sequence = F.concat(past_target, future_target, dim=1)
        sequence_length = self.history_length + self.prediction_length
        subsequences_length = self.context_length + self.prediction_length
    else:
        time_feat = context_time_feat
        is_padded_indicator = context_is_pad
        target = context_target
        target_observed_values = context_observed
        sequence = past_target
        sequence_length = self.history_length
        subsequences_length = self.context_length

    # (batch_size, sub_seq_len, *target_shape, num_lags)
    lags = self.get_lagged_subsequences(
        F=F,
        sequence=sequence,
        sequence_length=sequence_length,
        indices=self.lags_seq,
        subsequences_length=subsequences_length,
    )

    # scale is computed on the context length last units of the past target
    # scale shape is (batch_size, 1, *target_shape)
    _, scale = self.scaler(
        past_target.slice_axis(axis=1, begin=-self.context_length, end=None),
        past_observed_values.slice_axis(
            axis=1, begin=-self.context_length, end=None
        ),
    )

    # (batch_size, num_features)
    embedded_cat = self.embedder(feat_static_cat)

    # in addition to embedding features, use the log scale as it can help
    # prediction too
    if len(self.target_shape) == 0:
        log_scale = F.log(scale)
    else:
        log_scale = F.log(scale.squeeze(axis=1))

    # (batch_size, num_features + prod(target_shape))
    static_feat = F.concat(embedded_cat, feat_static_real, log_scale, dim=1)

    # (batch_size, subsequences_length, num_features + 1)
    repeated_static_feat = static_feat.expand_dims(axis=1).repeat(
        axis=1, repeats=subsequences_length
    )

    # (batch_size, sub_seq_len, *target_shape, num_lags)
    lags_scaled = F.broadcast_div(lags, scale.expand_dims(axis=-1))

    # from (batch_size, sub_seq_len, *target_shape, num_lags)
    # to (batch_size, sub_seq_len, prod(target_shape) * num_lags)
    flat_lag_shape = (
        -1,
        subsequences_length,
        len(self.lags_seq) * prod(self.target_shape),
    )
    input_lags = F.reshape(data=lags_scaled, shape=flat_lag_shape)

    # (batch_size, sub_seq_len, input_dim)
    inputs = F.concat(input_lags, time_feat, repeated_static_feat, dim=-1)

    # Set initial state; the batch size is only known for NDArray inputs
    batch_size = inputs.shape[0] if isinstance(inputs, mx.nd.NDArray) else 0
    begin_state = self.rnn.begin_state(
        func=F.zeros,
        dtype=self.dtype,
        batch_size=batch_size,
    )

    outputs, state, imputed_sequence = self.imputation_rnn_unroll(
        F,
        begin_state=begin_state,
        sequence=sequence,
        sequence_length=sequence_length,
        subsequences_length=subsequences_length,
        scale=scale,
        target=target,
        target_observed_values=target_observed_values,
        time_feat=time_feat,
        repeated_static_feat=repeated_static_feat,
        is_padded_indicator=is_padded_indicator,
    )

    # outputs: (batch_size, seq_len, num_cells)
    # state: list of (batch_size, num_cells) tensors
    # scale: (batch_size, 1, *target_shape)
    # static_feat: (batch_size, num_features + prod(target_shape))
    merged_outputs = F.concat(*outputs, dim=1)

    return merged_outputs, state, scale, static_feat, imputed_sequence
def prepare_inputs_imputation_step(
    self,
    F,
    begin_state: List[Tensor],
    imputed_sequence: Tensor,
    sequence_length: int,
    subsequences_length: int,
    scale: Tensor,
    target: Tensor,
    target_observed_values: Tensor,
    time_feat: Tensor,
    repeated_static_feat: Tensor,
    is_padded_indicator: Tensor,
    state,
    i: int,
) -> Tuple[Tensor, Tensor, Tensor, Tensor, Tensor, Tensor, Tensor]:
    """
    Prepares inputs for the next LSTM unrolling step at step i.

    Returns, in this order: the inputs built from the lags of
    ``imputed_sequence``; the padding indicator, observed indicator and
    target at step ``i``; the parts of ``imputed_sequence`` before and after
    the position that corresponds to step ``i``; and the state, reset to
    ``begin_state`` where step ``i`` is padded.

    NOTE(review): for ``i == subsequences_length - 1`` the begin index of the
    "after" part is 0 -- check how the caller handles the last step.
    """
    lags = self.get_lagged_subsequences(
        F=F,
        sequence=imputed_sequence,
        sequence_length=sequence_length,
        indices=self.lags_seq,
        subsequences_length=subsequences_length,
    )

    # (batch_size, sub_seq_len, *target_shape, num_lags)
    lags_scaled = F.broadcast_div(lags, scale.expand_dims(axis=-1))

    # from (batch_size, sub_seq_len, *target_shape, num_lags)
    # to (batch_size, sub_seq_len, prod(target_shape) * num_lags)
    flat_lag_shape = (
        -1,
        subsequences_length,
        len(self.lags_seq) * prod(self.target_shape),
    )
    input_lags = F.reshape(data=lags_scaled, shape=flat_lag_shape)

    # (batch_size, sub_seq_len, input_dim)
    inputs = F.concat(input_lags, time_feat, repeated_static_feat, dim=-1)

    # values at step i of the subsequence
    is_pad = is_padded_indicator.slice_axis(axis=1, begin=i, end=i + 1)
    current_observed_indicator = target_observed_values.slice_axis(
        axis=1, begin=i, end=i + 1
    )
    current_target = target.slice_axis(axis=1, begin=i, end=i + 1)

    # position of step i in the imputed sequence, counted from its end
    position = -subsequences_length + i
    pre_sequence = imputed_sequence.slice_axis(
        axis=1, begin=0, end=position
    )
    post_sequence = imputed_sequence.slice_axis(
        axis=1, begin=position + 1, end=None
    )

    # Reset the state to the begin state if the current target is padded
    reset_state = []
    for bs, s in zip(begin_state, state):
        pad_mask = is_pad.repeat(repeats=self.num_cells, axis=1)
        reset_state.append(F.where(pad_mask, bs, s))

    return (
        inputs,
        is_pad,
        current_observed_indicator,
        current_target,
        pre_sequence,
        post_sequence,
        reset_state,
    )
def hybrid_forward(
    self,
    F,
    feat_static_cat: Tensor,
    past_observed_values: Tensor,
    past_seasonal_indicators: Tensor,
    past_time_feat: Tensor,
    past_target: Tensor,
    future_seasonal_indicators: Tensor,
    future_time_feat: Tensor,
) -> Tensor:
    """
    Draw sample paths for the prediction range.

    An LDS is first built and evaluated on the last ``self.past_length``
    steps of the past data; the final mean, covariance and LSTM state it
    yields then initialise a second LDS over the prediction range, from
    which ``self.num_parallel_samples`` samples are drawn.

    Returns
    -------
    Tensor
        Samples of shape
        (batch_size, num_samples, prediction_length, target_dim), with the
        last axis squeezed in the univariate case.
    """
    seasonal_context = past_seasonal_indicators.slice_axis(
        axis=1, begin=-self.past_length, end=None
    )
    time_feat_context = past_time_feat.slice_axis(
        axis=1, begin=-self.past_length, end=None
    )

    lds, lstm_state = self.compute_lds(
        F,
        feat_static_cat=feat_static_cat,
        seasonal_indicators=seasonal_context,
        time_feat=time_feat_context,
        length=self.past_length,
    )

    _, scale = self.scaler(past_target, past_observed_values)

    observed_context = past_observed_values.slice_axis(
        axis=1, begin=-self.past_length, end=None
    )
    target_context = past_target.slice_axis(
        axis=1, begin=-self.past_length, end=None
    )

    # only the final mean and covariance are needed here
    _, final_mean, final_cov = lds.log_prob(
        x=target_context,
        observed=observed_context.min(axis=-1, keepdims=False),
        scale=scale,
    )

    lds_prediction, _ = self.compute_lds(
        F,
        feat_static_cat=feat_static_cat,
        seasonal_indicators=future_seasonal_indicators,
        time_feat=future_time_feat,
        length=self.prediction_length,
        lstm_begin_state=lstm_state,
        prior_mean=final_mean,
        prior_cov=final_cov,
    )

    samples = lds_prediction.sample(
        num_samples=self.num_parallel_samples, scale=scale
    )

    # convert samples from
    # (num_samples, batch_size, prediction_length, target_dim)
    # to
    # (batch_size, num_samples, prediction_length, target_dim)
    batch_major_samples = samples.transpose(axes=(1, 0, 2, 3))

    # and squeeze last axis in the univariate case
    if self.univariate:
        return batch_major_samples.squeeze(axis=3)
    return batch_major_samples
def hybrid_forward(
    self, F, past_target: Tensor, past_observed_values: Tensor
) -> Tensor:
    """
    Given the tensor `past_target`, first we normalize it by the
    `past_observed_values` which is an indicator tensor with 0 or 1 values.
    Then it outputs the result of LSTNet.

    Parameters
    ----------
    F
    past_target
        Tensor of shape (batch_size, num_series, context_length)
    past_observed_values
        Tensor of shape (batch_size, num_series, context_length)

    Returns
    -------
    A pair ``(output, scale)``. ``output`` has shape
    (batch_size, num_series, 1) if `horizon` was specified and shape
    (batch_size, num_series, prediction_length) if `prediction_length` was
    provided; it is passed through the output activation when one is
    configured. ``scale`` is the scale returned by the scaler.

    NOTE(review): the return annotation says ``Tensor`` although a tuple is
    returned.
    """
    # restrict both inputs to the last ``context_length`` steps
    context_target = past_target.slice_axis(
        axis=2, begin=-self.context_length, end=None
    )
    context_observed = past_observed_values.slice_axis(
        axis=2, begin=-self.context_length, end=None
    )
    scaled_context, scale = self.scaler(context_target, context_observed)

    # scaled targets and observed indicators stacked along a new axis 1
    cnn_inputs = F.concat(
        scaled_context.expand_dims(axis=1),
        context_observed.expand_dims(axis=1),
        dim=1,
    )
    conv_out = self.dropout(self.cnn(cnn_inputs))
    c = F.squeeze(conv_out, axis=2)  # NCT
    r = F.transpose(c, axes=(2, 0, 1))  # TNC

    if F is not mx.ndarray:
        # the batch size is not known here
        rnn_begin_state = self.rnn.begin_state(
            func=F.zeros, dtype=self.dtype, batch_size=0
        )
    else:
        if isinstance(r, mx.gluon.tensor_types):
            ctx = r.context
        else:
            ctx = r[0].context
        # create the begin state on the same context as the inputs
        with ctx:
            rnn_begin_state = self.rnn.begin_state(
                func=F.zeros, dtype=self.dtype, batch_size=r.shape[1]
            )

    r, _ = self.rnn.unroll(
        inputs=r,
        length=min(self.conv_out, self.context_length),
        layout="TNC",
        merge_outputs=True,
        begin_state=rnn_begin_state,
    )
    # keep only the output of the last time step
    last_step = F.slice_axis(r, axis=0, begin=-1, end=None)
    r = F.squeeze(last_step, axis=0)  # NC

    s = self._skip_rnn_layer(F, c)

    # make fc broadcastable for output
    fc = self.fc(F.concat(r, s, dim=1))
    fc = fc.expand_dims(axis=2)  # N x num_series x 1
    if self.prediction_length:
        # N x num_series x prediction_length
        fc = F.tile(fc, reps=(1, 1, self.prediction_length))

    ar = self._ar_highway(F, scaled_context, context_observed)
    out = fc + ar

    if self.output_activation is None:
        return out, scale

    # any activation other than "sigmoid" falls back to tanh
    if self.output_activation == "sigmoid":
        activated = F.sigmoid(out)
    else:
        activated = F.tanh(out)
    return activated, scale