def train_hybrid_forward(
    self,
    F,
    target_dimension_indicator: Tensor,
    past_time_feat: Tensor,
    past_target_cdf: Tensor,
    past_observed_values: Tensor,
    past_is_pad: Tensor,
    future_time_feat: Tensor,
    future_target_cdf: Tensor,
    future_observed_values: Tensor,
) -> Tuple[Tensor, ...]:
    """
    Computes the loss for training DeepVAR, all inputs tensors representing
    time series have NTC layout.

    As a side effect, the distribution object built for this batch is
    stored on ``self.distribution``.

    Parameters
    ----------
    F
        Namespace providing the tensor operations used here
        (``F.concat``, ``F.broadcast_minimum``)
    target_dimension_indicator
        Indices of the target dimension (batch_size, target_dim)
    past_time_feat
        Dynamic features of past time series (batch_size, history_length,
        num_features)
    past_target_cdf
        Past marginal CDF transformed target values (batch_size,
        history_length, target_dim)
    past_observed_values
        Indicator whether or not the values were observed (batch_size,
        history_length, target_dim)
    past_is_pad
        Indicator whether the past target values have been padded
        (batch_size, history_length)
    future_time_feat
        Future time features (batch_size, prediction_length, num_features)
    future_target_cdf
        Future marginal CDF transformed target values (batch_size,
        prediction_length, target_dim)
    future_observed_values
        Indicator whether or not the future values were observed
        (batch_size, prediction_length, target_dim)

    Returns
    -------
    loss
        Loss with shape (batch_size, 1): the negative log-likelihoods
        averaged over the time axis, weighted by the observation mask
    likelihoods
        Negative log-likelihood for each time step
        (batch_size, context_length + prediction_length, 1)
    distr_args
        Distribution arguments returned by ``self.distr``; this tuple is
        appended element-wise to the returned tuple after ``loss`` and
        ``likelihoods``
    """
    seq_len = self.context_length + self.prediction_length

    # unroll the network in "training mode", i.e. by providing future data
    # as well
    rnn_outputs, _, scale, lags_scaled, inputs = self.unroll_encoder(
        F=F,
        past_time_feat=past_time_feat,
        past_target_cdf=past_target_cdf,
        past_observed_values=past_observed_values,
        past_is_pad=past_is_pad,
        future_time_feat=future_time_feat,
        future_target_cdf=future_target_cdf,
        target_dimension_indicator=target_dimension_indicator,
    )

    # put together target sequence: the last `context_length` past steps
    # followed by the future steps
    # (batch_size, seq_len, target_dim)
    # NOTE(review): the original carried a disabled shape check here,
    # `assert_shape(target, (-1, seq_len, self.target_dim))`; it is left
    # disabled because the reason for disabling it is not visible.
    target = F.concat(
        past_target_cdf.slice_axis(
            axis=1, begin=-self.context_length, end=None
        ),
        future_target_cdf,
        dim=1,
    )

    distr, distr_args = self.distr(
        time_features=inputs,
        rnn_outputs=rnn_outputs,
        scale=scale,
        lags_scaled=lags_scaled,
        target_dimension_indicator=target_dimension_indicator,
        seq_len=seq_len,
    )

    # negative log-likelihood per time step; a trailing axis is added so
    # that it lines up with the loss weights below
    # (batch_size, seq_len, 1)
    likelihoods = -distr.log_prob(target).expand_dims(axis=-1)
    assert_shape(likelihoods, (-1, seq_len, 1))

    # treat padded past time steps as unobserved; a separate name is used
    # so that the input argument is not rebound
    unpadded_past_observed = F.broadcast_minimum(
        past_observed_values, 1 - past_is_pad.expand_dims(axis=-1)
    )

    # (batch_size, seq_len, target_dim)
    observed_values = F.concat(
        unpadded_past_observed.slice_axis(
            axis=1, begin=-self.context_length, end=None
        ),
        future_observed_values,
        dim=1,
    )

    # mask the loss at one time step if one or more observations is missing
    # in the target dimensions (batch_size, seq_len, 1)
    loss_weights = observed_values.min(axis=-1, keepdims=True)
    assert_shape(loss_weights, (-1, seq_len, 1))

    loss = weighted_average(
        F=F, x=likelihoods, weights=loss_weights, axis=1
    )
    # The average reduces the time axis, so the loss is expected to be
    # (batch_size, 1) as documented above. The previous rank-3 expectation
    # (-1, -1, 1) did not match that shape.
    # NOTE(review): assumes weighted_average drops the reduced axis — confirm.
    assert_shape(loss, (-1, 1))

    self.distribution = distr

    return (loss, likelihoods) + distr_args
def unroll(
    self,
    F,
    lags: Tensor,
    scale: Tensor,
    time_feat: Tensor,
    target_dimension_indicator: Tensor,
    unroll_length: int,
    begin_state: Optional[List[Tensor]],
) -> Tuple[Tensor, Tensor, Tensor, Tensor]:
    """
    Prepares the input to the RNN and unrolls it the given number of time
    steps.

    The RNN input at each step is the concatenation (along the last axis)
    of the scaled lags, the embeddings of the target dimension indices
    repeated over time, and the time features.

    Parameters
    ----------
    F
        Namespace providing the tensor operations used here
        (``F.broadcast_div``, ``F.reshape``, ``F.concat``)
    lags
        Input lags (batch_size, sub_seq_len, target_dim, num_lags)
    scale
        Mean scale (batch_size, 1, target_dim)
    time_feat
        Additional time features; concatenated with the other inputs along
        the last axis (presumably (batch_size, sub_seq_len, num_features)
        -- TODO confirm)
    target_dimension_indicator
        Indices of the target dimension (batch_size, target_dim)
    unroll_length
        length to unroll
    begin_state
        State to start the unrolling of the RNN

    Returns
    -------
    outputs
        RNN outputs (batch_size, seq_len, num_cells)
    states
        RNN states, as returned by ``self.rnn.unroll``; iterated here as a
        sequence of tensors, each checked to be (batch_size, num_cells)
    lags_scaled
        Scaled lags (batch_size, sub_seq_len, target_dim, num_lags)
    inputs
        inputs to the RNN
    """
    num_lags = len(self.lags_seq)

    # divide every lag by the scale of its target dimension
    # (batch_size, sub_seq_len, target_dim, num_lags)
    lags_scaled = F.broadcast_div(lags, scale.expand_dims(axis=-1))
    assert_shape(
        lags_scaled,
        (-1, unroll_length, self.target_dim, num_lags),
    )

    # flatten the (target_dim, num_lags) axes into one feature axis
    # (batch_size, sub_seq_len, target_dim * num_lags)
    input_lags = F.reshape(
        data=lags_scaled,
        shape=(-1, unroll_length, num_lags * self.target_dim),
    )

    # (batch_size, target_dim, embed_dim)
    index_embeddings = self.embed(target_dimension_indicator)
    assert_shape(index_embeddings, (-1, self.target_dim, self.embed_dim))

    # the embeddings do not depend on time: add a time axis, repeat them
    # for every step, then flatten
    # (batch_size, seq_len, target_dim * embed_dim)
    repeated_index_embeddings = (
        index_embeddings.expand_dims(axis=1)
        .repeat(axis=1, repeats=unroll_length)
        .reshape((-1, unroll_length, self.target_dim * self.embed_dim))
    )

    # (batch_size, sub_seq_len, input_dim)
    inputs = F.concat(
        input_lags, repeated_index_embeddings, time_feat, dim=-1
    )

    # unroll encoder
    outputs, state = self.rnn.unroll(
        inputs=inputs,
        length=unroll_length,
        layout="NTC",
        merge_outputs=True,
        begin_state=begin_state,
    )

    assert_shape(outputs, (-1, unroll_length, self.num_cells))
    for s in state:
        assert_shape(s, (-1, self.num_cells))

    # lags_scaled was already shape-checked right after it was computed
    # and is not reassigned since, so it is not checked a second time.
    return outputs, state, lags_scaled, inputs