def flatmap_transform(self, data: DataEntry, is_train: bool) -> Iterator[DataEntry]:
    ts_fields = self.dynamic_feature_fields + [self.target_field]
    ts_target = data[self.target_field]
    len_target = ts_target.shape[-1]

    if is_train:
        if len_target < self.instance_length:
            sampling_indices = (
                # Returning [] for all time series would make this loop forever!
                [len_target] if self.allow_target_padding else []
            )
        else:
            sampling_indices = self.instance_sampler(
                ts_target, self.instance_length, len_target
            )
    else:
        sampling_indices = [len_target]

    for i in sampling_indices:
        d = data.copy()

        pad_length = max(self.instance_length - i, 0)

        # update start field
        d[self.start_field] = shift_timestamp(
            data[self.start_field], i - self.instance_length
        )

        # set is_pad field
        is_pad = np.zeros(self.instance_length)
        if pad_length > 0:
            is_pad[:pad_length] = 1
        d[self.is_pad_field] = is_pad

        # update time series fields
        for ts_field in ts_fields:
            full_ts = data[ts_field]
            if pad_length > 0:
                pad_pre = self.pad_value * np.ones(
                    shape=full_ts.shape[:-1] + (pad_length,)
                )
                past_ts = np.concatenate([pad_pre, full_ts[..., :i]], axis=-1)
            else:
                past_ts = full_ts[..., (i - self.instance_length) : i]
            past_ts = past_ts.transpose() if self.output_NTC else past_ts
            d[self._past(ts_field)] = past_ts

            if self.use_prediction_features and not is_train:
                if ts_field != self.target_field:
                    future_ts = full_ts[..., i : i + self.prediction_length]
                    future_ts = (
                        future_ts.transpose() if self.output_NTC else future_ts
                    )
                    d[self._future(ts_field)] = future_ts
            del d[ts_field]

        d[self.forecast_start_field] = shift_timestamp(
            d[self.start_field], self.instance_length
        )
        yield d
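# The shift_timestamp helper used throughout these splitters is not shown
# here. A minimal sketch of its presumed behavior, assuming the start field
# holds a pandas Period (the real helper may operate on Timestamps and guard
# against out-of-range values):

import pandas as pd

def shift_timestamp(ts: pd.Period, offset: int) -> pd.Period:
    # Advance `ts` by `offset` steps of its frequency; negative offsets
    # shift backwards, matching the `i - self.instance_length` usage above.
    return ts + offset

# Example: shift_timestamp(pd.Period("2020-01-01", freq="D"), 3)
# -> Period('2020-01-04', 'D')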
def flatmap_transform(
    self, data: DataEntry, is_train: bool
) -> Iterator[DataEntry]:
    ts_fields = self.dynamic_feature_fields + [self.target_field]
    ts_target = data[self.target_field]

    sampling_indices = self.instance_sampler(ts_target)

    for i in sampling_indices:
        d = data.copy()

        pad_length = max(self.instance_length - i, 0)

        # update start field
        d[self.start_field] = (
            data[self.start_field] + i - self.instance_length
        )

        # set is_pad field
        is_pad = np.zeros(self.instance_length, dtype=ts_target.dtype)
        if pad_length > 0:
            is_pad[:pad_length] = 1
        d[self.is_pad_field] = is_pad

        # update time series fields
        for ts_field in ts_fields:
            full_ts = data[ts_field]
            if pad_length > 0:
                pad_pre = self.pad_value * np.ones(
                    shape=full_ts.shape[:-1] + (pad_length,)
                )
                past_ts = np.concatenate([pad_pre, full_ts[..., :i]], axis=-1)
            else:
                past_ts = full_ts[..., (i - self.instance_length) : i]
            past_ts = past_ts.transpose() if self.output_NTC else past_ts
            d[self._past(ts_field)] = past_ts

            if self.use_prediction_features:
                if ts_field != self.target_field:
                    future_ts = full_ts[..., i : i + self.prediction_length]
                    future_ts = (
                        future_ts.transpose() if self.output_NTC else future_ts
                    )
                    d[self._future(ts_field)] = future_ts
            del d[ts_field]

        d[self.forecast_start_field] = (
            d[self.start_field] + self.instance_length
        )
        yield d
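# A standalone toy run of the slice-and-pad step above (made-up sizes):
# each instance is a window of length `instance_length` ending at index i,
# left-padded with `pad_value` whenever i is too small.

import numpy as np

instance_length, pad_value = 5, 0.0
full_ts = np.arange(1.0, 8.0)  # series of length 7

for i in (7, 3):  # one complete window, one padded window
    pad_length = max(instance_length - i, 0)
    if pad_length > 0:
        pad_pre = pad_value * np.ones(shape=full_ts.shape[:-1] + (pad_length,))
        past_ts = np.concatenate([pad_pre, full_ts[..., :i]], axis=-1)
    else:
        past_ts = full_ts[..., i - instance_length : i]
    print(i, past_ts)
# 7 [3. 4. 5. 6. 7.]
# 3 [0. 0. 1. 2. 3.]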
def flatmap_transform(
    self, data: DataEntry, is_train: bool
) -> Iterator[DataEntry]:
    pl = self.future_length
    lt = self.lead_time
    target = data[self.target_field]

    sampled_indices = self.instance_sampler(target)

    slice_cols = (
        self.ts_fields
        + self.past_ts_fields
        + [self.target_field, self.observed_value_field]
    )
    for i in sampled_indices:
        pad_length = max(self.past_length - i, 0)
        d = data.copy()

        for field in slice_cols:
            if i >= self.past_length:
                past_piece = d[field][..., i - self.past_length : i]
            else:
                pad_block = np.full(
                    shape=d[field].shape[:-1] + (pad_length,),
                    fill_value=self.dummy_value,
                    dtype=d[field].dtype,
                )
                past_piece = np.concatenate(
                    [pad_block, d[field][..., :i]], axis=-1
                )
            future_piece = d[field][..., (i + lt) : (i + lt + pl)]
            if field in self.ts_fields:
                piece = np.concatenate([past_piece, future_piece], axis=-1)
                if self.output_NTC:
                    piece = piece.transpose()
                d[field] = piece
            else:
                if self.output_NTC:
                    past_piece = past_piece.transpose()
                    future_piece = future_piece.transpose()
                if field not in self.past_ts_fields:
                    d[self._past(field)] = past_piece
                    d[self._future(field)] = future_piece
                    del d[field]
                else:
                    d[field] = past_piece

        pad_indicator = np.zeros(self.past_length)
        if pad_length > 0:
            pad_indicator[:pad_length] = 1
        d[self._past(self.is_pad_field)] = pad_indicator
        d[self.forecast_start_field] = shift_timestamp(
            d[self.start_field], i + lt
        )
        yield d
def flatmap_transform(
    self, data: DataEntry, is_train: bool
) -> Iterator[DataEntry]:
    pl = self.future_length
    lt = self.lead_time
    slice_cols = self.ts_fields + [self.target_field]
    target = data[self.target_field]

    sampled_indices = self.instance_sampler(target)

    for i in sampled_indices:
        pad_length = max(self.past_length - i, 0)
        d = data.copy()

        for ts_field in slice_cols:
            if i > self.past_length:
                # truncate to past_length
                past_piece = d[ts_field][..., i - self.past_length : i]
            elif i < self.past_length:
                pad_block = (
                    np.ones(
                        d[ts_field].shape[:-1] + (pad_length,),
                        dtype=d[ts_field].dtype,
                    )
                    * self.dummy_value
                )
                past_piece = np.concatenate(
                    [pad_block, d[ts_field][..., :i]], axis=-1
                )
            else:
                past_piece = d[ts_field][..., :i]

            d[self._past(ts_field)] = past_piece
            d[self._future(ts_field)] = d[ts_field][..., i + lt : i + lt + pl]
            del d[ts_field]

        pad_indicator = np.zeros(self.past_length, dtype=target.dtype)
        if pad_length > 0:
            pad_indicator[:pad_length] = 1

        if self.output_NTC:
            for ts_field in slice_cols:
                d[self._past(ts_field)] = d[self._past(ts_field)].transpose()
                d[self._future(ts_field)] = d[self._future(ts_field)].transpose()

        d[self._past(self.is_pad_field)] = pad_indicator
        d[self.forecast_start_field] = d[self.start_field] + i + lt
        yield d
def flatmap_transform(
    self, data: DataEntry, is_train: bool
) -> Iterator[DataEntry]:
    pl = self.future_length
    lt = self.lead_time
    slice_cols = self.ts_fields + [self.target_field]
    target = data[self.target_field]
    len_target = target.shape[-1]

    minimum_length = (
        self.future_length
        if self.pick_incomplete
        else self.past_length + self.future_length
    ) + self.lead_time

    if is_train:
        sampling_bounds = (
            (
                0,
                len_target - self.future_length - self.lead_time,
            )  # TODO: create parameter lower sampling bound for NBEATS
            if self.pick_incomplete
            else (
                self.past_length,
                len_target - self.future_length - self.lead_time,
            )
        )

        # We currently cannot handle time series that are too short during
        # training, so we just skip these. If we want to include them we
        # would need to pad and mask the loss.
        sampled_indices = (
            np.array([], dtype=int)
            if len_target < minimum_length
            else self.train_sampler(target, *sampling_bounds)
        )
    else:
        assert self.pick_incomplete or len_target >= self.past_length
        sampled_indices = np.array([len_target], dtype=int)

    for i in sampled_indices:
        pad_length = max(self.past_length - i, 0)
        if not self.pick_incomplete:
            assert (
                pad_length == 0
            ), f"pad_length should be zero, got {pad_length}"
        d = data.copy()

        for ts_field in slice_cols:
            if i > self.past_length:
                # truncate to past_length
                past_piece = d[ts_field][..., i - self.past_length : i]
            elif i < self.past_length:
                pad_block = (
                    np.ones(
                        d[ts_field].shape[:-1] + (pad_length,),
                        dtype=d[ts_field].dtype,
                    )
                    * self.dummy_value
                )
                past_piece = np.concatenate(
                    [pad_block, d[ts_field][..., :i]], axis=-1
                )
            else:
                past_piece = d[ts_field][..., :i]

            d[self._past(ts_field)] = past_piece
            d[self._future(ts_field)] = d[ts_field][..., i + lt : i + lt + pl]
            del d[ts_field]

        pad_indicator = np.zeros(self.past_length)
        if pad_length > 0:
            pad_indicator[:pad_length] = 1

        if self.output_NTC:
            for ts_field in slice_cols:
                d[self._past(ts_field)] = d[self._past(ts_field)].transpose()
                d[self._future(ts_field)] = d[self._future(ts_field)].transpose()

        d[self._past(self.is_pad_field)] = pad_indicator
        d[self.forecast_start_field] = shift_timestamp(
            d[self.start_field], i + lt
        )
        yield d
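# Worked example of the minimum_length / sampling-bounds arithmetic above,
# with made-up sizes (past_length=4, future_length=2, lead_time=1):

past_length, future_length, lead_time = 4, 2, 1
len_target = 10

for pick_incomplete in (True, False):
    minimum_length = (
        future_length if pick_incomplete else past_length + future_length
    ) + lead_time
    bounds = (
        (0, len_target - future_length - lead_time)
        if pick_incomplete
        else (past_length, len_target - future_length - lead_time)
    )
    print(pick_incomplete, minimum_length, bounds)
# True 3 (0, 7)
# False 7 (4, 7)
# i.e. with pick_incomplete=False every sampled i leaves room for a full past
# window [i - past_length, i) and a future window starting at i + lead_time.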
def transform(self, data: DataEntry) -> DataEntry:
    return self.func(data.copy())
def flatmap_transform(self, data: DataEntry, is_train: bool) -> Iterator[DataEntry]:
    target = data[self.target_field]

    if is_train:
        # We currently cannot handle time series that are shorter than the
        # prediction length during training, so we just skip these.
        # If we want to include them we would need to pad and mask the loss.
        if len(target) < self.dec_len:
            return
        sampling_indices = self.train_sampler(
            target, 0, len(target) - self.dec_len
        )
    else:
        sampling_indices = [len(target)]

    # Loop over all encoder and decoder fields, even disabled ones, so that
    # dummy all-zero fields are set in those cases.
    ts_fields_counter = Counter(
        set(self.encoder_series_fields + self.decoder_series_fields)
    )

    for sampling_idx in sampling_indices:
        # irrelevant data should have been removed by now in the
        # transformation chain, so copying everything is ok
        out = data.copy()

        enc_len_diff = sampling_idx - self.enc_len
        dec_len_diff = sampling_idx - self.num_forking

        # ensure start indices are not negative
        start_idx_enc = max(0, enc_len_diff)
        start_idx_dec = max(0, dec_len_diff)

        # define pad lengths for shorter time series of variable length,
        # which are padded in place below
        pad_length_enc = max(0, -enc_len_diff)
        pad_length_dec = max(0, -dec_len_diff)

        for ts_field in list(ts_fields_counter.keys()):
            # target is 1d; this ensures ts is always 2d
            ts = np.atleast_2d(out[ts_field]).T
            ts_len = ts.shape[1]

            if ts_fields_counter[ts_field] == 1:
                del out[ts_field]
            else:
                ts_fields_counter[ts_field] -= 1

            out[self._past(ts_field)] = np.zeros(
                shape=(self.enc_len, ts_len), dtype=ts.dtype
            )
            if ts_field not in self.encoder_disabled_fields:
                out[self._past(ts_field)][pad_length_enc:] = ts[
                    start_idx_enc:sampling_idx, :
                ]

            # exclude some fields at prediction time
            if (
                not is_train
                and ts_field in self.prediction_time_decoder_exclude
            ):
                continue

            if ts_field in self.decoder_series_fields:
                out[self._future(ts_field)] = np.zeros(
                    shape=(self.num_forking, self.dec_len, ts_len),
                    dtype=ts.dtype,
                )
                if ts_field not in self.decoder_disabled_fields:
                    # This is where some of the forking magic happens: for each
                    # of the num_forking time steps at which the decoder is
                    # applied, we slice the corresponding decoder_fields inputs
                    # to the appropriate dec_len.
                    decoder_fields = ts[start_idx_dec + 1 : sampling_idx + 1, :]
                    # For default row-major arrays, strides = (dtype * n_cols, dtype).
                    # Since this array is transposed, it is stored in
                    # column-major (Fortran) order with strides = (dtype, dtype * n_rows).
                    stride = decoder_fields.strides
                    out[self._future(ts_field)][pad_length_dec:] = as_strided(
                        decoder_fields,
                        shape=(
                            self.num_forking - pad_length_dec,
                            self.dec_len,
                            ts_len,
                        ),
                        # strides for a 2D array expanded to a 3D array of shape
                        # (dim1, dim2, dim3) = (1, n_rows, n_cols). For
                        # transposed data, strides =
                        # (dtype, dtype * dim1, dtype * dim1 * dim2)
                        # = (dtype, dtype, dtype * n_rows).
                        strides=stride[0:1] + stride,
                    )

                # edge case for prediction_length = 1
                if out[self._future(ts_field)].shape[-1] == 1:
                    out[self._future(ts_field)] = np.squeeze(
                        out[self._future(ts_field)], axis=-1
                    )

        # So far the encoder pad indicator is not in use; it marks that left
        # padding for the encoder will occur on shorter time series.
        pad_indicator = np.zeros(self.enc_len)
        pad_indicator[:pad_length_enc] = True
        out[self._past(self.is_pad_out)] = pad_indicator

        # So far pad forecast_start not in use
        out[FieldName.FORECAST_START] = shift_timestamp(
            out[self.start_in], sampling_idx
        )
        yield out
def flatmap_transform(
    self, data: DataEntry, is_train: bool
) -> Iterator[DataEntry]:
    pl = self.future_length
    slice_cols = self.ts_fields + [self.target_field]
    target = data[self.target_field]
    len_target = target.shape[-1]

    if is_train:
        if len_target < self.future_length:
            # We currently cannot handle time series that are shorter than
            # the prediction length during training, so we just skip these.
            # If we want to include them we would need to pad and mask
            # the loss.
            sampling_indices: List[int] = []
        else:
            if self.pick_incomplete:
                sampling_indices = self.train_sampler(
                    target, 0, len_target - self.future_length
                )
            else:
                sampling_indices = self.train_sampler(
                    target,
                    self.past_length,
                    len_target - self.future_length,
                )
    else:
        sampling_indices = [len_target]

    for i in sampling_indices:
        pad_length = max(self.past_length - i, 0)
        if not self.pick_incomplete:
            assert pad_length == 0
        d = data.copy()

        for ts_field in slice_cols:
            if i > self.past_length:
                # truncate to past_length
                past_piece = d[ts_field][..., i - self.past_length : i]
            elif i < self.past_length:
                pad_block = np.zeros(
                    d[ts_field].shape[:-1] + (pad_length,),
                    dtype=d[ts_field].dtype,
                )
                past_piece = np.concatenate(
                    [pad_block, d[ts_field][..., :i]], axis=-1
                )
            else:
                past_piece = d[ts_field][..., :i]

            d[self._past(ts_field)] = past_piece
            d[self._future(ts_field)] = d[ts_field][..., i : i + pl]
            del d[ts_field]

        pad_indicator = np.zeros(self.past_length)
        if pad_length > 0:
            pad_indicator[:pad_length] = 1

        if self.output_NTC:
            for ts_field in slice_cols:
                d[self._past(ts_field)] = d[self._past(ts_field)].transpose()
                d[self._future(ts_field)] = d[self._future(ts_field)].transpose()

        d[self._past(self.is_pad_field)] = pad_indicator
        d[self.forecast_start_field] = shift_timestamp(d[self.start_field], i)
        yield d
def flatmap_transform(self, data: DataEntry, is_train: bool) -> Iterator[DataEntry]:
    target = data[self.target_field]

    sampled_indices = self.instance_sampler(target)

    ts_fields = set(self.encoder_series_fields + self.decoder_series_fields)

    for idx in sampled_indices:
        # irrelevant data should have been removed by now in the
        # transformation chain, so copying everything is ok
        out = data.copy()

        enc_len_diff = idx - self.enc_len
        dec_len_diff = idx - self.num_forking

        # ensure start indices are not negative
        start_idx_enc = max(0, enc_len_diff)
        start_idx_dec = max(0, dec_len_diff)

        # define pad lengths for shorter time series of variable length,
        # which are padded in place below
        pad_length_enc = max(0, -enc_len_diff)
        pad_length_dec = max(0, -dec_len_diff)

        for ts_field in ts_fields:
            # target is 1d; this ensures ts is always 2d
            ts = np.atleast_2d(out[ts_field]).T
            ts_len = ts.shape[1]
            del out[ts_field]

            out[self._past(ts_field)] = np.zeros(
                shape=(self.enc_len, ts_len), dtype=ts.dtype
            )
            if ts_field not in self.encoder_disabled_fields:
                out[self._past(ts_field)][pad_length_enc:] = ts[
                    start_idx_enc:idx, :
                ]

            if ts_field in self.decoder_series_fields:
                out[self._future(ts_field)] = np.zeros(
                    shape=(self.num_forking, self.dec_len, ts_len),
                    dtype=ts.dtype,
                )
                if ts_field not in self.decoder_disabled_fields:
                    # This is where some of the forking magic happens: for each
                    # of the num_forking time steps at which the decoder is
                    # applied, we slice the corresponding decoder_fields inputs
                    # to the appropriate dec_len.
                    decoder_fields = ts[start_idx_dec + 1 : idx + 1, :]
                    # For default row-major arrays, strides = (dtype * n_cols, dtype).
                    # Since this array is transposed, it is stored in
                    # column-major (Fortran) order with strides = (dtype, dtype * n_rows).
                    stride = decoder_fields.strides
                    out[self._future(ts_field)][pad_length_dec:] = as_strided(
                        decoder_fields,
                        shape=(
                            self.num_forking - pad_length_dec,
                            self.dec_len,
                            ts_len,
                        ),
                        # strides for a 2D array expanded to a 3D array of shape
                        # (dim1, dim2, dim3) = (1, n_rows, n_cols). For
                        # transposed data, strides =
                        # (dtype, dtype * dim1, dtype * dim1 * dim2)
                        # = (dtype, dtype, dtype * n_rows).
                        strides=stride[0:1] + stride,
                    )

                # edge case for prediction_length = 1
                if out[self._future(ts_field)].shape[-1] == 1:
                    out[self._future(ts_field)] = np.squeeze(
                        out[self._future(ts_field)], axis=-1
                    )

        # So far the encoder pad indicator is not in use; it marks that left
        # padding for the encoder will occur on shorter time series.
        pad_indicator = np.zeros(self.enc_len)
        pad_indicator[:pad_length_enc] = True
        out[self._past(self.is_pad_out)] = pad_indicator

        # So far pad forecast_start not in use
        out[FieldName.FORECAST_START] = shift_timestamp(out[self.start_in], idx)
        yield out
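# To make the strided forking trick above concrete: a self-contained toy
# sketch (made-up sizes, single-column series) showing how as_strided turns
# the transposed series into num_forking overlapping decoder windows without
# copying. Window j holds time steps [j, j + dec_len), i.e. consecutive
# windows are shifted by one step.

import numpy as np
from numpy.lib.stride_tricks import as_strided

a = np.arange(8.0)             # toy target series
ts = np.atleast_2d(a).T        # shape (8, 1); transposed, hence column-major
num_forking, dec_len = 5, 3

stride = ts.strides            # (itemsize, itemsize * n_rows) for this view
windows = as_strided(
    ts,
    shape=(num_forking, dec_len, ts.shape[1]),
    strides=stride[0:1] + stride,
)
print(windows[:, :, 0])
# [[0. 1. 2.]
#  [1. 2. 3.]
#  [2. 3. 4.]
#  [3. 4. 5.]
#  [4. 5. 6.]]
# The view shares memory with `ts`; the splitter immediately copies it into a
# zero-initialized array, which is what makes using the view safe.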
def flatmap_transform(self, data: DataEntry, is_train: bool) -> Iterator[DataEntry]:
    target = data[self.target_field]

    if is_train:
        # We currently cannot handle time series that are shorter than the
        # prediction length during training, so we just skip these.
        # If we want to include them we would need to pad and mask the loss.
        if len(target) < self.dec_len:
            return
        sampling_indices = self.train_sampler(
            target, 0, len(target) - self.dec_len
        )
    else:
        sampling_indices = [len(target)]

    ts_fields_counter = Counter(
        set(self.encoder_series_fields + self.decoder_series_fields)
    )

    for sampling_idx in sampling_indices:
        # ensure start index is not negative
        start_idx = max(0, sampling_idx - self.enc_len)

        # irrelevant data should have been removed by now in the
        # transformation chain, so copying everything is ok
        out = data.copy()

        for ts_field in list(ts_fields_counter.keys()):
            # target is 1d; this ensures ts is always 2d
            ts = np.atleast_2d(out[ts_field])

            if ts_fields_counter[ts_field] == 1:
                del out[ts_field]
            else:
                ts_fields_counter[ts_field] -= 1

            # take enc_len values from ts, depending on sampling_idx
            ts_slice = ts[:, start_idx:sampling_idx]

            # if we have fewer than enc_len values, pad on the left with 0
            past_piece = pad_to_size(ts_slice, self.enc_len)
            out[self._past(ts_field)] = past_piece.transpose()

            # exclude some fields at prediction time
            if (
                not is_train
                and ts_field in self.prediction_time_decoder_exclude
            ):
                continue

            # This is where some of the forking magic happens: for each of the
            # enc_len time steps at which the decoder is applied, we slice the
            # corresponding decoder_fields inputs to the appropriate dec_len.
            if ts_field in self.decoder_series_fields + self.decoder_disabled_fields:
                forking_dec_field = np.zeros(
                    shape=(self.enc_len, self.dec_len, len(ts))
                )

                # in case it's not disabled we copy the actual values
                if ts_field not in self.decoder_disabled_fields:
                    skip = max(0, self.enc_len - sampling_idx)
                    # This section takes by far the longest time
                    # computationally: it scales linearly in self.enc_len
                    # and in self.dec_len.
                    for dec_field, idx in zip(
                        forking_dec_field[skip:],
                        range(start_idx + 1, start_idx + self.enc_len + 1),
                    ):
                        dec_field[:] = ts[:, idx : idx + self.dec_len].T

                if forking_dec_field.shape[-1] == 1:
                    out[self._future(ts_field)] = np.squeeze(
                        forking_dec_field, axis=-1
                    )
                else:
                    out[self._future(ts_field)] = forking_dec_field

        # So far pad indicator not in use
        pad_indicator = np.zeros(self.enc_len)
        pad_length = max(0, self.enc_len - sampling_idx)
        pad_indicator[:pad_length] = True
        out[self._past(self.is_pad_out)] = pad_indicator

        # So far pad forecast_start not in use
        out[FieldName.FORECAST_START] = shift_timestamp(
            out[self.start_in], sampling_idx
        )
        yield out
def flatmap_transform(
    self, data: DataEntry, is_train: bool
) -> Iterator[DataEntry]:
    target = data[self.target_field]

    if is_train:
        # We currently cannot handle time series that are shorter than the
        # prediction length during training, so we just skip these.
        # If we want to include them we would need to pad and mask the loss.
        if len(target) < self.dec_len:
            return
        sampling_indices = self.train_sampler(
            target, 0, len(target) - self.dec_len
        )
    else:
        sampling_indices = [len(target)]

    # Loop over all encoder and decoder fields, even disabled ones, so that
    # dummy all-zero fields are set in those cases.
    ts_fields_counter = Counter(
        set(self.encoder_series_fields + self.decoder_series_fields)
    )

    for sampling_idx in sampling_indices:
        # ensure start index is not negative
        start_idx = max(0, sampling_idx - self.enc_len)

        # irrelevant data should have been removed by now in the
        # transformation chain, so copying everything is ok
        out = data.copy()

        for ts_field in list(ts_fields_counter.keys()):
            # target is 1d; this ensures ts is always 2d
            ts = np.atleast_2d(out[ts_field]).T

            if ts_fields_counter[ts_field] == 1:
                del out[ts_field]
            else:
                ts_fields_counter[ts_field] -= 1

            # take enc_len values from ts, depending on sampling_idx
            ts_slice = ts[start_idx:sampling_idx, :]
            ts_len = ts.shape[1]

            past_piece = np.zeros(
                shape=(self.enc_len, ts_len), dtype=ts.dtype
            )
            if ts_field not in self.encoder_disabled_fields:
                # if we have fewer than enc_len values, pad on the left with 0
                past_piece = pad_to_size(ts_slice, self.enc_len)
            out[self._past(ts_field)] = past_piece

            # exclude some fields at prediction time
            if (
                not is_train
                and ts_field in self.prediction_time_decoder_exclude
            ):
                continue

            # This is where some of the forking magic happens: for each of the
            # enc_len time steps at which the decoder is applied, we slice the
            # corresponding decoder_fields inputs to the appropriate dec_len.
            if ts_field in self.decoder_series_fields:
                forking_dec_field = np.zeros(
                    shape=(self.num_forking, self.dec_len, ts_len),
                    dtype=ts.dtype,
                )
                # in case it's not disabled we copy the actual values
                if ts_field not in self.decoder_disabled_fields:
                    # If we sample an index too close to the beginning of the
                    # time series, we would run out of bounds (i.e. try to copy
                    # non-existent time series data) when preparing the input
                    # for the decoder. Instead of copying the partially
                    # available data from the time series and padding it with
                    # zeros, we simply skip copying the partial data. Since
                    # copying data would overwrite the zero pre-initialized 3D
                    # array, the end result of skipping is that the affected 2D
                    # decoder inputs (entries of the 3D array, of which there
                    # are skip many) remain all zero.
                    skip = max(0, self.num_forking - sampling_idx)
                    start_idx = max(0, sampling_idx - self.num_forking)
                    # For a 2D transposed array stored in column-major (Fortran)
                    # order, strides = (dtype, dtype * n_rows); for standard
                    # row-major arrays, strides = (dtype * n_cols, dtype).
                    stride = ts.strides
                    forking_dec_field[skip:, :, :] = as_strided(
                        ts[
                            start_idx + 1 : start_idx + 1 + self.num_forking - skip,
                            :,
                        ],
                        shape=(
                            self.num_forking - skip,
                            self.dec_len,
                            ts_len,
                        ),
                        # strides for a 2D array expanded to a 3D array of shape
                        # (dim1, dim2, dim3) = (1, n_rows, n_cols). Note that
                        # since this array has been transposed, it is stored in
                        # column-major (Fortran) order, i.e. for transposed data
                        # of shape (dim1, dim2, dim3), strides =
                        # (dtype, dtype * dim1, dtype * dim1 * dim2)
                        # = (dtype, dtype, dtype * n_rows).
                        strides=stride[0:1] + stride,
                    )

                # edge case for prediction_length = 1
                if forking_dec_field.shape[-1] == 1:
                    out[self._future(ts_field)] = np.squeeze(
                        forking_dec_field, axis=-1
                    )
                else:
                    out[self._future(ts_field)] = forking_dec_field

        # So far pad indicator not in use
        pad_indicator = np.zeros(self.enc_len)
        pad_length = max(0, self.enc_len - sampling_idx)
        pad_indicator[:pad_length] = True
        out[self._past(self.is_pad_out)] = pad_indicator

        # So far pad forecast_start not in use
        out[FieldName.FORECAST_START] = shift_timestamp(
            out[self.start_in], sampling_idx
        )
        yield out
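# pad_to_size is not defined in this file. A plausible minimal version,
# assuming it left-pads with zeros along the first (time) axis up to the
# requested number of rows, matching the (time, channel) layout used above
# (the real helper may support other axes or right-padding):

import numpy as np

def pad_to_size(x: np.ndarray, size: int) -> np.ndarray:
    # Left-pad `x` with zero rows so that x.shape[0] == size.
    pad_length = size - x.shape[0]
    if pad_length <= 0:
        return x
    pad = np.zeros((pad_length,) + x.shape[1:], dtype=x.dtype)
    return np.concatenate([pad, x], axis=0)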
def flatmap_transform(self, data: DataEntry, is_train: bool) -> Iterator[DataEntry]:
    pl = self.future_length
    lt = self.lead_time
    target = data[self.target_field]
    len_target = target.shape[-1]

    minimum_length = (
        self.future_length
        if self.pick_incomplete
        else self.past_length + self.future_length
    ) + self.lead_time

    if is_train:
        sampling_bounds = (
            (
                0,
                len_target - self.future_length - self.lead_time,
            )
            if self.pick_incomplete
            else (
                self.past_length,
                len_target - self.future_length - self.lead_time,
            )
        )

        # We currently cannot handle time series that are too short during
        # training, so we just skip these. If we want to include them we
        # would need to pad and mask the loss.
        sampled_indices = (
            np.array([], dtype=int)
            if len_target < minimum_length
            else self.train_sampler(target, *sampling_bounds)
        )
    else:
        assert self.pick_incomplete or len_target >= self.past_length
        sampled_indices = np.array([len_target], dtype=int)

    slice_cols = (
        self.ts_fields
        + self.past_ts_fields
        + [self.target_field, self.observed_value_field]
    )
    for i in sampled_indices:
        pad_length = max(self.past_length - i, 0)
        if not self.pick_incomplete and pad_length > 0:
            raise RuntimeError(
                f"pad_length should be zero, got {pad_length}"
            )
        d = data.copy()

        for field in slice_cols:
            if i >= self.past_length:
                past_piece = d[field][..., i - self.past_length : i]
            else:
                pad_block = (
                    np.ones(
                        d[field].shape[:-1] + (pad_length,),
                        dtype=d[field].dtype,
                    )
                    * self.dummy_value
                )
                past_piece = np.concatenate(
                    [pad_block, d[field][..., :i]], axis=-1
                )
            future_piece = d[field][..., (i + lt) : (i + lt + pl)]
            if field in self.ts_fields:
                piece = np.concatenate([past_piece, future_piece], axis=-1)
                if self.output_NTC:
                    piece = piece.transpose()
                d[field] = piece
            else:
                if self.output_NTC:
                    past_piece = past_piece.transpose()
                    future_piece = future_piece.transpose()
                d[self._past(field)] = past_piece
                if field not in self.past_ts_fields:
                    d[self._future(field)] = future_piece
                del d[field]

        pad_indicator = np.zeros(self.past_length)
        if pad_length > 0:
            pad_indicator[:pad_length] = 1
        d[self._past(self.is_pad_field)] = pad_indicator
        d[self.forecast_start_field] = shift_timestamp(
            d[self.start_field], i + lt
        )
        yield d
def flatmap_transform(
    self, data: DataEntry, is_train: bool
) -> Iterator[DataEntry]:
    dec_len = self.dec_len
    slice_cols = self.ts_fields + [self.target_in]
    target = data[self.target_in]

    if is_train:
        if len(target) < self.dec_len:
            # We currently cannot handle time series that are shorter than
            # the prediction length during training, so we just skip these.
            # If we want to include them we would need to pad and mask
            # the loss.
            sampling_indices: List[int] = []
        else:
            sampling_indices = self.train_sampler(
                target, 0, len(target) - self.dec_len
            )
    else:
        sampling_indices = [len(target)]

    for i in sampling_indices:
        pad_length = max(self.enc_len - i, 0)
        d = data.copy()

        for ts_field in slice_cols:
            if i > self.enc_len:
                # truncate to past_length
                past_piece = d[ts_field][..., i - self.enc_len : i]
            elif i < self.enc_len:
                pad_block = np.zeros(d[ts_field].shape[:-1] + (pad_length,))
                past_piece = np.concatenate(
                    [pad_block, d[ts_field][..., :i]], axis=-1
                )
            else:
                past_piece = d[ts_field][..., :i]
            d[self._past(ts_field)] = np.expand_dims(past_piece, -1)

            if is_train and ts_field == self.target_in:
                forking_dec_field = np.zeros(
                    shape=(self.enc_len, self.dec_len)
                )
                for j in range(self.enc_len):
                    start_idx = i - self.enc_len + j + 1
                    if start_idx >= 0:
                        forking_dec_field[j, :] = d[ts_field][
                            ..., start_idx : start_idx + dec_len
                        ]
                d[self._future(ts_field)] = forking_dec_field
            del d[ts_field]

        pad_indicator = np.zeros(self.enc_len)
        if pad_length > 0:
            pad_indicator[:pad_length] = 1
        d[self._past(self.is_pad_out)] = pad_indicator

        d[self.forecast_start_out] = shift_timestamp(d[self.start_in], i)
        yield d
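# A toy trace of the explicit forking loop above (made-up sizes): for each
# encoder position j, the decoder target is the dec_len-step window starting
# right after it. This is the same window structure the as_strided variants
# build without the Python-level loop.

import numpy as np

enc_len, dec_len, i = 4, 2, 6
target = np.arange(1.0, 9.0)   # series of length 8, split at i = 6

forking_dec_field = np.zeros(shape=(enc_len, dec_len))
for j in range(enc_len):
    start_idx = i - enc_len + j + 1
    if start_idx >= 0:
        forking_dec_field[j, :] = target[..., start_idx : start_idx + dec_len]
print(forking_dec_field)
# [[4. 5.]
#  [5. 6.]
#  [6. 7.]
#  [7. 8.]]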