def create_transformation(self) -> Transformation:
    """Assemble the input-transformation pipeline for this estimator.

    Drops disabled/unused input fields, adds the observed-values indicator,
    builds the dynamic feature set (time / age / user-supplied real
    features, with a constant dummy fallback), fills in a default static
    categorical feature when none is used, and finally applies the forking
    encoder/decoder split.
    """
    transforms = []
    dynamic_fields = []

    # --- GENERAL TRANSFORMATION CHAIN ---
    # Fields this model never consumes, plus optional inputs that are
    # switched off on this estimator instance.
    unused_fields = [
        FieldName.FEAT_DYNAMIC_CAT,
        FieldName.FEAT_STATIC_REAL,
    ]
    for field, enabled in (
        (FieldName.PAST_FEAT_DYNAMIC_REAL, self.use_past_feat_dynamic_real),
        (FieldName.FEAT_DYNAMIC_REAL, self.use_feat_dynamic_real),
        (FieldName.FEAT_STATIC_CAT, self.use_feat_static_cat),
    ):
        if not enabled:
            unused_fields.append(field)

    transforms.append(RemoveFields(field_names=unused_fields))
    transforms.append(
        AddObservedValuesIndicator(
            target_field=FieldName.TARGET,
            output_field=FieldName.OBSERVED_VALUES,
            dtype=self.dtype,
        )
    )

    # --- TRANSFORMATION CHAIN FOR DYNAMIC FEATURES ---
    if self.add_time_feature:
        transforms.append(
            AddTimeFeatures(
                start_field=FieldName.START,
                target_field=FieldName.TARGET,
                output_field=FieldName.FEAT_TIME,
                time_features=time_features_from_frequency_str(self.freq),
                pred_length=self.prediction_length,
                dtype=self.dtype,
            )
        )
        dynamic_fields.append(FieldName.FEAT_TIME)

    if self.add_age_feature:
        transforms.append(
            AddAgeFeature(
                target_field=FieldName.TARGET,
                output_field=FieldName.FEAT_AGE,
                pred_length=self.prediction_length,
                dtype=self.dtype,
            )
        )
        dynamic_fields.append(FieldName.FEAT_AGE)

    if self.use_feat_dynamic_real:
        # Backwards compatibility with the legacy "dynamic_feat" field name.
        transforms.append(
            RenameFields({"dynamic_feat": FieldName.FEAT_DYNAMIC_REAL})
        )
        dynamic_fields.append(FieldName.FEAT_DYNAMIC_REAL)

    # There must always be some dynamic input (it is disregarded in the
    # hybrid forward when unused).  The time feature is empty for yearly
    # frequency, so the constant dummy is also added when the time feature
    # would otherwise be the only dynamic field.
    time_feature_is_only_one = (
        not self.add_age_feature
        and not self.use_feat_dynamic_real
        and self.freq == "Y"
    )
    if not dynamic_fields or time_feature_is_only_one:
        transforms.append(
            AddConstFeature(
                target_field=FieldName.TARGET,
                output_field=FieldName.FEAT_CONST,
                pred_length=self.prediction_length,
                const=0.0,  # For consistency in case with no dynamic features
                dtype=self.dtype,
            )
        )
        dynamic_fields.append(FieldName.FEAT_CONST)

    # Map all dynamic inputs of length context_length + prediction_length
    # onto FieldName.FEAT_DYNAMIC; past_feat_dynamic_real is excluded
    # because its length is only context_length.
    if len(dynamic_fields) > 1:
        transforms.append(
            VstackFeatures(
                output_field=FieldName.FEAT_DYNAMIC,
                input_fields=dynamic_fields,
            )
        )
    elif len(dynamic_fields) == 1:
        transforms.append(
            RenameFields({dynamic_fields[0]: FieldName.FEAT_DYNAMIC})
        )

    # --- TRANSFORMATION CHAIN FOR STATIC FEATURES ---
    if not self.use_feat_static_cat:
        transforms.append(
            SetField(
                output_field=FieldName.FEAT_STATIC_CAT,
                value=np.array([0], dtype=np.int32),
            )
        )

    # --- SAMPLE AND CUT THE TIME-SERIES ---
    # Because of how the forking decoder works, every time step in context
    # is used for splitting, which is why the TestSplitSampler is used for
    # training as well.
    encoder_fields = [
        FieldName.OBSERVED_VALUES,
        # RTS with past and future values; never empty thanks to the
        # constant dummy added above.
        FieldName.FEAT_DYNAMIC,
    ]
    if self.use_past_feat_dynamic_real:
        # RTS with only past values are only used by the encoder.
        encoder_fields.append(FieldName.PAST_FEAT_DYNAMIC_REAL)

    encoder_disabled = []
    if not self.enable_encoder_dynamic_feature:
        encoder_disabled.append(FieldName.FEAT_DYNAMIC)
        if self.use_past_feat_dynamic_real:
            encoder_disabled.append(FieldName.PAST_FEAT_DYNAMIC_REAL)

    transforms.append(
        ForkingSequenceSplitter(
            train_sampler=TestSplitSampler(),
            enc_len=self.context_length,
            dec_len=self.prediction_length,
            num_forking=self.num_forking,
            encoder_series_fields=encoder_fields,
            encoder_disabled_fields=encoder_disabled,
            decoder_series_fields=[
                FieldName.OBSERVED_VALUES,
                # The decoder uses every field under FEAT_DYNAMIC, i.e. the
                # RTS with both past and future values.
                FieldName.FEAT_DYNAMIC,
            ],
            decoder_disabled_fields=(
                []
                if self.enable_decoder_dynamic_feature
                else [FieldName.FEAT_DYNAMIC]
            ),
            prediction_time_decoder_exclude=[FieldName.OBSERVED_VALUES],
        )
    )

    # The ForkingSequenceSplitter emits the feat_dynamic fields with a
    # "past_" prefix; stack them with the short related time series the
    # system labels past_feat_dynamic_real.  h_stack is used because the
    # splitter transposed them to shape (enc_len, num_past_feature_dynamic).
    if self.use_past_feat_dynamic_real:
        transforms.append(
            VstackFeatures(
                output_field=FieldName.PAST_FEAT_DYNAMIC,
                input_fields=[
                    "past_" + FieldName.PAST_FEAT_DYNAMIC_REAL,
                    FieldName.PAST_FEAT_DYNAMIC,
                ],
                h_stack=True,
            )
        )

    return Chain(transforms)
def create_transformation(self) -> Transformation:
    """Create the chain of input transformations for this estimator.

    Removes unused input fields, adds the observed-values indicator,
    builds the dynamic feature fields (time / age / user-supplied real
    features, with a constant dummy fallback), sets a default static
    categorical feature when none is used, and applies the forking
    encoder/decoder split.
    """
    chain = []
    dynamic_feat_fields = []
    # Fields this model never consumes.
    remove_field_names = [
        FieldName.FEAT_DYNAMIC_CAT,
        FieldName.FEAT_STATIC_REAL,
    ]

    # --- GENERAL TRANSFORMATION CHAIN ---

    # determine unused input
    if not self.use_feat_dynamic_real:
        remove_field_names.append(FieldName.FEAT_DYNAMIC_REAL)
    if not self.use_feat_static_cat:
        remove_field_names.append(FieldName.FEAT_STATIC_CAT)

    chain.extend(
        [
            RemoveFields(field_names=remove_field_names),
            AddObservedValuesIndicator(
                target_field=FieldName.TARGET,
                output_field=FieldName.OBSERVED_VALUES,
                dtype=self.dtype,
            ),
        ]
    )

    # --- TRANSFORMATION CHAIN FOR DYNAMIC FEATURES ---

    if self.add_time_feature:
        chain.append(
            AddTimeFeatures(
                start_field=FieldName.START,
                target_field=FieldName.TARGET,
                output_field=FieldName.FEAT_TIME,
                time_features=time_features_from_frequency_str(self.freq),
                pred_length=self.prediction_length,
                # dtype passed for consistency with the other feature
                # transforms in this chain, so the arrays stacked into
                # FEAT_DYNAMIC share a single dtype.
                dtype=self.dtype,
            ),
        )
        dynamic_feat_fields.append(FieldName.FEAT_TIME)

    if self.add_age_feature:
        chain.append(
            AddAgeFeature(
                target_field=FieldName.TARGET,
                output_field=FieldName.FEAT_AGE,
                pred_length=self.prediction_length,
                dtype=self.dtype,
            ),
        )
        dynamic_feat_fields.append(FieldName.FEAT_AGE)

    if self.use_feat_dynamic_real:
        dynamic_feat_fields.append(FieldName.FEAT_DYNAMIC_REAL)

    # we need to make sure that there is always some dynamic input;
    # we will however disregard it in the hybrid forward
    if len(dynamic_feat_fields) == 0:
        chain.append(
            AddConstFeature(
                target_field=FieldName.TARGET,
                output_field=FieldName.FEAT_CONST,
                pred_length=self.prediction_length,
                dtype=self.dtype,
            ),
        )
        dynamic_feat_fields.append(FieldName.FEAT_CONST)

    # now we map all the dynamic input onto FieldName.FEAT_DYNAMIC
    if len(dynamic_feat_fields) > 1:
        chain.append(
            VstackFeatures(
                output_field=FieldName.FEAT_DYNAMIC,
                input_fields=dynamic_feat_fields,
            )
        )
    elif len(dynamic_feat_fields) == 1:
        chain.append(
            RenameFields({dynamic_feat_fields[0]: FieldName.FEAT_DYNAMIC})
        )

    # --- TRANSFORMATION CHAIN FOR STATIC FEATURES ---

    if not self.use_feat_static_cat:
        chain.append(
            SetField(
                output_field=FieldName.FEAT_STATIC_CAT,
                # Categorical features are integer codes; use int32 for
                # consistency with the sibling estimators (was a float
                # np.array([0.0]), inconsistent with the rest of the file).
                value=np.array([0], dtype=np.int32),
            ),
        )

    # --- SAMPLE AND CUT THE TIME-SERIES ---

    chain.append(
        # because of how the forking decoder works, every time step
        # in context is used for splitting, which is why we use the
        # TestSplitSampler
        ForkingSequenceSplitter(
            train_sampler=TestSplitSampler(),
            enc_len=self.context_length,
            dec_len=self.prediction_length,
            encoder_series_fields=[
                FieldName.OBSERVED_VALUES,
                FieldName.FEAT_DYNAMIC,
            ],
            decoder_series_fields=[FieldName.OBSERVED_VALUES]
            + (
                [FieldName.FEAT_DYNAMIC]
                if self.enable_decoder_dynamic_feature
                else []
            ),
            prediction_time_decoder_exclude=[FieldName.OBSERVED_VALUES],
        ),
    )

    return Chain(chain)
def create_transformation(self) -> Transformation:
    """Build the input-transformation chain for this estimator.

    Strips disabled/unused fields, attaches the observed-values indicator,
    constructs the dynamic features (time / age / user-supplied real
    features, with a constant dummy fallback) under FEAT_DYNAMIC, and
    provides a default static categorical feature when none is used.
    """
    transforms = []
    dynamic_fields = []

    # --- GENERAL TRANSFORMATION CHAIN ---
    # Fields the model never consumes, plus optional inputs switched off
    # on this estimator instance.
    unused_fields = [
        FieldName.FEAT_DYNAMIC_CAT,
        FieldName.FEAT_STATIC_REAL,
    ]
    for field, enabled in (
        (FieldName.PAST_FEAT_DYNAMIC_REAL, self.use_past_feat_dynamic_real),
        (FieldName.FEAT_DYNAMIC_REAL, self.use_feat_dynamic_real),
        (FieldName.FEAT_STATIC_CAT, self.use_feat_static_cat),
    ):
        if not enabled:
            unused_fields.append(field)

    transforms.append(RemoveFields(field_names=unused_fields))
    transforms.append(
        AddObservedValuesIndicator(
            target_field=FieldName.TARGET,
            output_field=FieldName.OBSERVED_VALUES,
            dtype=self.dtype,
        )
    )

    # --- TRANSFORMATION CHAIN FOR DYNAMIC FEATURES ---
    if self.add_time_feature:
        transforms.append(
            AddTimeFeatures(
                start_field=FieldName.START,
                target_field=FieldName.TARGET,
                output_field=FieldName.FEAT_TIME,
                time_features=time_features_from_frequency_str(self.freq),
                pred_length=self.prediction_length,
                dtype=self.dtype,
            )
        )
        dynamic_fields.append(FieldName.FEAT_TIME)

    if self.add_age_feature:
        transforms.append(
            AddAgeFeature(
                target_field=FieldName.TARGET,
                output_field=FieldName.FEAT_AGE,
                pred_length=self.prediction_length,
                dtype=self.dtype,
            )
        )
        dynamic_fields.append(FieldName.FEAT_AGE)

    if self.use_feat_dynamic_real:
        # Backwards compatibility with the legacy "dynamic_feat" field name.
        transforms.append(
            RenameFields({"dynamic_feat": FieldName.FEAT_DYNAMIC_REAL})
        )
        dynamic_fields.append(FieldName.FEAT_DYNAMIC_REAL)

    # Some dynamic input must always exist (it is disregarded in the hybrid
    # forward when unused).  The time feature is empty for yearly frequency,
    # so the constant dummy is also added when the time feature would be the
    # only dynamic field.
    time_feature_is_only_one = (
        not self.add_age_feature
        and not self.use_feat_dynamic_real
        and self.freq == "Y"
    )
    if not dynamic_fields or time_feature_is_only_one:
        transforms.append(
            AddConstFeature(
                target_field=FieldName.TARGET,
                output_field=FieldName.FEAT_CONST,
                pred_length=self.prediction_length,
                const=0.0,  # For consistency in case with no dynamic features
                dtype=self.dtype,
            )
        )
        dynamic_fields.append(FieldName.FEAT_CONST)

    # Map all dynamic inputs of length context_length + prediction_length
    # onto FieldName.FEAT_DYNAMIC; past_feat_dynamic_real is excluded
    # because its length is only context_length.
    if len(dynamic_fields) > 1:
        transforms.append(
            VstackFeatures(
                output_field=FieldName.FEAT_DYNAMIC,
                input_fields=dynamic_fields,
            )
        )
    elif len(dynamic_fields) == 1:
        transforms.append(
            RenameFields({dynamic_fields[0]: FieldName.FEAT_DYNAMIC})
        )

    # --- TRANSFORMATION CHAIN FOR STATIC FEATURES ---
    if not self.use_feat_static_cat:
        transforms.append(
            SetField(
                output_field=FieldName.FEAT_STATIC_CAT,
                value=np.array([0], dtype=np.int32),
            )
        )

    return Chain(transforms)
@pytest.mark.parametrize("ds", [ds1, ds2]) @pytest.mark.parametrize( "transform", [ AddObservedValuesIndicator( target_field=FieldName.TARGET, output_field=FieldName.OBSERVED_VALUES, ), AddAgeFeature( target_field=FieldName.TARGET, output_field="age_feature", pred_length=1, ), AddConstFeature( target_field=FieldName.TARGET, output_field="constant", pred_length=1, ), ], ) def test_dataset_imutability(ds, transform): ds_c = deepcopy(ds) # test that using twice the transformation gives the same result out1 = transform(ds, is_train=True) out2 = transform(ds, is_train=True) for o1, o2 in zip(out1, out2): for k in o1: if isinstance(o1[k], np.ndarray): assert np.allclose(o1[k], o2[k], equal_nan=True) else: