def _fit_transform(self, X: DataFrame, **kwargs) -> (DataFrame, dict):
        feature_metadata = self.feature_metadata_in
        for i in range(len(self.generators)):
            self._log(20, f'\tStage {i + 1} Generators:')
            feature_df_list = []
            generator_group_valid = []
            for generator in self.generators[i]:
                if generator.is_valid_metadata_in(feature_metadata):
                    if generator.verbosity > self.verbosity:
                        generator.verbosity = self.verbosity
                    generator.set_log_prefix(log_prefix=self.log_prefix + '\t\t', prepend=True)
                    feature_df_list.append(generator.fit_transform(X, feature_metadata_in=feature_metadata, **kwargs))
                    generator_group_valid.append(generator)
                else:
                    self._log(15, f'\t\tSkipping {generator.__class__.__name__}: No input feature with required dtypes.')

            self.generators[i] = generator_group_valid

            self.generators[i] = [generator for j, generator in enumerate(self.generators[i]) if feature_df_list[j] is not None and len(feature_df_list[j].columns) > 0]
            feature_df_list = [feature_df for feature_df in feature_df_list if feature_df is not None and len(feature_df.columns) > 0]

            if self.generators[i]:
                # Raise an exception if generators expect different raw input types for the same feature.
                FeatureMetadata.join_metadatas([generator.feature_metadata_in for generator in self.generators[i]], shared_raw_features='error_if_diff')

            if self.generators[i]:
                feature_metadata = FeatureMetadata.join_metadatas([generator.feature_metadata for generator in self.generators[i]], shared_raw_features='error')
            else:
                feature_metadata = FeatureMetadata(type_map_raw=dict())

            if not feature_df_list:
                X = DataFrame(index=X.index)
            elif len(feature_df_list) == 1:
                X = feature_df_list[0]
            else:
                X = pd.concat(feature_df_list, axis=1, ignore_index=False, copy=False)

        self._remove_features_out(features=[])
        # Remove useless generators
        # TODO: consider moving to self._remove_features_out
        for i in range(len(self.generators)):
            generator_group_valid = []
            for j in range(len(self.generators[i])):
                if self.generators[i][j].features_out:
                    generator_group_valid.append(self.generators[i][j])
            self.generators[i] = generator_group_valid

        return X, feature_metadata.type_group_map_special
 def _get_unused_features(self, feature_links_chain):
     features_in_list = []
     for i in range(len(self.generators)):
         stage = i + 1
         if stage > 1:
             if self.generators[stage - 2]:
                 features_in = FeatureMetadata.join_metadatas([generator.feature_metadata for generator in self.generators[stage - 2]], shared_raw_features='error').get_features()
             else:
                 features_in = []
         else:
             features_in = self.features_in
         features_in_list.append(features_in)
     return self._get_unused_features_generic(feature_links_chain=feature_links_chain, features_in_list=features_in_list)
def test_feature_metadata(data_helper):
    # Given
    input_data = data_helper.generate_multi_feature_full()

    expected_feature_metadata_full = {
        ('category', ()): ['cat'],
        ('datetime', ()): ['datetime'],
        ('float', ()): ['float'],
        ('int', ()): ['int'],
        ('object', ()): ['obj'],
        ('object', ('datetime_as_object', )): ['datetime_as_object'],
        ('object', ('text', )): ['text']
    }

    expected_feature_metadata_get_features = [
        'int', 'float', 'obj', 'cat', 'datetime', 'text', 'datetime_as_object'
    ]

    expected_type_map_raw = {
        'cat': 'category',
        'datetime': 'datetime',
        'datetime_as_object': 'object',
        'float': 'float',
        'int': 'int',
        'obj': 'object',
        'text': 'object'
    }

    expected_type_group_map_special = {
        'datetime_as_object': ['datetime_as_object'],
        'text': ['text']
    }

    expected_feature_metadata_renamed_full = {
        ('category', ()): ['cat'],
        ('datetime', ()): ['datetime'],
        ('float', ()): ['obj'],
        ('int', ()): ['int_renamed'],
        ('object', ()): ['float'],
        ('object', ('datetime_as_object', )): ['datetime_as_object'],
        ('object', ('text', )): ['text_renamed']
    }

    expected_feature_metadata_recombined_full_full = {
        ('category', ()): ['cat'],
        ('custom_raw_type', ('custom_special_type', )): ['new_feature'],
        ('datetime', ()): ['datetime'],
        ('float', ()): ['float'],
        ('int', ('custom_special_type', )): ['int'],
        ('object', ()): ['obj'],
        ('object', ('datetime_as_object', )): ['datetime_as_object'],
        ('object', ('text', )): ['text']
    }

    # When
    feature_metadata = FeatureMetadata.from_df(input_data)
    feature_metadata_renamed = feature_metadata.rename_features(
        rename_map={
            'text': 'text_renamed',
            'int': 'int_renamed',
            'obj': 'float',
            'float': 'obj'
        })
    feature_metadata_remove = feature_metadata.remove_features(
        features=['text', 'obj', 'float'])
    feature_metadata_keep = feature_metadata.keep_features(
        features=['text', 'obj', 'float'])
    feature_metadata_custom = FeatureMetadata(
        type_map_raw={
            'int': 'int',
            'new_feature': 'custom_raw_type'
        },
        type_group_map_special={'custom_special_type': ['int', 'new_feature']})
    feature_metadata_recombined = feature_metadata_keep.join_metadata(
        feature_metadata_remove)
    feature_metadata_recombined_alternate = FeatureMetadata.join_metadatas(
        metadata_list=[feature_metadata_keep, feature_metadata_remove])
    feature_metadata_recombined_full = FeatureMetadata.join_metadatas(
        metadata_list=[
            feature_metadata_keep, feature_metadata_remove,
            feature_metadata_custom
        ],
        shared_raw_features='error_if_diff')

    # Therefore
    with pytest.raises(AssertionError):
        # Error because special contains feature not in raw
        FeatureMetadata(type_map_raw={'int': 'int'},
                        type_group_map_special={
                            'custom_special_type': ['int', 'new_feature']
                        })
    with pytest.raises(AssertionError):
        # Error because renaming to another existing feature without also renaming that feature
        feature_metadata.rename_features(rename_map={'text': 'obj'})
    with pytest.raises(KeyError):
        # Error if removing unknown feature
        feature_metadata_remove.remove_features(features=['text'])
    with pytest.raises(KeyError):
        # Error if getting unknown feature type
        feature_metadata_remove.get_feature_type_raw('text')
    with pytest.raises(KeyError):
        # Error if getting unknown feature type
        feature_metadata_remove.get_feature_types_special('text')
    with pytest.raises(AssertionError):
        # Error because feature_metadata_remove and feature_metadata_custom share a raw feature
        FeatureMetadata.join_metadatas(metadata_list=[
            feature_metadata_keep, feature_metadata_remove,
            feature_metadata_custom
        ])

    assert feature_metadata.to_dict(
        inverse=True) == expected_feature_metadata_full
    assert feature_metadata.get_features(
    ) == expected_feature_metadata_get_features
    assert feature_metadata.type_map_raw == expected_type_map_raw
    assert dict(feature_metadata.type_group_map_special
                ) == expected_type_group_map_special

    assert feature_metadata.get_feature_type_raw('text') == 'object'
    assert feature_metadata.get_feature_types_special('text') == ['text']
    assert feature_metadata.get_feature_type_raw('int') == 'int'
    assert feature_metadata.get_feature_types_special('int') == []
    assert feature_metadata_recombined_full.get_feature_types_special(
        'int') == ['custom_special_type']
    assert feature_metadata_recombined_full.get_feature_type_raw(
        'new_feature') == 'custom_raw_type'

    assert feature_metadata_renamed.to_dict(
        inverse=True) == expected_feature_metadata_renamed_full
    assert feature_metadata_recombined.to_dict() == feature_metadata.to_dict()
    assert feature_metadata_recombined_alternate.to_dict(
    ) == feature_metadata.to_dict()
    assert feature_metadata_recombined_full.to_dict(
        inverse=True) == expected_feature_metadata_recombined_full_full