def setup_class(self):
     """Create the identity DataMapper fixtures used by the tests.

     One mapper is built from a list of ``(columns, transformer)`` pairs,
     the other from a ColumnTransformer fitted on a 10x5 array of ones.
     """
     per_column_identity = [([0], IdentityTransformer()),
                            ([1], IdentityTransformer())]
     self._identity_mapper_list = DataMapper(per_column_identity)

     fitted_transformer = ColumnTransformer([
         ("column1", IdentityTransformer(), [0]),
         ("column2", IdentityTransformer(), [1])
     ])
     fitted_transformer.fit(np.ones((10, 5)))
     self._identity_mapper_column_transformer = DataMapper(fitted_transformer)
 def test_many_to_many_support_transformations(self):
     """Check the feature map built with ``allow_all_transformations=True``.

     A ColumnTransformer applies IdentityTransformer to columns 0-3 and
     OneHotEncoder to columns 4-5. The expected feature map gives each of
     the first four input rows a weight of 0.25 on output columns 0-3, and
     gives input rows 4 and 5 a weight of 1.0 on output columns (4, 5) and
     (6, 7) respectively.
     """
     column_transformer = ColumnTransformer([
         ("column_0_1_2_3", IdentityTransformer(), [0, 1, 2, 3]),
         ("column_4_5", OneHotEncoder(), [4, 5])
     ])
     x = np.ones((10, 6))
     # so that one hot encoder doesn't complain of only one category
     x[0, [4, 5]] = 0
     column_transformer.fit(x)

     data_mapper = DataMapper(column_transformer,
                              allow_all_transformations=True)
     data_mapper.transform(x)

     # Expected output columns and weight for each input column, in order.
     expected_columns = [[0, 1, 2, 3]] * 4 + [[4, 5], [6, 7]]
     expected_weights = [0.25] * 4 + [1.0] * 2

     transformed = column_transformer.transform(x)
     feature_map = np.zeros((x.shape[1], transformed.shape[1]))
     for input_idx, (out_cols, weight) in enumerate(
             zip(expected_columns, expected_weights)):
         feature_map[input_idx, out_cols] = weight

     assert (data_mapper.feature_map == feature_map).all()
 def test_many_to_many_exception_column_transformer(self):
     """DataMapper raises ValueError for an unrecognized many-column transformer.

     A transformer that takes input many columns. Since we do not recognize
     this transformer and it uses many input columns - it is treated as
     many to many/one map.
     """
     column_transformer = ColumnTransformer([
         ("column_0_1", IdentityTransformer(), [0, 1])
     ])
     x = np.ones((2, 2))
     column_transformer.fit(x)
     # Only the DataMapper construction is expected to raise. Keeping the
     # setup outside the context manager prevents a ValueError raised while
     # building or fitting the ColumnTransformer from passing the test.
     with pytest.raises(ValueError):
         DataMapper(column_transformer)
    def test_many_to_many_mapper_nested_pipelines(self):
        """Check the feature map produced for nested pipelines.

        The pipeline and data come from ``_get_nested_pipelines_and_data``
        with an IdentityTransformer. The expected map is 2x5, with every
        entry of row 0 equal to 0.6 and every entry of row 1 equal to 0.4.
        """
        pipeline, x = self._get_nested_pipelines_and_data(IdentityTransformer())
        feature_mapper = get_feature_mapper_for_pipeline(pipeline)
        feature_mapper.transform(x)

        expected_map = np.zeros((2, 5))
        for row_idx, row_weight in enumerate((0.6, 0.4)):
            expected_map[row_idx, :] = row_weight

        assert np.all(feature_mapper.feature_map == pytest.approx(expected_map))
Пример #5
0
def get_transformations_many_to_many(feature_names):
    """Build an unfitted ColumnTransformer with many-to-many transformations.

    :param feature_names: Sliceable, indexable collection of column
        identifiers. All but the last two go to a scaler + identity
        pipeline, the last two go to a StandardScaler, and the first two
        are additionally passed through unchanged.
    :return: The ColumnTransformer holding the three transformations.
    """
    leading_features = list(feature_names[:-2])
    trailing_features = list(feature_names[-2:])

    # IdentityTransformer is our custom transformer, so not recognized as one to many
    scaled_identity = Pipeline([("scaler", StandardScaler()),
                                ("identity", IdentityTransformer())])

    return ColumnTransformer([
        ("column_0_1_2_3", scaled_identity, leading_features),
        ("column_4_5", StandardScaler(), trailing_features),
        # a transformation whose columns are given as a pandas index type
        ("pandas_index_columns", "passthrough",
         pd.Index([feature_names[0], feature_names[1]])),
    ])
 def test_mixed_dtypes(self):
     """Transforming with an identity and a sparse transformer yields a sparse result."""
     mixed_transformations = [([0], IdentityTransformer()),
                              ([1], SparseTransformer())]
     data_mapper = DataMapper(mixed_transformations)
     transformed = data_mapper.transform(np.ones((10, 2)))
     assert issparse(transformed)
 def test_many_to_many_exception_list(self):
     """DataMapper raises ValueError for a list entry spanning many columns.

     A transformer that takes input many columns. Since we do not recognize
     this transformer and it uses many input columns - it is treated as
     many to many/one map.
     """
     with pytest.raises(ValueError):
         many_column_spec = [([0, 1], IdentityTransformer())]
         DataMapper(many_column_spec)