示例#1
0
    def test_pca(self):
        """Exercise ``PCA`` via a column transformer and via ``DataFrameMapper``.

        The first half fits ``PCA(2)`` on the columns selected by
        ``column_number_exclude_timedelta`` through ``make_column_transformer``
        and checks the shape of the result. The second half fits a
        ``DataFrameMapper`` that combines PCA, an impute + one-hot step list
        and polynomial features, then checks the output column names.
        """
        pca_transformer = make_column_transformer(
            (PCA(2), column_number_exclude_timedelta))

        X, y = get_df()
        reduced = pca_transformer.fit_transform(X, y)
        assert reduced.shape == (3, 2)

        # The object/category/bool columns use skex.SafeSimpleImputer; an
        # earlier revision of this test used the plain SimpleImputer instead.
        feature_defs = [
            (column_number_exclude_timedelta, PCA(2)),
            (
                column_object_category_bool,
                [skex.SafeSimpleImputer(strategy='constant'), OneHotEncoder()],
            ),
            (column_number_exclude_timedelta, PolynomialFeatures(2)),
        ]
        mapper = DataFrameMapper(feature_defs, input_df=True, df_out=True)
        mapped = mapper.fit_transform(X, y)

        # NOTE(review): a previous version of this assertion also listed the
        # PolynomialFeatures columns ('1', 'b', 'c', 'd', 'l', 'b^2', ...,
        # 'l^2'). Only the PCA and one-hot columns are expected now -- confirm
        # that this is intended.
        expected_columns = [
            'b_c_d_l_0', 'b_c_d_l_1',
            'a_a', 'a_b', 'a_missing_value',
            'e_False', 'e_True',
            'f_c', 'f_d', 'f_missing_value',
        ]
        assert mapped.columns.to_list() == expected_columns
示例#2
0
    def test_no_feature(self):
        """Expect ``ValueError`` when the only feature entry lists no columns."""
        frame = get_df()[0]
        empty_selection = [([], preprocessing.LabelEncoder())]
        mapper = DataFrameMapper(empty_selection, input_df=True, df_out=True)

        # Expected failure message:
        # "ValueError: No data output, maybe it's because your input feature
        # is empty."
        with pytest.raises(ValueError):
            mapper.fit_transform(frame, None)
示例#3
0
 def as_local(self):
     """Return a new ``DataFrameMapper`` built from this mapper's fitted state.

     The new mapper is created with an empty feature list and ``default=None``
     and copies ``df_out``, ``input_df`` and ``df_out_dtype_transforms`` from
     this instance. Its ``fitted_features_`` holds the same
     ``(columns, transformer, options)`` triples as this instance, with each
     transformer replaced by the result of its own ``as_local()`` call.
     """
     local_mapper = DataFrameMapper(
         [],
         default=None,
         df_out=self.df_out,
         input_df=self.input_df,
         df_out_dtype_transforms=self.df_out_dtype_transforms,
     )

     converted = []
     for columns, transformer, options in self.fitted_features_:
         converted.append((columns, transformer.as_local(), options))
     local_mapper.fitted_features_ = converted

     return local_mapper
示例#4
0
    def test_no_categorical_feature(self):
        """Fit a categorical-only mapper on a frame limited to 'b' and 'd'.

        The mapper's single feature entry uses ``column_object_category_bool``
        as its selector and is created with ``default=None``. The test checks
        that both input columns are present in the output.
        NOTE(review): presumably neither column matches the selector (per the
        test name) -- confirm against ``get_df``.
        """
        subset = get_df()[0][['b', 'd']]

        feature_defs = [
            (column_object_category_bool, preprocessing.LabelEncoder()),
        ]
        mapper = DataFrameMapper(
            feature_defs, input_df=True, df_out=True, default=None)

        result = mapper.fit_transform(subset, None)

        for column in ('b', 'd'):
            assert column in result
示例#5
0
 def test_in_dataframe_mapper(self):
     """Run ``FeatureGenerationTransformer`` inside a ``DataFrameMapper``.

     Loads the bank dataset, drops its 'id' column, splits the first 100 rows
     80/20 and fits the mapper on the training part only. The transformed
     training frame is expected to have shape ``(80, 62)``.
     """
     bank = dsutils.load_bank()
     bank.drop(['id'], axis=1, inplace=True)

     # Only the training split is used; the held-out part is discarded.
     X_train, _ = train_test_split(
         bank.head(100), test_size=0.2, random_state=42)

     generator = FeatureGenerationTransformer(
         task='binary',
         trans_primitives=['cross_categorical'],
         categories_cols=column_object_category_bool(X_train),
     )
     all_columns = X_train.columns.to_list()
     mapper = DataFrameMapper(
         features=[(all_columns, generator)], input_df=True, df_out=True)

     transformed = mapper.fit_transform(X_train)
     assert transformed.shape == (80, 62)
示例#6
0
 def test_func_transformer(self):
     """Check that ``df_out_dtype_transforms`` yields categorical dtypes.

     The mapper imputes and label-encodes the columns selected by
     ``column_object_category_bool`` and is configured with
     ``df_out_dtype_transforms=[(column_object, 'category')]``. The three
     output columns are expected to be unordered categoricals with categories
     ``[0, 1, 2]``, ``[0, 1]`` and ``[0, 1, 2]`` respectively.
     """
     encode_steps = [
         SimpleImputer(strategy='constant'),
         skex.MultiLabelEncoder(),
     ]
     mapper = DataFrameMapper(
         [(column_object_category_bool, encode_steps)],
         input_df=True,
         df_out=True,
         df_out_dtype_transforms=[(column_object, 'category')],
     )

     X, y = get_df()
     result = mapper.fit_transform(X, y)

     expected_dtypes = [
         pd.CategoricalDtype(categories=categories, ordered=False)
         for categories in ([0, 1, 2], [0, 1], [0, 1, 2])
     ]
     assert result.dtypes.to_list() == expected_dtypes
示例#7
0
def general_preprocessor():
    """Build a ``DataFrameMapper`` with separate categorical and numeric pipelines.

    Columns selected by ``column_object_category_bool`` go through a
    constant-strategy ``SimpleImputer`` followed by an ``OrdinalEncoder``.
    Columns selected by ``column_number_exclude_timedelta`` go through a
    mean-strategy ``SimpleImputer`` followed by a ``StandardScaler``.

    Returns:
        The unfitted ``DataFrameMapper``, created with ``input_df=True`` and
        ``df_out=True``.
    """
    categorical_pipeline = Pipeline(steps=[
        ('imputer_cat', SimpleImputer(strategy='constant')),
        ('encoder', OrdinalEncoder()),
    ])
    numeric_pipeline = Pipeline(steps=[
        ('imputer_num', SimpleImputer(strategy='mean')),
        ('scaler', StandardScaler()),
    ])

    feature_defs = [
        (column_object_category_bool, categorical_pipeline),
        (column_number_exclude_timedelta, numeric_pipeline),
    ]
    return DataFrameMapper(features=feature_defs, input_df=True, df_out=True)