def test_pca(self):
    """Fit PCA through a column transformer and through a ``DataFrameMapper``.

    The column-transformer output is checked by shape; the mapper output is
    checked by its exact list of column names.
    """
    pca_transformer = make_column_transformer(
        (PCA(2), column_number_exclude_timedelta))
    X, y = get_df()
    reduced = pca_transformer.fit_transform(X, y)
    assert reduced.shape == (3, 2)

    features = [
        (column_number_exclude_timedelta, PCA(2)),
        (
            column_object_category_bool,
            [skex.SafeSimpleImputer(strategy='constant'), OneHotEncoder()],
        ),
        (column_number_exclude_timedelta, PolynomialFeatures(2)),
    ]
    mapper = DataFrameMapper(features, input_df=True, df_out=True)
    mapped = mapper.fit_transform(X, y)

    # NOTE(review): the third feature (PolynomialFeatures) uses the same column
    # selector as the PCA feature, and no polynomial column is expected below.
    # An earlier version of this assertion also expected '1', 'b', 'c', 'd',
    # 'l', 'b^2', 'b c', 'b d', 'b l', 'c^2', 'c d', 'c l', 'd^2', 'd l', 'l^2',
    # and used a plain SimpleImputer instead of skex.SafeSimpleImputer.
    # Confirm the current expectation is the intended one.
    expected_columns = [
        'b_c_d_l_0', 'b_c_d_l_1',
        'a_a', 'a_b', 'a_missing_value',
        'e_False', 'e_True',
        'f_c', 'f_d', 'f_missing_value',
    ]
    assert mapped.columns.to_list() == expected_columns
def test_no_feature(self):
    """Fitting a mapper whose only feature has an empty column list must raise."""
    frame, _ = get_df()
    empty_feature = ([], preprocessing.LabelEncoder())
    mapper = DataFrameMapper([empty_feature], input_df=True, df_out=True)

    # Expected failure:
    # "ValueError: No data output, maybe it's because your input feature is empty."
    with pytest.raises(ValueError):
        mapper.fit_transform(frame, None)
def as_local(self):
    """Return a new ``DataFrameMapper`` built from this mapper's fitted state.

    The result is created with no features and ``default=None``, copies this
    mapper's ``df_out``, ``input_df`` and ``df_out_dtype_transforms`` settings,
    and receives ``fitted_features_`` in which every fitted transformer has
    been replaced by the result of its own ``as_local()`` call.
    """
    local_mapper = DataFrameMapper(
        [],
        default=None,
        df_out=self.df_out,
        input_df=self.input_df,
        df_out_dtype_transforms=self.df_out_dtype_transforms,
    )

    local_features = []
    for columns, transformer, options in self.fitted_features_:
        # Columns and options are carried over unchanged; only the transformer
        # is converted.
        local_features.append((columns, transformer.as_local(), options))

    local_mapper.fitted_features_ = local_features
    return local_mapper
def test_no_categorical_feature(self):
    """Map a frame restricted to columns 'b' and 'd' with an object/category/bool feature.

    With ``default=None`` on the mapper, both input columns are asserted to be
    present in the transformed output.
    """
    kept_columns = ['b', 'd']
    frame = get_df()[0][kept_columns]

    features = [(column_object_category_bool, preprocessing.LabelEncoder())]
    mapper = DataFrameMapper(
        features, input_df=True, df_out=True, default=None)
    transformed = mapper.fit_transform(frame, None)

    for column in kept_columns:
        assert column in transformed
def test_in_dataframe_mapper(self):
    """Run a ``FeatureGenerationTransformer`` as the single feature of a mapper.

    Uses the first 100 rows of the bank dataset (without its 'id' column),
    split 80/20, and checks the shape of the transformed training part.
    """
    bank = dsutils.load_bank()
    bank.drop(['id'], axis=1, inplace=True)

    sample = bank.head(100)
    # Only the training part is used; the held-out part is discarded.
    X_train, _ = train_test_split(sample, test_size=0.2, random_state=42)

    generator = FeatureGenerationTransformer(
        task='binary',
        trans_primitives=['cross_categorical'],
        categories_cols=column_object_category_bool(X_train),
    )
    all_columns = X_train.columns.to_list()
    mapper = DataFrameMapper(
        features=[(all_columns, generator)], input_df=True, df_out=True)

    transformed = mapper.fit_transform(X_train)
    assert transformed.shape == (80, 62)
def test_func_transformer(self):
    """Check the output dtypes when ``df_out_dtype_transforms`` is configured.

    The mapper imputes and label-encodes the object/category/bool columns and
    is configured with a ``(column_object, 'category')`` dtype transform; the
    three resulting columns are asserted to be unordered categoricals.
    """
    encode_steps = [
        SimpleImputer(strategy='constant'),
        skex.MultiLabelEncoder(),
    ]
    mapper = DataFrameMapper(
        [(column_object_category_bool, encode_steps)],
        input_df=True,
        df_out=True,
        df_out_dtype_transforms=[(column_object, 'category')],
    )

    X, y = get_df()
    transformed = mapper.fit_transform(X, y)

    # Expected categories per output column, in column order.
    expected_dtypes = [
        pd.CategoricalDtype(categories=[0, 1, 2], ordered=False),
        pd.CategoricalDtype(categories=[0, 1], ordered=False),
        pd.CategoricalDtype(categories=[0, 1, 2], ordered=False),
    ]
    assert transformed.dtypes.to_list() == expected_dtypes
def general_preprocessor():
    """Build a ``DataFrameMapper`` with one pipeline per column group.

    Columns selected by ``column_object_category_bool`` go through a
    constant-strategy imputer followed by an ordinal encoder; columns selected
    by ``column_number_exclude_timedelta`` go through a mean-strategy imputer
    followed by a standard scaler.

    Returns:
        The unfitted mapper, configured with ``input_df=True`` and
        ``df_out=True``.
    """
    categorical_steps = [
        ('imputer_cat', SimpleImputer(strategy='constant')),
        ('encoder', OrdinalEncoder()),
    ]
    categorical_pipeline = Pipeline(steps=categorical_steps)

    numeric_steps = [
        ('imputer_num', SimpleImputer(strategy='mean')),
        ('scaler', StandardScaler()),
    ]
    numeric_pipeline = Pipeline(steps=numeric_steps)

    feature_spec = [
        (column_object_category_bool, categorical_pipeline),
        (column_number_exclude_timedelta, numeric_pipeline),
    ]
    return DataFrameMapper(features=feature_spec, input_df=True, df_out=True)