Exemplo n.º 1
0
	def test_fit_transform(self):
		"""Compare the fitted output of two XGBoost transformers.

		For each ``missing_value_aware`` setting (``False`` first, then ``True``)
		two transformers are built from ``X.dtypes``, each is fitted on ``X``,
		and the two results are compared as nested lists.
		"""
		# NOTE(review): both sides are produced by make_xgboost_column_transformer,
		# so this compares the factory with itself. The ``dfm`` name in the
		# original suggests a different factory was intended - confirm.
		X = _data()
		for aware in (False, True):
			first = make_xgboost_column_transformer(X.dtypes, missing_value_aware = aware)
			second = make_xgboost_column_transformer(X.dtypes, missing_value_aware = aware)
			first_rows = first.fit_transform(X).tolist()
			second_rows = second.fit_transform(X).tolist()
			self.assertEqual(first_rows, second_rows)
                            for cont_column in cont_columns])

# Rebuild the dtypes Series with the integer labels 0..6 in place of its
# previous index (presumably so entries line up with positional column
# indices - confirm against the code that consumes it).
dtypes = Series(
    dtypes.values,
    index=list(range(7)),
)

# LightGBM pipeline: the factory returns the preprocessing step together with
# a categorical-feature value that is forwarded to the classifier below.
lightgbm_mapper, lightgbm_categorical_feature = make_lightgbm_column_transformer(
    dtypes,
    missing_value_aware=True,
)
lightgbm_pipeline = Pipeline(
    [
        ("mapper", lightgbm_mapper),
        (
            "classifier",
            LGBMClassifier(
                categorical_feature=lightgbm_categorical_feature,
                random_state=13,
                max_depth=3,
                n_estimators=31,
            ),
        ),
    ]
)

# XGBoost pipeline: a missing-value-aware preprocessing step followed by a
# classifier with a fixed seed and the same tree count and depth as the
# LightGBM classifier above.
xgboost_mapper = make_xgboost_column_transformer(
    dtypes,
    missing_value_aware=True,
)
xgboost_pipeline = Pipeline(
    [
        ("mapper", xgboost_mapper),
        (
            "classifier",
            XGBClassifier(
                random_state=13,
                max_depth=3,
                learning_rate=0.1,
                n_estimators=31,
            ),
        ),
    ]
)

# One transformer entry per column position, named after that position.
# The first len(cat_columns) positions get a fresh PMMLLabelBinarizer; the
# remaining positions are passed through unchanged. Any other column is dropped.
sklearn_mapper = ColumnTransformer(
    [
        (
            str(column_index),
            (
                PMMLLabelBinarizer(sparse_output=False)
                if column_index < len(cat_columns)
                else "passthrough"
            ),
            [column_index],
        )
        for column_index in range(len(cat_columns + cont_columns))
    ],
    remainder="drop",
)