def test_df_cat_and_num_variables_roc_auc(df_test_num_cat):
    """Check ROC-AUC based selection when no variables are specified.

    The selector is fitted with ``variables=None`` on the data supplied by
    the ``df_test_num_cat`` fixture, using 3 equal-width bins and 2-fold
    cross-validation. The test then verifies the init parameters, the fitted
    attributes and the transformed frame.
    """
    features, target = df_test_num_cat

    init_params = {
        "variables": None,
        "scoring": "roc_auc_score",
        "threshold": 0.6,
        "bins": 3,
        "strategy": "equal_width",
        "cv": 2,
        "random_state": 1,
    }
    selector = SelectByTargetMeanPerformance(**init_params)
    selector.fit(features, target)

    # Reference values the fitted selector is compared against.
    kept_columns = ["var_A", "var_B"]
    expected_frame = features[kept_columns]
    expected_performance = {
        "var_A": 0.841,
        "var_B": 0.776,
        "var_C": 0.481,
        "var_D": 0.496,
    }

    # Init parameters as exposed on the selector after fitting.
    assert selector.variables == list(features.columns)
    assert selector.scoring == "roc_auc_score"
    assert selector.threshold == 0.60
    assert selector.cv == 2
    assert selector.random_state == 1

    # Attributes learned during fit.
    assert selector.variables_categorical_ == ["var_A", "var_B"]
    assert selector.variables_numerical_ == ["var_C", "var_D"]
    assert selector.selected_features_ == ["var_A", "var_B"]

    # Every reported performance, rounded to 3 decimals, must match.
    for feature_name, score in selector.feature_performance_.items():
        assert np.round(score, 3) == expected_performance[feature_name]

    # transform() must return only the selected columns.
    transformed = selector.transform(features)
    pd.testing.assert_frame_equal(transformed, expected_frame)
def test_categorical_variables_roc_auc(df_test_num_cat):
    """Check ROC-AUC based selection on the ``var_A``/``var_B`` subset.

    Only columns ``var_A`` and ``var_B`` of the fixture data are used. With a
    threshold of 0.78 the test expects ``var_B`` to be dropped and ``var_A``
    to be the single column left after ``transform``.
    """
    full_frame, target = df_test_num_cat
    features = full_frame[["var_A", "var_B"]]

    init_params = {
        "variables": None,
        "scoring": "roc_auc_score",
        "threshold": 0.78,
        "cv": 2,
        "random_state": 1,
    }
    selector = SelectByTargetMeanPerformance(**init_params)
    selector.fit(features, target)

    # Reference output: a one-column frame holding var_A.
    expected_frame = features["var_A"].to_frame()

    # Init parameters must be left exactly as they were passed in.
    assert selector.variables is None
    assert selector.scoring == "roc_auc_score"
    assert selector.threshold == 0.78
    assert selector.cv == 2
    assert selector.random_state == 1

    # Attributes learned during fit.
    all_columns = list(features.columns)
    assert selector.variables_ == all_columns
    assert selector.variables_categorical_ == all_columns
    assert selector.variables_numerical_ == []
    assert selector.features_to_drop_ == ["var_B"]

    # NOTE: the per-feature performance check (reference values
    # var_A: 0.841, var_B: 0.776, rounded to 3 decimals) is disabled here.

    # transform() must return only the retained column.
    transformed = selector.transform(features)
    pd.testing.assert_frame_equal(transformed, expected_frame)