def test_fit_attributes(df_duplicate_features):
    """Check the attributes exposed by ``DropDuplicateFeatures`` after ``fit``.

    The transformer is fitted on the ``df_duplicate_features`` argument, and
    its ``features_to_drop_`` and ``duplicated_feature_sets_`` attributes are
    compared against the expected values below.
    """
    expected_to_drop = {"dob", "dob3", "City2", "Age2"}
    expected_groups = [
        {"dob", "dob2", "dob3"},
        {"City", "City2"},
        {"Age", "Age2"},
    ]

    dedup = DropDuplicateFeatures()
    dedup.fit(df_duplicate_features)

    assert dedup.features_to_drop_ == expected_to_drop
    assert dedup.duplicated_feature_sets_ == expected_groups
def test_variables_assigned_correctly(df_duplicate_features):
    """Check ``variables`` before and after fitting ``DropDuplicateFeatures``.

    A transformer built with no arguments must report ``variables`` as
    ``None``; after ``fit`` it must equal the list of all columns of the
    ``df_duplicate_features`` argument.
    """
    dedup = DropDuplicateFeatures()
    # Nothing was passed to the constructor, so no variables are set yet.
    assert dedup.variables is None

    dedup.fit(df_duplicate_features)

    all_columns = list(df_duplicate_features.columns)
    assert dedup.variables == all_columns
def fake_columns(var_list, df):
    """Return the features flagged for dropping among the selected columns.

    Fits a ``DropDuplicateFeatures`` on ``df[var_list]`` and returns the
    fitted object's ``features_to_drop_`` attribute converted to a list.
    """
    selected = df[var_list]
    fitted = DropDuplicateFeatures().fit(selected)
    return list(fitted.features_to_drop_)