def test_get_feature_names_out_input_features_is_list(df_na):
    """Check get_feature_names_out when an explicit feature list is passed.

    Only ``add_indicators=True`` is covered here; a comment in the original
    test notes that the ``add_indicators=False`` case is already handled by
    the generic check from estimator_checks.

    For every tail setting, the returned names must be the requested
    features followed by the indicator column names for that tail.
    """
    requested = ["Age", "Marks"]

    # (tail, indicator column names expected for that tail)
    cases = (
        ("left", [name + "_left" for name in requested]),
        ("right", [name + "_right" for name in requested]),
        ("both", ["Age_left", "Age_right", "Marks_left", "Marks_right"]),
    )

    for tail, indicator_names in cases:
        winsorizer = Winsorizer(
            tail=tail, add_indicators=True, missing_values="ignore"
        )
        winsorizer.fit(df_na)
        expected = requested + indicator_names
        assert winsorizer.get_feature_names_out(requested) == expected
def test_get_feature_names_out_input_features_is_none(df_na):
    """Check get_feature_names_out when no feature list is passed.

    Only ``add_indicators=True`` is covered here; a comment in the original
    test notes that the indicators-off case is handled by the generic check.

    For every tail setting, the returned names must be all columns of the
    fitted frame followed by the indicator names for "Age" and "Marks".
    """
    all_columns = df_na.columns.to_list()
    capped = ["Age", "Marks"]

    # (tail, indicator column names expected for that tail)
    cases = (
        ("left", [name + "_left" for name in capped]),
        ("right", [name + "_right" for name in capped]),
        ("both", ["Age_left", "Age_right", "Marks_left", "Marks_right"]),
    )

    for tail, indicator_names in cases:
        winsorizer = Winsorizer(
            tail=tail, add_indicators=True, missing_values="ignore"
        )
        winsorizer.fit(df_na)
        expected = all_columns + indicator_names
        assert winsorizer.get_feature_names_out() == expected
def test_transform_raises_error_if_na_in_input_df(df_vartypes, df_na):
    """transform() must raise ValueError for the ``df_na`` fixture.

    A default-configured Winsorizer is fitted on ``df_vartypes`` and then
    asked to transform five columns selected from ``df_na``.
    """
    # test case 9: when dataset contains na, transform method

    # Build and fit outside the ``pytest.raises`` block: if either step
    # raised ValueError inside it, the test would pass without transform()
    # ever being exercised.
    # NOTE(review): assumes fitting on df_vartypes succeeds, i.e. the
    # fixture has no missing values -- confirm against the fixture.
    transformer = Winsorizer()
    transformer.fit(df_vartypes)

    with pytest.raises(ValueError):
        transformer.transform(df_na[["Name", "City", "Age", "Marks", "dob"]])
def test_fit_raises_error_if_na_in_inut_df(df_na):
    """fit() must raise ValueError for the ``df_na`` fixture.

    Uses a default-configured Winsorizer.
    """
    # test case 8: when dataset contains na, fit method
    # NOTE: "inut" in the test name is a typo for "input"; the name is left
    # unchanged so existing test IDs and selections keep working.

    # Construct outside the ``pytest.raises`` block so that only a
    # ValueError raised by fit() itself can satisfy the expectation.
    transformer = Winsorizer()

    with pytest.raises(ValueError):
        transformer.fit(df_na)
width=700, height=500, ) fig.show() df.drop("Customer", axis=1, inplace=True) df.drop("Effective To Date", axis=1, inplace=True) wind = Winsorizer( capping_method='iqr', tail='both', fold=1.5, variables=['Customer Lifetime Value', 'Income', 'Total Claim Amount']) wind.fit(df) df = wind.transform(df) dummylist = [] dummy_variables = [ "State", "Response", "Coverage", "Education", "EmploymentStatus", "Gender", "Location Code", "Policy Type", "Policy", "Renew Offer Type", "Sales Channel", "Vehicle Class", "Vehicle Size", "Marital Status" ] for var in dummy_variables: dummylist.append( pd.get_dummies(df[var], prefix=var, prefix_sep="_", drop_first=True)) dummies_collected = pd.concat(dummylist, axis=1) df.drop(dummy_variables, axis=1, inplace=True)