def test_get_feature_names_out_input_features_is_list(df_na):
    """Check feature names when ``input_features`` is passed as a list.

    Only ``add_indicators=True`` is exercised here; the ``False`` case is
    left to the generic check from estimator_checks. For each tail setting
    the expected result is the requested features followed by the
    indicator names for that tail.
    """
    requested = ["Age", "Marks"]

    # Expected indicator names per tail, checked in the order left/right/both.
    indicators_by_tail = {
        "left": [name + "_left" for name in requested],
        "right": [name + "_right" for name in requested],
        "both": ["Age_left", "Age_right", "Marks_left", "Marks_right"],
    }

    for tail, indicators in indicators_by_tail.items():
        winsorizer = Winsorizer(
            tail=tail, add_indicators=True, missing_values="ignore"
        )
        winsorizer.fit(df_na)

        expected = requested + indicators
        assert winsorizer.get_feature_names_out(requested) == expected
def test_get_feature_names_out_input_features_is_none(df_na):
    """Check feature names when ``get_feature_names_out`` gets no argument.

    Only ``add_indicators=True`` is exercised here; the ``False`` case is
    left to the generic check. For each tail setting the expected result is
    every column of ``df_na`` followed by the indicator names built from
    "Age" and "Marks".
    """
    all_columns = df_na.columns.to_list()
    capped = ["Age", "Marks"]

    # Expected indicator names per tail, checked in the order left/right/both.
    indicators_by_tail = {
        "left": [name + "_left" for name in capped],
        "right": [name + "_right" for name in capped],
        "both": ["Age_left", "Age_right", "Marks_left", "Marks_right"],
    }

    for tail, indicators in indicators_by_tail.items():
        winsorizer = Winsorizer(
            tail=tail, add_indicators=True, missing_values="ignore"
        )
        winsorizer.fit(df_na)

        expected = all_columns + indicators
        assert winsorizer.get_feature_names_out() == expected
def test_transform_raises_error_if_na_in_input_df(df_vartypes, df_na):
    """``transform`` must raise ``ValueError`` when the input contains NA.

    The transformer is built and fitted on ``df_vartypes`` outside the
    ``pytest.raises`` block, so only the ``transform`` call can satisfy the
    expectation. A ``ValueError`` raised during setup now fails the test
    instead of silently passing it.
    """
    # test case 9: when dataset contains na, transform method
    transformer = Winsorizer()
    transformer.fit(df_vartypes)

    with pytest.raises(ValueError):
        transformer.transform(df_na[["Name", "City", "Age", "Marks", "dob"]])
def test_fit_raises_error_if_na_in_inut_df(df_na):
    """``fit`` must raise ``ValueError`` when the input contains NA.

    The transformer is constructed outside the ``pytest.raises`` block so
    that only the ``fit`` call can satisfy the expectation.
    """
    # test case 8: when dataset contains na, fit method
    transformer = Winsorizer()

    with pytest.raises(ValueError):
        transformer.fit(df_na)
Example #5
0
    width=700,
    height=500,
)

fig.show()

# Remove the "Customer" and "Effective To Date" columns in place.
df.drop(["Customer", "Effective To Date"], axis=1, inplace=True)

# Cap both tails of the listed variables using the IQR method with fold 1.5.
wind = Winsorizer(
    capping_method='iqr',
    tail='both',
    fold=1.5,
    variables=['Customer Lifetime Value', 'Income', 'Total Claim Amount'])

wind.fit(df)
df = wind.transform(df)

dummylist = []

# Columns to be replaced by dummy (indicator) columns.
dummy_variables = [
    "State", "Response", "Coverage", "Education", "EmploymentStatus", "Gender",
    "Location Code", "Policy Type", "Policy", "Renew Offer Type",
    "Sales Channel", "Vehicle Class", "Vehicle Size", "Marital Status"
]
for var in dummy_variables:
    # One dummy frame per column, prefixed with the column name;
    # drop_first=True leaves out the first level of each column.
    dummylist.append(
        pd.get_dummies(df[var], prefix=var, prefix_sep="_", drop_first=True))

# Concatenate once, after every dummy frame has been built, instead of
# rebuilding the combined frame on each loop iteration.
dummies_collected = pd.concat(dummylist, axis=1)

# The source columns are removed in place once their dummies exist.
df.drop(dummy_variables, axis=1, inplace=True)