def test_numerical_user_splits_fixed():
    """Exercise validation and solving behaviour of ``user_splits_fixed``."""
    splits = [4, 7, 7.1, 10, 16, 20, 23]

    # ``user_splits_fixed`` supplied without any ``user_splits``.
    with raises(ValueError):
        fixed = [True, True, True, True, False, False, False]
        binning = ContinuousOptimalBinning(user_splits_fixed=fixed)
        binning.fit(x, y)

    # A tuple instead of a list is expected to be rejected as a type error.
    with raises(TypeError):
        fixed = (False, False, False, False, False, True, False)
        binning = ContinuousOptimalBinning(
            user_splits=splits,
            user_splits_fixed=fixed,
        )
        binning.fit(x, y)

    # Integer flags rather than booleans.
    with raises(ValueError):
        fixed = [0, 0, 0, 0, 0, 1, 0]
        binning = ContinuousOptimalBinning(
            user_splits=splits,
            user_splits_fixed=fixed,
        )
        binning.fit(x, y)

    # Four flags for seven split points.
    with raises(ValueError):
        fixed = [False, False, False, False]
        binning = ContinuousOptimalBinning(
            user_splits=splits,
            user_splits_fixed=fixed,
        )
        binning.fit(x, y)

    # Well-formed input that pins the first four splits; the test expects the
    # resulting problem to be reported as infeasible.
    fixed = [True, True, True, True, False, False, False]
    binning = ContinuousOptimalBinning(
        user_splits=splits,
        user_splits_fixed=fixed,
    )
    binning.fit(x, y)

    assert binning.status == "INFEASIBLE"
def test_numerical_default_transform():
    """Check ``transform`` before fitting and on a few sample points after."""
    binning = ContinuousOptimalBinning()

    # Transforming with an unfitted estimator must fail.
    with raises(NotFittedError):
        binning.transform(x)

    binning.fit(x, y)

    expected = [39.718, 39.718, 25.56067416, 11.82978723]
    transformed = binning.transform([0.2, 4.1, 7.2, 26])

    assert transformed == approx(expected, rel=1e-6)
def test_numerical_default():
    """Fit with default parameters and check the status and split points."""
    optb = ContinuousOptimalBinning()
    optb.fit(x, y)

    assert optb.status == "OPTIMAL"
    assert optb.splits == approx(
        [
            4.6500001,
            5.49499989,
            6.86500001,
            9.7249999,
            11.67499971,
            13.0999999,
            16.08500004,
            19.89999962,
            23.31500053,
        ],
        rel=1e-6,
    )


def test_numerical_default_binning_table():
    """Fit with default parameters and exercise the binning table outputs.

    This test previously shared the name ``test_numerical_default`` with the
    test above. The second definition rebound the name, so the first test was
    never collected. It now has its own name so both tests run; the split
    values are asserted by ``test_numerical_default`` and not repeated here.
    """
    optb = ContinuousOptimalBinning()
    optb.fit(x, y)

    assert optb.status == "OPTIMAL"

    optb.binning_table.build()
    optb.binning_table.analysis()

    # Plot the default view, then the variants without the special and
    # missing bins; each call writes a PNG to the working directory.
    optb.binning_table.plot(savefig="test_continuous_binning.png")
    optb.binning_table.plot(
        add_special=False,
        savefig="test_continuous_binning_no_special.png",
    )
    optb.binning_table.plot(
        add_missing=False,
        savefig="test_continuous_binning_no_missing.png",
    )
def test_default_transform_continuous():
    """Compare ``BinningProcess`` output with a standalone continuous binning."""
    dataset = load_boston()
    names = dataset.feature_names
    features = dataset.data
    target = dataset.target

    binning_process = BinningProcess(names)
    binning_process.fit(features, target)
    process_output = binning_process.transform(features)

    # The process should have selected the continuous binning class.
    binned = binning_process.get_binned_variable(names[0])
    assert isinstance(binned, ContinuousOptimalBinning)

    # Fit column 5 on its own and compare with the process output.
    column = features[:, 5]
    standalone = ContinuousOptimalBinning()
    standalone.fit(column, target)

    assert standalone.transform(column) == approx(
        process_output[:, 5], rel=1e-6
    )
def test_params():
    """Check that each invalid constructor argument is rejected.

    Each entry pairs the expected exception type with the keyword arguments
    passed to ``ContinuousOptimalBinning``. Construction and ``fit`` both
    happen inside ``raises``, so the error may come from either step.
    """
    invalid_cases = [
        (TypeError, {"name": 1}),
        (ValueError, {"dtype": "nominal"}),
        (ValueError, {"prebinning_method": "new_method"}),
        (ValueError, {"max_n_prebins": -2}),
        (ValueError, {"min_prebin_size": 0.6}),
        (ValueError, {"min_n_bins": -2}),
        (ValueError, {"max_n_bins": -2.2}),
        (ValueError, {"min_n_bins": 3, "max_n_bins": 2}),
        (ValueError, {"min_bin_size": 0.6}),
        (ValueError, {"max_bin_size": -0.6}),
        (ValueError, {"min_bin_size": 0.5, "max_bin_size": 0.3}),
        (ValueError, {"monotonic_trend": "new_trend"}),
        (ValueError, {"min_mean_diff": -1.1}),
        (ValueError, {"max_pvalue": 1.1}),
        (ValueError, {"max_pvalue_policy": "new_policy"}),
        (ValueError, {"cat_cutoff": -0.2}),
        (TypeError, {"user_splits": {"a": [1, 2]}}),
        (TypeError, {"special_codes": {1, 2, 3}}),
        (ValueError, {"split_digits": 9}),
        (ValueError, {"time_limit": -2}),
        (TypeError, {"verbose": 1}),
    ]

    for expected_error, kwargs in invalid_cases:
        with raises(expected_error):
            binning = ContinuousOptimalBinning(**kwargs)
            binning.fit(x, y)
def test_verbose():
    """Fit with ``verbose=True`` and check the solve still ends as optimal."""
    binning = ContinuousOptimalBinning(verbose=True)
    binning.fit(x, y)

    assert binning.status == "OPTIMAL"
def test_numerical_user_splits_non_unique():
    """Expect ``fit`` to raise ``ValueError`` for repeated user split points."""
    duplicated_splits = [4, 7, 7, 10, 16, 20, 23]  # 7 appears twice

    # Construction stays outside ``raises``: only ``fit`` is expected to fail.
    binning = ContinuousOptimalBinning(user_splits=duplicated_splits)

    with raises(ValueError):
        binning.fit(x, y)