Пример #1
0
def test_numerical_user_splits_fixed():
    """Exercise ``user_splits_fixed`` handling for a numerical variable.

    Covers four rejected configurations and one accepted configuration
    whose fit is expected to finish with status ``"INFEASIBLE"``.
    """
    user_splits = [4, 7, 7.1, 10, 16, 20, 23]

    # Fixed flags supplied without any user_splits.
    with raises(ValueError):
        fixed_flags = [True, True, True, True, False, False, False]
        binning = ContinuousOptimalBinning(user_splits_fixed=fixed_flags)
        binning.fit(x, y)

    # Each entry pairs the expected exception with the offending flags.
    rejected_flags = (
        # A tuple rather than a list.
        (TypeError, (False, False, False, False, False, True, False)),
        # Integers rather than booleans.
        (ValueError, [0, 0, 0, 0, 0, 1, 0]),
        # Four flags for seven splits.
        (ValueError, [False, False, False, False]),
    )
    for expected_error, fixed_flags in rejected_flags:
        with raises(expected_error):
            binning = ContinuousOptimalBinning(
                user_splits=user_splits,
                user_splits_fixed=fixed_flags,
            )
            binning.fit(x, y)

    # Well-formed input: the first four splits are marked as fixed.
    fixed_flags = [True, True, True, True, False, False, False]
    binning = ContinuousOptimalBinning(
        user_splits=user_splits,
        user_splits_fixed=fixed_flags,
    )
    binning.fit(x, y)

    assert binning.status == "INFEASIBLE"
Пример #2
0
def test_numerical_default_transform():
    """Check ``transform`` before and after fitting with default settings."""
    binning = ContinuousOptimalBinning()

    # Transforming with an unfitted estimator must be rejected.
    with raises(NotFittedError):
        binning.transform(x)

    binning.fit(x, y)

    expected = [39.718, 39.718, 25.56067416, 11.82978723]
    transformed = binning.transform([0.2, 4.1, 7.2, 26])
    assert transformed == approx(expected, rel=1e-6)
Пример #3
0
def test_numerical_default():
    """Fit with default settings and check the status and split points.

    NOTE(review): another ``test_numerical_default`` is defined further down
    in this file; if both live in the same module the later one shadows this
    definition -- confirm whether this copy is still needed.
    """
    expected_splits = [
        4.6500001, 5.49499989, 6.86500001, 9.7249999, 11.67499971, 13.0999999,
        16.08500004, 19.89999962, 23.31500053
    ]

    binning = ContinuousOptimalBinning()
    binning.fit(x, y)

    assert binning.status == "OPTIMAL"
    assert binning.splits == approx(expected_splits, rel=1e-6)
def test_numerical_default():
    """Fit with default settings, then exercise the binning table.

    Asserts the solver status and split points, builds and analyses the
    binning table, and saves three plots to PNG files in the working
    directory.
    """
    expected_splits = [4.6500001, 5.49499989, 6.86500001, 9.7249999,
                       11.67499971, 13.0999999, 16.08500004,
                       19.89999962, 23.31500053]

    binning = ContinuousOptimalBinning()
    binning.fit(x, y)

    assert binning.status == "OPTIMAL"
    assert binning.splits == approx(expected_splits, rel=1e-6)

    binning.binning_table.build()
    binning.binning_table.analysis()

    # One plot per keyword set: default, without special, without missing.
    plot_variants = (
        {"savefig": "test_continuous_binning.png"},
        {"add_special": False,
         "savefig": "test_continuous_binning_no_special.png"},
        {"add_missing": False,
         "savefig": "test_continuous_binning_no_missing.png"},
    )
    for plot_kwargs in plot_variants:
        binning.binning_table.plot(**plot_kwargs)
Пример #5
0
def test_default_transform_continuous():
    """Compare ``BinningProcess`` output with a standalone binning.

    Fits a ``BinningProcess`` on all features, checks that the binned object
    for the first variable is a ``ContinuousOptimalBinning``, and verifies
    that column 5 of the process output matches a standalone
    ``ContinuousOptimalBinning`` fitted on that column alone.

    NOTE(review): ``load_boston`` was removed in scikit-learn 1.2 -- confirm
    the scikit-learn version this test suite is pinned to.
    """
    boston = load_boston()
    feature_names = boston.feature_names
    features = boston.data
    target = boston.target

    binning_process = BinningProcess(feature_names)
    binning_process.fit(features, target)
    process_output = binning_process.transform(features)

    first_binned = binning_process.get_binned_variable(feature_names[0])
    assert isinstance(first_binned, ContinuousOptimalBinning)

    # Standalone binning of a single column should reproduce the process.
    standalone = ContinuousOptimalBinning()
    column = features[:, 5]
    standalone.fit(column, target)

    expected_column = process_output[:, 5]
    assert standalone.transform(column) == approx(expected_column, rel=1e-6)
Пример #6
0
def test_params():
    """Check that each invalid constructor argument is rejected.

    Every case builds a ``ContinuousOptimalBinning`` with one invalid
    keyword set, calls ``fit`` and expects the listed exception type.
    """
    # (expected exception, constructor keyword arguments), in check order.
    invalid_cases = (
        (TypeError, {"name": 1}),
        (ValueError, {"dtype": "nominal"}),
        (ValueError, {"prebinning_method": "new_method"}),
        (ValueError, {"max_n_prebins": -2}),
        (ValueError, {"min_prebin_size": 0.6}),
        (ValueError, {"min_n_bins": -2}),
        (ValueError, {"max_n_bins": -2.2}),
        (ValueError, {"min_n_bins": 3, "max_n_bins": 2}),
        (ValueError, {"min_bin_size": 0.6}),
        (ValueError, {"max_bin_size": -0.6}),
        (ValueError, {"min_bin_size": 0.5, "max_bin_size": 0.3}),
        (ValueError, {"monotonic_trend": "new_trend"}),
        (ValueError, {"min_mean_diff": -1.1}),
        (ValueError, {"max_pvalue": 1.1}),
        (ValueError, {"max_pvalue_policy": "new_policy"}),
        (ValueError, {"cat_cutoff": -0.2}),
        (TypeError, {"user_splits": {"a": [1, 2]}}),
        (TypeError, {"special_codes": {1, 2, 3}}),
        (ValueError, {"split_digits": 9}),
        (ValueError, {"time_limit": -2}),
        (TypeError, {"verbose": 1}),
    )

    for expected_error, kwargs in invalid_cases:
        # Construction stays inside the context so the exception is caught
        # whether it is raised by the constructor or by fit.
        with raises(expected_error):
            binning = ContinuousOptimalBinning(**kwargs)
            binning.fit(x, y)
Пример #7
0
def test_verbose():
    """Fit with ``verbose=True`` and expect an ``"OPTIMAL"`` status."""
    binning = ContinuousOptimalBinning(verbose=True)
    binning.fit(x, y)

    expected_status = "OPTIMAL"
    assert binning.status == expected_status
def test_numerical_user_splits_non_unique():
    """Expect ``fit`` to raise ``ValueError`` for repeated user splits."""
    # The value 7 appears twice.
    duplicated_splits = [4, 7, 7, 10, 16, 20, 23]
    binning = ContinuousOptimalBinning(user_splits=duplicated_splits)

    with raises(ValueError):
        binning.fit(x, y)