示例#1
0
def test_numerical_default_transform():
    """Check ``OptimalBinning.transform`` on a numerical variable.

    Calling ``transform`` before ``fit`` is expected to raise
    ``NotFittedError``. After fitting on ``x`` and ``y`` (both defined
    outside this function), four sample values are transformed with
    ``metric="woe"`` and compared against reference values with a relative
    tolerance of 1e-6.
    """
    optb = OptimalBinning()

    # The call is expected to raise, so its result is deliberately not bound.
    with raises(NotFittedError):
        optb.transform(x)

    optb.fit(x, y)

    x_transform = optb.transform([12, 14, 15, 21], metric="woe")
    assert x_transform == approx(
        [-2.71097154, -0.15397917, -0.15397917, 5.28332344], rel=1e-6)
示例#2
0
def test_default_fit_transform():
    """Compare ``BinningProcess.fit_transform`` with a standalone binning.

    The process is fitted and applied to ``X`` with ``metric="event_rate"``.
    Column 5 of its output must match, within a relative tolerance of 1e-6,
    the output of an ``OptimalBinning`` fitted on column 5 of ``X`` alone.
    """
    column = 5

    binning_process = BinningProcess(variable_names)
    process_output = binning_process.fit_transform(X, y, metric="event_rate")

    single_binning = OptimalBinning()
    feature = X[:, column]
    single_binning.fit(feature, y)

    actual = single_binning.transform(feature, metric="event_rate")
    assert actual == approx(process_output[:, column], rel=1e-6)
示例#3
0
def test_default_transform():
    """Check ``BinningProcess.transform`` called with default arguments.

    ``transform`` before ``fit`` is expected to raise ``NotFittedError``.
    Once fitted, column 5 of the process output must match, within a
    relative tolerance of 1e-6, the output of an ``OptimalBinning`` fitted
    on column 5 of ``X`` alone.
    """
    column = 5
    binning_process = BinningProcess(variable_names)

    # Transforming with an unfitted process must fail.
    with raises(NotFittedError):
        binning_process.transform(X)

    binning_process.fit(X, y)
    process_output = binning_process.transform(X)

    single_binning = OptimalBinning()
    feature = X[:, column]
    single_binning.fit(feature, y)

    actual = single_binning.transform(feature)
    assert actual == approx(process_output[:, column], rel=1e-6)
def test_default_transform_pandas():
    """Check ``BinningProcess.transform`` when given a pandas ``DataFrame``.

    The process is fitted on a ``DataFrame`` built from ``data.data`` with
    ``data.feature_names`` as columns. Passing a plain ``dict`` to
    ``transform`` is expected to raise ``TypeError``. The ``DataFrame``
    result (``metric="woe"``) is then compared, on column 5 and within a
    relative tolerance of 1e-6, against an ``OptimalBinning`` fitted on
    column 5 of the array ``X``.
    """
    df = pd.DataFrame(data.data, columns=data.feature_names)

    process = BinningProcess(variable_names)
    process.fit(df, y)

    # The call is expected to raise, so its result is deliberately not bound.
    with raises(TypeError):
        process.transform(df.to_dict(), metric="woe")

    X_transform = process.transform(df, metric="woe")

    optb = OptimalBinning()
    x = X[:, 5]
    optb.fit(x, y)

    # ``.values`` is used to index the transformed frame positionally.
    assert optb.transform(x, metric="woe") == approx(
        X_transform.values[:, 5], rel=1e-6)
示例#5
0
def test_categorical_transform():
    """Check ``OptimalBinning.transform`` on a categorical variable.

    A binning with ``dtype="categorical"``, ``solver="mip"`` and
    ``cat_cutoff=0.1`` is fitted on a fixed 100-element sample. The four
    distinct categories are then transformed and compared against reference
    values with a relative tolerance of 1e-6.
    """
    # Fixed fixture: the reference values below depend on this exact data.
    categories = np.array([
        'Working', 'State servant', 'Working', 'Working', 'Working',
        'State servant', 'Commercial associate', 'State servant', 'Pensioner',
        'Working', 'Working', 'Pensioner', 'Working', 'Working', 'Working',
        'Working', 'Working', 'Working', 'Working', 'State servant', 'Working',
        'Commercial associate', 'Working', 'Pensioner', 'Working', 'Working',
        'Working', 'Working', 'State servant', 'Working',
        'Commercial associate', 'Working', 'Working', 'Commercial associate',
        'State servant', 'Working', 'Commercial associate', 'Working',
        'Pensioner', 'Working', 'Commercial associate', 'Working', 'Working',
        'Pensioner', 'Working', 'Working', 'Pensioner', 'Working',
        'State servant', 'Working', 'State servant', 'Commercial associate',
        'Working', 'Commercial associate', 'Pensioner', 'Working', 'Pensioner',
        'Working', 'Working', 'Working', 'Commercial associate', 'Working',
        'Pensioner', 'Working', 'Commercial associate', 'Commercial associate',
        'State servant', 'Working', 'Commercial associate',
        'Commercial associate', 'Commercial associate', 'Working', 'Working',
        'Working', 'Commercial associate', 'Working', 'Commercial associate',
        'Working', 'Working', 'Pensioner', 'Working', 'Pensioner', 'Working',
        'Working', 'Pensioner', 'Working', 'State servant', 'Working',
        'Working', 'Working', 'Working', 'Working', 'Commercial associate',
        'Commercial associate', 'Commercial associate', 'Working',
        'Commercial associate', 'Working', 'Working', 'Pensioner'
    ], dtype=object)

    # Binary target aligned element-wise with ``categories``.
    target = np.array([
        1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0,
        0, 0, 0, 0
    ])

    binning = OptimalBinning(
        dtype="categorical",
        solver="mip",
        cat_cutoff=0.1,
        verbose=True,
    )
    binning.fit(categories, target)

    queries = ["Pensioner", "Working", "Commercial associate", "State servant"]
    expected = [-0.26662866, 0.30873548, -0.55431074, 0.30873548]

    actual = binning.transform(queries)
    assert actual == approx(expected, rel=1e-6)
示例#6
0
def test_transform_some_variables():
    """Check ``BinningProcess.transform`` restricted to selected variables.

    Passing a ``dict`` as the second argument is expected to raise
    ``TypeError``, and passing the names ``"new_1"``/``"new_2"`` is expected
    to raise ``ValueError``. With four selected names the output must have
    four columns, each matching (relative tolerance 1e-6) an
    ``OptimalBinning`` fitted on columns 3 to 6 of ``X`` respectively.
    """
    binning_process = BinningProcess(variable_names)
    binning_process.fit(X, y)

    with raises(TypeError):
        binning_process.transform(X, {})

    with raises(ValueError):
        binning_process.transform(X, ["new_1", "new_2"])

    subset = [
        'mean area', 'mean smoothness', 'mean compactness', 'mean concavity'
    ]
    subset_output = binning_process.transform(X, subset)
    assert subset_output.shape[1] == 4

    # Source columns 3..6 of X are checked against output columns 0..3.
    # NOTE(review): presumably these are the positions of ``subset`` within
    # ``variable_names`` -- confirm against the fixture.
    for out_col, src_col in enumerate(range(3, 7)):
        single_binning = OptimalBinning()
        feature = X[:, src_col]
        single_binning.fit(feature, y)

        actual = single_binning.transform(feature)
        assert actual == approx(subset_output[:, out_col], rel=1e-6)