示例#1
0
def test_get_feature_shap_values_per_fold(X, y):
    """
    Test with ShapRFECV with features per fold.
    """
    clf = DecisionTreeClassifier(max_depth=1)
    shap_elimination = ShapRFECV(clf)
    shap_values, train_score, test_score = shap_elimination._get_feature_shap_values_per_fold(
        X, y, clf, train_index=[2, 3, 4, 5, 6, 7], val_index=[0, 1], scorer=get_scorer("roc_auc")
    )
    assert test_score == 1
    assert train_score > 0.9
    assert shap_values.shape == (2, 3)
示例#2
0
def test_shap_rfe(X, y, capsys):

    clf = DecisionTreeClassifier(max_depth=1)
    with pytest.warns(None) as record:
        shap_elimination = ShapRFECV(clf,
                                     random_state=1,
                                     step=1,
                                     cv=2,
                                     scoring='roc_auc',
                                     n_jobs=4)
        shap_elimination = shap_elimination.fit(X, y)

    assert shap_elimination.fitted == True
    shap_elimination._check_if_fitted()

    report = shap_elimination.compute()

    assert report.shape[0] == 3
    assert shap_elimination.get_reduced_features_set(1) == ['col_3']

    ax1 = shap_elimination.plot(show=False)

    # Ensure that number of warnings was 0
    assert len(record) == 0
    # Check if there is any prints
    out, _ = capsys.readouterr()
    assert len(out) == 0
示例#3
0
def test_shap_rfe_svm(X, y, capsys):
    """
    Test with ShapRFECV with SVM.
    """
    clf = SVC(C=1, kernel="linear", probability=True)
    with pytest.warns(None) as record:
        shap_elimination = ShapRFECV(clf,
                                     random_state=1,
                                     step=1,
                                     cv=2,
                                     scoring="roc_auc",
                                     n_jobs=4)
        shap_elimination = shap_elimination.fit(X, y)

    assert shap_elimination.fitted
    shap_elimination._check_if_fitted()

    report = shap_elimination.compute()

    assert report.shape[0] == 3
    assert shap_elimination.get_reduced_features_set(1) == ["col_3"]

    _ = shap_elimination.plot(show=False)

    # Ensure that number of warnings was 0
    assert len(record) == 0
    # Check if there is any prints
    out, _ = capsys.readouterr()
    assert len(out) == 0
示例#4
0
def test_shap_rfe(X, y, sample_weight, capsys):
    """
    Test with ShapRFECV.
    """
    clf = DecisionTreeClassifier(max_depth=1, random_state=1)
    with pytest.warns(None) as record:
        shap_elimination = ShapRFECV(
            clf,
            random_state=1,
            step=1,
            cv=2,
            scoring="roc_auc",
            n_jobs=4,
        )
        shap_elimination = shap_elimination.fit(
            X, y, sample_weight=sample_weight, approximate=True, check_additivity=False
        )

    assert shap_elimination.fitted
    shap_elimination._check_if_fitted()

    report = shap_elimination.compute()

    assert report.shape[0] == 3
    assert shap_elimination.get_reduced_features_set(1) == ["col_3"]

    _ = shap_elimination.plot(show=False)

    # Ensure that number of warnings was 0
    assert len(record) == 0
    # Check if there is any prints
    out, _ = capsys.readouterr()
    assert len(out) == 0
示例#5
0
def test_shap_pipeline_error(X, y, capsys):
    """
    Test with ShapRFECV for pipelines.
    """
    clf = Pipeline([("scaler", StandardScaler()), ("dt", DecisionTreeClassifier(max_depth=1, random_state=1))])
    with pytest.raises(TypeError):
        shap_elimination = ShapRFECV(
            clf,
            random_state=1,
            step=1,
            cv=2,
            scoring="roc_auc",
            n_jobs=4,
        )
        shap_elimination = shap_elimination.fit(X, y, approximate=True, check_additivity=False)
示例#6
0
def test_calculate_number_of_features_to_remove():
    """
    Test with ShapRFECV with n features to remove.
    """
    assert 3 == ShapRFECV._calculate_number_of_features_to_remove(
        current_num_of_features=10, num_features_to_remove=3, min_num_features_to_keep=5
    )
    assert 3 == ShapRFECV._calculate_number_of_features_to_remove(
        current_num_of_features=8, num_features_to_remove=5, min_num_features_to_keep=5
    )
    assert 0 == ShapRFECV._calculate_number_of_features_to_remove(
        current_num_of_features=5, num_features_to_remove=1, min_num_features_to_keep=5
    )
    assert 4 == ShapRFECV._calculate_number_of_features_to_remove(
        current_num_of_features=5, num_features_to_remove=7, min_num_features_to_keep=1
    )
示例#7
0
def test_shap_rfe_randomized_search(X, y, capsys):

    clf = DecisionTreeClassifier(max_depth=1)
    param_grid = {'criterion': ['gini'], 'min_samples_split': [1, 2]}
    search = RandomizedSearchCV(clf, param_grid, cv=2, n_iter=2)
    with pytest.warns(None) as record:

        shap_elimination = ShapRFECV(search,
                                     step=0.8,
                                     cv=2,
                                     scoring='roc_auc',
                                     n_jobs=4,
                                     verbose=150)
        report = shap_elimination.fit_compute(X, y)

    assert shap_elimination.fitted == True
    shap_elimination._check_if_fitted()

    assert report.shape[0] == 2
    assert shap_elimination.get_reduced_features_set(1) == ['col_3']

    ax1 = shap_elimination.plot(show=False)

    # Ensure that number of warnings was at least 2 for the verbose (2 generated by probatus + possibly more by SHAP)
    assert len(record) >= 2

    # Check if there is any prints
    out, _ = capsys.readouterr()
    assert len(out) > 0
示例#8
0
def test_shap_rfe_randomized_search_cols_to_keep(X, y, capsys):
    """
    Test with ShapRFECV with column to keep param.
    """
    clf = DecisionTreeClassifier(max_depth=1)
    param_grid = {"criterion": ["gini"], "min_samples_split": [1, 2]}
    search = RandomizedSearchCV(clf, param_grid, cv=2, n_iter=2)
    with pytest.warns(None) as record:

        shap_elimination = ShapRFECV(search,
                                     step=0.8,
                                     cv=2,
                                     scoring="roc_auc",
                                     n_jobs=4,
                                     random_state=1)
        report = shap_elimination.fit_compute(
            X, y, columns_to_keep=["col_2", "col_3"])

    assert shap_elimination.fitted
    shap_elimination._check_if_fitted()

    assert report.shape[0] == 2
    reduced_feature_set = set(
        shap_elimination.get_reduced_features_set(num_features=2))
    assert reduced_feature_set == set(["col_2", "col_3"])

    _ = shap_elimination.plot(show=False)

    # Ensure that number of warnings was at least 2 for the verbose (2 generated by probatus + possibly more by SHAP)
    assert len(record) >= 2

    # Check if there is any prints
    out, _ = capsys.readouterr()
    assert len(out) == 0
示例#9
0
def test_shap_rfe_cols_to_keep(X, y, capsys):
    """
    Test for shap_rfe_cv with feautures to keep parameter.
    """
    clf = DecisionTreeClassifier(max_depth=1, random_state=1)
    with pytest.warns(None) as record:
        shap_elimination = ShapRFECV(clf,
                                     random_state=1,
                                     step=2,
                                     cv=2,
                                     scoring="roc_auc",
                                     n_jobs=4,
                                     min_features_to_select=1)
        shap_elimination = shap_elimination.fit(
            X, y, columns_to_keep=["col_2", "col_3"])

    assert shap_elimination.fitted
    shap_elimination._check_if_fitted()

    report = shap_elimination.compute()

    assert report.shape[0] == 2
    reduced_feature_set = set(
        shap_elimination.get_reduced_features_set(num_features=2))
    assert reduced_feature_set == set(["col_2", "col_3"])

    # Ensure that number of warnings was 0
    assert len(record) == 0
    # Check if there is any prints
    out, _ = capsys.readouterr()
    assert len(out) == 0
示例#10
0
def test_complex_dataset(complex_data, complex_lightgbm):
    """
    Test on complex dataset.
    """
    X, y = complex_data

    param_grid = {
        "n_estimators": [5, 7, 10],
        "num_leaves": [3, 5, 7, 10],
    }
    search = RandomizedSearchCV(complex_lightgbm, param_grid, n_iter=1)

    shap_elimination = ShapRFECV(clf=search, step=1, cv=10, scoring="roc_auc", n_jobs=3, verbose=50)
    with pytest.warns(None) as record:
        report = shap_elimination.fit_compute(X, y)

    assert report.shape[0] == X.shape[1]

    assert len(record) >= 2
示例#11
0
def test_get_feature_shap_values_per_fold(X, y):
    clf = DecisionTreeClassifier(max_depth=1)
    shap_values, train_score, test_score = ShapRFECV._get_feature_shap_values_per_fold(
        X,
        y,
        clf,
        train_index=[2, 3, 4, 5, 6, 7],
        val_index=[0, 1],
        scorer=get_scorer('roc_auc'))
    assert test_score == 1
    assert train_score > 0.9
    assert shap_values.shape == (2, 3)