Example #1
# NOTE: the imports and parametrize values below are assumed so the example is
# self-contained; create_synthetic_dataset and get_shap_values are helper
# functions defined elsewhere in the test module.
import numpy as np
import cupy as cp
import pytest

import cuml
from cuml.experimental.explainer import KernelExplainer
from cuml.common.import_utils import has_shap


@pytest.mark.parametrize("dtype", [np.float32, np.float64])
@pytest.mark.parametrize("n_features", [10, 50])
@pytest.mark.parametrize("n_background", [10, 50])
@pytest.mark.parametrize("model", [cuml.LinearRegression])
def test_kernel_gpu_cpu_shap(dtype, n_features, n_background, model):
    X_train, X_test, y_train, y_test = create_synthetic_dataset(
        n_samples=n_background + 3,
        n_features=n_features,
        test_size=3,
        noise=0.1,
        dtype=dtype)

    mod = model().fit(X_train, y_train)
    explainer, shap_values = get_shap_values(model=mod.predict,
                                             background_dataset=X_train,
                                             explained_dataset=X_test,
                                             explainer=KernelExplainer)

    exp_v = explainer.expected_value

    # SHAP additivity (local accuracy): the values for each explained row
    # should sum to f(x) - expected_value within a small tolerance.
    fx = mod.predict(X_test)
    for test_idx in range(3):
        assert abs(np.sum(shap_values[test_idx]) -
                   (fx[test_idx] - exp_v)) <= 1e-5

    if has_shap():
        import shap
        explainer = shap.KernelExplainer(mod.predict, cp.asnumpy(X_train))
        cpu_shap_values = explainer.shap_values(cp.asnumpy(X_test))

        assert np.allclose(shap_values,
                           cpu_shap_values,
                           rtol=1e-01,
                           atol=1e-01)
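
A minimal standalone sketch of the API this test exercises, without the
test-suite helpers (the data and model choice here are illustrative, assuming
cuml's experimental explainer API as used above):

import cupy as cp
import cuml
from cuml.experimental.explainer import KernelExplainer

# Tiny synthetic GPU dataset: 100 background rows, 10 features.
X = cp.random.rand(100, 10, dtype=cp.float32)
y = X.sum(axis=1)

mod = cuml.LinearRegression().fit(X, y)

# Explain the first 5 rows against the background dataset.
explainer = KernelExplainer(model=mod.predict, data=X, is_gpu_model=True)
shap_values = explainer.shap_values(X[:5])

# Each row of shap_values should sum to mod.predict(row) - expected_value.
print(explainer.expected_value, shap_values.shape)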
Example #2

# NOTE: the imports and parametrize values below are assumed so the example is
# self-contained; train_test_split is cuml's GPU-accelerated version.
import numpy as np
import cupy as cp
import pytest

import cuml
from cuml.model_selection import train_test_split
from cuml.common.import_utils import has_shap


@pytest.mark.parametrize("dtype", [np.float32, np.float64])
@pytest.mark.parametrize("nfeatures", [10, 50])
@pytest.mark.parametrize("nbackground", [10, 50])
@pytest.mark.parametrize("model", [cuml.LinearRegression])
def test_kernel_gpu_cpu_shap(dtype, nfeatures, nbackground, model):
    X, y = cuml.datasets.make_regression(n_samples=nbackground + 5,
                                         n_features=nfeatures,
                                         noise=0.1)

    X_train, X_test, y_train, y_test = train_test_split(X,
                                                        y,
                                                        test_size=5,
                                                        random_state=42)

    X_train = X_train.astype(dtype)
    X_test = X_test.astype(dtype)
    y_train = y_train.astype(dtype)
    y_test = y_test.astype(dtype)

    mod = model().fit(X_train, y_train)

    cu_explainer = cuml.experimental.explainer.KernelExplainer(
        model=mod.predict,
        data=X_train,
        is_gpu_model=True)

    cu_shap_values = cu_explainer.shap_values(X_test)

    exp_v = cu_explainer.expected_value
    fx = mod.predict(X_test)
    # SHAP additivity: each row's values should sum to f(x) - exp_v.
    for test_idx in range(5):
        assert abs(np.sum(cu_shap_values[test_idx]) -
                   (fx[test_idx] - exp_v)) <= 1e-5

    if has_shap("0.37"):
        import shap
        explainer = shap.KernelExplainer(mod.predict, cp.asnumpy(X_train))
        shap_values = explainer.shap_values(cp.asnumpy(X_test))

        # Note: small variance in the L1 regression step with larger
        # n_features, even across runs of the same explainer, can make this
        # test flaky; a better testing strategy is in progress.
        assert np.allclose(cu_shap_values, shap_values, rtol=1e-01, atol=1e-01)
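
Both versions of the test assert the same SHAP additivity (local accuracy)
property. A minimal standalone form of that check, usable with NumPy or CuPy
arrays (the helper name is hypothetical):

def check_additivity(shap_values, fx, expected_value, tol=1e-5):
    # Local accuracy: for each explained row, the SHAP values sum to
    # f(x) - E[f(X)] within tolerance.
    residual = shap_values.sum(axis=1) - (fx - expected_value)
    assert abs(residual).max() <= tol

# Example usage (illustrative values):
import numpy as np
sv = np.array([[0.5, 0.25], [0.1, -0.1]])
check_additivity(sv, fx=np.array([1.75, 1.0]), expected_value=1.0)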
Example #3
import pytest
import treelite
import numpy as np
import cupy as cp
import cudf
from cuml.experimental.explainer.tree_shap import TreeExplainer
from cuml.common.import_utils import has_xgboost, has_shap
from cuml.common.exceptions import NotFittedError
from cuml.ensemble import RandomForestRegressor as curfr
from cuml.ensemble import RandomForestClassifier as curfc
from sklearn.datasets import make_regression, make_classification

if has_xgboost():
    import xgboost as xgb
if has_shap():
    import shap


@pytest.mark.parametrize('objective', [
    'reg:linear', 'reg:squarederror', 'reg:squaredlogerror',
    'reg:pseudohubererror'
])
@pytest.mark.skipif(not has_xgboost(), reason="need to install xgboost")
@pytest.mark.skipif(not has_shap(), reason="need to install shap")
def test_xgb_regressor(objective):
    n_samples = 100
    X, y = make_regression(n_samples=n_samples,
                           n_features=8,
                           n_informative=8,
                           n_targets=1,