Exemplo n.º 1
0
def test_shape_values_linear_many_features():
    """Check LinearExplainer SHAP values on synthetic two-feature linear data.

    A Ridge model is fit to noisy linear data generated from known
    coefficients. The test asserts that the SHAP values have one row per
    sample and one column per feature, and that they match
    ``(X - X.mean(0)) * coef`` to within an absolute tolerance of 0.01.
    """
    import numpy as np
    from sklearn.linear_model import Ridge
    import shap_domino

    np.random.seed(0)

    n_samples = 1000
    true_coef = np.array([1, 2])
    n_features = len(true_coef)

    # Inputs are drawn before the noise so the seeded random stream is
    # consumed in a fixed order.
    X = np.random.normal(1, 10, size=(n_samples, n_features))
    noise = np.random.normal(scale=0.1, size=n_samples)
    y = np.dot(X, true_coef) + 1 + noise

    # Fit the linear model being explained.
    model = Ridge(0.1)
    model.fit(X, y)

    # Explain every training row, using the training data as background.
    explainer = shap_domino.LinearExplainer(model, X)
    values = explainer.shap_values(X)

    assert values.shape == (n_samples, n_features)

    # Compare against the centered inputs scaled by the generating coefficients.
    centered = X - X.mean(0)
    residual = centered * true_coef - values
    np.testing.assert_allclose(residual, 0, atol=0.01)
Exemplo n.º 2
0
def test_sparse():
    """Validate running LinearExplainer on scipy sparse data.

    A logistic regression is fit to data generated with ``sparse=True``. The
    test asserts that the logistic function applied to
    ``expected_value + sum(shap_values)`` reproduces the model's
    positive-class probability to within 1e-6 for every row.
    """
    import numpy as np
    from scipy.special import expit
    from sklearn.datasets import make_multilabel_classification
    from sklearn.linear_model import LogisticRegression
    import shap_domino

    np.random.seed(0)

    X, y = make_multilabel_classification(
        n_samples=100,
        sparse=True,
        n_features=20,
        n_classes=1,
        n_labels=2,
    )

    # Fit the linear classifier being explained.
    model = LogisticRegression()
    model.fit(X, y)

    # Explain the training rows, using the same data as background.
    explainer = shap_domino.LinearExplainer(model, X)
    shap_values = explainer.shap_values(X)

    # Base value plus per-row attributions, compared to the model's
    # positive-class probability after passing through the logistic function.
    margin = explainer.expected_value + shap_values.sum(1)
    positive_proba = model.predict_proba(X)[:, 1]
    worst_gap = np.max(np.abs(expit(margin) - positive_proba))
    assert worst_gap < 1e-6
Exemplo n.º 3
0
def test_tied_pair():
    """Check credit sharing between two almost perfectly correlated features.

    Only the first feature has a non-zero coefficient, but the covariance
    ties it to the second feature with correlation 0.999999. With
    ``feature_dependence="correlation"`` the test expects SHAP values close
    to ``[0.5, 0.5, 0]`` (within 0.05) for an all-ones input.
    """
    import numpy as np
    import shap_domino

    np.random.seed(0)

    weights = np.array([1, 0, 0])
    mean = np.zeros(3)
    cov = np.array([
        [1, 0.999999, 0],
        [0.999999, 1, 0],
        [0, 0, 1],
    ])
    sample = np.ones((1, 3))

    # The model is passed as a (coefficients, intercept) pair and the
    # background as a (mean, covariance) pair.
    explainer = shap_domino.LinearExplainer(
        (weights, 0), (mean, cov), feature_dependence="correlation"
    )

    target = np.array([0.5, 0.5, 0])
    deviation = np.abs(explainer.shap_values(sample) - target)
    assert deviation.max() < 0.05
Exemplo n.º 4
0
def test_tied_triple():
    """Check credit sharing among three almost perfectly correlated features.

    Only the second feature has a non-zero coefficient, but the first three
    features are pairwise correlated at 0.999999. The test expects an
    expected value of exactly 1 and SHAP values close to
    ``[1/3, 1/3, 1/3, 0]`` (within 0.05) for an input of all twos.
    """
    import numpy as np
    import shap_domino

    np.random.seed(0)

    weights = np.array([0, 1, 0, 0])
    mean = np.ones(4)
    cov = np.array([
        [1, 0.999999, 0.999999, 0],
        [0.999999, 1, 0.999999, 0],
        [0.999999, 0.999999, 1, 0],
        [0, 0, 0, 1],
    ])
    sample = np.full((1, 4), 2.0)

    # The model is passed as a (coefficients, intercept) pair and the
    # background as a (mean, covariance) pair.
    explainer = shap_domino.LinearExplainer(
        (weights, 0), (mean, cov), feature_dependence="correlation"
    )
    assert explainer.expected_value == 1

    target = np.array([0.33333, 0.33333, 0.33333, 0])
    deviation = np.abs(explainer.shap_values(sample) - target)
    assert deviation.max() < 0.05
Exemplo n.º 5
0
def test_perfect_colinear():
    """Check SHAP additivity when the design matrix has degenerate columns.

    The Boston data is altered so that one column duplicates another and two
    columns are constant zero. A LinearRegression is then explained with
    ``feature_dependence="correlation"``. The summed absolute gap between
    each row's SHAP total and its prediction minus the mean prediction must
    stay below 1e-7.
    """
    import numpy as np
    from sklearn.linear_model import LinearRegression
    import shap_domino

    X, y = shap_domino.datasets.boston()

    # Duplicated feature: column 0 becomes a copy of column 4.
    X.iloc[:, 0] = X.iloc[:, 4]
    # Originally annotated "test multiple colinear features": column 5 is set
    # to column 6 minus itself.
    X.iloc[:, 5] = X.iloc[:, 6] - X.iloc[:, 6]
    # Null feature: column 3 is all zeros.
    X.iloc[:, 3] = 0

    model = LinearRegression()
    model.fit(X, y)

    explainer = shap_domino.LinearExplainer(model, X, feature_dependence="correlation")
    shap_values = explainer.shap_values(X)

    predictions = model.predict(X)
    gap = shap_values.sum(1) - predictions + predictions.mean()
    assert np.abs(gap).sum() < 1e-7
Exemplo n.º 6
0
def test_sklearn_linear():
    """Check LinearExplainer against a scikit-learn Ridge model on Boston data.

    Asserts that the explainer's expected value equals the mean model
    prediction over the background data (within 1e-6), then computes SHAP
    values for every row as a smoke test; their values are not checked.
    """
    import numpy as np
    from sklearn.linear_model import Ridge
    import shap_domino

    np.random.seed(0)

    # train linear model
    X, y = shap_domino.datasets.boston()
    model = Ridge(0.1)
    model.fit(X, y)

    # explain the model's predictions using SHAP values
    explainer = shap_domino.LinearExplainer(model, X)
    assert np.abs(explainer.expected_value - model.predict(X).mean()) < 1e-6

    # Smoke test only: this must run without raising.
    explainer.shap_values(X)
Exemplo n.º 7
0
def test_sklearn_multiclass_no_intercept():
    """Check LinearExplainer with a 2-D target and a Ridge model without intercept.

    The target gets an extra trailing axis before fitting, and the Ridge
    model is built with ``fit_intercept=False``. Asserts that the explainer's
    expected value equals the mean model prediction (within 1e-6), then
    computes SHAP values for every row as a smoke test; their values are not
    checked.
    """
    import numpy as np
    from sklearn.linear_model import Ridge
    import shap_domino

    np.random.seed(0)

    # train linear model
    X, y = shap_domino.datasets.boston()

    # Append a trailing axis to y so the model is fit on a target with one
    # more dimension. The values themselves are unchanged.
    multiclass_y = np.expand_dims(y, axis=-1)
    model = Ridge(fit_intercept=False)
    model.fit(X, multiclass_y)

    # explain the model's predictions using SHAP values
    explainer = shap_domino.LinearExplainer(model, X)
    assert np.abs(explainer.expected_value - model.predict(X).mean()) < 1e-6

    # Smoke test only: this must run without raising.
    explainer.shap_values(X)
Exemplo n.º 8
0
def test_single_feature():
    """Make sure things work with a univariate linear regression.

    Fits a Ridge model on a single noisy linear feature and asserts that the
    expected value equals the mean prediction, and that expected value plus
    the summed SHAP values reproduces every prediction (both within 1e-6).
    """
    import numpy as np
    from sklearn.linear_model import Ridge
    import shap_domino

    np.random.seed(0)

    n_samples = 1000

    # Inputs are drawn before the noise so the seeded random stream is
    # consumed in a fixed order.
    X = np.random.normal(1, 10, size=(n_samples, 1))
    noise = np.random.normal(scale=0.1, size=n_samples)
    y = 2 * X[:, 0] + 1 + noise

    # Fit the linear model being explained.
    model = Ridge(0.1)
    model.fit(X, y)

    # Explain the training rows, using the same data as background.
    explainer = shap_domino.LinearExplainer(model, X)
    shap_values = explainer.shap_values(X)

    base_gap = np.abs(explainer.expected_value - model.predict(X).mean())
    assert base_gap < 1e-6

    reconstructed = explainer.expected_value + shap_values.sum(1)
    worst_gap = np.max(np.abs(reconstructed - model.predict(X)))
    assert worst_gap < 1e-6