def test_shape_values_linear_many_features():
    """Check LinearExplainer SHAP values match the closed form for a linear model.

    For a linear model explained against its own background data, the SHAP
    value of feature j is coef_j * (x_j - mean(x_j)).
    """
    import numpy as np
    from sklearn.linear_model import Ridge

    import shap_domino

    np.random.seed(0)
    # NOTE: the original applied .T here, a no-op on a 1-D array — removed.
    coef = np.array([1, 2])

    # generate linear data
    X = np.random.normal(1, 10, size=(1000, len(coef)))
    y = np.dot(X, coef) + 1 + np.random.normal(scale=0.1, size=1000)

    # train linear model
    model = Ridge(0.1)
    model.fit(X, y)

    # explain the model's predictions using SHAP values
    explainer = shap_domino.LinearExplainer(model, X)
    values = explainer.shap_values(X)

    assert values.shape == (1000, 2)

    # closed-form expectation: coefficient times the centered feature value
    expected = (X - X.mean(0)) * coef
    np.testing.assert_allclose(expected - values, 0, atol=0.01)
def test_sparse():
    """Validate running LinearExplainer on scipy sparse data."""
    import numpy as np
    import sklearn.linear_model
    from scipy.special import expit
    from sklearn.datasets import make_multilabel_classification

    import shap_domino

    np.random.seed(0)
    n_features = 20
    X, y = make_multilabel_classification(
        n_samples=100,
        sparse=True,
        n_features=n_features,
        n_classes=1,
        n_labels=2,
    )

    # train a logistic regression directly on the sparse matrix
    model = sklearn.linear_model.LogisticRegression()
    model.fit(X, y)

    # explain the model's predictions using SHAP values
    explainer = shap_domino.LinearExplainer(model, X)
    shap_values = explainer.shap_values(X)

    # additivity check: base value + per-row SHAP sum, pushed through the
    # logistic link, must reproduce the model's positive-class probability
    predicted = model.predict_proba(X)[:, 1]
    reconstructed = expit(explainer.expected_value + shap_values.sum(1))
    assert np.max(np.abs(reconstructed - predicted)) < 1e-6
def test_tied_pair():
    """Two almost perfectly correlated features should split credit 50/50."""
    import numpy as np

    import shap_domino

    np.random.seed(0)
    beta = np.array([1, 0, 0])
    mu = np.zeros(3)
    # features 0 and 1 are (nearly) tied; feature 2 is independent
    Sigma = np.array([
        [1, 0.999999, 0],
        [0.999999, 1, 0],
        [0, 0, 1],
    ])
    X = np.ones((1, 3))

    explainer = shap_domino.LinearExplainer(
        (beta, 0), (mu, Sigma), feature_dependence="correlation"
    )
    # the unit effect carried by feature 0 should be shared with its twin
    shap_values = explainer.shap_values(X)
    assert np.abs(shap_values - np.array([0.5, 0.5, 0])).max() < 0.05
def test_tied_triple():
    """Three almost perfectly correlated features should split credit three ways."""
    import numpy as np

    import shap_domino

    np.random.seed(0)
    beta = np.array([0, 1, 0, 0])
    mu = 1 * np.ones(4)
    # features 0, 1, and 2 are (nearly) tied; feature 3 is independent
    Sigma = np.array([
        [1, 0.999999, 0.999999, 0],
        [0.999999, 1, 0.999999, 0],
        [0.999999, 0.999999, 1, 0],
        [0, 0, 0, 1],
    ])
    X = 2 * np.ones((1, 4))

    explainer = shap_domino.LinearExplainer(
        (beta, 0), (mu, Sigma), feature_dependence="correlation"
    )
    # expected value is beta . mu = 1 with a zero intercept
    assert explainer.expected_value == 1
    # the unit effect on feature 1 should be shared across all three twins
    shap_values = explainer.shap_values(X)
    assert np.abs(shap_values - np.array([0.33333, 0.33333, 0.33333, 0])).max() < 0.05
def test_perfect_colinear():
    """Ensure the correlation explainer tolerates degenerate feature columns."""
    import numpy as np
    from sklearn.linear_model import LinearRegression

    import shap_domino

    X, y = shap_domino.datasets.boston()
    X.iloc[:, 0] = X.iloc[:, 4]  # test duplicated features
    # test multiple colinear features
    # NOTE(review): col6 - col6 makes column 5 all zeros — confirm intent
    X.iloc[:, 5] = X.iloc[:, 6] - X.iloc[:, 6]
    X.iloc[:, 3] = 0  # test null features

    model = LinearRegression()
    model.fit(X, y)

    explainer = shap_domino.LinearExplainer(model, X, feature_dependence="correlation")
    shap_values = explainer.shap_values(X)

    # the per-row SHAP sum must equal the centered model prediction
    centered_prediction = model.predict(X) - model.predict(X).mean()
    assert np.abs(shap_values.sum(1) - centered_prediction).sum() < 1e-7
def test_sklearn_linear():
    """Check expected_value equals the mean prediction for a Ridge model.

    Also smoke-tests that shap_values() runs on the same data.
    """
    import numpy as np
    from sklearn.linear_model import Ridge

    import shap_domino  # was imported twice in the original; once is enough

    np.random.seed(0)

    # train linear model
    X, y = shap_domino.datasets.boston()
    model = Ridge(0.1)
    model.fit(X, y)

    # explain the model's predictions using SHAP values
    explainer = shap_domino.LinearExplainer(model, X)
    assert np.abs(explainer.expected_value - model.predict(X).mean()) < 1e-6
    explainer.shap_values(X)  # should run without raising
def test_sklearn_multiclass_no_intercept():
    """Check LinearExplainer on a multi-output Ridge fit without an intercept.

    The target is expanded to shape (n, 1) so the model coefficients come back
    2-D, exercising the multiclass code path.
    """
    import numpy as np
    from sklearn.linear_model import Ridge

    import shap_domino  # was imported twice in the original; once is enough

    np.random.seed(0)

    # train linear model
    X, y = shap_domino.datasets.boston()
    # make y multiclass (single output column, but 2-D)
    multiclass_y = np.expand_dims(y, axis=-1)
    model = Ridge(fit_intercept=False)
    model.fit(X, multiclass_y)

    # explain the model's predictions using SHAP values
    explainer = shap_domino.LinearExplainer(model, X)
    assert np.abs(explainer.expected_value - model.predict(X).mean()) < 1e-6
    explainer.shap_values(X)  # should run without raising
def test_single_feature():
    """Make sure things work with a univariate linear regression."""
    import numpy as np
    import sklearn.linear_model

    import shap_domino

    np.random.seed(0)

    # generate linear data with a single predictor
    X = np.random.normal(1, 10, size=(1000, 1))
    noise = np.random.normal(scale=0.1, size=1000)
    y = 2 * X[:, 0] + 1 + noise

    # train linear model
    model = sklearn.linear_model.Ridge(0.1)
    model.fit(X, y)

    # explain the model's predictions using SHAP values
    explainer = shap_domino.LinearExplainer(model, X)
    shap_values = explainer.shap_values(X)

    # base value matches the mean prediction
    assert np.abs(explainer.expected_value - model.predict(X).mean()) < 1e-6
    # additivity: base value + SHAP values recovers each prediction
    reconstructed = explainer.expected_value + shap_values.sum(1)
    assert np.max(np.abs(reconstructed - model.predict(X))) < 1e-6