def test_shap_decomposition_matrices(
    best_lgbm_crossfit: LearnerCrossfit[RegressorPipelineDF],
    feature_names: Set[str],
    regressor_inspector: LearnerInspector,
) -> None:
    """
    Check shape and value range of the pairwise SHAP decomposition matrices.

    The association, synergy and redundancy matrices returned by the inspector
    must each have one row and one column per feature and, with missing values
    treated as zero, contain only values in the closed interval [0.0, 1.0].

    :param best_lgbm_crossfit: not read by this test; kept in the signature so
        the way the test is invoked does not change
    :param feature_names: the feature names; only their count is used
    :param regressor_inspector: the inspector providing the three matrices
    """
    # the expected size is the same for all matrices, so determine it once
    n_features = len(feature_names)

    # Shap decomposition matrices (feature dependencies); all three are
    # calculated up-front, before any of them is checked
    matrices = (
        regressor_inspector.feature_association_matrix(),
        regressor_inspector.feature_synergy_matrix(),
        regressor_inspector.feature_redundancy_matrix(),
    )

    # check that dimensions of pairwise feature matrices are equal to the number
    # of features, and check the value ranges
    for matrix, matrix_name in zip(
        matrices, ("association", "synergy", "redundancy")
    ):
        matrix_full_name = f"feature {matrix_name} matrix"

        assert len(matrix) == n_features, f"rows in {matrix_full_name}"
        assert len(matrix.columns) == n_features, f"columns in {matrix_full_name}"

        # replace missing values once per matrix instead of twice per column
        matrix_filled = matrix.fillna(0)

        # check values column by column
        for c in matrix_filled.columns:
            column = matrix_filled.loc[:, c]
            assert (
                0.0 <= column.min() <= column.max() <= 1.0
            ), f"Values of [0.0, 1.0] in {matrix_full_name}"
def test_model_inspection_classifier_binary_single_shap_output() -> None:
    """
    Smoke test: fit a crossfit and an inspector on simulated classification data.

    The test makes no assertions; it passes when fitting the gradient boosting
    classifier crossfit and the subsequent inspector completes without raising.
    NOTE(review): judging by the test name, this targets a binary classifier for
    which the explainer yields a single SHAP output - confirm.
    """
    n_features = 5

    # simulate some data
    features, target = make_classification(
        n_samples=200,
        n_features=n_features,
        n_informative=n_features,
        n_redundant=0,
        random_state=42,
    )

    # combine features and target into one data frame, target in the last column
    column_names = [f"f{i}" for i in range(n_features)] + ["target"]
    sim_df = pd.DataFrame(
        np.hstack((features, target.reshape(-1, 1))), columns=column_names
    )

    # create sample object
    sample = Sample(observations=sim_df, target_name="target")

    # fit the crossfit
    pipeline = ClassifierPipelineDF(
        classifier=GradientBoostingClassifierDF(random_state=42)
    )
    unfitted_crossfit = LearnerCrossfit(
        pipeline=pipeline,
        cv=BootstrapCV(n_splits=5, random_state=42),
        random_state=42,
        n_jobs=-3,
    )
    crossfit = unfitted_crossfit.fit(sample)

    # fit the inspector
    LearnerInspector(n_jobs=-3).fit(crossfit=crossfit)
def regressor_inspector(
    best_lgbm_crossfit: LearnerCrossfit[RegressorPipelineDF], n_jobs: int
) -> LearnerInspector:
    """
    Create an inspector fitted to the given crossfit, with legacy mode turned off
    after fitting.

    :param best_lgbm_crossfit: the crossfit to fit the inspector to
    :param n_jobs: passed on to the inspector as its ``n_jobs`` argument
    :return: the fitted inspector
    """
    explainer_factory = TreeExplainerFactory(
        feature_perturbation="tree_path_dependent", use_background_dataset=True
    )

    # construct in legacy mode so the legacy SHAP decomposer is created along with
    # the new SHAP vector projector
    unfitted_inspector = LearnerInspector(
        explainer_factory=explainer_factory, legacy=True, n_jobs=n_jobs
    )
    fitted_inspector = unfitted_inspector.fit(crossfit=best_lgbm_crossfit)

    # disable legacy calculations now that both have been created
    fitted_inspector._legacy = False

    return fitted_inspector
def regressor_inspector(
    best_lgbm_crossfit: LearnerCrossfit[RegressorPipelineDF], n_jobs: int
) -> LearnerInspector:
    """
    Create an inspector with a tree explainer factory and fit it to the crossfit.

    :param best_lgbm_crossfit: the crossfit to fit the inspector to
    :param n_jobs: passed on to the inspector as its ``n_jobs`` argument
    :return: the result of fitting the inspector
    """
    explainer_factory = TreeExplainerFactory(
        feature_perturbation="tree_path_dependent", use_background_dataset=True
    )
    inspector = LearnerInspector(explainer_factory=explainer_factory, n_jobs=n_jobs)
    return inspector.fit(crossfit=best_lgbm_crossfit)
def iris_inspector_multi_class(
    iris_classifier_crossfit_multi_class: LearnerCrossfit[
        ClassifierPipelineDF[RandomForestClassifierDF]
    ],
    n_jobs: int,
) -> LearnerInspector[ClassifierPipelineDF[RandomForestClassifierDF]]:
    """
    Create an inspector with SHAP interaction enabled and fit it to the
    multi-class crossfit.

    :param iris_classifier_crossfit_multi_class: the crossfit to fit the
        inspector to
    :param n_jobs: passed on to the inspector as its ``n_jobs`` argument
    :return: the result of fitting the inspector
    """
    inspector = LearnerInspector(shap_interaction=True, n_jobs=n_jobs)
    return inspector.fit(crossfit=iris_classifier_crossfit_multi_class)
def test_model_inspection_classifier_binary(
    iris_sample_binary: Sample, iris_classifier_crossfit_binary, n_jobs: int
) -> None:
    """
    Check SHAP values and the feature association matrix for a binary classifier.

    Fits an inspector without SHAP interaction, validates the raw SHAP values
    against the predictions, checks that the default consolidation equals the
    mean across the outer index level, compares the clustered symmetrical
    association matrix with hard-coded expected values, and draws the
    association linkage as a dendrogram report.

    :param iris_sample_binary: the sample; only its length is used
    :param iris_classifier_crossfit_binary: the crossfit to inspect
    :param n_jobs: passed on to the inspector as its ``n_jobs`` argument
    """
    model_inspector = LearnerInspector(shap_interaction=False, n_jobs=n_jobs).fit(
        crossfit=iris_classifier_crossfit_binary
    )

    # calculate the shap value matrix, without any consolidation
    shap_values = model_inspector.shap_values(consolidate=None)

    # do the shap values add up to predictions minus a constant value?
    _validate_shap_values_against_predictions(
        shap_values=shap_values, crossfit=iris_classifier_crossfit_binary
    )

    shap_matrix_mean = model_inspector.shap_values()

    # is the consolidation correct?
    # aggregate via groupby: the `level` argument of DataFrame.mean was deprecated
    # in pandas 1.3 and removed in pandas 2.0
    assert_frame_equal(shap_matrix_mean, shap_values.groupby(level=1).mean())

    # the length of rows in shap_values should be equal to the unique observation
    # indices we have had in the predictions_df
    assert len(shap_matrix_mean) == len(iris_sample_binary)

    # Shap decomposition matrices (feature dependencies)
    assert model_inspector.feature_association_matrix(
        clustered=True, symmetrical=True
    ).values == pytest.approx(
        np.array(
            [
                [1.0, 0.678, 0.133, 0.005],
                [0.678, 1.0, 0.145, 0.007],
                [0.133, 0.145, 1.0, 0.029],
                [0.005, 0.007, 0.029, 1.0],
            ]
        ),
        abs=0.02,
    )

    linkage_tree = model_inspector.feature_association_linkage()

    print()
    DendrogramDrawer(style=DendrogramReportStyle()).draw(
        data=linkage_tree, title="Iris (binary) feature association linkage"
    )
def test_model_inspection_classifier_interaction_dual_target(
    iris_sample_binary_dual_target: Sample,
    iris_classifier_ranker_dual_target: LearnerRanker[
        ClassifierPipelineDF[RandomForestClassifierDF]
    ],
    iris_target_name,
    n_jobs: int,
) -> None:
    """
    Check that fitting an inspector to a dual-target classifier crossfit fails.

    Fitting must raise a ``ValueError`` whose message matches the
    "only single-output classifiers" pattern and mentions both target names.

    :param iris_sample_binary_dual_target: not read by this test
    :param iris_classifier_ranker_dual_target: ranker whose best model crossfit
        is passed to the inspector
    :param iris_target_name: name of the first target; the second target is
        expected to carry the same name with a ``2`` appended
    :param n_jobs: passed on to the inspector as its ``n_jobs`` argument
    """
    dual_target_crossfit = iris_classifier_ranker_dual_target.best_model_crossfit_

    # regular expression the error message has to match
    expected_message = (
        "only single-output classifiers .* are supported.*"
        f"{iris_target_name}.*{iris_target_name}2"
    )

    with pytest.raises(ValueError, match=expected_message):
        LearnerInspector(n_jobs=n_jobs).fit(crossfit=dual_target_crossfit)
def test_shap_decomposition(regressor_inspector: LearnerInspector) -> None:
    """
    Compare the inspector's synergy and redundancy matrices with values
    recalculated by hand from the raw SHAP interaction values.

    For a fixed list of feature pairs, relative synergy and redundancy (both
    symmetrical and asymmetrical) are derived step by step from the interaction
    values and compared with the matching matrix entries using ``np.isclose``.
    Afterwards, basic properties of all four matrices are checked: square shape,
    ones on the diagonal, and no missing values.

    :param regressor_inspector: the fitted inspector under test
    """
    # the raw interaction values and the four matrices do not depend on the
    # feature pair, so they are obtained once rather than once per pair
    iv = regressor_inspector.shap_interaction_values(consolidate=None)

    syn_matrix = regressor_inspector.feature_synergy_matrix(symmetrical=True)
    red_matrix = regressor_inspector.feature_redundancy_matrix(symmetrical=True)
    syn_matrix_asym = regressor_inspector.feature_synergy_matrix()
    red_matrix_asym = regressor_inspector.feature_redundancy_matrix()

    # noinspection PyPep8Naming
    def _calculate_relative_syn_and_red(
        feature_x: str, feature_y: str, is_indirect_syn_valid: bool
    ) -> Tuple[float, float, float, float]:
        # Returns relative synergy, relative redundancy, and their asymmetric
        # counterparts (from the perspective of feature_x), in that order.

        # Get the components for each feature:
        # S = interaction SHAP
        # A, B = independent SHAP
        # U, V = sum of interactions with 3rd variables
        iv_x = iv.xs(feature_x, level=-1)
        iv_y = iv.xs(feature_y, level=-1)
        X = iv_x.sum(axis=1).rename("X")
        Y = iv_y.sum(axis=1).rename("Y")
        A = iv_x.loc[:, feature_x]
        B = iv_y.loc[:, feature_y]
        S = iv_x.loc[:, feature_y]
        U = X - A - S
        V = Y - B - S

        # calculate the "indirect" S, such that cov(U, S) == 0 and cov(V, S) == 0;
        # the variance is evaluated once, and the division is skipped when it is
        # zero (the scaling factors are not applied to S in that case anyway)
        varS = var(S)
        if is_indirect_syn_valid and varS != 0:
            k_U = max(0.0, cov(S, U) / varS)
            k_V = max(0.0, cov(S, V) / varS)
        else:
            k_U = 0.0
            k_V = 0.0
        print_list(**{"cov(U, S) / var(S)": k_U, "cov(V, S) / var(S)": k_V})

        Su = S if varS == 0 else S * k_U
        Sv = S if varS == 0 else S * k_V
        U_ = U - Su
        V_ = V - Sv
        print_list(
            stdS=std(S),
            stdSu=std(Su),
            stdSv=std(Sv),
            stdU=std(U),
            stdU_=std(U_),
            stdV=std(V),
            stdV_=std(V_),
        )

        # calculate the minimal shared vector R, such that cov(X_ - R, Y_ - R) == 0
        X_ = X - S - Su
        Y_ = Y - S - Sv
        AUT = X_ + Y_
        AUT_asym = X_
        R_ = AUT / 2
        dXY = std(X_ - Y_)
        dR = std(R_)
        R = R_ * (1 - dXY / (2 * dR))
        print_list(
            stdX=std(X),
            stdY=std(Y),
            stdX_=std(X_),
            stdY_=std(Y_),
            stdR=std(R),
            covX_R_Y_R=round(cov(X_ - R, Y_ - R), 15),
        )

        SYN = 2 * S + Su + Sv
        SYN_asym = S + Su
        RED = 2 * R
        RED_asym = R
        UNI = X + Y - RED
        UNI_asym = X - RED_asym

        syn = std(SYN)
        aut = std(AUT)
        red = std(RED)
        uni = std(UNI)
        syn_asym = std(SYN_asym)
        aut_asym = std(AUT_asym)
        red_asym = std(RED_asym)
        uni_asym = std(UNI_asym)
        print_list(syn=syn, aut=aut, red=red, uni=uni)

        return (
            syn / (syn + aut),
            red / (red + uni),
            syn_asym / (syn_asym + aut_asym),
            red_asym / (red_asym + uni_asym),
        )

    for i, j, indirect_syn in [
        ("LSTAT", "RM", False),
        ("LSTAT", "DIS", True),
        ("LSTAT", "AGE", False),
        ("LSTAT", "NOX", False),
        ("LSTAT", "CRIM", False),
        ("RM", "DIS", False),
        ("RM", "AGE", False),
        ("RM", "NOX", False),
        ("RM", "CRIM", False),
    ]:
        print(f"\ncomparing features X={i} and Y={j}")

        syn_rel, red_rel, syn_rel_asym, red_rel_asym = _calculate_relative_syn_and_red(
            feature_x=i, feature_y=j, is_indirect_syn_valid=indirect_syn
        )

        print_list(
            syn_rel=syn_rel,
            red_rel=red_rel,
            syn_rel_asym=syn_rel_asym,
            red_rel_asym=red_rel_asym,
            syn_matrix=syn_matrix.loc[i, j],
            red_matrix=red_matrix.loc[i, j],
            syn_matrix_asym=syn_matrix_asym.loc[i, j],
            red_matrix_asym=red_matrix_asym.loc[i, j],
            percentage=True,
        )

        # symmetrical matrices must match in both directions
        assert np.isclose(red_matrix.loc[i, j], red_rel)
        assert np.isclose(red_matrix.loc[j, i], red_rel)
        assert np.isclose(syn_matrix.loc[i, j], syn_rel)
        assert np.isclose(syn_matrix.loc[j, i], syn_rel)

        # asymmetrical matrices are only checked from the perspective of X
        assert np.isclose(red_matrix_asym.loc[i, j], red_rel_asym)
        assert np.isclose(syn_matrix_asym.loc[i, j], syn_rel_asym)

    # check basic matrix properties
    n_features = len(regressor_inspector.features)

    for matrix in (syn_matrix, syn_matrix_asym, red_matrix, red_matrix_asym):
        # matrix shape is n_features x n_features
        assert matrix.shape == (n_features, n_features)

        # values on the diagonal are all 1.0
        for a in range(n_features):
            assert matrix.iloc[a, a] == 1.0

        # there are no nan values
        assert matrix.notna().all().all()
def test_shap_decomposition_matrices(
    best_lgbm_crossfit: LearnerCrossfit[RegressorPipelineDF],
    feature_names: Set[str],
    regressor_inspector: LearnerInspector,
) -> None:
    """
    Check shape, value range and actual values of the SHAP decomposition matrices.

    The association, synergy and redundancy matrices must each have one row and
    one column per feature and, with missing values treated as zero, contain
    only values in the closed interval [0.0, 1.0]. The unclustered symmetrical
    association matrix is additionally compared with hard-coded expected values
    (absolute tolerance 0.02), and the association linkage must be a
    ``LinkageTree``.

    :param best_lgbm_crossfit: not read by this test; kept in the signature so
        the way the test is invoked does not change
    :param feature_names: the feature names; only their count is used
    :param regressor_inspector: the inspector providing the matrices and linkage
    """
    # the expected size is the same for all matrices, so determine it once
    n_features = len(feature_names)

    # Shap decomposition matrices (feature dependencies)
    association_matrix: pd.DataFrame = regressor_inspector.feature_association_matrix(
        clustered=False, symmetrical=True
    )

    # all matrices are calculated up-front, before any of them is checked
    matrices = (
        association_matrix,
        regressor_inspector.feature_synergy_matrix(),
        regressor_inspector.feature_redundancy_matrix(),
    )

    # check that dimensions of pairwise feature matrices are equal to the number
    # of features, and check the value ranges
    for matrix, matrix_name in zip(
        matrices, ("association", "synergy", "redundancy")
    ):
        matrix_full_name = f"feature {matrix_name} matrix"

        assert len(matrix) == n_features, f"rows in {matrix_full_name}"
        assert len(matrix.columns) == n_features, f"columns in {matrix_full_name}"

        # replace missing values once per matrix instead of twice per column
        matrix_filled = matrix.fillna(0)

        # check values column by column
        for c in matrix_filled.columns:
            column = matrix_filled.loc[:, c]
            assert (
                0.0 <= column.min() <= column.max() <= 1.0
            ), f"Values of [0.0, 1.0] in {matrix_full_name}"

    # check actual values:
    assert association_matrix.values == pytest.approx(
        np.array(
            [
                [1.0, 0.043, 0.233, 0.0, 0.162, 0.078]
                + [0.192, 0.156, 0.009, 0.022, 0.035, 0.008, 0.07],
                [0.043, 1.0, 0.155, 0.0, 0.056, 0.055]
                + [0.017, 0.225, 0.024, 0.021, 0.049, 0.145, 0.034],
                [0.233, 0.155, 1.0, 0.0, 0.123, 0.207]
                + [0.15, 0.044, 0.069, 0.225, 0.241, 0.149, 0.209],
                [0.0, 0.0, 0.0, 1.0, 0.0, 0.0]
                + [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0],
                [0.162, 0.056, 0.123, 0.0, 1.0, 0.051]
                + [0.017, 0.156, 0.19, 0.08, 0.15, 0.025, 0.029],
                [0.078, 0.055, 0.207, 0.0, 0.051, 1.0]
                + [0.088, 0.005, 0.081, 0.14, 0.027, 0.058, 0.49],
                [0.192, 0.017, 0.15, 0.0, 0.017, 0.088]
                + [1.0, 0.128, 0.015, 0.269, 0.14, 0.096, 0.295],
                [0.156, 0.225, 0.044, 0.0, 0.156, 0.005]
                + [0.128, 1.0, 0.255, 0.158, 0.273, 0.132, 0.023],
                [0.009, 0.024, 0.069, 0.0, 0.19, 0.081]
                + [0.015, 0.255, 1.0, 0.223, 0.188, 0.035, 0.049],
                [0.022, 0.021, 0.225, 0.0, 0.08, 0.14]
                + [0.269, 0.158, 0.223, 1.0, 0.284, 0.182, 0.097],
                [0.035, 0.049, 0.241, 0.0, 0.15, 0.027]
                + [0.14, 0.273, 0.188, 0.284, 1.0, 0.027, 0.031],
                [0.008, 0.145, 0.149, 0.0, 0.025, 0.058]
                + [0.096, 0.132, 0.035, 0.182, 0.027, 1.0, 0.057],
                [0.07, 0.034, 0.209, 0.0, 0.029, 0.49]
                + [0.295, 0.023, 0.049, 0.097, 0.031, 0.057, 1.0],
            ]
        ),
        abs=0.02,
    )

    # cluster associated features
    association_linkage = regressor_inspector.feature_association_linkage()

    assert isinstance(association_linkage, LinkageTree)
def test_model_inspection(
    regressor_grids: Sequence[LearnerGrid[RegressorPipelineDF]],
    regressor_ranker: LearnerRanker[RegressorPipelineDF],
    best_lgbm_crossfit: LearnerCrossfit[RegressorPipelineDF],
    feature_names: Set[str],
    regressor_inspector: LearnerInspector,
    cv_kfold: KFold,
    sample: Sample,
    simple_preprocessor: TransformerDF,
    n_jobs: int,
) -> None:
    """
    Check the ranking scores and the SHAP values produced for a regressor.

    The test verifies the ranking against expected scores, that an unknown
    consolidation method is rejected, that the raw, mean and std SHAP value
    frames carry the expected index and column names, and that for every model
    of the crossfit the SHAP totals differ from the predictions only by a
    constant. Finally, a second inspector using a kernel explainer is fitted and
    its association linkage is drawn as a text dendrogram.

    :param regressor_grids: not read by this test
    :param regressor_ranker: the ranker whose ranking is checked
    :param best_lgbm_crossfit: the crossfit providing the fitted models
    :param feature_names: the expected column names of the SHAP values
    :param regressor_inspector: the fitted inspector under test
    :param cv_kfold: not read by this test
    :param sample: the sample providing features and the number of observations
    :param simple_preprocessor: not read by this test
    :param n_jobs: passed on to the second inspector as its ``n_jobs`` argument
    """
    # define checksums for this test
    expected_scores = [
        0.418,
        0.4,
        0.386,
        0.385,
        0.122,
        0.122,
        -0.074,
        -0.074,
        -0.074,
        -0.074,
    ]

    log.debug(f"\n{regressor_ranker.summary_report()}")

    check_ranking(
        ranking=regressor_ranker.ranking_,
        expected_scores=expected_scores,
        expected_learners=None,
        expected_parameters=None,
    )

    # using an invalid consolidation method raises an exception
    with pytest.raises(ValueError, match="unknown consolidation method: invalid"):
        regressor_inspector.shap_values(consolidate="invalid")

    shap_values_raw = regressor_inspector.shap_values(consolidate=None)
    shap_values_mean = regressor_inspector.shap_values(consolidate="mean")
    shap_values_std = regressor_inspector.shap_values(consolidate="std")

    # method shap_values without parameter is equal to "mean" consolidation
    assert_frame_equal(shap_values_mean, regressor_inspector.shap_values())

    # the length of rows in shap_values should be equal to the unique observation
    # indices we have had in the predictions_df
    assert len(shap_values_mean) == len(sample)

    # index names
    assert shap_values_mean.index.names == [Sample.IDX_OBSERVATION]
    assert shap_values_mean.columns.names == [Sample.IDX_FEATURE]
    assert shap_values_std.index.names == [Sample.IDX_OBSERVATION]
    assert shap_values_std.columns.names == [Sample.IDX_FEATURE]
    assert shap_values_raw.index.names == (["split", "observation"])
    assert shap_values_raw.columns.names == [Sample.IDX_FEATURE]

    # column index
    assert set(shap_values_mean.columns) == feature_names

    # check that the SHAP values add up to the predictions
    shap_totals_raw = shap_values_raw.sum(axis=1)

    for split_id, model in enumerate(best_lgbm_crossfit.models()):
        # for each model in the crossfit, calculate the difference between total
        # SHAP values and prediction for every observation. This is always the same
        # constant value, so the mean absolute deviation is zero
        shap_minus_pred = shap_totals_raw.xs(key=split_id) - model.predict(
            X=sample.features
        )

        # calculated explicitly: Series.mad() was deprecated in pandas 1.5 and
        # removed in pandas 2.0
        mean_abs_deviation = (shap_minus_pred - shap_minus_pred.mean()).abs().mean()

        assert (
            round(mean_abs_deviation, 12) == 0.0
        ), f"predictions matching total SHAP for split {split_id}"

    # test the ModelInspector with a KernelExplainer:
    inspector_2 = LearnerInspector(
        explainer_factory=KernelExplainerFactory(link="identity", data_size_limit=20),
        n_jobs=n_jobs,
    ).fit(crossfit=best_lgbm_crossfit)
    inspector_2.shap_values()

    linkage_tree = inspector_2.feature_association_linkage()

    print()
    DendrogramDrawer(style="text").draw(data=linkage_tree, title="Test")
def test_model_inspection_classifier_interaction(
    iris_sample_binary: Sample,
    iris_classifier_crossfit_binary: LearnerCrossfit[
        ClassifierPipelineDF[RandomForestClassifierDF]
    ],
    n_jobs: int,
) -> None:
    """
    Check SHAP interaction values and all feature matrices for a binary
    classifier.

    Two inspectors are fitted to the same crossfit, one with and one without
    SHAP interaction. The test verifies that interaction values add up to the
    plain SHAP values, that the resulting frames have the expected columns and
    lengths, that the raw interaction totals are consistent with the
    predictions, and that the synergy, redundancy and association matrices
    (unclustered and clustered) match hard-coded expected values. Finally the
    redundancy linkage is drawn as a dendrogram report.

    :param iris_sample_binary: the sample providing feature names and the number
        of observations
    :param iris_classifier_crossfit_binary: the crossfit to inspect
    :param n_jobs: passed on to both inspectors as their ``n_jobs`` argument
    """
    warnings.filterwarnings("ignore", message="You are accessing a training score")

    crossfit = iris_classifier_crossfit_binary

    # inspector using the default SHAP interaction setting
    model_inspector = LearnerInspector(
        explainer_factory=TreeExplainerFactory(
            feature_perturbation="tree_path_dependent", use_background_dataset=True
        ),
        n_jobs=n_jobs,
    ).fit(crossfit=crossfit)

    # inspector with SHAP interaction explicitly disabled
    model_inspector_no_interaction = LearnerInspector(
        shap_interaction=False,
        explainer_factory=TreeExplainerFactory(
            feature_perturbation="tree_path_dependent", use_background_dataset=True
        ),
        n_jobs=n_jobs,
    ).fit(crossfit=crossfit)

    # SHAP interaction values, and the SHAP values derived from them by summing
    # per observation
    interaction_values = model_inspector.shap_interaction_values()
    values_from_interactions = interaction_values.groupby(by="observation").sum()

    # SHAP interaction values add up to SHAP values; the results returned for
    # SHAP values and SHAP interaction values differ slightly, so absolute
    # differences below 0.015 are tolerated
    # todo: review accuracy after implementing use of a background dataset
    difference = model_inspector_no_interaction.shap_values() - values_from_interactions
    largest_abs_difference = difference.abs().max().max()
    assert largest_abs_difference < 0.015

    # the column names of the SHAP value data frames are the feature names
    feature_columns = iris_sample_binary.feature_names
    assert values_from_interactions.columns.to_list() == feature_columns
    assert interaction_values.columns.to_list() == feature_columns

    # one row per observation in the SHAP values
    assert len(values_from_interactions) == len(iris_sample_binary)

    # one row per observation and feature in the SHAP interaction values
    expected_interaction_rows = len(iris_sample_binary) * len(feature_columns)
    assert len(interaction_values) == expected_interaction_rows

    # do the SHAP values add up to predictions minus a constant value?
    raw_interaction_totals = (
        model_inspector.shap_interaction_values(consolidate=None)
        .groupby(level=[0, 1])
        .sum()
    )
    _validate_shap_values_against_predictions(
        shap_values=raw_interaction_totals, crossfit=crossfit
    )

    # expected values of the symmetrical feature matrices, each one unclustered
    # and clustered: (matrix method, clustered, expected rows)
    expected_matrices = (
        (
            model_inspector.feature_synergy_matrix,
            False,
            [
                [1.000, 0.047, 0.101, 0.120],
                [0.047, 1.000, 0.017, 0.021],
                [0.101, 0.017, 1.000, 0.100],
                [0.120, 0.021, 0.100, 1.000],
            ],
        ),
        (
            model_inspector.feature_synergy_matrix,
            True,
            [
                [1.000, 0.101, 0.100, 0.017],
                [0.101, 1.000, 0.120, 0.047],
                [0.100, 0.120, 1.000, 0.021],
                [0.017, 0.047, 0.021, 1.000],
            ],
        ),
        (
            model_inspector.feature_redundancy_matrix,
            False,
            [
                [1.0, 0.039, 0.181, 0.206],
                [0.039, 1.0, 0.005, 0.011],
                [0.181, 0.005, 1.0, 0.792],
                [0.206, 0.011, 0.792, 1.0],
            ],
        ),
        (
            model_inspector.feature_redundancy_matrix,
            True,
            [
                [1.000, 0.792, 0.181, 0.005],
                [0.792, 1.000, 0.206, 0.011],
                [0.181, 0.206, 1.000, 0.039],
                [0.005, 0.011, 0.039, 1.000],
            ],
        ),
        (
            model_inspector.feature_association_matrix,
            False,
            [
                [1.0, 0.028, 0.14, 0.128],
                [0.028, 1.0, 0.005, 0.002],
                [0.14, 0.005, 1.0, 0.681],
                [0.128, 0.002, 0.681, 1.0],
            ],
        ),
        (
            model_inspector.feature_association_matrix,
            True,
            [
                [1.000, 0.681, 0.128, 0.002],
                [0.681, 1.000, 0.140, 0.005],
                [0.128, 0.140, 1.000, 0.026],
                [0.002, 0.005, 0.026, 1.000],
            ],
        ),
    )

    for matrix_method, clustered, expected_rows in expected_matrices:
        actual_matrix = matrix_method(clustered=clustered, symmetrical=True)
        assert actual_matrix.values == pytest.approx(
            np.array(expected_rows), abs=0.02
        )

    linkage_tree = model_inspector.feature_redundancy_linkage()

    print()
    DendrogramDrawer(style=DendrogramReportStyle()).draw(
        data=linkage_tree, title="Iris (binary) feature redundancy linkage"
    )
"""
pip install gamma-facet

Model Inspection

FACET implements several model inspection methods for scikit-learn estimators.
FACET enhances model inspection by providing global metrics that complement the
local perspective of SHAP. The key global metrics for each pair of features in a
model are:

Synergy

    The degree to which the model combines information from one feature with
    another to predict the target. For example, let's assume we are predicting
    cardiovascular health using age and gender and the fitted model includes a
    complex interaction between them. This means these two features are
    synergistic for predicting cardiovascular health. Further, both features are
    important to the model and removing either one would significantly impact
    performance. Let's assume age brings more information to the joint
    contribution than gender. This asymmetric contribution means the synergy for
    (age, gender) is less than the synergy for (gender, age). To think about it
    another way, imagine the prediction is a coordinate you are trying to reach.
    From your starting point, age gets you much closer to this point than gender,
    however, you need both to get there. Synergy reflects the fact that gender
    gets more help from age (higher synergy from the perspective of gender) than
    age does from gender (lower synergy from the perspective of age) to reach the
    prediction. This leads to an important point: synergy is a naturally
    asymmetric property of the global information two interacting features
    contribute to the model predictions. Synergy is expressed as a percentage
    ranging from 0% (full autonomy) to 100% (full synergy).

Redundancy

    The degree to which a feature in a model duplicates the information of a
    second feature to predict the target. For example, let's assume we had house
    size and number of bedrooms for predicting house price. These features
    capture similar information, as the more bedrooms, the larger the house and,
    likely, the higher the price on average. The redundancy for (number of
    bedrooms, house size) will be greater than the redundancy for (house size,
    number of bedrooms). This is because house size "knows" more of what number
    of bedrooms does for predicting house price than vice-versa. Hence, there is
    greater redundancy from the perspective of number of bedrooms. Another way to
    think about it is removing house size will be more detrimental to model
    performance than removing number of bedrooms, as house size can better
    compensate for the absence of number of bedrooms. This also implies that
    house size would be a more important feature than number of bedrooms in the
    model. The important point here is that like synergy, redundancy is a
    naturally asymmetric property of the global information feature pairs have
    for predicting an outcome. Redundancy is expressed as a percentage ranging
    from 0% (full uniqueness) to 100% (full redundancy).
"""

# fit the model inspector
from facet.inspection import LearnerInspector

# NOTE(review): `mymodel` is not defined in this snippet; presumably it stands
# for a fitted crossfit created beforehand - confirm against the surrounding text
inspector = LearnerInspector()
inspector.fit(crossfit=mymodel)

# Synergy
# (section heading; as a bare name it would raise a NameError when executed)

# visualise synergy as a matrix
from pytools.viz.matrix import MatrixDrawer

synergy_matrix = inspector.feature_synergy_matrix(symmetrical=True)
MatrixDrawer(style="matplot%").draw(synergy_matrix, title="Synergy Matrix")