def _check_model(model: Model, data: Table) -> bool:
    """
    Validate the model/target combination and report whether the model
    changes the feature matrix when data is converted to its domain.

    Parameters
    ----------
    model
        Model that is going to be used on the data.
    data
        Data the model is checked against.

    Returns
    -------
    bool
        True when ``data.X`` differs (in shape or in any element) from the
        ``X`` of ``model.data_to_model_domain(data)``, False otherwise.
        Always a plain Python ``bool``.

    Raises
    ------
    ValueError
        When the data has a discrete class and the model is a ``RegModel``,
        or the data has a continuous class and the model is a ``ClsModel``.
    """
    domain = data.domain
    if domain.has_discrete_class and isinstance(model, RegModel):
        raise ValueError(
            f"{model} can not be used for data with discrete class."
        )
    if domain.has_continuous_class and isinstance(model, ClsModel):
        raise ValueError(
            f"{model} can not be used for data with continuous class."
        )

    original_X = data.X
    model_X = model.data_to_model_domain(data).X

    # A different shape already proves the matrices differ; it also keeps
    # the element-wise comparison below from being run on mismatched shapes.
    if original_X.shape != model_X.shape:
        return True

    # NOTE: the comparison is element-wise `!=`; for float arrays NaN never
    # compares equal, so missing values count as a difference.
    mismatch = original_X != model_X
    if sp.issparse(original_X) and sp.issparse(model_X):
        # Sparse comparison result stores only the entries that differ.
        return bool(mismatch.nnz != 0)
    # `.any()` yields numpy.bool_; coerce to honour the declared return type.
    return bool(mismatch.any())
def compute_shap_values(
    model: Model,
    data: Table,
    reference_data: Table,
    progress_callback: Callable = None,
) -> Tuple[List[np.ndarray], Table, np.ndarray, np.ndarray]:
    """
    Explain a model with SHAP values and return the transformed data the
    explanation was computed on.

    Parameters
    ----------
    model
        The model being explained.
    data
        The data whose predictions are explained.
    reference_data
        Background data used for perturbation.
    progress_callback
        Callback used to report progress; when None, ``dummy_callback``
        is used instead.

    Returns
    -------
    shap_values
        Shapley values for each data item as computed by the SHAP library:
        a list with one array per class (class order follows the values of
        the class_var), each of shape (num cases x num attributes), giving
        the contribution of each attribute to the final prediction.
    data_transformed
        The table the explanation was made on: ``data`` converted to the
        model's domain (i.e. preprocessed by the model's preprocessors).
    sample_mask
        Boolean mask telling which rows of data_transformed were explained;
        SHAP values are computed only for a sample of the data.
    base_value
        The base value (average prediction on the dataset) for each class,
        always returned as a numpy array.
    """
    # Fixed seed: the same data must give the same sample and SHAP values.
    with temp_seed(0):
        if progress_callback is None:
            progress_callback = dummy_callback
        progress_callback(0, "Computing explanation ...")

        explained = model.data_to_model_domain(data)
        background = model.data_to_model_domain(reference_data)

        # Try the tree explainer first; a None in the first slot means it
        # produced no SHAP values, so fall back to the generic explainer.
        explanation = _explain_trees(
            model, explained, background, progress_callback
        )
        if explanation[0] is None:
            explanation = _explain_other_models(
                model, explained, background, progress_callback
            )
        shap_values, sample_mask, base_value = explanation

        # A non-array base value (regression) is wrapped into a
        # one-element array.
        if not isinstance(base_value, np.ndarray):
            base_value = np.array([base_value])

        progress_callback(1)

    return shap_values, explained, sample_mask, base_value