Exemplo n.º 1
0
def _check_model(model: Model, data: Table) -> bool:
    """
    Validate that the model suits the data and report whether mapping the
    data into the model's domain changes its feature matrix.

    Parameters
    ----------
    model
        Model that is checked against the data.
    data
        Data that is mapped into the model's domain.

    Returns
    -------
    bool
        True when ``data.X`` and the ``X`` of the data mapped into the model's
        domain differ (in shape or in at least one element), False otherwise.

    Raises
    ------
    ValueError
        When a regression model is paired with a discrete class, or a
        classification model is paired with a continuous class.
    """
    domain = data.domain
    if domain.has_discrete_class and isinstance(model, RegModel):
        raise ValueError(
            f"{model} can not be used for data with discrete class."
        )
    if domain.has_continuous_class and isinstance(model, ClsModel):
        raise ValueError(
            f"{model} can not be used for data with continuous class."
        )

    transformed_X = model.data_to_model_domain(data).X
    original_X = data.X

    # A different shape already means that the matrices differ; an
    # element-wise comparison is neither needed nor meaningful in that case.
    if original_X.shape != transformed_X.shape:
        return True

    # NOTE(review): if X holds NaN for missing values, `!=` would report
    # those cells as different even when both matrices agree - confirm that
    # this is intended.
    mismatch = original_X != transformed_X
    if sp.issparse(original_X) and sp.issparse(transformed_X):
        # The comparison of two sparse matrices is sparse itself; every
        # stored non-zero marks a differing element.
        return mismatch.nnz != 0
    return mismatch.any()
Exemplo n.º 2
0
def compute_shap_values(
    model: Model,
    data: Table,
    reference_data: Table,
    progress_callback: Callable = None,
) -> Tuple[List[np.ndarray], Table, np.ndarray, np.ndarray]:
    """
    Explain a model with SHAP values and return them together with the
    data table on which the explanation was computed.

    The tree-specific explanation is attempted first; when it yields no SHAP
    values (``None``), the explanation for other models is used instead.

    Parameters
    ----------
    model
        The model to explain.
    data
        The data whose predictions are explained.
    reference_data
        Background data used for perturbation.
    progress_callback
        Callback used to report progress. When None, a dummy callback is
        used instead.

    Returns
    -------
    shap_values
        Shapley values for each data item, as computed by the SHAP library.
        A list with one array per class, ordered as the values of the class
        variable. Each array has the shape (num cases x num attributes) and
        holds the contribution of each attribute to the final prediction.
    data_transformed
        The table on which the explanation was made: ``data`` mapped into the
        model's domain (preprocessed by the model's preprocessors).
    sample_mask
        Boolean mask telling which rows of data_transformed are explained;
        SHAP values are computed only for a sample of the data.
    base_value
        The base value (average prediction on the dataset) for each class.
        Always an array - a single-element one for regression.
    """
    report = dummy_callback if progress_callback is None else progress_callback

    # A fixed seed makes the sampling and the SHAP computation reproducible
    # for the same data.
    with temp_seed(0):
        report(0, "Computing explanation ...")

        data_transformed = model.data_to_model_domain(data)
        background = model.data_to_model_domain(reference_data)
        explainer_args = (model, data_transformed, background, report)

        shap_values, sample_mask, base_value = _explain_trees(*explainer_args)
        if shap_values is None:
            # No tree explanation available; fall back to the explanation
            # used for other models.
            shap_values, sample_mask, base_value = _explain_other_models(
                *explainer_args
            )

        # Regression gives a single base value; wrap it so that callers
        # always receive an array.
        if not isinstance(base_value, np.ndarray):
            base_value = np.array([base_value])

        report(1)

    return shap_values, data_transformed, sample_mask, base_value