Пример #1
0
    def shap_values(self, X, **kwargs):
        """ Estimate the SHAP values for a set of samples.

        Parameters
        ----------
        X : numpy.array or pandas.DataFrame
            A matrix of samples (# samples x # features) on which to explain the model's output.

        nsamples : "auto" or int
            Number of times to re-evaluate the model when explaining each prediction. More samples
            lead to lower variance estimates of the SHAP values.

        l1_reg : "auto" or float
            The l1 regularization to use for feature selection (the estimation procedure is based on
            a debiased lasso). Set this to zero to remove the feature selection step before estimation.

        Returns
        -------
        For models with a single output this returns a matrix of SHAP values
        (# samples x # features). Each row sums to the difference between the model output for that
        sample and the expected value of the model output (which is stored as expected_value
        attribute of the explainer). For models with vector outputs this returns a list
        of such matrices, one for each output.
        """

        # convert dataframes
        if str(type(X)).endswith("pandas.core.series.Series'>"):
            X = X.values
        elif str(type(X)).endswith("'pandas.core.frame.DataFrame'>"):
            if self.keep_index:
                index_value = X.index.values
                index_name = X.index.name
                column_name = list(X.columns)
            X = X.values

        assert str(type(X)).endswith("'numpy.ndarray'>"), "Unknown instance type: " + str(type(X))
        assert len(X.shape) == 1 or len(X.shape) == 2, "Instance must have 1 or 2 dimensions!"

        # single instance
        if len(X.shape) == 1:
            data = X.reshape((1, X.shape[0]))
            if self.keep_index:
                data = convert_to_instance_with_index(data, column_name, index_name, index_value)
            explanation = self.explain(data, **kwargs)

            # vector-output
            s = explanation.effects.shape
            if len(s) == 2:
                outs = [np.zeros(s[0]) for j in range(s[1])]
                for j in range(s[1]):
                    outs[j] = explanation.effects[:, j]
                    assert abs(self.link.f(self.expected_value[j]) - explanation.base_value[j]) < 1e-6
                return outs

            # single-output
            else:
                out = np.zeros(s[0])
                out[:] = explanation.effects
                assert abs(self.link.f(self.expected_value) - explanation.base_value) < 1e-6
                return out

        # explain the whole dataset
        elif len(X.shape) == 2:
            explanations = []
            for i in tqdm(range(X.shape[0]), disable=kwargs.get("silent", False)):
                data = X[i:i + 1, :]
                if self.keep_index:
                    data = convert_to_instance_with_index(data, column_name, index_value[i:i + 1], index_name)
                explanations.append(self.explain(data, **kwargs))

            # vector-output
            s = explanations[0].effects.shape
            if len(s) == 2:
                outs = [np.zeros((X.shape[0], s[0])) for j in range(s[1])]
                for i in range(X.shape[0]):
                    for j in range(s[1]):
                        outs[j][i] = explanations[i].effects[:, j]
                        assert abs(self.link.f(self.expected_value[j]) - explanations[i].base_value[j]) < 1e-6
                return outs

            # single-output
            else:
                out = np.zeros((X.shape[0], s[0]))
                for i in range(X.shape[0]):
                    out[i] = explanations[i].effects
                    assert abs(self.link.f(self.expected_value) - explanations[i].base_value) < 1e-6
                return out
Пример #2
0
    def shap_values(self, X, **kwargs):
        # convert dataframes
        if str(type(X)).endswith("pandas.core.series.Series'>"):
            X = X.as_matrix()
        elif str(type(X)).endswith("'pandas.core.frame.DataFrame'>"):
            if self.keep_index:
                index_value = X.index.values
                index_name = X.index.name
                column_name = list(X.columns)
            X = X.as_matrix()

        assert str(type(X)).endswith(
            "'numpy.ndarray'>"), "Unknown instance type: " + str(type(X))
        assert len(X.shape) == 1 or len(
            X.shape) == 2, "Instance must have 1 or 2 dimensions!"

        # single instance
        if len(X.shape) == 1:
            data = X.reshape((1, X.shape[0]))
            if self.keep_index:
                data = convert_to_instance_with_index(data, column_name,
                                                      index_name, index_value)
            explanation = self.explain(data, **kwargs)

            # vector-output
            s = explanation.effects.shape
            if len(s) == 2:
                outs = [np.zeros(s[0] + 1) for j in range(s[1])]
                for j in range(s[1]):
                    outs[j][:-1] = explanation.effects[:, j]
                    outs[j][-1] = explanation.base_value[j]
                return outs

            # single-output
            else:
                out = np.zeros(s[0] + 1)
                out[:-1] = explanation.effects
                out[-1] = explanation.base_value
                return out

        # explain the whole dataset
        elif len(X.shape) == 2:
            explanations = []
            for i in tqdm(range(X.shape[0])):
                data = X[i:i + 1, :]
                if self.keep_index:
                    data = convert_to_instance_with_index(
                        data, column_name, index_value[i:i + 1], index_name)
                explanations.append(self.explain(data, **kwargs))

            # vector-output
            s = explanations[0].effects.shape
            if len(s) == 2:
                outs = [np.zeros((X.shape[0], s[0] + 1)) for j in range(s[1])]
                for i in range(X.shape[0]):
                    for j in range(s[1]):
                        outs[j][i, :-1] = explanations[i].effects[:, j]
                        outs[j][i, -1] = explanations[i].base_value[j]
                return outs

            # single-output
            else:
                out = np.zeros((X.shape[0], s[0] + 1))
                for i in range(X.shape[0]):
                    out[i, :-1] = explanations[i].effects
                    out[i, -1] = explanations[i].base_value
                return out