    def summarize(self):
        """
        Build the summary of local explainability for the current input data.

        The summary can be configured beforehand with the modify_mask method.
        If the user doesn't use modify_mask, the mask_params parameters
        specified during the initialisation of the SmartPredictor are used.

        In classification case, the summary corresponds to:
            - the predicted values specified by the user or automatically
              computed (with add_input method)
            - the probabilities from predict_proba associated to the
              predicted values
            - the contributions ranked and filtered as specified with the
              modify_mask method

        Returns
        -------
        pandas.DataFrame
            The ``ypred`` entry of ``self.data`` concatenated column-wise with
            the selected explanation of each row.

        Raises
        ------
        ValueError
            If the instance has no ``data`` attribute, i.e. add_input() has
            not been called yet.

        Examples
        --------
        >>> summary_df = predictor.summarize()
        >>> summary_df
           pred     proba feature_1  value_1  contribution_1 feature_2  value_2  contribution_2
        0     0  0.756416       Sex      1.0        0.322308    Pclass      3.0        0.155069
        1     3  0.628911       Sex      2.0        0.585475    Pclass      1.0        0.370504
        2     0  0.543308       Sex      2.0       -0.486667    Pclass      3.0        0.255072

        >>> predictor.modify_mask(max_contrib=1)
        >>> summary_df = predictor.summarize()
        >>> summary_df
           pred     proba feature_1  value_1  contribution_1
        0     0  0.756416       Sex      1.0        0.322308
        1     3  0.628911       Sex      2.0        0.585475
        2     0  0.543308       Sex      2.0       -0.486667
        """
        # data is needed : add_input() method must be called at least once
        if not hasattr(self, "data"):
            raise ValueError("You have to specify dataset x and y_pred arguments. Please use add_input() method.")

        # NOTE(review): the arguments of the two helper calls below were
        # truncated in this variant; they are rebuilt to mirror the
        # summarize(self, use_groups=None) variant (same data keys, same
        # argument order) — confirm against the intended version.
        x_postprocessed = self.data["x_postprocessed"]
        columns_dict = dict(enumerate(x_postprocessed.columns))

        self.summary = assign_contributions(
            rank_contributions(self.data["contributions"], x_postprocessed)
        )

        # Apply filter method with mask_params attributes parameters
        # NOTE(review): assumes the instance exposes filter() returning the mask — confirm.
        self.mask = self.filter()

        # Summarize information
        self.data['summary'] = summarize(
            self.summary['contrib_sorted'],
            self.summary['x_sorted'],
            self.mask,
            columns_dict,
            self.features_dict,
        )

        # Matching with y_pred
        return pd.concat([self.data["ypred"], self.data['summary']], axis=1)
    def test_rank_contributions_1(self):
        """
        Unit test rank contributions 1

        Checks, on a two-row example, that rank_contributions orders each row
        by decreasing absolute contribution and returns the matching feature
        values and feature positions, with the expected column names and the
        original index.
        """
        dataframe_s = pd.DataFrame(
            [[3.4, 1, -9, 4],
             [-45, 3, 43, -9]],
            columns=["Phi_" + str(i) for i in range(4)],
            index=['raw_1', 'raw_2']
        )

        dataframe_x = pd.DataFrame(
            [['Male', 'House', 'Married', 'PhD'],
             ['Female', 'Flat', 'Married', 'Master']],
            columns=["X" + str(i) for i in range(4)],
            index=['raw_1', 'raw_2']
        )

        # Contributions sorted row by row by decreasing absolute value.
        expected_s_ord = pd.DataFrame(
            data=[[-9, 4, 3.4, 1],
                  [-45, 43, -9, 3]],
            columns=['contribution_' + str(i) for i in range(4)],
            index=['raw_1', 'raw_2']
        )

        # Feature values reordered the same way as the contributions.
        expected_x_ord = pd.DataFrame(
            data=[['Married', 'PhD', 'Male', 'House'],
                  ['Female', 'Married', 'Master', 'Flat']],
            columns=['feature_' + str(i) for i in range(4)],
            index=['raw_1', 'raw_2']
        )

        # Original column positions of the reordered features.
        expected_s_dict = pd.DataFrame(
            data=[[2, 3, 0, 1],
                  [0, 2, 3, 1]],
            columns=['feature_' + str(i) for i in range(4)],
            index=['raw_1', 'raw_2']
        )

        s_ord, x_ord, s_dict = rank_contributions(dataframe_s, dataframe_x)

        assert np.array_equal(s_ord.values, expected_s_ord.values)
        assert np.array_equal(x_ord.values, expected_x_ord.values)
        assert np.array_equal(s_dict.values, expected_s_dict.values)

        assert list(s_ord.columns) == list(expected_s_ord.columns)
        assert list(x_ord.columns) == list(expected_x_ord.columns)
        assert list(s_dict.columns) == list(expected_s_dict.columns)

        assert s_ord.index.equals(expected_s_ord.index)
        assert x_ord.index.equals(expected_x_ord.index)
        assert s_dict.index.equals(expected_s_dict.index)
    def test_rank_contributions_1(self):
        """
        Unit test rank contributions 1

        Ranks SHAP contributions computed on self.x_test and checks that each
        row is sorted by non-increasing absolute contribution and that the
        returned feature values match self.x_test reordered by the returned
        positions.
        """
        # NOTE(review): another test_rank_contributions_1 is defined earlier
        # in this source; if both live in the same class, this definition
        # shadows the first one — confirm and rename one of them.
        model = RandomForestClassifier(n_estimators=3)
        model.fit(self.x_train, self.y_train)
        explainer = shap.TreeExplainer(model)
        shap_values = explainer.shap_values(self.x_test)
        # One contributions DataFrame per element of shap_values.
        slist = [
            pd.DataFrame(tab,
                         columns=self.x_test.columns) for tab in shap_values
        ]

        for i in range(3):
            s_ord, x_ord, s_dict = rank_contributions(
                slist[i], pd.DataFrame(data=self.x_test))
            # Absolute contributions must never increase from left to right.
            assert np.all(np.diff(np.abs(s_ord), axis=1) <= 0)
            # Reordering x_test with the returned positions must give x_ord.
            assert np.array_equal(
                x_ord.values,
                np.take_along_axis(self.x_test.values, s_dict.values, axis=1))
    def rank_contributions(self, contributions, x_pred):
        """
        Rank contributions line by line and build a reference dictionary to the prediction set.

        Thin wrapper that delegates to the module-level rank_contributions
        function.

        Parameters
        ----------
        contributions : pandas.DataFrame
            Local contributions to sort.
        x_pred : pandas.DataFrame
            Prediction set.

        Returns
        -------
        pandas.DataFrame
            Local contributions sorted by decreasing absolute values.
        pandas.DataFrame
            Input features sorted by decreasing contributions absolute values.
        pandas.DataFrame
            Input features names sorted for each observation
            by decreasing contributions absolute values.
        """
        return rank_contributions(contributions, x_pred)
    def summarize(self, use_groups=None):
        """
        Build the summary of local explainability for the current input data.

        The summary can be configured beforehand with the modify_mask method.
        If the user doesn't use modify_mask, the mask_params parameters
        specified during the initialisation of the SmartPredictor are used.

        In classification case, the summary corresponds to:
            - the predicted values specified by the user or automatically
              computed (with add_input method)
            - the probabilities from predict_proba associated to the
              predicted values
            - the contributions ranked and filtered as specified with the
              modify_mask method

        Parameters
        ----------
        use_groups : bool (optional)
            Whether or not to compute groups of features contributions.
            Groups are used unless this is explicitly False, and only when
            ``self.features_groups`` is not None.

        Returns
        -------
        pandas.DataFrame
            The ``ypred`` entry of the selected data concatenated column-wise
            with the selected explanation of each row.

        Raises
        ------
        ValueError
            If the instance has no ``data`` attribute, i.e. add_input() has
            not been called yet.

        Examples
        --------
        >>> summary_df = predictor.summarize()
        >>> summary_df
           pred     proba feature_1  value_1  contribution_1 feature_2  value_2  contribution_2
        0     0  0.756416       Sex      1.0        0.322308    Pclass      3.0        0.155069
        1     3  0.628911       Sex      2.0        0.585475    Pclass      1.0        0.370504
        2     0  0.543308       Sex      2.0       -0.486667    Pclass      3.0        0.255072

        >>> predictor.modify_mask(max_contrib=1)
        >>> summary_df = predictor.summarize()
        >>> summary_df
           pred     proba feature_1  value_1  contribution_1
        0     0  0.756416       Sex      1.0        0.322308
        1     3  0.628911       Sex      2.0        0.585475
        2     0  0.543308       Sex      2.0       -0.486667
        """
        # Groups are opt-out: None and True both enable them, but only when
        # groups of features have actually been declared.
        use_groups = use_groups is not False and self.features_groups is not None

        # data is needed : add_input() method must be called at least once
        if not hasattr(self, "data"):
            raise ValueError(
                "You have to specify dataset x and y_pred arguments. Please use add_input() method."
            )

        # Work on the grouped view of the data only when groups are requested.
        if use_groups:
            data = self.data_groups
        else:
            data = self.data

        if self._drop_option is not None:
            # Keep only the retained columns that are present in the data,
            # plus the group columns when groups are in use.
            columns_to_keep = [
                x for x in self._drop_option["columns_dict_op"].values()
                if x in data["x_postprocessed"].columns
            ]
            if use_groups:
                columns_to_keep += list(self.features_groups.keys())
            x_preprocessed = data["x_postprocessed"][columns_to_keep]
        else:
            x_preprocessed = data["x_postprocessed"]

        # Position -> column name, and feature labels restricted to kept columns.
        columns_dict = dict(enumerate(x_preprocessed.columns))
        features_dict = {
            k: v
            for k, v in self.features_dict.items()
            if k in x_preprocessed.columns
        }

        self.summary = assign_contributions(
            rank_contributions(data["contributions"], x_preprocessed))

        # Apply filter method with mask_params attributes parameters
        # NOTE(review): the statement under this comment was missing although
        # self.mask is read just below; assumes the instance exposes filter()
        # returning the mask — confirm.
        self.mask = self.filter()

        # Summarize information
        data['summary'] = summarize(self.summary['contrib_sorted'],
                                    self.summary['x_sorted'], self.mask,
                                    columns_dict, features_dict)

        # Matching with y_pred
        return pd.concat([data["ypred"], data['summary']], axis=1)