Example #1
    def setup_visualization_input(self, classes, predicted_y, list_dataset,
                                  true_y, features):
        if classes is not None:
            classes = convert_to_list(classes)
            self.dashboard_input[
                ExplanationDashboardInterface.CLASS_NAMES] = classes
            class_to_index = {k: v for v, k in enumerate(classes)}

        if predicted_y is not None:
            # If classes specified, convert predicted_y to
            # numeric representation
            if classes is not None and predicted_y[0] in class_to_index:
                for i in range(len(predicted_y)):
                    predicted_y[i] = class_to_index[predicted_y[i]]
            self.dashboard_input[
                ExplanationDashboardInterface.PREDICTED_Y] = predicted_y

        row_length = 0
        feature_length = None

        if list_dataset is not None:
            row_length, feature_length = np.shape(list_dataset)
            if feature_length > 1000:
                raise ValueError("Exceeds maximum number of features for"
                                 " visualization (1000). Please regenerate the"
                                 " explanation using fewer features or"
                                 " initialize the dashboard without passing a"
                                 " dataset.")
            self.dashboard_input[ExplanationDashboardInterface.
                                 TRAINING_DATA] = serialize_json_safe(
                                     list_dataset)

        if true_y is not None and len(true_y) == row_length:
            list_true_y = convert_to_list(true_y)
            # If classes specified, convert true_y to numeric representation
            if classes is not None and list_true_y[0] in class_to_index:
                for i in range(len(list_true_y)):
                    list_true_y[i] = class_to_index[list_true_y[i]]
            self.dashboard_input[
                ExplanationDashboardInterface.TRUE_Y] = list_true_y

        if features is not None:
            features = convert_to_list(features)
            if feature_length is not None and len(features) != feature_length:
                raise ValueError("Feature vector length mismatch:"
                                 " feature names length differs"
                                 " from local explanations dimension")
            self.dashboard_input[
                ExplanationDashboardInterface.FEATURE_NAMES] = features
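
The class_to_index mapping above is what turns string labels into the numeric representation stored in the dashboard input. A standalone sketch of the same pattern, with illustrative data (not from the source):

    classes = ["setosa", "versicolor", "virginica"]
    class_to_index = {k: v for v, k in enumerate(classes)}

    predicted_y = ["virginica", "setosa", "setosa"]
    # Convert only when the entries are actual class names, mirroring
    # the `predicted_y[0] in class_to_index` guard above.
    if predicted_y[0] in class_to_index:
        predicted_y = [class_to_index[y] for y in predicted_y]

    assert predicted_y == [2, 0, 0]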
Example #2
    def test_index_to_list(self):
        input_index = pd.Index(data=[[0, 4], [1, 5], [2, 6]])
        expected_list = [[0, 4], [1, 5], [2, 6]]
        input_as_list = convert_to_list(input_index)

        assert input_as_list is not None
        assert input_as_list == expected_list
Example #3
    def test_series_to_list(self):
        input_series = pd.Series(data=[[0, 4], [1, 5], [2, 6]])
        expected_list = [[0, 4], [1, 5], [2, 6]]
        input_as_list = convert_to_list(input_series)

        assert input_as_list is not None
        assert input_as_list == expected_list
Example #4
    def test_list_to_list(self):
        input_list = [[0, 4], [1, 5], [2, 6]]
        expected_list = [[0, 4], [1, 5], [2, 6]]
        input_as_list = convert_to_list(input_list)

        assert input_as_list is not None
        assert input_as_list == expected_list
Example #5
    def test_array_to_list(self):
        input_array = np.array([[0, 4], [1, 5], [2, 6]])
        expected_list = [[0, 4], [1, 5], [2, 6]]
        input_as_list = convert_to_list(input_array)

        assert input_as_list is not None
        assert input_as_list == expected_list
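
Examples #2-#5 (and the DataFrame test in Example #8) pin down the convert_to_list contract: pandas Index, Series, plain lists, and NumPy arrays all come back as plain nested lists. A minimal sketch of a helper that would satisfy these tests, offered as an illustration rather than the library's implementation:

    import numpy as np
    import pandas as pd

    def convert_to_list_sketch(data):
        # Illustrative only: one way to satisfy the tests above.
        if isinstance(data, pd.DataFrame):
            return data.values.tolist()  # row-wise nested list
        if isinstance(data, (pd.Index, pd.Series, np.ndarray)):
            return data.tolist()
        return list(data)  # already list-like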
Example #6
 def predicted_y_to_list(self, predicted_y):
     try:
         predicted_y = convert_to_list(predicted_y)
     except Exception as ex:
         ex_str = _format_exception(ex)
         raise ValueError("Model prediction output of unsupported type,"
                          "inner error: {}".format(ex_str))
     return predicted_y
Example #7
 def on_predict(self, data):
     try:
         if self._dataframeColumns is not None:
             data = pd.DataFrame(data, columns=self._dataframeColumns)
             data = data.astype(dict(self._dfdtypes))
         if self._is_classifier:
             prediction = convert_to_list(self._model.predict_proba(data),
                                          EXP_VIZ_ERR_MSG)
         else:
             prediction = convert_to_list(self._model.predict(data),
                                          EXP_VIZ_ERR_MSG)
         return {WidgetRequestResponseConstants.DATA: prediction}
     except Exception:
         return {
             WidgetRequestResponseConstants.ERROR: "Model threw exception"
             " while predicting...",
             WidgetRequestResponseConstants.DATA: []
         }
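
A hedged sketch of how a handler like on_predict might be exercised; the dashboard_input instance and the feature row below are hypothetical:

    rows = [[5.1, 3.5, 1.4, 0.2]]  # one hypothetical feature row
    result = dashboard_input.on_predict(rows)
    # On failure the dict carries an ERROR key, otherwise DATA holds
    # the predictions, as in the handler above.
    if WidgetRequestResponseConstants.ERROR in result:
        print(result[WidgetRequestResponseConstants.ERROR])
    else:
        print(result[WidgetRequestResponseConstants.DATA])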
Example #8
    def test_pandas_dataframe_to_list(self):
        input_dataframe = pd.DataFrame.from_dict({
            "a": [0, 1, 2],
            "b": [4, 5, 6]
        })
        expected_list = [[0, 4], [1, 5], [2, 6]]
        input_as_list = convert_to_list(input_dataframe)

        assert input_as_list is not None
        assert input_as_list == expected_list
Example #9
    def test_csr_matrix_to_list(self):
        input_sparse_matrix = csr_matrix((3, 10000), dtype=np.int8)
        with pytest.raises(ValueError) as ve:
            convert_to_list(input_sparse_matrix)
        assert "Exceeds maximum number of features for " + \
            "visualization (1000)" in str(ve.value)

        with pytest.raises(ValueError) as ve:
            convert_to_list(input_sparse_matrix,
                            custom_err_msg="Error occurred")
        assert "Error occurred" in str(ve.value)

        row = np.array([0, 0, 1, 2, 2, 2])
        col = np.array([0, 2, 2, 0, 1, 2])
        data = np.array([1, 2, 3, 4, 5, 6])
        sparse_matrix = csr_matrix((data, (row, col)), shape=(3, 3))
        expected_list = [[1, 0, 2], [0, 0, 3], [4, 5, 6]]
        input_as_list = convert_to_list(sparse_matrix)

        assert input_as_list is not None
        assert input_as_list == expected_list
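
For sparse input the test expects a dense nested list back, guarded by a cap on the column count. A sketch of how such a branch might look, as an illustration rather than the library's code:

    from scipy.sparse import issparse

    def sparse_to_list_sketch(matrix, custom_err_msg=None):
        # Illustrative only: densify small sparse matrices and reject
        # wide ones, matching the behavior the test above exercises.
        if issparse(matrix):
            if matrix.shape[1] > 1000:
                raise ValueError(custom_err_msg or
                                 "Exceeds maximum number of features for "
                                 "visualization (1000)")
            return matrix.toarray().tolist()
        return matrix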
Example #10
 def on_predict(self, data):
     try:
         data = pd.DataFrame(
             data, columns=self.dashboard_input.dataset.feature_names)
         if self._is_classifier:
             prediction = convert_to_list(
                 self._analysis.model.predict_proba(data), EXP_VIZ_ERR_MSG)
         else:
             prediction = convert_to_list(
                 self._analysis.model.predict(data), EXP_VIZ_ERR_MSG)
         return {WidgetRequestResponseConstants.data: prediction}
     except Exception as e:
         print(e)
         traceback.print_exc()
         e_str = _format_exception(e)
         return {
             WidgetRequestResponseConstants.error:
             "Model threw exception"
             " while predicting..."
             "inner error: {}".format(e_str),
             WidgetRequestResponseConstants.data: []
         }
Example #11
 def setup_pyspark(self, model, dataset, true_y, classes, features,
                   categorical_features, true_y_dataset, pred_y,
                   pred_y_dataset, model_task, metric, max_depth,
                   num_leaves, min_child_samples, sample_dataset,
                   model_available):
     self._error_analyzer = ModelAnalyzer(model, dataset, true_y, features,
                                          categorical_features, model_task,
                                          metric, classes)
     sample = dataset.to_spark().limit(100)
     scored_sample = model.transform(sample)
     pd_sample = scored_sample.toPandas()
     predicted_y = pd_sample["prediction"]
     predicted_y = self.predicted_y_to_list(predicted_y)
     true_y = pd_sample[true_y]
     pd_sample = pd_sample[features]
     list_dataset = convert_to_list(pd_sample)
     self.setup_visualization_input(classes, predicted_y, list_dataset,
                                    true_y, features)
Example #12
    def _save_metadata(self, path):
        """Save the metadata like target column, categorical features,
           task type and the classes (if any).

        :param path: The directory path to save the RAIInsights to.
        :type path: str
        """
        top_dir = Path(path)
        classes = convert_to_list(self._classes)
        feature_metadata_dict = None
        if self._feature_metadata is not None:
            feature_metadata_dict = self._feature_metadata.to_dict()
        meta = {
            _TARGET_COLUMN: self.target_column,
            _TASK_TYPE: self.task_type,
            _CATEGORICAL_FEATURES: self.categorical_features,
            _CLASSES: classes,
            _FEATURE_COLUMNS: self._feature_columns,
            _FEATURE_RANGES: self._feature_ranges,
            _FEATURE_METADATA: feature_metadata_dict
        }
        with open(top_dir / _META_JSON, 'w') as file:
            json.dump(meta, file)
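
_save_metadata writes plain JSON, so loading it back is symmetric. A sketch of the inverse operation; the helper name is hypothetical and _META_JSON is the same constant used above:

    import json
    from pathlib import Path

    def _load_metadata_sketch(path):
        # Hypothetical inverse of _save_metadata: read the metadata
        # dict back from the same _META_JSON file in the directory.
        with open(Path(path) / _META_JSON, 'r') as file:
            return json.load(file)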
Example #13
    def _get_dataset(self):
        dashboard_dataset = Dataset()
        dashboard_dataset.task_type = self.task_type
        dashboard_dataset.categorical_features = self.categorical_features
        dashboard_dataset.class_names = convert_to_list(self._classes)

        if self._feature_metadata is not None:
            dashboard_dataset.feature_metadata = \
                self._feature_metadata.to_dict()
        else:
            dashboard_dataset.feature_metadata = None

        dashboard_dataset.data_balance_measures = \
            self._data_balance_manager.get_data()

        predicted_y = None
        feature_length = None

        dataset: pd.DataFrame = self.test.drop([self.target_column], axis=1)

        if isinstance(dataset, pd.DataFrame) and hasattr(dataset, 'columns'):
            self._dataframeColumns = dataset.columns
        try:
            list_dataset = convert_to_list(dataset)
        except Exception as ex:
            raise ValueError("Unsupported dataset type") from ex
        if dataset is not None and self.model is not None:
            try:
                predicted_y = self.model.predict(dataset)
            except Exception as ex:
                msg = "Model does not support predict method for given"
                "dataset type"
                raise ValueError(msg) from ex
            try:
                predicted_y = convert_to_list(predicted_y)
            except Exception as ex:
                raise ValueError(
                    "Model prediction output of unsupported type,") from ex
        if predicted_y is not None:
            if (self.task_type == "classification"
                    and dashboard_dataset.class_names is not None):
                predicted_y = [
                    dashboard_dataset.class_names.index(y) for y in predicted_y
                ]
            dashboard_dataset.predicted_y = predicted_y
        row_length = 0

        if list_dataset is not None:
            row_length, feature_length = np.shape(list_dataset)
            if row_length > 100000:
                raise ValueError("Exceeds maximum number of rows"
                                 "for visualization (100000)")
            if feature_length > 1000:
                raise ValueError("Exceeds maximum number of features for"
                                 " visualization (1000). Please regenerate the"
                                 " explanation using fewer features or"
                                 " initialize the dashboard without passing a"
                                 " dataset.")
            dashboard_dataset.features = list_dataset

        true_y = self.test[self.target_column]

        if true_y is not None and len(true_y) == row_length:
            if (self.task_type == "classification"
                    and dashboard_dataset.class_names is not None):
                true_y = [
                    dashboard_dataset.class_names.index(y) for y in true_y
                ]
            dashboard_dataset.true_y = convert_to_list(true_y)

        features = dataset.columns

        if features is not None:
            features = convert_to_list(features)
            if feature_length is not None and len(features) != feature_length:
                raise ValueError("Feature vector length mismatch:"
                                 " feature names length differs"
                                 " from local explanations dimension")
            dashboard_dataset.feature_names = features
        dashboard_dataset.target_column = self.target_column
        if is_classifier(self.model) and dataset is not None:
            try:
                probability_y = self.model.predict_proba(dataset)
            except Exception as ex:
                raise ValueError("Model does not support predict_proba method"
                                 " for given dataset type,") from ex
            try:
                probability_y = convert_to_list(probability_y)
            except Exception as ex:
                raise ValueError(
                    "Model predict_proba output of unsupported type,") from ex
            dashboard_dataset.probability_y = probability_y

        return dashboard_dataset
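
A compact restatement of the size limits _get_dataset enforces, as a standalone illustration (the helper name is hypothetical):

    import numpy as np

    def check_viz_limits_sketch(list_dataset):
        # Illustrative only: the row and feature caps applied above.
        row_length, feature_length = np.shape(list_dataset)
        if row_length > 100000:
            raise ValueError("Exceeds maximum number of rows"
                             " for visualization (100000)")
        if feature_length > 1000:
            raise ValueError("Exceeds maximum number of features"
                             " for visualization (1000)")
        return row_length, feature_length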
Example #14
    def __init__(self,
                 *,
                 sensitive_features,
                 y_true,
                 y_pred,
                 locale=None,
                 public_ip=None,
                 port=None,
                 fairness_metric_module=None,
                 fairness_metric_mapping=None):
        """Initialize the fairness dashboard."""

        metrics_module = FairnessMetricModule(
            module_name=fairness_metric_module,
            mapping=fairness_metric_mapping)

        if sensitive_features is None or y_true is None or y_pred is None:
            raise ValueError("Required parameters not provided")

        model_dict = convert_to_string_list_dict("Model {0}", y_pred, y_true)
        sf_dict = convert_to_string_list_dict("Sensitive Feature {0}",
                                              sensitive_features, y_true)

        # Make sure that things are as the TS layer expects
        self._y_true = convert_to_list(y_true)
        self._y_pred = list(model_dict.values())
        # Note transpose in the following
        dataset = (np.array(list(sf_dict.values())).T).tolist()

        if np.shape(self._y_true)[0] != np.shape(self._y_pred)[1]:
            raise ValueError("Predicted y does not match true y shape")

        if np.shape(self._y_true)[0] != np.shape(dataset)[0]:
            raise ValueError("Sensitive features shape does not match true y "
                             "shape")

        fairness_input = {
            "true_y": self._y_true,
            "model_names": list(model_dict.keys()),
            "predicted_ys": self._y_pred,
            "features": list(sf_dict.keys()),
            "dataset": dataset,
            "classification_methods": metrics_module.classification_methods,
            "regression_methods": metrics_module.regression_methods,
            "probability_methods": metrics_module.probability_methods,
        }

        super(FairnessDashboard, self).__init__(dashboard_type="Fairness",
                                                model_data=fairness_input,
                                                public_ip=public_ip,
                                                port=port,
                                                locale=locale)

        self.fairness_metrics_module = metrics_module

        def metrics():
            """
            Note:
                This function always calculates the error_function,
                if available, so that the value is cached in the MetricsCache

            Request attributes:
                binVector: the sensitive features binning vector
                metricKey: the metricKey that corresponds to the function that
                    will be calculated
                modelIndex: the model index used to index the predicted y's
                    by that model
            """
            try:
                data = request.get_json(force=True)

                if type(data["binVector"][0]) == np.int32:
                    data['binVector'] = [
                        str(bin_) for bin_ in data['binVector']
                    ]

                metric_name = data['metricKey']
                error_function_name = f"{metric_name} bounds"
                metric_function = \
                    self.fairness_metrics_module._metric_methods.get(
                        data["metricKey"]).get("function")
                metric_method = {metric_name: metric_function}
                error_function = \
                    self.fairness_metrics_module._metric_methods.get(
                        data["metricKey"]).get("error_function")
                if error_function is not None:
                    metric_method.update({error_function_name: error_function})

                metric_frame = self.fairness_metrics_module.MetricFrame(
                    metrics=metric_method,
                    y_true=self.model_data['true_y'],
                    y_pred=self.model_data['predicted_ys'][data["modelIndex"]],
                    sensitive_features=data["binVector"])

                result = {
                    "data": {
                        "global":
                        metric_frame.overall[metric_name],
                        "bins":
                        list([
                            entry for entry in list(
                                metric_frame.by_group.to_dict().values())
                            if not isinstance(entry[0], tuple)
                        ][0].values()),
                    }
                }
                if error_function_name in metric_method:
                    result["data"].update({
                        "bounds": {
                            "lower":
                            metric_frame.overall[error_function_name][0],
                            "upper":
                            metric_frame.overall[error_function_name][1],
                        },
                        # [(x1, y1), (x2, y2), (x3, y3)...]
                        "binBounds": [{
                            "lower": bounds[0],
                            "upper": bounds[1]
                        }
                            for bounds in list(
                                metric_frame.by_group[error_function_name]\
                                .to_dict().values())]
                    })
                return jsonify(result)
            except Exception as ex:
                import sys
                import traceback
                exc_type, exc_value, exc_traceback = sys.exc_info()

                return jsonify({
                    "error":
                    str(ex),
                    "stacktrace":
                    str(
                        repr(
                            traceback.format_exception(exc_type, exc_value,
                                                       exc_traceback))),
                    "locals":
                    str(locals()),
                })

        self.add_url_rule(metrics, '/metrics', methods=["POST"])
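
The metrics() handler reads a JSON body with binVector, metricKey, and modelIndex, as its docstring describes. A sketch of a client request against a locally running dashboard; the URL and metric key are assumptions:

    import requests

    payload = {
        "binVector": ["0", "1", "0", "1"],  # one bin label per sample
        "metricKey": "accuracy_score",      # hypothetical metric key
        "modelIndex": 0,                    # selects among predicted_ys
    }
    response = requests.post("http://localhost:5000/metrics", json=payload)
    print(response.json())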
Example #15
    def _get_interpret(self, explanation, evaluation_examples=None):
        interpretation = ModelExplanationData()

        # List of explanations, key of explanation type is "explanation_type"
        if explanation is not None:
            mli_explanations = explanation.data(-1)["mli"]
        else:
            mli_explanations = None
        local_explanation = self._find_first_explanation(
            ExplanationKeys.LOCAL_EXPLANATION_KEY, mli_explanations)
        global_explanation = self._find_first_explanation(
            ExplanationKeys.GLOBAL_EXPLANATION_KEY, mli_explanations)
        ebm_explanation = self._find_first_explanation(
            ExplanationKeys.EBM_GLOBAL_EXPLANATION_KEY, mli_explanations)

        if explanation is not None and hasattr(explanation, 'method'):
            interpretation.method = explanation.method

        local_dim = None

        if local_explanation is not None or global_explanation is not None\
                or ebm_explanation is not None:
            interpretation.precomputedExplanations = PrecomputedExplanations()

        if local_explanation is not None:
            try:
                local_feature_importance = FeatureImportance()
                local_feature_importance.scores = convert_to_list(
                    local_explanation["scores"])
                if np.shape(local_feature_importance.scores)[-1] > 1000:
                    raise ValueError("Exceeds maximum number of features for "
                                     "visualization (1000). Please regenerate"
                                     " the explanation using fewer features.")
                local_feature_importance.intercept = convert_to_list(
                    local_explanation["intercept"])
                # We can ignore perf explanation data.
                # Note if it is added back at any point,
                # the numpy values will need to be converted to python,
                # otherwise serialization fails.
                local_explanation["perf"] = None
                interpretation.precomputedExplanations.localFeatureImportance\
                    = local_feature_importance
            except Exception as ex:
                raise ValueError("Unsupported local explanation type") from ex

            if evaluation_examples is not None:
                _feature_length = evaluation_examples.shape[1]
                _row_length = evaluation_examples.shape[0]
                local_dim = np.shape(local_feature_importance.scores)
                if len(local_dim) != 2 and len(local_dim) != 3:
                    raise ValueError(
                        "Local explanation expected to be a 2D or 3D list")
                if (len(local_dim) == 2 and (local_dim[1] != _feature_length
                                             or local_dim[0] != _row_length)):
                    raise ValueError("Shape mismatch: local explanation"
                                     "length differs from dataset")
                if (len(local_dim) == 3 and (local_dim[2] != _feature_length
                                             or local_dim[1] != _row_length)):
                    raise ValueError("Shape mismatch: local explanation"
                                     " length differs from dataset")
        if global_explanation is not None:
            try:
                global_feature_importance = FeatureImportance()
                global_feature_importance.scores = convert_to_list(
                    global_explanation["scores"])
                if 'intercept' in global_explanation:
                    global_feature_importance.intercept\
                        = convert_to_list(
                            global_explanation["intercept"])
                interpretation.precomputedExplanations.globalFeatureImportance\
                    = global_feature_importance
            except Exception as ex:
                raise ValueError("Unsupported global explanation type") from ex
        if ebm_explanation is not None:
            try:
                ebm_feature_importance = EBMGlobalExplanation()
                ebm_feature_importance.feature_list\
                    = ebm_explanation["feature_list"]
                interpretation.precomputedExplanations.ebmGlobalExplanation\
                    = ebm_feature_importance

            except Exception as ex:
                raise ValueError("Unsupported ebm explanation type") from ex
        return interpretation
Example #16
    def __init__(self, explanation, model, dataset, true_y, classes,
                 features):
        """Initialize the Explanation Dashboard Input.

        :param explanation: An object that represents an explanation.
        :type explanation: ExplanationMixin
        :param model: An object that represents a model.
            It is assumed that for the classification case
            it has a method of predict_proba() returning
            the prediction probabilities for each
            class and for the regression case a method of predict()
            returning the prediction value.
        :type model: object
        :param dataset: A matrix of feature vector examples
            (# examples x # features), the same samples
            used to build the explanation.
            Will overwrite any set on explanation object already.
            Must have fewer than
            100000 rows and fewer than 1000 columns.
            Note dashboard may become slow or crash for more than 10000 rows.
        :type dataset: numpy.ndarray or list[][]
        :param true_y: The true labels for the provided dataset.
            Will overwrite any set on
            explanation object already.
        :type true_y: numpy.ndarray or list[]
        :param classes: The class names.
        :type classes: numpy.ndarray or list[]
        :param features: Feature names.
        :type features: numpy.ndarray or list[]
        """
        self._model = model
        self._is_classifier = is_classifier(model)
        self._dataframeColumns = None
        self.dashboard_input = {}
        # List of explanations, key of explanation type is "explanation_type"
        if explanation is not None:
            self._mli_explanations = explanation.data(-1)["mli"]
        else:
            self._mli_explanations = None
        local_explanation = self._find_first_explanation(
            ExplanationDashboardInterface.MLI_LOCAL_EXPLANATION_KEY)
        global_explanation = self._find_first_explanation(
            ExplanationDashboardInterface.MLI_GLOBAL_EXPLANATION_KEY)
        ebm_explanation = self._find_first_explanation(
            ExplanationDashboardInterface.MLI_EBM_GLOBAL_EXPLANATION_KEY)
        dataset_explanation = self._find_first_explanation(
            ExplanationDashboardInterface.MLI_EXPLANATION_DATASET_KEY)

        if explanation is not None and hasattr(explanation, 'method'):
            self.dashboard_input[ExplanationDashboardInterface.
                                 EXPLANATION_METHOD] = explanation.method

        predicted_y = None
        feature_length = None
        if dataset_explanation is not None:
            if dataset is None:
                dataset = dataset_explanation[
                    ExplanationDashboardInterface.MLI_DATASET_X_KEY]
            if true_y is None:
                true_y = dataset_explanation[
                    ExplanationDashboardInterface.MLI_DATASET_Y_KEY]

        if isinstance(dataset, pd.DataFrame) and hasattr(dataset, 'columns'):
            self._dataframeColumns = dataset.columns
            self._dfdtypes = dataset.dtypes
        try:
            list_dataset = convert_to_list(dataset, EXP_VIZ_ERR_MSG)
        except Exception as ex:
            ex_str = _format_exception(ex)
            raise ValueError(
                "Unsupported dataset type, inner error: {}".format(ex_str))
        if dataset is not None and model is not None:
            try:
                predicted_y = model.predict(dataset)
            except Exception as ex:
                ex_str = _format_exception(ex)
                msg = "Model does not support predict method for given"
                "dataset type, inner error: {}".format(ex_str)
                raise ValueError(msg)
            try:
                predicted_y = convert_to_list(predicted_y, EXP_VIZ_ERR_MSG)
            except Exception as ex:
                ex_str = _format_exception(ex)
                raise ValueError("Model prediction output of unsupported type,"
                                 "inner error: {}".format(ex_str))
        if predicted_y is not None:
            self.dashboard_input[
                ExplanationDashboardInterface.PREDICTED_Y] = predicted_y
        row_length = 0
        if list_dataset is not None:
            row_length, feature_length = np.shape(list_dataset)
            if row_length > 100000:
                raise ValueError("Exceeds maximum number of rows"
                                 "for visualization (100000)")
            if feature_length > 1000:
                warnings.warn("Exceeds maximum number of features for"
                              " visualization (1000)."
                              " Please regenerate the"
                              " explanation using fewer features or"
                              " initialize the dashboard without"
                              " passing a dataset. Dashboard will"
                              " show limited view.")
            else:
                self.dashboard_input[ExplanationDashboardInterface.
                                     TRAINING_DATA] = serialize_json_safe(
                                         list_dataset)
            self.dashboard_input[ExplanationDashboardInterface.
                                 IS_CLASSIFIER] = self._is_classifier

        local_dim = None

        if true_y is not None and len(true_y) == row_length:
            self.dashboard_input[
                ExplanationDashboardInterface.TRUE_Y] = convert_to_list(
                    true_y, EXP_VIZ_ERR_MSG)

        if local_explanation is not None:
            try:
                local_explanation["scores"] = convert_to_list(
                    local_explanation["scores"], EXP_VIZ_ERR_MSG)
                local_explanation["intercept"] = convert_to_list(
                    local_explanation["intercept"], EXP_VIZ_ERR_MSG)
                # We can ignore perf explanation data.
                # Note if it is added back at any point,
                # the numpy values will need to be converted to python,
                # otherwise serialization fails.
                local_explanation["perf"] = None
                self.dashboard_input[ExplanationDashboardInterface.
                                     LOCAL_EXPLANATIONS] = local_explanation
            except Exception as ex:
                ex_str = _format_exception(ex)
                raise ValueError("Unsupported local explanation type,"
                                 "inner error: {}".format(ex_str))
            if list_dataset is not None:
                local_dim = np.shape(local_explanation["scores"])
                if len(local_dim) != 2 and len(local_dim) != 3:
                    raise ValueError(
                        "Local explanation expected to be a 2D or 3D list")
                if len(local_dim) == 2 and (local_dim[1] != feature_length
                                            or local_dim[0] != row_length):
                    raise ValueError("Shape mismatch: local explanation"
                                     "length differs from dataset")
                if len(local_dim) == 3 and (local_dim[2] != feature_length
                                            or local_dim[1] != row_length):
                    raise ValueError("Shape mismatch: local explanation"
                                     " length differs from dataset")
        if local_explanation is None and global_explanation is not None:
            try:
                global_explanation["scores"] = convert_to_list(
                    global_explanation["scores"], EXP_VIZ_ERR_MSG)
                if 'intercept' in global_explanation:
                    global_explanation["intercept"] = convert_to_list(
                        global_explanation["intercept"], EXP_VIZ_ERR_MSG)
                self.dashboard_input[ExplanationDashboardInterface.
                                     GLOBAL_EXPLANATION] = global_explanation
            except Exception as ex:
                ex_str = _format_exception(ex)
                raise ValueError("Unsupported global explanation type,"
                                 "inner error: {}".format(ex_str))
        if ebm_explanation is not None:
            try:
                self.dashboard_input[ExplanationDashboardInterface.
                                     EBM_EXPLANATION] = ebm_explanation
            except Exception as ex:
                ex_str = _format_exception(ex)
                raise ValueError(
                    "Unsupported ebm explanation type: {}".format(ex_str))

        if features is None\
                and explanation is not None\
                and hasattr(explanation, 'features')\
                and explanation.features is not None:
            features = explanation.features
        if features is not None:
            features = convert_to_list(features, EXP_VIZ_ERR_MSG)
            if feature_length is not None and len(features) != feature_length:
                raise ValueError("Feature vector length mismatch:"
                                 " feature names length differs"
                                 " from local explanations dimension")
            self.dashboard_input[
                ExplanationDashboardInterface.FEATURE_NAMES] = features
        if classes is None\
                and explanation is not None\
                and hasattr(explanation, 'classes')\
                and explanation.classes is not None:
            classes = explanation.classes
        if classes is not None:
            classes = convert_to_list(classes, EXP_VIZ_ERR_MSG)
            if local_dim is not None and len(classes) != local_dim[0]:
                raise ValueError("Class vector length mismatch:"
                                 "class names length differs from"
                                 "local explanations dimension")
            self.dashboard_input[
                ExplanationDashboardInterface.CLASS_NAMES] = classes
        if is_classifier(model) and dataset is not None:
            try:
                probability_y = model.predict_proba(dataset)
            except Exception as ex:
                ex_str = _format_exception(ex)
                raise ValueError("Model does not support predict_proba method"
                                 " for given dataset type,"
                                 " inner error: {}".format(ex_str))
            try:
                probability_y = convert_to_list(probability_y, EXP_VIZ_ERR_MSG)
            except Exception as ex:
                ex_str = _format_exception(ex)
                raise ValueError(
                    "Model predict_proba output of unsupported type,"
                    "inner error: {}".format(ex_str))
            self.dashboard_input[
                ExplanationDashboardInterface.PROBABILITY_Y] = probability_y
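
A hedged usage sketch for this constructor; the class name (inferred from the docstring) and the scikit-learn setup are assumptions:

    from sklearn.datasets import load_iris
    from sklearn.linear_model import LogisticRegression

    # Illustrative only: exercise the constructor above with a small
    # model and no precomputed explanation.
    iris = load_iris()
    model = LogisticRegression(max_iter=1000).fit(iris.data, iris.target)

    dashboard_input = ExplanationDashboardInput(  # class name assumed
        explanation=None,
        model=model,
        dataset=iris.data,
        true_y=iris.target,
        classes=list(iris.target_names),
        features=list(iris.feature_names))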
Example #17
    def input_explanation_data(self, list_dataset, classes):
        # List of explanations, key of explanation type is "explanation_type"
        local_explanation = self._find_first_explanation(
            ExplanationDashboardInterface.MLI_LOCAL_EXPLANATION_KEY)
        global_explanation = self._find_first_explanation(
            ExplanationDashboardInterface.MLI_GLOBAL_EXPLANATION_KEY)
        ebm_explanation = self._find_first_explanation(
            ExplanationDashboardInterface.MLI_EBM_GLOBAL_EXPLANATION_KEY)

        if local_explanation is not None:
            try:
                local_explanation["scores"] = convert_to_list(
                    local_explanation["scores"])
                if np.shape(local_explanation["scores"])[-1] > 1000:
                    raise ValueError("Exceeds maximum number of features for "
                                     "visualization (1000). Please regenerate"
                                     " the explanation using fewer features.")
                local_explanation["intercept"] = convert_to_list(
                    local_explanation["intercept"])
                # We can ignore perf explanation data.
                # Note if it is added back at any point,
                # the numpy values will need to be converted to python,
                # otherwise serialization fails.
                local_explanation["perf"] = None
                self.dashboard_input[ExplanationDashboardInterface.
                                     LOCAL_EXPLANATIONS] = local_explanation
            except Exception as ex:
                ex_str = _format_exception(ex)
                raise ValueError("Unsupported local explanation type,"
                                 "inner error: {}".format(ex_str))
            if list_dataset is not None:
                row_length, feature_length = np.shape(list_dataset)
                local_dim = np.shape(local_explanation["scores"])
                if len(local_dim) != 2 and len(local_dim) != 3:
                    raise ValueError(
                        "Local explanation expected to be a 2D or 3D list")
                if len(local_dim) == 2 and (local_dim[1] != feature_length
                                            or local_dim[0] != row_length):
                    raise ValueError("Shape mismatch: local explanation"
                                     "length differs from dataset")
                if len(local_dim) == 3 and (local_dim[2] != feature_length
                                            or local_dim[1] != row_length):
                    raise ValueError("Shape mismatch: local explanation"
                                     " length differs from dataset")
                if classes is not None and len(classes) != local_dim[0]:
                    raise ValueError("Class vector length mismatch:"
                                     "class names length differs from"
                                     "local explanations dimension")
        if local_explanation is None and global_explanation is not None:
            try:
                global_explanation["scores"] = convert_to_list(
                    global_explanation["scores"])
                if 'intercept' in global_explanation:
                    global_explanation["intercept"] = convert_to_list(
                        global_explanation["intercept"])
                self.dashboard_input[ExplanationDashboardInterface.
                                     GLOBAL_EXPLANATION] = global_explanation
            except Exception as ex:
                ex_str = _format_exception(ex)
                raise ValueError("Unsupported global explanation type,"
                                 "inner error: {}".format(ex_str))
        if ebm_explanation is not None:
            try:
                self.dashboard_input[ExplanationDashboardInterface.
                                     EBM_EXPLANATION] = ebm_explanation
            except Exception as ex:
                ex_str = _format_exception(ex)
                raise ValueError(
                    "Unsupported ebm explanation type: {}".format(ex_str))
Example #18
    def setup_local(self, explanation, model, dataset, true_y, classes,
                    features, categorical_features, true_y_dataset, pred_y,
                    pred_y_dataset, model_task, metric, max_depth, num_leaves,
                    min_child_samples, sample_dataset, model_available):
        full_dataset = dataset
        if true_y_dataset is None:
            full_true_y = true_y
        else:
            full_true_y = true_y_dataset
        if pred_y_dataset is None:
            full_pred_y = pred_y
        else:
            full_pred_y = pred_y_dataset
        has_explanation = explanation is not None
        probability_y = None

        if has_explanation:
            if classes is None:
                has_classes_attr = hasattr(explanation, 'classes')
                if has_classes_attr and explanation.classes is not None:
                    classes = explanation.classes
            dataset, true_y = self.input_explanation(explanation, dataset,
                                                     true_y)
            row_length = len(dataset)
            # Only check dataset on explanation for row length bounds
            if row_length > 100000:
                raise ValueError("Exceeds maximum number of rows"
                                 "for visualization (100000)")
        elif sample_dataset is not None:
            dataset = sample_dataset

        if isinstance(dataset, pd.DataFrame) and hasattr(dataset, 'columns'):
            self._dataframeColumns = dataset.columns
            self._dfdtypes = dataset.dtypes
        try:
            list_dataset = convert_to_list(dataset)
        except Exception as ex:
            ex_str = _format_exception(ex)
            raise ValueError(
                "Unsupported dataset type, inner error: {}".format(ex_str))

        if has_explanation:
            self.input_explanation_data(list_dataset, classes)
            if features is None and hasattr(explanation, 'features'):
                features = explanation.features

        if model_available:
            predicted_y = self.compute_predicted_y(model, dataset)
        else:
            predicted_y = self.predicted_y_to_list(pred_y)

        self.setup_visualization_input(classes, predicted_y, list_dataset,
                                       true_y, features)

        if model_available and is_classifier(model) and \
                dataset is not None:
            try:
                probability_y = model.predict_proba(dataset)
            except Exception as ex:
                ex_str = _format_exception(ex)
                raise ValueError("Model does not support predict_proba method"
                                 " for given dataset type,"
                                 " inner error: {}".format(ex_str))
            try:
                probability_y = convert_to_list(probability_y)
            except Exception as ex:
                ex_str = _format_exception(ex)
                raise ValueError(
                    "Model predict_proba output of unsupported type,"
                    "inner error: {}".format(ex_str))
            self.dashboard_input[
                ExplanationDashboardInterface.PROBABILITY_Y] = probability_y
        if model_available:
            self._error_analyzer = ModelAnalyzer(model, full_dataset,
                                                 full_true_y, features,
                                                 categorical_features,
                                                 model_task, metric, classes)
        else:
            # Model task cannot be unknown when passing predictions
            # Assume classification for backwards compatibility
            if model_task == ModelTask.UNKNOWN:
                model_task = ModelTask.CLASSIFICATION
            self._error_analyzer = PredictionsAnalyzer(
                full_pred_y, full_dataset, full_true_y, features,
                categorical_features, model_task, metric, classes)
        if self._categorical_features:
            self.dashboard_input[ExplanationDashboardInterface.
                                 CATEGORICAL_MAP] = serialize_json_safe(
                                     self._error_analyzer.category_dictionary)
        # Compute metrics on all data cohort
        if self._error_analyzer.model_task == ModelTask.CLASSIFICATION:
            if self._error_analyzer.metric is None:
                metric = Metrics.ERROR_RATE
            else:
                metric = self._error_analyzer.metric
        else:
            if self._error_analyzer.metric is None:
                metric = Metrics.MEAN_SQUARED_ERROR
            else:
                metric = self._error_analyzer.metric
        if model_available:
            full_pred_y = self.compute_predicted_y(model, full_dataset)
        # If we don't have an explanation or model/probabilities specified
        # we can try to use model task to figure out the method
        if not has_explanation and probability_y is None:
            method = MethodConstants.REGRESSION
            if self._error_analyzer.model_task == ModelTask.CLASSIFICATION:
                if len(np.unique(predicted_y)) > 2:
                    method = MethodConstants.MULTICLASS
                else:
                    method = MethodConstants.BINARY
            self.dashboard_input[
                ErrorAnalysisDashboardInterface.METHOD] = method
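
The method selection at the end hinges on the number of distinct predicted labels. Standalone, the decision reads (illustrative data):

    import numpy as np

    # Illustrative: the binary-vs-multiclass decision used above.
    predicted_y = [0, 2, 1, 0, 2]
    method = "multiclass" if len(np.unique(predicted_y)) > 2 else "binary"
    assert method == "multiclass"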