def test_serialize_json_safe_basic(self):
    """Check that flat lists of ints and of strings serialize unchanged."""
    samples = (
        [0, 1, 2, 3, 4, 5],
        ['a', 'b', 'a', 'c', 'a', 'b'],
    )
    for sample in samples:
        # Compare against an independent copy so the expectation does not
        # alias the object handed to the serializer.
        expected = list(sample)
        assert serialize_json_safe(sample) == expected
def test_serialize_via_json_timestamp(self):
    """Check that pandas timestamps survive ``json.dumps`` once serialized.

    Covers a bare ``pd.Timestamp`` and one wrapped in a numpy array; in
    both cases the dumped text must mention the year.
    """
    def dump(payload):
        return json.dumps(serialize_json_safe(payload))

    stamp = pd.Timestamp(2020, 1, 1)
    assert isinstance(stamp, pd.Timestamp)
    stamp_json = dump(stamp)
    assert stamp_json is not None
    assert "2020" in stamp_json

    stamp_array = np.array([pd.Timestamp(2020, 1, 1)])
    array_json = dump(stamp_array)
    assert array_json is not None
    assert "2020" in array_json
def test_serialize_json_safe_aggregate_types(self):
    """Check serialization of a dict, a tuple and a 2-D numpy array."""
    mapping = {'a': [1, 2, 3], 'c': 'b'}
    assert serialize_json_safe(mapping) == mapping

    pair = ('a', [1, 2, 3])
    assert serialize_json_safe(pair) == pair

    # The array is expected to come back as its nested-list form.
    matrix = np.array([[1, 2, 3], [4, 5, 6]])
    assert serialize_json_safe(matrix) == matrix.tolist()
def test_serialize_json_safe_missing(self):
    """Check that NaN and infinity entries are serialized as 0."""
    cases = (
        ([0, np.nan, 2, 3, 4, 5], [0, 0, 2, 3, 4, 5]),
        ([0, np.inf, 2, 3, 4, 5], [0, 0, 2, 3, 4, 5]),
        (['a', 'b', 'a', np.nan, 'a', 'b'], ['a', 'b', 'a', 0, 'a', 'b']),
    )
    for raw, expected in cases:
        assert serialize_json_safe(raw) == expected
def _get_dashboard_data(self):
    """Return the Python dict representation of the dashboard object.

    The serialized form is stored on ``self._dashboard_data`` and reused
    on later calls for as long as that attribute is not None.
    """
    if self._dashboard_data is not None:
        return self._dashboard_data
    # First request: fetch the dashboard object and cache its serialization.
    self._dashboard_data = serialize_json_safe(self._get_dashboard_object())
    return self._dashboard_data
def test_embedded_object(self):
    """Check that nested plain objects serialize to nested attribute dicts."""
    class A:
        def __init__(self):
            self.a_data = 'a'

    class B:
        def __init__(self):
            self.b_data = A()

    wrapped = {'B': B()}
    expected = {'B': {'b_data': {'a_data': 'a'}}}
    assert serialize_json_safe(wrapped) == expected
def json_converter(obj):
    """Convert an object into a form that can be saved as json.

    An ErrorReport is returned as its instance ``__dict__``; anything else
    is delegated to ``serialize_json_safe``.

    :param obj: Object to convert to a dictionary which can be saved
        as json.
    :type obj: object
    :return: The converted dictionary which can be saved as json.
    :rtype: dict
    """
    if not isinstance(obj, ErrorReport):
        return serialize_json_safe(obj)
    # Note this is the report's own attribute dict, not a copy.
    return obj.__dict__
def setup_visualization_input(self, classes, predicted_y,
                              list_dataset, true_y, features):
    """Populate ``self.dashboard_input`` with the visualization inputs.

    Each argument is optional; a None argument leaves its entry unset.
    When class names are supplied and the first label is one of them,
    the labels are replaced by their index in ``classes``.

    :param classes: The class names, or None.
    :param predicted_y: The predicted labels, or None. When the labels are
        class names they are converted to class indices in place.
    :param list_dataset: The dataset as a two-dimensional list
        (rows x features), or None.
    :param true_y: The true labels, or None. Only stored when its length
        equals the number of dataset rows (0 when no dataset is given).
    :param features: The feature names, or None.
    :raises ValueError: If the dataset has more than 1000 features, or if
        the number of feature names differs from the number of dataset
        features.
    """
    class_to_index = None
    if classes is not None:
        classes = convert_to_list(classes)
        self.dashboard_input[
            ExplanationDashboardInterface.CLASS_NAMES] = classes
        class_to_index = {name: idx for idx, name in enumerate(classes)}

    if predicted_y is not None:
        # If classes specified, convert predicted_y to numeric
        # representation. The length guard keeps an empty prediction
        # vector from raising IndexError on the [0] probe.
        if (class_to_index is not None and len(predicted_y) > 0
                and predicted_y[0] in class_to_index):
            for idx, label in enumerate(predicted_y):
                predicted_y[idx] = class_to_index[label]
        self.dashboard_input[
            ExplanationDashboardInterface.PREDICTED_Y] = predicted_y

    row_length = 0
    feature_length = None
    if list_dataset is not None:
        row_length, feature_length = np.shape(list_dataset)
        if feature_length > 1000:
            raise ValueError("Exceeds maximum number of features for"
                             " visualization (1000). Please regenerate the"
                             " explanation using fewer features or"
                             " initialize the dashboard without passing a"
                             " dataset.")
        self.dashboard_input[
            ExplanationDashboardInterface.TRAINING_DATA
        ] = serialize_json_safe(list_dataset)

    if true_y is not None and len(true_y) == row_length:
        list_true_y = convert_to_list(true_y)
        # If classes specified, convert true_y to numeric representation.
        # Empty labels are stored as-is instead of raising IndexError.
        if (class_to_index is not None and len(list_true_y) > 0
                and list_true_y[0] in class_to_index):
            for idx, label in enumerate(list_true_y):
                list_true_y[idx] = class_to_index[label]
        self.dashboard_input[
            ExplanationDashboardInterface.TRUE_Y] = list_true_y

    if features is not None:
        features = convert_to_list(features)
        if feature_length is not None and len(features) != feature_length:
            raise ValueError("Feature vector length mismatch:"
                             " feature names length differs"
                             " from local explanations dimension")
        # NOTE(review): FEATURE_NAMES is referenced unqualified here;
        # presumably a module-level constant - confirm it is defined or
        # imported in this module.
        self.dashboard_input[FEATURE_NAMES] = features
def __init__(self, explanation, model, dataset, true_y, classes, features):
    """Initialize the Explanation Dashboard Input.

    Gathers the explanation data, dataset, labels, model outputs, feature
    names and class names into ``self.dashboard_input``.

    :param explanation: An object that represents an explanation.
    :type explanation: ExplanationMixin
    :param model: An object that represents a model. It is assumed that
        for the classification case it has a method of predict_proba()
        returning the prediction probabilities for each class and for
        the regression case a method of predict() returning the
        prediction value.
    :type model: object
    :param dataset: A matrix of feature vector examples
        (# examples x # features), the same samples used to build the
        explanation. Will overwrite any set on explanation object
        already. A dataset with more than 100000 rows is rejected; with
        more than 1000 columns a warning is issued and the dataset is
        left out of the dashboard input. Note dashboard may become slow
        or crash for more than 10000 rows.
    :type dataset: numpy.ndarray or list[][]
    :param true_y: The true labels for the provided dataset. Will
        overwrite any set on explanation object already.
    :type true_y: numpy.ndarray or list[]
    :param classes: The class names.
    :type classes: numpy.ndarray or list[]
    :param features: Feature names.
    :type features: numpy.ndarray or list[]
    :raises ValueError: If the dataset, model output or explanation data
        cannot be converted to lists, if the model's predict or
        predict_proba call fails, if the dataset has more than 100000
        rows, or if the explanation, feature name or class name
        dimensions do not match.
    """
    self._model = model
    self._is_classifier = is_classifier(model)
    self._dataframeColumns = None
    self.dashboard_input = {}
    # List of explanations, key of explanation type is "explanation_type"
    if explanation is not None:
        self._mli_explanations = explanation.data(-1)["mli"]
    else:
        self._mli_explanations = None
    local_explanation = self._find_first_explanation(
        ExplanationDashboardInterface.MLI_LOCAL_EXPLANATION_KEY)
    global_explanation = self._find_first_explanation(
        ExplanationDashboardInterface.MLI_GLOBAL_EXPLANATION_KEY)
    ebm_explanation = self._find_first_explanation(
        ExplanationDashboardInterface.MLI_EBM_GLOBAL_EXPLANATION_KEY)
    dataset_explanation = self._find_first_explanation(
        ExplanationDashboardInterface.MLI_EXPLANATION_DATASET_KEY)

    if explanation is not None and hasattr(explanation, 'method'):
        self.dashboard_input[
            ExplanationDashboardInterface.EXPLANATION_METHOD
        ] = explanation.method

    predicted_y = None
    feature_length = None
    # Fall back to the data stored on the explanation only for the
    # arguments the caller did not supply.
    if dataset_explanation is not None:
        if dataset is None:
            dataset = dataset_explanation[
                ExplanationDashboardInterface.MLI_DATASET_X_KEY]
        if true_y is None:
            true_y = dataset_explanation[
                ExplanationDashboardInterface.MLI_DATASET_Y_KEY]

    if isinstance(dataset, pd.DataFrame) and hasattr(dataset, 'columns'):
        self._dataframeColumns = dataset.columns
        self._dfdtypes = dataset.dtypes
    try:
        list_dataset = convert_to_list(dataset, EXP_VIZ_ERR_MSG)
    except Exception as ex:
        ex_str = _format_exception(ex)
        raise ValueError(
            "Unsupported dataset type, inner error: {}".format(ex_str)
        ) from ex

    if dataset is not None and model is not None:
        try:
            predicted_y = model.predict(dataset)
        except Exception as ex:
            ex_str = _format_exception(ex)
            # Parenthesized so both fragments form a single format string
            # and the inner error is always included in the message.
            msg = ("Model does not support predict method for given"
                   " dataset type, inner error: {}").format(ex_str)
            raise ValueError(msg) from ex
        try:
            predicted_y = convert_to_list(predicted_y, EXP_VIZ_ERR_MSG)
        except Exception as ex:
            ex_str = _format_exception(ex)
            raise ValueError(
                "Model prediction output of unsupported type,"
                " inner error: {}".format(ex_str)) from ex
    if predicted_y is not None:
        self.dashboard_input[
            ExplanationDashboardInterface.PREDICTED_Y] = predicted_y

    row_length = 0
    if list_dataset is not None:
        row_length, feature_length = np.shape(list_dataset)
        if row_length > 100000:
            raise ValueError("Exceeds maximum number of rows"
                             " for visualization (100000)")
        if feature_length > 1000:
            # Too many features only degrades the view: warn and leave
            # the training data out instead of failing.
            warnings.warn("Exceeds maximum number of features for"
                          " visualization (1000)."
                          " Please regenerate the"
                          " explanation using fewer features or"
                          " initialize the dashboard without"
                          " passing a dataset. Dashboard will"
                          " show limited view.")
        else:
            self.dashboard_input[
                ExplanationDashboardInterface.TRAINING_DATA
            ] = serialize_json_safe(list_dataset)
            self.dashboard_input[
                ExplanationDashboardInterface.IS_CLASSIFIER
            ] = self._is_classifier

    local_dim = None

    if true_y is not None and len(true_y) == row_length:
        self.dashboard_input[
            ExplanationDashboardInterface.TRUE_Y
        ] = convert_to_list(true_y, EXP_VIZ_ERR_MSG)

    if local_explanation is not None:
        try:
            local_explanation["scores"] = convert_to_list(
                local_explanation["scores"], EXP_VIZ_ERR_MSG)
            local_explanation["intercept"] = convert_to_list(
                local_explanation["intercept"], EXP_VIZ_ERR_MSG)
            # We can ignore perf explanation data.
            # Note if it is added back at any point,
            # the numpy values will need to be converted to python,
            # otherwise serialization fails.
            local_explanation["perf"] = None
            self.dashboard_input[
                ExplanationDashboardInterface.LOCAL_EXPLANATIONS
            ] = local_explanation
        except Exception as ex:
            ex_str = _format_exception(ex)
            raise ValueError("Unsupported local explanation type,"
                             " inner error: {}".format(ex_str)) from ex
        if list_dataset is not None:
            local_dim = np.shape(local_explanation["scores"])
            if len(local_dim) != 2 and len(local_dim) != 3:
                raise ValueError(
                    "Local explanation expected to be a 2D or 3D list")
            # 2D scores are compared as (rows, features).
            if len(local_dim) == 2 and (local_dim[1] != feature_length or
                                        local_dim[0] != row_length):
                raise ValueError("Shape mismatch: local explanation"
                                 " length differs from dataset")
            # 3D scores carry an extra leading axis before (rows, features).
            if len(local_dim) == 3 and (local_dim[2] != feature_length or
                                        local_dim[1] != row_length):
                raise ValueError("Shape mismatch: local explanation"
                                 " length differs from dataset")

    # The global explanation is only forwarded when no local one exists.
    if local_explanation is None and global_explanation is not None:
        try:
            global_explanation["scores"] = convert_to_list(
                global_explanation["scores"], EXP_VIZ_ERR_MSG)
            if 'intercept' in global_explanation:
                global_explanation["intercept"] = convert_to_list(
                    global_explanation["intercept"], EXP_VIZ_ERR_MSG)
            self.dashboard_input[
                ExplanationDashboardInterface.GLOBAL_EXPLANATION
            ] = global_explanation
        except Exception as ex:
            ex_str = _format_exception(ex)
            raise ValueError("Unsupported global explanation type,"
                             " inner error: {}".format(ex_str)) from ex

    if ebm_explanation is not None:
        try:
            self.dashboard_input[
                ExplanationDashboardInterface.EBM_EXPLANATION
            ] = ebm_explanation
        except Exception as ex:
            ex_str = _format_exception(ex)
            raise ValueError(
                "Unsupported ebm explanation type: {}".format(ex_str)
            ) from ex

    if features is None\
            and explanation is not None\
            and hasattr(explanation, 'features')\
            and explanation.features is not None:
        features = explanation.features
    if features is not None:
        features = convert_to_list(features, EXP_VIZ_ERR_MSG)
        if feature_length is not None and len(features) != feature_length:
            raise ValueError("Feature vector length mismatch:"
                             " feature names length differs"
                             " from local explanations dimension")
        self.dashboard_input[
            ExplanationDashboardInterface.FEATURE_NAMES] = features

    if classes is None\
            and explanation is not None\
            and hasattr(explanation, 'classes')\
            and explanation.classes is not None:
        classes = explanation.classes
    if classes is not None:
        classes = convert_to_list(classes, EXP_VIZ_ERR_MSG)
        if local_dim is not None and len(classes) != local_dim[0]:
            raise ValueError("Class vector length mismatch:"
                             " class names length differs from"
                             " local explanations dimension")
        self.dashboard_input[
            ExplanationDashboardInterface.CLASS_NAMES] = classes

    if is_classifier(model) and dataset is not None:
        try:
            probability_y = model.predict_proba(dataset)
        except Exception as ex:
            ex_str = _format_exception(ex)
            raise ValueError("Model does not support predict_proba method"
                             " for given dataset type,"
                             " inner error: {}".format(ex_str)) from ex
        try:
            probability_y = convert_to_list(probability_y, EXP_VIZ_ERR_MSG)
        except Exception as ex:
            ex_str = _format_exception(ex)
            raise ValueError(
                "Model predict_proba output of unsupported type,"
                " inner error: {}".format(ex_str)) from ex
        self.dashboard_input[
            ExplanationDashboardInterface.PROBABILITY_Y] = probability_y
def setup_local(self, explanation, model, dataset, true_y, classes,
                features, categorical_features, true_y_dataset,
                pred_y, pred_y_dataset, model_task, metric, max_depth,
                num_leaves, min_child_samples, sample_dataset,
                model_available):
    """Set up the dashboard input and the error analyzer.

    Fills ``self.dashboard_input`` through ``setup_visualization_input``,
    adds prediction probabilities for classifiers, and creates
    ``self._error_analyzer`` as a ModelAnalyzer when a model is available
    or a PredictionsAnalyzer otherwise.

    :param explanation: The explanation object, or None. When given, it
        supplies classes and features that were not passed explicitly.
    :param model: The model used for predictions when ``model_available``.
    :param dataset: The full dataset handed to the error analyzer.
    :param true_y: The true labels.
    :param classes: The class names, or None.
    :param features: The feature names, or None.
    :param categorical_features: Forwarded to the error analyzer.
    :param true_y_dataset: True labels for the full dataset; ``true_y`` is
        used when this is None.
    :param pred_y: Predicted labels, used when no model is available.
    :param pred_y_dataset: Predicted labels for the full dataset;
        ``pred_y`` is used when this is None.
    :param model_task: The model task; ModelTask.UNKNOWN is treated as
        classification when only predictions are passed.
    :param metric: Forwarded to the error analyzer.
    :param max_depth: Not read in this method.
    :param num_leaves: Not read in this method.
    :param min_child_samples: Not read in this method.
    :param sample_dataset: Dataset to display instead of ``dataset`` when
        there is no explanation.
    :param model_available: Whether ``model`` can be called for
        predictions.
    :raises ValueError: If the explanation dataset has more than 100000
        rows, the dataset or predict_proba output cannot be converted to
        a list, or the model's predict_proba call fails.
    """
    full_dataset = dataset
    full_true_y = true_y if true_y_dataset is None else true_y_dataset
    full_pred_y = pred_y if pred_y_dataset is None else pred_y_dataset
    has_explanation = explanation is not None
    probability_y = None

    if has_explanation:
        if classes is None:
            has_classes_attr = hasattr(explanation, 'classes')
            if has_classes_attr and explanation.classes is not None:
                classes = explanation.classes
        dataset, true_y = self.input_explanation(explanation,
                                                 dataset,
                                                 true_y)
        row_length = len(dataset)
        # Only check dataset on explanation for row length bounds
        if row_length > 100000:
            raise ValueError("Exceeds maximum number of rows"
                             " for visualization (100000)")
    elif sample_dataset is not None:
        dataset = sample_dataset

    if isinstance(dataset, pd.DataFrame) and hasattr(dataset, 'columns'):
        self._dataframeColumns = dataset.columns
        self._dfdtypes = dataset.dtypes
    try:
        list_dataset = convert_to_list(dataset)
    except Exception as ex:
        ex_str = _format_exception(ex)
        raise ValueError(
            "Unsupported dataset type, inner error: {}".format(ex_str)
        ) from ex

    if has_explanation:
        self.input_explanation_data(list_dataset, classes)
        if features is None and hasattr(explanation, 'features'):
            features = explanation.features
    if model_available:
        predicted_y = self.compute_predicted_y(model, dataset)
    else:
        predicted_y = self.predicted_y_to_list(pred_y)

    self.setup_visualization_input(classes, predicted_y,
                                   list_dataset, true_y, features)

    if model_available and is_classifier(model) and \
            dataset is not None:
        try:
            probability_y = model.predict_proba(dataset)
        except Exception as ex:
            ex_str = _format_exception(ex)
            raise ValueError("Model does not support predict_proba method"
                             " for given dataset type,"
                             " inner error: {}".format(ex_str)) from ex
        try:
            probability_y = convert_to_list(probability_y)
        except Exception as ex:
            ex_str = _format_exception(ex)
            raise ValueError(
                "Model predict_proba output of unsupported type,"
                " inner error: {}".format(ex_str)) from ex
        self.dashboard_input[
            ExplanationDashboardInterface.PROBABILITY_Y] = probability_y

    if model_available:
        self._error_analyzer = ModelAnalyzer(model,
                                             full_dataset,
                                             full_true_y,
                                             features,
                                             categorical_features,
                                             model_task,
                                             metric,
                                             classes)
    else:
        # Model task cannot be unknown when passing predictions
        # Assume classification for backwards compatibility
        if model_task == ModelTask.UNKNOWN:
            model_task = ModelTask.CLASSIFICATION
        self._error_analyzer = PredictionsAnalyzer(full_pred_y,
                                                   full_dataset,
                                                   full_true_y,
                                                   features,
                                                   categorical_features,
                                                   model_task,
                                                   metric,
                                                   classes)
    if self._categorical_features:
        self.dashboard_input[
            ExplanationDashboardInterface.CATEGORICAL_MAP
        ] = serialize_json_safe(self._error_analyzer.category_dictionary)

    # Compute metrics on all data cohort
    # NOTE(review): metric and full_pred_y are reassigned below but not
    # read again within this method - confirm whether further code is
    # meant to consume them.
    if self._error_analyzer.model_task == ModelTask.CLASSIFICATION:
        if self._error_analyzer.metric is None:
            metric = Metrics.ERROR_RATE
        else:
            metric = self._error_analyzer.metric
    else:
        if self._error_analyzer.metric is None:
            metric = Metrics.MEAN_SQUARED_ERROR
        else:
            metric = self._error_analyzer.metric
    if model_available:
        full_pred_y = self.compute_predicted_y(model, full_dataset)

    # If we don't have an explanation or model/probabilities specified
    # we can try to use model task to figure out the method
    if not has_explanation and probability_y is None:
        method = MethodConstants.REGRESSION
        if self._error_analyzer.model_task == ModelTask.CLASSIFICATION:
            if (len(np.unique(predicted_y)) > 2):
                method = MethodConstants.MULTICLASS
            else:
                method = MethodConstants.BINARY
        self.dashboard_input[
            ErrorAnalysisDashboardInterface.METHOD] = method
def test_serialize_timestamp(self):
    """Check that a serialized datetime still contains its date string."""
    date_text = "2020-10-10"
    parsed = datetime.datetime.strptime(date_text, "%Y-%m-%d")
    assert date_text in serialize_json_safe(parsed)
def test_unknown(self):
    """Check that a complex number inside a list is returned as-is."""
    unsupported = complex(1, 2)
    payload = [unsupported, 42]
    assert serialize_json_safe(payload) == [unsupported, 42]
def test_numpy(self):
    """Check that a 1-D numpy integer array serializes to a plain list."""
    vector = np.array([1, 2, 3])
    assert serialize_json_safe(vector) == [1, 2, 3]