def explain_instance(self,
                     timeseries,
                     classifier_fn,
                     training_set,
                     num_slices,
                     labels=(1,),
                     top_labels=None,
                     num_features=10,
                     num_samples=5000,
                     distance_metric='cosine',
                     model_regressor=None,
                     replacement_method='mean'):
    """Generates explanations for a prediction.

    Args:
        timeseries: time series to be explained.
        classifier_fn: classifier prediction probability function
        training_set: reference time series passed to the perturbation
            routine together with replacement_method
        num_slices: defines into how many slices the series will be split up
        labels: iterable with labels to be explained.
        top_labels: if not None, ignore labels and produce explanations for
            the K labels with highest prediction probabilities, where K is
            this parameter.
        num_features: maximum number of features present in explanation
        num_samples: size of the neighborhood to learn the linear model
        distance_metric: the distance metric to use for sample weighting,
            defaults to cosine similarity
        model_regressor: sklearn regressor to use in explanation. Defaults
            to Ridge regression in LimeBase. Must have model_regressor.coef_
            and 'sample_weight' as a parameter to model_regressor.fit()
        replacement_method: strategy used to fill in deactivated slices,
            defaults to 'mean'

    Returns:
        An Explanation object (see explanation.py) with the corresponding
        explanations.
    """
    domain_mapper = explanation.DomainMapper()
    data, yss, distances = self.__data_labels_distances(
        timeseries, classifier_fn, num_samples, num_slices, training_set,
        replacement_method)
    if self.class_names is None:
        self.class_names = [str(x) for x in range(yss[0].shape[0])]
    ret_exp = explanation.Explanation(domain_mapper=domain_mapper,
                                      class_names=self.class_names)
    ret_exp.predict_proba = yss[0]
    for label in labels:
        (ret_exp.intercept[int(label)],
         ret_exp.local_exp[int(label)],
         ret_exp.score,
         ret_exp.local_pred) = self.base.explain_instance_with_data(
            data, yss, distances, label, num_features,
            model_regressor=model_regressor,
            feature_selection=self.feature_selection)
    # make the explanation JSON-friendly: plain ints and floats
    ret_exp.local_exp = {
        k: [(int(j1), float(j2)) for j1, j2 in v]
        for k, v in ret_exp.local_exp.items()
    }
    return ret_exp
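# Hedged usage sketch for the variant above: the explainer class name and the
# `series`, `X_train`, `clf` objects are placeholders, not part of this module.
#
#   explainer = TimeSeriesExplainer(class_names=['negative', 'positive'])
#   exp = explainer.explain_instance(series,               # array of time series values
#                                    clf.predict_proba,    # returns (n, k) probabilities
#                                    training_set=X_train, # source of replacement values
#                                    num_slices=10,
#                                    num_features=5,
#                                    replacement_method='mean')
#   exp.as_list(label=1)   # (slice index, weight) pairs for label 1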
def explain_instance(self,
                     text_instance,
                     classifier_fn,
                     labels=(1,),
                     top_labels=None,
                     num_features=10,
                     num_samples=5000,
                     distance_metric='cosine',
                     model_regressor=None):
    """This is basically just a copy of :class:`LimeTextExplainer` with our
    custom implementation of :class:`IndexedString`.
    """
    indexed_string = IndexedString(text_instance, bow=self.bow,
                                   split_expression=self.split_expression)
    domain_mapper = TextDomainMapper(indexed_string)
    data, yss, distances = self.__data_labels_distances(
        indexed_string, classifier_fn, num_samples,
        distance_metric=distance_metric)
    if self.class_names is None:
        self.class_names = [str(x) for x in range(yss[0].shape[0])]
    ret_exp = explanation.Explanation(domain_mapper=domain_mapper,
                                      class_names=self.class_names,
                                      random_state=self.random_state)
    ret_exp.predict_proba = yss[0]
    if top_labels:
        labels = np.argsort(yss[0])[-top_labels:]
        ret_exp.top_labels = list(labels)
        ret_exp.top_labels.reverse()
    for label in labels:
        (ret_exp.intercept[label],
         ret_exp.local_exp[label],
         ret_exp.score,
         ret_exp.local_pred) = self.base.explain_instance_with_data(
            data, yss, distances, label, num_features,
            model_regressor=model_regressor,
            feature_selection=self.feature_selection)
    return ret_exp
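# The method above mirrors the call signature of the published lime package's
# LimeTextExplainer.explain_instance, so a minimal end-to-end sketch against that
# upstream API looks like this (the pipeline below is illustrative, not from this
# module; the custom explainer is assumed to accept the same arguments):
from sklearn.datasets import fetch_20newsgroups
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.naive_bayes import MultinomialNB
from sklearn.pipeline import make_pipeline
from lime.lime_text import LimeTextExplainer

train = fetch_20newsgroups(subset='train', categories=['sci.med', 'sci.space'])
pipeline = make_pipeline(TfidfVectorizer(), MultinomialNB())
pipeline.fit(train.data, train.target)

explainer = LimeTextExplainer(class_names=train.target_names)
exp = explainer.explain_instance(train.data[0],            # raw text string
                                 pipeline.predict_proba,   # list of d strings -> (d, k) probs
                                 num_features=6,
                                 num_samples=1000)
print(exp.as_list())  # [(word, weight), ...] for label 1 by default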
def explain_instance(self,
                     timeseries_instance,
                     classifier_fn,
                     num_slices,
                     labels=(1,),
                     top_labels=None,
                     num_features=10,
                     num_samples=5000,
                     model_regressor=None,
                     replacement_method='mean'):
    """Generates explanations for a prediction.

    First, we generate neighborhood data by randomly hiding features from
    the instance (see __data_labels_distance_mapping). We then learn
    locally weighted linear models on this neighborhood data to explain
    each of the classes in an interpretable way (see lime_base.py). The
    DTW metric is used as the distance function.

    Args:
        timeseries_instance: time series to be explained.
        classifier_fn: classifier prediction probability function, which
            takes a list of d arrays with time series values and outputs a
            (d, k) numpy array with prediction probabilities, where k is
            the number of classes. For ScikitClassifiers, this is
            classifier.predict_proba.
        num_slices: defines into how many slices the time series will be
            split up
        labels: iterable with labels to be explained.
        top_labels: if not None, ignore labels and produce explanations for
            the K labels with highest prediction probabilities, where K is
            this parameter.
        num_features: maximum number of features present in explanation
        num_samples: size of the neighborhood to learn the linear model
        model_regressor: sklearn regressor to use in explanation. Defaults
            to Ridge regression in LimeBase. Must have model_regressor.coef_
            and 'sample_weight' as a parameter to model_regressor.fit()
        replacement_method: strategy used to fill in deactivated slices,
            defaults to 'mean'

    Returns:
        An Explanation object (see explanation.py) with the corresponding
        explanations.
    """
    permutations, predictions, distances = self.__data_labels_distances(
        timeseries_instance, classifier_fn, num_samples, num_slices,
        replacement_method)

    is_multivariate = len(timeseries_instance.shape) > 1

    if self.class_names is None:
        self.class_names = [str(x) for x in range(predictions[0].shape[0])]

    domain_mapper = TSDomainMapper(self.signal_names, num_slices,
                                   is_multivariate)
    ret_exp = explanation.Explanation(domain_mapper=domain_mapper,
                                      class_names=self.class_names)
    ret_exp.predict_proba = predictions[0]

    if top_labels:
        labels = np.argsort(predictions[0])[-top_labels:]
        ret_exp.top_labels = list(labels)
        ret_exp.top_labels.reverse()
    for label in labels:
        (ret_exp.intercept[int(label)],
         ret_exp.local_exp[int(label)],
         ret_exp.score,
         ret_exp.local_pred) = self.base.explain_instance_with_data(
            permutations, predictions, distances, label, num_features,
            model_regressor=model_regressor,
            feature_selection=self.feature_selection)
    return ret_exp
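# Hedged usage sketch for the time series explainer above; the class name, the
# classifier `clf`, and the data are placeholders and not part of this module:
#
#   explainer = LimeTimeSeriesExplainer(class_names=['class_0', 'class_1'])
#   exp = explainer.explain_instance(series,              # 1d (or 2d multivariate) array
#                                    clf.predict_proba,   # list of arrays -> (d, k) probs
#                                    num_slices=10,       # series split into 10 segments
#                                    num_features=5,
#                                    num_samples=1000,
#                                    replacement_method='mean')
#   exp.as_list(label=1)   # (slice, weight) pairs mapped through TSDomainMapper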
def explain_instance(self,
                     data_row,
                     predict_fn,
                     labels=(1,),
                     top_labels=None,
                     num_features=10,
                     num_samples=5000,
                     distance_metric='euclidean',
                     model_regressor=None):
    """Generates explanations for a prediction.

    First, we generate neighborhood data by randomly perturbing features
    from the instance (see __data_inverse). We then learn locally weighted
    linear models on this neighborhood data to explain each of the classes
    in an interpretable way (see lime_base.py).

    Args:
        data_row: 1d numpy array or scipy.sparse matrix, corresponding to a
            row
        predict_fn: prediction function. For classifiers, this should be a
            function that takes a numpy array and outputs prediction
            probabilities. For regressors, this takes a numpy array and
            returns the predictions. For ScikitClassifiers, this is
            `classifier.predict_proba()`. For ScikitRegressors, this is
            `regressor.predict()`. The prediction function needs to work on
            multiple feature vectors (the vectors randomly perturbed from
            the data_row).
        labels: iterable with labels to be explained.
        top_labels: if not None, ignore labels and produce explanations for
            the K labels with highest prediction probabilities, where K is
            this parameter.
        num_features: maximum number of features present in explanation
        num_samples: size of the neighborhood to learn the linear model
        distance_metric: the distance metric to use for weights.
        model_regressor: sklearn regressor to use in explanation. Defaults
            to Ridge regression in LimeBase. Must have model_regressor.coef_
            and 'sample_weight' as a parameter to model_regressor.fit()

    Returns:
        An Explanation object (see explanation.py) with the corresponding
        explanations.
    """
    if sp.sparse.issparse(data_row) and not sp.sparse.isspmatrix_csr(data_row):
        # Preventative code: if sparse, convert to csr format if not in csr format already
        data_row = data_row.tocsr()
    data, inverse = self.__data_inverse(data_row, num_samples)
    if sp.sparse.issparse(data):
        # Note in sparse case we don't subtract mean since data would become dense
        scaled_data = data.multiply(self.scaler.scale_)
        # Multiplying with csr matrix can return a coo sparse matrix
        if not sp.sparse.isspmatrix_csr(scaled_data):
            scaled_data = scaled_data.tocsr()
    else:
        scaled_data = (data - self.scaler.mean_) / self.scaler.scale_
    distances = sklearn.metrics.pairwise_distances(
        scaled_data,
        scaled_data[0].reshape(1, -1),
        metric=distance_metric
    ).ravel()

    yss = predict_fn(inverse)

    # for classification, the model needs to provide a list of tuples - classes
    # along with prediction probabilities
    if self.mode == "classification":
        if len(yss.shape) == 1:
            raise NotImplementedError("LIME does not currently support "
                                      "classifier models without probability "
                                      "scores. If this conflicts with your "
                                      "use case, please let us know: "
                                      "https://github.com/datascienceinc/lime/issues/16")
        elif len(yss.shape) == 2:
            if self.class_names is None:
                self.class_names = [str(x) for x in range(yss[0].shape[0])]
            else:
                self.class_names = list(self.class_names)
            if not np.allclose(yss.sum(axis=1), 1.0):
                warnings.warn("""
                Prediction probabilities do not sum to 1, and thus do not
                constitute a probability space. Check that your classifier
                outputs probabilities (not log probabilities, or actual
                class predictions).
                """)
        else:
            raise ValueError("Your model outputs "
                             "arrays with {} dimensions".format(len(yss.shape)))

    # for regression, the output should be a one-dimensional array of predictions
    else:
        try:
            if len(yss.shape) != 1 and len(yss[0].shape) == 1:
                yss = np.array([v[0] for v in yss])
            assert isinstance(yss, np.ndarray) and len(yss.shape) == 1
        except AssertionError:
            raise ValueError("Your model needs to output single-dimensional "
                             "numpy arrays, not arrays of {} dimensions".format(yss.shape))

        predicted_value = yss[0]
        min_y = min(yss)
        max_y = max(yss)

        # add a dimension to be compatible with downstream machinery
        yss = yss[:, np.newaxis]

    feature_names = copy.deepcopy(self.feature_names)
    if feature_names is None:
        feature_names = [str(x) for x in range(data_row.shape[0])]

    if sp.sparse.issparse(data_row):
        values = self.convert_and_round(data_row.data)
        feature_indexes = data_row.indices
    else:
        values = self.convert_and_round(data_row)
        feature_indexes = None

    for i in self.categorical_features:
        if self.discretizer is not None and i in self.discretizer.lambdas:
            continue
        name = int(data_row[i])
        if i in self.categorical_names:
            name = self.categorical_names[i][name]
        feature_names[i] = '%s=%s' % (feature_names[i], name)
        values[i] = 'True'
    categorical_features = self.categorical_features

    discretized_feature_names = None
    if self.discretizer is not None:
        categorical_features = range(data.shape[1])
        discretized_instance = self.discretizer.discretize(data_row)
        discretized_feature_names = copy.deepcopy(feature_names)
        for f in self.discretizer.names:
            discretized_feature_names[f] = self.discretizer.names[f][int(
                discretized_instance[f])]

    domain_mapper = TableDomainMapper(
        feature_names,
        values,
        scaled_data[0],
        categorical_features=categorical_features,
        discretized_feature_names=discretized_feature_names,
        feature_indexes=feature_indexes)
    ret_exp = explanation.Explanation(domain_mapper,
                                      mode=self.mode,
                                      class_names=self.class_names)
    if self.mode == "classification":
        ret_exp.predict_proba = yss[0]
        if top_labels:
            labels = np.argsort(yss[0])[-top_labels:]
            ret_exp.top_labels = list(labels)
            ret_exp.top_labels.reverse()
    else:
        ret_exp.predicted_value = predicted_value
        ret_exp.min_value = min_y
        ret_exp.max_value = max_y
        labels = [0]
    for label in labels:
        (ret_exp.intercept[label],
         ret_exp.local_exp[label],
         ret_exp.score,
         ret_exp.local_pred) = self.base.explain_instance_with_data(
            scaled_data,
            yss,
            distances,
            label,
            num_features,
            model_regressor=model_regressor,
            feature_selection=self.feature_selection)

    if self.mode == "regression":
        # mirror the single regression output into label slots 0 and 1,
        # with signs flipped for slot 0
        ret_exp.intercept[1] = ret_exp.intercept[0]
        ret_exp.local_exp[1] = [x for x in ret_exp.local_exp[0]]
        ret_exp.local_exp[0] = [(i, -1 * j) for i, j in ret_exp.local_exp[1]]

    self.lime_preds = yss
    return ret_exp
def explain_instance(self,
                     instance,
                     rec_model,
                     neighborhood_entity,
                     labels=(1,),
                     num_features=10,
                     num_samples=50,
                     distance_metric='cosine',
                     model_regressor=None):
    # get neighborhood
    neighborhood_df = self.generate_neighborhood(instance,
                                                 neighborhood_entity,
                                                 num_samples)

    # compute distance based on interpretable format
    data, _ = Dataset.convert_to_pyfm_format(
        neighborhood_df, columns=rec_model.one_hot_columns)
    distances = sklearn.metrics.pairwise_distances(
        data, data[0].reshape(1, -1), metric=distance_metric).ravel()

    # get predictions from original complex model
    yss = np.array(rec_model.predict(neighborhood_df))

    # for classification, the model needs to provide a list of tuples - classes
    # along with prediction probabilities
    if self.mode == "classification":
        raise NotImplementedError(
            "LIME-RS does not currently support classifier models.")
    # for regression, the output should be a one-dimensional array of predictions
    else:
        try:
            assert isinstance(yss, np.ndarray) and len(yss.shape) == 1
        except AssertionError:
            raise ValueError("Your model needs to output single-dimensional "
                             "numpy arrays, not arrays of {} dimensions".format(yss.shape))

        predicted_value = yss[0]
        min_y = min(yss)
        max_y = max(yss)

        # add a dimension to be compatible with downstream machinery
        yss = yss[:, np.newaxis]

    ret_exp = explanation.Explanation(domain_mapper=None,
                                      mode=self.mode,
                                      class_names=self.class_names)

    if self.mode == "classification":
        raise NotImplementedError(
            "LIME-RS does not currently support classifier models.")
    else:
        ret_exp.predicted_value = predicted_value
        ret_exp.min_value = min_y
        ret_exp.max_value = max_y
        labels = [0]

    for label in labels:
        (ret_exp.intercept[label],
         ret_exp.local_exp[label],
         ret_exp.score,
         ret_exp.local_pred) = self.base.explain_instance_with_data(
            data, yss, distances, label, num_features,
            model_regressor=model_regressor,
            feature_selection=self.feature_selection)

    return ret_exp
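# Hedged usage sketch for the LIME-RS explainer above (regression mode only);
# the constructor, the 'item' entity value, and `rec_model` (a trained recommender
# exposing .predict() and .one_hot_columns) are placeholders, not from this code:
#
#   explainer = LimeRSExplainer(..., mode='regression')           # assumed constructor
#   exp = explainer.explain_instance(instance,                    # e.g. a (user, item) row
#                                    rec_model,
#                                    neighborhood_entity='item',  # perturb items around the user
#                                    num_features=10,
#                                    num_samples=50)
#   exp.local_exp[0]   # (feature index, weight) pairs for the single regression output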
def explain_instance(self,
                     text_instance,
                     classifier_fn,
                     labels=(1,),
                     top_labels=None,
                     num_features=10,
                     num_samples=5000,
                     distance_metric='cosine',
                     model_regressor=None,
                     care_words=None,
                     spans=(2,),
                     include_original_feature=True):
    """Generates explanations for a prediction.

    First, we generate neighborhood data by randomly hiding features from
    the instance (see __data_labels_distance_mapping). We then learn
    locally weighted linear models on this neighborhood data to explain
    each of the classes in an interpretable way (see lime_base.py).

    Args:
        text_instance: raw text string to be explained.
        classifier_fn: classifier prediction probability function, which
            takes a list of d strings and outputs a (d, k) numpy array with
            prediction probabilities, where k is the number of classes. For
            ScikitClassifiers, this is classifier.predict_proba.
        labels: iterable with labels to be explained.
        top_labels: if not None, ignore labels and produce explanations for
            the K labels with highest prediction probabilities, where K is
            this parameter.
        num_features: maximum number of features present in explanation
        num_samples: size of the neighborhood to learn the linear model
        distance_metric: the distance metric to use for sample weighting,
            defaults to cosine similarity
        model_regressor: sklearn regressor to use in explanation. Defaults
            to Ridge regression in LimeBase. Must have model_regressor.coef_
            and 'sample_weight' as a parameter to model_regressor.fit()

    Returns:
        An Explanation object (see explanation.py) with the corresponding
        explanations.
    """
    self.care_words = care_words
    self.spans = spans
    self.include_original_feature = include_original_feature

    indexed_string = (IndexedCharacters(
        text_instance, bow=self.bow, mask_string=self.mask_string)
                      if self.char_level else
                      IndexedString(text_instance,
                                    bow=self.bow,
                                    split_expression=self.split_expression,
                                    mask_string=self.mask_string))
    domain_mapper = TextDomainMapper(indexed_string)
    data, yss, distances = self.__data_labels_distances(
        indexed_string, classifier_fn, num_samples,
        distance_metric=distance_metric)
    if self.class_names is None:
        self.class_names = [str(x) for x in range(yss[0].shape[0])]
    ret_exp = explanation.Explanation(domain_mapper=domain_mapper,
                                      class_names=self.class_names,
                                      random_state=self.random_state)
    ret_exp.predict_proba = yss[0]
    if top_labels:
        labels = np.argsort(yss[0])[-top_labels:]
        ret_exp.top_labels = list(labels)
        ret_exp.top_labels.reverse()
    for label in labels:
        (ret_exp.intercept[label],
         ret_exp.local_exp[label],
         ret_exp.score,
         ret_exp.local_pred) = self.base.explain_instance_with_data(
            data, yss, distances, label, num_features,
            model_regressor=model_regressor,
            feature_selection=self.feature_selection)
    return ret_exp
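# Hedged sketch of calling the span-aware text explainer above; the explainer class
# name, `text`, and `pipeline` are placeholders, and the meaning of care_words/spans
# is taken only from the parameter names, not documented behavior:
#
#   explainer = SpanTextExplainer(class_names=['neg', 'pos'])
#   exp = explainer.explain_instance(text,
#                                    pipeline.predict_proba,
#                                    num_features=10,
#                                    num_samples=2000,
#                                    spans=(2, 3),                   # assumed: 2- and 3-word spans
#                                    include_original_feature=True)  # keep single-word features too
#   exp.as_list(label=1)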