def init_lime(self):
    """Initialize a LIME explainer for later local interpretations of this model.

    Lazily creates the explainer on first call; subsequent calls are no-ops
    and keep the existing explainer.

    :return: void (Sets the value for lime_explainer)
    """
    from lime.lime_tabular import LimeTabularExplainer
    from util.commons import RANDOM_NUMBER, convert_to_lime_format
    if not self.lime_explainer:
        log.info(
            "Initializing LIME - generating new explainer."
            " This operation may be time-consuming so please be patient.")
        # Transform the categorical feature's labels to a lime-readable format.
        # NOTE(review): assumes self.idx2ohe maps column index -> label names;
        # confirm against where idx2ohe is built.
        categorical_names = self.idx2ohe
        log.debug(
            "Categorical names for lime: {}".format(categorical_names))
        explainer = LimeTabularExplainer(
            # LIME needs the non-one-hot representation of the data.
            convert_to_lime_format(self.X_test, categorical_names).values,
            mode="classification",
            feature_names=self.X_test.columns.tolist(),
            categorical_names=categorical_names,
            # The dict keys are the positional indices of categorical columns.
            categorical_features=categorical_names.keys(),
            discretize_continuous=True,
            random_state=RANDOM_NUMBER)
        self.lime_explainer = explainer
    else:
        log.info("LIME is already initialized.")
def get_local_interpretation(ID_client, dataframe, modelname,
                             features_importances, label):
    """Build a LIME explanation for one client's row of ``dataframe``.

    BUG FIX: the explainer was constructed with
    ``training_labels=dataframe.columns.tolist()`` — column *names* passed
    where per-row training *labels* are expected.  The argument (only used
    by the entropy discretizer, which is not enabled here) is removed.

    Args:
        ID_client: value matched against the SK_ID_CURR column.
        dataframe: frame containing SK_ID_CURR, TARGET and the features.
        modelname: identifier passed to load_model().
        features_importances: unused here; kept for interface compatibility.
        label: unused here; kept for interface compatibility.

    Returns:
        The lime Explanation object for the selected client.
    """
    model = load_model(modelname)
    # Select the single client row, then strip the id/target columns.
    X = dataframe[dataframe['SK_ID_CURR'] == int(ID_client)]
    X = X.drop(['SK_ID_CURR', 'TARGET'], axis=1)
    dataframe = dataframe.drop(['SK_ID_CURR', 'TARGET'], axis=1)
    # A 10% sample is enough background data for the explainer.
    X_train = dataframe.sample(frac=0.1, random_state=42).values
    explainer = LimeTabularExplainer(
        training_data=X_train,
        mode='classification',
        feature_names=dataframe.columns,
        verbose=1,
        random_state=42)
    explanation = explainer.explain_instance(
        np.ravel(np.array(X)),
        predict_fn=model.predict_proba,
        labels=[0, 1],
        num_features=len(dataframe.columns))
    return explanation
def lime(self, instance=None, html_file=False, num_features=2):
    """Explain one test-set prediction with LIME.

    BUG FIX: the old ``if not instance:`` guard also re-rolled a
    legitimate, explicitly requested index 0; ``is None`` is used instead.

    :param instance: positional index into x_test/y_test; a random index
        is drawn when None.
    :param html_file: when True, also save the explanation as
        "<idx>_<label>_explain.html".
    :param num_features: number of features to include in the explanation.
    :return: None; prints the case details and shows a matplotlib figure.
    """
    explainer = LimeTabularExplainer(self.x_train.values,
                                     mode="classification",
                                     feature_names=self.x_train.columns,
                                     class_names=['false', 'true'],
                                     training_labels=self.y_train,
                                     discretize_continuous=True)
    if instance is None:
        instance = np.random.randint(0, self.x_test.shape[0])
    print('Case: ' + str(instance))
    print('Label: ' + str(self.y_test.iloc[instance]))
    exp = explainer.explain_instance(self.x_test.values[instance],
                                     self.model.predict_proba,
                                     num_features=num_features)
    print("Lime explanation: ")
    exp.as_pyplot_figure(label=1).show()
    if html_file:
        exp.save_to_file(
            str(instance) + "_" + str(self.y_test.iloc[instance]) +
            "_explain.html")
def lime():
    """Compute LIME attributions for every test sample of the fraud net
    and pickle the preprocessed result to ./saved_attributions/lime.pkl.
    """
    print('Loading dataset ...')
    X_train, Y_train, X_val, Y_val, X_test, Y_test = get_dataset(
        minibatch_size=32, sampling='None', numpy='True')
    net = load_fraudnet()
    lime_list = []
    explainer = LimeTabularExplainer(X_train, training_labels=Y_train)

    def func_call(x):
        # Wrap the torch net as an sklearn-style predict_proba that returns
        # an (n, 2) array of [P(class 0), P(class 1)].
        print(x.shape)
        input_ = torch.from_numpy(x).to(device=device).float()
        prob_1 = net(input_).view(-1, 1).cpu().data.numpy()
        # The net outputs only the positive-class probability.
        prob_0 = 1 - prob_1
        prob = np.concatenate([prob_0, prob_1], axis=1)
        return prob

    for i in range(X_test.shape[0]):
        exp = explainer.explain_instance(X_test[i, :], func_call,
                                         labels=(0, 1), num_features=50)
        lime_list.append(exp)
    # NOTE(review): preprocess() presumably converts Explanation objects to
    # numeric attributions — confirm before relying on the pickled layout.
    lime_list = np.array(lime_list)
    lime_list = preprocess(lime_list)
    pickle.dump(lime_list, open('./saved_attributions/lime.pkl', 'wb'))
def generate_neighborhood_data(self, sample, predict_fn,
                               distance_metric='euclidean', n_samples=500,
                               seed=1, **kwargs):
    '''Generate neighborhood data for a given point (currently using LIME)

    Args:
        sample: Observed sample around which neighbors are drawn
        predict_fn: Black box predictor to predict all points
        distance_metric: Distance metric used for weights
        n_samples: Number of samples to generate
        seed: unused here — TODO confirm whether it should seed LIME
        **kwargs: unused, accepted for interface compatibility

    Returns:
        neighor_data (xs around sample), weights (weights of instances
        in xs), neighor_data_labels (ys around sample, corresponding to
        xs), followed by the original sample.
    '''
    from lime.lime_tabular import LimeTabularExplainer
    e = LimeTabularExplainer(
        self.train_data,
        categorical_features=self.categorical_features,
        discretize_continuous=False)
    # HACK: reaches into LIME's name-mangled private __data_inverse to get
    # the raw perturbed samples; may break across lime version upgrades.
    _, neighbor_data = e._LimeTabularExplainer__data_inverse(
        sample, n_samples)
    # Standardize using the scaler LIME fitted on train_data.
    scaled_data = (neighbor_data - e.scaler.mean_) / e.scaler.scale_
    return (*self._data(neighbor_data, scaled_data, distance_metric,
                        predict_fn), sample)
def fit(self, X, y, predict_fn, labels_num): self.cluster_labels = self.cluster_method.fit_predict(X) #print(X.shape[1]) for i in range(self.cluster_num): inds = np.where(self.cluster_labels == i) explainer = LimeTabularExplainer(X[inds], discretize_continuous=False, sample_around_instance=True) #print(np.squeeze(X[inds, :])) #print (self.cluster_method.cluster_centers_[i]) #time1=time.clock() simplified_models = explainer.explain_instance( self.cluster_method.cluster_centers_[i], predict_fn, num_samples=10000, labels=range(labels_num), num_features=X.shape[1], retrive_model=True) #print(type(simplified_models)) coef_ = np.zeros((X.shape[1], labels_num)) intercept_ = np.zeros((1, labels_num)) #time2=time.clock() #time3 = time2-time1 #print("explain_instance") #print(time3) for idx in range(labels_num): coef_[:, idx] = simplified_models[idx].coef_ intercept_[0, idx] = simplified_models[idx].intercept_ self.models.append((coef_, intercept_))
def interpret_model(dataframe, feature_set, model):
    """Build a LIME explainer for a default-prediction model.

    Args:
        dataframe: source frame; must contain the columns in ``feature_set``
            plus a ``Default_Status`` target column.
        feature_set: list of feature column names to explain with.
        model: unused here — TODO confirm; the explainer is built from the
            training split only and the model is never fitted or called.

    Returns:
        A LimeTabularExplainer over the training split of
        ``dataframe[feature_set]``.
    """
    X = dataframe[feature_set]
    y = dataframe['Default_Status']
    # Split so the explainer is fitted on training data only.
    train_X, test_X, train_y, test_y = train_test_split(X, y, random_state=20)
    from lime.lime_tabular import LimeTabularExplainer
    class_names = ['Wont Default', 'Will Default']
    # Instantiate the explainer on the raw training feature values.
    limeexplainer = LimeTabularExplainer(train_X.values,
                                         class_names=class_names,
                                         feature_names=feature_set,
                                         kernel_width=3,
                                         verbose=False,
                                         mode='classification')
    return limeexplainer
def fit(self, X, y=None):
    """Prepare a discretizing LIME tabular explainer over X.

    ``y`` is accepted but ignored, matching the sklearn estimator API.
    Returns self to allow chaining.
    """
    explainer_config = {
        "feature_names": self.feature_names,
        "class_names": self.class_names,
        "discretize_continuous": True,
    }
    self.explainer_ = LimeTabularExplainer(X, **explainer_config)
    return self
def fit(self, X: Any, class_names: List[str] = None) -> None:
    """Fit a non-discretizing LIME explainer on X.

    class_names defaults to the binary labels ['0', '1'] when omitted.
    """
    if class_names is None:
        class_names = ['0', '1']
    explainer_options = {
        "training_data": X,
        "feature_names": list(range(X.shape[1])),
        "class_names": class_names,
        "discretize_continuous": False,
        "random_state": self._seed,
    }
    self._explainer = LimeTabularExplainer(**explainer_options)
def lime_interpreter(dataset_features, x_train, x_test, classifier,
                     model_name, rng=True, instance=None):
    """Explain one test instance of ``classifier`` with LIME and save the
    explanation as "<model_name>.html".

    BUG FIX: the line constructing ``explainer`` was commented out, so the
    later ``explainer.explain_instance`` call raised NameError.  Also fixed
    typos in the final user-facing message.

    Args:
        dataset_features: number of features (used to name them f0..fN-1).
        x_train: training matrix used to fit the explainer.
        x_test: matrix from which the explained instance is taken.
        classifier: model exposing predict_proba (sparse output).
        model_name: base name of the emitted HTML file.
        rng: when True a random test index is used; otherwise ``instance``.
        instance: explicit test index, used only when rng is False.
    """
    feature_names = ["f" + str(i) for i in range(dataset_features)]
    explainer = LimeTabularExplainer(x_train,
                                     feature_names=feature_names,
                                     discretize_continuous=True)

    def wrapped_fn(x_test):
        # Densify the sparse probability matrix and renormalize it.
        p = classifier.predict_proba(x_test).toarray()
        p_norm = norm_probabilities(p)
        return p_norm

    if rng:
        idx = np.random.randint(0, x_test.shape[0])
    else:
        idx = instance
    exp = explainer.explain_instance(x_test[idx], predict_fn=wrapped_fn)
    exp.save_to_file(model_name + '.html')
    print(
        "Interpretation can be found as an HTML file in the current directory, named :"
    )
    print(model_name)
    print("")
def fit(self, X, y, lemna_component, predict_fn, labels_num):
    """Cluster X and fit one LEMNA mixture surrogate per cluster center.

    Args:
        X: 2-D feature matrix.
        y: unused here — TODO confirm whether it should drive the fit.
        lemna_component: number of mixture components per surrogate.
        predict_fn: black-box probability function explained per center.
        labels_num: number of output classes.
    """
    self.cluster_labels = self.cluster_method.fit_predict(X)
    self.num_features = X.shape[1]
    for i in range(self.cluster_num):
        # Rows of cluster i become this explainer's training data.
        inds = np.where(self.cluster_labels == i)
        explainer = LimeTabularExplainer(np.squeeze(X[inds, :]),
                                         discretize_continuous=False,
                                         sample_around_instance=True)
        # NOTE(review): explain_instance_with_lemna / retrive_model are not
        # stock LIME — this relies on a patched lime build.
        simplified_models = explainer.explain_instance_with_lemna(
            self.cluster_method.cluster_centers_[i],
            predict_fn,
            lemna_component=lemna_component,
            num_samples=5000,
            labels=range(labels_num),
            num_features=X.shape[1],
            retrive_model=True)
        # coef_ is a 3-d matrix feature_num * lemna_component * labels_num
        # intercept is a 2-d matrix lemna_component * labels_num
        coef_ = np.zeros((X.shape[1], lemna_component, labels_num))
        intercept_ = np.zeros((1, lemna_component, labels_num))
        for idx in range(labels_num):
            coef_[:, :, idx] = simplified_models[idx].coef_
            intercept_[0, :, idx] = simplified_models[idx].intercept_
            # NOTE(review): pi_ is overwritten each iteration, so only the
            # last class's mixture weights are stored — confirm intended.
            pi_ = simplified_models[idx].pi_
        self.models.append((coef_, intercept_, pi_))
def lime_tabular_global():
    """LIME-explain every sample, bucketed by genre and by whether the
    classifier predicted it correctly, then dump each bucket with joblib.

    BUG FIXES:
      * ``DataFrame.as_matrix()`` was removed in pandas 1.0 → ``to_numpy()``.
      * ``feature_names`` previously still contained the dropped
        target/year/ID columns, misaligning LIME's feature labels.

    Returns:
        1 when an unexpected target label is encountered, else None.
    """
    targets = ['academic', 'fiction', 'magazine', 'newspaper']
    data = pd.read_pickle('data_explain_tabular.pkl')
    clf = joblib.load('model_forest_tabular.pkl')
    target = np.array(data['target'])
    features = data.drop(['target', 'year', 'ID'], axis=1)
    feature_names = list(features)  # names of the columns actually explained
    data = features.to_numpy()
    explainer = LimeTabularExplainer(data, feature_names=feature_names,
                                     class_names=targets)

    # One bucket per genre, split by correct vs wrong prediction.
    correct = {label: [] for label in range(len(targets))}
    wrong = {label: [] for label in range(len(targets))}
    for i in range(data.shape[0]):
        pred = clf.predict(data[i].reshape(1, -1))[0]
        if target[i] not in correct:
            return 1  # unexpected genre label
        explanation = explainer.explain_instance(data[i], clf.predict_proba,
                                                 num_features=10,
                                                 top_labels=4)
        result = explanation.as_list(label=pred)
        bucket = correct if pred == target[i] else wrong
        bucket[target[i]].append((result, pred))

    for label, name in enumerate(targets):
        joblib.dump(correct[label], 'lime_{}.pkl'.format(name))
    all_explanations = sum((correct[label] for label in range(len(targets))), [])
    joblib.dump(all_explanations, 'lime_all.pkl')

    for label, name in enumerate(targets):
        joblib.dump(wrong[label], 'lime_{}_wrong.pkl'.format(name))
    all_explanations_w = sum((wrong[label] for label in range(len(targets))), [])
    joblib.dump(all_explanations_w, 'lime_all_wrong.pkl')
def test_lime_explainer_entropy_discretizer(self):
    """The entropy discretizer should still surface the two dominant
    petal features for a random iris test sample."""
    np.random.seed(1)
    forest = RandomForestClassifier(n_estimators=500)
    forest.fit(self.train, self.labels_train)
    sample_idx = np.random.randint(0, self.test.shape[0])
    explainer = LimeTabularExplainer(
        self.train,
        feature_names=self.feature_names,
        class_names=self.target_names,
        training_labels=self.labels_train,
        discretize_continuous=True,
        discretizer='entropy')
    exp = explainer.explain_instance(self.test[sample_idx],
                                     forest.predict_proba,
                                     num_features=2)
    self.assertIsNotNone(exp)
    keys = [feature for feature, _ in exp.as_list()]
    print(keys)
    self.assertEqual(1,
                     sum('petal width' in key for key in keys),
                     "Petal Width is a major feature")
    self.assertEqual(1,
                     sum('petal length' in key for key in keys),
                     "Petal Length is a major feature")
def testFeatureNamesAndCategoricalFeats(self):
    """Defaults and ndarray overrides for feature_names and
    categorical_features are normalized to plain lists."""
    X = np.array([[0., 1.], [1., 0.]])

    # Defaults: stringified column indices; every column categorical.
    default_exp = LimeTabularExplainer(training_data=X)
    self.assertEqual(default_exp.feature_names, ['0', '1'])
    self.assertEqual(default_exp.categorical_features, [0, 1])

    # feature_names passed as an ndarray becomes a list of names.
    named_exp = LimeTabularExplainer(training_data=X,
                                     feature_names=np.array(['one', 'two']))
    self.assertEqual(named_exp.feature_names, ['one', 'two'])

    # categorical_features passed as an ndarray becomes a list too.
    cat_exp = LimeTabularExplainer(training_data=X,
                                   categorical_features=np.array([0]),
                                   discretize_continuous=False)
    self.assertEqual(cat_exp.categorical_features, [0])
def explain():
    """Flask endpoint: LIME-explain one observation from the request JSON
    and return the explanation figure as a PNG attachment.

    Returns ('Bad Request', 400) when the payload raises ValueError.
    """
    try:
        with open(CFG.TRAINING, 'rb') as f:
            training = pickle.load(f)
        my_json = request.get_json()
        encoded_dict = convert_json(my_json)
        # SECURITY NOTE: eval() on request-derived text executes arbitrary
        # code — json.loads / ast.literal_eval would be safer here.
        dictionary = eval(encoded_dict)
        # Normalize the age feature with the scaler used at training time.
        normalize_age_mons = age_mons_preprocessing.transform(
            [[dictionary['age_month']]])[0, 0]
        dictionary['age_month'] = normalize_age_mons
        # NOTE(review): relies on the dict's insertion order matching the
        # training column order — confirm against the payload schema.
        pred = np.array([x[1] for x in dictionary.items()])
        exp = LimeTabularExplainer(training.values,
                                   feature_names=training.columns,
                                   discretize_continuous=True)
        fig = exp.explain_instance(pred,
                                   model.predict_proba).as_pyplot_figure()
        # NOTE(review): assigning fig.figsize on an existing Figure is a
        # no-op; fig.set_size_inches(30, 10) is likely what was intended.
        fig.figsize = (30, 10)
        plt.tight_layout()
        plt.savefig('explain.png')
        return send_file('explain.png',
                         mimetype='image/png',
                         as_attachment=True)
    except ValueError:
        return 'Bad Request', 400
def __init__(self, bb_classifier, X, class_names, explanation_samples=5000):
    """Wrap a black-box classifier with both SHAP and LIME explainers.

    Args:
        bb_classifier: fitted model exposing predict_proba.
        X: background data; .columns and .astype are used, so a DataFrame
            is expected.
        class_names: display names for the output classes.
        explanation_samples: samples drawn per SHAP explanation.
    """
    self.bb_classifier = bb_classifier
    # NOTE(review): np.mean(X) on a DataFrame yields per-column means (a
    # Series); on a bare ndarray it collapses to one scalar — confirm X is
    # always a DataFrame before changing this.
    self.EX, self.StdX = np.mean(X), np.array(np.std(X, axis=0, ddof=0))
    self.class_names = class_names
    self.F = X.shape[1]  # number of features
    self.explanation_samples = explanation_samples
    # SHAP Kernel explainer over the background expectation.
    self.SHAPEXPL = shap.KernelExplainer(self.bb_classifier.predict_proba,
                                         self.EX,
                                         nsamples=explanation_samples)
    # LIME Kernel: sample around each instance, no discretization.
    self.LIMEEXPL = LimeTabularExplainer(
        X.astype('float'),
        feature_names=X.columns.tolist(),
        class_names=self.class_names,
        discretize_continuous=False,
        sample_around_instance=True,
        random_state=10)
    # Jaccard-stability metrics are filled in later by the evaluation code.
    self.metrics = None
    self.lime_avg_jaccard_bin = self.lime_std_jaccard_bin = None
    self.shap_avg_jaccard_bin = self.shap_std_jaccard_bin = None
def run(self, load_data=True, tune_parameter=True):
    """Train, evaluate and LIME-explain the regression model.

    When tune_parameter is False, the parameters found in earlier
    experiments are used instead of re-running the tuning sweep.
    """
    if load_data:
        raw_lines, raw_values = self.data(0, self.num_samples)
        self.vectorize_text(raw_lines, raw_values)

    if tune_parameter:
        self.tune_parameters()
    else:
        # Pre-tuned configuration from earlier experiments.
        self.index = 1
        self.param = {"alpha": 0.05,
                      "learning_rate": "invscaling",
                      "penalty": "l2"}

    regressor = self.train()
    predictions = self.test(regressor)
    print(max(predictions))

    # Targets were log-transformed for training; expm1 maps back to seconds.
    predictions = np.expm1(predictions)
    actual = np.load(self.Y_test, mmap_mode='r')
    self.print_stats(predictions, actual)

    # Explain one test document with LIME in regression mode.
    train_matrix = np.load(self.X_train[self.index], mmap_mode='r')
    test_matrix = np.load(self.X_test[self.index], mmap_mode='r')
    lime_explainer = LimeTabularExplainer(train_matrix, mode="regression")
    explanation = lime_explainer.explain_instance(
        test_matrix[self.text_index], regressor.predict)
    explanation.as_pyplot_figure()
def explain_with_lime(X_test, model, model_name, encoder,
                      categorical_features_indices, categorical_encoding,
                      class_names, feature_names, test_instance=10):
    """Explain a prediction from the test set with a trained model."""
    columns = X_test.columns.tolist()

    def predict_fn(x):
        # Re-encode the perturbed raw rows before querying the model.
        frame = pd.DataFrame(x, columns=columns)
        return model.predict_proba(encoder.transform(frame).astype(float))

    explainer = LimeTabularExplainer(
        X_test.to_numpy(),
        mode="classification",
        feature_names=feature_names,
        class_names=class_names,
        categorical_features=categorical_features_indices,
        categorical_names=categorical_encoding,
        kernel_width=3)  # might set seed?
    explanation = explainer.explain_instance(
        X_test.iloc[test_instance, :], predict_fn, num_features=5)

    # Render and save the explanation next to the other model outputs.
    explanation.as_pyplot_figure()
    plt.tight_layout()
    plt.savefig(PATHS["03_data_outputs"] + model_name + "_lime_plot.png")
    plt.close()

    # Surrogate internals: coefficients, intercept, and local R-squared.
    print("Coefficients of linear model: ", explanation.local_exp)
    print("\n")
    print("Intercept: ", explanation.intercept)
    print("\n")
    print("R-squared: ", explanation.score)
def run(self, load_data=True, tune_parameter=True):
    """Train and evaluate the sparse regressor, then render a LIME plot.

    The plot labelings are not meaningful class labels since the
    underlying model is a regression.
    """
    if load_data:
        text_lines, target_values = self.data(0, self.num_samples)
        self.vectorize_text(text_lines, target_values)

    if not tune_parameter:
        # Run with the parameters from our earlier experiments.
        self.index = 0
        self.param = {"alpha": 0.1,
                      "learning_rate": "invscaling",
                      "penalty": "l2"}
    else:
        self.tune_parameters()

    model = self.train()
    print(model.densify())
    predicted = self.test(model)
    actual = np.load(self.Y_test, mmap_mode='r')
    print(predicted.shape)
    self.print_stats(predicted, actual)

    # Show a LIME plot of the regression for one test instance.
    features_train = np.load(self.X_train[self.index], mmap_mode='r')
    features_test = np.load(self.X_test[self.index], mmap_mode='r')
    lime_explainer = LimeTabularExplainer(features_train, mode="regression")
    lime_exp = lime_explainer.explain_instance(
        features_test[self.text_index], model.predict)
    lime_exp.as_pyplot_figure()
def __init__(
    self,
    predict_fn,
    data,
    sampler=None,
    feature_names=None,
    feature_types=None,
    explain_kwargs=None,
    n_jobs=1,
    **kwargs
):
    """Initialize the LIME-backed explainer wrapper.

    BUG FIX: ``explain_kwargs`` used a mutable default ``{}`` shared
    across every instance; it now defaults to None and each instance gets
    its own fresh dict (backward-compatible for all callers).

    Args:
        predict_fn: black-box prediction function to explain.
        data: background dataset used to build the tabular explainer.
        sampler: unsupported; a warning is emitted when provided.
        feature_names: optional feature names, unified with the data.
        feature_types: optional feature types, unified with the data.
        explain_kwargs: extra kwargs stored for explanation time.
        n_jobs: parallelism for explanation calls.
        **kwargs: extra kwargs forwarded to LimeTabularExplainer.
    """
    self.data, _, self.feature_names, self.feature_types = unify_data(
        data, None, feature_names, feature_types
    )
    self.predict_fn = unify_predict_fn(predict_fn, self.data)
    self.n_jobs = n_jobs
    if sampler is not None:  # pragma: no cover
        warnings.warn("Sampler interface not currently supported.")
    self.sampler = sampler
    # Each instance owns its dict — avoids the shared-mutable-default trap.
    self.explain_kwargs = {} if explain_kwargs is None else explain_kwargs
    self.kwargs = kwargs
    # Default to regression mode; caller kwargs may override it.
    final_kwargs = {"mode": "regression"}
    if self.feature_names:
        final_kwargs["feature_names"] = self.feature_names
    final_kwargs.update(self.kwargs)
    self.lime = LimeTabularExplainer(self.data, **final_kwargs)
def test_lime_explainer_no_regressor(self):
    """The default surrogate regressor should surface petal width and
    petal length as the top features on iris.

    BUG FIX: ``sklearn.cross_validation`` was removed in scikit-learn
    0.20 — use ``sklearn.model_selection.train_test_split``.  Also
    replaced the long-deprecated ``assertEquals`` alias with
    ``assertEqual``.
    """
    from sklearn.model_selection import train_test_split
    np.random.seed(1)
    iris = load_iris()
    train, test, labels_train, labels_test = train_test_split(
        iris.data, iris.target, train_size=0.80)
    rf = RandomForestClassifier(n_estimators=500)
    rf.fit(train, labels_train)
    i = np.random.randint(0, test.shape[0])
    explainer = LimeTabularExplainer(train,
                                     feature_names=iris.feature_names,
                                     class_names=iris.target_names,
                                     discretize_continuous=True)
    exp = explainer.explain_instance(test[i],
                                     rf.predict_proba,
                                     num_features=2)
    self.assertIsNotNone(exp)
    keys = [x[0] for x in exp.as_list()]
    self.assertEqual(1,
                     sum([1 if 'petal width' in x else 0 for x in keys]),
                     "Petal Width is a major feature")
    self.assertEqual(1,
                     sum([1 if 'petal length' in x else 0 for x in keys]),
                     "Petal Length is a major feature")
def calcul_interpretation(clf, client_id):
    """LIME-explain one client's prediction and return a tidy frame.

    Improvements: removed the unused ``exp_positives`` accumulator,
    replaced the manual index-append loop with comprehensions, and built
    the DataFrame column-wise so ``exp_values`` keeps a numeric dtype
    instead of object.

    Args:
        clf: fitted classifier exposing predict_proba.
        client_id: value matched against the SK_ID_CURR column of the
            module-level ``test_corrs_removed`` frame.

    Returns:
        DataFrame with columns exp_keys, exp_values (sorted by absolute
        weight, ascending) and color ('red' for positive weights,
        'green' otherwise).
    """
    # Median-impute missing values before explaining.
    test_features_filled = test_features.fillna(test_features.median())
    lime1 = LimeTabularExplainer(test_features_filled,
                                 feature_names=test_features_filled.columns,
                                 discretize_continuous=False)
    # Row for the requested client, flattened to a 1-D vector.
    explain_data = test_features_filled.iloc[test_corrs_removed.index[
        test_corrs_removed['SK_ID_CURR'] == int(client_id)]].T.squeeze()
    exp = lime1.explain_instance(explain_data,
                                 clf.predict_proba,
                                 num_samples=1000)
    exp_list = exp.as_list()
    df_data = pd.DataFrame({
        'exp_keys': [key for key, _ in exp_list],
        'exp_values': [value for _, value in exp_list],
    })
    # Sort by absolute weight so the strongest effects come last.
    df_data = df_data.iloc[np.abs(df_data['exp_values'].values).argsort()]
    df_data['color'] = df_data.exp_values.apply(lambda x: 'red'
                                                if x > 0 else 'green')
    return df_data
def get_lime_scores(predictive_model, x_train, x_test):
    """Return per-sample LIME attribution magnitudes for x_test.

    Each row of the result holds |weight| for every feature mentioned in
    the LIME explanation of the corresponding test sample (0 for
    features LIME did not mention).

    BUG FIX: the original mapped each explanation entry to a feature via
    a plain substring test, so 'X1' also matched 'X10', 'X11', ... and
    scores were mis-assigned once there were 10+ features.  The feature
    index is now parsed from the entry text with a regex.

    Args:
        predictive_model: model exposing predict_proba.
        x_train: 2-D training matrix used to fit the explainer.
        x_test: 2-D matrix of samples to explain.

    Returns:
        np.ndarray of shape (len(x_test), n_features).
    """
    import re
    n_features = x_train.shape[1]
    feat_names = ["X" + str(i) for i in range(n_features)]
    explainer = LimeTabularExplainer(x_train, feature_names=feat_names)
    index_pattern = re.compile(r'X(\d+)')
    lime_scores = []
    for w in range(x_test.shape[0]):
        exp = explainer.explain_instance(x_test[w],
                                         predictive_model.predict_proba,
                                         num_features=n_features)
        row = np.zeros(n_features)
        for description, weight in exp.as_list():
            match = index_pattern.search(description)
            if match:
                row[int(match.group(1))] = np.abs(weight)
        lime_scores.append(row)
    return np.array(lime_scores).reshape(-1, n_features)
def test_lime_explainer_good_regressor(self):
    """An explicit LinearRegression surrogate should still surface the
    two dominant petal features."""
    np.random.seed(1)
    forest = RandomForestClassifier(n_estimators=500)
    forest.fit(self.train, self.labels_train)
    sample_idx = np.random.randint(0, self.test.shape[0])
    explainer = LimeTabularExplainer(self.train,
                                     mode="classification",
                                     feature_names=self.feature_names,
                                     class_names=self.target_names,
                                     discretize_continuous=True)
    exp = explainer.explain_instance(self.test[sample_idx],
                                     forest.predict_proba,
                                     num_features=2,
                                     model_regressor=LinearRegression())
    self.assertIsNotNone(exp)
    explained = [feature for feature, _ in exp.as_list()]
    self.assertEqual(1,
                     sum('petal width' in name for name in explained),
                     "Petal Width is a major feature")
    self.assertEqual(1,
                     sum('petal length' in name for name in explained),
                     "Petal Length is a major feature")
def createExplainer(self):
    """Create and return a regression-mode LIME explainer over the
    stored feature data."""
    from lime.lime_tabular import LimeTabularExplainer
    explainer_config = dict(mode="regression",
                            feature_names=self._featureNames,
                            class_names=self._targetNames)
    return LimeTabularExplainer(self._featureData, **explainer_config)
def create_explainer(X_train: pd.DataFrame, y_train: pd.DataFrame):
    """Build an entropy-discretized LIME explainer for the EPI classifier."""
    explainer_settings = {
        "feature_names": X_train.columns.values,
        "training_labels": y_train.values,
        "feature_selection": 'lasso_path',
        "class_names": ['No EPI', 'EPI'],
        "discretize_continuous": True,
        "discretizer": 'entropy',
    }
    return LimeTabularExplainer(X_train.values, **explainer_settings)
def get_tabular_explainer(self): data = self.x_train.copy() # check whether contains categorical features cat_cols = data.select_dtypes(exclude=['number']).columns try: # have categorical features if len(cat_cols) > 0: cat_features = [ list(self.x_train.columns).index(col) for col in cat_cols ] data[cat_cols] = data[cat_cols].astype('category') # label encoding data[cat_cols] = data[cat_cols].apply(lambda x: x.cat.codes) # map dictionary to label encoding label_dic = {} for i, col in enumerate(cat_cols): label_dic[cat_features[i]] = dict( enumerate(self.x_train[col].astype( 'category').cat.categories)) self.x_train = data lime_tab_explainer = LimeTabularExplainer( self.x_train.values, feature_names=self.x_train.columns, class_names=self.class_names, categorical_features=cat_features, categorical_names=label_dic, discretize_continuous=True, sample_around_instance=True) return lime_tab_explainer else: lime_tab_explainer = LimeTabularExplainer( self.x_train.values, feature_names=self.x_train.columns, class_names=self.class_names, discretize_continuous=True, sample_around_instance=True) return lime_tab_explainer except Exception as err: print('Error: model is not supported by LIME {} Explainer'.format( self.explainer_type)) err_logging(err) raise Exception(err)
def explain():
    """LIME-explain the first row of X against the random-forest model,
    returning the Explanation with the top four features."""
    tabular_explainer = LimeTabularExplainer(
        train,
        class_names=class_names,
        feature_names=feature_names,
        categorical_features=categorical_features)
    first_row = X.iloc[0]
    return tabular_explainer.explain_instance(first_row,
                                              rf.predict_proba,
                                              num_features=4)
def create_explainer(model, X):
    '''
    Convenience function for creating a LIME explainer object.
    ex) create_explainer(model, X_train)
    '''
    # `model` is unused; the parameter is kept for caller compatibility.
    return LimeTabularExplainer(X.values, feature_names=X.columns.values)
def create_model_explainer(self):
    """Instantiate the LIME tabular explainer from the stored training
    data and metadata, and keep it on self.explainer."""
    explainer_args = dict(
        feature_names=self.feature_names,
        training_labels=self.labels_train,
        class_names=self.class_names,
        categorical_features=self.categorical_feature_indices,
        categorical_names=self.categorical_names,
        discretize_continuous=True,
    )
    self.explainer = LimeTabularExplainer(self.train, **explainer_args)