Example #1
    def fit(self, X, y, lemna_component, predict_fn, labels_num):
        self.cluster_labels = self.cluster_method.fit_predict(X)
        self.num_features = X.shape[1]

        for i in range(self.cluster_num):
            inds = np.where(self.cluster_labels == i)
            explainer = LimeTabularExplainer(np.squeeze(X[inds, :]),
                                             discretize_continuous=False,
                                             sample_around_instance=True)

            simplified_models = explainer.explain_instance_with_lemna(
                self.cluster_method.cluster_centers_[i],
                predict_fn,
                lemna_component=lemna_component,
                num_samples=5000,
                labels=range(labels_num),
                num_features=X.shape[1],
                retrive_model=True)

            # coef_ is a 3-d array: feature_num x lemna_component x labels_num
            # intercept_ is a 2-d array: lemna_component x labels_num
            coef_ = np.zeros((X.shape[1], lemna_component, labels_num))
            intercept_ = np.zeros((1, lemna_component, labels_num))

            for idx in range(labels_num):
                coef_[:, :, idx] = simplified_models[idx].coef_
                intercept_[0, :, idx] = simplified_models[idx].intercept_
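                # NOTE: pi_ is overwritten on each pass, so only the last label's mixture weights end up stored in self.models.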
                pi_ = simplified_models[idx].pi_

            self.models.append((coef_, intercept_, pi_))
Example #2
 def fit(self, X: Any, class_names: List[str] = None) -> None:
     if class_names is None:
         class_names = ['0', '1']
     self._explainer = LimeTabularExplainer(
         training_data=X, feature_names=list(range(X.shape[1])),
         class_names=class_names, discretize_continuous=False,
         random_state=self._seed)
Example #3
    def test_lime_explainer_no_regressor(self):
        np.random.seed(1)
        iris = load_iris()
        train, test, labels_train, labels_test = (
            sklearn.model_selection.train_test_split(iris.data, iris.target,
                                                     train_size=0.80))

        rf = RandomForestClassifier(n_estimators=500)
        rf.fit(train, labels_train)
        i = np.random.randint(0, test.shape[0])

        explainer = LimeTabularExplainer(train,
                                         feature_names=iris.feature_names,
                                         class_names=iris.target_names,
                                         discretize_continuous=True)

        exp = explainer.explain_instance(test[i], rf.predict_proba,
                                         num_features=2)
        self.assertIsNotNone(exp)
        keys = [x[0] for x in exp.as_list()]
        self.assertEqual(1,
                         sum([1 if 'petal width' in x else 0 for x in keys]),
                         "Petal Width is a major feature")
        self.assertEqual(1,
                         sum([1 if 'petal length' in x else 0 for x in keys]),
                         "Petal Length is a major feature")
Example #4
def calcul_interpretation(clf, client_id):

    test_features_filled = test_features.fillna(test_features.median())

    lime1 = LimeTabularExplainer(test_features_filled,
                                 feature_names=test_features_filled.columns,
                                 discretize_continuous=False)

    explain_data = test_features_filled.iloc[test_corrs_removed.index[
        test_corrs_removed['SK_ID_CURR'] == int(client_id)]].T.squeeze()
    exp = lime1.explain_instance(explain_data,
                                 clf.predict_proba,
                                 num_samples=1000)

    exp_list = exp.as_list()
    exp_keys = []
    exp_values = []
    for key, value in exp_list:
        exp_keys.append(key)
        exp_values.append(value)

    df_data = pd.DataFrame(data=[exp_keys, exp_values])
    df_data = df_data.T
    df_data.columns = ['exp_keys', 'exp_values']
    df_data = df_data.iloc[np.abs(df_data['exp_values'].values).argsort()]
    df_data['color'] = df_data.exp_values.apply(lambda x: 'red'
                                                if x > 0 else 'green')

    return df_data
Example #5
def lime_tabular_global():
    targets = ['academic', 'fiction', 'magazine', 'newspaper']
    data = pd.read_pickle('data_explain_tabular.pkl')
    clf = joblib.load('model_forest_tabular.pkl')
    target = np.array(data['target'])
    data = data.drop(['target', 'year', 'ID'], axis=1)
    feature_names = list(data)
    data = data.to_numpy()
    explainer = LimeTabularExplainer(data, feature_names=feature_names, 
                                     class_names=targets)
    N = data.shape[0]
    academic, fiction, magazine, newspaper = ([],[],[],[])
    academic_w, fiction_w, magazine_w, newspaper_w = ([],[],[],[])
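    # Collect explanations separately for correctly and incorrectly classified instances.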
    for i in range(N):
        pred = clf.predict(data[i].reshape(1,-1))[0]
        if pred == target[i]:
            explanation = explainer.explain_instance(data[i], 
                                                     clf.predict_proba,
                                                     num_features=10,
                                                     top_labels=4)
            result = explanation.as_list(label=pred)
            if 0 == target[i]:
                academic.append((result, pred))
            elif 1 == target[i]:
                fiction.append((result, pred))
            elif 2 == target[i]:
                magazine.append((result, pred))
            elif 3 == target[i]:
                newspaper.append((result, pred))
            else:
                return 1
        else:
            explanation = explainer.explain_instance(data[i], 
                                                     clf.predict_proba,
                                                     num_features=10,
                                                     top_labels=4)
            result = explanation.as_list(label=pred)
            if 0 == target[i]:
                academic_w.append((result, pred))
            elif 1 == target[i]:
                fiction_w.append((result, pred))
            elif 2 == target[i]:
                magazine_w.append((result, pred))
            elif 3 == target[i]:
                newspaper_w.append((result, pred))
            else:
                return 1

    joblib.dump(academic, 'lime_academic.pkl')
    joblib.dump(fiction, 'lime_fiction.pkl')
    joblib.dump(magazine, 'lime_magazine.pkl')
    joblib.dump(newspaper, 'lime_newspaper.pkl')
    all_explanations = academic + fiction + magazine + newspaper
    joblib.dump(all_explanations, 'lime_all.pkl')

    joblib.dump(academic_w, 'lime_academic_wrong.pkl')
    joblib.dump(fiction_w, 'lime_fiction_wrong.pkl')
    joblib.dump(magazine_w, 'lime_magazine_wrong.pkl')
    joblib.dump(newspaper_w, 'lime_newspaper_wrong.pkl')
    all_explanations_w = academic_w + fiction_w + magazine_w + newspaper_w
    joblib.dump(all_explanations_w, 'lime_all_wrong.pkl')
Example #6
def explain():
    try:
        with open(CFG.TRAINING, 'rb') as f:
            training = pickle.load(f)
        my_json = request.get_json()
        encoded_dict = convert_json(my_json)
        dictionary = eval(encoded_dict)

        normalize_age_mons = age_mons_preprocessing.transform(
            [[dictionary['age_month']]])[0, 0]

        dictionary['age_month'] = normalize_age_mons
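        # Build the instance vector from the dict's values; assumes their order matches the training columns.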
        pred = np.array([x[1] for x in dictionary.items()])

        exp = LimeTabularExplainer(training.values,
                                   feature_names=training.columns,
                                   discretize_continuous=True)

        fig = exp.explain_instance(pred,
                                   model.predict_proba).as_pyplot_figure()
        fig.set_size_inches(30, 10)
        plt.tight_layout()
        plt.savefig('explain.png')

        return send_file('explain.png',
                         mimetype='image/png',
                         as_attachment=True)

    except ValueError:
        return 'Bad Request', 400
Example #7
    def __init__(self,
                 bb_classifier,
                 X,
                 class_names,
                 explanation_samples=5000):
        self.bb_classifier = bb_classifier
        self.EX, self.StdX = np.mean(X), np.array(np.std(X, axis=0, ddof=0))
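        # EX: per-feature means (used as the SHAP background); StdX: per-feature standard deviations.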
        self.class_names = class_names
        self.F = X.shape[1]  # number of features
        self.explanation_samples = explanation_samples

        # SHAP Kernel
        self.SHAPEXPL = shap.KernelExplainer(self.bb_classifier.predict_proba,
                                             self.EX,
                                             nsamples=explanation_samples)

        # LIME Kernel
        self.LIMEEXPL = LimeTabularExplainer(
            X.astype('float'),
            feature_names=X.columns.tolist(),
            class_names=self.class_names,
            discretize_continuous=False,
            sample_around_instance=True,
            # categorical_features=categorical_features,
            # feature_selection='highest_weights',
            # sample_using_pca=False,
            # weight_classifier_labels=False,
            random_state=10)
        self.metrics = None
        self.lime_avg_jaccard_bin = self.lime_std_jaccard_bin = None
        self.shap_avg_jaccard_bin = self.shap_std_jaccard_bin = None
Example #8
    def lime(self, instance=None, html_file=False, num_features=2):
        """

        :param instance:
        :param html_file:
        :param num_features:
        :return:
        """
        explainer = LimeTabularExplainer(self.x_train.values,
                                         mode="classification",
                                         feature_names=self.x_train.columns,
                                         class_names=['false', 'true'],
                                         training_labels=self.y_train,
                                         discretize_continuous=True)
        if instance is None:
            instance = np.random.randint(0, self.x_test.shape[0])
            print('Case:  ' + str(instance))
            print('Label: ' + str(self.y_test.iloc[instance]))

        exp = explainer.explain_instance(self.x_test.values[instance],
                                         self.model.predict_proba,
                                         num_features=num_features)
        print("Lime explanation: ")
        exp.as_pyplot_figure(label=1).show()
        if html_file:
            exp.save_to_file(
                str(instance) + "_" + str(self.y_test.iloc[instance]) +
                "_explain.html")
Example #10
    def test_lime_explainer_good_regressor(self):
        np.random.seed(1)

        rf = RandomForestClassifier(n_estimators=500)
        rf.fit(self.train, self.labels_train)
        i = np.random.randint(0, self.test.shape[0])

        explainer = LimeTabularExplainer(self.train,
                                         mode="classification",
                                         feature_names=self.feature_names,
                                         class_names=self.target_names,
                                         discretize_continuous=True)

        exp = explainer.explain_instance(self.test[i],
                                         rf.predict_proba,
                                         num_features=2,
                                         model_regressor=LinearRegression())

        self.assertIsNotNone(exp)
        keys = [x[0] for x in exp.as_list()]
        self.assertEqual(1,
                         sum([1 if 'petal width' in x else 0 for x in keys]),
                         "Petal Width is a major feature")
        self.assertEqual(1,
                         sum([1 if 'petal length' in x else 0 for x in keys]),
                         "Petal Length is a major feature")
Example #12
    def generate_neighborhood_data(self,
                                   sample,
                                   predict_fn,
                                   distance_metric='euclidean',
                                   n_samples=500,
                                   seed=1,
                                   **kwargs):
        '''Generate neighborhood data for a given point (currently using LIME)

        Args:
            sample: Observed sample
            predict_fn: Black box predictor used to label all points
            distance_metric: Distance metric used for weights
            n_samples: Number of samples to generate

        Returns:
            neighbor_data (xs around sample),
            weights (weights of instances in xs),
            neighbor_data_labels (ys around sample, corresponding to xs),
            sample (the observed sample itself)
        '''
        from lime.lime_tabular import LimeTabularExplainer
        e = LimeTabularExplainer(
            self.train_data,
            categorical_features=self.categorical_features,
            discretize_continuous=False)

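        # Accesses LIME's private __data_inverse via name mangling; this may break across lime versions.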
        _, neighbor_data = e._LimeTabularExplainer__data_inverse(
            sample, n_samples)
        scaled_data = (neighbor_data - e.scaler.mean_) / e.scaler.scale_
        return (*self._data(neighbor_data, scaled_data, distance_metric,
                            predict_fn), sample)
Example #13
def lime():

    print('Loading dataset ...')
    X_train, Y_train, X_val, Y_val, X_test, Y_test = get_dataset(
        minibatch_size=32, sampling='None', numpy='True')

    net = load_fraudnet()
    lime_list = []
    explainer = LimeTabularExplainer(X_train, training_labels=Y_train)

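    # LIME expects an (n_samples, n_classes) probability matrix; build one from the network's single positive-class output.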
    def func_call(x):
        input_ = torch.from_numpy(x).to(device=device).float()
        prob_1 = net(input_).view(-1, 1).cpu().data.numpy()
        prob_0 = 1 - prob_1
        prob = np.concatenate([prob_0, prob_1], axis=1)
        return prob

    for i in range(X_test.shape[0]):

        exp = explainer.explain_instance(X_test[i, :],
                                         func_call,
                                         labels=(0, 1),
                                         num_features=50)
        lime_list.append(exp)

    lime_list = np.array(lime_list)
    lime_list = preprocess(lime_list)
    pickle.dump(lime_list, open('./saved_attributions/lime.pkl', 'wb'))
Example #14
File: lime.py  Project: zzzace2000/GAMs
    def __init__(
        self,
        predict_fn,
        data,
        sampler=None,
        feature_names=None,
        feature_types=None,
        explain_kwargs={},
        n_jobs=1,
        **kwargs
    ):

        self.data, _, self.feature_names, self.feature_types = unify_data(
            data, None, feature_names, feature_types
        )
        self.predict_fn = unify_predict_fn(predict_fn, self.data)
        self.n_jobs = n_jobs

        if sampler is not None:  # pragma: no cover
            warnings.warn("Sampler interface not currently supported.")

        self.sampler = sampler
        self.explain_kwargs = explain_kwargs

        self.kwargs = kwargs
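        # Default to regression mode; callers can override "mode" via **kwargs.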
        final_kwargs = {"mode": "regression"}
        if self.feature_names:
            final_kwargs["feature_names"] = self.feature_names
        final_kwargs.update(self.kwargs)

        self.lime = LimeTabularExplainer(self.data, **final_kwargs)
Example #15
    def fit(self, X, y, predict_fn, labels_num):
        self.cluster_labels = self.cluster_method.fit_predict(X)

        for i in range(self.cluster_num):
            inds = np.where(self.cluster_labels == i)
            explainer = LimeTabularExplainer(X[inds],
                                             discretize_continuous=False,
                                             sample_around_instance=True)

            simplified_models = explainer.explain_instance(
                self.cluster_method.cluster_centers_[i],
                predict_fn,
                num_samples=10000,
                labels=range(labels_num),
                num_features=X.shape[1],
                retrive_model=True)

            coef_ = np.zeros((X.shape[1], labels_num))
            intercept_ = np.zeros((1, labels_num))

            for idx in range(labels_num):
                coef_[:, idx] = simplified_models[idx].coef_
                intercept_[0, idx] = simplified_models[idx].intercept_

            self.models.append((coef_, intercept_))
Example #16
 def fit(self, X, y=None):
     self.explainer_ = LimeTabularExplainer(
         X,
         feature_names=self.feature_names,
         class_names=self.class_names,
         discretize_continuous=True)
     return self
Example #17
def explain_with_lime(X_test, model, model_name, encoder, categorical_features_indices, categorical_encoding,
                      class_names, feature_names, test_instance=10):
    """Explain a prediction from the test set with a trained model."""
    columns = X_test.columns.tolist()

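    # Re-encode LIME's raw numpy samples before handing them to the model.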
    predict_fn = lambda x: model.predict_proba(encoder.transform(pd.DataFrame(x, columns=columns)).astype(float))

    explainer = LimeTabularExplainer(X_test.to_numpy(),
                                     mode="classification",
                                     feature_names=feature_names,
                                     class_names=class_names,
                                     categorical_features=categorical_features_indices,
                                     categorical_names=categorical_encoding,
                                     kernel_width=3)

    # might set seed?
    explanation = explainer.explain_instance(X_test.iloc[test_instance, :], predict_fn, num_features=5)

    # Show and save explanation
    # explanation.save_to_file(PATHS["03_data_outputs"] + "lime.html")

    explanation.as_pyplot_figure()
    plt.tight_layout()
    plt.savefig(PATHS["03_data_outputs"] + model_name + "_lime_plot.png")
    plt.close()

    # access the coefficients, the intercept and the R squared of the linear model
    print("Coefficients of linear model: ", explanation.local_exp)
    print("\n")
    print("Intercept: ", explanation.intercept)
    print("\n")
    print("R-squared: ", explanation.score)
Example #18
    def get_local_interpretation(ID_client, dataframe, modelname,
                                 features_importances, label):

        model = load_model(modelname)
        X = dataframe[dataframe['SK_ID_CURR'] == int(ID_client)]
        X = X.drop(['SK_ID_CURR', 'TARGET'], axis=1)
        dataframe = dataframe.drop(['SK_ID_CURR', 'TARGET'], axis=1)

        X_train = dataframe.sample(frac=0.1, random_state=42).values

        explainer = LimeTabularExplainer(
            training_data=X_train,
            mode='classification',
            feature_names=dataframe.columns,
            verbose=1,
            random_state=42)
        explanation = explainer.explain_instance(
            np.ravel(np.array(X)),
            predict_fn=model.predict_proba,
            labels=[0, 1],
            num_features=len(dataframe.columns))

        #fig = explanation.as_pyplot_figure(label=label)
        #st.pyplot(fig)

        return explanation
Example #19
def lime_interpreter(dataset_features,
                     x_train,
                     x_test,
                     classifier,
                     model_name,
                     rng=True,
                     instance=None):
    feature_names = ["f" + str(i) for i in range(dataset_features)]  #
    explainer = LimeTabularExplainer(x_train,
                                     feature_names=feature_names,
                                     discretize_continuous=True)

    def wrapped_fn(x_test):
        p = classifier.predict_proba(x_test).toarray()
        p_norm = norm_probabilities(p)
        return p_norm

    if rng:
        idx = np.random.randint(0, x_test.shape[0])
    else:
        idx = instance
    exp = explainer.explain_instance(x_test[idx], predict_fn=wrapped_fn)
    exp.save_to_file(model_name + '.html')
    print("Interpretation can be found as an HTML file in the current "
          "directory, named:")
    print(model_name)
    print("")
Example #20
    def run(self, load_data=True, tune_parameter=True):
        if load_data:
            lines, values = self.data(0, self.num_samples)
            self.vectorize_text(lines, values)

        # If tune_parameter is false, we run with our experimented parameters
        if tune_parameter:
            self.tune_parameters()
        else:
            self.index = 0
            self.param = {
                "alpha": 0.1,
                "learning_rate": "invscaling",
                "penalty": "l2"
            }

        reg = self.train()
        print(reg.densify())
        y_pred = self.test(reg)
        y_test = np.load(self.Y_test, mmap_mode='r')
        print(y_pred.shape)
        self.print_stats(y_pred, y_test)

        # Show a LIME plot of the regression. The labels will not be correct since we are using a regression model.
        X_train = np.load(self.X_train[self.index], mmap_mode='r')
        X_test = np.load(self.X_test[self.index], mmap_mode='r')
        explainer = LimeTabularExplainer(X_train, mode="regression")
        exp = explainer.explain_instance(X_test[self.text_index], reg.predict)
        exp.as_pyplot_figure()
Example #21
def get_lime_scores(predictive_model, x_train, x_test):

    lime_scores = []
    FEATS = len(x_train[0])
    feat_names = ["X" + str(i) for i in range(len(x_train[0]))]
    explainer = LimeTabularExplainer(x_train, feature_names=feat_names)

    for w in range(x_test.shape[0]):
        exp = explainer.explain_instance(x_test[w],
                                         predictive_model.predict_proba,
                                         num_features=FEATS)
        rank_list = exp.as_list()

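        # For each explanation entry, recover the index k of the feature whose name 'Xk' appears in the rule string.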
        curr_scores = [
            np.where(
                np.array([
                    pd.Series(rank_list[v][0]).str.contains('X' + str(k))[0] *
                    1 for k in range(FEATS)
                ]) == 1)[0][0] for v in range(len(rank_list))
        ]
        lime_score_ = np.zeros((1, x_train.shape[1]))
        lime_score_[0, np.array(curr_scores)] = np.array(
            [np.abs(rank_list[v][1]) for v in range(len(rank_list))])

        lime_scores.append(lime_score_)

    lime_scores = np.array(lime_scores).reshape(-1, x_train.shape[1])

    return lime_scores
Example #22
    def test_lime_explainer_entropy_discretizer(self):
        np.random.seed(1)

        rf = RandomForestClassifier(n_estimators=500)
        rf.fit(self.train, self.labels_train)
        i = np.random.randint(0, self.test.shape[0])

        explainer = LimeTabularExplainer(self.train,
                                         feature_names=self.feature_names,
                                         class_names=self.target_names,
                                         training_labels=self.labels_train,
                                         discretize_continuous=True,
                                         discretizer='entropy')

        exp = explainer.explain_instance(self.test[i],
                                         rf.predict_proba,
                                         num_features=2)
        self.assertIsNotNone(exp)
        keys = [x[0] for x in exp.as_list()]
        print(keys)
        self.assertEqual(1,
                         sum([1 if 'petal width' in x else 0 for x in keys]),
                         "Petal Width is a major feature")
        self.assertEqual(1,
                         sum([1 if 'petal length' in x else 0 for x in keys]),
                         "Petal Length is a major feature")
Example #23
    def run(self, load_data=True, tune_parameter=True):
        if load_data:
            lines, values = self.data(0, self.num_samples)
            self.vectorize_text(lines, values)

        # If tune_parameter is false, we run with our experimented parameters
        if tune_parameter:
            self.tune_parameters()
        else:
            self.index = 1
            self.param = {"alpha": 0.05,
                          "learning_rate": "invscaling", "penalty": "l2"}

        reg = self.train()
        y_pred = self.test(reg)
        print(max(y_pred))
        # Trained on log1p(y), so convert back to seconds with expm1(y_pred)
        y_pred = np.expm1(y_pred)
        y_test = np.load(self.Y_test, mmap_mode='r')
        self.print_stats(y_pred, y_test)

        X_train = np.load(self.X_train[self.index], mmap_mode='r')
        X_test = np.load(self.X_test[self.index], mmap_mode='r')
        explainer = LimeTabularExplainer(X_train, mode="regression")
        exp = explainer.explain_instance(X_test[self.text_index], reg.predict)
        exp.as_pyplot_figure()
Example #24
 def create_model_explainer(self):
     self.explainer = LimeTabularExplainer(
         self.train,
         feature_names=self.feature_names,
         training_labels=self.labels_train,
         class_names=self.class_names,
         categorical_features=self.categorical_feature_indices,
         categorical_names=self.categorical_names,
         discretize_continuous=True)
Example #25
def explain():
    explainer = LimeTabularExplainer(train,
                                     class_names=class_names,
                                     feature_names=feature_names,
                                     categorical_features=categorical_features)

    return explainer.explain_instance(X.iloc[0],
                                      rf.predict_proba,
                                      num_features=4)
Example #26
 def _define_explainer(self):
     # define explainer
     self.explainer = LimeTabularExplainer(
         training_data=self.train_set,
         feature_names=self.input_cols,
         class_names=self.prediction_classes,
         categorical_features=self.categorical_features,
         categorical_names=self.cat_names,
         discretize_continuous=True
             )
Example #27
    def __init__(self, model, feature_names, classes, training_data):
        self.model = model
        self.feature_names = feature_names
        self.classes = classes
        self.training_data = training_data

        self.explainer = LimeTabularExplainer(training_data=training_data,
                                              mode='classification',
                                              feature_names=self.feature_names,
                                              class_names=self.classes)
Example #28
def create_lime_explanation(explainer, new_observation, **kwargs):
    # utility function for predict_surrogate(type='lime')    
    from lime.lime_tabular import LimeTabularExplainer
    explainer_dict, explanation_dict = unpack_kwargs_lime(explainer, new_observation, **kwargs)
    lime_tabular_explainer = LimeTabularExplainer(**explainer_dict)
    explanation = lime_tabular_explainer.explain_instance(**explanation_dict)
    
    explanation.plot = types.MethodType(plot_lime_custom, explanation)
    explanation.result = pd.DataFrame(explanation.as_list(), columns=['variable', 'effect'])
    return explanation
Example #29
def interpret_data(X, y, func):
    explainer = LimeTabularExplainer(X, discretize_continuous=False, kernel_width=3)
    times, scores = [], []
    for r_idx in range(100):
        start_time = time.time()
        explanation = explainer.explain_instance(X[r_idx, :], func)
        times.append(time.time() - start_time)
        scores.append(explanation.score)
        print('...')

    return times, scores
Example #31
 def __init__(self, random_forest_model, x_train, y_train):
     self.rf_model = random_forest_model
     self.x_train = x_train
     self.y_train = y_train
     self.columns = list(x_train.columns)
     self.explainer = LimeTabularExplainer(x_train.values,
                                           feature_names=self.columns)
     self.model = InMemoryModel(self.rf_model.predict_proba,
                                examples=self.x_train)
     self.interpreter = Interpretation(training_data=self.x_train,
                                       feature_names=self.columns,
                                       training_labels=self.y_train)
Example #32
    def explain_tabular(self,
                        trainset,
                        labels,
                        instance,
                        num_features=5,
                        kernel_width=3):
        """Explain categorical and numeric features for a prediction.

        It analyzes the prediction with LIME and returns a report of the most impactful tabular
        features contributing to certain labels.

        Args:
          trainset: a DataFrame representing the training features that LIME can use to decide
              value distributions.
          labels: a list of labels to explain.
          instance: the prediction instance. It needs to conform to the model's input. Can be a csv
              line string, or a dict.
          num_features: maximum number of features to show.
          kernel_width: Passed to LIME LimeTabularExplainer directly.

        Returns:
          A LIME's lime.explanation.Explanation.
        """
        from lime.lime_tabular import LimeTabularExplainer

        if isinstance(instance, six.string_types):
            instance = next(
                csv.DictReader([instance], fieldnames=self._headers))

        categories = self._get_unique_categories(trainset)
        np_trainset = self._preprocess_data_for_tabular_explain(
            trainset, categories)
        predict_fn = self._make_tabular_predict_fn(labels, instance,
                                                   categories)
        prediction_df = pd.DataFrame([instance])
        prediction_instance = self._preprocess_data_for_tabular_explain(
            prediction_df, categories)

        explainer = LimeTabularExplainer(
            np_trainset,
            feature_names=(self._categorical_columns + self._numeric_columns),
            class_names=labels,
            categorical_features=range(len(categories)),
            categorical_names={i: v
                               for i, v in enumerate(categories)},
            kernel_width=kernel_width)

        exp = explainer.explain_instance(prediction_instance[0],
                                         predict_fn,
                                         num_features=num_features,
                                         labels=range(len(labels)))
        return exp
Example #33
File: model.py  Project: indyfree/CARLA
    def _get_lime_coefficients(
        self, factuals: pd.DataFrame
    ) -> Tuple[np.ndarray, np.ndarray]:
        """
        Actionable Recourse is only defined on linear models. To make it work for arbitrary non-linear networks
        we need to find the lime coefficients for every instance.

        Parameters
        ----------
        factuals : pd.DataFrame
            Instances we want to get lime coefficients

        Returns
        -------
        coeffs : np.ndArray
        intercepts : np.ndArray

        """
        coeffs = np.zeros(factuals.shape)
        intercepts = []
        lime_data = self._data.df[self._mlmodel.feature_input_order]
        lime_label = self._data.df[self._data.target]

        lime_exp = LimeTabularExplainer(
            training_data=lime_data.values,
            training_labels=lime_label,
            feature_names=self._mlmodel.feature_input_order,
            discretize_continuous=self._discretize_continuous,
            sample_around_instance=self._sample_around_instance,
            categorical_names=[
                cat
                for cat in self._mlmodel.feature_input_order
                if cat not in self._data.continuous
            ]
            # self._data.encoded_normalized's categorical features contain feature name and value, separated by '_'
            # while self._data.categorical do not contain those additional values.
        )

        for index, row in factuals.iterrows():
            factual = row.values
            explanations = lime_exp.explain_instance(
                factual,
                self._mlmodel.predict_proba,
                num_features=len(self._mlmodel.feature_input_order),
            )
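            # LIME keys results by label; index 1 is the positive class.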
            intercepts.append(explanations.intercept[1])

            for tpl in explanations.local_exp[1]:
                coeffs[index][tpl[0]] = tpl[1]

        return coeffs, np.array(intercepts)
Example #34
    def test_lime_explainer_bad_regressor(self):
        iris = load_iris()
        train, test, labels_train, labels_test = sklearn.model_selection.train_test_split(iris.data, iris.target,
                                                                                          train_size=0.80)

        rf = RandomForestClassifier(n_estimators=500)
        rf.fit(train, labels_train)
        lasso = Lasso(alpha=1, fit_intercept=True)
        i = np.random.randint(0, test.shape[0])
        with self.assertRaises(TypeError):
            explainer = LimeTabularExplainer(train, feature_names=iris.feature_names,
                                             class_names=iris.target_names,
                                             discretize_continuous=True)

            exp = explainer.explain_instance(test[i], rf.predict_proba, num_features=2, top_labels=1, model_regressor=lasso)
Example #35
    def test_lime_explainer_no_regressor(self):
        np.random.seed(1)
        iris = load_iris()
        train, test, labels_train, labels_test = sklearn.model_selection.train_test_split(iris.data, iris.target,
                                                                                          train_size=0.80)

        rf = RandomForestClassifier(n_estimators=500)
        rf.fit(train, labels_train)
        i = np.random.randint(0, test.shape[0])

        explainer = LimeTabularExplainer(train, feature_names=iris.feature_names,
                                         class_names=iris.target_names,
                                         discretize_continuous=True)

        exp = explainer.explain_instance(test[i], rf.predict_proba, num_features=2)
        self.assertIsNotNone(exp)
Example #36
    def test_lime_explainer_bad_regressor(self):

        rf = RandomForestClassifier(n_estimators=500)
        rf.fit(self.train, self.labels_train)
        lasso = Lasso(alpha=1, fit_intercept=True)
        i = np.random.randint(0, self.test.shape[0])
        with self.assertRaises(TypeError):
            explainer = LimeTabularExplainer(self.train,
                                             mode="classification",
                                             feature_names=self.feature_names,
                                             class_names=self.target_names,
                                             discretize_continuous=True)
            exp = explainer.explain_instance(self.test[i],  # noqa:F841
                                             rf.predict_proba,
                                             num_features=2, top_labels=1,
                                             model_regressor=lasso)
Example #38
    def test_lime_explainer_good_regressor_synthetic_data(self):
        X, y = make_classification(n_samples=1000,
                                   n_features=20,
                                   n_informative=2,
                                   n_redundant=2,
                                   random_state=10)

        rf = RandomForestClassifier(n_estimators=500)
        rf.fit(X, y)
        instance = np.random.randint(0, X.shape[0])
        feature_names = ["feature" + str(i) for i in range(20)]
        explainer = LimeTabularExplainer(X,
                                         feature_names=feature_names,
                                         discretize_continuous=True)

        exp = explainer.explain_instance(X[instance], rf.predict_proba)

        self.assertIsNotNone(exp)
        self.assertEqual(10, len(exp.as_list()))
Example #39
from lime.lime_tabular import LimeTabularExplainer
import numpy as np
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import train_test_split
from sklearn.datasets import load_boston

boston = load_boston()

x_train, x_test, y_train, y_test = train_test_split(boston.data, boston.target, train_size=0.8)

rf = RandomForestRegressor(n_estimators=1000)

rf.fit(x_train, y_train)

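# Treat columns with at most 10 distinct values as categorical.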
categorical_features = np.argwhere(np.array([len(set(boston.data[:,x])) for x in range(boston.data.shape[1])]) <= 10).flatten()
explainer = LimeTabularExplainer(x_train, categorical_features=categorical_features, feature_names=boston.feature_names, class_names=['price'], verbose=True, mode='regression')

exp = explainer.explain_instance(x_test[0], rf.predict, num_features=5)

print(exp.as_list())
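
Note: load_boston was removed in scikit-learn 1.2, so the snippet above no longer runs on current
versions. A minimal sketch of the same regression workflow, assuming fetch_california_housing as a
stand-in dataset (the LIME calls themselves are unchanged):

from lime.lime_tabular import LimeTabularExplainer
from sklearn.datasets import fetch_california_housing
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import train_test_split

# Stand-in dataset; load_boston is gone in scikit-learn >= 1.2.
housing = fetch_california_housing()

x_train, x_test, y_train, y_test = train_test_split(housing.data, housing.target, train_size=0.8)

rf = RandomForestRegressor(n_estimators=100)
rf.fit(x_train, y_train)

explainer = LimeTabularExplainer(x_train, feature_names=housing.feature_names, class_names=['price'], verbose=True, mode='regression')

exp = explainer.explain_instance(x_test[0], rf.predict, num_features=5)
print(exp.as_list())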
Example #40
    def test_lime_tabular_explainer_not_equal_random_state(self):
        X, y = make_classification(n_samples=1000,
                                   n_features=20,
                                   n_informative=2,
                                   n_redundant=2,
                                   random_state=10)

        rf = RandomForestClassifier(n_estimators=500, random_state=10)
        rf.fit(X, y)
        instance = np.random.RandomState(10).randint(0, X.shape[0])
        feature_names = ["feature" + str(i) for i in range(20)]

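        # Cases [1]-[4] in each discretizer section vary which random_state values (discretizer
        # vs. explainer) agree; the explanations should only be equal when both agree (case [4]).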
        # ----------------------------------------------------------------------
        # -------------------------Quartile Discretizer-------------------------
        # ----------------------------------------------------------------------

        # ---------------------------------[1]----------------------------------
        discretizer = QuartileDiscretizer(X, [], feature_names, y,
                                          random_state=20)
        explainer_1 = LimeTabularExplainer(X,
                                           feature_names=feature_names,
                                           discretize_continuous=True,
                                           discretizer=discretizer,
                                           random_state=10)
        exp_1 = explainer_1.explain_instance(X[instance], rf.predict_proba,
                                             num_samples=500)

        discretizer = QuartileDiscretizer(X, [], feature_names, y,
                                          random_state=10)
        explainer_2 = LimeTabularExplainer(X,
                                           feature_names=feature_names,
                                           discretize_continuous=True,
                                           discretizer=discretizer,
                                           random_state=10)
        exp_2 = explainer_2.explain_instance(X[instance], rf.predict_proba,
                                             num_samples=500)

        self.assertTrue(exp_1.as_map() != exp_2.as_map())

        # ---------------------------------[2]----------------------------------
        discretizer = QuartileDiscretizer(X, [], feature_names, y,
                                          random_state=20)
        explainer_1 = LimeTabularExplainer(X,
                                           feature_names=feature_names,
                                           discretize_continuous=True,
                                           discretizer=discretizer,
                                           random_state=20)
        exp_1 = explainer_1.explain_instance(X[instance], rf.predict_proba,
                                             num_samples=500)

        discretizer = QuartileDiscretizer(X, [], feature_names, y,
                                          random_state=10)
        explainer_2 = LimeTabularExplainer(X,
                                           feature_names=feature_names,
                                           discretize_continuous=True,
                                           discretizer=discretizer,
                                           random_state=10)
        exp_2 = explainer_2.explain_instance(X[instance], rf.predict_proba,
                                             num_samples=500)

        self.assertTrue(exp_1.as_map() != exp_2.as_map())

        # ---------------------------------[3]----------------------------------
        discretizer = QuartileDiscretizer(X, [], feature_names, y,
                                          random_state=20)
        explainer_1 = LimeTabularExplainer(X,
                                           feature_names=feature_names,
                                           discretize_continuous=True,
                                           discretizer=discretizer,
                                           random_state=20)
        exp_1 = explainer_1.explain_instance(X[instance], rf.predict_proba,
                                             num_samples=500)

        discretizer = QuartileDiscretizer(X, [], feature_names, y,
                                          random_state=20)
        explainer_2 = LimeTabularExplainer(X,
                                           feature_names=feature_names,
                                           discretize_continuous=True,
                                           discretizer=discretizer,
                                           random_state=10)
        exp_2 = explainer_2.explain_instance(X[instance], rf.predict_proba,
                                             num_samples=500)

        self.assertTrue(exp_1.as_map() != exp_2.as_map())

        # ---------------------------------[4]----------------------------------
        discretizer = QuartileDiscretizer(X, [], feature_names, y,
                                          random_state=20)
        explainer_1 = LimeTabularExplainer(X,
                                           feature_names=feature_names,
                                           discretize_continuous=True,
                                           discretizer=discretizer,
                                           random_state=20)
        exp_1 = explainer_1.explain_instance(X[instance], rf.predict_proba,
                                             num_samples=500)

        discretizer = QuartileDiscretizer(X, [], feature_names, y,
                                          random_state=20)
        explainer_2 = LimeTabularExplainer(X,
                                           feature_names=feature_names,
                                           discretize_continuous=True,
                                           discretizer=discretizer,
                                           random_state=20)
        exp_2 = explainer_2.explain_instance(X[instance], rf.predict_proba,
                                             num_samples=500)

        self.assertFalse(exp_1.as_map() != exp_2.as_map())

        # ----------------------------------------------------------------------
        # --------------------------Decile Discretizer--------------------------
        # ----------------------------------------------------------------------

        # ---------------------------------[1]----------------------------------
        discretizer = DecileDiscretizer(X, [], feature_names, y,
                                        random_state=20)
        explainer_1 = LimeTabularExplainer(X,
                                           feature_names=feature_names,
                                           discretize_continuous=True,
                                           discretizer=discretizer,
                                           random_state=10)
        exp_1 = explainer_1.explain_instance(X[instance], rf.predict_proba,
                                             num_samples=500)

        discretizer = DecileDiscretizer(X, [], feature_names, y,
                                        random_state=10)
        explainer_2 = LimeTabularExplainer(X,
                                           feature_names=feature_names,
                                           discretize_continuous=True,
                                           discretizer=discretizer,
                                           random_state=10)
        exp_2 = explainer_2.explain_instance(X[instance], rf.predict_proba,
                                             num_samples=500)

        self.assertTrue(exp_1.as_map() != exp_2.as_map())

        # ---------------------------------[2]----------------------------------
        discretizer = DecileDiscretizer(X, [], feature_names, y,
                                        random_state=20)
        explainer_1 = LimeTabularExplainer(X,
                                           feature_names=feature_names,
                                           discretize_continuous=True,
                                           discretizer=discretizer,
                                           random_state=20)
        exp_1 = explainer_1.explain_instance(X[instance], rf.predict_proba,
                                             num_samples=500)

        discretizer = DecileDiscretizer(X, [], feature_names, y,
                                        random_state=10)
        explainer_2 = LimeTabularExplainer(X,
                                           feature_names=feature_names,
                                           discretize_continuous=True,
                                           discretizer=discretizer,
                                           random_state=10)
        exp_2 = explainer_2.explain_instance(X[instance], rf.predict_proba,
                                             num_samples=500)

        self.assertTrue(exp_1.as_map() != exp_2.as_map())

        # ---------------------------------[3]----------------------------------
        discretizer = DecileDiscretizer(X, [], feature_names, y,
                                        random_state=20)
        explainer_1 = LimeTabularExplainer(X,
                                           feature_names=feature_names,
                                           discretize_continuous=True,
                                           discretizer=discretizer,
                                           random_state=20)
        exp_1 = explainer_1.explain_instance(X[instance], rf.predict_proba,
                                             num_samples=500)

        discretizer = DecileDiscretizer(X, [], feature_names, y,
                                        random_state=20)
        explainer_2 = LimeTabularExplainer(X,
                                           feature_names=feature_names,
                                           discretize_continuous=True,
                                           discretizer=discretizer,
                                           random_state=10)
        exp_2 = explainer_2.explain_instance(X[instance], rf.predict_proba,
                                             num_samples=500)

        self.assertTrue(exp_1.as_map() != exp_2.as_map())

        # ---------------------------------[4]----------------------------------
        discretizer = DecileDiscretizer(X, [], feature_names, y,
                                        random_state=20)
        explainer_1 = LimeTabularExplainer(X,
                                           feature_names=feature_names,
                                           discretize_continuous=True,
                                           discretizer=discretizer,
                                           random_state=20)
        exp_1 = explainer_1.explain_instance(X[instance], rf.predict_proba,
                                             num_samples=500)

        discretizer = DecileDiscretizer(X, [], feature_names, y,
                                        random_state=20)
        explainer_2 = LimeTabularExplainer(X,
                                           feature_names=feature_names,
                                           discretize_continuous=True,
                                           discretizer=discretizer,
                                           random_state=20)
        exp_2 = explainer_2.explain_instance(X[instance], rf.predict_proba,
                                             num_samples=500)

        self.assertFalse(exp_1.as_map() != exp_2.as_map())

        # ----------------------------------------------------------------------
        # --------------------------Entropy Discretizer-------------------------
        # ----------------------------------------------------------------------

        # ---------------------------------[1]----------------------------------
        discretizer = EntropyDiscretizer(X, [], feature_names, y,
                                         random_state=20)
        explainer_1 = LimeTabularExplainer(X,
                                           feature_names=feature_names,
                                           discretize_continuous=True,
                                           discretizer=discretizer,
                                           random_state=10)
        exp_1 = explainer_1.explain_instance(X[instance], rf.predict_proba,
                                             num_samples=500)

        discretizer = EntropyDiscretizer(X, [], feature_names, y,
                                         random_state=10)
        explainer_2 = LimeTabularExplainer(X,
                                           feature_names=feature_names,
                                           discretize_continuous=True,
                                           discretizer=discretizer,
                                           random_state=10)
        exp_2 = explainer_2.explain_instance(X[instance], rf.predict_proba,
                                             num_samples=500)

        self.assertTrue(exp_1.as_map() != exp_2.as_map())

        # ---------------------------------[2]----------------------------------
        discretizer = EntropyDiscretizer(X, [], feature_names, y,
                                         random_state=20)
        explainer_1 = LimeTabularExplainer(X,
                                           feature_names=feature_names,
                                           discretize_continuous=True,
                                           discretizer=discretizer,
                                           random_state=20)
        exp_1 = explainer_1.explain_instance(X[instance], rf.predict_proba,
                                             num_samples=500)

        discretizer = EntropyDiscretizer(X, [], feature_names, y,
                                         random_state=10)
        explainer_2 = LimeTabularExplainer(X,
                                           feature_names=feature_names,
                                           discretize_continuous=True,
                                           discretizer=discretizer,
                                           random_state=10)
        exp_2 = explainer_2.explain_instance(X[instance], rf.predict_proba,
                                             num_samples=500)

        self.assertTrue(exp_1.as_map() != exp_2.as_map())

        # ---------------------------------[3]----------------------------------
        discretizer = EntropyDiscretizer(X, [], feature_names, y,
                                         random_state=20)
        explainer_1 = LimeTabularExplainer(X,
                                           feature_names=feature_names,
                                           discretize_continuous=True,
                                           discretizer=discretizer,
                                           random_state=20)
        exp_1 = explainer_1.explain_instance(X[instance], rf.predict_proba,
                                             num_samples=500)

        discretizer = EntropyDiscretizer(X, [], feature_names, y,
                                         random_state=20)
        explainer_2 = LimeTabularExplainer(X,
                                           feature_names=feature_names,
                                           discretize_continuous=True,
                                           discretizer=discretizer,
                                           random_state=10)
        exp_2 = explainer_2.explain_instance(X[instance], rf.predict_proba,
                                             num_samples=500)

        self.assertTrue(exp_1.as_map() != exp_2.as_map())

        # ---------------------------------[4]----------------------------------
        discretizer = EntropyDiscretizer(X, [], feature_names, y,
                                         random_state=20)
        explainer_1 = LimeTabularExplainer(X,
                                           feature_names=feature_names,
                                           discretize_continuous=True,
                                           discretizer=discretizer,
                                           random_state=20)
        exp_1 = explainer_1.explain_instance(X[instance], rf.predict_proba,
                                             num_samples=500)

        discretizer = EntropyDiscretizer(X, [], feature_names, y,
                                         random_state=20)
        explainer_2 = LimeTabularExplainer(X,
                                           feature_names=feature_names,
                                           discretize_continuous=True,
                                           discretizer=discretizer,
                                           random_state=20)
        exp_2 = explainer_2.explain_instance(X[instance], rf.predict_proba,
                                             num_samples=500)

        self.assertFalse(exp_1.as_map() != exp_2.as_map())
Example #41
    def test_lime_explainer_with_data_stats(self):
        np.random.seed(1)

        rf = RandomForestClassifier(n_estimators=500)
        rf.fit(self.train, self.labels_train)
        i = np.random.randint(0, self.test.shape[0])

        # Generate stats using a quartile discretizer
        discretizer = QuartileDiscretizer(self.train, [], self.feature_names, self.target_names,
                                          random_state=20)

        d_means = discretizer.means
        d_stds = discretizer.stds
        d_mins = discretizer.mins
        d_maxs = discretizer.maxs
        d_bins = discretizer.bins(self.train, self.target_names)

        # Compute feature values and frequencies of all columns
        cat_features = np.arange(self.train.shape[1])
        discretized_training_data = discretizer.discretize(self.train)

        feature_values = {}
        feature_frequencies = {}
        for feature in cat_features:
            column = discretized_training_data[:, feature]
            feature_count = collections.Counter(column)
            values, frequencies = map(list, zip(*(feature_count.items())))
            feature_values[feature] = values
            feature_frequencies[feature] = frequencies

        # Convert bins from arrays to lists
        d_bins_revised = {}
        for index, bin_ in enumerate(d_bins):
            d_bins_revised[index] = bin_.tolist()

        # Discretized stats
        data_stats = {}
        data_stats["means"] = d_means
        data_stats["stds"] = d_stds
        data_stats["maxs"] = d_maxs
        data_stats["mins"] = d_mins
        data_stats["bins"] = d_bins_revised
        data_stats["feature_values"] = feature_values
        data_stats["feature_frequencies"] = feature_frequencies

        data = np.zeros((2, len(self.feature_names)))
        explainer = LimeTabularExplainer(
            data, feature_names=self.feature_names, random_state=10,
            training_data_stats=data_stats, training_labels=self.target_names)

        exp = explainer.explain_instance(self.test[i],
                                         rf.predict_proba,
                                         num_features=2,
                                         model_regressor=LinearRegression())

        self.assertIsNotNone(exp)
        keys = [x[0] for x in exp.as_list()]
        self.assertEqual(1,
                         sum([1 if 'petal width' in x else 0 for x in keys]),
                         "Petal Width is a major feature")
        self.assertEqual(1,
                         sum([1 if 'petal length' in x else 0 for x in keys]),
                         "Petal Length is a major feature")