Example #1
    def init_lime(self):
        """
        Initializes a LIME explainer that can later be used for local interpretations
        for this model.
        :return: void (Sets the value for lime_explainer)
        """
        from lime.lime_tabular import LimeTabularExplainer
        from util.commons import RANDOM_NUMBER, convert_to_lime_format

        if not self.lime_explainer:

            log.info(
                "Initializing LIME - generating new explainer."
                " This operation may be time-consuming so please be patient.")

            # Transform the categorical feature's labels to a lime-readable format.
            categorical_names = self.idx2ohe
            log.debug(
                "Categorical names for lime: {}".format(categorical_names))

            explainer = LimeTabularExplainer(
                convert_to_lime_format(self.X_test, categorical_names).values,
                mode="classification",
                feature_names=self.X_test.columns.tolist(),
                categorical_names=categorical_names,
                categorical_features=categorical_names.keys(),
                discretize_continuous=True,
                random_state=RANDOM_NUMBER)

            self.lime_explainer = explainer
        else:
            log.info("LIME is already initialized.")
Example #2
    def get_local_interpretation(ID_client, dataframe, modelname,
                                 features_importances, label):

        model = load_model(modelname)
        X = dataframe[dataframe['SK_ID_CURR'] == int(ID_client)]
        X = X.drop(['SK_ID_CURR', 'TARGET'], axis=1)
        dataframe = dataframe.drop(['SK_ID_CURR', 'TARGET'], axis=1)

        X_train = dataframe.sample(frac=0.1, random_state=42).values

        explainer = LimeTabularExplainer(
            training_data=X_train,
            mode='classification',
            feature_names=dataframe.columns.tolist(),
            verbose=1,
            random_state=42)
        #st.write(np.array(X))
        #st.write(type(np.array(X)))
        explanation = explainer.explain_instance(
            np.ravel(np.array(X)),
            predict_fn=model.predict_proba,
            labels=[0, 1],
            num_features=len(dataframe.columns))

        #fig = explanation.as_pyplot_figure(label=label)
        #st.pyplot(fig)

        return explanation
Example #3
    def lime(self, instance=None, html_file=False, num_features=2):
        """

        :param instance:
        :param html_file:
        :param num_features:
        :return:
        """
        explainer = LimeTabularExplainer(self.x_train.values,
                                         mode="classification",
                                         feature_names=self.x_train.columns,
                                         class_names=['false', 'true'],
                                         training_labels=self.y_train,
                                         discretize_continuous=True)
        if instance is None:
            instance = np.random.randint(0, self.x_test.shape[0])
            print('Case:  ' + str(instance))
            print('Label: ' + str(self.y_test.iloc[instance]))

        exp = explainer.explain_instance(self.x_test.values[instance],
                                         self.model.predict_proba,
                                         num_features=num_features)
        print("Lime explanation: ")
        exp.as_pyplot_figure(label=1).show()
        if html_file:
            exp.save_to_file(
                str(instance) + "_" + str(self.y_test.iloc[instance]) +
                "_explain.html")
Example #4
def lime():

    print('Loading dataset ...')
    X_train, Y_train, X_val, Y_val, X_test, Y_test = get_dataset(
        minibatch_size=32, sampling='None', numpy='True')

    net = load_fraudnet()
    lime_list = []
    explainer = LimeTabularExplainer(X_train, training_labels=Y_train)

    def func_call(x):
        print(x.shape)
        input_ = torch.from_numpy(x).to(device=device).float()
        prob_1 = net(input_).view(-1, 1).cpu().data.numpy()
        prob_0 = 1 - prob_1
        prob = np.concatenate([prob_0, prob_1], axis=1)
        return prob

    for i in range(X_test.shape[0]):

        exp = explainer.explain_instance(X_test[i, :],
                                         func_call,
                                         labels=(0, 1),
                                         num_features=50)
        lime_list.append(exp)

    lime_list = np.array(lime_list)
    lime_list = preprocess(lime_list)
    pickle.dump(lime_list, open('./saved_attributions/lime.pkl', 'wb'))
Example #5
    def generate_neighborhood_data(self,
                                   sample,
                                   predict_fn,
                                   distance_metric='euclidean',
                                   n_samples=500,
                                   seed=1,
                                   **kwargs):
        '''Generate neighborhood data for a given point (currently using LIME)

        Args:
            sample: Observed sample to generate neighbors around
            predict_fn: Black box predictor used to label the generated points
            distance_metric: Distance metric used for the weights
            n_samples: Number of samples to generate
            seed: Random seed

        Returns:
            neighbor_data (xs around sample),
            weights (weights of the instances in xs),
            neighbor_data_labels (ys around sample, corresponding to xs),
            sample (the original observed sample)
        '''
        from lime.lime_tabular import LimeTabularExplainer
        e = LimeTabularExplainer(
            self.train_data,
            categorical_features=self.categorical_features,
            discretize_continuous=False)

        _, neighbor_data = e._LimeTabularExplainer__data_inverse(
            sample, n_samples)
        scaled_data = (neighbor_data - e.scaler.mean_) / e.scaler.scale_
        return (*self._data(neighbor_data, scaled_data, distance_metric,
                            predict_fn), sample)
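
A minimal usage sketch for the method above (all names here are illustrative assumptions: `gen` is an instance of the surrounding class, `clf` is a fitted classifier, and `x0` is one row of the training data):

# Hypothetical call (sketch): generate 500 LIME-style neighbors around x0
# and label them with the black-box model's predict_proba.
neighbors, weights, neighbor_labels, sample = gen.generate_neighborhood_data(
    x0, clf.predict_proba, distance_metric='euclidean', n_samples=500)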
Example #6
    def fit(self, X, y, predict_fn, labels_num):
        self.cluster_labels = self.cluster_method.fit_predict(X)
        #print(X.shape[1])

        for i in range(self.cluster_num):
            inds = np.where(self.cluster_labels == i)
            explainer = LimeTabularExplainer(X[inds],
                                             discretize_continuous=False,
                                             sample_around_instance=True)
            #print(np.squeeze(X[inds, :]))
            #print (self.cluster_method.cluster_centers_[i])
            #time1=time.clock()
            simplified_models = explainer.explain_instance(
                self.cluster_method.cluster_centers_[i],
                predict_fn,
                num_samples=10000,
                labels=range(labels_num),
                num_features=X.shape[1],
                retrive_model=True)
            #print(type(simplified_models))
            coef_ = np.zeros((X.shape[1], labels_num))
            intercept_ = np.zeros((1, labels_num))
            #time2=time.clock()
            #time3 = time2-time1
            #print("explain_instance")
            #print(time3)
            for idx in range(labels_num):
                coef_[:, idx] = simplified_models[idx].coef_
                intercept_[0, idx] = simplified_models[idx].intercept_

            self.models.append((coef_, intercept_))
Example #7
def interpret_model(dataframe, feature_set, model):
    """ dataframe - Specify the Name of the dataframe
      feature_set - The set of features you want to use(list)
      models- Should Be in a dictionary form where model should be a function passed as a value with the name of model as the key of dict
      wrong_predictions=True (Change to `false` if u want to only see the correct classification results for the model)
  """
    X = dataframe[feature_set]
    y = dataframe['Default_Status']
    train_X, test_X, train_y, test_y = train_test_split(X, y, random_state=20)

    # =============================================================================
    #   model.fit(train_X,train_y)
    #   model_preds=model.predict(test_X)
    # =============================================================================

    from lime.lime_tabular import LimeTabularExplainer
    class_names = ['Wont Default', 'Will Default']
    #instantiate the explanations for the data set
    limeexplainer = LimeTabularExplainer(train_X.values,
                                         class_names=class_names,
                                         feature_names=feature_set,
                                         kernel_width=3,
                                         verbose=False,
                                         mode='classification')
    return limeexplainer
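
The function above only builds and returns the explainer; a minimal usage sketch might look like this (assuming `df`, `feats` and a fitted classifier `clf` with predict_proba; all three names are illustrative):

# Hypothetical usage of interpret_model (sketch).
limeexplainer = interpret_model(df, feats, clf)
exp = limeexplainer.explain_instance(df[feats].values[0],
                                     clf.predict_proba,
                                     num_features=len(feats))
print(exp.as_list())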
Example #8
 def fit(self, X, y=None):
     self.explainer_ = LimeTabularExplainer(
         X,
         feature_names=self.feature_names,
         class_names=self.class_names,
         discretize_continuous=True)
     return self
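
A hypothetical companion method (not part of the original snippet) showing how the stored explainer could be used; `predict_fn` is assumed to be the wrapped model's predict_proba:

 def explain(self, x_row, predict_fn, num_features=5):
     # Sketch: explain a single row with the explainer built in fit().
     return self.explainer_.explain_instance(
         x_row, predict_fn, num_features=num_features)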
Example #9
 def fit(self, X: Any, class_names: List[str] = None) -> None:
     if class_names is None:
         class_names = ['0', '1']
     self._explainer = LimeTabularExplainer(
         training_data=X, feature_names=list(range(X.shape[1])),
         class_names=class_names, discretize_continuous=False,
         random_state=self._seed)
Example #10
def lime_interpreter(dataset_features,
                     x_train,
                     x_test,
                     classifier,
                     model_name,
                     rng=True,
                     instance=None):
    feature_names = ["f" + str(i) for i in range(dataset_features)]  #
    explainer = LimeTabularExplainer(x_train,
                                     feature_names=feature_names,
                                     discretize_continuous=True)

    def wrapped_fn(x_test):
        p = classifier.predict_proba(x_test).toarray()
        p_norm = norm_probabilities(p)
        return p_norm

    if rng:
        idx = np.random.randint(0, x_test.shape[0])
    else:
        idx = instance
    exp = explainer.explain_instance(x_test[idx], predict_fn=wrapped_fn)
    exp.save_to_file(model_name + '.html')
    print(
        "Interpretation can be found as an HTML file in the current directory, named:"
    )
    print(model_name)
    print("")
Example #11
    def fit(self, X, y, lemna_component, predict_fn, labels_num):
        self.cluster_labels = self.cluster_method.fit_predict(X)
        self.num_features = X.shape[1]

        for i in range(self.cluster_num):
            inds = np.where(self.cluster_labels == i)
            explainer = LimeTabularExplainer(np.squeeze(X[inds, :]),
                                             discretize_continuous=False,
                                             sample_around_instance=True)

            simplified_models = explainer.explain_instance_with_lemna(
                self.cluster_method.cluster_centers_[i],
                predict_fn,
                lemna_component=lemna_component,
                num_samples=5000,
                labels=range(labels_num),
                num_features=X.shape[1],
                retrive_model=True)

            # coef_ is a 3-d array: feature_num x lemna_component x labels_num
            # intercept_ is a 3-d array: 1 x lemna_component x labels_num
            coef_ = np.zeros((X.shape[1], lemna_component, labels_num))
            intercept_ = np.zeros((1, lemna_component, labels_num))

            for idx in range(labels_num):
                coef_[:, :, idx] = simplified_models[idx].coef_
                intercept_[0, :, idx] = simplified_models[idx].intercept_
                pi_ = simplified_models[idx].pi_

            self.models.append((coef_, intercept_, pi_))
Example #12
def lime_tabular_global():
    targets = ['academic', 'fiction', 'magazine', 'newspaper']
    data = pd.read_pickle('data_explain_tabular.pkl')
    clf = joblib.load('model_forest_tabular.pkl')
    feature_names = list(data)
    target = np.array(data['target'])
    data = data.drop(['target', 'year', 'ID'], axis=1).to_numpy()
    explainer = LimeTabularExplainer(data, feature_names=feature_names, 
                                     class_names=targets)
    N = data.shape[0]
    academic, fiction, magazine, newspaper = ([],[],[],[])
    academic_w, fiction_w, magazine_w, newspaper_w = ([],[],[],[])
    for i in range(N):
        pred = clf.predict(data[i].reshape(1,-1))[0]
        if pred == target[i]:
            explanation = explainer.explain_instance(data[i], 
                                                     clf.predict_proba,
                                                     num_features=10,
                                                     top_labels=4)
            result = explanation.as_list(label=pred)
            if 0 == target[i]:
                academic.append((result, pred))
            elif 1 == target[i]:
                fiction.append((result, pred))
            elif 2 == target[i]:
                magazine.append((result, pred))
            elif 3 == target[i]:
                newspaper.append((result, pred))
            else:
                return 1
        else:
            explanation = explainer.explain_instance(data[i], 
                                                     clf.predict_proba,
                                                     num_features=10,
                                                     top_labels=4)
            result = explanation.as_list(label=pred)
            if 0 == target[i]:
                academic_w.append((result, pred))
            elif 1 == target[i]:
                fiction_w.append((result, pred))
            elif 2 == target[i]:
                magazine_w.append((result, pred))
            elif 3 == target[i]:
                newspaper_w.append((result, pred))
            else:
                return 1

    joblib.dump(academic, 'lime_academic.pkl')
    joblib.dump(fiction, 'lime_fiction.pkl')
    joblib.dump(magazine, 'lime_magazine.pkl')
    joblib.dump(newspaper, 'lime_newspaper.pkl')
    all_explanations = academic + fiction + magazine + newspaper
    joblib.dump(all_explanations, 'lime_all.pkl')

    joblib.dump(academic_w, 'lime_academic_wrong.pkl')
    joblib.dump(fiction_w, 'lime_fiction_wrong.pkl')
    joblib.dump(magazine_w, 'lime_magazine_wrong.pkl')
    joblib.dump(newspaper_w, 'lime_newspaper_wrong.pkl')
    all_explanations_w = academic_w + fiction_w + magazine_w + newspaper_w
    joblib.dump(all_explanations_w, 'lime_all_wrong.pkl')
Example #13
    def test_lime_explainer_entropy_discretizer(self):
        np.random.seed(1)

        rf = RandomForestClassifier(n_estimators=500)
        rf.fit(self.train, self.labels_train)
        i = np.random.randint(0, self.test.shape[0])

        explainer = LimeTabularExplainer(self.train,
                                         feature_names=self.feature_names,
                                         class_names=self.target_names,
                                         training_labels=self.labels_train,
                                         discretize_continuous=True,
                                         discretizer='entropy')

        exp = explainer.explain_instance(self.test[i],
                                         rf.predict_proba,
                                         num_features=2)
        self.assertIsNotNone(exp)
        keys = [x[0] for x in exp.as_list()]
        print(keys)
        self.assertEqual(1,
                         sum([1 if 'petal width' in x else 0 for x in keys]),
                         "Petal Width is a major feature")
        self.assertEqual(1,
                         sum([1 if 'petal length' in x else 0 for x in keys]),
                         "Petal Length is a major feature")
Example #14
    def testFeatureNamesAndCategoricalFeats(self):
        training_data = np.array([[0., 1.], [1., 0.]])

        explainer = LimeTabularExplainer(training_data=training_data)
        self.assertEqual(explainer.feature_names, ['0', '1'])
        self.assertEqual(explainer.categorical_features, [0, 1])

        explainer = LimeTabularExplainer(training_data=training_data,
                                         feature_names=np.array(['one',
                                                                 'two']))
        self.assertEqual(explainer.feature_names, ['one', 'two'])

        explainer = LimeTabularExplainer(training_data=training_data,
                                         categorical_features=np.array([0]),
                                         discretize_continuous=False)
        self.assertEqual(explainer.categorical_features, [0])
Example #15
def explain():
    try:
        with open(CFG.TRAINING, 'rb') as f:
            training = pickle.load(f)
        my_json = request.get_json()
        encoded_dict = convert_json(my_json)
        dictionary = eval(encoded_dict)

        normalize_age_mons = age_mons_preprocessing.transform(
            [[dictionary['age_month']]])[0, 0]

        dictionary['age_month'] = normalize_age_mons
        pred = np.array([x[1] for x in dictionary.items()])

        exp = LimeTabularExplainer(training.values,
                                   feature_names=training.columns,
                                   discretize_continuous=True)

        fig = exp.explain_instance(pred,
                                   model.predict_proba).as_pyplot_figure()
        fig.set_size_inches(30, 10)
        plt.tight_layout()
        plt.savefig('explain.png')

        return send_file('explain.png',
                         mimetype='image/png',
                         as_attachment=True)

    except ValueError:
        return 'Bad Request', 400
Example #16
    def __init__(self,
                 bb_classifier,
                 X,
                 class_names,
                 explanation_samples=5000):
        self.bb_classifier = bb_classifier
        self.EX, self.StdX = np.mean(X), np.array(np.std(X, axis=0, ddof=0))
        self.class_names = class_names
        self.F = X.shape[1]  # number of features
        self.explanation_samples = explanation_samples

        # SHAP Kernel
        self.SHAPEXPL = shap.KernelExplainer(self.bb_classifier.predict_proba,
                                             self.EX,
                                             nsamples=explanation_samples)

        # LIME Kernel
        self.LIMEEXPL = LimeTabularExplainer(
            X.astype('float'),
            feature_names=X.columns.tolist(),
            class_names=self.class_names,
            discretize_continuous=False,
            sample_around_instance=True,
            # categorical_features=categorical_features,
            # feature_selection='highest_weights',
            # sample_using_pca=False,
            # weight_classifier_labels=False,
            random_state=10)
        self.metrics = None
        self.lime_avg_jaccard_bin = self.lime_std_jaccard_bin = None
        self.shap_avg_jaccard_bin = self.shap_std_jaccard_bin = None
Example #17
    def run(self, load_data=True, tune_parameter=True):
        if load_data:
            lines, values = self.data(0, self.num_samples)
            self.vectorize_text(lines, values)

        # If tune_parameter is false, we run with our experimented parameters
        if tune_parameter:
            self.tune_parameters()
        else:
            self.index = 1
            self.param = {"alpha": 0.05,
                          "learning_rate": "invscaling", "penalty": "l2"}

        reg = self.train()
        y_pred = self.test(reg)
        print(max(y_pred))
        # y was trained on log1p(seconds), so convert back with expm1(y_pred)
        y_pred = np.expm1(y_pred)
        y_test = np.load(self.Y_test, mmap_mode='r')
        self.print_stats(y_pred, y_test)

        X_train = np.load(self.X_train[self.index], mmap_mode='r')
        X_test = np.load(self.X_test[self.index], mmap_mode='r')
        explainer = LimeTabularExplainer(X_train, mode="regression")
        exp = explainer.explain_instance(X_test[self.text_index], reg.predict)
        exp.as_pyplot_figure()
Example #18
def explain_with_lime(X_test, model, model_name, encoder, categorical_features_indices, categorical_encoding,
                      class_names, feature_names, test_instance=10):
    """Explain a prediction from the test set with a trained model."""
    columns = X_test.columns.tolist()

    predict_fn = lambda x: model.predict_proba(encoder.transform(pd.DataFrame(x, columns=columns)).astype(float))

    explainer = LimeTabularExplainer(X_test.to_numpy(),
                                     mode="classification",
                                     feature_names=feature_names,
                                     class_names=class_names,
                                     categorical_features=categorical_features_indices,
                                     categorical_names=categorical_encoding,
                                     kernel_width=3)

    # might set seed?
    explanation = explainer.explain_instance(X_test.iloc[test_instance, :], predict_fn, num_features=5)

    # Show and save explanation
    # explanation.save_to_file(PATHS["03_data_outputs"] + "lime.html")

    explanation.as_pyplot_figure()
    plt.tight_layout()
    plt.savefig(PATHS["03_data_outputs"] + model_name + "_lime_plot.png")
    plt.close()

    # access the coefficients, the intercept and the R squared of the linear model
    print("Coefficients of linear model: ", explanation.local_exp)
    print("\n")
    print("Intercept: ", explanation.intercept)
    print("\n")
    print("R-squared: ", explanation.score)
Example #19
    def run(self, load_data=True, tune_parameter=True):
        if load_data:
            lines, values = self.data(0, self.num_samples)
            self.vectorize_text(lines, values)

        # If tune_parameter is false, we run with our experimented parameters
        if tune_parameter:
            self.tune_parameters()
        else:
            self.index = 0
            self.param = {
                "alpha": 0.1,
                "learning_rate": "invscaling",
                "penalty": "l2"
            }

        reg = self.train()
        print(reg.densify())
        y_pred = self.test(reg)
        y_test = np.load(self.Y_test, mmap_mode='r')
        print(y_pred.shape)
        self.print_stats(y_pred, y_test)

        # Show a LIME plot of the regression. The labels will not be correct since we are using a regression model.
        X_train = np.load(self.X_train[self.index], mmap_mode='r')
        X_test = np.load(self.X_test[self.index], mmap_mode='r')
        explainer = LimeTabularExplainer(X_train, mode="regression")
        exp = explainer.explain_instance(X_test[self.text_index], reg.predict)
        exp.as_pyplot_figure()
Example #20
    def __init__(
        self,
        predict_fn,
        data,
        sampler=None,
        feature_names=None,
        feature_types=None,
        explain_kwargs={},
        n_jobs=1,
        **kwargs
    ):

        self.data, _, self.feature_names, self.feature_types = unify_data(
            data, None, feature_names, feature_types
        )
        self.predict_fn = unify_predict_fn(predict_fn, self.data)
        self.n_jobs = n_jobs

        if sampler is not None:  # pragma: no cover
            warnings.warn("Sampler interface not currently supported.")

        self.sampler = sampler
        self.explain_kwargs = explain_kwargs

        self.kwargs = kwargs
        final_kwargs = {"mode": "regression"}
        if self.feature_names:
            final_kwargs["feature_names"] = self.feature_names
        final_kwargs.update(self.kwargs)

        self.lime = LimeTabularExplainer(self.data, **final_kwargs)
Example #21
    def test_lime_explainer_no_regressor(self):
        np.random.seed(1)
        iris = load_iris()
        train, test, labels_train, labels_test = (
            sklearn.model_selection.train_test_split(iris.data, iris.target,
                                                     train_size=0.80))

        rf = RandomForestClassifier(n_estimators=500)
        rf.fit(train, labels_train)
        i = np.random.randint(0, test.shape[0])

        explainer = LimeTabularExplainer(train,
                                         feature_names=iris.feature_names,
                                         class_names=iris.target_names,
                                         discretize_continuous=True)

        exp = explainer.explain_instance(test[i], rf.predict_proba,
                                         num_features=2)
        self.assertIsNotNone(exp)
        keys = [x[0] for x in exp.as_list()]
        self.assertEqual(1,
                         sum([1 if 'petal width' in x else 0 for x in keys]),
                         "Petal Width is a major feature")
        self.assertEqual(1,
                         sum([1 if 'petal length' in x else 0 for x in keys]),
                         "Petal Length is a major feature")
Example #22
def calcul_interpretation(clf, client_id):

    test_features_filled = test_features.fillna(test_features.median())

    lime1 = LimeTabularExplainer(test_features_filled,
                                 feature_names=test_features_filled.columns,
                                 discretize_continuous=False)

    explain_data = test_features_filled.iloc[test_corrs_removed.index[
        test_corrs_removed['SK_ID_CURR'] == int(client_id)]].T.squeeze()
    exp = lime1.explain_instance(explain_data,
                                 clf.predict_proba,
                                 num_samples=1000)

    exp_list = exp.as_list()
    exp_keys = []
    exp_values = []
    exp_positives = []
    for i in range(len(exp_list)):
        exp_keys.append(exp_list[i][0])
        exp_values.append(exp_list[i][1])

    df_data = pd.DataFrame(data=[exp_keys, exp_values])
    df_data = df_data.T
    df_data.columns = ['exp_keys', 'exp_values']
    df_data = df_data.iloc[np.abs(df_data['exp_values'].values).argsort()]
    df_data['color'] = df_data.exp_values.apply(lambda x: 'red'
                                                if x > 0 else 'green')

    return df_data
Example #23
def get_lime_scores(predictive_model, x_train, x_test):

    lime_scores = []
    FEATS = len(x_train[0])
    feat_names = ["X" + str(i) for i in range(len(x_train[0]))]
    explainer = LimeTabularExplainer(x_train, feature_names=feat_names)

    for w in range(x_test.shape[0]):
        exp = explainer.explain_instance(x_test[w],
                                         predictive_model.predict_proba,
                                         num_features=FEATS)
        rank_list = exp.as_list()

        curr_scores = [
            np.where(
                np.array([
                    pd.Series(rank_list[v][0]).str.contains('X' + str(k))[0] *
                    1 for k in range(FEATS)
                ]) == 1)[0][0] for v in range(len(rank_list))
        ]
        lime_score_ = np.zeros((1, x_train.shape[1]))
        lime_score_[0, np.array(curr_scores)] = np.array(
            [np.abs(rank_list[v][1]) for v in range(len(rank_list))])

        lime_scores.append(lime_score_)

    lime_scores = np.array(lime_scores).reshape(-1, x_train.shape[1])

    return lime_scores
Example #24
    def test_lime_explainer_good_regressor(self):
        np.random.seed(1)

        rf = RandomForestClassifier(n_estimators=500)
        rf.fit(self.train, self.labels_train)
        i = np.random.randint(0, self.test.shape[0])

        explainer = LimeTabularExplainer(self.train,
                                         mode="classification",
                                         feature_names=self.feature_names,
                                         class_names=self.target_names,
                                         discretize_continuous=True)

        exp = explainer.explain_instance(self.test[i],
                                         rf.predict_proba,
                                         num_features=2,
                                         model_regressor=LinearRegression())

        self.assertIsNotNone(exp)
        keys = [x[0] for x in exp.as_list()]
        self.assertEqual(1,
                         sum([1 if 'petal width' in x else 0 for x in keys]),
                         "Petal Width is a major feature")
        self.assertEqual(1,
                         sum([1 if 'petal length' in x else 0 for x in keys]),
                         "Petal Length is a major feature")
Example #25
    def createExplainer(self):
        """ Creates the LIME explainer """
        from lime.lime_tabular import LimeTabularExplainer

        return LimeTabularExplainer(self._featureData,
                                    mode="regression",
                                    feature_names=self._featureNames,
                                    class_names=self._targetNames)
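
A usage sketch for the regression-mode explainer returned above (assuming `obj` is an instance of the class, `reg` is a fitted regressor, and `x_row` is one row of the feature data; all three names are illustrative):

explainer = obj.createExplainer()
# In regression mode, explain_instance takes the model's predict method.
# x_row is assumed to be a single 1-d row of the feature data.
exp = explainer.explain_instance(x_row, reg.predict, num_features=5)
exp.as_pyplot_figure()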
Example #26
def create_explainer(X_train: pd.DataFrame, y_train: pd.DataFrame):
    return LimeTabularExplainer(X_train.values,
                                feature_names=X_train.columns.values,
                                training_labels=y_train.values,
                                feature_selection='lasso_path',
                                class_names=['No EPI', 'EPI'],
                                discretize_continuous=True,
                                discretizer='entropy')
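
The 'entropy' discretizer above needs the training labels, which is why `y_train` is passed in. A usage sketch (assuming a fitted classifier `clf`, an illustrative name, plus the same X_train DataFrame):

explainer = create_explainer(X_train, y_train)
# Explain the first training row; num_features limits the reported features.
exp = explainer.explain_instance(X_train.values[0], clf.predict_proba,
                                 num_features=10)
print(exp.as_list())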
Example #27
    def get_tabular_explainer(self):
        data = self.x_train.copy()
        # check whether contains categorical features
        cat_cols = data.select_dtypes(exclude=['number']).columns
        try:
            # have categorical features
            if len(cat_cols) > 0:
                cat_features = [
                    list(self.x_train.columns).index(col) for col in cat_cols
                ]
                data[cat_cols] = data[cat_cols].astype('category')
                # label encoding
                data[cat_cols] = data[cat_cols].apply(lambda x: x.cat.codes)

                # map dictionary to label encoding
                label_dic = {}
                for i, col in enumerate(cat_cols):
                    label_dic[cat_features[i]] = dict(
                        enumerate(self.x_train[col].astype(
                            'category').cat.categories))

                self.x_train = data
                lime_tab_explainer = LimeTabularExplainer(
                    self.x_train.values,
                    feature_names=self.x_train.columns,
                    class_names=self.class_names,
                    categorical_features=cat_features,
                    categorical_names=label_dic,
                    discretize_continuous=True,
                    sample_around_instance=True)

                return lime_tab_explainer
            else:
                lime_tab_explainer = LimeTabularExplainer(
                    self.x_train.values,
                    feature_names=self.x_train.columns,
                    class_names=self.class_names,
                    discretize_continuous=True,
                    sample_around_instance=True)
                return lime_tab_explainer
        except Exception as err:
            print('Error: model is not supported by LIME {} Explainer'.format(
                self.explainer_type))
            err_logging(err)
            raise Exception(err)
Example #28
def explain():
    explainer = LimeTabularExplainer(train,
                                     class_names=class_names,
                                     feature_names=feature_names,
                                     categorical_features=categorical_features)

    return explainer.explain_instance(X.iloc[0],
                                      rf.predict_proba,
                                      num_features=4)
Example #29
def create_explainer(model, X):
    '''
    Convenience function for creating a LIME explainer object.

    ex) create_explainer(model, X_train)
    '''

    explainer = LimeTabularExplainer(X.values, feature_names=X.columns.values)
    return explainer
Example #30
 def create_model_explainer(self):
     self.explainer = LimeTabularExplainer(
         self.train,
         feature_names=self.feature_names,
         training_labels=self.labels_train,
         class_names=self.class_names,
         categorical_features=self.categorical_feature_indices,
         categorical_names=self.categorical_names,
         discretize_continuous=True)