Example No. 1
def compute_explanation(model: Model, x, z):
    """Generate an explanation of how the model came to its result.

    Parameters
    ----------
    model : Model
        a hassbrain model
    x : np.ndarray
        the raw array containing the sensor values
    z : np.ndarray
        the training labels corresponding to the rows of x

    Returns
    -------
    matplotlib.figure.Figure
        a bar chart of the local explanation for the first instance
    """
    from hassbrain_algorithm.benchmark.interpretation import ModelWrapper
    from skater.core.local_interpretation.lime.lime_tabular import LimeTabularExplainer

    wrapped_model = ModelWrapper(model)
    class_names = model.get_state_lbl_lst()
    feature_names = model.get_obs_lbl_lst()

    # every sensor feature is categorical with the two states "off"/"on"
    cat_idxs = [i for i in range(len(feature_names))]
    categorical_names = {}
    for i in cat_idxs:
        categorical_names[i] = {}
        categorical_names[i][0] = "off"
        categorical_names[i][1] = "on"

    exp = LimeTabularExplainer(x,
                               mode='classification',
                               training_labels=z,
                               feature_names=feature_names,
                               categorical_features=cat_idxs,
                               categorical_names=categorical_names,
                               class_names=class_names)

    fig = exp.explain_instance(x[0],
                               wrapped_model.predict_proba).as_pyplot_figure()
    return fig
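A minimal usage sketch for the function above, assuming a trained hassbrain model `model` and observation/label arrays `x` and `z` (all three names are placeholders here, not values from the original):

# hypothetical call: model, x and z come from the surrounding training code
fig = compute_explanation(model, x, z)
fig.savefig("explanation.png", dpi=fig.dpi)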
Example No. 2
    def test_regression_without_feature_names(self):
        """
        Ensure lime.lime_tabular works when predict_fn = regressor.predict
        and feature names are NOT passed
        :return:
        """
        interpreter = LimeTabularExplainer(self.X, mode="regression")
        assert interpreter.explain_instance(self.example, self.regressor.predict)
Example No. 3
    def test_classifier_with_proba_without_feature_names(self):
        """
        Ensure lime.lime_tabular works when predict_fn = classifier.predict_proba
        and feature names are NOT passed
        :return:
        """

        interpreter = LimeTabularExplainer(self.X)
        assert interpreter.explain_instance(self.example, self.classifier.predict_proba)
def on_value_change(change):
    index = change['new']
    exp = LimeTabularExplainer(X_test, 
                               feature_names=features, 
                               discretize_continuous=False, 
                               class_names=['acdc', 'non_acdc'])
    print("Model behavior at row: {}".format(index))
    # Let's compare the prediction from the model with the actual target label
    print("prediction from the model:{}".format(estimator.predict(X_test[index].reshape(1, -1))))
    print("Target Label on the row: {}".format(y_test.reshape(1,-1)[0][index]))
    clear_output()
    display(HTML(exp.explain_instance(X_test[index], models['ensemble'].predict_proba).as_html()))
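The change['new'] lookup and the callback signature follow the ipywidgets observer pattern; a minimal sketch of wiring the callback to a row-index slider (the slider itself is an assumption, not part of the original snippet):

import ipywidgets as widgets
from IPython.display import display

# hypothetical slider over the test-set rows; on_value_change fires whenever it moves
row_slider = widgets.IntSlider(min=0, max=len(X_test) - 1, step=1, value=0,
                               description='row')
row_slider.observe(on_value_change, names='value')
display(row_slider)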
Example No. 7
    def test_lime_coef_accuracy(self):
        """
        Ensure that for a trivial example, the coefficients of a regressor explanation
        are all similar to the true beta values of the generative process.

        :return:
        """

        error_epsilon = .1
        explainer = LimeTabularExplainer(self.X,
                                         discretize_continuous=True, mode="regression")
        explanation = explainer.explain_instance(self.example,
                                                 self.regressor.predict,
                                                 model_regressor=self.model_regressor)

        vals = dict(explanation.as_list())
        keys = ['{} <= 0.00'.format(i) for i in [2, 1, 0]]
        lime_coefs = np.array([vals[key] for key in keys])
        assert (abs(self.regressor.coef_ - lime_coefs) < error_epsilon).all()
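The fixture objects (self.X, self.example, self.regressor, self.model_regressor) are not shown; a sketch of the kind of generative setup the docstring implies, assuming the target is an exact linear function of three features (the real test fixture may differ):

import numpy as np
from sklearn.linear_model import LinearRegression

# hypothetical fixture: y = X @ beta exactly, so regressor.coef_ recovers the true betas
rng = np.random.RandomState(0)
X = rng.normal(size=(1000, 3))
true_beta = np.array([1.0, 2.0, 3.0])
y = X.dot(true_beta)
regressor = LinearRegression().fit(X, y)
example = X[0]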
Example No. 9
def run_explanations(csv_path, csv_columns, target_column, zero_value):
    # Read the dataset from the provided CSV and print out information about it.
    df = pd.read_csv(csv_path,
                     names=csv_columns,
                     skipinitialspace=True,
                     skiprows=1)
    #df = df.drop('Target',axis=1)
    input_features = [name for name in csv_columns if name != target_column]
    #data, labels = shap.datasets.adult(display=True)
    if target_column not in csv_columns:
        print("target column error")
        return ("target column error")
    elif zero_value not in df[target_column].tolist():
        if str.isdecimal(zero_value) and (
                np.int64(zero_value) in df[target_column].tolist()
                or np.float64(zero_value) in df[target_column].tolist()):
            print("happy")
            zero_value = np.int64(zero_value)
        else:
            print(zero_value, df[target_column].tolist(),
                  df[target_column].dtype)
            return ("zero value error")

    labels = df[target_column].tolist()
    #labels = np.array([int(label) for label in labels])
    labels2 = []
    for label in labels:
        if label == zero_value:
            labels2.append(0)
        else:
            labels2.append(1)
    labels = np.array(labels2)

    data = df[input_features].copy()

    # treat every non-numeric column as categorical and encode it with integer codes
    for feature in input_features:
        if data[feature].dtype not in (np.dtype(np.int64), np.dtype(np.float64),
                                       np.dtype(np.float32)):
            data[feature] = data[feature].astype('category')

    cat_cols = data.select_dtypes(['category']).columns
    data[cat_cols] = data[cat_cols].apply(lambda x: x.cat.codes)

    from sklearn.model_selection import train_test_split

    X_train, X_test, y_train, y_test = train_test_split(data,
                                                        labels,
                                                        test_size=0.3,
                                                        random_state=42)

    data_disp, labels_disp = shap.datasets.adult(display=True)
    X_train_disp, X_test_disp, y_train_disp, y_test_disp = train_test_split(
        data_disp, labels_disp, test_size=0.3, random_state=42)

    xgc = xgb.XGBClassifier(n_estimators=500,
                            max_depth=5,
                            base_score=0.5,
                            objective='binary:logistic',
                            random_state=42)
    xgc.fit(X_train, y_train)
    predictions = xgc.predict(X_test)

    fig = plt.figure(figsize=(16, 12))
    title = fig.suptitle("Default Feature Importances from XGBoost",
                         fontsize=14)

    ax1 = fig.add_subplot(2, 2, 1)
    xgb.plot_importance(xgc, importance_type='weight', ax=ax1)
    t = ax1.set_title("Feature Importance - Feature Weight")

    ax2 = fig.add_subplot(2, 2, 2)
    xgb.plot_importance(xgc, importance_type='gain', ax=ax2)
    t = ax2.set_title("Feature Importance - Split Mean Gain")

    ax3 = fig.add_subplot(2, 2, 3)
    xgb.plot_importance(xgc, importance_type='cover', ax=ax3)
    t = ax3.set_title("Feature Importance - Sample Coverage")

    #plt.savefig('static/explanations.png')

    explanation = eli5.explain_weights(xgc.get_booster())
    explanation_html = eli5.formatters.html.format_as_html(explanation)
    print(explanation_html)

    with open("templates/explanation.html", "a+") as file:
        file.write(explanation_html)

    doc_num = 0
    print('Actual Label:', y_test[doc_num])
    print('Predicted Label:', predictions[doc_num])
    #eli5.show_prediction(xgc.get_booster(), X_test.iloc[doc_num],
    #                     feature_names=list(data.columns) ,show_feature_values=True)
    explanation2 = eli5.explain_prediction(xgc.get_booster(),
                                           X_test.iloc[doc_num],
                                           feature_names=list(data.columns))
    explanation_html2 = eli5.formatters.html.format_as_html(explanation2)
    with open("templates/explanation.html", "a") as file:
        file.write(explanation_html2)

    doc_num = 2
    print('Actual Label:', y_test[doc_num])
    print('Predicted Label:', predictions[doc_num])
    #eli5.show_predicon(xgc.get_booster(), X_test.iloc[doc_num], feature_names=list(data.columns) ,show_feature_values=True)
    explanation3 = eli5.explain_prediction(xgc.get_booster(),
                                           X_test.iloc[doc_num],
                                           feature_names=list(data.columns))
    explanation_html3 = eli5.formatters.html.format_as_html(explanation3)
    with open("templates/explanation.html", "a") as file:
        file.write(explanation_html3)

    #target_names = ['$50K or less', 'More than $50K']
    interpreter = Interpretation(training_data=X_test,
                                 training_labels=y_test,
                                 feature_names=list(data.columns))
    im_model = InMemoryModel(xgc.predict_proba, examples=X_train)

    plots = interpreter.feature_importance.plot_feature_importance(
        im_model, ascending=True, n_samples=23000)

    plots[0].savefig('skater.png')

    features_pdp = input_features

    xgc_np = xgb.XGBClassifier(n_estimators=500,
                               max_depth=5,
                               base_score=0.5,
                               objective='binary:logistic',
                               random_state=42)
    xgc_np.fit(X_train.values, y_train)

    # In[ ]:

    from skater.core.local_interpretation.lime.lime_tabular import LimeTabularExplainer

    exp = LimeTabularExplainer(X_test.values,
                               feature_names=list(data.columns),
                               discretize_continuous=True)

    doc_num = 0
    print('Actual Label:', y_test[doc_num])
    print('Predicted Label:', predictions[doc_num])
    instance = exp.explain_instance(X_test.iloc[doc_num].values,
                                    xgc_np.predict_proba)
    instance.save_to_file('templates/lime.html', show_all=False)

    doc_num = 2
    print('Actual Label:', y_test[doc_num])
    print('Predicted Label:', predictions[doc_num])
    instance2 = exp.explain_instance(X_test.iloc[doc_num].values,
                                     xgc_np.predict_proba)
    instance2.save_to_file('templates/lime2.html', show_all=False)

    explainer = shap.TreeExplainer(xgc)
    shap_values = explainer.shap_values(X_test)
    pd.DataFrame(shap_values).head()

    #shap.force_plot(explainer.expected_value, shap_values[:,], X_test_disp.iloc[:,],show=False,matplotlib=True)
    #plt.savefig("static/force_plot.png")

    shap.summary_plot(shap_values, X_test, plot_type="bar", show=False)
    plt.savefig("static/summary_plot.png")
    plt.clf()  # clear the figure so the second summary plot doesn't draw over the first

    shap.summary_plot(shap_values, X_test, show=False)
    plt.savefig("static/summary_plot2.png")

    return "Everyone Happy"
# Two-way interaction
interpreter.partial_dependence.plot_partial_dependence([("maxDeltaEta_tag_tag", "mass_higgsLikeDijet")], 
                                                       model, 
                                                       grid_resolution=10)


from skater.core.local_interpretation.lime.lime_tabular import LimeTabularExplainer

exp = LimeTabularExplainer(X_train, 
                           feature_names=features,
                           class_names=['acdc', 'non_acdc'],
                           discretize_continuous=True)
plt.show()

# explain the prediction for a data point from the background class 'non_acdc'
exp.explain_instance(X_test[1], estimator.predict_proba)


# Interactive slider for controlling grid resolution
def understanding_interaction():
    pyint_model = InMemoryModel(estimator.predict_proba, examples=X_test, target_names=features)
    # ['worst area', 'mean perimeter'] --> list(feature_selection.value)
    # Two-way interaction
    interpreter.partial_dependence.plot_partial_dependence(["mass_tag_tag_max_mass", "maxDeltaEta_jet_jet"],
                                                           model, 
                                                           grid_resolution=grid_resolution.value,
                                                           with_variance=True)
        
    # Let's study the interaction between the same covariates with a two-way plot
    # feature_selection.value --> ('worst area', 'mean perimeter')
    # Two-way interaction
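grid_resolution.value and feature_selection.value suggest ipywidgets controls defined elsewhere; a minimal sketch of what they might look like (the names, ranges, and defaults are assumptions):

import ipywidgets as widgets

# hypothetical controls backing grid_resolution.value and feature_selection.value
grid_resolution = widgets.IntSlider(min=5, max=100, step=5, value=10,
                                    description='grid')
feature_selection = widgets.SelectMultiple(options=list(features),
                                           value=tuple(features[:2]),
                                           description='features')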
Example No. 11
                                colors=['r', 'b', 'y'])

# ## Interpreting Model Predictions

# In[36]:

from skater.core.local_interpretation.lime.lime_tabular import LimeTabularExplainer

exp = LimeTabularExplainer(wqp_train_SX,
                           feature_names=wqp_feature_names,
                           discretize_continuous=True,
                           class_names=wqp_rf.classes_)

# In[80]:

exp.explain_instance(wqp_test_SX[10], wqp_rf.predict_proba,
                     top_labels=1).show_in_notebook()

# In[81]:

exp.explain_instance(wqp_test_SX[747], wqp_rf.predict_proba,
                     top_labels=1).show_in_notebook()

# ## Visualizing partial dependencies

# In[39]:

axes_list = interpreter.partial_dependence.plot_partial_dependence(
    ['alcohol'],
    wqp_im_model,
    grid_resolution=100,
    with_variance=True,
Example No. 12
class Explanator(object):
    def __init__(self, mdl, test_x, test_z):
        from hassbrain_algorithm.benchmark.interpretation import ModelWrapper
        self._wrapped_model = ModelWrapper(mdl)  # kept for the explain/plot methods below
        class_names = mdl.get_state_lbl_lst()
        feature_names = mdl.get_obs_lbl_lst()

        cat_idxs = [i for i in range(len(feature_names))]
        categorical_names = {}
        for i in cat_idxs:
            categorical_names[i] = {}
            categorical_names[i][0] = "off"
            categorical_names[i][1] = "on"

        from skater.core.local_interpretation.lime.lime_tabular import LimeTabularExplainer
        self._exp = LimeTabularExplainer(test_x,
                                         mode='classification',
                                         training_labels=test_z,
                                         feature_names=feature_names,
                                         categorical_features=cat_idxs,
                                         categorical_names=categorical_names,
                                         class_names=class_names)

    def get_explanator(self):
        return self._exp

    def explain(self, x):
        assert isinstance(x, np.ndarray)
        assert len(x.shape) == 1
        # reuse the model wrapped in __init__ rather than wrapping the explanator itself
        lst = self._exp.explain_instance(x, self._wrapped_model.predict_proba).as_list()
        return lst

    def plot_explanation(self, x):
        assert isinstance(x, np.ndarray)
        fig = self._exp.explain_instance(
            x, self._wrapped_model.predict_proba).as_pyplot_figure()
        fig.show()

    def plot_and_save_explanation(self, x, labels, file_paths):
        assert isinstance(x, np.ndarray)
        enc_labels = self.expl_to_ids(labels)
        exp = self._exp.explain_instance(x,
                                         self._wrapped_model.predict_proba,
                                         labels=enc_labels)  #type: Explanation
        for lbl, file_path in zip(enc_labels, file_paths):
            fig = self.as_pyplot_figure(exp, label=lbl)
            import matplotlib.pyplot as plt
            plt.tight_layout()
            fig.savefig(file_path, dpi=fig.dpi)

    def expl_to_ids(self, labels):
        """
        Parameters
        ----------
        labels : list
            list of labels
        Returns
        -------
        list
            encoded labels
        """
        tmp2 = self._exp  # type: LimeTabularExplainer
        classnames = tmp2.class_names
        enc_lbl_lst = []
        for lbl in labels:
            assert lbl in classnames
            for i, item in enumerate(classnames):
                if item == lbl:
                    enc_lbl_lst.append(i)
        return enc_lbl_lst

    def as_pyplot_figure(self, exp, label=1, **kwargs):
        """Returns the explanation as a pyplot figure.

        Will throw an error if you don't have matplotlib installed
        Args:
            exp: the Explanation object to plot
            label: desired label. If you ask for a label for which an
                   explanation wasn't computed, will throw an exception.
                   Will be ignored for regression explanations.
            kwargs: keyword arguments, passed to domain_mapper
        Returns:
            pyplot figure (barchart).
        """
        import matplotlib.pyplot as plt
        explst = exp.as_list(label=label, **kwargs)
        fig = plt.figure()
        vals = [x[1] for x in explst]
        names = [x[0] for x in explst]
        vals.reverse()
        names.reverse()
        colors = ['black' if x > 0 else 'red' for x in vals]
        pos = np.arange(len(explst)) + .5
        plt.barh(pos, vals, align='center', color=colors)
        plt.yticks(pos, names)
        #title = 'Local explanation for class %s' % exp.class_names[label]
        #plt.title(title)
        return fig
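A hedged usage sketch for the Explanator class above, assuming a trained hassbrain model mdl, test arrays test_x/test_z, and activity labels that exist in the model's class list (all of these are placeholders):

# hypothetical usage: explain one observation and save a per-class bar chart
explanator = Explanator(mdl, test_x, test_z)
print(explanator.explain(test_x[0]))
explanator.plot_and_save_explanation(test_x[0],
                                     labels=['sleeping', 'cooking'],
                                     file_paths=['sleeping.png', 'cooking.png'])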

# ## Explaining Predictions

# In[39]:


from skater.core.local_interpretation.lime.lime_tabular import LimeTabularExplainer
exp = LimeTabularExplainer(X_train, feature_names=data.feature_names, 
                           discretize_continuous=True, class_names=['0', '1'])


# In[40]:


exp.explain_instance(X_test[0], logistic.predict_proba).show_in_notebook()


# In[41]:


exp.explain_instance(X_test[1], logistic.predict_proba).show_in_notebook()


# # Model Deployment

# ## Persist model to disk

# In[42]:

Example No. 14
#feature_names = hmm_model.get_obs_lbl_lst()
#class_names = hmm_model.get_state_lbl_lst()


def boolean_arr2str(arr):
    res = arr.astype(str)
    return res


# all sensor features are categorical with the two states "off"/"on"
cat_idxs = [i for i in range(len(feature_names))]
categorical_names = {}
for i in cat_idxs:
    categorical_names[i] = {}
    categorical_names[i][0] = "off"
    categorical_names[i][1] = "on"

from skater.core.local_interpretation.lime.lime_tabular import LimeTabularExplainer
exp = LimeTabularExplainer(train_x,
                           mode='classification',
                           training_labels=train_z,
                           feature_names=feature_names,
                           categorical_features=cat_idxs,
                           categorical_names=categorical_names,
                           class_names=class_names)

#fig = exp.explain_instance(train_x[0], model.predict_proba).as_pyplot_figure()
#fig.show()
x = train_x[0]
lst = exp.explain_instance(x, model.predict_proba).as_list()
print(lst)
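# The list returned by as_list() holds (feature, weight) pairs. As an illustration
# (not part of the original script), they can be turned into a quick bar chart that
# mirrors the colour convention of as_pyplot_figure in Example No. 12:
import numpy as np
import matplotlib.pyplot as plt

names, vals = zip(*lst)
pos = np.arange(len(vals))
plt.barh(pos, vals, color=['black' if v > 0 else 'red' for v in vals])
plt.yticks(pos, names)
plt.tight_layout()
plt.show()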

# ## Interpreting Model Predictions

# In[36]:

from skater.core.local_interpretation.lime.lime_tabular import LimeTabularExplainer

exp = LimeTabularExplainer(wqp_train_SX, feature_names=wqp_feature_names, 
                           discretize_continuous=True, 
                           class_names=wqp_rf.classes_)


# In[80]:

exp.explain_instance(wqp_test_SX[10], wqp_rf.predict_proba, top_labels=1).show_in_notebook() 


# In[81]:

exp.explain_instance(wqp_test_SX[747], wqp_rf.predict_proba, top_labels=1).show_in_notebook() 


# ## Visualizing partial dependencies

# In[39]:

axes_list = interpreter.partial_dependence.plot_partial_dependence(['alcohol'], wqp_im_model, 
                                                                   grid_resolution=100, 
                                                                   with_variance=True,
                                                                   figsize = (6, 4))
Example No. 16
                                                           with_variance=True,
                                                           figsize=(6, 4))

# ## Explaining Predictions

# In[195]:

from skater.core.local_interpretation.lime.lime_tabular import LimeTabularExplainer
exp = LimeTabularExplainer(X_train,
                           feature_names=data.feature_names,
                           discretize_continuous=True,
                           class_names=['0', '1'])

# In[204]:

exp.explain_instance(X_test[0], logistic.predict_proba).show_in_notebook()

# In[202]:

exp.explain_instance(X_test[1], logistic.predict_proba).show_in_notebook()

# # Model Deployment

# ## Persist model to disk

# In[207]:

from sklearn.externals import joblib
joblib.dump(logistic, 'lr_model.pkl')

# ## Load model from disk
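# The loading cell is cut off here; a minimal counterpart to the dump call above,
# using the same (now deprecated) sklearn.externals import that the source uses:
from sklearn.externals import joblib

# reload the persisted logistic regression and sanity-check one prediction
logistic = joblib.load('lr_model.pkl')
print(logistic.predict(X_test[:1]))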