from ceteris_paribus.explainer import explain
from ceteris_paribus.profiles import individual_variable_profile
from ceteris_paribus.plots.plots import plot


def explain_wrap(index, columns):
    print("DOING {}".format(index))
    global count
    x, y = X_valid[index], Y_valid[index]
    # One explainer per model; each lambda returns the probability of class 1.
    explainer_xgb = explain(xgmodel, data=X_train, y=Y_train, label="XGBoost model",
        predict_function=lambda X: xgmodel.predict_proba(X.to_numpy())[:, 1], variable_names=column_names)
    explainer_linear = explain(logmodel, data=X_train, y=Y_train, label="Logistic model",
        predict_function=lambda X: logmodel.predict_proba(X.to_numpy())[:, 1], variable_names=column_names)
    # Ceteris paribus profiles for the same observation under both models.
    cp_xgb = individual_variable_profile(explainer_xgb, x, y)
    cp_lin = individual_variable_profile(explainer_linear, x, y)
    plot(cp_xgb, cp_lin, selected_variables=columns, destination="browser", show_observations=False)
    # Alternative: embed the generated HTML plot in a notebook instead.
    #IFrame(src="./_plot_files/plots{}.html".format(count), width=700, height=600)
    #with open("_plot_files/plots{}.html".format(count), 'r') as myfile:
    #    display(HTML(myfile.read()))
    count += 1
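# The snippet above relies on globals defined elsewhere in the source notebook
# (X_train, X_valid, Y_train, Y_valid, xgmodel, logmodel, column_names, count).
# A minimal setup sketch, assuming scikit-learn's breast_cancer data; the model
# choices here are illustrative, not from the original:
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from xgboost import XGBClassifier

cancer = load_breast_cancer()
column_names = list(cancer['feature_names'])
X_train, X_valid, Y_train, Y_valid = train_test_split(
    cancer['data'], cancer['target'], random_state=42)

xgmodel = XGBClassifier().fit(X_train, Y_train)                     # illustrative
logmodel = LogisticRegression(max_iter=1000).fit(X_train, Y_train)  # illustrative
count = 0  # counter for the generated plot files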
Example #2
    def ceterisParibus_connector(self, feature, *arg):
        from ceteris_paribus.plots.plots import plot

        query_instance = dict(s.split(':') for s in arg)

        #print(feature)

        #prepare data instance (nparray)
        categories = self.getCategoricalFeatures()
        np_instance = []
        for f in self.featureNames:
            if f in categories:
                np_instance.append(query_instance[f])
            else:
                np_instance.append(float(query_instance[f]))
        #print(np_instance)

        prediction_proba = self.model.predict_proba(
            pd.DataFrame([query_instance]))[0]
        prediction = int(np.argmax(prediction_proba))
        #print(prediction)

        explainer = explain(
            self.model,
            variable_names=self.featureNames,
            data=self.X_train,
            y=self.Y_train,
            label='Model',
            predict_function=lambda x: self.model.predict_proba(x)[::, 1])

        i = individual_variable_profile(explainer, np.array(np_instance),
                                        np.array([prediction]))

        p = plot(i,
                 selected_variables=[feature],
                 width=700,
                 height=800,
                 size=4)

        options = {'height': '500', 'width': '600'}

        imgkit.from_file('_plot_files/plots' + str(p) + '.html',
                         'temp/plots' + str(p) + '.jpg',
                         options=options)

        self.certainty = "I am 100 percent sure about the graph."
        return ("temp/plots" + str(p) + ".jpg")
Example #3
numeric_features = ['age', 'bmi', 'children']
numeric_transformer = Pipeline(steps=[('scaler', StandardScaler())])

categorical_features = ['sex', 'smoker', 'region']
categorical_transformer = Pipeline(
    steps=[('onehot', OneHotEncoder(handle_unknown='ignore'))])

preprocessor = ColumnTransformer(transformers=[
    ('num', numeric_transformer, numeric_features),
    ('cat', categorical_transformer, categorical_features)])

# Append classifier to preprocessing pipeline.
# Now we have a full prediction pipeline.
clf = Pipeline(steps=[('preprocessor', preprocessor),
                      ('classifier', RandomForestRegressor())])

clf.fit(x, y)

from ceteris_paribus.explainer import explain

explainer_cat = explain(clf, var_names, x, y, label="categorical_model")

from ceteris_paribus.profiles import individual_variable_profile
from ceteris_paribus.plots.plots import plot

cp_cat = individual_variable_profile(explainer_cat, x.iloc[:10], y.iloc[:10])

cp_cat.print_profile()
plot(cp_cat)

plot(cp_cat, color="smoker")
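# The dataset behind x, y and var_names is not shown above; a plausible setup,
# assuming the widely used insurance.csv data (an assumption based on the
# column names age/bmi/children/sex/smoker/region):
import pandas as pd

df = pd.read_csv('insurance.csv')   # assumed data source
x = df.drop(columns=['charges'])    # 'charges' as target is an assumption
y = df['charges']
var_names = list(x.columns)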
Example #4

if __name__ == "__main__":

    (linear_model, data, labels, variable_names) = linear_regression_model()
    (gb_model, _, _, _) = gradient_boosting_model()
    (svm_model, _, _, _) = supported_vector_machines_model()

    # NOTE: data comes from the unpacking above, while x and y are
    # module-level globals (see the factories in Example #8).
    explainer_linear = explain(linear_model, variable_names, data, y)
    explainer_gb = explain(gb_model, variable_names, data, y)
    explainer_svm = explain(svm_model, variable_names, data, y)

    # single profile
    cp_1 = individual_variable_profile(explainer_gb, x[0], y[0])
    plot(cp_1,
         destination="notebook",
         selected_variables=["bmi"],
         print_observations=False)

    # local fit
    neighbours_x, neighbours_y = select_neighbours(x, x[10], y=y, n=10)
    cp_2 = individual_variable_profile(explainer_gb, neighbours_x,
                                       neighbours_y)
    plot(cp_2,
         show_residuals=True,
         selected_variables=["age"],
         print_observations=False,
         color_residuals='red',
         plot_title='')

    # aggregate profiles
    plot(cp_2, aggregate_profiles="mean", selected_variables=["age"],
         print_observations=False)  # assumed completion; the original snippet is cut off here
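# The model factories called in __main__ are not shown in this example;
# Example #8 below contains the gradient boosting and SVM versions, and
# linear_regression_model presumably follows the same pattern:
from sklearn.linear_model import LinearRegression

def linear_regression_model():
    linear_model = LinearRegression()
    linear_model.fit(x, y)
    return linear_model, x, y, var_names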
Example #5
    # (snippet truncated: the opening of this call is not shown; the
    # assignment and leading arguments below are assumed from context)
    explainer_rf = explain(rf_model, variable_names, data, labels,
                           predict_function=predict_function,
                           label="sRNARFTarget")

    #cp_profile = individual_variable_profile(explainer_rf, data_for_prediction, y = 1, grid_points = 100)
    cp_profile = individual_variable_profile(explainer_rf,
                                             data_for_prediction,
                                             grid_points=200,
                                             variables=[sys.argv[3]])
    plot(cp_profile,
         show_profiles=True,
         show_residuals=True,
         show_rugs=True,
         height=700,
         width=750,
         yaxis_title='Prediction probability for class 1',
         plot_title='Ceteris paribus profiles of feature ' + sys.argv[3] +
         ' for ' + sys.argv[1] + '-' + sys.argv[2] + ' pair interaction',
         color='blue',
         size=3,
         alpha=0.5,
         color_residuals='red',
         size_residuals=20,
         alpha_residuals=20,
         print_observations=True)

elif (len(sys.argv) - 1) < 3:

    print(
        "Error: Required parameters not passed! Please pass all three parameters: sRNA ID, mRNA ID, and variable name."
    )
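# A hypothetical invocation of this script (the script name is an assumption;
# the three positional arguments are sRNA ID, mRNA ID and variable name, as
# the error message above states):
#
#     python sRNARFTarget_CPprofile.py sRNA_1 mRNA_1 feature_name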

Example #6

def random_forest_regression():
    # (snippet truncated: the construction of rf_model is not shown above
    # this point; the function name is taken from its call in __main__ below)
    # Train the model using the training set
    rf_model.fit(X_train, y_train)

    # model, data, labels, variable_names
    return rf_model, X_train, y_train, list(boston['feature_names'])


if __name__ == "__main__":
    (model, data, labels, variable_names) = random_forest_regression()
    explainer_rf = explain(model, variable_names, data, labels)

    cp_profile = individual_variable_profile(explainer_rf,
                                             X_train[0],
                                             y=y_train[0],
                                             variables=['TAX', 'CRIM'])
    plot(cp_profile)

    sample = select_sample(X_train, n=3)
    cp2 = individual_variable_profile(explainer_rf,
                                      sample,
                                      variables=['TAX', 'CRIM'])
    plot(cp2)

    neighbours = select_neighbours(X_train,
                                   X_train[0],
                                   variable_names=variable_names,
                                   selected_variables=variable_names,
                                   n=15)
    cp3 = individual_variable_profile(explainer_rf,
                                      neighbours,
                                      variables=['LSTAT', 'RM'])  # closing paren assumed; snippet is cut off here
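# The Boston snippets above assume module-level data from scikit-learn's
# boston dataset (removed in scikit-learn 1.2; the loader below is an
# assumption consistent with boston['feature_names'] and the TAX/CRIM/LSTAT/RM
# columns used above):
from sklearn.datasets import load_boston
from sklearn.model_selection import train_test_split

boston = load_boston()
X_train, X_test, y_train, y_test = train_test_split(
    boston['data'], boston['target'], test_size=0.33, random_state=42)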
Example #7
X = iris['data']
y = iris['target']

X_train, X_test, y_train, y_test = train_test_split(X,
                                                    y,
                                                    test_size=0.33,
                                                    random_state=42)

print(iris['feature_names'])


def random_forest_classifier():
    rf_model = RandomForestClassifier(n_estimators=100, random_state=42)

    rf_model.fit(X_train, y_train)

    return rf_model, X_train, y_train, iris['feature_names']


if __name__ == "__main__":
    (model, data, labels, variable_names) = random_forest_classifier()
    # Explain the predicted probability of the first class (setosa).
    predict_function = lambda X: model.predict_proba(X)[:, 0]
    explainer_rf = explain(model,
                           variable_names,
                           data,
                           labels,
                           predict_function=predict_function)
    cp_profile = individual_variable_profile(explainer_rf, X[1], y=y[1])
    plot(cp_profile)
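# Only the column index in predict_proba changes when profiling another class;
# e.g. for the third iris class (virginica), following the same pattern
# (the variable names below are illustrative):
predict_function_virginica = lambda X: model.predict_proba(X)[:, 2]
explainer_rf_virginica = explain(model, variable_names, data, labels,
                                 predict_function=predict_function_virginica)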
Example #8
def gradient_boosting_model():
    # (snippet truncated: the original constructor arguments, apart from
    # random_state=42, are not shown; the model class is an assumption)
    gb_model = GradientBoostingRegressor(random_state=42)
    gb_model.fit(x, y)
    return gb_model, x, y, var_names


def supported_vector_machines_model():
    svm_model = svm.SVR(C=0.01, gamma='scale')
    svm_model.fit(x, y)
    return svm_model, x, y, var_names


if __name__ == "__main__":
    (linear_model, data, labels, variable_names) = linear_regression_model()
    (gb_model, _, _, _) = gradient_boosting_model()
    (svm_model, _, _, _) = supported_vector_machines_model()

    explainer_linear = explain(linear_model, variable_names, data, y)
    explainer_gb = explain(gb_model, variable_names, data, y)
    explainer_svm = explain(svm_model, variable_names, data, y)

    cp_profile = individual_variable_profile(explainer_linear, x[0], y[0])
    plot(cp_profile, show_residuals=True)

    sample_x, sample_y = select_sample(x, y, n=10)
    cp2 = individual_variable_profile(explainer_gb, sample_x, y=sample_y)

    cp3 = individual_variable_profile(explainer_gb, x[0], y[0])
    plot(cp3, show_residuals=True)

    plot(cp_profile, cp3, show_residuals=True)
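# Profiles from different explainers can be overlaid by passing several of
# them to plot(), as the final line above shows; the same works across all
# three models, following the pattern already used in this example:
cp_lin = individual_variable_profile(explainer_linear, x[0], y[0])
cp_gb = individual_variable_profile(explainer_gb, x[0], y[0])
cp_svm = individual_variable_profile(explainer_svm, x[0], y[0])
plot(cp_lin, cp_gb, cp_svm)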
Example #9
    # (snippet truncated: the start of network_architecture() and its layer
    # definitions are not shown)
    model.compile(loss='mean_squared_error', optimizer='adam')
    return model


def keras_model():
    estimators = [('scaler', StandardScaler()),
                  ('mlp',
                   KerasRegressor(build_fn=network_architecture, epochs=200))]
    model = Pipeline(estimators)
    model.fit(x_train, y_train)
    return model, x_train, y_train, boston.feature_names


if __name__ == "__main__":
    model, x_train, y_train, var_names = keras_model()
    explainer_keras = explain(model,
                              var_names,
                              x_train,
                              y_train,
                              label='KerasMLP')
    cp = individual_variable_profile(
        explainer_keras,
        x_train[:10],
        y=y_train[:10],
        variables=["CRIM", "ZN", "AGE", "INDUS", "B"])
    plot(cp,
         show_residuals=True,
         selected_variables=["CRIM", "ZN", "AGE", "B"],
         show_observations=True,
         show_rugs=True)
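# For reference, a minimal sketch of what the truncated network_architecture()
# might look like (layer sizes are assumptions; 13 inputs match the Boston
# feature count):
from keras.models import Sequential
from keras.layers import Dense

def network_architecture():
    model = Sequential()
    model.add(Dense(64, activation='relu', input_dim=13))  # assumed hidden layer
    model.add(Dense(1))                                    # single regression output
    model.compile(loss='mean_squared_error', optimizer='adam')
    return model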