def tabular_explainer_imp(model, x_train, x_test, allow_eval_sampling=True):
    # Create local tabular explainer without run history
    exp = TabularExplainer(model, x_train, features=list(range(x_train.shape[1])))
    # Set the sampling policy for the evaluation examples
    policy = {
        ExplainParams.SAMPLING_POLICY: SamplingPolicy(
            allow_eval_sampling=allow_eval_sampling
        )
    }
    explanation = exp.explain_global(x_test, **policy)
    return explanation.global_importance_rank
# save model for use outside the script
model_file_name = 'log_reg.pkl'
joblib.dump(value=clf, filename=os.path.join(OUTPUT_DIR, model_file_name))

# register the model with the model management service for later use
run.upload_file('original_model.pkl', os.path.join(OUTPUT_DIR,
                                                   model_file_name))
original_model = run.register_model(model_name='amlcompute_deploy_model',
                                    model_path='original_model.pkl')

# create an explainer to validate or debug the model
tabular_explainer = TabularExplainer(model,
                                     initialization_examples=x_train,
                                     features=attritionXData.columns,
                                     classes=["Not leaving", "leaving"],
                                     transformations=transformations)

# explain overall model predictions (global explanation)
# passing in the test dataset for evaluation examples - note it must be a representative sample of the original data
# using more data (e.g. x_train) will likely produce more accurate explanations, but will take longer to compute
global_explanation = tabular_explainer.explain_global(x_test)

# uploading model explanation data for storage or visualization
comment = 'Global explanation on classification model trained on IBM employee attrition dataset'
client.upload_model_explanation(global_explanation, comment=comment)

# also create a lightweight explainer for scoring time
scoring_explainer = LinearScoringExplainer(tabular_explainer)
# pickle scoring explainer locally
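# (the example is truncated here; a minimal sketch of the pickling step, assuming
# joblib, os and the OUTPUT_DIR used above are available; the file name is illustrative)
joblib.dump(scoring_explainer, os.path.join(OUTPUT_DIR, 'scoring_explainer.pkl'))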
Example #3
preds = reg.predict(X_test)
run.log('alpha', alpha)

model_file_name = 'ridge_{0:.2f}.pkl'.format(alpha)
# save the model in the outputs folder so it automatically gets uploaded with the run
joblib.dump(value=reg, filename=os.path.join(OUTPUT_DIR, model_file_name))

# register the model
run.upload_file('original_model.pkl',
                os.path.join('./outputs/', model_file_name))
original_model = run.register_model(model_name='original_model',
                                    model_path='original_model.pkl')

# Explain predictions on your local machine
tabular_explainer = TabularExplainer(model,
                                     X_train,
                                     features=boston_data.feature_names)

# Explain overall model predictions (global explanation)
# Passing in the test dataset for evaluation examples - note it must be a representative sample of the original data
# X_train can be passed as well; with more examples the explanations take longer
# to compute, although they may be more accurate
global_explanation = tabular_explainer.explain_global(X_test)

# Uploading model explanation data for storage or visualization in webUX
# The explanation can then be downloaded on any compute
comment = 'Global explanation on regression model trained on boston dataset'
client.upload_model_explanation(global_explanation, comment=comment)
Example #4
        # set the arguments and algorithm choice
        args = {'alpha': args.alpha, 'l1_ratio': args.l1_ratio}
        algo = train_elasticnet

    elif args.model_name == 'gbt':
        # log the hyperparameters
        run.log('alpha', args.alpha)
        run.log('l1_ratio', args.l1_ratio)

        # set the arguments and algorithm choice
        args = {'alpha': args.alpha}
        algo = train_gradient_boosted_regressor

    # Train the model
    model = algo(X=X_train, y=y_train, **args)

    # Generate and upload model explanation
    tabular_explainer = TabularExplainer(model, X_train, features=column_names)
    global_explanation = tabular_explainer.explain_global(X_test)
    client.upload_model_explanation(global_explanation)

    # Generate predictions
    preds = predict_and_log_performance(model=model,
                                        X_test=X_test,
                                        y_test=y_test)

    # Plot the residuals
    resid_fig = plot_residuals_v_actuals(y_test, preds)
    resid_hist = plot_resid_histogram(y_test, preds)
    pred_plt = plot_predictions(y_test, preds)
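
    # (truncated in the original; a hedged sketch of logging the figures to the run,
    # assuming the plotting helpers above return matplotlib figures and `run` is the
    # Azure ML Run used earlier; the image names are illustrative)
    run.log_image('residuals_vs_actuals', plot=resid_fig)
    run.log_image('residuals_histogram', plot=resid_hist)
    run.log_image('predictions_vs_actuals', plot=pred_plt)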
Example #5
# ## Global Explanation Using TabularExplainer
#
# **Global Model Explanation** is a holistic understanding of how the model makes decisions. It provides insight into which features are most important and how strongly they influence model predictions.
#
# [TabularExplainer](https://docs.microsoft.com/en-us/python/api/azureml-explain-model/azureml.explain.model.tabularexplainer?view=azure-ml-py) uses one of three explainers: TreeExplainer, DeepExplainer, or KernelExplainer, and automatically selects the most appropriate one for our use case. You can learn more about the underlying model explainers at [Azure Model Interpretability](https://docs.microsoft.com/en-us/azure/machine-learning/service/machine-learning-interpretability-explainability).
#
# To initialize an explainer object, you need to pass your model and some training data to the explainer's constructor.
#
# *Note that you can pass in your feature transformation pipeline to the explainer to receive explanations in terms of the raw features before the transformation (rather than engineered features).*

# In[ ]:

# "features" and "classes" fields are optional
tabular_explainer = TabularExplainer(clf.steps[-1][1],
                                     initialization_examples=X_train,
                                     features=X_train.columns,
                                     transformations=transformations)

# ### Get the global feature importance values
#
# Run the cell below and observe the sorted global feature importance. You will note that `tripDistance` is the most important feature in predicting taxi fares, followed by `hour_of_day` and `day_of_week`.

# In[ ]:

import warnings
warnings.filterwarnings('ignore')

# You can use the training data or the test data here
global_explanation = tabular_explainer.explain_global(X_test)
# Sorted feature importance values and feature names
sorted_global_importance_values = global_explanation.get_ranked_global_values()
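# Corresponding feature names, in the same ranked order
sorted_global_importance_names = global_explanation.get_ranked_global_names()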
Example #6
x_train, x_test, y_train, y_test = train_test_split(X_norm,
                                                    y_flat,
                                                    test_size=0.2)
print("split dataset into training and test sets with an 80-20 partition\n")
clf = svm.SVC(gamma=0.001, C=100, probability=True)
print("initialized SVM classifier\n")
print("fitting the training set into the classifier now\n")
model = clf.fit(x_train, y_train)

#### explain predictions on your local machine ####
# "features" and "classes" fields are optional
print("started model explanation \n")
from azureml.explain.model.tabular_explainer import TabularExplainer

explainer = TabularExplainer(model,
                             x_train,
                             features=feature_names,
                             classes=labels)
print("initialized explainer\n")

#### Explain overall model predictions (global explanation) ####
print("computing global explanation\n")
global_explanation = explainer.explain_global(x_test, batch_size=200)

# uploading global model explanation data for storage or visualization in webUX
# the explanation can then be downloaded on any compute
# multiple explanations can be uploaded
print("uploading global explanation\n")
client.upload_model_explanation(global_explanation,
                                comment='global explanation: all features')
print("uploaded global explanation\n")
# or you can upload an explanation that keeps only the top k feature information
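# for example (a sketch; the top_k value is illustrative):
# client.upload_model_explanation(global_explanation, top_k=2,
#                                 comment='global explanation: only top 2 features')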
Example #7
def model_train(ds_df, run):

    ds_df.drop("Sno", axis=1, inplace=True)

    y_raw = ds_df['Risk']
    X_raw = ds_df.drop('Risk', axis=1)

    categorical_features = X_raw.select_dtypes(include=['object']).columns
    numeric_features = X_raw.select_dtypes(include=['int64', 'float']).columns

    categorical_transformer = Pipeline(steps=[
        ('imputer', SimpleImputer(strategy='constant', fill_value="missing")),
        ('onehotencoder', OneHotEncoder(categories='auto', sparse=False))
    ])

    numeric_transformer = Pipeline(steps=[('scaler', StandardScaler())])

    feature_engineering_pipeline = ColumnTransformer(
        transformers=[
            ('numeric', numeric_transformer, numeric_features),
            ('categorical', categorical_transformer, categorical_features)
        ],
        remainder="drop")

    # Encode Labels
    le = LabelEncoder()
    encoded_y = le.fit_transform(y_raw)

    # Train test split
    X_train, X_test, y_train, y_test = train_test_split(X_raw,
                                                        encoded_y,
                                                        test_size=0.20,
                                                        stratify=encoded_y,
                                                        random_state=42)

    # Create sklearn pipeline
    lr_clf = Pipeline(steps=[
        ('preprocessor', feature_engineering_pipeline),
        ('classifier', LogisticRegression(solver="lbfgs"))
    ])
    # Train the model
    lr_clf.fit(X_train, y_train)

    # Capture metrics
    train_acc = lr_clf.score(X_train, y_train)
    test_acc = lr_clf.score(X_test, y_test)
    print("Training accuracy: %.3f" % train_acc)
    print("Testing accuracy: %.3f" % test_acc)

    # Log to Azure ML
    run.log('Train accuracy', train_acc)
    run.log('Test accuracy', test_acc)

    # Explain model
    explainer = TabularExplainer(lr_clf.steps[-1][1],
                                 initialization_examples=X_train,
                                 features=X_raw.columns,
                                 classes=["Good", "Bad"],
                                 transformations=feature_engineering_pipeline)

    # explain overall model predictions (global explanation)
    global_explanation = explainer.explain_global(X_test)

    # Sorted SHAP values
    print('ranked global importance values: {}'.format(
        global_explanation.get_ranked_global_values()))
    # Corresponding feature names
    print('ranked global importance names: {}'.format(
        global_explanation.get_ranked_global_names()))
    # Feature ranks (based on original order of features)
    print('global importance rank: {}'.format(
        global_explanation.global_importance_rank))

    client = ExplanationClient.from_run(run)
    client.upload_model_explanation(global_explanation,
                                    comment='Global Explanation: All Features')

    return lr_clf
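

# Hypothetical usage sketch (not part of the original script): assumes the credit-risk
# data is available locally as a CSV and the script runs inside an Azure ML run context.
if __name__ == '__main__':
    import pandas as pd
    from azureml.core.run import Run

    run = Run.get_context()
    ds_df = pd.read_csv('german_credit_data.csv')  # assumed file name
    trained_clf = model_train(ds_df, run)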