def part_plot_1D(model, total_features, X_val, y_val, feature):
    """Show the one-dimensional partial dependence plot of ``feature``.

    Args:
        model: Estimator handed to ``pdp.pdp_isolate``.
        total_features: Feature names passed as ``model_features``.
        X_val: Dataset the partial dependence is computed on.
        y_val: Accepted for signature compatibility; not used here.
        feature: Name of the feature to isolate and plot.
    """
    isolate_kwargs = {
        "model": model,
        "dataset": X_val,
        "model_features": total_features,
        "feature": feature,
    }
    isolated = pdp.pdp_isolate(**isolate_kwargs)
    pdp.pdp_plot(isolated, feature)
    plt.show()
Exemplo n.º 2
0
def pdp_1d(clf, X, Y, features_to_plot, label, detailed=False):
    """Build one partial dependence figure per feature for a single label.

    Args:
        clf: Multi-label wrapper exposing ``classifiers_``; the classifier at
            the position of ``label`` in ``Y.columns`` is used.
        X: Feature DataFrame; its columns are passed as ``model_features``.
        Y: Label DataFrame; only its column order is read.
        features_to_plot: Iterable of feature names to plot.
        label: Column name in ``Y`` selecting which classifier to explain.
        detailed: When truthy, request the clustered/quantile/percentile
            variant of the plot.

    Returns:
        ``(figs, axs)``: two lists with one entry per plotted feature.

    Raises:
        ValueError: If ``label`` is not one of ``Y.columns``.
    """
    from pdpbox import pdp

    # Extract the classifier object from the clf multilearn object
    index = Y.columns.to_list().index(label)
    clf = clf.classifiers_[index]

    plot_kwargs = {}
    if detailed:
        plot_kwargs = dict(plot_pts_dist=True, cluster=True,
                           n_cluster_centers=50, x_quantile=True,
                           show_percentile=True)

    figs = []
    axs = []

    # Silence the classifier while plotting, then put back whatever setting it
    # had before (falling back to True when it had none), even if plotting
    # raises part-way through.
    previous_verbose = getattr(clf, "verbose", True)
    clf.verbose = False
    try:
        for feature in features_to_plot:
            pdp_dist = pdp.pdp_isolate(model=clf, dataset=X,
                                       model_features=X.columns.to_list(),
                                       feature=feature)
            fig, ax = pdp.pdp_plot(pdp_dist, feature, **plot_kwargs)
            figs.append(fig)
            axs.append(ax)
    finally:
        clf.verbose = previous_verbose

    return figs, axs
def pdpPdpbox(data, pr, featureToExamine):
    """Render and save two pdpbox plots for ``featureToExamine``.

    The first figure is a plain PDP with individual lines and is saved as
    "pdpPdpboxIsolate"; the second adds the quantile x-axis, point
    distribution and percentiles and is saved as "pdpPdpboxPlot". In both
    figures every drawn line has its alpha forced to 1.

    Args:
        data: DataFrame used as dataset; its columns are the model features.
        pr: Model handed to ``pdp.pdp_isolate``.
        featureToExamine: Name of the feature to isolate.
    """
    isolated = pdp.pdp_isolate(
        model=pr,
        dataset=data,
        model_features=data.columns,
        feature=featureToExamine,
    )

    # Plain PDP figure.
    _, plain_axes = pdp.pdp_plot(
        isolated,
        featureToExamine,
        plot_lines=True,
        frac_to_plot=min(100, len(data)),
    )
    for curve in plain_axes["pdp_ax"].lines:
        curve._alpha = 1
    save("pdpPdpboxIsolate", plt=plt)

    # Detailed figure: here "pdp_ax" is a mapping with two sub-axes.
    fig, detailed_axes = pdp.pdp_plot(
        isolated,
        featureToExamine,
        plot_lines=True,
        frac_to_plot=min(100, len(data)),
        x_quantile=True,
        plot_pts_dist=True,
        show_percentile=True,
    )
    for panel_name in ("_pdp_ax", "_count_ax"):
        for curve in detailed_axes["pdp_ax"][panel_name].lines:
            curve._alpha = 1
    save("pdpPdpboxPlot", plt=plt, fig=fig)
 def show_PDP_isolate(self, features=[]):
     """Plot and show a partial dependence plot for every name in ``features``.

     Each feature is isolated with ``pdp.pdp_isolate`` against ``self.model``
     and ``self.X_train``; the x tick labels are rotated by 90 degrees before
     the figure is shown.
     """
     for name in features:
         isolated = pdp.pdp_isolate(
             self.model,
             self.X_train,
             model_features=self.feature_names,
             feature=name,
             predict_kwds={},
         )
         pdp.pdp_plot(isolated, feature_name=name)
         plt.xticks(rotation=90)
         plt.show()
def _labelled_ice_plot(pdp_current, feature, center):
    """Draw one PDP/ICE figure for ``feature`` and label its axes; return the figure."""
    long_name = varnames_long_dict[feature]
    fig, axes = pdp.pdp_plot(
        pdp_current, long_name,
        center=center,
        # The original author notes this value as a percentage -- TODO confirm
        # against the installed pdpbox version.
        plot_lines=True, frac_to_plot=100,
        x_quantile=False, plot_pts_dist=True, show_percentile=True,
        plot_params=plot_params_default)
    pdp_ax = axes["pdp_ax"]["_pdp_ax"]
    count_ax = axes["pdp_ax"]["_count_ax"]
    pdp_ax.set_ylabel("Number of bike rides per hour", size=24)
    count_ax.set_xlabel(long_name, size=24)
    pdp_ax.set_title('Partial Dependence and ICE Plot for: %s' % long_name,
                     y=1.1, size=24)
    pdp_ax.tick_params(axis='both', which='major', labelsize=24)
    return fig


def construct_ice_plot(pdp_current, feature):
    """Create a centered and a standard ICE plot for a numeric feature.

    The plot label is looked up in the module-level ``varnames_long_dict``
    using the ``feature`` argument (previously an unrelated name,
    ``wch_feature``, was used for the plot call while ``feature`` was used for
    the axis labels).

    Args:
        pdp_current: Result object accepted by ``pdp.pdp_plot``.
        feature: Key into ``varnames_long_dict`` for the feature being plotted.

    Returns:
        ``(fig_center, fig)``: the centered figure and the uncentered figure.
    """
    fig_center = _labelled_ice_plot(pdp_current, feature, center=True)
    fig = _labelled_ice_plot(pdp_current, feature, center=False)
    return fig_center, fig
def pdp_isolate_explain(X, y, feature):
    """Fit a random forest on ``X``/``y`` and draw the PDP of ``feature``.

    ``X`` is ordinal-encoded and imputed, a ``RandomForestClassifier`` is
    fitted on the result, and the partial dependence of ``feature`` is plotted
    with individual lines.

    The same encoded-and-imputed frame is used both for fitting and for the
    partial dependence computation, so the model is never asked to predict on
    values that were imputed away during training.

    Args:
        X: Feature table accepted by ``category_encoders.OrdinalEncoder``.
        y: Target values for the classifier.
        feature: Name of the column to explain.
    """
    import category_encoders as ce
    import pandas as pd
    from sklearn.impute import SimpleImputer
    from sklearn.ensemble import RandomForestClassifier
    from pdpbox.pdp import pdp_isolate, pdp_plot

    # Encode, then impute, keeping column names so pdpbox can address features
    X_encoded = ce.OrdinalEncoder().fit_transform(X)
    X_processed = pd.DataFrame(SimpleImputer().fit_transform(X_encoded),
                               columns=X_encoded.columns,
                               index=X_encoded.index)

    # Pick a model and fit the data
    pdp_model = RandomForestClassifier(n_estimators=200,
                                       n_jobs=-1,
                                       random_state=6)
    pdp_model.fit(X_processed, y)

    # The actual plotting (result kept under its own name so the imported
    # pdp_isolate function is not shadowed)
    isolated = pdp_isolate(model=pdp_model,
                           dataset=X_processed,
                           model_features=X_processed.columns,
                           feature=feature)
    pdp_plot(isolated,
             feature_name=feature,
             plot_lines=True,
             frac_to_plot=100)
Exemplo n.º 7
0
def pdp_plotter(feature, model):
    """Show the partial dependence plot of ``feature`` for ``model``.

    The dataset and feature list come from the module-level ``test_X`` and
    ``feature_names``. The ``model`` argument is the estimator that gets
    explained (it was previously ignored in favour of a global classifier).

    Args:
        feature: Name of the feature to isolate.
        model: Estimator handed to ``pdp.pdp_isolate``.
    """
    pdp_feat = pdp.pdp_isolate(model=model,
                               dataset=test_X,
                               model_features=feature_names,
                               feature=feature)
    pdp.pdp_plot(pdp_feat, feature)
    plt.show()
Exemplo n.º 8
0
def partial_dependence_plot(model, data: pd.DataFrame, model_features: list,
                            column: str):
    """Plot the partial dependence of ``column`` on a 10x8 figure and show it.

    Args:
        model: Estimator handed to ``pdp.pdp_isolate``.
        data: Dataset the partial dependence is computed on.
        model_features: Feature names passed as ``model_features``.
        column: Name of the feature to isolate and plot.

    Returns:
        Whatever ``plt.show()`` returns.
    """
    isolated = pdp.pdp_isolate(
        model=model,
        dataset=data,
        model_features=model_features,
        feature=column,
    )
    pdp.pdp_plot(isolated, column, figsize=(10, 8))
    shown = plt.show()
    return shown
Exemplo n.º 9
0
def ploting_pdp(f):
    """Show the partial dependence plot of feature ``f``.

    Uses the module-level model ``rf`` and training frame ``X_train``.
    """
    isolate_options = dict(
        model=rf,
        dataset=X_train,
        model_features=X_train.columns,
        feature=f,
        cust_grid_points=None,
    )
    isolated = pdp.pdp_isolate(**isolate_options)
    pdp.pdp_plot(isolated, f)
    plt.show()
Exemplo n.º 10
0
def partial_dependence_plot(feat_name, model, X_test, base_features, path):
    """Draw the partial dependence plot of ``feat_name`` and save it to ``path``.

    Args:
        feat_name: Name of the feature to isolate.
        model: Estimator handed to ``pdp.pdp_isolate``.
        X_test: Dataset the partial dependence is computed on.
        base_features: Feature names passed as ``model_features``.
        path: Destination handed to ``plt.savefig``; also printed.
    """
    isolated = pdp.pdp_isolate(
        model=model,
        dataset=X_test,
        model_features=base_features,
        feature=feat_name,
    )
    pdp.pdp_plot(isolated, feat_name)

    # Persist, report, then release the figure.
    plt.savefig(path)
    message = 'generate ' + path
    print(message)
    plt.close()
def plot_1D_partial_dependency(fitted_model, X_test : pd.DataFrame, model_features : list, feature : str ):
    """Show the one-dimensional partial dependence plot of ``feature``.

    Args:
        fitted_model: Estimator handed to ``pdp.pdp_isolate``.
        X_test: Dataset the partial dependence is computed on.
        model_features: Feature names passed as ``model_features``.
        feature: Name of the feature to isolate and plot.
    """
    # Compute the values to draw ...
    isolated = pdp.pdp_isolate(
        model=fitted_model,
        dataset=X_test,
        model_features=model_features,
        feature=feature,
    )

    # ... then draw and display them.
    pdp.pdp_plot(isolated, feature)
    plt.show()
Exemplo n.º 12
0
 def pdp_plot(self, model, X_val, feature_to_plot):
     """Show one partial dependence plot per entry of ``feature_to_plot``.

     model: model handed to ``pdp.pdp_isolate``
     X_val: validation dataset; its columns are used as the model features
     feature_to_plot: list of feature names
     """
     for name in feature_to_plot:
         column_names = X_val.columns.tolist()
         isolated = pdp.pdp_isolate(model, X_val, column_names, name)
         pdp.pdp_plot(isolated, name)
         plt.show()
Exemplo n.º 13
0
def show_partial_dep_plots(lin_model, X_test):
    """Show a partial dependence plot for every column of ``X_test``."""
    for column in X_test.columns:
        isolated = pdp.pdp_isolate(model=lin_model,
                                   dataset=X_test,
                                   model_features=X_test.columns,
                                   feature=column)
        pdp.pdp_plot(isolated, column)
        plt.show()
Exemplo n.º 14
0
def isolated(model, X, feature):
    """
    Draw the isolated partial dependence plot of ``feature``.

    ``X`` supplies both the dataset and, through its columns, the list of
    model features.
    """
    # compute the partial dependence of the single feature
    pdp_result = pdp_isolate(
        model=model,
        dataset=X,
        model_features=X.columns,
        feature=feature,
    )

    # draw it
    pdp_plot(pdp_result, feature_name=feature)
Exemplo n.º 15
0
def make_pdp_interpretation(dataset, column_names, training_set, model):
    """Render a Streamlit partial dependence view driven by user input.

    The user picks a feature and a target class; the per-class partial
    dependence result at the position associated with that class is plotted,
    followed by a heading and an optional explanatory info box.
    """
    X_pdp = pd.DataFrame(training_set, columns=column_names)
    feature = st.selectbox("Choose the feature to plot", column_names)
    class_list = list(dataset['Target Exit Destination'].value_counts().index)
    target_value = st.selectbox("Choose the class to plot",
                                class_list,
                                index=1)
    isolated = pdp_isolate(
        model=model,
        dataset=X_pdp,
        model_features=X_pdp.columns,
        feature=feature,
    )

    # Position of each known class within the pdp_isolate result.
    class_positions = {
        'Unknown/Other': 0,
        'Permanent Exit': 1,
        'Emergency Shelter': 2,
        'Temporary Exit': 3,
        'Transitional Housing': 4,
    }
    position = class_positions.get(target_value)
    if position is not None:
        pdp_plot(isolated[position], feature_name=[feature, target_value])

    st.pyplot()
    st.markdown("#### Partial Dependence Plot")
    info_global = st.button("How it is calculated")
    if info_global:
        st.info("""
            The partial dependence plot shows how a feature affects
            predictions. Here's how to undertand the pdp plot:

                1. The y axis is interpreted as change in the prediction from
                   what it would be predicted at the baseline or leftmost
                   value.

                2. A blue shaded area indicates level of confidence

            You can choose one of out the five prediction classes to see the
            effects of a selected feature.

            For more information, check out this free course at kaggle:
            [Link](https://www.kaggle.com/dansbecker/partial-plots)

            To check out the pdp box documentation, click the link:
            [PDP Box Documentation](
                https://pdpbox.readthedocs.io/en/latest/index.html
                )
            """)
Exemplo n.º 16
0
    def plot_pdp(self, feature_to_plot, i):
        """Save the partial dependence plot of ``feature_to_plot`` as a JPEG.

        The image is written to ``self.out`` as ``dep_plot<i>.jpg`` at 400 dpi
        and the figure is closed afterwards.
        """
        feature_names = list(self.x.columns)
        isolated = pdp.pdp_isolate(
            model=self.model,
            dataset=self.x,
            model_features=feature_names,
            feature=feature_to_plot,
        )
        pdp.pdp_plot(isolated, feature_to_plot)

        # Tighten the layout, write the image, release the figure.
        plt.tight_layout()
        target = self.out + '/dep_plot' + str(i) + '.jpg'
        plt.savefig(target, dpi=400)
        plt.close()
Exemplo n.º 17
0
def pdplot(
    model,
    X_val,
    feat,
    image_name='img_pdplot.png',
):
    """Load a pickled model, plot the PDP of ``feat``, save it and show it.

    Args:
        model: Path of the pickle file containing the model.
        X_val: DataFrame used as dataset; its columns are the model features.
        feat: Name of the feature to isolate and plot.
        image_name: File the figure is saved to.
    """
    # NOTE(security): pickle.load can execute arbitrary code; only use this
    # with model files from a trusted source.
    with open(model, 'rb') as model_file:
        ml_model = pickle.load(model_file)

    feat_names = X_val.columns.tolist()
    pdp_assign = pdp.pdp_isolate(model=ml_model,
                                 dataset=X_val,
                                 model_features=feat_names,
                                 feature=feat)
    pdp.pdp_plot(pdp_assign, feat)
    # Save before showing: once show() returns the figure may already have
    # been closed, in which case savefig would write an empty image.
    plt.savefig(image_name)
    plt.show()
0
    def show_pdp(self, feature_name):
        """Draw the uncentered PDP of ``feature_name`` with individual lines.

        The model is fitted first when ``self.fitted_model`` is still None.
        For the encoded 'Sex' feature the displayed column labels are replaced
        by the encoder's categories for that column.
        """
        if self.fitted_model is None:
            self.fit()

        isolated = pdp_isolate(self.fitted_model, self.X_train,
                               self.X_train.columns, feature_name)

        is_encoded_sex = feature_name == 'Sex'
        if is_encoded_sex:
            isolated.display_columns = self.encoder.categories_[
                self.categorical.columns.get_loc('Sex')]

        plot_options = dict(center=False,
                            plot_lines=True,
                            x_quantile=True,
                            frac_to_plot=0.2)
        pdp_plot(isolated, feature_name, **plot_options)
Exemplo n.º 19
0
def plot_pdp(m,
             X,
             features,
             feature,
             center=True,
             classes=None,
             percentile_range=None,
             plot_params=None):
    """Show the PDP of ``feature`` with individual lines and a point distribution.

    Args:
        m: Model handed to ``pdp.pdp_isolate``.
        X: Dataset the partial dependence is computed on.
        features: Feature names of the model.
        feature: Name of the feature to isolate.
        center: Forwarded to ``pdp.pdp_plot``.
        classes: Optional tick labels; when given they replace the x tick
            labels of both sub-axes, the count axis loses its x label and
            title, and the figure's x labels are auto-formatted.
        percentile_range: Forwarded to ``pdp.pdp_isolate``.
        plot_params: Forwarded to ``pdp.pdp_plot``.
    """
    isolated = pdp.pdp_isolate(m, X, features, feature,
                               n_jobs=-1,
                               percentile_range=percentile_range)
    fig, axes = pdp.pdp_plot(isolated, feature,
                             plot_lines=True,
                             center=center,
                             plot_pts_dist=True,
                             plot_params=plot_params)

    if classes is not None:
        for panel_name in ('_pdp_ax', '_count_ax'):
            axes['pdp_ax'][panel_name].set_xticklabels(classes)
        count_ax = axes['pdp_ax']['_count_ax']
        count_ax.set_xlabel('')
        count_ax.set_title('')
        fig.autofmt_xdate()

    plt.show()
def partial_dependence_plot(model, dataset: pd.DataFrame, model_features: list,
                            objective: str, **kwargs):
    """Plot the partial dependence of ``objective`` and show the figure.

    :param model: estimator handed to ``pdp.pdp_isolate``
    :param dataset: data the partial dependence is computed on
    :param model_features: feature names passed as ``model_features``
    :param objective: name of the feature to isolate and plot
    :param kwargs: extra keyword arguments forwarded to ``pdp.pdp_plot``;
        ``figsize`` defaults to ``(10, 8)`` and may be overridden here
    :return: whatever ``plt.show()`` returns
    """
    pdp_data = pdp.pdp_isolate(model=model,
                               dataset=dataset,
                               model_features=model_features,
                               feature=objective)
    # Merge instead of passing figsize next to **kwargs, so a caller-supplied
    # figsize overrides the default rather than raising a duplicate-keyword
    # TypeError.
    plot_options = {'figsize': (10, 8), **kwargs}
    pdp.pdp_plot(pdp_data, objective, **plot_options)
    return plt.show()
Exemplo n.º 21
0
 def test_pdp_plot_single_default(self, pdp_sex):
     """Without a distribution plot, expect a figure plus two plain axes."""
     figure, axes_by_name = pdp_plot(pdp_sex, "sex")
     assert type(figure) == matplotlib.figure.Figure
     assert sorted(axes_by_name.keys()) == ["pdp_ax", "title_ax"]
     for axis_name in ("pdp_ax", "title_ax"):
         assert type(
             axes_by_name[axis_name]) == matplotlib.axes._subplots.Subplot
def plot_pdp(feat, clusters=None, feat_name=None):
    """Plot the PDP of ``feat`` using the module-level model ``m`` and data ``x``.

    Args:
        feat: Feature to isolate.
        clusters: Number of cluster centers; clustering is enabled only when
            this is not None.
        feat_name: Label used in the plot; falls back to ``feat`` when falsy.

    Returns:
        The result of ``pdp.pdp_plot``.
    """
    label = feat_name or feat
    use_clusters = clusters is not None
    isolated = pdp.pdp_isolate(m, x, feat)
    return pdp.pdp_plot(
        isolated,
        label,
        plot_lines=True,
        cluster=use_clusters,
        n_cluster_centers=clusters,
    )
Exemplo n.º 23
0
def _current_figure_img_tag(css_class):
    """Return an ``<img>`` tag embedding the current pyplot figure as base64 PNG."""
    buf = io.BytesIO()
    try:
        plt.savefig(buf, format='png')
        # b64encode returns bytes; decode so it can be joined with str.
        encoded = base64.b64encode(buf.getvalue()).decode('ascii')
    finally:
        buf.close()
    return ("<img class='" + css_class + "' src='data:image/png;base64,"
            + encoded + "'/>")


def pdp_feat(feat):
    """Render the 'pdp.html' page with the PDP and actual plots of ``feat``.

    Both plots are produced from the module-level ``xgb_clf`` model and
    ``test`` frame and are embedded in the page as inline base64 PNG images.

    Args:
        feat: Feature identifier; converted with ``str`` before use.

    Returns:
        The result of ``render_template`` for 'pdp.html'.
    """
    feature = str(feat)
    pdp_obj = pdp.pdp_isolate(xgb_clf, test[xgb_clf.booster().feature_names], feature)

    pdp.pdp_plot(pdp_obj, feature, plot_org_pts=True, x_quantile=True)
    img_tag = _current_figure_img_tag('pdp_plot')

    pdp.actual_plot(pdp_obj, feature)
    img_tag_act = _current_figure_img_tag('act_plot')

    return render_template('pdp.html', feature_name=feat, img_html=img_tag, img_act_html=img_tag_act)
Exemplo n.º 24
0
 def test_pdp_plot_single_distplot(self, pdp_sex):
     """With a distribution plot, 'pdp_ax' holds a pdp axis and a count axis."""
     _, axes = pdp_plot(pdp_sex, "sex", plot_pts_dist=True)
     assert sorted(axes.keys()) == ["pdp_ax", "title_ax"]
     panels = axes["pdp_ax"]
     assert sorted(panels.keys()) == ["_count_ax", "_pdp_ax"]
     for panel_name in ("_pdp_ax", "_count_ax"):
         assert type(panels[panel_name]) == matplotlib.axes._subplots.Subplot
     assert type(axes["title_ax"]) == matplotlib.axes._subplots.Subplot
Exemplo n.º 25
0
def generateInsight(model,features,data):
    """Draw the PDP of ``features`` together with its point distribution.

    ``data`` supplies both the dataset and, through its columns, the list of
    model features. Nothing is returned.
    """
    isolated = pdp.pdp_isolate(
        model=model,
        dataset=data,
        model_features=data.columns,
        feature=features,
    )
    pdp.pdp_plot(
        pdp_isolate_out=isolated,
        feature_name=features,
        plot_pts_dist=True,
    )
Exemplo n.º 26
0
def plot_pdp(df, model, feat, clusters=None, feat_name=None):
    '''Plot the PDP of ``feat``; pass a sample of the dataframe from get_sample().

    ``clusters`` enables clustering when it is not None and is used as the
    number of cluster centers; ``feat_name`` is the plot label and falls back
    to ``feat`` when falsy. Returns the result of ``pdp.pdp_plot``.
    '''
    label = feat_name or feat
    isolated = pdp.pdp_isolate(model, df, df.columns, feat)
    plot_options = {
        'plot_lines': True,
        'cluster': clusters is not None,
        'n_cluster_centers': clusters,
    }
    return pdp.pdp_plot(isolated, label, **plot_options)
Exemplo n.º 27
0
 def test_pdp_plot_multi_which_classes(self, pdp_feat_67_rf):
     """Selecting three classes yields three entries under 'pdp_ax'."""
     selected_classes = [0, 3, 7]
     _, axes = pdp_plot(
         pdp_feat_67_rf,
         'feat_67',
         center=True,
         x_quantile=True,
         ncols=2,
         which_classes=selected_classes,
     )
     assert len(axes['pdp_ax']) == 3
Exemplo n.º 28
0
 def test_pdp_plot_multi_one_class(self, pdp_feat_67_rf):
     """Selecting a single class yields one plain subplot under 'pdp_ax'."""
     plot_options = dict(center=True,
                         x_quantile=True,
                         ncols=2,
                         which_classes=[5])
     _, axes = pdp_plot(pdp_feat_67_rf, 'feat_67', **plot_options)
     assert type(axes['pdp_ax']) == matplotlib.axes._subplots.Subplot
Exemplo n.º 29
0
def eval_pdp(model, x_dev, feature_names):
    """Save one partial dependence plot per feature as ``pdp_<feature>.png``.

    Follows https://www.kaggle.com/dansbecker/partial-plots. Output paths are
    obtained from ``flexp.get_file_path``.

    Args:
        model: Estimator handed to ``pdp.pdp_isolate``.
        x_dev: Evaluation data; wrapped in a DataFrame with ``feature_names``
            as columns.
        feature_names: Column names of ``x_dev``; every one of them is plotted.
    """
    # pdp_isolate requires the data to be DataFrame so wrap it
    df_x_dev = pd.DataFrame(x_dev, columns=feature_names)

    for feature in feature_names:
        # Create the data that we will plot
        pdp_values = pdp.pdp_isolate(model=model,
                                     dataset=df_x_dev,
                                     model_features=feature_names,
                                     feature=feature,
                                     num_grid_points=100)

        # plot it
        pdp.pdp_plot(pdp_values, feature)
        plt.savefig(flexp.get_file_path("pdp_{}.png".format(feature)))
        # Close rather than clear: a cleared figure stays registered with
        # pyplot, so one figure per feature would otherwise pile up in memory.
        plt.close()
Exemplo n.º 30
0
 def test_pdp_plot_single_distplot(self, pdp_sex):
     """Single chart with data distribution: check the axes layout and types."""
     _, axes = pdp_plot(pdp_sex, 'sex', plot_pts_dist=True)
     assert sorted(axes.keys()) == ['pdp_ax', 'title_ax']
     assert sorted(axes['pdp_ax'].keys()) == ['_count_ax', '_pdp_ax']
     pdp_panel = axes['pdp_ax']['_pdp_ax']
     assert type(pdp_panel) == matplotlib.axes._subplots.Subplot
     count_panel = axes['pdp_ax']['_count_ax']
     assert type(count_panel) == matplotlib.axes._subplots.Subplot
     title_panel = axes['title_ax']
     assert type(title_panel) == matplotlib.axes._subplots.Subplot