def plot_2d_pdp(self, features_2d_plot):
    """Render a 2D PDP contour for a feature pair and save it to disk.

    Computes the interaction of the two features in ``features_2d_plot``
    with self.model over self.x and writes the figure to
    <self.out>/dep_plot_2d.jpg at 300 dpi.
    """
    # Compute the interaction surface for the requested feature pair.
    interact_out = pdp.pdp_interact(
        model=self.model,
        dataset=self.x,
        model_features=list(self.x.columns),
        features=features_2d_plot)
    # Cosmetic settings for the contour plot.
    style = {
        'title_fontsize': 15,       # plot title size
        'subtitle_fontsize': 12,    # subtitle size
        'contour_color': 'white',   # color for contour line
        'font_family': 'Arial',
        'cmap': 'viridis',          # matplotlib color map for interact plot
        'inter_fill_alpha': 0.8,    # fill alpha for interact plot
        'inter_fontsize': 9,        # fontsize for interact plot text
    }
    pdp.pdp_interact_plot(
        pdp_interact_out=interact_out,
        feature_names=features_2d_plot,
        plot_type='contour',
        plot_params=style)
    # Persist and release the figure.
    plt.tight_layout()
    plt.savefig(self.out + '/dep_plot_2d.jpg', dpi=300)
    plt.close()
def plotFeatures(model, df, features, cat_features=None, encodings=None):
    '''Interacting features pdp plot.

    Draws a heatmap of the 2D partial-dependence surface for the two
    columns in ``features``, translating encoded category codes back to
    their names when ``encodings`` is supplied.

    model: fitted estimator passed to pdp_interact.
    df: dataframe the PDP is computed over.
    features: list of exactly two column names.
    cat_features: unused; kept for backward compatibility.
        (Bug fix: was a mutable default argument ``[]``.)
    encodings: optional list of dicts with 'col' and 'mapping' keys
        (e.g. from category_encoders) used to relabel the axes.
    '''
    interaction = pdp_interact(model=model, dataset=df,
                               model_features=df.columns,
                               features=features,)
    # Pivot predictions into a grid; [::-1] reverses the index so the
    # y axis ascends in the heatmap.
    pdp = interaction.pdp.pivot_table(values='preds',
                                      columns=features[0],
                                      index=features[1])[::-1]
    if encodings is not None:  # bug fix: was ``!= None``
        for item in encodings:
            if item['col'] in features:
                feature_mapping = item['mapping']
                #feature_mapping = feature_mapping[feature_mapping.index.dropna()]
                cat_names = list(feature_mapping.keys())
                cat_codes = list(feature_mapping.values())
                # Relabel whichever axis this encoded column landed on.
                if features.index(item['col']) == 0:
                    pdp = pdp.rename(columns=dict(zip(cat_codes, cat_names)))
                else:
                    pdp = pdp.rename(index=dict(zip(cat_codes, cat_names)))
    plt.figure(figsize=(7,7))
    sns.heatmap(pdp, annot=True, fmt='.3f', cmap='viridis')
    plt.title(f'PDP interact for \"{features[0]}\" and \"{features[1]}\"')
    plt.show()
def pdp_interact_explain(X, y, feature):
    """Fit a RandomForest on (X, y) and draw a 2D PDP interaction plot.

    X: raw feature dataframe; ordinal-encoded then imputed internally.
    y: target vector.
    feature: pair of feature names passed to pdp_interact.
    """
    import category_encoders as ce
    from sklearn.pipeline import make_pipeline
    from sklearn.impute import SimpleImputer
    from sklearn.ensemble import RandomForestClassifier
    from pdpbox.pdp import pdp_interact, pdp_interact_plot

    # Encode, impute as needed
    X_encoded = ce.OrdinalEncoder().fit_transform(X)
    X_processed = SimpleImputer().fit_transform(X_encoded)

    # Pick a model and fit the data
    pdp_model = RandomForestClassifier(n_estimators=200, n_jobs=-1,
                                       random_state=6)
    pdp_model.fit(X_processed, y)

    # The actual plotting (rebinding the imported name is deliberate;
    # both bindings are function-local).
    pdp_interact = pdp_interact(model=pdp_model, dataset=X_encoded,
                                model_features=X_encoded.columns,
                                features=feature)

    # pdpbox's axis-labelling code raises a TypeError with some matplotlib
    # versions and prevents the axes from getting labels. Swallow only
    # that specific error so the plot still renders.
    # (Bug fix: was a bare ``except:`` that hid every failure, including
    # KeyboardInterrupt.)
    try:
        pdp_interact_plot(pdp_interact, feature_names=feature,
                          plot_type='contour')
    except TypeError:
        pass
def show_PDP_interact(self, features=None):
    """Show a 2D PDP interaction plot for each feature pair in ``features``.

    features: iterable of feature pairs forwarded one at a time to
        pdp.pdp_interact; defaults to no pairs.
        (Bug fix: was a mutable default argument ``[]``.)
    """
    for pair in (features or []):
        interact_out = pdp.pdp_interact(self.model, self.X_train,
                                        model_features=self.feature_names,
                                        features=pair)
        pdp.pdp_interact_plot(interact_out, feature_names=pair)
        # Rotate x tick labels so long category names stay readable.
        plt.xticks(rotation=90)
        plt.show()
def compute_pdp_interact(model, dataset, model_features, features):
    """Thin wrapper around pdp.pdp_interact; returns its result object."""
    return pdp.pdp_interact(model=model,
                            dataset=dataset,
                            model_features=model_features,
                            features=features)
def part_plot_2D(model, total_features, X_val, y_val, feature1, feature2):
    """Draw a 2D partial-dependence grid plot for feature1 x feature2.

    y_val is accepted for interface symmetry but not used here.
    """
    pair = [feature1, feature2]
    interact_out = pdp.pdp_interact(model=model, dataset=X_val,
                                    model_features=total_features,
                                    features=pair)
    pdp.pdp_interact_plot(pdp_interact_out=interact_out,
                          feature_names=pair,
                          plot_type='grid')
    plt.show()
def two_dim_pdp(f):
    """Plot a two-dimensional PDP grid for the feature pair ``f``.

    Uses the module-level ``rf`` model and ``X_train`` dataframe.
    """
    interact_out = pdp.pdp_interact(model=rf,
                                    dataset=X_train,
                                    model_features=X_train.columns,
                                    features=f)
    pdp.pdp_interact_plot(pdp_interact_out=interact_out,
                          feature_names=f,
                          plot_type='grid')
    plt.show()
def pdp_plot_bivariate(self, model, X_val, feature_pair):
    '''
    pdp plot for feature pair

    model: fitted model
    X_val: validation dataset
    feature_pair: pair of feature (list)
    '''
    all_columns = X_val.columns.tolist()
    interact_out = pdp.pdp_interact(model, X_val, all_columns, feature_pair)
    pdp.pdp_interact_plot(interact_out, feature_pair, plot_type='contour')
    plt.show()
def interaction(model, X, features, type = 'grid'):
    """Plot the PDP interaction between two features.

    NOTE(review): the ``type`` parameter shadows the builtin; the name is
    kept unchanged for backward compatibility with keyword callers.
    """
    # Compute the interaction surface.
    interact_out = pdp_interact(model = model,
                                dataset = X,
                                model_features = X.columns,
                                features = features)
    # Render it in the requested style.
    pdp_interact_plot(interact_out, plot_type = type, feature_names = features)
def plot_pdp_interact(model, X_train, feats):
    """
    Plot the 2D partial-dependence interaction of two features.

    Computes the PDP on a 1000-row sample of ``X_train`` (via the
    module-level ``get_sample`` helper) to keep the computation fast.

    :param model: Trained model
    :param X_train: Dataframe to get predictions of the model from
    :param feats: List (size 2) of features to plot target dependency for
    :return: result of ``pdp.pdp_interact_plot`` (partial dependency plot)
    """
    x = get_sample(X_train, 1000)
    p = pdp.pdp_interact(model, x, x.columns, feats)
    return pdp.pdp_interact_plot(p, feats, plot_pdp=True)
def plot_2D_partial_dependency(fitted_model, X_test : pd.DataFrame,
                               model_features : list, features : list,
                               plot_type = 'contour'):
    """Plot a 2D partial-dependence surface for a pair of features.

    fitted_model: trained estimator.
    X_test: data the PDP is computed over.
    model_features: all feature names the model was trained on.
    features: the two features to interact.
    plot_type: 'contour' (default) or 'grid'.

    NOTE: known rendering error with matplotlib version 3.0.0.
    """
    # Create the data that we will plot
    pdp_obj = pdp.pdp_interact(model = fitted_model,
                               dataset = X_test,
                               model_features = model_features,
                               features = features)
    # plot it (bug fix: original passed the undefined name ``feature``,
    # raising NameError before anything was drawn)
    pdp.pdp_interact_plot(pdp_interact_out = pdp_obj,
                          feature_names = features,
                          plot_type = plot_type)
    plt.show()
def test_binary_onehot(self, titanic_model, titanic_data, titanic_features):
    # 2D PDP over a binary feature ('Sex') and a one-hot encoded group
    # (the three 'Embarked_*' columns treated as one categorical feature).
    pdp_interact_out = pdp_interact(
        model=titanic_model,
        dataset=titanic_data,
        model_features=titanic_features,
        features=['Sex', ['Embarked_C', 'Embarked_S', 'Embarked_Q']])
    # Result-object metadata: binary classification, no class selected.
    assert pdp_interact_out._type == 'PDPInteract_instance'
    assert pdp_interact_out.n_classes == 2
    assert pdp_interact_out.which_class is None
    assert pdp_interact_out.features == [
        'Sex', ['Embarked_C', 'Embarked_S', 'Embarked_Q']
    ]
    assert pdp_interact_out.feature_types == ['binary', 'onehot']
    # Grid for the binary feature is {0, 1}; the onehot grid is the list
    # of member column names.
    assert len(pdp_interact_out.feature_grids) == 2
    assert_array_equal(pdp_interact_out.feature_grids[0], np.array([0, 1]))
    assert_array_equal(
        pdp_interact_out.feature_grids[1],
        np.array(['Embarked_C', 'Embarked_S', 'Embarked_Q']))
    assert len(pdp_interact_out.pdp_isolate_outs) == 2
    # Spot-check three rows of the pdp frame against golden values.
    expected = pd.DataFrame({
        'Embarked_C': {
            0: 0, 3: 0, 5: 1
        },
        'Embarked_Q': {
            0: 1, 3: 1, 5: 0
        },
        'Embarked_S': {
            0: 0, 3: 0, 5: 0
        },
        'Sex': {
            0: 0, 3: 1, 5: 1
        },
        'preds': {
            0: 0.7331125140190125,
            3: 0.21476328372955322,
            5: 0.2710586190223694
        }
    })
    assert_frame_equal(pdp_interact_out.pdp.iloc[[0, 3, 5]],
                       expected, check_like=True, check_dtype=False)
def pdp_interact_plot(self, feature, var_name=None, sample = 10000,
                      which_classes = None, num_grid_points=None,
                      plot_types = None, plot_params = None):
    """Draw 2D PDP interaction plots for ``feature`` on a data sample.

    feature: feature pair passed to pdp.pdp_interact.
    var_name: optional display names; falls back to ``feature``.
    sample: number of rows drawn via self.sample() for the computation.
    which_classes: forwarded to pdp.pdp_interact_plot.
    num_grid_points: grid size per feature; defaults to [10, 10].
        (Bug fix: was a mutable default argument.)
    plot_types: None -> draw both 'contour' and 'grid'; otherwise a
        single plot type that is drawn alone.
    plot_params: pdpbox plot params; defaults to the green/blue cmap.
        (Bug fix: was a mutable default argument.)
    """
    # Resolve the former mutable defaults per call.
    if num_grid_points is None:
        num_grid_points = [10, 10]
    if plot_params is None:
        plot_params = {'cmap': ["#00cc00", "#002266"]}
    ft_plot = pdp.pdp_interact(
        model=self.md,
        dataset=self.sample(sample),
        model_features=self.features,
        features=feature,
        num_grid_points=num_grid_points,
        n_jobs=4)
    # Default to both plot styles; a single user-supplied style is
    # wrapped in a list so the loop below works either way.
    plot_types = ['contour', 'grid'] if plot_types is None else [plot_types]
    for plot_type in plot_types:
        figs, ax = pdp.pdp_interact_plot(
            pdp_interact_out = ft_plot,
            feature_names = var_name or feature,
            plot_type= plot_type,
            plot_pdp=True,
            which_classes=which_classes,
            plot_params = plot_params)
        plt.show()
def plot_2d_pdp(model, X, y=None, X_unscaled=None, model_features=None,
                features=None, **kwargs):
    '''
    Plots a 2d pdp interaction plot with both axes shown in unscaled units.

    model: estimator; refit on (X, y) when y is given.
    X: (scaled) dataframe/array the PDP is computed over.
    y: optional target; if not None the model is refit on (X, y) first.
    X_unscaled: pandas DataFrame with the unscaled values of X; when
        given, its per-feature mean/std are used to relabel the tick
        values on both axes.
    model_features, features, kwargs: forwarded to pdpbox.
    Returns (fig, ax) from pdp.pdp_interact_plot.
    '''
    if y is not None:
        model.fit(X, y)
    pdp_plt = pdp.pdp_interact(model=model,
                               dataset=X,
                               model_features=model_features,
                               features=features)
    fig, ax = pdp.pdp_interact_plot(pdp_plt, feature_names=features, **kwargs)
    if X_unscaled is not None:
        # Ticks show scaled values; invert the standardization
        # (x_unscaled = x * std + mean) when labelling them.
        meanx = X_unscaled[features[0]].mean()
        stdx = X_unscaled[features[0]].std()
        #Unscale x values
        def unscale_xticks(x, pos):
            return ('%.1f' % (x * stdx + meanx))
        meany = X_unscaled[features[1]].mean()
        stdy = X_unscaled[features[1]].std()
        #Unscale y values
        def unscale_yticks(x, pos):
            return ('%.1f' % (x * stdy + meany))
        ax['pdp_inter_ax'].xaxis.set_major_formatter(
            mticker.FuncFormatter(unscale_xticks))
        ax['pdp_inter_ax'].yaxis.set_major_formatter(
            mticker.FuncFormatter(unscale_yticks))
    return fig, ax
def pdp_2d_pdp(term_type, train_data, fea_2d_1, fea_2d_2, fea_nam):
    """Draw and save a 2D PDP contour for two features of ``train_data``.

    A custom 10-point linear grid spanning each feature's observed range
    is used instead of pdpbox's quantile grid. Uses the module-level
    ``rfr`` model. Returns the figure after saving it under ./results/.
    """
    # One evenly spaced 10-point grid per feature, covering its range.
    custom_grids = [
        np.linspace(train_data[col].values.min(),
                    train_data[col].values.max(), 10)
        for col in (fea_2d_1, fea_2d_2)
    ]
    interact_out = pdp.pdp_interact(
        model=rfr,
        dataset=train_data,
        model_features=fea_nam,
        cust_grid_points=custom_grids,
        features=[fea_2d_1, fea_2d_2])
    fig, axes = pdp.pdp_interact_plot(
        interact_out, [fea_2d_1, fea_2d_2],
        x_quantile=False, plot_type='contour', plot_pdp=False)
    fig.savefig("./results/{}_{}-{}_2d_pdp.png".format(term_type, fea_2d_1, fea_2d_2), dpi=300)
    return fig
def save_pdp_plot_2d(model, X_train, features, n_jobs, figure_saver=None):
    """Compute a 20x20 2D PDP for ``features`` and optionally persist it.

    ``model.n_jobs`` is overridden and the PDP computed inside a joblib
    threading backend so the estimator parallelises its predictions.
    """
    model.n_jobs = n_jobs
    with parallel_backend("threading", n_jobs=n_jobs):
        interact_out = pdp.pdp_interact(
            model=model,
            dataset=X_train,
            model_features=X_train.columns,
            features=features,
            num_grid_points=[20, 20],
        )
    fig, axes = pdp.pdp_interact_plot(interact_out, features,
                                      x_quantile=True, figsize=(7, 8))
    # Rotate x tick labels so long feature values stay readable.
    axes["pdp_inter_ax"].xaxis.set_tick_params(rotation=45)
    if figure_saver is not None:
        figure_saver.save_figure(fig, "__".join(features), sub_directory="pdp_2d")
def pdp_2d(clf,X,Y,features_to_plot,label,plot_type='contour'):
    """2D PDP for one label of a multi-label classifier.

    Pulls the per-label estimator out of ``clf.classifiers_`` by the
    position of ``label`` in Y's columns, silences its verbosity while
    pdpbox repeatedly calls predict, and returns (fig, ax).
    """
    from pdpbox import pdp

    # Extract the classifier object from the clf multilearn object
    label_pos = Y.columns.to_list().index(label)
    clf = clf.classifiers_[label_pos]
    clf.verbose = False  # turn verbose off after this to tidy prints
    interact_out = pdp.pdp_interact(
        model=clf, dataset=X,
        model_features=X.columns.to_list(),
        features=features_to_plot,
        percentile_ranges=[(5,95),(5,95)])
    if plot_type == 'grid':
        fig, ax = pdp.pdp_interact_plot(
            pdp_interact_out=interact_out,
            feature_names=features_to_plot,
            plot_type='grid',
            x_quantile=True, plot_pdp=True)
    elif plot_type == 'contour':
        fig, ax = pdp.pdp_interact_plot(
            pdp_interact_out=interact_out,
            feature_names=features_to_plot,
            plot_type='contour')
    clf.verbose = True  # reset
    return fig, ax
def test_binary_numeric(self, titanic_model, titanic_data, titanic_features):
    # 2D PDP over a numeric feature ('Fare') and a binary one ('Sex').
    pdp_interact_out = pdp_interact(model=titanic_model,
                                    dataset=titanic_data,
                                    model_features=titanic_features,
                                    features=['Fare', 'Sex'])
    # Result-object metadata: binary classification, no class selected.
    assert pdp_interact_out._type == 'PDPInteract_instance'
    assert pdp_interact_out.n_classes == 2
    assert pdp_interact_out.which_class is None
    assert pdp_interact_out.features == ['Fare', 'Sex']
    assert pdp_interact_out.feature_types == ['numeric', 'binary']
    assert len(pdp_interact_out.feature_grids) == 2
    # Numeric grid: golden quantile points of 'Fare'.
    assert_array_almost_equal(pdp_interact_out.feature_grids[0],
                              np.array([
                                  0., 7.73284444, 7.8958, 8.6625, 13., 16.7,
                                  26., 35.11111111, 73.5, 512.3292
                              ]),
                              decimal=8)
    assert_array_equal(pdp_interact_out.feature_grids[1], np.array([0, 1]))
    assert len(pdp_interact_out.pdp_isolate_outs) == 2
    # Spot-check four rows of the pdp frame against golden values.
    expected = pd.DataFrame({
        'Fare': {
            0: 0.0, 6: 8.6625, 12: 26.0, 18: 512.3292
        },
        'Sex': {
            0: 0.0, 6: 0.0, 12: 0.0, 18: 0.0
        },
        'preds': {
            0: 0.6237624883651733,
            6: 0.6005081534385681,
            12: 0.6391391158103943,
            18: 0.7784096002578735
        }
    })
    assert_frame_equal(pdp_interact_out.pdp.iloc[[0, 6, 12, 18]],
                       expected, check_like=True, check_dtype=False)
def ind_cond_exp(model_line, X_train, y_data):
    # Builds a 2D PDP for the 2nd/3rd columns of X_train, shows it as a
    # pdpbox grid plot and a plotly 3D surface, then wires the figure
    # into a Dash-style layout. ``y_data`` is accepted but unused.
    # Collect feature names while printing each column (the print is an
    # intentional progress/debug side effect).
    empty_list = []
    for col in X_train.columns:
        print(col)
        empty_list.append(col)
    from pdpbox.pdp import pdp_interact, pdp_interact_plot
    X_features = empty_list
    # Interaction is computed for the 2nd and 3rd columns only.
    features = empty_list[1:3]
    interaction = pdp_interact(model=model_line, dataset=X_train,
                               model_features=X_features, features=features)
    #pdp_goals = pdp.pdp_isolate(model=model_line, dataset=X_train, model_features=X_features, feature='sqft_living')
    pdp_interact_plot(interaction, plot_type='grid', feature_names=features)
    import seaborn as sns
    # Pivot predictions into a grid for the surface plot.
    pdp = interaction.pdp.pivot_table(
        values='preds',
        columns=features[0],
        index=features[1]
    )[::-1]  # Slice notation to reverse index order so y axis is ascending
    #plt.figure(figsize=(10,8))
    # sns.heatmap(pdp, annot=True, fmt='.2f', cmap='viridis')
    # plt.title('Partial Dependence on Interest Rate on Annual Income & Credit Score');
    #import plotly.graph_objs as go
    # NOTE(review): go, ee, html and dcc are not defined in this function —
    # presumably module-level plotly/Dash objects; verify against the
    # enclosing module before reuse.
    surface = go.Surface(x=pdp.columns, y=pdp.index, z=pdp.values)
    fig = go.Figure(surface)
    fig.show()
    ee.layout = html.Div([dcc.Graph(figure=fig)])
    print("done")
    return ee.index()
def plot_modal():
    # Flask view: renders a 2D PDP "modal" plot for two user-chosen
    # features (f1, f2) of a previously uploaded dataset, for one target
    # class (t1), using a pickled fitted pipeline.
    path = os.path.join(app.config['UPLOAD_FOLDER'],
                        session.get("filename", "not set"))
    index = request.args.get('model', default=0, type=int)
    estim = request.args.get('estimator', default=None, type=str)
    target_ft = session.get('target_ft', 'not set')
    features = session.get('features', 'not set')
    f1 = request.args.get('f1', default=None, type=str)
    f2 = request.args.get('f2', default=None, type=str)
    t1 = request.args.get('t1', default=None, type=str)
    X, y, data = process_data(path, "csv", target_ft)
    #remove nans
    data = data.dropna()
    # Position of the requested class within the sorted unique labels;
    # t1 arrives as text, hence the float -> int round-trip.
    chosen_class = list(np.unique(y)).index(int(float(t1)))
    # NOTE(review): loads a pickle from disk — assumes tmp_files/ was
    # written by a trusted earlier step; pickle on untrusted input is unsafe.
    with open("tmp_files/model_{}_{}.pickle".format(estim, str(index)),
              'rb') as filehandler:
        pipe = pickle.load(filehandler)
    # Filesystem-safe plot name derived from the two feature names.
    mod_path = "modal_" + str(f1.replace('.', '_')) + \
        "_" + str(f2.replace('.', '_'))
    pdp_V1_V2 = pdp.pdp_interact(model=pipe,
                                 dataset=data,
                                 model_features=features,
                                 features=[f1, f2],
                                 num_grid_points=None,
                                 percentile_ranges=[None, None])
    fig, axes = pdp.pdp_interact_plot(pdp_V1_V2, [f1, f2],
                                      plot_type='grid',
                                      x_quantile=True,
                                      ncols=2,
                                      plot_pdp=True,
                                      which_classes=[chosen_class],
                                      plot_params={
                                          "subtitle":
                                          "For Class {}, Label: {}".format(
                                              chosen_class, t1)
                                      })
    fig.savefig("static/images/figs/" + mod_path,
                bbox_inches="tight", transparent=True)
    # Fresh figure so later matplotlib calls don't draw onto this one.
    plt.figure()
    return render_template("modal_plot.html", plot_name=mod_path)
def interactive_plot(a, b, model, X_encoded):
    """
    Heatmap of the 2D partial dependence of any two dataset features.

    Relies on the module-level ``feature_mapping`` helper to translate
    encoded category codes back to readable names on both axes.

    a = first feature (column name)
    b = second feature (column name)
    model = Machine Learning model (do *not* use a pipeline here)
    X_encoded = the encoded X feature dataframe (validation or test) -
    ensure you fit_transform your training data and transform your
    validation/test data before passing

    Returns the seaborn heatmap axes.
    """
    features = [a, b]
    # Compute the interaction surface.
    interaction = pdp_interact(model=model,
                               dataset=X_encoded,
                               model_features=X_encoded.columns,
                               features=features)
    # Grid of predictions: columns = feature a, index = feature b.
    pdp = interaction.pdp.pivot_table(values='preds',
                                      columns=features[0],
                                      index=features[1])
    # Map encoded codes back to human-readable names on both axes.
    _, a_names, a_codes = feature_mapping(features[0])
    _, b_names, b_codes = feature_mapping(features[1])
    pdp = pdp.rename(index=dict(zip(b_codes, b_names)),
                     columns=dict(zip(a_codes, a_names)))
    plt.figure(figsize=(10, 8))
    return sns.heatmap(pdp, annot=True, fmt='.2f', cmap='YlGnBu')
def test_binary_numeric(self, ross_model, ross_data, ross_features):
    # Regression case (n_classes == 0): binary 'SchoolHoliday' crossed
    # with numeric 'weekofyear'.
    pdp_interact_out = pdp_interact(
        model=ross_model,
        dataset=ross_data,
        model_features=ross_features,
        features=['SchoolHoliday', 'weekofyear'])
    assert pdp_interact_out._type == 'PDPInteract_instance'
    assert pdp_interact_out.n_classes == 0
    assert pdp_interact_out.which_class is None
    assert pdp_interact_out.features == ['SchoolHoliday', 'weekofyear']
    assert pdp_interact_out.feature_types == ['binary', 'numeric']
    assert len(pdp_interact_out.feature_grids) == 2
    assert_array_equal(pdp_interact_out.feature_grids[0], np.array([0, 1]))
    # Numeric grid: golden quantile points of 'weekofyear'.
    assert_array_equal(
        pdp_interact_out.feature_grids[1],
        np.array([1., 5., 10., 15., 20., 25., 30., 37., 45., 52.]))
    # NOTE(review): this re-checks feature_grids; the sibling tests check
    # len(pdp_isolate_outs) here instead — possibly a copy-paste slip.
    assert len(pdp_interact_out.feature_grids) == 2
    # Spot-check four rows of the pdp frame against golden values.
    expected = pd.DataFrame({
        'SchoolHoliday': {
            0: 0.0, 6: 0.0, 12: 1.0, 18: 1.0
        },
        'preds': {
            0: 6369.878633951306,
            6: 5831.552135812868,
            12: 7311.965564610852,
            18: 7129.481794228513
        },
        'weekofyear': {
            0: 1.0, 6: 30.0, 12: 10.0, 18: 45.0
        }
    })
    assert_frame_equal(pdp_interact_out.pdp.iloc[[0, 6, 12, 18]],
                       expected, check_like=True, check_dtype=False)
def construct_pdp_interact(model, feature_names,
                           dataset_x = dat_train_x, dataset_y = dat_train_y,
                           num_grid_points = num_grid_points_int,
                           n_jobs = n_jobs,
                           model_features = dat_train_x.columns):
    """Build and label a 2D PDP contour for a feature pair.

    Defaults come from module-level training data and settings; n_jobs
    needs to be 1 for an XGBoost model. Axis labels and the title use
    the long variable names from ``varnames_long_dict``. Returns the
    matplotlib figure.
    """
    interact_out = pdp.pdp_interact(
        model = model,
        dataset = dataset_x.join(dataset_y),
        num_grid_points = num_grid_points,
        n_jobs = n_jobs,  ## needs to be 1 for XGBoost model!
        model_features = model_features,
        features = feature_names)
    fig, axes = pdp.pdp_interact_plot(
        interact_out,
        feature_names = feature_names,
        x_quantile = False,
        plot_type = 'contour',
        plot_pdp = False,
        plot_params = plot_params_pdp_int_default)
    inter_ax = axes["pdp_inter_ax"]
    x_label = varnames_long_dict[feature_names[0]]
    y_label = varnames_long_dict[feature_names[1]]
    inter_ax.set_xlabel(x_label)
    inter_ax.set_ylabel(y_label)  ## [[here]] y-labels!
    inter_ax.set_title('Number of bike rides per hour\n(Partial Dependence Plot) for\n{0} and {1}\n'\
        .format(x_label, y_label), y = 1)
    return fig
def show_partial_dependence(model, val_X, features):
    '''
    Takes the model and dataframe for validation set (X) then plots the
    partial dependence plot.

    features may be a single feature name (1D plot) or a list of exactly
    two names (2D interaction contour plot).

    For more on this, check https://www.kaggle.com/dansbecker/partial-plots?utm_medium=email&utm_source=mailchimp&utm_campaign=ml4insights
    '''
    from matplotlib import pyplot as plt
    from pdpbox import pdp, get_dataset, info_plots  # Do I need get_dataset and info_plots???

    # Only int64 columns are offered to pdpbox as model features.
    feature_names = [i for i in val_X.columns if val_X[i].dtype in [np.int64]]
    if not isinstance(features, list):
        # Create the data that we will plot
        pdp_feature = pdp.pdp_isolate(model=model, dataset=val_X,
                                      model_features=feature_names,
                                      feature=features)
        # plot it (bug fix: original referenced the undefined name
        # ``feature`` here, raising NameError)
        pdp.pdp_plot(pdp_feature, features)
        plt.show()
    elif isinstance(features, list) and len(features) == 2:
        # Similar to previous PDP plot except we use pdp_interact instead
        # of pdp_isolate and pdp_interact_plot instead of pdp_isolate_plot
        inter1 = pdp.pdp_interact(model=model, dataset=val_X,
                                  model_features=feature_names,
                                  features=features)
        # bug fix: original referenced the undefined ``features_to_plot``
        pdp.pdp_interact_plot(pdp_interact_out=inter1,
                              feature_names=features,
                              plot_type='contour')
        plt.show()
    else:
        print(
            'Error, check input and also think of a better error message.... don\'t be lazy'
        )
def pdp_interact_plot(model, dataset, model_features, feature1, feature2,
                      plot_type="grid", x_quantile=True, plot_pdp=False):
    """Wrapper for pdp.pdp_interact_plot. Uses pdp.pdp_interact."""
    pair = [feature1, feature2]
    interact_out = pdp.pdp_interact(
        model=model,
        dataset=dataset,
        model_features=model_features,
        features=pair,
    )
    # Only the figure is returned; the axes mapping is discarded.
    fig, _ = pdp.pdp_interact_plot(
        pdp_interact_out=interact_out,
        feature_names=pair,
        plot_type=plot_type,
        x_quantile=x_quantile,
        plot_pdp=plot_pdp,
    )
    return fig
fig, axes, summary_df = info_plots.actual_plot_interact( model=model, X=X, features=x_cols[3:], feature_names=x_cols[3:], which_classes=[2, 5], ) # %% pdp_interact: Preset ---------------------------------------------------- pdp_interacted_tmp = pdp.pdp_interact( model=model, dataset=X, model_features=x_cols, features=x_cols[:2], num_grid_points=[10, 10], percentile_ranges=[None, None], n_jobs=1, ) # %% pdp_interact_plot: grid fig, axes = pdp.pdp_interact_plot( pdp_interacted_tmp, feature_names=x_cols, plot_type='grid', x_quantile=True, ncols=2, plot_pdp=True, which_classes=[1, 2, 3],
# NOTE(review): script chunk from a report-building routine; relies on
# names defined earlier in the file (pdp_feature, i, html_partial_plot,
# loaded_model, dataframe_test, feature_names, features_to_plot2d,
# shap_row_to_show, fig_to_base64).
pdp_plot_feature = pdp.pdp_plot(pdp_feature, i)
# Random 3-character suffix to make the graph name unique.
graph_name = ''.join(
    random.sample((string.ascii_uppercase + string.digits), 3))
html_pdp = 'html_pdp_plot' + graph_name + ' + '
# NOTE(review): html_pdp above is immediately overwritten below — the
# first assignment appears to be dead code; confirm before removing.
encoded = fig_to_base64(pdp_plot_feature)
html_pdp = '<img class="img-fluid" src="data:image/png;base64, {}">'.format(
    encoded.decode('utf-8'))
html_partial_plot += html_pdp

# -------------------------------------------------------------
# 2D PARTIAL DEPENDENCE PLOTS
# Similar to previous PDP plot except we use pdp_interact instead of pdp_isolate
# and pdp_interact_plot instead of pdp_isolate_plot
# features_to_plot = ['preg', 'skin']
inter1 = pdp.pdp_interact(model=loaded_model,
                          dataset=dataframe_test,
                          model_features=feature_names,
                          features=features_to_plot2d)
partial_plot = pdp.pdp_interact_plot(
    pdp_interact_out=inter1,
    feature_names=features_to_plot2d
)  # plot_type='contour' plot_type='grid'
encoded = fig_to_base64(partial_plot)
html_partial_plot2d = '<img class="img-fluid" src="data:image/png;base64, {}">'.format(
    encoded.decode('utf-8'))

# -------------------------------------------------------------
# SHAP PLOT
# Kernel SHAP explanation for a single row of the test frame.
data_for_prediction = dataframe_test.iloc[shap_row_to_show]
explainer = shap.KernelExplainer(loaded_model.predict_proba,
                                 dataframe_test.values)
shap_values = explainer.shap_values(data_for_prediction)
shap.initjs()
# Plot feature importances n = 6 plt.figure(figsize=(10,n/2)) #plt.title(f'Top {n} features pipeline5') plt.title(f'Top {n} features Gradient Boosting') importances4.sort_values()[-n:].plot.barh(color='grey'); !pip install pdpbox # Partial Dependence Plots with 2 features from pdpbox.pdp import pdp_interact, pdp_interact_plot features2 = ['Latitude', 'Longitude Difference to State Capital'] interaction = pdp_interact( model=gb, dataset=X_val, model_features=X_val.columns, features=features2 ) pdp_interact_plot(interaction, plot_type='grid', feature_names=features2); # A two feature partical dependence plot in 3D pdp = interaction.pdp.pivot_table( values='preds', columns=features2[0], index=features2[1] )[::-1] # Slice notation to reverse index order so y axis is ascending import plotly.graph_objs as go target = 'Value of d parameter' surface = go.Surface(x=pdp.columns,
)
# NOTE(review): chunk starts mid-script — the ')' above closes a call
# begun before this view. Relies on earlier names (isolated_features,
# pdf_feature, Rand_forest, X_val).
pdp_plot(isolated_features, feature_name=pdf_feature);

"""### Interactive Partial Dependence Plots with 2 features"""

from pdpbox.pdp import pdp_interact, pdp_interact_plot

# PDP between total of special requests and is_repeated_guest;
# it shows that the number of unique grid points for each is
# total_of_special_requests: 2 and is_repeated_guest: 4.
pdf_features = ['is_repeated_guest', 'total_of_special_requests']
booking_interaction = pdp_interact(
    model=Rand_forest,
    dataset=X_val,
    model_features=X_val.columns,
    features=pdf_features
)
# Grid plot of the interaction between the two features above,
# with grid point counts of 2:4.
pdp_interact_plot(booking_interaction, plot_type='grid', feature_names=pdf_features);

"""### Shapley Values
It is a good technique to show the insight of the model predictor and break down each model individually.
"""

# explain the individual observation
# if I want to look for the first row from X_test
# turn it into a datafrme
# NOTE(review): script chunk; relies on earlier names (cvbst, gpb, bst,
# X_train, X_test) from the surrounding file.
# Pick the CV iteration with the lowest mean L2 loss.
best_iter = np.argmin(cvbst['l2-mean'])
print("Best number of iterations: " + str(best_iter))
# Best number of iterations: 32

# --------------------Model interpretation----------------
# Plotting feature importances
gpb.plot_importance(bst)

# Partial dependence plots
from pdpbox import pdp
# Single variable plots (takes a few seconds to compute)
pdp_dist = pdp.pdp_isolate(model=bst, dataset=X_train,
                           model_features=X_train.columns,
                           feature='variable_2', num_grid_points=50)
pdp.pdp_plot(pdp_dist, 'variable_2', plot_lines=True)
# Two variable interaction plot
inter_rf = pdp.pdp_interact(model=bst, dataset=X_train,
                            model_features=X_train.columns,
                            features=['variable_1','variable_2'])
pdp.pdp_interact_plot(inter_rf, ['variable_1','variable_2'],
                      x_quantile=True, plot_type='contour',
                      plot_pdp=True)# ignore any error message

# SHAP values and dependence plots
# Note: you need shap version>=0.36.0
import shap
shap_values = shap.TreeExplainer(bst).shap_values(X_test)
shap.summary_plot(shap_values, X_test)
shap.dependence_plot("variable_2", shap_values, X_test)

# --------------------Comparison to alternative approaches----------------
# Empty results table to be filled in by the model comparisons below.
results = pd.DataFrame(columns = ["RMSE","Time"],
                       index = ["GPBoost", "Linear_ME","Boosting_Ign","Boosting_Cat","MERF"])
# 1. GPBoost