def plot_shap(model, test, instance=None, feature=None, dataset=False):
    """
    Render SHAP explanation plots for an already fitted model.

    :param model: the fitted model handed to ``TreeExplainer``. Not every
        model type is supported by that explainer.
    :param test: test dataset the SHAP values are computed on.
    :param instance: row position of ``test`` to explain with a force plot;
        skipped when None. default_value=None
    :param feature: feature of ``test`` to explain with a dependence plot;
        skipped when None. default_value=None
    :param dataset: when True, summary plots over the entire dataset are
        drawn. default_value=False
    :return: None
    """
    tree_explainer = TreeExplainer(model)
    contributions = tree_explainer.shap_values(test)
    initjs()

    # Single-prediction explanation.
    if instance is not None:
        force_plot(
            tree_explainer.expected_value,
            contributions[instance, :],
            test.iloc[instance, :],
            matplotlib=True,
        )

    # Single-feature explanation, drawn on a dedicated large axes.
    if feature is not None:
        _, target_axes = plt.subplots(figsize=(13, 10))
        dependence_plot(feature, contributions, test, ax=target_axes)

    # Whole-dataset explanation: default summary plot first, then the bar one.
    if dataset:
        for extra_options in ({}, {"plot_type": "bar"}):
            summary_plot(contributions, test, plot_size=(8, 8), **extra_options)
def shap(self, X, plot=False, plot_type='bar'):
    """Method for shap values calculation and corresponding plot of feature importances.

    Args:
        X (:obj:`pd.DataFrame`, :obj:`pd.Series`): Data for shap values calculation.
            A Series is treated as a single observation (one row).
        plot (:obj:`boolean`, optional): Whether to plot a graph.
        plot_type (:obj:`str`, optional): Type of feature importance graph,
            takes value in ['dot', 'bar'].

    Returns:
        dict: ``'Intercept'`` mapped to the explainer's expected value, followed
        by every column of ``X`` mapped to its mean shap value over the rows.
    """
    explainer = TreeExplainer(self.model)
    X = DataFrame(X).T if isinstance(X, Series) else X
    shap_values = explainer.shap_values(X)

    # Decide ONCE whether the explainer returned a two-element per-class list.
    # The check must happen before `shap_values` is replaced by its first
    # element; testing the already-unwrapped array can never match, which
    # would leave the whole per-class expected-value array as the intercept.
    per_class = isinstance(shap_values, list) and (len(shap_values) == 2)
    if per_class:
        shap_values = shap_values[0]
        expected_value = [float(explainer.expected_value[0])]
    else:
        expected_value = [explainer.expected_value]

    variables = ['Intercept'] + list(X.columns)
    mean_shap = expected_value + shap_values.mean(axis=0).tolist()
    if plot:
        summary_plot(shap_values, X, plot_type=plot_type)
    return dict(zip(variables, mean_shap))
def test_shap_sklearn_classifier(iris_X, iris_y):
    """Smoke test: TreeExplainer with background data on a sklearn forest classifier."""
    from shap import TreeExplainer

    classifier = RandomForestClassifier()
    classifier.fit(iris_X, iris_y)

    tree_explainer = TreeExplainer(model=classifier, data=iris_X)
    # Additivity check is switched off for this run, as in the other
    # background-data tests.
    print(tree_explainer.shap_values(iris_X, check_additivity=False))
def test_shap_sklearn_regressor(boston_X, boston_y):
    """Smoke test: TreeExplainer with background data on a sklearn forest regressor."""
    from shap import TreeExplainer

    regressor = RandomForestRegressor()
    regressor.fit(boston_X, boston_y)

    tree_explainer = TreeExplainer(model=regressor, data=boston_X)
    print(tree_explainer.shap_values(boston_X, check_additivity=False))
def test_shap_sklearn_classifier(iris_X, iris_y):
    """Smoke test: TreeExplainer without background data on a sklearn forest classifier."""
    from shap import TreeExplainer

    classifier = RandomForestClassifier()
    classifier.fit(iris_X, iris_y)

    tree_explainer = TreeExplainer(model=classifier)
    print(tree_explainer.shap_values(iris_X))
def test_shap_sklearn_regressor(boston_X, boston_y):
    """Smoke test: TreeExplainer without background data on a sklearn forest regressor."""
    from shap import TreeExplainer

    regressor = RandomForestRegressor()
    regressor.fit(boston_X, boston_y)

    tree_explainer = TreeExplainer(model=regressor)
    print(tree_explainer.shap_values(boston_X))
def test_shap_classifier(iris_X, iris_y):
    """Smoke test: SHAP values for a GRF forest classifier, computed under ``shap_patch``."""
    from shap import TreeExplainer

    classifier = GRFForestClassifier(enable_tree_details=True)
    classifier.fit(iris_X, iris_y)

    # Both the explainer construction and the SHAP computation run inside
    # the patch context.
    with shap_patch():
        tree_explainer = TreeExplainer(model=classifier, data=iris_X)
        contributions = tree_explainer.shap_values(iris_X, check_additivity=False)
    print(contributions)
def test_shap_classifier(iris_X, iris_y):
    """Smoke test: SHAP values for a Ranger forest classifier, computed under ``shap_patch``."""
    from shap import TreeExplainer

    classifier = RangerForestClassifier(enable_tree_details=True)
    classifier.fit(iris_X, iris_y)

    # Both the explainer construction and the SHAP computation run inside
    # the patch context.
    with shap_patch():
        tree_explainer = TreeExplainer(model=classifier)
        contributions = tree_explainer.shap_values(iris_X)
    print(contributions)
def test_shap_regressor(boston_X, boston_y):
    """Smoke test: SHAP values for a Ranger forest regressor, computed under ``shap_patch``."""
    from shap import TreeExplainer

    regressor = RangerForestRegressor(enable_tree_details=True)
    regressor.fit(boston_X, boston_y)

    # Both the explainer construction and the SHAP computation run inside
    # the patch context.
    with shap_patch():
        tree_explainer = TreeExplainer(model=regressor)
        contributions = tree_explainer.shap_values(boston_X)
    print(contributions)
def test_shap_regressor(boston_X, boston_y):
    """Smoke test: SHAP values for a GRF forest regressor, computed under ``shap_patch``."""
    from shap import TreeExplainer

    regressor = GRFForestRegressor(enable_tree_details=True)
    regressor.fit(boston_X, boston_y)

    # Both the explainer construction and the SHAP computation run inside
    # the patch context.
    with shap_patch():
        tree_explainer = TreeExplainer(model=regressor, data=boston_X)
        contributions = tree_explainer.shap_values(boston_X, check_additivity=False)
    print(contributions)
def _explain_trees(
    model: Model,
    transformed_data: Table,
    transformed_reference_data: Table,
    progress_callback: Callable,
) -> Tuple[
    Optional[List[np.ndarray]], Optional[np.ndarray], Optional[np.ndarray]
]:
    """
    Compute SHAP values with TreeExplainer for tree-based scikit-learn models.

    Returns a ``(shap_values, sample_mask, base_value)`` triple, or
    ``(None, None, None)`` when TreeExplainer cannot be used: the data is
    sparse, or constructing the explainer fails.
    """
    not_explainable = (None, None, None)

    # TreeExplainer does not support sparse data; KernelExplainer can handle it.
    if sparse.issparse(transformed_data.X):
        return not_explainable

    try:
        explainer = TreeExplainer(
            model.skl_model,
            data=sample(transformed_reference_data.X, 100),
        )
    # Deliberately broad: this is what TreeExplainer throws.
    except Exception:
        return not_explainable

    # TreeExplainer cannot explain more than 1000 cases in reasonable time.
    data_sample, sample_mask = _subsample_data(transformed_data, 1000)

    if model.domain.class_var.is_discrete:
        num_classes = len(model.domain.class_var.values)
    else:
        num_classes = None

    # Work in batches so progress can be reported. Per the original note,
    # single-row batches take about twice as long as batches of 10, but the
    # size is kept at 1 to minimize widget blocking.
    batch_size = 1
    total_rows = len(data_sample)
    per_batch_values = []
    for start in range(0, total_rows, batch_size):
        progress_callback(start / total_rows)
        rows = data_sample.X[start : start + batch_size]
        per_batch_values.append(
            explainer.shap_values(rows, check_additivity=False)
        )

    shap_values = _join_shap_values(per_batch_values)
    base_value = explainer.expected_value

    # If a class value was missing during training, skl_model does not output
    # a probability for it; pad with zeros so every class has an entry.
    # For other models this is handled by Orange.
    if num_classes is not None:
        missing_count = num_classes - len(shap_values)
        shap_values += [
            np.zeros(shap_values[0].shape) for _ in range(missing_count)
        ]
        base_value = np.hstack((base_value, np.zeros(missing_count)))

    return shap_values, sample_mask, base_value
def evaluate(
    self,
    study: Study,
    params: Optional[List[str]] = None,
    *,
    target: Optional[Callable[[FrozenTrial], float]] = None,
) -> Dict[str, float]:
    """Return parameter importances as mean absolute SHAP values.

    ``self._forest`` is fitted on the transformed trial parameters against
    the target values, explained with ``TreeExplainer``, and the per-column
    SHAP values are summed back onto the parameters they encode.

    Raises:
        ValueError: if ``target`` is None while ``study`` is multi-objective.
    """
    if target is None:
        if study._is_multi_objective():
            raise ValueError(
                "If the `study` is being used for multi-objective optimization, "
                "please specify the `target`. For example, use "
                "`target=lambda t: t.values[0]` for the first objective value."
            )

    distributions = _get_distributions(study, params=params)
    if params is None:
        params = list(distributions.keys())
    assert params is not None
    if len(params) == 0:
        return OrderedDict()

    trials: List[FrozenTrial] = _get_filtered_trials(study, params=params, target=target)
    transform = _SearchSpaceTransform(
        distributions, transform_log=False, transform_step=False
    )
    encoded_params: np.ndarray = _get_trans_params(trials, transform)
    objective_values: np.ndarray = _get_target_values(trials, target)

    surrogate = self._forest
    surrogate.fit(X=encoded_params, y=objective_values)

    # SHAP values come back per encoded column of the transformed space.
    column_shap: np.ndarray = TreeExplainer(surrogate).shap_values(encoded_params)

    # Accumulate the encoded columns onto the parameter each one belongs to.
    per_param_shap = np.zeros((len(trials), len(params)))
    np.add.at(per_param_shap.T, transform.encoded_column_to_column, column_shap.T)

    # Importance of a parameter = mean absolute SHAP value over the trials.
    importances = np.abs(per_param_shap).mean(axis=0)
    as_mapping = _param_importances_to_dict(params, importances)
    return _sort_dict_by_importance(as_mapping)
class HyperGBMExplainer:
    """Thin wrapper running a ``TreeExplainer`` on a HyperGBM estimator.

    Every input is first passed through the estimator's ``transform_data``
    before it reaches the underlying explainer.
    """

    def __init__(self, hypergbm_estimator, data=None):
        """Build the explainer; ``data`` (optional) is transformed and used as explainer data.

        Raises:
            RuntimeError: if the ``shap`` package is not available.
        """
        if not has_shap:
            raise RuntimeError(
                'Please install `shap` package first. command: pip install shap'
            )
        self.hypergbm_estimator = hypergbm_estimator
        background = (
            None if data is None
            else self.hypergbm_estimator.transform_data(data)
        )
        self.explainer = TreeExplainer(self.hypergbm_estimator.estimator, background)

    @property
    def expected_value(self):
        """Expected value reported by the wrapped explainer."""
        return self.explainer.expected_value

    def shap_values(self, X, y=None, tree_limit=None, approximate=False,
                    check_additivity=True, from_call=False, **kwargs):
        """Transform ``X`` (``kwargs`` go to the transformer) and return its SHAP values."""
        transformed = self.hypergbm_estimator.transform_data(X, **kwargs)
        explainer_options = dict(
            tree_limit=tree_limit,
            approximate=approximate,
            check_additivity=check_additivity,
            from_call=from_call,
        )
        return self.explainer.shap_values(transformed, y, **explainer_options)

    def shap_interaction_values(self, X, y=None, tree_limit=None, **kwargs):
        """Transform ``X`` (``kwargs`` go to the transformer) and return its SHAP interaction values."""
        transformed = self.hypergbm_estimator.transform_data(X, **kwargs)
        return self.explainer.shap_interaction_values(transformed, y, tree_limit)

    def transform_data(self, X, **kwargs):
        """Return ``X`` as transformed by the wrapped estimator."""
        return self.hypergbm_estimator.transform_data(X, **kwargs)
def model_interpretation(self, patient_id, patient_preprocessed, pred, prob, model):
    '''
    Build and save the four explanatory plots for one patient's prediction.

    Arguments:
        patient_id = identifier of the patient; becomes part of every plot file name
        patient_preprocessed = dict with the patient's exam data (feature name -> value)
        pred = class predicted by the model; must compare equal to 0 or 1
        prob = probability of the class predicted by the model
        model = model object, handed to TreeExplainer

    Returns:
        dict with the prediction, the probability as a percentage string
        (two decimals) and the API URLs of the four saved images.

    Raises:
        ValueError: if pred is neither 0 nor 1. Raised before any image is
            written, instead of failing half-way with an unbound variable.
    '''
    # In every density plot below the negative-class curve is drawn first
    # (lines[0]) and the positive-class curve second (lines[1]); the patient
    # marker is placed on the curve of the predicted class.
    if pred == 0:
        curve_index = 0
    elif pred == 1:
        curve_index = 1
    else:
        raise ValueError(f'pred must be 0 or 1, got {pred!r}')

    #### Plot file names and the URLs under which the API serves them
    patient_key = str(patient_id)
    plot_1_name = f'app/ai_models/temp/probacurve-{patient_key}.png'
    plot_2_name = f'app/ai_models/temp/shap-{patient_key}.png'
    plot_3_name = f'app/ai_models/temp/dist-{patient_key}.png'
    plot_4_name = f'app/ai_models/temp/mapa-{patient_key}.png'

    media_url = f'http://{self.IP}:{self.API_PORT}/api/media/'
    plot_1_api = f'{media_url}probacurve-{patient_key}.png'
    plot_2_api = f'{media_url}shap-{patient_key}.png'
    plot_3_api = f'{media_url}dist-{patient_key}.png'
    plot_4_api = f'{media_url}mapa-{patient_key}.png'

    #### General matplotlib settings
    DPI_IMAGES = 100
    FONT_SIZE = 8
    FONT_NAME = 'sans-serif'
    plt.rc('font', family=FONT_NAME, size=FONT_SIZE)
    plt.rc('axes', titlesize=FONT_SIZE, labelsize=FONT_SIZE)
    plt.rc('xtick', labelsize=FONT_SIZE)
    plt.rc('ytick', labelsize=FONT_SIZE)
    plt.rc('legend', fontsize=FONT_SIZE)

    #### PLOT 1 - Distribution of the model probabilities (self.probs_df)
    # NOTE(review): `shade=` is deprecated in newer seaborn in favour of
    # `fill=` - confirm the pinned seaborn version before changing it.
    _, axis = plt.subplots(nrows=1, ncols=1, figsize=(5, 5))
    sns.kdeplot(self.probs_df['prob_neg'], shade=True, color='#386796',
                ax=axis, linestyle="--", label='Casos Negativos')
    sns.kdeplot(self.probs_df['prob_pos'], shade=True, color='#F06C61',
                ax=axis, label='Casos positivos')

    # For a negative prediction the complement of prob is the x position.
    xi = 1 - prob if curve_index == 0 else prob
    data_x, data_y = axis.lines[curve_index].get_data()

    # Interpolate the density at the patient's position and mark it.
    yi = np.interp(xi, data_x, data_y)
    axis.plot([xi], [yi], linestyle='None', marker="*", color='black',
              markersize=10, label='Paciente')

    axis.legend(loc="upper right")
    axis.set_xlim([0, 1])
    axis.set_ylim([0, axis.get_ylim()[1]])
    plt.tight_layout()
    plt.savefig(plot_1_name, dpi=DPI_IMAGES, bbox_inches='tight',
                pad_inches=0.1)
    plt.close()

    #### PLOT 2 - SHAP waterfall for this patient
    features = np.array(list(patient_preprocessed.keys()))
    sample_x = np.array(list(patient_preprocessed.values()))

    explainer = TreeExplainer(model=model)
    # Computed once (the SHAP values used to be calculated twice).
    shap_values_sample = explainer.shap_values(sample_x)
    # Baseline for the class predicted by the model.
    expected_value = explainer.expected_value[pred]

    waterfall_plot(expected_value, shap_values_sample[pred], sample_x,
                   feature_names=features, max_display=20, show=False)
    plt.tight_layout()
    plt.savefig(plot_2_name, dpi=DPI_IMAGES, bbox_inches='tight',
                pad_inches=0)
    plt.close()

    #### PLOT 3 - Distribution of the model's most important variables
    important_features = [
        'Leucócitos', 'Plaquetas', 'Hemácias', 'Eosinófilos'
    ]
    target_0 = self.train_df[self.train_df['target'] == 0][important_features]
    target_1 = self.train_df[self.train_df['target'] == 1][important_features]

    _, axes = plt.subplots(nrows=2, ncols=2, figsize=(10, 5))
    # axes.flat walks the 2x2 grid row by row: (0,0), (0,1), (1,0), (1,1).
    for panel, feat in zip(axes.flat, important_features):
        sns.kdeplot(list(target_0[feat]), shade=True, color='#386796',
                    ax=panel, label='Casos Negativos', linestyle="--")
        sns.kdeplot(list(target_1[feat]), shade=True, color='#F06C61',
                    ax=panel, label='Casos positivos')

        # Density curve of the predicted class, evaluated at the patient's
        # value for this variable.
        data_x, data_y = panel.lines[curve_index].get_data()
        xi = patient_preprocessed[feat]
        yi = np.interp(xi, data_x, data_y)

        panel.plot([xi], [yi], linestyle='None', marker="*", color='black',
                   markersize=10, label='Paciente')
        panel.set_title(feat)
        panel.legend(loc="upper right")
        panel.set_ylim([0, panel.get_ylim()[1]])

    plt.tight_layout()
    plt.savefig(plot_3_name, dpi=DPI_IMAGES, bbox_inches='tight',
                pad_inches=0.1)
    plt.close()

    #### PLOT 4 - Two-component map of the patients
    amostra = pd.DataFrame(patient_preprocessed, index=[
        0,
    ]).drop(axis=1, columns=['Outra gripe'])

    # NOTE(review): per the original note this PCA comes from the `prince`
    # package; the `.loc[0, 0]` / `componentes[0]` accesses below rely on
    # `transform` returning a DataFrame - confirm.
    y_train = self.train_df['target']  # target column, used as the class mask
    dados = self.train_df.drop(
        axis=1, columns=['Outra gripe', 'target']).copy()
    pca_obj = PCA(n_components=2, random_state=42)
    pca_obj.fit(dados)
    componentes = pca_obj.transform(dados)  # components of the training data
    transf = pca_obj.transform(amostra)  # the patient in component space
    xi = transf.loc[0, 0]  # patient's X coordinate
    yi = transf.loc[0, 1]  # patient's Y coordinate

    comp = pd.DataFrame()
    comp['C1'] = componentes[0]  # principal component 1
    comp['C2'] = componentes[1]  # principal component 2
    comp['TG'] = y_train
    comp_0 = comp[comp['TG'] == 0][['C1', 'C2']]  # negatives
    comp_1 = comp[comp['TG'] == 1][['C1', 'C2']]  # positives

    _, ax = plt.subplots(figsize=(8, 8))
    plt.margins(0, 0)
    sns.scatterplot(ax=ax, data=comp_0, x='C1', y='C2', color='#386796',
                    label='Casos Negativos')
    sns.scatterplot(ax=ax, data=comp_1, x='C1', y='C2', color='#F06C61',
                    label='Casos Positivos')

    # One translucent ellipse per class, from self.build_ellipse.
    x_mean, y_mean, width, height, angle = self.build_ellipse(
        comp_0['C1'], comp_0['C2'])
    ax.add_patch(
        Ellipse((x_mean, y_mean), width, height, angle=angle, linewidth=2,
                color='#386796', fill=True, alpha=0.2))
    x_mean, y_mean, width, height, angle = self.build_ellipse(
        comp_1['C1'], comp_1['C2'])
    ax.add_patch(
        Ellipse((x_mean, y_mean), width, height, angle=angle, linewidth=2,
                color='#F06C61', fill=True, alpha=0.2))

    ax.plot([xi], [yi], linestyle='None', marker="*", color='black',
            markersize=10, label='Paciente')

    ax.set_xticks([])
    ax.set_yticks([])
    ax.set_ylabel('')
    ax.set_xlabel('')
    # Legend entries sorted alphabetically by label.
    handles, labels = ax.get_legend_handles_labels()
    labels, handles = zip(
        *sorted(zip(labels, handles), key=lambda t: t[0]))
    ax.legend(handles, labels, loc="upper right")

    plt.axis('off')
    plt.savefig(plot_4_name, dpi=DPI_IMAGES, bbox_inches='tight',
                pad_inches=0)
    plt.close()

    model_result = {
        'prediction': pred,
        'probability': str(round(prob * 100, 2)),
        'probacurve': plot_1_api,
        'shap_img': plot_2_api,
        'dist_img': plot_3_api,
        'mapa_img': plot_4_api
    }
    return model_result
class ShapleyImportanceEvaluator(BaseImportanceEvaluator):
    """Shapley (SHAP) parameter importance evaluator.

    A random forest is trained (through
    ``MeanDecreaseImpurityImportanceEvaluator``) to predict objective values
    from hyperparameter configurations; the importance of a parameter is its
    mean absolute SHAP value.

    .. note::

        This evaluator requires the `sklearn <https://scikit-learn.org/stable/>`_ Python
        package and `SHAP <https://shap.readthedocs.io/en/stable/index.html>`_.

    Args:
        n_trees:
            Number of trees in the random forest.
        max_depth:
            The maximum depth of each tree in the random forest.
        seed:
            Seed for the random forest.
    """

    def __init__(
        self, *, n_trees: int = 64, max_depth: int = 64, seed: Optional[int] = None
    ) -> None:
        _imports.check()

        # The backend evaluator owns the random forest used as surrogate model.
        self._backend_evaluator = MeanDecreaseImpurityImportanceEvaluator(
            n_trees=n_trees, max_depth=max_depth, seed=seed
        )
        # SHAP TreeExplainer; created on every call to ``evaluate``.
        self._explainer: Optional[TreeExplainer] = None

    def evaluate(
        self,
        study: Study,
        params: Optional[List[str]] = None,
        *,
        target: Optional[Callable[[FrozenTrial], float]] = None,
    ) -> Dict[str, float]:
        """Return parameters mapped to mean absolute SHAP value, most important first."""
        backend = self._backend_evaluator

        # Running the backend evaluation trains its random forest.
        backend.evaluate(study=study, params=params, target=target)

        # Explain the freshly trained forest on the transformed trial parameters.
        self._explainer = TreeExplainer(backend._forest)
        shap_values = self._explainer.shap_values(backend._trans_params)

        # Pair each parameter name with its mean absolute SHAP value and rank
        # the pairs by that value, largest first.
        ranked = sorted(
            zip(backend._param_names, np.abs(shap_values).mean(axis=0)),
            key=lambda pair: pair[1],
            reverse=True,
        )
        return OrderedDict(ranked)
def cross_val(self, X, y, scoring=None, cv=None, **kwargs):
    """Method for performing cross-validation given the hyperparameters of initialized or fitted model.

    Args:
        X (:obj:`pd.DataFrame`, :obj:`pd.Series`): Training data.
        y (:obj:`pd.DataFrame`, :obj:`pd.Series`): Training target values.
        scoring (:obj:`callable`): Metrics passed to sklearn.model_selection.cross_validate
            calculation. May also be a tuple/list of callables or the name of an
            entry of ``SCORERS``. Defaults to ``mean_squared_error``.
        cv (:obj:`int, cross-validation generator or an iterable`, optional):
            Cross-validation strategy from sklearn. Performs 5-fold cv by default.
        **kwargs: Other parameters passed to sklearn.model_selection.cross_validate.

    Returns:
        pd.DataFrame, pd.DataFrame: DataFrame with metrics on folds, DataFrame
        with shap values on folds. Both carry an 'Overall' column (computed with
        ``self.model`` on the full data) followed by one 'Fold i' column per fold.

    Raises:
        ValueError: if ``scoring`` is a string that is not a key of ``SCORERS``.
        NotImplementedError: if ``scoring`` has an unsupported type.
    """

    def _shap_column(fitted_model):
        """Return [expected value(s)] + mean shap value per feature for one model."""
        explainer = TreeExplainer(fitted_model)
        # expected_value can be None, a bare scalar or an array depending on
        # the model; going through a flattened array yields a list in every
        # case (``[None]`` for None), so the concatenation below cannot fail
        # on a scalar that lacks ``tolist`` or whose ``tolist`` is a float.
        intercept = array(explainer.expected_value).reshape(-1).tolist()
        return intercept + explainer.shap_values(X).mean(axis=0).tolist()

    scoring = mean_squared_error if scoring is None else scoring
    models, metrics = self._cross_val(X, y, scoring=scoring, cv=cv, **kwargs)

    # 'Overall' score(s): self.model evaluated on the full data.
    if callable(scoring):
        scorers = {
            scoring.__name__.replace('_', ' '):
            array([scoring(y, self.model.predict(X))])
        }
    elif isinstance(scoring, (tuple, list)):
        scorers = {
            scorer.__name__.replace('_', ' '):
            array([scorer(y, self.model.predict(X))])
            for scorer in scoring
        }
    elif isinstance(scoring, str):
        if scoring not in SCORERS:
            raise ValueError(f'Scorer {scoring} is not supported.')
        # sklearn scorers are called as scorer(estimator, X, y_true); the
        # target must be passed positionally because the parameter is not
        # named ``y``.
        scorers = {
            scoring.replace('_', ' '):
            array([SCORERS[scoring](self.model, X, y)])
        }
    else:
        raise NotImplementedError(
            f'Scoring of type {scoring} is not supported')

    # Prepend the overall score to the per-fold scores of every metric.
    metrics = DataFrame({
        key: concatenate((overall, metrics[key]))
        for key, overall in scorers.items()
    }).T
    metrics.columns = [
        f'Fold {i}' if i != 0 else 'Overall'
        for i in range(metrics.shape[1])
    ]

    # One shap column for self.model ('Overall'), then one per fold model.
    shap_coefs = [_shap_column(fitted) for fitted in [self.model, *models]]
    shapdf = DataFrame(array(shap_coefs).T,
                       columns=['Overall'] +
                       [f'Fold {x}' for x in range(1, len(models) + 1)],
                       index=['Intercept'] + X.columns.tolist())
    return metrics, shapdf
def shap_explain(self, data, index=None, link=None, show=True, layout_dict=None):
    """Method for plotting a waterfall graph or return corresponding JSON if show=False.

    Args:
        data (:obj:`pd.DataFrame`, :obj:`pd.Series`): Data for shap values calculation.
            Must boil down to a single observation: a Series, a one-row
            DataFrame, or a DataFrame together with ``index``.
        index (:obj:`int`, optional): Index of the observation of interest, if data is pd.DataFrame.
            Ignored when data is not a DataFrame.
        link (:obj:`callable`, optional): A function for transforming shap values into predictions.
            Unnecessary if self.objective is present and it takes values in ['binary', 'poisson', 'gamma'].
        show (:obj:`boolean`, optional): Whether to plot a graph or return a json.
        layout_dict (:obj:`dict`, optional): Dictionary containing the parameters of plotly figure layout.

    Returns:
        None or dict: Waterfall graph or corresponding JSON.

    Raises:
        NotImplementedError: if self.model is not an XGBoost, LightGBM or
            CatBoost classifier/regressor.
    """

    def logit(x):
        # Despite its name this is the inverse logit: 1 / (1 + exp(-x)).
        return true_divide(1, add(1, exp(-x)))

    explainer = TreeExplainer(self.model)
    if isinstance(self.model, (XGBClassifier, XGBRegressor)):
        feature_names = self.model.get_booster().feature_names
    elif isinstance(self.model, (LGBMClassifier, LGBMRegressor)):
        feature_names = self.model.feature_name_
    elif isinstance(self.model, (CatBoostClassifier, CatBoostRegressor)):
        feature_names = self.model.feature_names_
    else:
        raise NotImplementedError(
            f'Error with the backend choice. Supported backends: {self._backends}'
        )

    # `index` only makes sense for a DataFrame; a Series is one observation.
    index = index if (isinstance(
        data, DataFrame)) and (index is not None) else None
    data = DataFrame(data).T[feature_names] if isinstance(
        data, Series) else data[feature_names]
    data = data if index is None else data.loc[[index], :]

    shap_values = explainer.shap_values(data)
    # A two-element list means per-class output; the first entry is used,
    # together with the matching expected value.
    cond_bool = isinstance(shap_values, list) and (len(shap_values) == 2)
    shap_values = shap_values[0] if cond_bool else shap_values
    expected_value = explainer.expected_value[
        0] if cond_bool else explainer.expected_value

    prediction = DataFrame([expected_value] +
                           shap_values.reshape(-1).tolist(),
                           index=['Intercept'] + feature_names,
                           columns=['SHAP Value'])
    prediction['CumSum'] = cumsum(prediction['SHAP Value'])
    prediction['Value'] = append(nan, data.values.reshape(-1))

    # Derive the link from the objective unless the caller supplied one.
    if (self.objective is not None) and (link is None):
        link = exp if self.objective in [
            'poisson', 'gamma'
        ] else logit if self.objective == 'binary' else None

    if link is not None:
        # Contributions are the steps between consecutive linked cumulative sums.
        prediction['Link'] = link(prediction['CumSum'])
        prediction['Contribution'] = [link(expected_value)] + list(
            diff(prediction['Link']))
    else:
        prediction['Contribution'] = [expected_value] + list(
            diff(prediction['CumSum']))

    observed_values = data.values.reshape(-1)
    fig = Figure(
        Waterfall(
            name=f'Prediction {index}',
            orientation='h',
            measure=['relative'] * len(prediction),
            y=[
                prediction.index[i] if i == 0 else
                f'{prediction.index[i]}={observed_values[i-1]}'
                for i in range(len(prediction.index))
            ],
            x=prediction['Contribution']))
    fig.update_layout(**(layout_dict if layout_dict is not None else {}))

    if show:
        fig.show()
        return None

    json_ = prediction[['Value', 'SHAP Value',
                        'Contribution']].T.to_dict()
    fig_base64 = b64encode(
        to_image(fig, format='jpeg', engine='kaleido')).decode('ascii')
    # Without a link there is no 'Link' column; the final cumulative sum is
    # then the prediction itself. Positional access (.iloc) is required
    # because the frame is indexed by feature name.
    final_column = 'Link' if link is not None else 'CumSum'
    json_.update({
        'id': int(data.index.values[0]),
        'predict': prediction[final_column].iloc[-1],
        "ShapValuesPlot": fig_base64
    })
    return json_