def setUp(self):
        """Build the CatBoost regression explainer fixture.

        Stores on ``self``:
            test_len: number of rows in the ``titanic_fare()`` test split.
            names: second element returned by ``titanic_names()``.
            explainer: a ``RegressionExplainer`` wrapping a CatBoost model
                that was refit on the merged-categorical frame.
        """
        X_train, y_train, X_test, y_test = titanic_fare()
        self.test_len = len(X_test)

        # Only the second element is used, so load the names a single time.
        _, self.names = titanic_names()

        # First pass: fit on the raw training frame and let the explainer merge
        # the 'Deck' and 'Embarked' columns into categorical features.
        model = CatBoostRegressor(iterations=5, verbose=0).fit(X_train, y_train)
        explainer = RegressionExplainer(model, X_test, y_test, cats=['Deck', 'Embarked'])
        X_cats, y_cats = explainer.X_merged, explainer.y

        # Second pass: refit on the merged frame, telling CatBoost which column
        # positions are categorical.
        # NOTE(review): 8 and 9 are presumably the merged Deck/Embarked columns
        # of X_merged -- confirm against the dataset layout.
        model = CatBoostRegressor(iterations=5, verbose=0).fit(X_cats, y_cats, cat_features=[8, 9])
        self.explainer = RegressionExplainer(model, X_cats, y_cats, cats=['Sex'], idxs=X_test.index)
Пример #2
0
            def dashboard_exp(model, X_data, y_data):
                """Run a pared-down regression ExplainerDashboard for ``model``.

                Wraps ``model``, ``X_data`` and ``y_data`` in a
                ``RegressionExplainer``, builds an ``ExplainerDashboard`` with
                the SANDSTONE bootstrap theme and the switches below, then
                calls ``run()`` on it.
                """
                import dash_bootstrap_components as dbc
                from explainerdashboard import ExplainerDashboard, RegressionExplainer

                # Boolean switches for the dashboard tabs.
                tab_switches = dict(
                    importances=True,
                    model_summary=False,
                    contributions=True,
                    whatif=True,
                    shap_dependence=False,
                    shap_interaction=False,
                    decision_trees=False,
                )

                # Boolean switches for individual components and toggles.
                component_switches = dict(
                    hide_whatifindexselector=True,
                    hide_whatifprediction=True,
                    hide_inputeditor=False,
                    hide_whatifcontributiongraph=False,
                    hide_whatifcontributiontable=True,
                    hide_whatifpdp=False,
                    hide_predindexselector=True,
                    hide_predictionsummary=True,
                    hide_contributiongraph=False,
                    hide_pdp=False,
                    hide_contributiontable=True,
                    hide_dropna=True,
                    hide_range=True,
                    hide_depth=True,
                    hide_sort=True,
                    hide_sample=True,      # sample size input on pdp component
                    hide_gridlines=True,   # gridlines on pdp component
                    hide_gridpoints=True,
                    hide_cats_sort=True,   # sorting option for categorical features
                    hide_cutoff=True,      # cutoff selector on classification components
                    hide_percentage=True,  # percentage toggle on classification components
                    hide_log_x=True,       # x-axis log toggle on regression plots
                    hide_log_y=True,       # y-axis log toggle on regression plots
                    hide_ratio=True,       # residuals type dropdown
                    hide_points=True,      # show violin scatter markers toggle
                    hide_winsor=True,      # winsorize input
                    hide_wizard=True,      # wizard toggle in lift curve component
                    hide_star_explanation=True,
                )

                explainer = RegressionExplainer(model, X_data, y_data)
                dashboard = ExplainerDashboard(
                    explainer,
                    bootstrap=dbc.themes.SANDSTONE,
                    **tab_switches,
                    **component_switches,
                )
                dashboard.run()
Пример #3
0
# Rebuild the explainer and dashboard config when the stored one is more than
# a week old; otherwise reuse the files already on disk.
if days_since_update > 7:

    # NOTE: pickle.load can execute arbitrary code -- only load model files
    # that this project produced itself.
    with open(MODELS_DIR / 'general_model.pkl', 'rb') as model_file:
        model = pickle.load(model_file)
    y = pd.read_csv(DATA_DIR / 'general_target.csv',
                    index_col=['Ticker']).drop(columns=['Date'])
    X = pd.read_csv(DATA_DIR / 'general_features.csv',
                    index_col=['Ticker']).drop(columns=['Date'])

    # Dashboard Explainer is fussy about column names: strip literal dots from
    # both the frame and the booster so the two stay aligned.
    # regex=False makes '.' a literal dot regardless of the pandas version's
    # default for `regex`.
    X.columns = X.columns.str.replace('.', '', regex=False)
    booster = model.get_booster()
    booster.feature_names = [name.replace('.', '') for name in booster.feature_names]

    explainer = RegressionExplainer(model, X, y)

    db = ExplainerDashboard(
        explainer,
        title="Stock Valuation Explainer",
        description=
        "Visit https://share.streamlit.io/gardnmi/fundamental-stock-prediction to see the model in use,",
        shap_interaction=False,
        precision='float32',
        decision_trees=False)

    # Write the dashboard config and dump the explainer next to it.
    db.to_yaml("dashboard.yaml",
               explainerfile="explainer.joblib",
               dump_explainer=True)

# Always load the dashboard from the (possibly just refreshed) config file.
db = ExplainerDashboard.from_config("dashboard.yaml")
Пример #4
0
from explainerdashboard import RegressionExplainer, ExplainerDashboard
from explainerdashboard.custom import *
from joblib import load
import pandas as pd
from sklearn.model_selection import train_test_split

# Restore the fitted model and read the encoded dataset.
dec_tree = load('dec_tree_v3.joblib')
df_data = pd.read_csv('data_v3_enc.csv')

# 'Duration' is the target; it and 'Timestamp' are excluded from the features.
y = df_data['Duration']
X = df_data.drop(columns=['Duration', 'Timestamp'])

# Hold out 1% of the rows; only that small split is handed to the explainer.
Xt, X_small, yt, y_small = train_test_split(
    X, y, test_size=0.01, random_state=0
)

exp = RegressionExplainer(
    dec_tree,
    X_small,
    y_small,
    cats=['Day_of_week', 'Hour', 'Vehicle', 'Position'],
)

# Dashboard built from two composites, with the what-if PDP hidden.
db = ExplainerDashboard(
    exp,
    [ShapDependenceComposite, WhatIfComposite],
    hide_whatifpdp=True,
)

# Persist the explainer and the dashboard configuration.
exp.dump("explainer.joblib")
db.to_yaml("dashboard.yaml")
Пример #5
0
                         n_jobs=8,
                         num_parallel_tree=1,
                         objective='reg:squarederror',
                         random_state=42,
                         reg_alpha=1,
                         reg_lambda=0,
                         scale_pos_weight=1.0,
                         seed=42,
                         subsample=0.6000000000000001,
                         tree_method='exact',
                         validate_parameters=1,
                         verbosity=None)

# Features and target are read from separate CSVs, using the first column of
# each file as the index.
X = pd.read_csv('X.csv', index_col=0)
Y = pd.read_csv('Y.csv', index_col=0)

# Fit the configured model and wrap it in an explainer.
REmodel = model.fit(X, Y)
explainer = RegressionExplainer(
    REmodel, X, Y,
    cats=cats,
    descriptions=feature_descriptions,
    units="$",
)

# Serve the dashboard on the port named by $PORT, falling back to 5000.
ExplainerDashboard(
    explainer,
    title='XGBoost Regression Model Explainer: Predicting House Prices',
    description='This dashboard shows the inner workings of a fitted machine learning model, and explains its predictions.',
    shap_interaction=False,
    decision_trees=False,
).run(port=int(os.environ.get('PORT', 5000)))
Пример #6
0
# classifier: wrap the fitted model, build a dashboard from the explainer,
# then dump the explainer to disk.
clas_explainer = ClassifierExplainer(
    model, X_test, y_test,
    cats=['Sex', 'Deck', 'Embarked'],
    descriptions=feature_descriptions,
    labels=['Not survived', 'Survived'],
)
# The dashboard object itself is not kept; only the explainer is saved.
_ = ExplainerDashboard(clas_explainer)
clas_explainer.dump(pkl_dir / "clas_explainer.joblib")

# regression: fit a small random forest on the fare data, build a dashboard
# from the explainer, then dump the explainer to disk.
X_train, y_train, X_test, y_test = titanic_fare()
model = RandomForestRegressor(n_estimators=50, max_depth=5).fit(X_train, y_train)
reg_explainer = RegressionExplainer(
    model, X_test, y_test,
    cats=['Sex', 'Deck', 'Embarked'],
    descriptions=feature_descriptions,
    units="$",
)
# The dashboard object itself is not kept; only the explainer is saved.
_ = ExplainerDashboard(reg_explainer)
reg_explainer.dump(pkl_dir / "reg_explainer.joblib")

# multiclass
X_train, y_train, X_test, y_test = titanic_embarked()
model = RandomForestClassifier(n_estimators=50,
                               max_depth=5).fit(X_train, y_train)
multi_explainer = ClassifierExplainer(
    model,
    X_test,
    y_test,
    cats=['Sex', 'Deck'],
    descriptions=feature_descriptions,
Пример #7
0
from explainerdashboard.explainers import RandomForestRegressionExplainer
from sklearn.ensemble import RandomForestRegressor
from sklearn import tree
from explainerdashboard import ClassifierExplainer, ExplainerDashboard, RegressionExplainer
from explainerdashboard.datasets import titanic_survive, feature_descriptions
from sklearn.model_selection import train_test_split
import pandas as pd
import numpy as np


# Load the movie table and give its columns readable names.
# NOTE(review): the rename keys look like values from a first data row that
# was read as the header -- if so that movie is dropped from the data; confirm
# whether the CSV really has no header line.
imdb = pd.read_csv("Amélioration/Data/movies2.csv", encoding="latin-1")
imdb = imdb.rename(columns={
    '11:14': 'film',
    '7.2': 'metascore',
    'Crime': 'genre',
    'Greg Marcks': 'realisateur',
    'Henry Thomas': 'acteur_1',
    'Colin Hanks': 'acteur_2',
    '6000000': 'budget',
    '0': 'votes2',
    '0.1': 'vote',
    'Aug 12, 2005': 'date',
})

# One-hot encode the categorical columns, then drop the unused ones.
colonne = ['genre', 'acteur_1', 'acteur_2', 'realisateur']
imdb = pd.get_dummies(imdb, columns=colonne)
imdb = imdb.drop(columns=['film', 'budget', 'votes2', 'vote', 'date'])

# Features are everything except the target. The target is selected as a 1-D
# Series so the regressor is not handed a column vector.
X = imdb.drop(columns='metascore')
y = imdb['metascore']

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

model = RandomForestRegressor().fit(X_train, y_train)
explainer = RegressionExplainer(model, X_test, y_test)

db = ExplainerDashboard(explainer, title="Metascore de film",
                        whatif=False,  # you can switch off tabs with bools
                        shap_interaction=False,
                        decision_trees=False)

# Run the dashboard configured above rather than building a second, default one.
db.run()