def get_regression_explainer(xgboost=False, include_y=True):
    """Build a ``RegressionExplainer`` on the ``titanic_fare()`` test split.

    Args:
        xgboost: when truthy, fit a default ``XGBRegressor``; otherwise fit
            ``RandomForestRegressor(n_estimators=50, max_depth=10)``.
        include_y: when truthy, ``y_test`` is handed to the explainer;
            otherwise the explainer is built from ``X_test`` alone.

    Returns:
        The explainer, after ``calculate_properties()`` has been called on it.
    """
    X_train, y_train, X_test, y_test = titanic_fare()
    # Only the second element returned by titanic_names() is used here.
    _, test_names = titanic_names()

    if xgboost:
        regressor = XGBRegressor()
    else:
        regressor = RandomForestRegressor(n_estimators=50, max_depth=10)
    model = regressor.fit(X_train, y_train)

    # The two variants differ only in whether y_test is passed positionally.
    data_args = (X_test, y_test) if include_y else (X_test,)
    reg_explainer = RegressionExplainer(
        model,
        *data_args,
        cats=['Sex', 'Deck', 'Embarked'],
        idxs=test_names,
        units="$",
    )
    reg_explainer.calculate_properties()
    return reg_explainer
def setUp(self):
    """Create an explainer on the first 100 test rows and register lookup
    callbacks that serve the remaining rows.

    The explainer itself only receives rows ``[:100]``; the callbacks
    registered through the ``set_*_func`` setters answer from rows ``[100:]``.
    """
    X_train, y_train, X_test, y_test = titanic_fare()

    forest = RandomForestRegressor(n_estimators=50, max_depth=4)
    model = forest.fit(X_train, y_train)

    # Replace the test index with string labels "0", "1", ... (in place).
    X_test.reset_index(drop=True, inplace=True)
    X_test.index = X_test.index.astype(str)

    in_explainer = slice(None, 100)
    held_out = slice(100, None)
    X_test1 = X_test.iloc[in_explainer]
    y_test1 = y_test.iloc[in_explainer]
    X_test2 = X_test.iloc[held_out]
    y_test2 = y_test.iloc[held_out]

    self.explainer = RegressionExplainer(
        model, X_test1, y_test1, cats=['Sex', 'Deck'])

    def index_exists_func(index):
        # Membership test against the held-out index labels.
        return index in X_test2.index

    def index_list_func():
        # only returns first 50 indexes
        return list(X_test2.index[:50])

    def y_func(index):
        # Label -> position in X_test2, then a one-element positional slice
        # of y_test2 (y_test was not re-indexed, so lookup must be positional).
        return y_test2.iloc[[X_test2.index.get_loc(index)]]

    def X_func(index):
        # Label -> position, then a one-row positional slice of X_test2.
        return X_test2.iloc[[X_test2.index.get_loc(index)]]

    self.explainer.set_index_exists_func(index_exists_func)
    self.explainer.set_index_list_func(index_list_func)
    self.explainer.set_X_row_func(X_func)
    self.explainer.set_y_func(y_func)
def setUp(self):
    """Create a small random-forest explainer constructed with ``cv=3``."""
    X_train, y_train, X_test, y_test = titanic_fare()

    model = RandomForestRegressor(n_estimators=5, max_depth=2)
    model.fit(X_train, y_train)

    # 'Gender' is given as an explicit mapping to its three Sex_* columns;
    # 'Deck' and 'Embarked' are passed as plain names.
    categorical = [
        {'Gender': ['Sex_female', 'Sex_male', 'Sex_nan']},
        'Deck',
        'Embarked',
    ]
    self.explainer = RegressionExplainer(
        model, X_test, y_test, cats=categorical, cv=3)
def setUp(self):
    """Create a linear-regression explainer using ``shap='kernel'``.

    Only the first 20 test rows are passed to the explainer, with a
    5-row ``shap.sample`` of the training data as ``X_background``.
    """
    X_train, y_train, X_test, y_test = titanic_fare()
    # test_len records the size of the full test set, not the 20-row subset.
    self.test_len = len(X_test)

    model = LinearRegression()
    model.fit(X_train, y_train)

    background = shap.sample(X_train, 5)
    first_rows = slice(None, 20)
    self.explainer = RegressionExplainer(
        model,
        X_test.iloc[first_rows],
        y_test.iloc[first_rows],
        shap='kernel',
        X_background=background,
    )
def get_catboost_regressor():
    """Return a CatBoost explainer trained on the merged-category frame.

    Two stages:
      1. Fit a CatBoost model on the data as returned by ``titanic_fare()``
         and wrap it in an explainer with ``cats`` set, purely to obtain
         that explainer's ``X_merged`` and ``y``.
      2. Fit a second CatBoost model on ``X_merged`` with
         ``cat_features=[5, 6, 7]`` and wrap it in the explainer that is
         returned, with properties calculated (interactions excluded).
    """
    X_train, y_train, X_test, y_test = titanic_fare()

    # Stage 1: throwaway model/explainer used only for X_merged and y.
    base_model = CatBoostRegressor(iterations=5, verbose=0)
    base_model = base_model.fit(X_train, y_train)
    base_explainer = RegressionExplainer(
        base_model, X_test, y_test, cats=["Sex", 'Deck', 'Embarked'])
    X_cats = base_explainer.X_merged
    y_cats = base_explainer.y

    # Stage 2: model fitted on the merged frame.
    # NOTE(review): positions 5, 6, 7 are presumably the merged Sex/Deck/
    # Embarked columns of X_merged — confirm against the column order.
    cat_model = CatBoostRegressor(iterations=5, verbose=0)
    cat_model = cat_model.fit(X_cats, y_cats, cat_features=[5, 6, 7])

    explainer = RegressionExplainer(
        cat_model, X_cats, y_cats, idxs=X_test.index)
    explainer.calculate_properties(include_interactions=False)
    return explainer
def setUp(self):
    """Create a default-LGBMRegressor explainer with ``shap='tree'``."""
    X_train, y_train, X_test, y_test = titanic_fare()
    self.test_len = len(X_test)

    # titanic_names() is called twice, as in the original fixture: once for
    # the idxs passed to the explainer and once for self.names.
    _, test_names = titanic_names()
    _, self.names = titanic_names()

    model = LGBMRegressor()
    model.fit(X_train, y_train)

    explainer_options = dict(
        shap='tree',
        cats=['Sex', 'Deck', 'Embarked'],
        idxs=test_names,
        units="$",
    )
    # r2_score is passed positionally, directly after y_test.
    self.explainer = RegressionExplainer(
        model, X_test, y_test, r2_score, **explainer_options)
def setUp(self):
    """Create a default-XGBRegressor explainer without custom ``idxs``."""
    X_train, y_train, X_test, y_test = titanic_fare()
    self.test_len = len(X_test)

    # test_names is fetched but not passed to the explainer in this fixture;
    # the call is kept so the fixture performs the same work as before.
    _, test_names = titanic_names()
    _, self.names = titanic_names()

    model = XGBRegressor().fit(X_train, y_train)

    gender_group = {'Gender': ['Sex_female', 'Sex_male', 'Sex_nan']}
    self.explainer = RegressionExplainer(
        model,
        X_test,
        y_test,
        cats=[gender_group, 'Deck', 'Embarked'],
        units="$",
    )
def setUp(self):
    """Create a small random-forest explainer with ``target='Fare'``."""
    X_train, y_train, X_test, y_test = titanic_fare()
    self.test_len = len(X_test)

    # Two separate titanic_names() calls, mirroring the original fixture.
    _, test_names = titanic_names()
    _, self.names = titanic_names()

    model = RandomForestRegressor(n_estimators=5, max_depth=2)
    model.fit(X_train, y_train)

    explainer_options = {
        'cats': [
            {'Gender': ['Sex_female', 'Sex_male', 'Sex_nan']},
            'Deck',
            'Embarked',
        ],
        'idxs': test_names,
        'target': 'Fare',
        'units': '$',
    }
    # r2_score is passed positionally, directly after y_test.
    self.explainer = RegressionExplainer(
        model, X_test, y_test, r2_score, **explainer_options)
def setUp(self):
    """Create a 100-iteration CatBoost explainer with ``shap='tree'``."""
    X_train, y_train, X_test, y_test = titanic_fare()
    self.test_len = len(X_test)

    # Two separate titanic_names() calls, mirroring the original fixture.
    _, test_names = titanic_names()
    _, self.names = titanic_names()

    booster = CatBoostRegressor(iterations=100, learning_rate=0.1, verbose=0)
    model = booster.fit(X_train, y_train)

    # r2_score is passed positionally, directly after y_test.
    self.explainer = RegressionExplainer(
        model,
        X_test,
        y_test,
        r2_score,
        shap='tree',
        cats=['Sex', 'Deck', 'Embarked'],
        idxs=test_names,
        units="$",
    )
def setUp(self):
    """Create a 5-iteration CatBoost explainer with a grouped 'Gender' category."""
    X_train, y_train, X_test, y_test = titanic_fare()
    self.test_len = len(X_test)

    # Two separate titanic_names() calls, mirroring the original fixture.
    _, test_names = titanic_names()
    _, self.names = titanic_names()

    booster = CatBoostRegressor(iterations=5, learning_rate=0.1, verbose=0)
    model = booster.fit(X_train, y_train)

    categorical = [
        {'Gender': ['Sex_female', 'Sex_male', 'Sex_nan']},
        'Deck',
        'Embarked',
    ]
    self.explainer = RegressionExplainer(
        model, X_test, y_test, cats=categorical, idxs=test_names, units="$")
def setUp(self):
    """Create a small XGBRegressor explainer (5 estimators, depth 2)."""
    X_train, y_train, X_test, y_test = titanic_fare()
    # Only the second element returned by titanic_names() is used.
    _, test_names = titanic_names()

    model = XGBRegressor(n_estimators=5, max_depth=2).fit(X_train, y_train)

    gender_group = {'Gender': ['Sex_female', 'Sex_male', 'Sex_nan']}
    explainer_options = dict(
        cats=[gender_group, 'Deck', 'Embarked'],
        idxs=test_names,
    )
    self.explainer = RegressionExplainer(
        model, X_test, y_test, **explainer_options)
def setUp(self):
    """Create a small random-forest explainer with ``shap='tree'``."""
    X_train, y_train, X_test, y_test = titanic_fare()
    self.test_len = len(X_test)

    # Two separate titanic_names() calls, mirroring the original fixture.
    _, test_names = titanic_names()
    _, self.names = titanic_names()

    forest = RandomForestRegressor(n_estimators=5, max_depth=2)
    model = forest.fit(X_train, y_train)

    # NOTE(review): this fixture passes 'Cabin' where the other fixtures in
    # this file pass 'Deck' — confirm whether that is intentional.
    categorical = ['Sex', 'Cabin', 'Embarked']

    # r2_score is passed positionally, directly after y_test.
    self.explainer = RegressionExplainer(
        model,
        X_test,
        y_test,
        r2_score,
        shap='tree',
        cats=categorical,
        idxs=test_names,
    )
def setUp(self):
    """Create a small random-forest explainer that sets ``cats_notencoded``."""
    X_train, y_train, X_test, y_test = titanic_fare()
    self.test_len = len(X_test)

    # Two separate titanic_names() calls, mirroring the original fixture.
    _, test_names = titanic_names()
    _, self.names = titanic_names()

    forest = RandomForestRegressor(n_estimators=5, max_depth=2)
    model = forest.fit(X_train, y_train)

    explainer_options = {
        'cats': [
            {'Gender': ['Sex_female', 'Sex_male', 'Sex_nan']},
            'Deck',
            'Embarked',
        ],
        # Passed through unchanged; maps the 'Gender' group to 'No Gender'.
        'cats_notencoded': {'Gender': 'No Gender'},
        'idxs': test_names,
    }
    self.explainer = RegressionExplainer(
        model, X_test, y_test, **explainer_options)
def setUp(self):
    """Create a linear-regression explainer with ``shap='linear'``."""
    X_train, y_train, X_test, y_test = titanic_fare()
    self.test_len = len(X_test)

    # Two separate titanic_names() calls, mirroring the original fixture.
    _, test_names = titanic_names()
    _, self.names = titanic_names()

    model = LinearRegression().fit(X_train, y_train)

    gender_group = {'Gender': ['Sex_female', 'Sex_male', 'Sex_nan']}
    # r2_score is passed positionally, directly after y_test.
    self.explainer = RegressionExplainer(
        model,
        X_test,
        y_test,
        r2_score,
        shap='linear',
        cats=[gender_group, 'Deck', 'Embarked'],
        idxs=test_names,
        units="$",
    )