def ga2m(self):
    """Train and compare glassbox regressors on (self.x, self.y) in one dashboard.

    Fits a linear regression, a regression tree, and an Explainable Boosting
    Machine (EBM / GA2M), builds performance and global-interpretability
    explanations for each, and renders everything with ``show``.

    NOTE(review): performance is measured on the same (self.x, self.y) used for
    training — no holdout split is visible here; confirm that is intentional.
    """
    # Explore the raw data distribution
    marginal = Marginal().explain_data(self.x, self.y, name="Raw Data")

    # Train baseline glassbox models and the Explainable Boosting Machine (EBM)
    lr = LinearRegression()
    lr.fit(self.x, self.y)
    rt = RegressionTree()
    rt.fit(self.x, self.y)
    ebm = ExplainableBoostingRegressor()  # For classification, use ExplainableBoostingClassifier()
    ebm.fit(self.x, self.y)

    # How does each model perform?
    ebm_perf = RegressionPerf(ebm.predict).explain_perf(self.x, self.y, name="EBM")
    lr_perf = RegressionPerf(lr.predict).explain_perf(
        self.x, self.y, name="Linear Regression")
    rt_perf = RegressionPerf(rt.predict).explain_perf(
        self.x, self.y, name="Regression Tree")

    # Global interpretability - what each model says for all data
    ebm_global = ebm.explain_global(name="EBM")
    # FIX: was name="LinearRegression" — use the same display name as lr_perf
    # so the two dashboard entries for the linear model are labelled alike.
    lr_global = lr.explain_global(name="Linear Regression")
    rt_global = rt.explain_global(name="Regression Tree")

    # Put everything in a single dashboard - this is the best view
    show([
        marginal, lr_global, lr_perf, rt_global, rt_perf, ebm_perf, ebm_global
    ])
def train_titanic_regression(interactions):
    """Train an ExplainableBoostingRegressor predicting Age on the Titanic data.

    Parameters
    ----------
    interactions : int or list
        Passed through to ``ExplainableBoostingRegressor(interactions=...)``.

    Returns
    -------
    (model, x_test, y_test) : the fitted regressor and the held-out split.
    """
    df = pd.read_csv(os.path.join('examples', 'titanic_train.csv'))
    df = df.dropna()
    feature_columns = ['SibSp', 'Fare', 'Pclass']
    label_column = "Age"
    # BUG FIX: the original ran the continuous "Age" target through
    # LabelEncoder. LabelEncoder is meant for classification targets; it maps
    # each distinct age to an arbitrary 0..k-1 rank, so the regressor was
    # trained on distorted values. Use the raw numeric target instead.
    y = df[label_column]
    x = df[feature_columns]
    x_train, x_test, y_train, y_test = train_test_split(x, y)
    model = ExplainableBoostingRegressor(interactions=interactions)
    model.fit(x_train, y_train)
    return model, x_test, y_test
def fit(self, X, y, sample_weight=None, eval_set=None, sample_weight_eval_set=None, **kwargs):
    """Fit an InterpretML Explainable Boosting Machine on a datatable frame.

    Chooses classifier vs regressor from ``self.num_classes``, imputes missing
    values in-place column by column, trains the model, and records it via
    ``self.set_model_properties``. ``sample_weight``/``eval_set`` are accepted
    for interface compatibility but not used here.
    """
    from interpret.glassbox import (
        ExplainableBoostingClassifier,
        ExplainableBoostingRegressor,
    )
    logging.root.level = (
        10
    )  # HACK - EBM can't handle our custom logger with unknown level 9 (DATA)
    orig_cols = list(X.names)
    if self.num_classes >= 2:
        # Classification: encode the project's label set to 0..k-1 integers.
        lb = LabelEncoder()
        lb.fit(self.labels)
        y = lb.transform(y)
        model = ExplainableBoostingClassifier(**self.params)
    else:
        model = ExplainableBoostingRegressor(**self.params)
    # Replace missing values with a value smaller than all observed values.
    # The per-column fill values are kept on self.min — presumably so the same
    # imputation can be replayed at scoring time (TODO confirm against predict).
    self.min = dict()
    for col in X.names:
        XX = X[:, col]
        self.min[col] = XX.min1()
        if self.min[col] is None or np.isnan(self.min[col]):
            # All-missing (or non-numeric min) column: use a large sentinel.
            self.min[col] = -1e10
        else:
            # One below the observed minimum keeps the fill value distinct.
            self.min[col] -= 1
        XX.replace(None, self.min[col])
        X[:, col] = XX
        # Sanity check: no NAs may remain in the column after imputation.
        assert X[dt.isna(dt.f[col]), col].nrows == 0
    X = X.to_numpy()
    model.fit(X, y)
    importances = self.get_importances(model, X.shape[1])
    self.set_model_properties(
        model=model,
        features=orig_cols,
        importances=importances,
        iterations=self.params["n_estimators"],
    )
def fit(self, X, y, sample_weight=None, eval_set=None, sample_weight_eval_set=None, **kwargs):
    """Fit an InterpretML Explainable Boosting Machine.

    Picks classifier vs regressor based on ``self.num_classes``, imputes the
    frame via ``self.basic_impute``, trains, and stores the result through
    ``self.set_model_properties``. ``sample_weight``/``eval_set`` are accepted
    for interface compatibility but unused.
    """
    from interpret.glassbox import (
        ExplainableBoostingClassifier,
        ExplainableBoostingRegressor,
    )
    # HACK - EBM can't handle our custom logger with unknown level 9 (DATA)
    logging.root.level = 10

    feature_names = list(X.names)

    if self.num_classes >= 2:
        # Classification: encode the project's label set to 0..k-1 integers.
        encoder = LabelEncoder()
        encoder.fit(self.labels)
        y = encoder.transform(y)
        estimator = ExplainableBoostingClassifier(**self.params)
    else:
        estimator = ExplainableBoostingRegressor(**self.params)

    imputed = self.basic_impute(X)
    features_array = imputed.to_numpy()
    estimator.fit(features_array, y)

    self.set_model_properties(
        model=estimator,
        features=feature_names,
        importances=self.get_importances(estimator, features_array.shape[1]),
        iterations=self.params["n_estimators"],
    )
y = df[label] #X,y = datasets.load_boston(return_X_y=True) seed = 1 X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.20, random_state=seed) from interpret import show from interpret.data import ClassHistogram hist = ClassHistogram().explain_data(X_train, y_train, name='Train Data') show(hist) print(type(hist)) from interpret.glassbox import ExplainableBoostingRegressor, LogisticRegression, ClassificationTree, DecisionListClassifier ebm = ExplainableBoostingRegressor(random_state=seed) ebm.fit(X_train, y_train) #Works on dataframes and numpy arrays ebm_global = ebm.explain_global(name='EBM') for i in range(7): ebm_global.visualize(i).write_html('Concrete_Strength/CS_' + df.columns[i] + '.html') preds = ebm.predict(X_test) #for i in range(len(preds)): #print(preds[i],y_test[i]) print(preds) print(y_test) #ebm_global.visualize(0).write_html('zero.html') #ebm_local = ebm.explain_local(X_test, y_test) #ebm_local.visualize(0).write_html("local_zero.html")
# NOTE(review): notebook-exported fragment — `ax`, `axes`, `pred_v`, `liftChart`,
# `regressionSummary`, `train_X`/`train_y`/`test_X`/`test_y` and `plt` are all
# defined in earlier cells not visible here.
# Finish the cumulative-gains / lift figure started above and display it.
ax.set_ylabel('Cumulative Area Prediction')
ax.set_title("Cumulative Gains Chart")
ax = liftChart(pred_v, ax=axes[1], labelBars=False)
ax.set_ylabel("Lift")
plt.tight_layout()
plt.show()

# ### Explainable Boosting Machine Regression

# In[22]:
from interpret.glassbox import ExplainableBoostingRegressor
# Train an EBM regressor on the training split.
ebm = ExplainableBoostingRegressor()
ebm.fit(train_X, train_y)

# In[23]:
# Display the error performance measures on train and test splits.
regressionSummary(train_y, ebm.predict(train_X))
print()
regressionSummary(test_y, ebm.predict(test_X))

# In[19]:
from interpret import show
# Build the model's global-interpretability explanation.
ebm_global = ebm.explain_global()
def build_estimator(args, train_data=None):
    """Build an ExplainableBoostingRegressor configured from ``args``.

    Parameters
    ----------
    args : dict
        Extra keyword arguments forwarded to ``ExplainableBoostingRegressor``.
    train_data : tuple, required
        ``(X, y)``-style tuple; only ``train_data[0].shape[1]`` is used, to
        derive one feature name per column.

    Raises
    ------
    ValueError
        If ``train_data`` is not supplied (the original crashed with an
        opaque ``TypeError`` on ``None[0]`` instead).
    """
    if train_data is None:
        raise ValueError("train_data is required to derive feature names")
    # BUG FIX: feature names were misspelled "featur_{i}".
    feature_names = [f"feature_{i}" for i in range(train_data[0].shape[1])]
    return ExplainableBoostingRegressor(random_state=RANDOM_STATE,
                                        feature_names=feature_names,
                                        **args)