示例#1
0
    def ga2m(self):
        """Fit an EBM alongside baseline glassbox models and show a dashboard.

        Trains a linear regression, a regression tree, and an Explainable
        Boosting Machine on (self.x, self.y), builds performance and global
        explanations for each, and renders everything in one dashboard.
        """
        # Data-level explanation of the raw features/target.
        raw_data_view = Marginal().explain_data(self.x, self.y, name="Raw Data")

        # Baseline glassbox models for comparison.
        linear_model = LinearRegression()
        linear_model.fit(self.x, self.y)

        tree_model = RegressionTree()
        tree_model.fit(self.x, self.y)

        # The Explainable Boosting Machine itself.
        # For classification, use ExplainableBoostingClassifier() instead.
        ebm_model = ExplainableBoostingRegressor()
        ebm_model.fit(self.x, self.y)

        # Performance explanations for each fitted model.
        ebm_perf_view = RegressionPerf(ebm_model.predict).explain_perf(
            self.x, self.y, name="EBM")
        linear_perf_view = RegressionPerf(linear_model.predict).explain_perf(
            self.x, self.y, name="Linear Regression")
        tree_perf_view = RegressionPerf(tree_model.predict).explain_perf(
            self.x, self.y, name="Regression Tree")

        # Global (whole-dataset) explanations for each model.
        ebm_global_view = ebm_model.explain_global(name="EBM")
        linear_global_view = linear_model.explain_global(name="LinearRegression")
        tree_global_view = tree_model.explain_global(name="Regression Tree")

        # Render all explanations in a single interactive dashboard.
        show([
            raw_data_view,
            linear_global_view,
            linear_perf_view,
            tree_global_view,
            tree_perf_view,
            ebm_perf_view,
            ebm_global_view,
        ])
示例#2
0
def train_titanic_regression(interactions):
    """Train an EBM regressor on Titanic data to predict passenger Age.

    Parameters:
        interactions: forwarded to ExplainableBoostingRegressor(interactions=...)
            (number or list of pairwise interactions to model).

    Returns:
        (model, x_test, y_test): the fitted regressor plus the held-out split.
    """
    df = pd.read_csv(os.path.join('examples', 'titanic_train.csv'))
    df = df.dropna()
    feature_columns = ['SibSp', 'Fare', 'Pclass']
    label_column = "Age"

    # BUG FIX: the original label-encoded the target, but "Age" is a
    # continuous regression target — LabelEncoder collapses it onto arbitrary
    # integer ranks (and was fed a 2-D frame via df[[label_column]]),
    # distorting what the regressor learns. Use the raw values as a 1-D
    # array, matching the shape the encoder used to return.
    y = df[label_column].to_numpy()
    x = df[feature_columns]
    x_train, x_test, y_train, y_test = train_test_split(x, y)
    model = ExplainableBoostingRegressor(interactions=interactions)
    model.fit(x_train, y_train)

    return model, x_test, y_test
    def fit(self,
            X,
            y,
            sample_weight=None,
            eval_set=None,
            sample_weight_eval_set=None,
            **kwargs):
        """Fit an interpret-ml Explainable Boosting model on (X, y).

        X appears to be a datatable Frame (uses .names, dt.f slicing) —
        TODO confirm against callers. For classification
        (self.num_classes >= 2), y is label-encoded and an
        ExplainableBoostingClassifier is used; otherwise an
        ExplainableBoostingRegressor. Missing values in each column are
        replaced in place with a value strictly smaller than all observed
        values (recorded per-column in self.min, presumably so the same
        fill can be reapplied at predict time — verify). The
        sample_weight / eval_set / sample_weight_eval_set / kwargs
        parameters are accepted but not used by this implementation.
        """
        from interpret.glassbox import (
            ExplainableBoostingClassifier,
            ExplainableBoostingRegressor,
        )

        logging.root.level = (
            10
        )  # HACK - EBM can't handle our custom logger with unknown level 9 (DATA)

        # Remember the original column names; X is converted to numpy below.
        orig_cols = list(X.names)
        if self.num_classes >= 2:
            # Classification: map arbitrary labels onto integer classes.
            lb = LabelEncoder()
            lb.fit(self.labels)
            y = lb.transform(y)
            model = ExplainableBoostingClassifier(**self.params)
        else:
            model = ExplainableBoostingRegressor(**self.params)

        # Replace missing values with a value smaller than all observed values
        self.min = dict()
        for col in X.names:
            XX = X[:, col]
            # datatable's scalar column min; None for all-missing columns.
            self.min[col] = XX.min1()
            if self.min[col] is None or np.isnan(self.min[col]):
                self.min[col] = -1e10  # fallback sentinel when no finite min exists
            else:
                self.min[col] -= 1  # strictly below every observed value
            XX.replace(None, self.min[col])  # fill missing cells in place
            X[:, col] = XX
            # Sanity check: no missing values remain in this column.
            assert X[dt.isna(dt.f[col]), col].nrows == 0
        X = X.to_numpy()

        model.fit(X, y)
        importances = self.get_importances(model, X.shape[1])
        self.set_model_properties(
            model=model,
            features=orig_cols,
            importances=importances,
            iterations=self.params["n_estimators"],
        )
示例#4
0
    def fit(self,
            X,
            y,
            sample_weight=None,
            eval_set=None,
            sample_weight_eval_set=None,
            **kwargs):
        """Fit an interpret-ml Explainable Boosting model on (X, y).

        For classification (self.num_classes >= 2) the labels are
        integer-encoded and an ExplainableBoostingClassifier is trained;
        otherwise an ExplainableBoostingRegressor. Missing values are
        handled by self.basic_impute before training. The sample_weight /
        eval_set / sample_weight_eval_set / kwargs parameters are accepted
        but not used by this implementation.
        """
        from interpret.glassbox import (
            ExplainableBoostingClassifier,
            ExplainableBoostingRegressor,
        )

        # HACK - EBM can't handle our custom logger with unknown level 9 (DATA)
        logging.root.level = 10

        # Capture column names before X is converted to a numpy array.
        feature_names = list(X.names)

        if self.num_classes >= 2:
            # Classification: map arbitrary labels onto integer classes.
            encoder = LabelEncoder()
            encoder.fit(self.labels)
            y = encoder.transform(y)
            estimator = ExplainableBoostingClassifier(**self.params)
        else:
            estimator = ExplainableBoostingRegressor(**self.params)

        # Impute missing values, then hand a dense numpy array to the EBM.
        X = self.basic_impute(X)
        X = X.to_numpy()

        estimator.fit(X, y)

        self.set_model_properties(
            model=estimator,
            features=feature_names,
            importances=self.get_importances(estimator, X.shape[1]),
            iterations=self.params["n_estimators"],
        )
示例#5
0
# Target column; `df`, `X`, and `label` are defined earlier in the script.
y = df[label]
#X,y = datasets.load_boston(return_X_y=True)
# Fixed seed for a reproducible split and model.
seed = 1
X_train, X_test, y_train, y_test = train_test_split(X,
                                                    y,
                                                    test_size=0.20,
                                                    random_state=seed)
from interpret import show
from interpret.data import ClassHistogram

# Explore the training data in an interactive histogram view.
# NOTE(review): ClassHistogram is a classification-oriented explainer but the
# model below is a regressor — confirm this is intended for a continuous y.
hist = ClassHistogram().explain_data(X_train, y_train, name='Train Data')
show(hist)
print(type(hist))
from interpret.glassbox import ExplainableBoostingRegressor, LogisticRegression, ClassificationTree, DecisionListClassifier

# Train the Explainable Boosting Machine regressor.
ebm = ExplainableBoostingRegressor(random_state=seed)
ebm.fit(X_train, y_train)  #Works on dataframes and numpy arrays
ebm_global = ebm.explain_global(name='EBM')
# Export the global explanation for the first 7 features as HTML files
# (assumes the Concrete_Strength/ directory already exists — TODO confirm).
for i in range(7):
    ebm_global.visualize(i).write_html('Concrete_Strength/CS_' +
                                       df.columns[i] + '.html')

# Predict on the held-out set and print predictions vs. actuals.
preds = ebm.predict(X_test)
#for i in range(len(preds)):
#print(preds[i],y_test[i])
print(preds)
print(y_test)

#ebm_global.visualize(0).write_html('zero.html')
#ebm_local = ebm.explain_local(X_test, y_test)
#ebm_local.visualize(0).write_html("local_zero.html")
示例#6
0
# Finish the cumulative-gains panel started earlier in the script.
ax.set_ylabel('Cumulative Area Prediction')
ax.set_title("Cumulative Gains Chart")

# Lift chart in the second panel; liftChart is defined elsewhere
# (presumably the dmba utility library — TODO confirm).
ax = liftChart(pred_v, ax=axes[1], labelBars=False)
ax.set_ylabel("Lift")

plt.tight_layout()
plt.show()

# ### Explainable Boosting Machine Regression

# In[22]:

from interpret.glassbox import ExplainableBoostingRegressor

# Fit an EBM regressor on the training split created earlier.
ebm = ExplainableBoostingRegressor()
ebm.fit(train_X, train_y)

# In[23]:

# Display the error performance measures

# regressionSummary is defined elsewhere (presumably dmba — TODO confirm);
# reports train vs. test error to gauge overfitting.
regressionSummary(train_y, ebm.predict(train_X))
print()
regressionSummary(test_y, ebm.predict(test_X))

# In[19]:

from interpret import show

# Build the global (whole-dataset) explanation of the fitted EBM.
ebm_global = ebm.explain_global()
示例#7
0
 def build_estimator(args, train_data=None):
     """Build an ExplainableBoostingRegressor with generated feature names.

     Parameters:
         args: mapping of extra keyword arguments forwarded to the EBM.
         train_data: sequence whose first element is the feature matrix;
             its column count determines how many feature names to generate.
             Required despite the None default.

     Returns:
         An unfitted ExplainableBoostingRegressor.
     """
     # The None default was unconditionally dereferenced below; fail with a
     # clear message instead of an opaque TypeError.
     if train_data is None:
         raise ValueError("train_data is required to derive feature names")
     # BUG FIX: generated names read "featur_<i>" (typo) — now "feature_<i>".
     feature_names = [f"feature_{i}" for i in range(train_data[0].shape[1])]
     return ExplainableBoostingRegressor(random_state=RANDOM_STATE,
                                         feature_names=feature_names,
                                         **args)