Exemplo n.º 1
0
def spline_classification_plot(ax, X, y, X_eval, y_eval, gam_ref):
    """Fit a monotone logistic GAM, draw it on *ax*, and score it on held-out data.

    A ``LogisticGAM`` with one spline term on feature 0 (5 splines,
    constrained to be monotonically increasing) is tuned with
    ``gridsearch(X, y)``. The fitted probability curve is plotted in green
    over 100 evenly spaced points in [0, 1], with its 95% confidence
    intervals as red dashed lines. Metrics computed on ``(X_eval, y_eval)``
    are written into a text box on the axes, and both axis limits are set
    to [0, 1].

    Args:
        ax: Matplotlib axes to draw on.
        X: Training inputs passed to ``gridsearch``.
        y: Training labels passed to ``gridsearch``.
        X_eval: Evaluation inputs used for the metrics and the returned
            confidence intervals.
        y_eval: Evaluation labels.
        gam_ref: Reference model handed to ``MseEval`` together with the
            fitted GAM.

    Returns:
        Tuple ``(ece, mce, brier, acc, mse, ax, confi)`` where ``confi`` is
        ``gam.confidence_intervals(X_eval, width=0.95)``.
    """
    # documentation of LogisticGAM: https://pygam.readthedocs.io/en/latest/api/logisticgam.html
    # Single spline term on feature 0, constrained to be non-decreasing.
    gam = LogisticGAM(
        s(0, constraints='monotonic_inc', n_splines=5)
    ).gridsearch(X, y)

    XX = np.linspace(0, 1, 100)
    ax.plot(XX, gam.predict_proba(XX), c='g')
    ax.plot(XX, gam.confidence_intervals(XX, width=0.95), c='r', ls='--')

    # Compute ECE / MCE / Brier / accuracy after calibration.
    y_ = gam.predict_proba(X_eval)
    # Two-column [1 - p, p] array; built once and shared by the three metrics.
    # NOTE(review): assumes the *Eval helpers do not modify their input.
    probs = np.array([1 - y_, y_]).T
    ece = EceEval(probs, y_eval, num_bins=100)
    mce = MceEval(probs, y_eval, num_bins=100)
    brier = BrierEval(probs, y_eval)
    # NOTE(review): MseEval is defined elsewhere; presumably it compares the
    # fitted GAM against gam_ref over 100 bins -- confirm against its source.
    mse = MseEval(gam, gam_ref, num_bins=100)
    acc = gam.accuracy(X_eval, y_eval)

    ax.text(0.05,
            0.75,
            'ECE=%.4f\nMCE=%.4f\nBrier=%.4f\nACC=%.4f\nMSE=%.4f' %
            (ece, mce, brier, acc, mse),
            size=6,
            ha='left',
            va='center',
            bbox={
                'facecolor': 'green',
                'alpha': 0.5,
                'pad': 4
            })
    ax.set_xlim(0.0, 1.0)
    ax.set_ylim(0.0, 1.0)

    confi = gam.confidence_intervals(X_eval, width=0.95)
    # pyGAM's summary() prints the table itself; the original Python 2
    # `print gam.summary()` is a SyntaxError on Python 3 and only added a
    # stray "None" line on Python 2.
    gam.summary()
    return ece, mce, brier, acc, mse, ax, confi
Exemplo n.º 2
0
# -----------------------------------------------------
# Load the breast cancer data set and keep only the first 6 features.

ds = load_breast_cancer()

X = ds.data[:, :6]
y = ds.target

# Names of the 6 retained feature columns.
selected_features = ds.feature_names[:6]

# -----------------------------------------------------
# Fit a model with the default parameters and show its summary.
gam = LogisticGAM().fit(X, y)
gam.summary()

# In-sample scores; the results are not stored (values noted from a past run).
roc_auc_score(y, gam.predict_proba(X))  #0.994173140954495
gam.accuracy(X, y)  #0.9560632688927944

# -----------------------------------------------------
# Explore and interpret individual features:
# one wide figure with a single row holding one subplot per feature.

plt.ion()
plt.rcParams['figure.figsize'] = (28, 8)

fig, axs = plt.subplots(nrows=1, ncols=X.shape[1])

for i, ax in enumerate(axs):
    XX = gam.generate_X_grid(term=i, meshgrid=True)
    pdep, confi = gam.partial_dependence(term=i,
Exemplo n.º 3
0
# NOTE(review): auc_log2 is defined outside this excerpt -- confirm it is the
# AUC for the X_train_scaled_poly model that this label describes.
print("\n calculate cross-validated AUC  (M2. X_train_scaled_poly):", auc_log2)

# 10-fold cross-validated accuracy of logreg on the polynomial features.
acc_log3 = cross_val_score(logreg, X_train_scaled_poly, Y_train, cv=10, scoring='accuracy').mean()
# Report the value computed just above (the original printed acc_log2).
print("\n calculate cross-validated accurancy  (M2. X_train_scaled_poly):", acc_log3)

# Out-of-fold predictions for the same model and data.
# NOTE(review): the sklearn.cross_validation module was removed in
# scikit-learn 0.20; cross_val_predict now lives in sklearn.model_selection.
# Left as-is because the import is outside this excerpt.
acc_logs3 = cross_validation.cross_val_predict(logreg, X_train_scaled_poly, Y_train, cv=10)
# Score the predictions computed just above (the original scored acc_logs2,
# while the classification report on the next line already used acc_logs3).
print(metrics.accuracy_score(Y_train, acc_logs3))
print(metrics.classification_report(Y_train, acc_logs3))
# NOTE(review): requires logreg to have been fitted earlier in the file.
print(logreg.coef_)
print('\n ------------------------------------------------------------------')
# call predict_proba() to get the list of probabilities that the classifier assigned to each instance for each class:
###############################################################################################################################
# GAM
import pandas as pd
from pygam import LogisticGAM
# Fit a model with the default parameters
gam = LogisticGAM().fit(X_train_scaled, Y_train)
gam.summary()
print('gam.accuracy(X_train_scaled, Y_train):',gam.accuracy(X_train_scaled, Y_train))
print('gam.accuracy(X_test_scaled, Y_test):',gam.accuracy(X_test_scaled, Y_test))
# NOTE(review): assumes LogisticGAM is accepted by scikit-learn's
# cross_val_score as an estimator -- TODO confirm.
acc_loggamc = cross_val_score(gam, X_train_scaled, Y_train, cv=10, scoring='accuracy').mean()
print('acc_loggam_cross-validation, train_scaled',acc_loggamc)


# make predictions for testing set
Y_scaler_pred_class = logreg.predict(X_test_scaled)
# calculate testing accuracy
# NOTE(review): `metrics` is already used above, so it must also be imported
# earlier in the file; this import is kept so the excerpt is unchanged.
from sklearn import metrics

print('\n ------------------------------------------------------------------')
print("\n calculate testing accuracy (M1. X_train_scaled):", metrics.accuracy_score(Y_test, Y_scaler_pred_class))
print('\n ------------------------------------------------------------------')
#  ROC curves and AUC