Exemplo n.º 1
0
def print_measurements(pred_prob):
    """Print the calibration error and discrimination of churn predictions.

    Column 1 of ``pred_prob`` is used as the predicted churn probability and
    is compared against ``y == 1``, where ``y`` is the module-level label
    array. ``calibration`` and ``discrimination`` are defined elsewhere in
    the file; each result is printed with four decimal places.

    Args:
        pred_prob: 2-D array indexed as ``pred_prob[:, 1]``, e.g. the value
            returned by ``run_prob_cv``.
    """
    churn_prob, is_churn = pred_prob[:, 1], y == 1
    print("  %-20s %.4f" % ("Calibration Error", calibration(churn_prob, is_churn)))
    print("  %-20s %.4f" % ("Discrimination", discrimination(churn_prob, is_churn)))

    print("Note -- Lower calibration is better, higher discrimination is better")


def report_classifier_measurements():
    """Print the measurements of each classifier under cross-validation.

    These statements used to sit inside ``print_measurements`` itself, so
    every call re-entered ``print_measurements`` and never terminated. They
    live in their own function so that ``print_measurements`` only reports
    on the predictions it is given.
    """
    print("Support vector machines:")
    print_measurements(run_prob_cv(X, y, SVC, probability=True))

    print("Random forests:")
    print_measurements(run_prob_cv(X, y, RF, n_estimators=18))

    # NOTE(review): the excerpt ends after this heading; no K-nearest-neighbors
    # measurement call is present in the original, so none is added here.
    print("K-nearest-neighbors:")
# Overwrite X with the result of scaler.fit_transform(X).
# NOTE(review): `scaler` is created outside this excerpt -- presumably a
# feature scaler; confirm against its definition.
X = scaler.fit_transform(X)

def run_prob_cv(X, y, clf_class, **kwargs):
    """Collect out-of-fold class probabilities using 5-fold cross-validation.

    For every (train, test) index pair produced by a shuffled 5-fold
    ``KFold`` over ``len(y)`` samples, a fresh ``clf_class(**kwargs)`` is
    fitted on the training rows and its ``predict_proba`` output for the
    test rows is written into the result.

    Args:
        X: feature array, indexable by the fold index arrays.
        y: label array, indexable by the fold index arrays.
        clf_class: classifier class exposing ``fit`` and ``predict_proba``.
        **kwargs: keyword arguments forwarded to ``clf_class``.

    Returns:
        Array of shape ``(len(y), 2)`` holding the predicted probabilities.
    """
    n_samples = len(y)
    folds = KFold(n_samples, n_folds=5, shuffle=True)
    probabilities = np.zeros((n_samples, 2))

    for fit_rows, held_out_rows in folds:
        model = clf_class(**kwargs)
        model.fit(X[fit_rows], y[fit_rows])
        # Store probabilities rather than hard class predictions.
        probabilities[held_out_rows] = model.predict_proba(X[held_out_rows])

    return probabilities

# Calibration error of a random forest for every forest size in range(5, 100).
# `error` and `n_trees` are built in lockstep so they can become DataFrame
# columns of equal length.
error = []
n_trees = []

for n in range(5, 100):
    probs = run_prob_cv(X, y, RF, n_estimators=n)
    # The loop previously assigned to `errors[n]`, a name that was never
    # defined, and left `n_trees` empty; record both values by appending.
    n_trees.append(n)
    error.append(calibration(probs[:, 1], y == 1))

calibration_errors = pd.DataFrame({'calibration_error': error,
                                   'n_trees': n_trees})

# Plot with ggplot when it can be used; otherwise fall back to printing the
# table. The star import is kept in case code outside this excerpt relies on
# the names it brings in.
try:
    from ggplot import *
    ggplot(calibration_errors, aes(x='n_trees', y='calibration_error')) + \
        geom_point()
except Exception:
    # Still best-effort, but no longer swallows KeyboardInterrupt/SystemExit.
    print(calibration_errors)
Exemplo n.º 3
0
    kf = KFold(len(y), n_folds=5, shuffle=True)
    y_prob = np.zeros((len(y), 2))
    for train_index, test_index in kf:
        X_train, X_test = X[train_index], X[test_index]
        y_train = y[train_index]
        clf = clf_class(**kwargs)
        clf.fit(X_train, y_train)
        # Predict probabilities, not classes
        y_prob[test_index] = clf.predict_proba(X_test)
    return y_prob


# Calibration error of a random forest for every forest size in range(5, 100).
# `error` and `n_trees` are built in lockstep so they can become DataFrame
# columns of equal length.
error = []
n_trees = []

for n in range(5, 100):
    probs = run_prob_cv(X, y, RF, n_estimators=n)
    # The loop previously assigned to `errors[n]`, a name that was never
    # defined, and left `n_trees` empty; record both values by appending.
    n_trees.append(n)
    error.append(calibration(probs[:, 1], y == 1))

calibration_errors = pd.DataFrame({
    'calibration_error': error,
    'n_trees': n_trees
})

# Plot with ggplot when it can be used; otherwise fall back to printing the
# table. The star import is kept in case code outside this excerpt relies on
# the names it brings in.
try:
    from ggplot import *
    ggplot(calibration_errors, aes(x='n_trees', y='calibration_error')) + \
        geom_point()
except Exception:
    # Still best-effort, but no longer swallows KeyboardInterrupt/SystemExit.
    print(calibration_errors)
def print_measurements(pred_prob):
    """Print the calibration error and discrimination of churn predictions.

    Column 1 of ``pred_prob`` is used as the predicted churn probability and
    is compared against ``y == 1``, where ``y`` is the module-level label
    array. ``calibration`` and ``discrimination`` are defined elsewhere in
    the file; each result is printed with four decimal places, followed by a
    note on how to read the two numbers.

    Args:
        pred_prob: 2-D array indexed as ``pred_prob[:, 1]``.
    """
    churn_prob, is_churn = pred_prob[:, 1], y == 1
    # Single-argument print(...) emits the same text under Python 2 and is
    # also valid Python 3, unlike the print statement it replaces.
    print("  %-20s %.4f" % ("Calibration Error", calibration(churn_prob, is_churn)))
    print("  %-20s %.4f" % ("Discrimination", discrimination(churn_prob, is_churn)))

    print("Note -- Lower calibration is better, higher discrimination is better")
Exemplo n.º 5
0
def print_measurements(pred_prob):
    """Report calibration error and discrimination for churn probabilities.

    Takes column 1 of ``pred_prob`` as the churn probability, compares it
    with ``y == 1`` (``y`` is read from module scope) and prints one line per
    measurement, using ``calibration`` and ``discrimination`` defined
    elsewhere in the file.

    Args:
        pred_prob: 2-D array indexed as ``pred_prob[:, 1]``.
    """
    row_format = "  %-20s %.4f"
    positive_prob = pred_prob[:, 1]
    churned = y == 1

    calibration_error = calibration(positive_prob, churned)
    print(row_format % ("Calibration Error", calibration_error))

    discrimination_score = discrimination(positive_prob, churned)
    print(row_format % ("Discrimination", discrimination_score))