def print_measurements(pred_prob):
    """Print the calibration error and discrimination of a probability matrix.

    Column 1 of ``pred_prob`` is scored against the boolean mask ``y == 1``,
    where ``y`` is read from the enclosing module scope (it is not a
    parameter).  The scores come from the ``calibration`` and
    ``discrimination`` helpers, which are defined outside this block.

    Args:
        pred_prob: 2-D array-like indexable as ``pred_prob[:, 1]``;
            presumably one row per sample in ``y`` -- confirm with caller.
    """
    churn_prob, is_churn = pred_prob[:, 1], y == 1
    # A single parenthesized argument prints identically under the Python 2
    # print statement and the Python 3 print function.
    print(" %-20s %.4f" % ("Calibration Error", calibration(churn_prob, is_churn)))
    print(" %-20s %.4f" % ("Discrimination", discrimination(churn_prob, is_churn)))


print("Note -- Lower calibration is better, higher discrimination is better")

print("Support vector machines:")
print_measurements(run_prob_cv(X, y, SVC, probability=True))

print("Random forests:")
print_measurements(run_prob_cv(X, y, RF, n_estimators=18))

# NOTE(review): no measurement call follows this header in the visible
# source; the matching print_measurements(...) call may live further down.
print("K-nearest-neighbors:")
# Re-fit the scaler on the feature matrix and replace X with the result.
# `scaler` and `X` are defined outside this block.
X = scaler.fit_transform(X)


def run_prob_cv(X, y, clf_class, **kwargs):
    """Collect out-of-fold class probabilities using 5-fold cross-validation.

    For every fold a fresh ``clf_class(**kwargs)`` is fitted on the training
    rows and its ``predict_proba`` output is stored at the test rows, so each
    row's probabilities come from a model that was not fitted on that row.

    Args:
        X: Feature matrix, indexable by the fold index arrays.
        y: Target vector, indexable by the fold index arrays.
        clf_class: Classifier class exposing ``fit`` and ``predict_proba``.
        **kwargs: Forwarded unchanged to the ``clf_class`` constructor.

    Returns:
        Array of shape ``(len(y), 2)`` holding the predicted probabilities.
        The fixed width of two columns assumes a binary target.
    """
    # NOTE(review): this call signature (sample count, n_folds=, iterating
    # the object directly) looks like the legacy sklearn.cross_validation
    # KFold -- confirm against the file's imports.
    kf = KFold(len(y), n_folds=5, shuffle=True)
    y_prob = np.zeros((len(y), 2))
    for train_index, test_index in kf:
        X_train, X_test = X[train_index], X[test_index]
        y_train = y[train_index]
        clf = clf_class(**kwargs)
        clf.fit(X_train, y_train)
        # Predict probabilities, not classes
        y_prob[test_index] = clf.predict_proba(X_test)
    return y_prob


# Sweep the number of trees and record the calibration error for each size.
error = []
n_trees = []
for n in range(5, 100):
    probs = run_prob_cv(X, y, RF, n_estimators=n)
    # Append to both lists together so the DataFrame columns stay aligned.
    # (The original wrote to an undefined `errors` name and never filled
    # `n_trees`.)
    error.append(calibration(probs[:, 1], y == 1))
    n_trees.append(n)

calibration_errors = pd.DataFrame({
    'calibration_error': error,
    'n_trees': n_trees,
})

try:
    # Star import kept as-is: later parts of the file may rely on names it
    # brings into scope.
    from ggplot import *
    # NOTE(review): the plot object built here is not assigned or printed;
    # confirm it actually renders in the environment this runs in.
    ggplot(calibration_errors, aes(x='n_trees', y='calibration_error')) + \
        geom_point()
except Exception:
    # Best-effort fallback (e.g. ggplot not installed): show the raw table.
    # Narrowed from a bare except so KeyboardInterrupt/SystemExit propagate.
    print(calibration_errors)
kf = KFold(len(y), n_folds=5, shuffle=True) y_prob = np.zeros((len(y), 2)) for train_index, test_index in kf: X_train, X_test = X[train_index], X[test_index] y_train = y[train_index] clf = clf_class(**kwargs) clf.fit(X_train, y_train) # Predict probabilities, not classes y_prob[test_index] = clf.predict_proba(X_test) return y_prob error = [] n_trees = [] for n in range(5, 100): probs = run_prob_cv(X, y, RF, n_estimators=n) errors[n] = calibration(probs[:, 1], y == 1) calibration_errors = pd.DataFrame({ 'calibration_error': error, 'n_trees': n_trees }) try: from ggplot import * ggplot(calibration_errors,aes(x='n_trees',y='calibration_error')) + \ geom_point() except: print calibration_errors
def print_measurements(pred_prob):
    """Print the calibration error and discrimination of a probability matrix.

    Column 1 of ``pred_prob`` is scored against the boolean mask ``y == 1``,
    where ``y`` is read from the enclosing module scope (it is not a
    parameter).  The scores come from the ``calibration`` and
    ``discrimination`` helpers, which are defined outside this block.

    Args:
        pred_prob: 2-D array-like indexable as ``pred_prob[:, 1]``;
            presumably one row per sample in ``y`` -- confirm with caller.
    """
    churn_prob, is_churn = pred_prob[:, 1], y == 1
    # A single parenthesized argument prints identically under the Python 2
    # print statement and the Python 3 print function.
    print(" %-20s %.4f" % ("Calibration Error", calibration(churn_prob, is_churn)))
    print(" %-20s %.4f" % ("Discrimination", discrimination(churn_prob, is_churn)))


print("Note -- Lower calibration is better, higher discrimination is better")
def print_measurements(pred_prob):
    """Report two quality scores for a matrix of predicted probabilities.

    Takes column 1 of ``pred_prob`` and the mask ``y == 1`` (``y`` comes from
    module scope), passes both to ``calibration`` and then ``discrimination``
    (helpers defined outside this block), and prints one labelled line per
    score.

    Args:
        pred_prob: 2-D array-like indexable as ``pred_prob[:, 1]``;
            presumably aligned row-for-row with ``y`` -- confirm with caller.
    """
    row_format = " %-20s %.4f"
    positive_scores = pred_prob[:, 1]
    positive_mask = y == 1

    # Compute and print each metric in turn, so the first line is already
    # written before the second helper is called.
    calibration_error = calibration(positive_scores, positive_mask)
    print(row_format % ("Calibration Error", calibration_error))

    discrimination_score = discrimination(positive_scores, positive_mask)
    print(row_format % ("Discrimination", discrimination_score))