def bac_metric_wrapper(y, y_pred):
    """Promote 1-D label/prediction vectors to 2-D column arrays, then score.

    bac_metric expects 2-D inputs; a flat vector of length n is viewed as an
    (n, 1) column before delegating.
    """
    if y.ndim == 1:
        y = y.reshape(-1, 1)
    if y_pred.ndim == 1:
        y_pred = y_pred.reshape(-1, 1)
    return bac_metric(y, y_pred)
def apply_cross_validation(X_train, Y_train, n_folds, clf, clf_call):
    """Return the mean BAC score of ``clf`` over ``n_folds`` CV folds.

    NOTE(review): this definition is shadowed by a later function of the same
    name (taking a ``which`` dict) further down the file -- confirm which one
    callers intend to use.

    WARNING(security): ``clf_call`` is evaluated with ``eval('clf.' + ...)``,
    so it executes arbitrary code and relies on the *local* names ``train``,
    ``test``, ``X_train`` and ``Y_train`` being visible to the evaluated
    expression. Never pass an untrusted string.

    Parameters:
        X_train, Y_train: training features / labels (indexable by fold rows).
        n_folds: number of KFold splits.
        clf: estimator object the evaluated expression is applied to.
        clf_call: expression string, e.g.
            "fit(X_train[train], Y_train[train]).predict_proba(X_train[test])[:, 1]"
    """
    # Old-style sklearn KFold API: first arg is n_samples, not the data itself.
    kfold = cross_validation.KFold(len(X_train), n_folds=n_folds)
    cross_vals = []
    for train, test in kfold:
        # Evaluated in this scope -- the expression may reference train/test.
        XX = eval('clf.' + clf_call)
        YY = Y_train[test]
        # normalize_array presumably rescales both arrays consistently --
        # TODO confirm against libscores.
        [cXX, cYY] = libscores.normalize_array(XX, YY)
        # bac_metric wants 2-D input, hence the added leading axis.
        cur = (libscores.bac_metric(cXX[np.newaxis, :], cYY[np.newaxis, :]))
        cross_vals.append(cur)
    return np.mean(cross_vals)
def apply_cross_validation(X_train, Y_train, clf, clf_call, which):
    """Return the mean CV score of ``clf``, metric chosen via ``which``.

    NOTE(review): this redefinition shadows the earlier 5-argument
    ``apply_cross_validation`` above -- only this version is reachable after
    module import; confirm that is intentional.

    WARNING(security): ``clf_call`` is evaluated with ``eval('clf.' + ...)``
    and relies on the *local* names ``train``, ``test``, ``X_train`` and
    ``Y_train`` being visible to the evaluated expression. Never pass an
    untrusted string.

    Parameters:
        X_train, Y_train: training features / labels (indexable by fold rows).
        clf: estimator object the evaluated expression is applied to.
        clf_call: expression string applied to ``clf`` each fold.
        which: dict with keys 'n_folds' (int) and 'metric'
            ('bac_metric' selects BAC; anything else falls back to AUC).
    """
    # Old-style sklearn KFold API: first arg is n_samples, not the data itself.
    kfold = cross_validation.KFold(len(X_train), n_folds=which['n_folds'])
    cross_vals = []
    for train, test in kfold:
        # Evaluated in this scope -- the expression may reference train/test.
        XX = eval('clf.' + clf_call)
        YY = Y_train[test]
        [cXX, cYY] = libscores.normalize_array(XX, YY)
        if which['metric'] == 'bac_metric':
            cur = libscores.bac_metric(cXX[np.newaxis, :], cYY[np.newaxis, :])
        else:
            # Any value other than 'bac_metric' silently means AUC.
            cur = libscores.auc_metric(cXX[np.newaxis, :], cYY[np.newaxis, :])
        cross_vals.append(cur)
    return np.mean(cross_vals)
Ypred_te = myclassifier.predict(D.data['X_test']) # Making probabilistic predictions (each line contains the proba of belonging in each class) Yprob_tr = myclassifier.predict_proba(D.data['X_train']) Yprob_va = myclassifier.predict_proba(D.data['X_valid']) Yprob_te = myclassifier.predict_proba(D.data['X_test']) # Training success rate and error bar: # First the regular accuracy (fraction of correct classifications) acc = accuracy_score(Ytrue_tr, Ypred_tr) # Then two AutoML challenge metrics, working on the other representation auc = auc_metric(Yonehot_tr, Yprob_tr, task='multiclass.classification') bac = bac_metric(Yonehot_tr, Yprob_tr, task='multiclass.classification') # Note that the AutoML metrics are rescaled between 0 and 1. print "%s\t%5.2f\t%5.2f\t%5.2f\t(%5.2f)" % ( key, auc, bac, acc, ebar(acc, Ytrue_tr.shape[0])) print "The error bar is valid for Acc only" # Note: we do not know Ytrue_va and Ytrue_te # See modelTest for a better evaluation using cross-validation # Another useful tool is the confusion matrix from sklearn.metrics import confusion_matrix print "Confusion matrix for %s" % key print confusion_matrix(Ytrue_tr, Ypred_tr) # On peut aussi la visualiser, voir: # http://scikit-learn.org/stable/auto_examples/model_selection/plot_confusion_matrix.html
# scores = cross_validation.cross_val_score(clf, D.data['X_train'], D.data['Y_train'], cv=5) # vprint( verbose, "[+] SVM croos_val done! Mean = %0.2f" % (scores.mean())) clf = RForestClass(n_estimators, random_state=1, n_jobs=4) kfold = cross_validation.KFold(len(nD.data['X_train']), n_folds=10) cross_vals = [] for train, test in kfold: #print "dims" XX = clf.fit(nD.data['X_train'][train], D.data['Y_train'][train]).predict_proba( nD.data['X_train'][test])[:, 1] YY = D.data['Y_train'][test] [cXX, cYY] = libscores.normalize_array(XX, YY) cur = (libscores.bac_metric(cXX[np.newaxis, :], cYY[np.newaxis, :])) cross_vals.append(cur) score_mean = np.mean(cross_vals) if cycle == 0: print("") print("$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$") print("") print("[+] RF croos_val w/o fs done! Mean = %0.2f" % (score_mean)) if cycle > 0: print( "[+] RF croos_val with fs done (%i features)! Mean = %0.2f" % (100 - cycle * 11, score_mean)) if cycle == 8: print("")
def bac_metric(self, y, y_pred):
    """Balanced-accuracy score of ``y_pred`` against ``y`` for this task.

    Thin delegate to ``libscores.bac_metric`` using the task type stored on
    the instance.
    """
    task = self.task
    return libscores.bac_metric(y, y_pred, task)