Exemplo n.º 1
0
def save_best_classifier(target, X, y, make_clf, verbose=False):
    """Cross-validate fresh classifiers and persist the best-scoring one.

    A previously stored entry is read with ``load_shelve`` under the key
    ``'<target>_clf'``. Five stratified folds are then built from ``y``; for
    each fold a new classifier obtained from ``make_clf()`` is fitted on the
    training part and scored with ROC AUC on the held-out part. A fold whose
    score is strictly greater than the best score so far replaces it, and if
    that happened at least once the new best is stored with
    ``insert_shelve``.

    Args:
        target: Value interpolated into the storage key ``'<target>_clf'``.
        X: Samples; must support indexing with the fold index arrays.
        y: Labels; used to stratify the folds and indexed like ``X``.
        make_clf: Zero-argument callable returning ``(clf, name)`` where
            ``clf`` provides ``fit`` and ``predict_proba``.
        verbose: When true, print a classification report and the ROC AUC
            for every fold.

    Returns:
        The best entry as ``(score, clf, (train_index, test_index))``. This
        is the loaded entry (or the ``-inf`` placeholder) when no fold
        scored strictly higher.
    """
    try:
        best = load_shelve('%s_clf' % target)
    except Exception:
        # Best-effort load: any ordinary failure means "nothing stored yet".
        # Exception (not a bare except) lets KeyboardInterrupt and SystemExit
        # propagate instead of being silently turned into a fresh start.
        best = (float('-inf'), False, (False, False))
    new = False
    # NOTE(review): this is the legacy call style where the splitter is built
    # from y and iterated directly - confirm against the installed library.
    cv = StratifiedKFold(y, n_folds=5)
    for train_index, test_index in cv:
        # A fresh classifier per fold, so folds do not share fitted state.
        clf, name = make_clf()
        X_train, X_test = X[train_index], X[test_index]
        y_train, y_test = y[train_index], y[test_index]
        clf.fit(X_train, y_train)
        y_true, y_pred = y_test, clf.predict_proba(X_test)
        # Only the second probability column is scored (presumably the
        # positive class - confirm against the label encoding).
        curr_score = roc_auc_score(y_true, y_pred[:, 1])
        if verbose:
            # NOTE: the text below says "full set", but the report and score
            # are computed on this fold's held-out samples.
            print("Detailed classification report:")
            print()
            print("The model is trained on a folded development set.")
            print("The scores are computed on the full set.")
            print()
            print(classification_report(np.around(y_true), np.around(y_pred[:, 1])))
            print()
            print(curr_score)
            print()
        if curr_score > best[0]:
            new = True
            best = curr_score, clf, (train_index, test_index)
    # `name` is whatever the last make_clf() call returned; it is not read
    # from the stored entry.
    if new:
        print("Saving the best classifier in %s as %s_clf with a score of %.2f" % (name, target, best[0]))
        insert_shelve(best, '%s_clf' % target)
    else:
        print("Loaded the best classifier in %s as %s_clf with a score of %.2f" % (name, target, best[0]))
    return best
Exemplo n.º 2
0
def save_best_classifier(target, X, y, make_clf, verbose=False):
    """Cross-validate fresh classifiers and persist the best-scoring one.

    A previously stored entry is read with ``load_shelve`` under the key
    ``'<target>_clf'``. Five stratified folds are then built from ``y``; for
    each fold a new classifier obtained from ``make_clf()`` is fitted on the
    training part and scored with ROC AUC on the held-out part. A fold whose
    score is strictly greater than the best score so far replaces it, and if
    that happened at least once the new best is stored with
    ``insert_shelve``.

    Args:
        target: Value interpolated into the storage key ``'<target>_clf'``.
        X: Samples; must support indexing with the fold index arrays.
        y: Labels; used to stratify the folds and indexed like ``X``.
        make_clf: Zero-argument callable returning ``(clf, name)`` where
            ``clf`` provides ``fit`` and ``predict_proba``.
        verbose: When true, print a classification report and the ROC AUC
            for every fold.

    Returns:
        The best entry as ``(score, clf, (train_index, test_index))``. This
        is the loaded entry (or the ``-inf`` placeholder) when no fold
        scored strictly higher.
    """
    try:
        best = load_shelve('%s_clf' % target)
    except Exception:
        # Best-effort load: any ordinary failure means "nothing stored yet".
        # Exception (not a bare except) lets KeyboardInterrupt and SystemExit
        # propagate instead of being silently turned into a fresh start.
        best = (float('-inf'), False, (False, False))
    new = False
    # NOTE(review): this is the legacy call style where the splitter is built
    # from y and iterated directly - confirm against the installed library.
    cv = StratifiedKFold(y, n_folds=5)
    for train_index, test_index in cv:
        # A fresh classifier per fold, so folds do not share fitted state.
        clf, name = make_clf()
        X_train, X_test = X[train_index], X[test_index]
        y_train, y_test = y[train_index], y[test_index]
        clf.fit(X_train, y_train)
        y_true, y_pred = y_test, clf.predict_proba(X_test)
        # Only the second probability column is scored (presumably the
        # positive class - confirm against the label encoding).
        curr_score = roc_auc_score(y_true, y_pred[:, 1])
        if verbose:
            # NOTE: the text below says "full set", but the report and score
            # are computed on this fold's held-out samples.
            print("Detailed classification report:")
            print()
            print("The model is trained on a folded development set.")
            print("The scores are computed on the full set.")
            print()
            print(
                classification_report(np.around(y_true),
                                      np.around(y_pred[:, 1])))
            print()
            print(curr_score)
            print()
        if curr_score > best[0]:
            new = True
            best = curr_score, clf, (train_index, test_index)
    # `name` is whatever the last make_clf() call returned; it is not read
    # from the stored entry.
    if new:
        print(
            "Saving the best classifier in %s as %s_clf with a score of %.2f" %
            (name, target, best[0]))
        insert_shelve(best, '%s_clf' % target)
    else:
        print(
            "Loaded the best classifier in %s as %s_clf with a score of %.2f" %
            (name, target, best[0]))
    return best
Exemplo n.º 3
0
def postprocess_data(target, X, y, paths, train=False, submission=False):
    """Run the stored classifier for ``target`` through the post-scoring step.

    Loads the three-item entry saved under ``'<target>_clf'``, hands its
    classifier together with ``X``, ``y`` and ``paths`` to
    ``accumulate_scores``, and forwards the resulting pair to
    ``scores_for_post``. Nothing is returned.

    The ``train`` and ``submission`` arguments are accepted but not used by
    this function.
    """
    shelf_key = '%s_clf' % target
    # The entry must unpack into exactly three items; only the middle one
    # (the classifier) is needed here.
    _stored_score, classifier, _stored_extra = load_shelve(shelf_key)
    accumulated_X, accumulated_y = accumulate_scores(X, y, classifier, paths)
    scores_for_post(accumulated_X, accumulated_y)
Exemplo n.º 4
0
def postprocess_data(target, X, y, paths, train=False, submission=False):
    """Run the stored classifier for ``target`` through the post-scoring step.

    Loads the three-item entry saved under ``'<target>_clf'``, hands its
    classifier together with ``X``, ``y`` and ``paths`` to
    ``accumulate_scores``, and forwards the resulting pair to
    ``scores_for_post``. Nothing is returned.

    The ``train`` and ``submission`` arguments are accepted but not used by
    this function.
    """
    shelf_key = '%s_clf' % target
    # The entry must unpack into exactly three items; only the middle one
    # (the classifier) is needed here.
    _stored_score, classifier, _stored_extra = load_shelve(shelf_key)
    accumulated_X, accumulated_y = accumulate_scores(X, y, classifier, paths)
    scores_for_post(accumulated_X, accumulated_y)