Exemplo n.º 1
0
def train(infile_x="data/X-strat.npy", infile_y="data/y-strat.npy", outfile_model="data/model.pickle", verbose=4):
    """Fit the selected model on the training split and pickle it to disk.

    The model factory is looked up in ``models`` under the ``<model_name>``
    entry of ``main.arguments`` and called with no arguments.

    :param infile_x: first argument handed to ``Dataset``.
    :param infile_y: second argument handed to ``Dataset``.
    :param outfile_model: path the fitted model is written to (binary mode).
    :param verbose: accepted for interface compatibility; not used here.
    """
    data = Dataset(infile_x, infile_y)
    model_factory = models[main.arguments["<model_name>"]]
    estimator = model_factory()
    # Only the training half of the split is needed for fitting.
    train_inputs, _, train_targets, _ = data.train_test_split()

    estimator.fit(train_inputs, train_targets)
    with open(outfile_model, "wb") as sink:
        # A negative protocol asks pickle for the highest one available.
        cPickle.dump(estimator, sink, -1)
    # Single-argument parenthesised print emits the same text as the
    # print statement.
    print("Saved file to {}".format(outfile_model))
Exemplo n.º 2
0
def train(infile_x='data/X-strat.npy',
          infile_y='data/y-strat.npy',
          outfile_model='data/model.pickle',
          verbose=4):
    """Train the model named on the command line and save it as a pickle.

    ``main.arguments['<model_name>']`` selects an entry of ``models``; that
    entry is called without arguments to build the estimator, which is then
    fitted on the training part of ``Dataset.train_test_split()``.

    :param infile_x: first argument handed to ``Dataset``.
    :param infile_y: second argument handed to ``Dataset``.
    :param outfile_model: destination path for the pickled estimator.
    :param verbose: currently unused by this function.
    """
    loaded = Dataset(infile_x, infile_y)
    fitted = models[main.arguments['<model_name>']]()
    # The test half of the split is deliberately ignored.
    split = loaded.train_test_split()
    X_fit, _, y_fit, _ = split

    fitted.fit(X_fit, y_fit)
    with open(outfile_model, 'wb') as handle:
        # Protocol -1 means "highest protocol available".
        cPickle.dump(fitted, handle, -1)
    message = "Saved file to {}".format(outfile_model)
    print(message)
Exemplo n.º 3
0
def report(infile_x='data/X-strat.npy', infile_y='data/y-strat.npy'):
    """Fit the selected model, then print and plot its evaluation.

    Output, in order: classification reports for the training and test
    splits, the test-set confusion matrix, the area under the
    precision-recall curve, and finally a precision-recall plot shown via
    ``pl.show()``.

    :param infile_x: first argument handed to ``Dataset``.
    :param infile_y: second argument handed to ``Dataset``.
    """
    data = Dataset(infile_x, infile_y)
    estimator = models[main.arguments['<model_name>']]()
    X_train, X_test, y_train, y_test = data.train_test_split()

    estimator.fit(X_train, y_train)
    probas = estimator.predict_proba(X_test)
    # Test predictions are 1 wherever column 1 of predict_proba is strictly
    # larger than column 0, and 0 otherwise.
    y_pred = ((probas[:, 1] - probas[:, 0]) > 0).astype(int)
    y_pred_train = estimator.predict(X_train)

    # Classification reports; print("") emits the same blank line as a
    # bare print statement.
    sections = (
        ("Classification report for training set:", y_train, y_pred_train),
        ("Classification report for test set:", y_test, y_pred),
    )
    for heading, truth, predicted in sections:
        print(heading)
        print("")
        print(classification_report(truth, predicted))

    # Confusion matrix on the test split
    print("Confusion matrix:")
    print(confusion_matrix(y_test, y_pred))
    print("")

    # Precision-recall curve from the column-1 scores; thresholds are unused.
    precision, recall, _ = precision_recall_curve(y_test, probas[:, 1])
    area = auc(recall, precision)
    print("Area Under Curve: %0.2f" % area)

    # Plot
    pl.clf()
    pl.plot(recall, precision, label='precision-recall curve')
    pl.xlabel('recall')
    pl.ylabel('precision')
    pl.ylim([0.0, 1.05])
    pl.xlim([0.0, 1.0])
    plot_title = 'Precision-Recall; AUC=%0.2f' % area
    pl.title(plot_title)
    pl.legend(loc='lower left')
    pl.show()
Exemplo n.º 4
0
def curve(infile_x='data/X-strat.npy',
          infile_y='data/y-strat.npy',
          learning_curve=learning_curve):
    """Plot training and test scores against the number of training cases.

    :param infile_x: first argument handed to ``Dataset``.
    :param infile_y: second argument handed to ``Dataset``.
    :param learning_curve: callable invoked as
        ``learning_curve(dataset, clf, verbose=1)``; it must return
        ``(scores_train, scores_test, sizes)``. Defaults to the module-level
        ``learning_curve`` bound at definition time.
    """
    data = Dataset(infile_x, infile_y)
    estimator = models[main.arguments['<model_name>']]()
    scores_train, scores_test, sizes = learning_curve(
        data, estimator, verbose=1)

    # One line per split: training in blue, test in red.
    series = (
        (scores_train, 'b', 'training set'),
        (scores_test, 'r', 'test set'),
    )
    for scores, line_format, legend_label in series:
        pl.plot(sizes, scores, line_format, label=legend_label)

    pl.xlabel('n training cases')
    pl.ylabel('score')
    pl.title('Learning curve')
    pl.legend(loc='lower right')
    pl.show()
Exemplo n.º 5
0
def report(infile_x="data/X-strat.npy", infile_y="data/y-strat.npy"):
    """Evaluate the selected model and show its precision-recall curve.

    The model is fitted on the training split. The function then prints a
    classification report for each split, the confusion matrix for the test
    split and the area under the precision-recall curve, and displays the
    curve with ``pl.show()``.

    :param infile_x: first argument handed to ``Dataset``.
    :param infile_y: second argument handed to ``Dataset``.
    """
    loaded = Dataset(infile_x, infile_y)
    model = models[main.arguments["<model_name>"]]()
    X_train, X_test, y_train, y_test = loaded.train_test_split()

    model.fit(X_train, y_train)
    test_probas = model.predict_proba(X_test)
    # Predict 1 where column 1 strictly exceeds column 0, else 0.
    margin = test_probas[:, 1] - test_probas[:, 0]
    test_pred = (margin > 0).astype(int)
    train_pred = model.predict(X_train)

    # Classification report (print("") produces the same empty line as a
    # bare print statement).
    print("Classification report for training set:")
    print("")
    train_report = classification_report(y_train, train_pred)
    print(train_report)
    print("Classification report for test set:")
    print("")
    test_report = classification_report(y_test, test_pred)
    print(test_report)

    # Compute confusion matrix
    print("Confusion matrix:")
    print(confusion_matrix(y_test, test_pred))
    print("")

    # Precision-Recall curve; the returned thresholds are not needed.
    pr_points = precision_recall_curve(y_test, test_probas[:, 1])
    precision, recall, _ = pr_points
    area = auc(recall, precision)
    print("Area Under Curve: %0.2f" % area)

    # Plot
    pl.clf()
    pl.plot(recall, precision, label="precision-recall curve")
    pl.xlabel("recall")
    pl.ylabel("precision")
    pl.ylim([0.0, 1.05])
    pl.xlim([0.0, 1.0])
    pl.title("Precision-Recall; AUC=%0.2f" % area)
    pl.legend(loc="lower left")
    pl.show()
Exemplo n.º 6
0
def _analyze(clf, infile_x="data/X-strat.npy", infile_y="data/y-strat.npy"):
    """Fit ``clf`` and rank the test rows of each class by score.

    The score of a test row is column 1 of ``clf.predict_proba`` minus
    column 0. No decision threshold is applied: the four results are
    orderings of the rows whose label is 1 or 0.

    :param clf: estimator providing ``fit`` and ``predict_proba``.
    :param infile_x: first argument handed to ``Dataset``.
    :param infile_y: second argument handed to ``Dataset``.
    :returns: ``(true_pos, true_neg, false_pos, false_neg)``, each a
        ``(rows, scores)`` tuple:

        * ``true_pos``  -- label-1 rows in reversed ``argsort`` order;
        * ``true_neg``  -- label-0 rows in ``argsort`` order;
        * ``false_pos`` -- label-0 rows in reversed ``argsort`` order;
        * ``false_neg`` -- label-1 rows in ``argsort`` order.
    """
    data = Dataset(infile_x, infile_y)
    X_train, X_test, y_train, y_test = data.train_test_split()

    clf.fit(X_train, y_train)
    probas = clf.predict_proba(X_test)
    margin = probas[:, 1] - probas[:, 0]

    pos_mask = y_test == 1
    pos_rows, pos_scores = X_test[pos_mask], margin[pos_mask]
    pos_order = pos_scores.argsort()

    neg_mask = y_test == 0
    neg_rows, neg_scores = X_test[neg_mask], margin[neg_mask]
    neg_order = neg_scores.argsort()

    # Each helper indexes afresh so the returned arrays never share storage
    # with one another.
    def _forward(rows, scores, order):
        return (rows[order], scores[order])

    def _backward(rows, scores, order):
        return (rows[order][::-1], scores[order][::-1])

    true_pos = _backward(pos_rows, pos_scores, pos_order)
    true_neg = _forward(neg_rows, neg_scores, neg_order)
    false_pos = _backward(neg_rows, neg_scores, neg_order)
    false_neg = _forward(pos_rows, pos_scores, pos_order)

    return true_pos, true_neg, false_pos, false_neg
Exemplo n.º 7
0
def search(infile_x='data/X-strat.npy',
           infile_y='data/y-strat.npy',
           verbose=4,
           n_jobs=1):
    """Run ``grid_search`` for the model named on the command line.

    :param infile_x: first argument handed to ``Dataset``.
    :param infile_y: second argument handed to ``Dataset``.
    :param verbose: forwarded to ``grid_search`` as ``verbose``.
    :param n_jobs: forwarded to ``grid_search`` as ``n_jobs``.
    :returns: whatever ``grid_search`` returns.
    """
    model_factory = models[main.arguments['<model_name>']]
    data = Dataset(infile_x, infile_y)

    # Positional part: the dataset, a fresh estimator and the parameter grid
    # stored on the model factory itself.
    estimator = model_factory()
    param_grid = model_factory.grid_search_params

    # Keyword part: folds come from the dataset, scoring uses f1_score.
    options = dict(
        verbose=verbose,
        cv=data.split_indices,
        score_func=f1_score,
        n_jobs=n_jobs,
    )
    return grid_search(data, estimator, param_grid, **options)
Exemplo n.º 8
0
def _analyze(clf, infile_x='data/X-strat.npy', infile_y='data/y-strat.npy'):
    """Fit ``clf`` on the training split and order test rows by score.

    A row's score is ``predict_proba`` column 1 minus column 0. Rows are
    grouped by their label only (1 or 0); no cut-off is applied to the
    score. Each group is returned twice, once in ``argsort`` order and once
    in the reverse of that order.

    :param clf: estimator providing ``fit`` and ``predict_proba``.
    :param infile_x: first argument handed to ``Dataset``.
    :param infile_y: second argument handed to ``Dataset``.
    :returns: ``(true_pos, true_neg, false_pos, false_neg)`` where every
        element is a ``(rows, scores)`` tuple. ``true_pos`` and
        ``false_neg`` hold the label-1 rows (reversed and forward order
        respectively); ``false_pos`` and ``true_neg`` hold the label-0 rows
        (reversed and forward order respectively).
    """
    loaded = Dataset(infile_x, infile_y)
    X_train, X_test, y_train, y_test = loaded.train_test_split()

    clf.fit(X_train, y_train)
    class_probas = clf.predict_proba(X_test)
    scores = class_probas[:, 1] - class_probas[:, 0]

    # Label-1 rows and the permutation that sorts their scores.
    is_pos = y_test == 1
    X_pos = X_test[is_pos]
    s_pos = scores[is_pos]
    rank_pos = s_pos.argsort()

    # Label-0 rows and the permutation that sorts their scores.
    is_neg = y_test == 0
    X_neg = X_test[is_neg]
    s_neg = scores[is_neg]
    rank_neg = s_neg.argsort()

    # Index separately for every result so the outputs stay independent.
    true_pos = (X_pos[rank_pos][::-1], s_pos[rank_pos][::-1])
    true_neg = (X_neg[rank_neg], s_neg[rank_neg])
    false_pos = (X_neg[rank_neg][::-1], s_neg[rank_neg][::-1])
    false_neg = (X_pos[rank_pos], s_pos[rank_pos])

    return true_pos, true_neg, false_pos, false_neg