Пример #1
0
def reportBestResult():
    """Print a summary for the parameter with the best mean AUC.

    For every candidate parameter the stored AUC history is loaded via
    ``load_np_array`` and averaged; the parameter with the highest mean is
    selected (the first one wins on ties). The confusion-matrix history
    stored for that parameter is then loaded, its mean and standard
    deviation along axis 2 are printed, and the mean matrix is handed to
    ``compute_performance_metrics``. Finally the mean accuracy and mean
    recall of every candidate parameter are collected.

    Nothing is returned; all inputs are read from files under ``results/``.
    Single-argument ``print(...)`` is used so the statements behave the same
    under Python 2 and Python 3.
    """
    C = [16, 32, 64, 128, 256, 512]

    # Mean AUC per candidate parameter.
    mean_auc_by_param = {}
    for c in C:
        auc_history = load_np_array("results/ada_auc_" + str(c) + ".bin")
        mean_auc_by_param[c] = auc_history.mean()

    # max() returns the first maximal element, so the result is always a
    # member of C.
    best_param = max(C, key=mean_auc_by_param.get)
    print(best_param)

    # Load the confusion matrices of the *selected* parameter. Using the loop
    # variable here would always pick the last entry of C regardless of which
    # parameter scored best.
    # NOTE(review): this path uses the "rfc_" prefix while every other file
    # read in this function uses "ada_" - confirm this is the intended file.
    confusion_matrix_history = load_np_array("results/rfc_folds_confusion_" +
                                             str(best_param) + ".bin")
    print(confusion_matrix_history)

    # Element-wise statistics across axis 2 of the stacked matrices.
    mean_cm = np.mean(confusion_matrix_history, axis=2)
    std_cm = np.std(confusion_matrix_history, axis=2)

    print(mean_cm)
    print(std_cm)
    compute_performance_metrics(mean_cm)

    # Mean accuracy / recall for each candidate parameter, in the order of C.
    # These lists are not consumed further within this function.
    acc_mean = []
    recall_mean = []
    for c in C:
        accuracy_history = load_np_array("results/ada_accuracy_" + str(c) +
                                         ".bin")
        recall_history = load_np_array("results/ada_recall_" + str(c) + ".bin")
        acc_mean.append(accuracy_history.mean())
        recall_mean.append(recall_history.mean())
Пример #2
0
    # Train and evaluate the classifier on every cross-validation fold.
    # `c` is used in this fragment only for the log line and to tag the
    # result file names below.
    print "\nTraining classifier param %d" % c

    # `cvs` yields (train, test) index pairs; `i` is the fold number.
    for i, (train, test) in enumerate(cvs):
        # Only the training part of the fold is passed through the
        # oversampler; the test part is used as-is for prediction.
        # NOTE(review): meaning of ratio = 2.5 depends on OverSampler - confirm.
        sm = OverSampler(verbose = False, ratio = 2.5)
        train_oversampled_x, train_oversampled_train_y = sm.fit_transform(homesite.train_x[train], homesite.train_y[train])
        # Fit on the oversampled training data and get class probabilities
        # for the fold's test samples.
        probas_ = clf.fit(train_oversampled_x, train_oversampled_train_y).predict_proba(homesite.train_x[test])

        # ROC curve from column 1 of the predicted probabilities.
        fpr, tpr, thresholds = roc_curve(homesite.train_y[test], probas_[:, 1])
        # Accumulate this fold's TPR interpolated onto the shared `mean_fpr`
        # grid, then pin the first point of the accumulated curve to 0.
        mean_tpr += interp(mean_fpr, fpr, tpr)
        mean_tpr[0] = 0.0
        roc_auc = compute_auc(homesite.train_y[test], probas_[:, 1])
        # Rounding the probabilities turns them into hard 0/1 predictions for
        # the confusion matrix.
        fold_cm = confusion_matrix(homesite.train_y[test], np.round(probas_)[:, 1])
        # Append this fold's matrix along the third axis.
        # NOTE(review): `confusion_matrix_history` is initialised outside this
        # fragment - verify its starting value does not add a spurious slice.
        confusion_matrix_history = np.dstack((confusion_matrix_history, fold_cm))

        accuracy, precision, recall = compute_performance_metrics(fold_cm)
        # Running sums across folds (initialised outside this fragment).
        mean_acc += accuracy
        mean_recall += recall
        mean_precision += precision

        # Per-fold histories, one entry per fold.
        accuracy_history.append(accuracy)
        precision_history.append(precision)
        recall_history.append(recall)
        auc_history.append(roc_auc)

        # The histories collected so far are written out again on every fold,
        # so the files reflect all folds completed up to this point.
        save_np_array("../../results/random_forests/rf_accuracy_" + str(c) + ".bin", np.array(accuracy_history))
        save_np_array("../../results/random_forests/rf_precision_" + str(c) + ".bin", np.array(precision_history))
        save_np_array("../../results/random_forests/rf_recall_" + str(c) + ".bin", np.array(recall_history))
        save_np_array("../../results/random_forests/rf_auc_" + str(c) + ".bin", np.array(auc_history))
        save_np_array("../../results/random_forests/rf_confusion_matrix_" + str(c) + ".bin", np.array(confusion_matrix_history))
        # Draw this fold's ROC curve, labelled with the fold index and its AUC.
        plt.plot(fpr, tpr, lw = 1, label = 'ROC fold %d (area = %0.2f)' % (i, roc_auc))
Пример #3
0
    '''

    # Load the data set from the given file into a Data object.
    # NOTE(review): "sliptted" in the loader name looks like a typo of
    # "splitted"; it is the method's actual name here, so it must stay as is.
    oversampled_path = "resources/oversampled_normalized_data_ratio_2.bin"
    homesite = Data()
    homesite.load_sliptted_data(oversampled_path)
    del homesite.test_x  # Deleted to save memory.
    print homesite.train_x.shape

    # Creating classifier.
    # The commented-out lines are alternative classifiers that were tried;
    # only the RandomForestClassifier below is active.
    # clf = DecisionTreeClassifier()
    clf = RandomForestClassifier(max_features=100)
    # clf = AdaBoostClassifier(n_estimators = 10)
    # clf = svm.SVC(gamma = 0.00005)
    # clf = RandomForestClassifier()
    # clf = MultiplePLS(n_classifiers = 10, n_samples = 5000, n_positive_samples = 2500, threshold = 0.9, acc = 0.999)
    # clf = svm.LinearSVC()

    # Train classifier on the full training split.
    print "Training classifier."
    clf.fit(homesite.train_x, homesite.train_y)

    # Test classifier on the validation split.
    # Despite its name, `predicted_labels` holds column 1 of predict_proba,
    # i.e. probabilities rather than hard labels.
    print 'Testing classifier.'
    predicted_labels = clf.predict_proba(homesite.validation_x)[:, 1]

    # Show final results.
    # The confusion matrix uses the rounded probabilities as 0/1 predictions,
    # while the AUC is computed from the unrounded probabilities.
    results = confusion_matrix(homesite.validation_y,
                               np.round(predicted_labels))
    accuracy, precision, recall = compute_performance_metrics(results)
    auc = compute_auc(homesite.validation_y, predicted_labels)