import numpy as np

# AdaBoost and OptimalWeakLearner come from the project's own boosting
# module; they are not defined in this listing.


def active_learning(self, train, train_target, test, test_target):
    """Pool-based active learning with AdaBoost and uncertainty sampling.

    Start with 5% of the training data labeled; each round, boost on the
    labeled set, then move the pool examples with the smallest |H(x)|
    margins (the ones the ensemble is least certain about) into the
    labeled set, until half of the data is labeled.
    """
    param = 0.05      # fraction of data labeled initially
    increment = 0.05  # fraction of the data labeled per round
    init_size = int(len(train) * param)
    increment_size = int(len(train) * increment)

    X = train[:init_size]           # labeled examples
    Y = train_target[:init_size]
    R = train[init_size:]           # unlabeled pool
    RY = train_target[init_size:]   # oracle labels for the pool

    while param < 0.5:
        print("labeled data: %.2f%%" % (100.0 * len(X) / len(train)))
        adaboost = AdaBoost(OptimalWeakLearner())
        acc, err, auc = adaboost.boost(X, Y, test, test_target)
        self.result.append((acc, err, auc))

        # Uncertainty sampling: the smaller |H(x)|, the closer the example
        # sits to the decision boundary.
        H = adaboost.hypothesis(R)
        sorted_indices = np.abs(H).argsort().tolist()
        selected = sorted_indices[:increment_size]
        remained = sorted_indices[increment_size:]

        X = np.vstack((X, R[selected]))
        # Self-labeling variant: Y = np.append(Y, adaboost.sign(H[selected]))
        Y = np.append(Y, RY[selected])  # query the oracle instead
        R = R[remained]
        RY = RY[remained]
        param += increment
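# The selection step above is plain uncertainty sampling: |H(x)| is the
# ensemble's margin on x, and the smallest margins sit closest to the
# decision boundary. A minimal standalone sketch of just that step, using
# NumPy and toy data (the names pool, margins, and batch_size are
# illustrative, not from the project):

import numpy as np

rng = np.random.default_rng(0)
pool = rng.normal(size=(100, 5))   # unlabeled pool (toy features)
margins = rng.normal(size=100)     # stand-in for adaboost.hypothesis(pool)
batch_size = 10

order = np.abs(margins).argsort()  # most uncertain examples first
query_idx = order[:batch_size]     # send these to the oracle
keep_idx = order[batch_size:]      # everything else stays in the pool

queried, pool = pool[query_idx], pool[keep_idx]
margins = margins[keep_idx]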
import numpy as np
import matplotlib.pyplot as plt

# load_spambase, shuffle, k_fold_cross_validation, roc, AdaBoost, and
# OptimalWeakLearner come from the project's own modules; they are not
# defined in this listing.


def optimal_weak_learner():
    """10-fold cross-validation of AdaBoost on the spambase dataset."""
    print('==============Optimal Weak Learner============')
    train, target = load_spambase()
    train, target = shuffle(train, target)
    # Map the {0, 1} labels to the {-1, +1} convention AdaBoost expects.
    target = np.where(target == 0, -1.0, 1.0)

    k = 10
    train_size = len(train)
    test_index_generator = k_fold_cross_validation(train_size, k)
    fold = 1
    overall_acc = 0.0
    overall_error = 0.0
    overall_auc = 0.0

    for start, end in test_index_generator:
        print("====================Fold %s============" % fold)
        # Rows [start, end) form the test fold; the rest is training data.
        k_fold_train = np.vstack((train[:start], train[end:]))
        test = train[start:end]
        train_target = np.append(target[:start], target[end:])
        test_target = target[start:end]

        adaboost = AdaBoost(OptimalWeakLearner())
        plot = fold == 1  # only plot the learning curves for the first fold
        acc, err, auc = adaboost.boost(k_fold_train, train_target,
                                       test, test_target, plot=plot)

        if plot:
            test_err_points = np.array(adaboost.test_err_array)
            train_err_points = np.array(adaboost.train_err_array)
            auc_points = np.array(adaboost.test_auc_array)
            round_err_points = np.array(adaboost.weighted_err_array)

            # Column 0 of each array is the boosting round, column 1 the value.
            plt.xlabel('Round')
            plt.ylabel('Error Rate')
            plt.plot(test_err_points[:, 0], test_err_points[:, 1],
                     c='r', label='Test Error')
            plt.plot(train_err_points[:, 0], train_err_points[:, 1],
                     c='g', label='Train Error')
            plt.plot(round_err_points[:, 0], round_err_points[:, 1],
                     c='b', label='Round Error')
            plt.legend(bbox_to_anchor=(0., 1.02, 1., .102), loc=3,
                       ncol=2, mode="expand", borderaxespad=0.)
            plt.show()

            plt.xlabel('Round')
            plt.ylabel('AUC')
            plt.plot(auc_points[:, 0], auc_points[:, 1], c='r', label='AUC')
            plt.legend(bbox_to_anchor=(0., 1.02, 1., .102), loc=3,
                       ncol=2, mode="expand", borderaxespad=0.)
            plt.show()

        overall_auc += auc
        overall_acc += acc
        overall_error += err

        if fold == 1:
            # ROC curve of the final ensemble on the first test fold;
            # roc() returns (TPR, FPR) rows.
            hypo = adaboost.hypothesis(test)
            roc_points = roc(test_target, hypo, 1.0, -1.0)
            plt.xlabel('FPR')
            plt.ylabel('TPR')
            plt.xlim(left=0)    # the xmin/ymin kwargs were removed in Matplotlib 3.0
            plt.ylim(bottom=0)
            plt.scatter(roc_points[:, 1], roc_points[:, 0])
            plt.show()
        fold += 1

    print("Overall test accuracy: %s, overall test error: %s, "
          "overall test auc: %s" %
          (overall_acc / k, overall_error / k, overall_auc / k))
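# The loop above assumes k_fold_cross_validation yields contiguous
# (start, end) test-index pairs, one per fold. A minimal generator
# consistent with that usage (an assumption; the project's helper is
# not shown in this listing):

def k_fold_cross_validation(n, k):
    """Yield (start, end) test-index ranges for k contiguous folds."""
    fold_size = n // k
    for i in range(k):
        start = i * fold_size
        # Fold any leftover rows into the last fold.
        end = n if i == k - 1 else start + fold_size
        yield start, end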
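# optimal_weak_learner also leans on a roc helper that returns (TPR, FPR)
# rows for the scatter plot. A threshold-sweep sketch consistent with the
# call roc(test_target, hypo, 1.0, -1.0) and the plot above (an assumption;
# the project's implementation is not shown):

import numpy as np

def roc(labels, scores, pos_label=1.0, neg_label=-1.0):
    """Return an array of (TPR, FPR) rows, one per score threshold."""
    labels = np.asarray(labels, dtype=float)
    scores = np.asarray(scores, dtype=float)
    n_pos = np.sum(labels == pos_label)
    n_neg = np.sum(labels == neg_label)
    points = []
    for t in np.sort(scores)[::-1]:  # sweep thresholds from high to low
        pred_pos = scores >= t
        tpr = np.sum(pred_pos & (labels == pos_label)) / n_pos
        fpr = np.sum(pred_pos & (labels == neg_label)) / n_neg
        points.append((tpr, fpr))
    return np.array(points)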