def active_learning(self, train, train_target, test, test_target):
    """Uncertainty-sampling active learning loop driven by AdaBoost.

    Starts with the first 5% of ``train`` as the labeled pool and, each
    round, moves the 5% of the remaining pool whose boosted score H(x) is
    closest to the decision boundary (smallest |H(x)|) into the labeled
    set, stopping once 50% of the data is labeled.  Appends one
    ``(accuracy, error, auc)`` tuple per round to ``self.result``.

    Parameters
    ----------
    train, train_target : array-like
        Full training pool and its labels (labels assumed in {-1, +1}).
    test, test_target : array-like
        Held-out evaluation set passed through to ``AdaBoost.boost``.
    """
    init_fraction = 0.05
    increment = 0.05
    init_size = int(len(train) * init_fraction)
    increment_size = int(len(train) * increment)
    X = train[:init_size]
    Y = train_target[:init_size]
    R = train[init_size:]
    RY = train_target[init_size:]
    # BUGFIX: the original accumulated `param += 0.05` and looped while
    # `param < 0.5`; repeated binary-float addition of 0.05 ends at
    # 0.49999999999999994, so the loop ran one extra round past the
    # intended 50% labeling budget.  Count rounds with an integer instead:
    # 9 rounds cover labeled fractions 5%, 10%, ..., 45%.
    n_rounds = int(round((0.5 - init_fraction) / increment))
    for _ in range(n_rounds):
        print("labeled data: %.2f%%" % (100.0 * len(X) / len(train)))
        adaboost = AdaBoost(OptimalWeakLearner())
        acc, err, auc = adaboost.boost(X, Y, test, test_target)
        self.result.append((acc, err, auc))
        # |H(x)| is the classifier's confidence margin; the samples with
        # the smallest margins are the most informative to label next.
        H = adaboost.hypothesis(R)
        sorted_indices = np.abs(H).argsort().tolist()
        selected = sorted_indices[:increment_size]
        remained = sorted_indices[increment_size:]
        X = np.vstack((X, R[selected]))
        # Query the oracle: take the true labels of the selected points.
        # (The commented alternative below would instead self-label with
        # the classifier's own predictions.)
        # Y = np.append(Y, adaboost.sign(H[selected]))
        Y = np.append(Y, RY[selected])
        R = R[remained]
        RY = RY[remained]
def active_learning(self, train, train_target, test, test_target):
    """Grow a labeled pool by uncertainty sampling with AdaBoost.

    Begins with the first 5% of ``train`` labeled; every round boosts on
    the labeled pool, evaluates on ``test``/``test_target``, records
    ``(accuracy, error, auc)`` in ``self.result``, then transfers the
    least-confident 5% of the unlabeled remainder (smallest |H(x)|) into
    the labeled pool, looping until roughly half the data is labeled.
    """
    fraction = 0.05
    step = 0.05
    seed_count = int(len(train) * fraction)
    batch_count = int(len(train) * step)
    labeled_x, labeled_y = train[:seed_count], train_target[:seed_count]
    pool_x, pool_y = train[seed_count:], train_target[seed_count:]
    while fraction < 0.5:
        print("labeled data: %.2f%%" % (100.0 * len(labeled_x) / len(train)))
        booster = AdaBoost(OptimalWeakLearner())
        acc, err, auc = booster.boost(labeled_x, labeled_y, test, test_target)
        self.result.append((acc, err, auc))
        # Rank the pool by confidence margin; front of the order is the
        # most uncertain, hence most worth labeling.
        scores = booster.hypothesis(pool_x)
        order = np.abs(scores).argsort().tolist()
        picked = order[:batch_count]
        kept = order[batch_count:]
        labeled_x = np.vstack((labeled_x, pool_x[picked]))
        # Oracle labeling: append the true labels of the picked samples
        # (self-labeling alternative kept below for reference).
        # Y = np.append(Y, adaboost.sign(H[selected]))
        labeled_y = np.append(labeled_y, pool_y[picked])
        pool_x, pool_y = pool_x[kept], pool_y[kept]
        fraction += step
# Fragment: interior of a larger routine — `train`, `train_target`,
# `init_size`, `init_dataset`, `init_target`, `percentage`, `increment`,
# `increment_size`, `test`, and `test_target` are all defined outside this
# view (presumably init_size = 5% of the pool and increment_size = 5%
# batches — TODO confirm against the enclosing function).
remain_dataset = train[init_size:]
remain_target = train_target[init_size:]
# Active learning
# X/Y: labeled pool; R/RY: remaining unlabeled pool with held-back labels.
X = init_dataset
Y = init_target
R = remain_dataset
RY = remain_target
result = []
# Loop until (roughly) half the data is labeled.  NOTE(review): float
# accumulation of `percentage += increment` may run one extra iteration
# (e.g. 10 x 0.05 sums to just under 0.5) — verify intended round count.
while percentage < 0.5:
    print "labeled data: %.2f%%" % (100.0 * len(X) / len(train))
    # Retrain from scratch on the current labeled pool and record metrics.
    adaboost = AdaBoost(OptimalWeakLearner())
    acc, err, auc = adaboost.boost(X, Y, test, test_target)
    result.append((acc, err, auc))
    # Uncertainty sampling: smallest |H(x)| = closest to the decision
    # boundary = least confident, so most informative to label next.
    H = adaboost.hypothesis(R)
    H_abs = np.abs(H)
    sorted_indices = H_abs.argsort().tolist()
    selected = sorted_indices[:increment_size]
    remained = sorted_indices[increment_size:]
    X = np.vstack((X, R[selected]))
    # Oracle labeling with the true labels; the commented line would
    # self-label with the classifier's own predictions instead.
    # Y = np.append(Y, adaboost.sign(H[selected]))
    Y = np.append(Y, RY[selected])
    R = R[remained]
    RY = RY[remained]
    percentage += increment
# boosting
def optimal_weak_learner():
    """Run 10-fold cross-validated AdaBoost (optimal weak learner) on spambase.

    For each fold: trains AdaBoost on the other nine folds, evaluates on
    the held-out fold, and accumulates accuracy/error/AUC.  Fold 1
    additionally plots train/test/round error curves, the AUC-per-round
    curve, and the ROC curve.  Prints the k-fold averages at the end.

    Relies on module-level helpers: ``load_spambase``, ``shuffle``,
    ``k_fold_cross_validation``, ``AdaBoost``, ``OptimalWeakLearner``,
    ``roc``, and ``plt`` (matplotlib).
    """
    print('==============Optimal Weak Learner============')
    train, target = load_spambase()
    train, target = shuffle(train, target)
    # Map {0, 1} labels to {-1, +1}.  np.where is vectorized and, unlike
    # np.array(map(...)), works identically on Python 2 and 3 (py3's lazy
    # map would otherwise yield a useless 0-d object array).
    target = np.where(target == 0, -1.0, 1.0)
    k = 10
    train_size = len(train)
    test_index_generator = k_fold_cross_validation(train_size, k)
    fold = 1
    overall_acc = 0
    overall_error = 0
    overall_auc = 0
    for start, end in test_index_generator:
        print("====================Fold %s============" % fold)
        # Plain slices instead of fancy-indexing with range(...) lists:
        # same rows, clearer, and avoids building throwaway index lists.
        k_fold_train = np.vstack((train[:start], train[end:train_size]))
        test = train[start:end]
        train_target = np.append(target[:start], target[end:train_size])
        test_target = target[start:end]
        adaboost = AdaBoost(OptimalWeakLearner())
        # Only the first fold produces diagnostic plots.
        # (Replaces the redundant `plot = False; if fold == 1: ... else: ...`.)
        plot = fold == 1
        acc, err, auc = adaboost.boost(k_fold_train, train_target,
                                       test, test_target, plot=plot)
        if plot:
            test_err_points = np.array(adaboost.test_err_array)
            train_err_points = np.array(adaboost.train_err_array)
            auc_points = np.array(adaboost.test_auc_array)
            round_err_points = np.array(adaboost.weighted_err_array)
            plt.xlabel('Round')
            plt.ylabel('Error Rate')
            plt.plot(test_err_points[:, 0], test_err_points[:, 1],
                     c='r', label='Test Error')
            # Plot each curve against its own round column (the original
            # reused the test-error x column for all three curves, which
            # breaks if the arrays ever differ in length).
            plt.plot(train_err_points[:, 0], train_err_points[:, 1],
                     c='g', label='Train Error')
            plt.plot(round_err_points[:, 0], round_err_points[:, 1],
                     c='b', label='Round Error')
            plt.legend(bbox_to_anchor=(0., 1.02, 1., .102), loc=3,
                       ncol=2, mode="expand", borderaxespad=0.)
            plt.show()
            plt.xlabel('Round')
            plt.ylabel('AUC')
            plt.plot(auc_points[:, 0], auc_points[:, 1], c='r', label='AUC')
            plt.legend(bbox_to_anchor=(0., 1.02, 1., .102), loc=3,
                       ncol=2, mode="expand", borderaxespad=0.)
            plt.show()
        overall_auc += auc
        overall_acc += acc
        overall_error += err
        if fold == 1:
            # ROC curve for the first fold's final hypothesis.
            hypo = adaboost.hypothesis(test)
            roc_points = roc(test_target, hypo, 1.0, -1.0)
            plt.xlabel('FPR')
            plt.ylabel('TPR')
            plt.xlim(xmin=0)
            plt.ylim(ymin=0)
            plt.scatter(roc_points[:, 1], roc_points[:, 0])
            plt.show()
        fold += 1
    print("Overall test accuracy: %s, overall test error: %s, overall test auc: %s" % (
        overall_acc / k, overall_error / k, overall_auc / k))
# Fragment: interior of a larger routine — `train`, `train_target`,
# `init_size`, `init_dataset`, `init_target`, `percentage`, `increment`,
# `increment_size`, `test`, and `test_target` come from code outside this
# view.  NOTE(review): the `init_dataset`/`init_target` assignments at the
# bottom appear AFTER their use inside the loop — this chunk looks
# scrambled/out of order; verify against the original file layout.
remain_dataset = train[init_size:]
remain_target = train_target[init_size:]
# Active learning
# X/Y: labeled pool; R/RY: remaining unlabeled pool with held-back labels.
X = init_dataset
Y = init_target
R = remain_dataset
RY = remain_target
result = []
# Loop until (roughly) half the data is labeled.  NOTE(review): float
# accumulation of `percentage += increment` may run one extra iteration.
while percentage < 0.5:
    print "labeled data: %.2f%%" % (100.0 * len(X) / len(train))
    # Retrain from scratch on the current labeled pool and record metrics.
    adaboost = AdaBoost(OptimalWeakLearner())
    acc, err, auc = adaboost.boost(X, Y, test, test_target)
    result.append((acc, err, auc))
    # Uncertainty sampling: smallest |H(x)| = least confident sample.
    H = adaboost.hypothesis(R)
    H_abs = np.abs(H)
    sorted_indices = H_abs.argsort().tolist()
    selected = sorted_indices[:increment_size]
    remained = sorted_indices[increment_size:]
    X = np.vstack((X, R[selected]))
    # Oracle labeling with the true labels; the commented line would
    # self-label with the classifier's own predictions instead.
    # Y = np.append(Y, adaboost.sign(H[selected]))
    Y = np.append(Y, RY[selected])
    R = R[remained]
    RY = RY[remained]
    percentage += increment
# boosting
init_dataset = train[:init_size]
init_target = train_target[:init_size]
def optimal_weak_learner():
    """10-fold cross-validation of AdaBoost with the optimal weak learner.

    Loads and shuffles the spambase data, remaps labels to {-1, +1}, then
    boosts on each train split and scores the held-out fold.  Fold 1 also
    renders error/AUC/ROC diagnostic plots.  Averaged accuracy, error and
    AUC over the k folds are printed at the end.
    """
    print('==============Optimal Weak Learner============')
    train, target = load_spambase()
    train, target = shuffle(train, target)
    # Recode class 0 as -1.0 and everything else as +1.0.
    target = np.array([-1.0 if label == 0 else 1.0 for label in target])
    n_folds = 10
    n_samples = len(train)
    fold_bounds = k_fold_cross_validation(n_samples, n_folds)
    fold_no = 1
    acc_sum = 0
    err_sum = 0
    auc_sum = 0
    for lo, hi in fold_bounds:
        print("====================Fold %s============" % fold_no)
        # Everything outside [lo, hi) trains; [lo, hi) is held out.
        fold_train = np.vstack((train[range(0, lo)],
                                train[range(hi, n_samples)]))
        fold_test = train[range(lo, hi)]
        fold_train_y = np.append(target[range(0, lo)],
                                 target[range(hi, n_samples)])
        fold_test_y = target[range(lo, hi)]
        booster = AdaBoost(OptimalWeakLearner())
        # Diagnostics only for the first fold.
        do_plot = fold_no == 1
        acc, err, auc = booster.boost(fold_train, fold_train_y,
                                      fold_test, fold_test_y, plot=do_plot)
        if do_plot:
            test_curve = np.array(booster.test_err_array)
            train_curve = np.array(booster.train_err_array)
            auc_curve = np.array(booster.test_auc_array)
            round_curve = np.array(booster.weighted_err_array)
            # Error-rate curves, all against the test curve's round axis.
            plt.xlabel('Round')
            plt.ylabel('Error Rate')
            plt.plot(test_curve[:, 0], test_curve[:, 1],
                     c='r', label='Test Error')
            plt.plot(test_curve[:, 0], train_curve[:, 1],
                     c='g', label='Train Error')
            plt.plot(test_curve[:, 0], round_curve[:, 1],
                     c='b', label='Round Error')
            plt.legend(bbox_to_anchor=(0., 1.02, 1., .102), loc=3,
                       ncol=2, mode="expand", borderaxespad=0.)
            plt.show()
            # AUC-per-round curve.
            plt.xlabel('Round')
            plt.ylabel('AUC')
            plt.plot(test_curve[:, 0], auc_curve[:, 1], c='r', label='AUC')
            plt.legend(bbox_to_anchor=(0., 1.02, 1., .102), loc=3,
                       ncol=2, mode="expand", borderaxespad=0.)
            plt.show()
        auc_sum += auc
        acc_sum += acc
        err_sum += err
        if fold_no == 1:
            # ROC curve for the first fold's final strong hypothesis.
            scores = booster.hypothesis(fold_test)
            roc_points = roc(fold_test_y, scores, 1.0, -1.0)
            plt.xlabel('FPR')
            plt.ylabel('TPR')
            plt.xlim(xmin=0)
            plt.ylim(ymin=0)
            plt.scatter(roc_points[:, 1], roc_points[:, 0])
            plt.show()
        fold_no += 1
    print("Overall test accuracy: %s, overall test error: %s, overall test auc: %s" % (
        acc_sum / n_folds, err_sum / n_folds, auc_sum / n_folds))