def train(self, train, train_target, test, test_target, T=100, percentage=0.5): k, n = self.selected_code.shape train = train[:int(len(train) * percentage)] train_target = train_target[:int(len(train_target) * percentage)] first_time = True predictors = None for f in range(n): print "Run Adaboost on function %f" % f codes = self.selected_code[:, f] labels = self.convert_to_binary(train_target, codes) test_labels = self.convert_to_binary(test_target, codes) learner = OptimalWeakLearner() if not first_time: learner.set_predictors(predictors) adaboost = AdaBoost(learner) adaboost.boost(train, labels, test, test_labels, T, calculate_auc=False) self.functions.append(adaboost) if first_time: first_time = False predictors = learner.get_predictors()
def entire():
    """Run AdaBoost with the optimal weak learner on the full CRX dataset."""
    data, target = load_crx()
    test_size = len(data) / 10
    train, test, train_target, test_target = train_test_shuffle_split(data, target, test_size)
    # Recode {0, 1} class labels as {-1.0, +1.0} for boosting.
    relabel = lambda v: -1.0 if v == 0 else 1.0
    train_target = np.array([relabel(v) for v in train_target])
    test_target = np.array([relabel(v) for v in test_target])
    booster = AdaBoost(OptimalWeakLearner())
    booster.boost(train, train_target, test, test_target,
                  discrete_features=range(train.shape[1]))
def random_weak_learner(): print '==============Random Weak Learner============' train, target = load_spambase() train, test, train_target, test_target = train_test_shuffle_split(train, target, len(train) / 10) train_target = np.array(map(lambda v: -1.0 if v == 0 else 1.0, train_target)) test_target = np.array(map(lambda v: -1.0 if v == 0 else 1.0, test_target)) adaboost = AdaBoost(RandomChooseLeaner()) adaboost.boost(train, train_target, test, test_target, T=200)
def random_weak_learner(): print '==============Random Weak Learner============' train, target = load_spambase() train, test, train_target, test_target = train_test_shuffle_split( train, target, len(train) / 10) train_target = np.array( map(lambda v: -1.0 if v == 0 else 1.0, train_target)) test_target = np.array(map(lambda v: -1.0 if v == 0 else 1.0, test_target)) adaboost = AdaBoost(RandomChooseLeaner()) adaboost.boost(train, train_target, test, test_target, T=200)
def entire():
    """Run AdaBoost with the optimal weak learner on the full vote dataset.

    NOTE(review): another `entire` (CRX dataset) appears earlier in this
    file; if both live in one module the later definition wins — confirm
    which copy is intended.
    """
    data, target = load_vote()
    train, test, train_target, test_target = train_test_shuffle_split(
        data, target, len(data) / 10)

    def to_sign(v):
        # Boosting expects labels in {-1.0, +1.0}.
        return -1.0 if v == 0 else 1.0

    train_target = np.array([to_sign(v) for v in train_target])
    test_target = np.array([to_sign(v) for v in test_target])
    model = AdaBoost(OptimalWeakLearner())
    model.boost(train, train_target, test, test_target,
                discrete_features=range(train.shape[1]))
def cross(): train, target = load_vote() target = np.array(map(lambda v: -1.0 if v == 0 else 1.0, target)) k = 10 train_size = len(train) test_index_generator = k_fold_cross_validation(train_size, k) fold = 1 overall_acc = 0 overall_error = 0 overall_auc = 0 for start, end in test_index_generator: print "====================Fold %s============" % fold k_fold_train = np.vstack( (train[range(0, start)], train[range(end, train_size)])) test = train[range(start, end)] train_target = np.append(target[range(0, start)], target[range(end, train_size)]) test_target = target[range(start, end)] adaboost = AdaBoost(OptimalWeakLearner()) acc, err, auc = adaboost.boost(k_fold_train, train_target, test, test_target) overall_auc += auc overall_acc += acc overall_error += err fold += 1 print "Overall test accuracy: %s, overall test error: %s, overall test auc: %s" % ( overall_acc / k, overall_error / k, overall_auc / k)
def cross(): train, target = load_crx() train, target = shuffle(train, target) target = np.array(map(lambda v: -1.0 if v == 0 else 1.0, target)) k = 10 train_size = len(train) test_index_generator = k_fold_cross_validation(train_size, k) fold = 1 overall_acc = 0 overall_error = 0 overall_auc = 0 for start, end in test_index_generator: print "====================Fold %s============" % fold k_fold_train = np.vstack((train[range(0, start)], train[range(end, train_size)])) test = train[range(start, end)] train_target = np.append(target[range(0, start)], target[range(end, train_size)]) test_target = target[range(start, end)] adaboost = AdaBoost(OptimalWeakLearner()) acc, err, auc = adaboost.boost(k_fold_train, train_target, test, test_target) overall_auc += auc overall_acc += acc overall_error += err fold += 1 print "Overall test accuracy: %s, overall test error: %s, overall test auc: %s" % ( overall_acc / k, overall_error / k, overall_auc / k)
def optimal_weak_learner_on_random_data(): data, target = load_spambase() train, test, train_target, test_target = train_test_shuffle_split( data, target, len(data) / 10) train_target = np.array( map(lambda v: -1.0 if v == 0 else 1.0, train_target)) test_target = np.array(map(lambda v: -1.0 if v == 0 else 1.0, test_target)) indices = range(len(train)) param = 0.05 res = [] while param < 0.5: print "Choose %.2f%% of data" % (param * 100) choose_size = int(len(indices) * param) choose_indices = random.sample(indices, choose_size) X = train[choose_indices] Y = train_target[choose_indices] adaboost = AdaBoost(OptimalWeakLearner()) acc, err, auc = adaboost.boost(X, Y, test, test_target) res.append((acc, err, auc)) param += 0.05 print res
def active_learning(self, train, train_target, test, test_target): param = 0.05 increment = 0.05 init_size = int(len(train) * param) increment_size = int(len(train) * increment) X = train[:init_size] Y = train_target[:init_size] R = train[init_size:] RY = train_target[init_size:] while param < 0.5: print "labeled data: %.2f%%" % (100.0 * len(X) / len(train)) adaboost = AdaBoost(OptimalWeakLearner()) acc, err, auc = adaboost.boost(X, Y, test, test_target) self.result.append((acc, err, auc)) H = adaboost.hypothesis(R) H_abs = np.abs(H) sorted_indices = H_abs.argsort().tolist() selected = sorted_indices[:increment_size] remained = sorted_indices[increment_size:] X = np.vstack((X, R[selected])) # Y = np.append(Y, adaboost.sign(H[selected])) Y = np.append(Y, RY[selected]) R = R[remained] RY = RY[remained] param += increment
def active_learning(self, train, train_target, test, test_target): param = 0.05 increment = 0.05 init_size = int(len(train) * param) increment_size = int(len(train) * increment) X = train[:init_size] Y = train_target[:init_size] R = train[init_size:] RY = train_target[init_size:] while param < 0.5: print "labeled data: %.2f%%" % (100.0 * len(X)/len(train)) adaboost = AdaBoost(OptimalWeakLearner()) acc, err, auc = adaboost.boost(X, Y, test, test_target) self.result.append((acc, err, auc)) H = adaboost.hypothesis(R) H_abs = np.abs(H) sorted_indices = H_abs.argsort().tolist() selected = sorted_indices[:increment_size] remained = sorted_indices[increment_size:] X = np.vstack((X, R[selected])) # Y = np.append(Y, adaboost.sign(H[selected])) Y = np.append(Y, RY[selected]) R = R[remained] RY = RY[remained] param += increment
def train(self, train, train_target, test, test_target, T=100, percentage = 0.5): k, n = self.selected_code.shape train = train[:int(len(train) * percentage)] train_target = train_target[:int(len(train_target) * percentage)] first_time = True predictors = None for f in range(n): print "Run Adaboost on function %f" % f codes = self.selected_code[:, f] labels = self.convert_to_binary(train_target, codes) test_labels = self.convert_to_binary(test_target, codes) learner = OptimalWeakLearner() if not first_time: learner.set_predictors(predictors) adaboost = AdaBoost(learner) adaboost.boost(train, labels, test, test_labels, T, calculate_auc=False) self.functions.append(adaboost) if first_time: first_time = False predictors = learner.get_predictors()
def random_c(): data, target = load_crx() train, test, train_target, test_target = train_test_shuffle_split(data, target, len(data) / 10) train_target = np.array(map(lambda v: -1.0 if v == 0 else 1.0, train_target)) test_target = np.array(map(lambda v: -1.0 if v == 0 else 1.0, test_target)) indices = range(len(train)) param = 0.05 res = [] while param < 0.5: print "Choose %.2f%% of data" % (param * 100) choose_size = int(len(indices) * param) choose_indices = random.sample(indices, choose_size) X = train[choose_indices] Y = train_target[choose_indices] adaboost = AdaBoost(OptimalWeakLearner()) acc, err, auc = adaboost.boost(X, Y, test, test_target, discrete_features=[0, 3, 4, 5, 6, 8, 9, 11, 12]) res.append((acc, err, auc)) param += 0.05 print res
# NOTE(review): fragment — this chunk begins mid-script. `init_dataset`,
# `init_size`, `percentage`, `increment`, `increment_size`, `train`,
# `test` and `test_target` must be defined by code above this point;
# confirm against the full file.
init_target = train_target[:init_size]
remain_dataset = train[init_size:]
remain_target = train_target[init_size:]

# Active learning
# X/Y: currently-labeled set; R/RY: remaining unlabeled pool
# (RY holds the true labels used when a point is queried).
X = init_dataset
Y = init_target
R = remain_dataset
RY = remain_target
result = []
while percentage < 0.5:
    print "labeled data: %.2f%%" % (100.0 * len(X) / len(train))
    adaboost = AdaBoost(OptimalWeakLearner())
    acc, err, auc = adaboost.boost(X, Y, test, test_target)
    result.append((acc, err, auc))
    # Rank the pool by |H(x)| ascending: smallest margin first,
    # i.e. the points the ensemble is least confident about.
    H = adaboost.hypothesis(R)
    H_abs = np.abs(H)
    sorted_indices = H_abs.argsort().tolist()
    selected = sorted_indices[:increment_size]
    remained = sorted_indices[increment_size:]
    X = np.vstack((X, R[selected]))
    # Y = np.append(Y, adaboost.sign(H[selected]))
    Y = np.append(Y, RY[selected])
    R = R[remained]
    RY = RY[remained]
    percentage += increment
def optimal_weak_learner():
    """10-fold cross-validation of AdaBoost (optimal weak learner) on
    spambase, plotting error/AUC curves and an ROC for the first fold."""
    print '==============Optimal Weak Learner============'
    train, target = load_spambase()
    train, target = shuffle(train, target)
    # Recode {0, 1} labels as {-1.0, +1.0} for boosting.
    target = np.array(map(lambda v: -1.0 if v == 0 else 1.0, target))
    k = 10
    train_size = len(train)
    test_index_generator = k_fold_cross_validation(train_size, k)
    fold = 1
    overall_acc = 0
    overall_error = 0
    overall_auc = 0
    for start, end in test_index_generator:
        print "====================Fold %s============" % fold
        # Rows outside [start, end) form the training fold.
        k_fold_train = np.vstack(
            (train[range(0, start)], train[range(end, train_size)]))
        test = train[range(start, end)]
        train_target = np.append(target[range(0, start)],
                                 target[range(end, train_size)])
        test_target = target[range(start, end)]
        adaboost = AdaBoost(OptimalWeakLearner())
        # Only the first fold records per-round curves for plotting.
        plot = False
        if fold == 1:
            plot = True
        else:
            plot = False
        acc, err, auc = adaboost.boost(k_fold_train, train_target, test,
                                       test_target, plot=plot)
        if plot:
            # Per-round (round, value) pairs recorded by the booster.
            test_err_points = np.array(adaboost.test_err_array)
            train_err_points = np.array(adaboost.train_err_array)
            auc_points = np.array(adaboost.test_auc_array)
            round_err_points = np.array(adaboost.weighted_err_array)
            plt.xlabel('Round')
            plt.ylabel('Error Rate')
            plt.plot(test_err_points[:, 0], test_err_points[:, 1],
                     c='r', label='Test Error')
            plt.plot(test_err_points[:, 0], train_err_points[:, 1],
                     c='g', label='Train Error')
            plt.plot(test_err_points[:, 0], round_err_points[:, 1],
                     c='b', label='Round Error')
            plt.legend(bbox_to_anchor=(0., 1.02, 1., .102), loc=3,
                       ncol=2, mode="expand", borderaxespad=0.)
            plt.show()
            plt.xlabel('Round')
            plt.ylabel('AUC')
            plt.plot(test_err_points[:, 0], auc_points[:, 1],
                     c='r', label='AUC')
            plt.legend(bbox_to_anchor=(0., 1.02, 1., .102), loc=3,
                       ncol=2, mode="expand", borderaxespad=0.)
            plt.show()
        overall_auc += auc
        overall_acc += acc
        overall_error += err
        if fold == 1:
            # ROC curve of the final ensemble on the first fold's test set.
            hypo = adaboost.hypothesis(test)
            roc_points = roc(test_target, hypo, 1.0, -1.0)
            plt.xlabel('FPR')
            plt.ylabel('TPR')
            plt.xlim(xmin=0)
            plt.ylim(ymin=0)
            plt.scatter(roc_points[:, 1], roc_points[:, 0])
            plt.show()
        fold += 1
    print "Overall test accuracy: %s, overall test error: %s, overall test auc: %s" % (
        overall_acc / k, overall_error / k, overall_auc / k)
# NOTE(review): fragment — this chunk begins mid-script. `init_size`,
# `percentage`, `increment`, `increment_size`, `train`, `test` and
# `test_target` must be defined by code above this point; confirm
# against the full file.
init_dataset = train[:init_size]
init_target = train_target[:init_size]
remain_dataset = train[init_size:]
remain_target = train_target[init_size:]

# Active learning
# X/Y: currently-labeled set; R/RY: remaining unlabeled pool
# (RY holds the true labels used when a point is queried).
X = init_dataset
Y = init_target
R = remain_dataset
RY = remain_target
result = []
while percentage < 0.5:
    print "labeled data: %.2f%%" % (100.0 * len(X) / len(train))
    adaboost = AdaBoost(OptimalWeakLearner())
    acc, err, auc = adaboost.boost(X, Y, test, test_target)
    result.append((acc, err, auc))
    # Rank the pool by |H(x)| ascending: least-confident points first.
    H = adaboost.hypothesis(R)
    H_abs = np.abs(H)
    sorted_indices = H_abs.argsort().tolist()
    selected = sorted_indices[:increment_size]
    remained = sorted_indices[increment_size:]
    X = np.vstack((X, R[selected]))
    # Y = np.append(Y, adaboost.sign(H[selected]))
    Y = np.append(Y, RY[selected])
    R = R[remained]
    RY = RY[remained]
    percentage += increment
# boosting
def optimal_weak_learner():
    """10-fold cross-validation of AdaBoost (optimal weak learner) on
    spambase, with first-fold error/AUC curves and an ROC plot.

    NOTE(review): duplicate of another `optimal_weak_learner` in this
    file — if both live in one module the later definition shadows the
    earlier one; confirm which copy is intended.
    """
    print '==============Optimal Weak Learner============'
    train, target = load_spambase()
    train, target = shuffle(train, target)
    # Recode {0, 1} labels as {-1.0, +1.0} for boosting.
    target = np.array(map(lambda v: -1.0 if v == 0 else 1.0, target))
    k = 10
    train_size = len(train)
    test_index_generator = k_fold_cross_validation(train_size, k)
    fold = 1
    overall_acc = 0
    overall_error = 0
    overall_auc = 0
    for start, end in test_index_generator:
        print "====================Fold %s============" % fold
        # Rows outside [start, end) form the training fold.
        k_fold_train = np.vstack((train[range(0, start)],
                                  train[range(end, train_size)]))
        test = train[range(start, end)]
        train_target = np.append(target[range(0, start)],
                                 target[range(end, train_size)])
        test_target = target[range(start, end)]
        adaboost = AdaBoost(OptimalWeakLearner())
        # Only the first fold records per-round curves for plotting.
        plot = False
        if fold == 1:
            plot = True
        else:
            plot = False
        acc, err, auc = adaboost.boost(k_fold_train, train_target, test,
                                       test_target, plot=plot)
        if plot:
            # Per-round (round, value) pairs recorded by the booster.
            test_err_points = np.array(adaboost.test_err_array)
            train_err_points = np.array(adaboost.train_err_array)
            auc_points = np.array(adaboost.test_auc_array)
            round_err_points = np.array(adaboost.weighted_err_array)
            plt.xlabel('Round')
            plt.ylabel('Error Rate')
            plt.plot(test_err_points[:, 0], test_err_points[:, 1],
                     c='r', label='Test Error')
            plt.plot(test_err_points[:, 0], train_err_points[:, 1],
                     c='g', label='Train Error')
            plt.plot(test_err_points[:, 0], round_err_points[:, 1],
                     c='b', label='Round Error')
            plt.legend(bbox_to_anchor=(0., 1.02, 1., .102), loc=3,
                       ncol=2, mode="expand", borderaxespad=0.)
            plt.show()
            plt.xlabel('Round')
            plt.ylabel('AUC')
            plt.plot(test_err_points[:, 0], auc_points[:, 1],
                     c='r', label='AUC')
            plt.legend(bbox_to_anchor=(0., 1.02, 1., .102), loc=3,
                       ncol=2, mode="expand", borderaxespad=0.)
            plt.show()
        overall_auc += auc
        overall_acc += acc
        overall_error += err
        if fold == 1:
            # ROC curve of the final ensemble on the first fold's test set.
            hypo = adaboost.hypothesis(test)
            roc_points = roc(test_target, hypo, 1.0, -1.0)
            plt.xlabel('FPR')
            plt.ylabel('TPR')
            plt.xlim(xmin=0)
            plt.ylim(ymin=0)
            plt.scatter(roc_points[:, 1], roc_points[:, 0])
            plt.show()
        fold += 1
    print "Overall test accuracy: %s, overall test error: %s, overall test auc: %s" % (
        overall_acc / k, overall_error / k, overall_auc / k)