def get_ab_errors(self, tx, ty, x, y):
    """Return one evaluation error per ensemble size listed in ``self.ts``.

    For every size in ``self.ts`` a fresh AdaBoost model (decision-stump
    weak learner) is trained on ``(tx, ty)`` and its error on ``(x, y)`` is
    collected, in the same order as ``self.ts``.
    """
    def _error_for(rounds):
        # A new model is trained from scratch for each requested size.
        booster = AdaBoost(WL=ex4_tools.DecisionStump, T=rounds)
        booster.train(tx, ty)
        return booster.error(x, y, rounds)

    return [_error_for(rounds) for rounds in self.ts]
def Q3():  # AdaBoost
    """Train AdaBoost for a range of T and pick the best T on validation.

    Uses the module-level ``x_train``/``y_train``, ``x_val``/``y_val`` and
    ``x_test``/``y_test`` data sets. Three figures are produced:

    1. decision boundaries for T in {1, 5, 10, 50, 100, 200};
    2. training and validation error as a function of T;
    3. decision boundaries of the classifier whose T minimises the
       validation error.

    Finally the selected T and its test error are printed.
    """
    T = [1, 5, 10, 50, 100, 200]
    # Evaluated ensemble sizes: 1, then every multiple of 5 up to 200.
    T_loop = [1, 5, 10] + [i * 5 for i in range(3, 41)]
    train_err = []
    valid_err = []

    plt.figure("decisions of the learned classifiers for T")
    num_graph = 0
    for t in T_loop:
        ada_boost = AdaBoost(DecisionStump, t)
        ada_boost.train(x_train, y_train)
        if t in T:
            num_graph += 1
            plt.subplot(3, 2, num_graph)
            decision_boundaries(ada_boost, x_train, y_train, "T = %d" % t)
        train_err.append(ada_boost.error(x_train, y_train))
        valid_err.append(ada_boost.error(x_val, y_val))

    plt.figure("training error and the validation error")
    # The ``hold`` keyword was removed from Matplotlib; the figure is new,
    # so there is nothing to clear and the argument is simply dropped.
    plt.plot(T_loop, train_err, 'ro-', label="Training Error")
    plt.plot(T_loop, valid_err, 'go-', label="Validation Error")
    plt.legend()
    plt.show()

    # Find the T with minimal validation error and plot it with the
    # training data.
    plt.figure("decision boundaries of T min, with the training data")
    # Look the value up in T_loop: ``5 * index`` is wrong for index 0,
    # where the evaluated T is 1 (it would yield T = 0).
    T_hat = T_loop[int(np.argmin(valid_err))]
    ada_boost = AdaBoost(DecisionStump, T_hat)
    ada_boost.train(x_train, y_train)
    test_err = ada_boost.error(x_test, y_test)
    decision_boundaries(ada_boost, x_train, y_train, "T = %d" % T_hat)
    plt.show()
    print ("The value of T that minimizes the validation error is: ", T_hat)
    print("the test error of the corresponding classifier is: ", test_err)
def Q8():
    """Plot AdaBoost training and test error for T = 1..500 (no noise).

    A 500-round AdaBoost model with decision stumps is trained on 5000
    generated samples and evaluated on 200 freshly generated samples. The
    figure is saved as ``Q8`` and then shown.
    """
    n_rounds = 500
    X, y = generate_data(5000, 0)
    h = AdaBoost(DecisionStump, n_rounds)
    h.train(X, y)
    training_err = np.zeros((n_rounds, ))
    test_err = np.zeros((n_rounds, ))
    test_set, labels = generate_data(200, 0)
    for t in range(1, n_rounds + 1):
        training_err[t - 1] = h.error(X, y, t)
        test_err[t - 1] = h.error(test_set, labels, t)
    # Entry i holds the error for T = i + 1, so the x-axis must run over
    # 1..n_rounds rather than 0..n_rounds-1.
    rounds = range(1, n_rounds + 1)
    plt.plot(rounds, training_err, label='Training error')
    plt.plot(rounds, test_err, label='Test error')
    plt.title('question 8')
    plt.legend(loc='upper right')
    plt.xlabel('T')
    plt.ylabel('Error rate')
    plt.savefig('Q8')
    plt.show()
def Q3(path="/cs/usr/kotek/PycharmProjects/iml_ex4/SynData/"):  # AdaBoost
    """Plot AdaBoost train/validation error and decision boundaries.

    Parameters
    ----------
    path : str
        Directory containing ``X_<split>.txt`` / ``y_<split>.txt`` for the
        ``train``, ``val`` and ``test`` splits. Defaults to the location
        that used to be hard-coded, so existing ``Q3()`` calls still work.

    First part: trains one model for every T in 5, 10, ..., 100 and 200 and
    plots training and validation error against T.
    Second part: draws the decision boundaries on the training data for
    T in {1, 5, 10, 100, 200} in a 2x3 subplot grid.
    """
    import os

    def _load(split):
        # os.path.join tolerates a directory given with or without a
        # trailing separator.
        return read_from_txt(os.path.join(path, "X_" + split + ".txt"),
                             os.path.join(path, "y_" + split + ".txt"))

    X_train, y_train = _load("train")
    X_val, y_val = _load("val")
    # Loaded as before, although nothing below reads the test split.
    X_test, y_test = _load("test")

    # -------- First part --------
    T = np.append(np.arange(5, 105, step=5), np.array([200]))
    training_err = np.zeros(len(T))
    validation_err = np.zeros(len(T))

    # adaBoost uses a weighted trainer (WL)
    WL = ex4_tools.DecisionStump

    for i, t in enumerate(T):
        adaboost = AdaBoost(WL, t)
        adaboost.train(X_train, y_train)
        training_err[i] = adaboost.error(X_train, y_train)
        validation_err[i] = adaboost.error(X_val, y_val)

    plt.plot(T, training_err, label="train error")
    plt.plot(T, validation_err, label="validation error")
    plt.legend()
    plt.show()

    # -------- Second part --------
    decision_T = [1, 5, 10, 100, 200]
    plt.figure()
    plt.ion()
    for idx, t in enumerate(decision_T):
        adaboost = AdaBoost(WL, t)
        adaboost.train(X_train, y_train)
        plt.subplot(2, 3, idx + 1)
        ex4_tools.decision_boundaries(adaboost, X_train, y_train,
                                      "T=" + str(t))
    # Interactive mode is on, so show() returns at once; pause keeps the
    # window alive for a few seconds.
    plt.show()
    plt.pause(5)
def Q_adaboost(noise_ratio):
    """Run the AdaBoost experiments for a given label-noise ratio.

    Trains a 500-round AdaBoost model with decision stumps on 5000
    generated samples, evaluates it on 200 generated samples and shows:

    1. training and test error for T = 1..500;
    2. decision boundaries on the test set for T in
       {5, 10, 50, 100, 200, 500};
    3. decision boundaries on the training set for the T with minimal
       test error;
    4. the training set weighted by ``classifier.D_of_last_iteration``,
       once raw and once rescaled so the largest weight equals 100.

    Parameters
    ----------
    noise_ratio : float
        Passed through to ``generate_data`` for both data sets.
    """
    n_rounds = 500
    X_train, y_train = generate_data(5000, noise_ratio)
    classifier = AdaBoost(DecisionStump, n_rounds)
    classifier.train(X_train, y_train)
    X_test, y_test = generate_data(200, noise_ratio)

    vals = np.arange(1, n_rounds + 1)
    train_errors = [classifier.error(X_train, y_train, t) for t in vals]
    # Computed once and reused for the minimum search below.
    test_errors = [classifier.error(X_test, y_test, t) for t in vals]
    plt.plot(vals, train_errors, label='Training Error', lw=1, alpha=0.6)
    plt.plot(vals, test_errors, label='Test Error', lw=1, alpha=0.6)
    plt.legend()
    plt.title(
        f'Adaboost Training & Test Error according to T, noise={noise_ratio}')
    plt.show()

    boosts = [5, 10, 50, 100, 200, 500]
    for i, boost in enumerate(boosts, start=1):
        plt.subplot(2, 3, i)
        decision_boundaries(classifier, X_test, y_test, boost)
        plt.title(f'T={boost}, noise={noise_ratio}')
    plt.show()

    # argmin yields a 0-based position; the matching number of learners is
    # vals[position] (= position + 1), not the position itself.
    best_idx = int(np.argmin(test_errors))
    min_t = int(vals[best_idx])
    min_err = test_errors[best_idx]
    decision_boundaries(classifier, X_train, y_train, min_t)
    plt.title(f'min test_err {min_err} T={min_t} noise {noise_ratio}')
    plt.show()

    # Use all n_rounds learners so the plots match their "T=500" titles.
    weights = classifier.D_of_last_iteration
    decision_boundaries(classifier, X_train, y_train, n_rounds, weights)
    plt.title(f'un-normalized weighed sample T=500, noise={noise_ratio}')
    plt.show()
    decision_boundaries(classifier, X_train, y_train, n_rounds,
                        weights / np.max(weights) * 100)
    plt.title(f'normalized weighed sample T=500, noise={noise_ratio}')
    plt.show()
def Q17():
    """Plot train vs. test error of the AdaBoost face classifier.

    Loads the images from ``'../Docs/'``, converts them with
    ``integral_image``, trains a 50-round AdaBoost model with
    ``WeakImageClassifier`` and saves the error curves for T = 1..50 to
    ``FIG_DIR3 + 'q17'``.
    """
    train_images, test_images, train_labels, test_labels = load_images(
        '../Docs/')
    train_images = integral_image(train_images)
    test_images = integral_image(test_images)

    WL, T = WeakImageClassifier, 50
    ada = AdaBoost(WL, T)
    ada.train(train_images, train_labels)

    # Include T itself so the curve ends at the full ensemble.
    # NOTE(review): assumes ada.error accepts t == T -- confirm.
    T_range = np.arange(1, T + 1)
    train_errs = [ada.error(train_images, train_labels, t) for t in T_range]
    test_errs = [ada.error(test_images, test_labels, t) for t in T_range]

    fig = plt.figure()
    fig.suptitle("Train vs Test error, Face Classifier")
    plt.xlabel('# of Hypotheses (T)')
    plt.ylabel('Error rate (%)')
    plt.plot(T_range, train_errs, label='Train Error')
    plt.plot(T_range, test_errs, label='Test Error')
    plt.legend()
    plt.savefig(FIG_DIR3 + 'q17')
def Q9():
    """Draw decision boundaries for every ensemble size in the global ``T``.

    A 500-round AdaBoost model with decision stumps is trained on 300
    generated noiseless samples. For each entry of ``T`` one panel of a
    3x2 grid is drawn and the error on the same samples is recorded. The
    figure is saved as ``Q9`` and shown.

    Returns
    -------
    numpy.ndarray
        The recorded errors, in the order of ``T``.
    """
    samples, targets = generate_data(300, 0)
    booster = AdaBoost(DecisionStump, 500)
    booster.train(samples, targets)

    errors = [0 for _ in range(len(T))]
    figure = plt.figure(figsize=(10, 10))
    for panel, rounds in enumerate(T, start=1):
        figure.add_subplot(3, 2, panel)
        errors[panel - 1] = booster.error(samples, targets, rounds)
        decision_boundaries(booster, samples, targets, rounds)

    plt.savefig('Q9')
    plt.show()
    return np.array(errors)
def Q8(noise=0.0):
    """Train AdaBoost on generated data and save its error curves.

    Parameters
    ----------
    noise : float
        Noise ratio passed to ``generate_data`` for both data sets; it is
        also appended to the saved figure's file name when non-zero.

    Returns
    -------
    tuple
        ``(ada, test_X, test_y, train_X, train_y)`` -- the trained model and
        the generated test and training sets.
    """
    n_samples_train, n_samples_test, T = 5000, 200, 500
    train_X, train_y = generate_data(n_samples_train, noise)
    test_X, test_y = generate_data(n_samples_test, noise)

    WL = DecisionStump
    ada = AdaBoost(WL, T)
    ada.train(train_X, train_y)

    # Include T itself so the curve ends at the full ensemble.
    # NOTE(review): assumes ada.error accepts t == T -- confirm.
    T_range = np.arange(1, T + 1)
    train_errs = [ada.error(train_X, train_y, t) for t in T_range]
    test_errs = [ada.error(test_X, test_y, t) for t in T_range]

    fig = plt.figure()
    fig.suptitle("Train vs Test error, Adaboost")
    plt.xlabel('# of Hypotheses (T)')
    plt.ylabel('Error rate (%)')
    plt.plot(T_range, train_errs, label='Train Error')
    plt.plot(T_range, test_errs, label='Test Error')
    plt.legend()
    suffix = '' if noise == 0 else '_' + str(noise).replace('.', '_')
    plt.savefig(FIG_DIR3 + 'q8' + suffix)

    return ada, test_X, test_y, train_X, train_y
return np.loadtxt(_get_file_path('X_' + name)), np.loadtxt( _get_file_path('y_' + name)) if __name__ == '__main__': X_train, y_train = _load_data('train') X_val, y_val = _load_data('val') T_values = range(5, 200, 5) validation_error = [] training_error = [] for t in T_values: ada_boost = AdaBoost(DecisionStump, t) ada_boost.train(X_train, y_train) validation_error.append(ada_boost.error(X_val, y_val)) training_error.append(ada_boost.error(X_train, y_train)) training_error_plot, = plot(T_values, training_error, linestyle='--', label='training_error') validation_error_plot, = plot(T_values, validation_error, linestyle='--', label='validation_error') legend(handles=[training_error_plot, validation_error_plot]) title('training and validation error vs T values') xlabel('T values')
class ex5:
    """Experiments comparing a perceptron with a hard-margin SVM, and
    AdaBoost experiments on generated data.

    Samples for the first part are drawn from a 2-D standard normal
    distribution and labelled by ``sign(<(0.3, -0.5), x> + 0.1)``.
    """

    # Parameters of the true labelling hyperplane.
    _TRUE_W = [0.3, -0.5]
    _TRUE_B = 0.1
    # Number of boosting rounds of the model trained in q_8.
    _N_ROUNDS = 500
    # Accuracy estimation: number of repetitions and samples per repetition.
    _N_ACC_TRIALS = 500
    _N_ACC_SAMPLES = 10000

    def __init__(self):
        self.mean = [0, 0]
        self.cov = np.eye(2)
        self.svm = SVC(C=1e10, kernel='linear')
        self.perceptron = None
        self.a_boost = None
        self.svm_accs = []
        self.perceptrone_accs = []
        self.ms = [5, 10, 15, 25, 70]
        self.ts = [5, 10, 50, 100, 200, 500]

    def q_3_4_5(self):
        """Fit both classifiers for every m in ``self.ms`` and plot their
        mean accuracies against m."""
        for m in self.ms:
            self.calculate_for_m(m)
        plt.plot(self.ms, self.perceptrone_accs)
        plt.plot(self.ms, self.svm_accs)
        plt.legend(("perceptron", "svm"))
        plt.show()

    def calculate_for_m(self, m):
        """Draw m samples, fit perceptron and SVM, plot the three separating
        lines and record both classifiers' accuracies."""
        x = np.random.multivariate_normal(self.mean, self.cov, m)
        real_labels = self.get_real_labels(x)
        labeled_1_x, labeled_min_1_x = self.get_x_by_labels(x, real_labels)
        t = np.arange(int(x.min()) - 1, int(x.max()) + 1, 0.1)
        self.plt_xs(labeled_1_x, labeled_min_1_x, t)

        self.perceptron = Perceptron()
        # The returned weight vector is read as (w..., bias).
        perc_w = self.perceptron.fit(x, real_labels)
        plt.plot(t, self.get_y(perc_w[:-1], perc_w[-1], t),
                 label="perceptron")

        self.svm.fit(x, real_labels)
        # Public fitted attributes instead of the private _get_coef().
        plt.plot(t, self.get_y(self.svm.coef_[0], self.svm.intercept_[0], t),
                 label="svm")
        # Labels are attached to the lines themselves (see plt_xs); a bare
        # list of labels could be matched to the scatter plots instead.
        plt.legend()
        plt.show()
        self.calculate_svm_perc_acc()

    def get_real_labels(self, x):
        """Return the list of true labels ``f(sample)`` for every sample."""
        return [self.f(sample) for sample in x]

    def get_x_by_labels(self, x, labels):
        """Split samples into those labelled 1.0 and those labelled -1.0.

        Samples with any other label are dropped. Returns the pair
        ``(positives, negatives)`` as lists.
        """
        x_1, x_minus_1 = [], []
        for i, label in enumerate(labels):
            if label == 1.0:
                x_1.append(x[i])
            elif label == -1.0:
                x_minus_1.append(x[i])
        return x_1, x_minus_1

    def f(self, x):
        """True labelling function: ``sign(<w, x> + b)``."""
        return np.sign(np.dot(self._TRUE_W, x) + self._TRUE_B)

    def plt_xs(self, labeled_1_x, labeled_min_1_x, t):
        """Scatter both classes and draw the true separating line over t."""
        plt.scatter([x[0] for x in labeled_1_x], [x[1] for x in labeled_1_x])
        plt.scatter([x[0] for x in labeled_min_1_x],
                    [x[1] for x in labeled_min_1_x])
        plt.plot(t, self.get_y(self._TRUE_W, self._TRUE_B, t),
                 label="True labels")

    def get_y(self, w, b, x):
        """Return the second coordinate of the line ``<w, (x, y)> + b = 0``
        for every first coordinate in x, as a list."""
        return [-w[0] * i / w[1] + b / -w[1] for i in x]

    def calculate_svm_perc_acc(self):
        """Append the mean accuracies of the current fits to the result
        lists."""
        s, p = self.get_svm_prec_acc()
        self.perceptrone_accs.append(p / self._N_ACC_TRIALS)
        self.svm_accs.append(s / self._N_ACC_TRIALS)

    def get_svm_prec_acc(self):
        """Return summed (svm, perceptron) scores over repeated fresh
        samples labelled by ``f``."""
        svm_acc, perceptrone_acc = 0, 0
        for _ in range(self._N_ACC_TRIALS):
            x = np.random.multivariate_normal(self.mean, self.cov,
                                              self._N_ACC_SAMPLES)
            real_labels = self.get_real_labels(x)
            svm_acc += self.svm.score(x, real_labels)
            perceptrone_acc += self.perceptron.score(x, real_labels)
        return svm_acc, perceptrone_acc

    def q_7_8_9_10(self):
        """Run the AdaBoost experiments in order."""
        self.q_8()
        self.q_9()
        self.q_10()

    def q_8(self):
        """Train AdaBoost and plot training/test error against T."""
        tx, ty = ex4_tools.generate_data(5000, noise_ratio=0)
        x, y = ex4_tools.generate_data(200, noise_ratio=0)
        self.a_boost = AdaBoost(WL=ex4_tools.DecisionStump, T=self._N_ROUNDS)
        self.a_boost.train(tx, ty)
        training_errs, test_errs = self.get_ab_errs(tx, ty, x, y)
        self.plt_q_8(training_errs, test_errs)

    def get_ab_errs(self, tx, ty, x, y):
        """Return ``(training_errs, test_errs)`` of ``self.a_boost`` for
        T = 1.._N_ROUNDS.

        The range starts at 1: T = 0 is not a meaningful ensemble, and
        stopping at _N_ROUNDS - 1 would leave out the full model.
        NOTE(review): presumes the third argument of ``error`` is the number
        of weak learners to use -- confirm against AdaBoost.
        """
        rounds = range(1, self._N_ROUNDS + 1)
        training_errs = [self.a_boost.error(tx, ty, t) for t in rounds]
        test_errs = [self.a_boost.error(x, y, t) for t in rounds]
        return training_errs, test_errs

    def plt_q_8(self, training_errs, test_errs):
        """Plot both error curves; entry i is drawn at T = i + 1."""
        plt.plot(np.arange(1, len(training_errs) + 1), training_errs,
                 label="training error")
        plt.plot(np.arange(1, len(test_errs) + 1), test_errs,
                 label="test error")
        plt.title("Adaboost errors as function of (T)")
        plt.legend()
        plt.show()

    def q_9(self):
        """Plot decision boundaries on test data for every T in
        ``self.ts``."""
        tx, ty = ex4_tools.generate_data(5000, noise_ratio=0)
        x, y = ex4_tools.generate_data(200, noise_ratio=0)
        for i, t in enumerate(self.ts, start=1):
            a_boost = AdaBoost(WL=ex4_tools.DecisionStump, T=t)
            a_boost.train(tx, ty)
            plt.subplot(2, 3, i)
            ex4_tools.decision_boundaries(a_boost, x, y, t)
        plt.show()

    def q_10(self):
        """Plot the training-set decision boundaries of the T in ``self.ts``
        that has the lowest test error."""
        tx, ty = ex4_tools.generate_data(5000, noise_ratio=0)
        x, y = ex4_tools.generate_data(200, noise_ratio=0)
        errors = self.get_ab_errors(tx, ty, x, y)
        min_t = np.argmin(errors)
        best_t = self.ts[min_t]
        a_boost = AdaBoost(WL=ex4_tools.DecisionStump, T=best_t)
        a_boost.train(tx, ty)
        ex4_tools.decision_boundaries(a_boost, tx, ty, best_t)
        plt.title("min error is " + str(errors[min_t]) + " with " +
                  str(best_t) + " classifiers")
        plt.show()

    def get_ab_errors(self, tx, ty, x, y):
        """Return the error on (x, y) of a freshly trained model for every
        T in ``self.ts``."""
        errors = []
        for t in self.ts:
            a_boost = AdaBoost(WL=ex4_tools.DecisionStump, T=t)
            a_boost.train(tx, ty)
            errors.append(a_boost.error(x, y, t))
        return errors