def fit(self, X, y, n_estimators):
    '''Train an AdaBoost ensemble of decision stumps.

    Parameters
    ----------
    X : shape (n_samples, n_features)
        Training data
    y : shape (n_samples,)
        Target values, 1 or -1
    n_estimators : The number of estimators at which boosting is terminated
    '''
    n_samples = X.shape[0]
    self.__alpha = np.zeros(n_estimators)
    self.__estimators = []

    # Start every sample with an equal weight.
    w = np.full(n_samples, 1 / n_samples)

    for round_idx in range(n_estimators):
        stump = decision_stump.DecisionStump()
        stump.fit(X, y, w)
        predictions = stump.predict(X)

        # Weighted error rate of this stump; the 1e-8 keeps the next
        # division finite when the stump classifies everything correctly.
        eta = np.sum(w[np.flatnonzero(predictions != y)]) / np.sum(w)
        beta = np.sqrt((1 - eta) / (eta + 1e-8))

        # Up-weight misclassified samples (y*h = -1), down-weight correct ones.
        w *= beta ** (-y * predictions)

        # log(beta) = 0.5 * ln((1-eta)/eta) — the classic AdaBoost alpha.
        self.__alpha[round_idx] = np.log(beta)
        self.__estimators.append(stump)
def check_decision_stump():
    """Score a DecisionStump implementation against a fixed random fixture.

    Seeds NumPy's RNG, builds a 5x5 random sample, and asks a
    DecisionStump(1, 0.5, 3) to predict on it.

    Returns
    -------
    float
        0.5 when the predictions squeeze to [1, 1, 1, -1, -1];
        0 otherwise, including when the module cannot be imported
        or predict() raises.
    """
    np.random.seed(8)
    test_sample = np.random.rand(5, 5)
    try:
        import decision_stump
        test_decision_stump = decision_stump.DecisionStump(1, 0.5, 3)
        predictions = test_decision_stump.predict(test_sample.tolist())
    except Exception:
        # Was a bare `except:` — that also swallowed SystemExit and
        # KeyboardInterrupt. Any ordinary failure still scores 0.
        return 0
    if np.squeeze(predictions).tolist() == [1, 1, 1, -1, -1]:
        score = 0.5
    else:
        score = 0
    return round(score, 1)
# Sanity-check the upstream 80/20 train/test split with 4 features per sample.
assert len(X_train) == 80
assert len(X_train[0]) == 4
assert len(X_test) == 20
assert len(X_test[0]) == 4
assert len(y_train) == 80
assert len(y_test) == 20

# Build the pool of weak classifiers: one stump per
# (sign, threshold, feature-dimension) combination.
s_set = {1, -1}
b_set = set(np.linspace(0, 10, 51))
d_set = {0, 1, 2, 3}
h_set = {
    decision_stump.DecisionStump(s, b, d)
    for s in s_set
    for b in b_set
    for d in d_set
}

# Train one AdaBoost ensemble for each boosting-round budget T.
Adas = []
for T in [10, 20, 30]:
    ada = boosting.AdaBoost(h_set, T=T)
    ada.train(X_train, y_train)
    Adas.append(ada)

# Evaluate every ensemble on the held-out test set.
Ada_preds = []
Ada_accus = []
for Ada in Adas:
    pred = Ada.predict(X_test)
    Ada_preds.append(pred)
    # NOTE(review): args are (pred, y_test); sklearn's signature is
    # (y_true, y_pred), but accuracy is symmetric so the value is the same.
    Ada_accus.append(accuracy_score(pred, y_test))