########### AdaBoostClassifier on Real Input and Discrete Output ###################

# Synthetic two-class problem: N rows of P non-negative real-valued features.
N = 30
P = 2
NUM_OP_CLASSES = 2
n_estimators = 3
X = pd.DataFrame(np.abs(np.random.randn(N, P)))
y = pd.Series(np.random.randint(NUM_OP_CLASSES, size=N), dtype="category")

# `criteria` is only echoed in the report below; the depth-1 tree itself is
# constructed with criterion='entropy'.
criteria = 'information_gain'
Dtree = tree.DecisionTreeClassifier(criterion='entropy', max_depth=1)

# Fit the ensemble and predict on the same data it was trained on.
Classifier_AB = AdaBoostClassifier(
    base_estimator=Dtree,
    n_estimators=n_estimators,
)
Classifier_AB.fit(X, y)
y_hat = Classifier_AB.predict(X)
fig1, fig2 = Classifier_AB.plot()

# Report overall accuracy, then precision/recall once per observed class.
print('Criteria :', criteria)
print('Accuracy: ', accuracy(y_hat, y))
for cls in y.unique():
    print('Precision: ', precision(y_hat, y, cls))
    print('Recall: ', recall(y_hat, y, cls))

##### AdaBoostClassifier on Iris data set using the entire data set with sepal width and petal width as the two features

# NOTE(review): the definition below is cut off mid-loop in this excerpt, so it
# is preserved verbatim as text instead of being completed by guesswork.
# def load_iris(): X = [] y = [] with open('./dataset/iris.data') as csv_file: csv_reader = csv.reader(csv_file, delimiter=',') for row in csv_reader:
# y_hat = Classifier_AB.predict(X)
# # [fig1, fig2] = Classifier_AB.plot()
# print('Criteria :', criteria)
# print('Accuracy: ', accuracy(y_hat, y))
# for cls in np.unique(y):
#     print('Precision of {} is: '.format(cls), precision(y_hat, y, cls))
#     print('Recall of {} is: '.format(cls), recall(y_hat, y, cls))
# del(Classifier_AB)

##### AdaBoostClassifier on Iris data set using the entire data set with sepal width and petal width as the two features

# Load the data and pick out the two feature columns used by this demo.
da = pd.read_csv('iris.csv')
col1 = da["sepal_width"]
col2 = da["petal_width"]

# One-vs-rest target: "virginica" rows become +1, every other species -1.
label = np.where(np.array(da["species"]) == "virginica", 1, -1)

# Join the two feature columns on the row index and append the target as the
# last column, so that `:-1` selects features and `-1` selects the label.
iris = pd.merge(col1, col2, left_index=True, right_index=True)
iris["Truth"] = label

# Shuffle all rows, then take the first 60% for training and the rest for testing.
iris = iris.sample(frac=1).reset_index(drop=True)
split_at = int(0.6 * (iris.shape[0]))
X_train, y_train = iris.iloc[:split_at, :-1], iris.iloc[:split_at, -1]
X_test, y_test = iris.iloc[split_at:, :-1], iris.iloc[split_at:, -1]

# Fit a 3-estimator ensemble and evaluate it on the held-out rows.
Classifier_AB1 = AdaBoostClassifier(n_estimators=3)
Classifier_AB1.fit(X_train, y_train)
y_hat = Classifier_AB1.predict(X_test)

print(list(y_hat), list(y_test))
print("Accuracy: ", accuracy(y_hat, y_test))
Classifier_AB1.plot(X_test)
# Or you could import sklearn DecisionTree
from linearRegression.linearRegression import LinearRegression

# Fixed seed so the random data set below is the same on every run.
np.random.seed(42)

########### AdaBoostClassifier on Real Input and Discrete Output ###################

# Synthetic two-class problem: N rows of P non-negative real-valued features.
N = 30
P = 2
NUM_OP_CLASSES = 2
n_estimators = 3
X = pd.DataFrame(np.abs(np.random.randn(N, P)))
y = pd.Series(np.random.randint(NUM_OP_CLASSES, size=N), dtype="category")

# Depth-1 tree used as the base estimator of the ensemble.
criteria = 'information_gain'
tree = DecisionTree(criterion=criteria, max_depth=1)

# Fit the ensemble and predict on the same data it was trained on.
Classifier_AB = AdaBoostClassifier(
    base_estimator=tree,
    n_estimators=n_estimators,
)
Classifier_AB.fit(X, y)
y_hat = Classifier_AB.predict(X)
Classifier_AB.plot()
# #[fig1, fig2] = Classifier_AB.plot()

# Report overall accuracy, then precision/recall once per observed class.
print('Criteria :', criteria)
print('Accuracy: ', accuracy(y_hat, y))
print()
for cls in y.unique():
    print('Category: ', cls)
    print('Precision: ', precision(y_hat, y, cls))
    print('Recall: ', recall(y_hat, y, cls))
    # NOTE(review): the source formatting was collapsed; this blank-line
    # separator is assumed to belong to the loop body - confirm.
    print()
# Report for predictions held in `yhat`, which is produced before this excerpt.
print('Criteria :', criteria)
print('Accuracy: ', accuracy(yhat, y))
for cls in y.unique():
    print(f"***Class :{cls!s}***")
    print('Precision: ', precision(yhat, y, cls))
    print('Recall: ', recall(yhat, y, cls))

# Section banner for the AdaBoost run.
print("-----------------------------------------------------------")
print("Adaboost on random data")
print("-----------------------------------------------------------")

# Fit the ensemble on the same X/y and predict on the training data.
Classifier_AB = AdaBoostClassifier(
    base_estimator=tree,
    n_estimators=n_estimators,
)
Classifier_AB.fit(X, y)
y_hat = Classifier_AB.predict(X)
[fig1, fig2] = Classifier_AB.plot()

# Same report layout as above, now for the ensemble's predictions.
print('Criteria :', criteria)
print('Accuracy: ', accuracy(y_hat, y))
for cls in y.unique():
    print(f"***Class :{cls!s}***")
    print('Precision: ', precision(y_hat, y, cls))
    print('Recall: ', recall(y_hat, y, cls))

##### AdaBoostClassifier on Iris data set using the entire data set with sepal width and petal width as the two features
X = pd.read_csv("iris.data")
# NOTE(review): the text below is cut off in this excerpt (it ends on a bare
# `else:`), so it is preserved verbatim instead of being reconstructed.
# a = [] for i in range(5): if i == 4: a.append(X.columns[i]) else:
########### AdaBoostClassifier on Real Input and Discrete Output ###################

# Synthetic two-class problem: N rows of P non-negative real-valued features.
N = 30
P = 2
NUM_OP_CLASSES = 2
n_estimators = 5
X = pd.DataFrame(np.abs(np.random.randn(N, P)))
y = pd.Series(np.random.randint(NUM_OP_CLASSES, size=N), dtype="category")
y = y.cat.rename_categories([-1, 1])  # Changing 0 to -1 for adaboost

# `criteria` is only echoed in the report below. Note that the
# DecisionTreeClassifier class itself (not an instance) is handed over as the
# base estimator.
criteria = 'entropy'
tree = DecisionTreeClassifier

# Fit the ensemble and predict on the same data it was trained on.
Classifier_AB = AdaBoostClassifier(
    base_estimator=tree,
    n_estimators=n_estimators,
)
Classifier_AB.fit(X, y)
y_hat = Classifier_AB.predict(X)
fig1, fig2 = Classifier_AB.plot(X, y)

# Report overall accuracy, then precision/recall once per observed class.
print('Criteria :', criteria)
print('Accuracy: ', accuracy(y_hat, y))
for cls in y.unique():
    print('Precision: ', precision(y_hat, y, cls))
    print('Recall: ', recall(y_hat, y, cls))

# AdaBoostClassifier on Iris data set using the entire data set with sepal width and petal width as the two features
split = 0.6
iris = pd.read_csv(os.path.join("data", "iris.csv"))
iris["variety"] = iris["variety"].astype("category")

# Shuffle the rows, drop the last column, then keep only the two width features.
shuffled = iris.sample(frac=1).reset_index(drop=True)
X = shuffled.iloc[:, :-1].squeeze()[['sepal.width', "petal.width"]]