########### AdaBoostClassifier on Real Input and Discrete Output ###################

# Problem size: N samples with P real-valued features, NUM_OP_CLASSES
# label values and an ensemble of n_estimators weak learners.
N = 30
P = 2
NUM_OP_CLASSES = 2
n_estimators = 3

# Features are the absolute values of standard-normal draws; labels are
# random integers in [0, NUM_OP_CLASSES) stored as a categorical Series.
X = pd.DataFrame(np.random.randn(N, P)).abs()
y = pd.Series(np.random.randint(0, NUM_OP_CLASSES, size=N), dtype="category")

# `criteria` is only the label printed in the report below; the depth-1
# tree itself is built with criterion='entropy'.
criteria = 'information_gain'
Dtree = tree.DecisionTreeClassifier(max_depth=1, criterion='entropy')
Classifier_AB = AdaBoostClassifier(n_estimators=n_estimators,
                                   base_estimator=Dtree)
Classifier_AB.fit(X, y)
y_hat = Classifier_AB.predict(X)  # predictions on the same data used to fit
fig1, fig2 = Classifier_AB.plot()

# Report overall accuracy, then precision and recall for every label value.
print('Criteria :', criteria)
print('Accuracy: ', accuracy(y_hat, y))
for cls in y.unique():
    for caption, metric in (('Precision: ', precision), ('Recall: ', recall)):
        print(caption, metric(y_hat, y, cls))

##### AdaBoostClassifier on Iris data set using the entire data set with sepal width and petal width as the two features


def load_iris():
    X = []
    y = []
    with open('./dataset/iris.data') as csv_file:
        csv_reader = csv.reader(csv_file, delimiter=',')
        for row in csv_reader:
# y_hat = Classifier_AB.predict(X)
# # [fig1, fig2] = Classifier_AB.plot()
# print('Criteria :', criteria)
# print('Accuracy: ', accuracy(y_hat, y))
# for cls in np.unique(y):
#     print('Precision of {} is: '.format(cls), precision(y_hat, y, cls))
#     print('Recall of {} is: '.format(cls), recall(y_hat, y, cls))

# del(Classifier_AB)

##### AdaBoostClassifier on Iris data set using the entire data set with sepal width and petal width as the two features
# Read the iris table and pick out the two width columns used as features.
da = pd.read_csv('iris.csv')
col1 = da["sepal_width"]
col2 = da["petal_width"]

# One-vs-rest target: +1 where the species is "virginica", -1 otherwise.
label = np.where(np.array(da["species"]) == "virginica", 1, -1)

# Put the two features side by side and append the target as the last column.
iris = pd.concat([col1, col2], axis=1)
iris["Truth"] = label

# Shuffle the rows, then take the first 60% for training and the remainder
# for testing; the last column is the target, everything before it is input.
iris = iris.sample(frac=1).reset_index(drop=True)
split_at = int(0.6 * len(iris))
train_rows, test_rows = iris.iloc[:split_at], iris.iloc[split_at:]
X_train, y_train = train_rows.iloc[:, :-1], train_rows.iloc[:, -1]
X_test, y_test = test_rows.iloc[:, :-1], test_rows.iloc[:, -1]

# Fit a three-estimator ensemble and evaluate it on the held-out rows.
Classifier_AB1 = AdaBoostClassifier(n_estimators=3)
Classifier_AB1.fit(X_train, y_train)
y_hat = Classifier_AB1.predict(X_test)
print(list(y_hat), list(y_test))
print("Accuracy: ", accuracy(y_hat, y_test))
Classifier_AB1.plot(X_test)
# Example #3
0
# Or you could import sklearn DecisionTree
from linearRegression.linearRegression import LinearRegression

# Fix the NumPy RNG seed so the random data set below is reproducible.
np.random.seed(42)

########### AdaBoostClassifier on Real Input and Discrete Output ###################

# Problem size: N samples with P real-valued features, NUM_OP_CLASSES
# label values and an ensemble of n_estimators weak learners.
N = 30
P = 2
NUM_OP_CLASSES = 2
n_estimators = 3

# Features are the absolute values of standard-normal draws; labels are
# random integers in [0, NUM_OP_CLASSES) stored as a categorical Series.
X = pd.DataFrame(np.abs(np.random.randn(N, P)))
y = pd.Series(np.random.randint(NUM_OP_CLASSES, size=N), dtype="category")

# Depth-1 tree used as the base estimator; `criteria` is both passed to the
# tree and printed in the reports below.
criteria = 'information_gain'
tree = DecisionTree(criterion=criteria, max_depth=1)
Classifier_AB = AdaBoostClassifier(base_estimator=tree,
                                   n_estimators=n_estimators)
Classifier_AB.fit(X, y)
y_hat = Classifier_AB.predict(X)  # predictions on the same data used to fit
Classifier_AB.plot()

# First report: accuracy, then per-category precision and recall.
print('Criteria :', criteria)
print('Accuracy: ', accuracy(y_hat, y))
print()
for cls in y.unique():
    print('Category: ', cls)
    print('Precision: ', precision(y_hat, y, cls))
    print('Recall: ', recall(y_hat, y, cls))
    print()

# Second report in the "***Class***" layout. It must read `y_hat`, the only
# predictions bound above; the previous spelling `yhat` was never assigned
# and raised NameError.
print('Criteria :', criteria)
print('Accuracy: ', accuracy(y_hat, y))
for cls in y.unique():
    print("***Class :" + str(cls) + "***")
    print('Precision: ', precision(y_hat, y, cls))
    print('Recall: ', recall(y_hat, y, cls))

# Banner announcing the run on the random data set.
for banner_line in (
    "-----------------------------------------------------------",
    "Adaboost on random data",
    "-----------------------------------------------------------",
):
    print(banner_line)

# Build a fresh ensemble around the same base estimator and fit it on X, y.
Classifier_AB = AdaBoostClassifier(n_estimators=n_estimators,
                                   base_estimator=tree)
Classifier_AB.fit(X, y)
y_hat = Classifier_AB.predict(X)  # predictions on the same data used to fit
[fig1, fig2] = Classifier_AB.plot()

# Report overall accuracy, then precision and recall for every label value.
print('Criteria :', criteria)
print('Accuracy: ', accuracy(y_hat, y))
for cls in y.unique():
    print("***Class :" + str(cls) + "***")
    for caption, metric in (('Precision: ', precision), ('Recall: ', recall)):
        print(caption, metric(y_hat, y, cls))

##### AdaBoostClassifier on Iris data set using the entire data set with sepal width and petal width as the two features
X = pd.read_csv("iris.data")
#
a = []
for i in range(5):
    if i == 4:
        a.append(X.columns[i])
    else:
# Example #5
0
########### AdaBoostClassifier on Real Input and Discrete Output ###################
# Problem size: N samples with P real-valued features, NUM_OP_CLASSES
# label values and an ensemble of n_estimators weak learners.
N = 30
P = 2
NUM_OP_CLASSES = 2
n_estimators = 5

# Features are the absolute values of standard-normal draws. Labels are drawn
# as random integers in [0, NUM_OP_CLASSES) and their categories are then
# renamed to -1 and 1 for adaboost.
X = pd.DataFrame(np.random.randn(N, P)).abs()
y = pd.Series(
    np.random.randint(0, NUM_OP_CLASSES, size=N), dtype="category"
).cat.rename_categories([-1, 1])

# `criteria` is only the label printed in the report below. Note that the
# DecisionTreeClassifier class object itself, not an instance, is handed to
# the ensemble as its base estimator.
criteria = 'entropy'
tree = DecisionTreeClassifier
Classifier_AB = AdaBoostClassifier(n_estimators=n_estimators,
                                   base_estimator=tree)
Classifier_AB.fit(X, y)
y_hat = Classifier_AB.predict(X)  # predictions on the same data used to fit
fig1, fig2 = Classifier_AB.plot(X, y)

# Report overall accuracy, then precision and recall for every label value.
print('Criteria :', criteria)
print('Accuracy: ', accuracy(y_hat, y))
for cls in y.unique():
    for caption, metric in (('Precision: ', precision), ('Recall: ', recall)):
        print(caption, metric(y_hat, y, cls))

# AdaBoostClassifier on Iris data set using the entire data set with sepal width and petal width as the two features
split = 0.6

# Read data/iris.csv and store the "variety" column as a categorical.
iris_path = os.path.join("data", "iris.csv")
iris = pd.read_csv(iris_path)
iris["variety"] = iris["variety"].astype("category")

# Shuffle all rows and renumber them from zero.
shuffled = iris.sample(frac=1).reset_index(drop=True)

# Drop the last column, then keep only the two width features.
X = shuffled.iloc[:, :-1].squeeze()[['sepal.width', "petal.width"]]