from sklearn.datasets import make_moons
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier

from ensemble.bagging import BaggingClassifier

# Load a noisy two-moons toy dataset and hold out a test split.
X, y = make_moons(n_samples=500, noise=0.30, random_state=42)
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42)

# base_estimator: the base learner to be bagged
# n_estimators: number of base learners
# max_samples: samples drawn per base learner; an int is an absolute count,
#              a float is the fraction of the training-set size
# bootstrap: True -> sampling with replacement (bagging); False -> pasting.
#            Defaults to True.
bag_clf = BaggingClassifier(
    DecisionTreeClassifier(),
    n_estimators=500,
    # Fix: was `sample_rate=0.2`, but the parameter documented above (and the
    # sklearn-style interface this mirrors) is `max_samples`.
    max_samples=0.2,
    bootstrap=True,
)

bag_clf.fit(X_train, y_train)
score = bag_clf.score(X_test, y_test)
print(score)
# dcf = bag_clf.decision_function(X_test)
# print(dcf)

###############可视化的代码####################

import matplotlib.pyplot as plt
from matplotlib.colors import ListedColormap
from sklearn.tree import DecisionTreeClassifier
import numpy as np
# ===== Example 2 (示例#2) — fragment begins mid-function; enclosing scope not shown =====
    # NOTE(review): this fragment starts mid-scope — `tic`, `model`, `records`,
    # `train_loader`/`test_loader`, `LeNet5`, `n_estimators`, `output_dim`,
    # `lr`, `weight_decay`, and `epochs` are all defined above this excerpt.
    model.fit(train_loader)
    toc = time.time()
    training_time = toc - tic  # wall-clock seconds spent fitting

    tic = time.time()
    # presumably returns test-set accuracy — confirm against the model API
    testing_acc = model.predict(test_loader)
    toc = time.time()
    evaluating_time = toc - tic  # wall-clock seconds spent evaluating

    # Record the benchmark row for the preceding VotingClassifier run.
    records.append(
        ('VotingClassifier', training_time, evaluating_time, testing_acc))

    # BaggingClassifier: repeat the same fit/predict timing protocol with a
    # bagging ensemble of LeNet-5 base learners.
    model = BaggingClassifier(estimator=LeNet5,
                              n_estimators=n_estimators,
                              output_dim=output_dim,
                              lr=lr,
                              weight_decay=weight_decay,
                              epochs=epochs)

    tic = time.time()
    model.fit(train_loader)
    toc = time.time()
    training_time = toc - tic  # training wall-clock time

    tic = time.time()
    testing_acc = model.predict(test_loader)
    toc = time.time()
    evaluating_time = toc - tic  # evaluation wall-clock time

    records.append(
        ('BaggingClassifier', training_time, evaluating_time, testing_acc))
from ensemble.bagging import BaggingClassifier
from tree.base import DecisionTree
# Or use sklearn decision tree
from linearRegression.linearRegression import LinearRegression

########### BaggingClassifier ###################

# Synthetic problem: N samples with P non-negative real features and labels
# drawn uniformly from NUM_OP_CLASSES categories.
N = 30
P = 2
NUM_OP_CLASSES = 2
n_estimators = 3
X = pd.DataFrame(np.abs(np.random.randn(N, P)))
y = pd.Series(np.random.randint(NUM_OP_CLASSES, size=N), dtype="category")

criteria = 'information_gain'
base_learner = DecisionTree(criterion=criteria)
Classifier_B = BaggingClassifier(
    base_estimator=base_learner,
    n_estimators=n_estimators,
)
Classifier_B.fit(X, y)
y_hat = Classifier_B.predict(X)
#[fig1, fig2] =
Classifier_B.plot()

# Report overall accuracy, then per-class precision/recall.
print('Criteria :', criteria)
print('Accuracy: ', accuracy(y_hat, y))
print()
for cls in y.unique():
    print('Class: ', cls)
    cls_precision = precision(y_hat, y, cls)
    print('Precision: ', cls_precision)
    cls_recall = recall(y_hat, y, cls)
    print('Recall: ', cls_recall)
    print()
# ===== Example 4 (示例#4) =====
from ensemble.bagging import BaggingClassifier
from sklearn.datasets import load_iris
from tree.tree import DecisionTreeClassifier

# Bag three C4.5 decision trees and fit them on the iris dataset.
base_classifier = DecisionTreeClassifier(criterion='C4.5')
classifier1 = BaggingClassifier(base_estimator=base_classifier, n_estimators=3)
iris_data = load_iris()
# Fix: load_iris() returns a Bunch whose feature matrix is `.data`;
# `iris_data.values` resolves to the dict `.values` method, so the original
# `iris_data.values[:2, :]` raised TypeError — and even if it had sliced,
# 2 rows of X would mismatch the 150 labels in y below.
X = iris_data.data
y = iris_data.target
classifier1.fit(X, y)
# ===== Example 5 (示例#5) =====
from tree.base import DecisionTree

from sklearn.tree import DecisionTreeClassifier
# Or use sklearn decision tree
# from linearRegression.linearRegression import LinearRegression

########### BaggingClassifier ###################

# Random toy dataset: 50 samples, 2 non-negative features, binary labels.
N = 50
P = 2
NUM_OP_CLASSES = 2
n_estimators = 5
X = pd.DataFrame(np.abs(np.random.randn(N, P)))
y = pd.Series(np.random.randint(NUM_OP_CLASSES, size=N), dtype="category")

criteria = 'information_gain'
# NOTE(review): the estimator *class* (not an instance) is handed to the
# bagger together with criterion="gini" — presumably the bagger instantiates
# one tree per learner; the `criteria` label printed below is cosmetic only.
estimator_cls = DecisionTreeClassifier
Classifier_B = BaggingClassifier(
    base_estimator=estimator_cls,
    n_estimators=n_estimators,
    criterion="gini",
)
Classifier_B.fit(X, y)
y_hat = Classifier_B.predict(X)
fig1, fig2 = Classifier_B.plot(X, y)

# Overall accuracy followed by per-class precision/recall.
print('Criteria :', criteria)
print('Accuracy: ', accuracy(y_hat, y))
for cls in y.unique():
    print('Precision: ', precision(y_hat, y, cls))
    print('Recall: ', recall(y_hat, y, cls))

print("Plots saved as Q6_Fig1.png and Q6_Fig2.png")
# ===== Example 6 (示例#6) =====
from linearRegression.linearRegression import LinearRegression

########### BaggingClassifier ###################

N = 30
P = 2  # X has exactly two feature columns (indices 0 and 1)
NUM_OP_CLASSES = 2
n_estimators = 6
X = pd.DataFrame(np.abs(np.random.randn(N, P)))
X_copy = X  # same object as X, not a copy
y = pd.Series(np.random.randint(NUM_OP_CLASSES, size=N), dtype="category")

criteria = 'information_gain'
# tree = DecisionTree(criterion=criteria)

# NOTE(review): base_estimator is the *string* 'tree', not an estimator
# instance — verify this BaggingClassifier accepts a string key.
Classifier_B = BaggingClassifier(base_estimator='tree',
                                 n_estimators=n_estimators)
Classifier_B.fit(X, y)
# NOTE(review): X_copy has only P=2 columns, so X_copy.columns[2] is out of
# range and this line raises IndexError; even with a valid index, predicting
# on fewer columns than were fit would mismatch feature counts. Looks like
# either a deliberate error probe or a bug — confirm intent before "fixing".
X_copy = X_copy.drop(X_copy.columns[2], axis=1)
y_hat = Classifier_B.predict(X_copy)
# Classifier_B.plot()
# [fig1, fig2] = Classifier_B.plot()
print('Criteria :', criteria)
print('Accuracy: ', accuracy(y_hat, y))
for cls in y.unique():
    print('Precision for', cls, ' : ', precision(y_hat, y, cls))
    print('Recall for', cls, ' : ', recall(y_hat, y, cls))
####################################  Reproducing Slides ###############

# X = []
# for i in range (1,9):