Пример #1
0
 def test_adaboost(self):
     """Train the custom AdaBoost on the horse-colic data and require >0.7 accuracy."""
     train_X, train_y, test_X, test_y = loadHorseColic()
     model = AdaBoostClassifier()
     model.fit(train_X, train_y)
     predictions = model.predict(test_X)
     score = accuracy_score(predictions, test_y)
     print(score)
     assert score > 0.7
Пример #2
0
def train(X_train, y_train):
    """Fit an AdaBoost ensemble of depth-1 decision stumps and persist it.

    The trained model is written to ``adb_model_path`` (a module-level
    path); the directory is created first if it does not exist yet.
    """
    model = AdaBoostClassifier(DecisionTreeClassifier(max_depth=1),
                               n_weakers_limit=20)
    print("Training a AdaBoost Classifier.")
    model.fit(X_train, y_train)
    # Make sure the directory that will hold the model file exists.
    model_dir = os.path.split(adb_model_path)[0]
    if not os.path.isdir(model_dir):
        os.makedirs(model_dir)
    model.save(model, adb_model_path)
Пример #3
0
def process_boost():
    """Train AdaBoost on the face dataset and dump a classification report."""
    x_train, y_train, x_valid, y_valid = load_and_split()
    booster = AdaBoostClassifier(DecisionTreeClassifier, 20)
    booster.fit(x_train, y_train)
    # Predict on the validation split and summarise per-class metrics.
    predictions = booster.predict(x_valid)
    report = classification_report(y_valid,
                                   predictions,
                                   target_names=['face', 'non_face'])
    with open("D:/testing/python/classifier_report.txt", "w") as f:
        f.write(report)
Пример #4
0
def test_breast_cancer():
    """Compare the custom AdaBoost with sklearn's on the breast-cancer set.

    Trains both classifiers on the same train/test split and prints a
    classification report for each so the two can be compared by eye.
    """
    clf = AdaBoostClassifier(n_weakers_limit=50)
    # `return_X_y` is keyword-only in modern scikit-learn; passing `True`
    # positionally raises a TypeError there, so name the argument.
    X, y = load_breast_cancer(return_X_y=True)
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.33, random_state=42)

    clf.fit(X_train, y_train)
    y_pred = clf.predict(X_test)
    print(classification_report(y_test, y_pred))

    # Baseline: sklearn's own AdaBoost with default settings.
    skclf = SkAdaBoostClassifier()
    skclf.fit(X_train, y_train)
    print(classification_report(y_test, skclf.predict(X_test)))
Пример #5
0
    def train(train_X, train_y):
        """Fit a 5-round AdaBoost of depth-3 trees and report training error.

        Writes a classification report for the *training* set to the
        hard-coded ``re_path`` and returns the fitted classifier.
        """
        weak_classifier = DecisionTreeClassifier(max_depth=3)
        ada = AdaBoostClassifier(weak_classifier, 5)
        ada.fit(train_X, train_y)
        result = ada.predict(train_X)
        # Count predictions whose absolute deviation from the label exceeds
        # `ep` (`ep` is defined elsewhere in this module — presumably a small
        # tolerance for the ±1 labels; TODO confirm).
        diff = np.abs(result - train_y)
        diff[diff > ep] = 1
        t = np.sum(diff)
        print("错误预测的个数为: ", t)  # "number of wrong predictions"
        target_names = ['人脸', '非人脸']  # "face" / "non-face"
        report = (classification_report(train_y,
                                        result,
                                        target_names=target_names))

        re_path = "/home/sun/ComputerScience/MachineLearning/Experiments/Experiment_three/ML2017-lab-03/report.txt"
        write_report(re_path, report)
        return ada
Пример #6
0
 def fit(self, X, y):
     """Train one AdaBoost per sub-task of the hierarchy.

     Builds three binary boosters — face vs. non-face (trained on all of
     X), male vs. female, and animal vs. object (each trained on the
     subset extracted from X/y) — and appends them to ``self.adaboosts``
     in that order.
     """
     # clear the adaboost container
     self.adaboosts.clear()
     # data preprocess: derive per-task labels / sample subsets
     y_face_nonface = Hierarchy_Adaboost.seperate_face_nonface(y)
     X_male_female, y_male_female = Hierarchy_Adaboost.extract_male_female(
         X, y)
     X_animal_object, y_animal_object = Hierarchy_Adaboost.extract_animal_object(
         X, y)
     # initialize a decision tree classifier
     # NOTE(review): the same tree instance is handed to all three boosters —
     # assumes AdaBoostClassifier clones its base learner; confirm.
     dt = DecisionTreeClassifier(max_depth=4)
     # train an adaboost for each different situation
     # adaboost for classifying face images and nonface images
     print("train adaboost_face_nonface")
     adaboost_face_nonface = AdaBoostClassifier(dt, self.maximum_weakers)
     adaboost_face_nonface.fit(X, y_face_nonface)
     self.adaboosts.append(adaboost_face_nonface)
     # adaboost for classifying male images and female images
     print("train adaboost_male_female")
     adaboost_male_female = AdaBoostClassifier(dt, self.maximum_weakers)
     adaboost_male_female.fit(X_male_female, y_male_female)
     self.adaboosts.append(adaboost_male_female)
     # adaboost for classifying animal images and object images
     print("train adaboost_animal_object")
     adaboost_animal_object = AdaBoostClassifier(dt, self.maximum_weakers)
     adaboost_animal_object.fit(X_animal_object, y_animal_object)
     self.adaboosts.append(adaboost_animal_object)
Пример #7
0
def test_image():
    """Train AdaBoost on cached image features and write a report file."""
    path = 'datasets/original/'
    face = io.imread_collection(path + 'face/*.jpg')
    nonface = io.imread_collection(path + 'nonface/*.jpg')
    labels = ['face', 'nonface']

    # Features were extracted and pickled in a previous run; reload them.
    X = AdaBoostClassifier.load('X.pkl')
    y = AdaBoostClassifier.load('y.pkl')

    X_train, X_test, y_train, y_test = train_test_split(
        np.array(X), np.array(y), test_size=0.33, random_state=42)

    print('start training')

    clf = AdaBoostClassifier(n_weakers_limit=50)
    clf.fit(X_train, y_train)
    y_pred = clf.predict(X_test)

    # Write the per-class metrics straight into the report file.
    with open('report.txt', 'w') as f:
        print(classification_report(y_test, y_pred, target_names=labels), file=f)
Пример #8
0
def test_xor():
    """Sanity-check the boosted classifier on the XOR truth table."""
    X_train = np.array([[1, 1], [1, 0], [0, 1], [0, 0]])
    y_train = np.array([0, 1, 1, 0])

    clf = AdaBoostClassifier(n_weakers_limit=1000)
    clf.fit(X_train, y_train)
    y_pred = clf.predict(X_train)
    print(classification_report(y_train, y_pred))

    # Reference run: sklearn's AdaBoost on the same four points.
    skclf = SkAdaBoostClassifier()
    skclf.fit(X_train, y_train)
    print(classification_report(y_train, skclf.predict(X_train)))
                         str(i).rjust(3, '0') + ".jpg")
        img = cv2.resize(img, dsize=(24, 24))
        img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)  # 转换了灰度化
        npd = NPDFeature(img)
        x.append(npd.extract())
        y.append(1)
    for i in tqdm(range(500)):
        img = cv2.imread("./datasets/original/nonface/nonface_" +
                         str(i).rjust(3, '0') + ".jpg")
        img = cv2.resize(img, dsize=(24, 24))
        img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)  # 转换了灰度化
        npd = NPDFeature(img)
        x.append(npd.extract())
        y.append(-1)
    x_train, x_val, y_train, y_val = train_test_split(x, y, test_size=0.2)

    print('begin train data')
    ada = AdaBoostClassifier()
    ada.fit(x_train, y_train)
    y_predict = ada.predict(x_val, threshold=0)
    print(
        classification_report(y_val,
                              y_predict,
                              target_names=["nonface", "face"],
                              digits=4))
    with open("report.txt", "w") as f:
        f.write(
            classification_report(y_val,
                                  y_predict,
                                  target_names=["nonface", "face"],
                                  digits=4))
Пример #10
0

def split_dataset(dataset, train_ratio=0.8):
    """Split an (X, y) pair into train and validation parts.

    :param dataset: a pair ``(X, y)`` of equal-length sliceable sequences
    :param train_ratio: fraction of the samples used for training
    :return: X_train, y_train, X_valid, y_valid
    """
    # Derive the pivot from the data itself rather than the module-level
    # SAMPLES_N constant: the dataset holds 2 * SAMPLES_N samples, so the
    # result is identical, without the hidden global dependency.
    pivot = int(len(dataset[0]) * train_ratio)
    train_set = dataset[0][:pivot], dataset[1][:pivot]
    valid_set = dataset[0][pivot:], dataset[1][pivot:]
    # Tuple concatenation yields the flat 4-tuple documented above.
    return train_set + valid_set


if __name__ == "__main__":
    # Load the cached feature dataset and split it into train/validation.
    X_train, y_train, X_valid, y_valid = split_dataset(load_dataset())

    # WEAKERS_LIMIT is a module-level constant; this fit() overload returns
    # per-iteration accuracies for both splits (train in accs[0], valid in accs[1]).
    adaBoost = AdaBoostClassifier(DecisionTreeClassifier, WEAKERS_LIMIT)
    accs = adaBoost.fit(X_train, y_train, X_valid, y_valid)

    # Plot accuracy vs. number of weak classifiers and save the figure.
    plt.figure(figsize=[8, 5])
    plt.title('Accuracy')
    plt.xlabel('Num of weak classifiers')
    plt.ylabel('Accuracy')
    plt.plot(accs[0], '--', c='b', linewidth=3, label='train')
    plt.plot(accs[1], c='r', linewidth=3, label='valid')
    plt.legend()
    plt.grid()
    plt.savefig('AdaBoost-accuracy.png')
    plt.show()

    # Persist the trained ensemble for later reuse.
    AdaBoostClassifier.save(adaBoost, 'AdaBoost-Model.pkl')
Пример #11
0
        X.append(NPDFeature(np.array(img)).extract())
        y.append(-1)
        print(i)

    X_train, X_test, y_train, y_test = train_test_split(X,
                                                        y,
                                                        test_size=0.33,
                                                        random_state=42)
    ada = AdaBoostClassifier(DecisionTreeClassifier, 20)

    X_train = np.array(X_train)
    X_test = np.array(X_test)
    y_train = np.array(y_train)
    y_test = np.array(y_test)
    y_train = y_train.reshape((y_train.shape[0], 1))
    y_test = y_test.reshape((y_test.shape[0], 1))

    ada.fit(X_train, y_train)
    h = ada.predict(X_test)
    yes = 0
    no = 0
    for i in range(0, len(h)):
        if (h[i] == y_test[i]): yes += 1
        if (h[i] != y_test[i]): no += 1
    print(yes, "   ", no)

    report = classification_report(y_test, h, target_names=["nonface", "face"])

    file = open('report.txt', 'w')
    file.write(report)
    file.close()
Пример #12
0
        Data = AdaBoostClassifier.load('feature.data')
    else :
        Data = pre_process()

    #将X_data与y_data分开
    X_data,y_data = Data[:,:-1],Data[:,-1]

    #切分训练集与验证集
    X_train,X_test,y_train,y_test = train_test_split(X_data,y_data,test_size=0.3,random_state=10)

    print(len(y_train),len(y_test))

    #进行AdaBoost训练
    mode = tree.DecisionTreeClassifier(max_depth=1)
    adaboost=AdaBoostClassifier(mode,20)
    adaboost.fit(X_train,y_train)

    #得到预测结果
    y_predict=adaboost.predict(X_test)

    #输出正确率
    count=0
    for i in range(len(y_test)):
        if y_test[i]==y_predict[i]:
            count=count+1
    target_names = ['1', '-1']
    print(count/len(y_test))

    #调用classification_report获得预测结果
    report=classification_report(y_test, y_predict, target_names=target_names)
Пример #13
0
        face_im=face_im.convert('L')#灰度化
        face_im=face_im.resize((24,24)) #缩小尺寸
        nonface_im=nonface_im.convert('L')
        nonface_im=nonface_im.resize((24,24)) 
        face.append(np.array(face_im))#转为ndarray
        nonface.append(np.array(nonface_im))
    feature_face=[]
    feature_nonface=[]
    for i in range(500) :
        feature_face.append(NPDFeature(face[i]).extract())
        feature_nonface.append(NPDFeature(nonface[i]).extract())
    # #缓存特征
    # AdaBoostClassifier.save(feature_face,'feature_face')
    # AdaBoostClassifier.save(feature_nonface,'feature_nonface')
    # #读取缓存的特征
    # feature_face=np.array(AdaBoostClassifier.load('feature_face'))
    # feature_nonface=np.array(AdaBoostClassifier.load('feature_nonface'))

    data=np.row_stack((feature_face,feature_nonface))
    label=np.concatenate((np.ones(500),-np.ones(500)))
    X_train,X_validation,y_train,y_validation=train_test_split(data,label,test_size=0.3,random_state=1000)
    #Adaboost 20个分类器,每个决策树只有一个节点
    model=AdaBoostClassifier(DecisionTreeClassifier,20)
    model.fit(X_train,y_train)#训练模型
    y_pre=model.predict(X_validation)#预测
    with open('report.txt',mode='w') as f:
        f.write(classification_report(y_pre,y_validation))
    #单个分类器
    model=DecisionTreeClassifier(max_depth=1).fit(X_train,y_train)
    y_pre=model.predict(X_validation)
    print(classification_report(y_pre,y_validation))
Пример #14
0
    if os.path.exists("./feature.txt") is False:
        get_data()

    X_trainsize = 800  #        训练集大小     (<1000)
    max_depth = 3
    num_of_weakers = 20
    Accuracytrainlist = []
    Accuracytestlist = []

    dataset = load_data(X_trainsize=X_trainsize)
    X_train, y_train, X_test, y_test = dataset[0], dataset[1], dataset[
        2], dataset[3]

    clf = DecisionTreeClassifier(max_depth=max_depth, min_samples_leaf=1)
    adaboost = AdaBoostClassifier(clf, n_weakers_limit=num_of_weakers)
    Accuracytrainlist, Accuracytestlist = adaboost.fit(X_train, y_train,
                                                       X_test, y_test)
    print(Accuracy(adaboost.predict(X_test), y_test))
    print(
        classification_report(y_train,
                              adaboost.predict(X_train),
                              target_names=['non-face',
                                            'face']))  #training_result
    print(
        classification_report(y_test,
                              adaboost.predict(X_test),
                              target_names=['non-face', 'face']))  #test_result

    plt.plot(Accuracytrainlist, 'black', label='Adaboost_train')
    plt.plot(Accuracytestlist, 'blue', label='Adaboost_test')
    plt.title('training Accuracy and test Accuracy')
    #plt.yscale('log')
Пример #15
0
    np.random.shuffle(features_dataset)
    np.save("./datasets/extract_features", features_dataset)


if __name__ == "__main__":
    # Extract features for both classes and build the combined dataset file.
    get_original_features("face")
    get_original_features("nonface")
    face_features = np.load("./datasets/face_features.npy")
    nonface_features = np.load("./datasets/nonface_features.npy")
    get_features_dataset(face_features, nonface_features)
    features_dataset = np.load("./datasets/extract_features.npy")
    print(features_dataset.shape, face_features.shape, nonface_features.shape)
    # Last column of the dataset holds the label; everything before is features.
    num_face_feature = features_dataset.shape[1] - 1

    # Fixed-size split: first 800 rows train, the rest validate (the dataset
    # was shuffled by get_features_dataset — presumably; TODO confirm).
    training_size = 800
    X_train = features_dataset[:training_size, :num_face_feature]
    X_validation = features_dataset[training_size:, :num_face_feature]

    y_train = features_dataset[:training_size, -1]
    y_validation = features_dataset[training_size:, -1]
    # print(X_train.shape,y_train.shape,X_validation.shape,y_validation.shape)
    adaboost_classifier = AdaBoostClassifier(
        DecisionTreeClassifier(max_depth=4), 5)
    pred_y = adaboost_classifier.fit(X_train, y_train).predict(X_validation)

    # The file is opened in binary mode, hence the explicit encode() below.
    with open("report.txt", "wb") as f:
        report = classification_report(y_validation,
                                       pred_y,
                                       target_names=["nonface", "face"])
        f.write(report.encode())
Пример #16
0
                                                            test_size=0.2,
                                                            random_state=24)
        output = open(datafile, 'wb')
        pickle.dump(X_train, output)
        pickle.dump(X_vali, output)
        pickle.dump(y_train, output)
        pickle.dump(y_vali, output)
        output.close()

    #create adaboost/weak classifier
    dtc = DecisionTreeClassifier(random_state=0,
                                 max_depth=3,
                                 max_features="sqrt")
    classifier = AdaBoostClassifier(dtc, 15)
    #train classifiers
    classifier.fit(X_train, y_train)
    dtc.fit(X_train, y_train)
    #do prediction
    result = classifier.predict(X_vali)
    weakresult = dtc.predict(X_vali)

    #calculate predicting accuracy for both
    adacount = 0
    weakcount = 0
    for i in range(0, result.shape[0]):
        if (np.abs(result[i] - 1) < np.abs(result[i] + 1)):
            result[i] = 1
        else:
            result[i] = -1
        if result[i] == y_vali[i]:
            adacount = adacount + 1
Пример #17
0
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from ensemble import AdaBoostClassifier
# Read the grayscale images directly; they live in the "original" folder.
path1=[os.path.join('G:\\Users\\qqqqqq1997520\\Desktop\\original\\face\\face',f) for f in os.listdir('G:\\Users\\qqqqqq1997520\\Desktop\\original\\face\\face')]
path2 = [os.path.join('G:\\Users\\qqqqqq1997520\\Desktop\\original\\face\\nonface',f) for f in os.listdir('G:\\Users\\qqqqqq1997520\\Desktop\\original\\face\\nonface')]
# NOTE(review): this single-weak-learner instance is never used — it is
# replaced by the 20-learner classifier built further down.
ABC=AdaBoostClassifier(DecisionTreeClassifier(), 1)
# Load 500 face images followed by 500 non-face images.
im=[0 for i in range(1000)]
for i in range(500):
    im[i]=plt.imread(path1[i])
for i in arange(500,1000):
    im[i]=plt.imread(path2[(i%500)])
# Extract an NPD feature vector for every image.
_feature=[0 for i in range(1000)]
for i in range(1000):
    feature=NPDFeature(im[i])
    _feature[i]=feature.extract()
    
feature_data=array(_feature)
# Labels: +1 for the first 500 (faces), -1 for the rest (non-faces).
y=[1 for i in range(1000)]
for i in range(500,1000):
    y[i]=-1;
y=array(y)
X_train, X_vali, y_train, y_vali = train_test_split(feature_data, y, test_size=0.3, random_state=37)

# Train the real 20-learner ensemble and write the validation report.
ABC=AdaBoostClassifier(DecisionTreeClassifier(),20)
ABC.fit(X_train,y_train)
predict=ABC.predict(X_vali,0)
classification_name=["Y","N"]
with open('report.txt', 'w') as f:
    f.write(classification_report(y_vali, predict, target_names=classification_name))
Пример #18
0
    y_test = y_test.reshape(75, 1)
    return y_train, y_test


def acc(y_test, y_preds):
    """Threshold raw scores to ±1 in place and print the accuracy.

    Scores > 0 become 1, scores <= 0 become -1.  Each y_test row is a
    one-element sequence holding the true ±1 label.
    """
    for idx, score in enumerate(y_preds):
        y_preds[idx] = 1 if score > 0 else -1
    correct = 0
    for pred, truth in zip(y_preds, y_test):
        if int(pred) == int(truth[0]):
            correct = correct + 1
    print('arr:', correct / len(y_test))


if __name__ == "__main__":
    # Load train/test features and the corresponding labels.
    X = trainX()
    X_test = testX()
    y_train, y_test = dataY()
    # Base learner for the boosting ensemble (10 rounds).
    clf = tree.DecisionTreeClassifier(max_depth=50,
                                      min_samples_leaf=50,
                                      random_state=30,
                                      criterion='gini')
    gbdt = AdaBoostClassifier(clf, 10)
    gbdt.fit(X, y_train)
    y_preds = gbdt.predict(X_test)
    # y_preds
    # acc() thresholds the raw scores to ±1 in place and prints accuracy.
    acc(y_test, y_preds)
Пример #19
0
    else:
        path_face = './datasets/original/face/'
        path_non_face = './datasets/original/nonface/'
        for i in os.listdir(path_face):
            features = processImage(Image.open(path_face + i))
            X.append(features)
            y.append(1)

        for i in os.listdir(path_non_face):
            features = processImage(Image.open(path_non_face + i))
            X.append(features)
            y.append(0)
        with open("data_x", "wb") as f:
            pickle.dump(X, f)
        with open("data_y", "wb") as f:
            pickle.dump(y, f)
    return X, y


if __name__ == "__main__":

    # getData() returns cached (or freshly extracted) features and 1/0 labels.
    X, y = getData()
    X_train, X_val, y_train, y_val = train_test_split(X,
                                                      y,
                                                      test_size=0.2,
                                                      shuffle=True)
    # 10 weak learners built from the DecisionTreeClassifier class itself.
    clf = AdaBoostClassifier(DecisionTreeClassifier, 10)
    clf.fit(X_train, y_train)
    predict = clf.predict(X_val)
    print(classification_report(y_val, predict))
Пример #20
0
# Truncate the cached face features to the first n_limit samples.
features_face_array = np.array(features_face)[0:n_limit]

n_pos_sample = features_face_array.shape[0]
n_feature = features_face_array.shape[1]

features_nonface = load('features_nonface')
features_nonface_array = np.array(features_nonface)[0:n_limit]
n_neg_sample = features_nonface_array.shape[0]

# Stack positives on top of negatives; labels are +1 / -1 respectively.
X = np.concatenate((features_face_array, features_nonface_array), axis=0)

y = np.array([1] * n_pos_sample + [-1] * n_neg_sample)

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)

# model = dt()
# model.fit(X_train,y_train)
# s = model.score(X_test,y_test)
# print(s)

# Boost 10 weak learners of type `dt` (imported elsewhere in this module).
Ada = AdaBoostClassifier(dt, 10)
Ada.fit(X_train, y_train)
pred = Ada.predict(X_test)
acc = accuracy_score(pred, y_test)
print('acc:', acc)

# Use a context manager so the report file is closed even if write fails.
with open('report.txt', 'w') as f:
    content = classification_report(pred, y_test)
    f.write(content)
Пример #21
0
    trainLabel.extend(nonfaceLabel)
    trainLabel.extend(faceLabel)
    X_train, X_test, Y_train, Y_test = train_test_split(trainImage, trainLabel, test_size = 0.33)
    return np.array(X_train), np.array(X_test), np.array(Y_train), np.array(Y_test)


if __name__ == "__main__":
    # write your code here
    #dataProcess()
    # Load pre-extracted feature files for both classes and split them.
    trainData, testData, trainLabel, testLabel = divideData('./dataFeature/face', './dataFeature/nonface')
    # Baseline: a single depth-2 decision tree with uniform sample weights
    # (third positional argument of fit is sample_weight).
    clf = DecisionTreeClassifier(max_depth=2)
    row, col = trainData.shape
    weightArray = [(1 / row)] * row
    clf.fit(trainData, trainLabel, weightArray)
    predictY = clf.predict(testData)
    rate = calculateAcc(predictY, testLabel)
    print(rate)
    target_name = ['class-1', 'class1']
    with open("./report.txt", 'w') as fp:
        fp.write(classification_report(testLabel, predictY, target_names=target_name))
    print(classification_report(testLabel, predictY, target_names=target_name))
    # Boosted ensemble of 20 weak learners for comparison.
    clf2 = AdaBoostClassifier(DecisionTreeClassifier, 20)
    clf2.fit(trainData, trainLabel)
    predictY2 = clf2.predict(testData)
    rate = calculateAcc(predictY2, testLabel)
    print(rate)
    target_name = ['class-1', 'class1']
    with open("./report1.txt", 'w') as fp:
        fp.write(classification_report(testLabel, predictY2, target_names=target_name))
    print(classification_report(testLabel, predictY2, target_names=target_name))
Пример #22
0
    file1_size = len(file1)
    file2 = load('nonface.txt').tolist()
    file2_size = len(file2)
    X = file1 + file2
    #构造结果值Y
    y_face = np.ones(file1_size).tolist()
    y_nonface = (-1 * np.ones(file2_size)).tolist()
    y = y_face + y_nonface

    #划分训练集和验证集
    x_train, x_test, y_train, y_test = train_test_split(X,
                                                        y,
                                                        test_size=0.33,
                                                        random_state=1)
    classifier = tree.DecisionTreeClassifier
    adaBoostClassifier = AdaBoostClassifier(classifier, 20)
    adaBoostClassifier.fit(x_train, y_train)

    y_true = y_test
    y_pred = adaBoostClassifier.predict(x_test, y_test)
    target_names = ['nonface', 'face']
    print(classification_report(y_true, y_pred, target_names=target_names))
    '''
    weight = np.ones(len(x_train)).tolist()
    
    #test = em.AdaBoostClassifier(classifier, 2)
    classifier.fit(x_train,y_train,sample_weight =weight )
    s = classifier.score(x_test,y_test)
    print(s)
    '''
Пример #23
0
        dataset = np.array(samples)
        with open('tmp.pkl', 'wb') as output:
            pickle.dump(dataset, output, True)

    with open('tmp.pkl', 'rb') as input:
        dataset = pickle.load(input)
        print(dataset.shape)
    # 将数据集切分为训练集和验证集
    X_train = dataset[:dataset.shape[0] * 3 // 4, :dataset.shape[1] - 1]
    y_train = dataset[:dataset.shape[0] * 3 // 4, dataset.shape[1] - 1]
    X_validation = dataset[dataset.shape[0] * 3 // 4:, :dataset.shape[1] - 1]
    y_validation = dataset[dataset.shape[0] * 3 // 4:, dataset.shape[1] - 1]
    return X_train, X_validation, y_train, y_validation


if __name__ == "__main__":
    # Load the cached dataset and split it 3:1 into train/validation.
    X_train, X_validation, y_train, y_validation = loadDataSet()
    abc = AdaBoostClassifier(DecisionTreeClassifier, 20)
    abc.fit(X_train, y_train)
    final_pre_y = abc.predict(X_validation)
    # Accuracy = 1 - fraction of mismatching predictions.
    error = 0
    for i in range(final_pre_y.shape[0]):
        if final_pre_y[i] != y_validation[i]:
            error = error + 1
    accuracy = 1 - error / y_validation.shape[0]
    print('accuracy: %f' % accuracy)
    target_names = ['face', 'nonface']
    report = classification_report(y_validation, final_pre_y, target_names=target_names)
    print(report)
    with open('report.txt', 'w') as f:
        f.write(report)
Пример #24
0
            img = Image.open(nonfaces_path[i])
            img = img.convert('L').resize((24, 24))
            nf = NPDFeature(np.array(img))
            train[i * 2 + 1] = nf.extract()
        AdaBoostClassifier.save(train, 'train.txt')

    try:
        X = AdaBoostClassifier.load("train.txt")
    except IOError:
        Feature_extract()
        X = AdaBoostClassifier.load("train.txt")

    Y = np.zeros((1000, 1))
    for i in range(1000):
        Y[i] = (i + 1) % 2
    Y = np.where(Y > 0, 1, -1)

    X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.2)

    booster = AdaBoostClassifier(DecisionTreeClassifier, 15)
    booster.fit(X_train, Y_train)
    predict = booster.predict(X_test)
    wrong_count = 0
    for j in range(predict.shape[0]):
        if predict[j] != Y_test[j]:
            wrong_count += 1
    AdaBoostClassifier.save(classification_report(Y_test, predict),
                            "classifier_report.txt")
    pass
Пример #25
0
def preprocess():
    """Build grayscale features and labels, then cache them as pickles."""
    x, y = to_gray_resize('datasets/original/face/')
    x, y = to_gray_resize('datasets/original/nonface/', x, y)

    # Persist both arrays in binary form for fast reload at training time.
    for target, obj in (('datasets/features/feature', x),
                        ('datasets/features/label', y)):
        with open(target, 'wb') as file:
            pickle.dump(obj, file)
    print(x.shape, y.shape)


if __name__ == "__main__":
    print('loading data...')
    # preprocess()
    # Load the feature matrix and labels cached by preprocess().
    with open('datasets/features/feature', 'rb') as file:
        x = pickle.load(file)
    with open('datasets/features/label', 'rb') as file:
        y = pickle.load(file)

    X_train, X_test, y_train, y_test = train_test_split(x,
                                                        y,
                                                        test_size=0.25,
                                                        random_state=42)
    print(X_train.shape, X_test.shape, y_train.shape, y_test.shape)

    print('start training...')
    # 10 boosting rounds over depth-3 trees; this fit() variant also takes
    # the test split, presumably for per-round evaluation used by plotting().
    ada_clf = AdaBoostClassifier(DecisionTreeClassifier(max_depth=3), 10)
    ada_clf.fit(X_train, y_train, X_test, y_test)
    ada_clf.plotting()
Пример #26
0
        label.append(-1)

if __name__ == "__main__":
    # write your code here
    # readimg() fills the module-level `feature` and `label` lists.
    readimg()
    train_feature,validation_feature,train_label,validation_label=train_test_split(feature,label,test_size=0.3)
    #adaboost: 20 rounds over depth-1 stumps
    adaboostClassifier=AdaBoostClassifier(DecisionTreeClassifier(max_depth = 1, random_state = 1),20)
    
    # Cache all four splits to disk via the classifier's save helper.
    adaboostClassifier.save(train_feature,'train_feature')
    adaboostClassifier.save(train_label,'train_label')
    adaboostClassifier.save(validation_feature,'validation_feature')
    adaboostClassifier.save(validation_label,'validation_label')

    adaboostClassifier.fit(train_feature,train_label)
    adaboostClassifier.drawPic()
    

    '''
    #debug
    adaboostClassifier=AdaBoostClassifier(DecisionTreeClassifier(max_depth = 1, random_state = 1),20)
    train_feature=adaboostClassifier.load("train_feature")
    train_label=adaboostClassifier.load("train_label")
    adaboostClassifier.fit(train_feature,train_label)
    adaboostClassifier.drawPic()
    '''
    


Пример #27
0
                                                      shuffle=True)
    print('X_train.shape', X_train.shape)
    print('X_val.shape', X_val.shape)
    print('y_train.shape', y_train.shape)
    print('y_val.shape', y_val.shape)

    # 尝试使用不同深度的决策树,不同数量的决策树来进行建模和预测
    result = []
    max_depth = 4
    max_num_tree = 10
    for depth in range(1, max_depth + 1):
        result_item = []
        for num_tree in range(1, max_num_tree + 1):
            adaboostclassifier = AdaBoostClassifier(
                DecisionTreeClassifier(max_depth=depth), num_tree)
            adaboostclassifier.fit(X_train, y_train)
            pre_label = adaboostclassifier.predict(X_val)
            correct = [1 if a == b else 0 for (a, b) in zip(pre_label, y_val)]
            accurary = sum(correct) / len(correct)
            result_item.append(accurary * 100)

            report = classification_report(y_val,
                                           pre_label,
                                           labels=[-1, 1],
                                           target_names=["face", "nonface"])
            model_num = (depth - 1) * 10 + num_tree
            with open('report.txt', 'a') as f:
                f.write('\nmodel ' + str(model_num) + ':\n')
                f.write('number of decision tree:' + str(num_tree) + '\n')
                f.write('max_depth of decision tree:' + str(depth) + '\n')
                f.write(report)
Пример #28
0
        # train_y.append(-1)

    clf = AdaBoostClassifier(tree.DecisionTreeClassifier, 10)

    #cal NPD
    for i in range(num_face + num_nonface):
        NPD.append(NPDFeature(imgs[i]).extract())

    #save NPD
    # clf.save(NPD,'output')

    #load NPD
    # NPD=np.array(clf.load('output'))

    train_x = np.row_stack((NPD[0:100], NPD[500:600]))
    train_y = np.append(np.ones((1, 100)), np.linspace(-1, -1, 100))
    test_x = np.row_stack((NPD[200:300], NPD[700:800]))
    test_y = np.append(np.ones((1, 100)), np.linspace(-1, -1, 100))

    clf.fit(train_x, train_y)
    y = clf.predict(test_x)

    hit = 0
    for i in range(test_x.shape[0]):
        if (y[i] == test_y[i]):
            hit += 1
    print("hit rate:", hit / test_x.shape[0])

    with open('report.txt', 'w') as f:
        f.write(classification_report(test_y, y))
Пример #29
0
    # 数据集加标签,并划分训练集,验证集
    Data = loadData("data")
    label = np.ones(1000)
    label[500:] = -1
    train_x, train_y, validation_x, validation_y = split(Data, label, 0.4)
    saveData("train", train_x)
    saveData("label", train_y)
    saveData("validation", validation_x)
    saveData("target", validation_y)

    train = loadData("train")
    train_x = np.array(train)
    label = loadData("label")
    train_y = np.array(label)
    validation = loadData("validation")
    test_x = np.array(validation)
    target = loadData("target")
    test_y = np.array(target)
    weakClassifier = DecisionTreeClassifier(max_depth=3)
    cls = AdaBoostClassifier(weakClassifier, num_classifier)
    cls = cls.fit(train_x, train_y)
    result_adaboost = cls.predict(test_x, 0)
    print('adaboost result: ', result_adaboost)
    print('accuracy: ', validate_result(result_adaboost, test_y))
    target_names = {'nonface', 'face'}
    output = open('report.txt', 'w')
    output.write(
        classification_report(test_y,
                              result_adaboost,
                              target_names=target_names))
Пример #30
0
    plt.xlabel('Iteration')
    plt.ylabel('Accuracy')
    plt.plot(range(len(validation_score_list)), validation_score_list)
    #plt.grid()
    plt.show()


if __name__ == "__main__":

    # pre_image()
    # Load the cached feature matrix and label vector.
    with open('features', "rb") as f:
        x = pickle.load(f)
    with open('labels', "rb") as f:
        y = pickle.load(f)
    X_train, X_test, y_train, y_test = train_test_split(x,
                                                        y,
                                                        test_size=0.33,
                                                        random_state=0)
    # Boost depth-3 trees for a fixed number of iterations.
    maxIteration = 10
    s = AdaBoostClassifier(DecisionTreeClassifier(max_depth=3), maxIteration)
    s.fit(X_train, y_train)
    predict_y = s.predict(X_test)

    # acc_plot(validation_score_list)

    # File opened in binary mode, hence the explicit encode() below.
    with open('report.txt', "wb") as f:
        report = classification_report(y_test,
                                       predict_y,
                                       target_names=["face", "nonface"])
        f.write(report.encode())