Example #1
def ada_boost_experiment():
    examples = ID3.data_parsing(FILE_PATH_TRAIN, numeric_cols)

    # hypothesis = AdaBoost.ada_boost(examples, 5, numeric_cols, missing_identifier)

    # print(hypothesis)

    iterations = 100

    hypothesis = AdaBoost.ada_boost(examples, iterations, numeric_cols,
                                    missing_identifier)

    ada_results_train = AdaBoost.test_ada_boost_hypothesis(
        hypothesis, FILE_PATH_TRAIN, numeric_cols, missing_identifier)
    ada_results_test = AdaBoost.test_ada_boost_hypothesis(
        hypothesis, FILE_PATH_TEST, numeric_cols, missing_identifier)

    # for t in range(iterations):
    #     print("AdaBoost Training Set - t:", t, "results:", ada_results_train[t],
    #           "{0:.2%}".format(1-ada_results_train[t][0]/ada_results_train[t][1]))
    # for t in range(iterations):
    #     print("AdaBoost Testing Set - t:", t, "results:", ada_results_test[t],
    #           "{0:.2%}".format(1-ada_results_test[t][0]/ada_results_test[t][1]))
    # for t in range(iterations):
    #     tree_results = ID3.test_tree(hypothesis[t][0],FILE_PATH_TRAIN, numeric_cols, missing_identifier)
    #     print("Decision Tree Training Set - t:", t, "results:", tree_results,
    #           "{0:.2%}".format(1 - tree_results[0] / tree_results[1]))
    # for t in range(iterations):
    #     tree_results = ID3.test_tree(hypothesis[t][0],FILE_PATH_TEST, numeric_cols, missing_identifier)
    #     print("Decision Tree Test Set - t:", t, "results:", tree_results,
    #           "{0:.2%}".format(1 - tree_results[0] / tree_results[1]))

    ada_train = []
    ada_test = []
    dec_train = []
    dec_test = []

    for t in range(iterations):
        ada_train.append(1 - ada_results_train[t][0] / ada_results_train[t][1])
        ada_test.append(1 - ada_results_test[t][0] / ada_results_test[t][1])
        tree_results = ID3.test_tree(hypothesis[t][0], FILE_PATH_TRAIN,
                                     numeric_cols, missing_identifier)
        dec_train.append(1 - tree_results[0] / tree_results[1])
        tree_results = ID3.test_tree(hypothesis[t][0], FILE_PATH_TEST,
                                     numeric_cols, missing_identifier)
        dec_test.append(1 - tree_results[0] / tree_results[1])

    ada_graph = [
        tuple([ada_train, "AdaBoost Train"]),
        tuple([ada_test, "AdaBoost Test"])
    ]
    GraphUtility.graph(ada_graph, "AdaBoost Data", "Iterations", "Error")

    tree_graph = [
        tuple([dec_train, "Tree Train"]),
        tuple([dec_test, "Tree Test"])
    ]
    GraphUtility.graph(tree_graph, "Decision Tree Data", "Iterations", "Error")
Example #2
def detect(image,cascade):

    img_height, img_width = image.shape
    int_img = ii.to_integral_image(image)

    step = 2
    scale = 1.5
    height = 48
    width = 48
    curScale = 2

    faces = []

    count = 0
    correct_face = 0
    while width <= img_width and height <= img_height:
        for x in range(0, int(img_width - width), int(np.ceil(step * scale))):
            for y in range(0, int(img_height - height), int(np.ceil(step * scale))):
                # the window counts as a face only if every stage of the cascade votes for it
                for classifiers in cascade:
                    correct_face = ab.ensemble_vote_with_scalar(int_img, classifiers, (x, y), curScale)
                    if correct_face == 0:
                        break

                if correct_face == 1:
                    faces.append([x, y, int(np.ceil(width)), int(np.ceil(height))])
                    count += 1
        width *= scale
        height *= scale
        curScale *= scale


    print(str(count)+' Faces Detected')

    return faces
Example #3
def train(feats, labels, T):
	ab_labels = transform_labels(labels)
	ft = feats.transpose()
	abs = []
	for i in range(30):
		abs.append(AdaBoost(T, ft[i], ab_labels))
		abs[-1].train()
	return abs
Example #4
def calc_votes(features, images):
    jumlah_image = len(images)
    jumlah_feature = len(features)

    votes = np.zeros((jumlah_image, jumlah_feature))

    for i in tqdm(range(jumlah_image)):
        for j in range(jumlah_feature):
            votes[i, j] = ab.vote(image=images[i], feature=features[j])

    return votes
Example #5
def main():

    print "Init data"
    data = pd.read_csv(sys.argv[1])
    columnas = data.columns.tolist()
    x = columnas[len(columnas) - 1]
    target = data[x]
    y = columnas[0:len(columnas) - 1]
    dataset = data[y]

    AdaBoostObject = AdaBoost.AdaBoost(dataset, target, 3, 'SAMME', 10)
    AdaBoostObject.trainingMethod()

    # ROC curve
    #curveRocObject = createRocCurve.curveRoc(dataset, target, AdaBoostObject.model, 10, 'user', 'job', 'path')
    #curveRocObject.createCurveROC()

    #precision-recall curve
    #precisionCurve = createPrecisionRecallCurve.curvePrecision(dataset, target, AdaBoostObject.model, 10, 'user', 'job', 'path')
    #precisionCurve.plot_precision_and_recall_curve()

    #learning curve
    #learningCurveDemo = createLearningCurve.curveLearning(dataset, target, AdaBoostObject.model, 10, 'user', 'job', 'path')
    #learningCurveDemo.createLearningCurve()

    #confusion matrix data
    confusionMatrixDemo = createConfusionMatrix.confusionMatrix(
        dataset, target, AdaBoostObject.model, 10, 'user', 'job', 'path',
        ["Clinical", "No Clinical"])
    confusionMatrixDemo.createConfusionMatrix()

    #bagginObject = Baggin.Baggin(dataset,target,3,10)
    #bagginObject.trainingMethod()
    #bernoulliNB = BernoulliNB.Bernoulli(dataset, target, 10)
    #bernoulliNB.trainingMethod()
    #decisionTreeObject = DecisionTree.DecisionTree(dataset, target, 'entropy', 'best',10)
    #decisionTreeObject.trainingMethod()
    #gaussianObject = GaussianNB.Gaussian(dataset, target, 10)
    #gaussianObject.trainingMethod()
    #gradientObject = Gradient.Gradient(dataset,target,3,10)
    #gradientObject.trainingMethod()
    #knnObect = knn.knn(dataset, target, 2, 'auto', 'minkowski', 10)
    #knnObect.trainingMethod()
    #MLPObject = MLP.MLP(dataset,target,'relu', 'sgd', 'adaptive', 1,1,1,10)
    #MLPObject.trainingMethod()
    #nuSVM = NuSVM.NuSVM(dataset,target,'poly',10)
    #nuSVM.trainingMethod()
    #rf = RandomForest.RandomForest(dataset,target,10,'gini',10)
    #rf.trainingMethod()
    #svm = SVM.SVM(dataset, target, 'poly', 10)
    #svm.trainingMethod()

    return 0
Example #6
def cascade_latih(faces_ii_data, non_faces_ii_data, features, level_cascade):
    cascade = []
    start_stage = 44
    banned = True
    path_banned = 'bannen_index_stage43.json'
    if banned:
        features_stg = []
        banned_index = ul.load_banned_index(path_banned)
        for i in range(0, start_stage - 1):
            features_stage = ul.load_database('database_stage' + str(i + 1) +
                                              '.json')
            for fitur in features_stage:
                features_stg.append(fitur)
    else:
        banned_index = []

    images = faces_ii_data + non_faces_ii_data

    votes = calc_votes(features, images)

    print('Starting attentional cascade training ...')
    # select the cascade stage
    for idx, classifier in enumerate(level_cascade):
        if start_stage == idx + 1:
            print('Begin Training ' + str(idx + 1) + ' layer :...')
            classifiers, banned_index = ab.learn(faces_ii_data,
                                                 non_faces_ii_data, features,
                                                 classifier, votes,
                                                 banned_index)
        else:
            classifiers = ul.load_features(features_stg, classifier, idx + 1)
        cascade.append(classifiers)
        #test classifiers
        # correct_faces = sum(ab.ensemble_vote_all(faces_ii_data, classifiers))
        # correct_non_faces = len(non_faces_ii_data) - sum(ab.ensemble_vote_all(non_faces_ii_data, classifiers))
        # print('Result after ' + str(idx + 1) + ' layer(s):\n     Faces: ' + str(correct_faces) + '/' + str(
        #     len(faces_ii_data))
        #       + '  (' + str((float(correct_faces) / len(faces_ii_data)) * 100) + '%)\n  non-Faces: '
        #       + str(correct_non_faces) + '/' + str(len(non_faces_ii_data)) + '  ('
        #       + str((float(correct_non_faces) / len(non_faces_ii_data)) * 100) + '%)')

        database_stage = []
        for clas in classifiers:
            database_stage.append(clas)

        with open('database_stage' + str(idx + 1) + '.json', 'w') as f:
            json.dump(database_stage, f, default=dumper, indent=4)

        with open('bannen_index_stage' + str(idx + 1) + '.json', 'w') as b:
            json.dump(banned_index, b, default=dumper, indent=4)

    return cascade
Example #7
def main():
    lines = []
    for i in range(11):
        lines.append(i)
    if sys.argv[1] == "train":
        training_file = sys.argv[2]
        hypothesis_file = sys.argv[3]

        sentences = []

        for line in open(training_file, encoding="utf-8-sig"):
            sentences.append(line.strip())

        sentence_attributes = AttributeCalculator.sentences_to_boolean_attributes(sentences)
        node = Node.Node(sentence_attributes, lines)

        if sys.argv[4] == "dt":
            dt = DecisionTreeTraining(node).train_dt(node.sentences, node.input, 0)
            hypothesis_out = open(hypothesis_file, "wb")
            pickle.dump(dt, hypothesis_out)

        elif sys.argv[4] == "ada":
            ada = AdaBoost.AdaBoostTraining(DecisionTreeTraining, AdaBoostTraining.stump).train_ada(node)
            hypothesis_out = open(hypothesis_file, "wb")
            pickle.dump(ada, hypothesis_out)

    elif sys.argv[1] == "predict":
        hypothesis = sys.argv[2]
        prediction_file = sys.argv[3]
        sentences = []
        hyp = open(hypothesis, "rb")
        model = pickle.load(hyp)
        file = open(prediction_file, encoding="utf-8-sig")

        for line in file:
            line = line.strip()
            sentences.append(line)

        sentences = AttributeCalculator.sentences_to_boolean_attributes_predict(sentences)

        if isinstance(model, Decision):
            for sentence in sentences:
                print(model.get_language(sentence))
        elif isinstance(model, WeightedDecision):
            for sentence in sentences:
                result = "".join(model.predict_language(sentence))
                print(result)
Example #8
def cascade_stage(faces_ii_data, non_faces_ii_data, features, banned_index,
                  votes, stage):
    level_cascade = [
        2, 10, 20, 20, 30, 30, 50, 50, 50, 50, 60, 60, 80, 100, 100, 100, 100,
        100, 100, 100, 100, 100, 100, 100, 100, 100, 100
    ]
    database_stage = []
    jum_fitur = level_cascade[stage - 1]

    classifiers, banned = ab.learn(faces_ii_data, non_faces_ii_data, features,
                                   jum_fitur, votes, banned_index)
    for clas in classifiers:
        database_stage.append(clas)
        print(str(clas))

    with open('database_stage' + str(stage) + '.json', 'w') as f:
        json.dump(database_stage, f, default=dumper, indent=4)

    with open('banned_index_stage' + str(stage) + '.json', 'w') as b:
        json.dump(banned, b, default=dumper, indent=4)

    return 1
Example #9
def runBoost(X_train, y_train, X_test, y_test, numClassifiers=10):
    '''
    initialize AdaBoost
    '''

    boost = ab.Boost()
    boost.train(X_train, y_train, cNum=numClassifiers)

    # test with training data
    pred_boost_train = boost.predict(X_train)
    error_rate_boost_train = (sum([
        0 if pred == true else 1
        for (pred, true) in zip(y_train, pred_boost_train)
    ]) / float(len(y_train)))

    # now test with remaining data
    pred_boost_test = boost.predict(X_test)
    error_rate_boost_test = (sum([
        0 if pred == true else 1
        for (pred, true) in zip(y_test, pred_boost_test)
    ]) / float(len(y_test)))
    '''
    Add prints for diagnostics and results here:
    '''
    '''print('')
    print('***** RESULTS ADABOOST *****')
    decisions = [c.root.splitCriteria for c in boost.classifiers]
    print('Decision Criteria per trees used: ', decisions)
    decision_columns = [c for c,_ in decisions]
    unique_cols = np.unique(decision_columns)
    col_counts = Counter(decision_columns).most_common()
    print('unique columns and frequency of split columns: ', col_counts)
    print('')

    print('Training Error: ', error_rate_boost_train)
    print('Test Error    : ', error_rate_boost_test)'''

    return error_rate_boost_train, error_rate_boost_test
Example #10
def TrainAndTestAdaBoostWeakBinPerceptron():
    if len(digits) > 2:
        print "digits should be 2 digits in the case of Binary Perceptron"
    print "Running the AdaBoost with Weak Binary Perceptron"
    print "the digits it shall descriminate between: " + str(
        digits[0]) + "," + str(digits[1])
    print "the label 1 will be given to the digit " + str(digits[0])
    fullimg, lbl = rn.getData(digits, "training")
    binlbl = rn.BinaryLabels(lbl, digit)
    reducedList = rn.getReductionList(colnum, rownum)
    img = rn.ReduceSetDimension(reducedList, fullimg)
    adaboost = ab.AdaBoost(pr.Perceptron, img, binlbl, Twl,
                           int(len(lbl) * alpha))
    print "Error on the Training data: " + str(
        pr.TestClassifier(img, binlbl, adaboost) * 100.0) + "%"
    print "(out of " + str(len(binlbl)) + " samples)"
    print "Extracting the Testing Data"
    fullimg, lbl = rn.getData(digits, "testing")
    img = rn.ReduceSetDimension(reducedList, fullimg)
    binlbl = rn.BinaryLabels(lbl, digit)
    print "Error on the Testing data: " + str(
        pr.TestClassifier(img, binlbl, adaboost) * 100.0) + "%"
    print "(out of " + str(len(binlbl)) + " samples)"
Example #11
from numpy import *
import AdaBoost
datMat, classLabels = AdaBoost.loadSimpData()
# D=mat(ones((5,1))/5)
# print(AdaBoost.buildingStump(datMat,classLabels,D))
classEstArr = AdaBoost.adaBoostTrainDS(datMat, classLabels, 10)
print(AdaBoost.adaClassify([[5, 5], [0, 0]], classEstArr))
Example #12
import EXTRAS
import AdaBoost
import numpy

# EXTRAS.plotSimpleData()

# dataMat, labels = AdaBoost.loadSimpleData()

# D = numpy.mat(numpy.ones((5, 1)) / 5)

# bestStump, minError, bestClasEst = AdaBoost.buildStump(dataMat, labels, D)

dataMat, labels = AdaBoost.loadDataSet("E:/TestDatas/MachineLearningInAction/Ch07/horseColicTraining2.txt")

classifierArr, aggClassExt = AdaBoost.adaBoostTrainDS(dataMat, labels, 10)

AdaBoost.plotROC(aggClassExt.T, labels)
Example #13
def loadDataSet(fileName):      #general function to parse tab -delimited floats
    numFeat = len(open(fileName).readline().split('\t')) #get number of fields 
    dataMat = []; labelMat = []
    fr = open(fileName)
    for line in fr.readlines():
        lineArr =[]
        curLine = line.strip().split('\t')
        for i in range(numFeat-1):
            lineArr.append(float(curLine[i]))
        dataMat.append(lineArr)
        labelMat.append(float(curLine[-1]))
    return dataMat,labelMat

datArr,labelArr = loadDataSet('horseColicTest2.txt')
weakClassArr,aggClassEst = AdaBoost.adaBoostTrainDS(datArr,labelArr,40)
'''
testDataArr,testLabelArr= loadDataSet('horseColicTest2.txt')
prediction10=AdaBoost.adaClassify(testDataArr,weakClassArr)
errArr = mat(ones((67,1)))
#count the number of errors (error rate = errors / 67)
errArr[prediction10!=mat(testLabelArr).T].sum()
'''


'''Imbalanced (cost-sensitive) classification
Most methods implicitly assume every class carries the same misclassification cost, but in practice the costs of different classes are usually not equal.
1. Adjust the classifier's decision threshold.
    An alternative way to evaluate and compare classifiers: the ROC curve and the AUC.
    On measuring classifier performance: building a classifier that maximizes precision and recall at the same time is challenging.
'''
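# The notes above point to ROC curves and AUC as a better way to compare classifiers on
# imbalanced problems. Below is a minimal, self-contained sketch (not the plotROC used
# elsewhere in these examples) of computing ROC points and the AUC from real-valued
# scores such as aggClassEst margins; the scores and labels here are made-up illustrative
# data, and labels are assumed to be +1/-1.
import numpy as np

def roc_auc(scores, labels):
    """ROC points and AUC from real-valued scores and +/-1 labels (illustrative sketch)."""
    order = np.argsort(-np.asarray(scores, dtype=float))  # sort examples by descending score
    labels = np.asarray(labels)[order]
    pos = int(np.sum(labels == 1))
    neg = int(np.sum(labels == -1))
    tpr, fpr = [0.0], [0.0]
    tp = fp = 0
    auc = 0.0
    for y in labels:                     # sweep the decision threshold down, one example at a time
        if y == 1:
            tp += 1
        else:
            fp += 1
            auc += (float(tp) / pos) * (1.0 / neg)  # each false positive adds a strip of width 1/neg
        tpr.append(float(tp) / pos)
        fpr.append(float(fp) / neg)
    return fpr, tpr, auc

# made-up margins and true labels, for illustration only
print(roc_auc([0.9, 0.4, -0.2, 0.6, -0.7, 0.1], [1, 1, -1, 1, -1, -1]))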
Example #14
# -*- coding: utf-8 -*-
"""

AdaBoost: simple dataset
@author: Jerry
"""
import numpy as np
import AdaBoost


def loadDataSet():
    dataMat = np.matrix(([1., 2.1], [2., 1.1], [1.3, 1.], [1., 1.], [2., 1.]))
    classLabels = [1.0, 1.0, -1.0, -1.0, 1.0]
    return dataMat, classLabels


if __name__ == '__main__':
    dataMat, classLabels = loadDataSet()

    #    AdaBoost.adaBoostTrainDS(dataMat,classLabels, 9)

    classifierArray = AdaBoost.adaBoostTrainDS(dataMat, classLabels, 30)
    predictedLabel = AdaBoost.adaClassify([0, 0], classifierArray)
    print(predictedLabel)
Example #15
# Convert numerical attributes to binary based on median thresholds
numericalMedians = DecisionTree.setThreshold(trainData)
binaryTrainData = DecisionTree.setBinary(trainData, numericalMedians)
testData = DecisionTree.setBinary(testData, numericalMedians)

#============================================
# AdaBoost
#============================================
print('Running AdaBoost for 1 to 10 iterations...')
myAccuracy = []
maxAccuracy = 0
nt = range(1, 10, 1)
for n in nt:
    binaryTrainData1 = copy.deepcopy(binaryTrainData)
    binaryTrainData1 = AdaBoost.assignSampleWeights(binaryTrainData1)

    # Build stump
    stumps = []
    stumpWeights = []
    iterations = n
    newTrainData = binaryTrainData1
    weightLookup = None

    # Run adaBoost algorithm
    for run in range(iterations):
        eFeatures = copy.deepcopy(features)
        # Build dictionary of feature values
        c = 0
        for key in eFeatures.keys():
            for line in newTrainData:
Example #16
def credit_experiment():

    file_path = "/home/john/PycharmProjects/u1201441_Private_Repository/CS6350_Files/HW2/credit/default of credit card clients.csv"

    numeric_cols = [0, 4, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22]
    missing_identifier = None
    training_data = []
    test_data = []

    data = ID3.data_parsing(file_path, numeric_cols)

    LABEL_INDEX = len(data[0]) - 2

    for instance in data:
        if instance[LABEL_INDEX] == '1':
            instance[LABEL_INDEX] = "yes"
        else:
            instance[LABEL_INDEX] = "no"

    test_indices = random.sample(range(len(data)), len(data))
    for i in test_indices:
        if i < 6000:
            test_data.append(data[i])
        else:
            training_data.append(data[i])

    iterations = 100

    decision_tree = ID3.build_decision_tree(
        training_data,
        max_depth=-1,
        info_gain_type=1,
        numeric_cols=numeric_cols,
        missing_identifier=missing_identifier)
    adaboost = AdaBoost.ada_boost(training_data,
                                  iterations=iterations,
                                  numeric_cols=numeric_cols,
                                  missing_identifier=missing_identifier)
    bagged_tree = BaggedTrees.bagged_trees(
        training_data,
        iterations=iterations,
        sample_size=100,
        numeric_cols=numeric_cols,
        missing_identifier=missing_identifier)
    forest = RandomForest.random_forest(training_data,
                                        iterations=iterations,
                                        sample_size=100,
                                        numeric_cols=numeric_cols,
                                        missing_identifier=missing_identifier,
                                        feature_size=4)

    # Decision Tree results

    tree_results = ID3.test_tree(decision_tree, training_data, numeric_cols,
                                 missing_identifier)
    tree_train = 1 - tree_results[0] / tree_results[1]
    tree_results = ID3.test_tree(decision_tree, test_data, numeric_cols,
                                 missing_identifier)
    tree_test = 1 - tree_results[0] / tree_results[1]

    tree_train_ln = []
    tree_test_ln = []

    for t in range(iterations):
        tree_train_ln.append(tree_train)
        tree_test_ln.append(tree_test)

    # AdaBoost results
    ada_results_train = AdaBoost.test_ada_boost_hypothesis(
        adaboost, training_data, numeric_cols, missing_identifier)
    ada_results_test = AdaBoost.test_ada_boost_hypothesis(
        adaboost, test_data, numeric_cols, missing_identifier)

    ada_train = []
    ada_test = []

    for t in range(iterations):
        ada_train.append(1 - ada_results_train[t][0] / ada_results_train[t][1])
        ada_test.append(1 - ada_results_test[t][0] / ada_results_test[t][1])

    ada_graph = [
        tuple([ada_train, "AdaBoost Train"]),
        tuple([ada_test, "AdaBoost Test"]),
        tuple([tree_train_ln, "Tree Train"]),
        tuple([tree_test_ln, "Tree Test"])
    ]

    GraphUtility.graph(ada_graph, "AdaBoost Data", "Iterations", "Error")

    # Bagging results
    results_train = BaggedTrees.test_bagged_tree_hypothesis(
        bagged_tree, training_data, numeric_cols, missing_identifier)
    results_test = BaggedTrees.test_bagged_tree_hypothesis(
        bagged_tree, test_data, numeric_cols, missing_identifier)

    # Charts
    bag_train = []
    bag_test = []

    for t in range(iterations):
        bag_train.append(1 - results_train[t][0] / results_train[t][1])
        bag_test.append(1 - results_test[t][0] / results_test[t][1])

    bag_graph = [
        tuple([bag_train, "Bagging Train"]),
        tuple([bag_test, "Bagging Test"]),
        tuple([tree_train_ln, "Tree Train"]),
        tuple([tree_test_ln, "Tree Test"])
    ]
    GraphUtility.graph(bag_graph, "Bagged Tree Data", "Num Trees", "Error")

    # Forest Results
    results_train = RandomForest.test_random_forest_hypothesis(
        forest, training_data, numeric_cols, missing_identifier)
    results_test = RandomForest.test_random_forest_hypothesis(
        forest, test_data, numeric_cols, missing_identifier)

    # Charts
    forest_train = []
    forest_test = []
    for t in range(iterations):
        forest_train.append(1 - results_train[t][0] / results_train[t][1])
        forest_test.append(1 - results_test[t][0] / results_test[t][1])

    forest_graph = [
        tuple([forest_train, "Forest Train - " + str(2) + " features"]),
        tuple([forest_test, "Forest Test - " + str(2) + " features"]),
        tuple([tree_train_ln, "Tree Train"]),
        tuple([tree_test_ln, "Tree Test"])
    ]
    GraphUtility.graph(forest_graph, "Random Forest Data", "Num Trees",
                       "Error")
Example #17
        

if __name__ == "__main__":
    
    # TODO: select optimal threshold for each feature
    # TODO: attentional cascading
    
    print 'Loading faces..'
    faces = load_images('training/faces', 1)
    print '..done. ' + str(len(faces)) + ' faces loaded.\n\nLoading non faces..'
    non_faces = load_images('training/nonfaces', -1)
    print '..done. ' + str(len(non_faces)) + ' non faces loaded.\n'
    
    T = 20
    if LEARN:
        classifiers = AdaBoost.learn(faces, non_faces, T)
        # Store the classifiers. Pickle can't serialize the ImageDraw object held inside the
        # IntegralImage class, so self.score of each Haar feature must not be keyed by
        # IntegralImage objects; empty it to avoid the error (the cache isn't used on test
        # images anyway).
        
        for haarfeature,weight in classifiers:
            haarfeature.score = {}
            
        with open(FILENAME,'wb') as fileobj:
            pickle.dump(classifiers,fileobj)
        
    else:
        with open(FILENAME, 'rb') as fileobj:
            classifiers = pickle.load(fileobj)
        
        print 'Loading test faces..'
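# The comment in the block above illustrates a general pattern: strip caches that pickle
# cannot serialize before dumping an object. A minimal, standalone sketch of the same idea
# follows; this HaarFeature class and its score cache are hypothetical stand-ins, not the
# classes used above, and the lambda key merely plays the role of an unpicklable object
# such as an IntegralImage holding an ImageDraw.
import pickle

class HaarFeature(object):
    """Hypothetical feature whose per-image score cache must not be pickled."""
    def __init__(self, threshold):
        self.threshold = threshold
        self.score = {}                  # may hold keys/values pickle cannot handle

    def __getstate__(self):
        state = self.__dict__.copy()
        state['score'] = {}              # drop the cache whenever the object is pickled
        return state

feature = HaarFeature(0.5)
feature.score[lambda ii: ii] = 1.0       # a lambda is not picklable
data = pickle.dumps(feature)             # still works: __getstate__ removed the cache
print(pickle.loads(data).score)          # -> {}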
Example #18
positif_data_testing = 'testImage'
# negatif_data_testing = 'png/trainset/non-faces_short'

print('load faces data...')
faces_data = ul.load_images(positif_data_testing)
faces_ii_data = list(map(ii.to_integral_image, faces_data))
print(str(len(faces_ii_data)) + ' Has been loaded.\nload non faces data...')
# non_faces_data = ul.load_images(negatif_data_testing)
# non_faces_ii_data = list(map(ii.to_integral_image,non_faces_data))
# print(str(len(non_faces_ii_data))+' Has been loaded.')

print('loaded database')
features = ul.load_database(pathDatabase)

print('create cascade stage')
cascade = cas.cascade_load(features)

correct_faces = 0
correct_non_faces = 0

for idx, classifiers in enumerate(cascade):
    correct_faces = sum(ab.ensemble_vote_all(faces_ii_data, classifiers))
    # correct_non_faces = len(non_faces_data) - sum(ab.ensemble_vote_all(non_faces_ii_data,classifiers))
    print('Result after ' + str(idx + 1) + ' layer(s):\n     Faces: ' +
          str(correct_faces) + '/' + str(len(faces_data)))

    # print('Result after ' + str(idx + 1) + ' layer(s):\n     Faces: ' + str(correct_faces) + '/' + str(
    #     len(faces_data))
    #       + '  (' + str((float(correct_faces) / len(faces_data)) * 100) + '%)\n  non-Faces: '
    #       + str(correct_non_faces) + '/' + str(len(non_faces_data)) + '  ('
    #       + str((float(correct_non_faces) / len(non_faces_data)) * 100) + '%)')
Example #19
#chap 8 AdaBoost

import DatasetUtil as DU
import AdaBoost as Ada
import numpy as np

if __name__ == "__main__":
    print("\t============ Chap8 AdaBoost ============")

    ds = DU.DATAUtil()
    xx, yy = ds.load(True, r".\dataset.dat")
    print("\t ===== Do Initializing =====")
    model = Ada.Boosting()

    print("\t ===== Do Training =====")
    model.train(xx, yy)

    print("\t ===== Do Testing =====")
    xx_t, yy_t = ds.load(True, r".\testds.dat")
    yy_t = np.squeeze(yy_t)
    y_p = model.predict(xx_t)

    yy_p = ds.y_int2str(y_p)

    print(" Test Result is: ")
    print(yy_p)

    eval_y = np.zeros_like(yy_t, dtype=float)
    eval_y[np.where(yy_t == y_p)] = 1.0
    print(" Test result precision is {:.2%}".format(eval_y.sum() /
                                                    eval_y.shape[0]))
Example #20
def main():
    data = file_get_contents('ressource/spambase.data')
    testSet = [data[0], data[30], data[1776], data[3805]]

    print(ada.error_rate(data, testSet, 3))
    print(ada.error_rate(data, testSet, 4))
Example #21
positif_data_testing = 'testset/faces'
negatif_data_testing = 'testset/non-faces'

# define the cascade levels (number of weak classifiers per stage) as a list
level_cascade = [2,10,20,20,30,30,50,50,50,50,60,60,80,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100]

# load training data
print('Loading positive training data...')
faces_data = ul.load_images(positif_data_training)
faces_ii_data = list(map(ii.to_integral_image, faces_data))
print(str(len(faces_ii_data)) + ' have been loaded.\nLoading negative training data...')
non_faces_data = ul.load_images(negatif_data_training)
non_faces_ii_data = list(map(ii.to_integral_image, non_faces_data))
print(str(len(non_faces_ii_data)) + ' have been loaded.')

img_height, img_width = faces_ii_data[0].shape

#create features
features = ab.create_features(24,24,min_feature_height=4,max_feature_height=10,min_feature_width=4,max_feature_width=10)

# cascade => stages, each a bunch of weak classifiers; alpha => the weight of each classifier
cascade = cas.cascade_latih(faces_ii_data,non_faces_ii_data,features, level_cascade)

database = []

for casc in cascade:
    for item in casc:
        database.append(item)

with open('database.json','w') as f:
    json.dump(database,f,default=dumper,indent=4)
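# The "cascade => stages" comment above describes the structure cascade_latih returns. As a
# rough, generic illustration of how such a structure is used at detection time (this is not
# the ab/cas API used above; the stages here are made-up (threshold, weight) pairs voting on
# a single scalar), a window is accepted only if every stage votes positive, so most
# negatives are rejected by the first, cheapest stages:
def stage_vote(x, stage):
    # toy stage: weighted vote of (threshold, weight) weak classifiers on a scalar x
    total = sum(weight for _, weight in stage)
    score = sum(weight for threshold, weight in stage if x >= threshold)
    return 1 if score >= 0.5 * total else 0

def cascade_classify(x, cascade_stages):
    # accept x only if every stage votes positive; reject early otherwise
    for stage in cascade_stages:
        if stage_vote(x, stage) == 0:
            return 0                     # early rejection: later, larger stages never run
    return 1

# two made-up stages: a cheap 2-classifier stage followed by a larger one
toy_cascade = [[(0.2, 1.0), (0.4, 1.0)],
               [(0.3, 0.5), (0.5, 1.0), (0.7, 1.0)]]
print(cascade_classify(0.9, toy_cascade), cascade_classify(0.1, toy_cascade))  # -> 1 0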
Example #22
    ax = plt.subplot(111)
    # plot the curve
    for index in sortedIndicies.tolist()[0]:
        if classLabels[index] == 1.0:
            delX = 0
            delY = yStep
        else:
            delX = xStep
            delY = 0
            ySum += cur[1]
        ax.plot([cur[0],cur[0]-delX],[cur[1],cur[1]-delY], c='b')
        cur = (cur[0]-delX,cur[1]-delY)
    ax.plot([0,1],[0,1],'b--')
    plt.xlabel('False positive rate')
    plt.ylabel('True positive rate')
    plt.title('ROC curve for AdaBoost horse colic detection system')
    ax.axis([0,1,0,1])
    plt.show()
    print("the Area Under the Curve is: ",ySum*xStep)



if __name__ == '__main__':
    trainingMat,trainingLabels = AdaBoost.loadDataSet('horseColicTraining2.txt')
    classifierArray = AdaBoost.adaBoostTrainDS(trainingMat,trainingLabels, 10)
    
    testMat,testLabels = AdaBoost.loadDataSet('horseColicTest2.txt')
    prediction10 = AdaBoost.adaClassify(testMat, classifierArray)
    print(prediction10)
    
    plotROC(prediction10, testLabels)
Example #23
print('')
print('***** RESULTS DECISION TREE *****')
print('Depth: ', tree.depth)

print('')
print('Training Error: ', error_rate_tree_train)
print('Test Error    : ', error_rate_tree_test)

cNums = range(1, 31)
train_errs = list()
test_errs = list()
for i in cNums:
    trains = list()
    tests = list()
    for j in range(20):
        boost = ab.Boost()
        boost.train(X_train, y_train, cNum=i, verbose=False)
        pred_boost_train = boost.predict(X_train)
        pred_boost_test = boost.predict(X_test[0])
        error_rate_boost_test = (sum([
            0 if pred == true else 1
            for (pred, true) in zip(y_test, pred_boost_test)
        ]) / float(len(y_test)))
        error_rate_boost_train = (sum([
            0 if pred == true else 1
            for (pred, true) in zip(y_train, pred_boost_train)
        ]) / float(len(y_train)))
        trains.append(error_rate_boost_train)
        tests.append(error_rate_boost_test)
    train_errs.append(np.mean(trains))
    test_errs.append(np.mean(tests))
Example #24
import sys
sys.path.append("../Basic Functions")
import AdaBoost
from LoadData import LoadData

if __name__ == '__main__':
    trainingDataArray, trainingLabelList = LoadData("HorseColicTraining.txt")
    classifierList, totalPredictValue = AdaBoost.AdaboostTrain(
        trainingDataArray, trainingLabelList, 10)
    testDataArray, testLabelList = LoadData("HorseColicTest.txt")
    result = AdaBoost.AdaClassify(testDataArray, classifierList)
    errorList = [
        i for i in range(len(testLabelList)) if testLabelList[i] != result[i]
    ]
    print(errorList)

    AUC = AdaBoost.PlotROC(trainingLabelList, totalPredictValue)
    print(AUC)
Example #25
PP = PreProcess.PreProcess(data, n_buckets=10,
                           func='boolean')  #,swap_labels=True)
data = PP.fit(data)
testset = PP.fit(testset)

# read weights

weights = []

for w in range(6):
    tmp = [float(line.rstrip('\n')) for line in open('result/W' + str(w))]
    if len(tmp) != len(data[0]):
        continue
    weights.append(tmp)

ab = AdaBoost.AdaBoost(weightVector=weights)
ab.fit(data, data_labels)

predictTrain = ab.predict(data)
predictTest = ab.predict(testset)

print("Accuracy for training set:")
print(Stat.Accuracy(predictTrain, data_labels))

print("F1 score for training set:")
print(Stat.F1_Score(predictTrain, data_labels))

print("Precision for training set:")
print(Stat.Precision(predictTrain, data_labels))

print("Recall for training set:")
# -*- coding: utf-8 -*-
"""
Created on Wed Oct 10 20:38:02 2018

@author: tf
"""

import AdaBoost
import numpy as np

#dataMat, labelMat = AdaBoost.loadDataSet()
#print(dataMat, '\n', labelMat)

#D = np.ones((5, 1)) / 5
#bestStump, minErr, bestClassEst = AdaBoost.buildStump(dataMat, labelMat, D)
#print(bestStump, '\n', minErr, '\n', bestClassEst)

#classifierArr = AdaBoost.adaBoostTrainDS(dataMat, labelMat)
#print(classifierArr)
#print(max(0.1,0.2))

#clas = AdaBoost.adaClassify(np.array([[5, 5], [0, 0]]), classifierArr)
#print(clas)

dataMat, labelMat = AdaBoost.loadFileDataSet('horseColicTraining2.txt')
classifierArr = AdaBoost.adaBoostTrainDS(dataMat, labelMat)
#print(classifierArr)

testDataMat, testLabelMat = AdaBoost.loadFileDataSet('horseColicTest2.txt')
errRate = AdaBoost.adaClassify(testDataMat, classifierArr, testLabelMat)
print(errRate)
Example #27
def main(number_of_stages, set_pos, set_neg):
    trainer = AdaBoost(set_pos, set_neg, number_of_stages)
    classifiers = trainer.train_simple()
    image = cv2.imread("1.pgm")
    print(StrongClassifier(classifiers, pgm_simplifier(image)).get_result())
Example #28
    max_feature_width = 10

    #Loading faces with some basic check
    faces_training = es.load_images(pos_training_path)

    # convert every training face to its integral image
    faces_ii_training = list(map(ii.to_integral_image, faces_training))
    print('done faces loaded.')

    non_faces_training = es.load_images(neg_training_path)
    non_faces_ii_training = list(map(ii.to_integral_image, non_faces_training))
    print('done non faces loaded.\n')

    # classifiers are haar like features
    classifiers = ab.learn(faces_ii_training, non_faces_ii_training,
                           num_classifiers, min_feature_height,
                           max_feature_height, min_feature_width,
                           max_feature_width)

    print('Loading test faces..')
    faces_testing = es.load_images(pos_testing_path)
    faces_ii_testing = list(map(ii.to_integral_image, faces_testing))
    print('..done faces loaded.\n')
    non_faces_testing = es.load_images(neg_testing_path)
    non_faces_ii_testing = list(map(ii.to_integral_image, non_faces_testing))
    print('..done non faces loaded.\n')

    print('Testing selected classifiers..')
    correct_faces = 0
    correct_non_faces = 0
    # Classifies given list of integral images using classifiers,
    correct_faces = sum(es.ensemble_vote_all(faces_ii_testing, classifiers))
Example #29
def __train(training_set):
    ensemble = ab.AdaBoostEnsemble()
    ensemble.train(training_set, len(training_set), ensemble_size,
                   __train_tree, __evaluate_tree)
    return ensemble
Example #30
# -*- coding: utf-8 -*-
from numpy import *
import AdaBoost as ab
import LAB as lab

if __name__ == '__main__':

    mylist = []
    mytrain = []
    # load the tennis-ball images
    for index in range(5):
        arr = lab.load('B0_%d.jpg' % index)
        arr2 = lab.LAB(arr).flatten()  # convert to LAB colour space, then flatten
        mylist.append(arr2.tolist())
        mytrain.append(1.)
    # load the apple images
    for index in range(5):
        arr = lab.load('B2_%d.jpg' % index)
        arr2 = lab.LAB(arr).flatten()  # convert to LAB colour space, then flatten
        mylist.append(arr2.tolist())
        mytrain.append(-1.)
    #
    dataArr = matrix(mylist)
    classLabels = matrix(mytrain)
    D = mat(ones((10, 1)) / 10)
    classifierArray = ab.adaBoostTrainDS(dataArr, classLabels, 10)
    for x in classifierArray:
        print('alpha:', x['alpha'], 'dim:', x['dim'], 'thresh:', x['thresh'],
              'ineq:', x['ineq'])
Example #31
def reset_weight(Data):
    w0 = 1 / float(len(Data))
    for row in Data:
        row['weight'] = w0


# Q2.a
# Run AdaBoosting for T = 1-500, plot error-T relationship
outFile = open("ada_out.txt", 'w')
outFile.write("iter\terr_train\terr_test\n")
for T in [
        1, 2, 3, 4, 5, 6, 8, 10, 15, 20, 30, 50, 70, 90, 120, 150, 200, 250,
        300, 350, 400, 450, 500
]:
    trees, alphas = AdaBoost.AdaBoost_train(Data_train, Attributes, Labels, T)
    hit_train = AdaBoost.AdaBoost_test(Data_train, trees, alphas)
    hit_test = AdaBoost.AdaBoost_test(Data_test, trees, alphas)
    outFile.write(
        str(T) + "\t" + str(1 - hit_train) + "\t" + str(1 - hit_test) + "\n")
    reset_weight(Data_train)

outFile.close()

# for T=500, find training and test error in each iteration
e_t, e_r = AdaBoost.print_err_Ada(Data_train, Data_test, Attributes, Labels,
                                  500)
t = [i + 1 for i in range(0, 500)]

fig, ax = plt.subplots(figsize=(6, 4))
ax.plot(t, e_r, label='test error', c='grey', alpha=0.3)
Example #32
from AdaBoost import *
from sys import argv

x_train = argv[1]
y_train = argv[2]
x_test = argv[3]
y_test = argv[4]
T = int(argv[5])

feats = np.loadtxt(x_train, delimiter=',', dtype=float).transpose()
labels = np.loadtxt(y_train, delimiter=',', dtype=int)
ab_labels = np.array([1 if i == 1 else -1 for i in labels])
abs = []
for i in range(30):
    abs.append(AdaBoost(T, feats[i], ab_labels))
    abs[-1].train()

x_test = np.loadtxt(x_test, delimiter=',', dtype=float)
y_test = np.loadtxt(y_test, delimiter=',', dtype=float)
ab_y_test = np.array([1 if i == 1 else -1 for i in y_test])
predictions = []
for i in range(x_test.shape[0]):
    predictions.append(
        np.sign(
            np.sum([abs[j].eval(x_test[i][j])
                    for j in range(x_test.shape[1])])))

print("accuracy = {}".format(
    len(np.where(predictions == ab_y_test)[0]) / len(ab_y_test)))