def ada_boost_experiment():
    examples = ID3.data_parsing(FILE_PATH_TRAIN, numeric_cols)
    # hypothesis = AdaBoost.ada_boost(examples, 5, numeric_cols, missing_identifier)
    # print(hypothesis)
    iterations = 100
    hypothesis = AdaBoost.ada_boost(examples, iterations, numeric_cols, missing_identifier)
    ada_results_train = AdaBoost.test_ada_boost_hypothesis(
        hypothesis, FILE_PATH_TRAIN, numeric_cols, missing_identifier)
    ada_results_test = AdaBoost.test_ada_boost_hypothesis(
        hypothesis, FILE_PATH_TEST, numeric_cols, missing_identifier)

    # for t in range(iterations):
    #     print("AdaBoost Training Set - t:", t, "results:", ada_results_train[t],
    #           "{0:.2%}".format(1 - ada_results_train[t][0] / ada_results_train[t][1]))
    # for t in range(iterations):
    #     print("AdaBoost Testing Set - t:", t, "results:", ada_results_test[t],
    #           "{0:.2%}".format(1 - ada_results_test[t][0] / ada_results_test[t][1]))
    # for t in range(iterations):
    #     tree_results = ID3.test_tree(hypothesis[t][0], FILE_PATH_TRAIN, numeric_cols, missing_identifier)
    #     print("Decision Tree Training Set - t:", t, "results:", tree_results,
    #           "{0:.2%}".format(1 - tree_results[0] / tree_results[1]))
    # for t in range(iterations):
    #     tree_results = ID3.test_tree(hypothesis[t][0], FILE_PATH_TEST, numeric_cols, missing_identifier)
    #     print("Decision Tree Test Set - t:", t, "results:", tree_results,
    #           "{0:.2%}".format(1 - tree_results[0] / tree_results[1]))

    ada_train = []
    ada_test = []
    dec_train = []
    dec_test = []
    for t in range(iterations):
        ada_train.append(1 - ada_results_train[t][0] / ada_results_train[t][1])
        ada_test.append(1 - ada_results_test[t][0] / ada_results_test[t][1])
        tree_results = ID3.test_tree(hypothesis[t][0], FILE_PATH_TRAIN, numeric_cols, missing_identifier)
        dec_train.append(1 - tree_results[0] / tree_results[1])
        tree_results = ID3.test_tree(hypothesis[t][0], FILE_PATH_TEST, numeric_cols, missing_identifier)
        dec_test.append(1 - tree_results[0] / tree_results[1])

    ada_graph = [(ada_train, "AdaBoost Train"), (ada_test, "AdaBoost Test")]
    GraphUtility.graph(ada_graph, "AdaBoost Data", "Iterations", "Error")
    tree_graph = [(dec_train, "Tree Train"), (dec_test, "Tree Test")]
    GraphUtility.graph(tree_graph, "Decision Tree Data", "Iterations", "Error")

def detect(image, cascade):
    img_height, img_width = image.shape
    int_img = ii.to_integral_image(image)
    step = 2
    scale = 1.5
    height = 48
    width = 48
    curScale = 2
    faces = []
    count = 0
    correct_face = 0
    # slide a window over the image at increasing scales; a window is kept only
    # if every cascade stage votes "face"
    while width <= img_width and height <= img_height:
        # use builtin int(); np.int was removed from recent NumPy releases
        for x in range(0, int(img_width - width), int(np.ceil(step * scale))):
            for y in range(0, int(img_height - height), int(np.ceil(step * scale))):
                for classifiers in cascade:
                    correct_face = ab.ensemble_vote_with_scalar(int_img, classifiers, (x, y), curScale)
                    if correct_face == 0:
                        break
                if correct_face == 1:
                    faces.append([x, y, int(np.ceil(width)), int(np.ceil(height))])
                    count += 1
        width *= scale
        height *= scale
        curScale *= scale
    print(str(count) + ' Faces Detected')
    return faces

def train(feats, labels, T):
    ab_labels = transform_labels(labels)
    ft = feats.transpose()
    abs = []
    for i in range(30):
        abs.append(AdaBoost(T, ft[i], ab_labels))
        abs[-1].train()
    return abs

def calc_votes(features, images):
    jumlah_image = len(images)
    jumlah_feature = len(features)
    votes = np.zeros((jumlah_image, jumlah_feature))
    for i in tqdm(range(jumlah_image)):
        for j in range(jumlah_feature):
            votes[i, j] = ab.vote(image=images[i], feature=features[j])
    return votes

def main():
    print "Init data"
    data = pd.read_csv(sys.argv[1])
    columnas = data.columns.tolist()
    x = columnas[len(columnas) - 1]
    target = data[x]
    y = columnas[0:len(columnas) - 1]
    dataset = data[y]

    AdaBoostObject = AdaBoost.AdaBoost(dataset, target, 3, 'SAMME', 10)
    AdaBoostObject.trainingMethod()

    # ROC curve
    # curveRocObject = createRocCurve.curveRoc(dataset, target, AdaBoostObject.model, 10, 'user', 'job', 'path')
    # curveRocObject.createCurveROC()

    # precision-recall curve
    # precisionCurve = createPrecisionRecallCurve.curvePrecision(dataset, target, AdaBoostObject.model, 10, 'user', 'job', 'path')
    # precisionCurve.plot_precision_and_recall_curve()

    # learning curve
    # learningCurveDemo = createLearningCurve.curveLearning(dataset, target, AdaBoostObject.model, 10, 'user', 'job', 'path')
    # learningCurveDemo.createLearningCurve()

    # confusion matrix data
    confusionMatrixDemo = createConfusionMatrix.confusionMatrix(
        dataset, target, AdaBoostObject.model, 10, 'user', 'job', 'path',
        ["Clinical", "No Clinical"])
    confusionMatrixDemo.createConfusionMatrix()

    # bagginObject = Baggin.Baggin(dataset, target, 3, 10)
    # bagginObject.trainingMethod()
    # bernoulliNB = BernoulliNB.Bernoulli(dataset, target, 10)
    # bernoulliNB.trainingMethod()
    # decisionTreeObject = DecisionTree.DecisionTree(dataset, target, 'entropy', 'best', 10)
    # decisionTreeObject.trainingMethod()
    # gaussianObject = GaussianNB.Gaussian(dataset, target, 10)
    # gaussianObject.trainingMethod()
    # gradientObject = Gradient.Gradient(dataset, target, 3, 10)
    # gradientObject.trainingMethod()
    # knnObect = knn.knn(dataset, target, 2, 'auto', 'minkowski', 10)
    # knnObect.trainingMethod()
    # MLPObject = MLP.MLP(dataset, target, 'relu', 'sgd', 'adaptive', 1, 1, 1, 10)
    # MLPObject.trainingMethod()
    # nuSVM = NuSVM.NuSVM(dataset, target, 'poly', 10)
    # nuSVM.trainingMethod()
    # rf = RandomForest.RandomForest(dataset, target, 10, 'gini', 10)
    # rf.trainingMethod()
    # svm = SVM.SVM(dataset, target, 'poly', 10)
    # svm.trainingMethod()

    return 0

def cascade_latih(faces_ii_data, non_faces_ii_data, features, level_cascade):
    cascade = []
    start_stage = 44
    banned = True
    path_banned = 'bannen_index_stage43.json'
    if banned:
        features_stg = []
        banned_index = ul.load_banned_index(path_banned)
        for i in range(0, start_stage - 1):
            features_stage = ul.load_database('database_stage' + str(i + 1) + '.json')
            for fitur in features_stage:
                features_stg.append(fitur)
    else:
        banned_index = []

    images = faces_ii_data + non_faces_ii_data
    votes = calc_votes(features, images)
    print('Starting attentional cascade training ...')
    # select the cascade stage to train; earlier stages are loaded from disk
    for idx, classifier in enumerate(level_cascade):
        if start_stage == idx + 1:
            print('Begin Training ' + str(idx + 1) + ' layer :...')
            classifiers, banned_index = ab.learn(faces_ii_data, non_faces_ii_data,
                                                 features, classifier, votes, banned_index)
        else:
            classifiers = ul.load_features(features_stg, classifier, idx + 1)
        cascade.append(classifiers)

        # test classifiers
        # correct_faces = sum(ab.ensemble_vote_all(faces_ii_data, classifiers))
        # correct_non_faces = len(non_faces_ii_data) - sum(ab.ensemble_vote_all(non_faces_ii_data, classifiers))
        # print('Result after ' + str(idx + 1) + ' layer(s):\n Faces: ' + str(correct_faces) + '/' + str(
        #     len(faces_ii_data))
        #       + ' (' + str((float(correct_faces) / len(faces_ii_data)) * 100) + '%)\n non-Faces: '
        #       + str(correct_non_faces) + '/' + str(len(non_faces_ii_data)) + ' ('
        #       + str((float(correct_non_faces) / len(non_faces_ii_data)) * 100) + '%)')

        database_stage = []
        for clas in classifiers:
            database_stage.append(clas)
        with open('database_stage' + str(idx + 1) + '.json', 'w') as f:
            json.dump(database_stage, f, default=dumper, indent=4)
        with open('bannen_index_stage' + str(idx + 1) + '.json', 'w') as b:
            json.dump(banned_index, b, default=dumper, indent=4)
    return cascade

def main():
    lines = []
    for i in range(11):
        lines.append(i)

    if sys.argv[1] == "train":
        training_file = sys.argv[2]
        hypothesis_file = sys.argv[3]
        sentences = []
        for line in open(training_file, encoding="utf-8-sig"):
            sentences.append(line.strip())
        sentence_attributes = AttributeCalculator.sentences_to_boolean_attributes(sentences)
        node = Node.Node(sentence_attributes, lines)
        if sys.argv[4] == "dt":
            dt = DecisionTreeTraining(node).train_dt(node.sentences, node.input, 0)
            hypothesis_out = open(hypothesis_file, "wb")
            pickle.dump(dt, hypothesis_out)
        elif sys.argv[4] == "ada":
            ada = AdaBoost.AdaBoostTraining(DecisionTreeTraining, AdaBoostTraining.stump).train_ada(node)
            hypothesis_out = open(hypothesis_file, "wb")
            pickle.dump(ada, hypothesis_out)
    elif sys.argv[1] == "predict":
        hypothesis = sys.argv[2]
        prediction_file = sys.argv[3]
        sentences = []
        hyp = open(hypothesis, "rb")
        model = pickle.load(hyp)
        file = open(prediction_file, encoding="utf-8-sig")
        for line in file:
            line = line.strip()
            sentences.append(line)
        sentences = AttributeCalculator.sentences_to_boolean_attributes_predict(sentences)
        if isinstance(model, Decision):
            for sentence in sentences:
                print(model.get_language(sentence))
        elif isinstance(model, WeightedDecision):
            for sentence in sentences:
                result = "".join(model.predict_language(sentence))
                print(result)

def cascade_stage(faces_ii_data, non_faces_ii_data, features, banned_index, votes, stage):
    level_cascade = [
        2, 10, 20, 20, 30, 30, 50, 50, 50, 50, 60, 60, 80, 100, 100, 100, 100,
        100, 100, 100, 100, 100, 100, 100, 100, 100, 100
    ]
    database_stage = []
    jum_fitur = level_cascade[stage - 1]
    classifiers, banned = ab.learn(faces_ii_data, non_faces_ii_data, features,
                                   jum_fitur, votes, banned_index)
    for clas in classifiers:
        database_stage.append(clas)
        print(str(clas))
    with open('database_stage' + str(stage) + '.json', 'w') as f:
        json.dump(database_stage, f, default=dumper, indent=4)
    with open('banned_index_stage' + str(stage) + '.json', 'w') as b:
        json.dump(banned, b, default=dumper, indent=4)
    return 1

def runBoost(X_train, y_train, X_test, y_test, numClassifiers=10):
    '''initialize AdaBoost'''
    boost = ab.Boost()
    boost.train(X_train, y_train, cNum=numClassifiers)

    # test with training data
    pred_boost_train = boost.predict(X_train)
    error_rate_boost_train = (sum([
        0 if pred == true else 1
        for (pred, true) in zip(pred_boost_train, y_train)
    ]) / float(len(y_train)))

    # now test with remaining data
    pred_boost_test = boost.predict(X_test)
    error_rate_boost_test = (sum([
        0 if pred == true else 1
        for (pred, true) in zip(pred_boost_test, y_test)
    ]) / float(len(y_test)))

    '''Add prints for diagnostics and results here:'''
    '''print('')
    print('***** RESULTS ADABOOST *****')
    decisions = [c.root.splitCriteria for c in boost.classifiers]
    print('Decision Criteria per trees used: ', decisions)
    decision_columns = [c for c, _ in decisions]
    unique_cols = np.unique(decision_columns)
    col_counts = Counter(decision_columns).most_common()
    print('unique columns and frequency of split columns: ', col_counts)
    print('')
    print('Training Error: ', error_rate_boost_train)
    print('Test Error    : ', error_rate_boost_test)'''

    return error_rate_boost_train, error_rate_boost_test

def TrainAndTestAdaBoostWeakBinPerceptron():
    if len(digits) > 2:
        print "digits should be 2 digits in the case of Binary Perceptron"
    print "Running the AdaBoost with Weak Binary Perceptron"
    print "the digits it shall discriminate between: " + str(digits[0]) + "," + str(digits[1])
    print "the label 1 will be given to the digit " + str(digits[0])
    fullimg, lbl = rn.getData(digits, "training")
    binlbl = rn.BinaryLabels(lbl, digit)
    reducedList = rn.getReductionList(colnum, rownum)
    img = rn.ReduceSetDimension(reducedList, fullimg)
    adaboost = ab.AdaBoost(pr.Perceptron, img, binlbl, Twl, int(len(lbl) * alpha))
    print "Error on the Training data: " + str(pr.TestClassifier(img, binlbl, adaboost) * 100.0) + "%"
    print "(out of " + str(len(binlbl)) + " samples)"
    print "Extracting the Testing Data"
    fullimg, lbl = rn.getData(digits, "testing")
    img = rn.ReduceSetDimension(reducedList, fullimg)
    binlbl = rn.BinaryLabels(lbl, digit)
    print "Error on the Testing data: " + str(pr.TestClassifier(img, binlbl, adaboost) * 100.0) + "%"
    print "(out of " + str(len(binlbl)) + " samples)"

from numpy import *
import AdaBoost

datMat, classLabels = AdaBoost.loadSimpData()
# D = mat(ones((5, 1)) / 5)
# print(AdaBoost.buildingStump(datMat, classLabels, D))
classEstArr = AdaBoost.adaBoostTrainDS(datMat, classLabels, 10)
print(AdaBoost.adaClassify([[5, 5], [0, 0]], classEstArr))

import EXTRAS
import AdaBoost
import numpy

# EXTRAS.plotSimpleData()
# dataMat, labels = AdaBoost.loadSimpleData()
# D = numpy.mat(numpy.ones((5, 1)) / 5)
# bestStump, minError, bestClasEst = AdaBoost.buildStump(dataMat, labels, D)

dataMat, labels = AdaBoost.loadDataSet("E:/TestDatas/MachineLearningInAction/Ch07/horseColicTraining2.txt")
classifierArr, aggClassExt = AdaBoost.adaBoostTrainDS(dataMat, labels, 10)
AdaBoost.plotROC(aggClassExt.T, labels)

import AdaBoost
from numpy import *


def loadDataSet(fileName):  # general function to parse tab-delimited floats
    numFeat = len(open(fileName).readline().split('\t'))  # get number of fields
    dataMat = []
    labelMat = []
    fr = open(fileName)
    for line in fr.readlines():
        lineArr = []
        curLine = line.strip().split('\t')
        for i in range(numFeat - 1):
            lineArr.append(float(curLine[i]))
        dataMat.append(lineArr)
        labelMat.append(float(curLine[-1]))
    return dataMat, labelMat


datArr, labelArr = loadDataSet('horseColicTest2.txt')
weakClassArr, aggClassEst = AdaBoost.adaBoostTrainDS(datArr, labelArr, 40)

'''
testDataArr, testLabelArr = loadDataSet('horseColicTest2.txt')
prediction10 = AdaBoost.adaClassify(testDataArr, weakClassArr)
errArr = mat(ones((67, 1)))  # count the errors (error rate = errors / 67)
errArr[prediction10 != mat(testLabelArr).T].sum()
'''

'''
Imbalanced classification problems:
The misclassification costs of the different classes are rarely equal in practice.
1. Adjust the classifier's decision threshold.
A different way to evaluate classifiers: ROC curves and AUC.
On performance metrics: building a classifier that maximizes precision and
recall at the same time is challenging.
'''

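# The note above mentions ROC curves and AUC as threshold-independent ways to
# evaluate a classifier on imbalanced data. Below is a minimal, hedged sketch of
# that idea applied to the aggregated margins returned by adaBoostTrainDS.
# scikit-learn and matplotlib are assumptions here, not part of the original
# code (which uses its own plotROC; see the plotROC fragment further below).
import numpy as np
import matplotlib.pyplot as plt
from sklearn.metrics import roc_curve, auc

scores = np.asarray(aggClassEst).ravel()   # real-valued ensemble margins
y_true = np.asarray(labelArr)              # labels in {-1.0, +1.0}

fpr, tpr, _ = roc_curve(y_true, scores, pos_label=1.0)
print('AUC:', auc(fpr, tpr))

plt.plot(fpr, tpr)
plt.plot([0, 1], [0, 1], 'b--')
plt.xlabel('False positive rate')
plt.ylabel('True positive rate')
plt.title('ROC curve from AdaBoost margins')
plt.show()
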
# -*- coding: utf-8 -*-
"""
AdaBoost: simple data set
@author: Jerry
"""
import numpy as np
import AdaBoost


def loadDataSet():
    dataMat = np.matrix([[1., 2.1], [2., 1.1], [1.3, 1.], [1., 1.], [2., 1.]])
    classLabels = [1.0, 1.0, -1.0, -1.0, 1.0]
    return dataMat, classLabels


if __name__ == '__main__':
    dataMat, classLabels = loadDataSet()
    # AdaBoost.adaBoostTrainDS(dataMat, classLabels, 9)
    classifierArray = AdaBoost.adaBoostTrainDS(dataMat, classLabels, 30)
    predictedLabel = AdaBoost.adaClassify([0, 0], classifierArray)
    print(predictedLabel)

# Convert numerical attributes to binary based on median thresholds
# (a hedged sketch of this binarization follows this snippet)
numericalMedians = DecisionTree.setThreshold(trainData)
binaryTrainData = DecisionTree.setBinary(trainData, numericalMedians)
testData = DecisionTree.setBinary(testData, numericalMedians)

# ============================================
# AdaBoost
# ============================================
print('Running AdaBoost for 1 to 10 iterations...')
myAccuracy = []
maxAccuracy = 0
nt = range(1, 10, 1)
for n in nt:
    binaryTrainData1 = copy.deepcopy(binaryTrainData)
    binaryTrainData1 = AdaBoost.assignSampleWeights(binaryTrainData1)

    # Build stump
    stumps = []
    stumpWeights = []
    iterations = n
    newTrainData = binaryTrainData1
    weightLookup = None

    # Run adaBoost algorithm
    for run in range(iterations):
        eFeatures = copy.deepcopy(features)

        # Build dictionary of feature values
        c = 0
        for key in eFeatures.keys():
            for line in newTrainData:

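# A minimal sketch (not the project's actual implementation) of what the
# median-threshold binarization above might look like. setThreshold/setBinary
# belong to the DecisionTree module; median_thresholds/binarize below are
# hypothetical stand-ins that only illustrate the idea: compute each numeric
# column's median on the training data, then map values to 0/1 by comparing
# against that median.
import statistics

def median_thresholds(rows, numeric_cols):
    # per-column median, computed from the training rows only
    return {c: statistics.median(float(r[c]) for r in rows) for c in numeric_cols}

def binarize(rows, thresholds):
    out = []
    for r in rows:
        r = list(r)
        for c, m in thresholds.items():
            r[c] = 1 if float(r[c]) > m else 0
        out.append(r)
    return out

# usage sketch: thresholds come from training data and are reused for test data
# thresholds = median_thresholds(trainData, numeric_cols)
# trainBin = binarize(trainData, thresholds)
# testBin = binarize(testData, thresholds)
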
def credit_experiment():
    file_path = "/home/john/PycharmProjects/u1201441_Private_Repository/CS6350_Files/HW2/credit/default of credit card clients.csv"
    numeric_cols = [0, 4, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22]
    missing_identifier = None
    training_data = []
    test_data = []
    data = ID3.data_parsing(file_path, numeric_cols)
    LABEL_INDEX = len(data[0]) - 2
    for instance in data:
        if instance[LABEL_INDEX] == '1':
            instance[LABEL_INDEX] = "yes"
        else:
            instance[LABEL_INDEX] = "no"
    test_indices = random.sample(range(len(data)), len(data))
    for i in test_indices:
        if i < 6000:
            test_data.append(data[i])
        else:
            training_data.append(data[i])

    iterations = 100
    decision_tree = ID3.build_decision_tree(
        training_data, max_depth=-1, info_gain_type=1,
        numeric_cols=numeric_cols, missing_identifier=missing_identifier)
    adaboost = AdaBoost.ada_boost(training_data, iterations=iterations,
                                  numeric_cols=numeric_cols,
                                  missing_identifier=missing_identifier)
    bagged_tree = BaggedTrees.bagged_trees(
        training_data, iterations=iterations, sample_size=100,
        numeric_cols=numeric_cols, missing_identifier=missing_identifier)
    forest = RandomForest.random_forest(training_data, iterations=iterations,
                                        sample_size=100, numeric_cols=numeric_cols,
                                        missing_identifier=missing_identifier,
                                        feature_size=4)

    # Decision Tree results
    tree_results = ID3.test_tree(decision_tree, training_data, numeric_cols, missing_identifier)
    tree_train = 1 - tree_results[0] / tree_results[1]
    tree_results = ID3.test_tree(decision_tree, test_data, numeric_cols, missing_identifier)
    tree_test = 1 - tree_results[0] / tree_results[1]
    tree_train_ln = []
    tree_test_ln = []
    for t in range(iterations):
        tree_train_ln.append(tree_train)
        tree_test_ln.append(tree_test)

    # AdaBoost results
    ada_results_train = AdaBoost.test_ada_boost_hypothesis(
        adaboost, training_data, numeric_cols, missing_identifier)
    ada_results_test = AdaBoost.test_ada_boost_hypothesis(
        adaboost, test_data, numeric_cols, missing_identifier)
    ada_train = []
    ada_test = []
    for t in range(iterations):
        ada_train.append(1 - ada_results_train[t][0] / ada_results_train[t][1])
        ada_test.append(1 - ada_results_test[t][0] / ada_results_test[t][1])
    ada_graph = [(ada_train, "AdaBoost Train"), (ada_test, "AdaBoost Test"),
                 (tree_train_ln, "Tree Train"), (tree_test_ln, "Tree Test")]
    GraphUtility.graph(ada_graph, "AdaBoost Data", "Iterations", "Error")

    # Bagging results
    results_train = BaggedTrees.test_bagged_tree_hypothesis(
        bagged_tree, training_data, numeric_cols, missing_identifier)
    results_test = BaggedTrees.test_bagged_tree_hypothesis(
        bagged_tree, test_data, numeric_cols, missing_identifier)
    # Charts
    bag_train = []
    bag_test = []
    for t in range(iterations):
        bag_train.append(1 - results_train[t][0] / results_train[t][1])
        bag_test.append(1 - results_test[t][0] / results_test[t][1])
    bag_graph = [(bag_train, "Bagging Train"), (bag_test, "Bagging Test"),
                 (tree_train_ln, "Tree Train"), (tree_test_ln, "Tree Test")]
    GraphUtility.graph(bag_graph, "Bagged Tree Data", "Num Trees", "Error")

    # Forest results
    results_train = RandomForest.test_random_forest_hypothesis(
        forest, training_data, numeric_cols, missing_identifier)
    results_test = RandomForest.test_random_forest_hypothesis(
        forest, test_data, numeric_cols, missing_identifier)
    # Charts
    forest_train = []
    forest_test = []
    for t in range(iterations):
        forest_train.append(1 - results_train[t][0] / results_train[t][1])
        forest_test.append(1 - results_test[t][0] / results_test[t][1])
    forest_graph = [(forest_train, "Forest Train - " + str(2) + " features"),
                    (forest_test, "Forest Test - " + str(2) + " features"),
                    (tree_train_ln, "Tree Train"), (tree_test_ln, "Tree Test")]
    GraphUtility.graph(forest_graph, "Random Forest Data", "Num Trees", "Error")

if __name__ == "__main__": # TODO: select optimal threshold for each feature # TODO: attentional cascading print 'Loading faces..' faces = load_images('training/faces', 1) print '..done. ' + str(len(faces)) + ' faces loaded.\n\nLoading non faces..' non_faces = load_images('training/nonfaces', -1) print '..done. ' + str(len(non_faces)) + ' non faces loaded.\n' T = 20 if LEARN: classifiers = AdaBoost.learn(faces, non_faces, T) #STore it . Pickle can't store imagingdraw object which comes inside IntegralImage class. So self.score for # the haarfeatures should not hash by integralimage object. just make it empty to avoid error. it'll not be used # in test images anyway. for haarfeature,weight in classifiers: haarfeature.score = {} with open(FILENAME,'wb') as fileobj: pickle.dump(classifiers,fileobj) else: with open(FILENAME,'r') as fileobj: classifiers = pickle.load(fileobj) print 'Loading test faces..'
positif_data_testing = 'testImage'
# negatif_data_testing = 'png/trainset/non-faces_short'

print('load faces data...')
faces_data = ul.load_images(positif_data_testing)
faces_ii_data = list(map(ii.to_integral_image, faces_data))
print(str(len(faces_ii_data)) + ' Has been loaded.\nload non faces data...')
# non_faces_data = ul.load_images(negatif_data_testing)
# non_faces_ii_data = list(map(ii.to_integral_image, non_faces_data))
# print(str(len(non_faces_ii_data)) + ' Has been loaded.')

print('loaded database')
features = ul.load_database(pathDatabase)
print('create cascade stage')
cascade = cas.cascade_load(features)

correct_faces = 0
correct_non_faces = 0
for idx, classifiers in enumerate(cascade):
    correct_faces = sum(ab.ensemble_vote_all(faces_ii_data, classifiers))
    # correct_non_faces = len(non_faces_data) - sum(ab.ensemble_vote_all(non_faces_ii_data, classifiers))
    print('Result after ' + str(idx + 1) + ' layer(s):\n Faces: ' + str(correct_faces) + '/' + str(len(faces_data)))
    # print('Result after ' + str(idx + 1) + ' layer(s):\n Faces: ' + str(correct_faces) + '/' + str(
    #     len(faces_data))
    #       + ' (' + str((float(correct_faces) / len(faces_data)) * 100) + '%)\n non-Faces: '
    #       + str(correct_non_faces) + '/' + str(len(non_faces_data)) + ' ('
    #       + str((float(correct_non_faces) / len(non_faces_data)) * 100) + '%)')

# chap 8 AdaBoost
import DatasetUtil as DU
import AdaBoost as Ada
import numpy as np

if __name__ == "__main__":
    print("\t============ Chap8 AdaBoost ============")
    ds = DU.DATAUtil()
    xx, yy = ds.load(True, r".\dataset.dat")

    print("\t ===== Do Initializing =====")
    model = Ada.Boosting()

    print("\t ===== Do Training =====")
    model.train(xx, yy)

    print("\t ===== Do Testing =====")
    xx_t, yy_t = ds.load(True, r".\testds.dat")
    yy_t = np.squeeze(yy_t)
    y_p = model.predict(xx_t)
    yy_p = ds.y_int2str(y_p)
    print(" Test Result is: ")
    print(yy_p)

    eval_y = np.zeros_like(yy_t, dtype=float)
    eval_y[np.where(yy_t == y_p)] = 1.0
    print(" Test result precision is {:.2%}".format(eval_y.sum() / eval_y.shape[0]))

def main():
    data = file_get_contents('ressource/spambase.data')
    testSet = [data[0], data[30], data[1776], data[3805]]
    print(ada.error_rate(data, testSet, 3))
    print(ada.error_rate(data, testSet, 4))

positif_data_testing = 'testset/faces'
negatif_data_testing = 'testset/non-faces'

# define the cascade levels in a list
level_cascade = [2, 10, 20, 20, 30, 30, 50, 50, 50, 50, 60, 60, 80,
                 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100,
                 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100,
                 100, 100, 100, 100, 100, 100, 100]

# load training data
print('Load positive training data...')
faces_data = ul.load_images(positif_data_training)
faces_ii_data = list(map(ii.to_integral_image, faces_data))
print(str(len(faces_ii_data)) + ' Has been loaded.\nLoad negative training data...')
non_faces_data = ul.load_images(negatif_data_training)
non_faces_ii_data = list(map(ii.to_integral_image, non_faces_data))
print(str(len(non_faces_ii_data)) + ' Has been Loaded.')

img_height, img_width = faces_ii_data[0].shape

# create features
features = ab.create_features(24, 24, min_feature_height=4, max_feature_height=10,
                              min_feature_width=4, max_feature_width=10)

# cascade => stages of bunched classifiers, alpha => weight of every classifier
cascade = cas.cascade_latih(faces_ii_data, non_faces_ii_data, features, level_cascade)

database = []
for casc in cascade:
    for item in casc:
        database.append(item)
with open('database.json', 'w') as f:
    json.dump(database, f, default=dumper, indent=4)

ax = plt.subplot(111)  # plot
for index in sortedIndicies.tolist()[0]:
    if classLabels[index] == 1.0:
        delX = 0
        delY = yStep
    else:
        delX = xStep
        delY = 0
        ySum += cur[1]
    ax.plot([cur[0], cur[0] - delX], [cur[1], cur[1] - delY], c='b')
    cur = (cur[0] - delX, cur[1] - delY)
ax.plot([0, 1], [0, 1], 'b--')
plt.xlabel('False positive rate')
plt.ylabel('True positive rate')
plt.title('ROC curve for AdaBoost horse colic detection system')
ax.axis([0, 1, 0, 1])
plt.show()
print("the Area Under the Curve is: ", ySum * xStep)


if __name__ == '__main__':
    trainingMat, trainingLabels = AdaBoost.loadDataSet('horseColicTraining2.txt')
    classifierArray = AdaBoost.adaBoostTrainDS(trainingMat, trainingLabels, 10)
    testMat, testLabels = AdaBoost.loadDataSet('horseColicTest2.txt')
    prediction10 = AdaBoost.adaClassify(testMat, classifierArray)
    print(prediction10)
    plotROC(prediction10, testLabels)

print('')
print('***** RESULTS DECISION TREE *****')
print('Depth: ', tree.depth)
print('')
print('Training Error: ', error_rate_tree_train)
print('Test Error    : ', error_rate_tree_test)

cNums = range(1, 31)
train_errs = list()
test_errs = list()
for i in cNums:
    trains = list()
    tests = list()
    for j in range(20):
        boost = ab.Boost()
        boost.train(X_train, y_train, cNum=i, verbose=False)
        pred_boost_train = boost.predict(X_train)
        pred_boost_test = boost.predict(X_test)  # predict on the full test set
        error_rate_boost_test = (sum([
            0 if pred == true else 1
            for (pred, true) in zip(pred_boost_test, y_test)
        ]) / float(len(y_test)))
        error_rate_boost_train = (sum([
            0 if pred == true else 1
            for (pred, true) in zip(pred_boost_train, y_train)
        ]) / float(len(y_train)))
        trains.append(error_rate_boost_train)
        tests.append(error_rate_boost_test)
    train_errs.append(np.mean(trains))
    test_errs.append(np.mean(tests))

import sys
sys.path.append("../Basic Functions")
import AdaBoost
from LoadData import LoadData

if __name__ == '__main__':
    trainingDataArray, trainingLabelList = LoadData("HorseColicTraining.txt")
    classifierList, totalPredictValue = AdaBoost.AdaboostTrain(
        trainingDataArray, trainingLabelList, 10)
    testDataArray, testLabelList = LoadData("HorseColicTest.txt")
    result = AdaBoost.AdaClassify(testDataArray, classifierList)
    errorList = [
        i for i in range(len(testLabelList)) if testLabelList[i] != result[i]
    ]
    print(errorList)
    AUC = AdaBoost.PlotROC(trainingLabelList, totalPredictValue)
    print(AUC)

PP = PreProcess.PreProcess(data, n_buckets=10, func='boolean')  # ,swap_labels=True)
data = PP.fit(data)
testset = PP.fit(testset)

# read weights
weights = []
for w in range(6):
    tmp = [float(line.rstrip('\n')) for line in open('result/W' + str(w))]
    if len(tmp) != len(data[0]):
        continue
    weights.append(tmp)

ab = AdaBoost.AdaBoost(weightVector=weights)
ab.fit(data, data_labels)
predictTrain = ab.predict(data)
predictTest = ab.predict(testset)

print("Accuracy for training set:")
print(Stat.Accuracy(predictTrain, data_labels))
print("F1 score for training set:")
print(Stat.F1_Score(predictTrain, data_labels))
print("Precision for training set:")
print(Stat.Precision(predictTrain, data_labels))
print("Recall for training set:")

# -*- coding: utf-8 -*-
"""
Created on Wed Oct 10 20:38:02 2018

@author: tf
"""
import AdaBoost
import numpy as np

# dataMat, labelMat = AdaBoost.loadDataSet()
# print(dataMat, '\n', labelMat)
# D = np.ones((5, 1)) / 5
# bestStump, minErr, bestClassEst = AdaBoost.buildStump(dataMat, labelMat, D)
# print(bestStump, '\n', minErr, '\n', bestClassEst)
# classifierArr = AdaBoost.adaBoostTrainDS(dataMat, labelMat)
# print(classifierArr)
# print(max(0.1, 0.2))
# clas = AdaBoost.adaClassify(np.array([[5, 5], [0, 0]]), classifierArr)
# print(clas)

dataMat, labelMat = AdaBoost.loadFileDataSet('horseColicTraining2.txt')
classifierArr = AdaBoost.adaBoostTrainDS(dataMat, labelMat)
# print(classifierArr)
testDataMat, testLabelMat = AdaBoost.loadFileDataSet('horseColicTest2.txt')
errRate = AdaBoost.adaClassify(testDataMat, classifierArr, testLabelMat)
print(errRate)

def main(number_of_stages, set_pos, set_neg):
    trainer = AdaBoost(set_pos, set_neg, number_of_stages)
    classifiers = trainer.train_simple()
    image = cv2.imread("1.pgm")
    print(StrongClassifier(classifiers, pgm_simplifier(image)).get_result())

max_feature_width = 10

# Load faces, with some basic checks
faces_training = es.load_images(pos_training_path)
faces_ii_training = list(map(ii.to_integral_image, faces_training))
print('done faces loaded.')
non_faces_training = es.load_images(neg_training_path)
non_faces_ii_training = list(map(ii.to_integral_image, non_faces_training))
print('done non faces loaded.\n')

# classifiers are Haar-like features
classifiers = ab.learn(faces_ii_training, non_faces_ii_training, num_classifiers,
                       min_feature_height, max_feature_height,
                       min_feature_width, max_feature_width)

print('Loading test faces..')
faces_testing = es.load_images(pos_testing_path)
faces_ii_testing = list(map(ii.to_integral_image, faces_testing))
print('..done faces loaded.\n')
non_faces_testing = es.load_images(neg_testing_path)
non_faces_ii_testing = list(map(ii.to_integral_image, non_faces_testing))
print('..done non faces loaded.\n')

print('Testing selected classifiers..')
correct_faces = 0
correct_non_faces = 0
# Classify the given list of integral images using the learned classifiers
correct_faces = sum(es.ensemble_vote_all(faces_ii_testing, classifiers))

def __train(training_set):
    ensemble = ab.AdaBoostEnsemble()
    ensemble.train(training_set, len(training_set), ensemble_size,
                   __train_tree, __evaluate_tree)
    return ensemble

# -*- coding: utf-8 -*-
from numpy import *
import AdaBoost as ab
import LAB as lab

if __name__ == '__main__':
    mylist = []
    mytrain = []
    # load the tennis-ball images
    for index in range(5):
        arr = lab.load('B0_%d.jpg' % index)
        arr2 = lab.LAB(arr).flatten()  # convert to LAB, then flatten
        mylist.append(arr2.tolist())
        mytrain.append(1.)
    # load the apple images
    for index in range(5):
        arr = lab.load('B2_%d.jpg' % index)
        arr2 = lab.LAB(arr).flatten()  # convert to LAB, then flatten
        mylist.append(arr2.tolist())
        mytrain.append(-1.)

    dataArr = matrix(mylist)
    classLabels = matrix(mytrain)
    D = mat(ones((10, 1)) / 10)
    classifierArray = ab.adaBoostTrainDS(dataArr, classLabels, 10)
    for x in classifierArray:
        print('alpha:', x['alpha'], 'dim:', x['dim'],
              'thresh:', x['thresh'], 'ineq:', x['ineq'])

def reset_weight(Data):
    w0 = 1 / float(len(Data))
    for row in Data:
        row['weight'] = w0


# Q2.a
# Run AdaBoost for T = 1-500 and plot the error-vs-T relationship
outFile = open("ada_out.txt", 'w')
outFile.write("iter\terr_train\terr_test\n")
for T in [
        1, 2, 3, 4, 5, 6, 8, 10, 15, 20, 30, 50, 70, 90, 120, 150, 200, 250,
        300, 350, 400, 450, 500
]:
    trees, alphas = AdaBoost.AdaBoost_train(Data_train, Attributes, Labels, T)
    hit_train = AdaBoost.AdaBoost_test(Data_train, trees, alphas)
    hit_test = AdaBoost.AdaBoost_test(Data_test, trees, alphas)
    outFile.write(str(T) + "\t" + str(1 - hit_train) + "\t" + str(1 - hit_test) + "\n")
    reset_weight(Data_train)
outFile.close()

# For T=500, find the training and test error in each iteration
e_t, e_r = AdaBoost.print_err_Ada(Data_train, Data_test, Attributes, Labels, 500)
t = [i + 1 for i in range(0, 500)]
fig, ax = plt.subplots(figsize=(6, 4))
ax.plot(t, e_r, label='test error', c='grey', alpha=0.3)

from AdaBoost import *
from sys import argv

x_train = argv[1]
y_train = argv[2]
x_test = argv[3]
y_test = argv[4]
T = int(argv[5])

# use builtin float/int dtypes; np.float and np.int were removed from recent NumPy
feats = np.loadtxt(x_train, delimiter=',', dtype=float).transpose()
labels = np.loadtxt(y_train, delimiter=',', dtype=int)
ab_labels = np.array([1 if i == 1 else -1 for i in labels])

# train one AdaBoost ensemble per feature column
abs = []
for i in range(30):
    abs.append(AdaBoost(T, feats[i], ab_labels))
    abs[-1].train()

x_test = np.loadtxt(x_test, delimiter=',', dtype=float)
y_test = np.loadtxt(y_test, delimiter=',', dtype=float)
ab_y_test = np.array([1 if i == 1 else -1 for i in y_test])

predictions = []
for i in range(x_test.shape[0]):
    predictions.append(
        np.sign(np.sum([abs[j].eval(x_test[i][j]) for j in range(x_test.shape[1])])))

print("accuracy = {}".format(
    len(np.where(predictions == ab_y_test)[0]) / len(ab_y_test)))
