def main():
    """Run the toy-dataset AdaBoost walkthrough without printing anything.

    Loads the sample data, builds one decision stump under a uniform (5, 1)
    weight vector, trains an ensemble (third argument 9) and classifies the
    two points ``[5, 5]`` and ``[0, 0]``. All results are discarded.
    """
    import adaboost
    from numpy import mat, ones

    samples, labels = adaboost.loadSimpleData()

    # Five equal weights of 1/5 each, as a column matrix.
    weights = mat(ones((5, 1)) / 5)
    _stump, _stump_error, _stump_estimate = adaboost.buildStump(
        samples, labels, weights)

    ensemble, _aggregate_estimate = adaboost.adaBoostTrainDS(samples, labels, 9)
    adaboost.adaClassify([[5, 5], [0, 0]], ensemble)
def test():
    """Run the toy-dataset AdaBoost walkthrough, printing every intermediate result.

    Relies on ``adaboost``, ``mat`` and ``ones`` being available at module
    level. Prints the loaded data, the best single stump, the trained
    ensemble and the classification of one point and of a batch of two.
    """
    samples, labels = adaboost.loadSimpleData()
    print("dataMat: [%s] classLabels: [%s]" % (samples, labels))

    # Five equal weights of 1/5 each, as a column matrix.
    weights = mat(ones((5, 1)) / 5)
    stump, stump_error, stump_estimate = adaboost.buildStump(
        samples, labels, weights)
    print("bestStump: ", stump, " minError:", stump_error,
          " bestClasEst:", stump_estimate)

    ensemble, _ensemble_estimate = adaboost.adaBoostTrainDS(samples, labels, 9)
    print("classifierArray:", ensemble)

    # First a single point, then a batch of two points.
    for query in ([0, 0], [[5, 5], [0, 0]]):
        print(adaboost.adaClassify(query, ensemble))
def cross_validation(iter_):
    """Compute the N_fold cross-validation error for ``iter_`` boosting rounds.

    Reads the module-level ``train_data_``, ``train_label_`` and ``N_fold``.
    The first ``N_fold * fold_size`` samples are cut into ``N_fold``
    contiguous folds of ``fold_size = data_num // N_fold`` rows. Each fold is
    held out once while the model is trained on all remaining rows.

    NOTE: when ``data_num`` is not a multiple of ``N_fold`` the trailing
    ``data_num % N_fold`` samples are never held out; they only ever appear
    in the training split.

    Args:
        iter_: value forwarded to ``adaboost.Adaboost`` as its last argument.

    Returns:
        Tuple ``(iter_, CV_error)`` where ``CV_error`` is the mean of the
        per-fold values returned by ``adaboost.adaClassify``.
    """
    data_num = train_data_.shape[0]
    fold_size = data_num // N_fold

    total_error = 0
    for fold in range(N_fold):
        start = fold * fold_size
        stop = start + fold_size
        held_out = slice(start, stop)

        # Everything outside the held-out fold becomes the training split.
        # Cast to float to match the float64 buffers used previously.
        train_data = np.asarray(
            np.delete(train_data_, held_out, axis=0), dtype=float)
        train_label = np.asarray(
            np.delete(train_label_, held_out), dtype=float)
        test_data = np.array(train_data_[start:stop, :], dtype=float)
        test_label = np.array(train_label_[start:stop], dtype=float)

        best = adaboost.Adaboost(train_data, train_label, 'validation', iter_)
        error = adaboost.adaClassify(test_data, test_label, 'validation', best)
        total_error = total_error + error

    CV_error = total_error / N_fold
    print('[Result] Cross-Validation Error of T =', iter_, 'is', CV_error)
    return iter_, CV_error
def test_loadDataSet(self):
    """Train on ``train.txt`` and print how many ``test.txt`` rows are misclassified.

    Prints the loaded training data and labels, trains with
    ``adaboost.adaBoostTrainDS`` (third argument 9), classifies the test set
    and prints the number of predictions that differ from the test labels.
    """
    dataArr, labelArr = adaboost.loadDataSet('train.txt')
    # Single-argument print(...) produces the same output on Python 2 and 3.
    print("[dataArr] %s" % (dataArr,))
    print("[labelArr] %s" % (labelArr,))
    classifierArray = adaboost.adaBoostTrainDS(dataArr, labelArr, 9)
    testArr, testLabelArr = adaboost.loadDataSet('test.txt')
    prediction10 = adaboost.adaClassify(testArr, classifierArray)
    # One entry per test label, so the boolean mask below always matches in
    # shape instead of assuming exactly 67 test rows.
    errArr = mat(ones((len(testLabelArr), 1)))
    print(errArr[prediction10 != mat(testLabelArr).T].sum())
def main():
    """Train on the horse-colic training file and print the test error rate.

    Uses the module-level ``loadDataSet`` and the ``ab`` module. The error
    rate is the number of mismatching predictions divided by the number of
    test labels.
    """
    # Single-argument print(...) produces the same output on Python 2 and 3.
    print('---------------------training--------------------')
    datArr, labelArr = loadDataSet('horseColicTraining2.txt')
    # The last argument is the number of classifiers.
    classifierArray, aggClassEst = ab.adaBoostTrainDS(datArr, labelArr, 50)
    print('---------------------testing---------------------')
    testArr, testLabelArr = loadDataSet('horseColicTest2.txt')
    prediction10 = ab.adaClassify(testArr, classifierArray)
    # Size the mask and the denominator from the data rather than assuming
    # exactly 67 test rows.
    testCount = len(testLabelArr)
    errArr = mat(ones((testCount, 1)))
    errorRate = errArr[prediction10 != mat(testLabelArr).T].sum() / testCount
    print('error rate: %s' % (errorRate,))
def main():
    """Train on the horse-colic training file and print the test error rate.

    Uses the module-level ``loadDataSet`` and the ``ab`` module. The error
    rate is the number of mismatching predictions divided by the number of
    test labels.
    """
    # Single-argument print(...) produces the same output on Python 2 and 3.
    print('---------------------training--------------------')
    datArr, labelArr = loadDataSet('horseColicTraining2.txt')
    # The last argument is the number of classifiers.
    classifierArray, aggClassEst = ab.adaBoostTrainDS(datArr, labelArr, 50)
    print('---------------------testing---------------------')
    testArr, testLabelArr = loadDataSet('horseColicTest2.txt')
    prediction10 = ab.adaClassify(testArr, classifierArray)
    # Size the mask and the denominator from the data rather than assuming
    # exactly 67 test rows.
    testCount = len(testLabelArr)
    errArr = mat(ones((testCount, 1)))
    errorRate = errArr[prediction10 != mat(testLabelArr).T].sum() / testCount
    print('error rate: %s' % (errorRate,))
def testHolic():
    """Train on the horse-colic data, report test errors and plot the ROC curve.

    Prints the predictions, the number of misclassified test rows and the
    error rate, then calls ``adaboost.plotROC`` with the transposed training
    estimate and the training labels.
    """
    datArr, labelArr = adaboost.loadDataSet('horseColicTraining2.txt')
    classifierArray, classifierEst = adaboost.adaBoostTrainDS(datArr, labelArr, 10)
    testArr, testLabelArr = adaboost.loadDataSet('horseColicTest2.txt')
    prediction10 = adaboost.adaClassify(testArr, classifierArray)
    print("prediction:", prediction10)
    # Derive the test-set size from the labels instead of hard-coding 67, so
    # the mask shape and the rate stay correct for any test file.
    testCount = len(testLabelArr)
    errArr = mat(ones((testCount, 1)))
    errCnt = errArr[prediction10 != mat(testLabelArr).T].sum()
    print("err count:%d error rate:%.2f" % (errCnt, float(errCnt) / testCount))
    adaboost.plotROC(classifierEst.T, labelArr)
import adaboost
datmat, classlabel = adaboost.loadSimpData()
from numpy import *

# Uniform weight vector for the five toy samples.
d = mat(ones((5, 1)) / 5)
# Disabled toy-dataset experiments:
# print(adaboost.buildStump(datmat,classlabel,d))
# classifier,aggClassEst = adaboost.adaBoostTrainDS(datmat,classlabel,9)
# print(classifier)
# print(aggClassEst)
# print(adaboost.adaClassify([[0,0],[1,1]],classifier))

# Train on the horse-colic training file, then count test misclassifications.
datarr, labelarr = adaboost.loadDataSet('horseColicTraining2.txt')
classifier, aggClassEst = adaboost.adaBoostTrainDS(datarr, labelarr, 40)
testarr, testlabelarr = adaboost.loadDataSet('horseColicTest2.txt')
prediction = adaboost.adaClassify(testarr, classifier)
# One entry per test label rather than a hard-coded 67, so the boolean mask
# below matches whatever test file is loaded.
errarr = mat(ones((len(testlabelarr), 1)))
print(errarr[prediction != mat(testlabelarr).T].sum())
adaboost.plotROC(aggClassEst.T, labelarr)
from adaboost import adaboost_trian, adaClassify
from load_data import read_data

if __name__ == '__main__':
    # Train on the data returned by read_data() (third argument 9), then
    # print the classification of the points [5, 5] and [0, 0].
    samples, targets = read_data()
    ensemble = adaboost_trian(samples, targets, 9)
    print(adaClassify([[5, 5], [0, 0]], ensemble))
# -*- coding: utf-8 -*-
import adaboost
from numpy import *

# Train on the horse-colic training file.
dataArr, labelArr = adaboost.loadDataSet('horseColicTraining2.txt')
classfierArray = adaboost.adaBoostTrainDS(dataArr, labelArr, 10)

# Classify the test rows with the trained classifier. The test labels are
# ground truth for evaluation and must not be passed as the classifier.
testArr, testlabelArr = adaboost.loadDataSet('horseColicTest2.txt')
prediction10 = adaboost.adaClassify(testArr, classfierArray)
import adaboost
from numpy import *

# print(<single expression>) produces the same output on Python 2 and 3,
# unlike the Python 2-only print statement.
datMat, classLabels = adaboost.loadSimpData()
# Uniform weight vector for the five toy samples.
D = mat(ones((5, 1)) / 5)
print(adaboost.buildStump(datMat, classLabels, D))
classifierArray = adaboost.adaBoostTrainDS(datMat, classLabels, 9)
print(classifierArray)

# Retrain with third argument 30, then classify one point and a batch of two.
datArr, labelArr = adaboost.loadSimpData()
classifierArr = adaboost.adaBoostTrainDS(datArr, labelArr, 30)
print(adaboost.adaClassify([0, 0], classifierArr))
print(adaboost.adaClassify([[5, 5], [0, 0]], classifierArr))
# Entry-point script: train AdaBoost on the toy dataset and classify two points.
import adaboost

if __name__ == '__main__':
    samples, labels = adaboost.loadSimpData()
    ensemble, _aggregate_estimate = adaboost.adaBoostTrainDS(samples, labels, 30)
    # Single-point variant, left disabled:
    # adaboost.adaClassify([0, 0], ensemble)
    # Batch of several points:
    print(adaboost.adaClassify([[5, 5], [0, 0]], ensemble))
# dataArr=dataMat label = labelArr skf = StratifiedKFold(n_splits=10) for train, test in skf.split(dataArr, labelArr): print("%s %s" % (train, test)) train.tolist() train_in = dataArr[train] test_in = dataArr[test] train_out = label[train] test_out = label[test] train_in, train_out = RandomOverSampler().fit_sample( train_in, train_out) #训练集过采样,平衡样本 classifierArray, aggClassEst = adaboost.adaBoostTrainDS( train_in, train_out, 200) prediction_train, prob_train = adaboost.adaClassify( train_in, classifierArray) #测试训练集 prediction_test, prob_test = adaboost.adaClassify(test_in, classifierArray) #测试测试集 tmp_train, fp_train_tmp = adaboost.evaluatemodel(train_out, prediction_train, prob_train) #evaluate_train=np.array(evaluate_train); evaluate_train.extend(tmp_train) #训练集结果评估 fp_train.extend(fp_train_tmp) tmp_test, fp_test_tmp = adaboost.evaluatemodel(test_out, prediction_test, prob_test) evaluate_test.extend(tmp_test)
import adaboost
from numpy import *

# Disabled toy-dataset walkthrough, kept for reference:
#   datMat, classLabels = adaboost.loadSimpData()
#   D = mat(ones((5,1))/5)
#   adaboost.draw(datMat, classLabels)
#   weakClassArr = classifyierArray = adaboost.adaBoostTrainDS(datMat, classLabels, 9)
#   print('weakClassArr = ', weakClassArr)
#   datToClass = [[5,5],[0,0]]
#   adaboost.adaClassify(datToClass, weakClassArr)

# Train on the horse-colic training file.
train_features, train_labels = adaboost.loadDataSet("horseColicTraining2.txt")
ensemble, train_estimate = adaboost.adaBoostTrainDS(
    train_features, train_labels, 10)

# Classify the test file and report the error rate.
test_features, test_labels = adaboost.loadDataSet("horseColicTest2.txt")
predicted = adaboost.adaClassify(test_features, ensemble)
print("prediction= ", predicted)
print("errorRate= ", adaboost.errorRate(test_labels, predicted))

# ROC curve from the transposed training estimate and the training labels.
adaboost.plotROC(train_estimate.T, train_labels)
# Single-argument print(...) with %-formatting produces the same output on
# Python 2 and 3, unlike the Python 2-only print statement.
datMat, classLabels = adaboost.loadSimpData()
# Uniform weight vector for the five toy samples.
D = mat(ones((5, 1)) / 5)
print("datMat: %s" % (datMat,))
print("classLabels: %s" % (classLabels,))
print("D: %s" % (D,))
print(": %s" % (adaboost.buildStump(datMat, classLabels, D),))

# 7.4 Implementing the full AdaBoost algorithm
classifierArr = adaboost.adaBoostTrainDS(datMat, classLabels, 9)
print("classifierArr: %s" % (classifierArr,))

# 7.5 Testing the algorithm: classifying with AdaBoost
datMat, classLabels = adaboost.loadSimpData()
classifierArr = adaboost.adaBoostTrainDS(datMat, classLabels, 30)
print("分类1: %s" % (adaboost.adaClassify([0, 0], classifierArr),))
print("分类2: %s" % (adaboost.adaClassify([[5, 5], [0, 0]], classifierArr),))

# 7.6 Example: applying AdaBoost to a difficult dataset
datArr, labelArr = adaboost.loadDataSet(homedir + 'horseColicTraining2.txt')
print("datArr: %s" % (datArr,))
print("labelArr: %s" % (labelArr,))
classifierArray = adaboost.adaBoostTrainDS(datArr, labelArr, 500)
testArr, testLabelArr = adaboost.loadDataSet(homedir + 'horseColicTest2.txt')
prediction10 = adaboost.adaClassify(testArr, classifierArray)
# Size the mask and the denominator from the test labels instead of
# hard-coding 67, so any test file works.
testCount = len(testLabelArr)
errArr = mat(ones((testCount, 1)))
errorNum = errArr[prediction10 != mat(testLabelArr).T].sum()
errorrate = errorNum / testCount
print("prediction10: %s" % (prediction10,))
print("errArr: %s" % (errArr,))
print("errorrate: %s" % (errorrate,))
best_T) out_hypothesis = [] for i in range(best_T): out_hypothesis.append([ best_hypothesis[i]['iter'], best_hypothesis[i]['dim'], best_hypothesis[i]['thresh'], best_hypothesis[i]['inequal'], best_hypothesis[i]['alpha'] ]) with open(str(N_fold) + '_fold_output_AdaBoost_hypothesis_header.csv', 'w', newline='') as f: w = csv.writer(f) w.writerow([ 'iteration_index', 'attribute_index', 'threshold', 'direction', 'boosting_parameter' ]) w.writerows(out_hypothesis) train_accu, _ = adaboost.adaClassify(train_data_, train_label_, 'testing', best_hypothesis) test_accu, predict_output = adaboost.adaClassify(test_data, test_label, 'testing', best_hypothesis) predict_output = predict_output.tolist() with open(str(N_fold) + '_fold_predict_output', 'w', newline='') as f: w = csv.writer(f, delimiter=',') w.writerow(predict_output) print('Training Accuracy =', train_accu, '%') print('Testing Accuracy =', test_accu, '%')
#!/usr/bin/python
from numpy import *
import adaboost

# Load the dataset, train (third argument 9) and classify the point [0, 0].
# The classification result is not stored or printed.
features, labels = adaboost.loadDataSet()
# Disabled single-stump experiment:
# D = matrix(ones((5,1))/5)
# adaboost.buildStump(features, labels, D)
ensemble = adaboost.adaBoostTrainDS(features, labels, 9)
adaboost.adaClassify([0, 0], ensemble)
for i in range(len(classifierArr)): classEst = stumpClassify( dataMatrix, classifierArr[i]['dim'], classifierArr[i]['thresh'], classifierArr[i]['ineq']) #call stump classify aggClassEst += classifierArr[i]['alpha'] * classEst print(aggClassEst) return sign(aggClassEst) if __name__ == "__main__": import numpy as np import adaboost dataMat, classLabels = loadSimpData() print(dataMat, "\n", classLabels) D = np.mat((np.ones((5, 1))) / 5) #print(D) print(adaboost.buildStump(dataMat, classLabels, D)) classifierArray = adaboost.adaBoostTrainDS(dataMat, classLabels, 9) print("\nclassifierArray:\n", classifierArray) print("\nadaboost.adaClassify([0, 0]:") print(adaboost.adaClassify([0, 0], classifierArray)) print("\nadaboost.adaClassify([[5,5],[0,0]]:") print(adaboost.adaClassify([[5, 5], [0, 0]], classifierArray)) datArr, labelArr = adaboost.loadDataSet('horseColicTraining2.txt') classifierArray = adaboost.adaBoostTrainDS(datArr, labelArr, 10) print(classifierArray)
@author: laiwei date: 2017年3月4日 ''' import adaboost from numpy import * #datMat, classLabels = adaboost.loadSimpData() #adaboost.plotData(datMat, classLabels) datMat, classLabels = adaboost.loadDataSet('horseColicTraining2.txt') #D = mat(ones((5, 1))/5) #bestStump,minError,bestClasEst = adaboost.buildStump(datMat, classLabels, D) #print(bestStump);print(minError);print(bestClasEst) weakClassArr, aggClassEst = adaboost.adaBoostTrainDS(datMat, classLabels, 37) #aggClassEst[0,0] = -0.2 #classLabels[0] = -1 #print(weakClassArr);print(aggClassEst) #print(adaboost.adaClassify([[0,0],[5,5]], weakClassArr)) # 当预测label按大小排序,对应真实label不是先全部-1,再全部+1,而是中间有错乱时,曲线下弯 #adaboost.plotROC(mat(classLabels), classLabels) adaboost.plotROC(aggClassEst.T, classLabels) testdatMat, testclassLabels = adaboost.loadDataSet('horseColicTest2.txt') testResult = adaboost.adaClassify(testdatMat, weakClassArr) errArr = mat(ones((len(testclassLabels), 1))) print(errArr[testResult != mat(testclassLabels).T].sum(), "of total", shape(testclassLabels))
# encoding=utf-8
import adaboost
from numpy import *

# Toy dataset: build one stump under uniform weights, train an ensemble
# (third argument 30), then classify the point [0, 0]. Nothing is printed
# by this script itself.
samples, labels = adaboost.loadSimData()

# Five equal weights of 1/5 each, as a column matrix.
weights = mat(ones((5, 1)) / 5)
stump, stump_error, stump_estimate = adaboost.buildStump(
    samples, labels, weights)

ensemble = adaboost.adaBoostTrainDS(samples, labels, 30)
adaboost.adaClassify([0, 0], ensemble)
import adaboost
from numpy import *

# Disabled toy-dataset walkthrough, kept for reference:
#   datMat, classLabels = adaboost.loadSimpData()
#   D = mat(ones((5,1))/5)
#   adaboost.draw(datMat, classLabels)
#   weakClassArr = classifyierArray = adaboost.adaBoostTrainDS(datMat, classLabels, 9)
#   print('weakClassArr = ', weakClassArr)
#   datToClass = [[5,5],[0,0]]
#   adaboost.adaClassify(datToClass, weakClassArr)

# Train on the horse-colic training file.
train_features, train_labels = adaboost.loadDataSet("horseColicTraining2.txt")
ensemble = adaboost.adaBoostTrainDS(train_features, train_labels, 10)

# Classify the test file and report the error rate.
test_features, test_labels = adaboost.loadDataSet("horseColicTest2.txt")
predicted = adaboost.adaClassify(test_features, ensemble)
print("prediction= ", predicted)
print("errorRate= ", adaboost.errorRate(test_labels, predicted))
def AdaFeature(train_in, train_out, test_in):
    """Train AdaBoost on the training split and return predictions for ``test_in``.

    Args:
        train_in: training samples passed to ``adaboost.adaBoostTrainDS``.
        train_out: training labels passed to ``adaboost.adaBoostTrainDS``.
        test_in: samples to classify with the trained ensemble.

    Returns:
        The first element of the pair returned by ``adaboost.adaClassify``.
    """
    ensemble, _train_estimate = adaboost.adaBoostTrainDS(
        train_in, train_out, 200)
    # Evaluate on the test set; only the first element of the pair is returned.
    predicted, _test_prob = adaboost.adaClassify(test_in, ensemble)
    return predicted
# -*- coding:utf-8 -*-
import adaboost
from numpy import *

samples, labels = adaboost.loadSimpData()

# Disabled single-stump experiment, kept for reference:
#   print ('myData is ' , myData)
#   print ('myLabels is' , myLabels)
#   D = mat(ones((5,1))/5)
#   print ('D is', D)
#   myBStump,myMError,myBCE = adaboost.buildStump(myData, myLabels, D)
#   print ('myBStump is', myBStump)
#   print ('myMError is', myMError)
#   print ('myBCE is', myBCE)

# Train (third argument 30) and show the resulting ensemble.
ensemble, _train_estimate = adaboost.adaBoostTrainDS(samples, labels, 30)
print('classiFierArray is ', ensemble)

# Classify the two points [5, 5] and [0, 0].
classified = adaboost.adaClassify([[5, 5], [0, 0]], ensemble)
print('aggClassEst is ', classified)
# train classifier classifierArr = adaboost.adaBoostTrainDS(mat(features), labels, 30) old_trading_day=trading_date_open[trading_date_open['calendarDate'] < trading_day]['calendarDate'].values[-1] predict_data = pd.read_csv('data/factor_old'+old_trading_day+'.csv') predict_data=predict_data.dropna() predict_data.iloc[:,3:]=predict_data.iloc[:,3:].rank(method='first').apply(lambda x : x/len(predict_data)) x=predict_data.iloc[:,3:].values.tolist() # predict y=adaboost.adaClassify(x,classifierArr) predict_label=predict_data.loc[:,['secID','tradeDate']] predict_label['pro']=y predict_label['label']=sign(y) buy=predict_label[predict_label['label'] == 1] buy=buy.sort(columns=['pro'],ascending=False)[:45] buy_list_one=buy['secID'].values.tolist() buy_list_all.append(buy_list_one) for i in range(len(buy_list_all)): buylist_one=buy_list_all[i] if trading_day < '2015-12-31': next_trading_day = monthend_day[monthend_day['calendarDate']>trading_day]['calendarDate'].values[0] buy_price_one = DataAPI.MktEqudAdjGet(tradeDate=trading_day,secID=buylist_one,field=["tradeDate", "secID", "ticker", "closePrice","isOpen"],pandas="1")
#!/usr/bin/env python
#-*- coding:utf-8 -*-
import adaboost
from numpy import *

# Disabled toy-dataset experiments:
# dataMat,classLabels=adaboost.loadSimData()
# D=mat(ones((5,1))/5)
# print D
# bestStump,minError,bestClasEst=adaboost.buildStump(dataMat,classLabels,D)
# print bestStump
# classifierArray=adaboost.adaBoostTrainDS(dataMat,classLabels,30)
# print adaboost.adaClassify([[5,5],[0,0]],classifierArray)

# Train on the horse-colic training file and plot the ROC curve.
dataArr, labelArr = adaboost.loadDataSet('./dataSet/horseColicTraining2.txt')
classifierArray, aggClassEst = adaboost.adaBoostTrainDS(dataArr, labelArr, 10)
adaboost.plotROC(aggClassEst.T, labelArr)

# Count misclassified test rows.
testArr, testLabelArr = adaboost.loadDataSet('./dataSet/horseColicTest2.txt')
prediction10 = adaboost.adaClassify(testArr, classifierArray)
# One entry per test label rather than a hard-coded 67, so the boolean mask
# matches whatever test file is loaded.
errArr = mat(ones((len(testLabelArr), 1)))
# print(<single expression>) behaves identically on Python 2 and 3.
print(errArr[prediction10 != mat(testLabelArr).T].sum())
import adaboost import matplotlib.pyplot as plt from numpy import * dataMat, classLabels = adaboost.loadSimpData() #plt.scatter(dataMat[:,0],dataMat[:,1]) # D是样本的权重矩阵 D = mat(ones((5, 1)) / 5) #adaboost.buildStump(dataMat,classLabels,D) print 'data train...' classifierArr = adaboost.adaBoostTrainDS(dataMat, classLabels, 30) print 'getClassifier:', classifierArr print 'data predict...' # 学习得到3个分类器,predict时,每一个分类器级联分类得到的预测累加值 # aggClassEst越来越远离0,也就是正越大或负越大,也就是分类结果越来越强 adaboost.adaClassify([[1, 0.8], [1.8, 2]], classifierArr) # 0,lt,1.3 1,lt,1.0 0,lt,0.9 plt.figure() I = nonzero(classLabels > 0)[0] plt.scatter(dataMat[I, 0], dataMat[I, 1], s=60, c=u'r', marker=u'o') I = nonzero(classLabels < 0)[0] plt.scatter(dataMat[I, 0], dataMat[I, 1], s=60, c=u'b', marker=u'o') plt.plot([1.32, 1.32], [0.5, 2.5]) plt.plot([0.5, 2.5], [1.42, 1.42]) plt.plot([0.97, 0.97], [0.5, 2.5]) ''' plt.figure() I = nonzero(classLabels>0)[0] plt.scatter(dataMat[I,0],dataMat[I,1],s=60,c=u'r',marker=u'o') I = nonzero(classLabels<0)[0]
#adaboost只能区分-1和1的标签 for ii in range(len(labelBrr)): if labelBrr[ii] == 0: labelBrr[ii] = -1 #adaboost只能区分-1和1的标签 train_in = dataArr.tolist() train_out = labelArr.tolist() test_in = dataBrr.tolist() test_out = labelBrr classifierArray, aggClassEst = adaboost.adaBoostTrainDS( train_in, train_out, 50) # prediction_train=adaboost.adaClassify(train_in,classifierArray);#测试训练集 prob = adaboost.adaClassify(test_in, classifierArray) #测试测试集 y_pred = sign(prob) score.append(prob) label.append(y_pred) # tmp_test=adaboost.evaluatemodel(test_out,y_pred); # evaluate_test.extend(tmp_test); # evaluate_test=np.array(evaluate_test); #混淆矩阵参数 tn, fp, fn, tp = confusion_matrix(test_out, y_pred).ravel() TPR = tp / (tp + fn) SPC = tn / (fp + tn) PPV = tp / (tp + fp) NPV = tn / (tn + fn)
import adaboost
from numpy import *

# print(<single expression>) produces the same output on Python 2 and 3,
# unlike the Python 2-only print statement.
datMat, classLabels = adaboost.loadSimpData()
# Uniform weight vector for the five toy samples.
D = mat(ones((5, 1)) / 5)
print(adaboost.buildStump(datMat, classLabels, D))
classifierArray = adaboost.adaBoostTrainDS(datMat, classLabels, 9)
print(classifierArray)

# Retrain with third argument 30, then classify one point and a batch of two.
datArr, labelArr = adaboost.loadSimpData()
classifierArr = adaboost.adaBoostTrainDS(datArr, labelArr, 30)
print(adaboost.adaClassify([0, 0], classifierArr))
print(adaboost.adaClassify([[5, 5], [0, 0]], classifierArr))
#设置ROI区域 cv2.rectangle(frame,(int(3*frame.shape[1]/8),int(3*frame.shape[0]/8)),(int(5*frame.shape[1]/8),int(5*frame.shape[0]/8)),[0,0,255]) cv2.imshow('frame',frame) if (cv2.waitKey(1) == 27): capture.release() break cv2.destroyAllWindows() #截取ROI部分 img = frame[int(3*frame.shape[0]/8)+2:int(5*frame.shape[0]/8)-1,int(3*frame.shape[1]/8)+2:int(5*frame.shape[1]/8)-1,:].copy() #提取包围数字的最小ROI,转化为32*32大小,并存入一个向量之中 roi = cv.findROI(img) roi32 = cv.roiTo32(roi) vec1024 = cv.roi2Vect(roi32) #开始分类 print "Start finding..." predict = adaboost.adaClassify(vec1024,weakClassArr) if sign(predict) == sign(-1): print "识别结果:","0" else: print "识别结果:","非0" #显示32*32图像,并等待循环 cv2.imshow('roi',roi32) char = cv2.waitKey() cv2.destroyAllWindows() if char == 27 :break
import adaboost import matplotlib.pyplot as plt from numpy import * dataMat,classLabels = adaboost.loadSimpData() #plt.scatter(dataMat[:,0],dataMat[:,1]) # D是样本的权重矩阵 D = mat(ones((5,1))/5) #adaboost.buildStump(dataMat,classLabels,D) print 'data train...' classifierArr = adaboost.adaBoostTrainDS(dataMat,classLabels,30) print 'getClassifier:',classifierArr print 'data predict...' # 学习得到3个分类器,predict时,每一个分类器级联分类得到的预测累加值 # aggClassEst越来越远离0,也就是正越大或负越大,也就是分类结果越来越强 adaboost.adaClassify([[1,0.8],[1.8,2]],classifierArr) # 0,lt,1.3 1,lt,1.0 0,lt,0.9 plt.figure() I = nonzero(classLabels>0)[0] plt.scatter(dataMat[I,0],dataMat[I,1],s=60,c=u'r',marker=u'o') I = nonzero(classLabels<0)[0] plt.scatter(dataMat[I,0],dataMat[I,1],s=60,c=u'b',marker=u'o') plt.plot([1.32,1.32],[0.5,2.5]) plt.plot([0.5,2.5],[1.42,1.42]) plt.plot([0.97,0.97],[0.5,2.5]) ''' plt.figure() I = nonzero(classLabels>0)[0]
# -*- coding: utf-8 -*-
import adaboost
from numpy import *

# reload() is a builtin only on Python 2; on Python 3 it lives in importlib.
try:
    reload
except NameError:
    from importlib import reload

# First pass: train (third argument 10) and count test misclassifications.
da, la = adaboost.loadDataSet('horseColicTraining.txt')
ca = adaboost.adaBoostTrainDS(da, la, 10)
tda, tla = adaboost.loadDataSet('horseColicTest.txt')
prediction10 = adaboost.adaClassify(tda, ca)
# One entry per test label rather than a hard-coded 67, so the boolean mask
# matches whatever test file is loaded.
errArr = mat(ones((len(tla), 1)))
# NOTE: the count is computed but neither stored nor printed.
errArr[prediction10 != mat(tla).T].sum()

# Second pass: re-import the module, retrain (third argument 40) and plot
# the ROC curve. Here adaBoostTrainDS is unpacked into two values.
reload(adaboost)
da, la = adaboost.loadDataSet('horseColicTraining.txt')
ca, ace = adaboost.adaBoostTrainDS(da, la, 40)
adaboost.plotROC(ace.T, la)
classifierArr = adaboost.adaBoostTrainDS(mat(features), labels, 30) old_trading_day = trading_date_open[ trading_date_open['calendarDate'] < trading_day]['calendarDate'].values[-1] predict_data = pd.read_csv('data/factor_old' + old_trading_day + '.csv') predict_data = predict_data.dropna() predict_data.iloc[:, 3:] = predict_data.iloc[:, 3:].rank( method='first').apply(lambda x: x / len(predict_data)) x = predict_data.iloc[:, 3:].values.tolist() # predict y = adaboost.adaClassify(x, classifierArr) predict_label = predict_data.loc[:, ['secID', 'tradeDate']] predict_label['pro'] = y predict_label['label'] = sign(y) buy = predict_label[predict_label['label'] == 1] buy = buy.sort(columns=['pro'], ascending=False)[:45] buy_list_one = buy['secID'].values.tolist() buy_list_all.append(buy_list_one) for i in range(len(buy_list_all)): buylist_one = buy_list_all[i] if trading_day < '2015-12-31': next_trading_day = monthend_day[ monthend_day['calendarDate'] > trading_day]['calendarDate'].values[0] buy_price_one = DataAPI.MktEqudAdjGet(tradeDate=trading_day,
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
'7.6'
__author__ = 'lxp'

import adaboost
import numpy as np

# Train on the horse-colic training file.
datArr, labelArr = adaboost.loadDataSet('horseColicTraining2.txt')
classifierArray = adaboost.adaBoostTrainDS(datArr, labelArr, 10)

# Classify the test file and print the number of misclassified rows.
testArr, testLabelArr = adaboost.loadDataSet('horseColicTest2.txt')
prediction10 = adaboost.adaClassify(testArr, classifierArray)
# One entry per test label rather than a hard-coded 67, so the boolean mask
# matches whatever test file is loaded.
errArr = np.mat(np.ones((len(testLabelArr), 1)))
print(errArr[prediction10 != np.mat(testLabelArr).T].sum())
# Train on the horse-colic training file (third argument 40).
datArr, labelArr = adaboost.loadDataSet('horseColicTraining2.txt')
classifierArray, aggClassEst = adaboost.adaBoostTrainDS(datArr, labelArr, 40)

# Evaluate on the test set: fraction of predictions that differ from the label.
datatest, labeltest = adaboost.loadDataSet('horseColicTest2.txt')
pre = adaboost.adaClassify(datatest, classifierArray)
s = len(pre)
wrong = sum(1 for i in range(s) if pre[i] != labeltest[i])
print(wrong / s)