# shannonEnt = Trees.calcShannonEnt(myDat) # print shannonEnt # retDataSet = Trees.splitDataSet(myDat, 0, 1) # print retDataSet # bestFeature = Trees.chooseBestFeatureToSplit(myDat) # print bestFeature # tree = Trees.createTree(myDat, labels) # print tree ''' # TreePlotter ''' # TreePlotter.createPlot() tree = TreePlotter.retrieveTree(0) # numLeafs = TreePlotter.getNumLeafs(tree) # depth = TreePlotter.getTreeDepth(tree) # print "leafs nums: %d, depth: %d" % (numLeafs, depth) TreePlotter.createPlot(tree) ''' # Classify ''' # myDat, labels = Trees.createDataSet() # myTree = TreePlotter.retrieveTree(0)
# NOTE(review): fragment — the opening of this nested tree dictionary (and the
# `myTree = {...}` assignment it belongs to) lies above this chunk.  The keys
# 'lef node' / 'lead 2' look like typos for 'leaf node' / 'leaf 2', but they
# are runtime dictionary keys, so they are left untouched here.
0: 'lef node', 1: { 'level 2': { 0: 'leaf node', 1: 'leaf node' } }, 2: { 'lead 2': { 0: 'leaf node', 1: 'leaf node' } } } }
# Render the hand-built tree with the project's plotter (tp).
tp.createPlot(myTree)
# Visualization of the points and the grid structure.
data = np.mat([[0.1, 0.1], [0.9, 0.5], [0.3, 0.6], [0.7, 0.2], [0.1, 0.7], [0.5, 0.1]])
m, n = np.shape(data)
fig = plt.figure()
ax = fig.add_subplot(1, 1, 1)
# Scatter the six 2-D points; data.T[0]/data.T[1] are the x/y coordinate rows.
ax.scatter(data.T[0].tolist(), data.T[1].tolist(), color='blue', marker='o')
# Label every point with its "(x,y)" coordinates.
for point in data.tolist():
    plt.annotate("(" + str(point[0]) + "," + str(point[1]) + ")", xy=(point[0], point[1]))
xList = []
yList = []
# NOTE(review): loop body continues past this chunk — presumably it collects
# px/py into xList/yList for grid lines; verify against the full file.
for px, py in zip(data.T.tolist()[0], data.T.tolist()[1]):
:param filename: 存储的文件 :return: """ import pickle with open(filename, 'w') as fw: pickle.dumps(input_tree, fw) def grab_tree(filename): """ 从文件中读取决策树 :param filename: 要读取的文件 :return: 决策树 """ import pickle with open(filename) as fr: return pickle.load(fr) if __name__ == '__main__': with open('lenses.txt') as fr: lenses = [inst.strip().split('\t') for inst in fr.readlines()] # 标签列表 lensesLabels = ['age', 'prescript', 'astigmatic', 'tearRate'] # 创建决策树 lensesTree = create_tree(lenses, lensesLabels) # 打印树 print(lensesTree) # 显示树形图 TreePlotter.create_plot(lensesTree)
pickle.dump(obj, file) file.close() def readDump(path): file = open(path, "rb") data = pickle.load(file) file.close() return data dtree = ID3Tree() if not os.path.exists(ID3SavePath): # 不是文件夹 print("生成数据") dtree.dataSet = loadDataSet(ID3LoadPath) dtree.labels = ["age", "revenue", "student", "credit"] # dtree.dataSet = dataSet # dtree.labels = labels print("训练数据") dtree.train() print("持久化数据") saveDump(ID3SavePath, dtree.getDumpData()) else: print("读取持久化") dtree.loadDumpData(readDump(ID3SavePath)) print("正在生成树") tp.createPlot(dtree.tree) print("预测结果为:", dtree.predict(dtree.tree, [0, 0, 0, 0]))
2. Prepare: Parse tab-delimited lines.
3. Analyze: Quickly review data visually to make sure it was parsed properly. The final tree will be plotted with createPlot().
4. Train: Use createTree() from section 3.1.
5. Test: Write a function to descend the tree for a given instance.
6. Use: Persist the tree data structure so it can be recalled without building the tree; then use it in any application.
'''
# Python 2 script: builds a decision tree from the lenses data set, plots it,
# then re-classifies the training data to estimate the training error rate.
from numpy import *
import LoadData as ld
import DicisionTree as dts
import TreePlotter as tplt

dataSet, labels = ld.createDataSet('lenses.txt')
#testLabels = zeros(len(labels),1)# cannot be used either
#testLabels = labels# cannot be used — presumably because createTree mutates labels in place; verify
lensesTree = dts.createTree(dataSet, labels)
print lensesTree
tplt.createPlot(lensesTree)
print labels
# Reuse the training data as test data (so this measures training error,
# not generalization error).
dataSet, testLabels = ld.createDataSet('lenses.txt')
errorCount = 0
numTestVec = 0
for testVec in dataSet:
    numTestVec += 1.0
    classLabel = dts.classify(lensesTree, testLabels, testVec)
    # The true class is stored in the last column of each record.
    if classLabel != testVec[-1]:
        errorCount += 1
errorRate = (float(errorCount) / numTestVec)
print "the error rate of this test is: %f" % errorRate
    # NOTE(review): fragment — the `dataset = [` opening (and earlier rows)
    # lies above this chunk.  Each row is three categorical features plus a
    # class label; every visible row is labeled 'yes'.
    [u'T', u'D', u'P', 'yes'],
    [u'D', u'D', u'B', 'yes'],
    [u'D', u'N', u'B', 'yes'],
    [u'D', u'D', u'D', 'yes'],
    [u'D', u'D', u'D', 'yes'],
    [u'D', u'A', u'D', 'yes'],
    [u'T', u'D', u'P', 'yes'],
    [u'T', u'D', u'B', 'yes'],
    [u'D', u'D', u'D', 'yes'],
    [u'T', u'D', u'D', 'yes'],
    [u'T', u'N', u'B', 'yes'],
    [u'D', u'A', u'D', 'yes'],
    [u'T', u'D', u'D', 'yes'],
    [u'T', u'D', u'D', 'yes'],
    [u'D', u'D', u'D', 'yes'],
    [u'D', u'D', u'D', 'yes'],
    [u'T', u'D', u'D', 'yes'],
    [u'D', u'A', u'D', 'yes'],
    [u'D', u'D', u'D', 'yes'],
    [u'D', u'D', u'D', 'yes'],
    [u'D', u'A', u'D', 'yes'],
    [u'D', u'D', u'D', 'yes'],
    [u'D', u'D', u'D', 'yes'],
    [u'T', u'D', u'B', 'yes'],
    [u'T', u'D', u'P', 'yes'],
    [u'D', u'D', u'D', 'yes'],
    [u'D', u'D', u'D', 'yes'],
    [u'T', u'D', u'D', 'yes'],
    [u'T', u'D', u'D', 'yes'],
    [u'T', u'D', u'D', 'yes'],
    [u'D', u'D', u'D', 'yes'],
    [u'T', u'D', u'B', 'yes'],
    [u'T', u'P', u'B', 'yes'],
    [u'D', u'D', u'D', 'yes'],
    [u'D', u'A', u'P', 'yes'],
    [u'T', u'D', u'D', 'yes'],
    [u'D', u'D', u'P', 'yes'],
    [u'T', u'D', u'D', 'yes'],
    [u'T', u'D', u'D', 'yes'],
    [u'T', u'D', u'D', 'yes'],
    [u'T', u'D', u'D', 'yes'],
    [u'D', u'D', u'D', 'yes'],
    [u'T', u'N', u'B', 'yes'],
    [u'D', u'N', u'B', 'yes'],
    [u'T', u'D', u'D', 'yes'],
    [u'D', u'D', u'D', 'yes'],
    [u'D', u'D', u'D', 'yes'],
    [u'D', u'D', u'D', 'yes'],
    [u'D', u'A', u'D', 'yes'],
    [u'T', u'N', u'B', 'yes']
]
# The book's original toy data set, kept for reference:
#[[1,1,'yes'],[1,1,'yes'],[1,0,'no'],[0,1,'no'],[0,1,'no']]
# Feature names matching the three columns above.
labels = ['surfacing', 'flippers', 'ff']
# Build and display the tree (Python 2; CreatTree/TreePlotter are project names).
myTree = CreatTree(dataset, labels)
print myTree
TreePlotter.CreatPlot(myTree)
import operator
import os
import Trees

# Walkthrough of the Trees module: entropy, splitting, feature selection,
# tree construction, and finally plotting.

# Build the toy data set and inspect its Shannon entropy.
demo_set, feature_names = Trees.createDataSet()
print(demo_set)
print(feature_names)
print(Trees.calcShannonEnt(demo_set))

# Mutating a class label changes the label distribution, so the entropy rises.
demo_set[0][-1] = 'maybe'
print(demo_set)
print(feature_names)
print(Trees.calcShannonEnt(demo_set))

# Start over with a fresh copy and exercise splitting / feature selection.
fresh_set, feature_names = Trees.createDataSet()
print(fresh_set)
print(Trees.splitDataSet(fresh_set, 0, 1))
# NOTE(review): this split intentionally(?) uses the mutated demo_set, not
# fresh_set — looks like a typo in the original walkthrough, kept as-is.
print(Trees.splitDataSet(demo_set, 0, 0))
print(Trees.chooseBestFeatureToSplit(fresh_set))

# Build the full tree and draw it.
grown_tree = Trees.createTree(fresh_set, feature_names)
print(grown_tree)

import TreePlotter
TreePlotter.createPlot()
fw.close() def grabTree(filename): import pickle fr = open(filename,"rb") return pickle.load(fr) dataSet,labels = createDataSet() # shannonEnt = calcShannoEnt(dataSet) # print(shannonEnt) # retDataSet = splitDataSet(dataSet,0,1) # 取出第0个特征为1的数据,并去掉该特征 # print(dataSet) # print(retDataSet) # bestFeature = chooseBestFeatureToSplit(dataSet) # print(bestFeature) # myTree = createTree(dataSet,labels) # print(myTree) # 使用算法 fr = open('/Users/lixiwei-mac/Documents/IdeaProjects/MachineLearningInAction/DecisionTree/lenses.txt') lenses = [inst.strip().split('\t') for inst in fr.readlines()] lensesLabels = ['age','prescript','astigmatic','tearRage'] lensesTree = createTree(lenses,lensesLabels) print(lensesTree) treePlotter.createPlot(lensesTree)
# Window edges and binning for the 2-D (dijet mass, softdrop jet mass)
# template histogram drawn below.
# NOTE(review): minMJJ, cuts, cuts['common'], cuts['HP'] and fromsample are
# defined above this chunk — verify against the full file.
maxMJJ=610.0
minMVV=1000.0
maxMVV=7000.0
binsMJJ=290
binsMVV=160
# Acceptance: event must fall inside both the reconstructed dijet-mass and
# softdrop-mass windows (the string is a ROOT TTree selection expression).
cuts['acceptance']= "(jj_LV_mass>{minMVV}&&jj_LV_mass<{maxMVV}&&jj_l1_softDrop_mass>{minMJJ}&&jj_l1_softDrop_mass<{maxMJJ})".format(minMVV=minMVV,maxMVV=maxMVV,minMJJ=minMJJ,maxMJJ=maxMJJ)
# Generator-level acceptance: require positive gen-level masses.
cuts['acceptanceGEN']='(jj_l1_gen_softDrop_mass>0&&jj_gen_partialMass>0)'
cuts['nonres'] = '1'
# Full selection: product of all individual cut expressions.
cut='*'.join([cuts['common'],cuts['nonres'], '(jj_l1_softDrop_mass>30&&jj_l1_softDrop_mass<610)','(jj_LV_mass>1000&&jj_LV_mass<7000)',cuts['HP'],'(jj_l1_gen_softDrop_mass>0&&jj_gen_partialMass>0)'])
dataPlotters=[]
dataPlottersNW=[]
# One TreePlotter per sample file; apply cross-section and event-weight
# correction factors read from the tree.
dataPlotters.append(TreePlotter(fromsample+'.root','tree'))
dataPlotters[-1].setupFromFile(fromsample+'.pck')
dataPlotters[-1].addCorrectionFactor('xsec','tree')
dataPlotters[-1].addCorrectionFactor('genWeight','tree')
dataPlotters[-1].addCorrectionFactor('puWeight','tree')
data=MergedPlotter(dataPlotters)
# 2-D histogram of gen softdrop mass vs. reconstructed dijet mass.
sampleHisto=dataPlotters[0].drawTH2("jj_l1_gen_softDrop_mass:jj_LV_mass",cut,"1",binsMVV,minMVV,maxMVV,binsMJJ,minMJJ,maxMJJ,"M_{qV} mass","GeV","Softdrop mass","GeV","COLZ" )
# 1-D softdrop-mass projection, normalized to unit area.
sampleHisto1Dmjet=dataPlotters[0].drawTH1('jj_l1_softDrop_mass',cut,"1",binsMJJ,minMJJ,maxMJJ)
sampleHisto1Dmjet.Scale(1/sampleHisto1Dmjet.Integral())
print sampleHisto
# 获取信息增益最大的特征及其增益 highest_gain_feature, highest_gain = get_feature_with_highest_Gain( data_set) # 增益小于ε,单一节点,返回实例数最大的类 if highest_gain < eps: return get_most_common_class(data_set) # 构建树 decision_tree_dict = {highest_gain_feature: {}} # 对每个最高增益特征的取值进行分割数据集,并进行递归调用生成树 feature_values = set(data_set[highest_gain_feature]) for one_value in feature_values: # 分割D divided_data_set = data_set[data_set[highest_gain_feature] == one_value] # 去除列,A = A - {Ak}i divided_data_set = divided_data_set.drop(labels=highest_gain_feature, axis=1) # 生成子树 decision_tree_dict[highest_gain_feature][ one_value] = generate_decision_tree(divided_data_set, eps) return decision_tree_dict if __name__ == '__main__': data_set = init_data('resources/lenses.txt') decision_tree = generate_decision_tree(data_set, eps=0.0001) print(decision_tree) TreePlotter.createPlot(decision_tree) # print(data_set[(data_set['tearRate'] == 'normal') & (data_set['astigmatic'] == 'yes') & (data_set['prescript'] == 'myope')]) # print(data_set[data_set['tearRate'] == 'reduced'])
def lensesStudy(filepath):
    """Build a decision tree from a lenses data file and plot it.

    :param filepath: path to a tab-separated lenses data file, one
        instance per line, class label in the last column
    """
    # FIX(review): with-statement closes the file even if parsing fails —
    # the original opened it and never closed it.
    with open(filepath) as fr:
        lenses = [inst.strip().split("\t") for inst in fr.readlines()]
    # Feature names for the four columns preceding the class label.
    lensesLabels = ['age', 'prescript', 'astigmatic', 'tearRate']
    # createTree / TreePlotter are provided elsewhere in this project.
    lensesTree = createTree(lenses, lensesLabels)
    TreePlotter.createPlot(lensesTree)