def test_classify():
    """Print the canned demo tree, then the classes of samples [1, 0] and [1, 1]."""
    import treePlotter
    sample_data, feature_names = creatDataSet()
    demo_tree = treePlotter.retrieveTree(0)
    print(demo_tree)
    for vec in ([1, 0], [1, 1]):
        print(classify(demo_tree, feature_names, vec))
def test_store_tree():
    """Round-trip the canned demo tree through on-disk storage."""
    import treePlotter
    sample_data, feature_names = creatDataSet()
    demo_tree = treePlotter.retrieveTree(0)
    print(demo_tree)
    storeTree(demo_tree, 'classifierStorage.txt')
    grabTree('classifierStorage.txt')
def test():
    """Classify sample [1, 1] with the canned tree from treePlotter."""
    data_rows, feature_names = createDataSet()
    # sdataSet = splitDataSet(dataSet, 0, 1)
    # ent = calcShannonEnt(dataSet)
    # bestFeature = chooseBestFeatureToSplit(dataSet)
    # myTree = createTree(dataSet, labels)
    demo_tree = treePlotter.retrieveTree(0)
    print(classify(demo_tree, feature_names, [1, 1]))
def main7():
    """Build the demo tree, classify a sample, and round-trip the tree via storage.

    (Original docstring, translated: "generate the decision tree and store it as
    .txt; load the decision tree back from the .txt file.")

    FIX(review): the original used Python 2 `print` statements; converted to the
    function form used by the rest of this file so it also runs on Python 3.
    """
    import treePlotter
    myDat, labels = createDataSet()
    myTree = treePlotter.retrieveTree(0)
    print(classify(myTree, labels, [1, 0]))
    storeTree(myTree, 'classifierStorage.txt')
    print(grabTree('classifierStorage.txt'))
# Demo of the treePlotter helpers: canned trees, leaf/depth counts, plotting.
# FIX(review): Python 2 `print` statements converted to print() calls for
# consistency with the Python 3 style used elsewhere in this file.
import treePlotter as tp

print(tp.retrieveTree(0))
print(tp.retrieveTree(1))
myTree = tp.retrieveTree(0)
print(tp.getNumLeafs(myTree))
print(tp.getTreeDepth(myTree))
# tp.createPlot(myTree)
tp.createPlot(tp.retrieveTree(1))
# NOTE(review): the next two statements are the tail of a classify() whose `def`
# line is outside this chunk; the original indentation was lost in extraction,
# so `return` here is not at its true nesting level.
classLabel = secondDict[key]
return classLabel


def storeTreee(inputTree, filename):
    """Serialize *inputTree* to *filename* with pickle. (Original: 存储树 — "store the tree".)

    NOTE(review): the name carries a stray third 'e' — sibling snippets in this
    file call `storeTree`; confirm intended name before renaming, since the
    commented call below uses this spelling.
    """
    import pickle
    with open(filename, 'wb') as f:
        pickle.dump(inputTree, f)


def grabTree(filename):
    """Load and return a pickled tree from *filename*. (Original: 读取树 — "read the tree".)"""
    import pickle
    with open(filename, 'rb') as f:
        return pickle.load(f)


if __name__ == '__main__':
    # myDat, labels = createDataSet()
    # print(labels)
    myTree = tp.retrieveTree(0)  # `tp` presumably aliases treePlotter — import not visible in this chunk
    print(myTree)
    # result = classify(myTree, labels, [1, 1])
    # print(result)
    # storeTreee(myTree, 'classifierStorage.txt')
    tree = grabTree('classifierStorage.txt')
    print(tree)
# Build and plot a decision tree from the (Chinese-labelled) contact-lens data.
# BUG FIX(review): the open() call was commented out, leaving `fr` undefined at
# its first use; reopened here. `unicode(...)` is Python 2 only — io.open with
# an encoding yields decoded text on both Python 2 and 3. The duplicated
# `.strip().strip()` was collapsed (strip is idempotent).
import io
with io.open('lensesCN.txt', encoding='utf-8') as fr:
    lenses = [inst.strip().split('\t') for inst in fr.readlines()]
#lensesLabels = ["年龄组" , "规定", "闪光", "泪液扫除率"]
lensesLabels = ['age', 'prescript', 'astigmatic', 'tearRate']
lensesTree = tr.createTree(lenses, lensesLabels)
print(lensesTree)
tp.createPlot(lensesTree)

# Entropy of the toy data set.
dataSet, labels = tr.createDataSet()
shannonEnt = tr.calcShannonEnt(dataSet)
print(shannonEnt)

# Canned trees: leaf count and depth.
print(tp.retrieveTree(1))
myTree = tp.retrieveTree(0)
numLeafs = tp.getNumLeafs(myTree)
treeDepth = tp.getTreeDepth(myTree)
print(numLeafs)
print(treeDepth)

# Plot the canned tree, then mutate a node and plot again.
myTree = tp.retrieveTree(0)
tp.createPlot(myTree)
myTree['no surfacing'][3] = 'maybe'
tp.createPlot(myTree)
# Walk-through of the trees (DT) and treePlotter (TP) APIs.
splittedDat = DT.splitDataSet(myDat, 0, 1)   # [[1, 'yes'], [1, 'yes'], [0, 'no']]
splittedDat = DT.splitDataSet(myDat, 0, 0)   # [[1, 'no'], [1, 'no']]
bestFeature = DT.chooseBestFeatureToSplit(myDat)  # 0
myTree = DT.createTree(myDat, labels)
# {'no surfacing': {0: 'no', 1: {'flippers': {0: 'no', 1: 'yes'}}}}

import treePlotter as TP
# TP.createPlot()
myTree = TP.retrieveTree(0)
# {'no surfacing': {0: 'no', 1: {'flippers': {0: 'no', 1: 'yes'}}}}
n = TP.getNumLeafs(myTree)   # 3
d = TP.getTreeDepth(myTree)  # 2
TP.createPlot(myTree)

# classify
myDat, labels = DT.createDataSet()
myTree = TP.retrieveTree(0)
# {'no surfacing': {0: 'no', 1: {'flippers': {0: 'no', 1: 'yes'}}}}
class1 = DT.classify(myTree, labels, [1, 0])  # no
class2 = DT.classify(myTree, labels, [1, 1])  # yes

# storing the tree in pickled form
DT.storeTree(myTree, 'data/classifierStorage.txt')
# FIX(review): the source was truncated mid-call here; completed with the same
# path written by storeTree above — confirm against the original script.
grabedTree = DT.grabTree('data/classifierStorage.txt')
# NOTE(review): this opening span is the body of a createPlot() whose `def` line
# is outside this chunk; original indentation was lost in extraction.
fig = plt.figure(1, facecolor='white')
fig.clf()
createPlot.ax1 = plt.subplot(111, frameon=False)  # draw the subplot (原: 绘制子图)
plotNode('a decision node', (0.5, 0.1), (0.1, 0.5), decisionNode)
plotNode('a leaf node', (0.8, 0.1), (0.3, 0.8), leafNode)
plt.show()

# Example program that draws the tree figure
createPlot()

# Count the leaves and the depth of the tree
import treePlotter
myTree = treePlotter.retrieveTree(0)  # load the canned tree structure directly
print('叶子节点数:', treePlotter.getNumLeafs(myTree))  # leaf count, used for the x-axis width
print('树的层数:', treePlotter.getTreeDepth(myTree))  # tree depth, used for the y-axis height

# Plot the complete decision-tree model
print('绘制完整的决策树模型')
treePlotter.createPlot(myTree)

### Build a decision tree from data and use it for prediction
import W_tree
import treePlotter
myDat, labels = W_tree.createDataSet()
myTree = treePlotter.retrieveTree(0)
print('[1,0]的分类结果是', W_tree.classify(myTree, labels, [1, 0]))  # result: no
print('[1,1]的分类结果是', W_tree.classify(myTree, labels,
# NOTE(review): the chunk is truncated here; the call presumably ends `[1, 1]))` — confirm against the full file.
def test1():
    """Print the canned demo tree and the class it assigns to sample [1, 1]."""
    sample_data, feature_names = createDataSet()
    demo_tree = treePlotter.retrieveTree(0)
    print(demo_tree)
    print(classify(demo_tree, feature_names, [1, 1]))
# NOTE(review): the next three statements are the tail of a grabTree() whose
# `def` line is outside this chunk; original indentation was lost, so `return`
# is not at its true nesting level.
# NOTE(review): the file is opened without 'rb' and never closed — this works
# under Python 2 (which the prints below target) but pickle.load on a text-mode
# handle breaks on Python 3.
import pickle
fr = open(filename)
return pickle.load(fr)

if __name__ == "__main__":
    myDat, labels = createDataSet()
    # print clacShannonEnt(myDat)
    # print splitDataSet(myDat, 0, 1)
    # print splitDataSet(myDat, 0, 0)
    # print chooseBestFeatureToSplit(myDat)
    # myTree = createTree(myDat, labels)
    # print myTree
    from treePlotter import retrieveTree, createPlot
    # myTree = retrieveTree(0)
    # print myTree
    # print classify(myTree, labels, [1, 0])
    # print classify(myTree, labels, [1, 1])
    # storeTree(myTree, 'classifierStorage.txt')
    # print grabTree('classifierStorage.txt')
    # Build and plot a tree from the contact-lens data set.
    fr = open('lenses.txt')  # NOTE(review): handle never closed
    lenses = [inst.strip().split("\t") for inst in fr.readlines()]
    lensesLabels = ['age', 'prescript', 'astigmatic', 'tearRate']
    lensesTree = createTree(lenses, lensesLabels)
    print lensesTree  # NOTE(review): Python 2 print statement — this script targets Py2
    createPlot(lensesTree)
# Build a decision tree from the contact-lens data and plot it.
import trees
import treePlotter

treePlotter.retrieveTree(1)  # NOTE(review): return value discarded in the original too
myTree = treePlotter.retrieveTree(0)  # NOTE(review): unused in the original, kept for parity

fr = open('lenses.txt')
lenses = [inst.strip().split('\t') for inst in fr.readlines()]
# BUG FIX(review): 'astigmatix' was a typo — every sibling script in this file
# labels this feature 'astigmatic'.
lensesLabels = ['age', 'prescript', 'astigmatic', 'tearRate']
lensesTree = trees.createTree(lenses, lensesLabels)
treePlotter.createPlot(lensesTree)
def testDumpAndLoadDecisionTree():
    """Persist the canned tree to disk, reload it, and print the copy."""
    tree = treePlotter.retrieveTree(0)
    storeTree(tree, 'classTree.txt')
    print(grabTree('classTree.txt'))
firstStr = inputTree.keys()[0] secondDict = inputTree[firstStr] featIndex = featLabels.index(firstStr) #将标签字符串转换为索引 for key in secondDict.keys(): if testVec[featIndex] == key: if type(secondDict[key]).__name__ == 'dict': classLabel = classify(secondDict[key], featLabels, testVec) else: classLabel = secondDict[key] return classLabel myDat, labels = createDataSet() #myTree = createTree(myDat,labels) print(labels) print(treePlotter.retrieveTree(1)) print(treePlotter.retrieveTree(0)) myTree = treePlotter.retrieveTree(0) print(classify(myTree, labels, [1, 0])) print(classify(myTree, labels, [1, 1])) #treePlotter.createPlot() print('-------隐形眼镜数据-----') fr = open('/Users/wakemeup/Documents/MLiA/ch03/lenses.txt') lenses = [inst.strip().split('\t') for inst in fr.readlines()] lensesLabels = ['age', 'prescript', 'astigmatic', 'tearRate'] lensesTree = createTree(lenses, lensesLabels) print(lensesTree)
# NOTE(review): the first four statements are the tail of a storeTree() whose
# `def` line is outside this chunk; original indentation was lost in extraction.
import pickle
fw = open(filename, 'wb')
pickle.dump(inputTree, fw)
fw.close()


def grabTree(filename):
    """Load and return a pickled decision tree from *filename*.

    FIX(review): the original never closed the file handle; a context manager
    releases it promptly even if unpickling raises.
    """
    import pickle
    with open(filename, 'rb') as fr:
        return pickle.load(fr)


if __name__ == "__main__":
    myDat, labels = createDataSet()
    print(labels)
    myTrees = tplt.retrieveTree(0)
    print(myTrees)
    print(classify(myTrees, labels, [1, 0]))
    print(classify(myTrees, labels, [1, 1]))
    # Round-trip the tree through pickle storage.
    storeTree(myTrees, "classifierStorage.txt")
    print("Saved!")
    gt = grabTree("classifierStorage.txt")
    print("Loaded!")
    print(gt)
    # Build and plot a tree from the contact-lens data set.
    fr = open("lenses.txt")
    lenses = [inst.strip().split('\t') for inst in fr.readlines()]
    lensesLabels = ["age", "prescript", "astigmatic", "tearRate"]
    lenseTree = createTree(lenses, lensesLabels)
    print(lenseTree)
    tplt.createPlot(lenseTree)
# -*- coding: utf-8 -*-
# Worked examples for the trees / treePlotter modules.

# Choose the feature whose split maximizes information gain.
import trees
demo_data, demo_labels = trees.createDataSet()
trees.chooseBestFeatureToSplit(demo_data)

# Build a decision tree from the demo data.
import trees
demo_data, demo_labels = trees.createDataSet()
trees.createTree(demo_data, demo_labels)

# Draw the canned tree.
import treePlotter
canned_tree = treePlotter.retrieveTree(0)
treePlotter.createPlot(canned_tree)

# Use the tree to classify a sample.
import trees
import treePlotter
canned_tree = treePlotter.retrieveTree(0)
demo_data, demo_labels = trees.createDataSet()
trees.classify(canned_tree, demo_labels, [0, 0])

# Serialize the tree to disk and load it back.
import trees
import treePlotter
canned_tree = treePlotter.retrieveTree(0)
trees.storeTree(canned_tree, 'classifierStorage.txt')
restored_tree = trees.grabTree('classifierStorage.txt')
def main():
    """Classify samples [1, 0] and [1, 1] with the canned demo tree.

    BUG FIX(review): retrieveTree() was called with no argument, but it requires
    an index — every sibling script passes 0 for this demo tree. Python 2
    `print` statements also converted to the function form.
    """
    dataSet, labels = createDataSet()
    myTree = treePlotter.retrieveTree(0)
    print(classify(myTree, labels, [1, 0]))
    print(classify(myTree, labels, [1, 1]))
bestFeatLabel = labels[bestFeat] myTree = {bestFeatLabel:{}} del(labels[bestFeat]) #delete the best feature , so it can find the next best feature featValues = [example[bestFeat] for example in dataSet] uniqueVals = set(featValues) for value in uniqueVals: subLabels = labels[:] #copy all of labels, so trees don't mess up existing labels myTree[bestFeatLabel][value] = createTree(splitDataSet(dataSet, bestFeat, value),subLabels) return myTree #决策树的存储 def storeTree( inputTree, filename ): import json with open( filename, 'w') as f: f.write(json.dumps(inputTree)) def grabTree( filename ): import json f = open( filename) return json.loads(f.read()) # dataSet, labels = createDataSet() # tree = createTree(dataSet, labels) # storeTree(tree,'classifierStorage.txt') import treePlotter myTree = treePlotter.retrieveTree(0) treePlotter.createPlot(myTree)
# plt.xlabel('count') # plt.ylabel('result') # plt.title('Hahaha Goooood!!!') # fig.savefig('plot.svg') # import matplotlib # matplotlib.use('Agg') # import matplotlib.pyplot as plt # fig = plt.figure(1, facecolor='white') # fig.clf() # ax = plt.subplot(111, frameon=True) # # ax.scatter([.2, .5], [.1, .5]) # plt.figure(1, figsize=(3,3)) # ax = plt.subplot(111) # ax.annotate("Test", xy=(0.2, 0.2), xycoords='data', xytext=(0.8, 0.8), # textcoords='data', size=20, va="center", ha="center", # bbox=dict(boxstyle="round4", fc="w"), # arrowprops=dict(arrowstyle="-|>", # connectionstyle="arc3,rad=-0.2", fc="w"), ) # ax.annotate("This is my text", xy=(0.2, 0.1), xycoords='data', # xytext=(0.4, 0.3), textcoords='data', ha='center', va='center', # arrowprops=dict(arrowstyle="->", connectionstyle="arc3"), ) # fig.savefig('plot.svg') # import textPlotter # textPlotter.createPlot() import treePlotter treePlotter.createPlot(treePlotter.retrieveTree(0))
# Print the hand-made data set and compute its Shannon entropy.
# FIX(review): converted the Python 2 `print` statements to print() calls so the
# script also runs on Python 3 (comma-separated arguments keep the same output).
myDat, labels = trees.createDataSet()
print("myDat 数据集是:", myDat)
print("\nlabels 标签是:", labels)
rawCalc = trees.calcShannonEnt(myDat)
print("\ncalcShannonEnt(myDat) 数据集的原始熵是:", rawCalc)
print("\ntrees.splitDataSet( myDat,1,1)将数据集的按 特征[1]=1(即 flippers==1) 提取出来的矩阵是:", trees.splitDataSet(myDat, 1, 1))

# Best feature to split on.
bestLabel = trees.chooseBestFeatureToSplit(myDat)
print("\nchooseBestFeatureToSplit(myDat) 数据集的bestLabel最好特征的[下标]是:", bestLabel, "\tlabels[bestLabel]最好特征是:", labels[bestLabel])

# Build the tree from the data set.
myTree = trees.createTree(myDat, labels)
print("\ntrees.createTree(myDat,labels) 根据数据集创建的树是:", myTree)

# Load canned tree [0] and plot the first figure.
print("\n读取预先存储的树[0] 并绘制出第一个图形:")
myTree0 = treePlotter.retrieveTree(0)
treePlotter.createPlot(myTree0)

# Load canned tree [1] and plot the second figure.
print("\n读取预先存储的树[1] 并绘制出第二个图形:")
myTree1 = treePlotter.retrieveTree(1)
treePlotter.createPlot(myTree1)

# Commented-out experiment: change one node under "no surfacing" and re-plot.
# (FIX(review): the original kept this in an inert triple-quoted string that
# ended with stray 'rag' garbage; rewritten as plain comments.)
# myTree['no surfacing'][3] = 'maybe'
# print('after change is:')
# print(myTree)
# treePlotter.createPlot(myTree)
# FIX(review): converted the Python 2 `print` statements to print() calls so the
# script also runs on Python 3 (comma-separated arguments keep the same output).

# Calculate Shannon entropy
print('-------------- calculate shannonEnt --------------------')
shannonEnt = calcShannonEnt(myDat)
print(shannonEnt)

# Split the data set
print('-------------- split dataset --------------------')
print(splitDataSet(myDat, 0, 1))
print(splitDataSet(myDat, 0, 0))

# Get the best feature
print('-------------- best feature --------------------')
print('best feature:', chooseBestFeatureToSplit(myDat))
print('createTree ', createTree(myDat, labels))

# Plot trees
print('-------------- plot-trees --------------------')
myTree = tp.retrieveTree(0)
print('myTree ', myTree)
print('labels', labels)
print('numLeafs ', tp.getNumLeafs(myTree))
print('treeDepth ', tp.getTreeDepth(myTree))
#tp.createPlot(myTree)

# Update the dict and plot again
#myTree['no surfacing'][3] = 'maybe'
#tp.createPlot(myTree)

# Classify
print('-------------- classify --------------------')
myDat, labels = createDataSet()
print('labels', labels)
myTree = tp.retrieveTree(0)
# Demo of the trees module API plus the treePlotter helpers.
# FIX(review): converted the Python 2 `print` statements to print() calls so the
# script also runs on Python 3.
import trees
import treePlotter

myDat, labels = trees.createDataSet()
print(myDat)
print(trees.calcShannonEnt(myDat))
print(trees.splitDataSet(myDat, 0, 1))
print(trees.splitDataSet(myDat, 0, 0))
print(trees.splitDataSet(myDat, 1, 1))
print(trees.chooseBestFeatureToSplit(myDat))
print(trees.createTree(myDat, labels))
treePlotter.createPlot()
print('createPlot over')
print(treePlotter.retrieveTree(1))
myTree = treePlotter.retrieveTree(0)
print(treePlotter.getNumLeafs(myTree))
print(treePlotter.getTreeDepth(myTree))
# Print the canned tree 1 plus its leaf count and depth.
# FIX(review): converted the Python 2 `print` statements to print() calls so the
# script also runs on Python 3.
import treePlotter

if __name__ == '__main__':
    print(treePlotter.retrieveTree(1))
    myTree = treePlotter.retrieveTree(1)
    print(treePlotter.getNumLeafs(myTree))
    print(treePlotter.getTreeDepth(myTree))
# -*- coding:utf-8 -*-
# FIX(review): converted the Python 2 `print` statements to print() calls so the
# script also runs on Python 3; Chinese comments translated.
import trees
import treePlotter


def createDataSet():
    """Return the tiny fish demo data set and its feature names.

    Each row is [no surfacing?, flippers?, class label].
    """
    dataSet = [
        [1, 1, 'yes'],
        [1, 1, 'yes'],
        [1, 0, 'no'],
        [0, 1, 'no'],
        [0, 1, 'no'],
    ]
    labels = ['no surfacing', 'flippers']
    return dataSet, labels


myDat, labels = createDataSet()
print(myDat)
# print(trees.calcShannonEnt(myDat))  # higher entropy = more mixed data; setting myDat[0][-1]='maybe' raises it
# print(trees.splitDataSet(myDat, 0, 1))  # keep rows whose feature 0 equals 1, minus that column
# print(trees.chooseBestFeatureToSplit(myDat))
# print(trees.createTree(myDat, labels))
myTree = treePlotter.retrieveTree(0)  # the canned decision tree
print(trees.classify(myTree, labels, [1, 1]))