def main():
    """Exercise the trees module: build a tree, inspect it, classify one sample."""
    # createPlot()
    dataSet, labels = trees.createDataSet()
    # createTree consumes the label list it is given, so hand it a deep copy
    # and keep `labels` intact for the classify() call below.
    labelsTmp = copy.deepcopy(labels)
    mytree = trees.createTree(dataSet, labelsTmp)
    # Bug fix: the originals were Python-2 print *statements*, which are a
    # syntax error under Python 3; single-argument print() calls behave the
    # same on both interpreter lines.
    print(mytree)
    print(dataSet)
    print(labels)
    print(getNumLeafs(mytree))
    print(getTreeDepth(mytree))
    # createPlot(mytree)
    print(trees.classify(mytree, labels, [1, 0]))
# sys.setdefaultencoding() only exists on Python 2 (after reload(sys)); on
# Python 3 the default is already UTF-8, so guard the call instead of crashing.
if hasattr(sys, 'setdefaultencoding'):
    sys.setdefaultencoding('utf8')
print(sys.getdefaultencoding())

# Bug fix: the open() call was commented out, leaving `fr` undefined at the
# comprehension below (NameError). Opening with an explicit encoding also
# removes the need for the Python-2-only unicode() conversion and the
# duplicated .strip().strip().
with open('lensesCN.txt', encoding='utf-8') as fr:
    lenses = [inst.strip().split('\t') for inst in fr.readlines()]
# lensesLabels = ["年龄组", "规定", "闪光", "泪液扫除率"]
#   (Chinese label variant: age group / prescription / astigmatic / tear rate)
lensesLabels = ['age', 'prescript', 'astigmatic', 'tearRate']
lensesTree = tr.createTree(lenses, lensesLabels)
print(lensesTree)
tp.createPlot(lensesTree)

dataSet, labels = tr.createDataSet()
shannonEnt = tr.calcShannonEnt(dataSet)
print(shannonEnt)
print(tp.retrieveTree(1))
myTree = tp.retrieveTree(0)
numLeafs = tp.getNumLeafs(myTree)
treeDepth = tp.getTreeDepth(myTree)
print(numLeafs)
print(treeDepth)
    # NOTE(review): the lines down to `def createPlot` are the tail of
    # plotTree(myTree, parentPt, nodeTxt) — its def header (and the
    # plotNode/plotMidText/getNumLeafs/getTreeDepth helpers, plt import)
    # lie outside this view.
    plotNode(firstStr, cntrPt, parentPt, decisionNode)
    secondDict = myTree[firstStr]
    # Descend one depth level before drawing children.
    plotTree.yOff = plotTree.yOff - 1.0/plotTree.totalD
    for key in secondDict.keys():
        if type(secondDict[key]).__name__=='dict':  # test to see if the node is a dictionary; if not it is a leaf node
            plotTree(secondDict[key],cntrPt,str(key))  # recursion
        else:   # it's a leaf node: print the leaf node
            plotTree.xOff = plotTree.xOff + 1.0/plotTree.totalW
            plotNode(secondDict[key], (plotTree.xOff, plotTree.yOff), cntrPt, leafNode)
            plotMidText((plotTree.xOff, plotTree.yOff), cntrPt, str(key))
    # Step back up after finishing this subtree.
    plotTree.yOff = plotTree.yOff + 1.0/plotTree.totalD
# if you do get a dictionary you know it's a tree, and the first element will be another dict


def createPlot(inTree):
    """Render the decision tree `inTree` in a fresh matplotlib figure.

    Seeds the function attributes (totalW/totalD/xOff/yOff) that plotTree()
    uses as shared layout state, then kicks off the recursive drawing.
    """
    fig = plt.figure(1, facecolor='white')
    fig.clf()
    axprops = dict(xticks=[], yticks=[])
    createPlot.ax1 = plt.subplot(111, frameon=False, **axprops)  # no ticks
    # createPlot.ax1 = plt.subplot(111, frameon=False)  # ticks for demo purposes
    plotTree.totalW = float(getNumLeafs(inTree))   # leaf count drives x spacing
    plotTree.totalD = float(getTreeDepth(inTree))  # depth drives y spacing
    plotTree.xOff = -0.5/plotTree.totalW; plotTree.yOff = 1.0;
    plotTree(inTree, (0.5,1.0), '')
    plt.show()


# collect data
myDat, labels = trees.createDataSet()
mytree = trees.createTree(myDat, labels)
# visualize decision tree
createPlot(mytree)
import trees

# Walk the trees module end to end: entropy, splitting, feature choice,
# tree construction, and classification on the toy data set.
sample_rows, feature_names = trees.createDataSet()

print("------ shannon ------")
print(sample_rows)
print(trees.calcShannonEnt(sample_rows))

# print("------ shannon after changed ------")
# sample_rows[0][-1] = 'maybe'
# print(sample_rows)
# print(trees.calcShannonEnt(sample_rows))

print("------ split data set ------")
print(trees.splitDataSet(sample_rows, 0, 1))
print(trees.splitDataSet(sample_rows, 0, 0))

print("------ choose best feature to split ------")
print(trees.chooseBestFeatureToSplit(sample_rows))

print("------ create tree ------")
decision_tree = trees.createTree(sample_rows, feature_names)
print(decision_tree)

print("------ test tree classify ------")
print(trees.classify(decision_tree, ['no surfacing', 'flippers'], [1, 0]))
# -*- coding:utf-8 -*-
import trees

rows, label_names = trees.createDataSet()
# Keep an untouched copy of the labels for the classify() call further down.
saved_labels = label_names.copy()
print('myData is ', rows)

# Shannon entropy of the raw data set:
# ent = trees.calcShannonEnt(rows)
# print('myShannonEnt is ', ent)

# Exercising the split / best-feature helpers:
# subset = trees.splitDataSet(rows, 1, 0)
# print('mySplitDat is ', subset)
# best = trees.chooseBestFeatureToSplit(rows)
# print('myBestData is ', best)

decision_tree = trees.createTree(rows, label_names)
print('myTree is ', decision_tree)

# Classify one sample from the training set.
print('testLabels is ', saved_labels)
prediction = trees.classify(decision_tree, saved_labels, [1, 1])
print('testResult is ', prediction)

# trees.storeTree(decision_tree, 'classifierStorage.txt')
restored_tree = trees.grabTree('classifierStorage.txt')
print('fromFileTree is', restored_tree)
# This is a sample Python script.
# Press Shift+F10 to execute it or replace it with your code.
# Press Double Shift to search everywhere for classes, files, tool windows, actions, and settings.
import trees

# Press the green button in the gutter to run the script.
if __name__ == '__main__':
    sample_rows, feature_names = trees.createDataSet()
    # Split the data set on feature 0 with value 5.
    # NOTE(review): 5 looks absent from the toy data, so this presumably
    # demonstrates the empty-result case — confirm intent.
    subset = trees.splitDataSet(sample_rows, 0, 5)
    print(subset)
    winner = trees.chooseBestFeatureToSplit(sample_rows)
    print("best feature: %d" % winner)
# -*- coding: utf-8 -*-
# Demo transcript for the decision-tree chapter.
# Idiom fix: `import trees` / `import treePlotter` were repeated before every
# section; repeated imports are no-ops, so they are consolidated here once.
import trees
import treePlotter

# Find the feature (column) whose split maximizes information gain.
ds, ls = trees.createDataSet()
trees.chooseBestFeatureToSplit(ds)

# Build the decision tree from a fresh copy of the data set.
ds, ls = trees.createDataSet()
trees.createTree(ds, ls)

# Plot a canned tree.
mt = treePlotter.retrieveTree(0)
treePlotter.createPlot(mt)

# Use the decision tree to classify a sample.
it = treePlotter.retrieveTree(0)
ds, ls = trees.createDataSet()
trees.classify(it, ls, [0, 0])

# Serialize and deserialize the decision tree.
it = treePlotter.retrieveTree(0)
trees.storeTree(it, 'classifierStorage.txt')
ot = trees.grabTree('classifierStorage.txt')
"""Decision-tree test script."""
import trees

# Entropy smoke test (kept for reference):
# dataSet, lables = trees.createDataSet()
# print(dataSet)
# print(lables)
# shannonEnt = trees.calcShannonEnt(dataSet)
# print(shannonEnt)

rows, label_names = trees.createDataSet()
decision_tree = trees.createTree(rows, label_names)
# Re-fetch the labels for classification (createTree presumably consumed
# the first list).
d, l = trees.createDataSet()
result = trees.classify(decision_tree, l, [1, 0])
print(result)
def testCreateDataSet():
    """Return the (data set, labels) pair produced by trees.createDataSet()."""
    # Python-2 debug prints kept from the original for reference:
    # print myData
    # print labels
    return trees.createDataSet()
def splitDataSet(dataSet, axis, value):
    """Split a data set on a given feature value.

    Returns the rows of `dataSet` whose feature at index `axis` equals
    `value`, with that feature column removed. The input rows are copied,
    never mutated.

    Parameters:
        dataSet - list of feature vectors (lists)
        axis    - index of the feature to split on
        value   - feature value that selects a row
    Returns:
        retDataSet - new list of reduced rows
    Modify:
        2020-04-11
    """
    retDataSet = []                                  # rows to return
    for featVec in dataSet:                          # scan every row
        if featVec[axis] == value:
            reducedFeatVec = featVec[:axis]          # drop the axis column...
            reducedFeatVec.extend(featVec[axis + 1:])  # ...keeping what surrounds it
            retDataSet.append(reducedFeatVec)
    return retDataSet


if __name__ == '__main__':
    # Project import deferred to script entry so this module can be imported
    # (e.g. for testing) without the `trees` package on the path.
    import trees as tr

    dataSet, features = tr.createDataSet()
    print(splitDataSet(dataSet, 0, 1))
    print(splitDataSet(dataSet, 0, 0))
def main():
    """Build a decision tree from the sample data set and print it."""
    import trees
    rows, label_names = trees.createDataSet()
    print(trees.createTree(rows, label_names))
# author: zhumenger
import trees

sample_rows, feature_names = trees.createDataSet()
print(sample_rows)
print(feature_names)
# Shannon entropy: the higher the value, the more mixed the data.
print(trees.calcShannonEnt(sample_rows))
sample_rows[0][-1] = 'maybe'
print(trees.calcShannonEnt(sample_rows))

# Exercise splitDataSet().
print(trees.splitDataSet(sample_rows, 0, 1))
print(trees.splitDataSet(sample_rows, 0, 0))

# Search for the best way to partition the data (called twice, as in the
# original transcript).
trees.chooseBestFeatureToSplit(sample_rows)
print(trees.chooseBestFeatureToSplit(sample_rows))  # feature 0 wins here

# 3-4: build the full tree.
print(trees.createTree(sample_rows, feature_names))
inputTree - the decision tree already built (nested dicts)
featLabels - the optimal feature labels chosen while training
testVec - test vector; values ordered to match featLabels
Returns:
    classLabel - classification result
Modify:
    2020-04-11
"""
# NOTE(review): the lines above are the tail of a docstring whose opening
# triple-quote (and this file's imports, including `tr` and `at`) lie
# outside this view.


def classify(inputTree, featLabels, testVec):
    firstStr = next(iter(inputTree))  # root node: the single top-level key names the feature to test
    secondDict = inputTree[firstStr]  # branch table for that feature: value -> subtree or leaf label
    featIndex = featLabels.index(firstStr)  # column of that feature in testVec
    for key in secondDict.keys():
        if testVec[featIndex] == key:
            if type(secondDict[key]).__name__ == 'dict':
                # Internal node: keep descending.
                classLabel = classify(secondDict[key], featLabels, testVec)
            else:
                # Leaf node: this is the predicted class.
                classLabel = secondDict[key]
    # NOTE(review): if testVec[featIndex] matches no branch, classLabel is
    # never bound and this return raises UnboundLocalError — confirm callers
    # only pass feature values seen during training.
    return classLabel


if __name__ == '__main__':
    dataSet, labels = tr.createDataSet()
    print(labels)
    # NOTE(review): spelled 'retriveTree' — verify it matches the function
    # name in the module aliased as `at` (elsewhere it is 'retrieveTree').
    myTree = at.retriveTree(0)
    print(myTree)
    print(classify(myTree, labels, [1, 0]))
    print(classify(myTree, labels, [1, 1]))
# -*- coding: utf-8 -*-
"""
Created on Wed May 23 11:35:31 2018

@author: lijie
"""


def classify(inputTree, featLabels, testVec):
    """Walk the decision tree and return the class label for testVec.

    Parameters:
        inputTree  - nested dict: {feature name: {feature value: subtree or label}}
        featLabels - feature names, ordered like the columns of testVec
        testVec    - feature values of the sample to classify
    Returns:
        classLabel - the label stored at the leaf reached by testVec
    Raises:
        KeyError   - if testVec holds a feature value with no branch in the
                     tree. (Bug fix: the original left classLabel unbound in
                     that case and crashed with UnboundLocalError.)
    """
    firstStr = next(iter(inputTree))        # feature tested at this node
    secondDict = inputTree[firstStr]        # branch table: value -> subtree/label
    featIndex = featLabels.index(firstStr)  # column of that feature in testVec
    for key in secondDict:
        if testVec[featIndex] == key:
            subtree = secondDict[key]
            if isinstance(subtree, dict):   # internal node: recurse
                return classify(subtree, featLabels, testVec)
            return subtree                  # leaf: the class label
    raise KeyError(
        "no branch for value %r of feature %r" % (testVec[featIndex], firstStr))


if __name__ == '__main__':
    # Project imports deferred to script entry so classify() can be imported
    # without `trees`/`treePlotter` on the path.
    import trees
    import treePlotter

    # test
    tree = treePlotter.retrieveTree(0)
    dataset, labels = trees.createDataSet()
    a = classify(tree, labels, [1, 0])
    print(a)