def test_store_load(self):
    """Round-trip a freshly built decision tree through storeTree/grabTree.

    Builds a tree from the data returned by ``trees.createDataSet``, writes
    it to ``./mytree.txt`` and reads it back. Every intermediate value is
    printed for manual inspection; nothing is asserted.
    """
    samples, feature_names = trees.createDataSet()
    print("\n dataSet == %s" % samples)

    built_tree = trees.createTree(samples, feature_names)
    print("\n tree == %s" % built_tree)

    # Persist the tree, then load it again from the same path.
    storage_path = "./mytree.txt"
    trees.storeTree(built_tree, storage_path)
    restored_tree = trees.grabTree(storage_path)
    print("\n newTree == %s" % restored_tree)
def main():
    """Build the sample decision tree, plot it, then persist and reload it."""
    import trees
    import treePlotter

    samples, feature_names = trees.createDataSet()
    decision_tree = trees.createTree(samples, feature_names)
    # Alternative: use a canned tree instead of building one.
    # decision_tree = treePlotter.retrieveTree(1)
    treePlotter.createPlot(decision_tree, 'test.png')

    # Store the tree and immediately read it back to show the round trip.
    trees.storeTree(decision_tree, 'classifierStorage')
    decision_tree = trees.grabTree('classifierStorage')
    print(decision_tree)
def job_tree():
    """Rebuild the decision tree used for prediction and save it to disk.

    Reads tab-separated rows from ``data/job_test.csv``, builds a tree with
    ``trees.createTree`` using the labels returned by ``get_labels2()``,
    prints the tree as JSON and stores it in ``data/tree.txt``.

    Takes no arguments and returns nothing.
    """
    # A context manager guarantees the CSV handle is closed, even on error.
    with open(r'data/job_test.csv', encoding='UTF-8') as fr:
        rows = [line.strip().split('\t') for line in fr]

    feature_labels = get_labels2()
    job_decision_tree = trees.createTree(rows, feature_labels)

    print("决策树:")
    # ensure_ascii=False keeps non-ASCII text readable in the output.
    print(json.dumps(job_decision_tree, ensure_ascii=False))

    # Save the tree
    tree_path = r'data/tree.txt'
    trees.storeTree(job_decision_tree, tree_path)
# -*- coding: utf-8 -*-
import JobTree
import trees
import keras
import json

# Persist the tree exposed by the JobTree module.
fileName = r'tree.txt'
trees.storeTree(JobTree.Trees, fileName)

# Earlier variant kept for reference:
# print(json.dumps(trees.grabTree('job_tree.txt'), encoding="cp936", ensure_ascii=False))

# Reload the stored tree and dump it as JSON without escaping non-ASCII text.
reloaded_tree = trees.grabTree(fileName)
print(json.dumps(reloaded_tree, ensure_ascii=False))
print("1代表熟练掌握,2代表精通,3代表熟悉,4代表了解")
#print(trees.splitDataSet(mydata,0,1))
index = trees.chooseBestFeatureToSplit(mydata)
#print(index)

# Disabled: build and print the tree from the raw data.
# mytree = trees.createTree(mydata,features)
# print(mytree)

import treePlotter

# Disabled: plot a canned tree, add a third branch and plot it again.
# mytree = treePlotter.retrieveTree(0)
# treePlotter.createPlot(mytree)
# mytree['no surfacing'][3] = 'maybe'
# treePlotter.createPlot(mytree)

# Classify two sample vectors with a canned tree.
mytree = treePlotter.retrieveTree(0)
print(trees.classify(mytree, features, [0, 0]))
print(trees.classify(mytree, features, [1, 1]))

# Persist the tree and read it back.
trees.storeTree(mytree, 'classifier.txt')
grabtree = trees.grabTree('classifier.txt')
print(grabtree)

# Build and plot a tree from the tab-separated lenses data.
# The with-block closes the file; the original left the handle open.
with open('lenses.txt') as fr:
    lense = [inst.strip().split('\t') for inst in fr]
lensefeatures = ['age', 'prescript', 'astigmatic', 'tearrate']
lensetree = trees.createTree(lense, lensefeatures)
print(lensetree)
treePlotter.createPlot(lensetree)
print "获取叶节点的数目:", treePlotter.getNumLeafs(myTree) print "获取树的层数:", treePlotter.getTreeDepth(myTree) treePlotter.createPlot(myTree) myTree['no surfacing'][3] = 'maybe' print "myTree:", myTree treePlotter.createPlot(myTree) #3.3.1 测试算法:使用决策树执行分类 myDat, labels = trees.createDataSet() print "labels:", labels myTree = treePlotter.retrieveTree(0) print "myTree:", myTree print "分类1:", trees.classify(myTree, labels, [1, 0]) print "分类2:", trees.classify(myTree, labels, [1, 1]) #3.3.2 决策树的存储 trees.storeTree(myTree, homedir + 'classifierStorage.txt') print "决策树调取:", trees.grabTree(homedir + 'classifierStorage.txt') print ":", print ":", #3.4 示例:使用决策树预测隐形眼镜类型 fr = open(homedir + 'lenses.txt') print 'fr:', fr lenses = [inst.strip().split('\t') for inst in fr.readlines()] print 'lenses:', lenses lensesLabels = [' age', 'prescript', 'astigmatic', 'tearRate'] print 'lensesLabels:', lensesLabels lensesTree = trees.createTree(lenses, lensesLabels) treePlotter.createPlot(lensesTree)
# Single-argument print(...) calls behave identically on Python 2 and 3.
ig = shannon - hxy
print(ig)

print('--找到最佳分类特征')
feature = trees.chooseBestFeatureToSplit(dateset)
print(labels[feature])

print('--创建决策树')
# createTree receives a copy so `labels` stays intact for classify below.
labelsCopy = labels[:]
tree = trees.createTree(dateset, labelsCopy)
print(tree)

# print '--画图'
# treePlotter.createPlot(tree)

print('--用决策树测试数据')
#mytree = treePlotter.retrieveTree(0)
testdata = [4, 4, 1, 'cha']
label = trees.classify(tree, labels, testdata)
print(label)

print('--保存树')
trees.storeTree(tree, 'houseTree')

print('--测试隐形眼镜类型')
# for line in fr.readlines():
#     print line
#     row = line.strip().split('\t');
#     print row
# The with-block closes lenses.txt; the original left the handle open.
with open('lenses.txt') as fr:
    lenses = [inst.strip().split('\t') for inst in fr]
len_labels = ['age', 'prescript', 'astigmatic', 'tearRate']
len_tree = trees.createTree(lenses, len_labels)
print(len_tree)
print(len_labels)
treePlotter.createPlot(len_tree)
# Bare expressions below only display a value in an interactive session.
labels

# Split the data set on a given column and value
trees.splitDataSet(myData, 0, 1)
trees.splitDataSet(myData, 0, 0)

# Find the best way to split the data set, computed using entropy
trees.chooseBestFeatureToSplit(myData)

# Build the decision tree
myTree = trees.createTree(myData, labels)
myTree

# Check the decision tree against test data
# NOTE(review): `creatDataSet` may be a typo for `createDataSet` — confirm
# against the trees module before renaming.
myData, labels = trees.creatDataSet()
trees.classify(myTree, labels, [1, 0])
trees.classify(myTree, labels, [1, 1])

# Store the decision tree
trees.storeTree(myTree, 'classifierStore.txt')

# Load the decision tree that was stored as a file
trees.grabTree('classifierStore.txt')

# Build the contact-lens type tree
# The with-block closes lenses.txt; the original left the handle open.
with open('lenses.txt') as fr:
    lenses = [inst.strip().split('\t') for inst in fr]
lensesLabels = ['age', 'prescript', 'astigmatic', 'tearRate']
lensesTree = trees.createTree(lenses, lensesLabels)
lensesTree
# -*- coding: utf-8 -*-
import treeplot
import trees

# Store and reload through ONE path. The original wrote 'classifierStorage'
# but read 'classifierStorage.txt', so the reload never saw the stored tree.
storagePath = 'classifierStorage.txt'

# NOTE(review): myTree is not defined in the visible code — confirm it is
# created before this point.
trees.storeTree(myTree, storagePath)
# Parenthesized single-argument print works on both Python 2 and 3.
print(trees.grabTree(storagePath))
# main
# Read the lenses data and build the tree.
# The with-block closes lenses.txt; the original left the handle open.
with open('lenses.txt') as fr:
    lenses = [inst.strip().split('\t') for inst in fr]
lensesLabels = ['age', 'prescript', 'astigmatic', 'tearRate']
lensesTree = createTree(lenses, lensesLabels)
print(lensesTree)

# plot tree
tP.createPlot(lensesTree)

# Classify a new sample.
# The label list is rebuilt here — presumably because createTree modifies the
# list it is given; TODO confirm.
lensesLabels = ['age', 'prescript', 'astigmatic', 'tearRate']
testVec = ['young', 'hyper', 'yes', 'normal']
result = classify(lensesTree, lensesLabels, testVec)
print(result)

# Store the built tree and load it back.
tr_f.storeTree(lensesTree, 'ClassfyTree_lenses.txt')
load_tree = tr_f.grabTree('ClassfyTree_lenses.txt')
print(load_tree)

# Classify the original data set
#lensesLabels = ['age', 'prescript', 'astigmatic', 'tearRate']
#classify(lensesTree, lensesLabels, lenses[0][:-1])
#
#preds = []
#for i in range(len(lenses)):
#    pred = classify(lensesTree, lensesLabels, lenses[i][:-1])
#    preds.append(pred)
#print(preds)
# myDat[0][-1] = 'maybe'
# print(myDat)
# print(trees.calcShannonEnt(myDat))

# Three arguments: data set, feature to split on, feature value.
# Selects the rows whose feature equals that value.
# print(trees.splitDataSet(myDat,0,1))

# Choose the single best feature for classification
# print(trees.chooseBestFeatureToSplit(myDat))

# Tree structure (a dict)
# print(trees.createTree(myDat,labels))

# Draw the tree structure
# treePlotter.createPlot()

# Leaf count and depth of the tree
# print(treePlotter.getNumleafs(trees.createTree(myDat,labels)))
# print(treePlotter.getTreeDepth(trees.createTree(myDat,labels)))
#
# treePlotter.createPlot(trees.createTree(myDat,labels))

# Test the classifier
myTree = {
    'no surfacing': {
        0: 'no',
        1: {'flippers': {0: 'no', 1: 'yes'}},
    },
}
# print(trees.classify(myTree,labels,[1,1]))

# Test storing and reloading the decision-tree model
trees.storeTree(myTree, 'testClassify.txt')
reloaded_tree = trees.grabTree('testClassify.txt')
print(reloaded_tree)
0: 'no', 1: 'yes' } }, 1: 'no' } } } }] return listOfTrees[i] if __name__ == '__main__': import trees import treePlotter dataSet, labels = trees.createDataSet() myTree = trees.createTree(dataSet, labels) print(myTree) treePlotter.createPlot(myTree) fr = open('lenses.txt') lenses = [inst.strip().split('\t') for inst in fr.readlines()] lensesLabels = ['age', 'prescript', 'astigmatic', 'tearRate'] lensesTree = trees.createTree(lenses, lensesLabels) print(lensesTree) treePlotter.createPlot(lensesTree) trees.storeTree(lensesTree, 'test.txt') trees.grabTree('test.txt')
# -*- coding=utf-8 -*-
"""
make_lenses_tree
Date: 16/2/27
Company:
Copyright (c) 2016 Ninlgde co.,Ltd. All right reserved.
"""
import trees
import treePlotter as tp

__author__ = "Ninlgde"

if __name__ == "__main__":
    # Read the tab-separated lenses data; the with-block closes the file,
    # which the original never did.
    with open("lenses.txt") as fr:
        lenses = [inst.strip().split("\t") for inst in fr]
    lensesLabels = ["age", "prescript", "astigmatic", "tearRate"]
    lensesTree = trees.createTree(lenses, lensesLabels)

    # Round-trip the tree through disk and plot the reloaded copy.
    trees.storeTree(lensesTree, "lenses_tree.bin")
    tree = trees.grabTree("lenses_tree.bin")
    tp.createPlot(tree)
import trees
import treePlotter

# Directory holding the chapter data. A raw string keeps every backslash
# literal; the original non-raw literals relied on invalid escape sequences
# (\m, \C, \c, \l), which newer Python versions warn about.
CH03_DIR = r'F:\python库包\机器学习实战源代码\machinelearninginaction\Ch03'
storagePath = CH03_DIR + r'\classifierStorage.txt'
lensesPath = CH03_DIR + r'\lenses.txt'

# Store the classifier on disk so that it persists
myDat, labels = trees.createDataSet()
myTree = treePlotter.retrieveTree(0)
trees.storeTree(myTree, storagePath)
trees.grabTree(storagePath)

# The with-block closes the lenses file; the original left the handle open.
with open(lensesPath) as fr:
    lenses = [inst.strip().split('\t') for inst in fr]
lensesLabels = ['age', 'prescript', 'astigmatic', 'tearRate']
lensesTree = trees.createTree(lenses, lensesLabels)  # build the decision tree
print(lensesTree)
treePlotter.createPlot(lensesTree)  # plot it
# Columns: name industry profession sex photography road-trips SNS-savvy
#          github over-GFW avid-reader sci-fi-fan wide-interests brags class
def getTrainingDatas():
    """Return the hard-coded training rows and their feature labels.

    Returns:
        (dataSet, labels): ``dataSet`` is a list of rows, each holding twelve
        feature values followed by the class string; ``labels`` is the list
        of the twelve feature names in column order. New lists are built on
        every call.
    """
    dataSet = [
        ["it", "gm", "man", 1, 1, 0, 0, 0, 0, 0, 1, 0, "liver"],
        ["it", "engineer", "man", 0, 1, 0, 0, 0, 0, 0, 0, 0, "empty"],
        ["it", "sale", "man", 0, 1, 0, 0, 0, 0, 0, 0, 1, "liver"],
        ["it", "founder", "man", 0, 1, 1, 0, 0, 0, 0, 1, 0, "boss"],
        ["it", "phd", "man", 1, 0, 0, 0, 0, 1, 0, 1, 0, "liver, fake hacker"],
        ["it", "engineer", "man", 0, 1, 0, 1, 1, 1, 1, 1, 0, "fake hacker"],
        ["it", "engineer", "man", 0, 0, 0, 0, 0, 1, 0, 1, 0, "fake hacker"],
        ["it", "engineer", "man", 0, 0, 0, 1, 1, 1, 1, 1, 0, "fake hacker"],
    ]
    labels = [
        "industry", "profession", "sex", "camera", "drive tour", "SNS",
        "github", "over GFW", "reader", "Science fiction fan", "hobby", "brag",
    ]
    return dataSet, labels


if __name__ == "__main__":
    # Both modes need the labels, so fetch the training data once.
    dataSet, labels = getTrainingDatas()
    if len(sys.argv) > 1:
        # classify test, tortoise
        classmate = ["tortoise", "it", "engineer", "man",
                     1, 0, 0, 0, 0, 1, 0, 1, 1]
        tree = trees.grabTree("cm_tree.txt")
        # classmate[0] is the name; the remaining items are the features.
        # Parenthesized single-argument print works on Python 2 and 3.
        print("{} is \"{}\"".format(
            classmate[0], trees.classify(tree, labels, classmate[1:])))
    else:
        # training
        # createTree gets a copy of the label list, as in the original.
        tree = trees.createTree(dataSet, list(labels))
        trees.storeTree(tree, "cm_tree.txt")
        treePlotter.createPlot(tree)
import trees

ds, ls = trees.createDataSet()
trees.createTree(ds, ls)

# Draw the tree
import treePlotter

mt = treePlotter.retrieveTree(0)
treePlotter.createPlot(mt)

# Classify with the decision tree
# (trees and treePlotter are already imported above; the repeated imports
# were no-ops and are not restated.)
it = treePlotter.retrieveTree(0)
ds, ls = trees.createDataSet()
trees.classify(it, ls, [0, 0])

# Serialize and deserialize the decision tree
it = treePlotter.retrieveTree(0)
trees.storeTree(it, 'classifierStorage.txt')
ot = trees.grabTree('classifierStorage.txt')

# Contact-lens data set test
# The with-block closes lenses.txt; the original left the handle open.
with open('lenses.txt') as fr:
    ds = [example.strip().split("\t") for example in fr]
# Label corrected from the misspelled 'antigmatic', which would otherwise
# appear as a node name in the tree and its plot.
ls = ['age', 'prescript', 'astigmatic', 'tearRate']
mt = trees.createTree(ds, ls)
treePlotter.createPlot(mt)
import treePlotter

# All examples below are disabled; they were previously kept as bare string
# blocks, which have no effect when the script runs.

# Draw the decision tree:
# treePlotter.createPlot()

# Get the tree's leaf count and depth:
# myTree = treePlotter.retrieveTree(0)
# print(treePlotter.getNumLeafs(myTree))
# print(treePlotter.getTreeDepth(myTree))

# Generate the decision diagram:
# myTree = treePlotter.retrieveTree(0)
# myTree['no surfacing'][3] = 'maybe'
# treePlotter.createPlot(myTree)

# Test the algorithm:
# myTree = treePlotter.retrieveTree(0)
# print(trees.classify(myTree,labels,[1,0]))
# print(trees.classify(myTree,labels,[1,1]))

# Store the decision tree with the pickle module:
# myTree = treePlotter.retrieveTree(0)
# trees.storeTree(myTree,'classifierStorage.txt')
# print(trees.grabTree('classifierStorage.txt'))
# treePlotter.createPlot()

# Disabled: inspect a canned tree.
# print(treePlotter.retrieveTree(1))
# myTree = treePlotter.retrieveTree(0)
# print(treePlotter.getNumLeafs(myTree))
# print(treePlotter.getTreeDepth(myTree))

# Draw the tree (disabled)
# myTree = treePlotter.retrieveTree(0)
# # treePlotter.createPlot(myTree)  # no axis labels
# myTree['no surfacing'][3] = 'maybe'
# print(myTree)
# treePlotter.createPlot(myTree)

# Store the decision tree with the pickle module (disabled)
# myTree = treePlotter.retrieveTree(0)
# trees.storeTree(myTree, 'classifierStorage.txt')
# print(trees.grabTree('classifierStorage.txt'))

# Contact-lens data set
# The with-block closes lenses.txt; the original left the handle open.
with open('lenses.txt') as fr:
    lenses = [inst.strip().split('\t') for inst in fr]
lensesLabels = ['age', 'prescript', 'astigmatic', 'tearRate']
lensesTree = trees.createTree(lenses, lensesLabels)
treePlotter.createPlot(lensesTree)
import math
import operator
import matplotlib.pyplot as plt
import pickle
import trees
import treeplotter

# Build the sample data and fetch a canned tree.
myDat, labels = trees.createDataSet()
myTree = treeplotter.retrieveTree(0)

# Round-trip the tree through disk.
trees.storeTree(myTree, 'classifierstorage.txt')
ans = trees.grabTree('classifierstorage.txt')

# Write the reloaded tree's string form to the output file.
with open('output.out', 'w') as f:
    rendered = str(ans)
    f.write(rendered)
# Single-argument print(...) calls and list(d.keys()) behave identically on
# Python 2 and also run on Python 3, where dict.keys() is not subscriptable.
print('决策树:')
Tree = trees.createTree(dataset, labels)
print(Tree)
firstFeature = list(Tree.keys())[0]
print(firstFeature)
firstFeatureValues = list(Tree[firstFeature].keys())
print(firstFeatureValues)
print('\n')

treePlotter.createPlot(Tree)

testVec = ['pre', 'myope', 'yes', 'normal']
print('测试数据')
print(testVec)
# NOTE(review): 'tearRate' is appended back onto labels before classifying —
# presumably createTree removed it from the list; confirm.
labels.append('tearRate')
print('匹配过程:')
result = trees.classify(Tree, labels, testVec)
print('匹配结果:')
print(result)
print('\n')

# Store the tree on disk
print('将树存放磁盘...')
trees.storeTree(Tree, 'myTree.txt')
print('\n')

# Read the tree back from disk
print('再从磁盘中读取树:')
print(trees.grabTree('myTree.txt'))
# _*_ coding:utf-8 _*_
import trees

# The disabled experiments below were previously kept as bare string blocks.

# mydat,labels=trees.createDataSet()
# result=trees.splitDataSet(mydat,0,1)
# print (result)

# mydat,labels=trees.createDataSet()
# print (trees.chooseBestFeatureToSplit(mydat))

# mydat,labels=trees.createDataSet()
# mytree=trees.createTree(mydat,labels)
# print (mytree)

# Fetch a canned tree, store it, then print the reloaded copy.
mydat, labels = trees.createDataSet()
mytree = trees.retrieveTree(0)
trees.storeTree(mytree, 'classStorage.txt')
reloaded_tree = trees.grabTree('classStorage.txt')
print(reloaded_tree)
import treePlotter
import trees

# Build a decision tree from the sample data set.
myDat, labels = trees.createDataSet()
myTree = trees.createTree(myDat, labels)

# Show it as text and as a plot.
print(myTree)
treePlotter.createPlot(myTree)

# Persist the tree for later use.
trees.storeTree(myTree, 'myStoreTree')
myTree = TP.retrieveTree( 0) #{'no surfacing': {0: 'no', 1: {'flippers': {0: 'no', 1: 'yes'}}}} n = TP.getNumLeafs(myTree) # 3 d = TP.getTreeDepth(myTree) # 2 TP.createPlot(myTree) # classify myDat, labels = DT.createDataSet() myTree = TP.retrieveTree( 0) # {'no surfacing': {0: 'no', 1: {'flippers': {0: 'no', 1: 'yes'}}}} class1 = DT.classify(myTree, labels, [1, 0]) # no class2 = DT.classify(myTree, labels, [1, 1]) # yes # storing the tree pickeld form DT.storeTree(myTree, 'data/classifierStorage.txt') grabedTree = DT.grabTree( 'data/classifierStorage.txt' ) # {'no surfacing': {0: 'no', 1: {'flippers': {0: 'no', 1: 'yes'}}}} # lens tree fr = open('data/lenses.txt') lenses = [inst.strip().split('\t') for inst in fr.readlines()] lensesLabels = ['age', 'prescript', 'astigmatic', 'tearRate'] lensesTree = DT.createTree(lenses, lensesLabels) """ output: {'tearRate': {'reduced': 'no lenses', 'normal': {'astigmatic': {'yes': {'prescript': {'myope': 'hard', 'hyper': {'age': {'young': 'hard', 'presbyopic': 'no lenses',
# Every print now uses the parenthesized single-argument form, which behaves
# identically on Python 2 and 3 (the original mixed both syntaxes).
print(mySplit1)
shannonEntSplit1 = trees.calcShannonEnt(mySplit1)
print(shannonEntSplit1)

mySplit2 = trees.splitDataSet(myDat, 0, 0)
print(mySplit2)
shannonEntSplit2 = trees.calcShannonEnt(mySplit2)
print(shannonEntSplit2)

mySplit3 = trees.splitDataSet(myDat, 1, 1)
print(mySplit3)
shannonEntSplit3 = trees.calcShannonEnt(mySplit3)
print(shannonEntSplit3)

bestFeature = trees.chooseBestFeatureToSplit(myDat)
print(bestFeature)

myTree = trees.createTree(myDat, labels)
print(myTree)

import treePlotter

# Plot a canned tree, then round-trip it through disk.
myTree2 = treePlotter.retrieveTree(0)
treePlotter.createPlot(myTree2)
trees.storeTree(myTree2, 'classifierStorage2.txt')
print(trees.grabTree('classifierStorage2.txt'))

# Build and plot the lenses tree.
# The with-block closes lenses.txt; the original left the handle open.
with open('lenses.txt') as fr:
    lenses = [inst.strip().split('\t') for inst in fr]
lensesLabels = ['age', 'prescript', 'astigmatic', 'tearRate']
lensesTree = trees.createTree(lenses, lensesLabels)
print(lensesTree)
treePlotter.createPlot(lensesTree)
# Parse the already-open lenses file into tab-separated rows.
lenses = [inst.strip().split('\t') for inst in fr]

# Two separate lists with the same names: one is handed to createTree, the
# other to classify. Presumably createTree modifies the list it receives —
# TODO confirm.
lensesLabels = ['age', 'presscript', 'astigmatic', 'tearRate']
labels = list(lensesLabels)

# Tree recorded by the original author:
#{'tearRate': {'reduced': 'no lenses',
#              'normal': {'astigmatic': {'yes': {'presscript': {
#                                                   'hyper': {'age': {
#                                                       'pre': 'no lenses',
#                                                       'presbyopic': 'no lenses',
#                                                       'young': 'hard'}
#                                                   },
#                                                   'myope': 'hard'}},
#                                        'no': {'age': {'pre': 'soft',
#                                                       'presbyopic': {'presscript': {'hyper': 'soft',
#                                                                                     'myope': 'no lenses'}},
#                                                       'young': 'soft'}
#                                        }}}}}
lensesTree = trees.createTree(lenses, labels)

# Persist the tree, then show it.
lensesTreeFileName = 'lensesTreeClassed.txt'
trees.storeTree(lensesTree, lensesTreeFileName)
print(lensesTree)

# Sample rows from the data file:
# young  hyper  no   normal   soft
# young  hyper  yes  reduced  no lenses
# young  hyper  yes  normal   hard
# pre    myope  no   reduced  no lenses
# pre    myope  no   normal   soft
sampleVector = ['young', 'myope', 'yes', 'normal']
lensesClass = trees.classify(lensesTree, lensesLabels, sampleVector)
print(lensesClass)
treePlotter.createPlot(lensesTree)