示例#1
0
 def test_store_load(self):
     """Build a tree from the sample data, store it, reload it, and print each stage."""
     path = "./mytree.txt"

     samples, feature_names = trees.createDataSet()
     print("\n dataSet == %s" % (samples))

     built = trees.createTree(samples, feature_names)
     print("\n tree == %s" % (built))

     # Round-trip the tree through the file on disk.
     trees.storeTree(built, path)
     restored = trees.grabTree(path)
     print("\n newTree == %s" % (restored))
示例#2
0
def main():
    """Build the sample tree, plot it to 'test.png', then store, reload and print it."""
    import trees
    import treePlotter

    storage = 'classifierStorage'

    samples, feature_names = trees.createDataSet()
    tree = trees.createTree(samples, feature_names)
    treePlotter.createPlot(tree, 'test.png')

    # Round-trip through the storage file and show what comes back.
    trees.storeTree(tree, storage)
    reloaded = trees.grabTree(storage)
    print(reloaded)
示例#3
0
def job_tree():
    '''
    Rebuild the decision tree used for prediction and save it to disk.

    Reads tab-separated rows from data/job_test.csv, builds a tree from them
    with the labels returned by get_labels2(), prints the tree as JSON and
    stores it in data/tree.txt.

    :return: None
    '''
    # The context manager closes the file even if parsing a row raises.
    with open(r'data/job_test.csv', encoding='UTF-8') as fr:
        listWm = [inst.strip().split('\t') for inst in fr]

    labels = get_labels2()
    Trees = trees.createTree(listWm, labels)
    print("决策树:")
    # ensure_ascii=False keeps non-ASCII characters readable in the output.
    print(json.dumps(Trees, ensure_ascii=False))
    # Save the tree.
    fileName = r'data/tree.txt'
    trees.storeTree(Trees, fileName)
示例#4
0
# -*- coding: utf-8 -*-
import JobTree
import trees
import keras
# Store the tree exposed by the JobTree module as `Trees` in tree.txt.
fileName = r'tree.txt'
trees.storeTree(JobTree.Trees, fileName)
# import json
# print(json.dumps(trees.grabTree('job_tree.txt'), encoding="cp936", ensure_ascii=False))
import json
# Read the stored tree back and print it as JSON; ensure_ascii=False leaves
# non-ASCII characters unescaped in the output.
print(json.dumps(trees.grabTree(fileName), ensure_ascii=False))
# Legend printed in Chinese: 1 = proficient, 2 = expert, 3 = familiar, 4 = basic understanding.
print("1代表熟练掌握,2代表精通,3代表熟悉,4代表了解")

示例#5
0
#print(trees.splitDataSet(mydata,0,1))

index = trees.chooseBestFeatureToSplit(mydata)
#print(index)
'''
mytree = trees.createTree(mydata,features)
print(mytree)
'''
import treePlotter
'''
mytree = treePlotter.retrieveTree(0)
treePlotter.createPlot(mytree)
mytree['no surfacing'][3] = 'maybe'
treePlotter.createPlot(mytree)
'''

mytree = treePlotter.retrieveTree(0)
print(trees.classify(mytree,features,[0,0]))
print(trees.classify(mytree,features,[1,1]))

trees.storeTree(mytree, 'classifier.txt')
grabtree = trees.grabTree('classifier.txt')
print(grabtree)


fr = open('lenses.txt')
lense =[inst.strip().split('\t') for inst in fr.readlines()]
lensefeatures = ['age', 'prescript', 'astigmatic', 'tearrate']
lensetree = trees.createTree(lense,lensefeatures)
print(lensetree)
treePlotter.createPlot(lensetree)
print "获取叶节点的数目:", treePlotter.getNumLeafs(myTree)
print "获取树的层数:", treePlotter.getTreeDepth(myTree)
treePlotter.createPlot(myTree)
myTree['no surfacing'][3] = 'maybe'
print "myTree:", myTree
treePlotter.createPlot(myTree)

#3.3.1 测试算法:使用决策树执行分类
myDat, labels = trees.createDataSet()
print "labels:", labels
myTree = treePlotter.retrieveTree(0)
print "myTree:", myTree
print "分类1:", trees.classify(myTree, labels, [1, 0])
print "分类2:", trees.classify(myTree, labels, [1, 1])

#3.3.2  决策树的存储
trees.storeTree(myTree, homedir + 'classifierStorage.txt')
print "决策树调取:", trees.grabTree(homedir + 'classifierStorage.txt')
print ":",
print ":",

#3.4 示例:使用决策树预测隐形眼镜类型
fr = open(homedir + 'lenses.txt')
print 'fr:', fr
lenses = [inst.strip().split('\t') for inst in fr.readlines()]
print 'lenses:', lenses
lensesLabels = [' age', 'prescript', 'astigmatic', 'tearRate']
print 'lensesLabels:', lensesLabels
lensesTree = trees.createTree(lenses, lensesLabels)
treePlotter.createPlot(lensesTree)
示例#7
0
ig = shannon - hxy
print ig
print '--找到最佳分类特征'
feature = trees.chooseBestFeatureToSplit(dateset)
print labels[feature]
print '--创建决策树'
labelsCopy = labels[:]
tree = trees.createTree(dateset, labelsCopy)
print tree
# print '--画图'
# treePlotter.createPlot(tree)
print '--用决策树测试数据'
#mytree = treePlotter.retrieveTree(0)
testdata = [4, 4, 1, 'cha']
label = trees.classify(tree, labels, testdata)
print label
print '--保存树'
trees.storeTree(tree, 'houseTree')
print '--测试隐形眼镜类型'
fr = open('lenses.txt')
# for line in fr.readlines():
#     print line
#     row = line.strip().split('\t');
#     print row
lenses = [inst.strip().split('\t') for inst in fr.readlines()]
len_labels = ['age', 'prescript', 'astigmatic', 'tearRate']
len_tree = trees.createTree(lenses, len_labels)
print len_tree
print len_labels
treePlotter.createPlot(len_tree)
示例#8
0
labels

# 调用函数,指定列,划分数据用的
trees.splitDataSet(myData, 0, 1)
trees.splitDataSet(myData, 0, 0)

# 调用函数,获取最好的数据集划分方式,使用熵来计算
trees.chooseBestFeatureToSplit(myData)

# 调用决策树函数
myTree = trees.createTree(myData, labels)
myTree

# 在测试数据上看决策树效果
myData, labels = trees.creatDataSet()
trees.classify(myTree, labels, [1, 0])
trees.classify(myTree, labels, [1, 1])

# 调用存储决策树的函数
trees.storeTree(myTree, 'classifierStore.txt')

# 调用存储为文件形式的决策树
trees.grabTree('classifierStore.txt')

# 生成隐形眼镜的类型的函数
fr = open('lenses.txt')
lenses = [inst.strip().split('\t') for inst in fr.readlines()]
lensesLabels = ['age', 'prescript', 'astigmatic', 'tearRate']
lensesTree = trees.createTree(lenses, lensesLabels)
lensesTree
# -*- coding: utf-8 -*-
import treeplot
import trees
trees.storeTree(myTree , 'classifierStorage')
print trees.grabTree('classifierStorage.txt')
    `
示例#10
0

# main
  #读取眼镜数据并构建树
fr = open('lenses.txt')
lenses = [inst.strip().split('\t') for inst in fr.readlines()]
lensesLabels = ['age', 'prescript', 'astigmatic', 'tearRate']
lensesTree = createTree(lenses,lensesLabels)
print(lensesTree)

# plot tree
tP.createPlot(lensesTree)
#对新数据进行分类
lensesLabels = ['age', 'prescript', 'astigmatic', 'tearRate']
testVec=['young','hyper','yes','normal']
result=classify(lensesTree,lensesLabels, testVec)
print(result)

#存储构建的树并加载树
tr_f.storeTree(lensesTree,'ClassfyTree_lenses.txt')
load_tree=tr_f.grabTree('ClassfyTree_lenses.txt')
print(load_tree)
# 原始数据集分类
#lensesLabels = ['age', 'prescript', 'astigmatic', 'tearRate']
#classify(lensesTree, lensesLabels, lenses[0][:-1])
#
#preds = []
#for i in range(len(lenses)):
#    pred = classify(lensesTree, lensesLabels, lenses[i][:-1])
#    preds.append(pred)
#print(preds)
示例#11
0
# myDat[0][-1] = 'maybe'
# print(myDat)
# print(trees.calcShannonEnt(myDat))

# 三个参数 数据集 要划分的特征 特征值
# 在数据集中找特征等于特征值的项
# print(trees.splitDataSet(myDat,0,1))

# 选择最适合分类的一个特征
# print(trees.chooseBestFeatureToSplit(myDat))

# 树结构 字典
# print(trees.createTree(myDat,labels))

# 画出树结构
# treePlotter.createPlot()

# 树的节点数和深度
# print(treePlotter.getNumleafs(trees.createTree(myDat,labels)))
# print(treePlotter.getTreeDepth(trees.createTree(myDat,labels)))

#
# treePlotter.createPlot(trees.createTree(myDat,labels))

# 测试分类器
myTree = {'no surfacing': {0: 'no', 1: {'flippers': {0: 'no', 1: 'yes'}}}}
# print(trees.classify(myTree,labels,[1,1]))

# 测试存储 读取决策树模型
trees.storeTree(myTree, 'testClassify.txt')
print(trees.grabTree('testClassify.txt'))
                            0: 'no',
                            1: 'yes'
                        }
                    },
                    1: 'no'
                }
            }
        }
    }]
    return listOfTrees[i]


if __name__ == '__main__':
    import trees
    import treePlotter

    # Build, print and plot the tree for the built-in sample data set.
    dataSet, labels = trees.createDataSet()
    myTree = trees.createTree(dataSet, labels)
    print(myTree)
    treePlotter.createPlot(myTree)

    # Parse the tab-separated lenses data; the context manager closes the
    # file as soon as the rows have been read.
    with open('lenses.txt') as fr:
        lenses = [inst.strip().split('\t') for inst in fr]
    lensesLabels = ['age', 'prescript', 'astigmatic', 'tearRate']
    lensesTree = trees.createTree(lenses, lensesLabels)
    print(lensesTree)
    treePlotter.createPlot(lensesTree)

    # Store the lenses tree and read it back.
    trees.storeTree(lensesTree, 'test.txt')
    trees.grabTree('test.txt')
示例#13
0
# -*- coding=utf-8 -*-
"""
make_lenses_tree
Date: 16/2/27
Company: Copyright (c) 2016 Ninlgde co.,Ltd. All right reserved.
"""

import trees
import treePlotter as tp

__author__ = "Ninlgde"

if __name__ == "__main__":
    # Parse the tab-separated lenses data; the context manager closes the
    # file as soon as the rows have been read.
    with open("lenses.txt") as fr:
        lenses = [inst.strip().split("\t") for inst in fr]
    lensesLabels = ["age", "prescript", "astigmatic", "tearRate"]
    lensesTree = trees.createTree(lenses, lensesLabels)

    # Round-trip the tree through disk and plot the reloaded copy.
    trees.storeTree(lensesTree, "lenses_tree.bin")
    tree = trees.grabTree("lenses_tree.bin")
    tp.createPlot(tree)
示例#14
0
import trees
import treePlotter

# Raw strings keep every backslash literal. The previous literals relied on
# unrecognised escape sequences (e.g. "\m", "\C"), which only worked by
# accident and trigger warnings on current Python versions.
STORAGE_PATH = r'F:\python库包\机器学习实战源代码\machinelearninginaction\Ch03\classifierStorage.txt'
LENSES_PATH = r'F:\python库包\机器学习实战源代码\machinelearninginaction\Ch03\lenses.txt'

# Persist the classifier to disk, then read it back.
myDat, labels = trees.createDataSet()
myTree = treePlotter.retrieveTree(0)
trees.storeTree(myTree, STORAGE_PATH)
trees.grabTree(STORAGE_PATH)

# Parse the tab-separated lenses data; the context manager closes the file.
with open(LENSES_PATH) as fr:
    lenses = [inst.strip().split('\t') for inst in fr]
lensesLabels = ['age', 'prescript', 'astigmatic', 'tearRate']
lensesTree = trees.createTree(lenses, lensesLabels)  # build the decision tree
print(lensesTree)
treePlotter.createPlot(lensesTree)  # plot it
# name   industry   profession    sex    摄影     自驾游    SNS达人   github   翻墙   常阅读   科幻迷  兴趣广泛    吹牛    分类

def getTrainingDatas():
    """Return the hard-coded training rows and the names of their features.

    Each row holds twelve feature values followed by the class label in the
    last position; the label list names the twelve features in column order.

    :return: tuple ``(dataSet, labels)`` of freshly built lists.
    """
    feature_names = [
        "industry", "profession", "sex", "camera", "drive tour", "SNS",
        "github", "over GFW", "reader", "Science fiction fan", "hobby", "brag",
    ]

    # (profession, the nine 0/1 columns from "camera" to "brag", class label).
    # Every row shares industry "it" and sex "man".
    compact = [
        ("gm",       (1, 1, 0, 0, 0, 0, 0, 1, 0), "liver"),
        ("engineer", (0, 1, 0, 0, 0, 0, 0, 0, 0), "empty"),
        ("sale",     (0, 1, 0, 0, 0, 0, 0, 0, 1), "liver"),
        ("founder",  (0, 1, 1, 0, 0, 0, 0, 1, 0), "boss"),
        ("phd",      (1, 0, 0, 0, 0, 1, 0, 1, 0), "liver, fake hacker"),
        ("engineer", (0, 1, 0, 1, 1, 1, 1, 1, 0), "fake hacker"),
        ("engineer", (0, 0, 0, 0, 0, 1, 0, 1, 0), "fake hacker"),
        ("engineer", (0, 0, 0, 1, 1, 1, 1, 1, 0), "fake hacker"),
    ]
    rows = [
        ["it", profession, "man"] + list(flags) + [category]
        for profession, flags, category in compact
    ]
    return rows, feature_names

if __name__ == "__main__":
    if len(sys.argv) > 1:
        # Any command-line argument selects classification mode: load the
        # stored tree and classify one hard-coded sample.
        # classmate[0] is the person's name; the rest are the feature values.
        classmate = ["tortoise", "it", "engineer",       "man", 1, 0, 0, 0, 0, 1, 0, 1, 1]
        dataSet, labels = getTrainingDatas()
        tree = trees.grabTree("cm_tree.txt")
        # Parenthesised single-argument print works on Python 2 and Python 3.
        print("{} is \"{}\"".format(classmate[0], trees.classify(tree, labels, classmate[1:])))
    else:
        # Training mode: build the tree, store it, and plot it.
        dataSet, labels = getTrainingDatas()
        # createTree receives a copy of the label list, so `labels` itself
        # is left untouched.
        tree = trees.createTree(dataSet, list(labels))
        trees.storeTree(tree, "cm_tree.txt")
        treePlotter.createPlot(tree)
示例#16
0
import trees
ds, ls = trees.createDataSet()
trees.createTree(ds, ls)

# Plot a tree
import treePlotter
mt = treePlotter.retrieveTree(0)
treePlotter.createPlot(mt)

# Classify a sample with the decision tree
import trees
import treePlotter
it = treePlotter.retrieveTree(0)
ds, ls = trees.createDataSet()
trees.classify(it, ls, [0, 0])

# Serialize and deserialize the decision tree
import trees
import treePlotter
it = treePlotter.retrieveTree(0)
trees.storeTree(it, 'classifierStorage.txt')
ot = trees.grabTree('classifierStorage.txt')

# Test on the contact-lenses data set
import trees
import treePlotter
# The context manager closes the file once the tab-separated rows are parsed.
with open('lenses.txt') as fr:
    ds = [example.strip().split("\t") for example in fr]
ls = ['age', 'prescript', 'antigmatic', 'tearRate']
mt = trees.createTree(ds, ls)
treePlotter.createPlot(mt)
示例#17
0
import treePlotter
"""
绘制决策树:
treePlotter.createPlot()
"""
"""
获取树的叶子数量和深度:
myTree = treePlotter.retrieveTree(0)
print(treePlotter.getNumLeafs(myTree))
print(treePlotter.getTreeDepth(myTree))
"""
"""
生成决策图:
myTree = treePlotter.retrieveTree(0)
myTree['no surfacing'][3] = 'maybe'
treePlotter.createPlot(myTree)
"""
"""
测试算法:
myTree = treePlotter.retrieveTree(0)
print(trees.classify(myTree,labels,[1,0]))
print(trees.classify(myTree,labels,[1,1]))
"""
"""
使用pickle模块存储决策树
myTree = treePlotter.retrieveTree(0)
trees.storeTree(myTree,'classifierStorage.txt')
print(trees.grabTree('classifierStorage.txt'))
"""
示例#18
0
# treePlotter.createPlot()
'''
print(treePlotter.retrieveTree(1))
myTree = treePlotter.retrieveTree(0)
print(treePlotter.getNumLeafs(myTree))
print(treePlotter.getTreeDepth(myTree))
'''

# 绘制树
'''
myTree = treePlotter.retrieveTree(0)
# treePlotter.createPlot(myTree)      # 没有坐标轴标签
myTree['no surfacing'][3] = 'maybe'
print(myTree)
treePlotter.createPlot(myTree)
'''

# 使用pickle模块存储决策树
'''
myTree = treePlotter.retrieveTree(0)
trees.storeTree(myTree, 'classifierStorage.txt')
print(trees.grabTree('classifierStorage.txt'))
'''

# 隐性眼镜数据集
fr = open('lenses.txt')
lenses = [inst.strip().split('\t') for inst in fr.readlines()]
lensesLabels = ['age', 'prescript', 'astigmatic', 'tearRate']
lensesTree = trees.createTree(lenses, lensesLabels)
treePlotter.createPlot(lensesTree)
示例#19
0
import math
import operator
import matplotlib.pyplot as plt
import pickle
import trees
import treeplotter

# Store the sample tree, read it back, and dump its text form to output.out.
myDat, labels = trees.createDataSet()
myTree = treeplotter.retrieveTree(0)

trees.storeTree(myTree, 'classifierstorage.txt')
ans = trees.grabTree('classifierstorage.txt')

with open('output.out', 'w') as f:
    text = str(ans)
    f.write(text)
示例#20
0
print '决策树:'
Tree = trees.createTree(dataset, labels)
print Tree
firstFeature = Tree.keys()[0]
print firstFeature
firstFeatureValues = Tree[firstFeature].keys()
print firstFeatureValues
print '\n'

treePlotter.createPlot(Tree)

testVec = ['pre', 'myope', 'yes', 'normal']
print '测试数据'
print testVec
labels.append('tearRate')
print '匹配过程:'
result = trees.classify(Tree, labels, testVec)
print '匹配结果:'
print result
print '\n'

# 把树存在磁盘中
print '将树存放磁盘...'
trees.storeTree(Tree, 'myTree.txt')
print '\n'

# 从磁盘中取出树
print '再从磁盘中读取树:'
print trees.grabTree('myTree.txt')
示例#21
0
# _*_ coding:utf-8 _*_
import trees
'''
mydat,labels=trees.createDataSet()
result=trees.splitDataSet(mydat,0,1)
print  (result)

'''
'''
mydat,labels=trees.createDataSet()
print (trees.chooseBestFeatureToSplit(mydat))

'''
'''
mydat,labels=trees.createDataSet()
mytree=trees.createTree(mydat,labels)
print (mytree)
'''

# Store the sample tree on disk, then print what is read back from the file.
mydat, labels = trees.createDataSet()
mytree = trees.retrieveTree(0)
trees.storeTree(
    mytree,
    'classStorage.txt',
)
print(
    trees.grabTree('classStorage.txt')
)
示例#22
0
import treePlotter
import trees

# Build the tree from the sample data set and show it as text and as a plot.
myDat, labels = trees.createDataSet()
myTree = trees.createTree(myDat, labels)
print(myTree)
treePlotter.createPlot(myTree)

# Persist the tree for later use.
trees.storeTree(
    myTree,
    'myStoreTree',
)
示例#23
0
myTree = TP.retrieveTree(
    0)  #{'no surfacing': {0: 'no', 1: {'flippers': {0: 'no', 1: 'yes'}}}}
n = TP.getNumLeafs(myTree)  # 3
d = TP.getTreeDepth(myTree)  # 2

TP.createPlot(myTree)

# classify
myDat, labels = DT.createDataSet()
myTree = TP.retrieveTree(
    0)  # {'no surfacing': {0: 'no', 1: {'flippers': {0: 'no', 1: 'yes'}}}}
class1 = DT.classify(myTree, labels, [1, 0])  # no
class2 = DT.classify(myTree, labels, [1, 1])  # yes

# storing the tree pickeld form
DT.storeTree(myTree, 'data/classifierStorage.txt')
grabedTree = DT.grabTree(
    'data/classifierStorage.txt'
)  # {'no surfacing': {0: 'no', 1: {'flippers': {0: 'no', 1: 'yes'}}}}

# lens tree
fr = open('data/lenses.txt')
lenses = [inst.strip().split('\t') for inst in fr.readlines()]
lensesLabels = ['age', 'prescript', 'astigmatic', 'tearRate']
lensesTree = DT.createTree(lenses, lensesLabels)
"""
output:
{'tearRate': {'reduced': 'no lenses',
  'normal': {'astigmatic': {'yes': {'prescript': {'myope': 'hard',
      'hyper': {'age': {'young': 'hard',
        'presbyopic': 'no lenses',
示例#24
0
print mySplit1
shannonEntSplit1 = trees.calcShannonEnt(mySplit1)
print(shannonEntSplit1)
mySplit2 = trees.splitDataSet(myDat,0,0)
print mySplit2
shannonEntSplit2 = trees.calcShannonEnt(mySplit2)
print(shannonEntSplit2)
mySplit3 = trees.splitDataSet(myDat,1,1)
print mySplit3
shannonEntSplit3 = trees.calcShannonEnt(mySplit3)
print(shannonEntSplit3)

bestFeature = trees.chooseBestFeatureToSplit(myDat)
print(bestFeature)

myTree = trees.createTree(myDat,labels)
print(myTree)

import treePlotter
myTree2 = treePlotter.retrieveTree(0)
treePlotter.createPlot(myTree2)

trees.storeTree(myTree2,'classifierStorage2.txt')
print trees.grabTree('classifierStorage2.txt')

fr=open('lenses.txt')
lenses=[inst.strip().split('\t') for inst in fr.readlines()]
lensesLabels = ['age','prescript','astigmatic','tearRate']
lensesTree=trees.createTree(lenses,lensesLabels)
print lensesTree
treePlotter.createPlot(lensesTree)
示例#25
0
lenses = [inst.strip().split('\t') for inst in fr.readlines()]
lensesLabels = ['age', 'presscript', 'astigmatic', 'tearRate']
labels = ['age', 'presscript', 'astigmatic', 'tearRate']
#{'tearRate': {'reduced': 'no lenses',
#               'normal': {'astigmatic': {'yes': {'presscript': {
#                                                       'hyper': {'age': {
#                                                                      'pre': 'no lenses',
#                                                                       'presbyopic': 'no lenses',
#                                                                       'young': 'hard'}
#                                                                    },
#                                                       'myope': 'hard'}},
#                                         'no': {'age': {'pre': 'soft',
#                                                       'presbyopic': {'presscript': {'hyper': 'soft',
#                                                                                   'myope': 'no lenses'}},
#                                                        'young': 'soft'}
# }}}}}

lensesTree = trees.createTree(lenses, labels)
lensesTreeFileName = 'lensesTreeClassed.txt'
trees.storeTree(lensesTree, lensesTreeFileName)
print(lensesTree)
#young	hyper	no	normal	soft
# young	hyper	yes	reduced	no lenses
# young	hyper	yes	normal	hard
# pre	myope	no	reduced	no lenses
# pre	myope	no	normal	soft
lensesClass = trees.classify(lensesTree, lensesLabels,
                             ['young', 'myope', 'yes', 'normal'])
print(lensesClass)

treePlotter.createPlot(lensesTree)