Пример #1
0
def demo():
    """Run a console smoke test of the decision-tree helpers.

    Exercises, in order: data-set creation, Shannon entropy, data-set
    splitting, best-feature selection, tree construction, majority vote,
    leaf/depth counting on a canned tree, classification, and storing and
    re-loading trees (including one retrieved with index 2, which the
    original author uses for the Chinese-text case).

    Every result is printed to stdout; nothing is returned.  Two files,
    'classfierStorage.txt' and 'cnTree.txt', are written via
    decisionTree.storeTree as a side effect.
    """
    print '... demo'
    # Sample data set plus the names of its features.
    myDat, featNames = createDataSet()
    print myDat

    # Entropy of the whole data set.
    shannonEnt = decisionTree.calcShannonEnt(myDat)
    print '当前数据集的熵是: ', shannonEnt

    # Split the data set with arguments (0, 1).
    # NOTE(review): presumably feature index 0 and feature value 1 -- confirm
    # against decisionTree.splitDataSet.
    print '... 测试拆分数据集'
    print decisionTree.splitDataSet(myDat, 0, 1)

    # Pick the best feature to split on.
    print '... 测试最佳特征选择'
    bestFeature = decisionTree.chooseBestFeatureToSplit(myDat)
    print '最好的分类特征是 %s' % bestFeature

    # Build a decision tree from the sample data.
    print '... 测试决策树的生成'
    myTree = decisionTree.createTree(myDat, featNames)
    print '生成的决策树是: \n', myTree

    # Majority vote over a small hand-written class list.
    print '... 测试SortedCount'
    classList = ['a', 'b', 'b', 'c', 'e']
    print decisionTree.majorityCnt(classList)

    # Disabled: tree-node drawing test.
    # print '... 测试绘制树节点'
    # plotDecisionTree.createPlot()

    # Replace myTree with a canned tree and report its leaf count and depth.
    print '... 测试绘制决策树'
    myTree = plotDecisionTree.retrieveTree(0)
    leafNums = decisionTree.getNumLeafs(myTree)
    treeDepth = decisionTree.getTreeDepth(myTree)
    print myTree
    print '叶子数量:%d, 树高度:%d' % (leafNums, treeDepth)

    # Disabled: plotting of the canned tree.
    # plotDecisionTree.createPlot(myTree)
    # Classify the feature vector [1, 1] with the canned tree.
    print '... 预测'
    featNames = ['no surfacing', 'flippers', 'fish']
    print decisionTree.classify(myTree, featNames, [1, 1])

    # Store the tree to disk, then read it back and print it.
    print '... 测试和读取决策树存储'
    decisionTree.storeTree(myTree, 'classfierStorage.txt')

    print decisionTree.grabTree('classfierStorage.txt')

    # Same store/load round trip for the tree used for the Chinese-text case.
    print '... 测试中文情况的决策树读取和存储'
    cnTree = plotDecisionTree.retrieveTree(2)
    print '存储前的决策树: \n', cnTree

    # Original author's note: for Chinese text, a plain print of the dict
    # only shows the UTF-8 encoded (escaped) form; printing the dictionary
    # recursively, entry by entry, would display the Chinese characters.
    decisionTree.storeTree(cnTree, 'cnTree.txt')
    print decisionTree.grabTree('cnTree.txt')
Пример #2
0
def randomForest(dataSet,
                 labels,
                 num_trees=2,
                 max_depth=None,
                 by='random',
                 max_features='log2'):
    """Build a list of ``num_trees`` decision trees.

    Each tree is created by ``createTree`` from its own ``subSample(dataSet)``
    result and a fresh copy of ``labels``.

    Args:
        dataSet: training data handed to ``subSample`` once per tree.
        labels: label list; a copy is passed to every ``createTree`` call,
            so the caller's list is never handed over directly.
        num_trees: number of trees to build.
        max_depth, by, max_features: forwarded unchanged to ``createTree``.

    Returns:
        A list with the ``num_trees`` trees, in creation order.
    """
    # Options shared by every tree in the forest.
    tree_options = dict(max_depth=max_depth, by=by, max_features=max_features)
    return [
        createTree(subSample(dataSet), labels[:], **tree_options)
        for _ in range(num_trees)
    ]
Пример #3
0
from decisionTree import createDataSet
from decisionTree import createTree
import matplotlib.pyplot as plt

# Sample data set and its labels.
dataSet, labels = createDataSet()

# Decision tree built from the sample data.
decisionTree = createTree(dataSet, labels)

# Decision nodes are drawn inside a sawtooth box.
decisionNode = {"boxstyle": "sawtooth", "fc": "0.8"}
# Box style for leaf nodes.
leafNode = {"boxstyle": "round4", "fc": "0.8"}
# Arrow style.
arrow_args = {"arrowstyle": "<-"}

def plotNode(nodeTxt, centerPt, parentPt, nodeType):
    """Annotate the shared axes ``createPlot.ax1`` with one tree node.

    ``nodeTxt`` is drawn at ``centerPt`` inside a box styled by ``nodeType``,
    with an arrow (styled by the module-level ``arrow_args``) relating it to
    ``parentPt``.  Both points are given in axes-fraction coordinates.
    """
    axes = createPlot.ax1
    axes.annotate(
        nodeTxt,
        xy=parentPt,
        xycoords="axes fraction",
        xytext=centerPt,
        textcoords="axes fraction",
        va="center",
        ha="center",
        bbox=nodeType,
        arrowprops=arrow_args,
    )
    
def createPlot():
    """Show a demo figure containing one decision node and one leaf node.

    Clears figure 1, stores a frameless subplot on ``createPlot.ax1`` (the
    axes ``plotNode`` draws on), plots the two sample nodes and displays
    the figure.
    """
    figure = plt.figure(1, facecolor="white")
    figure.clf()
    createPlot.ax1 = plt.subplot(111, frameon=False)

    # (text, centre point, parent point, box style) for each sample node.
    sample_nodes = (
        ("decisionNode", (0.5, 0.1), (0.1, 0.5), decisionNode),
        ('leafNode', (0.8, 0.1), (0.3, 0.8), leafNode),
    )
    for text, center, parent, box_style in sample_nodes:
        plotNode(text, center, parent, box_style)

    plt.show()

def getNumLeafs(myTree):
    numLeafs = 0
    for key in myTree:
Пример #4
0
# coding:utf-8

# 对隐形眼镜的分类

import decisionTree
import plotDecisionTree
import trees

# Read the tab-separated lens records.  The context manager closes the file
# even if reading fails part-way through.
with open('lenses.txt') as fr:
    lenses = [inst.strip().split('\t') for inst in fr]

# Show the container type and every parsed record.  Single-argument prints
# are parenthesised so the output is the same under Python 2 and Python 3.
print(type(lenses))
for record in lenses:
    print(record)

print('... end lenses')


# Names handed to createTree alongside the parsed records.
lensesFeatName = ['age', 'prescript', 'astigmatic', 'tearRate', 'lenses type']


# Build the decision tree from the records and print it.
lensesTree = decisionTree.createTree(lenses, lensesFeatName)
print(lensesTree)



# Plot the resulting tree.
plotDecisionTree.createPlot(lensesTree)
Пример #5
0
#!/usr/bin/env python

import decisionTree as dt

def createDataset() :
	"""Return the small sample data set as a ``(dataset, labels)`` tuple.

	``dataset`` is a list of five two-element feature rows and ``labels``
	holds the class label ('yes' / 'no') for the row at the same index.
	New lists are built on every call.
	"""
	rows = [
		[1, 1],
		[1, 1],
		[1, 0],
		[0, 1],
		[0, 1],
	]
	# Two positive samples followed by three negative ones.
	classes = ['yes'] * 2 + ['no'] * 3
	return (rows, classes)

if __name__ == '__main__':
	# Script entry point: build a tree from the sample data, print it,
	# then classify the sample [0, 0] and print the predicted label.
	dataset, labels = createDataset()

	labelName = ["no surface", "flipper"]
	tree = dt.createTree(dataset, labels, labelName)
	print(tree)

	# The name list is rebuilt before classification.
	# NOTE(review): presumably because createTree modifies the list it is
	# given -- confirm against decisionTree.createTree.
	labelName = ["no surface", "flipper"]
	label = dt.classify(tree, labelName, [0,0])
	print(label)