Пример #1
0
# shannonEnt = Trees.calcShannonEnt(myDat)
# print shannonEnt

# retDataSet = Trees.splitDataSet(myDat, 0, 1)
# print retDataSet

# bestFeature = Trees.chooseBestFeatureToSplit(myDat)
# print bestFeature

# tree = Trees.createTree(myDat, labels)
# print tree
'''

# TreePlotter
'''
# TreePlotter.createPlot()

tree = TreePlotter.retrieveTree(0)

# numLeafs = TreePlotter.getNumLeafs(tree)
# depth = TreePlotter.getTreeDepth(tree)
# print "leafs nums: %d, depth: %d" % (numLeafs, depth)

TreePlotter.createPlot(tree)
'''

# Classify
'''
# myDat, labels = Trees.createDataSet()
# myTree = TreePlotter.retrieveTree(0)
Пример #2
0
        0: 'lef node',
        1: {
            'level 2': {
                0: 'leaf node',
                1: 'leaf node'
            }
        },
        2: {
            'lead 2': {
                0: 'leaf node',
                1: 'leaf node'
            }
        }
    }
}
tp.createPlot(myTree)

# Visualization of graph and grid structures.
# Six 2-D sample points, one per row (assumes `np` is numpy and `plt` is
# matplotlib.pyplot; their imports are outside this excerpt).
data = np.mat([[0.1, 0.1], [0.9, 0.5], [0.3, 0.6], [0.7, 0.2], [0.1, 0.7],
               [0.5, 0.1]])
m, n = np.shape(data)  # row count and column count of the matrix
fig = plt.figure()
ax = fig.add_subplot(1, 1, 1)
# Scatter the first column against the second column as blue circles.
ax.scatter(data.T[0].tolist(), data.T[1].tolist(), color='blue', marker='o')

# Annotate every point with its "(x,y)" text, anchored at the point itself.
for point in data.tolist():
    plt.annotate("(" + str(point[0]) + "," + str(point[1]) + ")",
                 xy=(point[0], point[1]))
# Empty accumulators; presumably filled by the loop that follows this excerpt.
xList = []
yList = []
for px, py in zip(data.T.tolist()[0], data.T.tolist()[1]):
Пример #3
0
2. Prepare: Parse tab-delimited lines.
3. Analyze: Quickly review data visually to make sure it was parsed properly. The final
tree will be plotted with createPlot().
4. Train: Use createTree() from section 3.1.
5. Test: Write a function to descend the tree for a given instance.
6. Use: Persist the tree data structure so it can be recalled without building the
tree; then use it in any application.
'''
from numpy import *
import LoadData as ld
import DicisionTree as dts
import TreePlotter as tplt
# Build a decision tree from lenses.txt, print and plot it, then measure the
# error rate of the tree on the same data it was built from.
dataSet, labels = ld.createDataSet('lenses.txt')
#testLabels = zeros(len(labels),1)  # this does not work either
#testLabels =  labels  # this does not work
lensesTree = dts.createTree(dataSet, labels)
print lensesTree
tplt.createPlot(lensesTree)
print labels
# Use the training data as the test data.
# NOTE(review): the labels are loaded a second time instead of reusing
# `labels`; presumably createTree modifies the list it is given - confirm.
dataSet, testLabels = ld.createDataSet('lenses.txt')
errorCount = 0
numTestVec = 0
for testVec in dataSet:
    numTestVec += 1.0
    classLabel = dts.classify(lensesTree, testLabels, testVec)
    # The last element of each record is compared against the prediction.
    if classLabel != testVec[-1]:
        errorCount += 1
# NOTE(review): raises ZeroDivisionError when dataSet is empty.
errorRate = (float(errorCount) / numTestVec)
print "the error rate of this test is: %f" % errorRate
Пример #4
0
# shannonEnt = Trees.calcShannonEnt(myDat)
# print shannonEnt

# retDataSet = Trees.splitDataSet(myDat, 0, 1)
# print retDataSet

# bestFeature = Trees.chooseBestFeatureToSplit(myDat)
# print bestFeature

# tree = Trees.createTree(myDat, labels)
# print tree
'''

# TreePlotter
'''
# TreePlotter.createPlot()

tree = TreePlotter.retrieveTree(0)

# numLeafs = TreePlotter.getNumLeafs(tree)
# depth = TreePlotter.getTreeDepth(tree)
# print "leafs nums: %d, depth: %d" % (numLeafs, depth)

TreePlotter.createPlot(tree)
'''

# Classify
'''
# myDat, labels = Trees.createDataSet()
# myTree = TreePlotter.retrieveTree(0)
Пример #5
0
#encoding=utf-8
import TreePlotter
import Decide_Tree_library
# Parse lenses.txt into a list of records: each line is stripped and split on
# tabs, and the resulting field list becomes one element of `lenses`.
# The context manager closes the file even if parsing fails (the handle was
# previously left open for the lifetime of the process).
with open("lenses.txt") as fr:
    lenses = [line.strip().split("\t") for line in fr]
lensesLabels = ["age", "prescript", "astigmatic", "tearrate"]
lensesTree = Decide_Tree_library.createTree(lenses, lensesLabels)
# Single-argument parenthesised print behaves the same on Python 2 and 3.
print(lensesTree)
TreePlotter.createPlot(lensesTree)
Пример #6
0
    pickle.dump(obj, file)
    file.close()


def readDump(path):
    """Load and return the object pickled in the file at ``path``.

    The file is opened in binary mode and is closed even when unpickling
    raises, so a corrupt dump no longer leaks the file handle.

    NOTE: ``pickle.load`` can execute arbitrary code; only call this on dump
    files produced by this program.
    """
    with open(path, "rb") as dump_file:
        return pickle.load(dump_file)


# Train an ID3 tree and persist it on the first run; on later runs reload the
# persisted dump instead. Either way, plot the tree and print one prediction.
dtree = ID3Tree()
if not os.path.exists(ID3SavePath):  # nothing exists at the save path yet
    print("生成数据")
    dtree.dataSet = loadDataSet(ID3LoadPath)
    dtree.labels = ["age", "revenue", "student", "credit"]
    # dtree.dataSet = dataSet
    # dtree.labels = labels
    print("训练数据")
    dtree.train()
    print("持久化数据")
    saveDump(ID3SavePath, dtree.getDumpData())

else:
    # A dump already exists: restore the tree state from it.
    print("读取持久化")
    dtree.loadDumpData(readDump(ID3SavePath))

print("正在生成树")
tp.createPlot(dtree.tree)
# Predict the class of the sample [0, 0, 0, 0] with the tree.
print("预测结果为:", dtree.predict(dtree.tree, [0, 0, 0, 0]))
Пример #7
0
Файл: 3.py Проект: niumeng07/ML
import operator
import os

import Trees
# Walk through the Trees module step by step, printing each result.
myData,labels=Trees.createDataSet()
print(myData)
print(labels)
print(Trees.calcShannonEnt(myData))

# Change the class label of the first record and print the entropy again.
myData[0][-1]='maybe'
print(myData)
print(labels)
print(Trees.calcShannonEnt(myData))

# Fresh data set for the split / best-feature / tree-building calls.
myDat,labels=Trees.createDataSet()
print(myDat)
print(Trees.splitDataSet(myDat,0,1))
# NOTE(review): this call passes `myData` (the copy modified above), not
# `myDat` - confirm whether that is intentional.
print(Trees.splitDataSet(myData,0,0))

print(Trees.chooseBestFeatureToSplit(myDat))

myTree=Trees.createTree(myDat,labels)
print(myTree)

import TreePlotter

# NOTE(review): createPlot is called without a tree here, while other
# callers pass one - verify against the TreePlotter version in use.
TreePlotter.createPlot()



    fw.close()

def grabTree(filename):
    """Load and return the object pickled in the file ``filename``.

    The file is opened in binary mode inside a context manager so the handle
    is closed once loading finishes or fails (it was previously never closed).

    NOTE: ``pickle.load`` can execute arbitrary code; only use this on files
    written by a trusted source.
    """
    import pickle
    with open(filename, "rb") as fr:
        return pickle.load(fr)


dataSet,labels = createDataSet()
# shannonEnt = calcShannoEnt(dataSet)
# print(shannonEnt)

# retDataSet = splitDataSet(dataSet,0,1)                      # take the records whose feature 0 equals 1 and drop that feature
# print(dataSet)
# print(retDataSet)

# bestFeature = chooseBestFeatureToSplit(dataSet)
# print(bestFeature)

# myTree = createTree(dataSet,labels)
# print(myTree)

# Use the algorithm: build, print and plot a tree for the lenses data.
# The context manager closes the file once the records are parsed (the handle
# was previously never closed).
with open('/Users/lixiwei-mac/Documents/IdeaProjects/MachineLearningInAction/DecisionTree/lenses.txt') as fr:
    # One record per line: strip the newline, then split on tabs.
    lenses = [inst.strip().split('\t') for inst in fr]
# Fourth label corrected from the typo 'tearRage' to 'tearRate'; it is the
# text that appears as a node key in the printed and plotted tree.
lensesLabels = ['age','prescript','astigmatic','tearRate']
lensesTree = createTree(lenses,lensesLabels)
print(lensesTree)
treePlotter.createPlot(lensesTree)

Пример #9
0
    # 获取信息增益最大的特征及其增益
    highest_gain_feature, highest_gain = get_feature_with_highest_Gain(
        data_set)
    # 增益小于ε,单一节点,返回实例数最大的类
    if highest_gain < eps:
        return get_most_common_class(data_set)
    # 构建树
    decision_tree_dict = {highest_gain_feature: {}}
    # 对每个最高增益特征的取值进行分割数据集,并进行递归调用生成树
    feature_values = set(data_set[highest_gain_feature])
    for one_value in feature_values:
        # 分割D
        divided_data_set = data_set[data_set[highest_gain_feature] ==
                                    one_value]
        # 去除列,A = A - {Ak}i
        divided_data_set = divided_data_set.drop(labels=highest_gain_feature,
                                                 axis=1)
        # 生成子树
        decision_tree_dict[highest_gain_feature][
            one_value] = generate_decision_tree(divided_data_set, eps)
    return decision_tree_dict


if __name__ == '__main__':
    # Script entry point: load the lenses data, build a decision tree from it
    # with a gain threshold of eps=0.0001, then print and plot the tree.
    data_set = init_data('resources/lenses.txt')
    decision_tree = generate_decision_tree(data_set, eps=0.0001)
    print(decision_tree)
    TreePlotter.createPlot(decision_tree)
    # Disabled debugging aids: print selected subsets of the data set.
    # print(data_set[(data_set['tearRate'] == 'normal') & (data_set['astigmatic'] == 'yes') & (data_set['prescript'] == 'myope')])
    # print(data_set[data_set['tearRate'] == 'reduced'])
Пример #10
0
def lensesStudy(filepath):
    """Build a decision tree from a tab-separated lenses file and plot it.

    Each line of ``filepath`` is stripped and split on tabs into one record.
    The records and the four feature labels are passed to ``createTree`` and
    the resulting tree is drawn with ``TreePlotter.createPlot``.

    The file is read inside a context manager so the handle is closed as soon
    as the records are parsed (it was previously never closed).
    """
    with open(filepath) as fr:
        lenses = [inst.strip().split("\t") for inst in fr]
    lensesLabels = ['age', 'prescript', 'astigmatic', 'tearRate']
    lensesTree = createTree(lenses, lensesLabels)
    TreePlotter.createPlot(lensesTree)
Пример #11
0
def lensesStudy(filepath):
    """Build a decision tree from a tab-separated lenses file and plot it.

    Each line of ``filepath`` is stripped and split on tabs into one record.
    The records and the four feature labels are passed to ``createTree`` and
    the resulting tree is drawn with ``TreePlotter.createPlot``.

    The file is read inside a context manager so the handle is closed as soon
    as the records are parsed (it was previously never closed).
    """
    with open(filepath) as fr:
        lenses = [inst.strip().split("\t") for inst in fr]
    lensesLabels = ['age', 'prescript', 'astigmatic', 'tearRate']
    lensesTree = createTree(lenses, lensesLabels)
    TreePlotter.createPlot(lensesTree)