Exemplo n.º 1
0
# shannonEnt = Trees.calcShannonEnt(myDat)
# print shannonEnt

# retDataSet = Trees.splitDataSet(myDat, 0, 1)
# print retDataSet

# bestFeature = Trees.chooseBestFeatureToSplit(myDat)
# print bestFeature

# tree = Trees.createTree(myDat, labels)
# print tree
'''

# TreePlotter
'''
# TreePlotter.createPlot()

tree = TreePlotter.retrieveTree(0)

# numLeafs = TreePlotter.getNumLeafs(tree)
# depth = TreePlotter.getTreeDepth(tree)
# print "leafs nums: %d, depth: %d" % (numLeafs, depth)

TreePlotter.createPlot(tree)
'''

# Classify
'''
# myDat, labels = Trees.createDataSet()
# myTree = TreePlotter.retrieveTree(0)
Exemplo n.º 2
0
        0: 'lef node',
        1: {
            'level 2': {
                0: 'leaf node',
                1: 'leaf node'
            }
        },
        2: {
            'lead 2': {
                0: 'leaf node',
                1: 'leaf node'
            }
        }
    }
}
tp.createPlot(myTree)

# 图与网格结构的可视化
data = np.mat([[0.1, 0.1], [0.9, 0.5], [0.3, 0.6], [0.7, 0.2], [0.1, 0.7],
               [0.5, 0.1]])
m, n = np.shape(data)
fig = plt.figure()
ax = fig.add_subplot(1, 1, 1)
ax.scatter(data.T[0].tolist(), data.T[1].tolist(), color='blue', marker='o')

for point in data.tolist():
    plt.annotate("(" + str(point[0]) + "," + str(point[1]) + ")",
                 xy=(point[0], point[1]))
xList = []
yList = []
for px, py in zip(data.T.tolist()[0], data.T.tolist()[1]):
Exemplo n.º 3
0
# shannonEnt = Trees.calcShannonEnt(myDat)
# print shannonEnt

# retDataSet = Trees.splitDataSet(myDat, 0, 1)
# print retDataSet

# bestFeature = Trees.chooseBestFeatureToSplit(myDat)
# print bestFeature

# tree = Trees.createTree(myDat, labels)
# print tree
'''

# TreePlotter
'''
# TreePlotter.createPlot()

tree = TreePlotter.retrieveTree(0)

# numLeafs = TreePlotter.getNumLeafs(tree)
# depth = TreePlotter.getTreeDepth(tree)
# print "leafs nums: %d, depth: %d" % (numLeafs, depth)

TreePlotter.createPlot(tree)
'''

# Classify
'''
# myDat, labels = Trees.createDataSet()
# myTree = TreePlotter.retrieveTree(0)
Exemplo n.º 4
0
    :param filename: 存储的文件
    :return:
    """
    import pickle
    with open(filename, 'w') as fw:
        pickle.dumps(input_tree, fw)


def grab_tree(filename):
    """Deserialize a decision tree previously persisted with pickle.

    :param filename: path of the pickle file to read.
    :return: the unpickled decision-tree object.
    """
    import pickle
    # Pickle data is binary: the file must be opened in 'rb' mode.
    # The original opened it in text mode, which fails on Python 3
    # (pickle.load needs a bytes stream).
    with open(filename, 'rb') as fr:
        return pickle.load(fr)


if __name__ == '__main__':
    # Load the tab-delimited lenses dataset, build a decision tree
    # from it, print the resulting structure, and render the tree plot.
    with open('lenses.txt') as data_file:
        lenses = [line.strip().split('\t') for line in data_file]
    # Feature names for the four lens-prescription attributes.
    lensesLabels = ['age', 'prescript', 'astigmatic', 'tearRate']
    lensesTree = create_tree(lenses, lensesLabels)
    print(lensesTree)
    # Visualize the learned tree.
    TreePlotter.create_plot(lensesTree)
Exemplo n.º 5
0
    pickle.dump(obj, file)
    file.close()


def readDump(path):
    """Load a pickled object from disk.

    :param path: file-system path of a pickle file written by ``saveDump``.
    :return: the unpickled object.
    """
    # A context manager guarantees the handle is closed even if
    # pickle.load raises; the manual open/close pair did not.
    # NOTE: pickle.load can execute arbitrary code — only use on files
    # this program wrote itself.
    with open(path, "rb") as dump_file:
        return pickle.load(dump_file)


# Driver: build (or reload) an ID3 decision-tree model, plot it,
# and run a single prediction.
dtree = ID3Tree()
if not os.path.exists(ID3SavePath):  # no persisted model yet -> train from scratch
    print("生成数据")
    dtree.dataSet = loadDataSet(ID3LoadPath)
    # Feature names for the four attributes of each training record.
    dtree.labels = ["age", "revenue", "student", "credit"]
    # dtree.dataSet = dataSet
    # dtree.labels = labels
    print("训练数据")
    dtree.train()
    print("持久化数据")
    # Serialize the trained tree so later runs can skip training.
    saveDump(ID3SavePath, dtree.getDumpData())

else:
    print("读取持久化")
    # Restore the previously trained tree from disk.
    dtree.loadDumpData(readDump(ID3SavePath))

print("正在生成树")
tp.createPlot(dtree.tree)
# Classify one sample; presumably each 0 selects the first encoded value
# of the corresponding categorical feature — TODO confirm against
# loadDataSet's encoding.
print("预测结果为:", dtree.predict(dtree.tree, [0, 0, 0, 0]))
Exemplo n.º 6
0
2. Prepare: Parse tab-delimited lines.
3. Analyze: Quickly review data visually to make sure it was parsed properly. The final
tree will be plotted with createPlot().
4. Train: Use createTree() from section 3.1.
5. Test: Write a function to descend the tree for a given instance.
6. Use: Persist the tree data structure so it can be recalled without building the
tree; then use it in any application.
'''
# Python 2 script: train a decision tree on the lenses dataset, plot it,
# then re-classify the training set and report the (training) error rate.
from numpy import *
import LoadData as ld
import DicisionTree as dts
import TreePlotter as tplt
dataSet, labels = ld.createDataSet('lenses.txt')
#testLabels = zeros(len(labels),1)# cannot be used either
#testLabels =  labels# cannot be used
# NOTE(review): createTree presumably mutates `labels` (removes features as
# they are consumed) — that would explain why the dataset and labels are
# reloaded below before classifying. Confirm against DicisionTree.createTree.
lensesTree = dts.createTree(dataSet, labels)
print lensesTree
tplt.createPlot(lensesTree)
print labels
# Uses the training data as test data, so this measures training error,
# not a generalization estimate.
dataSet, testLabels = ld.createDataSet('lenses.txt')
errorCount = 0
numTestVec = 0
for testVec in dataSet:
    numTestVec += 1.0
    classLabel = dts.classify(lensesTree, testLabels, testVec)
    # The class label is stored in the last column of each record.
    if classLabel != testVec[-1]:
        errorCount += 1
errorRate = (float(errorCount) / numTestVec)
print "the error rate of this test is: %f" % errorRate
Exemplo n.º 7
0
               [u'T', u'D', u'P', 'yes'], [u'D', u'D', u'B', 'yes'],
               [u'D', u'N', u'B', 'yes'], [u'D', u'D', u'D', 'yes'],
               [u'D', u'D', u'D', 'yes'], [u'D', u'A', u'D', 'yes'],
               [u'T', u'D', u'P', 'yes'], [u'T', u'D', u'B', 'yes'],
               [u'D', u'D', u'D', 'yes'], [u'T', u'D', u'D', 'yes'],
               [u'T', u'N', u'B', 'yes'], [u'D', u'A', u'D', 'yes'],
               [u'T', u'D', u'D', 'yes'], [u'T', u'D', u'D', 'yes'],
               [u'D', u'D', u'D', 'yes'], [u'D', u'D', u'D', 'yes'],
               [u'T', u'D', u'D', 'yes'], [u'D', u'A', u'D', 'yes'],
               [u'D', u'D', u'D', 'yes'], [u'D', u'D', u'D', 'yes'],
               [u'D', u'A', u'D', 'yes'], [u'D', u'D', u'D', 'yes'],
               [u'D', u'D', u'D', 'yes'], [u'T', u'D', u'B', 'yes'],
               [u'T', u'D', u'P', 'yes'], [u'D', u'D', u'D', 'yes'],
               [u'D', u'D', u'D', 'yes'], [u'T', u'D', u'D', 'yes'],
               [u'T', u'D', u'D', 'yes'], [u'T', u'D', u'D', 'yes'],
               [u'D', u'D', u'D', 'yes'], [u'T', u'D', u'B', 'yes'],
               [u'T', u'P', u'B', 'yes'], [u'D', u'D', u'D', 'yes'],
               [u'D', u'A', u'P', 'yes'], [u'T', u'D', u'D', 'yes'],
               [u'D', u'D', u'P', 'yes'], [u'T', u'D', u'D', 'yes'],
               [u'T', u'D', u'D', 'yes'], [u'T', u'D', u'D', 'yes'],
               [u'T', u'D', u'D', 'yes'], [u'D', u'D', u'D', 'yes'],
               [u'T', u'N', u'B', 'yes'], [u'D', u'N', u'B', 'yes'],
               [u'T', u'D', u'D', 'yes'], [u'D', u'D', u'D', 'yes'],
               [u'D', u'D', u'D', 'yes'], [u'D', u'D', u'D', 'yes'],
               [u'D', u'A', u'D', 'yes'], [u'T', u'N', u'B', 'yes']
               ]  #[[1,1,'yes'],[1,1,'yes'],[1,0,'no'],[0,1,'no'],[0,1,'no']]
    labels = ['surfacing', 'flippers', 'ff']
    myTree = CreatTree(dataset, labels)
    print myTree
    TreePlotter.CreatPlot(myTree)
Exemplo n.º 8
0
Arquivo: 3.py Projeto: niumeng07/ML
import operator
import os

import Trees

# Build the demo dataset and show its Shannon entropy.
myData,labels=Trees.createDataSet()
print(myData)
print(labels)
print(Trees.calcShannonEnt(myData))

# Changing one class label adds a third class, so entropy should rise.
myData[0][-1]='maybe'
print(myData)
print(labels)
print(Trees.calcShannonEnt(myData))

# Reload a clean dataset and exercise the split helper.
myDat,labels=Trees.createDataSet()
print(myDat)
print(Trees.splitDataSet(myDat,0,1))
# NOTE(review): this splits the *mutated* myData rather than the fresh
# myDat — probably a typo for myDat; confirm the intended dataset.
print(Trees.splitDataSet(myData,0,0))

print(Trees.chooseBestFeatureToSplit(myDat))

# NOTE(review): createTree may mutate `labels`; it is not reloaded afterwards.
myTree=Trees.createTree(myDat,labels)
print(myTree)

import TreePlotter

TreePlotter.createPlot()



    fw.close()

def grabTree(filename):
    """Deserialize a decision tree previously saved with storeTree.

    :param filename: path of the pickle file to read.
    :return: the unpickled decision-tree object.
    """
    import pickle
    # Context manager ensures the handle is closed even if pickle.load
    # raises — the original opened the file and never closed it.
    with open(filename, "rb") as fr:
        return pickle.load(fr)


# Demo driver: exercises the tree-building helpers (commented out), then
# trains and plots a decision tree for the lenses dataset.
dataSet,labels = createDataSet()
# shannonEnt = calcShannoEnt(dataSet)
# print(shannonEnt)

# retDataSet = splitDataSet(dataSet,0,1)                      # take rows whose feature 0 == 1, dropping that feature
# print(dataSet)
# print(retDataSet)

# bestFeature = chooseBestFeatureToSplit(dataSet)
# print(bestFeature)

# myTree = createTree(dataSet,labels)
# print(myTree)

# Run the algorithm on the lenses data.
# NOTE(review): absolute, machine-specific path — breaks on any other host;
# should come from configuration or a relative path.
lenses_path = '/Users/lixiwei-mac/Documents/IdeaProjects/MachineLearningInAction/DecisionTree/lenses.txt'
# Context manager fixes the leaked file handle from the original.
with open(lenses_path) as fr:
    lenses = [inst.strip().split('\t') for inst in fr.readlines()]
# NOTE(review): 'tearRage' is probably a typo for 'tearRate' (cf. sibling
# examples in this file); it only affects the displayed label text, so it
# is preserved here to keep output identical.
lensesLabels = ['age','prescript','astigmatic','tearRage']
lensesTree = createTree(lenses,lensesLabels)
print(lensesTree)
treePlotter.createPlot(lensesTree)

Exemplo n.º 10
0
    maxMJJ=610.0

    minMVV=1000.0
    maxMVV=7000.0

    binsMJJ=290
    binsMVV=160

    cuts['acceptance']= "(jj_LV_mass>{minMVV}&&jj_LV_mass<{maxMVV}&&jj_l1_softDrop_mass>{minMJJ}&&jj_l1_softDrop_mass<{maxMJJ})".format(minMVV=minMVV,maxMVV=maxMVV,minMJJ=minMJJ,maxMJJ=maxMJJ)
    cuts['acceptanceGEN']='(jj_l1_gen_softDrop_mass>0&&jj_gen_partialMass>0)'
    cuts['nonres'] = '1'
    cut='*'.join([cuts['common'],cuts['nonres'], '(jj_l1_softDrop_mass>30&&jj_l1_softDrop_mass<610)','(jj_LV_mass>1000&&jj_LV_mass<7000)',cuts['HP'],'(jj_l1_gen_softDrop_mass>0&&jj_gen_partialMass>0)'])
    
    dataPlotters=[]
    dataPlottersNW=[]
    dataPlotters.append(TreePlotter(fromsample+'.root','tree'))
    dataPlotters[-1].setupFromFile(fromsample+'.pck')
    dataPlotters[-1].addCorrectionFactor('xsec','tree')
    dataPlotters[-1].addCorrectionFactor('genWeight','tree')
    dataPlotters[-1].addCorrectionFactor('puWeight','tree')
    data=MergedPlotter(dataPlotters)
    sampleHisto=dataPlotters[0].drawTH2("jj_l1_gen_softDrop_mass:jj_LV_mass",cut,"1",binsMVV,minMVV,maxMVV,binsMJJ,minMJJ,maxMJJ,"M_{qV} mass","GeV","Softdrop mass","GeV","COLZ" )
    
    sampleHisto1Dmjet=dataPlotters[0].drawTH1('jj_l1_softDrop_mass',cut,"1",binsMJJ,minMJJ,maxMJJ)
    sampleHisto1Dmjet.Scale(1/sampleHisto1Dmjet.Integral())
    print sampleHisto
     
     
     
     
     
Exemplo n.º 11
0
    # 获取信息增益最大的特征及其增益
    highest_gain_feature, highest_gain = get_feature_with_highest_Gain(
        data_set)
    # 增益小于ε,单一节点,返回实例数最大的类
    if highest_gain < eps:
        return get_most_common_class(data_set)
    # 构建树
    decision_tree_dict = {highest_gain_feature: {}}
    # 对每个最高增益特征的取值进行分割数据集,并进行递归调用生成树
    feature_values = set(data_set[highest_gain_feature])
    for one_value in feature_values:
        # 分割D
        divided_data_set = data_set[data_set[highest_gain_feature] ==
                                    one_value]
        # 去除列,A = A - {Ak}i
        divided_data_set = divided_data_set.drop(labels=highest_gain_feature,
                                                 axis=1)
        # 生成子树
        decision_tree_dict[highest_gain_feature][
            one_value] = generate_decision_tree(divided_data_set, eps)
    return decision_tree_dict


if __name__ == '__main__':
    # Train a decision tree on the lenses data and render it.
    lens_data = init_data('resources/lenses.txt')
    # eps is the information-gain threshold below which a node becomes a leaf.
    tree = generate_decision_tree(lens_data, eps=0.0001)
    print(tree)
    TreePlotter.createPlot(tree)
    # print(data_set[(data_set['tearRate'] == 'normal') & (data_set['astigmatic'] == 'yes') & (data_set['prescript'] == 'myope')])
    # print(data_set[data_set['tearRate'] == 'reduced'])
Exemplo n.º 12
0
def lensesStudy(filepath):
    """Train a decision tree on a tab-delimited lenses file and plot it.

    :param filepath: path to a file with one tab-separated record per line,
        class label in the last column.
    """
    # Context manager closes the file handle — the original opened the
    # file and never closed it (resource leak).
    with open(filepath) as fr:
        lenses = [inst.strip().split("\t") for inst in fr.readlines()]
    # Feature names for the four lens-prescription attributes.
    lensesLabels = ['age', 'prescript', 'astigmatic', 'tearRate']
    lensesTree = createTree(lenses, lensesLabels)
    TreePlotter.createPlot(lensesTree)
Exemplo n.º 13
0
def lensesStudy(filepath):
    """Train a decision tree on a tab-delimited lenses file and plot it.

    :param filepath: path to a file with one tab-separated record per line,
        class label in the last column.
    """
    # Context manager closes the file handle — the original opened the
    # file and never closed it (resource leak).
    with open(filepath) as fr:
        lenses = [inst.strip().split("\t") for inst in fr.readlines()]
    # Feature names for the four lens-prescription attributes.
    lensesLabels = ['age', 'prescript', 'astigmatic', 'tearRate']
    lensesTree = createTree(lenses, lensesLabels)
    TreePlotter.createPlot(lensesTree)