Пример #1
0
def test2():
    """Grow a regression tree on 'exp.txt', prune it with 'ex2test.txt', plot both.

    Prints the shape of the loaded training rows and the tree returned by
    ``regTrees.prune``, then calls ``regTrees.plot1withTree`` twice: first
    with the tree object returned by ``regTrees.createTree`` and then with
    the object returned by ``regTrees.prune``.
    """
    # Original author's note: 200 x 2 floats; exp and ex2 look alike, the
    # variance of y is smaller (not verified here).
    train_rows = regTrees.loadDataSet('exp.txt')
    print(shape(train_rows))

    # Original author's note: slicing such as m[:, -1] or m[5, :] only works
    # after converting the loaded rows with mat().
    train_mat = mat(train_rows)

    # Original author's notes on other settings that were tried:
    #   ops=(0, 1)    -> every point gets its own split, typical overfitting
    #   ops=(1000, 10), ops=(0.2, 4), ops=(10000, 4) for data with larger y
    grown_tree = regTrees.createTree(train_mat, ops=(10, 4))

    # Original author's note: ex2test.txt covers a range close to ex2 and is
    # used as the real test set.
    holdout_mat = mat(regTrees.loadDataSet('ex2test.txt'))
    trimmed_tree = regTrees.prune(grown_tree, holdout_mat)
    print(trimmed_tree)

    # Plot the training data against each tree, un-pruned reference first.
    for tree in (grown_tree, trimmed_tree):
        regTrees.plot1withTree(train_mat, tree)
Пример #2
0
def reDraw(tolS, tolN):
    """Rebuild the tree with the given tolerances and redraw the plot.

    Works on state stored as attributes of this function (``reDraw.f``,
    ``reDraw.canvas``, ``reDraw.rawDat``, ``reDraw.testDat``,
    ``reDraw.testX``) and on the module-level names ``chkBtnVar`` and
    ``is_prune``; all of them must be set up by the caller beforehand.

    Args:
        tolS: First entry of the ``ops`` tuple handed to
            ``regTrees.createTree``.
        tolN: Second entry of the ``ops`` tuple. When the checkbox
            variable is set, values below 2 are raised to 2.
    """
    reDraw.f.clf()  # clear the figure
    reDraw.a = reDraw.f.add_subplot(111)
    if chkBtnVar.get():
        # Checkbox set: build with modelLeaf/modelErr and predict with
        # modelTreeEval.
        if tolN < 2:
            tolN = 2
        myTree = regTrees.createTree(reDraw.rawDat, regTrees.modelLeaf,
                                     regTrees.modelErr, (tolS, tolN))
        if is_prune:
            myTree = regTrees.prune(myTree, reDraw.testDat)
        yHat = regTrees.createForeCast(myTree, reDraw.testX,
                                       regTrees.modelTreeEval)
    else:
        # Checkbox clear: createTree/createForeCast run with their defaults.
        myTree = regTrees.createTree(reDraw.rawDat, ops=(tolS, tolN))
        if is_prune:
            myTree = regTrees.prune(myTree, reDraw.testDat)
        yHat = regTrees.createForeCast(myTree, reDraw.testX)
    reDraw.a.scatter(reDraw.testDat[:, 0], reDraw.testDat[:, 1],
                     s=5)  # use scatter for data set
    reDraw.a.plot(reDraw.testX, yHat, linewidth=4.0)  # use plot for yHat
    # draw() rather than show(): on Matplotlib's Tk canvas show() was only a
    # deprecated alias of draw() and is gone in current releases.
    # NOTE(review): assumes reDraw.canvas is a Matplotlib figure canvas.
    reDraw.canvas.draw()
Пример #3
0
Файл: 9.py Проект: niumeng07/ML
# Regression-tree walkthrough. This fragment relies on names set up outside
# the visible code: `regTrees`, `mat`, `shape`, `corrcoef` and `myMat`.
#print(myMat)
print(regTrees.createTree(myMat))

# Build and print a tree for ex0.txt.
print("ex0.txt")
myDat1=regTrees.loadDataSet('ex0.txt')
myMat1=mat(myDat1)
print(shape(myMat1))  # original author's note: 200 3
print(regTrees.createTree(myMat1))
# Tree building finished.
# Build a tree for ex2.txt with default options, then again with ops=(0,1)
# and prune the latter against ex2test.txt.
myDat2=regTrees.loadDataSet('ex2.txt')
myMat2=mat(myDat2)
print(regTrees.createTree(myMat2))
myTree=regTrees.createTree(myMat2,ops=(0,1))
myDatTest=regTrees.loadDataSet('ex2test.txt')
myMat2Test=mat(myDatTest)
# NOTE(review): the value returned by prune() is discarded here and the
# original `myTree` reference is printed instead -- confirm prune() updates
# the tree in place, otherwise this prints the un-pruned tree.
regTrees.prune(myTree,myMat2Test)
print(myTree)

# Tree built with modelLeaf/modelErr on exp2.txt (the printed label is Chinese
# for "piecewise function representation").
print("分段函数表示:")
myMat2=mat(regTrees.loadDataSet('exp2.txt'))
print(regTrees.createTree(myMat2,regTrees.modelLeaf,regTrees.modelErr,(1,10)))


# Train on the bike-speed data, forecast column 0 of the test set and print
# the correlation between the forecast and column 1 of the test set.
trainMat=mat(regTrees.loadDataSet('bikeSpeedVsIq_train.txt'))
testMat=mat(regTrees.loadDataSet('bikeSpeedVsIq_test.txt'))
myTree=regTrees.createTree(trainMat,ops=(1,20))
yHat=regTrees.createForeCast(myTree,testMat[:,0])
print(corrcoef(yHat,testMat[:,1],rowvar=0)[0,1])

# Print the first value returned by linearSolve on the training data.
ws,X,Y=regTrees.linearSolve(trainMat)
print(ws)
# Python 2 variant of the regression-tree walkthrough (uses print statements).
# Relies on `regTrees`, `mat`, `corrcoef` and `myMat` being defined outside
# the visible code.
print regTrees.createTree(myMat)

# Tree for ex0.txt with default options.
myDat1 = regTrees.loadDataSet('ex0.txt')
myMat1 = mat(myDat1)
print regTrees.createTree(myMat1)

#print regTrees.createTree(myMat, ops=(0,1))
# Tree for ex2.txt with ops=(10000,4).
myDat2 = regTrees.loadDataSet('ex2.txt')
myMat2 = mat(myDat2)
#print regTrees.createTree(myMat2)
print regTrees.createTree(myMat2,ops=(10000,4))

# Build with ops=(0,1), then print the result of pruning against ex2test.txt.
myTree = regTrees.createTree(myMat2, ops=(0,1))
myDatTest = regTrees.loadDataSet('ex2test.txt')
myMat2Test = mat(myDatTest)
print regTrees.prune(myTree, myMat2Test)

# Tree built with modelLeaf/modelErr on exp2.txt.
myMat2 = mat(regTrees.loadDataSet('exp2.txt'))
print regTrees.createTree(myMat2, regTrees.modelLeaf, regTrees.modelErr, (1,10))

# Bike-speed data: forecast with a default-leaf tree and print the
# correlation of the forecast with column 1 of the test set.
trainMat = mat(regTrees.loadDataSet('bikeSpeedVsIq_train.txt'))
testMat = mat(regTrees.loadDataSet('bikeSpeedVsIq_test.txt'))
myTree = regTrees.createTree(trainMat, ops=(1,20))
yHat = regTrees.createForeCast(myTree, testMat[:,0])
print corrcoef(yHat, testMat[:,1], rowvar=0)[0,1]

# Same comparison with a modelLeaf/modelErr tree evaluated via modelTreeEval.
myTree = regTrees.createTree(trainMat, regTrees.modelLeaf, regTrees.modelErr, ops=(1,20))
yHat = regTrees.createForeCast(myTree, testMat[:,0], regTrees.modelTreeEval)
print corrcoef(yHat, testMat[:,1], rowvar=0)[0,1]

# NOTE(review): ws, X and Y are not used again in the visible code.
ws, X, Y = regTrees.linearSolve(trainMat)
import regTrees
from numpy import *

# mydat = regTrees.loadDataSet('ex00.txt')
# mydat = mat(mydat)
# print(regTrees.createTree(mydat))
#
# testmat = mat(eye(4))
# mat0,mat1 = regTrees.binSplitDataSet(testmat,1,0.5)
# print(mat0,mat1)

# Build a tree on ex2.txt with ops=(0, 1) and print the result of pruning it
# against ex2test.txt. Both data sets are wrapped with mat() before use.
mydat = regTrees.loadDataSet('ex2.txt')
mydat = mat(mydat)
mytree = regTrees.createTree(mydat, ops=(0, 1))
mytest = regTrees.loadDataSet('ex2test.txt')
mytest = mat(mytest)
print(regTrees.prune(mytree, mytest))
Пример #6
0
import regTrees as rt
import plotRegTrees as pt

if __name__ == '__main__':
    # Sample that is classified before and after pruning.
    sample = [5.9, 3, 4.2, 1.75]

    # Load the CSV data, build a tree with the gini evaluation function,
    # print it and hand it to the plotting module.
    records = rt.loadCSV("dataSet.csv")
    plotted_tree = rt.createTree(records, evaluationFunc=rt.gini)
    print(u"myTree:%s" % plotted_tree)
    print(u"绘制决策树:")
    pt.createPlot1(plotted_tree)

    # Build the decision tree used for classification and classify the sample.
    classifier = rt.buildDecisionTree(records, evaluationFunc=rt.gini)
    result_before = rt.classify(sample, classifier)
    print(u"分类后测试结果:")
    print(result_before)
    print()

    # Prune with 0.4 as the second argument and classify the same sample again.
    rt.prune(classifier, 0.4)
    result_after = rt.classify(sample, classifier)
    print(u"剪枝后测试结果:")
    print(result_after)
Пример #7
0
import regTrees
import numpy as np
# Script: load three data sets, build a tree on ex2.txt with ops=(0, 1) and
# print the result of pruning it against ex2test.txt.
# NOTE(review): testMat, myMat and myMat1 are only referenced by the
# commented-out lines below.
testMat = np.mat(np.eye((4)))
# print(testMat)
# mat0 ,mat1 = regTrees.binSplitDataSet(testMat,1,0.5)
# print(mat0)
# print(mat1)
myDat = regTrees.loadDataSet('ex00.txt')
myMat = np.mat(myDat)
# print(regTrees.createTree(myMat))
myDat1 = regTrees.loadDataSet('ex0.txt')
myMat1 = np.mat(myDat1)
# print(regTrees.createTree(myMat1))
myDat2 = regTrees.loadDataSet('ex2.txt')
myMat2 = np.mat(myDat2)
myTree = regTrees.createTree(myMat2, ops=(0, 1))
myDatTest = regTrees.loadDataSet('ex2test.txt')
myMat2Test = np.mat(myDatTest)
print(regTrees.prune(myTree, myMat2Test))
Пример #8
0
# Script fragment: prints the results of several regTrees calls on `tMat`,
# which (like `regTrees` and `mat`) is defined outside the visible code.
print('-------- loadDataSet regLeaf : ')
print(regTrees.regLeaf(tMat))

print('-------- loadDataSet regErr : ')
print(regTrees.regErr(tMat))

print('-------- tMat : ')
print(tMat)

# Tree on tMat with default options.
myTree = regTrees.createTree(tMat)

print('-------- regTree createTree : ')
print(myTree)

# Tree on ex2.txt with ops=(0, 1).
myDat2 = regTrees.loadDataSet('ex2.txt')
myMat2 = mat(myDat2)
myTree2 = regTrees.createTree(myMat2, ops=(0, 1))
print('-------- regTree createTree2 : ')
print(myTree2)

myDat3 = regTrees.loadDataSet('ex2test.txt')
myMat2Test = mat(myDat3)

# NOTE(review): the value returned by prune() is neither stored nor printed.
regTrees.prune(myTree2, myMat2Test)

# Tree built with modelLeaf/modelErr on exp2.txt.
myMat4 = mat(regTrees.loadDataSet('exp2.txt'))
print(
    regTrees.createTree(myMat4, regTrees.modelLeaf, regTrees.modelErr,
                        (1, 10)))
Пример #9
0
# -*- coding: utf-8 -*-

import regTrees
from numpy import *
# Walkthrough of the snake_case regTrees API: split a matrix, build and prune
# trees, then compare a tree forecast with a straight linear fit.
testmat = mat(eye(4))
mat0, mat1 = regTrees.bin_split_data_set(testmat, 1, 0.5)

# Tree on ex00.txt with default options (rebound by the ex2 tree below).
my_dat1 = regTrees.load_data_set('ex00.txt')
tree = regTrees.create_tree(mat(my_dat1))

# Tree on ex2.txt with ops=(0, 1), printed before and after pruning against
# ex2test.txt.
my_dat1 = regTrees.load_data_set('ex2.txt')
tree = regTrees.create_tree(mat(my_dat1), ops=(0, 1))
print(tree)
my_test1 = regTrees.load_data_set('ex2test.txt')
t = regTrees.prune(tree, mat(my_test1))
print(t)

# Tree built with model_leaf/model_err on exp2.txt. The result used to be
# thrown away; print it so running the script actually shows the tree.
my_mat = mat(regTrees.load_data_set('exp2.txt'))
print(regTrees.create_tree(my_mat, regTrees.model_leaf, regTrees.model_err,
                           (1, 10)))

train = mat(regTrees.load_data_set('bikeSpeedVsIq_train.txt'))
test = mat(regTrees.load_data_set('bikeSpeedVsIq_test.txt'))
tree = regTrees.create_tree(train, ops=(1, 20))  # regression tree
# tree = regTrees.create_tree(train, regTrees.model_leaf, regTrees.model_err, (1,20))  # model tree
# NOTE(review): the active tree above is the one labelled "regression tree",
# yet the forecast is evaluated with model_tree_eval -- confirm the evaluator
# matches the tree type (or switch to the "model tree" line above).
yhat = regTrees.create_forecast(tree, test[:, 0], regTrees.model_tree_eval)
# A bare expression is silently discarded when run as a script, so print the
# correlation between the forecast and column 1 of the test set.
print(corrcoef(yhat, test[:, 1], rowvar=0)[0, 1])

# Overwrite yhat row by row with ws[1, 0] * x + ws[0, 0] from linear_solve
# and report the same correlation for that fit.
ws, x, y = regTrees.linear_solve(train)
for i in range(shape(test)[0]):
    yhat[i] = test[i, 0] * ws[1, 0] + ws[0, 0]
print(corrcoef(yhat, test[:, 1], rowvar=0)[0, 1])
# Python 2 script fragment (print statements). Relies on `regTrees`, `mat`,
# `corrcoef`, `homedir` and `myMat1` being defined outside the visible code.
# print "myMat1:",myMat1
print "regTrees.createTree(myMat1):", regTrees.createTree(myMat1)

# 9.4.1 Pre-pruning
myDat2 = regTrees.loadDataSet(homedir + 'ex2.txt')
myMat2 = mat(myDat2)
print "regTrees.createTree(myMat2):", regTrees.createTree(myMat2)

# 9.4.2 Post-pruning
myDat2 = regTrees.loadDataSet(homedir + 'ex2.txt')
myMat2 = mat(myDat2)
myTree = regTrees.createTree(myMat2, ops=(0, 1))
print "myTree:", myTree
myDatTest = regTrees.loadDataSet(homedir + 'ex2test.txt')
myMat2Test = mat(myDatTest)
print "regTrees.prune(myTree,myMat2Test)", regTrees.prune(myTree, myMat2Test)

# 9.5 Model trees
myMat2 = mat(regTrees.loadDataSet(homedir + 'exp2.txt'))
print "regTrees.createTree(myMat2,regTrees.modelLeaf,regTrees.modelErr,(1,10)):", regTrees.createTree(
    myMat2, regTrees.modelLeaf, regTrees.modelErr, (1, 10))

# 9.6 Example: comparing tree regression with standard regression
trainMat = mat(regTrees.loadDataSet(homedir + 'bikeSpeedVsIq_train.txt'))
testMat = mat(regTrees.loadDataSet(homedir + 'bikeSpeedVsIq_test.txt'))

# Forecast column 0 of the test set with a default-leaf tree and keep the
# correlation with column 1 of the test set in corrcoef1.
myTree = regTrees.createTree(trainMat, ops=(1, 20))
yHat = regTrees.createForeCast(myTree, testMat[:, 0])
corrcoef1 = corrcoef(yHat, testMat[:, 1], rowvar=0)[0, 1]

myTree = regTrees.createTree(trainMat, regTrees.modelLeaf, regTrees.modelErr,
Пример #11
0
__author__ = 'bacon'

import regTrees
from numpy import *

# Split a 4x4 identity matrix with binSplitDataSet, then build a tree on
# ex2.txt and print the result of pruning it against ex2test.txt.
# Single-argument print(...) calls behave the same on Python 2 and Python 3.
testMat = mat(eye(4))
print(testMat)

mat0, mat1 = regTrees.binSplitDataSet(testMat, 1, 0.5)
print(mat0)
print(mat1)

# Disabled experiments kept from the original script:
# myDat = regTrees.loadDataSet('ex00.txt')
# myMat = mat(myDat)
# print regTrees.createTree(myMat)
#
# myDat2=regTrees.loadDataSet('ex0.txt')
# myMat2=mat(myDat2)
# print regTrees.createTree(myMat2)
#
# print regTrees.createTree(myMat,ops=(0,1))
#
myDat3 = regTrees.loadDataSet('ex2.txt')
myMat3 = mat(myDat3)
# print regTrees.createTree(myMat3)

myTree = regTrees.createTree(myMat3, ops=(0, 1))
myDatTest = regTrees.loadDataSet('ex2test.txt')
myMatTest = mat(myDatTest)
# Prune against the mat()-wrapped test set. The original passed the raw
# loadDataSet() result (myDatTest) and left myMatTest unused, unlike the
# training data, which is wrapped with mat() before createTree.
print(regTrees.prune(myTree, myMatTest))
Пример #12
0
# Python 2 script fragment (print statements); relies on `regTrees` and
# `numpy` being imported outside the visible code.
###################  Regression tree with pre-pruning, book p.164
myDat1 = regTrees.loadDataSet('ex00.txt')
myMat1 = numpy.mat(myDat1)
trees1 = regTrees.createTree(myMat1,ops=(1,4))
print "预剪枝方法生成树回归是:", trees1
# NOTE(review): repeats the createTree call above with identical arguments;
# looks redundant -- confirm before removing.
trees1 = regTrees.createTree(myMat1,ops=(1,4))

##################  Regression tree with post-pruning, book p.169
myDat2 = regTrees.loadDataSet('ex2.txt')
myMat2 = numpy.mat(myDat2)
trees2 = regTrees.createTree(myMat2,ops=(10,10))
print "\n后剪枝之前的树trees2是:\n", trees2
myDat3 = regTrees.loadDataSet('ex2test.txt')
myMat3 = numpy.mat(myDat3)
trees3 = regTrees.prune(trees2, myMat3)
print "\n后剪枝之后的树trees3 是:\n", trees3

##################  Leaves are model trees (linear models), book p.172
myMat4 = numpy.mat(regTrees.loadDataSet('exp2.txt'))
# Original author's note (book p.170): in a model tree the leaves are linear
# models; modelLeaf returns the linear weights ws and modelErr returns the
# error value (not verifiable from this file).
trees4 = regTrees.createTree(myMat4, leafType = regTrees.modelLeaf, errType = regTrees.modelErr, ops=(1,4))
print "\n叶子节点是模型树的树回归:\n", trees4



################## Tree regression compared with standard regression, book p.174
### Prediction with the regression tree and correlation-coefficient computation
trainMat = numpy.mat(regTrees.loadDataSet('bikeSpeedVsIq_train.txt')) # load the training data
testMat = numpy.mat(regTrees.loadDataSet('bikeSpeedVsIq_test.txt'))   # load the test data
myTree = regTrees.createTree(trainMat, ops = (1, 20))   # tree built from the training data (author's note: regression tree, leaves are constants)
Пример #13
0
# Python 2 script fragment (print statements). Relies on `regTrees`, `mat`
# and `myMat` being defined outside the visible code.
tree_dict=regTrees.createTree(myMat)
print tree_dict

# Tree for ex0.txt.
print ' '
myDat1=regTrees.loadDataSet('ex0.txt')
myMat1=mat(myDat1)
tree_dict=regTrees.createTree(myMat1)
print tree_dict

print ' '
myDat2=regTrees.loadDataSet('ex2.txt')
myMat2=mat(myDat2)
# NOTE(review): with the next line commented out, the print below shows the
# ex0.txt tree again instead of a tree for ex2.txt.
#tree_dict=regTrees.createTree(myMat2)
print tree_dict

# Build with ops=(0,1) and prune against ex2test.txt.
# NOTE(review): the value returned by prune() is neither stored nor printed.
myTree=regTrees.createTree(myMat2,ops=(0,1))
myDataTest=regTrees.loadDataSet('ex2test.txt')
myMatTest=mat(myDataTest)
regTrees.prune(myTree,myMatTest)

## model tree

myMat3=mat(regTrees.loadDataSet('exp2.txt'))

modelTree=regTrees.createTree(myMat3,regTrees.modelLeaf,regTrees.modelErr,(1,10))
print modelTree




Пример #14
0
# Interactive-session style fragment: most results are bare expressions that
# are not stored or printed. Relies on `regTrees`, `np`, `reload`, `myMat`
# and `myMat1` being available from outside the visible code.
regTrees.createTree(myMat1)  # author's note: basically unreadable without a plot

# See how the other parameters affect the model; per the author, this is
# implicitly trimming the tree through parameter settings (pre-pruning).
regTrees.createTree(myMat, ops=(0, 1))  # author's note: the second ops entry is the minimum sample count for a split, so nearly every sample ends up as its own leaf

myDat2 = regTrees.loadDataSet('ex2.txt')
myMat2 = np.mat(myDat2)
regTrees.createTree(myMat2)  # author's note: the default is (1, 4)
regTrees.createTree(myMat2, ops=(10000, 4))

# Post-pruning
reload(regTrees)
myTree = regTrees.createTree(myMat2, ops=(0, 1))
myDatTest = regTrees.loadDataSet('ex2test.txt')
myMat2Test = np.mat(myDatTest)
regTrees.prune(myTree, myMat2Test)  # author's note: did it really prune anything?

# Model-tree part
reload(regTrees)
myMat2 = np.mat(regTrees.loadDataSet('exp2.txt'))
regTrees.createTree(myMat2, regTrees.modelLeaf, regTrees.modelErr,
                    (1, 10))  # author's note: the only difference is passing other leaf-creation and error functions

# Model comparison
reload(regTrees)
trainMat = np.mat(regTrees.loadDataSet('bikeSpeedVsIq_train.txt'))
testMat = np.mat(regTrees.loadDataSet('bikeSpeedVsIq_test.txt'))
myTree = regTrees.createTree(trainMat, ops=(1, 20))
yHat = regTrees.createForeCast(myTree, testMat[:, 0])  # forecast from the tree built above (author's note: a regression tree)
np.corrcoef(yHat, testMat[:, 1], rowvar=0)[0, 1]