Exemplo n.º 1
0
def test4():
    """Compare three regressors on the bikeSpeedVsIq data set.

    Loads 'bikeSpeedVsIq_train.txt' and 'bikeSpeedVsIq_test.txt' through
    ``regTrees.loadDataSet`` and evaluates, in order:

    1. a tree built with ``createTree``'s default leaf/error functions,
    2. a tree built with ``modelLeaf``/``modelErr`` and evaluated with
       ``modelTreeEval``,
    3. the weights returned by ``regTrees.linearSolve``.

    For each one it prints the correlation coefficient between the
    predictions made from test column 0 and the values in test column 1.
    The second tree and the linear weights are printed as well, and the
    second tree is passed to ``regTrees.plot1withTree_Linear`` together
    with the training matrix.
    """
    trainMat = mat(regTrees.loadDataSet('bikeSpeedVsIq_train.txt'))
    testMat = mat(regTrees.loadDataSet('bikeSpeedVsIq_test.txt'))
    #regTrees.plot1(testMat)

    # 1) Tree with the default leaf/error functions.
    myTree = regTrees.createTree(trainMat, ops=(1, 20))
    yHat = regTrees.createForeCast(myTree, testMat[:, 0])
    # corrcoef returns a matrix; entry [0, 1] is the cross-correlation
    # between the two inputs.
    print(corrcoef(yHat, testMat[:, 1], rowvar=0)[0, 1])
    #regTrees.plot1withTree(trainMat, myTree)

    # 2) Tree with modelLeaf/modelErr leaves.
    myTree = regTrees.createTree(trainMat, regTrees.modelLeaf,
                                 regTrees.modelErr, (1, 20))
    yHat = regTrees.createForeCast(myTree, testMat[:, 0],
                                   regTrees.modelTreeEval)
    print(corrcoef(yHat, testMat[:, 1], rowvar=0)[0, 1])
    print(myTree)
    regTrees.plot1withTree_Linear(trainMat, myTree)

    # 3) Single linear fit: ws[0, 0] is used as the intercept and ws[1, 0]
    # as the slope. Computed for the whole column at once instead of
    # overwriting the previous forecast buffer row by row.
    ws, X, Y = regTrees.linearSolve(trainMat)
    print(ws)
    yHat = testMat[:, 0] * ws[1, 0] + ws[0, 0]
    print(corrcoef(yHat, testMat[:, 1], rowvar=0)[0, 1])
Exemplo n.º 2
0
Arquivo: 9.py Projeto: niumeng07/ML
# Prune the existing tree against the test data, then show the tree.
# NOTE(review): prune's return value is ignored here; confirm that prune
# modifies myTree in place, otherwise the printed tree is the unpruned one.
myMat2Test = mat(myDatTest)
regTrees.prune(myTree, myMat2Test)
print(myTree)

# Build and print a tree that uses modelLeaf/modelErr on 'exp2.txt'.
print("分段函数表示:")
myMat2 = mat(regTrees.loadDataSet('exp2.txt'))
print(regTrees.createTree(myMat2, regTrees.modelLeaf, regTrees.modelErr,
                          (1, 10)))


# Tree with default leaf/error functions on the bikeSpeedVsIq data:
# correlation between the forecast for test column 0 and test column 1.
trainMat = mat(regTrees.loadDataSet('bikeSpeedVsIq_train.txt'))
testMat = mat(regTrees.loadDataSet('bikeSpeedVsIq_test.txt'))
myTree = regTrees.createTree(trainMat, ops=(1, 20))
yHat = regTrees.createForeCast(myTree, testMat[:, 0])
print(corrcoef(yHat, testMat[:, 1], rowvar=0)[0, 1])

# Single linear fit on the same data: ws[0, 0] is used as the intercept and
# ws[1, 0] as the slope. The prediction is computed for the whole column at
# once rather than row by row into the previous forecast buffer.
ws, X, Y = regTrees.linearSolve(trainMat)
print(ws)
yHat = testMat[:, 0] * ws[1, 0] + ws[0, 0]
print(corrcoef(yHat, testMat[:, 1], rowvar=0)[0, 1])

# Minimal Tk check: show a "Hello World" label and run the event loop.
# The module is named `Tkinter` on Python 2 and `tkinter` on Python 3; try
# the Python 3 name first and fall back so the snippet runs on either.
try:
    from tkinter import *
except ImportError:
    from Tkinter import *
root = Tk()
myLabel = Label(root, text="Hello World")
myLabel.grid()
root.mainloop()  # blocks until the window is closed




# Every print below takes a single argument, so the call form behaves the
# same on Python 2 and Python 3 (the statement form is a SyntaxError on 3).
myMat2 = mat(myDat2)
# print(regTrees.createTree(myMat2))
print(regTrees.createTree(myMat2, ops=(10000, 4)))

# Build a tree with ops=(0, 1), then print the result of pruning it against
# the 'ex2test.txt' data.
myTree = regTrees.createTree(myMat2, ops=(0, 1))
myDatTest = regTrees.loadDataSet('ex2test.txt')
myMat2Test = mat(myDatTest)
print(regTrees.prune(myTree, myMat2Test))

# Tree with modelLeaf/modelErr on 'exp2.txt'.
myMat2 = mat(regTrees.loadDataSet('exp2.txt'))
print(regTrees.createTree(myMat2, regTrees.modelLeaf, regTrees.modelErr,
                          (1, 10)))

# Tree with default leaf/error functions on the bikeSpeedVsIq data:
# correlation between the forecast for test column 0 and test column 1.
trainMat = mat(regTrees.loadDataSet('bikeSpeedVsIq_train.txt'))
testMat = mat(regTrees.loadDataSet('bikeSpeedVsIq_test.txt'))
myTree = regTrees.createTree(trainMat, ops=(1, 20))
yHat = regTrees.createForeCast(myTree, testMat[:, 0])
print(corrcoef(yHat, testMat[:, 1], rowvar=0)[0, 1])

# Same data, tree with modelLeaf/modelErr evaluated with modelTreeEval.
myTree = regTrees.createTree(trainMat,
                             regTrees.modelLeaf,
                             regTrees.modelErr,
                             ops=(1, 20))
yHat = regTrees.createForeCast(myTree, testMat[:, 0], regTrees.modelTreeEval)
print(corrcoef(yHat, testMat[:, 1], rowvar=0)[0, 1])

# Single linear fit: ws[0, 0] is used as the intercept and ws[1, 0] as the
# slope. Computed for the whole column at once instead of row by row into
# the previous forecast buffer.
ws, X, Y = regTrees.linearSolve(trainMat)
print(ws)
yHat = testMat[:, 0] * ws[1, 0] + ws[0, 0]
print(corrcoef(yHat, testMat[:, 1], rowvar=0)[0, 1])
Exemplo n.º 4
0
# NOTE: the prints below use the Python 3 print() function; the emitted text
# is the same as the original Python 2 print statements produced.

# corrcoef returns a square matrix; entry [0, 1] (row 0, column 1) is the
# correlation coefficient between the two inputs.
corrValue = numpy.corrcoef(yHat, testMat[:,-1], rowvar = False)[0,1] # correlation between predictions and actual values
print("\n回归树(叶子节点是常数项)时,数据的相关系数是:", corrValue)

### Model tree: predictions and correlation coefficient
myTree = regTrees.createTree(trainMat, regTrees.modelLeaf, regTrees.modelErr, (1, 20))
yHat = regTrees.createForeCast(myTree , testMat[:,0], regTrees.modelTreeEval) # testMat[:,0]: the test inputs to predict for
corrValue = numpy.corrcoef(yHat, testMat[:,-1], rowvar = False)[0,1] # testMat[:,-1]: the actual values
print("模型树(叶子节点是线性模型)时,数据的相关系数是:", corrValue)
# The conclusions printed below are fixed strings; they are not derived from
# the values computed above.
print("\n结果对比如下:")
print("从相关系数来看,模型树优于回归树")
print("说明:相关系数最大值为1,所以预测数据与真实数据的相关系数越接近1,表示预测结果越好。")

### Standard linear regression: predictions and correlation coefficient
print("\n标准线性回归(上一章)的预测情况")
ws, X, Y = regTrees.linearSolve(trainMat)   # weights ws of the linear regression model
print("回归系数ws.T的值是:", ws.T,"(第一个量是常数偏量)")
# Predictions for the whole column at once: ws[0,0] is used as the constant
# offset and ws[1,0] as the slope. testMat has no constant-1 column added, so
# this formula is not general; it only fits a data set with a single feature.
yHat = testMat[:,0] * ws[1,0] + ws[0,0]
corrValue = numpy.corrcoef(yHat, testMat[:,1], rowvar = False)[0,1] # correlation coefficient
print("标准线性回归的相关系数是:", corrValue)
print("\n结果对比如下:")
print("模型树 优于 回归树 优于 标准线性回归(上一章)")

# Recorded output of a run (labels translated from the Chinese runtime strings):
# Regression tree (constant leaves), correlation coefficient: 0.9640852318222141
# Model tree (linear-model leaves), correlation coefficient: 0.9760412191380623
#
# Comparison of results:
# Judging by the correlation coefficient, the model tree beats the regression tree
Exemplo n.º 5
0
# `reload` is a builtin on Python 2 only; on Python 3 take it from importlib.
try:
    reload
except NameError:
    from importlib import reload

myTree = regTrees.createTree(myMat2, ops=(0, 1))
myDatTest = regTrees.loadDataSet('ex2test.txt')
myMat2Test = np.mat(myDatTest)
# NOTE(review): the return value is discarded -- did this really prune
# anything? Confirm that prune modifies myTree in place.
regTrees.prune(myTree, myMat2Test)

# Model tree section
reload(regTrees)
myMat2 = np.mat(regTrees.loadDataSet('exp2.txt'))
# The only difference from the call above is passing different leaf-creation
# and error functions.
regTrees.createTree(myMat2, regTrees.modelLeaf, regTrees.modelErr,
                    (1, 10))

# Model comparison
# The three correlation coefficients below are printed: as bare expressions
# their values would be silently discarded when this runs as a script.
reload(regTrees)
trainMat = np.mat(regTrees.loadDataSet('bikeSpeedVsIq_train.txt'))
testMat = np.mat(regTrees.loadDataSet('bikeSpeedVsIq_test.txt'))
myTree = regTrees.createTree(trainMat, ops=(1, 20))  # build a regression tree
yHat = regTrees.createForeCast(myTree, testMat[:, 0])
print(np.corrcoef(yHat, testMat[:, 1], rowvar=0)[0, 1])

myTree = regTrees.createTree(trainMat, regTrees.modelLeaf, regTrees.modelErr,
                             (1, 20))  # build a model tree from the same data
yHat = regTrees.createForeCast(myTree, testMat[:, 0], regTrees.modelTreeEval)
print(np.corrcoef(yHat, testMat[:, 1], rowvar=0)[0, 1])  # seemed slightly higher

# Now try ordinary linear regression: ws[0, 0] is used as the intercept and
# ws[1, 0] as the slope, applied to the whole column at once.
ws, X, Y = regTrees.linearSolve(trainMat)
yHat = testMat[:, 0] * ws[1, 0] + ws[0, 0]
print(np.corrcoef(yHat, testMat[:, 1], rowvar=0)[0, 1])  # seemed to be the lowest

# Roughly: model tree > regression tree > linear regression