def test4():
    """Compare three predictors on the bikeSpeedVsIq train/test files.

    Fits, in turn, a tree with regTrees' default leaf/error functions, a
    model tree (``modelLeaf`` / ``modelErr``) and a single linear solve on
    the training matrix.  After each fit, prints the correlation coefficient
    between the predictions for test column 0 and test column 1.  The model
    tree is also printed and passed to ``regTrees.plot1withTree_Linear``.
    """
    train_data = mat(regTrees.loadDataSet('bikeSpeedVsIq_train.txt'))
    test_data = mat(regTrees.loadDataSet('bikeSpeedVsIq_test.txt'))

    def report_correlation(predicted):
        # corrcoef returns a matrix; [0, 1] pairs predictions with targets.
        print(corrcoef(predicted, test_data[:, 1], rowvar=0)[0, 1])

    # Optional visual check (disabled): regTrees.plot1(test_data)

    # 1) Tree built with the default leaf and error functions.
    tree = regTrees.createTree(train_data, ops=(1, 20))
    forecast = regTrees.createForeCast(tree, test_data[:, 0])
    report_correlation(forecast)
    # Optional visual check (disabled): regTrees.plot1withTree(train_data, tree)

    # 2) Model tree: leaves and error come from modelLeaf / modelErr.
    tree = regTrees.createTree(
        train_data, regTrees.modelLeaf, regTrees.modelErr, (1, 20)
    )
    forecast = regTrees.createForeCast(
        tree, test_data[:, 0], regTrees.modelTreeEval
    )
    report_correlation(forecast)
    print(tree)
    regTrees.plot1withTree_Linear(train_data, tree)

    # 3) One global linear fit; only the weights are needed here.
    weights, _, _ = regTrees.linearSolve(train_data)
    print(weights)
    # Reuse the forecast buffer: slope * x + intercept for every test row.
    for row in range(shape(test_data)[0]):
        forecast[row] = test_data[row, 0] * weights[1, 0] + weights[0, 0]
    report_correlation(forecast)
# Prune the previously built tree against the held-out test data and show it.
myMat2Test = mat(myDatTest)
# NOTE(review): prune()'s return value is discarded, so the tree printed below
# is only the pruned one if prune() modifies myTree in place -- confirm.
regTrees.prune(myTree, myMat2Test)
print(myTree)

# Model tree on exp2.txt (the printed label means "piecewise function form").
print("分段函数表示:")
myMat2 = mat(regTrees.loadDataSet('exp2.txt'))
print(regTrees.createTree(myMat2, regTrees.modelLeaf, regTrees.modelErr, (1, 10)))

# Tree with default leaf/error functions on the bike data: print the
# correlation between its predictions and test column 1.
trainMat = mat(regTrees.loadDataSet('bikeSpeedVsIq_train.txt'))
testMat = mat(regTrees.loadDataSet('bikeSpeedVsIq_test.txt'))
myTree = regTrees.createTree(trainMat, ops=(1, 20))
yHat = regTrees.createForeCast(myTree, testMat[:, 0])
print(corrcoef(yHat, testMat[:, 1], rowvar=0)[0, 1])

# Same comparison for a single linear solve; yHat is overwritten row by row
# with slope * x + intercept.
ws, X, Y = regTrees.linearSolve(trainMat)
print(ws)
for i in range(shape(testMat)[0]):
    yHat[i] = testMat[i, 0] * ws[1, 0] + ws[0, 0]
print(corrcoef(yHat, testMat[:, 1], rowvar=0)[0, 1])

# Minimal Tk window.  The module is named ``Tkinter`` on Python 2 and
# ``tkinter`` on Python 3, so try the old name first and fall back.
# NOTE(review): the star import rebinds module-level names; Tk's constants
# include X and Y, which replaces the X and Y assigned above.
try:
    from Tkinter import *
except ImportError:
    from tkinter import *
root = Tk()
myLabel = Label(root, text="Hello World")
myLabel.grid()
root.mainloop()
# All output below uses single-argument print(...) calls, which print the
# same text on Python 2 and are valid syntax on Python 3.
myMat2 = mat(myDat2)
# Default-parameter tree (disabled): print(regTrees.createTree(myMat2))
print(regTrees.createTree(myMat2, ops=(10000, 4)))

# Build a tree with ops=(0, 1), then prune it against ex2test.txt and print
# whatever prune() returns.
myTree = regTrees.createTree(myMat2, ops=(0, 1))
myDatTest = regTrees.loadDataSet('ex2test.txt')
myMat2Test = mat(myDatTest)
print(regTrees.prune(myTree, myMat2Test))

# Model tree (modelLeaf / modelErr) on exp2.txt.
myMat2 = mat(regTrees.loadDataSet('exp2.txt'))
print(regTrees.createTree(myMat2, regTrees.modelLeaf, regTrees.modelErr, (1, 10)))

# Bike data: correlation between predictions and test column 1, first for a
# tree with the default leaf/error functions ...
trainMat = mat(regTrees.loadDataSet('bikeSpeedVsIq_train.txt'))
testMat = mat(regTrees.loadDataSet('bikeSpeedVsIq_test.txt'))
myTree = regTrees.createTree(trainMat, ops=(1, 20))
yHat = regTrees.createForeCast(myTree, testMat[:, 0])
print(corrcoef(yHat, testMat[:, 1], rowvar=0)[0, 1])

# ... then for a model tree ...
myTree = regTrees.createTree(trainMat, regTrees.modelLeaf, regTrees.modelErr, ops=(1, 20))
yHat = regTrees.createForeCast(myTree, testMat[:, 0], regTrees.modelTreeEval)
print(corrcoef(yHat, testMat[:, 1], rowvar=0)[0, 1])

# ... and finally for a single linear solve; yHat is overwritten row by row
# with slope * x + intercept.
ws, X, Y = regTrees.linearSolve(trainMat)
print(ws)
for i in range(shape(testMat)[0]):
    yHat[i] = testMat[i, 0] * ws[1, 0] + ws[0, 0]
print(corrcoef(yHat, testMat[:, 1], rowvar=0)[0, 1])
# Output uses print(...) with "%s %s" formatting instead of Python 2 print
# statements: the text is the same as the original comma-separated prints
# (items joined by one space) and the code also parses on Python 3.

# corrcoef yields a square matrix; entry [0, 1] (row 0, column 1) is the
# correlation between the two inputs.
corrValue = numpy.corrcoef(yHat, testMat[:, -1], rowvar=False)[0, 1]  # predicted vs. actual values
print("%s %s" % ("\n回归树(叶子节点是常数项)时,数据的相关系数是:", corrValue))

# --- Model tree: predictions and correlation coefficient ---
myTree = regTrees.createTree(trainMat, regTrees.modelLeaf, regTrees.modelErr, (1, 20))
yHat = regTrees.createForeCast(myTree, testMat[:, 0], regTrees.modelTreeEval)  # testMat[:, 0]: test inputs to predict
corrValue = numpy.corrcoef(yHat, testMat[:, -1], rowvar=False)[0, 1]  # testMat[:, -1]: actual values
print("%s %s" % ("模型树(叶子节点是线性模型)时,数据的相关系数是:", corrValue))
print("\n结果对比如下:")
print("从相关系数来看,模型树优于回归树")
print("说明:相关系数最大值为1,所以预测数据与真实数据的相关系数越接近1,表示预测结果越好。")

# --- Plain linear regression: predictions and correlation coefficient ---
print("\n标准线性回归(上一章)的预测情况")
ws, X, Y = regTrees.linearSolve(trainMat)  # weights ws of the linear model
print("%s %s %s" % ("回归系数ws.T的值是:", ws.T, "(第一个量是常数偏量)"))
for i in range(numpy.shape(testMat)[0]):  # fill in the predictions (column vector)
    # ws is a column vector whose first entry ws[0, 0] is the constant bias.
    # testMat has no constant-1 column added, so this formula is not general:
    # it only works for a data set with a single feature.
    yHat[i] = testMat[i, 0] * ws[1, 0] + ws[0, 0]
corrValue = numpy.corrcoef(yHat, testMat[:, 1], rowvar=False)[0, 1]  # correlation coefficient
print("%s %s" % ("标准线性回归的相关系数是:", corrValue))
print("\n结果对比如下:")
print("模型树 优于 回归树 优于 标准线性回归(上一章)")

# Sample output recorded by the original author:
#   regression tree (constant leaves) correlation:   0.9640852318222141
#   model tree (linear-model leaves) correlation:    0.9760412191380623
#
#   followed by the comparison text: judging by the correlation coefficient,
#   the model tree beats the regression tree.
# `reload` is a builtin only on Python 2; on Python 3 take it from importlib
# so the reload(regTrees) calls below work on both.
try:
    reload
except NameError:
    from importlib import reload

# NOTE(review): several statements below are bare expressions whose values
# are only shown in an interactive session; run as a script they are
# computed and discarded.

# Build a tree with ops=(0, 1) and prune it against ex2test.txt.
myTree = regTrees.createTree(myMat2, ops=(0, 1))
myDatTest = regTrees.loadDataSet('ex2test.txt')
myMat2Test = np.mat(myDatTest)
regTrees.prune(myTree, myMat2Test)  # original author's note: unsure whether anything was really pruned

# --- Model-tree section ---
reload(regTrees)
myMat2 = np.mat(regTrees.loadDataSet('exp2.txt'))
# The only difference from the call above is passing a different
# leaf-generating function and error function.
regTrees.createTree(myMat2, regTrees.modelLeaf, regTrees.modelErr, (1, 10))

# --- Model comparison ---
reload(regTrees)
trainMat = np.mat(regTrees.loadDataSet('bikeSpeedVsIq_train.txt'))
testMat = np.mat(regTrees.loadDataSet('bikeSpeedVsIq_test.txt'))

# Regression tree.
myTree = regTrees.createTree(trainMat, ops=(1, 20))
yHat = regTrees.createForeCast(myTree, testMat[:, 0])
np.corrcoef(yHat, testMat[:, 1], rowvar=0)[0, 1]

# Model tree built from the same data.
myTree = regTrees.createTree(trainMat, regTrees.modelLeaf, regTrees.modelErr, (1, 20))
yHat = regTrees.createForeCast(myTree, testMat[:, 0], regTrees.modelTreeEval)
np.corrcoef(yHat, testMat[:, 1], rowvar=0)[0, 1]  # author's note: seems slightly higher

# Ordinary linear regression; yHat is overwritten row by row with
# slope * x + intercept.
ws, X, Y = regTrees.linearSolve(trainMat)
for i in range(np.shape(testMat)[0]):
    yHat[i] = testMat[i, 0] * ws[1, 0] + ws[0, 0]
np.corrcoef(yHat, testMat[:, 1], rowvar=0)[0, 1]  # author's note: seems to be the lowest
# Author's rough ranking: model tree > regression tree > linear regression.