# -*- coding: utf-8 -*-
# Transcript-style script: evaluate LWLR prediction error on the abalone data,
# then run ridge regression and forward-stagewise (lasso-like) regression.
# NOTE(review): abX, abY, yHat1, yHat10, plt and mat are expected to already be
# in scope from the earlier part of this session — confirm against the full file.
from importlib import reload  # reload() is not a builtin in Python 3

# Training-set error for the k values tried above (results were previously
# computed and discarded; print them so the script actually reports them).
print(regression.rssError(abY[0:99], yHat1.T))
print(regression.rssError(abY[0:99], yHat10.T))

# Check whether the best k on the training set also performs well on held-out data.
yHat01 = regression.lwlrTest(abX[100:199], abX[0:99], abY[0:99], 0.1)
yHat1 = regression.lwlrTest(abX[100:199], abX[0:99], abY[0:99], 1)
yHat10 = regression.lwlrTest(abX[100:199], abX[0:99], abY[0:99], 10)
print(regression.rssError(abY[100:199], yHat01.T))  # clearly overfitting
print(regression.rssError(abY[100:199], yHat1.T))
print(regression.rssError(abY[100:199], yHat10.T))

# Ridge regression test
reload(regression)
abX, abY = regression.loadDataSet('abalone.txt')
ridgeWeights = regression.ridgeTest(abX, abY)

# Plot the coefficient paths against the lambda index
fig = plt.figure()
ax = fig.add_subplot(111)
ax.plot(ridgeWeights)
plt.show()

# Lasso-style test via forward stagewise regression
reload(regression)
xArr, yArr = regression.loadDataSet('abalone.txt')
regression.stageWise(xArr, yArr, 0.01, 200)
regression.stageWise(xArr, yArr, 0.001, 5000)

# Compare against ordinary least squares
xMat = mat(xArr)
yMat = mat(yArr).T
import regression


def rssError(yArr, yHatArr):
    """Return the residual sum of squared errors between yArr and yHatArr."""
    residuals = yArr - yHatArr
    return (residuals ** 2).sum()


if __name__ == '__main__':
    featArr, targetArr = regression.loadDataSet('../data/abalone.txt')

    # --- Disabled LWLR experiments (kept for reference) ---
    # yHat01 = regression.lwlrTest(featArr[0:99], featArr[0:99], targetArr[0:99], 0.1)
    # yHat1 = regression.lwlrTest(featArr[0:99], featArr[0:99], targetArr[0:99], 1)
    # yHat10 = regression.lwlrTest(featArr[0:99], featArr[0:99], targetArr[0:99], 10)
    # rssError() quantifies the size of the prediction error:
    # print(rssError(targetArr[0:99], yHat01.T))
    # print(rssError(targetArr[0:99], yHat1.T))
    # print(rssError(targetArr[0:99], yHat10.T))
    # print('在新数据上的误差:')
    # yHat01 = regression.lwlrTest(featArr[100:199], featArr[0:99], targetArr[0:99], 0.1)
    # yHat1 = regression.lwlrTest(featArr[100:199], featArr[0:99], targetArr[0:99], 1)
    # yHat10 = regression.lwlrTest(featArr[100:199], featArr[0:99], targetArr[0:99], 10)
    # print(rssError(targetArr[100:199], yHat01.T))
    # print(rssError(targetArr[100:199], yHat1.T))
    # print(rssError(targetArr[100:199], yHat10.T))

    # Ridge regression: compute the coefficient paths and plot them.
    ridgeWeights = regression.ridgeTest(featArr, targetArr)

    import matplotlib.pyplot as plt
    fig = plt.figure()
    ax = fig.add_subplot(111)
    ax.plot(ridgeWeights)
    plt.show()
# Transcript-style script: compare LWLR against plain linear regression on the
# abalone hold-out split, then test ridge and forward-stagewise regression.
# NOTE(review): abX, abY, yHat01 and mat are expected to already be in scope
# from the earlier part of this session — confirm against the full file.

# Hold-out error for each smoothing parameter k (previously these results were
# computed and silently discarded; print them so the script reports them).
print(regression.rssError(abY[100:199], yHat01.T))
yHat1 = regression.lwlrTest(abX[100:199], abX[0:99], abY[0:99], 1)
print(regression.rssError(abY[100:199], yHat1.T))
yHat10 = regression.lwlrTest(abX[100:199], abX[0:99], abY[0:99], 10)
print(regression.rssError(abY[100:199], yHat10.T))

# Compare with simple (ordinary least squares) linear regression
ws = regression.standRegres(abX[0:99], abY[0:99])
yHat = mat(abX[100:199]) * ws
print(regression.rssError(abY[100:199], yHat.T.A))

# Test ridge regression
from importlib import reload
reload(regression)
abX, abY = regression.loadDataSet('abalone.txt')
ridgeWeights = regression.ridgeTest(abX, abY)

# Plot the ridge coefficient paths
import matplotlib.pyplot as plt
fig = plt.figure()
ax = fig.add_subplot(111)
ax.plot(ridgeWeights)
plt.show()

# Test forward stagewise linear regression
reload(regression)
xArr, yArr = regression.loadDataSet('abalone.txt')
regression.stageWise(xArr, yArr, 0.01, 200)
regression.stageWise(xArr, yArr, 0.001, 5000)

# Compare with least squares
xMat = mat(xArr)
# -*- coding:utf-8 -*-
"""Plot ridge-regression coefficient paths for the abalone data set.

Shows how each regression coefficient varies with log(lambda):
when lambda is very small the result matches ordinary linear regression;
when lambda is very large all coefficients shrink to 0.
"""
import regression
from numpy import *

abX, abY = regression.loadDataSet("abalone.txt")
ridge_weights = regression.ridgeTest(abX, abY)
# print("ridgeWeight is %s" % ridge_weights)

import matplotlib.pyplot as plt

figure = plt.figure()
axes = figure.add_subplot(111)
axes.plot(ridge_weights)
plt.show()
# Transcript-style script: LWLR hold-out errors, OLS comparison, ridge
# regression, and forward stagewise regression on the abalone data.
# NOTE(review): regression, abX, abY and mat are expected to already be in
# scope from the earlier part of this session — confirm against the full file.

# Hold-out error for each smoothing parameter k (results were previously
# computed and silently discarded; print them so the script reports them).
yHat01 = regression.lwlrTest(abX[100:199], abX[0:99], abY[0:99], 0.1)
print(regression.rssError(abY[100:199], yHat01.T))
yHat1 = regression.lwlrTest(abX[100:199], abX[0:99], abY[0:99], 1)
print(regression.rssError(abY[100:199], yHat1.T))
yHat10 = regression.lwlrTest(abX[100:199], abX[0:99], abY[0:99], 10)
print(regression.rssError(abY[100:199], yHat10.T))

# Ordinary least squares on the same split, for comparison
ws = regression.standRegres(abX[0:99], abY[0:99])
yHat = mat(abX[100:199]) * ws
print(regression.rssError(abY[100:199], yHat.T.A))

# Ridge regression
import regression
dm, ls = regression.loadDataSet('abalone.txt')
ridgeWeights = regression.ridgeTest(dm, ls)

import matplotlib.pyplot as plt
fig = plt.figure()
ax = fig.add_subplot(111)
ax.plot(ridgeWeights)
plt.show()

# Forward stagewise regression
import regression
from numpy import *
dm, ls = regression.loadDataSet('abalone.txt')
wMat = regression.stageWise(dm, ls, 0.01, 200)
wMat = regression.stageWise(dm, ls, 0.001, 5000)

# Standardize the features the same way stageWise does (zero mean, unit variance)
xMat = mat(dm)
yMat = mat(ls).T
xMat = (xMat - mean(xMat, 0)) / var(xMat, 0)
# NOTE(review): mat/ones and regression are expected to already be in scope
# from the earlier part of this file — confirm against the full file.
# yM = mean(yMat,0)
# yMat = yMat - yM
# weights = regression.standRegres(xMat, yMat.T)
# print(weights.T)

# -------- code that produces figure 8-7 ------------ #
# xArr, yArr = regression.loadDataSet('abalone.txt')
# rightweights = regression.stageWise(xArr, yArr, 0.005, 1000)  # restore the three commented-out lines inside stageWise() before running
# import matplotlib.pyplot as plt
# fig = plt.figure()
# ax = fig.add_subplot(111)
# ax.plot(rightweights)
# plt.show()

# ------- Lego set price prediction ------------- #
import legoAPI

lgx = []
lgy = []
# The original Lego URLs have expired, so legoAPI.py parses the local pages
# saved under setHtml/ instead.
legoAPI.setDataCollect(lgx, lgy)
# Alternatively, the author's commented-out scrapePage() can be used directly:
# regression.scrapePage('./setHtml/lego10030.html', 'out.txt', 2002, 3096, 269.99)

# Build the design matrix with a leading column of ones for the intercept.
# Use the actual number of scraped samples instead of the hard-coded 63, so the
# script still works if the data set size changes.
lgx1 = mat(ones((len(lgx), 5)))
lgx1[:, 1:5] = mat(lgx)
print(lgx[0])
print(lgx1[0])

# Ordinary least squares linear regression
ws = regression.standRegres(lgx1, lgy)
print('ws', end='=')
print(ws)
# print('lgx1[0]*ws', end='='); print(lgx1[0] * ws)
# print('lgx1[0]*ws', end='='); print(lgx1[-1] * ws)
# print('lgx1[0]*ws', end='='); print(lgx1[43] * ws)

# Cross-validated ridge regression on the same data
regression.crossValidation(lgx, lgy, 10)
print(regression.ridgeTest(lgx, lgy))
# NOTE(review): yHat, yMat, corrcoef, plt and regression are expected to
# already be in scope from the earlier part of this file — confirm.
# Fixed: `print corrcoef(...)` was Python 2 statement syntax (a SyntaxError
# under Python 3); converted to the print() function.
print(corrcoef(yHat.T, yMat))
plt.show()

# Predict abalone age on the training set
featArr, ageArr = regression.loadDataSet('abalone.txt')
# predictionArr01 = regression.lwlrTest(featArr[0:99], featArr[0:99], ageArr[0:99], 0.0972)
# predictionArr1 = regression.lwlrTest(featArr[0:99], featArr[0:99], ageArr[0:99], 1.0)
# predictionArr10 = regression.lwlrTest(featArr[0:99], featArr[0:99], ageArr[0:99], 10)
#
# print(regression.rssError(ageArr[0:99], predictionArr01.T))
# print(regression.rssError(ageArr[0:99], predictionArr1.T))
# print(regression.rssError(ageArr[0:99], predictionArr10.T))

# Predict abalone age on the hold-out set; there k=2 performed best
# predictionArr01 = regression.lwlrTest(featArr[100:199], featArr[0:99], ageArr[0:99], 0.0972)
# predictionArr1 = regression.lwlrTest(featArr[100:199], featArr[0:99], ageArr[0:99], 1.0)
# predictionArr10 = regression.lwlrTest(featArr[100:199], featArr[0:99], ageArr[0:99], 2)
# print(regression.rssError(ageArr[100:199], predictionArr01.T))
# print(regression.rssError(ageArr[100:199], predictionArr1.T))
# print(regression.rssError(ageArr[100:199], predictionArr10.T))

# Ridge regression — TODO(review): original note says "this has a problem,
# take another look next time"
ridgeWeights = regression.ridgeTest(featArr, ageArr)

# Forward stagewise regression (renamed from `stageWise` to avoid shadowing
# the regression.stageWise function name)
stageWiseWeights = regression.stageWise(featArr, ageArr, 0.01, 200)

fig = plt.figure()
ax = fig.add_subplot(111)
ax.plot(ridgeWeights)
# ax.plot(stageWiseWeights)
# plt.show()   # NOTE(review): left disabled as in the original — the figure is built but never shown here