Example #1
import regression
from numpy import *

abX, abY = regression.loadDataSet('abalone.txt')
# Training-set error for different kernel widths k
yHat1 = regression.lwlrTest(abX[0:99], abX[0:99], abY[0:99], 1)
yHat10 = regression.lwlrTest(abX[0:99], abX[0:99], abY[0:99], 10)
regression.rssError(abY[0:99], yHat1.T)
regression.rssError(abY[0:99], yHat10.T)

# Check whether the best k also performs well on the test set
yHat01 = regression.lwlrTest(abX[100:199], abX[0:99], abY[0:99], 0.1)
yHat1 = regression.lwlrTest(abX[100:199], abX[0:99], abY[0:99], 1)
yHat10 = regression.lwlrTest(abX[100:199], abX[0:99], abY[0:99], 10)
regression.rssError(abY[100:199], yHat01.T)  # k=0.1 is clearly overfitting
regression.rssError(abY[100:199], yHat1.T)
regression.rssError(abY[100:199], yHat10.T)  # you really are the best... did I write something wrong?
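# For reference, a minimal sketch of what lwlrTest()/rssError() are assumed
# to do here, following the locally weighted linear regression of Machine
# Learning in Action ch. 8 (Gaussian kernel of width k); the actual
# regression.py in use may differ in details.
def lwlrSketch(testPoint, xArr, yArr, k=1.0):
    xMat = mat(xArr); yMat = mat(yArr).T
    m = shape(xMat)[0]
    weights = mat(eye(m))
    for j in range(m):                      # weight each training sample by
        diffMat = testPoint - xMat[j, :]    # its distance to the test point
        weights[j, j] = exp(diffMat * diffMat.T / (-2.0 * k ** 2))
    xTx = xMat.T * (weights * xMat)
    if linalg.det(xTx) == 0.0:
        return None                         # singular matrix, cannot invert
    ws = xTx.I * (xMat.T * (weights * yMat))
    return testPoint * ws

def lwlrTestSketch(testArr, xArr, yArr, k=1.0):
    yHat = zeros(shape(testArr)[0])
    for i in range(shape(testArr)[0]):
        yHat[i] = lwlrSketch(testArr[i], xArr, yArr, k)
    return yHat

def rssErrorSketch(yArr, yHatArr):          # residual sum of squares
    return ((yArr - yHatArr) ** 2).sum()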

# Ridge regression test
from importlib import reload  # needed for reload() in Python 3
reload(regression)
abX, abY = regression.loadDataSet('abalone.txt')
ridgeWeights = regression.ridgeTest(abX, abY)
# Plot the ridge coefficient paths over lambda
fig = plt.figure()
ax = fig.add_subplot(111)
ax.plot(ridgeWeights)
plt.show()
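# A sketch of the assumed ridgeTest() internals (MLiA ch. 8 style): solve
# w = (X^T X + lambda*I)^-1 X^T y on standardized data for 30 values of
# lambda spaced as exp(i - 10); the real regression.py may differ.
def ridgeRegresSketch(xMat, yMat, lam=0.2):
    xTx = xMat.T * xMat
    denom = xTx + eye(shape(xMat)[1]) * lam
    if linalg.det(denom) == 0.0:
        return None                      # still singular despite shrinkage
    return denom.I * (xMat.T * yMat)

def ridgeTestSketch(xArr, yArr, numTestPts=30):
    xMat = mat(xArr); yMat = mat(yArr).T
    yMat = yMat - mean(yMat, 0)                     # center the target
    xMat = (xMat - mean(xMat, 0)) / var(xMat, 0)    # standardize features
    wMat = zeros((numTestPts, shape(xMat)[1]))
    for i in range(numTestPts):
        ws = ridgeRegresSketch(xMat, yMat, exp(i - 10))
        wMat[i, :] = ws.T
    return wMat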

# Lasso-style test via forward stagewise regression
reload(regression)
xArr, yArr = regression.loadDataSet('abalone.txt')
regression.stageWise(xArr, yArr, 0.01, 200)
regression.stageWise(xArr, yArr, 0.001, 5000)
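# Sketch of the assumed stageWise() (forward stagewise regression, MLiA
# ch. 8 style): at every iteration, nudge by +/- eps the one coefficient
# that most reduces the RSS on standardized data; names here are
# illustrative and the real regression.py may differ.
def stageWiseSketch(xArr, yArr, eps=0.01, numIt=100):
    xMat = mat(xArr); yMat = mat(yArr).T
    yMat = yMat - mean(yMat, 0)
    xMat = (xMat - mean(xMat, 0)) / var(xMat, 0)
    m, n = shape(xMat)
    ws = zeros((n, 1)); wsMax = ws.copy()
    returnMat = zeros((numIt, n))               # coefficient path
    for i in range(numIt):
        lowestError = inf
        for j in range(n):
            for sign in [-1, 1]:
                wsTest = ws.copy()
                wsTest[j] += eps * sign
                rssE = ((yMat.A - (xMat * wsTest).A) ** 2).sum()
                if rssE < lowestError:
                    lowestError = rssE; wsMax = wsTest
        ws = wsMax.copy()
        returnMat[i, :] = ws.T
    return returnMat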

# Compare with ordinary least squares
xMat = mat(xArr)
yMat = mat(yArr).T
import regression


def rssError(yArr, yHatArr):
    return ((yArr - yHatArr)**2).sum()


if __name__ == '__main__':
    xArr, yArr = regression.loadDataSet('../data/abalone.txt')
    # yHat01 = regression.lwlrTest(xArr[0:99], xArr[0:99], yArr[0:99], 0.1)
    # yHat1 = regression.lwlrTest(xArr[0:99], xArr[0:99], yArr[0:99], 1)
    # yHat10 = regression.lwlrTest(xArr[0:99], xArr[0:99], yArr[0:99], 10)
# # To quantify the prediction error, use the rssError() function
    # print(rssError(yArr[0:99], yHat01.T))
    # print(rssError(yArr[0:99], yHat1.T))
    # print(rssError(yArr[0:99], yHat10.T))
# print('Error on new data:')
    # yHat01 = regression.lwlrTest(xArr[100:199], xArr[0:99], yArr[0:99], 0.1)
    # yHat1 = regression.lwlrTest(xArr[100:199], xArr[0:99], yArr[0:99], 1)
    # yHat10 = regression.lwlrTest(xArr[100:199], xArr[0:99], yArr[0:99], 10)
    # print(rssError(yArr[100:199], yHat01.T))
    # print(rssError(yArr[100:199], yHat1.T))
    # print(rssError(yArr[100:199], yHat10.T))

# Use ridge regression
    ridgeWeights = regression.ridgeTest(xArr, yArr)
    import matplotlib.pyplot as plt
    fig = plt.figure()
    ax = fig.add_subplot(111)
    ax.plot(ridgeWeights)
    plt.show()
Example #3

import regression
from numpy import *

abX, abY = regression.loadDataSet('abalone.txt')
# Test-set error for different kernel widths k
yHat01 = regression.lwlrTest(abX[100:199], abX[0:99], abY[0:99], 0.1)
regression.rssError(abY[100:199], yHat01.T)
yHat1 = regression.lwlrTest(abX[100:199], abX[0:99], abY[0:99], 1)
regression.rssError(abY[100:199], yHat1.T)
yHat10 = regression.lwlrTest(abX[100:199], abX[0:99], abY[0:99], 10)
regression.rssError(abY[100:199], yHat10.T)
# Compare with simple linear regression
ws = regression.standRegres(abX[0:99], abY[0:99])
yHat = mat(abX[100:199]) * ws
regression.rssError(abY[100:199], yHat.T.A)
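# Sketch of the assumed standRegres() (ordinary least squares via the
# normal equations, MLiA ch. 8 style); the real regression.py may differ.
def standRegresSketch(xArr, yArr):
    xMat = mat(xArr); yMat = mat(yArr).T
    xTx = xMat.T * xMat
    if linalg.det(xTx) == 0.0:
        return None                   # singular design matrix, no inverse
    return xTx.I * (xMat.T * yMat)    # w = (X^T X)^-1 X^T y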

# Test ridge regression
from importlib import reload

reload(regression)
abX, abY = regression.loadDataSet('abalone.txt')
ridgeWeights = regression.ridgeTest(abX, abY)
# Plot the coefficient paths
import matplotlib.pyplot as plt

fig = plt.figure()
ax = fig.add_subplot(111)
ax.plot(ridgeWeights)
plt.show()

# Test forward stagewise linear regression
reload(regression)
xArr, yArr = regression.loadDataSet('abalone.txt')
regression.stageWise(xArr, yArr, 0.01, 200)
regression.stageWise(xArr, yArr, 0.001, 5000)
# Compare with ordinary least squares
xMat = mat(xArr)
Example #4
# -*- coding:utf-8 -*-
import regression
from numpy import *

abX, abY = regression.loadDataSet("abalone.txt")
ridgeWeight = regression.ridgeTest(abX, abY)
# print("ridgeWeight is %s" % ridgeWeight)


# Show how the regression coefficients vary with log(lambda):
# when lambda is very small, the coefficients match ordinary linear regression;
# when lambda is very large, the coefficients all shrink to 0.
import matplotlib.pyplot as plt
fig = plt.figure()
ax = fig.add_subplot(111)
ax.plot(ridgeWeight)
plt.show()
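# Assuming ridgeTest() sweeps lam = exp(i - 10) for i in 0..29 (as in the
# book's implementation), the x-axis above is just the row index; a
# labeled variant of the same plot:
logLam = arange(30) - 10            # log(lambda) grid assumed above
fig = plt.figure()
ax = fig.add_subplot(111)
ax.plot(logLam, ridgeWeight)
ax.set_xlabel('log(lambda)')
ax.set_ylabel('regression coefficient')
plt.show()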




Example #5

import regression
from numpy import *

abX, abY = regression.loadDataSet('abalone.txt')
# Test-set error for different kernel widths k
yHat01 = regression.lwlrTest(abX[100:199], abX[0:99], abY[0:99], 0.1)
regression.rssError(abY[100:199], yHat01.T)
yHat1 = regression.lwlrTest(abX[100:199], abX[0:99], abY[0:99], 1)
regression.rssError(abY[100:199], yHat1.T)
yHat10 = regression.lwlrTest(abX[100:199], abX[0:99], abY[0:99], 10)
regression.rssError(abY[100:199], yHat10.T)

ws = regression.standRegres(abX[0:99], abY[0:99])
yHat = mat(abX[100:199]) * ws
regression.rssError(abY[100:199], yHat.T.A)

# Ridge regression
import regression
dm, ls = regression.loadDataSet('abalone.txt')
ridgeWeights = regression.ridgeTest(dm, ls)
import matplotlib.pyplot as plt
fig = plt.figure()
ax = fig.add_subplot(111)
ax.plot(ridgeWeights)
plt.show()

# Forward stagewise regression
import regression
from numpy import *
dm, ls = regression.loadDataSet('abalone.txt')
wMat = regression.stageWise(dm, ls, 0.01, 200)
wMat = regression.stageWise(dm, ls, 0.001, 5000)
xMat = mat(dm)
yMat = mat(ls).T
xMat = (xMat - mean(xMat, 0)) / var(xMat, 0)  # standardize; note the book's regularize() divides by the variance, not the standard deviation
# yM = mean(yMat,0)
# yMat =yMat-yM
# weights=regression.standRegres(xMat,yMat.T)
# print(weights.T)
# -------- Code to produce Figure 8-7 -------- #
# xArr, yArr = regression.loadDataSet('abalone.txt')
# rightweights = regression.stageWise(xArr, yArr, 0.005, 1000)  # to run this, restore the three commented-out statements inside stageWise()
# import  matplotlib.pyplot as plt
# fig = plt.figure()
# ax=fig.add_subplot(111)
# ax.plot(rightweights)
# plt.show()
# ------- LEGO set price prediction ------- #
import legoAPI
lgx = []
lgy = []
legoAPI.setDataCollect(lgx, lgy)  # the LEGO URLs have expired, so legoAPI.py parses the local pages under the setHtml folder instead
# regression.scrapePage('./setHtml/lego10030.html', 'out.txt', 2002, 3096, 269.99)  # the author's commented-out scrapePage() can also be called directly like this
lgx1 = mat(ones((63, 5)))  # column 0 stays 1 to serve as the intercept term
lgx1[:, 1:5] = mat(lgx)    # columns 1-4 hold the four LEGO features
print(lgx[0])
print(lgx1[0])
ws = regression.standRegres(lgx1, lgy)  # ordinary least squares linear regression
print('ws', end='=')
print(ws)
# print('lgx1[0]*ws', end='='); print(lgx1[0] * ws)
# print('lgx1[-1]*ws', end='='); print(lgx1[-1] * ws)
# print('lgx1[43]*ws', end='='); print(lgx1[43] * ws)
regression.crossValidation(lgx, lgy, 10)
print(regression.ridgeTest(lgx, lgy))
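# A condensed sketch of what crossValidation(lgx, lgy, 10) is assumed to do
# (MLiA ch. 8 style): numVal random 90/10 splits, ridge weights for 30
# lambdas fit on the 90%, errors scored on the held-out 10%; the real
# regression.py may differ.
import random

def crossValidationSketch(xArr, yArr, numVal=10):
    m = len(yArr)
    indexList = list(range(m))
    errorMat = zeros((numVal, 30))            # 30 lambdas per split
    for i in range(numVal):
        random.shuffle(indexList)
        trainX = [xArr[k] for k in indexList[:int(m * 0.9)]]
        trainY = [yArr[k] for k in indexList[:int(m * 0.9)]]
        testX = [xArr[k] for k in indexList[int(m * 0.9):]]
        testY = [yArr[k] for k in indexList[int(m * 0.9):]]
        wMat = regression.ridgeTest(trainX, trainY)
        matTrainX = mat(trainX)               # standardize test data with
        matTestX = (mat(testX) - mean(matTrainX, 0)) / var(matTrainX, 0)  # training statistics
        for k in range(30):
            yEst = matTestX * mat(wMat[k, :]).T + mean(trainY)
            errorMat[i, k] = regression.rssError(yEst.T.A, array(testY))
    meanErrors = mean(errorMat, 0)
    # weights (still in standardized space) of the best-scoring lambda
    return wMat[nonzero(meanErrors == meanErrors.min())]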
Example #7

import regression
from numpy import *
import matplotlib.pyplot as plt

# yHat and yMat come from an earlier fit this example omits
print(corrcoef(yHat.T, yMat))
plt.show()

# Predict abalone age on the training set
featArr, ageArr = regression.loadDataSet('abalone.txt')
# predictionArr01 = regression.lwlrTest(featArr[0:99], featArr[0:99],ageArr[0:99],0.0972)
# predictionArr1 = regression.lwlrTest(featArr[0:99], featArr[0:99],ageArr[0:99],1.0)
# predictionArr10 = regression.lwlrTest(featArr[0:99], featArr[0:99],ageArr[0:99],10)
#
# print(regression.rssError(ageArr[0:99], predictionArr01.T))
# print(regression.rssError(ageArr[0:99], predictionArr1.T))
# print(regression.rssError(ageArr[0:99], predictionArr10.T))
# Predict abalone age on the test set; there, k=2 works best
# predictionArr01 = regression.lwlrTest(featArr[100:199], featArr[0:99],ageArr[0:99],0.0972)
# predictionArr1 = regression.lwlrTest(featArr[100:199], featArr[0:99],ageArr[0:99],1.0)
# predictionArr10 = regression.lwlrTest(featArr[100:199], featArr[0:99],ageArr[0:99],2)
# print(regression.rssError(ageArr[100:199], predictionArr01.T))
# print(regression.rssError(ageArr[100:199], predictionArr1.T))
# print(regression.rssError(ageArr[100:199], predictionArr10.T))

# Ridge regression (something is off here; revisit next time)
ridgeWeights = regression.ridgeTest(featArr, ageArr)
#
# # Forward stagewise regression
stageWise = regression.stageWise(featArr, ageArr, 0.01, 200)
fig = plt.figure()
ax = fig.add_subplot(111)
ax.plot(ridgeWeights)
# ax.plot(stageWise)
# plt.show()
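# The commented-out lines above suggest the stagewise path was meant to be
# plotted as well; a minimal completion, assuming stageWise() returns the
# iteration-by-coefficient matrix as in the book:
fig2 = plt.figure()
ax2 = fig2.add_subplot(111)
ax2.plot(stageWise)                 # 200 iterations x n coefficients
plt.show()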