# Exemplo n.º 1
0
from sklearn.linear_model import LinearRegression as LR
from sklearn.model_selection import train_test_split
from sklearn.model_selection import cross_val_score
from sklearn.datasets import fetch_california_housing as fch  #加利福尼亚房屋价值数据集
import pandas as pd
from sklearn.metrics import mean_squared_error as MSE
import matplotlib.pyplot as plt
# Fetch the California housing data set (this needs a download, so it is
# worth running once ahead of time).
housevalue = fch()
# Features go into a DataFrame so they are easy to inspect.
X, y = pd.DataFrame(housevalue.data), housevalue.target

# Hold out 30% of the rows as the test set, with a fixed seed.
Xtrain, Xtest, Ytrain, Ytest = train_test_split(
    X, y, test_size=0.3, random_state=420)

# Restore a 0..n-1 index on both feature frames.
for frame in (Xtrain, Xtest):
    frame.index = range(len(frame))

# Fit a linear regression on the training split and predict the test split.
reg = LR().fit(Xtrain, Ytrain)
yhat = reg.predict(Xtest)
print(reg.coef_)
# Pair every feature column with its fitted coefficient.
print([*zip(Xtrain.columns, reg.coef_)])
# scikit-learn metrics take (y_true, y_pred). MSE is symmetric, so the printed
# value is the same either way, but the conventional order is used here so the
# call cannot be copied into an order-sensitive metric by mistake.
print(MSE(Ytest, yhat))
# 10-fold cross-validation on the full data, scored by negative MSE.
print(cross_val_score(reg, X, y, cv=10, scoring="neg_mean_squared_error"))
from sklearn.metrics import r2_score

# NOTE(review): r2_score's signature is (y_true, y_pred), so the first call
# below has the two roles swapped. It looks intentional, as a contrast with
# the correctly ordered call right after it -- confirm before removing.
print(r2_score(y_true=yhat, y_pred=Ytest))
print(r2_score(y_true=Ytest, y_pred=yhat))
# The estimator's own score on the test split, kept in r2 and printed.
r2 = reg.score(Xtest, Ytest)
print(r2)
import numpy as np
import pandas as pd
from sklearn.linear_model import Ridge, LinearRegression, Lasso
from sklearn.model_selection import train_test_split as TTS
from sklearn.datasets import fetch_california_housing as fch
import matplotlib.pyplot as plt

# Load the California housing data and label the eight feature columns while
# building the DataFrame.
housevalue = fch()
X = pd.DataFrame(
    housevalue.data,
    columns=[
        "住户收入中位数", "房屋使用年代中位数", "平均房间数目", "平均卧室数目",
        "街区人口", "平均入住率", "街区的纬度", "街区的经度",
    ],
)
y = housevalue.target
X.head()  # preview of the first rows; the result is not stored or printed
# 70/30 train/test split with a fixed seed.
Xtrain, Xtest, Ytrain, Ytest = TTS(
    X, y, random_state=420, test_size=0.3)

# Give both feature frames a fresh 0..n-1 index.
for frame in (Xtrain, Xtest):
    frame.index = range(len(frame))

# Plain linear regression on the training split.
reg = LinearRegression().fit(Xtrain, Ytrain)

# Ridge fitted at three regularisation strengths: 0, 0.01 and 10**4.
Ridge_, Ridge_2, Ridge_3 = (
    Ridge(alpha=strength).fit(Xtrain, Ytrain)
    for strength in (0, 0.01, 10**4)
)

# Lasso fitted at two strengths: 0 and 0.01.
# NOTE(review): scikit-learn's documentation advises against Lasso with
# alpha=0 (it recommends LinearRegression instead), so warnings are expected
# from the first fit -- presumably kept on purpose for comparison.
lasso_, lasso_2 = (
    Lasso(alpha=strength).fit(Xtrain, Ytrain)
    for strength in (0, 0.01)
)
# Exemplo n.º 3
0

import numpy as np
import pandas as pd
from sklearn.datasets import fetch_california_housing as fch  #加载加利福尼亚房屋价值数据
#加载线性回归需要的模块和库
import statsmodels.api as sm #最小二乘
from statsmodels.formula.api import ols #加载ols模型


# Make IPython show output for every line, by setting the shell's
# ast_node_interactivity option to "all".
from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity = "all"


# Load the data set.
data = fch()
# Put the features in a DataFrame (easier to inspect), with the columns named
# after the data set's own feature names.
house_data = pd.DataFrame(data.data, columns=data.feature_names)
# Append the target as the "value" column so features and target sit together.
house_data["value"] = data.target
print(house_data.shape)  # size of the data
print(house_data.head(10))  # first 10 rows

# Split into a training set and a test set.
import random
random.seed(123)  # fixed seed so the sampled rows are reproducible
# Sample 30% of the row positions, without replacement, for the test set.
a = random.sample(range(len(house_data)), round(len(house_data) * 0.3))
# Take all sampled rows in a single positional lookup instead of appending
# one row Series at a time and rebuilding a DataFrame from the list. The rows
# keep their original labels and the sampled order; .copy() makes the test
# set an independent frame, as the rebuilt DataFrame was.
house_test = house_data.iloc[a].copy()
# `a` holds positions, so drop by the labels of the sampled rows. This matches
# drop(a) while the index is the default 0..n-1 and stays correct otherwise.
house_train = house_data.drop(index=house_test.index)