# Baseline linear regression on the California housing data: fit on a 70/30
# split, then print the coefficients, the test-set MSE, 10-fold
# cross-validated negative MSE, and R^2 computed three different ways.
import matplotlib.pyplot as plt
import pandas as pd
from sklearn.datasets import fetch_california_housing as fch  # California housing value dataset
from sklearn.linear_model import LinearRegression as LR
from sklearn.metrics import mean_squared_error as MSE
from sklearn.metrics import r2_score
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import train_test_split

housevalue = fch()  # needs a download; worth running once ahead of time
X = pd.DataFrame(housevalue.data)  # wrapped in a DataFrame for easier inspection
y = housevalue.target

Xtrain, Xtest, Ytrain, Ytest = train_test_split(
    X, y, test_size=0.3, random_state=420
)

# Restore a clean 0..n-1 index on both splits (the split keeps the
# original row labels).
for split in (Xtrain, Xtest):
    split.index = range(len(split))

reg = LR().fit(Xtrain, Ytrain)
yhat = reg.predict(Xtest)

print(reg.coef_)
# Pair every column label with its fitted coefficient.
print(list(zip(Xtrain.columns, reg.coef_)))

# scikit-learn metrics take (y_true, y_pred). MSE is symmetric, so the
# value is the same either way, but the canonical order is used here.
print(MSE(Ytest, yhat))
print(cross_val_score(reg, X, y, cv=10, scoring="neg_mean_squared_error"))

# NOTE(review): the arguments of this first call are swapped relative to
# r2_score(y_true, y_pred). R^2 is not symmetric, so the printed value
# differs from the two correct ones below. Presumably kept on purpose to
# show that the order matters - confirm before removing.
print(r2_score(yhat, Ytest))
print(r2_score(Ytest, yhat))

# The estimator's own score method, evaluated on the test split.
r2 = reg.score(Xtest, Ytest)
print(r2)
# Fit ordinary least squares together with Ridge and Lasso models at several
# alpha values on a 70/30 split of the California housing data. The fitted
# estimators are left in module-level names for later comparison.
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from sklearn.datasets import fetch_california_housing as fch
from sklearn.linear_model import Ridge, LinearRegression, Lasso
from sklearn.model_selection import train_test_split as TTS

housevalue = fch()
y = housevalue.target

# Feature matrix with human-readable (Chinese) column labels.
X = pd.DataFrame(
    housevalue.data,
    columns=[
        "住户收入中位数",
        "房屋使用年代中位数",
        "平均房间数目",
        "平均卧室数目",
        "街区人口",
        "平均入住率",
        "街区的纬度",
        "街区的经度",
    ],
)
X.head()  # bare expression: only displays something in an interactive session

Xtrain, Xtest, Ytrain, Ytest = TTS(X, y, test_size=0.3, random_state=420)

# Give both splits a fresh 0..n-1 index.
for split in (Xtrain, Xtest):
    split.index = range(len(split))

# Unpenalised baseline.
reg = LinearRegression().fit(Xtrain, Ytrain)

# alpha = 0.
# NOTE(review): scikit-learn's documentation advises against alpha=0 for
# Lasso; kept exactly as in the original.
Ridge_ = Ridge(alpha=0).fit(Xtrain, Ytrain)
lasso_ = Lasso(alpha=0).fit(Xtrain, Ytrain)

# alpha = 0.01.
Ridge_2 = Ridge(alpha=0.01).fit(Xtrain, Ytrain)
lasso_2 = Lasso(alpha=0.01).fit(Xtrain, Ytrain)

# alpha = 10**4 (Ridge only).
Ridge_3 = Ridge(alpha=10 ** 4).fit(Xtrain, Ytrain)
# Load the California housing data into one DataFrame (feature columns plus
# a "value" target column), print a quick overview, and split it 70/30 into
# house_train / house_test using a seeded random sample of row positions.
import random

import numpy as np
import pandas as pd
from IPython.core.interactiveshell import InteractiveShell
from sklearn.datasets import fetch_california_housing as fch  # California housing value data

# Modules and libraries needed for linear regression.
import statsmodels.api as sm  # least squares
from statsmodels.formula.api import ols  # OLS model

# Show the output of every expression, not just the last one.
InteractiveShell.ast_node_interactivity = "all"

data = fch()  # load the dataset
house_data = pd.DataFrame(data.data)  # features as a DataFrame, easier to inspect
house_data.columns = data.feature_names  # name the feature columns
house_data.loc[:, "value"] = data.target  # keep features and target in one frame
print(house_data.shape)  # number of rows and columns
print(house_data.head(10))  # first 10 rows

# Train/test split.
random.seed(123)  # fixed seed so the sample is reproducible
# `a` holds the sampled row *positions* that make up the 30% test set.
a = random.sample(range(len(house_data)), round(len(house_data) * 0.3))

# Select all test rows in one positional lookup instead of appending one row
# Series at a time; this keeps the column dtypes and also yields the right
# columns when the sample is empty. `.copy()` makes the test frame
# independent of house_data.
house_test = house_data.iloc[a].copy()

# `a` contains positions, so translate them to index labels before dropping.
# Passing `a` straight to drop() is only correct while the index happens to
# be the default 0..n-1.
house_train = house_data.drop(index=house_data.index[a])