# Concatenate the continuous and discrete feature matrices column-wise.
x_vec = np.concatenate((x_vec_con, x_vec_dis), axis=1)

# Build the prediction target: the 'registered' and 'casual' columns of
# bike_rel, stacked side by side so each row has two outputs.
y_registered = bike_rel['registered'].values.astype(float)
y_casual = bike_rel['casual'].values.astype(float)
y = np.stack((y_registered, y_casual), axis=1)

# Build the models and predict both targets jointly.
from sklearn.linear_model import MultiTaskLassoCV
from sklearn.model_selection import train_test_split
from sklearn.linear_model import MultiTaskElasticNetCV

# Hold out 20% of the rows for testing; the seed is fixed for repeatability.
x1, x2, y1, y2 = train_test_split(x_vec, y, test_size=0.2, random_state=20)

############ Lasso
# Cross-validated search over alpha in {1e-3, 1e-2, 1e-1} with 8 folds.
mtl = MultiTaskLassoCV(alphas=np.logspace(-3, -1, 3), cv=8, verbose=3)
mtl.fit(x1, y1)
mtl.score(x1, y1)  # score on the training split
mtl.score(x2, y2)  # score on the held-out split

############ ElasticNetCV
# Same alpha grid, additionally searching l1_ratio over {1e-3, 1e-2, 1e-1}.
mte = MultiTaskElasticNetCV(
    l1_ratio=np.logspace(-3, -1, 3),
    alphas=np.logspace(-3, -1, 3),
    cv=8,
    verbose=3,
)
mte.fit(x1, y1)
# Score the elastic-net model itself; the original re-scored `mtl` here,
# so the freshly fitted `mte` was never evaluated.
mte.score(x1, y1)  # score on the training split
mte.score(x2, y2)  # score on the held-out split
# NOTE: every print below is a single-argument print(...) call using
# %-formatting. That form is valid on both Python 2 and Python 3 and emits
# the same text the original Python 2 `print a, b` statements produced
# (including the separator space), whereas the bare print statements are a
# SyntaxError on Python 3.

# When constructing MultiTaskLassoCV we supply a list of candidate alpha
# values; the estimator picks a suitable alpha among them by cross-validation.
multiTaskLassoCV = MultiTaskLassoCV(
    alphas=[0.01, 0.1, 0.5, 1, 3, 5, 7, 10, 20, 100], cv=5)

# Fit on the training set.
multiTaskLassoCV.fit(train_X, train_Y)

# Print the selected alpha.
print("最优的alpha值:  %s" % (multiTaskLassoCV.alpha_,))

# Print the model coefficients and intercept.
print("系数: %s" % (multiTaskLassoCV.coef_,))
print("截距: %s" % (multiTaskLassoCV.intercept_,))
print('训练集R2:  %s' % (r2_score(train_Y, multiTaskLassoCV.predict(train_X)),))

# For linear regression models, the mean squared error (MSE) or the root
# mean squared error (RMSE) on the test set is commonly used to evaluate
# how good the model is.
test_Y_pred = multiTaskLassoCV.predict(test_X)
# Compute the MSE once and reuse it for the RMSE.
test_mse = mean_squared_error(test_Y, test_Y_pred)
print("测试集得分: %s" % (multiTaskLassoCV.score(test_X, test_Y),))
print("测试集MSE: %s" % (test_mse,))
print("测试集RMSE: %s" % (np.sqrt(test_mse),))
print("测试集R2: %s" % (r2_score(test_Y, test_Y_pred),))

# xss is defined elsewhere in the project; from the way its result is
# unpacked and labelled here it returns (TSS, RSS, ESS, R^2) for the given
# targets and predictions.
tss, rss, ess, r2 = xss(Y, multiTaskLassoCV.predict(X))
print("TSS(Total Sum of Squares):  %s" % (tss,))
print("RSS(Residual Sum of Squares):  %s" % (rss,))
print("ESS(Explained Sum of Squares):  %s" % (ess,))
print("R^2:  %s" % (r2,))

print("\n**********测试MultiTaskElasticNet类**********")
# When constructing MultiTaskElasticNet we set the hyper-parameters alpha and
# rho (l1_ratio) explicitly; their defaults are 1.0 and 0.5 respectively.
multiTaskElasticNet = MultiTaskElasticNet(alpha=0.01, l1_ratio=0.7)
# Fit on the training set.
multiTaskElasticNet.fit(train_X, train_Y)