# Hyper-parameter search: MultiTaskElasticNetCV cross-validates over the
# (alpha, l1_ratio) grid, then the selected pair is used to refit a model
# on the full dataset.
X_train, X_test, Y_train, Y_test = train_test_split(
    X, Y, test_size=0.25, random_state=1)

folds = 5
alphas = np.logspace(1, 5, 3)                    # 3 alphas, log-spaced in [10, 1e5]
l1_ratios = np.linspace(0, 1, 2, endpoint=True)  # just the extremes: pure L2 and pure L1

models = MultiTaskElasticNetCV(l1_ratio=l1_ratios, alphas=alphas,
                               verbose=1, cv=folds, n_jobs=-1)
models.fit(X_train, Y_train)

print("Alpha: ", models.alpha_)
print("L1 ratio: ", models.l1_ratio_)
print("Score of Elastic-net on test data: ", models.score(X_test, Y_test))

# Refit on train+test with the CV-selected hyper-parameters.
# NOTE(review): ElasticNet is the single-task estimator; for a multi-output
# Y, MultiTaskElasticNet would normally be used here — confirm Y's shape.
model_EN = ElasticNet(l1_ratio=models.l1_ratio_, alpha=models.alpha_)
model_EN.fit(np.concatenate((X_train, X_test)),
             np.concatenate((Y_train, Y_test)))

# Rounded integer predictions on the held-out split. Note these come from
# the CV model, not the refit model_EN — presumably intentional; verify.
test = np.rint(models.predict(X_test)).astype('int16')
coeff = model_EN.coef_.T
# When constructing MultiTaskElasticNetCV we supply a list of candidate
# alpha values; the class picks the best one via 5-fold cross-validation.
multiTaskElasticNetCV = MultiTaskElasticNetCV(
    alphas=[0.01, 0.1, 0.5, 1, 3, 5, 7, 10, 20, 100], cv=5)
# Fit on the training set.
multiTaskElasticNetCV.fit(train_X, train_Y)
# Report the selected alpha and the fitted parameters.
print("最优的alpha值: ", multiTaskElasticNetCV.alpha_)
print("系数:", multiTaskElasticNetCV.coef_)
print("截距:", multiTaskElasticNetCV.intercept_)
print('训练集R2: ', r2_score(train_Y, multiTaskElasticNetCV.predict(train_X)))

# For a linear regression model, quality is usually judged by the mean
# squared error (MSE) or root mean squared error (RMSE) on the test set.
test_Y_pred = multiTaskElasticNetCV.predict(test_X)
print("测试集得分:", multiTaskElasticNetCV.score(test_X, test_Y))
print("测试集MSE:", mean_squared_error(test_Y, test_Y_pred))
print("测试集RMSE:", np.sqrt(mean_squared_error(test_Y, test_Y_pred)))
print("测试集R2:", r2_score(test_Y, test_Y_pred))

# Variance decomposition over the full dataset: total, residual and
# explained sums of squares plus R^2 (xss is a project helper defined
# elsewhere in this file/project).
tss, rss, ess, r2 = xss(Y, multiTaskElasticNetCV.predict(X))
print("TSS(Total Sum of Squares): ", tss)
print("RSS(Residual Sum of Squares): ", rss)
print("ESS(Explained Sum of Squares): ", ess)
print("R^2: ", r2)

print("\n**********测试BayesianRidge类**********")
bayesianRidge = BayesianRidge()
# Fit on the training set; ravel() flattens the single-column DataFrame
# target to the 1-D array this estimator expects.
bayesianRidge.fit(train_X, train_Y.values.ravel())