def OMP_cv(problem, **kwargs):
    r"""Select entries of ``problem.data`` via cross-validated OMP.

    Each entry of ``problem.data`` supplies one row of the data matrix
    (its ``['data']['values']`` sequence); the regression target is
    ``problem.goal['data']['values']``.  An ``OrthogonalMatchingPursuitCV``
    estimator is fitted on the transposed matrix, and the entries whose
    fitted coefficient exceeds the tolerance in magnitude are kept.

    Requirements
    ------------
    kwargs['choose'] must be a positive integer
        Forwarded to the estimator as ``max_iter``.
    kwargs['coef_tolerance'] must be a nonnegative float
        Entries with ``abs(coefficient) <= coef_tolerance`` are dropped.

    Returns
    -------
    output : tuple (optimum, maximum)
        ``optimum`` is the list of retained ``problem.data`` entries, in
        their original order; ``maximum`` is the value returned by the
        estimator's ``score`` on the same data it was fitted on.
    """
    # One row per problem.data entry.
    feature_rows = numpy.array(
        [entry['data']['values'] for entry in problem.data]
    )

    model = OrthogonalMatchingPursuitCV(max_iter=kwargs['choose'])

    # The estimator is given the transpose, so every problem.data entry
    # becomes one column of the matrix passed to fit/score.
    design = feature_rows.T
    target = problem.goal['data']['values']
    model.fit(design, target)

    # Coefficient i belongs to problem.data[i]; keep the entries whose
    # coefficient magnitude is strictly above the tolerance.
    optimum = []
    for index, coefficient in enumerate(model.coef_):
        if abs(coefficient) > kwargs['coef_tolerance']:
            optimum.append(problem.data[index])

    maximum = model.score(design, target)
    return (optimum, maximum)
# Exercise OrthogonalMatchingPursuitCV with 5-fold cross-validation: fit on
# the training split, then print the fitted parameters and several metrics.
# NOTE(review): train_X/train_Y/test_X/test_Y/X/Y, xss, r2_score,
# mean_squared_error and np are defined outside this fragment.
print "\n**********测试OrthogonalMatchingPursuitCV类**********"
ompCV = OrthogonalMatchingPursuitCV(cv=5)
# Fit on the training set.
# train_Y.values.ravel() flattens the target before fitting
# (assumes train_Y is a pandas object exposing .values -- TODO confirm).
ompCV.fit(train_X, train_Y.values.ravel())
# Print the best n_nonzero_coefs value.
print "最好的n_nonzero_coefs值: ", ompCV.n_nonzero_coefs_
# Print the model coefficients (and the intercept).
print "系数:", ompCV.coef_
print "截距:", ompCV.intercept_
print '训练集R2: ', r2_score(train_Y, ompCV.predict(train_X))
# For linear regression models, the Mean Squared Error (MSE) or the
# Root Mean Squared Error (RMSE) on the test set is generally used to
# judge how good the model is.
test_Y_pred = ompCV.predict(test_X)
print "测试集得分:", ompCV.score(test_X, test_Y)
print "测试集MSE:", mean_squared_error(test_Y, test_Y_pred)
print "测试集RMSE:", np.sqrt(mean_squared_error(test_Y, test_Y_pred))
print "测试集R2:", r2_score(test_Y, test_Y_pred)
# xss is defined elsewhere; its four return values are unpacked here and,
# judging by the labels printed below, are presumably TSS, RSS, ESS and
# R^2 for predictions on the full data set X/Y -- verify against xss.
tss, rss, ess, r2 = xss(Y, ompCV.predict(X))
print "TSS(Total Sum of Squares): ", tss
print "RSS(Residual Sum of Squares): ", rss
print "ESS(Explained Sum of Squares): ", ess
print "R^2: ", r2
print "\n**********测试MultiTaskLasso类**********"
# When constructing MultiTaskLasso, specify the alpha parameter;
# its default value is 1.0.
multiTaskLasso = MultiTaskLasso(alpha=1.0)
# Fit on the training set.
multiTaskLasso.fit(train_X, train_Y)