class MultiLasso_model(Lasso_model):
    """Lasso_model variant that fits all targets jointly with MultiTaskLassoCV."""

    def __init__(self, train_path, test_path, pred_path):
        super().__init__(train_path, test_path, pred_path)
        # Alpha grid 0.05, 0.10, ..., 4.95; 8-fold CV picks the best one.
        self.multiLasso_model = MultiTaskLassoCV(
            alphas=[0.05 * step for step in range(1, 100)],
            cv=8,
            max_iter=1000000)

    def train(self, X_train, Y_train):
        """Fit the multi-task lasso on the training data."""
        self.multiLasso_model.fit(X_train, Y_train)

    def pred(self, X_test):
        """Return predictions for X_test from the fitted model."""
        return self.multiLasso_model.predict(X_test)

    def run(self):
        """Fit on the training split, predict the held-out rows, and write
        train rows plus predicted test rows per task to self.pred_path."""
        train_features, test_features, y_trains, y_tests = super().get_train_test()
        # MultiTaskLassoCV expects targets as (n_samples, n_tasks).
        self.train(train_features, np.asarray(y_trains).T)
        y_preds = self.pred(test_features).T
        print(y_preds.shape, np.asarray(y_tests).shape)
        with open(self.pred_path, "w", newline='') as out_file:
            writer = csv.writer(out_file)
            for task_idx in range(len(y_trains)):
                # Original training rows are written through unchanged.
                for train_row in self.data_train_split[task_idx]:
                    writer.writerow(train_row)
                # Test rows get the model's prediction appended as a column.
                held_out = self.data_test_split[task_idx][self.split_train_len:, :]
                for row_idx in range(len(held_out)):
                    augmented = np.append(held_out[row_idx, :],
                                          y_preds[task_idx][row_idx])
                    writer.writerow(augmented)
def _informativeness(self, z_p, z):
    """Return the R^2 score of regressing targets `z` on features `z_p`.

    When self.regressor is a LassoCV, a fresh MultiTaskLassoCV (same cv,
    capped iterations, random coordinate selection) is fitted and scored;
    otherwise the pre-existing self.regressor is scored directly.

    Bug fix: the original fitted a local MultiTaskLassoCV but then scored
    the (unfitted) ``self.regressor`` instead, and called ``score(z_p)``
    without the required target argument, which raises a TypeError.
    """
    if isinstance(self.regressor, LassoCV):
        regressor = MultiTaskLassoCV(cv=self.regressor.cv, max_iter=2000,
                                     selection='random')
        regressor.fit(z_p, z)
        # Score the model we just fitted, passing the targets as required
        # by the sklearn score(X, y) signature.
        return regressor.score(z_p, z)
    return self.regressor.score(z_p, z)
def test_1d_multioutput_lasso_and_multitask_lasso_cv():
    """A 1-D-target LassoCV must agree with MultiTaskLassoCV fitted on the
    same target reshaped to a single-column 2-D array."""
    X, y, _, _ = build_dataset(n_features=10)
    y = y[:, np.newaxis]
    single_task = LassoCV(n_alphas=5, eps=2e-3)
    single_task.fit(X, y[:, 0])
    multi_task = MultiTaskLassoCV(n_alphas=5, eps=2e-3)
    multi_task.fit(X, y)
    # Same selected alpha, and the multi-task row 0 matches the 1-D fit.
    assert_almost_equal(single_task.alpha_, multi_task.alpha_)
    assert_almost_equal(single_task.coef_, multi_task.coef_[0])
    assert_almost_equal(single_task.intercept_, multi_task.intercept_[0])
class MultiTaskLassoCVImpl:
    """Thin adapter that forwards fit/predict to a wrapped Op estimator."""

    def __init__(self, **hyperparams):
        self._hyperparams = hyperparams
        self._wrapped_model = Op(**self._hyperparams)

    def fit(self, X, y=None):
        """Fit the wrapped model; y is forwarded only when supplied."""
        if y is None:
            self._wrapped_model.fit(X)
        else:
            self._wrapped_model.fit(X, y)
        return self

    def predict(self, X):
        """Delegate prediction to the wrapped model."""
        return self._wrapped_model.predict(X)
def lassoCV(self, name):
    '''
    Lasso Regression: fit a MultiTaskLassoCV on the training split,
    print the test-set MSE and coefficients, then predict on the final
    set and write the predictions to a CSV via genCSV.
    '''
    # NOTE(review): Python 2 print statements — this module targets Python 2.
    # NOTE(review): `normalize` was removed from scikit-learn 1.2; confirm
    # the pinned sklearn version still accepts it.
    sciLasso = MultiTaskLassoCV(
        fit_intercept=True,
        normalize=False,
        cv=12,
        tol = 0.001
    )
    sciLasso.fit(self.X_train, self.Y_train)
    # Evaluate on the held-out test split.
    predict_test = sciLasso.predict(self.X_test)
    MSE = mean_squared_error(predict_test,self.Y_test)
    s = "Sci LassoCV (MSE: %f)" % (MSE)
    print s
    # print sciLasso.score(self.X_test, self.Y_test)
    # Inspect which features survived the L1 penalty.
    print sciLasso.coef_
    print np.nonzero(sciLasso.coef_)
    # Final predictions are written out with the MSE embedded in the name.
    predict_final = sciLasso.predict(self.X_final)
    genCSV( name + '_MSE' + str(MSE), self.index_final, predict_final )
def compare_to_lasso_analysis(adata, ccdtranscript):
    '''Perform a comparison of pseudotime analysis to LASSO analysis for finding CCD proteins'''
    # Temporarily shrink figures for this analysis; restored at the end.
    prevPlotSize = plt.rcParams['figure.figsize']
    plt.rcParams['figure.figsize'] = (6, 5)
    print("ANALYZING SC-RNA-SEQ WITH LASSO")
    warnings.filterwarnings("ignore")
    # Targets: per-cell (Red585, Green530) FUCCI intensity pairs.
    fucci_rna_data = [(adata.obs["Red585"][ii], adata.obs["Green530"][ii]) for ii in np.arange(len(adata.obs))]
    # Zeros are treated as missing counts and imputed via KNN.
    imputer = KNNImputer(missing_values=0)
    expression = imputer.fit_transform(adata.X)
    # The fitted model is cached on disk; np.load falls back to pickle
    # for non-npy files when allow_pickle=True.
    fucci_rna_path = "output/pickles/fucci_rna_imputed_lasso.pkl"
    if os.path.exists(fucci_rna_path):
        fucci_rna = np.load(open(fucci_rna_path, 'rb'), allow_pickle=True)
    else:
        fucci_rna = MultiTaskLassoCV()
        fucci_rna.fit(expression, fucci_rna_data)
        pickle.dump(fucci_rna, open(fucci_rna_path, 'wb'))
    # Genes with any nonzero coefficient across the two tasks.
    nz_coef = np.sum(fucci_rna.coef_, axis=0) != 0
    print(f"{sum(nz_coef)}: number of nonzero lasso coefficients")
    print(f"{adata.var_names[nz_coef]}: genes with nonzero lasso coeff")
    print(
        f"{sum(ccdtranscript[nz_coef]) / sum(nz_coef)}: % nonzero lasso found as CCD transcripts"
    )
    print(
        f"{np.sum(fucci_rna.coef_, axis=0)[nz_coef]}: coefficients for nonzero lasso coeff"
    )
    # Generate UMAP for CCD and nonCCD for the LASSO model
    adataCCd = adata[:, nz_coef]
    sc.pp.neighbors(adataCCd, n_neighbors=10, n_pcs=40)
    sc.tl.umap(adataCCd)
    sc.pl.umap(adataCCd, color="fucci_time", show=False, save=True)
    # scanpy saves to a fixed path; move it to a descriptive filename.
    shutil.move("figures/umap.pdf", f"figures/umapRNALassoCCD.pdf")
    adataNonCCd = adata[:, ~nz_coef]
    sc.pp.neighbors(adataNonCCd, n_neighbors=10, n_pcs=40)
    sc.tl.umap(adataNonCCd)
    sc.pl.umap(adataNonCCd, color="fucci_time", show=False, save=True)
    shutil.move("figures/umap.pdf", f"figures/umapRNALassoNonCCD.pdf")
    # Restore global plotting/warning state.
    plt.rcParams['figure.figsize'] = prevPlotSize
    warnings.filterwarnings("default")
def _compare_with_lasso_cv(self, lasso_X, lasso_y, wlasso_X, wlasso_y,
                           sample_weight, alphas, lasso_cv=3, wlasso_cv=3,
                           params=None, tol=1e-8):
    """Fit an unweighted lasso CV and its weighted counterpart, then assert
    that both select the same alpha and near-identical coefficients.

    Bug fix: the original used the mutable default ``params={}``, a shared
    dict object reused across calls; replaced with the None sentinel.
    """
    if params is None:
        params = {}
    # A 2-D target means multi-task lasso; 1-D means the plain estimators.
    if np.ndim(lasso_y) > 1:
        lassoCV = MultiTaskLassoCV(alphas=alphas, cv=lasso_cv)
        wlassoCV = WeightedMultiTaskLassoCV(alphas=alphas, cv=wlasso_cv)
    else:
        lassoCV = LassoCV(alphas=alphas, cv=lasso_cv)
        wlassoCV = WeightedLassoCV(alphas=alphas, cv=wlasso_cv)
    lassoCV.set_params(**params)
    lassoCV.fit(lasso_X, lasso_y)
    wlassoCV.set_params(**params)
    wlassoCV.fit(wlasso_X, wlasso_y, sample_weight)
    # Check that same alpha is chosen
    self.assertEqual(lassoCV.alpha_, wlassoCV.alpha_)
    # Check that the coefficients are similar
    if np.ndim(lasso_y) > 1:
        for i in range(lasso_y.shape[1]):
            np.testing.assert_allclose(lassoCV.coef_[i], wlassoCV.coef_[i],
                                       atol=tol)
            if lassoCV.get_params()["fit_intercept"]:
                self.assertAlmostEqual(lassoCV.intercept_[i],
                                       wlassoCV.intercept_[i])
    else:
        np.testing.assert_allclose(lassoCV.coef_, wlassoCV.coef_, atol=tol)
        self.assertAlmostEqual(lassoCV.intercept_, wlassoCV.intercept_)
def select_mtlasso(self, X, y):
    """Cross-validate MultiTaskLassoCV over a fixed alpha grid, print the
    selected alpha, and return it.

    Fixes: removed the dead no-op expression ``sel_alpha.alpha_`` and added
    a return value (the original returned None, so returning the alpha is
    backward compatible for existing callers).
    """
    # Hand-tuned, densely sampled grid of candidate regularization strengths.
    alpha_grid = [
        0.00001, .0001, .001, .002, .003, .004, .005, .006, .007, .008,
        .009, .099, .01, .011, .012, .013, .014, .015, .016, .017, .018,
        .019, .02, .025, .03, .035, .036, .037, .038, .039, .04, .041,
        .042, .043, .044, .045, .05, .06, .075, .1, .2, .225, .23, .24,
        .245, .246, .247, .248, .249, .25, .251, .252, .253, .254, .255,
        .26, .27, .275, .3, .35, .4, .45, .46, .47, .48, .481, .482, .483,
        .484, .485, .486, .487, .488, .489, .49, .491, .492, .493, .494,
        .495, .496, .497, .498, .499, .5, .51, .511, .512, .513, .514,
        .515, .516, .517, .518, .519, .52, .525, .53, .54, .55, .6, .75,
        .752, .7527, .7528, .7529, .753, .7531, .754, .7545, .755, .756,
        .76, .765, .77, .78, .79, .8, .9, 1.0, 1.2, 1.25, 1.5, 1.75, 2.0
    ]
    model = MultiTaskLassoCV(alphas=alpha_grid)
    model.fit(X, y)
    print(model.alpha_)
    return model.alpha_
# Train a MultiTaskLasso whose alpha is chosen by MultiTaskLassoCV,
# then report MAE / RMSLE / MAPE on a held-out 20% split.
path_train = 'data_train.txt'
path_test = 'data_test.txt'

X, Y = get_data_own(path_train)
print(X.shape)
print(Y.shape)

print("Split data for CV")
X_train, X_test, y_train, y_test = train_test_split(
    X, Y, test_size=0.2, random_state=1)

lasso = MultiTaskLasso(max_iter=max_iter, normalize=True)

print("Init train with multitasklassocv")
lassocv = MultiTaskLassoCV(alphas=None, cv=10, max_iter=max_iter,
                           verbose=True, normalize=True)
lassocv.fit(X_train, y_train)

print("Fit multitasklasso with alpha from cv lasso")
lasso.set_params(alpha=lassocv.alpha_)
lasso.fit(X_train, y_train)

print("get mean square error")
mae = mean_absolute_error(y_test, lasso.predict(X_test))
print("mae: {}".format(mae))
rmsle = mean_squared_log_error(y_test, lasso.predict(X_test))
print("rmsle: {}".format(rmsle))
mape = mean_absolute_percentage_error(y_test, lasso.predict(X_test))
print("mape: {}".format(mape))
# logreg.fit(X_train, y_train)
# y_pred_logreg = logreg.predict(X_test)
# print('R2: ', r2_score(y_test, y_pred_logreg))
# print('MAE: ', metrics.mean_absolute_error(y_test, y_pred_logreg))
# print('MSE: ', metrics.mean_squared_error(y_test, y_pred_logreg))
# print('RMSE: ', np.sqrt(np.absolute(metrics.mean_squared_error(y_test, y_pred_logreg))))
# print('variance score:', explained_variance_score(y_test, y_pred_logreg, multioutput='uniform_average'))
# -----------------------------------------------------------------------------
# Method 3: MultiTaskLassoCV regression with 10-fold CV
# -----------------------------------------------------------------------------
print(' ')
print('## 2. Lasso Regression Results ##')

# Fit with 10-fold cross-validation and time the training step.
lasso = MultiTaskLassoCV(cv=10, eps=0.01, max_iter=1000)
start_time = time.time()
lasso.fit(X_train, y_train)
t_lasso = time.time() - start_time

# Score on the held-out split.
y_pred_lasso = lasso.predict(X_test)
print('R2: ', r2_score(y_test, y_pred_lasso))
print('MAE: ', metrics.mean_absolute_error(y_test, y_pred_lasso))
print('MSE: ', metrics.mean_squared_error(y_test, y_pred_lasso))
print('RMSE: ', np.sqrt(np.absolute(metrics.mean_squared_error(y_test, y_pred_lasso))))
print(
    'variance score: ',
    explained_variance_score(y_test, y_pred_lasso, multioutput='uniform_average'))
print('training time: ', t_lasso)
# # # -----------------------------------------------------------------------------
# Standardize each column of Xh_tr to zero mean / unit variance, keeping the
# per-column statistics for later use.
mea_h = np.zeros(Dh)
sig_h = np.zeros(Dh)
for k in range(Dh):
    mea_h[k] = np.mean(Xh_tr[:,k])
    sig_h[k] = np.std(Xh_tr[:,k])
    Xh_tr[:,k] = (Xh_tr[:,k]-mea_h[k])/sig_h[k]
############## LassoCV ########################################################
from sklearn.linear_model import MultiTaskLassoCV
# Log-spaced candidate alphas; 5-fold CV picks the optimum.
# NOTE(review): `normalize` was removed from scikit-learn 1.2 — confirm the
# pinned sklearn version supports it.
n_alphas = 5
alphas = np.logspace(-10, 0, n_alphas)
lasso = MultiTaskLassoCV(alphas = alphas, cv = 5, fit_intercept=False, normalize=False,n_jobs=3)
lasso.fit(Xl_tr, Xh_tr)
Lasso_lambda_opt = lasso.alpha_
print('\n Optimal lambda:', Lasso_lambda_opt)
############ Validation curve #################################################
# The validation-curve experiment below is disabled via a triple-quoted block.
"""
# validation curve
from sklearn.linear_model import Lasso
from sklearn.learning_curve import validation_curve
lambdas_range= np.append(0, np.logspace(0, 6, 28))
train_MSE, test_MSE = validation_curve(Lasso(),Xl_tr, Xh_tr, param_name="alpha", param_range=lambdas_range, scoring = "mean_squared_error", cv=10)
# API always tries to maximize a loss function, so MSE is actually in the flipped sign
# Concatenate the discrete and continuous feature blocks.
x_vec = np.concatenate((x_vec_con, x_vec_dis), axis=1)

# Targets: registered and casual rider counts, predicted jointly.
y_registered = bike_rel['registered'].values.astype(float)
y_casual = bike_rel['casual'].values.astype(float)
y = np.stack((y_registered, y_casual), axis=1)

# Build and evaluate the models.
from sklearn.linear_model import MultiTaskLassoCV
from sklearn.model_selection import train_test_split
from sklearn.linear_model import MultiTaskElasticNetCV

x1, x2, y1, y2 = train_test_split(x_vec, y, test_size=0.2, random_state=20)

############ Lasso
mtl = MultiTaskLassoCV(alphas=np.logspace(-3, -1, 3), cv=8, verbose=3)
mtl.fit(x1, y1)
mtl.score(x1, y1)
mtl.score(x2, y2)

############ ElasticNetCV
mte = MultiTaskElasticNetCV(l1_ratio=np.logspace(-3, -1, 3),
                            alphas=np.logspace(-3, -1, 3), cv=8, verbose=3)
mte.fit(x1, y1)
# Bug fix: the original scored `mtl` (the lasso model) again here instead
# of the freshly fitted elastic-net model `mte`.
mte.score(x1, y1)
mte.score(x2, y2)
# Split off the first `gap` rows as the validation set; the rest is training.
training_data_Y = np.delete(temp2, list(range(gap)), axis=0)
validation_data_X = X[:gap, :]
validation_data_Y = Y[:gap, :]
X = training_data_X
Y = training_data_Y

# Main Logic
iters = 500
alpha = 0.1

# For L1: pick the regularization strength via cross-validated lasso.
cv_selector = MultiTaskLassoCV(cv=5)
cv_selector.fit(X, Y)
lasso_lmbda = cv_selector.alpha_
print("Hyperparameter for Lasso Regularisation: " + str(lasso_lmbda))

# Train via gradient descent with the selected lambda.
theta2 = np.matrix(np.zeros((features, 1)))
theta2, cost_las = GradientDescent_lasso(X, Y, theta2, alpha, iters, lasso_lmbda)

# Plot the training-error curve.
iteration_axis = [step + 1 for step in range(iters)]
plt.plot(iteration_axis, cost_las)
plt.xlabel('Number of iterations')
plt.ylabel('Error')
plt.title('Error vs Iterations for Lasso regression L1 on Training set')
plt.show()
print("Accuracy with Lasso Regularisation on Tain set: " + str(predict(X, theta2, Y)))
print "测试集MSE:", mean_squared_error(test_Y, test_Y_pred) print "测试集RMSE:", np.sqrt(mean_squared_error(test_Y, test_Y_pred)) print "测试集R2:", r2_score(test_Y, test_Y_pred) tss, rss, ess, r2 = xss(Y, multiTaskLasso.predict(X)) print "TSS(Total Sum of Squares): ", tss print "RSS(Residual Sum of Squares): ", rss print "ESS(Explained Sum of Squares): ", ess print "R^2: ", r2 print "\n**********测试MultiTaskLassoCV类**********" # 在初始化MultiTaskLassoCV类时, 提供一组备选的α值, MultiTaskLassoCV类会帮我们选择一个合适的α值. multiTaskLassoCV = MultiTaskLassoCV( alphas=[0.01, 0.1, 0.5, 1, 3, 5, 7, 10, 20, 100], cv=5) # 拟合训练集 multiTaskLassoCV.fit(train_X, train_Y) # 打印最优的α值 print "最优的alpha值: ", multiTaskLassoCV.alpha_ # 打印模型的系数 print "系数:", multiTaskLassoCV.coef_ print "截距:", multiTaskLassoCV.intercept_ print '训练集R2: ', r2_score(train_Y, multiTaskLassoCV.predict(train_X)) # 对于线性回归模型, 一般使用均方误差(Mean Squared Error,MSE)或者 # 均方根误差(Root Mean Squared Error,RMSE)在测试集上的表现来评该价模型的好坏. test_Y_pred = multiTaskLassoCV.predict(test_X) print "测试集得分:", multiTaskLassoCV.score(test_X, test_Y) print "测试集MSE:", mean_squared_error(test_Y, test_Y_pred) print "测试集RMSE:", np.sqrt(mean_squared_error(test_Y, test_Y_pred)) print "测试集R2:", r2_score(test_Y, test_Y_pred)