def mapper(key, output_collector):
    import mapreduce as GLOBAL
    Xtr = GLOBAL.DATA_RESAMPLED["X"][0]
    Xte = GLOBAL.DATA_RESAMPLED["X"][1]
    ytr = GLOBAL.DATA_RESAMPLED["y"][0]
    yte = GLOBAL.DATA_RESAMPLED["y"][1]
    # key = (alpha, l1_ratio, l2_ratio, tv_ratio)
    alpha = float(key[0])
    l1, l2, tv = alpha * float(key[1]), alpha * float(key[2]), \
        alpha * float(key[3])
    print("l1:%f, l2:%f, tv:%f" % (l1, l2, tv))
    mask = np.ones(Xtr.shape[0], dtype=bool)
    scaler = preprocessing.StandardScaler().fit(Xtr)
    Xtr = scaler.transform(Xtr)
    Xte = scaler.transform(Xte)
    A = GLOBAL.A
    conesta = algorithms.proximal.CONESTA(max_iter=10000)
    # penalty_start is a module-level global in these mapreduce scripts.
    mod = estimators.LinearRegressionL1L2TV(l1, l2, tv, A, algorithm=conesta,
                                            penalty_start=penalty_start)
    mod.fit(Xtr, ytr.ravel())
    y_pred = mod.predict(Xte)
    ret = dict(y_pred=y_pred, y_true=yte, beta=mod.beta, mask=mask)
    if output_collector:
        output_collector.collect(key, ret)
    else:
        return ret
def mapper(key, output_collector): """ # debug mapper config = json.load(open(os.path.join(WD, "config_cv_largerange.json"), "r")) load_globals(config) resample(config, 'refit/refit') key = ('enettv', 0.01, 0.1, 0.3) """ import mapreduce as GLOBAL Xtr = GLOBAL.DATA_RESAMPLED["X"][0] Xte = GLOBAL.DATA_RESAMPLED["X"][1] ytr = GLOBAL.DATA_RESAMPLED["y"][0] yte = GLOBAL.DATA_RESAMPLED["y"][1] # key = 'enettv_0.01_0.1_0.2'.split("_") algo, alpha, l1l2ratio, tvratio = key[0], float(key[1]), float( key[2]), float(key[3]) tv = alpha * tvratio l1 = alpha * float(1 - tv) * l1l2ratio l2 = alpha * float(1 - tv) * (1 - l1l2ratio) print(key, algo, alpha, l1, l2, tv) scaler = preprocessing.StandardScaler().fit(Xtr) Xtr = scaler.transform(Xtr) Xte = scaler.transform(Xte) if algo == 'enettv': conesta = algorithms.proximal.CONESTA(max_iter=10000) mod = estimators.LinearRegressionL1L2TV(l1, l2, tv, GLOBAL.Atv, algorithm=conesta) mod.fit(Xtr, ytr.ravel()) elif algo == 'enetgn': fista = algorithms.proximal.FISTA(max_iter=5000) mod = estimators.LinearRegressionL1L2GraphNet(l1, l2, tv, GLOBAL.Agn, algorithm=fista) mod.fit(Xtr, ytr.ravel()) elif algo == 'enet': fista = algorithms.proximal.FISTA(max_iter=5000) mod = estimators.ElasticNet(l1l2ratio, algorithm=fista) mod.fit(Xtr, ytr.ravel()) else: raise Exception('Algo%s not handled' % algo) #mod.fit(Xtr, ytr.ravel()) y_pred = mod.predict(Xte) ret = dict(y_pred=y_pred, y_true=yte, beta=mod.beta) if output_collector: output_collector.collect(key, ret) else: return ret
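# Hedged sketch (not from the original scripts): how a grid of keys consumed
# by mapper() could be built, and a check that the split above always returns
# penalty weights summing back to alpha. The grid values are assumptions.
import itertools

alphas = [0.01, 0.1, 1.0]
l1l2ratios = [0.1, 0.5, 0.9]
tvratios = [0.2, 0.5, 0.8]
keys = [("enettv", a, r, t)
        for a, r, t in itertools.product(alphas, l1l2ratios, tvratios)]

for _algo, a, r, t in keys:
    tv, l1, l2 = a * t, a * (1 - t) * r, a * (1 - t) * (1 - r)
    assert abs((l1 + l2 + tv) - a) < 1e-9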
ALPHA = 0.01
l, k, g = ALPHA * np.array([0.3335, 0.3335, 0.333])  # l1, l2, tv penalties

mask_ima = nibabel.load(os.path.join(WD, mask_filename))
Atv = tv.linear_operator_from_mask(mask_ima.get_data())

out = os.path.join(WD, "run", "conesta_ite_snapshots/")
os.makedirs(out, exist_ok=True)
snapshot = AlgorithmSnapshot(out, saving_period=1).save_conesta

info = [Info.converged, Info.num_iter, Info.time, Info.func_val,
        Info.mu, Info.gap, Info.fvalue]
conesta = algorithms.proximal.CONESTA()
# The snapshot callback is forwarded to the solver through algorithm_params.
algorithm_params = dict(max_iter=1000000, info=info, callback=snapshot)
mod = estimators.LinearRegressionL1L2TV(l, k, g, A=Atv, algorithm=conesta,
                                        algorithm_params=algorithm_params,
                                        penalty_start=0, mean=True)
mod.fit(X, y, beta_start)
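# Hypothetical sketch of the AlgorithmSnapshot helper used above (the real
# class lives in the surrounding project; the constructor signature and the
# locals()-dict callback convention are assumptions). Each call persists the
# solver's current state so the CONESTA trajectory can be replayed later.
import os
import numpy as np

class AlgorithmSnapshot:
    def __init__(self, output_prefix, saving_period=1):
        self.output_prefix = output_prefix
        self.saving_period = saving_period
        self.cpt = 0  # number of callback invocations so far

    def save_conesta(self, locals_dict):
        # Invoked by the solver once per CONESTA continuation step.
        self.cpt += 1
        if self.cpt % self.saving_period != 0:
            return
        snapshot = {key: value for key, value in locals_dict.items()
                    if isinstance(value, (int, float, np.ndarray))}
        np.savez_compressed(self.output_prefix + "ite_%04d.npz" % self.cpt,
                            **snapshot)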
def test_total_variation(self):
    random_state = np.random.RandomState(42)
    state = random_state.get_state()
    rng01 = simulate.utils.RandomUniform(0, 1, random_state=random_state)
    rng11 = simulate.utils.RandomUniform(-1, 1, random_state=random_state)

    shape = (4, 4, 4)
    n, p = 48, np.prod(shape)

    # Generate candidate data.
    beta = simulate.beta.random((p, 1), density=0.5, sort=True, rng=rng01)
    Sigma = simulate.correlation_matrices.constant_correlation(
        p=p, rho=0.01, eps=0.001, random_state=random_state)
    X0 = random_state.multivariate_normal(np.zeros(p), Sigma, n)
    e = random_state.randn(n, 1)

    # Generate the linear operator for total variation.
    A = simulate.functions.TotalVariation.A_from_shape(shape)

    # Regularisation parameters.
    k = 0.5  # Ridge (L2) coefficient.
    l = 1.0 - k  # L1 coefficient.
    g = 1.0  # TV coefficient.
    mu = 5e-4

    # Create the optimisation problem.
    random_state.set_state(state)
    l1 = simulate.functions.L1(l, rng=rng11)
    l2 = simulate.functions.L2Squared(k)
    tv = simulate.functions.TotalVariation(g, A, rng=rng01)
    lr = simulate.LinearRegressionData([l1, l2, tv], X0, e, snr=3.0,
                                       intercept=False)

    # Generate simulated data.
    X, y, beta_star, e = lr.load(beta)

    try:
        import parsimony.estimators as estimators
        import parsimony.algorithms.proximal as proximal
    except ImportError:
        print("pylearn-parsimony is not properly installed. Will not be "
              "able to run this test.")
        return

    # Renamed from `e` to avoid shadowing the noise vector above.
    est = estimators.LinearRegressionL1L2TV(
        l, k, g, A, mu,
        algorithm=proximal.FISTA(),
        algorithm_params=dict(max_iter=10000),
        penalty_start=0, mean=False)
    beta_sim = est.fit(X, y, beta).parameters()["beta"]

    assert np.linalg.norm(beta_sim - beta_star) < 0.0005
rr = estimators.RidgeRegression(l=alpha)
rr.fit(Xtr, ytr)
yte_pred_rr = rr.predict(Xte)

# Fit GraphNet
l1, l2, gn = alpha * np.array((.33, .33, .33))  # l1, l2, gn penalties
A = sparse.vstack(nesterov_tv.linear_operator_from_shape(shape))
enetgn = estimators.LinearRegressionL1L2GraphNet(l1, l2, gn, A)
yte_pred_enetgn = enetgn.fit(Xtr, ytr).predict(Xte)

# Fit LinearRegressionL1L2TV
l1, l2, tv = alpha * np.array((.33, .33, .33))  # l1, l2, tv penalties
Atv = nesterov_tv.linear_operator_from_shape(shape)
enettv = estimators.LinearRegressionL1L2TV(l1, l2, tv, Atv,
                                           algorithm_params=dict(max_iter=500))
yte_pred_enettv = enettv.fit(Xtr, ytr).predict(Xte)

###############################################################################
# Plot

plot = plt.subplot(221)
utils.plots.map2d(beta3d.reshape(shape), plot, title="beta star")
plot = plt.subplot(222)
utils.plots.map2d(rr.beta.reshape(shape), plot,
                  title="Ridge (R2=%.2f)" % r2_score(yte, yte_pred_rr))
MODELS["l1l2_inter__fista"] = \ estimators.ElasticNet(alpha=alpha, l=.5, penalty_start=1) ## LinearRegressionL1L2TV, Parsimony only # Minimize: # f(beta, X, y) = (1 / (2 * n)) * ||Xbeta - y||²_2 # + l1 * ||beta||_1 # + (l2 / 2) * ||beta||²_2 # + tv * TV(beta) A = nesterov_tv.linear_operator_from_shape(beta3d.shape) l1, l2, tv = alpha * np.array((.05, .65, .3)) # l2, l1, tv penalties nite_fsta = 70000 MODELS["l1l2tv__fista"] = \ estimators.LinearRegressionL1L2TV(l1, l2, tv, A, algorithm=algorithms.proximal.FISTA(max_iter=nite_fsta)) MODELS["l1l2tv_inter__fista"] = \ estimators.LinearRegressionL1L2TV(l1, l2, tv, A, penalty_start=1, algorithm=algorithms.proximal.FISTA(max_iter=nite_fsta)) nite_stc_cnsta = 10000 MODELS["l1l2tv__static_conesta"] = \ estimators.LinearRegressionL1L2TV(l1, l2, tv, A, algorithm=algorithms.proximal.StaticCONESTA(max_iter=nite_stc_cnsta)) MODELS["l1l2tv_inter__static_conesta"] = \ estimators.LinearRegressionL1L2TV(l1, l2, tv, A, penalty_start=1, algorithm=algorithms.proximal.StaticCONESTA(max_iter=nite_stc_cnsta)) nite_cnsta = 5000
# Create the loss function.
function = LinearRegressionL1L2TV(X, y, l, k, g, A=A, mu=mu,
                                  penalty_start=0, mean=False)

# Create the estimator.
lr = estimators.LinearRegressionL1L2TV(l, k, g, A=A, mu=mu,
                                       algorithm=CONESTA(max_iter=max_iter,
                                                         eps=eps),
                                       mean=False)

# Fit data with the new regularisation parameters.
beta = lr.fit(X, y, beta).beta

# Compute output.
err_beta[i, j] = np.linalg.norm(beta - beta_star)
err_f[i, j] = np.linalg.norm(function.f(beta) - function.f(beta_star))

print("k: %.3f, g: %.3f, err_f: %.12f" % (k, g, err_f[i, j]))

print("err_beta:\n", err_beta)
print("err_f:\n", err_f)
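# Plausible reconstruction (an assumption, not the original script) of the
# grid search enclosing the snippet above: err_beta and err_f are filled in a
# double loop over candidate ridge (k) and TV (g) weights, and each fit is
# warm-started from the previous beta via lr.fit(X, y, beta).
import numpy as np

ks = np.linspace(0.1, 0.9, 5)   # candidate ridge weights
gs = np.linspace(0.1, 2.0, 5)   # candidate TV weights
err_beta = np.zeros((ks.size, gs.size))
err_f = np.zeros((ks.size, gs.size))
for i, k in enumerate(ks):
    for j, g in enumerate(gs):
        # ... build `function` and `lr`, fit, and record the errors as above.
        pass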
def mapper(key, output_collector):
    import mapreduce as GLOBAL
    Xtr = GLOBAL.DATA_RESAMPLED["X"][0]
    Xte = GLOBAL.DATA_RESAMPLED["X"][1]
    ytr = GLOBAL.DATA_RESAMPLED["y"][0]
    yte = GLOBAL.DATA_RESAMPLED["y"][1]
    # key = 'enettv_0.01_0.1_0.2'.split("_")
    algo, alpha, l1l2ratio, tvratio = \
        key[0], float(key[1]), float(key[2]), float(key[3])
    # Split alpha so that l1 + l2 + tv == alpha (ratio 1 - tvratio, as above).
    tv = alpha * tvratio
    l1 = alpha * (1 - tvratio) * l1l2ratio
    l2 = alpha * (1 - tvratio) * (1 - l1l2ratio)
    print(key, algo, alpha, l1, l2, tv)

    # Scale all columns but the first (intercept), which must stay untouched.
    scaler = preprocessing.StandardScaler().fit(Xtr[:, 1:])
    Xtr[:, 1:] = scaler.transform(Xtr[:, 1:])
    Xte[:, 1:] = scaler.transform(Xte[:, 1:])

    # penalty_start is a module-level global in these mapreduce scripts.
    if algo == 'enettv':
        conesta = algorithms.proximal.CONESTA(max_iter=10000)
        mod = estimators.LinearRegressionL1L2TV(l1, l2, tv, GLOBAL.Atv,
                                                algorithm=conesta,
                                                penalty_start=penalty_start)
        mod.fit(Xtr, ytr.ravel())
        beta = mod.beta
    elif algo == 'enetgn':
        fista = algorithms.proximal.FISTA(max_iter=5000)
        mod = estimators.LinearRegressionL1L2GraphNet(
            l1, l2, tv, GLOBAL.Agn, algorithm=fista,
            penalty_start=penalty_start)
        mod.fit(Xtr, ytr.ravel())
        beta = mod.beta
    elif algo == 'enet':
        fista = algorithms.proximal.FISTA(max_iter=5000)
        mod = estimators.ElasticNet(l1l2ratio, algorithm=fista,
                                    penalty_start=penalty_start)
        mod.fit(Xtr, ytr.ravel())
        beta = mod.beta
    elif algo == 'Ridge':
        mod = estimators.RidgeRegression(l1l2ratio,
                                         penalty_start=penalty_start)
        mod.fit(Xtr, ytr.ravel())
        beta = mod.beta
    elif algo == 'RidgeAGD':
        mod = estimators.RidgeRegression(
            l1l2ratio, algorithm=gradient.GradientDescent(max_iter=1000),
            penalty_start=penalty_start)
        mod.fit(Xtr, ytr.ravel())
        beta = mod.beta
    elif algo == 'linearSklearn':
        mod = linear_model.LinearRegression(fit_intercept=False)
        mod.fit(Xtr, ytr.ravel())
        beta = mod.coef_.reshape(-1, 1)
    elif algo == 'SkRidge':
        mod = linear_model.Ridge(alpha=l1l2ratio, fit_intercept=False)
        mod.fit(Xtr, ytr.ravel())
        beta = mod.coef_.reshape(-1, 1)
    elif algo == 'SkRidgeInt':
        mod = linear_model.Ridge(alpha=l1l2ratio, fit_intercept=True)
        mod.fit(Xtr, ytr.ravel())
        beta = mod.coef_.reshape(-1, 1)
    else:
        raise Exception('Algo %s not handled' % algo)

    y_pred = mod.predict(Xte)
    ret = dict(y_pred=y_pred, y_true=yte, beta=beta)
    if output_collector:
        output_collector.collect(key, ret)
    else:
        return ret
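# Debug usage, following the recipe kept in the first mapper's docstring:
# load the config, materialise one resample, then call the mapper directly
# with a single key. load_globals, resample and WD are names from the
# surrounding script, not defined here.
if __name__ == "__main__":
    config = json.load(open(os.path.join(WD, "config_cv_largerange.json"),
                            "r"))
    load_globals(config)
    resample(config, 'refit/refit')
    ret = mapper(key=('enettv', 0.01, 0.1, 0.3), output_collector=None)
    print(ret["y_pred"].shape, ret["beta"].shape)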
plot.plot(t[skip:], f[skip:], 'r', linewidth=3)
plot.plot(t[skip:], bound[skip:], 'g', linewidth=3)

######################
### FISTA small mu ###
######################
print("FISTA small mu")
A = simulate.functions.TotalVariation.A_from_shape(shape)
lr_ = estimators.LinearRegressionL1L2TV(l, k, g, A=A, mu=consts.TOLERANCE,
                                        algorithm=proximal.FISTA(),
                                        algorithm_params=dict(
                                            max_iter=int(max_iter * 1.1),
                                            info=[Info.fvalue, Info.time],
                                            tau=0.99, mu=mu, use_gap=True),
                                        penalty_start=0, mean=mean, rho=1.0)
res = lr_.fit(X, y, beta_start)
error = lr_.score(X, y)
print("error = ", error)
info_ = lr_.get_info()
y_ = info_[Info.fvalue]
t_ = np.cumsum(info_[Info.time])
# Parsimony's ElasticNet is based on FISTA and is therefore slower than the
# scikit-learn one.
l1_ratio = .5
enet = estimators.ElasticNet(alpha=alpha, l=.5)
yte_pred_enet = enet.fit(Xtr, ytr).predict(Xte)

###########################################################################
## Fit LinearRegressionL1L2TV
# Min: (1 / (2 * n)) * ||Xbeta - y||^2_2
#      + l1 * ||beta||_1
#      + (l2 / 2) * ||beta||^2_2
#      + tv * TV(beta)
l1, l2, tv = alpha * np.array((.33, .33, .33))  # l1, l2, tv penalties
A = nesterov_tv.linear_operator_from_shape(shape)
algo = algorithms.proximal.CONESTA(max_iter=500)
enettv = estimators.LinearRegressionL1L2TV(l1, l2, tv, A, algorithm=algo)
yte_pred_enettv = enettv.fit(Xtr, ytr).predict(Xte)

###########################################################################
## Plot
# TODO: Please remove dependence on scikit-learn. Add required functionality
# to parsimony instead.
plot = plt.subplot(131)
utils.plot_map2d(beta3d.reshape(shape), plot, title="beta star")
plot = plt.subplot(132)
utils.plot_map2d(enet.beta.reshape(shape), plot,
                 title="beta enet (R2=%.2f)" % r2_score(yte, yte_pred_enet))
#utils.plot_map2d(enet.coef_.reshape(shape), plot,
#                 title="beta enet (R2=%.2f)" % r2_score(yte, yte_pred_enet),
#                 limits=limits/1.)
plot = plt.subplot(133)
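# Likely continuation (an assumption, mirroring the enet panel above): the
# third subplot presumably shows the TV-regularised weight map.
utils.plot_map2d(enettv.beta.reshape(shape), plot,
                 title="beta enettv (R2=%.2f)" %
                       r2_score(yte, yte_pred_enettv))
plt.show()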