def gl_ProxGD_primal(x0: np.ndarray, A: np.ndarray, b: np.ndarray, mu_0, opts: dict):
    """Proximal gradient method with continuation for the group-LASSO problem.

    Targets ``0.5 * ||A x - b||^2 + mu_0 * sum_i ||x[i, :]||_2`` (sum of
    squared entries plus the sum of row 2-norms).  The iteration is run for
    the weights ``100 * mu_0``, ``10 * mu_0`` and ``mu_0`` in turn, each
    stage starting from the point where the previous one stopped.

    Args:
        x0: Starting point.  It is copied, so the caller's array is not
            modified.  Row norms are taken along ``axis=1``, so it must be
            2-D.
        A: Matrix of the least-squares term.
        b: Right-hand side of the least-squares term.
        mu_0: Weight of the row-norm regularizer in the target problem.
        opts: Overrides for the defaults listed in ``default_opts`` below.

    Returns:
        A tuple ``(x, k, out)``: the final iterate, the total number of
        iterations over all stages, and a dict with keys ``"tt"`` (stopwatch
        reading in microseconds divided by 1e6), ``"fval"`` (objective at
        ``x`` with weight ``mu_0``), ``"f_hist"`` (objective before every
        step) and ``"f_hist_best"`` (running minimum of ``f_hist``).

    Raises:
        ValueError: If ``opts["step_type"]`` is not one of ``"fixed"``,
            ``"diminishing"``, ``"diminishing2"`` or ``"line_search"``.
    """
    default_opts = {
        "maxit": 2500,  # maximum number of iterations per continuation stage
        "thres": 1e-3,  # magnitudes below this value are treated as 0
        "step_type": "line_search",  # step-size rule (see set_step)
        "alpha0": 2e-3,  # initial step size
        "ftol": 1e-6,  # relative-change tolerance of the stall test
        "stable_len_threshold": 70,  # stalled iterations tolerated per stage
        "line_search_attenuation_coeffi": 0.9,  # backtracking shrink factor
        "maxit_line_search_iter": 5,  # maximum number of backtracking trials
    }
    # Values supplied by the caller overwrite the defaults.
    opts = {**default_opts, **opts}

    maxit, ftol, alpha0 = opts["maxit"], opts["ftol"], opts["alpha0"]
    stable_len_threshold = opts["stable_len_threshold"]
    thres = opts["thres"]
    step_type = opts['step_type']
    aten_coeffi = opts['line_search_attenuation_coeffi']
    max_line_search_iter = opts['maxit_line_search_iter']

    def sparsity_func(x: np.ndarray):
        # Fraction of entries larger than 1e-6 times the largest magnitude.
        return np.sum(np.abs(x) > 1e-6 * np.max(np.abs(x))) / x.size

    def real_obj_func(x: np.ndarray):
        # Objective of the target problem (always uses mu_0).
        fro_term = 0.5 * np.sum((A @ x - b) ** 2)
        regular_term = np.sum(LA.norm(x, axis=1).reshape(-1, 1))
        return fro_term + mu_0 * regular_term

    def g(x: np.ndarray):
        # Smooth part of the objective: 0.5 * ||A x - b||^2.
        return 0.5 * np.sum((A @ x - b) ** 2)

    def prox_th(x: np.ndarray, t, mu):
        # Row-wise shrinkage: each row is scaled by max(norm - t * mu, 0) / norm.
        # Rows with norm below thres get 1 added to the denominator, which
        # keeps the division well defined for zero rows.
        t_mu = t * mu
        row_norms = LA.norm(x, axis=1).reshape(-1, 1)
        return (x * np.clip(row_norms - t_mu, a_min=0, a_max=None)
                / ((row_norms < thres) + row_norms))

    def line_search(x: np.ndarray, grad_g: np.ndarray, mu):
        # Backtracking from alpha0: shrink the step until the sufficient
        # decrease test on g holds or the trial budget is exhausted.
        g_x = g(x)
        alpha = alpha0
        for _ in range(max_line_search_iter):
            gt_x = (x - prox_th(x - alpha * grad_g, alpha, mu)) / alpha
            if (g(x - alpha * gt_x)
                    <= g_x - alpha * np.sum(grad_g * gt_x)
                    + 0.5 * alpha * np.sum(gt_x ** 2)):
                break
            alpha *= aten_coeffi
        return alpha

    def set_step(inner_iter, x: np.ndarray, grad_g: np.ndarray, mu):
        # The diminishing rules stay at alpha0 for the first 1000 iterations
        # of a stage and only then start to decay.
        iter_hat = max(inner_iter, 1000) - 999
        if step_type == 'fixed':
            return alpha0
        if step_type == 'diminishing':
            return alpha0 / np.sqrt(iter_hat)
        if step_type == 'diminishing2':
            return alpha0 / iter_hat
        if step_type == 'line_search':
            return line_search(x, grad_g, mu)
        logger.error("Unsupported type.")
        # Fail here instead of letting a None step reach the arithmetic.
        raise ValueError("Unsupported step_type: {!r}".format(step_type))

    logger.debug("alpha0= {:10E}".format(alpha0))

    f_hist, f_hist_best, sparsity_hist = [], [], []
    f_best = np.inf
    x = np.copy(x0)

    stopwatch = Stopwatch()
    stopwatch.start()
    k = 0
    # Continuation: min g(x) + mu * sum_i ||x[i, :]||_2 for decreasing mu.
    for mu in [100 * mu_0, 10 * mu_0, mu_0]:
        logger.debug("new mu= {:10E}".format(mu))
        inner_iter = 0
        stable_len = 0
        while inner_iter < maxit:
            # Record the objective and sparsity of the current iterate.
            f_now = real_obj_func(x)
            f_hist.append(f_now)
            f_best = min(f_best, f_now)
            f_hist_best.append(f_best)
            sparsity_hist.append(sparsity_func(x))

            k += 1
            inner_iter += 1

            # A stage is cut short once the relative change of both the
            # objective and the sparsity stayed below ftol for more than
            # stable_len_threshold consecutive iterations.
            stalled = (
                k > 1
                and abs(f_hist[-1] - f_hist[-2]) / abs(f_hist[-2]) < ftol
                and abs(sparsity_hist[-1] - sparsity_hist[-2])
                / abs(sparsity_hist[-2]) < ftol
            )
            stable_len = stable_len + 1 if stalled else 0
            if stable_len > stable_len_threshold:
                break

            # Clamp tiny entries to exactly zero before taking the step.
            x[np.abs(x) < thres] = 0
            grad_g = A.T @ (A @ x - b)
            alpha_k = set_step(inner_iter, x, grad_g, mu)
            x = prox_th(x - alpha_k * grad_g, alpha_k, mu)
            if k % 100 == 0:
                # f_now belongs to the iterate before this step.
                logger.debug(
                    'iter= {:5}, objective= {:10E}, sparsity= {:3f}'.format(
                        k, f_now.item(), sparsity_func(x)))

    elapsed_time = stopwatch.elapsed(
        time_format=Stopwatch.TimeFormat.kMicroSecond) / 1e6
    out = {
        "tt": elapsed_time,
        "fval": real_obj_func(x),
        "f_hist": f_hist,
        "f_hist_best": f_hist_best
    }
    return x, k, out
def gl_Alm_dual(x0: np.ndarray, A: np.ndarray, b: np.ndarray, mu, opts: dict):
    """Augmented-Lagrangian style iteration on the dual of the group-LASSO.

    Reports the primal objective
    ``0.5 * ||A x - b||^2 + mu * sum_i ||x[i, :]||_2`` of the multiplier
    iterate ``x``.  Each iteration obtains ``u`` from ``solve_sub_problem``
    (defined elsewhere in this file), obtains ``z`` from a linear system with
    matrix ``I + rho * A A^T`` and then updates ``x`` with step ``tau * rho``.

    Args:
        x0: Starting value of ``x``; it is copied.
        A: Matrix of the least-squares term.
        b: Right-hand side of the least-squares term.
        mu: Weight of the row-norm regularizer.
        opts: Overrides for the defaults listed in ``default_opts`` below.

    Returns:
        A tuple ``(x_k, k, out)``: the final iterate, the number of
        iterations performed, and a dict with keys ``"tt"`` (stopwatch
        reading in microseconds divided by 1e6), ``"fval"`` (objective at
        ``x_k``), ``"f_hist"`` (objective after every iteration) and
        ``"f_hist_best"`` (running minimum of ``f_hist``).
    """
    default_opts = {
        "maxit": 100,  # maximum number of iterations
        "thres": 1e-3,  # residual norms below this count as converged
        "tau": (1 + math.sqrt(5)) * 0.5,  # step-length factor of the x update
        "rho": 1e2,  # penalty parameter
        "converge_len": 20,  # consecutive converged iterations required
    }
    # Values supplied by the caller overwrite the defaults.
    opts = {**default_opts, **opts}

    maxit, thres = opts["maxit"], opts["thres"]
    rho, tau = opts['rho'], opts['tau']
    converge_len = opts['converge_len']

    def sparsity_func(x: np.ndarray):
        # Fraction of entries larger than 1e-6 times the largest magnitude.
        return np.sum(np.abs(x) > 1e-6 * np.max(np.abs(x))) / x.size

    def real_obj_func(x: np.ndarray):
        fro_term = 0.5 * np.sum((A @ x - b)**2)
        regular_term = np.sum(LA.norm(x, axis=1).reshape(-1, 1))
        return fro_term + mu * regular_term

    f_hist, f_hist_best = [], []
    f_best = np.inf
    x_k = np.copy(x0)
    u_k = np.zeros_like(x_k)

    stopwatch = Stopwatch()
    stopwatch.start()

    # Factor the system matrix once.  The two triangular-looking solves below
    # assume L @ L.T equals the factored matrix, i.e. that LA.cholesky returns
    # the lower factor (true for numpy.linalg) -- TODO confirm what LA is.
    L = LA.cholesky(np.identity(A.shape[0]) + rho * A @ A.T)
    k = 0
    length = 0
    while k < maxit:
        k += 1

        u = solve_sub_problem(b, rho, A, x_k, L, mu)
        z = LA.solve(L.T, LA.solve(L, A @ (x_k - rho * u) - b))

        r_k = u + A.T @ z  # primal feasibility residual
        s_k = A @ (u_k - u)  # dual feasibility residual

        # The x step moves along the primal residual, so r_k is reused here
        # rather than forming u + A.T @ z a second time.
        x_k = x_k - tau * rho * r_k
        u_k = u

        f_now = real_obj_func(x_k)
        f_hist.append(f_now)
        f_best = min(f_best, f_now)
        f_hist_best.append(f_best)

        sparsity_now = sparsity_func(x_k)
        logger.debug(
            'iter= {:5}, objective= {:10E}, sparsity= {:3f}'.format(
                k, f_now.item(), sparsity_now.item()))

        # NOTE(review): if LA is numpy.linalg, ord=2 on a 2-D array is the
        # spectral norm, not the Frobenius norm -- confirm this is intended.
        r_k_norm = LA.norm(r_k, ord=2)
        s_k_norm = LA.norm(s_k, ord=2)
        if r_k_norm < thres and s_k_norm < thres:
            length += 1
        else:
            length = 0
        # Stop once both residuals stayed small for converge_len iterations
        # in a row.
        if length >= converge_len:
            break

    elapsed_time = stopwatch.elapsed(
        time_format=Stopwatch.TimeFormat.kMicroSecond) / 1e6
    out = {
        "tt": elapsed_time,
        "fval": real_obj_func(x_k),
        "f_hist": f_hist,
        "f_hist_best": f_hist_best
    }
    return x_k, k, out
def gl_SGD_primal(x0: np.ndarray, A: np.ndarray, b: np.ndarray, mu_0, opts: dict):
    """Subgradient method with continuation for the group-LASSO problem.

    Targets ``0.5 * ||A x - b||^2 + mu_0 * sum_i ||x[i, :]||_2``.  The
    iteration is run for the weights ``100 * mu_0``, ``10 * mu_0`` and
    ``mu_0`` in turn, each stage starting from the previous stage's result.
    Every stage performs exactly ``maxit`` steps: no early-stopping rule is
    active, so ``ftol`` and ``stable_len_threshold`` are accepted but have no
    effect.

    Args:
        x0: Starting point.  It is copied, so the caller's array is not
            modified.  Row norms are taken along ``axis=1``, so it must be
            2-D.
        A: Matrix of the least-squares term (assumed real-valued).
        b: Right-hand side of the least-squares term.
        mu_0: Weight of the row-norm regularizer in the target problem.
        opts: Overrides for the defaults listed in ``default_opts`` below.

    Returns:
        A tuple ``(x, k, out)``: the final iterate, the total number of
        iterations, and a dict with keys ``"tt"`` (stopwatch reading in
        microseconds divided by 1e6), ``"fval"`` (objective at ``x`` with
        weight ``mu_0``), ``"f_hist"`` and ``"f_hist_best"``.  Unlike
        ``"fval"``, the history entries are evaluated with the weight of the
        stage that produced them, so values from different stages are not
        directly comparable.

    Raises:
        ValueError: If the last stage is reached with an
            ``opts["step_type"]`` other than ``"fixed"``, ``"diminishing"``
            or ``"diminishing2"``.
    """
    default_opts = {
        "maxit": 2100,  # number of iterations per continuation stage
        "thres": 1e-3,  # magnitudes below this value are treated as 0
        "step_type": "diminishing",  # step-size rule (see set_step)
        "alpha0": 1e-3,  # initial step size
        "ftol": 1e-5,  # currently unused (no stopping rule is active)
        "stable_len_threshold": 100,  # currently unused, see ftol
        # When true, alpha0 is replaced by 1 / lambda_max(A^T A).
        "continuous_subgradient_flag": False,
    }
    # Values supplied by the caller overwrite the defaults.
    opts = {**default_opts, **opts}

    maxit, alpha0 = opts["maxit"], opts["alpha0"]
    thres = opts["thres"]
    step_type = opts["step_type"]

    if opts["continuous_subgradient_flag"]:
        # A^T A is symmetric, so the symmetric eigen-solver applies; it
        # returns real eigenvalues, and the largest one bounds the curvature
        # of the quadratic term.
        lipschitz = np.max(LA.eigvalsh(A.T @ A))
        alpha0 = 1. / lipschitz

    def sparsity_func(x: np.ndarray):
        # Fraction of entries larger than 1e-6 times the largest magnitude.
        return np.sum(np.abs(x) > 1e-6 * np.max(np.abs(x))) / x.size

    def obj_func(x: np.ndarray, mu):
        fro_term = 0.5 * np.sum((A @ x - b) ** 2)
        regular_term = np.sum(LA.norm(x, axis=1).reshape(-1, 1))
        return fro_term + mu * regular_term

    def subgrad(x: np.ndarray, mu):
        # Gradient of the quadratic plus mu times the normalized rows.  Rows
        # with norm below thres get 1 added to the denominator, which keeps
        # the division well defined for zero rows.
        fro_term_grad = A.T @ (A @ x - b)
        regular_term_norm = LA.norm(x, axis=1).reshape(-1, 1)
        regular_term_grad = x / ((regular_term_norm < thres) + regular_term_norm)
        return fro_term_grad + mu * regular_term_grad

    def set_step(inn_iter, mu):
        # Earlier stages (mu > mu_0) always use the constant step alpha0.
        # The diminishing rules stay at alpha0 for the first 1000 iterations
        # of the last stage and only then start to decay.
        iter_hat = max(inn_iter, 1000) - 999
        if step_type == 'fixed' or mu > mu_0:
            return alpha0
        if step_type == 'diminishing':
            return alpha0 / np.sqrt(iter_hat)
        if step_type == 'diminishing2':
            return alpha0 / iter_hat
        logger.error("Unsupported type.")
        # Fail here instead of letting a None step reach the arithmetic.
        raise ValueError("Unsupported step_type: {!r}".format(step_type))

    logger.debug("alpha0= {:10E}".format(alpha0))

    f_hist, f_hist_best = [], []
    f_best = np.inf
    x = np.copy(x0)

    stopwatch = Stopwatch()
    stopwatch.start()
    k = 0
    for mu in [100 * mu_0, 10 * mu_0, mu_0]:
        logger.debug("new mu= {:10E}".format(mu))
        inn_iter = 0
        while inn_iter < maxit:
            # Record the objective (with the current stage weight).
            f_now = obj_func(x, mu)
            f_hist.append(f_now)
            f_best = min(f_best, f_now)
            f_hist_best.append(f_best)

            k += 1
            inn_iter += 1

            # Clamp tiny entries to exactly zero before taking the step.
            x[np.abs(x) < thres] = 0
            sub_g = subgrad(x, mu)
            alpha = set_step(inn_iter, mu)
            x = x - alpha * sub_g
            if k % 100 == 0:
                # f_now belongs to the iterate before this step.
                logger.debug(
                    'iter= {:5}, objective= {:10E}, sparsity= {:3f}'.format(
                        k, f_now.item(), sparsity_func(x)))

    elapsed_time = stopwatch.elapsed(
        time_format=Stopwatch.TimeFormat.kMicroSecond) / 1e6
    out = {
        "tt": elapsed_time,
        "fval": obj_func(x, mu_0),
        "f_hist": f_hist,
        "f_hist_best": f_hist_best
    }
    return x, k, out
def gl_Admm_primal(x0: np.ndarray, A: np.ndarray, b: np.ndarray, mu, opts: dict):
    """ADMM-type iteration for the group-LASSO problem on the split x = y.

    Targets ``0.5 * ||A x - b||^2 + mu * sum_i ||x[i, :]||_2``.  Each
    iteration solves a linear system with matrix ``rho * I + A^T A`` for
    ``y``, applies a row-wise shrinkage step to get ``x`` and then updates
    the multiplier ``z`` with step ``tau * rho``.  The loop ends after
    ``maxit`` iterations, or earlier once the norms of ``x - y`` and of the
    change in ``y`` have both been below ``converge_thres`` for
    ``converge_len`` consecutive iterations.

    Args:
        x0: Starting point; ``x``, ``y`` and ``z`` all start from a copy of
            it.  Row norms are taken along ``axis=1``, so it must be 2-D.
        A: Matrix of the least-squares term.
        b: Right-hand side of the least-squares term.
        mu: Weight of the row-norm regularizer.
        opts: Overrides for the defaults listed in ``default_opts`` below.

    Returns:
        A tuple ``(x_k, k, out)``: the final iterate, the number of
        iterations performed, and a dict with keys ``"tt"`` (stopwatch
        reading in microseconds divided by 1e6), ``"fval"`` (objective at
        ``x_k``), ``"f_hist"`` (objective after every iteration) and
        ``"f_hist_best"`` (running minimum of ``f_hist``).

    Raises:
        ValueError: If ``opts["step_type"]`` is not one of ``"fixed"``,
            ``"diminishing"`` or ``"diminishing2"``.
    """
    default_opts = {
        "maxit": 100,  # maximum number of iterations
        "thres": 1e-3,  # row norms below this value are treated as 0
        "tau": (1 + math.sqrt(5)) * 0.5,  # step-length factor of the z update
        "rho": 1e-2,  # penalty parameter
        "eta_0": 100,  # initial step of the x update
        "converge_len": 10,  # consecutive converged iterations required
        "converge_thres": 1e-5,  # residual norms below this count as converged
        "step_type": "fixed",  # rule for the x step (see set_step)
    }
    # Values supplied by the caller overwrite the defaults.
    opts = {**default_opts, **opts}

    maxit, thres = opts["maxit"], opts["thres"]
    rho, tau, eta_0 = opts['rho'], opts['tau'], opts['eta_0']
    converge_len = opts['converge_len']
    converge_thres = opts['converge_thres']
    step_type = opts['step_type']

    def sparsity_func(x: np.ndarray):
        # Fraction of entries larger than 1e-6 times the largest magnitude.
        return np.sum(np.abs(x) > 1e-6 * np.max(np.abs(x))) / x.size

    def real_obj_func(x: np.ndarray):
        fro_term = 0.5 * np.sum((A @ x - b)**2)
        regular_term = np.sum(LA.norm(x, axis=1).reshape(-1, 1))
        return fro_term + mu * regular_term

    def prox_tf(x: np.ndarray, t):
        # Row-wise shrinkage: each row is scaled by max(norm - t * mu, 0) / norm.
        # Rows with norm below thres get 1 added to the denominator, which
        # keeps the division well defined for zero rows.
        t_mu = t * mu
        row_norms = LA.norm(x, axis=1).reshape(-1, 1)
        return (x * np.clip(row_norms - t_mu, a_min=0, a_max=None)
                / ((row_norms < thres) + row_norms))

    def set_step(iteration):
        # iteration is at least 1 when this is called, so the divisions
        # below are well defined.
        if step_type == 'fixed':
            return eta_0
        if step_type == 'diminishing':
            return eta_0 / np.sqrt(iteration)
        if step_type == 'diminishing2':
            return eta_0 / iteration
        # Fail here instead of letting a None step reach the arithmetic.
        raise ValueError("Unsupported step_type: {!r}".format(step_type))

    f_hist, f_hist_best = [], []
    f_best = np.inf

    # The three names share one array; that is safe because no statement
    # below modifies any of them in place.
    x_k = np.copy(x0)
    y_k = x_k
    z_k = x_k

    stopwatch = Stopwatch()
    stopwatch.start()
    k = 0

    # Factor the system matrix once.  The two solves below assume
    # L @ L.T equals the factored matrix, i.e. that LA.cholesky returns the
    # lower factor (true for numpy.linalg) -- TODO confirm what LA is.
    L = LA.cholesky(rho * np.identity(A.shape[1]) + A.T @ A)
    AT_b = A.T @ b
    length = 0

    while k < maxit:
        k += 1
        eta = set_step(k)

        y = LA.solve(L.T, LA.solve(L, AT_b - z_k + rho * x_k))
        # Shrinkage applied to a step of length eta * rho from x_k towards
        # y + z_k / rho.  With eta * rho == 1 (the defaults) the argument
        # reduces to y + z_k / rho.
        x = prox_tf(x_k - eta * rho * (x_k - y - z_k / rho), eta)
        z = z_k - tau * rho * (x - y)

        r_k = x - y  # primal feasibility residual
        s_k = y - y_k  # dual feasibility residual

        x_k, y_k, z_k = x, y, z

        f_now = real_obj_func(x_k)
        f_hist.append(f_now)
        f_best = min(f_best, f_now)
        f_hist_best.append(f_best)

        logger.debug(
            'iter= {:5}, objective= {:10E}, sparsity= {:3f}'.format(
                k, f_now.item(), sparsity_func(x_k)))

        # NOTE(review): if LA is numpy.linalg, ord=2 on a 2-D array is the
        # spectral norm, not the Frobenius norm -- confirm this is intended.
        r_k_norm = LA.norm(r_k, ord=2)
        s_k_norm = LA.norm(s_k, ord=2)
        # The residual test uses the dedicated convergence tolerance; thres
        # is only the zero guard of the shrinkage step.
        if r_k_norm < converge_thres and s_k_norm < converge_thres:
            length += 1
        else:
            length = 0
        if length >= converge_len:
            break

    elapsed_time = stopwatch.elapsed(
        time_format=Stopwatch.TimeFormat.kMicroSecond) / 1e6
    out = {
        "tt": elapsed_time,
        "fval": real_obj_func(x_k),
        "f_hist": f_hist,
        "f_hist_best": f_hist_best
    }
    return x_k, k, out
def gl_FGD_primal(x0: np.ndarray, A: np.ndarray, b: np.ndarray, mu_0, opts: dict):
    """Accelerated gradient method on a smoothed group-LASSO objective.

    Targets ``0.5 * ||A x - b||^2 + mu_0 * sum_i ||x[i, :]||_2``.  Each row
    norm is replaced by ``sqrt(||x[i, :]||^2 + delta^2) - delta`` so that the
    whole objective is differentiable; the gradient steps are combined with
    an extrapolation sequence of weight ``theta = 2 / (inner_iter + 1)``.
    The iteration is run for the weights ``100 * mu_0``, ``10 * mu_0`` and
    ``mu_0`` in turn, each stage starting from the previous stage's result.

    Args:
        x0: Starting point.  It is copied, so the caller's array is not
            modified.  Row norms are taken along ``axis=1``, so it must be
            2-D.
        A: Matrix of the least-squares term.
        b: Right-hand side of the least-squares term.
        mu_0: Weight of the row-norm regularizer in the target problem.
        opts: Overrides for the defaults listed in ``default_opts`` below.

    Returns:
        A tuple ``(x, k, out)``: the final iterate, the total number of
        iterations over all stages, and a dict with keys ``"tt"`` (stopwatch
        reading in microseconds divided by 1e6), ``"fval"`` (unsmoothed
        objective at ``x`` with weight ``mu_0``), ``"f_hist"`` (unsmoothed
        objective before every step) and ``"f_hist_best"`` (running minimum
        of ``f_hist``).

    Raises:
        ValueError: If ``opts["step_type"]`` is not one of ``"fixed"``,
            ``"diminishing"``, ``"diminishing2"`` or ``"line_search"``.
    """
    default_opts = {
        "maxit": 1500,  # maximum number of iterations per continuation stage
        "thres": 1e-3,  # magnitudes below this value are treated as 0
        "step_type": "line_search",  # step-size rule (see set_step)
        "alpha0": 1e-3,  # initial step size
        "ftol": 1e-6,  # relative-change tolerance of the stall test
        "stable_len_threshold": 70,  # stalled iterations tolerated per stage
        "line_search_attenuation_coeffi": 0.98,  # backtracking shrink factor
        "maxit_line_search_iter": 5,  # maximum number of backtracking trials
        "delta": 1e-6  # smoothing parameter
    }
    # Values supplied by the caller overwrite the defaults.
    opts = {**default_opts, **opts}

    maxit, ftol, alpha0 = opts["maxit"], opts["ftol"], opts["alpha0"]
    stable_len_threshold = opts["stable_len_threshold"]
    thres = opts["thres"]
    step_type = opts['step_type']
    aten_coeffi = opts['line_search_attenuation_coeffi']
    max_line_search_iter = opts['maxit_line_search_iter']
    delta = opts["delta"]

    def sparsity_func(x: np.ndarray):
        # Fraction of entries larger than 1e-6 times the largest magnitude.
        return np.sum(np.abs(x) > 1e-6 * np.max(np.abs(x))) / x.size

    def real_obj_func(x: np.ndarray):
        # Unsmoothed objective of the target problem (always uses mu_0).
        fro_term = 0.5 * np.sum((A @ x - b)**2)
        regular_term = np.sum(LA.norm(x, axis=1).reshape(-1, 1))
        return fro_term + mu_0 * regular_term

    def g_func(x: np.ndarray, mu):
        # Smoothed objective: quadratic term plus mu times the smoothed
        # row norms.
        fro_term = 0.5 * np.sum((A @ x - b)**2)
        regular_term = np.sum(
            np.sqrt(np.sum(x**2, axis=1).reshape(-1, 1) + delta * delta)
            - delta)
        return fro_term + mu * regular_term

    def grad_g_func(x: np.ndarray, mu):
        fro_term_grad = A.T @ (A @ x - b)
        regular_term_grad = x / np.sqrt(
            np.sum(x**2, axis=1).reshape(-1, 1) + delta * delta)
        return fro_term_grad + mu * regular_term_grad

    def line_search(y: np.ndarray, grad_g_y: np.ndarray, t_start, mu):
        # Backtracking that starts from the previously accepted step, so the
        # step never grows within a stage.  The non-smooth part is zero here,
        # which makes the trial point a plain gradient step.
        t = t_start
        g_y = g_func(y, mu)
        for _ in range(max_line_search_iter):
            x_trial = y - t * grad_g_y
            if (g_func(x_trial, mu)
                    <= g_y + np.sum(grad_g_y * (x_trial - y))
                    + np.sum((x_trial - y)**2) / (2 * t)):
                break
            t *= aten_coeffi
        return t

    def set_step(inner_iter, y: np.ndarray, grad_g_y: np.ndarray, t_prev, mu):
        # The diminishing rules stay at alpha0 for the first 1000 iterations
        # of a stage and only then start to decay.
        iter_hat = max(inner_iter, 1000) - 999
        if step_type == 'fixed':
            return alpha0
        if step_type == 'diminishing':
            return alpha0 / np.sqrt(iter_hat)
        if step_type == 'diminishing2':
            return alpha0 / iter_hat
        if step_type == 'line_search':
            return line_search(y, grad_g_y, t_prev, mu)
        logger.error("Unsupported type.")
        # Fail here instead of letting a None step reach the arithmetic.
        raise ValueError("Unsupported step_type: {!r}".format(step_type))

    logger.debug("alpha0= {:10E}".format(alpha0))

    f_hist, f_hist_best, sparsity_hist = [], [], []
    f_best = np.inf
    x_k = np.copy(x0)

    stopwatch = Stopwatch()
    stopwatch.start()
    k = 0
    for mu in [100 * mu_0, 10 * mu_0, mu_0]:
        logger.debug("new mu= {:10E}".format(mu))

        # Every stage restarts the extrapolation sequence and the step size.
        v_k = np.copy(x_k)
        t_k = alpha0
        inner_iter = 0
        stable_len = 0
        while inner_iter < maxit:
            # Record the objective and sparsity of the current iterate.
            f_now = real_obj_func(x_k)
            f_hist.append(f_now)
            f_best = min(f_best, f_now)
            f_hist_best.append(f_best)
            sparsity_hist.append(sparsity_func(x_k))

            k += 1
            inner_iter += 1

            # A stage is cut short once the relative change of both the
            # objective and the sparsity stayed below ftol for more than
            # stable_len_threshold consecutive iterations.
            stalled = (
                k > 1
                and abs(f_hist[-1] - f_hist[-2]) / abs(f_hist[-2]) < ftol
                and abs(sparsity_hist[-1] - sparsity_hist[-2])
                / abs(sparsity_hist[-2]) < ftol
            )
            stable_len = stable_len + 1 if stalled else 0
            if stable_len > stable_len_threshold:
                break

            # Clamp tiny entries to exactly zero before taking the step.
            x_k[np.abs(x_k) < thres] = 0

            theta = 2 / (inner_iter + 1)
            y = (1 - theta) * x_k + theta * v_k
            grad_g_y = grad_g_func(y, mu)
            t = set_step(inner_iter, y, grad_g_y, t_k, mu)
            x = y - t * grad_g_y
            v = x_k + (x - x_k) / theta

            x_k, v_k, t_k = x, v, t
            if k % 100 == 0:
                # f_now belongs to the iterate before this step.
                logger.debug(
                    'iter= {:5}, objective= {:10E}, sparsity= {:3f}'.format(
                        k, f_now.item(), sparsity_func(x_k)))

    elapsed_time = stopwatch.elapsed(
        time_format=Stopwatch.TimeFormat.kMicroSecond) / 1e6
    # x_k is returned because it is defined even when no iteration ran.
    out = {
        "tt": elapsed_time,
        "fval": real_obj_func(x_k),
        "f_hist": f_hist,
        "f_hist_best": f_hist_best
    }
    return x_k, k, out