class MultiblockFISTA(bases.ExplicitAlgorithm,
                      bases.IterativeAlgorithm,
                      bases.InformationAlgorithm):
    """The projected or proximal gradient algorithm with alternating
    minimisations in a multiblock setting.

    Parameters
    ----------
    info : List or tuple of utils.Info. What, if any, extra run information
            should be stored. Default is an empty list, which means that no
            run information is computed nor returned.

    eps : Positive float. Tolerance for the stopping criterion.

    max_iter : Non-negative integer. Maximum total allowed number of
            iterations.

    min_iter : Non-negative integer less than or equal to max_iter. Minimum
            number of iterations that must be performed. Default is 1.
    """
    INTERFACES = [multiblock_properties.MultiblockFunction,
                  multiblock_properties.MultiblockGradient,
                  multiblock_properties.MultiblockStepSize,
                  properties.OR(
                      multiblock_properties.MultiblockProjectionOperator,
                      multiblock_properties.MultiblockProximalOperator)]

    INFO_PROVIDED = [Info.ok,
                     Info.num_iter,
                     Info.time,
                     Info.func_val,
                     Info.smooth_func_val,
                     Info.converged]

    def __init__(self, info=[], eps=consts.TOLERANCE,
                 max_iter=consts.MAX_ITER, min_iter=1):

        super(MultiblockFISTA, self).__init__(info=info,
                                              max_iter=max_iter,
                                              min_iter=min_iter)
        self.eps = max(consts.FLOAT_EPSILON, float(eps))

    def reset(self):

        self.info_reset()
        self.iter_reset()

    @bases.force_reset
    @bases.check_compatibility
    def run(self, function, w):

        # Not ok until the end.
        if self.info_requested(Info.ok):
            self.info_set(Info.ok, False)

        # Initialise info variables. Info variables have the prefix "_".
        if self.info_requested(Info.time):
            _t = []
        if self.info_requested(Info.func_val):
            _f = []
        if self.info_requested(Info.smooth_func_val):
            _fmu = []
        if self.info_requested(Info.converged):
            self.info_set(Info.converged, False)

        FISTA = True
        if FISTA:
            exp = 4.0 + consts.FLOAT_EPSILON
        else:
            exp = 2.0 + consts.FLOAT_EPSILON

        block_iter = [1] * len(w)

        it = 0
        while True:

            for i in range(len(w)):

                # Wrap a function around the ith block.
                func = mb_losses.MultiblockFunctionWrapper(function, w, i)

                # Run FISTA.
                w_old = w[i]
                for k in range(1, max(self.min_iter + 1,
                                      self.max_iter - self.num_iter + 1)):

                    if self.info_requested(Info.time):
                        time = utils.time_wall()

                    if FISTA:
                        # Take an interpolated step.
                        z = w[i] + ((k - 2.0) / (k + 1.0)) * (w[i] - w_old)
                    else:
                        z = w[i]

                    # Compute the step.
                    step = func.step(z)

                    # Compute inexact precision.
                    eps = max(consts.FLOAT_EPSILON,
                              1.0 / (block_iter[i] ** exp))

                    w_old = w[i]

                    # Take a FISTA step.
                    w[i] = func.prox(z - step * func.grad(z),
                                     factor=step, eps=eps)

                    # Store info variables.
                    if self.info_requested(Info.time):
                        _t.append(utils.time_wall() - time)
                    if self.info_requested(Info.func_val):
                        _f.append(function.f(w))
                    if self.info_requested(Info.smooth_func_val):
                        _fmu.append(function.fmu(w))

                    # Update iteration counts.
                    self.num_iter += 1
                    block_iter[i] += 1

                    # Test the stopping criterion for block i.
                    if maths.norm(w[i] - z) < step * self.eps \
                            and k >= self.min_iter:
                        break

            # Test the global stopping criterion.
            all_converged = True
            for i in range(len(w)):

                # Wrap a function around the ith block.
                func = mb_losses.MultiblockFunctionWrapper(function, w, i)

                # Compute the step.
                step = func.step(w[i])

                # Compute inexact precision.
                eps = max(consts.FLOAT_EPSILON,
                          1.0 / (block_iter[i] ** exp))

                # Take one ISTA step for use in the stopping criterion.
                w_tilde = func.prox(w[i] - step * func.grad(w[i]),
                                    factor=step, eps=eps)

                # Test if converged for block i.
                if maths.norm(w[i] - w_tilde) > step * self.eps:
                    all_converged = False
                    break

            # Converged in all blocks!
            if all_converged:
                if self.info_requested(Info.converged):
                    self.info_set(Info.converged, True)

                break

            # Stop after the maximum number of iterations.
            if self.num_iter >= self.max_iter:
                break

            it += 1

        # Store information.
        if self.info_requested(Info.num_iter):
            self.info_set(Info.num_iter, self.num_iter)
        if self.info_requested(Info.time):
            self.info_set(Info.time, _t)
        if self.info_requested(Info.func_val):
            self.info_set(Info.func_val, _f)
        if self.info_requested(Info.smooth_func_val):
            self.info_set(Info.smooth_func_val, _fmu)
        if self.info_requested(Info.ok):
            self.info_set(Info.ok, True)

        return w
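

# Illustrative sketch only: the per-block update performed by MultiblockFISTA
# above, written as a standalone single-block FISTA for the lasso problem
#     min. (1 / 2) * ||X.b - y||^2 + l * ||b||_1
# using plain NumPy. The helper names below (_soft_threshold,
# _fista_lasso_sketch) are hypothetical and not part of parsimony; the class
# itself relies on the library's own step and prox machinery.
import numpy as np


def _soft_threshold(v, t):
    """Proximal operator of t * ||.||_1 (elementwise soft-thresholding)."""
    return np.sign(v) * np.maximum(np.abs(v) - t, 0.0)


def _fista_lasso_sketch(X, y, l, max_iter=1000, eps=1e-8):
    """A minimal FISTA loop mirroring the inner loop of MultiblockFISTA."""
    w = np.zeros((X.shape[1], 1))
    w_old = w
    step = 1.0 / (np.linalg.norm(X, 2) ** 2)  # 1 / L, with L the Lipschitz constant.
    for k in range(1, max_iter + 1):
        # Interpolated point, as in the FISTA branch above.
        z = w + ((k - 2.0) / (k + 1.0)) * (w - w_old)
        w_old = w
        # Gradient step at z, followed by the proximal operator.
        grad = X.T.dot(X.dot(z) - y)
        w = _soft_threshold(z - step * grad, step * l)
        # Same stopping criterion as in the class: ||w - z|| < step * eps.
        if np.linalg.norm(w - z) < step * eps:
            break
    return w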
class ADMM(bases.ExplicitAlgorithm,
           bases.IterativeAlgorithm,
           bases.InformationAlgorithm):
    """The alternating direction method of multipliers (ADMM). Computes the
    minimum of the sum of two functions with associated proximal or projection
    operators. Solves problems on the form

        min. f(x, y) = g(x) + h(y)
        s.t. y = x

    The functions have associated proximal or projection operators.

    Parameters
    ----------
    rho : Positive float. The penalty parameter.

    mu : Float, greater than 1. The factor within which the primal and dual
            residuals should be kept. Set to less than or equal to 1 if you
            don't want to update the penalty parameter rho dynamically.

    tau : Float, greater than 1. Increase rho by a factor tau.

    info : List or tuple of utils.consts.Info. What, if any, extra run
            information should be stored. Default is an empty list, which
            means that no run information is computed nor returned.

    eps : Positive float. Tolerance for the stopping criterion.

    max_iter : Non-negative integer. Maximum allowed number of iterations.

    min_iter : Non-negative integer less than or equal to max_iter. Minimum
            number of iterations that must be performed. Default is 1.

    simulation : Boolean. If True, the stopping criterion is not applied and
            the algorithm runs until max_iter. Default is False.
    """
    INTERFACES = [properties.SplittableFunction,
                  properties.AugmentedProximalOperator,
                  properties.OR(properties.ProximalOperator,
                                properties.ProjectionOperator)]

    INFO_PROVIDED = [Info.ok,
                     Info.num_iter,
                     Info.time,
                     Info.fvalue,
                     Info.converged]

    def __init__(self, rho=1.0, mu=10.0, tau=2.0,
                 info=[],
                 eps=consts.TOLERANCE,
                 max_iter=consts.MAX_ITER, min_iter=1,
                 simulation=False):
        # TODO: Investigate what is a good default value here!

        super(ADMM, self).__init__(info=info,
                                   max_iter=max_iter,
                                   min_iter=min_iter)

        self.rho = max(consts.FLOAT_EPSILON, float(rho))
        self.mu = max(1.0, float(mu))
        self.tau = max(1.0, float(tau))

        self.eps = max(consts.FLOAT_EPSILON, float(eps))

        self.simulation = bool(simulation)

    @bases.force_reset
    @bases.check_compatibility
    def run(self, functions, xy):
        """Finds the minimum of two functions with associated proximal
        operators.

        Parameters
        ----------
        functions : List or tuple with two Functions or a SplittableFunction.
                The two functions.

        xy : List or tuple with two elements, numpy arrays. The starting
                points for the minimisation.
        """
        if self.info_requested(Info.ok):
            self.info_set(Info.ok, False)

        if self.info_requested(Info.time):
            t = []
        if self.info_requested(Info.fvalue):
            f = []
        if self.info_requested(Info.converged):
            self.info_set(Info.converged, False)

        funcs = [functions.g, functions.h]

        x_new = xy[0]
        y_new = xy[1]
        z_new = x_new.copy()
        u_new = y_new.copy()

        for i in range(1, self.max_iter + 1):

            if self.info_requested(Info.time):
                tm = utils.time_cpu()

            x_old = x_new
            z_old = z_new
            u_old = u_new

            if isinstance(funcs[0], properties.ProximalOperator):
                x_new = funcs[0].prox(z_old - u_old)
            else:
                x_new = funcs[0].proj(z_old - u_old)

            y_new = x_new  # TODO: Allow a linear operator here.

            if isinstance(funcs[1], properties.ProximalOperator):
                z_new = funcs[1].prox(y_new + u_old)
            else:
                z_new = funcs[1].proj(y_new + u_old)

            # The order here is important! Do not change!
            u_new = (y_new - z_new) + u_old

            if self.info_requested(Info.time):
                t.append(utils.time_cpu() - tm)
            if self.info_requested(Info.fvalue):
                fval = funcs[0].f(z_new) + funcs[1].f(z_new)
                f.append(fval)

            if not self.simulation:
                if i == 1:
                    if maths.norm(x_new - x_old) < self.eps \
                            and i >= self.min_iter:

                        if self.info_requested(Info.converged):
                            self.info_set(Info.converged, True)

                        break
                else:
                    if maths.norm(x_new - x_old) / maths.norm(x_old) \
                            < self.eps and i >= self.min_iter:

                        if self.info_requested(Info.converged):
                            self.info_set(Info.converged, True)

                        break

            # Update the penalty parameter, rho, dynamically.
            if self.mu > 1.0:
                r = x_new - z_new
                s = (z_new - z_old) * -self.rho
                norm_r = maths.norm(r)
                norm_s = maths.norm(s)

                if norm_r > self.mu * norm_s:
                    self.rho *= self.tau
                    u_new *= 1.0 / self.tau  # Rescale dual variable.
                elif norm_s > self.mu * norm_r:
                    self.rho /= self.tau
                    u_new *= self.tau  # Rescale dual variable.

                # Update the penalty parameter in the functions.
                functions.set_rho(self.rho)

        self.num_iter = i

        if self.info_requested(Info.num_iter):
            self.info_set(Info.num_iter, i)
        if self.info_requested(Info.time):
            self.info_set(Info.time, t)
        if self.info_requested(Info.fvalue):
            self.info_set(Info.fvalue, f)
        if self.info_requested(Info.ok):
            self.info_set(Info.ok, True)

        return z_new
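

# Illustrative sketch only: the update order used in ADMM.run above (x-step,
# z-step, dual step, then residual-based rescaling of rho), written for a toy
# consensus lasso split g(x) = (1 / 2) * ||A.x - b||^2, h(z) = l * ||z||_1
# with plain NumPy. The helper name below is hypothetical and this is not the
# parsimony API; it uses a simplified absolute stopping criterion.
import numpy as np


def _admm_lasso_sketch(A, b, l, rho=1.0, mu=10.0, tau=2.0, max_iter=1000,
                       eps=1e-8):
    """A minimal consensus ADMM loop mirroring the steps of ADMM.run."""
    n = A.shape[1]
    x = np.zeros((n, 1))
    z = np.zeros((n, 1))
    u = np.zeros((n, 1))
    AtA = A.T.dot(A)
    Atb = A.T.dot(b)
    for i in range(1, max_iter + 1):
        x_old = x
        z_old = z
        # x-step: proximal operator of g with penalty rho (a ridge-type solve).
        x = np.linalg.solve(AtA + rho * np.eye(n), Atb + rho * (z - u))
        # z-step: proximal operator of h (soft-thresholding), applied to x + u.
        z = np.sign(x + u) * np.maximum(np.abs(x + u) - l / rho, 0.0)
        # Dual step: the order matters, just as noted in ADMM.run.
        u = (x - z) + u
        # Stop on a small change in x, as in the class (simplified here).
        if i > 1 and np.linalg.norm(x - x_old) < eps:
            break
        # Dynamic penalty update: keep the primal and dual residuals within a
        # factor mu of each other, rescaling u whenever rho changes.
        norm_r = np.linalg.norm(x - z)               # Primal residual.
        norm_s = np.linalg.norm(-rho * (z - z_old))  # Dual residual.
        if norm_r > mu * norm_s:
            rho *= tau
            u /= tau
        elif norm_s > mu * norm_r:
            rho /= tau
            u *= tau
    return z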
class SubGradientDescent(bases.ExplicitAlgorithm,
                         bases.IterativeAlgorithm,
                         bases.InformationAlgorithm):
    """The subgradient descent algorithm.

    Note: If the function has a gradient, it will be used instead,
    effectively making this the gradient descent algorithm. To prevent this
    from happening, change the use_gradient parameter.

    Parameters
    ----------
    step_size : parsimony.algorithms.utils.StepSize
        The step size function to use. Default is NonSumDimStepSize(a=0.1).

    eps : float
        Must be positive. Tolerance for the stopping criterion.

    info : list or tuple of utils.consts.Info
        What, if any, extra run information should be stored. Default is an
        empty list, which means that no run information is computed nor
        returned.

    max_iter : int
        Must be non-negative. Maximum allowed number of iterations. Default
        is 10000.

    min_iter : int
        Must be non-negative and less than or equal to max_iter. Minimum
        number of iterations that must be performed. Default is 1.

    use_best_f : bool
        Whether or not to keep the parameter vector that gave the lowest
        function value over all iterations. Default is True, i.e. the best
        parameter vector found over all iterations is the one returned.

    use_gradient : bool
        Whether or not to utilise the gradient of the function, if it exists.
        Default is False, i.e. do not use the gradient if it exists.

    Examples
    --------
    >>> from parsimony.algorithms.subgradient import SubGradientDescent
    >>> from parsimony.functions.losses import RidgeRegression
    >>> from parsimony.algorithms.utils import NonSumDimStepSize
    >>> import numpy as np
    >>> np.random.seed(42)
    >>> X = np.random.randn(100, 50)
    >>> y = np.random.randn(100, 1)
    >>> sgd = SubGradientDescent(max_iter=10000,
    ...                          step_size=NonSumDimStepSize(a=0.1),
    ...                          use_gradient=True)
    >>> function = RidgeRegression(X, y, k=0.0, mean=False)
    >>> beta1 = sgd.run(function, np.random.rand(50, 1))
    >>> beta2 = np.dot(np.linalg.pinv(X), y)
    >>> round(np.linalg.norm(beta1 - beta2) / np.linalg.norm(beta2), 13) < 5e-6
    True
    """
    INTERFACES = [properties.Function,
                  properties.OR(properties.Gradient,
                                properties.SubGradient)]

    INFO_PROVIDED = [Info.ok,
                     Info.num_iter,
                     Info.time,
                     Info.fvalue,
                     Info.func_val,
                     Info.converged]

    def __init__(self, step_size=NonSumDimStepSize(a=0.1),
                 eps=consts.TOLERANCE,
                 info=[], max_iter=10000, min_iter=1,
                 use_best_f=True, use_gradient=False):

        super(SubGradientDescent, self).__init__(info=info,
                                                 max_iter=max_iter,
                                                 min_iter=min_iter)

        self.step_size = step_size
        self.eps = float(eps)
        self.use_best_f = bool(use_best_f)
        self.use_gradient = bool(use_gradient)

    @bases.force_reset
    @bases.check_compatibility
    def run(self, function, beta):
        """Find the minimiser of the given function, starting at beta.

        Parameters
        ----------
        function : Function
            The function to minimise.

        beta : numpy array
            The start vector.
        """
        if self.info_requested(Info.ok):
            self.info_set(Info.ok, False)

        betanew = betaold = beta

        if self.use_gradient and hasattr(function, "grad"):
            function_grad = function.grad
        else:
            function_grad = function.subgrad

        if self.info_requested(Info.time):
            t = []
        if self.info_requested(Info.func_val):
            f = []
        if self.info_requested(Info.converged):
            self.info_set(Info.converged, False)

        fbest = np.inf
        betabest = None

        for i in range(1, self.max_iter + 1):

            if self.info_requested(Info.time):
                tm = utils.time_cpu()

            betaold = betanew
            subgrad = function_grad(betaold)

            step = self.step_size(i, betaold, subgrad)

            betanew = betaold - step * subgrad

            fval = None
            if self.use_best_f:
                fval = function.f(betanew)
                if fval < fbest:
                    fbest = fval
                    betabest = betanew

            if self.info_requested(Info.time):
                t.append(utils.time_cpu() - tm)
            if self.info_requested(Info.func_val):
                if self.use_best_f:
                    f.append(fbest)
                else:
                    if fval is None:
                        f.append(function.f(betanew))
                    else:
                        f.append(fval)

            if maths.norm(betanew - betaold) < self.eps \
                    and i >= self.min_iter:

                if self.info_requested(Info.converged):
                    self.info_set(Info.converged, True)

                break

        if self.info_requested(Info.num_iter):
            self.info_set(Info.num_iter, i)
        if self.info_requested(Info.time):
            self.info_set(Info.time, t)
        if self.info_requested(Info.func_val):
            self.info_set(Info.func_val, f)
        if self.info_requested(Info.ok):
            self.info_set(Info.ok, True)

        if self.use_best_f:
            return betabest
        else:
            return betanew
class ParallelDykstrasProximalAlgorithm(bases.ExplicitAlgorithm):
    """Dykstra's projection algorithm for two or more functions. Computes the
    proximal operator of a sum of functions. These functions may be indicator
    functions for convex sets (ProjectionOperator) or ProximalOperators.

    If all functions are ProjectionOperators, this algorithm finds the
    projection onto the intersection of the convex sets.

    The functions have projection operators (ProjectionOperator.proj) onto
    the respective convex sets or proximal operators (ProximalOperator.prox).
    """
    INTERFACES = [properties.Function,
                  properties.OR(properties.ProjectionOperator,
                                properties.ProximalOperator)]

    def __init__(self, eps=consts.TOLERANCE, max_iter=100, min_iter=1):
        # TODO: Investigate what is a good default value here!

        self.eps = eps
        self.max_iter = max_iter
        self.min_iter = min_iter

    def run(self, x, prox=[], proj=[], factor=1.0, weights=None):
        """Finds the projection onto the intersection of two or more sets.

        Parameters
        ----------
        x : Numpy array. The point that we wish to project.

        prox : List or tuple with two or more elements. The functions that
                are ProximalOperators. Either prox or proj must be non-empty.

        proj : List or tuple with two or more elements. The functions that
                are ProjectionOperators. Either proj or prox must be
                non-empty.

        factor : Positive float. A factor by which the Lagrange multiplier is
                scaled. This is usually the step size.

        weights : List or tuple with floats. Weights for the functions.
                Default is that they all have the same weight. The elements
                of the list or tuple must sum to 1.
        """
        for f in prox:
            self.check_compatibility(f, self.INTERFACES)
        for f in proj:
            self.check_compatibility(f, self.INTERFACES)

        num_prox = len(prox)
        num_proj = len(proj)

        if weights is None:
            weights = [1.0 / float(num_prox + num_proj)] \
                * (num_prox + num_proj)

        x_new = x_old = x
        p = [0.0] * (num_prox + num_proj)
        z = [0.0] * (num_prox + num_proj)
        for j in range(num_prox + num_proj):
            z[j] = np.copy(x)

        for i in range(1, self.max_iter + 1):

            # Apply the prox/projection operators to the individual copies.
            # Note: the inner loops use j so that they do not shadow the
            # iteration counter i used in the stopping criterion.
            for j in range(num_prox):
                p[j] = prox[j].prox(z[j], factor)
            for j in range(num_proj):
                p[num_prox + j] = proj[j].proj(z[num_prox + j])

            # Weighted average of the individual outputs.
            x_old = x_new
            x_new = np.zeros(x_old.shape)
            for j in range(num_prox + num_proj):
                x_new += weights[j] * p[j]

            if maths.norm(x_new - x_old) / maths.norm(x_old) < self.eps \
                    and i >= self.min_iter:

                all_feasible = True
                for j in range(num_proj):
                    if proj[j].f(p[num_prox + j]) > 0.0:
                        all_feasible = False

                if all_feasible:
                    break

            # Update the correction terms.
            for j in range(num_prox + num_proj):
                z[j] = x_new + z[j] - p[j]

        return x_new
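

# Illustrative sketch only: the parallel-Dykstra bookkeeping used by
# ParallelDykstrasProximalAlgorithm.run above (per-function copies z_i,
# outputs p_i, weighted averaging, and the correction z_i = x + z_i - p_i),
# applied to projecting onto the intersection of the unit l2 ball and the
# non-negative orthant with plain NumPy. The helper name is hypothetical and
# this is not the parsimony API. A non-zero float input x is assumed.
import numpy as np


def _parallel_dykstra_sketch(x, max_iter=100, eps=1e-8):
    """Project x onto {v : ||v|| <= 1} intersected with {v : v >= 0}."""
    projections = [
        lambda v: v / max(1.0, np.linalg.norm(v)),  # Unit l2 ball.
        lambda v: np.maximum(v, 0.0),               # Non-negative orthant.
    ]
    weights = [0.5, 0.5]
    z = [np.copy(x) for _ in projections]
    x_new = x
    for i in range(1, max_iter + 1):
        # Apply each projection to its own copy.
        p = [proj(z_j) for proj, z_j in zip(projections, z)]
        # Weighted average of the individual projections.
        x_old = x_new
        x_new = sum(w * p_j for w, p_j in zip(weights, p))
        if np.linalg.norm(x_new - x_old) / np.linalg.norm(x_old) < eps:
            break
        # Correction terms, exactly as in the class above.
        z = [x_new + z_j - p_j for z_j, p_j in zip(z, p)]
    return x_new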