class MultiblockFISTA(bases.ExplicitAlgorithm, bases.IterativeAlgorithm,
                      bases.InformationAlgorithm):
    """The projected or proximal gradient algorithm with alternating
    minimisations in a multiblock setting.

    Parameters
    ----------
    info : List or tuple of utils.Info. What, if any, extra run information
            should be stored. Default is an empty list, which means that no
            run information is computed or returned.

    eps : Positive float. Tolerance for the stopping criterion.

    max_iter : Non-negative integer. Maximum total allowed number of
            iterations.

    min_iter : Non-negative integer less than or equal to max_iter. Minimum
            number of iterations that must be performed. Default is 1.
    """
    INTERFACES = [
        multiblock_properties.MultiblockFunction,
        multiblock_properties.MultiblockGradient,
        multiblock_properties.MultiblockStepSize,
        properties.OR(multiblock_properties.MultiblockProjectionOperator,
                      multiblock_properties.MultiblockProximalOperator)
    ]

    INFO_PROVIDED = [
        Info.ok, Info.num_iter, Info.time, Info.func_val, Info.smooth_func_val,
        Info.converged
    ]

    def __init__(self,
                 info=[],
                 eps=consts.TOLERANCE,
                 max_iter=consts.MAX_ITER,
                 min_iter=1):

        super(MultiblockFISTA, self).__init__(info=info,
                                              max_iter=max_iter,
                                              min_iter=min_iter)

        self.eps = max(consts.FLOAT_EPSILON, float(eps))

    def reset(self):

        self.info_reset()
        self.iter_reset()

    @bases.force_reset
    @bases.check_compatibility
    def run(self, function, w):

        # Not ok until the end.
        if self.info_requested(Info.ok):
            self.info_set(Info.ok, False)

        # Initialise info variables. Info variables have the prefix "_".
        if self.info_requested(Info.time):
            _t = []
        if self.info_requested(Info.func_val):
            _f = []
        if self.info_requested(Info.smooth_func_val):
            _fmu = []
        if self.info_requested(Info.converged):
            self.info_set(Info.converged, False)

        FISTA = True
        if FISTA:
            # With the accelerated (FISTA) steps, the precision of the inexact
            # prox (eps below) is tightened roughly as 1 / k**4 ...
            exp = 4.0 + consts.FLOAT_EPSILON
        else:
            # ... and roughly as 1 / k**2 with plain (ISTA) steps.
            exp = 2.0 + consts.FLOAT_EPSILON
        block_iter = [1] * len(w)

        it = 0
        while True:

            for i in range(len(w)):

                # Wrap a function around the ith block.
                func = mb_losses.MultiblockFunctionWrapper(function, w, i)

                # Run FISTA.
                w_old = w[i]
                for k in range(
                        1,
                        max(self.min_iter + 1,
                            self.max_iter - self.num_iter + 1)):

                    if self.info_requested(Info.time):
                        time = utils.time_wall()

                    if FISTA:
                        # Take an interpolated step.
                        z = w[i] + ((k - 2.0) / (k + 1.0)) * (w[i] - w_old)
                    else:
                        z = w[i]

                    # Compute the step.
                    step = func.step(z)
                    # Compute inexact precision.
                    eps = max(consts.FLOAT_EPSILON, 1.0 / (block_iter[i]**exp))

                    w_old = w[i]
                    # Take a FISTA step.
                    w[i] = func.prox(z - step * func.grad(z),
                                     factor=step,
                                     eps=eps)

                    # Store info variables.
                    if self.info_requested(Info.time):
                        _t.append(utils.time_wall() - time)
                    if self.info_requested(Info.func_val):
                        _f.append(function.f(w))
                    if self.info_requested(Info.smooth_func_val):
                        _fmu.append(function.fmu(w))

                    # Update iteration counts.
                    self.num_iter += 1
                    block_iter[i] += 1

                    # Test stopping criterion.
                    if maths.norm(w[i] - z) < step * self.eps \
                            and k >= self.min_iter:
                        break

            # Test global stopping criterion.
            all_converged = True
            for i in range(len(w)):

                # Wrap a function around the ith block.
                func = mb_losses.MultiblockFunctionWrapper(function, w, i)

                # Compute the step.
                step = func.step(w[i])
                # Compute inexact precision.
                eps = max(consts.FLOAT_EPSILON, 1.0 / (block_iter[i]**exp))
                # Take one ISTA step for use in the stopping criterion.
                w_tilde = func.prox(w[i] - step * func.grad(w[i]),
                                    factor=step,
                                    eps=eps)

                # Test if converged for block i.
                if maths.norm(w[i] - w_tilde) > step * self.eps:
                    all_converged = False
                    break

            # Converged in all blocks!
            if all_converged:
                if self.info_requested(Info.converged):
                    self.info_set(Info.converged, True)

                break

            # Stop after maximum number of iterations.
            if self.num_iter >= self.max_iter:
                break

            it += 1

        # Store information.
        if self.info_requested(Info.num_iter):
            self.info_set(Info.num_iter, self.num_iter)
        if self.info_requested(Info.time):
            self.info_set(Info.time, _t)
        if self.info_requested(Info.func_val):
            self.info_set(Info.func_val, _f)
        if self.info_requested(Info.smooth_func_val):
            self.info_set(Info.smooth_func_val, _fmu)
        if self.info_requested(Info.ok):
            self.info_set(Info.ok, True)

        return w
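

# For reference, the per-block update performed above is a standard (inexact)
# FISTA step: an interpolated point, a gradient step at that point, and a
# proximal step whose precision is tightened roughly as 1 / k**4. The sketch
# below is a minimal, standalone illustration in plain numpy for a single
# block, using a lasso problem as a stand-in for the wrapped block function;
# the problem, the names and the fixed step size are illustrative assumptions,
# not the class's API.
import numpy as np


def _soft_threshold(v, t):
    # Proximal operator of t * ||.||_1 (computed exactly here; the class above
    # allows an inexact prox with precision eps ~ 1 / k**4).
    return np.sign(v) * np.maximum(np.abs(v) - t, 0.0)


def _fista_lasso_sketch(X, y, lam=0.1, num_iter=500, eps=5e-8):
    # Minimise 0.5 * ||X w - y||^2 + lam * ||w||_1 with FISTA.
    step = 1.0 / np.linalg.norm(X, 2) ** 2  # 1 / L, L = Lipschitz constant.
    w = w_old = np.zeros(X.shape[1])
    for k in range(1, num_iter + 1):
        # Interpolated point, same weight (k - 2) / (k + 1) as in run() above.
        z = w + ((k - 2.0) / (k + 1.0)) * (w - w_old)
        grad = X.T.dot(X.dot(z) - y)
        w_old = w
        w = _soft_threshold(z - step * grad, step * lam)
        # Same form of stopping criterion as above: ||w - z|| < step * eps.
        if np.linalg.norm(w - z) < step * eps:
            break
    return w

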
class ADMM(bases.ExplicitAlgorithm,
           bases.IterativeAlgorithm,
           bases.InformationAlgorithm):
    """The alternating direction method of multipliers (ADMM). Computes the
    minimum of the sum of two functions with associated proximal or projection
    operators. Solves problems on the form

        min. f(x, y) = g(x) + h(y)
        s.t. y = x

    The functions have associated proximal or projection operators.

    Parameters
    ----------
    rho : Positive float. The penalty parameter.

    mu : Float, greater than 1. The factor within which the primal and dual
            residuals should be kept before the penalty parameter is updated.
            Set to less than or equal to 1 if you don't want to update the
            penalty parameter rho dynamically.

    tau : Float, greater than 1. The factor by which rho is increased or
            decreased when the primal and dual residuals become unbalanced.

    info : List or tuple of utils.consts.Info. What, if any, extra run
            information should be stored. Default is an empty list, which means
            that no run information is computed or returned.

    eps : Positive float. Tolerance for the stopping criterion.

    max_iter : Non-negative integer. Maximum allowed number of iterations.

    min_iter : Non-negative integer less than or equal to max_iter. Minimum
            number of iterations that must be performed. Default is 1.
    """
    INTERFACES = [properties.SplittableFunction,
                  properties.AugmentedProximalOperator,
                  properties.OR(properties.ProximalOperator,
                                properties.ProjectionOperator)]

    INFO_PROVIDED = [Info.ok,
                     Info.num_iter,
                     Info.time,
                     Info.fvalue,
                     Info.converged]

    def __init__(self, rho=1.0, mu=10.0, tau=2.0,
                 info=[],
                 eps=consts.TOLERANCE, max_iter=consts.MAX_ITER, min_iter=1,
                 simulation=False):
        # TODO: Investigate what is a good default value here!

        super(ADMM, self).__init__(info=info,
                                   max_iter=max_iter,
                                   min_iter=min_iter)

        self.rho = max(consts.FLOAT_EPSILON, float(rho))
        self.mu = max(1.0, float(mu))
        self.tau = max(1.0, float(tau))

        self.eps = max(consts.FLOAT_EPSILON, float(eps))

        self.simulation = bool(simulation)

    @bases.force_reset
    @bases.check_compatibility
    def run(self, functions, xy):
        """Finds the minimum of two functions with associated proximal
        operators.

        Parameters
        ----------
        functions : List or tuple with two Functions or a SplittableFunction.
                The two functions.

        xy : List or tuple with two elements, numpy arrays. The starting points
                for the minimisation.
        """
        if self.info_requested(Info.ok):
            self.info_set(Info.ok, False)

        if self.info_requested(Info.time):
            t = []
        if self.info_requested(Info.fvalue):
            f = []
        if self.info_requested(Info.converged):
            self.info_set(Info.converged, False)

        funcs = [functions.g, functions.h]

        x_new = xy[0]
        y_new = xy[1]
        z_new = x_new.copy()
        u_new = y_new.copy()
        for i in range(1, self.max_iter + 1):

            if self.info_requested(Info.time):
                tm = utils.time_cpu()

            x_old = x_new
            z_old = z_new
            u_old = u_new

            if isinstance(funcs[0], properties.ProximalOperator):
                x_new = funcs[0].prox(z_old - u_old)
            else:
                x_new = funcs[0].proj(z_old - u_old)

            y_new = x_new  # TODO: Allow a linear operator here.

            if isinstance(funcs[1], properties.ProximalOperator):
                z_new = funcs[1].prox(y_new + u_old)
            else:
                z_new = funcs[1].proj(y_new + u_old)

            # The order here is important! Do not change!
            u_new = (y_new - z_new) + u_old

            if self.info_requested(Info.time):
                t.append(utils.time_cpu() - tm)
            if self.info_requested(Info.fvalue):
                fval = funcs[0].f(z_new) + funcs[1].f(z_new)
                f.append(fval)

            if not self.simulation:
                if i == 1:
                    if maths.norm(x_new - x_old) < self.eps \
                            and i >= self.min_iter:
                        if self.info_requested(Info.converged):
                            self.info_set(Info.converged, True)

                        break
                else:
                    if maths.norm(x_new - x_old) / maths.norm(x_old) < self.eps \
                            and i >= self.min_iter:
                        if self.info_requested(Info.converged):
                            self.info_set(Info.converged, True)

                        break

            # Update the penalty parameter, rho, dynamically.
            if self.mu > 1.0:
                r = x_new - z_new
                s = (z_new - z_old) * -self.rho
                norm_r = maths.norm(r)
                norm_s = maths.norm(s)

                if norm_r > self.mu * norm_s:
                    self.rho *= self.tau
                    u_new *= 1.0 / self.tau  # Rescale dual variable.
                elif norm_s > self.mu * norm_r:
                    self.rho /= self.tau
                    u_new *= self.tau  # Rescale dual variable.

                # Update the penalty parameter in the functions.
                functions.set_rho(self.rho)

        self.num_iter = i

        if self.info_requested(Info.num_iter):
            self.info_set(Info.num_iter, i)
        if self.info_requested(Info.time):
            self.info_set(Info.time, t)
        if self.info_requested(Info.fvalue):
            self.info_set(Info.fvalue, f)
        if self.info_requested(Info.ok):
            self.info_set(Info.ok, True)

        return z_new
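

# The splitting implemented above (x-update via the prox of g, z-update via
# the prox of h, dual update, residual-based rescaling of rho) is easiest to
# see on a concrete problem. The sketch below is a minimal, standalone
# plain-numpy ADMM for the lasso, min. 0.5 * ||A x - b||^2 + lam * ||x||_1;
# the problem and all names are illustrative assumptions and do not reproduce
# the class's API.
import numpy as np


def _admm_lasso_sketch(A, b, lam=0.1, rho=1.0, num_iter=200, eps=5e-8):
    # Split as g(x) = 0.5 * ||A x - b||^2 and h(z) = lam * ||z||_1, s.t. x = z.
    p = A.shape[1]
    x = np.zeros(p)
    z = np.zeros(p)
    u = np.zeros(p)  # Scaled dual variable, as in run() above.
    AtA = A.T.dot(A)
    Atb = A.T.dot(b)
    for i in range(1, num_iter + 1):
        # x-update: the prox of g reduces to a linear solve here.
        x = np.linalg.solve(AtA + rho * np.eye(p), Atb + rho * (z - u))
        # z-update: the prox of h is soft-thresholding.
        z_old = z
        z = np.sign(x + u) * np.maximum(np.abs(x + u) - lam / rho, 0.0)
        # Dual update; same order as in run() above.
        u = u + x - z
        # Primal and dual residuals, the quantities used above to rescale rho.
        r = np.linalg.norm(x - z)
        s = rho * np.linalg.norm(z - z_old)
        if max(r, s) < eps:
            break
    return z

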
class SubGradientDescent(bases.ExplicitAlgorithm, bases.IterativeAlgorithm,
                         bases.InformationAlgorithm):
    """The subgradient descent algorithm.

    Note: If use_gradient is True and the function has a gradient, the gradient
    will be used instead of a subgradient, effectively making this the gradient
    descent algorithm. By default the gradient is not used; see the
    use_gradient parameter.

    Parameters
    ----------
    step_size : parsimony.algorithms.utils.StepSize
        The step size function to use. Default is NonSumDimStepSize(a=0.1).

    eps : float
        Must be positive. Tolerance for the stopping criterion.

    info : list or tuple of utils.consts.Info
        What, if any, extra run information should be stored. Default is an
        empty list, which means that no run information is computed or
        returned.

    max_iter : int
        Must be non-negative. Maximum allowed number of iterations. Default is
        10000.

    min_iter : int
        Must be non-negative and less than or equal to max_iter. Minimum number
        of iterations that must be performed. Default is 1.

    use_best_f : bool
        Whether or not to keep the parameter vector that gave the lowest
        function value over all iterations. Default is True, i.e. the best
        parameter vector found over all iterations is the one returned.

    use_gradient : bool
        Whether or not to utilise the gradient of the function, if it exists.
        Default is False, i.e. do not use the gradient if it exists.

    Examples
    --------
    >>> from parsimony.algorithms.subgradient import SubGradientDescent
    >>> from parsimony.functions.losses import RidgeRegression
    >>> from parsimony.algorithms.utils import NonSumDimStepSize
    >>> import numpy as np
    >>> np.random.seed(42)
    >>> X = np.random.randn(100, 50)
    >>> y = np.random.randn(100, 1)
    >>> sgd = SubGradientDescent(max_iter=10000, step_size=NonSumDimStepSize(a=0.1), use_gradient=True)
    >>> function = RidgeRegression(X, y, k=0.0, mean=False)
    >>> beta1 = sgd.run(function, np.random.rand(50, 1))
    >>> beta2 = np.dot(np.linalg.pinv(X), y)
    >>> round(np.linalg.norm(beta1 - beta2) / np.linalg.norm(beta2), 13) < 5e-6
    True
    """
    INTERFACES = [
        properties.Function,
        properties.OR(properties.Gradient, properties.SubGradient)
    ]

    INFO_PROVIDED = [
        Info.ok, Info.num_iter, Info.time, Info.fvalue, Info.func_val,
        Info.converged
    ]

    def __init__(self,
                 step_size=NonSumDimStepSize(a=0.1),
                 eps=consts.TOLERANCE,
                 info=[],
                 max_iter=10000,
                 min_iter=1,
                 use_best_f=True,
                 use_gradient=False):

        super(SubGradientDescent, self).__init__(info=info,
                                                 max_iter=max_iter,
                                                 min_iter=min_iter)

        self.step_size = step_size
        self.eps = float(eps)
        self.use_best_f = bool(use_best_f)
        self.use_gradient = bool(use_gradient)

    @bases.force_reset
    @bases.check_compatibility
    def run(self, function, beta):
        """Find the minimiser of the given function, starting at beta.

        Parameters
        ----------
        function : Function
            The function to minimise.

        beta : numpy array
            The start vector.
        """
        if self.info_requested(Info.ok):
            self.info_set(Info.ok, False)

        betanew = betaold = beta

        if self.use_gradient and hasattr(function, "grad"):
            function_grad = function.grad
        else:
            function_grad = function.subgrad

        if self.info_requested(Info.time):
            t = []
        if self.info_requested(Info.func_val):
            f = []
        if self.info_requested(Info.converged):
            self.info_set(Info.converged, False)

        fbest = np.inf
        betabest = None

        for i in range(1, self.max_iter + 1):

            if self.info_requested(Info.time):
                tm = utils.time_cpu()

            betaold = betanew
            subgrad = function_grad(betaold)

            step = self.step_size(i, betaold, subgrad)

            betanew = betaold - step * subgrad

            fval = None
            if self.use_best_f:
                fval = function.f(betanew)
                if fval < fbest:
                    fbest = fval
                    betabest = betanew

            if self.info_requested(Info.time):
                t.append(utils.time_cpu() - tm)
            if self.info_requested(Info.func_val):
                if self.use_best_f:
                    f.append(fbest)
                else:
                    if fval is None:
                        f.append(function.f(betanew))
                    else:
                        f.append(fval)

            if maths.norm(betanew - betaold) < self.eps \
                    and i >= self.min_iter:

                if self.info_requested(Info.converged):
                    self.info_set(Info.converged, True)

                break

        if self.info_requested(Info.num_iter):
            self.info_set(Info.num_iter, i)
        if self.info_requested(Info.time):
            self.info_set(Info.time, t)
        if self.info_requested(Info.func_val):
            self.info_set(Info.func_val, f)
        if self.info_requested(Info.ok):
            self.info_set(Info.ok, True)

        if self.use_best_f:
            return betabest
        else:
            return betanew
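

# When the objective is convex but not differentiable, the update above simply
# replaces the gradient with any subgradient and relies on a diminishing step
# size. The sketch below is a minimal, standalone plain-numpy version for
# min. 0.5 * ||X b - y||^2 + lam * ||b||_1, where sign(b) is a valid
# subgradient choice for the l1 term; the a / sqrt(k) step size loosely mirrors
# a non-summable diminishing step size, and all names are illustrative
# assumptions rather than the class's API.
import numpy as np


def _subgradient_descent_sketch(X, y, lam=0.1, a=0.1, num_iter=5000):
    b = np.zeros(X.shape[1])
    b_best, f_best = b, np.inf  # Keep the best iterate, as use_best_f does.
    for k in range(1, num_iter + 1):
        # A subgradient: gradient of the smooth part plus sign(b) for the l1
        # part (any value in [-1, 1] is valid where b_j = 0; sign gives 0).
        sg = X.T.dot(X.dot(b) - y) + lam * np.sign(b)
        step = a / np.sqrt(k)  # Diminishing, non-summable step size.
        b = b - step * sg
        f = 0.5 * np.sum((X.dot(b) - y) ** 2) + lam * np.sum(np.abs(b))
        if f < f_best:
            f_best, b_best = f, b
    return b_best

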
class ParallelDykstrasProximalAlgorithm(bases.ExplicitAlgorithm):
    """Dykstra's projection algorithm for two or more functions. Computes the
    proximal operator of a sum of functions. These functions may be indicator
    functions for convex sets (ProjectionOperator) or ProximalOperators.

    If all functions are ProjectionOperators, this algorithm finds the
    projection onto the intersection of the convex sets.

    The functions have projection operators (ProjectionOperator.proj) onto the
    respective convex sets or proximal operators (ProximalOperator.prox).
    """
    INTERFACES = [properties.Function,
                  properties.OR(properties.ProjectionOperator,
                                properties.ProximalOperator)]

    def __init__(self, eps=consts.TOLERANCE,
                 max_iter=100, min_iter=1):
        # TODO: Investigate what is a good default value here!

        self.eps = eps
        self.max_iter = max_iter
        self.min_iter = min_iter

    def run(self, x, prox=[], proj=[], factor=1.0, weights=None):
        """Finds the projection onto the intersection of two sets.

        Parameters
        ----------
        prox : List or tuple with two or more elements. The functions that
                are ProximalOperators. Either prox or proj must be non-empty.

        proj : List or tuple with two or more elements. The functions that
                are ProjectionOperators. Either proj or prox must be non-empty.

        factor : Positive float. A factor by which the Lagrange multiplier is
                scaled. This is usually the step size.

        x : Numpy array. The point that we wish to project.

        weights : List or tuple with floats. Weights for the functions.
                Default is that they all have the same weight. The elements of
                the list or tuple must sum to 1.
        """
        for f in prox:
            self.check_compatibility(f, self.INTERFACES)

        for f in proj:
            self.check_compatibility(f, self.INTERFACES)

        num_prox = len(prox)
        num_proj = len(proj)

        if weights is None:
            weights = [1. / float(num_prox + num_proj)] * (num_prox + num_proj)

        x_new = x_old = x
        p = [0.0] * (num_prox + num_proj)
        z = [0.0] * (num_prox + num_proj)
        for i in range(num_prox + num_proj):
            z[i] = np.copy(x)

        # Use a separate counter for the outer iterations, since the inner
        # loops below reuse the name i.
        for it in range(1, self.max_iter + 1):

            for i in range(num_prox):
                p[i] = prox[i].prox(z[i], factor)
            for i in range(num_proj):
                p[num_prox + i] = proj[i].proj(z[num_prox + i])

            x_old = x_new
            x_new = np.zeros(x_old.shape)
            for i in range(num_prox + num_proj):
                x_new += weights[i] * p[i]

            if maths.norm(x_new - x_old) / maths.norm(x_old) < self.eps \
                    and it >= self.min_iter:

                all_feasible = True
                for i in range(num_proj):
                    if proj[i].f(p[num_prox + i]) > 0.0:
                        all_feasible = False

                if all_feasible:
                    break

            for i in range(num_prox + num_proj):
                z[i] = x_new + z[i] - p[i]

        return x_new
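

# A minimal, standalone plain-numpy version of the averaged (parallel) Dykstra
# iteration above, projecting onto the intersection of two convex sets: a box
# and a Euclidean ball. The sets, weights and tolerance are illustrative
# assumptions; each auxiliary point z[j] accumulates the correction
# x_new + z[j] - p[j] exactly as in run() above.
import numpy as np


def _proj_box(v, lo=-1.0, hi=1.0):
    # Projection onto the box [lo, hi]^n.
    return np.clip(v, lo, hi)


def _proj_ball(v, radius=0.5):
    # Projection onto the Euclidean ball of the given radius.
    norm = np.linalg.norm(v)
    return v if norm <= radius else v * (radius / norm)


def _parallel_dykstra_sketch(x, projections, weights=None, max_iter=100,
                             eps=5e-8):
    m = len(projections)
    if weights is None:
        weights = [1.0 / m] * m
    z = [np.copy(x) for _ in range(m)]
    x_new = np.copy(x)
    for _ in range(max_iter):
        # Project each auxiliary point, then form the weighted average.
        p = [projections[j](z[j]) for j in range(m)]
        x_old = x_new
        x_new = sum(weights[j] * p[j] for j in range(m))
        if np.linalg.norm(x_new - x_old) < eps * max(np.linalg.norm(x_old),
                                                     1.0):
            break
        # Carry the corrections over to the next iteration.
        z = [x_new + z[j] - p[j] for j in range(m)]
    return x_new


# Example: project (2, -3, 0.5) onto the intersection of the box [-1, 1]^3
# and the ball of radius 0.5.
# x_proj = _parallel_dykstra_sketch(np.array([2.0, -3.0, 0.5]),
#                                   [_proj_box, _proj_ball])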