def run(self, function, x): """Finds the projection onto the intersection of two sets. Parameters ---------- function : list or tuple with two Functions The two functions. x : numpy array (p-by-1) The point that we wish to project. """ self.check_compatibility(function[0], self.INTERFACES) self.check_compatibility(function[1], self.INTERFACES) x_new = x p_new = np.zeros(x.shape) q_new = np.zeros(x.shape) for i in range(1, self.max_iter + 1): x_old = x_new p_old = p_new q_old = q_new y_old = function[0].proj(x_old + p_old) p_new = x_old + p_old - y_old x_new = function[1].proj(y_old + q_old) q_new = y_old + q_old - x_new if maths.norm(x_new - x_old) / maths.norm(x_old) < self.eps \ and i >= self.min_iter: break return x_new
def run(self, function, x, factor=1.0): """Finds the proximal operator of the sum of two proximal operators. Parameters ---------- function : list or tuple with two Functions The two functions. x : numpy array (p-by-1) The point at which we want to compute the proximal operator. """ self.check_compatibility(function[0], self.INTERFACES) self.check_compatibility(function[1], self.INTERFACES) x_new = x p_new = np.zeros(x.shape) q_new = np.zeros(x.shape) for i in range(1, self.max_iter + 1): x_old = x_new p_old = p_new q_old = q_new y_old = function[0].prox(x_old + p_old, factor=factor) p_new = x_old + p_old - y_old x_new = function[1].prox(y_old + q_old, factor=factor) q_new = y_old + q_old - x_new if maths.norm(x_new - x_old) / maths.norm(x_old) < self.eps \ and i >= self.min_iter: break return x_new
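# The following is a minimal, self-contained sketch of the same Dykstra
# scheme as the two run() methods above, with plain callables standing in
# for the library's Function objects. The operators used here
# (soft-thresholding for an L1 term, shrinkage for a squared-L2 term) are
# illustrative assumptions, not parsimony's API.
import numpy as np

def dykstra_prox_sketch(prox1, prox2, x, max_iter=1000, eps=5e-8):
    p = np.zeros(x.shape)
    q = np.zeros(x.shape)
    x_new = x
    for _ in range(max_iter):
        x_old = x_new
        y = prox1(x_old + p)       # Apply the first operator.
        p = x_old + p - y          # Update its correction term.
        x_new = prox2(y + q)       # Apply the second operator.
        q = y + q - x_new          # Update its correction term.
        if np.linalg.norm(x_new - x_old) / np.linalg.norm(x_old) < eps:
            break
    return x_new

rng = np.random.RandomState(42)
x0 = rng.randn(5, 1)
soft = lambda v: np.sign(v) * np.maximum(np.abs(v) - 0.1, 0.0)  # Prox of 0.1*||.||_1.
shrink = lambda v: v / 1.5                                      # Prox of 0.25*||.||_2^2.
print(dykstra_prox_sketch(soft, shrink, x0))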
def run(self, XY, wc=None):
    """A NIPALS implementation for sparse PLS regression.

    Parameters
    ----------
    XY : List of two numpy arrays. XY[0] is n-by-p and XY[1] is n-by-q. The
            independent and dependent variables.

    wc : List of numpy arrays. The start vectors.

    Returns
    -------
    w : Numpy array, p-by-1. The weight vector of X.

    c : Numpy array, q-by-1. The weight vector of Y.
    """
    X = XY[0]
    Y = XY[1]

    n, p = X.shape

    l1_1 = penalties.L1(l=self.l[0])
    l1_2 = penalties.L1(l=self.l[1])

    if wc is not None:
        w_new = wc[0]
    else:
        maxi = np.argmax(np.sum(Y ** 2, axis=0))
        u = Y[:, [maxi]]
        w_new = np.dot(X.T, u)
        w_new *= 1.0 / maths.norm(w_new)

    for i in range(self.max_iter):
        w = w_new

        c = np.dot(Y.T, np.dot(X, w))
        if self.penalise_y:
            c = l1_2.prox(c)
            normc = maths.norm(c)
            if normc > consts.TOLERANCE:
                c *= 1.0 / normc

        w_new = np.dot(X.T, np.dot(Y, c))
        w_new = l1_1.prox(w_new)
        normw = maths.norm(w_new)
        if normw > consts.TOLERANCE:
            w_new *= 1.0 / normw

        if maths.norm(w_new - w) / maths.norm(w) < self.eps:
            break

    self.num_iter = i

    # t = np.dot(X, w)
    # tt = np.dot(t.T, t)[0, 0]
    # c = np.dot(Y.T, t)
    # if tt > consts.TOLERANCE:
    #     c /= tt

    return w_new, c
def run(self, X):
    """Runs the K-means clustering algorithm on the given data matrix.

    Parameters
    ----------
    X : Numpy array of shape (n, p). The matrix of points to cluster.
    """
    K = min(self.K, X.shape[0])  # If K > # points.

    best_wcss = np.inf
    best_mus = None
    for repeat in range(self.repeat):
        mus = self._init_mus(X, K)

        for it in range(self.max_iter):
            closest = self._closest_centers(X, mus, K)
            old_mus = mus
            mus = self._new_centers(X, closest, K)

            if maths.norm(old_mus - mus) / maths.norm(old_mus) < self.eps:
                break

        if self.repeat == 1:
            best_mus = mus
        else:
            wcss = self._wcss(X, mus, closest, K)

            if wcss < best_wcss:
                best_wcss = wcss
                best_mus = mus

    return best_mus
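# Below, a self-contained sketch of the same Lloyd iteration (assignment
# step, update step, relative-change stopping rule). The random-row
# initialisation replaces the class's _init_mus helper and is an
# assumption of this example.
import numpy as np

def kmeans_sketch(X, K, max_iter=100, eps=5e-8, rng=np.random.RandomState(0)):
    mus = X[rng.choice(X.shape[0], K, replace=False), :]
    for _ in range(max_iter):
        # Assignment step: index of the closest centre for each point.
        dists = ((X[:, None, :] - mus[None, :, :]) ** 2).sum(axis=2)
        closest = dists.argmin(axis=1)
        # Update step: each centre moves to the mean of its points.
        old_mus = mus
        mus = np.array([X[closest == k].mean(axis=0)
                        if np.any(closest == k) else old_mus[k]
                        for k in range(K)])
        if np.linalg.norm(mus - old_mus) / np.linalg.norm(old_mus) < eps:
            break
    return mus

X = np.vstack([np.random.RandomState(1).randn(20, 2),
               np.random.RandomState(2).randn(20, 2) + 5.0])
print(kmeans_sketch(X, K=2))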
def run(self, XY, wc=None):
    """A NIPALS implementation for sparse PLS regression.

    Parameters
    ----------
    XY : List of two numpy arrays. XY[0] is n-by-p and XY[1] is n-by-q. The
            independent and dependent variables.

    wc : List of numpy arrays. The start vectors.

    Returns
    -------
    w : Numpy array, p-by-1. The weight vector of X.

    c : Numpy array, q-by-1. The weight vector of Y.
    """
    X = XY[0]
    Y = XY[1]

    n, p = X.shape

    l1_1 = penalties.L1(l=self.l[0])
    l1_2 = penalties.L1(l=self.l[1])

    if wc is not None:
        w_new = wc[0]
    else:
        maxi = np.argmax(np.sum(Y ** 2.0, axis=0))
        u = Y[:, [maxi]]
        w_new = np.dot(X.T, u)
        w_new *= 1.0 / maths.norm(w_new)

    for i in range(self.max_iter):
        w = w_new

        c = np.dot(Y.T, np.dot(X, w))
        if self.penalise_y:
            c = l1_2.prox(c)
            normc = maths.norm(c)
            if normc > consts.TOLERANCE:
                c *= 1.0 / normc

        w_new = np.dot(X.T, np.dot(Y, c))
        w_new = l1_1.prox(w_new)
        normw = maths.norm(w_new)
        if normw > consts.TOLERANCE:
            w_new *= 1.0 / normw

        if maths.norm(w_new - w) / maths.norm(w) < self.eps:
            break

    self.num_iter = i

    # t = np.dot(X, w)
    # tt = np.dot(t.T, t)[0, 0]
    # c = np.dot(Y.T, t)
    # if tt > consts.TOLERANCE:
    #     c /= tt

    return w_new, c
def run(self, X, Y, start_vector=None):
    """Find the right-singular vector of the product of two matrices.

    Parameters
    ----------
    X : Numpy array with shape (n, p). The first matrix of the product.

    Y : Numpy array with shape (p, m). The second matrix of the product.

    start_vector : BaseStartVector. A start vector generator. Default is
            to use a random start vector.
    """
    if self.info_requested(utils.Info.ok):
        self.info_set(utils.Info.ok, False)
    if self.info_requested(utils.Info.time):
        _t = utils.time()
    if self.info_requested(utils.Info.converged):
        self.info_set(utils.Info.converged, False)

    M, N = X.shape

    if start_vector is None:
        start_vector = start_vectors.RandomStartVector(normalise=True)
    v = start_vector.get_vector(Y.shape[1])

    for it in range(1, self.max_iter + 1):
        v_ = v
        v = np.dot(X, np.dot(Y, v_))
        v = np.dot(Y.T, np.dot(X.T, v))
        v *= 1.0 / maths.norm(v)

        if maths.norm(v_ - v) / maths.norm(v) < self.eps \
                and it >= self.min_iter:
            if self.info_requested(utils.Info.converged):
                self.info_set(utils.Info.converged, True)
            break

    if self.info_requested(utils.Info.time):
        self.info_set(utils.Info.time, utils.time() - _t)
    if self.info_requested(utils.Info.func_val):
        _f = maths.norm(np.dot(X, np.dot(Y, v)))  # Largest singular value.
        self.info_set(utils.Info.func_val, _f)
    if self.info_requested(utils.Info.ok):
        self.info_set(utils.Info.ok, True)

    return utils.direct_vector(v)
def run(self, X, Y, start_vector=None): """Find the right-singular vector of the product of two matrices. Parameters ---------- X : Numpy array with shape (n, p). The first matrix of the product. Y : Numpy array with shape (p, m). The second matrix of the product. start_vector : BaseStartVector. A start vector generator. Default is to use a random start vector. """ if self.info_requested(utils.Info.ok): self.info_set(utils.Info.ok, False) if self.info_requested(utils.Info.time): _t = utils.time() if self.info_requested(utils.Info.converged): self.info_set(utils.Info.converged, False) M, N = X.shape if start_vector is None: start_vector = weights.RandomUniformWeights(normalise=True) v = start_vector.get_weights(Y.shape[1]) for it in range(1, self.max_iter + 1): v_ = v v = np.dot(X, np.dot(Y, v_)) v = np.dot(Y.T, np.dot(X.T, v)) v *= 1.0 / maths.norm(v) if maths.norm(v_ - v) / maths.norm(v) < self.eps \ and it >= self.min_iter: if self.info_requested(utils.Info.converged): self.info_set(utils.Info.converged, True) break if self.info_requested(utils.Info.time): self.info_set(utils.Info.time, utils.time() - _t) if self.info_requested(utils.Info.func_val): _f = maths.norm(np.dot(X, np.dot(Y, v))) # Largest singular value. self.info_set(utils.Info.func_val, _f) if self.info_requested(utils.Info.ok): self.info_set(utils.Info.ok, True) return utils.direct_vector(v)
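# Both run() variants above are power iterations on (XY)'(XY), so they
# converge to the dominant right-singular vector of the product X.dot(Y).
# A quick stand-alone check against numpy's dense SVD (equal up to sign):
import numpy as np

rng = np.random.RandomState(0)
X = rng.randn(10, 6)
Y = rng.randn(6, 4)
v = rng.randn(4, 1)
v /= np.linalg.norm(v)
for _ in range(500):
    v = Y.T.dot(X.T.dot(X.dot(Y.dot(v))))  # v <- (XY)'(XY) v.
    v /= np.linalg.norm(v)
_, _, Vt = np.linalg.svd(X.dot(Y))
print(min(np.linalg.norm(v.ravel() - Vt[0]),
          np.linalg.norm(v.ravel() + Vt[0])))  # ~0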
def run(self, XY, wc=None):
    """A NIPALS implementation for PLS regression.

    Parameters
    ----------
    XY : List of two numpy arrays. XY[0] is n-by-p and XY[1] is n-by-q. The
            independent and dependent variables.

    wc : List of numpy arrays. The start vectors.

    Returns
    -------
    w : Numpy array, p-by-1. The weight vector of X.

    c : Numpy array, q-by-1. The weight vector of Y.
    """
    X = XY[0]
    Y = XY[1]

    n, p = X.shape

    if wc is not None:
        w_new = wc[0]
    else:
        maxi = np.argmax(np.sum(Y ** 2, axis=0))
        u = Y[:, [maxi]]
        w_new = np.dot(X.T, u)
        w_new *= 1.0 / maths.norm(w_new)

    for i in range(self.max_iter):
        w = w_new

        c = np.dot(Y.T, np.dot(X, w))
        w_new = np.dot(X.T, np.dot(Y, c))
        normw = maths.norm(w_new)
        if normw > 10.0 * consts.FLOAT_EPSILON:
            w_new *= 1.0 / normw

        if maths.norm(w_new - w) < maths.norm(w) * self.eps:
            break

    self.num_iter = i

    t = np.dot(X, w)
    tt = np.dot(t.T, t)[0, 0]
    c = np.dot(Y.T, t)
    if tt > consts.TOLERANCE:
        c *= 1.0 / tt

    return w_new, c
def run(self, XY, wc=None):
    """A NIPALS implementation for PLS regression.

    Parameters
    ----------
    XY : List of two numpy arrays. XY[0] is n-by-p and XY[1] is n-by-q. The
            independent and dependent variables.

    wc : List of numpy arrays. The start vectors.

    Returns
    -------
    w : Numpy array, p-by-1. The weight vector of X.

    c : Numpy array, q-by-1. The weight vector of Y.
    """
    X = XY[0]
    Y = XY[1]

    n, p = X.shape

    if wc is not None:
        w_new = wc[0]
    else:
        maxi = np.argmax(np.sum(Y ** 2.0, axis=0))
        u = Y[:, [maxi]]
        w_new = np.dot(X.T, u)
        w_new *= 1.0 / maths.norm(w_new)

    for i in range(self.max_iter):
        w = w_new

        c = np.dot(Y.T, np.dot(X, w))
        w_new = np.dot(X.T, np.dot(Y, c))
        normw = maths.norm(w_new)
        if normw > 10.0 * consts.FLOAT_EPSILON:
            w_new *= 1.0 / normw

        if maths.norm(w_new - w) < maths.norm(w) * self.eps:
            break

    self.num_iter = i

    t = np.dot(X, w)
    tt = np.dot(t.T, t)[0, 0]
    c = np.dot(Y.T, t)
    if tt > consts.TOLERANCE:
        c *= 1.0 / tt

    return w_new, c
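# Without the L1 penalties, the w-update in the NIPALS loops above is a
# power iteration on (X'Y)(X'Y)', so its fixed point is the dominant left
# singular vector of X'Y. A small numeric check (all names local to this
# example):
import numpy as np

rng = np.random.RandomState(1)
X, Y = rng.randn(30, 5), rng.randn(30, 3)
w = rng.randn(5, 1)
w /= np.linalg.norm(w)
for _ in range(200):
    c = Y.T.dot(X.dot(w))    # c ~ Y'Xw.
    w = X.T.dot(Y.dot(c))    # w ~ X'Yc.
    w /= np.linalg.norm(w)
U, _, _ = np.linalg.svd(X.T.dot(Y))
print(min(np.linalg.norm(w.ravel() - U[:, 0]),
          np.linalg.norm(w.ravel() + U[:, 0])))  # ~0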
def gap(self, beta, beta_hat=None, max_iter=consts.MAX_ITER, eps=consts.TOLERANCE): """Compute the duality gap. From the interface "DualFunction". """ if self.penalty_start > 0: beta_ = beta[self.penalty_start:, :] else: beta_ = beta alpha = self.tv.alpha(beta_) g = self.fmu(beta_) a = beta_ - self.c lstar = (1.0 / 2.0) * maths.norm(a) ** 2.0 + np.dot(self.c.T, a)[0, 0] lAta = self.tv.l * self.tv.Aa(alpha) if self.penalty_start > 0: lAta = np.vstack((np.zeros((self.penalty_start, 1)), lAta)) alpha_sqsum = 0.0 for a_ in alpha: alpha_sqsum += np.sum(a_ ** 2.0) z = -a psistar = (1.0 / 2.0) \ * np.sum(maths.positive(np.abs(z - lAta) - self.l1.l) ** 2.0) \ + (0.5 * self.tv.l * self.tv.get_mu() * alpha_sqsum) gap = g + lstar + psistar return gap
def run(self, function, beta):
    """Find the minimiser of the given function, starting at beta.

    Parameters
    ----------
    function : Function. The function to minimise.

    beta : Numpy array. The start vector.
    """
    if self.info_requested(Info.ok):
        self.info_set(Info.ok, False)

    step = function.step(beta)

    betanew = betaold = beta

    if self.info_requested(Info.time):
        t = []
    if self.info_requested(Info.fvalue) \
            or self.info_requested(Info.func_val):
        f = []
    if self.info_requested(Info.converged):
        self.info_set(Info.converged, False)

    for i in range(1, self.max_iter + 1):

        if self.info_requested(Info.time):
            tm = utils.time_cpu()

        step = function.step(betanew)

        betaold = betanew
        betanew = betaold - step * function.grad(betaold)

        if self.info_requested(Info.time):
            t.append(utils.time_cpu() - tm)
        if self.info_requested(Info.fvalue) \
                or self.info_requested(Info.func_val):
            f.append(function.f(betanew))

        if maths.norm(betanew - betaold) < self.eps \
                and i >= self.min_iter:

            if self.info_requested(Info.converged):
                self.info_set(Info.converged, True)

            break

    if self.info_requested(Info.num_iter):
        self.info_set(Info.num_iter, i)
    if self.info_requested(Info.time):
        self.info_set(Info.time, t)
    if self.info_requested(Info.fvalue) \
            or self.info_requested(Info.func_val):
        self.info_set(Info.fvalue, f)
        self.info_set(Info.func_val, f)
    if self.info_requested(Info.ok):
        self.info_set(Info.ok, True)

    return betanew
def feasible(self, beta): """Feasibility of the constraint. From the interface "Constraint". Parameters ---------- beta : Numpy array. The variable to check for feasibility. Examples -------- >>> import numpy as np >>> from parsimony.functions.penalties import L2 >>> np.random.seed(42) >>> l2 = L2(c=0.3183098861837907) >>> y1 = 0.01 * (np.random.rand(50, 1) * 2.0 - 1.0) >>> l2.feasible(y1) True >>> y2 = 10.0 * (np.random.rand(50, 1) * 2.0 - 1.0) >>> l2.feasible(y2) False >>> y3 = l2.proj(50.0 * np.random.rand(100, 1) * 2.0 - 1.0) >>> l2.feasible(y3) True """ if self.penalty_start > 0: beta_ = beta[self.penalty_start:, :] else: beta_ = beta return maths.norm(beta_) <= self.c
def run(self, function, beta): """Find the minimiser of the given function, starting at beta. Parameters ---------- function : Function. The function to minimise. beta : Numpy array, p-by-1. The start vector. """ if self.info_requested(Info.ok): self.info_set(Info.ok, False) if self.info_requested(Info.time): t = [] if self.info_requested(Info.func_val): f = [] if self.info_requested(Info.converged): self.info_set(Info.converged, False) aold = anew = 1.0 thetaold = thetanew = beta betanew = betaold = beta for i in range(1, self.max_iter + 1): if self.info_requested(Info.time): tm = utils.time_cpu() step = function.step(betanew) betaold = betanew thetaold = thetanew aold = anew thetanew = betaold - step * function.grad(betaold) anew = (1.0 + np.sqrt(4.0 * aold * aold + 1.0)) / 2.0 betanew = thetanew + (aold - 1.0) * (thetanew - thetaold) / anew if self.info_requested(Info.time): t.append(utils.time_cpu() - tm) if self.info_requested(Info.func_val): f.append(function.f(betanew)) if maths.norm(betanew - betaold) < self.eps \ and i >= self.min_iter: if self.info_requested(Info.converged): self.info_set(Info.converged, True) break if self.info_requested(Info.num_iter): self.info_set(Info.num_iter, i) if self.info_requested(Info.time): self.info_set(Info.time, t) if self.info_requested(Info.func_val): self.info_set(Info.func_val, f) if self.info_requested(Info.ok): self.info_set(Info.ok, True) return betanew
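# A stripped-down version of the accelerated (FISTA-style) scheme above,
# applied to plain least squares with a fixed step 1/L, and compared with
# the direct solver. Everything here is example-local; the a-sequence and
# the interpolation mirror the update in run().
import numpy as np

rng = np.random.RandomState(7)
X, y = rng.randn(50, 10), rng.randn(50, 1)
L = np.linalg.norm(X, 2) ** 2          # Lipschitz constant of the gradient.
grad = lambda b: X.T.dot(X.dot(b) - y)
beta = theta = np.zeros((10, 1))
a = 1.0
for _ in range(1000):
    theta_old, a_old = theta, a
    theta = beta - (1.0 / L) * grad(beta)
    a = (1.0 + np.sqrt(4.0 * a_old * a_old + 1.0)) / 2.0
    beta = theta + (a_old - 1.0) * (theta - theta_old) / a
print(np.linalg.norm(beta - np.linalg.lstsq(X, y, rcond=None)[0]))  # ~0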
def get_weights(self, shape): """Return randomly generated weights of given shape. Parameters ---------- shape : int or list of ints or tuple of ints Shape of the weights to generate. The shape of the output is shape or (shape, 1) in case shape is an integer. """ if not isinstance(shape, (list, tuple)): shape = (int(shape), 1) if self.limits is not None: l = float(self.limits[0]) u = float(self.limits[1]) elif self.variance is not None: u = np.sqrt(3.0 * self.variance) l = -u if self.random_state is None: vector = np.random.rand(*shape) * (u - l) + l # Random vector. else: vector = self.random_state.rand(*shape) * (u - l) + l # Random vector. # TODO: Normalise columns when a matrix? if self.normalise: vector /= maths.norm(vector) if self.dtype is not None: vector = vector.astype(self.dtype) return vector
def get_weights(self, shape): """Return randomly generated weights of given shape. Parameters ---------- shape : int or list of int or tuple of int Shape of the weights to generate. The shape of the output is shape or (shape, 1) in case shape is an integer. """ if not isinstance(shape, (list, tuple)): shape = (int(shape), 1) if self.limits is not None: m = np.min(self.limits) + (abs(self.limits[1] - self.limits[0]) / 2.0) s = abs(self.limits[1] - self.limits[0]) / 4.0 else: m = self.mean s = np.sqrt(self.variance) if self.random_state is None: vector = np.random.randn(*shape) * s + m else: vector = self.random_state.randn(*shape) * s + m # Random vector. # TODO: Normalise columns when a matrix? if self.normalise: vector /= maths.norm(vector) if self.dtype is not None: vector = vector.astype(self.dtype) return vector
def get_vector(self, size):
    """Return a vector of ones of the chosen size.

    Parameters
    ----------
    size : Positive integer. Size of the vector to generate. The shape of
            the output is (size, 1).

    Examples
    --------
    >>> from parsimony.utils.start_vectors import OnesStartVector
    >>> start_vector = OnesStartVector()
    >>> ones = start_vector.get_vector(3)
    >>> print(ones)
    [[ 1.]
     [ 1.]
     [ 1.]]
    """
    size = int(size)
    vector = np.ones((size, 1))  # Using a vector of ones.

    if self.normalise:
        return vector * (1.0 / maths.norm(vector))
    else:
        return vector
def run(self, function, beta):
    """Find the minimiser of the given function, starting at beta.

    Parameters
    ----------
    function : Function. The function to minimise.

    beta : Numpy array, p-by-1. The start vector.
    """
    if self.info_requested(Info.ok):
        self.info_set(Info.ok, False)

    if self.info_requested(Info.time):
        t = []
    if self.info_requested(Info.func_val):
        f = []
    if self.info_requested(Info.converged):
        self.info_set(Info.converged, False)

    aold = anew = 1.0
    thetaold = thetanew = beta
    betanew = betaold = beta

    for i in range(1, self.max_iter + 1):

        if self.info_requested(Info.time):
            tm = utils.time_cpu()

        step = function.step(betanew)

        betaold = betanew
        thetaold = thetanew
        aold = anew

        thetanew = betaold - step * function.grad(betaold)
        anew = (1.0 + np.sqrt(4.0 * aold * aold + 1.0)) / 2.0
        betanew = thetanew + (aold - 1.0) * (thetanew - thetaold) / anew

        if self.info_requested(Info.time):
            t.append(utils.time_cpu() - tm)
        if self.info_requested(Info.func_val):
            f.append(function.f(betanew))

        if maths.norm(betanew - betaold) < self.eps \
                and i >= self.min_iter:

            if self.info_requested(Info.converged):
                self.info_set(Info.converged, True)

            break

    if self.info_requested(Info.num_iter):
        self.info_set(Info.num_iter, i)
    if self.info_requested(Info.time):
        self.info_set(Info.time, t)
    if self.info_requested(Info.func_val):
        self.info_set(Info.func_val, f)
    if self.info_requested(Info.ok):
        self.info_set(Info.ok, True)

    return betanew
def get_weights(self, shape): """Return randomly generated weights of given shape. Parameters ---------- shape : int or list of int or tuple of int Shape of the weights to generate. The shape of the output is shape or (shape, 1) in case shape is an integer. """ if not isinstance(shape, (list, tuple)): shape = (int(shape), 1) if self.limits is not None: m = np.min( self.limits) + (abs(self.limits[1] - self.limits[0]) / 2.0) s = abs(self.limits[1] - self.limits[0]) / 4.0 else: m = self.mean s = np.sqrt(self.variance) if self.random_state is None: vector = np.random.randn(*shape) * s + m else: vector = self.random_state.randn(*shape) * s + m # Random vector. # TODO: Normalise columns when a matrix? if self.normalise: vector /= maths.norm(vector) if self.dtype is not None: vector = vector.astype(self.dtype) return vector
def run(self, function, beta): """Find the minimiser of the given function, starting at beta. Parameters ---------- function : Function. The function to minimise. beta : Numpy array. The start vector. """ if self.info_requested(Info.ok): self.info_set(Info.ok, False) step = function.step(beta) betanew = betaold = beta if self.info_requested(Info.time): t = [] if self.info_requested(Info.fvalue) \ or self.info_requested(Info.func_val): f = [] if self.info_requested(Info.converged): self.info_set(Info.converged, False) for i in range(1, self.max_iter + 1): if self.info_requested(Info.time): tm = utils.time_cpu() step = function.step(betanew) betaold = betanew betanew = betaold - step * function.grad(betaold) if self.info_requested(Info.time): t.append(utils.time_cpu() - tm) if self.info_requested(Info.fvalue) \ or self.info_requested(Info.func_val): f.append(function.f(betanew)) if maths.norm(betanew - betaold) < self.eps \ and i >= self.min_iter: if self.info_requested(Info.converged): self.info_set(Info.converged, True) break if self.info_requested(Info.num_iter): self.info_set(Info.num_iter, i) if self.info_requested(Info.time): self.info_set(Info.time, t) if self.info_requested(Info.fvalue) \ or self.info_requested(Info.func_val): self.info_set(Info.fvalue, f) self.info_set(Info.func_val, f) if self.info_requested(Info.ok): self.info_set(Info.ok, True) return betanew
def run(self, functions, x, weights=None):
    """Finds the projection onto the intersection of two or more sets.

    Parameters
    ----------
    functions : List or tuple with two or more elements. The functions.

    x : Numpy array. The point that we wish to project.

    weights : List or tuple with floats. Weights for the functions.
            Default is that they all have the same weight. The elements of
            the list or tuple must sum to 1.
    """
    for f in functions:
        self.check_compatibility(f, self.INTERFACES)

    num = len(functions)

    if weights is None:
        weights = [1.0 / float(num)] * num

    x_new = x_old = x
    p = [0.0] * len(functions)
    z = [0.0] * len(functions)
    for j in range(num):
        z[j] = np.copy(x)

    for i in range(1, self.max_iter + 1):

        for j in range(num):
            p[j] = functions[j].proj(z[j])

        # TODO: Do the weights really matter when the function is the
        # indicator function?
        x_old = x_new
        x_new = np.zeros(x_old.shape)
        for j in range(num):
            x_new += weights[j] * p[j]

        for j in range(num):
            z[j] = x_new + z[j] - p[j]

        if maths.norm(x_new - x_old) / maths.norm(x_old) < self.eps \
                and i >= self.min_iter:
            break

    return x_new
def f(self, beta): """Function value. From the interface "Function". """ if self.penalty_start > 0: beta_ = beta[self.penalty_start:, :] else: beta_ = beta return self.l * (maths.norm(beta_) - self.c)
def L(self, w, index): """Lipschitz constant of the gradient with given index. From the interface "MultiblockLipschitzContinuousGradient". """ index = int(index) grad = np.dot(self.X[index].T, np.dot(self.X[1 - index], w[1 - index])) \ * (1.0 / self.n) return 2.0 * maths.norm(grad) ** 2
def L(self, w, index): """Lipschitz constant of the gradient with given index. From the interface "MultiblockLipschitzContinuousGradient". """ index = int(index) grad = np.dot(self.X[index].T, np.dot(self.X[1 - index], w[1 - index])) \ * (1.0 / self.n) return 2.0 * maths.norm(grad)**2
def approx_L(self, shape, max_iter=10000):
    """Monte Carlo approximation of the Lipschitz constant.

    Warning: This will not yield a good approximation within reasonable
    time for very large data sets. Use only if you know what you are
    doing.

    Parameters
    ----------
    shape : List or tuple. Usually has the form (p, 1). The shape of the
            points which we draw randomly.
    """
    L = -float("inf")
    for i in range(max_iter):
        a = np.random.rand(*shape) * 2.0 - 1.0
        b = np.random.rand(*shape) * 2.0 - 1.0
        grad_a = self.grad(a)
        grad_b = self.grad(b)
        L_ = maths.norm(grad_a - grad_b) / maths.norm(a - b)
        L = max(L, L_)

    return L
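# For a quadratic loss f(b) = 0.5*||Xb||^2 the gradient is linear and the
# exact Lipschitz constant is the largest eigenvalue of X'X, so the Monte
# Carlo estimate above can be sanity-checked: it approaches the true L
# from below. This check is a stand-alone example, not library code.
import numpy as np

rng = np.random.RandomState(3)
X = rng.randn(20, 5)
grad = lambda b: X.T.dot(X.dot(b))
L_true = np.linalg.eigvalsh(X.T.dot(X))[-1]
L_mc = -float("inf")
for _ in range(10000):
    a = rng.rand(5, 1) * 2.0 - 1.0
    b = rng.rand(5, 1) * 2.0 - 1.0
    L_mc = max(L_mc, np.linalg.norm(grad(a) - grad(b))
               / np.linalg.norm(a - b))
print(L_mc <= L_true, L_true - L_mc)  # True, and the gap is small.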
def _approximate_eps(self, function, beta0): old_mu = function.set_mu(self.mu_min) step = function.step(beta0) D1 = maths.norm(function.prox(-step * function.grad(beta0), step, # Arbitrary eps ... eps=np.sqrt(consts.TOLERANCE), max_iter=self.max_iter)) function.set_mu(old_mu) return (2.0 / step) * D1 * self._harmonic_number_approx()
def f(self, beta): """Function value. """ if self.penalty_start > 0: beta_ = beta[self.penalty_start:, :] else: beta_ = beta if maths.norm(beta_) ** 2.0 > self.l2: return consts.FLOAT_INF return self.l1 * maths.norm1(beta_)
def estimate_mu(self, beta): if self.penalty_start > 0: beta_ = beta[self.penalty_start:, :] else: beta_ = beta SS = 0.0 A = self.A() for i in range(len(A)): SS = max(SS, maths.norm(A[i].dot(beta_))) return SS
def get_weights(self, *args, **kwargs):
    """Return the predetermined start vector.
    """
    weights = self.weights

    # TODO: Normalise columns when a matrix?
    if self.normalise:
        weights = weights / maths.norm(weights)

    if self.dtype is not None:
        weights = weights.astype(self.dtype)

    return weights
def estimate_mu(self, beta):
    if self.penalty_start > 0:
        beta_ = beta[self.penalty_start:, :]
    else:
        beta_ = beta

    SS = 0.0
    A = self.A()
    for i in range(len(A)):
        SS = max(SS, maths.norm(A[i].dot(beta_)))

    return SS
def feasible(self, beta): """Feasibility of the constraint. From the interface "Constraint". """ if self.penalty_start > 0: beta_ = beta[self.penalty_start:, :] else: beta_ = beta A = self.A() normsum = 0.0 for Ag in A: normsum += maths.norm(Ag.dot(beta_)) return normsum <= self.c
def estimate_mu(self, beta):
    """ Computes a "good" value of mu with respect to the given beta.

    From the interface "NesterovFunction".
    """
    if self.penalty_start > 0:
        beta_ = beta[self.penalty_start:, :]
    else:
        beta_ = beta

    SS = 0.0
    A = self.A()
    for i in range(len(A)):
        SS = max(SS, maths.norm(A[i].dot(beta_)))

    return np.max(np.sqrt(SS))
def f(self, beta): """ Function value. """ if self.l < consts.TOLERANCE: return 0.0 if self.penalty_start > 0: beta_ = beta[self.penalty_start:, :] else: beta_ = beta A = self.A() normsum = 0.0 for Ag in A: normsum += maths.norm(Ag.dot(beta_)) return self.l * (normsum - self.c)
def get_weights(self, shape): """Return weights that are all one. Parameters ---------- shape : int or list of ints or tuple of ints Shape of the vector to generate. The shape of the output is shape or (shape, 1) in case shape is an integer. """ if not isinstance(shape, (list, tuple)): shape = (int(shape), 1) vector = np.ones(shape) # Using a vector of ones. if self.normalise: return vector / maths.norm(vector) else: return vector
def estimate_mu(self, beta): """ Compute a "good" value of mu with respect to the given beta. Parameters ---------- beta : Numpy array (p-by-1). The primal variable at which to compute a feasible value of mu. """ if self.penalty_start > 0: beta_ = beta[self.penalty_start:, :] else: beta_ = beta SS = 0.0 A = self.A() for i in range(len(A)): SS = max(SS, maths.norm(A[i].dot(beta_))) return SS
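# The quantity computed above is max_i ||A_i beta|| over the operator list
# A of the Nesterov-smoothed penalty. A tiny hypothetical example with one
# 1-D finite-difference (TV-like) operator standing in for self.A():
import numpy as np
import scipy.sparse as sparse

p = 6
D = sparse.spdiags([[-1.0] * p, [1.0] * p], [0, 1], p - 1, p)  # Differences.
A = [D]
beta = np.arange(p, dtype=float).reshape(-1, 1)
SS = 0.0
for Ai in A:
    SS = max(SS, np.linalg.norm(Ai.dot(beta)))
print(SS)  # sqrt(p - 1), since every difference equals 1 here.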
def estimate_mu(self, beta):
    """ Compute a "good" value of mu with respect to the given beta.

    Parameters
    ----------
    beta : Numpy array (p-by-1). The primal variable at which to compute
            a feasible value of mu.
    """
    if self.penalty_start > 0:
        beta_ = beta[self.penalty_start:, :]
    else:
        beta_ = beta

    SS = 0.0
    A = self.A()
    for i in range(len(A)):
        SS = max(SS, maths.norm(A[i].dot(beta_)))

    return SS
def get_weights(self, shape): """Return weights that are all one. Parameters ---------- shape : int or list of ints or tuple of ints Shape of the vector to generate. The shape of the output is shape or (shape, 1) in case shape is an integer. """ if not isinstance(shape, (list, tuple)): shape = (int(shape), 1) vector = np.ones(shape) # Using a vector of ones. # TODO: Normalise columns when a matrix? if self.normalise: vector /= maths.norm(vector) if self.dtype is not None: vector = vector.astype(self.dtype) return vector
def proj(self, beta): """The corresponding projection operator. From the interface "ProjectionOperator". Examples -------- >>> import numpy as np >>> from parsimony.functions.penalties import L2 >>> np.random.seed(42) >>> l2 = L2(c=0.3183098861837907) >>> y1 = l2.proj(np.random.rand(100, 1) * 2.0 - 1.0) >>> np.linalg.norm(y1) 0.3183098861837908 >>> y2 = np.random.rand(100, 1) * 2.0 - 1.0 >>> l2.feasible(y2) False >>> l2.feasible(l2.proj(y2)) True """ if self.penalty_start > 0: beta_ = beta[self.penalty_start:, :] else: beta_ = beta norm = maths.norm(beta_) # Feasible? if norm <= self.c: return beta # The correction by eps is to nudge the norm just below self.c. eps = consts.FLOAT_EPSILON beta_ *= self.c / (norm + eps) proj = beta_ if self.penalty_start > 0: proj = np.vstack((beta[:self.penalty_start, :], beta_)) return proj
def prox(self, beta, factor=1.0):
    """The corresponding proximal operator.

    From the interface "ProximalOperator".
    """
    l = self.l * factor
    if self.penalty_start > 0:
        beta_ = beta[self.penalty_start:, :]
    else:
        beta_ = beta

    norm = maths.norm(beta_)
    if norm >= l:
        beta_ *= 1.0 - l / norm  # Shrink the norm by l.
    else:
        beta_ *= 0.0

    if self.penalty_start > 0:
        prox = np.vstack((beta[:self.penalty_start, :], beta_))
    else:
        prox = beta_

    return prox
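# proj (previous method) rescales onto the ball ||beta|| <= c, while prox
# above is block soft-thresholding: the norm shrinks by l, or the vector
# maps to zero when its norm is below l. A direct numeric check:
import numpy as np

beta = np.array([[3.0], [4.0]])  # Norm 5.
c, l = 2.0, 1.0
proj = beta * (c / np.linalg.norm(beta))                 # Onto ||.|| <= c.
prox = beta * max(0.0, 1.0 - l / np.linalg.norm(beta))   # Prox of l*||.||_2.
print(np.linalg.norm(proj), prox.ravel())  # 2.0 and [2.4, 3.2].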
def get_vector(self, size): """Return randomly generated vector of given shape. Parameters ---------- size : Positive integer. Size of the vector to generate. The shape of the output is (size, 1). Examples -------- >>> from parsimony.utils.start_vectors import RandomStartVector >>> start_vector = RandomStartVector(normalise=False, seed=42) >>> random = start_vector.get_vector(3) >>> print(random) [[ 0.37454012] [ 0.95071431] [ 0.73199394]] >>> >>> start_vector = RandomStartVector(normalise=False, seed=1, ... limits=(-1, 2)) >>> random = start_vector.get_vector(3) >>> print(random) [[ 0.25106601] [ 1.16097348] [-0.99965688]] """ l = float(self.limits[0]) u = float(self.limits[1]) size = int(size) vector = np.random.rand(size, 1) * (u - l) + l # Random vector. if self.normalise: return vector * (1.0 / maths.norm(vector)) else: return vector
def run(self, function, beta):

    # self.info.clear()

    if self.info_requested(Info.ok):
        self.info_set(Info.ok, False)

    if self.mu_start is None:
        mu = function.estimate_mu(beta)
    else:
        mu = self.mu_start

    # We use 2x as in Chen et al. (2012).
    eps = 2.0 * function.eps_max(mu)

    function.set_mu(self.mu_min)
    tmin = function.step(beta)
    function.set_mu(mu)

    if self.info_requested(Info.mu):
        mu = [mu]

    if self.info_requested(Info.time):
        t = []
    if self.info_requested(Info.fvalue):
        f = []
    if self.info_requested(Info.converged):
        self.info_set(Info.converged, False)

    i = 0
    while True:
        tnew = function.step(beta)
        self.algorithm.set_params(step=tnew, eps=eps,
                                  max_iter=self.max_iter - self.num_iter)
        # self.fista_info.clear()
        beta = self.algorithm.run(function, beta)

        self.num_iter += self.algorithm.num_iter

        if Info.time in self.algorithm.info:
            tval = self.algorithm.info_get(Info.time)
        if Info.fvalue in self.algorithm.info:
            fval = self.algorithm.info_get(Info.fvalue)

        if self.info_requested(Info.time):
            t = t + tval
        if self.info_requested(Info.fvalue):
            f = f + fval

        old_mu = function.set_mu(self.mu_min)
        # Take one ISTA step for use in the stopping criterion.
        beta_tilde = function.prox(beta - tmin * function.grad(beta),
                                   tmin)
        function.set_mu(old_mu)

        if (1.0 / tmin) * maths.norm(beta - beta_tilde) < self.eps:
            if self.info_requested(Info.converged):
                self.info_set(Info.converged, True)
            break

        if self.num_iter >= self.max_iter:
            break

        eps = max(self.tau * eps, consts.TOLERANCE)
        # if eps <= consts.TOLERANCE:
        #     break

        if self.info_requested(Info.mu):
            mu_new = max(self.mu_min, self.tau * mu[-1])
            mu = mu + [mu_new] * len(fval)
        else:
            mu_new = max(self.mu_min, self.tau * mu)
            mu = mu_new

        print("eps:", eps, ", mu:", mu_new)

        function.set_mu(mu_new)

        i = i + 1

    if self.info_requested(Info.num_iter):
        self.info_set(Info.num_iter, i + 1)
    if self.info_requested(Info.time):
        self.info_set(Info.time, t)
    if self.info_requested(Info.fvalue):
        self.info_set(Info.fvalue, f)
    if self.info_requested(Info.mu):
        self.info_set(Info.mu, mu)
    if self.info_requested(Info.ok):
        self.info_set(Info.ok, True)

    return beta
def run(self, function, beta): # Copy the allowed info keys for FISTA. fista_info = list() for nfo in self.info_copy(): if nfo in FISTA.INFO_PROVIDED: fista_info.append(nfo) # if not self.fista_info.allows(Info.num_iter): # self.fista_info.add_key(Info.num_iter) # Create the inner algorithm. algorithm = FISTA(eps=self.eps, max_iter=self.max_iter, min_iter=self.min_iter, info=fista_info) if self.info_requested(Info.ok): self.info_set(Info.ok, False) if self.mu_start is None: mu = [function.estimate_mu(beta)] else: mu = [self.mu_start] function.set_mu(self.mu_min) tmin = function.step(beta) function.set_mu(mu[0]) max_eps = function.eps_max(mu[0]) G = min(max_eps, function.eps_opt(mu[0])) if self.info_requested(Info.time): t = [] if self.info_requested(Info.fvalue): f = [] if self.info_requested(Info.gap): Gval = [] if self.info_requested(Info.converged): self.info_set(Info.converged, False) i = 0 while True: stop = False tnew = function.step(beta) eps_plus = min(max_eps, function.eps_opt(mu[-1])) # print "current iterations: ", self.num_iter, \ # ", iterations left: ", self.max_iter - self.num_iter algorithm.set_params(step=tnew, eps=eps_plus, max_iter=self.max_iter - self.num_iter, conesta_stop=None) # conesta_stop=[self.mu_min]) # self.fista_info.clear() beta = algorithm.run(function, beta) #print "CONESTA loop", i, "FISTA=",self.fista_info[Info.num_iter], "TOT iter:", self.num_iter self.num_iter += algorithm.num_iter if Info.time in algorithm.info: tval = algorithm.info_get(Info.time) if Info.fvalue in algorithm.info: fval = algorithm.info_get(Info.fvalue) self.mu_min = min(self.mu_min, mu[-1]) tmin = min(tmin, tnew) old_mu = function.set_mu(self.mu_min) # Take one ISTA step for use in the stopping criterion. beta_tilde = function.prox(beta - tmin * function.grad(beta), tmin) function.set_mu(old_mu) if (1.0 / tmin) * maths.norm(beta - beta_tilde) < self.eps: if self.info_requested(Info.converged): self.info_set(Info.converged, True) stop = True if self.num_iter >= self.max_iter: stop = True if self.info_requested(Info.time): gap_time = utils.time_cpu() if self.dynamic: G_new = function.gap(beta, eps=eps_plus, max_iter=self.max_iter - self.num_iter) # TODO: Warn if G_new < 0. G_new = abs(G_new) # Just in case ... if G_new < G: G = G_new else: G = self.tau * G else: # Static G = self.tau * G if self.info_requested(Info.time): gap_time = utils.time_cpu() - gap_time tval[-1] += gap_time t = t + tval if self.info_requested(Info.fvalue): f = f + fval if self.info_requested(Info.gap): Gval.append(G) if (G <= consts.TOLERANCE and mu[-1] <= consts.TOLERANCE) or stop: break mu_new = min(mu[-1], function.mu_opt(G)) self.mu_min = min(self.mu_min, mu_new) if self.info_requested(Info.mu): mu = mu + [max(self.mu_min, mu_new)] * len(fval) else: mu.append(max(self.mu_min, mu_new)) function.set_mu(mu_new) i = i + 1 if self.info_requested(Info.num_iter): self.info_set(Info.num_iter, i + 1) if self.info_requested(Info.time): self.info_set(Info.time, t) if self.info_requested(Info.fvalue): self.info_set(Info.fvalue, f) if self.info_requested(Info.gap): self.info_set(Info.gap, Gval) if self.info_requested(Info.mu): self.info_set(Info.mu, mu) if self.info_requested(Info.ok): self.info_set(Info.ok, True) return beta
def run(self, X, y, beta=None):
    """Find the minimiser of the associated function, starting at beta.

    Parameters
    ----------
    X : Numpy array, shape n-by-p. The matrix X with independent
            variables.

    y : Numpy array, shape n-by-1. The response variable y.

    beta : Numpy array. Optional starting point.
    """
    if self.info_requested(Info.ok):
        self.info_set(Info.ok, False)
    if self.info_requested(Info.time):
        t = []
    if self.info_requested(Info.fvalue):
        f = []
    if self.info_requested(Info.converged):
        self.info_set(Info.converged, False)

    n, p = X.shape

    if beta is None:
        beta = self.start_vector.get_weights(p)
    else:
        beta = beta.copy()

    xTx = np.sum(X ** 2.0, axis=0)
    if self.mean:
        xTx *= 1.0 / float(n)

    for i in range(1, self.max_iter + 1):

        if self.info_requested(Info.time):
            tm = utils.time_cpu()

        # The update has an error that propagates. This resets the
        # approximation. We may not need to do this at every iteration.
        Xbeta_y = np.dot(X, beta) - y

        betaold = beta.copy()
        for j in range(p):
            xj = X[:, [j]]
            betaj = beta[j]

            # Solve for beta[j].
            if xTx[j] < consts.TOLERANCE:  # Avoid division-by-zero.
                bj = 0.0
            else:
                S0 = np.dot(xj.T, Xbeta_y - xj * betaj)[0, 0]
                if self.mean:
                    S0 /= float(n)

                if j < self.penalty_start:  # Intercept: not penalised.
                    bj = -S0 / xTx[j]
                else:
                    if S0 > self.l:
                        bj = (self.l - S0) / xTx[j]
                    elif S0 < -self.l:
                        bj = (-self.l - S0) / xTx[j]
                    else:
                        bj = 0.0

            Xbeta_y += xj * (bj - betaj)  # Update X.beta.
            beta[j] = bj  # Save result.

        if self.info_requested(Info.time):
            t.append(utils.time_cpu() - tm)
        if self.info_requested(Info.fvalue):
            f_ = self._f(Xbeta_y, y, beta)
            f.append(f_)

        if maths.norm(beta - betaold) < self.eps \
                and i >= self.min_iter:

            if self.info_requested(Info.converged):
                self.info_set(Info.converged, True)

            break

    self.num_iter = i

    if self.info_requested(Info.num_iter):
        self.info_set(Info.num_iter, i)
    if self.info_requested(Info.time):
        self.info_set(Info.time, t)
    if self.info_requested(Info.fvalue):
        self.info_set(Info.fvalue, f)
    if self.info_requested(Info.ok):
        self.info_set(Info.ok, True)

    return beta
def run(self, X, y, beta=None): """Find the minimiser of the associated function, starting at beta. Parameters ---------- X : Numpy array, shape n-by-p. The matrix X with independent variables. y : Numpy array, shape n-by-1. The response variable y. beta : Numpy array. Optional starting point. """ if self.info_requested(Info.ok): self.info_set(Info.ok, False) if self.info_requested(Info.time): t = [] if self.info_requested(Info.fvalue): f = [] if self.info_requested(Info.converged): self.info_set(Info.converged, False) n, p = X.shape if beta is None: beta = self.start_vector.get_weights(p) else: beta = beta.copy() function = functions.CombinedFunction() function.add_loss(functions.losses.LinearRegression(X, y, mean=False)) function.add_prox(penalties.L1(l=self.l)) xTx = np.sum(X ** 2.0, axis=0) if self.mean: xTx *= 1.0 / float(n) for i in range(1, self.max_iter + 1): if self.info_requested(Info.time): tm = utils.time_cpu() # The update has an error that propagates. This resets the # approximation. We may not need to do this at every iteration. y_Xbeta = y - np.dot(X, beta) betaold = beta.copy() for j in range(p): xj = X[:, [j]] betaj = beta[j, 0] if xTx[j] < consts.TOLERANCE: # Avoid division-by-zero. bj = 0.0 else: bj = np.dot(xj.T, y_Xbeta + xj * betaj)[0, 0] if self.mean: bj /= float(n) if j < self.penalty_start: bj = bj / xTx[j] else: # Soft thresholding. bj = np.sign(bj) \ * max(0.0, (abs(bj) - self.l) / xTx[j]) y_Xbeta -= xj * (bj - betaj) # Update X.beta. beta[j] = bj # Save result. if self.info_requested(Info.time): t.append(utils.time_cpu() - tm) if self.info_requested(Info.fvalue): f_ = self._f(y_Xbeta, y, beta) f.append(f_) # print "err:", maths.norm(beta - betaold) if maths.norm(beta - betaold) < self.eps \ and i >= self.min_iter: if self.info_requested(Info.converged): self.info_set(Info.converged, True) # print "iterations: ", i break self.num_iter = i if self.info_requested(Info.num_iter): self.info_set(Info.num_iter, i) if self.info_requested(Info.time): self.info_set(Info.time, t) if self.info_requested(Info.fvalue): self.info_set(Info.fvalue, f) if self.info_requested(Info.ok): self.info_set(Info.ok, True) return beta
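# Both coordinate-descent variants above maintain a running residual and
# soft-threshold one coefficient at a time. A compact sketch of that same
# update rule (no intercept handling, no mean scaling; names are local to
# this example):
import numpy as np

def cd_lasso_sketch(X, y, l, max_iter=200):
    p = X.shape[1]
    xTx = np.sum(X ** 2.0, axis=0)
    beta = np.zeros((p, 1))
    r = y - np.dot(X, beta)                 # Running residual.
    for _ in range(max_iter):
        for j in range(p):
            xj = X[:, [j]]
            bj = np.dot(xj.T, r + xj * beta[j, 0])[0, 0]
            bj = np.sign(bj) * max(0.0, abs(bj) - l) / xTx[j]
            r -= xj * (bj - beta[j, 0])     # Keep the residual in sync.
            beta[j, 0] = bj
    return beta

rng = np.random.RandomState(5)
X, y = rng.randn(40, 8), rng.randn(40, 1)
print(cd_lasso_sketch(X, y, l=5.0).ravel())  # Typically several exact zeros.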
def run(self, X, start_vector=None): """Find the right-singular vector of the given sparse matrix. Parameters ---------- X : Scipy sparse array. The sparse matrix to decompose. start_vector : BaseStartVector. A start vector generator. Default is to use a random start vector. """ if self.info_requested(utils.Info.ok): self.info_set(utils.Info.ok, False) if self.info_requested(utils.Info.time): _t = utils.time() if self.info_requested(utils.Info.converged): self.info_set(utils.Info.converged, False) if start_vector is None: start_vector = weights.RandomUniformWeights(normalise=True) v0 = start_vector.get_weights(np.min(X.shape)) # determine when to use power method or scipy_sparse use_power = True if X.shape[1] >= 10 ** 3 else False if not use_power: try: if not sp.sparse.issparse(X): X = sp.sparse.csr_matrix(X) try: [_, _, v] = sparse_linalg.svds(X, k=1, v0=v0, tol=self.eps, maxiter=self.max_iter, return_singular_vectors=True) except TypeError: # For scipy 0.9.0. [_, _, v] = sparse_linalg.svds(X, k=1, tol=self.eps) v = v.T if self.info_requested(utils.Info.converged): self.info_set(utils.Info.converged, True) except ArpackNoConvergence: use_power = True if use_power: # Use the power method if scipy failed or if determined. # TODO: Use estimators for this! M, N = X.shape if M < N: K = X.dot(X.T) t = v0 for it in range(self.max_iter): t_ = t t = K.dot(t_) t *= 1.0 / maths.norm(t) crit = float(maths.norm(t_ - t)) / float(maths.norm(t)) if crit < consts.TOLERANCE: if self.info_requested(utils.Info.converged): self.info_set(utils.Info.converged, True) break v = X.T.dot(t) v *= 1.0 / maths.norm(v) else: K = X.T.dot(X) v = v0 for it in range(self.max_iter): v_ = v v = K.dot(v_) v *= 1.0 / maths.norm(v) crit = float(maths.norm(v_ - v)) / float(maths.norm(v)) if crit < consts.TOLERANCE: if self.info_requested(utils.Info.converged): self.info_set(utils.Info.converged, True) break if self.info_requested(utils.Info.time): self.info_set(utils.Info.time, utils.time() - _t) if self.info_requested(utils.Info.func_val): _f = maths.norm(X.dot(v)) # Largest singular value. self.info_set(utils.Info.func_val, _f) if self.info_requested(utils.Info.ok): self.info_set(utils.Info.ok, True) return utils.direct_vector(v)
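# Stand-alone check of the power-method branch above: iterating
# v <- X'X v with normalisation converges to the top right-singular
# vector, and ||Xv|| then equals the largest singular value.
import numpy as np

rng = np.random.RandomState(11)
X = rng.randn(15, 8)
v = rng.randn(8, 1)
v /= np.linalg.norm(v)
K = X.T.dot(X)
for _ in range(1000):
    v_ = v
    v = K.dot(v_)
    v /= np.linalg.norm(v)
    if np.linalg.norm(v_ - v) / np.linalg.norm(v) < 5e-12:
        break
print(np.linalg.norm(X.dot(v)), np.linalg.svd(X)[1][0])  # Equal.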
def run(self, function, w): """Apply the algorithm to minimise function, starting at the positions of the vectors in the list w. Parameters ---------- function : MultiblockFunction The function to minimise. w : list of numpy arrays Each element of the list is the parameter vector corresponding to a block. """ # Not ok until the end. if self.info_requested(Info.ok): self.info_set(Info.ok, False) # Initialise info variables. Info variables have the prefix "_". if self.info_requested(Info.time): _t = [] if self.info_requested(Info.func_val): _f = [] if self.info_requested(Info.converged): self.info_set(Info.converged, False) w_old = [0] * len(w) it = 0 while True: for i in range(len(w)): # Wrap a function around the ith block: func = mb_losses.MultiblockFunctionWrapper(function, w, i) if hasattr(function, "at_point"): def new_at_point(self, w): return function.at_point(self.w[:self.index] + [w] + self.w[self.index + 1:]) import types func.at_point = types.MethodType(new_at_point, func) w_old[i] = w[i] self.algorithm.reset() w[i] = self.algorithm.run(func, w_old[i]) # Store info from algorithm: if self.info_requested(Info.time): time = self.algorithm.info_get(Info.time) _t.extend(time) if self.info_requested(Info.func_val): func_val = self.algorithm.info_get(Info.func_val) _f.extend(func_val) # Update iteration counts. self.num_iter += self.algorithm.num_iter # Test global stopping criterion. all_converged = True for i in range(len(w)): # Wrap a function around the ith block. func = mb_losses.MultiblockFunctionWrapper(function, w, i) # Test if converged for block i. if maths.norm(w[i] - w_old[i]) > self.eps: all_converged = False break # Converged in all blocks! if all_converged: if self.info_requested(Info.converged): self.info_set(Info.converged, True) break # Stop after maximum number of iterations. if self.num_iter >= self.max_iter: break it += 1 # Store information. if self.info_requested(Info.num_iter): self.info_set(Info.num_iter, self.num_iter) if self.info_requested(Info.time): self.info_set(Info.time, _t) if self.info_requested(Info.func_val): self.info_set(Info.func_val, _f) if self.info_requested(Info.ok): self.info_set(Info.ok, True) return w
def run(self, function, w):

    if self.info_requested(Info.ok):
        self.info_set(Info.ok, False)
    if self.info_requested(Info.time):
        t = []
    if self.info_requested(Info.fvalue):
        f = []
    if self.info_requested(Info.converged):
        self.info_set(Info.converged, False)

    num_iter = [0] * len(w)

    for it in range(1, self.outer_iter + 1):

        all_converged = True

        for i in range(len(w)):
            if function.has_nesterov_function(i):
                func = mb_losses.MultiblockNesterovFunctionWrapper(
                    function, w, i)
                algorithm = self.conesta
            else:
                func = mb_losses.MultiblockFunctionWrapper(function, w, i)
                algorithm = self.fista

            w[i] = algorithm.run(func, w[i])

            if algorithm.info_requested(Info.num_iter):
                num_iter[i] += algorithm.info_get(Info.num_iter)
            if algorithm.info_requested(Info.time):
                tval = algorithm.info_get(Info.time)
            if algorithm.info_requested(Info.fvalue):
                fval = algorithm.info_get(Info.fvalue)

            if self.info_requested(Info.time):
                t = t + tval
            if self.info_requested(Info.fvalue):
                f = f + fval

        for i in range(len(w)):
            # Take one ISTA step for use in the stopping criterion.
            step = function.step(w, i)
            w_tilde = function.prox(w[:i] +
                                    [w[i] - step * function.grad(w, i)] +
                                    w[i + 1:], i, step)

            if (1.0 / step) * maths.norm(w[i] - w_tilde) > self.eps:
                all_converged = False
                break

        if all_converged:
            if self.info_requested(Info.converged):
                self.info_set(Info.converged, True)
            break

    if self.info_requested(Info.num_iter):
        self.info_set(Info.num_iter, num_iter)
    if self.info_requested(Info.time):
        self.info_set(Info.time, t)
    if self.info_requested(Info.fvalue):
        self.info_set(Info.fvalue, f)
    if self.info_requested(Info.ok):
        self.info_set(Info.ok, True)

    return w
def run(self, function, w):

    # Not ok until the end.
    if self.info_requested(Info.ok):
        self.info_set(Info.ok, False)

    # Initialise info variables. Info variables have the prefix "_".
    if self.info_requested(Info.time):
        _t = []
    if self.info_requested(Info.func_val):
        _f = []
    if self.info_requested(Info.smooth_func_val):
        _fmu = []
    if self.info_requested(Info.converged):
        self.info_set(Info.converged, False)

    use_fista = True
    if use_fista:
        exp = 4.0 + consts.FLOAT_EPSILON
    else:
        exp = 2.0 + consts.FLOAT_EPSILON
    block_iter = [1] * len(w)

    it = 0
    while True:

        for i in range(len(w)):

            # Wrap a function around the ith block.
            func = mb_losses.MultiblockFunctionWrapper(function, w, i)

            # Run FISTA.
            w_old = w[i]
            for k in range(1, max(self.min_iter + 1,
                                  self.max_iter - self.num_iter + 1)):

                if self.info_requested(Info.time):
                    time = utils.time_wall()

                if use_fista:
                    # Take an interpolated step.
                    z = w[i] + ((k - 2.0) / (k + 1.0)) * (w[i] - w_old)
                else:
                    z = w[i]

                # Compute the step.
                step = func.step(z)
                # Compute inexact precision.
                eps = max(consts.FLOAT_EPSILON,
                          1.0 / (block_iter[i] ** exp))

                w_old = w[i]
                # Take a FISTA step.
                w[i] = func.prox(z - step * func.grad(z),
                                 factor=step, eps=eps)

                # Store info variables.
                if self.info_requested(Info.time):
                    _t.append(utils.time_wall() - time)
                if self.info_requested(Info.func_val):
                    _f.append(function.f(w))
                if self.info_requested(Info.smooth_func_val):
                    _fmu.append(function.fmu(w))

                # Update iteration counts.
                self.num_iter += 1
                block_iter[i] += 1

                # Test stopping criterion.
                if maths.norm(w[i] - z) < step * self.eps \
                        and k >= self.min_iter:
                    break

        # Test global stopping criterion.
        all_converged = True
        for i in range(len(w)):

            # Wrap a function around the ith block.
            func = mb_losses.MultiblockFunctionWrapper(function, w, i)

            # Compute the step.
            step = func.step(w[i])
            # Compute inexact precision.
            eps = max(consts.FLOAT_EPSILON,
                      1.0 / (block_iter[i] ** exp))

            # Take one ISTA step for use in the stopping criterion.
            w_tilde = func.prox(w[i] - step * func.grad(w[i]),
                                factor=step, eps=eps)

            # Test if converged for block i.
            if maths.norm(w[i] - w_tilde) > step * self.eps:
                all_converged = False
                break

        # Converged in all blocks!
        if all_converged:
            if self.info_requested(Info.converged):
                self.info_set(Info.converged, True)
            break

        # Stop after maximum number of iterations.
        if self.num_iter >= self.max_iter:
            break

        it += 1

    # Store information.
    if self.info_requested(Info.num_iter):
        self.info_set(Info.num_iter, self.num_iter)
    if self.info_requested(Info.time):
        self.info_set(Info.time, _t)
    if self.info_requested(Info.func_val):
        self.info_set(Info.func_val, _f)
    if self.info_requested(Info.smooth_func_val):
        self.info_set(Info.smooth_func_val, _fmu)
    if self.info_requested(Info.ok):
        self.info_set(Info.ok, True)

    return w
def run(self, function, beta): """Find the minimiser of the given function, starting at beta. Parameters ---------- function : Function The function to minimise. beta : numpy.ndarray or list of numpy.ndarray The starting point. """ if self.info_requested(Info.ok): self.info_set(Info.ok, False) is_list = False if isinstance(beta, list): is_list = True if self.info_requested(Info.time): t = [] if self.info_requested(Info.func_val): f = [] if self.info_requested(Info.converged): self.info_set(Info.converged, False) aold = anew = 1.0 thetaold = thetanew = beta betanew = betaold = beta for i in range(1, self.max_iter + 1): if self.info_requested(Info.time): tm = utils.time_cpu() step = function.step(betanew, iteration=i) betaold = betanew thetaold = thetanew aold = anew # thetanew = betaold - step * function.grad(betaold) anew = (1.0 + np.sqrt(4.0 * aold * aold + 1.0)) / 2.0 # betanew = thetanew + (aold - 1.0) * (thetanew - thetaold) / anew grad = function.grad(betaold) acc_step = ((aold - 1.0) / anew) if not is_list: thetanew = betaold - step * grad betanew = thetanew + acc_step * (thetanew - thetaold) else: thetanew = [betaold[i] - step * grad[i] for i in range(len(betaold))] betanew = [thetanew[i] + acc_step * (thetanew[i] - thetaold[i]) for i in range(len(thetanew))] if self.info_requested(Info.time): t.append(utils.time_cpu() - tm) if self.info_requested(Info.func_val): f.append(function.f(betanew)) if not is_list: err = maths.norm(betanew - betaold) else: err = np.sqrt(np.sum([np.sum((betanew[i] - betaold[i])**2.0) for i in range(len(betanew))])) if err < self.eps and i >= self.min_iter: if self.info_requested(Info.converged): self.info_set(Info.converged, True) break if self.info_requested(Info.num_iter): self.info_set(Info.num_iter, i) if self.info_requested(Info.time): self.info_set(Info.time, t) if self.info_requested(Info.func_val): self.info_set(Info.func_val, f) if self.info_requested(Info.ok): self.info_set(Info.ok, True) return betanew
def run(self, function, beta): """Find the minimiser of the given function, starting at beta. Parameters ---------- function : parsimony.functions.properties.Function The function to minimise. beta : numpy.ndarray or list of numpy.ndarray The start point. """ if self.info_requested(Info.ok): self.info_set(Info.ok, False) is_list = False if isinstance(beta, list): is_list = True # step = function.step(beta, iteration=0) betanew = betaold = beta if self.info_requested(Info.time): t = [] if self.info_requested(Info.fvalue) \ or self.info_requested(Info.func_val): f = [] if self.info_requested(Info.converged): self.info_set(Info.converged, False) for i in range(1, self.max_iter + 1): if self.info_requested(Info.time): tm = utils.time_cpu() step = function.step(betanew, iteration=i) betaold = betanew grad = function.grad(betaold) if not is_list: betanew = betaold - step * grad else: betanew = [ betaold[i] - step * grad[i] for i in range(len(betaold)) ] if self.info_requested(Info.time): t.append(utils.time_cpu() - tm) if self.info_requested(Info.fvalue) \ or self.info_requested(Info.func_val): f.append(function.f(betanew)) if not is_list: err = maths.norm(betanew - betaold) else: err = np.sqrt( np.sum([ np.sum((betanew[i] - betaold[i])**2.0) for i in range(len(betanew)) ])) if err < self.eps and i >= self.min_iter: if self.info_requested(Info.converged): self.info_set(Info.converged, True) break self.num_iter = i if self.info_requested(Info.num_iter): self.info_set(Info.num_iter, i) if self.info_requested(Info.time): self.info_set(Info.time, t) if self.info_requested(Info.fvalue) \ or self.info_requested(Info.func_val): self.info_set(Info.fvalue, f) self.info_set(Info.func_val, f) if self.info_requested(Info.ok): self.info_set(Info.ok, True) return betanew