from itertools import groupby
from time import time

import numpy as np
import scipy.linalg

# `raise_if_not_shape` is a shape-validation helper assumed to be defined
# elsewhere in this package.


def _f(self, f, u, X):
    # Convert each global coordinate in `u` into a span index `s` and a
    # local coordinate `t` within that span.
    u, s, t = self._u_to_s_t(u)
    (N,) = u.shape

    X = np.atleast_2d(X)
    raise_if_not_shape('X', X, (self.num_control_points, self.dim))

    # Group the evaluation points by span so that the basis function `f`
    # is invoked once per distinct span, each time with the vector of
    # local coordinates that fall in that span. `groupby` requires its
    # input sorted by key, hence the `argsort`.
    R = np.empty((N, self.dim), dtype=float)
    for s_, i in groupby(np.argsort(s), key=lambda j: s[j]):
        i = list(i)
        R[i] = np.dot(f(t[i]), X[self._i(s_)])
    return R
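
# The span-grouping trick used by `_f` above can be illustrated in
# isolation. The following is a minimal sketch, not part of the original
# module: `cubic_basis`, the data arrays, and the assumption that span
# `s_` of an open uniform cubic B-spline uses control points
# `X[s_:s_ + 4]` (the detail that `self._i` encapsulates, and which
# differs for closed contours) are all hypothetical.
def _demo_span_grouping():
    # Hypothetical uniform cubic B-spline basis: for local coordinates
    # `t` of shape `(n,)`, return the `(n, 4)` matrix of basis values.
    def cubic_basis(t):
        t = np.asarray(t, dtype=float)
        return np.c_[(1.0 - t) ** 3,
                     3.0 * t ** 3 - 6.0 * t ** 2 + 4.0,
                     -3.0 * t ** 3 + 3.0 * t ** 2 + 3.0 * t + 1.0,
                     t ** 3] / 6.0

    s = np.array([2, 0, 2, 1, 0])             # span index per point
    t = np.array([0.5, 0.1, 0.9, 0.3, 0.7])   # local coordinate per point
    X = np.random.default_rng(0).standard_normal((7, 2))  # control points

    # `cubic_basis` is called once per distinct span rather than once per
    # evaluation point.
    R = np.empty((len(s), X.shape[1]))
    for s_, idx in groupby(np.argsort(s), key=lambda j: s[j]):
        idx = list(idx)
        R[idx] = np.dot(cubic_basis(t[idx]), X[s_:s_ + 4])
    return R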
def minimise(self, Y, w, lambda_, u, X, return_all=False,
             max_num_iterations=100, min_radius=1e-9, max_radius=1e12,
             initial_radius=1e4):
    """Minimise the sum of squared errors between the uniform B-spline
    specified by `X` and the positions of unstructured data points `Y`.

    The exact expression minimised with respect to `X` and `u` is:

        0.5 * (sum(w * (Y - M(u, X))**2) + lambda_ * R(X))

    where `M` is the uniform B-spline position function and `R` is the
    regularisation function (the sum of squared distances between
    adjacent control points).

    Parameters
    ----------
    Y : float, array_like of shape = (N, dim)
        The matrix of data point positions.

    w : float, array_like of shape = (N, dim)
        The matrix of positive weights applied to each squared residual
        on each dimension.

    lambda_ : float
        The positive float that specifies the amount of regularisation.

    u : float, array_like of shape = (N,)
        The vector of initial contour correspondences. Optimally, `u[i]`
        is the contour coordinate that minimises the weighted squared
        distance between the uniform B-spline and `Y[i]`. Here, only a
        coarse initialisation is (typically) required.

    X : float, array_like of shape = (num_control_points, dim)
        The matrix of initial control point positions.

    return_all : optional, bool
        If True, a tuple is returned of the form
        `(u, X, has_converged, states, n, t)` where:
        `u` is the optimised vector of correspondences;
        `X` is the optimised matrix of control point positions;
        `has_converged` is True if the optimisation terminated by
        reaching the minimum trust region radius and False otherwise;
        `states` is a list of optimisation states comprising the `u`,
        `X`, energy, and trust region radius after each successful
        optimisation step (including the initialisation);
        `n` is the total number of optimisation steps;
        `t` is the total time taken (measured using `time.time`).
        Otherwise, `minimise` returns `(u, X)`.

    max_num_iterations : optional, int
        The maximum number of optimisation iterations.

    min_radius : optional, float
        The non-negative minimum trust region radius. If the trust
        region radius falls below this value, optimisation terminates.

    max_radius : optional, float
        The non-negative maximum trust region radius.

    initial_radius : optional, float
        The initial non-negative trust region radius.

    Returns
    -------
    See `return_all`.

    Further Details
    ---------------
    The energy `e` to be minimised can be written as:

        e = 0.5 * (r(z)**2).sum()

    where `z` is the concatenated vector of correspondences `u` and
    control point positions `X` (row first), and `r` is a function which
    returns the vector of concatenated data point and regularisation
    residuals.

    Let `de` denote the vector of first derivatives. It is given by:

        de = dot(J(z).T, r(z))

    where `J` is the sparse Jacobian: `J[i, j]` is the first derivative
    of residual `i` with respect to `z[j]`.

    Similarly, using `J` and `r` instead of `J(z)` and `r(z)`, the
    matrix of second derivatives `de2` is given by:

        de2 = dot(J.T, J) + sum(r[i] * H[i])                        (1)

    where `H[i]` is the matrix of second derivatives (the "Hessian")
    for residual `i`.

    In Newton's method, the update `del_z` to minimise `e` is given by:

        del_z = -dot(inv(de2), de)

    If `de2` is not positive definite, then this update is invalid. As
    an alternative, a "damped" version (Levenberg's contribution) can
    be solved instead:

        del_z = -dot(inv(de2 + D), de)                              (2)

    where `D` is a diagonal matrix with entries `1 / radius` so that
    `de2 + D` is positive definite. For large values of `radius`, the
    contribution of `D` has little effect. For small values, `del_z`
    tends to `-radius * de` (gradient descent).

    Here, 'dn' (damped Newton) computes `del_z` exactly using (2) and
    (1), and 'lm' (Levenberg-Marquardt) approximates `de2` by ignoring
    all second derivative terms.

    To efficiently compute (2), the sparsity of the problem is
    leveraged. Since `z = r_[u, X.ravel()]`, and the data residuals are
    ordered before the regularisation residuals, `J` is block-sparse.
    Deviating from the Python-like notation so far:

        J = | E  F |
            |      |
            | 0  G |

    where `E` is block-diagonal. Similarly, `H[i]`, where `i` indexes a
    data point residual, is also block-sparse:

        H[i] = | P[i]    Q[i] |
               |              |
               | Q[i].T  0    |

    where `P[i]` is diagonal. (`H[i]` for regularisation residuals is
    0.)

    Therefore, the linear system of (2), ignoring the leading minus
    sign, is of the form:

        | E.T*E + r[i]*P[i] + Da   E.T*F + r[i]*Q[i]  |   | dza |   | a |
        |                                             | * |     | = |   |
        | (E.T*F + r[i]*Q[i]).T    F.T*F + G.T*G + Db |   | dzb |   | b |

    where `D` has been split into diagonal sub-blocks `Da` and `Db`,
    `del_z` and `de` have been partitioned into `(dza, dzb)` and
    `(a, b)` respectively, and summation over `i` is implicit.

    Expanding the above equation gives a pair of simultaneous equations
    in `dza` and `dzb`. Eliminating `dza`, it turns out that the only
    matrix inverse in the expression for `dzb` is of the upper left
    block above. That is, the linear system solved for `dzb` is the
    Schur complement of the complete system matrix. Since both
    `E.T * E` and `P[i]` are diagonal, this is trivial. Furthermore,
    the time taken to compute either a damped Newton or LM update is
    now linear in the number of data points.
    """
    # Ensure that the dimensions and values of inputs are valid.
    w = np.atleast_2d(w)
    N = w.shape[0]
    raise_if_not_shape('w', w, (N, self._c.dim))
    if np.any(w <= 0.0):
        raise ValueError('w <= 0.0')

    Y = np.atleast_2d(Y)
    raise_if_not_shape('Y', Y, (N, self._c.dim))

    if lambda_ <= 0.0:
        raise ValueError('lambda_ <= 0.0 (= {})'.format(lambda_))

    u = np.atleast_1d(u)
    raise_if_not_shape('u', u, (N,))
    u = self._c.clip(u)

    X = np.atleast_2d(X)
    raise_if_not_shape('X', X, (self._c.num_control_points, self._c.dim))

    # Set `_Y`, `_w`, and `_lambda` for internal evaluation methods.
    self._Y = Y
    self._w = np.sqrt(w)
    self._lambda = np.sqrt(lambda_)

    # `G` is constant and depends only on `_lambda`.
    G = self._G()

    # Set internal variables for `_accept_step` and `_reject_step`.
    self._min_radius = max(0.0, min_radius)
    self._max_radius = max(self._min_radius, max_radius)
    self._radius = max(self._min_radius,
                       min(initial_radius, self._max_radius))
    self._decrease_factor = 2.0

    # Set `save_state`.
    if return_all:
        states = []

        def save_state(u, X, *args):
            states.append((u.copy(), X.copy()) + args)
    else:
        def save_state(*args):
            pass

    save_state(u, X, self._e(u, X), self._radius)

    # Use `d` for the dimension of the problem (convenience).
    d = self._c.dim

    t0 = time()

    update_schur_components, has_converged = True, False

    for i in range(max_num_iterations):
        if self._radius <= self._min_radius:
            # Terminate if the trust region radius is too small.
            has_converged = True
            break

        # Compute a damped Newton or Levenberg-Marquardt step depending
        # on `_solver_type`.
        if update_schur_components:
            # Error and residual components.
            e, (ra, rb, r) = self._e(u, X, return_all=True)

            # First derivatives.
            # The actual E is a block-diagonal matrix of `N` blocks,
            # each of shape `(dim, 1)` (where `N = u.shape[0]`).
            # Here, `E` is an array of shape `(N, dim)`, where `E[j]`
            # is the vector for the `j`th block.
            E, F = self._E(u, X), self._F(u)

            # Set (partially) the Schur diagonal.
            D_EtE_rP = (E * E).sum(axis=1)

            # Set the Schur upper right block. (`j` is used here so as
            # not to shadow the iteration counter `i`, which is
            # returned as the number of optimisation steps.)
            EtF_rQ = np.empty((N, F.shape[1]))
            for j in range(N):
                EtF_rQ[j] = np.dot(E[j], F[d * j:d * (j + 1)])

            # For damped Newton, add the second and mixed derivative
            # terms to `D_EtE_rP` and `EtF_rQ`.
            if self._solver_type == 'dn':
                # Second derivatives.
                # `P` has the same dimensions as `E`.
                P, Q = self._P(u, X), self._Q(u)
                D_EtE_rP += (P * ra.reshape(-1, d)).sum(axis=1)
                for j in range(N):
                    EtF_rQ[j] += np.dot(ra[d * j:d * (j + 1)],
                                        Q[d * j:d * (j + 1)])

            # Set the Schur lower left block.
            FtE_rQ = EtF_rQ.T

            # Set (partially) the Schur lower right block.
            S0 = np.dot(F.T, F) + np.dot(G.T, G)

            # Set the Schur right-hand side components (a = E.T * ra).
            a = (E * ra.reshape(-1, d)).sum(axis=1)
            b = np.dot(F.T, ra) + np.dot(G.T, rb)

        # `D` is the vector of the inverse of the complete Schur
        # diagonal.
        D = 1.0 / (D_EtE_rP + 1.0 / self._radius)

        # Solve the Schur reduced system for `delta_u` and `delta_X`.
        S = (S0 + np.diag([1.0 / self._radius] * S0.shape[0]) -
             np.dot(FtE_rQ, D[:, np.newaxis] * EtF_rQ))
        try:
            c_and_lower = scipy.linalg.cho_factor(S)
        except scipy.linalg.LinAlgError:
            # Step is invalid.
            self._reject_step()
            update_schur_components = False
            continue

        t = b - np.dot(FtE_rQ, D * a)
        v1 = scipy.linalg.cho_solve(c_and_lower, t)
        v0 = D * (a - np.dot(EtF_rQ, v1))

        delta_u = -v0
        delta_X = -v1.reshape(-1, d)

        # Evaluate the change in energy as expected by the quadratic
        # approximation.
        # For `solver_type == 'lm'`, `D_EtE_rP` and `EtF_rQ` do not
        # contain the second and mixed derivative terms so the
        # following is OK, although it could be done (slightly) more
        # efficiently.
        Jdelta = np.r_[(E * delta_u[:, np.newaxis]).ravel() +
                       np.dot(F, delta_X.ravel()),
                       np.dot(G, delta_X.ravel())]
        Hdelta = np.r_[D_EtE_rP * delta_u +
                       np.dot(EtF_rQ, delta_X.ravel()),
                       np.dot(EtF_rQ.T, delta_u) +
                       np.dot(S0, delta_X.ravel())]
        model_e_decrease = -(np.dot(r, Jdelta) +
                             0.5 * np.dot(np.r_[delta_u, delta_X.ravel()],
                                          Hdelta))
        assert model_e_decrease >= 0.0

        # Evaluate the updated coordinates `u1` and control points
        # `X1`.
        u1 = self._c.clip(u + delta_u)
        X1 = X + delta_X

        # Accept the updates if the energy has decreased and reject
        # them otherwise. Also update the trust region radius depending
        # on how well the quadratic approximation modelled the change
        # in energy.
        e1 = self._e(u1, X1)
        step_quality = (e - e1) / model_e_decrease
        if step_quality > 0:
            save_state(u1, X1, e1, self._radius)

            self._accept_step(step_quality)
            e, u, X = e1, u1, X1
            update_schur_components = True
        else:
            self._reject_step()
            update_schur_components = False

    t1 = time()

    return ((u, X, has_converged, states, i, t1 - t0) if return_all else
            (u, X))
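
# The block elimination described in `minimise`'s docstring can be
# checked numerically on a small dense system. This is an illustrative
# sketch, not part of the original module: the sizes and the matrices
# `A`, `B`, `C` (standing in for `E.T*E + r[i]*P[i] + Da`, the mixed
# block, and `F.T*F + G.T*G + Db`) are made up.
def _check_schur_solve():
    rng = np.random.default_rng(0)
    N, m = 6, 4

    # Upper-left block: diagonal and positive definite, as in `minimise`
    # (there, the damping `1 / radius` keeps it so).
    A = np.diag(rng.uniform(1.0, 2.0, N))
    B = 0.1 * rng.standard_normal((N, m))  # small so that `S` below is PD
    C = rng.standard_normal((m, m))
    C = np.dot(C, C.T) + np.eye(m)         # symmetric positive definite
    a = rng.standard_normal(N)
    b = rng.standard_normal(m)

    # Direct solve of the full system  | A    B | | dza |   | a |
    #                                  | B.T  C | | dzb | = | b |.
    K = np.block([[A, B], [B.T, C]])
    direct = np.linalg.solve(K, np.r_[a, b])

    # Schur complement solve: only the diagonal `A` is inverted
    # (elementwise), and the Cholesky factorisation is of the (small)
    # `m x m` Schur complement `C - B.T * inv(A) * B`, mirroring the
    # structure of the solve in `minimise`.
    D = 1.0 / np.diag(A)
    S = C - np.dot(B.T, D[:, np.newaxis] * B)
    c_and_lower = scipy.linalg.cho_factor(S)
    dzb = scipy.linalg.cho_solve(c_and_lower, b - np.dot(B.T, D * a))
    dza = D * (a - np.dot(B, dzb))

    assert np.allclose(np.r_[dza, dzb], direct)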