import numpy as np

from utility import data_type


def get_machine_parameter(i):
    """Return machine constants for the active data_type:
    i == 1 -> machine precision (eps)
    i == 2 -> smallest positive normalized number (dwarf)
    otherwise -> largest finite number (giant)"""
    if data_type == np.float32:
        if i == 1:
            return data_type(1.19209290e-07)
        elif i == 2:
            return data_type(1.17549435e-38)
        else:
            return data_type(3.40282347e+38)
    elif data_type == np.float64:
        if i == 1:
            return data_type(2.2204460492503131e-16)
        elif i == 2:
            return data_type(2.2250738585072014e-308)
        else:
            return data_type(1.7976931348623157e+308)
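
# ----------------------------------------------------------------------
# Illustrative check (not part of the original module): the hard-coded
# constants above should agree with what numpy reports for the same
# types.  This sketch only prints the numpy values for comparison.
if __name__ == '__main__':
    for t in (np.float32, np.float64):
        fi = np.finfo(t)
        print('%s  eps=%g  tiny=%g  max=%g' % (t, fi.eps, fi.tiny, fi.max))
# ----------------------------------------------------------------------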
def lm_lambda(n, r, ldr, ipvt, diag, qtb, delta, lam):
    """Solves the sub-problem in the Levenberg-Marquardt algorithm.

    In the trust-region framework, each L-M step solves the constrained
    minimization problem

                           2
        min || J * p + r ||     s.t.  || D * p || <= Delta
         p                  2

    By introducing a parameter lambda into this sub-problem, the
    constrained optimization problem can be converted into an
    unconstrained one:

            ||  /        J         \       / r \  ||
        min ||  |                  | p  +  |   |  ||
         p  ||  \ sqrt(lambda) * D /       \ 0 /  ||

    This routine determines the value of lambda and, as a by-product,
    gives a nearly exact solution of the minimization problem.

    Letting a = J, d = D, b = -r, x = p, the optimization problem reads

            ||  /        a         \       / b \  ||
        min ||  |                  | x  -  |   |  ||
         x  ||  \ sqrt(lambda) * d /       \ 0 /  ||

    Parameters
    ----------
    n: int
        a positive integer input variable set to the order of r
    r: ndarray
        an n by n array. on input the full upper triangle must contain
        the full upper triangle of the matrix r. on output the full
        upper triangle is unaltered, and the strict lower triangle
        contains the strict upper triangle (transposed) of the upper
        triangular matrix s such that

             t   t              2        t
            P *(J * J + lambda*D ) * P = s * s

    ldr: int
        a positive integer input variable not less than n which
        specifies the leading dimension of the array r
    ipvt: ndarray
        an integer input array of length n which defines the permutation
        matrix p such that a*p = q*r. column j of p is column ipvt(j) of
        the identity matrix
    diag: ndarray
        an input array of length n which must contain the diagonal
        elements of the matrix D
    qtb: ndarray
        an input array of length n which must contain the first n
        elements of the vector (q transpose)*b
    delta: float
        a positive input variable which specifies an upper bound on the
        euclidean norm of D*x
    lam: float
        a non-negative variable containing an initial estimate of the
        levenberg-marquardt parameter

    Returns
    -------
    lam: float
        final estimate of the levenberg-marquardt parameter
    x: ndarray
        an output array of length n which contains the least squares
        solution of the system J*x = r, sqrt(lam)*D*x = 0, for the
        output lam
    sdiag: ndarray
        an output array of length n which contains the diagonal elements
        of the upper triangular matrix s
    """
    # region : Initialize parameters
    # ----------------------------------------
    global p1, p001, dwarf
    global wa1, wa2, x, sdiag

    if wa1 is None or wa1.size != n:
        wa1 = np.zeros(n, data_type)
    if wa2 is None or wa2.size != n:
        wa2 = np.zeros(n, data_type)
    if x is None or x.size != n:
        x = np.zeros(n, data_type)
    if sdiag is None or sdiag.size != n:
        sdiag = np.zeros(n, data_type)
    # ----------------------------------------
    # endregion : Initialize parameters

    # region : Compute Gauss-Newton direction
    # ------------------------------------------
    # :: the gauss-newton direction is stored in x. if the jacobian is
    #    rank-deficient, obtain a least squares solution:
    # ::      t
    # ::  R * P * x = -qtb
    nsing = n
    for j in range(n):
        wa1[j] = qtb[j]
        if r[j + j * ldr] == 0.0 and nsing == n:
            nsing = j
        if nsing < n:
            wa1[j] = 0.0

    # :: solve R * z = qtb using back substitution
    if nsing >= 1:
        for k in range(1, nsing + 1):
            # ::        wa1[j] - z[j+1]*r[j][j+1] - ... - z[n]*r[j][n]
            # :: z[j] = ----------------------------------------------
            # ::                          r[j][j]
            j = nsing - k
            wa1[j] /= r[j + j * ldr]
            temp = wa1[j]
            if j >= 1:
                for i in range(j):
                    wa1[i] -= r[i + j * ldr] * temp

    # ::      t
    # :: x = P * z
    for j in range(n):
        l = ipvt[j] - 1
        x[l] = wa1[j]

    if utility.lam_trace:
        print(">> ||p^{GN}|| = %.10f" % enorm(x))
    # ------------------------------------------
    # endregion : Compute Gauss-Newton direction

    # region : Preparation
    # ------------------------------------------------
    # > initialize the iteration counter
    iter = 0

    # > evaluate the function at the origin, and test
    #   for acceptance of the gauss-newton direction
    for j in range(n):
        wa2[j] = diag[j] * x[j]
    dxnorm = enorm(wa2)
    # :: ||x||_2 = Delta + epsilon is acceptable
    fp = dxnorm - delta
    if fp <= p1 * delta:
        lam = data_type(0.0)
        return [lam, x, sdiag]
    # ------------------------------------------------
    # endregion : Preparation

    # region : Set bound
    # :: f(lam) = || D * x ||_2 - delta
    #    a root-finding Newton's method will be performed on f
    # :: if the jacobian is not rank deficient, the newton step provides
    #    a lower bound, lam_l, for the zero of the function.
    #    otherwise set this bound to zero
    lam_l = data_type(0.0)
    if nsing >= n:
        for j in range(n):
            l = ipvt[j] - 1
            # :: wa2 stores D * x in which x is the gauss-newton direction
            wa1[j] = diag[l] * (wa2[l] / dxnorm)
        # :: solve R^T * y = wa1 by forward substitution (result in wa1)
        for j in range(n):
            sum = data_type(0.0)
            if j >= 1:
                for i in range(j):
                    sum += r[i + j * ldr] * wa1[i]
            wa1[j] = (wa1[j] - sum) / r[j + j * ldr]
        temp = enorm(wa1)
        lam_l = fp / delta / temp / temp

    # > calculate an upper bound, lam_u, for the zero of the function
    for j in range(n):
        sum = 0.0
        for i in range(j + 1):
            sum += r[i + j * ldr] * qtb[i]
        l = ipvt[j] - 1
        wa1[j] = sum / diag[l]
    gnorm = enorm(wa1)
    lam_u = gnorm / delta
    if lam_u == 0.0:
        lam_u = dwarf / min(delta, p1)

    # > if the input lam lies outside of the interval (lam_l, lam_u),
    #   set lam to the closer endpoint
    lam = max(lam, lam_l)
    lam = min(lam, lam_u)
    if lam == 0.0:
        lam = gnorm / dxnorm
    # endregion : Set bound

    # region : Iteration
    while True:
        iter += 1

        if utility.lam_trace:
            print('>> Step %d, lam ∈ (%.8f, %.8f):' % (iter, lam_l, lam_u))

        # > evaluate the function at the current value of lam
        if lam == 0.0:
            d1 = dwarf
            d2 = p001 * lam_u
            lam = max(d1, d2)
        temp = np.sqrt(lam)
        for j in range(n):
            wa1[j] = temp * diag[j]

        if utility.lam_trace:
            print('   lam = %.8f' % lam)

        qr_solve(n, r, ldr, ipvt, wa1, qtb, x, sdiag)

        for j in range(n):
            wa2[j] = diag[j] * x[j]
        dxnorm = enorm(wa2)
        temp = fp
        fp = dxnorm - delta

        if utility.lam_trace:
            print('   Dx - delta = %.8f' % fp)

        # > if the function is small enough, accept the current value
        #   of lam. also test for the exceptional cases where lam_l
        #   is zero or the number of iterations has reached 10
        if np.abs(fp) <= p1 * delta \
                or (lam_l == 0.0 and fp <= temp and temp < 0.0) \
                or iter == 10:
            return [lam, x, sdiag]

        # > compute the newton correction
        # ::
        # ::          / ||d*x|| \ 2   ||d*x|| - delta
        # :: lam_c = |  -------  |   -----------------
        # ::          \  ||y||  /          delta
        # ::
        # ::  t
        # :: r * y = x,   fp = ||d*x|| - delta
        for j in range(n):
            l = ipvt[j] - 1
            wa1[j] = diag[l] * (wa2[l] / dxnorm)
        for j in range(n):
            wa1[j] /= sdiag[j]
            temp = wa1[j]
            if n > j + 1:
                for i in range(j + 1, n):
                    wa1[i] -= r[i + j * ldr] * temp
        temp = enorm(wa1)
        lam_c = fp / delta / temp / temp

        # > depending on the sign of the function, update lam_l or lam_u
        if fp > 0.0:
            lam_l = max(lam_l, lam)
        if fp < 0.0:
            lam_u = min(lam_u, lam)

        # > compute an improved estimate for lam
        d1 = lam_l
        d2 = lam + lam_c
        lam = max(d1, d2)
    # endregion : Iteration
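
# ----------------------------------------------------------------------
# Illustrative sketch (not part of the original module): the scalar
# equation solved above is f(lam) = ||D * x(lam)|| - delta = 0, where
# x(lam) solves (J^T J + lam * D^2) x = -J^T r.  The toy problem below
# uses plain numpy on a small dense system to show that increasing lam
# shrinks ||D * x(lam)||, which is what the safeguarded Newton iteration
# in lm_lambda exploits.  All names here are local to the example.
if __name__ == '__main__':
    import numpy as np

    J = np.array([[3.0, 1.0], [0.0, 2.0], [1.0, 1.0]])
    res = np.array([1.0, -2.0, 0.5])   # residual vector r
    D = np.eye(2)                      # trivial scaling matrix

    def step_norm(lam_value):
        A = J.T.dot(J) + lam_value * D.T.dot(D)
        p = np.linalg.solve(A, -J.T.dot(res))
        return np.linalg.norm(D.dot(p))

    for lam_value in (0.0, 0.1, 1.0, 10.0):
        print('lam = %6.2f  ->  ||D*p(lam)|| = %.6f'
              % (lam_value, step_norm(lam_value)))
# ----------------------------------------------------------------------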
########################################################################
# -*- coding: utf-8 -*-
#
# Created: June 20, 2016
# Author: William Ro
#
########################################################################

import numpy as np

from enorm import euclid_norm as enorm
from dpmpar import get_machine_parameter as dpmpar
from qrsolv import qr_solve
from utility import data_type

import utility

# region: Module Parameters

p1 = data_type(0.1)
p001 = data_type(0.001)
dwarf = dpmpar(2)

wa1 = None
wa2 = None
x = None
sdiag = None

# endregion: Module Parameters
def euclid_norm(x):
    """Given an n-vector x, this function calculates the euclidean norm
    of x.

    The euclidean norm is computed by accumulating the sum of squares in
    three different sums. The sums of squares for the small and large
    components are scaled so that no overflows occur. Non-destructive
    underflows are permitted. Underflows and overflows do not occur in
    the computation of the unscaled sum of squares for the intermediate
    components. The definitions of small, intermediate and large
    components depend on two constants, rdwarf and rgiant. The main
    restrictions on these constants are that rdwarf**2 not underflow and
    rgiant**2 not overflow. The constants given here are suitable for
    every known computer."""
    # > initialize parameters
    global rdwarf, rgiant

    n = x.size
    s1 = data_type(0.0)
    s2 = data_type(0.0)
    s3 = data_type(0.0)
    x1max = data_type(0.0)
    x3max = data_type(0.0)
    agiant = rgiant / n

    # > calculate sums
    for i in range(n):
        xabs = np.abs(x[i])
        if xabs >= agiant:
            # :: sum for large components
            if xabs > x1max:
                # > compute 2nd power
                d1 = x1max / xabs
                s1 = 1.0 + s1 * (d1 * d1)
                x1max = xabs
            else:
                # > compute 2nd power
                d1 = xabs / x1max
                s1 += d1 * d1
        elif xabs <= rdwarf:
            # :: sum for small components
            if xabs > x3max:
                # > compute 2nd power
                d1 = x3max / xabs
                s3 = 1.0 + s3 * (d1 * d1)
                x3max = xabs
            elif xabs != 0.0:
                # > compute 2nd power
                d1 = xabs / x3max
                s3 += d1 * d1
        else:
            # :: sum for intermediate components
            s2 += xabs * xabs

    # > calculate norm
    if s1 != 0:
        ret_val = x1max * np.sqrt(s1 + (s2 / x1max) / x1max)
    elif s2 != 0:
        if s2 >= x3max:
            ret_val = np.sqrt(s2 * (1.0 + (x3max / s2) * (x3max * s3)))
        else:
            ret_val = np.sqrt(x3max * ((s2 / x3max) + (x3max * s3)))
    else:
        ret_val = x3max * np.sqrt(s3)

    return ret_val
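
# ----------------------------------------------------------------------
# Illustrative check (not part of the original module): for moderate
# inputs euclid_norm should agree with numpy's norm; its three-sum
# scaling only matters near overflow/underflow.  This sketch assumes the
# module-level constants rdwarf and rgiant are defined as in the
# original enorm module.
if __name__ == '__main__':
    import numpy as np
    v = np.array([3.0, 4.0, 12.0])
    print('euclid_norm = %.12f' % euclid_norm(v))
    print('numpy norm  = %.12f' % np.linalg.norm(v))
# ----------------------------------------------------------------------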
def qr_solve(n, r, ldr, ipvt, diag, qtb, x, sdiag):
    """Solves the linear least squares problem:

            ||  / a \       / b \  || 2
        min ||  |   | x  -  |   |  ||
         x  ||  \ d /       \ 0 /  || 2

    in which a is an m by n matrix, d is an n by n diagonal matrix and
    b is an m-vector. The necessary information must be provided:

        (1) the q-r factorization with column pivoting of a:
            a * p = q * r
             t
        (2) q * b

    With this information we have

             t                     t
          / q   \   / a \   / r * p \
          |     | * |   | = |   0   |
          \   i /   \ d /   \   d   /

    This routine uses a set of givens transformations to convert the
    right-most matrix to an upper triangular matrix and then uses back
    substitution to obtain the solution.

    Parameters
    ----------
    n: int
        a positive integer input variable set to the order of r
    r: ndarray
        an n by n array. on input the full upper triangle must contain
        the full upper triangle of the matrix r. on output the full
        upper triangle is unaltered, and the strict lower triangle
        contains the strict upper triangle (transposed) of the upper
        triangular matrix s
    ldr: int
        a positive integer input variable not less than n which
        specifies the leading dimension of the array r
    ipvt: ndarray
        an integer input array of length n which defines the permutation
        matrix p such that a*p = q*r. column j of p is column ipvt(j) of
        the identity matrix
    diag: ndarray
        an input array of length n which must contain the diagonal
        elements of the matrix d
    qtb: ndarray
        an input array of length n which must contain the first n
        elements of the vector (q transpose)*b
    x: ndarray
        an output array of length n which contains the least squares
        solution of the system a*x = b, d*x = 0
    sdiag: ndarray
        an output array of length n which contains the diagonal elements
        of the upper triangular matrix s, which satisfies

             t    t           t        t
            p * (a * a + d * d) * p = s * s

        in effect, s is the Cholesky factor of the matrix on the left
    """
    # region : Initialize parameters
    global wa, p5, p25

    if wa is None or wa.size != n:
        wa = np.zeros(n, data_type)
    # endregion : Initialize parameters

    # region : Preparation
    # ----------------------------
    # > copy r and qtb to preserve input and initialize s.
    #   in particular, save the diagonal elements of r in x
    for j in range(n):
        for i in range(j, n):
            r[i + j * ldr] = r[j + i * ldr]
        x[j] = r[j + j * ldr]
        wa[j] = qtb[j]
    # ----------------------------
    # endregion : Preparation

    # region : Givens rotation
    # ---------------------------
    # > eliminate the diagonal matrix d using givens rotations
    # ::                    t             _   t
    # ::  n by n       / r * p \         / r * p \
    # ::  (m-n) by n   |   0   | = q_g * |   0   |
    # ::  n by n       \   d   /         \   0   /
    for j in range(n):
        # > prepare the row of d to be eliminated, locating the
        #   diagonal element using p from the qr factorization.
        l = ipvt[j] - 1
        if diag[l] != 0.0:
            # :: sdiag[j : n] stores the row j of d temporarily
            for k in range(j, n):
                sdiag[k] = 0
            sdiag[j] = diag[l]

            # :: the transformations to eliminate the row of d
            #    modify only a single element of (q transpose)*b
            #    beyond the first n, which is initially zero.
            qtbpj = 0.0
            for k in range(j, n):
                # > determine a givens rotation which eliminates the
                #   appropriate element in the current row of d
                if sdiag[k] != 0.0:
                    if np.abs(r[k + k * ldr]) < np.abs(sdiag[k]):
                        cotan = r[k + k * ldr] / sdiag[k]
                        sin = p5 / np.sqrt(p25 + p25 * (cotan * cotan))
                        cos = sin * cotan
                    else:
                        tan = sdiag[k] / r[k + k * ldr]
                        cos = p5 / np.sqrt(p25 + p25 * (tan * tan))
                        sin = cos * tan

                    # > compute the modified diagonal element of r and
                    #   the modified element of (qtb, 0)^t
                    temp = cos * wa[k] + sin * qtbpj
                    qtbpj = -sin * wa[k] + cos * qtbpj
                    wa[k] = temp

                    # > transform the row of s
                    r[k + k * ldr] = cos * r[k + k * ldr] + sin * sdiag[k]
                    if n > k + 1:
                        for i in range(k + 1, n):
                            temp = cos * r[i + k * ldr] + sin * sdiag[i]
                            sdiag[i] = -sin * r[i + k * ldr] + \
                                cos * sdiag[i]
                            r[i + k * ldr] = temp

        # > store the diagonal element of s and restore the
        #   corresponding diagonal element of r
        sdiag[j] = r[j + j * ldr]
        r[j + j * ldr] = x[j]

    # > solve the triangular system for z. if the system is singular,
    #   then obtain a least squares solution
    # ::      t
    # ::     r * z = qtb,   z = p * x,  and qtb is stored in wa
    nsing = n
    for j in range(n):
        if sdiag[j] == 0.0 and nsing == n:
            nsing = j
        if nsing < n:
            wa[j] = 0.0

    if nsing >= 1:
        # > use back substitution
        for k in range(1, nsing + 1):
            j = nsing - k
            sum = data_type(0)
            if nsing > j + 1:
                for i in range(j + 1, nsing):
                    sum += r[i + j * ldr] * wa[i]
            wa[j] = (wa[j] - sum) / sdiag[j]
    # ---------------------------
    # endregion : Givens rotation

    # > permute the components of z back to components of x
    for j in range(n):
        l = ipvt[j] - 1
        x[l] = wa[j]

    return
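
# ----------------------------------------------------------------------
# Illustrative sketch (not part of the original module): a single Givens
# rotation of the kind constructed above, written with plain numpy.  It
# zeroes the second component of a 2-vector while preserving its norm,
# which is how the rows of d are eliminated one element at a time in
# qr_solve.  All names here are local to the example.
if __name__ == '__main__':
    import numpy as np

    a, b = 3.0, 4.0                      # rotate (a, b) into (r, 0)
    if abs(a) < abs(b):
        cotan = a / b
        s = 1.0 / np.sqrt(1.0 + cotan * cotan)
        c = s * cotan
    else:
        tan = b / a
        c = 1.0 / np.sqrt(1.0 + tan * tan)
        s = c * tan
    G = np.array([[c, s], [-s, c]])
    print(G.dot([a, b]))                 # ~ [5., 0.]
# ----------------------------------------------------------------------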
########################################################################
#
# Created: June 21, 2016
# Author: William Ro
#
########################################################################

import numpy as np

from utility import data_type

# region : Module parameters

p5 = data_type(0.5)
p25 = data_type(0.25)

wa = None

# endregion : Module parameters
def qr(m, n, a, lda, pivot):
    """Uses householder transformations with column pivoting (optional)
    to compute a QR factorization of the m by n matrix a.

    Parameters
    ----------
    m: int
        a positive integer input variable set to the number of rows of a
    n: int
        a positive integer input variable set to the number of columns
        of a
    a: ndarray
        an m by n array. on input a contains the matrix for which the qr
        factorization is to be computed. on output the strict upper
        trapezoidal part of a contains the strict upper trapezoidal part
        of r, and the lower trapezoidal part of a contains a factored
        form of q (the non-trivial elements of the u vectors described
        above)
    lda: int
        a positive integer input variable not less than m which
        specifies the leading dimension of the array a
    pivot: bool
        a logical input variable. if pivot is set true, then column
        pivoting is enforced. if pivot is set false, then no column
        pivoting is done

    Returns
    -------
    ipvt: ndarray
        an integer output array. ipvt defines the permutation matrix p
        such that a*p = q*r. column j of p is column ipvt(j) of the
        identity matrix. if pivot is false, ipvt will be set to None
    rdiag: ndarray
        an output array of length n which contains the diagonal elements
        of r
    acnorm: ndarray
        an output array of length n which contains the norms of the
        corresponding columns of the input matrix a. if this information
        is not needed, then acnorm can coincide with rdiag
    """
    # region : Initialize parameters
    # ----------------------------------------
    global p05, eps_machine, ipvt, rdiag, acnorm, wa

    if ipvt is None or ipvt.size != n:
        ipvt = np.zeros(n, np.int32)
    if rdiag is None or rdiag.size != n:
        rdiag = np.zeros(n, data_type)
    if acnorm is None or acnorm.size != n:
        acnorm = np.zeros(n, data_type)
    if wa is None or wa.size != n:
        wa = np.zeros(n, data_type)
    # ----------------------------------------
    # endregion : Initialize parameters

    # > compute the initial column norms and initialize several arrays
    for j in range(n):
        acnorm[j] = enorm(a[lda * j:lda * (j + 1)])
        rdiag[j] = acnorm[j]
        wa[j] = rdiag[j]
        if pivot:
            ipvt[j] = j + 1

    # > reduce a to r with householder transformations
    min_mn = min(m, n)
    for j in range(min_mn):
        # > if pivot
        # --------------------------------------------------------
        if pivot:
            # >> bring the column of largest norm
            #    into the pivot position
            k_max = j
            for k in range(j, n):
                if rdiag[k] > rdiag[k_max]:
                    k_max = k
            # >> switch
            if k_max != j:
                for i in range(m):  # traverse rows
                    # >>> switch
                    temp = a[i + j * lda]
                    a[i + j * lda] = a[i + k_max * lda]
                    a[i + k_max * lda] = temp
                # >>> overwrite; acnorm[k_max] still holds
                rdiag[k_max] = rdiag[j]
                wa[k_max] = wa[j]
                # >>> switch
                k = ipvt[j]
                ipvt[j] = ipvt[k_max]
                ipvt[k_max] = k

        # > compute the householder transformation to reduce the
        #   j-th column of a to a multiple of the j-th unit vector
        # ------------------------
        # >> normalize
        # :: v = x - ||x||_2 * e_1
        # :: ajnorm = ||x||_2
        ajnorm = enorm(a[lda * j + j:lda * (j + 1)])
        if ajnorm != 0.0:
            if a[j + j * lda] < 0.0:
                # :: prepare to keep a[j + j * lda] positive
                ajnorm = -ajnorm
            # :: x = sgn(x_1) * x / ||x||_2
            for i in range(j, m):
                a[i + j * lda] /= ajnorm
            # :: a[j + j * lda] temporarily stores v[0].
            # :: subtracting one number from another nearby number
            #    has been avoided
            a[j + j * lda] += 1.0

            # > apply the transformation to the remaining columns and
            #   update the norms
            # ::                                       t
            # :: A[i][k] -= beta * v[i] * w[k],  w  = A * v
            # ::
            # :: beta = 1 / v[0], can be proved easily
            # :: w[k] = A[k-th column] * v
            jp1 = j + 1  # j plus 1
            if n > jp1:
                for k in range(jp1, n):  # traverse columns
                    sum = data_type(0.0)  # this is w[k]
                    for i in range(j, m):  # traverse rows
                        # v[i] * A[i][k-th column]
                        sum += a[i + j * lda] * a[i + k * lda]
                    # :: beta * w[k]
                    temp = sum / a[j + j * lda]
                    for i in range(j, m):
                        # :: a[i][k] -= beta * w[k] * v[i]
                        a[i + k * lda] -= temp * a[i + j * lda]

                    # :: rdiag stores information used to pivot
                    # >> update rdiag so that it keeps representing
                    #    alpha = +- ||x||_2
                    if pivot and rdiag[k] != 0:
                        temp = a[j + k * lda] / rdiag[k]
                        # >>> compute max
                        d1 = 1.0 - temp * temp
                        rdiag[k] *= np.sqrt(max(0.0, d1))
                        # >>> compute 2nd power
                        d1 = rdiag[k] / wa[k]
                        # :: if rdiag is too small, recompute it
                        if p05 * (d1 * d1) <= eps_machine:
                            rdiag[k] = enorm(
                                a[jp1 + k * lda:(k + 1) * lda])
                            wa[k] = rdiag[k]

        # :: sgn(ajnorm) = -sgn(x_0)
        # :: H * x = alpha * e_1
        rdiag[j] = -ajnorm

    # > return
    if pivot:
        return [ipvt, rdiag, acnorm]
    else:
        return [rdiag, acnorm]
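
# ----------------------------------------------------------------------
# Usage sketch (not part of the original module): calling qr on a small
# matrix stored column-major in a flat array, as the routine expects.
# With pivot=True the returned rdiag should have diagonal elements of
# nonincreasing magnitude, and |rdiag[0]| equals the largest column
# norm.  Assumes the surrounding module (enorm, utility) imports.
if __name__ == '__main__':
    m_, n_ = 4, 3
    A = np.array([[1.0, 4.0, 7.0],
                  [2.0, 5.0, 8.0],
                  [3.0, 6.0, 10.0],
                  [0.5, 1.0, 2.0]])
    a_flat = A.flatten(order='F').astype(data_type)  # column-major copy

    ipvt_, rdiag_, acnorm_ = qr(m_, n_, a_flat, m_, True)
    print('column norms : %s' % acnorm_)
    print('|diag(R)|    : %s' % np.abs(rdiag_))
    assert np.all(np.diff(np.abs(rdiag_)) <= 1e-4)  # nonincreasing
# ----------------------------------------------------------------------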
# -*- coding: utf-8 -*-

import numpy as np

from enorm import euclid_norm as enorm
import utility
from utility import data_type
import qrfac
from dpmpar import get_machine_parameter as dpmpar
from fdjac2 import jac
from qrfac import qr
from lmpar import lm_lambda

import clip.cl as cl

# region : Module parameters

p1 = data_type(0.1)
p5 = data_type(0.5)
p25 = data_type(0.25)
p75 = data_type(0.75)
p0001 = data_type(1e-4)
eps_machine = dpmpar(1)

wa4 = None
qtf = None

# endregion : Module parameters
########################################################################
#
# Created: June 19, 2016
# Author: William Ro
#
########################################################################

import numpy as np

from enorm import euclid_norm as enorm
from dpmpar import get_machine_parameter as dpmpar
from utility import data_type

# region : Module parameters

p05 = data_type(0.05)
eps_machine = dpmpar(1)

ipvt = None
rdiag = None
acnorm = None
wa = None

# endregion : Module parameters
def lmdif(func, x, args=(), full_output=0,
          ftol=data_type(1.49012e-8), xtol=data_type(1.49012e-8),
          gtol=0.0, maxfev=0, epsfcn=None, factor=100, diag=None):
    """Minimize the sum of the squares of m nonlinear functions in n
    variables by a modification of the levenberg-marquardt algorithm.
    The user must provide a subroutine which calculates the functions.
    The jacobian is then calculated by a forward-difference
    approximation.

    Parameters
    ----------
    func: callable
        should take at least one (possibly length N vector) argument and
        return M floating point numbers. It must not return NaNs or
        fitting might fail.
    x: ndarray
        The starting estimate for the minimization.
    args: tuple, optional
        Any extra arguments to func are placed in this tuple.
    full_output: bool, optional
        non-zero to return all optional outputs.
    ftol: float, optional
        Relative error desired in the sum of squares.
    xtol: float, optional
        Relative error desired in the approximate solution.
    gtol: float, optional
        Orthogonality desired between the function vector and the
        columns of the Jacobian.
    maxfev: int, optional
        The maximum number of calls to the function. If non-positive,
        the default is 200*(N+1) where N is the number of elements in x.
    epsfcn: float, optional
        A variable used in determining a suitable step length for the
        forward-difference approximation of the Jacobian. Normally the
        actual step length will be sqrt(epsfcn)*x. If epsfcn is less
        than the machine precision, it is assumed that the relative
        errors are of the order of the machine precision.
    factor: float, optional
        A parameter determining the initial step bound
        (``factor * || diag * x ||``). Should be in the interval
        ``(0.1, 100)``.
    diag: sequence, optional
        N positive entries that serve as scale factors for the
        variables. If set to None, the variables will be scaled
        internally.

    Returns
    -------
    x: ndarray
        The solution (or the result of the last iteration for an
        unsuccessful call).
    cov_x: ndarray
        Uses the fjac and ipvt optional outputs to construct an estimate
        of the jacobian around the solution. None if a singular matrix
        was encountered (indicates very flat curvature in some
        direction). This matrix must be multiplied by the residual
        variance to get the covariance of the parameter estimates -- see
        curve_fit.
    infodict: dict
        a dictionary of optional outputs with the keys:

        ``nfev``
            The number of function calls
        ``fvec``
            The function evaluated at the output
        ``fjac``
            A permutation of the R matrix of a QR factorization of the
            final approximate Jacobian matrix, stored column wise.
            Together with ipvt, the covariance of the estimate can be
            approximated.
        ``ipvt``
            An integer array of length N which defines a permutation
            matrix, p, such that fjac*p = q*r, where r is upper
            triangular with diagonal elements of nonincreasing
            magnitude. Column j of p is column ipvt(j) of the identity
            matrix.
        ``qtf``
            The vector (transpose(q) * fvec).
    mesg: str
        A string message giving information about the cause of failure.
    ier: int
        An integer flag. If it is equal to 1, 2, 3 or 4, the solution
        was found. Otherwise, the solution was not found. In either
        case, the optional output variable 'mesg' gives more
        information.
    """
""" # region : Initialize part of parameters global eps_machine, wa4, qtf global p1, p5, p25, p75, p0001 ier = 0 x = np.asarray(x).flatten() if not isinstance(args, tuple): args = (args,) if epsfcn is None: epsfcn = finfo(utility.data_type).eps if diag is None: mode = 1 else: mode = 2 # endregion : Initialize part of parameters # region : Check the input parameters for errors if ftol < 0. or xtol < 0. or gtol < 0. or factor <= 0: raise ValueError('!!! Some input parameters for lmdif ' + 'are illegal') if diag is not None: # if mode == 2 for d in diag: if d <= 0: raise ValueError('!!! Entries in diag must be positive') # endregion : Check the input parameters for errors # region : Preparation before main loop # > evaluate the function at the starting point and calculate # its norm # :: evaluate r(x) -> fvec fvec = func(x, *args) # :: evaluate ||r(x)||_2 -> fnorm fnorm = enorm(fvec) if utility.wm_trace: print(">>> L-M begins") print(">>> ||x0|| = %.10f" % fnorm) # region : initialize other parameters # ----------------------------------------- nfev = 1 m = fvec.size n = x.size ldfjac = m if m < n: raise ValueError('!!! m < n in lmdif') if maxfev <= 0: maxfev = 200 * (n + 1) # > check wa4 and qtf if wa4 is None or wa4.size is not m: wa4 = np.zeros(m, data_type) if qtf is None or qtf.size is not n: qtf = np.zeros(n, data_type) # ------------------------------------------ # endregion : initialize other parameters # endregion : Preparation before main loop # region : Main loop # > initialize levenberg-marquardt parameter and iteration counter lam = data_type(0.0) iter = 1 # > begin outer loop while True: if utility.wm_trace: print(">>> Step %d:" % iter) # > calculate the jacobian matrix # :: evaluate J(x) -> fjac: m by n fjac = jac(func, x, args, fvec, epsfcn) nfev += n # > compute the qr factorization of the jacobian # :: # :: / R \ n by n # :: J * P = Q * | | # :: \ 0 / (m - n) by n # :: t # :: Q = H_n * ... 
H_2 * H_1 # :: # :: For H in { H_1, H_2, ..., H_n }, and arbitrary A # :: t # :: H = I - beta * v * v # :: t t # :: H * A = A - v * w, w = beta * A * v # :: information of P -> ipvt # :: R -> rdiag and strict upper trapezoidal part of fjac # :: { H_k }_k -> lower trapezoidal part of fjac ipvt, rdiag, acnorm = qr(m, n, fjac, ldfjac, True) # > on the first iteration if iter is 1: # >> if the diag is None, scale according to the norms of # the columns of the initial jacobian if diag is None: diag = np.zeros(n, data_type) for j in range(n): diag[j] = qrfac.acnorm[j] if diag[j] == 0.0: diag[j] = 1.0 # >> calculate the norm of the scaled x and initialize # the step bound delta wa3 = qrfac.wa # 'wa3' is a name left over by lmdif for j in range(n): wa3[j] = diag[j] * x[j] xnorm = enorm(wa3) delta = factor * xnorm if delta == 0.0: delta = data_type(factor) # > form (q^T)*fvec and store the first n components in qtf # :: see x_{NG} = - PI * R^{-1} * Q_1^T * fvec # :: H * r = r - v * beta * r^T * v for i in range(m): wa4[i] = fvec[i] for j in range(n): # altogether n times transformation # :: here the lower trapezoidal part of fjac contains # a factored form of q, in other words, a set of v if fjac[j + j * ldfjac] != 0: sum = data_type(0.0) # r^T * v for i in range(j, m): sum += fjac[i + j * ldfjac] * wa4[i] # :: mul -beta temp = -sum / fjac[j + j * ldfjac] for i in range(j, m): wa4[i] += fjac[i + j * ldfjac] * temp # restore the diag of R in fjac fjac[j + j * ldfjac] = qrfac.rdiag[j] qtf[j] = wa4[j] # > compute the norm(inf norm) of the scaled gradient # t t # :: g = J * r = P * R * qtf # gnorm = data_type(0.0) wa2 = qrfac.acnorm if fnorm != 0: for j in range(n): # >> get index l = ipvt[j] - 1 if wa2[l] != 0.0: sum = data_type(0.0) for i in range(j + 1): sum += fjac[i + j * ldfjac] * (qtf[i] / fnorm) # >>> computing max d1 = np.abs(sum / wa2[l]) gnorm = max(gnorm, d1) if utility.wm_trace: print(" ||df|| = %.10f, nfev = %d" % (gnorm, nfev)) # > test for convergence of the gradient norm if gnorm <= gtol: ier = 4 break # > rescale if necessary if mode is not 2: for j in range(n): # >> compute max d1 = diag[j] d2 = wa2[j] diag[j] = max(d1, d2) # > beginning of the inner loop while True: if utility.wm_trace: print(" => try delta = %.10f:" % delta) if False: utility.lam_trace = True print("--" * 26 + " lmpar begin") # > determine the levenberg-marquardt parameter lam, wa1, sdiag = lm_lambda(n, fjac, ldfjac, ipvt, diag, qtf, delta, lam) if utility.lam_trace: utility.lam_trace = False print("--" * 26 + " lmpar end") # store the direction p and x + p. 
calculate the norm of p for j in range(n): wa1[j] = -wa1[j] wa2[j] = x[j] + wa1[j] wa3[j] = diag[j] * wa1[j] # :: pnorm = || D * p ||_2 pnorm = enorm(wa3) # > on the first iteration, adjust the initial step bound if iter is 1: delta = min(delta, pnorm) # > evaluate the function at x + p and calculate its norm wa4 = func(wa2, *args) nfev += 1 fnorm1 = enorm(wa4) # > compute the scaled actual reduction act_red = -1 if p1 * fnorm1 < fnorm: # compute 2nd power d1 = fnorm1 / fnorm act_red = 1.0 - d1 * d1 # > compute the scaled predicted reduction and the # scaled directional derivative # # :: pre_red = (m(0) - m(p)) / m(0) # :: t t t # :: = (p * J * J * p + J * r * p) / m(0) # # :: t t t t # :: J = Q * R => p * J * J * p = p * R * R * p # :: # :: m(0) = fnorm * fnorm for j in range(n): wa3[j] = 0 l = ipvt[j] - 1 temp = wa1[l] for i in range(j + 1): wa3[i] += fjac[i + j * ldfjac] * temp # :: now wa3 stores J * p temp1 = enorm(wa3) / fnorm # t # :: lam * p = - grad_m(p) = J * r temp2 = (np.sqrt(lam) * pnorm) / fnorm # :: TODO - ... / p5 pre_red = temp1 * temp1 + temp2 * temp2 / p5 dir_der = -(temp1 * temp1 + temp2 * temp2) # > compute the ratio of the actual to the predicted # reduction ratio = 0.0 if pre_red != 0: ratio = act_red / pre_red if utility.wm_trace: print(" ratio = %.10f, nfev = %d" % (ratio, nfev)) # > update the step bound if ratio <= p25: if act_red >= 0.0: temp = p5 else: temp = p5 * dir_der / (dir_der + p5 * act_red) if p1 * fnorm1 >= fnorm or temp < p1: temp = p1 # >> compute min, shrink the trust region d1 = pnorm / p1 delta = temp * min(delta, d1) lam /= temp if utility.wm_trace: print(" delta ↓ -> %.10f:" % delta) else: if lam == 0.0 or ratio >= p75: # >> expand the trust region delta = pnorm / p5 lam = p5 * lam if utility.wm_trace: print(" delta ↑ -> %.10f:" % delta) # > test for successful iteration if ratio >= p0001: # >> successful iteration. update x, fvec # and their norms for j in range(n): x[j] = wa2[j] wa2[j] = diag[j] * x[j] for i in range(m): fvec[i] = wa4[i] xnorm = enorm(wa2) if utility.wm_trace: print(" √ ||x|| ↓ %.10f -> %.10f" % (fnorm - fnorm1, fnorm1)) fnorm = fnorm1 iter += 1 elif utility.wm_trace: print(" × ||x|| not changed") # > test for convergence if np.abs(act_red) <= ftol and pre_red <= ftol \ and p5 * ratio <= 1.0: ier = 1 if delta <= xtol * xnorm: ier = 2 if np.abs(act_red) <= ftol and pre_red <= ftol \ and p5 * ratio <= 1.0 and ier is 2: ier = 3 if ier is not 0: break # > test for termination and stringent tolerances if nfev >= maxfev: ier = 5 if np.abs(act_red) <= eps_machine and pre_red <= \ eps_machine and p5 * ratio <= 1.0: ier = 6 if delta <= eps_machine * xnorm: ier = 7 if gnorm <= eps_machine: ier = 8 if ier is not 0: break tmp = 1 if ratio >= p0001: break if ier is not 0: break # endregion : Main loop # > wrap results errors = {0: ["Improper input parameters.", TypeError], 1: ["Both actual and predicted relative reductions " "in the sum of squares are at most %f * 1e-8" % (ftol * 1e8), None], 2: ["The relative error between two consecutive " "iterates is at most %f * 1e-8" % (xtol * 1e8), None], 3: ["Both actual and predicted relative reductions in " "the sum of squares\n are at most %f and the " "relative error between two consecutive " "iterates is at \n most %f" % (ftol, xtol), None], 4: ["The cosine of the angle between func(x) and any " "column of the\n Jacobian is at most %f in " "absolute value" % gtol, None], 5: ["Number of calls to function has reached " "maxfev = %d." 
% maxfev, ValueError], 6: ["ftol=%f is too small, no further reduction " "in the sum of squares\n is possible.""" % ftol, ValueError], 7: ["xtol=%f is too small, no further improvement in " "the approximate\n solution is possible." % xtol, ValueError], 8: ["gtol=%f is too small, func(x) is orthogonal to the " "columns of\n the Jacobian to machine " "precision." % gtol, ValueError], 'unknown': ["Unknown error.", TypeError]} if ier not in [1, 2, 3, 4] and not full_output: if ier in [5, 6, 7, 8]: print("!!! leastsq warning: %s" % errors[ier][0]) mesg = errors[ier][0] if utility.wm_trace: print(">>> " + mesg) if full_output: cov_x = None if ier in [1, 2, 3, 4]: from numpy.dual import inv from numpy.linalg import LinAlgError perm = take(eye(n), ipvt - 1, 0) r = triu(transpose(fjac.reshape(n, m))[:n, :]) R = dot(r, perm) try: cov_x = inv(dot(transpose(R), R)) except (LinAlgError, ValueError): pass dct = {'fjac': fjac, 'fvec': fvec, 'ipvt': ipvt, 'nfev': nfev, 'qtf': qtf} return x, cov_x, dct, mesg, ier else: return x, ier
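
# ----------------------------------------------------------------------
# Usage sketch (not part of the original module): fitting y = a*exp(b*t)
# with lmdif.  The residual function, data and starting point below are
# made up for illustration; the sketch assumes the companion modules
# (fdjac2, qrfac, lmpar, utility with wm_trace/lam_trace flags, ...) are
# importable so that lmdif itself runs.
if __name__ == '__main__':
    t_data = np.linspace(0.0, 1.0, 20)
    y_data = 2.0 * np.exp(-1.5 * t_data)

    def residual(p, t, y):
        # r_i(p) = model(t_i; p) - y_i
        return p[0] * np.exp(p[1] * t) - y

    p0 = np.array([1.0, 0.0], dtype=data_type)
    p_fit, ier = lmdif(residual, p0, args=(t_data, y_data))
    print('ier = %d, p_fit = %s' % (ier, p_fit))   # expect ~ [2.0, -1.5]
# ----------------------------------------------------------------------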