def polyfit(x, y, deg, rcond=None, full=False, w=None, cov=False): """ Least squares polynomial fit. Fit a polynomial ``p(x) = p[0] * x**deg + ... + p[deg]`` of degree `deg` to points `(x, y)`. Returns a vector of coefficients `p` that minimises the squared error. Parameters ---------- x : array_like, shape (M,) x-coordinates of the M sample points ``(x[i], y[i])``. y : array_like, shape (M,) or (M, K) y-coordinates of the sample points. Several data sets of sample points sharing the same x-coordinates can be fitted at once by passing in a 2D-array that contains one dataset per column. deg : int Degree of the fitting polynomial rcond : float, optional Relative condition number of the fit. Singular values smaller than this relative to the largest singular value will be ignored. The default value is len(x)*eps, where eps is the relative precision of the float type, about 2e-16 in most cases. full : bool, optional Switch determining nature of return value. When it is False (the default) just the coefficients are returned, when True diagnostic information from the singular value decomposition is also returned. w : array_like, shape (M,), optional weights to apply to the y-coordinates of the sample points. cov : bool, optional Return the estimate and the covariance matrix of the estimate If full is True, then cov is not returned. Returns ------- p : ndarray, shape (M,) or (M, K) Polynomial coefficients, highest power first. If `y` was 2-D, the coefficients for `k`-th data set are in ``p[:,k]``. residuals, rank, singular_values, rcond : Present only if `full` = True. Residuals of the least-squares fit, the effective rank of the scaled Vandermonde coefficient matrix, its singular values, and the specified value of `rcond`. For more details, see `linalg.lstsq`. V : ndarray, shape (M,M) or (M,M,K) Present only if `full` = False and `cov`=True. The covariance matrix of the polynomial coefficient estimates. The diagonal of this matrix are the variance estimates for each coefficient. If y is a 2-D array, then the covariance matrix for the `k`-th data set are in ``V[:,:,k]`` Warns ----- RankWarning The rank of the coefficient matrix in the least-squares fit is deficient. The warning is only raised if `full` = False. The warnings can be turned off by >>> import warnings >>> warnings.simplefilter('ignore', np.RankWarning) See Also -------- polyval : Computes polynomial values. linalg.lstsq : Computes a least-squares fit. scipy.interpolate.UnivariateSpline : Computes spline fits. Notes ----- The solution minimizes the squared error .. math :: E = \\sum_{j=0}^k |p(x_j) - y_j|^2 in the equations:: x[0]**n * p[0] + ... + x[0] * p[n-1] + p[n] = y[0] x[1]**n * p[0] + ... + x[1] * p[n-1] + p[n] = y[1] ... x[k]**n * p[0] + ... + x[k] * p[n-1] + p[n] = y[k] The coefficient matrix of the coefficients `p` is a Vandermonde matrix. `polyfit` issues a `RankWarning` when the least-squares fit is badly conditioned. This implies that the best fit is not well-defined due to numerical error. The results may be improved by lowering the polynomial degree or by replacing `x` by `x` - `x`.mean(). The `rcond` parameter can also be set to a value smaller than its default, but the resulting fit may be spurious: including contributions from the small singular values can add numerical noise to the result. Note that fitting polynomial coefficients is inherently badly conditioned when the degree of the polynomial is large or the interval of sample points is badly centered. The quality of the fit should always be checked in these cases. When polynomial fits are not satisfactory, splines may be a good alternative. References ---------- .. [1] Wikipedia, "Curve fitting", http://en.wikipedia.org/wiki/Curve_fitting .. [2] Wikipedia, "Polynomial interpolation", http://en.wikipedia.org/wiki/Polynomial_interpolation Examples -------- >>> x = np.array([0.0, 1.0, 2.0, 3.0, 4.0, 5.0]) >>> y = np.array([0.0, 0.8, 0.9, 0.1, -0.8, -1.0]) >>> z = np.polyfit(x, y, 3) >>> z array([ 0.08703704, -0.81349206, 1.69312169, -0.03968254]) It is convenient to use `poly1d` objects for dealing with polynomials: >>> p = np.poly1d(z) >>> p(0.5) 0.6143849206349179 >>> p(3.5) -0.34732142857143039 >>> p(10) 22.579365079365115 High-order polynomials may oscillate wildly: >>> p30 = np.poly1d(np.polyfit(x, y, 30)) /... RankWarning: Polyfit may be poorly conditioned... >>> p30(4) -0.80000000000000204 >>> p30(5) -0.99999999999999445 >>> p30(4.5) -0.10547061179440398 Illustration: >>> import matplotlib.pyplot as plt >>> xp = np.linspace(-2, 6, 100) >>> _ = plt.plot(x, y, '.', xp, p(xp), '-', xp, p30(xp), '--') >>> plt.ylim(-2,2) (-2, 2) >>> plt.show() """ order = int(deg) + 1 x = NX.asarray(x) + 0.0 y = NX.asarray(y) + 0.0 # check arguments. if deg < 0: raise ValueError("expected deg >= 0") if x.ndim != 1: raise TypeError("expected 1D vector for x") if x.size == 0: raise TypeError("expected non-empty vector for x") if y.ndim < 1 or y.ndim > 2: raise TypeError("expected 1D or 2D array for y") if x.shape[0] != y.shape[0]: raise TypeError("expected x and y to have same length") # set rcond if rcond is None: rcond = len(x) * finfo(x.dtype).eps # set up least squares equation for powers of x lhs = vander(x, order) rhs = y # apply weighting if w is not None: w = NX.asarray(w) + 0.0 if w.ndim != 1: raise TypeError("expected a 1-d array for weights") if w.shape[0] != y.shape[0]: raise TypeError("expected w and y to have the same length") lhs *= w[:, NX.newaxis] if rhs.ndim == 2: rhs *= w[:, NX.newaxis] else: rhs *= w # scale lhs to improve condition number and solve scale = NX.sqrt((lhs * lhs).sum(axis=0)) lhs /= scale c, resids, rank, s = lstsq(lhs, rhs, rcond) c = (c.T / scale).T # broadcast scale coefficients # warn on rank reduction, which indicates an ill conditioned matrix if rank != order and not full: msg = "Polyfit may be poorly conditioned" warnings.warn(msg, RankWarning) if full: return c, resids, rank, s, rcond elif cov: Vbase = inv(dot(lhs.T, lhs)) Vbase /= NX.outer(scale, scale) # Some literature ignores the extra -2.0 factor in the denominator, but # it is included here because the covariance of Multivariate Student-T # (which is implied by a Bayesian uncertainty analysis) includes it. # Plus, it gives a slightly more conservative estimate of uncertainty. fac = resids / (len(x) - order - 2.0) if y.ndim == 1: return c, Vbase * fac else: return c, Vbase[:, :, NX.newaxis] * fac else: return c
k = 15 points_x = np.array([Mod(random.randint(0, p - 1), p) for i in range(k)]) points_y = np.array([Mod(random.randint(0, p - 1), p) for i in range(k)]) #points_x = np.array([random.random() for i in range(k)]) #points_y = np.array([random.random() for i in range(k)]) print("INDIVIDUAL KEYS X") print(points_x) print("INDIVIDUAL KEYS Y") print(points_y) # run polynomial fitting with degree k - 1 lhs = vander(points_x, k) rhs = points_y solution = invert_matrix(lhs).dot(rhs) def fit_func(x, coeffs): rev = coeffs[::-1] t = 1 out = 0 for i in range(len(rev)): out += t * rev[i] t *= x return out # test fitting
def polyfit(x, y, deg, rcond=None, full=False): """ Least squares polynomial fit. Fit a polynomial ``p(x) = p[0] * x**deg + ... + p[deg]`` of degree `deg` to points `(x, y)`. Returns a vector of coefficients `p` that minimises the squared error. Parameters ---------- x : array_like, shape (M,) x-coordinates of the M sample points ``(x[i], y[i])``. y : array_like, shape (M,) or (M, K) y-coordinates of the sample points. Several data sets of sample points sharing the same x-coordinates can be fitted at once by passing in a 2D-array that contains one dataset per column. deg : int Degree of the fitting polynomial rcond : float, optional Relative condition number of the fit. Singular values smaller than this relative to the largest singular value will be ignored. The default value is len(x)*eps, where eps is the relative precision of the float type, about 2e-16 in most cases. full : bool, optional Switch determining nature of return value. When it is False (the default) just the coefficients are returned, when True diagnostic information from the singular value decomposition is also returned. Returns ------- p : ndarray, shape (M,) or (M, K) Polynomial coefficients, highest power first. If `y` was 2-D, the coefficients for `k`-th data set are in ``p[:,k]``. residuals, rank, singular_values, rcond : present only if `full` = True Residuals of the least-squares fit, the effective rank of the scaled Vandermonde coefficient matrix, its singular values, and the specified value of `rcond`. For more details, see `linalg.lstsq`. Warns ----- RankWarning The rank of the coefficient matrix in the least-squares fit is deficient. The warning is only raised if `full` = False. The warnings can be turned off by >>> import warnings >>> warnings.simplefilter('ignore', np.RankWarning) See Also -------- polyval : Computes polynomial values. linalg.lstsq : Computes a least-squares fit. scipy.interpolate.UnivariateSpline : Computes spline fits. Notes ----- The solution minimizes the squared error .. math :: E = \\sum_{j=0}^k |p(x_j) - y_j|^2 in the equations:: x[0]**n * p[n] + ... + x[0] * p[1] + p[0] = y[0] x[1]**n * p[n] + ... + x[1] * p[1] + p[0] = y[1] ... x[k]**n * p[n] + ... + x[k] * p[1] + p[0] = y[k] The coefficient matrix of the coefficients `p` is a Vandermonde matrix. `polyfit` issues a `RankWarning` when the least-squares fit is badly conditioned. This implies that the best fit is not well-defined due to numerical error. The results may be improved by lowering the polynomial degree or by replacing `x` by `x` - `x`.mean(). The `rcond` parameter can also be set to a value smaller than its default, but the resulting fit may be spurious: including contributions from the small singular values can add numerical noise to the result. Note that fitting polynomial coefficients is inherently badly conditioned when the degree of the polynomial is large or the interval of sample points is badly centered. The quality of the fit should always be checked in these cases. When polynomial fits are not satisfactory, splines may be a good alternative. References ---------- .. [1] Wikipedia, "Curve fitting", http://en.wikipedia.org/wiki/Curve_fitting .. [2] Wikipedia, "Polynomial interpolation", http://en.wikipedia.org/wiki/Polynomial_interpolation Examples -------- >>> x = np.array([0.0, 1.0, 2.0, 3.0, 4.0, 5.0]) >>> y = np.array([0.0, 0.8, 0.9, 0.1, -0.8, -1.0]) >>> z = np.polyfit(x, y, 3) >>> z array([ 0.08703704, -0.81349206, 1.69312169, -0.03968254]) It is convenient to use `poly1d` objects for dealing with polynomials: >>> p = np.poly1d(z) >>> p(0.5) 0.6143849206349179 >>> p(3.5) -0.34732142857143039 >>> p(10) 22.579365079365115 High-order polynomials may oscillate wildly: >>> p30 = np.poly1d(np.polyfit(x, y, 30)) /... RankWarning: Polyfit may be poorly conditioned... >>> p30(4) -0.80000000000000204 >>> p30(5) -0.99999999999999445 >>> p30(4.5) -0.10547061179440398 Illustration: >>> import matplotlib.pyplot as plt >>> xp = np.linspace(-2, 6, 100) >>> plt.plot(x, y, '.', xp, p(xp), '-', xp, p30(xp), '--') [<matplotlib.lines.Line2D object at 0x...>, <matplotlib.lines.Line2D object at 0x...>, <matplotlib.lines.Line2D object at 0x...>] >>> plt.ylim(-2,2) (-2, 2) >>> plt.show() """ order = int(deg) + 1 x = NX.asarray(x) + 0.0 y = NX.asarray(y) + 0.0 # check arguments. if deg < 0 : raise ValueError("expected deg >= 0") if x.ndim != 1: raise TypeError("expected 1D vector for x") if x.size == 0: raise TypeError("expected non-empty vector for x") if y.ndim < 1 or y.ndim > 2 : raise TypeError("expected 1D or 2D array for y") if x.shape[0] != y.shape[0] : raise TypeError("expected x and y to have same length") # set rcond if rcond is None : rcond = len(x)*finfo(x.dtype).eps # scale x to improve condition number scale = abs(x).max() if scale != 0 : x /= scale # solve least squares equation for powers of x v = vander(x, order) c, resids, rank, s = lstsq(v, y, rcond) # warn on rank reduction, which indicates an ill conditioned matrix if rank != order and not full: msg = "Polyfit may be poorly conditioned" warnings.warn(msg, RankWarning) # scale returned coefficients if scale != 0 : if c.ndim == 1 : c /= vander([scale], order)[0] else : c /= vander([scale], order).T if full : return c, resids, rank, s, rcond else : return c
def polyfit(x, y, deg, rcond=None, full=False): """Least squares polynomial fit. Do a best fit polynomial of degree 'deg' of 'x' to 'y'. Return value is a vector of polynomial coefficients [pk ... p1 p0]. Eg, for n=2 p2*x0^2 + p1*x0 + p0 = y1 p2*x1^2 + p1*x1 + p0 = y1 p2*x2^2 + p1*x2 + p0 = y2 ..... p2*xk^2 + p1*xk + p0 = yk Parameters ---------- x : array_like 1D vector of sample points. y : array_like 1D vector or 2D array of values to fit. The values should run down the columes in the 2D case. deg : integer Degree of the fitting polynomial rcond: {None, float}, optional Relative condition number of the fit. Singular values smaller than this relative to the largest singular value will be ignored. The defaul value is len(x)*eps, where eps is the relative precision of the float type, about 2e-16 in most cases. full : {False, boolean}, optional Switch determining nature of return value. When it is False just the coefficients are returned, when True diagnostic information from the singular value decomposition is also returned. Returns ------- coefficients, [residuals, rank, singular_values, rcond] : variable When full=False, only the coefficients are returned, running down the appropriate colume when y is a 2D array. When full=True, the rank of the scaled Vandermonde matrix, it's effective rank in light of the rcond value, its singular values, and the specified value of rcond are also returned. Warns ----- RankWarning : if rank is reduced and not full output The warnings can be turned off by: >>> import numpy as np >>> import warnings >>> warnings.simplefilter('ignore',np.RankWarning) See Also -------- polyval : computes polynomial values. Notes ----- If X is a the Vandermonde Matrix computed from x (see http://mathworld.wolfram.com/VandermondeMatrix.html), then the polynomial least squares solution is given by the 'p' in X*p = y where X.shape is a matrix of dimensions (len(x), deg + 1), p is a vector of dimensions (deg + 1, 1), and y is a vector of dimensions (len(x), 1). This equation can be solved as p = (XT*X)^-1 * XT * y where XT is the transpose of X and -1 denotes the inverse. However, this method is susceptible to rounding errors and generally the singular value decomposition of the matrix X is preferred and that is what is done here. The singular value method takes a paramenter, 'rcond', which sets a limit on the relative size of the smallest singular value to be used in solving the equation. This may result in lowering the rank of the Vandermonde matrix, in which case a RankWarning is issued. If polyfit issues a RankWarning, try a fit of lower degree or replace x by x - x.mean(), both of which will generally improve the condition number. The routine already normalizes the vector x by its maximum absolute value to help in this regard. The rcond parameter can be set to a value smaller than its default, but the resulting fit may be spurious. The current default value of rcond is len(x)*eps, where eps is the relative precision of the floating type being used, generally around 1e-7 and 2e-16 for IEEE single and double precision respectively. This value of rcond is fairly conservative but works pretty well when x - x.mean() is used in place of x. DISCLAIMER: Power series fits are full of pitfalls for the unwary once the degree of the fit becomes large or the interval of sample points is badly centered. The problem is that the powers x**n are generally a poor basis for the polynomial functions on the sample interval, resulting in a Vandermonde matrix is ill conditioned and coefficients sensitive to rounding erros. The computation of the polynomial values will also sensitive to rounding errors. Consequently, the quality of the polynomial fit should be checked against the data whenever the condition number is large. The quality of polynomial fits *can not* be taken for granted. If all you want to do is draw a smooth curve through the y values and polyfit is not doing the job, try centering the sample range or look into scipy.interpolate, which includes some nice spline fitting functions that may be of use. For more info, see http://mathworld.wolfram.com/LeastSquaresFittingPolynomial.html, but note that the k's and n's in the superscripts and subscripts on that page. The linear algebra is correct, however. """ order = int(deg) + 1 x = NX.asarray(x) + 0.0 y = NX.asarray(y) + 0.0 # check arguments. if deg < 0 : raise ValueError, "expected deg >= 0" if x.ndim != 1: raise TypeError, "expected 1D vector for x" if x.size == 0: raise TypeError, "expected non-empty vector for x" if y.ndim < 1 or y.ndim > 2 : raise TypeError, "expected 1D or 2D array for y" if x.shape[0] != y.shape[0] : raise TypeError, "expected x and y to have same length" # set rcond if rcond is None : xtype = x.dtype if xtype == NX.single or xtype == NX.csingle : rcond = len(x)*_single_eps else : rcond = len(x)*_double_eps # scale x to improve condition number scale = abs(x).max() if scale != 0 : x /= scale # solve least squares equation for powers of x v = vander(x, order) c, resids, rank, s = _lstsq(v, y, rcond) # warn on rank reduction, which indicates an ill conditioned matrix if rank != order and not full: msg = "Polyfit may be poorly conditioned" warnings.warn(msg, RankWarning) # scale returned coefficients if scale != 0 : if c.ndim == 1 : c /= vander([scale], order)[0] else : c /= vander([scale], order).T if full : return c, resids, rank, s, rcond else : return c
def polyfit(x, y, deg, rcond=None, full=False, w=None, cov=False): """ Least squares polynomial fit. Fit a polynomial ``p(x) = p[0] * x**deg + ... + p[deg]`` of degree `deg` to points `(x, y)`. Returns a vector of coefficients `p` that minimises the squared error. Parameters ---------- x : array_like, shape (M,) x-coordinates of the M sample points ``(x[i], y[i])``. y : array_like, shape (M,) or (M, K) y-coordinates of the sample points. Several data sets of sample points sharing the same x-coordinates can be fitted at once by passing in a 2D-array that contains one dataset per column. deg : int Degree of the fitting polynomial rcond : float, optional Relative condition number of the fit. Singular values smaller than this relative to the largest singular value will be ignored. The default value is len(x)*eps, where eps is the relative precision of the float type, about 2e-16 in most cases. full : bool, optional Switch determining nature of return value. When it is False (the default) just the coefficients are returned, when True diagnostic information from the singular value decomposition is also returned. w : array_like, shape (M,), optional Weights to apply to the y-coordinates of the sample points. For gaussian uncertainties, use 1/sigma (not 1/sigma**2). cov : bool, optional Return the estimate and the covariance matrix of the estimate If full is True, then cov is not returned. Returns ------- p : ndarray, shape (M,) or (M, K) Polynomial coefficients, highest power first. If `y` was 2-D, the coefficients for `k`-th data set are in ``p[:,k]``. residuals, rank, singular_values, rcond Present only if `full` = True. Residuals of the least-squares fit, the effective rank of the scaled Vandermonde coefficient matrix, its singular values, and the specified value of `rcond`. For more details, see `linalg.lstsq`. V : ndarray, shape (M,M) or (M,M,K) Present only if `full` = False and `cov`=True. The covariance matrix of the polynomial coefficient estimates. The diagonal of this matrix are the variance estimates for each coefficient. If y is a 2-D array, then the covariance matrix for the `k`-th data set are in ``V[:,:,k]`` Warns ----- RankWarning The rank of the coefficient matrix in the least-squares fit is deficient. The warning is only raised if `full` = False. The warnings can be turned off by >>> import warnings >>> warnings.simplefilter('ignore', np.RankWarning) See Also -------- polyval : Compute polynomial values. linalg.lstsq : Computes a least-squares fit. scipy.interpolate.UnivariateSpline : Computes spline fits. Notes ----- The solution minimizes the squared error .. math :: E = \\sum_{j=0}^k |p(x_j) - y_j|^2 in the equations:: x[0]**n * p[0] + ... + x[0] * p[n-1] + p[n] = y[0] x[1]**n * p[0] + ... + x[1] * p[n-1] + p[n] = y[1] ... x[k]**n * p[0] + ... + x[k] * p[n-1] + p[n] = y[k] The coefficient matrix of the coefficients `p` is a Vandermonde matrix. `polyfit` issues a `RankWarning` when the least-squares fit is badly conditioned. This implies that the best fit is not well-defined due to numerical error. The results may be improved by lowering the polynomial degree or by replacing `x` by `x` - `x`.mean(). The `rcond` parameter can also be set to a value smaller than its default, but the resulting fit may be spurious: including contributions from the small singular values can add numerical noise to the result. Note that fitting polynomial coefficients is inherently badly conditioned when the degree of the polynomial is large or the interval of sample points is badly centered. The quality of the fit should always be checked in these cases. When polynomial fits are not satisfactory, splines may be a good alternative. References ---------- .. [1] Wikipedia, "Curve fitting", http://en.wikipedia.org/wiki/Curve_fitting .. [2] Wikipedia, "Polynomial interpolation", http://en.wikipedia.org/wiki/Polynomial_interpolation Examples -------- >>> x = np.array([0.0, 1.0, 2.0, 3.0, 4.0, 5.0]) >>> y = np.array([0.0, 0.8, 0.9, 0.1, -0.8, -1.0]) >>> z = np.polyfit(x, y, 3) >>> z array([ 0.08703704, -0.81349206, 1.69312169, -0.03968254]) It is convenient to use `poly1d` objects for dealing with polynomials: >>> p = np.poly1d(z) >>> p(0.5) 0.6143849206349179 >>> p(3.5) -0.34732142857143039 >>> p(10) 22.579365079365115 High-order polynomials may oscillate wildly: >>> p30 = np.poly1d(np.polyfit(x, y, 30)) /... RankWarning: Polyfit may be poorly conditioned... >>> p30(4) -0.80000000000000204 >>> p30(5) -0.99999999999999445 >>> p30(4.5) -0.10547061179440398 Illustration: >>> import matplotlib.pyplot as plt >>> xp = np.linspace(-2, 6, 100) >>> _ = plt.plot(x, y, '.', xp, p(xp), '-', xp, p30(xp), '--') >>> plt.ylim(-2,2) (-2, 2) >>> plt.show() """ order = int(deg) + 1 x = NX.asarray(x) + 0.0 y = NX.asarray(y) + 0.0 # check arguments. if deg < 0: raise ValueError("expected deg >= 0") if x.ndim != 1: raise TypeError("expected 1D vector for x") if x.size == 0: raise TypeError("expected non-empty vector for x") if y.ndim < 1 or y.ndim > 2: raise TypeError("expected 1D or 2D array for y") if x.shape[0] != y.shape[0]: raise TypeError("expected x and y to have same length") # set rcond if rcond is None: rcond = len(x)*finfo(x.dtype).eps # set up least squares equation for powers of x lhs = vander(x, order) rhs = y # apply weighting if w is not None: w = NX.asarray(w) + 0.0 if w.ndim != 1: raise TypeError("expected a 1-d array for weights") if w.shape[0] != y.shape[0]: raise TypeError("expected w and y to have the same length") lhs *= w[:, NX.newaxis] if rhs.ndim == 2: rhs *= w[:, NX.newaxis] else: rhs *= w # scale lhs to improve condition number and solve scale = NX.sqrt((lhs*lhs).sum(axis=0)) lhs /= scale c, resids, rank, s = lstsq(lhs, rhs, rcond) c = (c.T/scale).T # broadcast scale coefficients # warn on rank reduction, which indicates an ill conditioned matrix if rank != order and not full: msg = "Polyfit may be poorly conditioned" warnings.warn(msg, RankWarning) if full: return c, resids, rank, s, rcond elif cov: Vbase = inv(dot(lhs.T, lhs)) Vbase /= NX.outer(scale, scale) # Some literature ignores the extra -2.0 factor in the denominator, but # it is included here because the covariance of Multivariate Student-T # (which is implied by a Bayesian uncertainty analysis) includes it. # Plus, it gives a slightly more conservative estimate of uncertainty. fac = resids / (len(x) - order - 2.0) if y.ndim == 1: return c, Vbase * fac else: return c, Vbase[:,:, NX.newaxis] * fac else: return c
def polyfit(x, y, deg, rcond=None, full=False): """Least squares polynomial fit. Required arguments x -- vector of sample points y -- vector or 2D array of values to fit deg -- degree of the fitting polynomial Keyword arguments rcond -- relative condition number of the fit (default len(x)*eps) full -- return full diagnostic output (default False) Returns full == False -- coefficients full == True -- coefficients, residuals, rank, singular values, rcond. Warns RankWarning -- if rank is reduced and not full output Do a best fit polynomial of degree 'deg' of 'x' to 'y'. Return value is a vector of polynomial coefficients [pk ... p1 p0]. Eg, for n=2 p2*x0^2 + p1*x0 + p0 = y1 p2*x1^2 + p1*x1 + p0 = y1 p2*x2^2 + p1*x2 + p0 = y2 ..... p2*xk^2 + p1*xk + p0 = yk Method: if X is a the Vandermonde Matrix computed from x (see http://mathworld.wolfram.com/VandermondeMatrix.html), then the polynomial least squares solution is given by the 'p' in X*p = y where X is a len(x) x N+1 matrix, p is a N+1 length vector, and y is a len(x) x 1 vector This equation can be solved as p = (XT*X)^-1 * XT * y where XT is the transpose of X and -1 denotes the inverse. However, this method is susceptible to rounding errors and generally the singular value decomposition is preferred and that is the method used here. The singular value method takes a paramenter, 'rcond', which sets a limit on the relative size of the smallest singular value to be used in solving the equation. This may result in lowering the rank of the Vandermonde matrix, in which case a RankWarning is issued. If polyfit issues a RankWarning, try a fit of lower degree or replace x by x - x.mean(), both of which will generally improve the condition number. The routine already normalizes the vector x by its maximum absolute value to help in this regard. The rcond parameter may also be set to a value smaller than its default, but this may result in bad fits. The current default value of rcond is len(x)*eps, where eps is the relative precision of the floating type being used, generally around 1e-7 and 2e-16 for IEEE single and double precision respectively. This value of rcond is fairly conservative but works pretty well when x - x.mean() is used in place of x. The warnings can be turned off by: >>> import numpy >>> import warnings >>> warnings.simplefilter('ignore',numpy.RankWarning) DISCLAIMER: Power series fits are full of pitfalls for the unwary once the degree of the fit becomes large or the interval of sample points is badly centered. The basic problem is that the powers x**n are generally a poor basis for the functions on the sample interval with the result that the Vandermonde matrix is ill conditioned and computation of the polynomial values is sensitive to coefficient error. The quality of the resulting fit should be checked against the data whenever the condition number is large, as the quality of polynomial fits *can not* be taken for granted. If all you want to do is draw a smooth curve through the y values and polyfit is not doing the job, try centering the sample range or look into scipy.interpolate, which includes some nice spline fitting functions that may be of use. For more info, see http://mathworld.wolfram.com/LeastSquaresFittingPolynomial.html, but note that the k's and n's in the superscripts and subscripts on that page. The linear algebra is correct, however. See also polyval """ order = int(deg) + 1 x = NX.asarray(x) + 0.0 y = NX.asarray(y) + 0.0 # check arguments. if deg < 0 : raise ValueError, "expected deg >= 0" if x.ndim != 1 or x.size == 0: raise TypeError, "expected non-empty vector for x" if y.ndim < 1 or y.ndim > 2 : raise TypeError, "expected 1D or 2D array for y" if x.shape[0] != y.shape[0] : raise TypeError, "expected x and y to have same length" # set rcond if rcond is None : xtype = x.dtype if xtype == NX.single or xtype == NX.csingle : rcond = len(x)*_single_eps else : rcond = len(x)*_double_eps # scale x to improve condition number scale = abs(x).max() if scale != 0 : x /= scale # solve least squares equation for powers of x v = vander(x, order) c, resids, rank, s = _lstsq(v, y, rcond) # warn on rank reduction, which indicates an ill conditioned matrix if rank != order and not full: msg = "Polyfit may be poorly conditioned" warnings.warn(msg, RankWarning) # scale returned coefficients if scale != 0 : c /= vander([scale], order)[0] if full : return c, resids, rank, s, rcond else : return c
def polyfit(x, y, deg, rcond=None, full=False, w=None, cov=False): """ Least squares polynomial fit. .. note:: This forms part of the old polynomial API. Since version 1.4, the new polynomial API defined in `numpy.polynomial` is preferred. A summary of the differences can be found in the :doc:`transition guide </reference/routines.polynomials>`. Fit a polynomial ``p(x) = p[0] * x**deg + ... + p[deg]`` of degree `deg` to points `(x, y)`. Returns a vector of coefficients `p` that minimises the squared error in the order `deg`, `deg-1`, ... `0`. The `Polynomial.fit <numpy.polynomial.polynomial.Polynomial.fit>` class method is recommended for new code as it is more stable numerically. See the documentation of the method for more information. Parameters ---------- x : array_like, shape (M,) x-coordinates of the M sample points ``(x[i], y[i])``. y : array_like, shape (M,) or (M, K) y-coordinates of the sample points. Several data sets of sample points sharing the same x-coordinates can be fitted at once by passing in a 2D-array that contains one dataset per column. deg : int Degree of the fitting polynomial rcond : float, optional Relative condition number of the fit. Singular values smaller than this relative to the largest singular value will be ignored. The default value is len(x)*eps, where eps is the relative precision of the float type, about 2e-16 in most cases. full : bool, optional Switch determining nature of return value. When it is False (the default) just the coefficients are returned, when True diagnostic information from the singular value decomposition is also returned. w : array_like, shape (M,), optional Weights. If not None, the weight ``w[i]`` applies to the unsquared residual ``y[i] - y_hat[i]`` at ``x[i]``. Ideally the weights are chosen so that the errors of the products ``w[i]*y[i]`` all have the same variance. When using inverse-variance weighting, use ``w[i] = 1/sigma(y[i])``. The default value is None. cov : bool or str, optional If given and not `False`, return not just the estimate but also its covariance matrix. By default, the covariance are scaled by chi2/dof, where dof = M - (deg + 1), i.e., the weights are presumed to be unreliable except in a relative sense and everything is scaled such that the reduced chi2 is unity. This scaling is omitted if ``cov='unscaled'``, as is relevant for the case that the weights are w = 1/sigma, with sigma known to be a reliable estimate of the uncertainty. Returns ------- p : ndarray, shape (deg + 1,) or (deg + 1, K) Polynomial coefficients, highest power first. If `y` was 2-D, the coefficients for `k`-th data set are in ``p[:,k]``. residuals, rank, singular_values, rcond These values are only returned if ``full == True`` - residuals -- sum of squared residuals of the least squares fit - rank -- the effective rank of the scaled Vandermonde coefficient matrix - singular_values -- singular values of the scaled Vandermonde coefficient matrix - rcond -- value of `rcond`. For more details, see `numpy.linalg.lstsq`. V : ndarray, shape (M,M) or (M,M,K) Present only if ``full == False`` and ``cov == True``. The covariance matrix of the polynomial coefficient estimates. The diagonal of this matrix are the variance estimates for each coefficient. If y is a 2-D array, then the covariance matrix for the `k`-th data set are in ``V[:,:,k]`` Warns ----- RankWarning The rank of the coefficient matrix in the least-squares fit is deficient. The warning is only raised if ``full == False``. The warnings can be turned off by >>> import warnings >>> warnings.simplefilter('ignore', np.RankWarning) See Also -------- polyval : Compute polynomial values. linalg.lstsq : Computes a least-squares fit. scipy.interpolate.UnivariateSpline : Computes spline fits. Notes ----- The solution minimizes the squared error .. math:: E = \\sum_{j=0}^k |p(x_j) - y_j|^2 in the equations:: x[0]**n * p[0] + ... + x[0] * p[n-1] + p[n] = y[0] x[1]**n * p[0] + ... + x[1] * p[n-1] + p[n] = y[1] ... x[k]**n * p[0] + ... + x[k] * p[n-1] + p[n] = y[k] The coefficient matrix of the coefficients `p` is a Vandermonde matrix. `polyfit` issues a `RankWarning` when the least-squares fit is badly conditioned. This implies that the best fit is not well-defined due to numerical error. The results may be improved by lowering the polynomial degree or by replacing `x` by `x` - `x`.mean(). The `rcond` parameter can also be set to a value smaller than its default, but the resulting fit may be spurious: including contributions from the small singular values can add numerical noise to the result. Note that fitting polynomial coefficients is inherently badly conditioned when the degree of the polynomial is large or the interval of sample points is badly centered. The quality of the fit should always be checked in these cases. When polynomial fits are not satisfactory, splines may be a good alternative. References ---------- .. [1] Wikipedia, "Curve fitting", https://en.wikipedia.org/wiki/Curve_fitting .. [2] Wikipedia, "Polynomial interpolation", https://en.wikipedia.org/wiki/Polynomial_interpolation Examples -------- >>> import warnings >>> x = np.array([0.0, 1.0, 2.0, 3.0, 4.0, 5.0]) >>> y = np.array([0.0, 0.8, 0.9, 0.1, -0.8, -1.0]) >>> z = np.polyfit(x, y, 3) >>> z array([ 0.08703704, -0.81349206, 1.69312169, -0.03968254]) # may vary It is convenient to use `poly1d` objects for dealing with polynomials: >>> p = np.poly1d(z) >>> p(0.5) 0.6143849206349179 # may vary >>> p(3.5) -0.34732142857143039 # may vary >>> p(10) 22.579365079365115 # may vary High-order polynomials may oscillate wildly: >>> with warnings.catch_warnings(): ... warnings.simplefilter('ignore', np.RankWarning) ... p30 = np.poly1d(np.polyfit(x, y, 30)) ... >>> p30(4) -0.80000000000000204 # may vary >>> p30(5) -0.99999999999999445 # may vary >>> p30(4.5) -0.10547061179440398 # may vary Illustration: >>> import matplotlib.pyplot as plt >>> xp = np.linspace(-2, 6, 100) >>> _ = plt.plot(x, y, '.', xp, p(xp), '-', xp, p30(xp), '--') >>> plt.ylim(-2,2) (-2, 2) >>> plt.show() """ order = int(deg) + 1 x = NX.asarray(x) + 0.0 y = NX.asarray(y) + 0.0 # check arguments. if deg < 0: raise ValueError("expected deg >= 0") if x.ndim != 1: raise TypeError("expected 1D vector for x") if x.size == 0: raise TypeError("expected non-empty vector for x") if y.ndim < 1 or y.ndim > 2: raise TypeError("expected 1D or 2D array for y") if x.shape[0] != y.shape[0]: raise TypeError("expected x and y to have same length") # set rcond if rcond is None: rcond = len(x)*finfo(x.dtype).eps # set up least squares equation for powers of x lhs = vander(x, order) rhs = y # apply weighting if w is not None: w = NX.asarray(w) + 0.0 if w.ndim != 1: raise TypeError("expected a 1-d array for weights") if w.shape[0] != y.shape[0]: raise TypeError("expected w and y to have the same length") lhs *= w[:, NX.newaxis] if rhs.ndim == 2: rhs *= w[:, NX.newaxis] else: rhs *= w # scale lhs to improve condition number and solve scale = NX.sqrt((lhs*lhs).sum(axis=0)) lhs /= scale c, resids, rank, s = lstsq(lhs, rhs, rcond) c = (c.T/scale).T # broadcast scale coefficients # warn on rank reduction, which indicates an ill conditioned matrix if rank != order and not full: msg = "Polyfit may be poorly conditioned" warnings.warn(msg, RankWarning, stacklevel=4) if full: return c, resids, rank, s, rcond elif cov: Vbase = inv(dot(lhs.T, lhs)) Vbase /= NX.outer(scale, scale) if cov == "unscaled": fac = 1 else: if len(x) <= order: raise ValueError("the number of data points must exceed order " "to scale the covariance matrix") # note, this used to be: fac = resids / (len(x) - order - 2.0) # it was deciced that the "- 2" (originally justified by "Bayesian # uncertainty analysis") is not what the user expects # (see gh-11196 and gh-11197) fac = resids / (len(x) - order) if y.ndim == 1: return c, Vbase * fac else: return c, Vbase[:,:, NX.newaxis] * fac else: return c
def polyfit(x, y, deg, rcond=None, full=False, w=None, cov=False): import numpy.core.numeric as NX from numpy.core import isscalar, abs, dot from numpy.lib.twodim_base import diag, vander from numpy.linalg import eigvals, lstsq, inv try: from numpy.core import finfo # 1.7 except: from numpy.lib.getlimits import finfo # 1.3 support for cluster order = int(deg) + 1 x = NX.asarray(x) + 0.0 y = NX.asarray(y) + 0.0 # check arguments. if deg < 0 : raise ValueError("expected deg >= 0") if x.ndim != 1: raise TypeError("expected 1D vector for x") if x.size == 0: raise TypeError("expected non-empty vector for x") if y.ndim < 1 or y.ndim > 2 : raise TypeError("expected 1D or 2D array for y") if x.shape[0] != y.shape[0] : raise TypeError("expected x and y to have same length") # set rcond if rcond is None : rcond = len(x)*finfo(x.dtype).eps # set up least squares equation for powers of x lhs = vander(x, order) rhs = y # apply weighting if w is not None: w = NX.asarray(w) + 0.0 if w.ndim != 1: raise TypeError, "expected a 1-d array for weights" if w.shape[0] != y.shape[0] : raise TypeError, "expected w and y to have the same length" lhs *= w[:, NX.newaxis] if rhs.ndim == 2: rhs *= w[:, NX.newaxis] else: rhs *= w # scale lhs to improve condition number and solve scale = NX.sqrt((lhs*lhs).sum(axis=0)) lhs /= scale c, resids, rank, s = lstsq(lhs, rhs, rcond) c = (c.T/scale).T # broadcast scale coefficients # warn on rank reduction, which indicates an ill conditioned matrix if rank != order and not full: msg = "Polyfit may be poorly conditioned" warnings.warn(msg, RankWarning) if full : return c, resids, rank, s, rcond elif cov : Vbase = inv(dot(lhs.T,lhs)) Vbase /= NX.outer(scale, scale) # Some literature ignores the extra -2.0 factor in the denominator, but # it is included here because the covariance of Multivariate Student-T # (which is implied by a Bayesian uncertainty analysis) includes it. # Plus, it gives a slightly more conservative estimate of uncertainty. fac = resids / (len(x) - order - 2.0) if y.ndim == 1: return c, Vbase * fac else: return c, Vbase[:,:,NX.newaxis] * fac else : return c
def polyfit(x, y, deg, rcond=None, full=False, w=None, cov=False): import numpy.core.numeric as NX from numpy.core import isscalar, abs, dot from numpy.lib.twodim_base import diag, vander from numpy.linalg import eigvals, lstsq, inv try: from numpy.core import finfo # 1.7 except: from numpy.lib.getlimits import finfo # 1.3 support for cluster order = int(deg) + 1 x = NX.asarray(x) + 0.0 y = NX.asarray(y) + 0.0 # check arguments. if deg < 0: raise ValueError("expected deg >= 0") if x.ndim != 1: raise TypeError("expected 1D vector for x") if x.size == 0: raise TypeError("expected non-empty vector for x") if y.ndim < 1 or y.ndim > 2: raise TypeError("expected 1D or 2D array for y") if x.shape[0] != y.shape[0]: raise TypeError("expected x and y to have same length") # set rcond if rcond is None: rcond = len(x) * finfo(x.dtype).eps # set up least squares equation for powers of x lhs = vander(x, order) rhs = y # apply weighting if w is not None: w = NX.asarray(w) + 0.0 if w.ndim != 1: raise TypeError, "expected a 1-d array for weights" if w.shape[0] != y.shape[0]: raise TypeError, "expected w and y to have the same length" lhs *= w[:, NX.newaxis] if rhs.ndim == 2: rhs *= w[:, NX.newaxis] else: rhs *= w # scale lhs to improve condition number and solve scale = NX.sqrt((lhs * lhs).sum(axis=0)) lhs /= scale c, resids, rank, s = lstsq(lhs, rhs, rcond) c = (c.T / scale).T # broadcast scale coefficients # warn on rank reduction, which indicates an ill conditioned matrix if rank != order and not full: msg = "Polyfit may be poorly conditioned" warnings.warn(msg, RankWarning) if full: return c, resids, rank, s, rcond elif cov: Vbase = inv(dot(lhs.T, lhs)) Vbase /= NX.outer(scale, scale) # Some literature ignores the extra -2.0 factor in the denominator, but # it is included here because the covariance of Multivariate Student-T # (which is implied by a Bayesian uncertainty analysis) includes it. # Plus, it gives a slightly more conservative estimate of uncertainty. fac = resids / (len(x) - order - 2.0) if y.ndim == 1: return c, Vbase * fac else: return c, Vbase[:, :, NX.newaxis] * fac else: return c