def diagsvd(s, M, N):
    """Return sigma from singular values and original size M,N.

    Inputs:

      s    -- sequence of singular values; len(s) must equal M or N.
      M, N -- number of rows and columns of the matrix to build.

    Outputs:

      sigma -- diag(s) extended with a block of zeros (same typecode as
               diag(s)) so that the result has M rows and N columns.

    Raises ValueError if len(s) is neither M nor N.
    """
    part = diag(s)
    typ = part.typecode()
    MorN = len(s)
    if MorN == M:
        # part is M x M: append an M x (N-M) block of zeros with c_.
        return c_[part, zeros((M, N-M), typ)]
    elif MorN == N:
        # part is N x N: append an (M-N) x N block of zeros with r_.
        return r_[part, zeros((M-N, N), typ)]
    else:
        raise ValueError("Length of s must be M or N.")
def factorial2(n, exact=0):
    """Double factorial n!! = n*(n-2)*(n-4)*...

    If exact==0, then floating point precision is used and the value is
    evaluated as

        n!! = special.gamma(n/2+1) * 2**((n+1)/2) / sqrt(pi)    n odd
            = special.gamma(n/2+1) * 2**(n/2)                   n even

    otherwise an exact long integer is computed by repeated multiplication.

    Notes:
      - Array argument accepted only for exact=0 case.
      - If n < -1, the return value is 0; n = -1 and n = 0 give 1.
    """
    if not exact:
        n = asarray(n)
        vals = zeros(n.shape, 'd')
        # Masks selecting the odd / even entries that are >= -1; every
        # other entry keeps the initial value 0.
        odd_mask = (n % 2) & (n >= -1)
        even_mask = (1-(n % 2)) & (n >= -1)
        half_odd = extract(odd_mask, n) / 2.0
        half_even = extract(even_mask, n) / 2.0
        insert(vals, odd_mask,
               special.gamma(half_odd+1)/sqrt(pi)*pow(2.0, half_odd+0.5))
        insert(vals, even_mask,
               special.gamma(half_even+1) * pow(2.0, half_even))
        return vals

    # Exact branch: scalar n only, result is a long integer.
    if n < -1:
        return long(0)
    if n <= 0:
        return long(1)
    n = long(n)
    product = long(1)
    factor = n
    while factor > 0:
        product = product*factor
        factor -= 2
    return product
def kstat(data, n=2):
    """Return the nth k-statistic (1<=n<=4 so far).

    The nth k-statistic is the unique symmetric unbiased estimator of the nth
    cumulant kappa_n

    Inputs:

      data -- sample values; flattened with ravel before use.
      n    -- order of the statistic (default 2).  It is range-checked and
              then truncated with int().

    Outputs:

      The k-statistic, computed from the power sums S[k] = sum(data**k)
      for k = 1..n and the sample size N = len(data).

    Raises ValueError unless 1 <= n <= 4.
    """
    if n > 4 or n < 1:
        raise ValueError("k-statistics only supported for 1<=n<=4")
    n = int(n)
    S = zeros(n+1, 'd')
    data = ravel(data)
    N = len(data)
    # S[0] is unused; S[k] holds the k-th power sum of the data.
    for k in range(1, n+1):
        S[k] = sum(data**k)
    if n == 1:
        return S[1]*1.0/N
    elif n == 2:
        return (N*S[2]-S[1]**2.0)/(N*(N-1.0))
    elif n == 3:
        return (2*S[1]**3 - 3*N*S[1]*S[2]+N*N*S[3]) / (N*(N-1.0)*(N-2.0))
    elif n == 4:
        return (-6*S[1]**4 + 12*N*S[1]**2 * S[2] - 3*N*(N-1.0)*S[2]**2 -
                4*N*(N+1)*S[1]*S[3] + N*N*(N+1)*S[4]) / \
               (N*(N-1.0)*(N-2.0)*(N-3.0))
    else:
        # Defensive only: the range check above already excludes this.
        raise ValueError("Should not be here.")
def hankel(c, r=None):
    """ Construct a hankel matrix (i.e. matrix with constant anti-diagonals).

    Description:

      hankel(c,r) is a Hankel matrix whose first column is c and whose
      last row is r.

      hankel(c) is a square Hankel matrix whose first column is c.
      Elements below the first anti-diagonal are zero.

    Notes:

      - If c or r is a scalar, c is returned unchanged.
      - If r[0] differs from c[-1] a warning is printed and the column
        value is used for the shared corner element.

    See also:  toeplitz
    """
    isscalar = scipy_base.isscalar
    if isscalar(c) or isscalar(r):
        return c
    if r is None:
        r = zeros(len(c))
    elif r[0] != c[-1]:
        print("Warning: column and row values don't agree; column value used.")
    r, c = map(asarray_chkfinite, (r, c))
    r, c = map(ravel, (r, c))
    rN, cN = map(len, (r, c))
    # All distinct anti-diagonal values: the column, then the row without
    # its first element (the corner shared with c[-1]).
    vals = r_[c, r[1:rN]]
    cols = mgrid[1:cN+1]
    rows = mgrid[0:rN]
    # indx[i,j] = i + j, so equal anti-diagonals pick the same entry of vals.
    indx = cols[:, NewAxis]*ones((1, rN)) + \
           rows[NewAxis, :]*ones((cN, 1)) - 1
    return take(vals, indx)
def probplot(x, sparams=(), dist='norm', fit=1, plot=None):
    """Return (osm, osr){,(scale,loc,r)} where (osm, osr) are order statistic
    medians and ordered response data respectively so that plot(osm, osr)
    is a probability plot.  If fit==1, then do a regression fit and compute the
    slope (scale), intercept (loc), and correlation coefficient (r), of the
    best straight line through the points.  If fit==0, only (osm, osr) is
    returned.

    sparams is a tuple of shape parameter arguments for the distribution
    (None, a scalar or any other sequence is converted to a tuple).

    dist is the name of an attribute of the distributions module whose ppf
    function ends with the arguments loc=0.0 and scale=1.0.

    If plot is not None it must provide plot(), title(), xlabel() and
    ylabel(); expand_limits() and addtext() are used on a best-effort
    basis.  The points and the fitted line are drawn on it (the regression
    is then computed even when fit==0).

    Raises ValueError if dist has no ppf, if the ppf does not have the
    expected loc/scale defaults, or if the number of shape parameters
    does not match the ppf signature.
    """
    N = len(x)
    # Uniform order statistic medians.
    Ui = zeros(N)*1.0
    Ui[-1] = 0.5**(1.0/N)
    Ui[0] = 1-Ui[-1]
    i = arange(2, N)
    Ui[1:-1] = (i-0.3175)/(N+0.365)
    try:
        # Attribute lookup instead of eval() on a caller-supplied string.
        ppf_func = getattr(distributions, dist).ppf
    except AttributeError:
        raise ValueError("%s is not a valid distribution with a ppf." % dist)
    if sparams is None:
        sparams = ()
    if isscalar(sparams):
        sparams = (sparams,)
    if not isinstance(sparams, tuple):
        sparams = tuple(sparams)
    # res is (argument names, varargs, varkw, defaults).
    res = inspect.getargspec(ppf_func)
    if not ('loc' == res[0][-2] and 'scale' == res[0][-1] and
            0.0 == res[-1][-2] and 1.0 == res[-1][-1]):
        raise ValueError("Function does not have default location "
                         "and scale parameters\n  that are 0.0 and 1.0 "
                         "respectively.")
    if (len(sparams) < len(res[0])-len(res[-1])-1) or \
       (len(sparams) > len(res[0])-3):
        raise ValueError("Incorrect number of shape parameters.")
    osm = ppf_func(Ui, *sparams)
    osr = sort(x)
    if fit or (plot is not None):
        # perform a linear fit.
        slope, intercept, r, prob, sterrest = stats.linregress(osm, osr)
    if plot is not None:
        # Best effort: xplt may be missing or unusable in this installation.
        try:
            import scipy.xplt as xplt
            xplt.limits()
        except Exception:
            pass
        plot.plot(osm, osr, 'o', osm, slope*osm + intercept)
        plot.title('Probability Plot')
        plot.xlabel('Order Statistic Medians')
        plot.ylabel('Ordered Values')
        # Best effort: not every plotting object provides expand_limits.
        try:
            plot.expand_limits(5)
        except Exception:
            pass
        xmin, xmax = amin(osm), amax(osm)
        ymin, ymax = amin(x), amax(x)
        pos = xmin+0.70*(xmax-xmin), ymin+0.01*(ymax-ymin)
        # The label reads r^2, so show the squared correlation coefficient.
        try:
            plot.addtext("r^2^=%1.4f" % (r**2), xy=pos, tosys=1)
        except Exception:
            pass
    if fit:
        return (osm, osr), (slope, intercept, r)
    else:
        return osm, osr
def lstsq(a, b, cond=None, overwrite_a=0, overwrite_b=0):
    """ lstsq(a, b, cond=None, overwrite_a=0, overwrite_b=0) -> x,resids,rank,s

    Return least-squares solution of a * x = b.

    Inputs:

      a -- An M x N matrix.
      b -- An M x nrhs matrix or M vector.
      cond -- Used to determine effective rank of a.
      overwrite_a, overwrite_b -- if non-zero, the corresponding input may
              be used as a work array by gelss.

    Outputs:

      x -- The solution (N x nrhs matrix) to the minimization problem:
                  2-norm(| b - a * x |) -> min
      resids -- The residual sum-of-squares for the solution matrix x
                (only if M>N and rank==N); otherwise an empty array.
      rank -- The effective rank of a.
      s -- Singular values of a in decreasing order. The condition number
           of a is abs(s[0]/s[-1]).

    Raises ValueError for non-matrix a, incompatible dimensions or an
    illegal gelss argument, LinAlgError if the SVD does not converge and
    NotImplementedError if gelss does not come from flapack.
    """
    a1, b1 = map(asarray_chkfinite, (a, b))
    if len(a1.shape) != 2:
        raise ValueError('expected matrix')
    m, n = a1.shape
    if len(b1.shape) == 2:
        nrhs = b1.shape[1]
    else:
        nrhs = 1
    if m != b1.shape[0]:
        raise ValueError('incompatible dimensions')
    gelss, = get_lapack_funcs(('gelss',), (a1, b1))
    if n > m:
        # need to extend b matrix as it will be filled with
        # a larger solution matrix
        b2 = zeros((n, nrhs), gelss.typecode)
        if len(b1.shape) == 2:
            b2[:m, :] = b1
        else:
            b2[:m, 0] = b1
        b1 = b2
    # A temporary created by the conversion above may always be overwritten.
    overwrite_a = overwrite_a or (a1 is not a and not hasattr(a, '__array__'))
    overwrite_b = overwrite_b or (b1 is not b and not hasattr(b, '__array__'))
    if gelss.module_name[:7] == 'flapack':
        lwork = calc_lwork.gelss(gelss.prefix, m, n, nrhs)[1]
        v, x, s, rank, info = gelss(a1, b1, cond=cond,
                                    lwork=lwork,
                                    overwrite_a=overwrite_a,
                                    overwrite_b=overwrite_b)
    else:
        raise NotImplementedError('calling gelss from %s'
                                  % (gelss.module_name))
    if info > 0:
        raise LinAlgError("SVD did not converge in Linear Least Squares")
    if info < 0:
        raise ValueError('illegal value in %d-th argument of internal gelss'
                         % (-info))
    resids = asarray([], x.typecode())
    if n < m:
        x1 = x[:n]
        if rank == n:
            # Rows n: of the gelss output hold the residual components;
            # use the squared modulus so complex systems give real sums.
            resids = sum(abs(x[n:])**2)
        x = x1
    return x, resids, rank, s
def radon(arr, theta=None):
    """Sum a rotated copy of arr along axis 0 for every angle in theta.

    Inputs:

      arr   -- two-dimensional array (its shape[1] sets the row count of
               the result).
      theta -- sequence of rotation angles; defaults to mgrid[0:180].
               Each angle is negated and handed to imrotate
               (presumably degrees -- confirm against imrotate).

    Outputs:

      s -- array of doubles of shape (arr.shape[1], len(theta)); column k
           is sum(imrotate(arr, -theta[k]), axis=0).
    """
    if theta is None:
        theta = mgrid[0:180]
    nangles = len(theta)
    s = zeros((arr.shape[1], nangles), 'd')
    for k, th in zip(range(nangles), theta):
        s[:, k] = sum(imrotate(arr, -th), axis=0)
    return s
def hessenberg(a, calc_q=0, overwrite_a=0):
    """ Compute Hessenberg form of a matrix.

    Inputs:

      a -- the matrix
      calc_q -- if non-zero then calculate unitary similarity
                transformation matrix q.
      overwrite_a=0 -- if non-zero then discard the contents of a,
                     i.e. a is used as a work array if possible.

    Outputs:

      h    -- Hessenberg form of a                [calc_q=0]
      h, q -- matrices such that a = q * h * q^T  [calc_q=1]

    Raises ValueError if a is not a square matrix or if gebal/gehrd
    report an illegal argument.
    """
    a1 = asarray(a)
    if len(a1.shape) != 2 or (a1.shape[0] != a1.shape[1]):
        raise ValueError('expected square matrix')
    # A temporary created by asarray may always be overwritten.
    overwrite_a = overwrite_a or (a1 is not a and not hasattr(a, '__array__'))
    gehrd, gebal = get_lapack_funcs(('gehrd', 'gebal'), (a1,))
    # Balance the validated array a1: overwrite_a above refers to it.
    # NOTE(review): permute=1 is requested but pivscale is never applied to
    # q -- confirm a = q*h*q^T still holds when gebal permutes a.
    ba, lo, hi, pivscale, info = gebal(a1, permute=1, overwrite_a=overwrite_a)
    if info < 0:
        raise ValueError('illegal value in %d-th argument of internal gebal '
                         '(hessenberg)' % (-info))
    n = len(a1)
    lwork = calc_lwork.gehrd(gehrd.prefix, n, lo, hi)
    hq, tau, info = gehrd(ba, lo=lo, hi=hi, lwork=lwork, overwrite_a=1)
    if info < 0:
        raise ValueError('illegal value in %d-th argument of internal gehrd '
                         '(hessenberg)' % (-info))

    if not calc_q:
        # Clear the entries below the first subdiagonal, where gehrd left
        # the reflector vectors.
        for i in range(lo, hi):
            hq[i+2:hi+1, i] = 0.0
        return hq

    # XXX: Use ORGHR routines to compute q.
    ger, gemm = get_blas_funcs(('ger', 'gemm'), (hq,))
    typecode = hq.typecode()
    q = None
    for i in range(lo, hi):
        if tau[i] == 0.0:
            continue
        # Rebuild reflector i: v has 1 at i+1 and the stored tail below it.
        v = zeros(n, typecode=typecode)
        v[i+1] = 1.0
        v[i+2:hi+1] = hq[i+2:hi+1, i]
        hq[i+2:hi+1, i] = 0.0
        # h = identity - tau[i] * v * v^T, accumulated into q by gemm.
        h = ger(-tau[i], v, v, a=diag(ones(n, typecode=typecode)),
                overwrite_a=1)
        if q is None:
            q = h
        else:
            q = gemm(1.0, q, h)
    if q is None:
        # No reflector was applied: q is the identity.
        q = diag(ones(n, typecode=typecode))
    return hq, q
def _make_complex_eigvecs(w, vin, cmplx_tcode):
    """Assemble complex eigenvector columns from a real-valued layout.

    Inputs:

      w           -- eigenvalues; columns whose eigenvalue has a nonzero
                     imaginary part are processed in consecutive pairs
                     (presumably conjugate pairs as returned by a real
                     LAPACK eigen-solver -- confirm against the caller).
      vin         -- matrix whose columns are the stored vectors.
      cmplx_tcode -- typecode of the complex result.

    Outputs:

      v -- copy of vin with typecode cmplx_tcode in which, for each pair
           (j, k) of such columns, column j is vin[:,j] + 1j*vin[:,k] and
           column k is its conjugate.
    """
    v = scipy_base.array(vin, typecode=cmplx_tcode)
    ind = scipy_base.nonzero(scipy_base.not_equal(w.imag, 0.0))
    npairs = len(ind) >> 1
    # First column of each pair is the real part, second the imaginary part.
    vnew = scipy_base.zeros((v.shape[0], npairs), cmplx_tcode)
    vnew.real = scipy_base.take(vin, ind[::2], 1)
    vnew.imag = scipy_base.take(vin, ind[1::2], 1)
    for pair in range(npairs):
        first = ind[2*pair]
        second = ind[2*pair+1]
        v[:, first] = vnew[:, pair]
        v[:, second] = scipy_base.conjugate(vnew[:, pair])
    return v
def bartlett(*args):
    """Perform Bartlett test with the null hypothesis that all input samples
    have equal variances.

    Inputs are sample vectors:  bartlett(x,y,z,...)

    Outputs: (T, pval)

         T    -- the Test statistic
         pval -- significance level if null is rejected with this value of T
                 (prob. that null is true but rejected with this p-value.)
                 Computed as distributions.chi2.sf(T, k-1) for k samples.

    Raises ValueError if fewer than two samples are given.

    Sensitive to departures from normality.  The Levene test is
    an alternative that is less sensitive to departures from
    normality.

    References:

      http://www.itl.nist.gov/div898/handbook/eda/section3/eda357.htm

      Snedecor, George W. and Cochran, William G. (1989), Statistical
        Methods, Eighth Edition, Iowa State University Press.
    """
    k = len(args)
    if k < 2:
        raise ValueError("Must enter at least two input sample vectors.")
    Ni = zeros(k)          # sample sizes
    ssq = zeros(k, 'd')    # sample variances from stats.var
    for j in range(k):
        Ni[j] = len(args[j])
        ssq[j] = stats.var(args[j])
    Ntot = sum(Ni)
    # Pooled variance, weighting each sample variance by Ni-1.
    spsq = sum((Ni-1)*ssq)/(1.0*(Ntot-k))
    numer = (Ntot*1.0-k)*log(spsq) - sum((Ni-1.0)*log(ssq))
    denom = 1.0 + (1.0/(3*(k-1)))*((sum(1.0/(Ni-1.0)))-1.0/(Ntot-k))
    T = numer / denom
    pval = distributions.chi2.sf(T, k-1)  # 1 - cdf
    return T, pval
def _fix_shape(x, n, axis):
    """ Internal auxiliary function for _raw_fft, _raw_fftnd.

    Return x with its length along axis forced to n: longer input is
    truncated to the first n entries, otherwise the data is copied into
    the leading part of a new zeros array with the same typecode.
    """
    s = list(x.shape)
    index = [slice(None)]*len(s)
    if s[axis] > n:
        # Too long: keep only the first n entries along axis.
        index[axis] = slice(0, n)
        return x[index]
    # Not longer than n: copy into the head of a zero-filled array.
    index[axis] = slice(0, s[axis])
    s[axis] = n
    z = zeros(s, x.typecode())
    z[index] = x
    return z
def qr(a, overwrite_a=0, lwork=None):
    """QR decomposition of an M x N matrix a.

    Description:

      Find a unitary matrix, q, and an upper-trapezoidal matrix r
      such that q * r = a

    Inputs:

      a -- the matrix
      overwrite_a=0 -- if non-zero then discard the contents of a,
                     i.e. a is used as a work array if possible.

      lwork=None -- >= shape(a)[1]. If None (or -1) compute optimal
                    work array size.

    Outputs:

      q, r -- matrices such that q * r = a

    Raises ValueError if a is not a matrix or if geqrf reports an illegal
    argument.
    """
    a1 = asarray_chkfinite(a)
    if len(a1.shape) != 2:
        raise ValueError('expected matrix')
    M, N = a1.shape
    # A temporary created by the conversion above may always be overwritten.
    overwrite_a = overwrite_a or (a1 is not a and not hasattr(a, '__array__'))
    geqrf, = get_lapack_funcs(('geqrf',), (a1,))
    if lwork is None or lwork == -1:
        # get optimal work array
        qr_fact, tau, work, info = geqrf(a1, lwork=-1, overwrite_a=1)
        lwork = work[0]
    qr_fact, tau, work, info = geqrf(a1, lwork=lwork, overwrite_a=overwrite_a)
    if info < 0:
        raise ValueError('illegal value in %d-th argument of internal geqrf'
                         % (-info))
    gemm, = get_blas_funcs(('gemm',), (qr_fact,))
    t = qr_fact.typecode()
    R = basic.triu(qr_fact)
    Q = scipy_base.identity(M, typecode=t)
    ident = scipy_base.identity(M, typecode=t)
    zeros = scipy_base.zeros
    # Accumulate Q as the product of the reflectors H_i = I - tau[i]*v*v^H,
    # where v is stored below the diagonal of column i of the factorization.
    for i in range(min(M, N)):
        v = zeros((M,), t)
        v[i] = 1
        v[i+1:M] = qr_fact[i+1:M, i]
        H = gemm(-tau[i], v, v, 1+0j, ident, trans_b=2)
        Q = gemm(1, Q, H)
    return Q, R
def pinv2(a, cond=None):
    """ pinv2(a, cond=None) -> a_pinv

    Compute the generalized inverse of A using svd.

    Inputs:

      a    -- the matrix (checked for finiteness).
      cond -- singular values not larger than cond * max(singular values)
              are treated as zero.  None or -1 selects a default that
              depends on the precision of the svd output.

    Outputs:

      a_pinv -- conjugate transpose of u * psigma * vh, where psigma
                holds the reciprocals of the retained singular values.
    """
    a = asarray_chkfinite(a)
    u, s, vh = decomp.svd(a)
    t = u.typecode()
    if cond in (None, -1):
        defaults = {0: feps*1e3, 1: eps*1e6}
        cond = defaults[_array_precision[t]]
    m, n = a.shape
    cutoff = cond*scipy_base.maximum.reduce(s)
    psigma = zeros((m, n), t)
    for i in range(len(s)):
        sv = s[i]
        if sv > cutoff:
            psigma[i, i] = 1.0/conjugate(sv)
    #XXX: use lapack/blas routines for dot
    product = dot(dot(u, psigma), vh)
    return transpose(conjugate(product))
def boxcox_normmax(x, brack=(-1.0, 1.0)):
    """Return the Box-Cox parameter found by optimize.brent that minimizes
    1-r, where r is the correlation between normal order statistic medians
    and the sorted, transformed data.

    Inputs:

      x     -- the data to transform.
      brack -- bracket passed to optimize.brent (default (-1.0, 1.0)).
    """
    N = len(x)
    # compute uniform median statistics
    Ui = zeros(N)*1.0
    Ui[-1] = 0.5**(1.0/N)
    Ui[0] = 1-Ui[-1]
    Ui[1:-1] = (arange(2, N)-0.3175)/(N+0.365)
    # x-axis values of the probability plot
    quantiles = distributions.norm.ppf(Ui)

    # Objective: correlate the quantiles with the sorted transformed data
    # and return 1-r, so that minimizing it maximizes the correlation.
    def one_minus_r(lmbda, xvals, samps):
        yvals = sort(boxcox(samps, lmbda))
        r, prob = stats.pearsonr(xvals, yvals)
        return 1-r

    return optimize.brent(one_minus_r, brack=brack, args=(quantiles, x))
def pade(an, m):
    """Given Taylor series coefficients in an, return a Pade approximation to
    the function as the ratio of two polynomials p / q  where the order of q is m.

    Inputs:

      an -- Taylor coefficients a_0, a_1, ..., a_N (lowest order first).
      m  -- order of the denominator q; must satisfy 0 <= m <= len(an)-1.

    Outputs:

      p, q -- poly1d objects; p has order len(an)-1-m and q has constant
              term 1.

    Raises ValueError if m exceeds len(an)-1.
    """
    an = asarray(an)
    N = len(an) - 1
    n = N-m
    if (n < 0):
        raise ValueError("Order of q <m> must be smaller than len(an).")
    # Matching coefficients of p = an*q (with q_0 = 1) up to order N gives
    # the linear system C * [p_0..p_n, q_1..q_m] = an.
    Akj = eye(N+1, n+1)
    Bkj = zeros((N+1, m), 'd')
    for row in range(1, m+1):
        Bkj[row, :row] = -(an[:row])[::-1]
    for row in range(m+1, N+1):
        Bkj[row, :] = -(an[row-m:row])[::-1]
    C = hstack((Akj, Bkj))
    # Solve the system directly instead of forming the explicit inverse.
    pq = linalg.solve(C, an)
    p = pq[:n+1]
    q = r_[1.0, pq[n+1:]]
    # poly1d expects the highest-order coefficient first.
    return poly1d(p[::-1]), poly1d(q[::-1])
def ppcc_max(x, brack=(0.0, 1.0), dist='tukeylambda'):
    """Returns the shape parameter that maximizes the probability plot
    correlation coefficient for the given data to a one-parameter
    family of distributions.

    Inputs:

      x     -- the data.
      brack -- bracket passed to optimize.brent (default (0.0, 1.0)).
      dist  -- name of an attribute of the distributions module whose ppf
               takes exactly one shape parameter followed by loc=0.0 and
               scale=1.0.

    Raises ValueError if dist has no ppf, if the ppf does not have the
    expected loc/scale defaults, or if it is not a one-parameter family.

    See also ppcc_plot
    """
    try:
        # Attribute lookup instead of eval() on a caller-supplied string.
        ppf_func = getattr(distributions, dist).ppf
    except AttributeError:
        raise ValueError("%s is not a valid distribution with a ppf." % dist)
    # res is (argument names, varargs, varkw, defaults).
    res = inspect.getargspec(ppf_func)
    if not ('loc' == res[0][-2] and 'scale' == res[0][-1] and
            0.0 == res[-1][-2] and 1.0 == res[-1][-1]):
        raise ValueError("Function does not have default location "
                         "and scale parameters\n  that are 0.0 and 1.0 "
                         "respectively.")
    if (1 < len(res[0])-len(res[-1])-1) or \
       (1 > len(res[0])-3):
        raise ValueError("Must be a one-parameter family.")
    N = len(x)
    # compute uniform median statistics
    Ui = zeros(N)*1.0
    Ui[-1] = 0.5**(1.0/N)
    Ui[0] = 1-Ui[-1]
    i = arange(2, N)
    Ui[1:-1] = (i-0.3175)/(N+0.365)
    osr = sort(x)
    # this function computes the x-axis values of the probability plot
    #  and computes a linear regression (including the correlation)
    #  and returns 1-r so that a minimization function maximizes the
    #  correlation
    def tempfunc(shape, mi, yvals, func):
        xvals = func(mi, shape)
        r, prob = stats.pearsonr(xvals, yvals)
        return 1-r
    return optimize.brent(tempfunc, brack=brack, args=(Ui, osr, ppf_func))
def shapiro(x, a=None, reta=0):
    """Shapiro and Wilk test for normality.

    Given random variates x, compute the W statistic and its p-value for
    a normality test.

    If p-value is high, one cannot reject the null hypothesis of normality
    with this test.  P-value is probability that the W statistic is
    as low as it is if the samples are actually from a normal distribution.

    Inputs:

      x    -- the data; at least 3 values are required.
      a    -- optional precomputed "a" values of length len(x)/2.
      reta -- if nonzero, also return the "a" values.

    Output:  W statistic and its p-value

              if reta is nonzero then also return the computed "a" values
                 as the third output.  If these are known for a given size
                 they can be given as input instead of computed internally.

    A nonzero fault code from statlib.swilk other than 2 is printed, as is
    a notice when len(x) > 5000.
    """
    N = len(x)
    if N < 3:
        raise ValueError("Data must be at least length 3.")
    # Explicit floor division: the half-length must stay an integer.
    half = N//2
    if a is None:
        a = zeros(N, 'f')
        init = 0
    else:
        assert(len(a) == half), "a must be == len(x)/2"
        init = 1
    y = sort(x)
    a, w, pw, ifault = statlib.swilk(y, a[:half], init)
    if ifault not in [0, 2]:
        print(ifault)
    if N > 5000:
        print("p-value may not be accurate for N > 5000.")
    if reta:
        return w, pw, a
    else:
        return w, pw
Inputs: (l,u) -- number of non-zero lower and upper diagonals, respectively. a -- An N x (l+u+1) matrix. b -- An N x nrhs matrix or N vector. overwrite_y - Discard data in y, where y is ab or b. Outputs: x -- The solution to the system a * x = b """ a1, b1 = map(asarray_chkfinite,(ab,b)) overwrite_b = overwrite_b or (b1 is not b and not hasattr(b,'__array__')) gbsv, = get_lapack_funcs(('gbsv',),(a1,b1)) a2 = zeros((2*l+u+1,a1.shape[1]),gbsv.typecode) a2[l:,:] = a1 lu,piv,x,info = gbsv(l,u,a2,b1, overwrite_ab=1, overwrite_b=overwrite_b) if info==0: return x if info>0: raise LinAlgError, "singular matrix" raise ValueError,\ 'illegal value in %-th argument of internal gbsv'%(-info) # matrix inversion def inv(a, overwrite_a=0): """ inv(a, overwrite_a=0) -> a_inv
def levene(*args, **kwds):
    """Perform Levene test with the null hypothesis that all input samples
    have equal variances.

    Inputs are sample vectors:  levene(x,y,z,...)

    One keyword input, center, can be used with values
        center = 'mean', center='median' (default), center='trimmed'

    center='median' is recommended for skewed (non-normal) distributions
    center='mean' is recommended for symmetric, moderate-tailed, dist.
    center='trimmed' is recommended for heavy-tailed distributions.

    Outputs: (W, pval)

         W    -- the Test statistic
         pval -- significance level if null is rejected with this value of W
                 (prob. that null is true but rejected with this p-value.)

    Raises ValueError if fewer than two samples are given or if center is
    not one of the three accepted values.

    References:

       http://www.itl.nist.gov/div898/handbook/eda/section3/eda35a.htm

       Levene, H. (1960). In Contributions to Probability and Statistics:
         Essays in Honor of Harold Hotelling, I. Olkin et al. eds.,
         Stanford University Press, pp. 278-292.
       Brown, M. B. and Forsythe, A. B. (1974), Journal of the American
         Statistical Association, 69, 364-367
    """
    k = len(args)
    if k < 2:
        raise ValueError("Must enter at least two input sample vectors.")
    Ni = zeros(k)
    Yci = zeros(k, 'd')
    center = kwds.get('center', 'median')
    if center not in ['mean', 'median', 'trimmed']:
        raise ValueError("Keyword argument <center> must be 'mean', 'median'"
                         " or 'trimmed'.")
    if center == 'median':
        func = stats.median
    elif center == 'mean':
        func = stats.mean
    else:
        # NOTE(review): trim_mean is called with the sample only; confirm
        # it does not require a proportion-to-cut argument.
        func = stats.trim_mean
    for j in range(k):
        Ni[j] = len(args[j])
        Yci[j] = func(args[j])
    Ntot = sum(Ni)

    # compute Zij's: absolute deviations of each sample from its center
    Zij = [None]*k
    for i in range(k):
        Zij[i] = abs(asarray(args[i])-Yci[i])
    # compute Zbari (per-sample means) and Zbar (size-weighted overall mean)
    Zbari = zeros(k, 'd')
    Zbar = 0.0
    for i in range(k):
        Zbari[i] = stats.mean(Zij[i])
        Zbar += Zbari[i]*Ni[i]
    Zbar /= Ntot

    numer = (Ntot-k)*sum(Ni*(Zbari-Zbar)**2)

    # compute denom_variance
    dvar = 0.0
    for i in range(k):
        dvar += sum((Zij[i]-Zbari[i])**2)

    denom = (k-1.0)*dvar

    W = numer / denom
    pval = distributions.f.sf(W, k-1, Ntot-k)  # 1 - cdf
    return W, pval
def bicg(A, b, x0=None, tol=1e-5, maxiter=None, xtype=None):
    """Use BIConjugate Gradient iteration to solve A x = b

    Inputs:

    A --   An array or an object with matvec(x) and rmatvec(x) methods
           to represent A * x and A^H * x respectively.  May also have
           psolve(b) and rpsolve(b) methods for representing solutions
           to the preconditioning equations M * x = b and
           M^H * x = b respectively.
    b --   An n-length vector

    Outputs:

    x  --  The converged solution
    info -- output result
            0  : successful exit
            >0 : convergence to tolerance not achieved, number of iterations
            <0 : illegal input or breakdown

    Optional Inputs:

    x0  -- (0) default starting guess
    tol -- (1e-5) relative tolerance to achieve
    maxiter -- (10*n) maximum number of iterations
    xtype  --  The type of the result.  If None, then it will be determined
                 from A.typecode() and b.  If A does not have a typecode
                 method then it will compute A.matvec(x0) to get a typecode.
                 To save the extra computation when A does not have a
                 typecode attribute use xtype=0 for the same type as b or
                 use xtype='f','d','F',or 'D'

    Raises ValueError if the resulting type is not 'f', 'd', 'F' or 'D'.
    """
    b = sb.asarray(b)+0.0
    n = len(b)
    if maxiter is None:
        maxiter = n*10

    x = x0
    if x is None:
        x = sb.zeros(n)

    if xtype is None:
        try:
            atyp = A.typecode()
        except AttributeError:
            atyp = None
        if atyp is None:
            atyp = A.matvec(x).typecode()
        typ = _coerce_rules[b.typecode(), atyp]
    elif xtype == 0:
        typ = b.typecode()
    else:
        typ = xtype
        if typ not in 'fdFD':
            raise ValueError("xtype must be 'f', 'd', 'F', or 'D'")

    x = sb.asarray(x, typ)
    b = sb.asarray(b, typ)

    # The operator callbacks are looked up lazily, on first use.
    matvec, psolve, rmatvec, rpsolve = (None,)*4
    ltr = _type_conv[typ]
    revcom = _iterative.__dict__[ltr+'bicgrevcom']
    stoptest = _iterative.__dict__[ltr+'stoptest2']

    resid = tol
    ndx1 = 1
    ndx2 = -1
    work = sb.zeros(6*n, typ)
    ijob = 1
    info = 0
    ftflag = True
    bnrm2 = -1.0
    iter_ = maxiter
    while 1:
        x, iter_, resid, info, ndx1, ndx2, sclr1, sclr2, ijob = \
           revcom(b, x, work, iter_, resid, info, ndx1, ndx2, ijob)
        # ndx1/ndx2 are 1-based offsets of n-length vectors inside work.
        slice1 = slice(ndx1-1, ndx1-1+n)
        slice2 = slice(ndx2-1, ndx2-1+n)
        # Dispatch on the job code returned by revcom.
        if (ijob == -1):
            break
        elif (ijob == 1):
            if matvec is None:
                matvec = get_matvec(A)
            work[slice2] *= sclr2
            work[slice2] += sclr1*matvec(work[slice1])
        elif (ijob == 2):
            if rmatvec is None:
                rmatvec = get_rmatvec(A)
            work[slice2] *= sclr2
            work[slice2] += sclr1*rmatvec(work[slice1])
        elif (ijob == 3):
            if psolve is None:
                psolve = get_psolve(A)
            work[slice1] = psolve(work[slice2])
        elif (ijob == 4):
            if rpsolve is None:
                rpsolve = get_rpsolve(A)
            work[slice1] = rpsolve(work[slice2])
        elif (ijob == 5):
            if matvec is None:
                matvec = get_matvec(A)
            work[slice2] *= sclr2
            work[slice2] += sclr1*matvec(x)
        elif (ijob == 6):
            # info is set to -1 only for the first stopping test.
            if ftflag:
                info = -1
                ftflag = False
            bnrm2, resid, info = stoptest(work[slice1], b, bnrm2, tol, info)
        # Every non-terminating step re-enters revcom with ijob = 2.
        ijob = 2

    return x, info