def boxcox_llf(lmb, data):
    """Evaluate the Box-Cox log-likelihood at a given lambda.

    Inputs:

        lmb  -- the Box-Cox power parameter (lambda).
        data -- the samples to be transformed.

    Output:

        The value

            (lmb - 1) * sum(log(data)) - N/2 * log(sum((y - mean(y))**2) / N)

        where y = boxcox(data, lmb) and N = len(data).
    """
    n_obs = len(data)
    transformed = boxcox(data, lmb)
    deviations = transformed - stats.mean(transformed)
    # Contribution that depends on the raw data only through log(data).
    log_data_term = (lmb - 1) * sum(log(data))
    # Contribution of the (biased, 1/N) variance of the transformed data.
    variance_term = n_obs / 2.0 * log(sum(deviations ** 2.0 / n_obs))
    return log_data_term - variance_term
def boxcox(x, lmbda=None, alpha=None):
    """Return a positive dataset transformed by a Box-Cox power transformation.

    Inputs:

        x     -- the data to transform; every value must be strictly
                 positive (assumed to be an array supporting elementwise
                 comparison and arithmetic).
        lmbda -- if not None, do the transformation for that value.
                 If None, find the lambda that maximizes the
                 log-likelihood function and return it as the second
                 output argument.
        alpha -- if not None (and lmbda is None), return the
                 100(1-alpha)% confidence interval for lambda as the
                 third output argument.

    Outputs:

        y                   when lmbda is given,
        (y, lmax)           when lmbda is None and alpha is None,
        (y, lmax, interval) when lmbda is None and alpha is given.

        y is log(x) where lambda == 0 and (x**lambda - 1)/lambda elsewhere.

    Raises ValueError if any element of x is zero or negative.
    """
    # log(x) is not finite at zero, so zeros are rejected along with
    # negative values, matching the "must be positive" contract.
    if any(x <= 0):
        raise ValueError("Data must be positive.")

    if lmbda is not None:  # single transformation
        # x == x is elementwise True (False for NaN entries), so the
        # product spreads lmbda over the shape of x for use in where().
        lmbda = lmbda*(x == x)
        y = where(lmbda == 0, log(x), (x**lmbda - 1)/lmbda)
        return y

    # Otherwise find the lmbda that maximizes the log-likelihood function.
    def tempfunc(lmb, data):  # function to minimize
        return -boxcox_llf(lmb, data)

    lmax = optimize.brent(tempfunc, brack=(-2.0, 2.0), args=(x,))
    # With lmbda supplied, boxcox returns only the transformed data, so
    # the result must not be unpacked into (y, lmax).
    y = boxcox(x, lmax)
    if alpha is None:
        return y, lmax

    # Otherwise find confidence interval
    interval = _boxcox_conf_interval(x, lmax, alpha)
    return y, lmax, interval
def bartlett(*args):
    """Perform Bartlett test with the null hypothesis that all input samples
    have equal variances.

    Inputs are sample vectors:  bartlett(x,y,z,...)

    Outputs: (T, pval)

         T    -- the Test statistic
         pval -- chi-square survival function (1 - cdf) evaluated at T
                 with k-1 degrees of freedom, where k is the number of
                 samples; small values are evidence against equal
                 variances.

    Raises ValueError if fewer than two samples are given.

    Sensitive to departures from normality.  The Levene test is
    an alternative that is less sensitive to departures from
    normality.

    References:

      http://www.itl.nist.gov/div898/handbook/eda/section3/eda357.htm

      Snedecor, George W. and Cochran, William G. (1989), Statistical
        Methods, Eighth Edition, Iowa State University Press.
    """
    k = len(args)
    if k < 2:
        raise ValueError("Must enter at least two input sample vectors.")

    # Per-sample sizes and variances.
    # NOTE(review): the formula below presumes stats.var is the unbiased
    # (N-1) sample variance -- confirm against the stats module.
    Ni = zeros(k)
    ssq = zeros(k, 'd')
    for j, sample in enumerate(args):
        Ni[j] = len(sample)
        ssq[j] = stats.var(sample)

    Ntot = sum(Ni)
    # Pooled variance: per-sample variances weighted by (Ni - 1).
    spsq = sum((Ni-1)*ssq)/(1.0*(Ntot-k))
    numer = (Ntot*1.0-k)*log(spsq) - sum((Ni-1.0)*log(ssq))
    denom = 1.0 + (1.0/(3*(k-1)))*((sum(1.0/(Ni-1.0)))-1.0/(Ntot-k))
    T = numer / denom
    pval = distributions.chi2.sf(T, k-1)  # 1 - cdf
    return T, pval
def anderson(x, dist='norm'):
    """Anderson and Darling test for normal, exponential, logistic, or
    Gumbel (Extreme Value Type I) distribution.

    Inputs:

        x    -- the samples to test.
        dist -- one of 'norm', 'expon', 'logistic', 'gumbel' or
                'extreme1' ('gumbel' and 'extreme1' are treated alike).

    Outputs: (A2, critical, sig)

        A2       -- the Anderson-Darling statistic.
        critical -- the critical values for this distribution, rounded
                    to three decimals.
        sig      -- the significance levels, in percent, corresponding
                    to the critical values.

    Critical values provided are for the following significance levels

        norm/expon:   15%, 10%, 5%, 2.5%, 1%
        Gumbel:       25%, 10%, 5%, 2.5%, 1%
        logistic:     25%, 10%, 5%, 2.5%, 1%, 0.5%

    If A2 is larger than these critical values then for that significance
    level, the hypothesis that the data come from the chosen distribution
    can be rejected.

    Raises ValueError if dist is not one of the supported names.
    """
    if dist not in ['norm', 'expon', 'gumbel', 'extreme1', 'logistic']:
        raise ValueError("Invalid distribution.")

    y = sort(x)
    xbar = stats.mean(x)
    N = len(y)

    # Each branch standardizes the sorted data (w), maps it through the
    # hypothesized cdf (z), and scales the tabulated critical values by a
    # sample-size dependent factor.
    if dist == 'norm':
        s = stats.std(x)
        w = (y-xbar)/s
        z = distributions.norm.cdf(w)
        sig = array([15, 10, 5, 2.5, 1])
        critical = around(_Avals_norm / (1.0 + 4.0/N - 25.0/N/N), 3)
    elif dist == 'expon':
        w = y / xbar
        z = distributions.expon.cdf(w)
        sig = array([15, 10, 5, 2.5, 1])
        critical = around(_Avals_expon / (1.0 + 0.6/N), 3)
    elif dist == 'logistic':
        # Location a and scale b are obtained as the root of this
        # two-equation system, starting from the sample mean and std.
        def rootfunc(ab, xj, N):
            a, b = ab
            tmp = (xj-a)/b
            tmp2 = exp(tmp)
            val = [sum(1.0/(1+tmp2))-0.5*N,
                   sum(tmp*(1.0-tmp2)/(1+tmp2))+N]
            return array(val)
        sol0 = array([xbar, stats.std(x)])
        sol = optimize.fsolve(rootfunc, sol0, args=(x, N), xtol=1e-5)
        w = (y-sol[0])/sol[1]
        z = distributions.logistic.cdf(w)
        sig = array([25, 10, 5, 2.5, 1, 0.5])
        critical = around(_Avals_logistic / (1.0+0.25/N), 3)
    else:
        # 'gumbel' / 'extreme1': the scale s is found by fixed-point
        # iteration started at 1.0; the location then follows from s.
        def fixedsolve(th, xj, N):
            val = stats.sum(xj)*1.0/N
            tmp = exp(-xj/th)
            term = sum(xj*tmp)
            term /= sum(tmp)
            return val - term
        s = optimize.fixed_point(fixedsolve, 1.0, args=(x, N), xtol=1e-5)
        xbar = -s*log(sum(exp(-x/s))*1.0/N)
        w = (y-xbar)/s
        z = distributions.gumbel_l.cdf(w)
        sig = array([25, 10, 5, 2.5, 1])
        critical = around(_Avals_gumbel / (1.0 + 0.2/sqrt(N)), 3)

    # A2 = -N - sum_i (2i-1)/N * [log(z_i) + log(1 - z_{N+1-i})]
    i = arange(1, N+1)
    S = sum((2*i-1.0)/N*(log(z)+log(1-z[::-1])))
    A2 = -N-S
    return A2, critical, sig