def column_degeneracy(a, cutoff=.5):
    """Return, per column, how many of the largest values are needed to reach cutoff.

    a: two-dimensional numeric array (anything ``array()`` accepts)
    cutoff: cumulative total that has to be reached within each column

    Each column is sorted in descending order and summed cumulatively; the
    result for a column is the number of leading values whose running total
    is >= cutoff.

    Example:
    [   [.1    .8  .3],
        [.3    .2  .3],
        [.6    0   .4]]
    if cutoff = .75: column_degeneracy -> [2,1,3]
    if cutoff = .45: column_degeneracy -> [1,1,2]

    WARNING: watch out with floating point numbers.
    If the cutoff is 0.9 and a running total should also be 0.9, rounding
    may make the total fall just below the cutoff:
    >>> searchsorted(cumsum(array([.6,.3,.1])),.9)
    2
    >>> searchsorted(cumsum(array([.5,.4,.1])),.9)
    1

    If the cutoff is never reached in a column, the result is clipped to
    the number of rows in the array.

    Returns an integer array with one entry per column, or the empty list
    ``[]`` when the input is empty or contains only zeros.
    Raises ValueError if the (non-zero) input is not two dimensional.
    """
    data = array(a)
    # Explicit "no non-zero element" test: the bare truth value of a
    # multi-element array is ambiguous and raises instead of answering.
    if not data.any():
        return []
    if len(data.shape) != 2:
        raise ValueError("Array has to be two dimensional")
    # Sort every column ascending, flip the row order to get descending
    # columns, then accumulate down the rows.  The axis is passed explicitly
    # because cumsum without an axis may operate on the flattened array.
    running = cumsum(sort(data, 0)[::-1], 0)
    degen = [searchsorted(running[:, col], cutoff)
             for col in range(data.shape[1])]
    # degen holds the indices at which the cutoff was hit;
    # add 1 to turn an index into a number of characters.
    return clip(array(degen) + 1, 0, data.shape[0])
def row_degeneracy(a, cutoff=.5):
    """Return, per row, how many of the largest values are needed to reach cutoff.

    a: two-dimensional numeric array (anything ``array()`` accepts)
    cutoff: cumulative total that has to be reached within each row

    Each row is sorted in descending order and summed cumulatively; the
    result for a row is the number of leading values whose running total
    is >= cutoff.

    Example:
    [   [.1    .3  .4  .2],
        [.5    .3  0   .2],
        [.8    0   .1  .1]]
    if cutoff = .75: row_degeneracy -> [3,2,1]
    if cutoff = .95: row_degeneracy -> [4,3,3]

    WARNING: watch out with floating point numbers.
    If the cutoff is 0.9 and a running total should also be 0.9, rounding
    may make the total fall just below the cutoff:
    >>> searchsorted(cumsum(array([.6,.3,.1])),.9)
    2
    >>> searchsorted(cumsum(array([.5,.4,.1])),.9)
    1

    If the cutoff is never reached in a row, the result is clipped to the
    number of columns in the array.

    Returns an integer array with one entry per row, or the empty list
    ``[]`` when the input is empty or contains only zeros.
    Raises ValueError if the (non-zero) input is not two dimensional.
    """
    data = array(a)
    # Explicit "no non-zero element" test: the bare truth value of a
    # multi-element array is ambiguous and raises instead of answering.
    if not data.any():
        return []
    if len(data.shape) != 2:
        raise ValueError("Array has to be two dimensional")
    # Sort every row ascending, reverse the column order to get descending
    # rows, then accumulate along each row.
    running = cumsum(sort(data)[:, ::-1], 1)
    degen = [searchsorted(row_totals, cutoff) for row_totals in running]
    # degen holds the indices at which the cutoff was hit;
    # add 1 to turn an index into a number of characters.
    return clip(array(degen) + 1, 0, data.shape[1])
def splint(xa, ya, y2a, x, derivs=False):
    """Return the value interpolated from a cubic spline at x.

    xa: knot abscissas; looked up with searchsorted, so they are assumed to
        be in ascending order
    ya: spline values at the knots
    y2a: second derivatives of the spline at the knots
    x: a real scalar, or a sequence of points, in which case the lookups
       are vectorised and an array is returned
    derivs: if true, return the tuple (y, y', y'') instead of just y

    Raises RangeError if x (or any element of x) lies outside
    [xa[0], xa[-1]].
    """
    # Local import keeps the block self-contained.  numbers.Real covers the
    # builtin int/float as well as scalar types registered as real numbers.
    import numbers

    if isinstance(x, numbers.Real):
        if x < xa[0] or x > xa[-1]:
            raise RangeError("%f not in range (%f, %f) in splint()"
                             % (x, xa[0], xa[-1]))
        # searchsorted returns 0 when x == xa[0]; force the bracketing
        # interval to be [xa[0], xa[1]] in that case.
        khi = max(searchsorted(xa, x), 1)
        klo = khi - 1
        h = float(xa[khi] - xa[klo])
        a = (xa[khi] - x) / h
        b = 1.0 - a
        ylo, yhi = ya[klo], ya[khi]
        y2lo, y2hi = y2a[klo], y2a[khi]
    else:
        # Sequence input: bracket all points at once.
        lowest, highest = min(x), max(x)
        if lowest < xa[0] or highest > xa[-1]:
            raise RangeError("(%f, %f) not in range (%f, %f) in splint()"
                             % (lowest, highest, xa[0], xa[-1]))
        # Upper indices are confined to valid positions 1 .. len(xa)-1; the
        # range check above already guarantees searchsorted stays below
        # len(xa), so the upper bound is only a safety net.
        khi = clip(searchsorted(xa, x), 1, len(xa) - 1)
        klo = khi - 1
        xhi = take(xa, khi)
        xlo = take(xa, klo)
        yhi = take(ya, khi)
        ylo = take(ya, klo)
        y2hi = take(y2a, khi)
        y2lo = take(y2a, klo)
        # Force floating point so the divisions below are never integer
        # divisions, whatever the type of the knots.
        h = (xhi - xlo).astype(float)
        a = (xhi - x) / h
        b = 1.0 - a

    # a and b are the linear weights of the lower and upper knot; the
    # remaining term is the cubic correction from the second derivatives.
    y = (a * ylo + b * yhi
         + ((a * a * a - a) * y2lo + (b * b * b - b) * y2hi) * (h * h) / 6.0)
    if derivs:
        first = ((yhi - ylo) / h
                 + ((3 * b * b - 1) * y2hi - (3 * a * a - 1) * y2lo) * h / 6.0)
        second = b * y2hi + a * y2lo
        return y, first, second
    return y