def lowess2(x, y, xest, f=2./3., iter=3):
    """Returns estimated values of y in data points xest (or None if estimation fails).
    Lowess smoother: Robust locally weighted regression.
    The lowess function fits a nonparametric regression curve to a scatterplot.
    The arrays x and y contain an equal number of elements; each pair
    (x[i], y[i]) defines a data point in the scatterplot. The function returns
    the estimated (smooth) values of y.
    The smoothing span is given by f. A larger value for f will result in a
    smoother curve. The number of robustifying iterations is given by iter.
    The function will run faster with a smaller number of iterations.
    """
    x = Numeric.asarray(x, 'd')
    y = Numeric.asarray(y, 'd')
    xest = Numeric.asarray(xest, 'd')
    n = len(x)
    nest = len(xest)
    r = min(int(Numeric.ceil(f*n)), n-1)                             # radius: num. of points to take into LR
    h = [Numeric.sort(abs(x-x[i]))[r] for i in range(n)]             # distance of the r-th point from x[i]
    w = Numeric.clip(abs(([x]-Numeric.transpose([x]))/h), 0.0, 1.0)
    w = 1 - w*w*w
    w = w*w*w
    hest = [Numeric.sort(abs(x-xest[i]))[r] for i in range(nest)]    # r-th min. distance from xest[i] to x
    west = Numeric.clip(abs(([xest]-Numeric.transpose([x]))/hest), 0.0, 1.0)    # shape: (len(x), len(xest))
    west = 1 - west*west*west
    west = west*west*west
    yest = Numeric.zeros(n, 'd')
    yest2 = Numeric.zeros(nest, 'd')
    delta = Numeric.ones(n, 'd')
    try:
        for iteration in range(iter):
            # fit xest
            for i in range(nest):
                weights = delta * west[:,i]
                b = Numeric.array([sum(weights*y), sum(weights*y*x)])
                A = Numeric.array([[sum(weights), sum(weights*x)],
                                   [sum(weights*x), sum(weights*x*x)]])
                beta = LinearAlgebra.solve_linear_equations(A, b)
                yest2[i] = beta[0] + beta[1]*xest[i]
            # fit x (to calculate residuals and delta)
            for i in range(n):
                weights = delta * w[:,i]
                b = Numeric.array([sum(weights*y), sum(weights*y*x)])
                A = Numeric.array([[sum(weights), sum(weights*x)],
                                   [sum(weights*x), sum(weights*x*x)]])
                beta = LinearAlgebra.solve_linear_equations(A, b)
                yest[i] = beta[0] + beta[1]*x[i]
            residuals = y - yest
            s = MLab.median(abs(residuals))
            delta = Numeric.clip(residuals/(6*s), -1, 1)
            delta = 1 - delta*delta
            delta = delta*delta
    except LinearAlgebra.LinAlgError:
        print "Warning: NumExtn.lowess2: LinearAlgebra.solve_linear_equations: Singular matrix"
        yest2 = None
    return yest2

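## A minimal usage sketch (assumptions: the legacy Numeric, LinearAlgebra and
## MLab modules are importable, as lowess2 itself requires; the data below are
## illustrative). Smooths noisy samples of y = x^2 and evaluates the fit on a
## coarser grid of query points.
x = Numeric.arange(0., 10., 0.5)
y = x*x + 3.*Numeric.sin(5.*x)                # smooth trend plus oscillating "noise"
xest = Numeric.arange(0., 10., 2.)
print lowess2(x, y, xest, f=0.4, iter=3)      # should roughly follow x^2 at xest
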
def logConfidence(x, R, clip=0):
    """
    Estimate the probability of x NOT being a random observation from a
    lognormal distribution that is described by a set of random values.

    @param x: observed value
    @type  x: float
    @param R: sample of random values
    @type  R: [float]
    @param clip: clip zeros at this value  0 -> don't clip (default: 0)
    @type  clip: float

    @return: confidence that x is not random, median of random distr.
    @rtype: (float, float)
    """
    if clip and 0 in R:
        R = N.clip(R, clip, max(R))
    if clip and x == 0:
        x = clip

    ## remove 0 instead of clipping
    R = N.compress(R, R)
    if x == 0:
        return 0, 0

    ## get mean and stdv of log-transformed random sample
    alpha = N.average(N.log(R))
    n = len(R)
    beta = N.sqrt(N.sum(N.power(N.log(R) - alpha, 2)) / (n - 1.))

    return logArea(x, alpha, beta), logMedian(alpha)

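## A minimal sketch of the alpha/beta estimation used above (assumption: N is
## the legacy Numeric module, as in logConfidence itself; the sample R is
## illustrative). alpha and beta are simply the mean and sample standard
## deviation of log(R). logArea and logMedian belong to the surrounding module
## and are not reproduced here.
R = [0.5, 1.2, 2.0, 3.5, 7.1]
alpha = N.average(N.log(R))
beta  = N.sqrt(N.sum(N.power(N.log(R) - alpha, 2)) / (len(R) - 1.))
print alpha, beta
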
def test_model(self):
    """PDBDope test final model"""
    from Biskit import PDBModel

    if self.local:
        print '\nData added to info record of model (key -- value):'
        for k in self.d.m.info.keys():
            print '%s -- %s'%(k, self.d.m.info[k])

        print '\nAdded atom profiles:'
        print self.M.atoms

        print '\nAdded residue profiles:'
        print self.M.residues

        ## check that nothing has changed
        print '\nChecking that models are unchanged by doping ...'

    m_ref = PDBModel( self.f )
    m_ref = m_ref.compress( m_ref.maskProtein() )

    for k in m_ref.atoms.keys():
        #ref = [ m_ref.atoms[i][k] for i in m_ref.atomRange() ]
        #mod = [ self.M.atoms[i][k] for i in self.M.atomRange() ]
        self.assert_( N.all( m_ref[k] == self.M[k]) )

    ## display in Pymol
    if self.local:
        print "Starting PyMol..."
        from Biskit.Pymoler import Pymoler

        pm = Pymoler()
        pm.addPdb( self.M, 'm' )
        pm.colorAtoms( 'm', N.clip(self.M.profile('relAS'), 0.0, 100.0) )
        pm.show()

def linearMap(self, data, data_min, data_max, val_min, val_max, arr_min, arr_max, datatype):
    k2, c2 = self.ScaleMap((val_min, val_max), (data_min, data_max))
    if k2 == 1 and c2 == 0:
        return data
    new_arr = Numeric.clip(k2*data + c2, k2*data_min + c2, k2*data_max + c2)
    #return new_arr.astype(data.dtype.char)
    return new_arr.astype(datatype)

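## A minimal sketch of the mapping performed above (assumption: ScaleMap
## returns a slope k2 and offset c2 so that [data_min, data_max] maps linearly
## onto [val_min, val_max]; the numbers here are illustrative stand-ins, not
## the real ScaleMap output).
k2, c2 = 2.0, -1.0                                   # stand-ins for self.ScaleMap(...)
data, data_min, data_max = Numeric.array([0., 0.5, 3.0]), 0., 1.
print Numeric.clip(k2*data + c2, k2*data_min + c2, k2*data_max + c2)
# -> [-1.  0.  1.]  values from outside [data_min, data_max] are pinned to the ends
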
def UpdateX(x, deltaX, lowBound=None, highBound=None):
    x = x + deltaX
    if lowBound != None and highBound != None:
        x = clip(x, lowBound, highBound)
    elif lowBound != None:
        x = Numeric.maximum(x, lowBound)
    elif highBound != None:
        x = Numeric.minimum(x, highBound)
    return x

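## A minimal usage sketch (assumptions: Numeric is the legacy Numeric module
## and the bare name clip used above refers to Numeric.clip imported into this
## namespace). Applies an update step and keeps the result inside [0, 1].
x = Numeric.array([0.2, 0.9, 0.5])
deltaX = Numeric.array([-0.5, 0.3, 0.1])
print UpdateX(x, deltaX, lowBound=0.0, highBound=1.0)   # -> roughly [ 0.  1.  0.6]
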
def fractionNativeSurface(self, cont, contRef ):
    """
    fraction of atoms/residues that are involved in B{any} contacts
    in both complexes.

    @param cont: contact matrix
    @type  cont: matrix
    @param contRef: reference contact matrix
    @type  contRef: matrix

    @return: (fractRec, fractLig), fraction of atoms/residues that are
             involved in any contacts in both complexes
    @rtype: (float, float)
    """
    lig, ligRef = N.clip( N.sum(cont), 0, 1), N.clip( N.sum(contRef), 0, 1)
    rec    = N.clip( N.sum(cont, 1), 0, 1)
    recRef = N.clip( N.sum(contRef, 1), 0, 1)

    fLig = N.sum( N.logical_and( lig, ligRef )) *1./ N.sum( ligRef )
    fRec = N.sum( N.logical_and( rec, recRef )) *1./ N.sum( recRef )

    return (fRec, fLig)

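## A minimal sketch of the ligand-side calculation above (assumptions: N is
## the legacy Numeric module; rows index receptor atoms and columns ligand
## atoms, as implied by the axis arguments). N.sum(cont) collapses rows,
## giving one value per ligand atom, and clipping to 0/1 marks "any contact".
cont    = N.array([[1, 0, 0],
                   [0, 0, 1]])
contRef = N.array([[1, 1, 0],
                   [0, 0, 0]])
lig    = N.clip(N.sum(cont), 0, 1)        # [1, 0, 1]
ligRef = N.clip(N.sum(contRef), 0, 1)     # [1, 1, 0]
print N.sum(N.logical_and(lig, ligRef)) * 1. / N.sum(ligRef)   # -> 0.5
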
def test_conservation(self):
    """PDBDope.addConservation (Hmmer) test"""
    if self.local:
        print "Adding conservation data...",

    self.d.addConservation()

    if self.local:
        print 'Done.'

    ## display in Pymol
    if self.local:
        print "Starting PyMol..."
        from Biskit.Pymoler import Pymoler

        pm = Pymoler()
        pm.addPdb( self.M, 'm' )
        pm.colorAtoms( 'm', N.clip(self.M.profile('cons_ent'), 0.0, 100.0) )
        pm.show()

def relExposure( model, absSurf, key='AS', clip=1 ):
    """
    Calculate how exposed an atom is relative to the same atom in a
    GLY-XXX-GLY tripeptide, an approximation of the unfolded state.

    @param absSurf: Absolute MS OR AS values
    @type  absSurf: [float]
    @param key: MS or AS
    @type  key: MS|AS
    @param clip: clip values above 100% (default: 1)
    @type  clip: 1|0

    @return: rel - list of relative accessible surfaces
    @rtype: [float]
    """
    if not key=='MS' and not key=='AS':
        raise Exception,\
              'Incorrect key for relative exposure: %s ' % key

    rel = []
    i = 0

    ## loop over chains
    for j in range( model.lenChains() ):
        c = model.takeChains([j])

        k = 0
        cIdx = c.resIndex()

        ## and loop over atoms in chain
        for a in c.atoms.iterDicts():

            ## N-terminal residue
            if k < cIdx[1]:
                rel = __Nter( a, rel, absSurf, key, i )

            ## C-terminal residue
            if k >= cIdx[-1]:
                rel = __Cter( a, rel, absSurf, key, i )

            ## everything but N- and C termini
            if not k < cIdx[1] and not k >= cIdx[-1]:
                rel = __bulk( a, rel, absSurf, key, i )

            i += 1
            k += 1

    if clip:
        return Numeric.clip( Numeric.array(rel), 0.0, 100.0 )
    else:
        return Numeric.array(rel)

def entropy( self, emmProb, nullProb ):
    """
    Calculate entropy for normalized probabilities scaled by aa freq.
    emmProb & nullProb have shape 1, len(alphabet).

    @param emmProb: emission probabilities
    @type  emmProb: array
    @param nullProb: null probabilities
    @type  nullProb: array

    @return: entropy value
    @rtype: float
    """
    ## remove zeros to avoid log error
    emmProb = N.clip(emmProb, 1.e-10, 1.)

    return N.sum( emmProb * N.log(emmProb/nullProb) )

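## A minimal sketch of the relative-entropy formula used above (assumption:
## N is the legacy Numeric module; the probabilities are illustrative).
## Emission probabilities equal to the null model give ~0; a peaked emission
## distribution gives a positive value.
null = N.ones(4, 'd') / 4.
flat = N.array([0.25, 0.25, 0.25, 0.25])
peak = N.array([0.97, 0.01, 0.01, 0.01])
for emm in (flat, peak):
    emm = N.clip(emm, 1.e-10, 1.)
    print N.sum(emm * N.log(emm / null))     # -> ~0.0, then ~1.22
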
def logConfidence( x, R, clip=1e-32 ):
    """
    Estimate the probability of x NOT being a random observation from a
    lognormal distribution that is described by a set of random values.
    The exact solution to this problem is in L{Biskit.Statistics.lognormal}.

    @param x: observed value
    @type  x: float
    @param R: sample of random values
    @type  R: [float]
    @param clip: clip zeros at this value; 0 -> don't clip (default: 1e-32)
    @type  clip: float

    @return: confidence that x is not random, mean of random distrib.
    @rtype: (float, float)
    """
    if clip and 0 in R:
        R = N.clip( R, clip, max( R ) )

    ## get mean and stdv of log-transformed random sample
    mean = N.average( N.log( R ) )
    n = len( R )
    stdv = N.sqrt(N.sum(N.power(N.log( R ) - mean, 2)) / (n - 1.))

    ## create dense lognormal distribution representing the random sample
    stop = max( R ) * 50.0
    step = stop / 100000
    start = step / 10.0

    X = [(v, p_lognormal(v, mean, stdv)) for v in N.arange(start, stop, step)]

    ## analyse distribution
    d = Density( X )

    return d.findConfidenceInterval( x * 1.0 )[0], d.average()

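## A hypothetical usage sketch (assumptions: p_lognormal and Density are
## available from Biskit.Statistics, as imported by this module; R is an
## illustrative background sample). Since x lies far above the sample, the
## returned confidence should be close to 1.
R = [1.2, 0.8, 2.5, 1.9, 3.1, 0.6, 1.4, 2.2, 1.1, 1.7]
confidence, mean_random = logConfidence(12.0, R)
print confidence, mean_random
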
pardict["Zeropoint"] = str(zpoint)
pardict["Zeropoint_Error"] = zpoint_err
self.logfile.write("Photometric ZeroPoint of " + os.path.basename(fitsfile) + ": " + str(zpoint))

#---------------- Temporary zero point correction --------------------#
## Commented 24-Sep-2002 as per Bugzilla bug #1800
##
## zpointCor = fUtil.zeroPointCorrection(imfilter)
## zpoint += zpointCor
## self.logfile.write("ZeroPoint %s corrected by %.2f mag" % (imfilter, zpointCor))
## print "ZeroPoint %s corrected by %.2f mag" % (imfilter, zpointCor)
##
#---------------- End temporary zero point correction ----------------#

flux[i, :] = Numeric.clip(flux[i, :], 1e-100, 1e100)
m[i, :] = Numeric.where(detected,
                        -2.5 * Numeric.log10(abs(flux[i, :])) + zpoint,
                        m[i, :])
m[i, :] = Numeric.where(nondetected, 99.0, m[i, :])
m[i, :] = Numeric.where(nonobserved, -99.0, m[i, :])

# the filter specific extinction correction is applied.
m_corr[i, :] = Numeric.where(nondetected, 99.0, m[i, :] - filterXCorr)
m_corr[i, :] = Numeric.where(nonobserved, -99, m_corr[i, :])
m_bpz[i, :] = Numeric.where(nondetected, 99.0, m_corr[i, :] + ap_corr[i, :])
m_bpz[i, :] = Numeric.where(nonobserved, -99, m_bpz[i, :])

# clip values from being too small or large, i.e. 0 or inf.

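## A minimal sketch of the magnitude conversion used above (assumptions:
## Numeric is the legacy Numeric module; zpoint and the flux values are
## illustrative, not pipeline data). Fluxes are clipped away from 0 and inf
## so that log10 stays finite; non-detections get the sentinel magnitude 99.0
## and unobserved sources -99.0 in the code above.
flux = Numeric.clip(Numeric.array([2.5e3, 0.0, 1.0e200]), 1e-100, 1e100)
zpoint = 25.0
print -2.5 * Numeric.log10(abs(flux)) + zpoint    # -> roughly [ 16.5  275.  -225.]
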
def lowessW(x, y, xest, f=2./3., iter=3, dWeights=None, callback=None):
    """Returns estimated values of y in data points xest (or None if estimation fails).
    Lowess smoother: Robust locally weighted regression.
    The lowess function fits a nonparametric regression curve to a scatterplot.
    The arrays x and y contain an equal number of elements; each pair
    (x[i], y[i]) defines a data point in the scatterplot. The function returns
    the estimated (smooth) values of y.
    The smoothing span is given by f. A larger value for f will result in a
    smoother curve. The number of robustifying iterations is given by iter.
    The function will run faster with a smaller number of iterations.
    Data points may be assigned weights; if None, all weights equal 1.
    """
    x = Numeric.asarray(x, 'd')
    y = Numeric.asarray(y, 'd')
    xest = Numeric.asarray(xest, 'd')
    n = len(x)
    if n != len(y):
        raise AttributeError, "Error: lowessW(x,y,xest,f,iter,dWeights): len(x)=%i not equal to len(y)=%i" % (len(x), len(y))
    nest = len(xest)
    # weights of data points (optional)
    if dWeights != None:
        dWeights = Numeric.asarray(dWeights, 'd')
        if len(dWeights) != n:
            raise AttributeError, "Error: lowessW(x,y,xest,f,iter,dWeights): len(dWeights)=%i not equal to len(x)=%i" % (len(dWeights), len(x))
##        dWeights = dWeights.reshape((n,1))
    else:
##        dWeights = Numeric.ones((n,1))
        dWeights = Numeric.ones((n,))
    r = min(int(Numeric.ceil(f*n)), n-1)                             # radius: num. of points to take into LR
    h = [Numeric.sort(abs(x-x[i]))[r] for i in range(n)]             # distance of the r-th point from x[i]
    w = Numeric.clip(abs(([x]-Numeric.transpose([x]))/h), 0.0, 1.0)
    w = 1 - w*w*w
    w = w*w*w
    hest = [Numeric.sort(abs(x-xest[i]))[r] for i in range(nest)]    # r-th min. distance from xest[i] to x
    west = Numeric.clip(abs(([xest]-Numeric.transpose([x]))/hest), 0.0, 1.0)    # shape: (len(x), len(xest))
    west = 1 - west*west*west
    west = west*west*west
    yest = Numeric.zeros(n, 'd')
    yest2 = Numeric.zeros(nest, 'd')
    delta = Numeric.ones(n, 'd')
    try:
        for iteration in range(int(iter)):
            # fit xest
            for i in range(nest):
##                print delta.shape, west[:,i].shape, dWeights.shape
                weights = delta * west[:,i] * dWeights
                b = Numeric.array([sum(weights*y), sum(weights*y*x)])
                A = Numeric.array([[sum(weights), sum(weights*x)],
                                   [sum(weights*x), sum(weights*x*x)]])
                beta = LinearAlgebra.solve_linear_equations(A, b)
                yest2[i] = beta[0] + beta[1]*xest[i]
            # fit x (to calculate residuals and delta)
            for i in range(n):
                weights = delta * w[:,i] * dWeights
                b = Numeric.array([sum(weights*y), sum(weights*y*x)])
                A = Numeric.array([[sum(weights), sum(weights*x)],
                                   [sum(weights*x), sum(weights*x*x)]])
                beta = LinearAlgebra.solve_linear_equations(A, b)
                yest[i] = beta[0] + beta[1]*x[i]
            residuals = y - yest
            s = MLab.median(abs(residuals))
            delta = Numeric.clip(residuals/(6*s), -1, 1)
            delta = 1 - delta*delta
            delta = delta*delta
            if callback:
                callback()
    except LinearAlgebra.LinAlgError:
        print "Warning: NumExtn.lowessW: LinearAlgebra.solve_linear_equations: Singular matrix"
        yest2 = None
    return yest2

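## A minimal usage sketch (same assumptions as for lowess2 above; the data are
## illustrative). Identical to a plain lowess call, except that the last three
## points are strongly down-weighted, so they barely influence the local fits
## near the end of the range.
x = Numeric.arange(0., 10., 0.5)
y = 2.*x + Numeric.sin(3.*x)
dw = [1.0]*17 + [0.01]*3                      # one weight per data point
print lowessW(x, y, Numeric.array([1., 5., 9.]), f=0.5, iter=3, dWeights=dw)
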
def calc_membership_matrix(self, d2):
    ## remove 0s (if a cluster center is exactly on one item)
    d2 = N.clip( d2, N.power(1e200, 1-self.w), 1e300 )
    q = N.power(d2, 1. / (1. - self.w))
    return q / N.sum(q)

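## A minimal sketch of the fuzzy membership formula above (assumptions: N is
## the legacy Numeric module, d2 holds squared distances with one row per
## cluster centre and one column per data point, and w > 1 is the fuzziness
## exponent standing in for self.w). Each column of the result sums to 1.
w = 2.0
d2 = N.array([[1.0, 9.0],
              [4.0, 1.0]])
d2 = N.clip(d2, N.power(1e200, 1 - w), 1e300)
q = N.power(d2, 1. / (1. - w))
print q / N.sum(q)       # -> [[0.8, 0.1], [0.2, 0.9]]
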
def clipped_exp(x):
    return Numeric.exp(Numeric.clip(x, -709., 709))

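## A minimal sketch (assumption: Numeric is the legacy Numeric module).
## exp() overflows a 64-bit float for arguments above roughly 709.78 (the log
## of the largest double), so clipping the argument to [-709, 709] keeps the
## result finite instead of triggering an overflow.
print clipped_exp(Numeric.array([-1000., 0., 1000.]))   # -> roughly [1.2e-308, 1.0, 8.2e+307]
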