Пример #1
0
def lowess2(x, y, xest, f=2./3., iter=3):
    """Lowess smoother (robust locally weighted regression) evaluated at xest.

    A straight line is fitted locally around every position, using tricube
    neighbourhood weights; after each pass the data points are re-weighted
    according to their residuals (bisquare weights) to reduce the influence
    of outliers.

    x, y -- coordinates of the data points (equal number of elements)
    xest -- positions at which the smoothed curve is estimated
    f    -- smoothing span (fraction of the data points that defines the
            local neighbourhood); a larger value gives a smoother curve
    iter -- number of robustifying iterations; fewer iterations run faster

    Returns an array with the estimated y for every element of xest, or None
    if one of the local linear systems is singular. With iter=0 no fit is
    performed and the returned array contains only zeros.
    """
    x = Numeric.asarray(x, 'd')
    y = Numeric.asarray(y, 'd')
    xest = Numeric.asarray(xest, 'd')
    nData = len(x)
    nEst = len(xest)
    # rank of the neighbour whose distance is used as the local bandwidth
    span = min(int(Numeric.ceil(f*nData)), nData-1)
    xColumn = Numeric.transpose([x])

    def tricube(centers, count):
        # bandwidth of a center = distance to its span-th closest data point
        bandwidth = [Numeric.sort(abs(x-centers[k]))[span] for k in range(count)]
        # scaled distances, shape (len(x), count); weight = (1 - u^3)^3
        u = Numeric.clip(abs(([centers]-xColumn)/bandwidth), 0.0, 1.0)
        u = 1-u*u*u
        return u*u*u

    def localFit(weights, position):
        # weighted least-squares line through (x, y), evaluated at position
        sumW = sum(weights)
        sumWX = sum(weights*x)
        rhs = Numeric.array([sum(weights*y), sum(weights*y*x)])
        lhs = Numeric.array([[sumW, sumWX], [sumWX, sum(weights*x*x)]])
        coef = LinearAlgebra.solve_linear_equations(lhs, rhs)
        return coef[0] + coef[1]*position

    wData = tricube(x, nData)
    wEst = tricube(xest, nEst)
    fitted = Numeric.zeros(nData, 'd')
    result = Numeric.zeros(nEst, 'd')
    robust = Numeric.ones(nData, 'd')
    try:
        for _ in range(iter):
            # estimates at the requested positions
            for j in range(nEst):
                result[j] = localFit(robust * wEst[:,j], xest[j])
            # estimates at the data points, needed for the residuals
            for j in range(nData):
                fitted[j] = localFit(robust * wData[:,j], x[j])
            resid = y-fitted
            scale = MLab.median(abs(resid))
            # bisquare robustness weights: (1 - (resid/(6*scale))^2)^2
            robust = Numeric.clip(resid/(6*scale),-1,1)
            robust = 1-robust*robust
            robust = robust*robust
    except LinearAlgebra.LinAlgError:
        print("Warning: NumExtn.lowess2: LinearAlgebra.solve_linear_equations: Singular matrix")
        result = None
    return result
Пример #2
0
def logConfidence(x, R, clip=0):
    """
    Estimate the probability of x NOT being a random observation from a
    lognormal distribution that is described by a set of random values.
    Zeros remaining in R (i.e. if no clipping is requested) are discarded
    before the distribution parameters are estimated.

    @param x: observed value
    @type  x: float
    @param R: sample of random values
    @type  R: [float]
    @param clip: clip zeros at this value  0->don't clip (default: 0)
    @type  clip: float

    @return: confidence that x is not random, median of random distr.;
             (0, 0) if x is still 0 after the optional clipping
    @rtype: (float, float)
    """
    if clip:
        if 0 in R:
            R = N.clip(R, clip, max(R))
        if x == 0:
            x = clip

    ## drop zeros that were not clipped away (keeps non-zero entries only)
    R = N.compress(R, R)
    if x == 0:
        return 0, 0

    ## mean and standard deviation of the log-transformed random sample
    logR = N.log(R)
    alpha = N.average(logR)
    n = len(R)
    squaredDev = N.sum(N.power(logR - alpha, 2))
    beta = N.sqrt(squaredDev / (n - 1.))

    return logArea(x, alpha, beta), logMedian(alpha)
Пример #3
0
def logConfidence( x, R, clip=0 ):
    """
    Confidence that an observed value does NOT stem from a lognormal
    distribution, which is itself described by a sample of random values.

    @param x: observed value
    @type  x: float
    @param R: sample of random values
    @type  R: [float]
    @param clip: clip zeros at this value  0->don't clip (default: 0)
    @type  clip: float

    @return: confidence that x is not random, median of random distr.;
             (0, 0) is returned for an observed value of 0 that was
             not clipped
    @rtype: (float, float)
    """
    clipZeros = bool( clip )

    if clipZeros and 0 in R:
        R = N.clip( R, clip, max( R ) )
    if clipZeros and x == 0:
        x = clip

    ## without clipping, zeros are removed from the sample instead
    R = N.compress( R, R )
    if x == 0:
        return 0, 0

    ## parameters of the normal distribution of log( R )
    logSample = N.log( R )
    mu = N.average( logSample )

    nValues = len( R )
    variance = N.sum( N.power( logSample - mu, 2 ) ) / (nValues - 1.)
    sigma = N.sqrt( variance )

    return logArea( x, mu, sigma ), logMedian( mu )
Пример #4
0
    def test_model(self):
        """PDBDope test final model

        Verifies that every atom profile of a freshly loaded, protein-only
        reference model is identical in the doped model self.M. In local
        (interactive) mode the added info records and profiles are printed
        and the model is displayed in PyMol, colored by the 'relAS' profile.
        """
        from Biskit import PDBModel

        if self.local:
            print('\nData added to info record of model (key -- value):')
            info = self.d.m.info
            for key in info.keys():
                print('%s -- %s' % (key, info[key]))

            print('\nAdded atom profiles:')
            print(self.M.atoms)

            print('\nAdded residue  profiles:')
            print(self.M.residues)

            ## check that nothing has changed
            print('\nChecking that models are unchanged by doping ...')

        ## reference: the same structure file, reduced to protein atoms
        reference = PDBModel( self.f )
        reference = reference.compress( reference.maskProtein() )
        for key in reference.atoms.keys():
            self.assert_( N.all( reference[key] == self.M[key] ) )

        ## the PyMol display is only wanted for interactive test runs
        if not self.local:
            return

        print("Starting PyMol...")
        from Biskit.Pymoler import Pymoler

        viewer = Pymoler()
        viewer.addPdb( self.M, 'm' )
        exposure = N.clip( self.M.profile('relAS'), 0.0, 100.0 )
        viewer.colorAtoms( 'm', exposure )
        viewer.show()
Пример #5
0
 def linearMap(self, data, data_min, data_max, val_min, val_max,
               arr_min, arr_max, datatype):
     """Apply the linear transformation given by self.ScaleMap to data.

     The slope and offset are obtained from
     self.ScaleMap((val_min, val_max), (data_min, data_max)). If the
     transformation is the identity, data is returned unchanged (and
     without type conversion). Otherwise the transformed values are
     limited to the interval spanned by the transformed data_min and
     data_max and converted to datatype.

     arr_min and arr_max are accepted but not used here.
     """
     slope, offset = self.ScaleMap((val_min, val_max), (data_min, data_max))
     isIdentity = (slope == 1 and offset == 0)
     if isIdentity:
         return data
     mapped = slope*data + offset
     lower = slope*data_min + offset
     upper = slope*data_max + offset
     return Numeric.clip(mapped, lower, upper).astype(datatype)
Пример #6
0
def UpdateX(x, deltaX, lowBound=None, highBound=None):
    """Return x + deltaX, limited to the optional bounds.

    x, deltaX -- current value(s) and the step to add
    lowBound  -- lower limit (scalar or array), None for no lower limit
    highBound -- upper limit (scalar or array), None for no upper limit

    The presence of a bound is tested with "is not None": a comparison
    like "lowBound != None" is evaluated element-wise when the bound is an
    array, so it does not yield a single truth value.
    """
    x = x + deltaX

    hasLow = lowBound is not None
    hasHigh = highBound is not None

    if hasLow and hasHigh:
        x = clip(x, lowBound, highBound)
    elif hasLow:
        x = Numeric.maximum(x, lowBound)
    elif hasHigh:
        x = Numeric.minimum(x, highBound)

    return x
Пример #7
0
    def fractionNativeSurface(self, cont, contRef ):
        """
        fraction of atoms/residues that are involved in B{any} contacts
        in both complexes.

        Rows of the matrices are treated as receptor items, columns as
        ligand items. The sums are taken along an explicit axis so that
        the result does not depend on the default axis of N.sum (axis 0
        in Numeric, the whole array in numpy).

        Note: if the reference matrix does not contain any contact, the
        fractions are divided by zero.

        @param cont: contact matrix
        @type  cont: matrix
        @param contRef: reference contact matrix
        @type  contRef: matrix
        
        @return: (fractRec, fractLig), fraction of atoms/residues that
                  are involved in any contacts in both complexes
        @rtype: (float, float)
           
        """
        ## 1 for every column (ligand item) with at least one contact
        lig    = N.clip( N.sum(cont, 0), 0, 1 )
        ligRef = N.clip( N.sum(contRef, 0), 0, 1 )
        ## 1 for every row (receptor item) with at least one contact
        rec    = N.clip( N.sum(cont, 1), 0, 1 )
        recRef = N.clip( N.sum(contRef, 1), 0, 1 )

        ## items in contact in both complexes / items in contact in reference
        fLig = N.sum( N.logical_and( lig, ligRef )) *1./ N.sum( ligRef )
        fRec = N.sum( N.logical_and( rec, recRef )) *1./ N.sum( recRef )

        return (fRec, fLig)
Пример #8
0
    def test_conservation(self):
        """PDBDope.addConservation (Hmmer) test"""
        if self.local: print "Adding conservation data...",
        self.d.addConservation()
        if self.local: print 'Done.'

        ## display in Pymol
        if self.local:
            print "Starting PyMol..."
            from Biskit.Pymoler import Pymoler

            pm = Pymoler()
            pm.addPdb( self.M, 'm' )
            pm.colorAtoms( 'm', N.clip(self.M.profile('cons_ent'), 0.0, 100.0) )
            pm.show()
Пример #9
0
def relExposure( model, absSurf, key='AS', clip=1 ):
    """
    Calculate how exposed an atom is relative to the same
    atom in a GLY-XXX-GLY tripeptide, an approximation of
    the unfolded state.

    @param model: model providing lenChains() and takeChains(); it is
                  processed chain by chain and atom by atom
    @type  model: PDBModel (presumably -- confirm against caller)
    @param absSurf: Absolute MS OR AS values
    @type  absSurf: [float]
    @param key: MS or AS
    @type  key: MS|AS
    @param clip: clip values above 100% (default: 1)
    @type  clip: 1|0
    
    @return: rel - list of relative accessible surfaces
    @rtype: [float]

    @raise Exception: if key is neither 'MS' nor 'AS'
    """
    ## call form of raise: valid in Python 2 and Python 3
    if key not in ( 'MS', 'AS' ):
        raise Exception( 'Incorrect key for relative exposiure: %s ' % key )

    rel = []
    ## atom counter running over all chains, handed to the helper functions
    i = 0

    ## loop over chains
    for j in range( model.lenChains() ):
        c = model.takeChains( [j] )
        cIdx = c.resIndex()

        ## and loop over atoms in chain (k counts atoms within the chain)
        for k, a in enumerate( c.atoms.iterDicts() ):
            ## NOTE(review): cIdx[1] requires at least two entries in the
            ## residue index; presumably fails for one-residue chains
            isNter = k < cIdx[1]
            isCter = k >= cIdx[-1]

            ## N-terminal residue
            if isNter:
                rel = __Nter( a, rel, absSurf, key, i )
            ## C-terminal residue
            if isCter:
                rel = __Cter( a, rel, absSurf, key, i )
            ## everything but N- and C termini
            if not isNter and not isCter:
                rel = __bulk( a, rel, absSurf, key, i )
            i += 1

    if clip:
        return  Numeric.clip( Numeric.array(rel), 0.0, 100.0 )
    else:
        return  Numeric.array(rel)
Пример #10
0
def relExposure(model, absSurf, key='AS', clip=1):
    """
    Calculate how exposed an atom is relative to the same
    atom in a GLY-XXX-GLY tripeptide, an approximation of
    the unfolded state.

    @param model: model that is traversed chain by chain via its
                  lenChains() and takeChains() methods
    @type  model: PDBModel (presumably -- confirm against caller)
    @param absSurf: Absolute MS OR AS values
    @type  absSurf: [float]
    @param key: MS or AS
    @type  key: MS|AS
    @param clip: clip values above 100% (default: 1)
    @type  clip: 1|0
    
    @return: rel - list of relative accessible surfaces
    @rtype: [float]

    @raise Exception: if key is neither 'MS' nor 'AS'
    """
    if key != 'MS' and key != 'AS':
        ## exception instance instead of the Python 2-only
        ## "raise Class, message" statement form
        raise Exception('Incorrect key for relative exposiure: %s ' % key)

    rel = []
    i = 0  ## counts atoms across all chains; passed on to the helpers

    ## loop over chains
    for j in range(model.lenChains()):
        c = model.takeChains([j])

        k = 0  ## counts atoms within the current chain
        cIdx = c.resIndex()
        ## and loop over atoms in chain
        for a in c.atoms.iterDicts():
            ## NOTE(review): cIdx[1] needs a residue index with at least
            ## two entries -- presumably breaks for single-residue chains
            inNter = k < cIdx[1]
            inCter = k >= cIdx[-1]

            if inNter:
                ## N-terminal residue
                rel = __Nter(a, rel, absSurf, key, i)
            if inCter:
                ## C-terminal residue
                rel = __Cter(a, rel, absSurf, key, i)
            if not (inNter or inCter):
                ## everything but N- and C termini
                rel = __bulk(a, rel, absSurf, key, i)
            i += 1
            k += 1

    if clip:
        return Numeric.clip(Numeric.array(rel), 0.0, 100.0)
    else:
        return Numeric.array(rel)
Пример #11
0
    def entropy( self, emmProb, nullProb ):
        """
        Relative entropy of emission probabilities with respect to the
        null (background) probabilities: sum( p * log(p / q) ).
        emmProb & nullProb is shape 1,len(alphabet)

        @param emmProb: emission probabilities; values are limited to
                        the range 1e-10 .. 1 before taking the logarithm
        @type  emmProb: array
        @param nullProb: null probabilities
        @type  nullProb: array

        @return: entropy value
        @rtype:  float
        """
        ## a probability of 0 would break the logarithm
        safeProb = N.clip( emmProb, 1.e-10, 1. )
        logRatio = N.log( safeProb / nullProb )

        return N.sum( safeProb * logRatio )
Пример #12
0
def logConfidence( x, R, clip=1e-32 ):
    """
    Estimate the probability of x NOT being a random observation from a
    lognormal distribution that is described by a set of random values.
    The distribution is sampled numerically on a dense grid; the exact
    solution to this problem is in L{Biskit.Statistics.lognormal}.

    @param x: observed value
    @type  x: float
    @param R: sample of random values
    @type  R: [float]
    @param clip: clip zeros at this value; 0 -> don't clip (default: 1e-32)
    @type  clip: float

    @return:  confidence that x is not random, mean of random distrib.
    @rtype: (float, float)
    """
    if clip and 0 in R:
        R = N.clip( R, clip, max( R ) )

    ## mean and standard deviation of the log-transformed random sample
    logR = N.log( R )
    mean = N.average( logR )
    n = len( R )
    stdv = N.sqrt( N.sum( N.power( logR - mean, 2 ) ) / (n - 1.) )

    ## grid of 100000 steps reaching up to 50 times the largest random value
    stop = max( R ) * 50.0
    step = stop / 100000
    start = step / 10.0

    ## dense lognormal distribution representing the random sample
    X = []
    for v in N.arange( start, stop, step ):
        X.append( (v, p_lognormal( v, mean, stdv )) )

    ## analyse distribution
    d = Density( X )
    confidence = d.findConfidenceInterval( x * 1.0 )[0]

    return confidence, d.average()
Пример #13
0
def logConfidence(x, R, clip=1e-32):
    """
    Confidence that an observed value is NOT a random observation from the
    lognormal distribution described by a sample of random values. The
    distribution is evaluated on a dense grid of points.
    The exact solution to this problem is in L{Biskit.Statistics.lognormal}.

    @param x: observed value
    @type  x: float
    @param R: sample of random values
    @type  R: [float]
    @param clip: clip zeros at this value; 0 -> don't clip (default: 1e-32)
    @type  clip: float

    @return:  confidence that x is not random, mean of random distrib.
    @rtype: (float, float)
    """
    if clip and 0 in R:
        R = N.clip(R, clip, max(R))

    ## parameters of the normal distribution of log(R)
    logSample = N.log(R)
    mean = N.average(logSample)
    nValues = len(R)
    sumSquares = N.sum(N.power(logSample - mean, 2))
    stdv = N.sqrt(sumSquares / (nValues - 1.))

    ## sampling grid: 100000 steps up to 50 times the largest random value
    upper = max(R) * 50.0
    width = upper / 100000
    lower = width / 10.0
    grid = N.arange(lower, upper, width)

    ## create dense lognormal distribution representing the random sample
    points = [(v, p_lognormal(v, mean, stdv)) for v in grid]

    ## analyse distribution
    density = Density(points)
    confidence = density.findConfidenceInterval(x * 1.0)[0]

    return confidence, density.average()
Пример #14
0
            pardict["Zeropoint"] = str(zpoint)
            pardict["Zeropoint_Error"] = zpoint_err
            self.logfile.write("Photometric ZeroPoint of " +
                               os.path.basename(fitsfile) + ": " + str(zpoint))

            #---------------- Temporary zero point correction --------------------#
            ## Commented 24-Sep-2002 as per Bugzilla bug #1800
            ##
            ## zpointCor=fUtil.zeroPointCorrection(imfilter)
            ## zpoint+= zpointCor
            ## self.logfile.write("ZeroPoint %s corrected by %.2f mag" % (imfilter,zpointCor))
            ## print "ZeroPoint %s corrected by %.2f mag" % (imfilter,zpointCor)
            ##
            #---------------- End temporary zero point correction ----------------#

            flux[i, :] = Numeric.clip(flux[i, :], 1e-100, 1e100)
            m[i, :] = Numeric.where(
                detected, -2.5 * Numeric.log10(abs(flux[i, :])) + zpoint,
                m[i, :])
            m[i, :] = Numeric.where(nondetected, 99.0, m[i, :])
            m[i, :] = Numeric.where(nonobserved, -99.0, m[i, :])
            # the filter specific extinction correction is applied.
            m_corr[i, :] = Numeric.where(nondetected, 99.0,
                                         m[i, :] - filterXCorr)
            m_corr[i, :] = Numeric.where(nonobserved, -99, m_corr[i, :])

            m_bpz[i, :] = Numeric.where(nondetected, 99.0,
                                        m_corr[i, :] + ap_corr[i, :])
            m_bpz[i, :] = Numeric.where(nonobserved, -99, m_bpz[i, :])

            # clip values from being too small or large, i.e. 0 or inf.
Пример #15
0
def lowessW(x, y, xest, f=2./3., iter=3, dWeights=None, callback=None):
    """Returns estimated values of y in data points xest (or None if estimation fails).
    Lowess smoother: Robust locally weighted regression.
    The lowess function fits a nonparametric regression curve to a scatterplot.
    The arrays x and y contain an equal number of elements; each pair
    (x[i], y[i]) defines a data point in the scatterplot. The function returns
    the estimated (smooth) values of y.

    The smoothing span is given by f. A larger value for f will result in a
    smoother curve. The number of robustifying iterations is given by iter. The
    function will run faster with a smaller number of iterations.

    Data points may be assigned weights; if None, all weights equal 1.

    If given, callback is called without arguments after every iteration.

    Raises AttributeError if len(x) differs from len(y) or from
    len(dWeights). Returns None (after printing a warning) if one of the
    local linear systems is singular.
    """
    x = Numeric.asarray(x, 'd')
    y = Numeric.asarray(y, 'd')
    xest = Numeric.asarray(xest, 'd')
    n = len(x)
    if n != len(y):
        raise AttributeError("Error: lowessW(x,y,xest,f,iter,dWeights): len(x)=%i not equal to len(y)=%i" % (len(x), len(y)))
    nest = len(xest)
    # weights of data points (optional); identity test, because comparing
    # an array of weights with None would be evaluated element-wise
    if dWeights is not None:
        dWeights = Numeric.asarray(dWeights, 'd')
        if len(dWeights) != n:
            raise AttributeError("Error: lowessW(x,y,xest,f,iter,dWeights): len(dWeights)=%i not equal to len(x)=%i" % (len(dWeights), len(x)))
    else:
        dWeights = Numeric.ones((n,))
    r = min(int(Numeric.ceil(f*n)),n-1) # radius: num. of points to take into LR
    # tricube neighbourhood weights (1 - |d/h|^3)^3 between all data points
    h = [Numeric.sort(abs(x-x[i]))[r] for i in range(n)]    # distance of the r-th point from x[i]
    w = Numeric.clip(abs(([x]-Numeric.transpose([x]))/h),0.0,1.0)
    w = 1-w*w*w
    w = w*w*w
    # the same weights between the data points and the estimation points
    hest = [Numeric.sort(abs(x-xest[i]))[r] for i in range(nest)]    # r-th min. distance from xest[i] to x
    west = Numeric.clip(abs(([xest]-Numeric.transpose([x]))/hest),0.0,1.0)  # shape: (len(x), len(xest))
    west = 1-west*west*west
    west = west*west*west
    yest = Numeric.zeros(n,'d')
    yest2 = Numeric.zeros(nest,'d')
    delta = Numeric.ones(n,'d')     # robustness weights, updated per iteration
    try:
        for iteration in range(int(iter)):
            # fit xest
            for i in range(nest):
                weights = delta * west[:,i] * dWeights
                # normal equations of the weighted straight-line fit
                b = Numeric.array([sum(weights*y), sum(weights*y*x)])
                A = Numeric.array([[sum(weights), sum(weights*x)], [sum(weights*x), sum(weights*x*x)]])
                beta = LinearAlgebra.solve_linear_equations(A,b)
                yest2[i] = beta[0] + beta[1]*xest[i]
            # fit x (to calculate residuals and delta)
            for i in range(n):
                weights = delta * w[:,i] * dWeights
                b = Numeric.array([sum(weights*y), sum(weights*y*x)])
                A = Numeric.array([[sum(weights), sum(weights*x)], [sum(weights*x), sum(weights*x*x)]])
                beta = LinearAlgebra.solve_linear_equations(A,b)
                yest[i] = beta[0] + beta[1]*x[i]
            residuals = y-yest
            s = MLab.median(abs(residuals))
            # bisquare weights (1 - (residual/(6*s))^2)^2
            delta = Numeric.clip(residuals/(6*s),-1,1)
            delta = 1-delta*delta
            delta = delta*delta
            if callback: callback()
    except LinearAlgebra.LinAlgError:
        print("Warning: NumExtn.lowessW: LinearAlgebra.solve_linear_equations: Singular matrix")
        yest2 = None
    return yest2
Пример #16
0
 def calc_membership_matrix(self, d2):
     """
     Convert a matrix of distances into memberships:
     q = d2 ** (1 / (1 - self.w)), normalized along the first axis so
     that the values of every column add up to 1.

     The axis of the normalization is given explicitly; N.sum without
     axis sums along axis 0 in Numeric but over all elements in numpy.

     @param d2: distances; presumably squared distances of shape
                (clusters, items) -- confirm against caller
     @type  d2: array

     @return: membership matrix with the shape of d2
     @rtype: array
     """
     ## remove 0s (if a cluster center is exactly on one item);
     ## the lower bound limits q to at most 1e200
     d2 = N.clip( d2, N.power(1e200, 1-self.w), 1e300 )
     q = N.power(d2, 1. / (1. - self.w))
     return q / N.sum(q, 0)
Пример #17
0
def clipped_exp(x):
    """Return exp(x) with the argument limited to the range [-709, 709]."""
    bounded = Numeric.clip(x, -709., 709)
    return Numeric.exp(bounded)
Пример #18
0
 def calc_membership_matrix(self, d2):
     """
     Calculate memberships from distances as d2 ** (1 / (1 - self.w)),
     divided by their sum along the first axis (each column of the
     result adds up to 1).

     The sum is taken along axis 0 explicitly, because the default axis
     of N.sum differs between Numeric (axis 0) and numpy (all elements).

     @param d2: distances; presumably squared distances with one row per
                cluster and one column per item -- confirm against caller
     @type  d2: array

     @return: membership matrix with the shape of d2
     @rtype: array
     """
     ## remove 0s (if a cluster center is exactly on one item);
     ## with this lower bound the power below cannot exceed 1e200
     exponent = 1. / (1. - self.w)
     d2 = N.clip(d2, N.power(1e200, 1 - self.w), 1e300)
     q = N.power(d2, exponent)
     return q / N.sum(q, 0)