Python compress示例，numpy.oldnumeric.compress Python示例

示例#1

0

显示文件

文件： Hmmer.py 项目： ostrokach/biskit

    def castHmmDic( self, hmmDic, repete, hmmGap, key ):
        """
        Blow up hmmDic to the number of repetes of the profile used.
        Correct scores for possible deletions in the search sequence.

        @param hmmDic: dictionary from L{getHmmProfile}
        @type  hmmDic: dict
        @param repete: repete information from L{align}
        @type  repete: int
        @param hmmGap: information about gaps from L{align}
        @type  hmmGap: [int]
        @param key: name of scoring method to adjust for gaps and repetes
        @type  key: str
        
        @return: dictionary with information about the profile
        @rtype: dict        
        """
        s = hmmDic[key]

        for i in range( repete ):
            mask = N.ones( len(s) )
            N.put( mask, hmmGap[i], 0 )
            if i == 0:
                score = N.compress( mask, s, 0 )
            if i > 0:
                score = N.concatenate( ( N.compress( mask, s, 0 ), score ) )

        hmmDic[key] = score

        return hmmDic

示例#2

0

显示文件

文件： Complex.py 项目： ostrokach/biskit

    def contactResDistribution( self, cm=None ):
        """
        Count occurrence of residues in protein-protein interface.
        
        @param cm: pre-calculated contact matrix (default: None)
        @type  cm: matrix
        
        @return: dict {'A':3, 'C':1, .. } (20 standard amino acids)
        @rtype: dict
        """
        if cm == None:
            cm = self.resContacts()

        ## get mask for residues involved in contacts
        maskLig = N.sum( cm )
        maskRec = N.sum( N.transpose( cm ))

        ## get sequence of contact residues only
        seqLig = N.compress( maskLig, self.lig().sequence() )
        seqRec = N.compress( maskRec, self.rec().sequence() )
        seq    = ''.join( seqLig ) + ''.join(seqRec) ## convert back to string

        ## count occurrence of letters
        result = {}
        for aa in molUtils.allAA():
            result[aa] = seq.count( aa )

        return result

示例#3

0

显示文件

文件： Complex.py 项目： ostrokach/biskit

    def __atomContacts(self, cutoff, rec_mask, lig_mask, cache):
        """
        Intermolecular distances below cutoff after applying the two masks.
        
        @param cutoff: cutoff for B{atom-atom} contact in \AA
        @type  cutoff: float
        @param rec_mask: atom mask
        @type  rec_mask: [1|0]
        @param lig_mask: atom mask
        @type  lig_mask: [1|0]
        @param cache: cache pairwise atom distance matrix
        @type  cache: 1|0
        
        @return: atom contact matrix, array sum_rec_mask x sum_lig_mask
        @rtype: array
        """
        ## get atom coordinats as array 3 x all_atoms
        rec_xyz = self.rec().getXyz()
        lig_xyz = self.lig().getXyz()

        ## get pair-wise distances -> atoms_rec x atoms_lig
        dist = getattr( self, 'pw_dist', None )
        if dist is None or \
               N.shape( dist ) != ( N.sum(rec_mask), N.sum(lig_mask) ):
            dist = self.__pairwiseDistances(N.compress( rec_mask, rec_xyz, 0),
                                            N.compress( lig_mask, lig_xyz, 0) )
        if cache:
            self.pw_dist = dist

        ## reduce to 1 (distance < cutoff) or 0 -> n_atoms_rec x n_atoms_lig
        return N.less( dist, cutoff )

示例#4

0

显示文件

    def castHmmDic(self, hmmDic, repete, hmmGap, key):
        """
        Blow up hmmDic to the number of repetes of the profile used.
        Correct scores for possible deletions in the search sequence.

        @param hmmDic: dictionary from L{getHmmProfile}
        @type  hmmDic: dict
        @param repete: repete information from L{align}
        @type  repete: int
        @param hmmGap: information about gaps from L{align}
        @type  hmmGap: [int]
        @param key: name of scoring method to adjust for gaps and repetes
        @type  key: str
        
        @return: dictionary with information about the profile
        @rtype: dict        
        """
        s = hmmDic[key]

        for i in range(repete):
            mask = N.ones(len(s))
            N.put(mask, hmmGap[i], 0)
            if i == 0:
                score = N.compress(mask, s, 0)
            if i > 0:
                score = N.concatenate((N.compress(mask, s, 0), score))

        hmmDic[key] = score

        return hmmDic

示例#5

0

显示文件

文件： numpyExtn.py 项目： www3838438/orange-bio

def triangularGet(m2d, upper=1):
    """Returns 1D masked array with elements from the upper (lower) triangular part of the given matrix.
    For a symetric matrix triangularGet(m2d, 0) and triangularGet(m2d, 1) return elements in different order.
    """
    assert upper in [0,1], "upper: [0|1] expected"
    m2d = MA.asarray(m2d)
    assert MA.rank(m2d) == 2, "2D (masked) array expected"
    if upper:
        takeInd = Numeric.compress(Numeric.ravel(Numeric.fromfunction(lambda i,j: i<j, m2d.shape)), Numeric.arange(0, Numeric.multiply.reduce(m2d.shape), typecode=Numeric.Int))
    else:
        takeInd = Numeric.compress(Numeric.ravel(Numeric.fromfunction(lambda i,j: i>j, m2d.shape)), Numeric.arange(0, Numeric.multiply.reduce(m2d.shape), typecode=Numeric.Int))
    return MA.ravel(m2d).take(takeInd)

示例#6

0

显示文件

文件： chipimpute.py 项目： JakaKokosar/orange-bio

def kNNimputeMA(arr2d, K=20, callback=None):
    """Returns a new 2D MA.array with missing values imputed from K nearest neighbours.
    Find K rows (axis 0) with the most similar values where similarity measure corresponds to weighted Euclidean distance.
    Imputed value = weighted average of the corresponding values of K nearest neighbours,
    where weights equal to tricubic distribution of distances to all rows.
    Impute missing rows by average over all rows.
    Version: 30.8.2005
    """
    arr2d = MA.asarray(arr2d)
    assert len(arr2d.shape) == 2, "2D array expected"
    # make a copy for imputation
    aImp2 = MA.array(arr2d)
    # leave out columns with 0 known values (columnInd: non-zero columns)
    columnCond = Numeric.greater(MA.count(arr2d, axis=0), 0)
    columnIndAll = Numeric.arange(arr2d.shape[1])
    columnInd = Numeric.compress(columnCond, columnIndAll)
    # impute the rows where 0 < #known_values < #non_zero_columns, i.e. exclude the rows with 0 and all (non-zero-column) values
    countByRows = MA.count(arr2d, axis=1)
    for rowIdx in Numeric.compress(Numeric.logical_and(Numeric.greater(countByRows, 0), Numeric.less(countByRows, columnInd.shape[0])), Numeric.arange(arr2d.shape[0])):
        rowResized = MA.resize(arr2d[rowIdx], arr2d.shape)
        diff = arr2d - rowResized
        distances = MA.sqrt(MA.add.reduce((diff)**2, 1) / MA.count(diff, axis=1))
        # nearest neighbours row indices (without the current row index)
        indSorted = MA.argsort(distances)[1:]
        distSorted = distances.take(indSorted)
        # number of distances different from MA.masked
        numNonMasked = distSorted.shape[0] - Numeric.add.reduce(Numeric.asarray(MA.getmaskarray(distSorted), Numeric.Int))
        # number of distances to account for (K or less)
        if numNonMasked > 1:
            weightsSorted = MA.power(1-MA.power(distSorted/distSorted[numNonMasked-1],3),3) # tricubic distribution of all weights
        else:
            weightsSorted = Numeric.ones(distSorted.shape[0])
        # compute average for each column separately in order to account for K non-masked values
        colInd4CurrRow = Numeric.compress(Numeric.logical_and(MA.getmaskarray(arr2d[rowIdx]), columnCond), columnIndAll)
        for colIdx in colInd4CurrRow:
            # column values sorted by distances
            columnVals = arr2d[:,colIdx].take(indSorted)
            # take only those weights where columnVals does not equal MA.masked
            weightsSortedCompressed = MA.compress(1-MA.getmaskarray(columnVals), weightsSorted)
            # impute from K (or possibly less) values
            aImp2[rowIdx,colIdx] = MA.average(columnVals.compressed()[:K], weights=weightsSortedCompressed[:K])
        if callback:
            callback()
    # impute the unknown rows with average profile
    avrgRow = MA.average(arr2d, 0)
    for rowIdx in Numeric.compress(Numeric.equal(countByRows, 0), Numeric.arange(arr2d.shape[0])):
        aImp2[rowIdx] = avrgRow
        if callback:
            callback()
    return aImp2

示例#7

0

显示文件

文件： Trajectory.py 项目： ostrokach/biskit

    def residusMaximus(self, atomValues, mask=None):
        """
        Take list of value per atom, return list where all atoms of any
        residue are set to the highest value of any atom in that residue.
        (after applying mask)

        @param atomValues: list 1 x N, values per atom
        @type  atomValues: [ float ]
        @param mask: list 1 x N, 0|1, 'master' atoms of each residue
        @type  mask: [1|0]

        @return: Numpy array 1 x N of float
        @rtype: array
        """
        if mask is None:
            mask = N.ones(len(self.frames[0]), N.int32)

        ## eliminate all values that do not belong to the selected atoms
        masked = atomValues * mask

        result = []

        ## set all atoms of each residue to uniform value
        for res in range(0, self.resMap()[-1] + 1):

            ## get atom entries for this residue
            resAtoms = N.compress(N.equal(self.resMap(), res), masked)

            ## get maximum value
            masterValue = max(resAtoms)

            result += resAtoms * 0.0 + masterValue

        return N.array(result)

示例#8

0

显示文件

文件： Trajectory.py 项目： ostrokach/biskit

    def takeFrames( self, indices ):
        """
        Return a copy of the trajectory containing only the specified frames.

        @param indices: positions to take
        @type  indices: [int]

        @return: copy of this Trajectory (fewer frames, semi-deep copy of ref)
        @rtype: Trajectory
        """
        ## remove out-of-bound indices
        indices = N.compress( N.less( indices, len( self.frames) ), indices )

        r = self.__class__()

        ## this step takes some time for large frames !
        r.frames = N.take( self.frames, indices, 0 )

        ## semi-deep copy of reference model
        r.setRef( self.ref.take( range( self.ref.lenAtoms() )) )

        if self.frameNames != None:
            r.frameNames = N.take( self.frameNames, indices, 0 )
            r.frameNames = map( ''.join, r.frameNames.tolist() )

        r.pc = self.__takePca( indices )

        r.profiles = self.profiles.take( indices )

        r.resIndex = self.resIndex

        return r

示例#9

0

显示文件

文件： Trajectory.py 项目： ostrokach/biskit

    def pairwiseRmsd(self, aMask=None, noFit=0):
        """
        Calculate rmsd between each 2 coordinate frames.

        @param aMask: atom mask
        @type  aMask: [1|0]
        @return: frames x frames array of float
        @rtype: array
        """
        frames = self.frames

        if aMask != None:
            frames = N.compress(aMask, frames, 1)

        result = N.zeros((len(frames), len(frames)), N.Float32)

        for i in range(0, len(frames)):

            for j in range(i + 1, len(frames)):
                if noFit:
                    d = N.sqrt(N.sum(N.power(frames[i] - frames[j], 2), 1))
                    result[i, j] = result[j, i] = N.sqrt(N.average(d**2))

                else:
                    rt, rmsdLst = rmsFit.match(frames[i], frames[j], 1)
                    result[i, j] = result[j, i] = rmsdLst[0][1]

        return result

示例#10

0

显示文件

文件： PDBDope.py 项目： ostrokach/biskit

    def addDensity( self, radius=6, minasa=None, profName='density' ):
        """
        Count the number of heavy atoms within the given radius.
        Values are only collected for atoms with |minasa| accessible surface
        area.

        @param minasa: relative exposed surface - 0 to 100%
        @type  minasa: float
        @param radius: in Angstrom
        @type  radius: float
        """
        mHeavy = self.m.maskHeavy()

        xyz = N.compress( mHeavy, self.m.getXyz(), 0 )

        if minasa and self.m.profile( 'relAS', 0 ) == 0:
            self.addASA()

        if minasa:
            mSurf = self.m.profile2mask( 'relAS', minasa )
        else:
            mSurf = N.ones( self.m.lenAtoms() )

        ## loop over all surface atoms
        surf_pos = N.nonzero( mSurf )
        contacts = []

        for i in surf_pos:
            dist = N.sum(( xyz - self.m.xyz[i])**2, 1)
            contacts += [ N.sum( N.less(dist, radius**2 )) -1]

        self.m.atoms.set( profName, contacts, mSurf, default=-1,
                          comment='atom density radius %3.1fA' % radius,
                          version= T.dateString() + ' ' + self.version() )

示例#11

0

显示文件

文件： hexTools.py 项目： ostrokach/biskit

def centerSurfDist( model, surf_mask, mask=None ):
    """
    Calculate the longest and shortest distance from
    the center of the molecule to the surface.

    @param mask: atoms not to be considerd (default: None)
    @type  mask: [1|0]
    @param surf_mask: atom surface mask, needed for minimum surface distance
    @type  surf_mask: [1|0]

    @return: max distance, min distance
    @rtype: float, float
    """
    if mask is None:
        mask = model.maskHeavy()

    ## calculate center of mass
    center = model.centerOfMass()

    ## surface atom coordinates
    surf_xyz = N.compress( mask*surf_mask, model.getXyz(), 0 )

    ## find the atom closest and furthest away from center
    dist = N.sqrt( N.sum( (surf_xyz-center)**2 , 1 ) )
    minDist = min(dist)
    maxDist = max(dist)

    return maxDist, minDist

示例#12

0

显示文件

def logConfidence(x, R, clip=0):
    """
    Estimate the probability of x NOT beeing a random observation from a
    lognormal distribution that is described by a set of random values.

    @param x: observed value
    @type  x: float
    @param R: sample of random values
    @type  R: [float]
    @param clip: clip zeros at this value  0->don't clip (default: 0)
    @type  clip: float

    @return: confidence that x is not random, median of random distr.
    @rtype: (float, float)
    """
    if clip and 0 in R:
        R = N.clip(R, clip, max(R))
    if clip and x == 0:
        x = clip

    ## remove 0 instead of clipping
    R = N.compress(R, R)
    if x == 0:
        return 0, 0

    ## get mean and stdv of log-transformed random sample
    alpha = N.average(N.log(R))

    n = len(R)

    beta = N.sqrt(N.sum(N.power(N.log(R) - alpha, 2)) / (n - 1.))

    return logArea(x, alpha, beta), logMedian(alpha)

示例#13

0

显示文件

文件： estimateReferences.py 项目： VuisterLab/cing

def stable_sd(x, n_sd=3., min_length=20):

    if len(x) < min_length:
        if len(x) == 1:
            return 0.
        else:
            return standardDeviation(x)

    x = Numeric.array(x)
    _x = x
    _outliers = 0.
    
    i = 0

    while i < 10:

        mu = median(_x)
        sd = standardDeviation(_x, mu)

        outliers = Numeric.greater(abs(x-mu), n_sd*sd)

        if not Numeric.sum(outliers) or Numeric.sum(outliers==_outliers) == len(x):
            break

        _x = Numeric.compress(Numeric.logical_not(outliers), x)
        _outliers = outliers

        i += 1

    return sd

示例#14

0

显示文件

def centerSurfDist(model, surf_mask, mask=None):
    """
    Calculate the longest and shortest distance from
    the center of the molecule to the surface.

    @param mask: atoms not to be considerd (default: None)
    @type  mask: [1|0]
    @param surf_mask: atom surface mask, needed for minimum surface distance
    @type  surf_mask: [1|0]

    @return: max distance, min distance
    @rtype: float, float
    """
    if mask is None:
        mask = model.maskHeavy()

    ## calculate center of mass
    center = model.centerOfMass()

    ## surface atom coordinates
    surf_xyz = N.compress(mask * surf_mask, model.getXyz(), 0)

    ## find the atom closest and furthest away from center
    dist = N.sqrt(N.sum((surf_xyz - center)**2, 1))
    minDist = min(dist)
    maxDist = max(dist)

    return maxDist, minDist

示例#15

0

显示文件

文件： Trajectory.py 项目： ostrokach/biskit

    def takeFrames(self, indices):
        """
        Return a copy of the trajectory containing only the specified frames.

        @param indices: positions to take
        @type  indices: [int]

        @return: copy of this Trajectory (fewer frames, semi-deep copy of ref)
        @rtype: Trajectory
        """
        ## remove out-of-bound indices
        indices = N.compress(N.less(indices, len(self.frames)), indices)

        r = self.__class__()

        ## this step takes some time for large frames !
        r.frames = N.take(self.frames, indices, 0)

        ## semi-deep copy of reference model
        r.setRef(self.ref.take(range(self.ref.lenAtoms())))

        if self.frameNames != None:
            r.frameNames = N.take(self.frameNames, indices, 0)
            r.frameNames = map(''.join, r.frameNames.tolist())

        r.pc = self.__takePca(indices)

        r.profiles = self.profiles.take(indices)

        r.resIndex = self.resIndex

        return r

示例#16

0

显示文件

文件： chipimpute.py 项目： JakaKokosar/orange-bio

def loessMA(m, windowSize, axis=0, approxMasked=True, verbose=False, callback=None):
    """Returns a new array with values at the given axis smoothed by loess;
    if approxMasked==True: the masked values are approximated by loess;
    assumes equidistant spacing of points on the given axis.
    """
    assert 0 < windowSize <= m.shape[axis]+0.1, "0 < windowSize[%s] <= 1 OR windowSize in range(1.1,m.shape[axis]+1) expected, got %f" % ("%", windowSize)
    m = MA.asarray(m)
    if m.dtype.char <> Numeric.Float:
        m = m.astype(Numeric.Float)
    shp_other = list(m.shape)
    shp_other.pop(axis)
    # get a transposed and reshaped mask and data from m; if m.mask() == None, construct a new array of zeros
    mask = Numeric.reshape(Numeric.transpose(MA.getmaskarray(m), [axis] + range(0,axis) + range(axis+1,len(m.shape))), (m.shape[axis], Numeric.multiply.reduce(shp_other)))
    data = MA.reshape(MA.transpose(m, [axis] + range(0,axis) + range(axis+1,len(m.shape))), (m.shape[axis], Numeric.multiply.reduce(shp_other)))
    maskInv = -1*(mask-1)
    xall = Numeric.arange(data.shape[0])
    xallList = xall.tolist()
    for ii in Numeric.compress(Numeric.add.reduce(maskInv,0) > 1, range(data.shape[1])):    # run loess if the profile contains more than 2 values
        try:
            data[:,ii] = MA.array(statc.loess(zip(MA.compress(maskInv[:,ii], xall).tolist(), MA.compress(maskInv[:,ii], data[:,ii]).tolist()), xallList, windowSize))[:,1]
        except:
            if verbose:
                print "Warning: loessMA: could not loess axis %i index %i" % (axis, ii)
        if callback:
            callback()
    if not approxMasked:
        data = MA.array(data, mask=mask)
    return MA.transpose(MA.reshape(data, [m.shape[axis]] + shp_other), [axis] + range(0,axis) + range(axis+1,len(m.shape)))

示例#17

0

显示文件

文件： lognormal.py 项目： ostrokach/biskit

def logConfidence( x, R, clip=0 ):
    """
    Estimate the probability of x NOT beeing a random observation from a
    lognormal distribution that is described by a set of random values.

    @param x: observed value
    @type  x: float
    @param R: sample of random values
    @type  R: [float]
    @param clip: clip zeros at this value  0->don't clip (default: 0)
    @type  clip: float

    @return: confidence that x is not random, median of random distr.
    @rtype: (float, float)
    """
    if clip and 0 in R:
        R = N.clip( R, clip, max( R ) )
    if clip and x == 0:
        x = clip

    ## remove 0 instead of clipping
    R = N.compress( R, R )
    if x == 0:
        return 0, 0

    ## get mean and stdv of log-transformed random sample
    alpha = N.average( N.log( R ) )

    n = len( R )

    beta = N.sqrt(N.sum(N.power(N.log( R ) - alpha, 2)) / (n - 1.))

    return logArea( x, alpha, beta ), logMedian( alpha )

示例#18

0

显示文件

文件： Trajectory.py 项目： ostrokach/biskit

    def residusMaximus( self, atomValues, mask=None ):
        """
        Take list of value per atom, return list where all atoms of any
        residue are set to the highest value of any atom in that residue.
        (after applying mask)

        @param atomValues: list 1 x N, values per atom
        @type  atomValues: [ float ]
        @param mask: list 1 x N, 0|1, 'master' atoms of each residue
        @type  mask: [1|0]

        @return: Numpy array 1 x N of float
        @rtype: array
        """
        if mask is None:
            mask = N.ones( len( self.frames[0] ), N.int32 )

        ## eliminate all values that do not belong to the selected atoms
        masked = atomValues * mask

        result = []

        ## set all atoms of each residue to uniform value
        for res in range( 0, self.resMap()[-1]+1 ):

            ## get atom entries for this residue
            resAtoms = N.compress( N.equal( self.resMap(), res ), masked )

            ## get maximum value
            masterValue = max( resAtoms )

            result += resAtoms * 0.0 + masterValue

        return N.array( result )

示例#19

0

显示文件

文件： numpyExtn.py 项目： www3838438/orange-bio

def triangularPut(m1d, upper=1, lower=0):
    """Returns 2D masked array with elements of the given 1D array in the strictly upper (lower) triangle.
    Elements of the 1D array should be ordered according to the upper triangular part of the 2D matrix.
    The lower triangular part (if requested) equals to the transposed upper triangular part.
    If upper == lower == 1 a symetric matrix is returned.
    """
    assert upper in [0,1] and lower in [0,1], "[0|1] expected for upper / lower"
    m1d = MA.asarray(m1d)
    assert MA.rank(m1d) == 1, "1D masked array expected"
    m2dShape0 = math.ceil(math.sqrt(2*m1d.shape[0]))
    assert m1d.shape[0] == m2dShape0*(m2dShape0-1)/2, "the length of m1d does not correspond to n(n-1)/2"
    if upper:
        if lower:
            mask = Numeric.fromfunction(lambda i,j: i==j, (m2dShape0, m2dShape0))
        else:
            mask = Numeric.fromfunction(lambda i,j: i>=j, (m2dShape0, m2dShape0))
    else:
        if lower:
            mask = Numeric.fromfunction(lambda i,j: i<=j, (m2dShape0, m2dShape0))
        else:
            mask = Numeric.ones((m2dShape0, m2dShape0))

    m2d = MA.ravel(MA.zeros((m2dShape0, m2dShape0), m1d.dtype.char))
    condUpperTriang = Numeric.fromfunction(lambda i,j: i<j, (m2dShape0, m2dShape0))
    putIndices = Numeric.compress(Numeric.ravel(condUpperTriang), Numeric.arange(0, m2dShape0**2, typecode=Numeric.Int))
    MA.put(m2d, putIndices, m1d)
    m2d = MA.reshape(m2d, (m2dShape0, m2dShape0))
    m2d = MA.where(condUpperTriang, m2d, MA.transpose(m2d))
    return MA.array(m2d, mask=Numeric.logical_or(mask, MA.getmaskarray(m2d)))

示例#20

0

显示文件

文件： Trajectory.py 项目： ostrokach/biskit

    def pairwiseRmsd( self, aMask=None, noFit=0 ):
        """
        Calculate rmsd between each 2 coordinate frames.

        @param aMask: atom mask
        @type  aMask: [1|0]
        @return: frames x frames array of float
        @rtype: array
        """
        frames = self.frames

        if aMask != None:
            frames = N.compress( aMask, frames, 1 )

        result = N.zeros( (len( frames ), len( frames )), N.Float32 )

        for i in range(0, len( frames ) ):

            for j in range( i+1, len( frames ) ):
                if noFit:
                    d = N.sqrt(N.sum(N.power(frames[i]-frames[j], 2), 1))
                    result[i,j] = result[j,i] = N.sqrt( N.average(d**2) )

                else:
                    rt, rmsdLst = rmsFit.match( frames[i], frames[j], 1 )
                    result[i,j] = result[j,i] = rmsdLst[0][1]

        return result

示例#21

0

显示文件

文件： OWImputeProfiles.py 项目： acopar/orange-bio

 def chipdata(self, data):
     """Input data: [(dirname0, [et0, et1, ...]), ...]
     """
     self.numRowsMissingChipData = 0
     self._chipdataMA = []
     if data != None:
         self._chipdata = data
         numValsAll = 0
         numValsNonMasked = 0
         numFiles = 0
         numExamplesList = []
         attribDict = {}
         numColMissing = 0
         for (name, etList) in data:
             numFiles += len(etList)
             self._chipdataMA.append((name, []))
             for et in etList:
                 attribDict.update(
                     dict(
                         zip(map(lambda x: x.name, et.domain.attributes),
                             et.domain.attributes)))
                 numExamplesList.append(len(et))
                 etm = et.toNumpyMA("a")[0]
                 colNonMissingInd = Numeric.compress(
                     Numeric.not_equal(MA.count(etm, 0), 0),
                     Numeric.arange(etm.shape[1])
                 )  # indices of columns that are not completely missing
                 numColMissing += etm.shape[1] - colNonMissingInd.shape[0]
                 self.numRowsMissingChipData += int(
                     Numeric.add.reduce(
                         Numeric.less(
                             MA.count(etm.take(colNonMissingInd, 1), 1),
                             etm.shape[1])))
                 numValsAll += int(Numeric.multiply.reduce(etm.shape))
                 numValsNonMasked += int(MA.count(etm))
                 self._chipdataMA[-1][1].append(etm)
         # info text
         self.infoc.setText(
             "Structured Data: %i data files with %i profiles on %i points"
             % (numFiles, numExamplesList[0], len(attribDict)))
         numTotalMissing = numValsAll - numValsNonMasked
         if numTotalMissing > 0:
             print numTotalMissing, numColMissing, self.numRowsMissingChipData
             print type(numTotalMissing), type(numColMissing), type(
                 self.numRowsMissingChipData)
             self.infod.setText(
                 "missing %i values, %i column%s completely, %i row%s partially"
                 % (numTotalMissing, numColMissing, [
                     "", "s"
                 ][numColMissing != 1], self.numRowsMissingChipData,
                    ["", "s"][self.numRowsMissingChipData != 1]))
         else:
             self.infod.setText("")
     else:
         self._chipdata = None
         self.infoc.setText("No structured data on input")
         self.infod.setText("")
     self.setGuiCommonExpChip()
     if self.commitOnChange:
         self.senddata(2)

示例#22

0

显示文件

文件： numpyExtn.py 项目： www3838438/orange-bio

def compressIndices(ma):
    """Returns 1D compressed Numeric array and the indices of the non-masked places.
    usage:  nu,ind = compressIndices(ma)
            nu = Numeric.elementwise_function(nu)
            ma = MA.put(ma, ind, nu)
    """
    ma = MA.asarray(ma)
    nonMaskedInd = Numeric.compress(1-Numeric.ravel(MA.getmaskarray(ma)), Numeric.arange(Numeric.multiply.reduce(ma.shape)))
    return MA.filled(ma.compressed()), nonMaskedInd

示例#23

0

显示文件

文件： Complex.py 项目： ostrokach/biskit

    def __extractLigandMatrix(self, fcomplex):
        """
        Compare structure from hex complex with original ligand pdb
        and store transformation matrix of ligand in self.ligandMatrix.
        
        @param fcomplex: pdb file with hex complex
        @type  fcomplex: complec
        
        @return: rotation matrix and translation matrix as tuple
        @rtype: (array, array)
        """
        docked_pdb = self._extractLigandStructure(fcomplex)

        xyz_docked = N.compress( docked_pdb.maskCA(), docked_pdb.xyz )
        xyz_template = N.compress( self.lig_model.maskCA(),
                                 self.lig_model.xyz )

        (r, t) = self._findTransformation(xyz_docked, xyz_template)
        return (r,t)

示例#24

0

显示文件

文件： Ramachandran.py 项目： ostrokach/biskit

    def phi_and_psi(self, model):
        """
        Calculate phi and psi torsion angles for all
        residues in model::
        
          phi - rotation about the N-CA bond
              - last position in a chain = None
          psi - rotation about CA-C
              - first position in a chain = None          

        @param model: PDBModel
        @type  model: PDBModel 
        """
        for c in range(model.lenChains(breaks=1)):
            cModel = model.takeChains([c], breaks=1)

            xyz = cModel.xyz

            xyz_CA = N.compress(cModel.maskCA(), xyz, 0)
            xyz_N = N.compress(cModel.mask(['N']), xyz, 0)
            xyz_C = N.compress(cModel.mask(['C']), xyz, 0)

            ## phi: c1 - N
            ##      c2 - CA
            ##      c3 - C
            ##      c4 - N of next residue
            for i in range(len(xyz_N) - 1):
                self.phi += [
                    self.dihedral(xyz_N[i], xyz_CA[i], xyz_C[i], xyz_N[i + 1])
                ]
            self.phi += [None]

            ## psi: c1 - C of previous residue
            ##      c2 - N
            ##      c3 - CA
            ##      c4 - C
            self.psi += [None]
            for i in range(1, len(xyz_N)):
                self.psi += [
                    self.dihedral(xyz_C[i - 1], xyz_N[i], xyz_CA[i], xyz_C[i])
                ]

示例#25

0

显示文件

文件： numpyExtn.py 项目： www3838438/orange-bio

def diagonalPut(m1d, m2d):
    """Puts the given 1D masked array into the diagonal of the given 2D masked array and returns a new copy of the 2D array.
    """
    m1d = MA.asarray(m1d)
    m2d = MA.asarray(m2d)
    assert MA.rank(m1d) == 1 and MA.rank(m2d) == 2, "1D and 2D masked array expected"
    assert m1d.shape[0] == m2d.shape[0] == m2d.shape[1], "the shape of the given arrays does not match"
    putIndices = Numeric.compress(Numeric.ravel(Numeric.fromfunction(lambda i,j: i==j, m2d.shape)), Numeric.arange(0, Numeric.multiply.reduce(m2d.shape), typecode=Numeric.Int))
    m2dShape = m2d.shape
    m2d = MA.ravel(m2d)
    MA.put(m2d, putIndices, m1d)
    return MA.reshape(m2d, m2dShape)

示例#26

0

显示文件

文件： ComplexTraj.py 项目： ostrokach/biskit

    def plotContactDensity( self, step=1, cutoff=4.5 ):
        """
        Example. plot histogramm of contact density. Somehing wrong??

        @raise ComplexTrajError: if gnuplot program is not installed
        """
        if not gnuplot.installed:
            raise ComplexTrajError, 'gnuplot program is not installed'
        r = self.averageContacts( step, cutoff )
        r = N.ravel( r )
        r = N.compress( r, r )
        gnuplot.plot( hist.density( r, 10 ) )

示例#27

0

显示文件

文件： match2seq.py 项目： ostrokach/biskit

def compareSequences(seqAA_1, seqAA_2):
    """
    """
    seqAA_1 = list(seqAA_1)
    seqAA_2 = list(seqAA_2)
    seqNr_1 = range(len(seqAA_1))
    seqNr_2 = range(len(seqAA_2))

    # get mask
    mask_1 = N.zeros(len(seqNr_1))
    mask_2 = N.zeros(len(seqNr_2))

    # compare sequences
    seqDiff = getOpCodes(seqAA_1, seqAA_2)

    # get delete lists
    del_1, del_2 = getSkipLists(seqDiff)

    del_1 = [expandRepeats(seqAA_1, *pos) for pos in del_1]
    del_2 = [expandRepeats(seqAA_2, *pos) for pos in del_2]

    mask1 = del2mask(seqAA_1, *del_1)
    mask2 = del2mask(seqAA_2, *del_2)

    seqAA_1 = N.compress(mask1, seqAA_1).tolist()
    seqNr_1 = N.compress(mask1, seqNr_1).tolist()
    seqAA_2 = N.compress(mask2, seqAA_2).tolist()
    seqNr_2 = N.compress(mask2, seqNr_2).tolist()

    # get equal parts
    seqDiff = getOpCodes(seqAA_1, seqAA_2)
    equal_1, equal_2 = getEqualLists(seqDiff)
    seqAA_1, seqNr_1 = getEqual(seqAA_1, seqNr_1, equal_1)
    seqAA_2, seqNr_2 = getEqual(seqAA_2, seqNr_2, equal_2)

    N.put(mask_1, seqNr_1, 1)
    N.put(mask_2, seqNr_2, 1)

    return mask_1, mask_2

示例#28

0

显示文件

文件： match2seq.py 项目： ostrokach/biskit

def compareSequences( seqAA_1, seqAA_2 ):
    """
    """
    seqAA_1 = list( seqAA_1 )
    seqAA_2 = list( seqAA_2 )
    seqNr_1 = range( len( seqAA_1 ) )
    seqNr_2 = range( len( seqAA_2 ) )

    # get mask
    mask_1 = N.zeros( len( seqNr_1 ) )
    mask_2 = N.zeros( len( seqNr_2 ) )

    # compare sequences
    seqDiff = getOpCodes( seqAA_1, seqAA_2)

    # get delete lists
    del_1, del_2 =  getSkipLists( seqDiff )
    
    del_1 = [ expandRepeats( seqAA_1, *pos ) for pos in del_1 ]
    del_2 = [ expandRepeats( seqAA_2, *pos ) for pos in del_2 ]
    
    mask1 = del2mask( seqAA_1, *del_1 )
    mask2 = del2mask( seqAA_2, *del_2 )
 
    seqAA_1 = N.compress( mask1, seqAA_1 ).tolist()
    seqNr_1 = N.compress( mask1, seqNr_1 ).tolist()
    seqAA_2 = N.compress( mask2, seqAA_2 ).tolist()
    seqNr_2 = N.compress( mask2, seqNr_2 ).tolist()
    
    # get equal parts
    seqDiff = getOpCodes( seqAA_1, seqAA_2 )
    equal_1, equal_2 = getEqualLists( seqDiff ) 
    seqAA_1, seqNr_1 = getEqual( seqAA_1, seqNr_1, equal_1)
    seqAA_2, seqNr_2 = getEqual( seqAA_2, seqNr_2, equal_2 )

    N.put( mask_1, seqNr_1 , 1 )
    N.put( mask_2, seqNr_2 , 1 )

    return mask_1, mask_2

示例#29

0

显示文件

文件： Srf.py 项目： uceearq/polymode-1

 def __init__(self, crv1, crv2):
     if not isinstance(crv1, Crv.Crv):
         raise NURBSError, 'Parameter crv1 not derived from Crv class!'
     if not isinstance(crv2, Crv.Crv):
         raise NURBSError, 'Parameter crv2 not derived from Crv class!'
     # ensure both curves have a common degree
     d = max(crv1.degree, crv2.degree)
     crv1.degelev(d - crv1.degree)
     crv2.degelev(d - crv2.degree)
     # merge the knot vectors, to obtain a common knot vector
     k1 = crv1.uknots
     k2 = crv2.uknots
     ku = []
     for item in k1:
         if not numerix.sometrue(numerix.equal(k2, item)):
             if item not in ku:
                 ku.append(item)
     for item in k2:
         if not numerix.sometrue(numerix.equal(k1, item)):
             if item not in ku:
                 ku.append(item)
     ku = numerix.sort(numerix.asarray(ku, numerix.Float))
     n = ku.shape[0]
     ka = numerix.array([], numerix.Float)
     kb = numerix.array([], numerix.Float)
     for i in range(0, n):
         i1 = numerix.compress(numerix.equal(k1, ku[i]), k1).shape[0]
         i2 = numerix.compress(numerix.equal(k2, ku[i]), k2).shape[0]
         m = max(i1, i2)
         ka = numerix.concatenate((ka, ku[i] * numerix.ones(
             (m - i1, ), numerix.Float)))
         kb = numerix.concatenate((kb, ku[i] * numerix.ones(
             (m - i2, ), numerix.Float)))
     crv1.kntins(ka)
     crv2.kntins(kb)
     coefs = numerix.zeros((4, crv1.cntrl.shape[1], 2), numerix.Float)
     coefs[:, :, 0] = crv1.cntrl
     coefs[:, :, 1] = crv2.cntrl
     Srf.__init__(self, coefs, crv1.uknots, [0., 0., 1., 1.])

示例#30

0

显示文件

def loessMA(m,
            windowSize,
            axis=0,
            approxMasked=True,
            verbose=False,
            callback=None):
    """Returns a new array with values at the given axis smoothed by loess;
    if approxMasked==True: the masked values are approximated by loess;
    assumes equidistant spacing of points on the given axis.
    """
    assert 0 < windowSize <= m.shape[
        axis] + 0.1, "0 < windowSize[%s] <= 1 OR windowSize in range(1.1,m.shape[axis]+1) expected, got %f" % (
            "%", windowSize)
    m = MA.asarray(m)
    if m.dtype.char <> Numeric.Float:
        m = m.astype(Numeric.Float)
    shp_other = list(m.shape)
    shp_other.pop(axis)
    # get a transposed and reshaped mask and data from m; if m.mask() == None, construct a new array of zeros
    mask = Numeric.reshape(
        Numeric.transpose(MA.getmaskarray(m), [axis] + range(0, axis) +
                          range(axis + 1, len(m.shape))),
        (m.shape[axis], Numeric.multiply.reduce(shp_other)))
    data = MA.reshape(
        MA.transpose(m,
                     [axis] + range(0, axis) + range(axis + 1, len(m.shape))),
        (m.shape[axis], Numeric.multiply.reduce(shp_other)))
    maskInv = -1 * (mask - 1)
    xall = Numeric.arange(data.shape[0])
    xallList = xall.tolist()
    for ii in Numeric.compress(
            Numeric.add.reduce(maskInv, 0) > 1, range(data.shape[1])
    ):  # run loess if the profile contains more than 2 values
        try:
            data[:, ii] = MA.array(
                statc.loess(
                    zip(
                        MA.compress(maskInv[:, ii], xall).tolist(),
                        MA.compress(maskInv[:, ii], data[:, ii]).tolist()),
                    xallList, windowSize))[:, 1]
        except:
            if verbose:
                print "Warning: loessMA: could not loess axis %i index %i" % (
                    axis, ii)
        if callback:
            callback()
    if not approxMasked:
        data = MA.array(data, mask=mask)
    return MA.transpose(MA.reshape(data, [m.shape[axis]] + shp_other), [axis] +
                        range(0, axis) + range(axis + 1, len(m.shape)))

示例#31

0

显示文件

文件： mathUtils.py 项目： ostrokach/biskit

def random2DArray( matrix, ranNr=1, mask=None):
    """
    Create randomized 2D array containing ones and zeros.

    @param matrix: matrix to randomize
    @type  matrix: 2D array
    @param mask: mask OR None (default: None)
    @type  mask: list(1|0)
    @param ranNr: number of matricies to add up (default: 1)
    @type  ranNr: integer

    @return: 2D array or |ranNr| added contact matricies
    @rtype:2D array

    @raise MathUtilError: if mask does not fit matrix
    """
    ## get shape of matrix
    a,b = N.shape( matrix )

    ## get array from matrix that is to be randomized
    if mask is not None:
        if len(mask) == len( N.ravel(matrix) ):
            array = N.compress( mask, N.ravel(matrix) )

        if len(mask) != len( N.ravel(matrix) ):
            raise MathUtilError(
                'MatUtils.random2DArray - mask of incorrect length' +
                '\tMatrix length: %i Mask length: %i'\
                %(len( N.ravel(matrix) ), len(mask)))

    if not mask:
        array = N.ravel(matrix)

    ## number of ones and length of array
    nOnes = int( N.sum( array ) )
    lenArray = len( array )
    ranArray = N.zeros( lenArray )

    ## create random array
    for n in range(ranNr):
        ranArray += randomMask( nOnes, lenArray )

    ## blow up to size of original matix
    if mask is not None:
        r = N.zeros(a*b)
        N.put( r, N.nonzero(mask), ranArray)
        return N.reshape( r, (a,b) )

    if not mask:
        return  N.reshape( ranArray, (a,b) )

示例#32

0

显示文件

def random2DArray(matrix, ranNr=1, mask=None):
    """
    Create randomized 2D array containing ones and zeros.

    @param matrix: matrix to randomize
    @type  matrix: 2D array
    @param mask: mask OR None (default: None)
    @type  mask: list(1|0)
    @param ranNr: number of matricies to add up (default: 1)
    @type  ranNr: integer

    @return: 2D array or |ranNr| added contact matricies
    @rtype:2D array

    @raise MathUtilError: if mask does not fit matrix
    """
    ## get shape of matrix
    a, b = N.shape(matrix)

    ## get array from matrix that is to be randomized
    if mask is not None:
        if len(mask) == len(N.ravel(matrix)):
            array = N.compress(mask, N.ravel(matrix))

        if len(mask) != len(N.ravel(matrix)):
            raise MathUtilError(
                'MatUtils.random2DArray - mask of incorrect length' +
                '\tMatrix length: %i Mask length: %i'\
                %(len( N.ravel(matrix) ), len(mask)))

    if not mask:
        array = N.ravel(matrix)

    ## number of ones and length of array
    nOnes = int(N.sum(array))
    lenArray = len(array)
    ranArray = N.zeros(lenArray)

    ## create random array
    for n in range(ranNr):
        ranArray += randomMask(nOnes, lenArray)

    ## blow up to size of original matix
    if mask is not None:
        r = N.zeros(a * b)
        N.put(r, N.nonzero(mask), ranArray)
        return N.reshape(r, (a, b))

    if not mask:
        return N.reshape(ranArray, (a, b))

示例#33

0

显示文件

文件： Srf.py 项目： adocherty/polymode

 def __init__(self, crv1, crv2):
     if not isinstance(crv1, Crv.Crv):
             raise NURBSError, 'Parameter crv1 not derived from Crv class!'
     if not isinstance(crv2, Crv.Crv):
             raise NURBSError, 'Parameter crv2 not derived from Crv class!'
     # ensure both curves have a common degree
     d = max(crv1.degree, crv2.degree)
     crv1.degelev(d - crv1.degree)
     crv2.degelev(d - crv2.degree)
     # merge the knot vectors, to obtain a common knot vector
     k1 = crv1.uknots
     k2 = crv2.uknots
     ku = []
     for item in k1:
         if not numerix.sometrue(numerix.equal(k2, item)):
             if item not in ku:
                 ku.append(item)
     for item in k2:
         if not numerix.sometrue(numerix.equal(k1, item)):
             if item not in ku:
                 ku.append(item)
     ku = numerix.sort(numerix.asarray(ku, numerix.Float))
     n = ku.shape[0]
     ka = numerix.array([], numerix.Float)
     kb = numerix.array([], numerix.Float)
     for i in range(0, n):
         i1 = numerix.compress(numerix.equal(k1, ku[i]), k1).shape[0]
         i2 = numerix.compress(numerix.equal(k2, ku[i]), k2).shape[0]
         m = max(i1, i2)
         ka = numerix.concatenate((ka , ku[i] * numerix.ones((m - i1,), numerix.Float)))
         kb = numerix.concatenate((kb , ku[i] * numerix.ones((m - i2,), numerix.Float)))
     crv1.kntins(ka)
     crv2.kntins(kb)
     coefs = numerix.zeros((4, crv1.cntrl.shape[1], 2), numerix.Float)
     coefs[:,:,0] = crv1.cntrl
     coefs[:,:,1] = crv2.cntrl
     Srf.__init__(self, coefs, crv1.uknots, [0., 0., 1., 1.])

示例#34

0

显示文件

文件： Trajectory.py 项目： ostrokach/biskit

    def pca( self, atomMask=None, frameMask=None, fit=1 ):
        """
        Calculate principal components of trajectory frames.

        @param atomMask: 1 x N_atoms, [111001110..] atoms to consider
                         (default: all)
        @type  atomMask: [1|0]
        @param frameMask: 1 x N_frames, [001111..] frames to consider
                          (default all )
        @type  frameMask: [1|0]

        @return: (N_frames x N_frames), (1 x N_frames),
                 projection of each frame in PC space, eigenvalue of each PC
        @rtype: array, array, array
        """
        if frameMask is None: frameMask = N.ones( len( self.frames ), N.int32 )

        if atomMask is None: atomMask = N.ones(self.getRef().lenAtoms(),
                                               N.int32)

        if fit:
            self.fit( atomMask )

        refxyz = N.average( self.frames, 0 )

        data = N.compress( frameMask, self.frames, 0 )

        data = data - refxyz

        data = N.compress( atomMask, data, 1 )

        ## reduce to 2D array
        data = N.array( map( N.ravel, data ) )

        V, L, U = LA.singular_value_decomposition( data )

        return U, V * L, N.power(L, 2)

示例#35

0

显示文件

文件： Trajectory.py 项目： ostrokach/biskit

    def pca(self, atomMask=None, frameMask=None, fit=1):
        """
        Calculate principal components of trajectory frames.

        @param atomMask: 1 x N_atoms, [111001110..] atoms to consider
                         (default: all)
        @type  atomMask: [1|0]
        @param frameMask: 1 x N_frames, [001111..] frames to consider
                          (default all )
        @type  frameMask: [1|0]

        @return: (N_frames x N_frames), (1 x N_frames),
                 projection of each frame in PC space, eigenvalue of each PC
        @rtype: array, array, array
        """
        if frameMask is None: frameMask = N.ones(len(self.frames), N.int32)

        if atomMask is None:
            atomMask = N.ones(self.getRef().lenAtoms(), N.int32)

        if fit:
            self.fit(atomMask)

        refxyz = N.average(self.frames, 0)

        data = N.compress(frameMask, self.frames, 0)

        data = data - refxyz

        data = N.compress(atomMask, data, 1)

        ## reduce to 2D array
        data = N.array(map(N.ravel, data))

        V, L, U = LA.singular_value_decomposition(data)

        return U, V * L, N.power(L, 2)

示例#36

0

显示文件

def area(curve, start=0.0, stop=1.0):
    """
    Numerically add up the area under the given curve.
    The curve is a 2-D array or list of tupples.
    The x-axis is the first column of this array (curve[:,0]).
    (originally taken from Biskit.Statistics.ROCalyzer)

    @param curve: a list of x,y coordinates
    @type  curve: [ (y,x), ] or N.array
    @param start: lower boundary (in x) (default: 0.0)
    @type  start: float
    @param stop: upper boundary (in x) (default: 1.0)
    @type  stop: float
    @return: the area underneath the curve between start and stop.
    @rtype: float
    """
    ## convert and swap axes
    curve = N.array(curve)
    c = N.zeros(N.shape(curve), curve.dtype)
    c[:, 0] = curve[:, 1]
    c[:, 1] = curve[:, 0]

    assert len(N.shape(c)) == 2

    ## apply boundaries  ## here we have a problem with flat curves
    mask = N.greater_equal(c[:, 1], start)
    mask *= N.less_equal(c[:, 1], stop)
    c = N.compress(mask, c, axis=0)

    ## fill to boundaries -- not absolutely accurate: we actually should
    ## interpolate to the neighboring points instead
    c = N.concatenate((N.array([
        [c[0, 0], start],
    ]), c, N.array([
        [c[-1, 0], stop],
    ])))
    x = c[:, 1]
    y = c[:, 0]

    dx = x[1:] - x[:-1]  # distance on x between points
    dy = y[1:] - y[:-1]  # distance on y between points

    areas1 = y[:-1] * dx  # the rectangles between all points
    areas2 = dx * dy / 2.0  # the triangles between all points

    return N.sum(areas1) + N.sum(areas2)

示例#37

0

显示文件

文件： OWHypTest.py 项目： acopar/orange-bio

 def anova2(self, ma3d, groupLens, addInteraction, repMeasuresOnA, callback):
     """Conducts two-way ANOVA on individual examples;
     returns a Numeric array of p-values in shape (2, numExamples) or (3, numExamples), depending whether we test for interaction;
     Note: levels of factors A and B that cause empty cells are removed prior to conducting ANOVA.
     """
     groupLens = Numeric.asarray(groupLens)
     # arrays to store p-vals
     if addInteraction:
         ps = Numeric.ones((3, ma3d.shape[0]), Numeric.Float)
     else:
         ps = Numeric.ones((2, ma3d.shape[0]), Numeric.Float)
     # decide between non-repeated / repeated measures ANOVA for factor time
     if repMeasuresOnA:
         fAnova = Anova.AnovaRM12LR
     else:
         fAnova = Anova.Anova2wayLR
     # check for empty cells for all genes at once and remove them
     tInd2rem = []
     ax2Ind = Numeric.concatenate(([0], Numeric.add.accumulate(groupLens)))
     for aIdx in range(ma3d.shape[1]):
         for rIdx in range(groupLens.shape[0]):
             if Numeric.add.reduce(MA.count(ma3d[:,aIdx,ax2Ind[rIdx]:ax2Ind[rIdx+1]],1)) == 0:
                 tInd2rem.append(aIdx)
                 break
     if len(tInd2rem) > 0:
         print "Warning: removing time indices %s for all genes" % (str(tInd2rem))
         tInd2keep = range(ma3d.shape[1])
         for aIdx in tInd2rem:
             tInd2keep.remove(aIdx)
         ma3d = ma3d.take(tInd2keep, 1)
     # for each gene...
     for eIdx in range(ma3d.shape[0]):
         # faster check for empty cells for that gene -> remove time indices with empty cells
         ma2d = ma3d[eIdx]
         cellCount = MA.zeros((ma2d.shape[0], groupLens.shape[0]), Numeric.Int)
         for g,(i0,i1) in enumerate(zip(ax2Ind[:-1], ax2Ind[1:])):
             cellCount[:,g] = MA.count(ma2d[:,i0:i1], 1)
         ma2dTakeInd = Numeric.logical_not(Numeric.add.reduce(Numeric.equal(cellCount,0),1)) # 1 where to take, 0 where not to take
         if Numeric.add.reduce(ma2dTakeInd) != ma2dTakeInd.shape[0]:
             print "Warning: removing time indices %s for gene %i" % (str(Numeric.compress(ma2dTakeInd == 0, Numeric.arange(ma2dTakeInd.shape[0]))), eIdx)
             ma2d = MA.compress(ma2dTakeInd, ma2d, 0)
         an = fAnova(ma2d, groupLens, addInteraction, allowReductA=True, allowReductB=True)
         ps[:,eIdx] = an.ps
         callback()
     return ps

示例#38

0

显示文件

文件： Benchmark.py 项目： ostrokach/biskit

    def calc_rmsd(self, fitted_model_if, fitted_model_wo_if, reference, model):
        """
        Takes the two fitted structures (with and without iterative fitting),
        the known structure (reference), and the associated model inside the
        pdb_list. Calculates the different RMSD and set the profiles

        @param fitted_model_if: itteratively fitted model
        @type  fitted_model_if: PDBModel
        @param fitted_model_wo_if: normaly fitted model
        @type  fitted_model_wo_if: PDBModel
        @param reference: reference model
        @type  reference: PDBModel
        @param model: model
        @type  model: PDBModel
        """
        ## first calculate rmsd for heavy atoms and CA without
        ## removing any residues from the model
        mask_CA = fitted_model_wo_if.maskCA()

        rmsd_aa = fitted_model_wo_if.rms( reference, fit=0 )
        rmsd_ca = fitted_model_wo_if.rms( reference, mask=mask_CA, fit=1 )

        model.info["rmsd2ref_aa_wo_if"] = rmsd_aa
        model.info["rmsd2ref_ca_wo_if"] = rmsd_ca

        outliers_mask = N.logical_not(fitted_model_if.profile("rms_outliers"))

        ## Now remove the residues that were outliers in the iterative fit
        ## and calculate the rmsd again
        fitted_model_if = fitted_model_if.compress( outliers_mask )
        reference = reference.compress( outliers_mask )

        mask_CA = fitted_model_if.maskCA()

        rmsd_aa_if = fitted_model_if.rms( reference, fit=0 )
        rmsd_ca_if = fitted_model_if.rms( reference, mask=mask_CA, fit=1 )

        model.info["rmsd2ref_aa_if"] = rmsd_aa_if
        model.info["rmsd2ref_ca_if"] = rmsd_ca_if
        model.info["rmsd2ref_aa_outliers"] = 1.*(len(outliers_mask) \
                                                 - N.sum(outliers_mask)) / len(outliers_mask)
        model.info["rmsd2ref_ca_outliers"] = 1.*(N.sum(mask_CA) \
                                                 - N.sum(N.compress(mask_CA, outliers_mask))) \
             / N.sum(mask_CA)

示例#39

0

显示文件

    def calc_rmsd(self, fitted_model_if, fitted_model_wo_if, reference, model):
        """
        Takes the two fitted structures (with and without iterative fitting),
        the known structure (reference), and the associated model inside the
        pdb_list. Calculates the different RMSD and set the profiles

        @param fitted_model_if: itteratively fitted model
        @type  fitted_model_if: PDBModel
        @param fitted_model_wo_if: normaly fitted model
        @type  fitted_model_wo_if: PDBModel
        @param reference: reference model
        @type  reference: PDBModel
        @param model: model
        @type  model: PDBModel
        """
        ## first calculate rmsd for heavy atoms and CA without
        ## removing any residues from the model
        mask_CA = fitted_model_wo_if.maskCA()

        rmsd_aa = fitted_model_wo_if.rms(reference, fit=0)
        rmsd_ca = fitted_model_wo_if.rms(reference, mask=mask_CA, fit=1)

        model.info["rmsd2ref_aa_wo_if"] = rmsd_aa
        model.info["rmsd2ref_ca_wo_if"] = rmsd_ca

        outliers_mask = N.logical_not(fitted_model_if.profile("rms_outliers"))

        ## Now remove the residues that were outliers in the iterative fit
        ## and calculate the rmsd again
        fitted_model_if = fitted_model_if.compress(outliers_mask)
        reference = reference.compress(outliers_mask)

        mask_CA = fitted_model_if.maskCA()

        rmsd_aa_if = fitted_model_if.rms(reference, fit=0)
        rmsd_ca_if = fitted_model_if.rms(reference, mask=mask_CA, fit=1)

        model.info["rmsd2ref_aa_if"] = rmsd_aa_if
        model.info["rmsd2ref_ca_if"] = rmsd_ca_if
        model.info["rmsd2ref_aa_outliers"] = 1.*(len(outliers_mask) \
                                                 - N.sum(outliers_mask)) / len(outliers_mask)
        model.info["rmsd2ref_ca_outliers"] = 1.*(N.sum(mask_CA) \
                                                 - N.sum(N.compress(mask_CA, outliers_mask))) \
             / N.sum(mask_CA)

示例#40

0

显示文件

文件： mathUtils.py 项目： ostrokach/biskit

def area(curve, start=0.0, stop=1.0 ):
    """
    Numerically add up the area under the given curve.
    The curve is a 2-D array or list of tupples.
    The x-axis is the first column of this array (curve[:,0]).
    (originally taken from Biskit.Statistics.ROCalyzer)

    @param curve: a list of x,y coordinates
    @type  curve: [ (y,x), ] or N.array
    @param start: lower boundary (in x) (default: 0.0)
    @type  start: float
    @param stop: upper boundary (in x) (default: 1.0)
    @type  stop: float
    @return: the area underneath the curve between start and stop.
    @rtype: float
    """
    ## convert and swap axes
    curve = N.array( curve )
    c = N.zeros( N.shape(curve), curve.dtype )
    c[:,0] = curve[:,1]
    c[:,1] = curve[:,0]

    assert len( N.shape( c ) ) == 2

    ## apply boundaries  ## here we have a problem with flat curves
    mask = N.greater_equal( c[:,1], start )
    mask *= N.less_equal( c[:,1], stop )
    c = N.compress( mask, c, axis=0 )

    ## fill to boundaries -- not absolutely accurate: we actually should
    ## interpolate to the neighboring points instead
    c = N.concatenate((N.array([[c[0,0], start],]), c,
                       N.array([[c[-1,0],stop ],])) )
    x = c[:,1]
    y = c[:,0]

    dx = x[1:] - x[:-1] # distance on x between points 
    dy = y[1:] - y[:-1] # distance on y between points

    areas1 = y[:-1] * dx  # the rectangles between all points
    areas2 = dx * dy / 2.0 # the triangles between all points

    return N.sum(areas1) + N.sum(areas2)

示例#41

0

显示文件

文件： Trajectory.py 项目： ostrokach/biskit

    def getFluct_global(self, mask=None):
        """
        Get RMS of each atom from it's average position in trajectory.
        The frames should be superimposed (fit() ) to a reference.

        @param mask: N x 1 list/Numpy array of 0|1, (N=atoms),
                     atoms to be considered.
        @type  mask: [1|0]

        @return: Numpy array ( N_unmasked x 1 ) of float.
        @rtype: array
        """
        frames = self.frames
        if mask is not None:
            frames = N.compress(mask, frames, 1)

        ## mean position of each atom in all frames
        avg = N.average(frames)

        return N.average(N.sqrt(N.sum(N.power(frames - avg, 2), 2)))

示例#42

0

显示文件

文件： Trajectory.py 项目： ostrokach/biskit

    def getFluct_global( self, mask=None ):
        """
        Get RMS of each atom from it's average position in trajectory.
        The frames should be superimposed (fit() ) to a reference.

        @param mask: N x 1 list/Numpy array of 0|1, (N=atoms),
                     atoms to be considered.
        @type  mask: [1|0]

        @return: Numpy array ( N_unmasked x 1 ) of float.
        @rtype: array
        """
        frames = self.frames
        if mask is not None:
            frames = N.compress( mask, frames, 1 )

        ## mean position of each atom in all frames
        avg = N.average( frames )

        return N.average(N.sqrt(N.sum(N.power(frames - avg, 2), 2) ))

示例#43

0

显示文件

文件： OWImputeProfiles.py 项目： acopar/orange-bio

 def data(self, data):
     if data != None:
         self._data = data
         ##            self._dataMA = chipstat.orng2ma(data)
         self._dataMA = data.toNumpyMA("a")[0]
         # info text
         self.infoa.setText("Examples: %i profiles on %i points" %
                            (self._dataMA.shape[0], self._dataMA.shape[1]))
         numTotalMissing = int(
             Numeric.multiply.reduce(self._dataMA.shape) -
             MA.count(self._dataMA))
         if numTotalMissing > 0:
             numValsByCol = MA.count(self._dataMA, 0)
             numEmptyCol = Numeric.add.reduce(
                 Numeric.where(numValsByCol == 0, 1, 0))
             colNonEmpty = Numeric.compress(
                 numValsByCol != 0, Numeric.arange(self._dataMA.shape[1]))
             dataRemEmptyCol = self._dataMA.take(colNonEmpty, 1)
             self.numRowsMissing = Numeric.add.reduce(
                 Numeric.where(
                     MA.count(dataRemEmptyCol, 1) <
                     dataRemEmptyCol.shape[1], 1, 0))
             s1 = ""
             s2 = ""
             if numEmptyCol > 0: s1 = "s"
             if self.numRowsMissing > 0: s2 = "s"
             self.infob.setText(
                 "missing %i values, %i column%s completely, %i row%s partially"
                 % (numTotalMissing, numEmptyCol, s1, self.numRowsMissing,
                    s2))
         else:
             self.infob.setText("")
     else:
         self._data = None
         self._dataMA = None
         self.infoa.setText("No examples on input")
         self.infob.setText("")
         self.numRowsMissing = 0
     self.setGuiCommonExpChip()
     if self.commitOnChange:
         self.senddata(1)

示例#44

0

显示文件

文件： Ramachandran.py 项目： ostrokach/biskit

    def test_Ramachandran(self):
        """Ramachandran test"""
        self.traj = T.load(T.testRoot() + '/lig_pcr_00/traj.dat')

        self.traj.ref.atoms.set('mass', self.traj.ref.masses())

        self.mdl = [self.traj[0], self.traj[11]]
        self.mdl = [md.compress(md.maskProtein()) for md in self.mdl]

        self.rama = Ramachandran(self.mdl,
                                 name='test',
                                 profileName='mass',
                                 verbose=self.local)

        self.psi = N.array(self.rama.psi)

        if self.local:
            self.rama.show()

        r = N.sum(N.compress(N.logical_not(N.equal(self.psi, None)), self.psi))
        self.assertAlmostEqual(r, -11717.909796797909, 2)

示例#45

0

显示文件

文件： mathUtils.py 项目： ostrokach/biskit

def outliers( a, z=5, it=5 ):
    """
    Iterative detection of outliers in a set of numeric values.
    Requirement: len(a) > 0; outlier detection is only performed if len(a)>2
    
    @param a: array or list of values
    @type  a: [ float ]
    @param z: z-score threshold for iterative refinement of median and SD
    @type  z: float
    @param it: maximum number of iterations
    @type  it: int
    
    @return: outlier mask, median and standard deviation of last iteration
    @rtype: N.array( int ), float, float
    """
    assert( len(a) > 0 )
    mask = N.ones( len(a) )
    out  = N.zeros( len(a) )
    
    if len(a) < 3:
        return out, N.median(a), N.std(a)
    
    for i in range( it ):
        b  = N.compress( N.logical_not(out), a )
        me = N.median( b )
        sd = N.std( b )
        
        bz = N.absolute((N.array( a ) - me) / sd)  # pseudo z-score of each value
        o  = bz > z
        ##        print 'iteration %i: <%5.2f> +- %5.2f -- %i outliers' % (i,me,sd,N.sum(o))

        ## stop if converged or reached bottom
        if (N.sum(o) == N.sum(out)) or (N.sum(o) > len(a) - 3):
            return o, me, sd
            
        out = o
    
    return out, me, sd

示例#46

0

显示文件

def outliers(a, z=5, it=5):
    """
    Iterative detection of outliers in a set of numeric values.
    Requirement: len(a) > 0; outlier detection is only performed if len(a)>2
    
    @param a: array or list of values
    @type  a: [ float ]
    @param z: z-score threshold for iterative refinement of median and SD
    @type  z: float
    @param it: maximum number of iterations
    @type  it: int
    
    @return: outlier mask, median and standard deviation of last iteration
    @rtype: N.array( int ), float, float
    """
    assert (len(a) > 0)
    mask = N.ones(len(a))
    out = N.zeros(len(a))

    if len(a) < 3:
        return out, N.median(a), N.std(a)

    for i in range(it):
        b = N.compress(N.logical_not(out), a)
        me = N.median(b)
        sd = N.std(b)

        bz = N.absolute((N.array(a) - me) / sd)  # pseudo z-score of each value
        o = bz > z
        ##        print 'iteration %i: <%5.2f> +- %5.2f -- %i outliers' % (i,me,sd,N.sum(o))

        ## stop if converged or reached bottom
        if (N.sum(o) == N.sum(out)) or (N.sum(o) > len(a) - 3):
            return o, me, sd

        out = o

    return out, me, sd

示例#47

0

显示文件

文件： numpyExtn.py 项目： JakaKokosar/orange-bio

def condition2indices(condition):
    """Input: condition=[1,0,0,1]; output: indices=[0,3]
    """
    condition = Numeric.asarray(condition)
    assert len(condition.shape) == 1
    return Numeric.compress(condition, Numeric.arange(condition.shape[0]))

示例#48

0

显示文件

文件： estimateReferences.py 项目： VuisterLab/cing

def estimate_reference_single(entry, stats, bounds, ref=0.0, verbose=False,
                              exclude=None, entry_name=None, atom_type='H',
                              exclude_outliers=False,molType='protein'):

    A = 0.
    B = 0.

    S = 0.
    N = 1

    ## loop through all atom types

    classes = decompose_classes(entry, bounds, atom_type,molType=molType)

    if exclude and not entry_name:
        raise TypeError, 'attribute entry_name needs to be set.'

    n_excluded = 0
    n_total = 0
    for key, shifts in classes.items():

##         print entry_name, key

        if not key in stats:
            if verbose:
                print key,'no statistics.'
            continue

        if exclude and (entry_name, key) in exclude:
            print entry_name, key, 'excluded from ref estimation.'
            continue

        ## get statistics for current atom type

        mu, sd = stats[key][:2]
        k = 1./sd**2

        if exclude_outliers is not False:

        ## calculate Z scores and exclude shifts with high Z scores from analysis
            
            Z = abs(shifts-mu)/sd

            mask_include = Numeric.less(Z, exclude_outliers)

            shifts = Numeric.compress(mask_include, shifts)

            n_excluded += len(Z)-Numeric.sum(mask_include)
            n_total += len(Z)

        n = len(shifts)
        
        if not n:
            continue

        A += k*n*(median(shifts)-mu)
        B += k*n

        S += -0.5*len(shifts)*Numeric.log(k)+0.5*k*sum((Numeric.array(shifts)-mu-ref)**2)
        N += n

    if B > 0.:

        ref_mu = A/B
        ref_sd = 1./Numeric.sqrt(B)

    else:
        ref_mu = None
        ref_sd = None

    if exclude_outliers is not False and n_excluded == n_total:
        print '%d/%d outliers discarded' % (n_excluded, n_total)

    return ref_mu, ref_sd, S/N

示例#49

0

显示文件

            #-------------------- Limiting Magnitude Section --------------------#
            # N.B. This section merely determines the limiting magnitude, if it can.
            # The limMag value is currently not used.  Hence the try/except clause.
            #To define the typical 1-sigma limiting magnitude (which now changes from object to object),
            #we take the median of the 5% faintest 1-sigma fluxerror_auto fluxes. This is obviously
            #biased, but shows which is the depth in the deepest part of the images with the current
            #SExtractor parameters, etc.

            # one_sigma_mags = Numeric.compress(Numeric.less_equal(abs(em[i,:]-0.7526),0.02),m[i,:])
            # n_one  = len(one_sigma_mags)
            # test snippet:

            dm = 0.02
            n_one = 0
            while n_one < 21:
                one_sigma_mags = Numeric.compress(
                    Numeric.less_equal(abs(em[i, :] - 0.7526), dm), m[i, :])
                n_one = len(one_sigma_mags)
                dm += 0.01
                if dm > 0.03 and dm < 1.1:
                    message = "Warning: not enough 1-sigma objects in the catalog. Using dm=+-%.2f" % dm
                    self.logfile.write(message)
                if dm >= 1.1:
                    message = "Warning: Stopped searching for 1-sigma objects at dm=+-%.2f" % dm
                    self.logfile.write(message)
                    break

            try:
                limMag = MLab.median(Numeric.sort(one_sigma_mags)[-20:])
                self.logfile.write("Limiting Mag  " + basefits + ":" +
                                   str(limMag))
                print "Limiting Mag  " + basefits + ":" + str(limMag)

示例#50

0

显示文件

文件： Benchmark.py 项目： ostrokach/biskit

    def go(self, model_list = None, reference = None):
        """
        Run benchmarking.

        @param model_list: list of models
                           (default: None S{->} outFolder/L{F_PDBModels})
        @type  model_list: ModelList
        @param reference: reference model
                        (default: None S{->} outFolder/L{F_INPUT_REFERENCE})
        @type  reference: PDBModel
        """
        model_list = model_list or self.outFolder + self.F_PDBModels
        reference = reference or self.outFolder + self.F_INPUT_REFERENCE

        pdb_list = T.load('%s'%model_list)
        reference = PDBModel(reference)

        # check with python 2.4
        iref, imodel = reference.compareAtoms(pdb_list[0])

        mask_casting = N.zeros(len(pdb_list[0]))
        N.put(mask_casting, imodel, 1)

        reference = reference.take(iref)
        #reference_mask_CA = reference_rmsd.maskCA()

        atom_mask = N.zeros(len(pdb_list[0]))
        N.put(atom_mask,imodel,1)

        rmask = pdb_list[0].profile2mask("n_templates", 1,1000)
        amask = pdb_list[0].res2atomMask(rmask)

        mask_final_ref = N.compress(mask_casting, amask)
        mask_final = mask_casting * amask

        reference = reference.compress(mask_final_ref)

        for i in range(len(pdb_list)):

            #self.cad(reference, pdb_list[i])

            pdb_list[i], pdb_wo_if = self.output_fittedStructures(\
                pdb_list[i], reference, i, mask_final)

            fitted_model_if = pdb_list[i].compress(mask_final)
            fitted_model_wo_if = pdb_wo_if.compress(mask_final)

            coord1 = reference.getXyz()
            coord2 = fitted_model_if.getXyz()

            aprofile = self.rmsd_res(coord1,coord2)

            self.calc_rmsd(fitted_model_if, fitted_model_wo_if,
                           reference, pdb_list[i])

            pdb_list[i].atoms.set('rmsd2ref_if', aprofile,
                                  mask=mask_final, default = -1,
                                  comment="rmsd to known reference structure")

        self.output_rmsd_aa(pdb_list)
        self.output_rmsd_ca(pdb_list)
        self.output_rmsd_res(pdb_list)

        self.write_PDBModels(pdb_list)

示例#51

0

显示文件

文件： ReduceCoordinates.py 项目： ostrokach/biskit

    def makeMap( self, maxPerCenter=4 ):
        """
        Calculate mapping between complete and reduced atom list.
        Creates a (list of lists of int, list of atom dictionaries)
        containing groups of atom indices into original model, new center atoms
        
        @param maxPerCenter: max number of atoms per side chain center atom
                             (default: 4)
        @type  maxPerCenter: int
        """

        resIndex = self.m_sorted.resIndex()
        resModels= self.m_sorted.resModels()
        m = self.m_sorted

        self.currentAtom = 0

        groups = []
        atoms = DictList()

        for i in range( len( resIndex ) ):

            first_atom = resIndex[ i ]

            if i < len( resIndex )-1:
                last_atom  = resIndex[ i+1 ] - 1
            else:
                last_atom = len( self.a_indices ) - 1

            a = m.atoms[ first_atom ]

##             res_name  = m.atoms[ first_atom ]['residue_name']
##             segid     = m.atoms[ first_atom ]['segment_id']
##             chainId   = m.atoms[ first_atom ]['chain_id']
##             res_number= m.atoms[ first_atom ]['serial_number']

            ## position of this residue's atoms in original PDBModel (unsorted)
            a_indices = self.a_indices[ first_atom : last_atom+1 ]

            ## for each center create list of atom indices and a center atom
            if a['residue_name'] != 'GLY' and a['residue_name'] != 'ALA':

                bb_a_indices = N.compress( resModels[i].maskBB(), a_indices)
                sc_a_indices = N.compress(
                    N.logical_not( resModels[i].maskBB()), a_indices )

                sc_groups = self.group( sc_a_indices, maxPerCenter )

            else:
                bb_a_indices = a_indices
                sc_groups = []

            groups += [ bb_a_indices ]
            atoms  += [ self.nextAtom(a, 'BB') ]

            i = 0
            for g in sc_groups:
                groups += [ g ]
                atoms  += [ self.nextAtom( a, 'SC%i'%i) ]
                i += 1

        self.groups = groups
        self.atoms = atoms

示例#52

0

显示文件

文件： numpyExtn.py 项目： JakaKokosar/orange-bio

def getPositions(m, val):
    """Input: arbitrary (masked) array and a value from that array;
    Output: array of positions of the given value in a flat m;
    """
    m = MA.asarray(m)
    return Numeric.compress(MA.equal(MA.ravel(m),val), Numeric.arange(Numeric.multiply.reduce(m.shape)))

示例#53

0

显示文件

文件： statpack.py 项目： VuisterLab/cing

def histogram2d_2(data, bins, xrange = None, yrange = None):

    try:
        data = Numeric.array(data, Float)
    except:
        raise TypeError, 'data: list or array excepted, %s given', \
              str(type(data))

    if not len(shape(data)) == 2:
        raise ValueError, 'shape of data array must be (n,2)'
    
    if type(bins) == type(0):
        bins = (bins, bins)
    elif not type(bins) in (type([]), type(())):
        raise TypeError, 'bins: int, list or tuple expected. %s given', \
              str(type(bins))

    if yrange is None:
        yrange = (min(data[:,1]), max(data[:,1]))

    x_min = min(data[:,0])
    x_max = max(data[:,0])
    x_spacing = (x_max - x_min) / bins[0]

    ystep = abs(yrange[1] - yrange[0]) / float(bins[1])

    X = []
    Y = []
    N = []

    for y in arange(yrange[0] + ystep , yrange[1] + ystep, ystep):

        ## collect values which are in [y,y+ystep]

        mask = less_equal(data[:,1], y)
        set = compress(mask, data, 0)

        ## create histogram for x-dimension

        if shape(set[:,0])[0]:
            x_histogram = histogram(set[:,0], bins[0], range = xrange)
        else:
            x_bins = arange(x_min + x_spacing / 2., x_max + x_spacing / 2.,
                            x_spacing)

            ## no. of x_bins might be larger as it should be
            ## (due to numerical errors).

            if shape(x_bins)[0] - 1 == bins[0]:
                x_bins = x_bins[:-1]

            x_histogram = Numeric.concatenate((x_bins[:,NewAxis],
                                       zeros((bins[0],1))), 1)

        ## append #point per cell (x_i, y_i, n_i)

        X.append(x_histogram[:,0])
        N.append(x_histogram[:,1])

        s = ones(shape(x_histogram)[0]) * (y - ystep / 2.)

        Y.append(s)

        ## discard processed data

        data = Numeric.compress(Numeric.logical_not(mask), data, 0)

    return Numeric.array(X), Numeric.array(Y), Numeric.array(N)

示例#54

0

显示文件

文件： ChainSeparator.py 项目： ostrokach/biskit

    def _removeDuplicateChains(self, chainMask=None):
        """
        Get rid of identical chains by comparing all chains with Blast2seq.

        @param chainMask: chain mask for overriding the
                          chain identity checking (default: None)
        @type  chainMask: [int]
        
        @return: number of chains removed
        @rtype: int
        """
        chainCount = len(self.chains)
        matrix = 1.0 * N.zeros((chainCount,chainCount))
        chain_ids = []

        ## create identity matrix for all chains against all chains
        for i in range(0, chainCount):
            chain_ids = chain_ids + [self.chains[i].chain_id] # collect for log file
            for j in range(i, len(self.chains)):

                # convert 3-letter-code res list into 1-letter-code String
                seq1 = singleAA( self.chains[i].sequence() )
                seq2 = singleAA( self.chains[j].sequence() )

##                 if len(seq1) > len(seq2):           # take shorter sequence
##                 # aln len at least half the len of the shortest sequence
##                     alnCutoff = len(seq2) * 0.5     
##                 else:
##                     alnCutoff = len(seq1) * 0.5
##                 if id['aln_len'] > alnCutoff:
##                     matrix[i,j] = id['aln_id']
##                 else:                           # aln length too short, ignore
##                     matrix[i,j] = 0

                matrix[i,j] = self._compareSequences( seq1, seq2 )

        ## report activity
        self.log.add("\n  Chain ID's of compared chains: "+str(chain_ids))
        self.log.add("  Cross-Identity between chains:\n"+str(matrix))
        self.log.add("  Identity threshold used: "+str(self.threshold))
        
        ## override the automatic chain deletion by supplying a
        ## chain mask to this function
        if chainMask:
            if len(chainMask) == chainCount:
                self.chains = N.compress(chainMask, self.chains)
                self.log.add("NOTE: chain mask %s used for removing chains.\n"%chainMask)
           
            else:
                self.log.add("########## ERROR ###############")
                self.log.add("# Chain mask is only %i chains long"%len(chainMask))
                self.log.add("# when a mask of length %i is needed"%chainCount)
                self.log.add("# No cleaning will be performed.\n")

        if not chainMask:
            ## look at diagonals in "identity matrix"
            ## (each chain against each)
            duplicate = len(self.chains)
            for offset in range(1,chainCount):
                diag = N.diagonal(matrix, offset ,0,1)
                # diagonal of 1's mark begin of duplicate
                avg = 1.0 * N.sum(diag)/len(diag)
                if (avg >= self.threshold):
                    duplicate = offset
                    break
            self.chains = self.chains[:duplicate]
            self.log.add("NOTE: Identity matrix will be used for removing identical chains.")

        ## report activit
        self.log.add(str(chainCount - len(self.chains))+\
                     " chains have been removed.\n")
        
        # how many chains have been removed?
        return (chainCount - len(self.chains))