def __atomContacts(self, cutoff, rec_mask, lig_mask, cache):
    """
    Intermolecular distances below cutoff after applying the two masks.

    A distance matrix found in C{self.pw_dist} is re-used if its shape
    fits the two masks; otherwise the matrix is re-calculated from the
    masked receptor and ligand coordinates.

    @param cutoff: cutoff for B{atom-atom} contact in Angstrom
    @type  cutoff: float
    @param rec_mask: atom mask
    @type  rec_mask: [1|0]
    @param lig_mask: atom mask
    @type  lig_mask: [1|0]
    @param cache: cache pairwise atom distance matrix
    @type  cache: 1|0

    @return: atom contact matrix, array sum_rec_mask x sum_lig_mask
    @rtype: array
    """
    ## a cached matrix is only valid if it was built for masks of this size
    dist = getattr(self, 'pw_dist', None)
    expected_shape = (N0.sum(rec_mask), N0.sum(lig_mask))

    if dist is None or N0.shape(dist) != expected_shape:
        ## coordinates are only needed when the matrix has to be rebuilt
        rec_xyz = self.rec().getXyz()
        lig_xyz = self.lig().getXyz()

        ## pair-wise distances -> atoms_rec x atoms_lig
        dist = self.__pairwiseDistances(N0.compress(rec_mask, rec_xyz, 0),
                                        N0.compress(lig_mask, lig_xyz, 0))

    if cache:
        self.pw_dist = dist

    ## reduce to 1 (distance < cutoff) or 0 -> n_atoms_rec x n_atoms_lig
    return N0.less(dist, cutoff)
def contactResDistribution(self, cm=None):
    """
    Count occurrence of residues in protein-protein interface.

    @param cm: pre-calculated contact matrix (default: None, the result
               of resContacts() is used)
    @type  cm: matrix

    @return: dict {'A':3, 'C':1, .. } with one entry for each letter
             returned by molUtils.allAA()
    @rtype: dict
    """
    if cm is None:
        cm = self.resContacts()

    ## masks of residues involved in contacts; the matrix itself is summed
    ## for the ligand, its transpose for the receptor
    maskLig = N0.sum(cm)
    maskRec = N0.sum(N0.transpose(cm))

    ## list() splits a plain string into one letter per residue -- handed
    ## over unsplit, numpy would treat the whole string as a single element
    seqLig = N0.compress(maskLig, list(self.lig().sequence()))
    seqRec = N0.compress(maskRec, list(self.rec().sequence()))

    ## convert back to one string holding all interface residues
    seq = ''.join(seqLig) + ''.join(seqRec)

    ## count occurrence of letters
    return {aa: seq.count(aa) for aa in molUtils.allAA()}
def contactResDistribution( self, cm=None ):
    """
    Count occurrence of residues in protein-protein interface.

    @param cm: pre-calculated contact matrix (default: None, the result
               of resContacts() is used)
    @type  cm: matrix

    @return: dict {'A':3, 'C':1, .. } with one entry for each letter
             returned by molUtils.allAA()
    @rtype: dict
    """
    if cm is None:
        cm = self.resContacts()

    ## masks of residues involved in contacts; the matrix itself is summed
    ## for the ligand, its transpose for the receptor
    maskLig = N0.sum(cm)
    maskRec = N0.sum(N0.transpose(cm))

    ## list() splits a plain string into one letter per residue -- handed
    ## over unsplit, numpy would treat the whole string as a single element
    seqLig = N0.compress(maskLig, list(self.lig().sequence()))
    seqRec = N0.compress(maskRec, list(self.rec().sequence()))

    ## convert back to one string holding all interface residues
    seq = ''.join(seqLig) + ''.join(seqRec)

    ## count occurrence of letters
    return {aa: seq.count(aa) for aa in molUtils.allAA()}
def __atomContacts(self, cutoff, rec_mask, lig_mask, cache):
    """
    Intermolecular distances below cutoff after applying the two masks.

    A distance matrix found in C{self.pw_dist} is re-used if its shape
    fits the two masks; otherwise the matrix is re-calculated from the
    masked receptor and ligand coordinates.

    @param cutoff: cutoff for B{atom-atom} contact in Angstrom
    @type  cutoff: float
    @param rec_mask: atom mask
    @type  rec_mask: [1|0]
    @param lig_mask: atom mask
    @type  lig_mask: [1|0]
    @param cache: cache pairwise atom distance matrix
    @type  cache: 1|0

    @return: atom contact matrix, array sum_rec_mask x sum_lig_mask
    @rtype: array
    """
    ## a cached matrix is only valid if it was built for masks of this size
    dist = getattr(self, 'pw_dist', None)
    expected_shape = (N0.sum(rec_mask), N0.sum(lig_mask))

    if dist is None or N0.shape(dist) != expected_shape:
        ## coordinates are only needed when the matrix has to be rebuilt
        rec_xyz = self.rec().getXyz()
        lig_xyz = self.lig().getXyz()

        ## pair-wise distances -> atoms_rec x atoms_lig
        dist = self.__pairwiseDistances(N0.compress(rec_mask, rec_xyz, 0),
                                        N0.compress(lig_mask, lig_xyz, 0))

    if cache:
        self.pw_dist = dist

    ## reduce to 1 (distance < cutoff) or 0 -> n_atoms_rec x n_atoms_lig
    return N0.less(dist, cutoff)
def pairwiseRmsd( self, aMask=None, noFit=0 ):
    """
    Calculate rmsd between each 2 coordinate frames.

    :param aMask: atom mask
    :type  aMask: [1|0]
    :param noFit: if true, the rmsd is taken directly from the coordinate
                  differences of the two frames; otherwise the rmsd
                  reported by rmsFit.match is used (default: 0)
    :type  noFit: 1|0

    :return: frames x frames array of float
    :rtype: array
    """
    if aMask is None:
        frames = self.frames
    else:
        frames = N0.compress( aMask, self.frames, 1 )

    n_frames = len( frames )
    result = N0.zeros( (n_frames, n_frames), N0.Float32 )

    ## the matrix is symmetric, only the upper triangle is calculated
    for i in range( n_frames ):
        for j in range( i + 1, n_frames ):

            if noFit:
                d = N0.sqrt( N0.sum( N0.power( frames[i] - frames[j], 2 ), 1 ) )
                rmsd = N0.sqrt( N0.average( d**2 ) )
            else:
                rt, rmsdLst = rmsFit.match( frames[i], frames[j], 1 )
                rmsd = rmsdLst[0][1]

            result[i, j] = result[j, i] = rmsd

    return result
def residusMaximus( self, atomValues, mask=None ):
    """
    Take list of value per atom, return list where all atoms of any
    residue are set to the highest value of any atom in that residue.
    (after applying mask)

    :param atomValues: list 1 x N, values per atom
    :type  atomValues: [ float ]
    :param mask: list 1 x N, 0|1, 'master' atoms of each residue
                 (default: None, all atoms)
    :type  mask: [1|0]

    :return: Numpy array 1 x N of float
    :rtype: array
    """
    if mask is None:
        mask = N0.ones( len( self.frames[0] ), N0.Int32 )

    ## eliminate all values that do not belong to the selected atoms
    masked = atomValues * mask

    ## fetch the residue map once instead of twice per residue
    ## (assumes resMap() gives the residue index of each atom with the
    ## highest index in last position -- TODO confirm)
    resMap = self.resMap()

    result = []

    ## set all atoms of each residue to uniform value
    for res in range( resMap[-1] + 1 ):

        ## get atom entries for this residue
        resAtoms = N0.compress( N0.equal( resMap, res ), masked )

        ## get maximum value
        masterValue = max( resAtoms )

        ## one copy of the maximum for every atom of the residue
        result += resAtoms * 0.0 + masterValue

    return N0.array( result )
def takeFrames( self, indices ):
    """
    Return a copy of the trajectory containing only the specified frames.

    :param indices: positions to take; positions beyond the last frame
                    are dropped without warning
    :type  indices: [int]

    :return: copy of this Trajectory (fewer frames, semi-deep copy of ref)
    :rtype: Trajectory
    """
    ## remove out-of-bound indices
    within_range = N0.less( indices, len( self.frames ) )
    indices = N0.compress( within_range, indices )

    r = self.__class__()

    ## this step takes some time for large frames !
    r.frames = N0.take( self.frames, indices, 0 )

    ## semi-deep copy of reference model
    all_atoms = list( range( self.ref.lenAtoms() ) )
    r.setRef( self.ref.take( all_atoms ) )

    if self.frameNames is not None:
        r.frameNames = N0.take( self.frameNames, indices, 0 )
        r.frameNames = [ ''.join( name ) for name in r.frameNames.tolist() ]

    r.pc = self.__takePca( indices )
    r.profiles = self.profiles.take( indices )

    ## the residue index is shared, not copied
    r.resIndex = self.resIndex

    return r
def centerSurfDist(model, surf_mask, mask=None):
    """
    Calculate the longest and shortest distance from the center of mass
    of the molecule to its surface atoms.

    @param model: structure providing centerOfMass(), getXyz() and
                  maskHeavy()
    @type  model: PDBModel
    @param surf_mask: atom surface mask, needed for minimum surface distance
    @type  surf_mask: [1|0]
    @param mask: atoms to combine with the surface mask; only atoms set in
                 both masks are measured (default: None, model.maskHeavy())
    @type  mask: [1|0]

    @return: max distance, min distance
    @rtype: float, float
    """
    if mask is None:
        mask = model.maskHeavy()

    ## reference point
    center = model.centerOfMass()

    ## coordinates of atoms selected by both masks
    selected = N0.compress(mask * surf_mask, model.getXyz(), 0)

    ## distance of each selected atom from the center
    dist = N0.sqrt(N0.sum((selected - center)**2, 1))

    return max(dist), min(dist)
def addDensity( self, radius=6, minasa=None, profName='density' ):
    """
    Count the number of heavy atoms within the given radius of each atom
    and store the counts as atom profile |profName|. If |minasa| is given,
    counts are only collected for atoms selected by the 'relAS' profile;
    all other atoms receive the default value -1.

    @param radius: in Angstrom (default: 6)
    @type  radius: float
    @param minasa: relative exposed surface - 0 to 100% (default: None,
                   all atoms are treated as surface atoms)
    @type  minasa: float
    @param profName: name of the resulting atom profile
                     (default: 'density')
    @type  profName: str
    """
    ## coordinates of heavy atoms only
    heavy_xyz = N0.compress( self.m.maskHeavy(), self.m.getXyz(), 0 )

    ## NOTE(review): the comparison is only unambiguous if profile() hands
    ## back the scalar default here -- confirm what it returns otherwise
    if minasa and self.m.profile( 'relAS', 0 ) == 0:
        self.addASA()

    if minasa:
        mSurf = self.m.profile2mask( 'relAS', minasa )
    else:
        mSurf = N0.ones( self.m.lenAtoms() )

    ## squared distances are compared, which saves the square root
    squared_radius = radius**2

    ## loop over all surface atoms
    contacts = []
    for i in N0.nonzero( mSurf ):
        squared_dist = N0.sum( ( heavy_xyz - self.m.xyz[i] )**2, 1 )

        ## -1 presumably removes the atom itself from its own count;
        ## NOTE(review): that only fits if atom i is a heavy atom
        contacts.append( N0.sum( N0.less( squared_dist, squared_radius ) ) - 1 )

    self.m.atoms.set( profName, contacts, mSurf, default=-1,
                      comment='atom density radius %3.1fA' % radius,
                      version= T.dateString() + ' ' + self.version() )
def __extractLigandMatrix(self, fcomplex):
    """
    Compare the ligand structure from a hex complex with the original
    ligand pdb and determine the transformation between the CA atoms of
    the two. The result is returned, it is not stored on the instance.

    @param fcomplex: pdb file with hex complex
    @type  fcomplex: str (presumably a file name -- TODO confirm)

    @return: rotation matrix and translation matrix as tuple
    @rtype: (array, array)
    """
    docked_pdb = self._extractLigandStructure(fcomplex)

    ## select the CA rows of both coordinate arrays; the axis is given
    ## explicitly (as everywhere else coordinates are masked) because the
    ## default axis of compress does not select whole atoms (rows)
    xyz_docked = N0.compress(docked_pdb.maskCA(), docked_pdb.xyz, 0)
    xyz_template = N0.compress(self.lig_model.maskCA(),
                               self.lig_model.xyz, 0)

    r, t = self._findTransformation(xyz_docked, xyz_template)

    return (r, t)
def __extractLigandMatrix(self, fcomplex):
    """
    Compare the ligand structure from a hex complex with the original
    ligand pdb and determine the transformation between the CA atoms of
    the two. The result is returned, it is not stored on the instance.

    @param fcomplex: pdb file with hex complex
    @type  fcomplex: str (presumably a file name -- TODO confirm)

    @return: rotation matrix and translation matrix as tuple
    @rtype: (array, array)
    """
    docked_pdb = self._extractLigandStructure(fcomplex)

    ## select the CA rows of both coordinate arrays; the axis is given
    ## explicitly (as everywhere else coordinates are masked) because the
    ## default axis of compress does not select whole atoms (rows)
    xyz_docked = N0.compress(docked_pdb.maskCA(), docked_pdb.xyz, 0)
    xyz_template = N0.compress(self.lig_model.maskCA(),
                               self.lig_model.xyz, 0)

    r, t = self._findTransformation(xyz_docked, xyz_template)

    return (r, t)
def phi_and_psi(self, model):
    """
    Calculate two backbone torsion angles for all residues in model and
    append them, chain by chain, to self.phi and self.psi::

      self.phi - torsion N - CA - C - N(next residue)
                 - last position in a chain = None
      self.psi - torsion C(previous residue) - N - CA - C
                 - first position in a chain = None

    NOTE(review): by the usual convention the first torsion is called psi
    and the second one phi; the two names look swapped here -- confirm
    against the code that consumes self.phi / self.psi before changing.

    @param model: PDBModel
    @type  model: PDBModel
    """
    for c in range(model.lenChains(breaks=1)):
        chain = model.takeChains([c], breaks=1)
        xyz = chain.xyz

        ## backbone atom coordinates, one row per residue
        ca = N0.compress(chain.maskCA(), xyz, 0)
        n = N0.compress(chain.mask(['N']), xyz, 0)
        co = N0.compress(chain.mask(['C']), xyz, 0)
        n_res = len(n)

        ## N - CA - C - N of next residue; not defined for the last residue
        self.phi += [self.dihedral(n[i], ca[i], co[i], n[i + 1])
                     for i in range(n_res - 1)]
        self.phi += [None]

        ## C of previous residue - N - CA - C; not defined for the first
        self.psi += [None]
        self.psi += [self.dihedral(co[i - 1], n[i], ca[i], co[i])
                     for i in range(1, n_res)]
def plotContactDensity( self, step=1, cutoff=4.5 ):
    """
    Example. Plot histogram of contact density. Something wrong??

    @param step: passed on to averageContacts (default: 1)
    @param cutoff: passed on to averageContacts (default: 4.5)

    @raise ComplexTrajError: if gnuplot program is not installed
    """
    if not gnuplot.installed:
        raise ComplexTrajError('gnuplot program is not installed')

    ## flatten the averaged contacts and keep the non-zero entries only
    flat = N0.ravel( self.averageContacts( step, cutoff ) )
    non_zero = N0.compress( flat, flat )

    gnuplot.plot( hist.density( non_zero, 10 ) )
def plotContactDensity(self, step=1, cutoff=4.5):
    """
    Example. Plot histogram of contact density. Something wrong??

    @param step: passed on to averageContacts (default: 1)
    @param cutoff: passed on to averageContacts (default: 4.5)

    @raise ComplexTrajError: if gnuplot program is not installed
    """
    if not gnuplot.installed:
        raise ComplexTrajError('gnuplot program is not installed')

    ## flatten the averaged contacts and keep the non-zero entries only
    flat = N0.ravel(self.averageContacts(step, cutoff))
    non_zero = N0.compress(flat, flat)

    gnuplot.plot(hist.density(non_zero, 10))
def phi_and_psi( self, model ):
    """
    Calculate two backbone torsion angles for all residues in model and
    append them, chain by chain, to self.phi and self.psi::

      self.phi - torsion N - CA - C - N(next residue)
                 - last position in a chain = None
      self.psi - torsion C(previous residue) - N - CA - C
                 - first position in a chain = None

    NOTE(review): by the usual convention the first torsion is called psi
    and the second one phi; the two names look swapped here -- confirm
    against the code that consumes self.phi / self.psi before changing.

    @param model: PDBModel
    @type  model: PDBModel
    """
    for c in range( model.lenChains( breaks=1 ) ):
        chain = model.takeChains( [c], breaks=1 )
        xyz = chain.xyz

        ## backbone atom coordinates, one row per residue
        ca = N0.compress( chain.maskCA(), xyz, 0 )
        n = N0.compress( chain.mask( ['N'] ), xyz, 0 )
        co = N0.compress( chain.mask( ['C'] ), xyz, 0 )
        n_res = len( n )

        ## N - CA - C - N of next residue; not defined for the last residue
        self.phi += [ self.dihedral( n[i], ca[i], co[i], n[i + 1] )
                      for i in range( n_res - 1 ) ]
        self.phi += [None]

        ## C of previous residue - N - CA - C; not defined for the first
        self.psi += [None]
        self.psi += [ self.dihedral( co[i - 1], n[i], ca[i], co[i] )
                      for i in range( 1, n_res ) ]
def random2DArray( matrix, ranNr=1, mask=None ):
    """
    Create randomized 2D array containing ones and zeros.

    :param matrix: matrix to randomize
    :type  matrix: 2D array
    :param ranNr: number of matrices to add up (default: 1)
    :type  ranNr: integer
    :param mask: mask with one entry per matrix element OR None; only
                 unmasked positions are randomized, all others stay 0
                 (default: None)
    :type  mask: list(1|0)

    :return: 2D array or |ranNr| added contact matrices
    :rtype: 2D array

    :raise MathUtilError: if mask does not fit matrix
    """
    ## get shape of matrix
    a, b = N0.shape( matrix )
    flat = N0.ravel( matrix )

    ## get array from matrix that is to be randomized; the mask is tested
    ## against None because the truth value of an array mask is ambiguous
    if mask is None:
        array = flat
    else:
        if len( mask ) != len( flat ):
            raise MathUtilError(
                'MatUtils.random2DArray - mask of incorrect length' +
                '\tMatrix length: %i Mask length: %i'
                % ( len( flat ), len( mask ) ) )

        array = N0.compress( mask, flat )

    ## number of ones and length of array
    nOnes = int( N0.sum( array ) )
    lenArray = len( array )
    ranArray = N0.zeros( lenArray )

    ## create random array
    for n in range( ranNr ):
        ranArray += randomMask( nOnes, lenArray )

    if mask is None:
        return N0.reshape( ranArray, (a, b) )

    ## blow up to size of original matrix
    r = N0.zeros( a * b )
    N0.put( r, N0.nonzero( mask ), ranArray )
    return N0.reshape( r, (a, b) )
def random2DArray(matrix, ranNr=1, mask=None):
    """
    Create randomized 2D array containing ones and zeros.

    :param matrix: matrix to randomize
    :type  matrix: 2D array
    :param ranNr: number of matrices to add up (default: 1)
    :type  ranNr: integer
    :param mask: mask with one entry per matrix element OR None; only
                 unmasked positions are randomized, all others stay 0
                 (default: None)
    :type  mask: list(1|0)

    :return: 2D array or |ranNr| added contact matrices
    :rtype: 2D array

    :raise MathUtilError: if mask does not fit matrix
    """
    ## get shape of matrix
    a, b = N0.shape(matrix)
    flat = N0.ravel(matrix)

    ## get array from matrix that is to be randomized; the mask is tested
    ## against None because the truth value of an array mask is ambiguous
    if mask is None:
        array = flat
    else:
        if len(mask) != len(flat):
            raise MathUtilError(
                'MatUtils.random2DArray - mask of incorrect length' +
                '\tMatrix length: %i Mask length: %i'
                % (len(flat), len(mask)))

        array = N0.compress(mask, flat)

    ## number of ones and length of array
    nOnes = int(N0.sum(array))
    lenArray = len(array)
    ranArray = N0.zeros(lenArray)

    ## create random array
    for n in range(ranNr):
        ranArray += randomMask(nOnes, lenArray)

    if mask is None:
        return N0.reshape(ranArray, (a, b))

    ## blow up to size of original matrix
    r = N0.zeros(a * b)
    N0.put(r, N0.nonzero(mask), ranArray)
    return N0.reshape(r, (a, b))
def pca( self, atomMask=None, frameMask=None, fit=1 ):
    """
    Calculate principal components of trajectory frames.

    :param atomMask: 1 x N_atoms, [111001110..] atoms to consider
                     (default: all)
    :type  atomMask: [1|0]
    :param frameMask: 1 x N_frames, [001111..] frames to consider
                      (default all )
    :type  frameMask: [1|0]
    :param fit: superimpose the frames (self.fit with atomMask) before
                the analysis (default: 1)
    :type  fit: 1|0

    :return: the three arrays U, V * L and L**2 derived from the singular
             value decomposition V, L, U of the centered coordinates;
             documented so far as (N_frames x N_frames), (1 x N_frames),
             projection of each frame in PC space, eigenvalue of each PC
    :rtype: array, array, array
    """
    if frameMask is None:
        frameMask = N0.ones( len( self.frames ), N0.Int32 )

    if atomMask is None:
        atomMask = N0.ones( self.getRef().lenAtoms(), N0.Int32 )

    if fit:
        self.fit( atomMask )

    ## the average is taken over all frames, also over masked ones
    refxyz = N0.average( self.frames, 0 )

    data = N0.compress( frameMask, self.frames, 0 )
    data = data - refxyz
    data = N0.compress( atomMask, data, 1 )

    ## reduce to 2D array, one flattened row per frame; an explicit list
    ## is required because map() is lazy in python 3 and would end up as
    ## a single object instead of a 2D array
    data = N0.array( [ N0.ravel( frame ) for frame in data ] )

    V, L, U = LA.svd( data )

    return U, V * L, N0.power( L, 2 )
def __init__( self, model, maxPerCenter=4 ):
    """
    Prepare reduction of coordinates from a given model.

    @param model: reference model defining atom content and order
    @type  model: PDBModel
    @param maxPerCenter: max number of atoms per side chain center atom
                         (default: 4)
    @type  maxPerCenter: int
    """
    self.m = model
    self.__addMassProfile( self.m )

    def cmpAtoms( a1, a2 ):
        """
        Comparison function for bringing atoms into standard order
        within residues as defined by L{ aaAtoms }. Atoms that are not
        listed for the residue of a1 are ordered by name instead.

        @param a1: atom record (read via ['name'] and ['residue_name'])
        @type  a1: dict-like
        @param a2: atom record (read via ['name'])
        @type  a2: dict-like

        @return: result of the comparison
        @rtype: [-1|0|1]
        """
        ## cmp vanished in python 3.x (but still available in
        ## past.builtins)
        def cmp( x, y ):
            return (x > y) - (x < y)

        standard_order = self.aaAtoms[ a1['residue_name'] ]

        try:
            return cmp( standard_order.index( a1['name'] ),
                        standard_order.index( a2['name'] ) )
        except ValueError:
            ## at least one atom is unknown for this residue
            return cmp( a1['name'], a2['name'] )

    ## sort atoms within residues into standard order
    self.a_indices = self.m.argsort( cmpAtoms )
    self.m_sorted = self.m.sort( self.a_indices )

    ## remove H from internal model and from list of atom positions;
    ## the result of remove() is used as mask of the atoms that are left
    ## (presumably that is what it returns -- TODO confirm)
    maskH = self.m_sorted.remove( self.m_sorted.maskH() )
    self.a_indices = N0.compress( maskH, self.a_indices )

    self.makeMap( maxPerCenter )
def area(curve, start=0.0, stop=1.0):
    """
    Numerically add up the area under the given curve.
    The curve is a 2-D array or list of tuples.
    The x-axis is the first column of this array (curve[:,0]).
    (originally taken from biskit.Statistics.ROCalyzer)

    Points outside start..stop are dropped and the curve is extended
    horizontally from its first and last remaining point to the two
    boundaries (no interpolation to the neighboring points).

    :param curve: a list of x,y coordinates
    :type  curve: [ (x,y), ] or N0.array
    :param start: lower boundary (in x) (default: 0.0)
    :type  start: float
    :param stop: upper boundary (in x) (default: 1.0)
    :type  stop: float

    :return: the area underneath the curve between start and stop.
    :rtype: float
    """
    curve = N0.array(curve)

    ## working copy with swapped axes: column 0 holds y, column 1 holds x
    c = N0.zeros(N0.shape(curve), curve.dtype)
    c[:, 0] = curve[:, 1]
    c[:, 1] = curve[:, 0]

    assert len(N0.shape(c)) == 2

    ## apply boundaries
    ## here we have a problem with flat curves
    in_range = N0.greater_equal(c[:, 1], start)
    in_range *= N0.less_equal(c[:, 1], stop)
    c = N0.compress(in_range, c, axis=0)

    ## fill to boundaries -- not absolutely accurate: we actually should
    ## interpolate to the neighboring points instead
    head = N0.array([[c[0, 0], start]])
    tail = N0.array([[c[-1, 0], stop]])
    c = N0.concatenate((head, c, tail))

    x, y = c[:, 1], c[:, 0]

    dx = x[1:] - x[:-1]  # distance on x between points
    dy = y[1:] - y[:-1]  # distance on y between points

    ## each segment contributes a rectangle plus the triangle on top of it
    rectangles = y[:-1] * dx
    triangles = dx * dy / 2.0

    return N0.sum(rectangles) + N0.sum(triangles)
def area(curve, start=0.0, stop=1.0 ):
    """
    Numerically add up the area under the given curve.
    The curve is a 2-D array or list of tuples.
    The x-axis is the first column of this array (curve[:,0]).
    (originally taken from biskit.Statistics.ROCalyzer)

    Points outside start..stop are dropped and the curve is extended
    horizontally from its first and last remaining point to the two
    boundaries (no interpolation to the neighboring points).

    :param curve: a list of x,y coordinates
    :type  curve: [ (x,y), ] or N0.array
    :param start: lower boundary (in x) (default: 0.0)
    :type  start: float
    :param stop: upper boundary (in x) (default: 1.0)
    :type  stop: float

    :return: the area underneath the curve between start and stop.
    :rtype: float
    """
    curve = N0.array( curve )

    ## working copy with swapped axes: column 0 holds y, column 1 holds x
    c = N0.zeros( N0.shape( curve ), curve.dtype )
    c[:, 0] = curve[:, 1]
    c[:, 1] = curve[:, 0]

    assert len( N0.shape( c ) ) == 2

    ## apply boundaries
    ## here we have a problem with flat curves
    in_range = N0.greater_equal( c[:, 1], start )
    in_range *= N0.less_equal( c[:, 1], stop )
    c = N0.compress( in_range, c, axis=0 )

    ## fill to boundaries -- not absolutely accurate: we actually should
    ## interpolate to the neighboring points instead
    head = N0.array( [[c[0, 0], start]] )
    tail = N0.array( [[c[-1, 0], stop]] )
    c = N0.concatenate( (head, c, tail) )

    x, y = c[:, 1], c[:, 0]

    dx = x[1:] - x[:-1]  # distance on x between points
    dy = y[1:] - y[:-1]  # distance on y between points

    ## each segment contributes a rectangle plus the triangle on top of it
    rectangles = y[:-1] * dx
    triangles = dx * dy / 2.0

    return N0.sum( rectangles ) + N0.sum( triangles )
def getFluct_global( self, mask=None ):
    """
    Get the average distance of each atom from its average position in
    the trajectory. The frames should be superimposed (fit() ) to a
    reference.

    :param mask: N x 1 list/Numpy array of 0|1, (N=atoms),
                 atoms to be considered.
    :type  mask: [1|0]

    :return: Numpy array ( N_unmasked x 1 ) of float.
    :rtype: array
    """
    if mask is None:
        frames = self.frames
    else:
        frames = N0.compress( mask, self.frames, 1 )

    ## mean position of each atom in all frames
    ## (relies on the default axis of N0.average, presumably the frame
    ## axis -- TODO confirm)
    avg = N0.average( frames )

    ## distance of each atom from its mean position, frame by frame
    deviation = N0.sqrt( N0.sum( N0.power( frames - avg, 2 ), 2 ) )

    return N0.average( deviation )
def outliers(a, z=5, it=5):
    """
    Iterative detection of outliers in a set of numeric values.
    Requirement: len(a) > 0; outlier detection is only performed if
    len(a) > 2 and at least one iteration is requested.

    In each round, median and standard deviation are calculated from the
    values not flagged in the previous round and every value further than
    z standard deviations away from this median is flagged. The search
    stops as soon as the number of outliers does not change any more, if
    fewer than 3 values would be left, or after |it| rounds.

    :param a: array or list of values
    :type  a: [ float ]
    :param z: z-score threshold for iterative refinement of median and SD
    :type  z: float
    :param it: maximum number of iterations
    :type  it: int

    :return: outlier mask, median and standard deviation of last iteration
    :rtype: N0.array( int ), float, float
    """
    assert len(a) > 0

    out = N0.zeros(len(a))

    ## nothing to refine: too few values or no iteration requested
    ## (without this guard, it < 1 would leave median and SD undefined)
    if len(a) < 3 or it < 1:
        return out, N0.median(a), N0.std(a)

    values = N0.array(a)

    for i in range(it):
        ## median and SD of the values that are not (yet) outliers
        b = N0.compress(N0.logical_not(out), a)
        me = N0.median(b)
        sd = N0.std(b)

        bz = N0.absolute((values - me) / sd)  # pseudo z-score of each value
        o = bz > z

        ## stop if converged or reached bottom
        if (N0.sum(o) == N0.sum(out)) or (N0.sum(o) > len(a) - 3):
            return o, me, sd

        out = o

    return out, me, sd
def outliers( a, z=5, it=5 ):
    """
    Iterative detection of outliers in a set of numeric values.
    Requirement: len(a) > 0; outlier detection is only performed if
    len(a) > 2 and at least one iteration is requested.

    In each round, median and standard deviation are calculated from the
    values not flagged in the previous round and every value further than
    z standard deviations away from this median is flagged. The search
    stops as soon as the number of outliers does not change any more, if
    fewer than 3 values would be left, or after |it| rounds.

    :param a: array or list of values
    :type  a: [ float ]
    :param z: z-score threshold for iterative refinement of median and SD
    :type  z: float
    :param it: maximum number of iterations
    :type  it: int

    :return: outlier mask, median and standard deviation of last iteration
    :rtype: N0.array( int ), float, float
    """
    assert len( a ) > 0

    out = N0.zeros( len( a ) )

    ## nothing to refine: too few values or no iteration requested
    ## (without this guard, it < 1 would leave median and SD undefined)
    if len( a ) < 3 or it < 1:
        return out, N0.median( a ), N0.std( a )

    values = N0.array( a )

    for i in range( it ):
        ## median and SD of the values that are not (yet) outliers
        b = N0.compress( N0.logical_not( out ), a )
        me = N0.median( b )
        sd = N0.std( b )

        bz = N0.absolute( (values - me) / sd )  # pseudo z-score of each value
        o = bz > z

        ## stop if converged or reached bottom
        if (N0.sum( o ) == N0.sum( out )) or (N0.sum( o ) > len( a ) - 3):
            return o, me, sd

        out = o

    return out, me, sd
def addDensity(self, radius=6, minasa=None, profName='density'):
    """
    Count the number of heavy atoms within the given radius of each atom
    and store the counts as atom profile |profName|. If |minasa| is given,
    counts are only collected for atoms selected by the 'relAS' profile;
    all other atoms receive the default value -1.

    @param radius: in Angstrom (default: 6)
    @type  radius: float
    @param minasa: relative exposed surface - 0 to 100% (default: None,
                   all atoms are treated as surface atoms)
    @type  minasa: float
    @param profName: name of the resulting atom profile
                     (default: 'density')
    @type  profName: str
    """
    ## coordinates of heavy atoms only
    heavy_xyz = N0.compress(self.m.maskHeavy(), self.m.getXyz(), 0)

    ## NOTE(review): the comparison is only unambiguous if profile() hands
    ## back the scalar default here -- confirm what it returns otherwise
    if minasa and self.m.profile('relAS', 0) == 0:
        self.addASA()

    if minasa:
        mSurf = self.m.profile2mask('relAS', minasa)
    else:
        mSurf = N0.ones(self.m.lenAtoms())

    ## squared distances are compared, which saves the square root
    squared_radius = radius**2

    ## loop over all surface atoms
    contacts = []
    for i in N0.nonzero(mSurf):
        squared_dist = N0.sum((heavy_xyz - self.m.xyz[i])**2, 1)

        ## -1 presumably removes the atom itself from its own count;
        ## NOTE(review): that only fits if atom i is a heavy atom
        contacts.append(N0.sum(N0.less(squared_dist, squared_radius)) - 1)

    self.m.atoms.set(profName, contacts, mSurf, default=-1,
                     comment='atom density radius %3.1fA' % radius,
                     version=T.dateString() + ' ' + self.version())
def match(x, y, n_iterations=1, z=2, eps_rmsd=0.5, eps_stdv=0.05):
    """
    Matches two arrays onto each other, while iteratively removing outliers.
    Superimposed array y would be C{ N0.dot(y, N0.transpose(r)) + t }.

    The returned transformation is the one fitted in the last iteration,
    i.e. on the rows that were still considered at the start of it.

    :param x: reference coordinates
    :type  x: array
    :param y: coordinates to be matched onto x, same length as x
    :type  y: array
    :param n_iterations: number of calculations::
                           1 .. no iteration
                           0 .. until convergence
    :type  n_iterations: 1|0
    :param z: number of standard deviations for outlier definition
              (default: 2)
    :type  z: float
    :param eps_rmsd: tolerance in rmsd (default: 0.5)
    :type  eps_rmsd: float
    :param eps_stdv: tolerance in standard deviations (default: 0.05)
    :type  eps_stdv: float

    :return: (r,t), [ [fraction_considered, rmsd_for_it, outliers] ]
    :rtype: (array, array), [float, float, int]
    """
    iter_trace = []

    rmsd_old = 0
    stdv_old = 0

    n = 0
    converged = 0

    mask = N0.ones(len(y), N0.Int32)

    while not converged:

        ## find transformation for best match
        r, t = findTransformation(N0.compress(mask, x, 0),
                                  N0.compress(mask, y, 0))

        ## transform coordinates
        xt = N0.dot(y, N0.transpose(r)) + t

        ## calculate row distances (0 for rows already thrown out)
        d = N0.sqrt(N0.sum(N0.power(x - xt, 2), 1)) * mask
        d_considered = N0.compress(mask, d)

        ## calculate rmsd and stdv
        rmsd = N0.sqrt(N0.average(d_considered**2))
        stdv = MU.SD(d_considered)

        ## check conditions for convergence
        d_rmsd = abs(rmsd - rmsd_old)

        ## all distances equal -> stdv is 0 and the relative change is
        ## undefined; count this as "not converged" without dividing by 0
        if stdv:
            d_stdv = abs(1 - stdv_old / stdv)
        else:
            d_stdv = float('inf')

        if d_rmsd < eps_rmsd and d_stdv < eps_stdv:
            converged = 1
        else:
            rmsd_old = rmsd
            stdv_old = stdv

        ## store result
        perc = round(float(N0.sum(mask)) / float(len(mask)), 2)

        ## throw out non-matching rows
        mask = N0.logical_and(mask, N0.less(d, rmsd + z * stdv))
        outliers = N0.nonzero(N0.logical_not(mask))
        iter_trace.append([perc, round(rmsd, 3), outliers])

        n += 1

        if n_iterations and n >= n_iterations:
            break

    return (r, t), iter_trace
def makeMap( self, maxPerCenter=4 ):
    """
    Calculate mapping between complete and reduced atom list.
    Creates self.groups (list of lists of int: groups of atom indices
    into the original model) and self.atoms (the new center atoms, one
    per group).

    @param maxPerCenter: max number of atoms per side chain center atom
                         (default: 4)
    @type  maxPerCenter: int
    """
    resIndex = self.m_sorted.resIndex()
    resModels = self.m_sorted.resModels()
    m = self.m_sorted

    self.currentAtom = 0

    groups = []
    atoms = DictList()

    n_residues = len( resIndex )

    for i in range( n_residues ):

        first_atom = resIndex[ i ]

        ## a residue ends where the next one starts
        if i < n_residues - 1:
            last_atom = resIndex[ i + 1 ] - 1
        else:
            last_atom = len( self.a_indices ) - 1

        a = m.atoms[ first_atom ]

        ## position of this residue's atoms in original PDBModel (unsorted)
        a_indices = self.a_indices[ first_atom : last_atom + 1 ]

        ## for each center create list of atom indices and a center atom;
        ## GLY and ALA are represented by a single backbone center
        if a['residue_name'] != 'GLY' and a['residue_name'] != 'ALA':

            maskBB = resModels[ i ].maskBB()

            bb_a_indices = N0.compress( maskBB, a_indices )
            sc_a_indices = N0.compress( N0.logical_not( maskBB ), a_indices )
            sc_groups = self.group( sc_a_indices, maxPerCenter )

        else:
            bb_a_indices = a_indices
            sc_groups = []

        groups += [ bb_a_indices ]
        atoms += [ self.nextAtom( a, 'BB' ) ]

        ## separate counter, so that the residue index i stays untouched
        for sc_number, g in enumerate( sc_groups ):
            groups += [ g ]
            atoms += [ self.nextAtom( a, 'SC%i' % sc_number ) ]

    self.groups = groups
    self.atoms = atoms
def match(x, y, n_iterations=1, z=2, eps_rmsd=0.5, eps_stdv=0.05):
    """
    Matches two arrays onto each other, while iteratively removing outliers.
    Superimposed array y would be C{ N0.dot(y, N0.transpose(r)) + t }.

    The returned transformation is the one fitted in the last iteration,
    i.e. on the rows that were still considered at the start of it.

    :param x: reference coordinates
    :type  x: array
    :param y: coordinates to be matched onto x, same length as x
    :type  y: array
    :param n_iterations: number of calculations::
                           1 .. no iteration
                           0 .. until convergence
    :type  n_iterations: 1|0
    :param z: number of standard deviations for outlier definition
              (default: 2)
    :type  z: float
    :param eps_rmsd: tolerance in rmsd (default: 0.5)
    :type  eps_rmsd: float
    :param eps_stdv: tolerance in standard deviations (default: 0.05)
    :type  eps_stdv: float

    :return: (r,t), [ [fraction_considered, rmsd_for_it, outliers] ]
    :rtype: (array, array), [float, float, int]
    """
    iter_trace = []

    rmsd_old = 0
    stdv_old = 0

    n = 0
    converged = 0

    mask = N0.ones(len(y), N0.Int32)

    while not converged:

        ## find transformation for best match
        r, t = findTransformation(N0.compress(mask, x, 0),
                                  N0.compress(mask, y, 0))

        ## transform coordinates
        xt = N0.dot(y, N0.transpose(r)) + t

        ## calculate row distances (0 for rows already thrown out)
        d = N0.sqrt(N0.sum(N0.power(x - xt, 2), 1)) * mask
        d_considered = N0.compress(mask, d)

        ## calculate rmsd and stdv
        rmsd = N0.sqrt(N0.average(d_considered**2))
        stdv = MU.SD(d_considered)

        ## check conditions for convergence
        d_rmsd = abs(rmsd - rmsd_old)

        ## all distances equal -> stdv is 0 and the relative change is
        ## undefined; count this as "not converged" without dividing by 0
        if stdv:
            d_stdv = abs(1 - stdv_old / stdv)
        else:
            d_stdv = float('inf')

        if d_rmsd < eps_rmsd and d_stdv < eps_stdv:
            converged = 1
        else:
            rmsd_old = rmsd
            stdv_old = stdv

        ## store result
        perc = round(float(N0.sum(mask)) / float(len(mask)), 2)

        ## throw out non-matching rows
        mask = N0.logical_and(mask, N0.less(d, rmsd + z * stdv))
        outliers = N0.nonzero(N0.logical_not(mask))
        iter_trace.append([perc, round(rmsd, 3), outliers])

        n += 1

        if n_iterations and n >= n_iterations:
            break

    return (r, t), iter_trace
def fit( self, mask=None, ref=None, n_it=1, prof='rms', verbose=1,
         fit=1, **profInfos ):
    """
    Superimpose all coordinate frames on reference coordinates. Put rms
    values in a profile. If n_it != 1, the fraction of atoms considered
    for the fit is put into a profile called |prof|_considered
    (i.e. by default 'rms_considered').

    :param mask: atom mask, atoms to consider default: [all]
    :type  mask: [1|0]
    :param ref: use as reference, default: None, average Structure
    :type  ref: PDBModel
    :param n_it: number of fit iterations, kicking out outliers on the way
                 1 -> classic single fit, 0 -> until convergence
                 (default: 1)
    :type  n_it: int
    :param prof: save rms per frame in profile of this name, ['rms']
    :type  prof: str
    :param verbose: print progress info to STDERR (default: 1)
    :type  verbose: 1|0
    :param fit: transform frames after match, otherwise just calc rms
                (default: 1)
    :type  fit: 1|0
    :param profInfos: additional key=value pairs for rms profile info []
    :type  profInfos: key=value
    """
    ## reference coordinates: given model or average over all frames
    if ref is None:
        refxyz = N0.average( self.frames, 0 )
    else:
        refxyz = ref.getXyz()

    if mask is None:
        mask = N0.ones( len( refxyz ), N0.Int32 )

    refxyz = N0.compress( mask, refxyz, 0 )

    if verbose:
        T.errWrite( "rmsd fitting..." )

    rms = []           ## rms value of each frame
    non_outliers = []  ## fraction of atoms considered for rms and fit
    iterations = []    ## number of iterations performed on each frame
                       ## (collected but not used any further here)

    for i in range( len( self.frames ) ):

        xyz = self.frames[i]
        xyz_masked = N0.compress( mask, xyz, 0 )

        if n_it != 1:
            ## iterative fit; rms is taken from the last iteration
            (r, t), rmsdList = rmsFit.match( refxyz, xyz_masked, n_it )

            iterations.append( len( rmsdList ) )
            non_outliers.append( rmsdList[-1][0] )

            xyz_transformed = N0.dot( xyz, N0.transpose( r ) ) + t

            rms += [ rmsdList[-1][1] ]

        else:
            ## single fit; rms is calculated from the masked atoms
            r, t = rmsFit.findTransformation( refxyz, xyz_masked )

            xyz_transformed = N0.dot( xyz, N0.transpose( r ) ) + t

            delta = N0.compress( mask, xyz_transformed, 0 ) - refxyz
            d = N0.sqrt( N0.sum( N0.power( delta, 2 ), 1 ) )

            rms += [ N0.sqrt( N0.average( d**2 ) ) ]

        if fit:
            self.frames[i] = xyz_transformed.astype( N0.Float32 )

        if verbose and i % 100 == 0:
            T.errWrite( '#' )

    self.setProfile( prof, rms, n_iterations=n_it, **profInfos )

    if non_outliers:
        self.setProfile( prof + '_considered', non_outliers,
                         n_iterations=n_it,
                         comment='fraction of atoms considered for iterative fit' )

    if verbose:
        T.errWrite( 'done\n' )