def residusMaximus( self, atomValues, mask=None ):
    """
    Spread the per-residue maximum over all atoms of each residue.

    Values of atoms that are switched off in the mask are zeroed first;
    afterwards every atom receives the highest (masked) value found among
    the atoms of its residue.

    :param atomValues: list 1 x N, values per atom
    :type  atomValues: [ float ]
    :param mask: list 1 x N, 0|1, 'master' atoms of each residue
                 (default: all atoms)
    :type  mask: [1|0]

    :return: Numpy array 1 x N of float
    :rtype: array
    """
    if mask is None:
        mask = N0.ones( len( self.frames[0] ), N0.Int32 )

    ## values of atoms outside the mask drop to 0
    selected = atomValues * mask

    perAtom = []
    lastResidue = self.resMap()[-1]

    for resIndex in range( lastResidue + 1 ):
        ## masked values of the atoms that belong to this residue
        isMember = N0.equal( self.resMap(), resIndex )
        values = N0.compress( isMember, selected )

        ## every atom of the residue is assigned the residue maximum
        perAtom.extend( values * 0.0 + max( values ) )

    return N0.array( perAtom )
def group( self, a_indices, maxPerCenter ):
    """
    Group a bunch of integers (atom indices in PDBModel) so that each
    group has at most maxPerCenter items. The items are spread as evenly
    as possible; the first groups take the remainder.

    @param a_indices: atom indices
    @type  a_indices: [int]
    @param maxPerCenter: max entries per group
    @type  maxPerCenter: int

    @return: list of lists of int, empty list if a_indices is empty
    @rtype: [[int],[int]..]
    """
    n_items = len( a_indices )

    ## nothing to distribute (would otherwise divide by zero groups)
    if n_items == 0:
        return []

    ## how many groups are necessary? (ceiling division)
    n_centers = n_items // maxPerCenter
    if n_items % maxPerCenter:
        n_centers += 1

    ## how many items/atoms go into each group?
    nAtoms = N0.ones( n_centers, N0.Int ) * ( n_items // n_centers )

    ## the first groups each take one of the left-over items
    nAtoms[ : n_items % n_centers ] += 1

    ## distribute atom indices into groups
    result = []
    pos = 0
    for n in nAtoms:
        result.append( N0.take( a_indices, N0.arange(n) + pos ) )
        pos += n

    return result
def cluster( self, n_clusters, weight=1.13, converged=1e-11,
             aMask=None, force=0 ):
    """
    Calculate new clusters. The calculation is skipped if a previous result
    exists and none of the parameters has changed (unless force is set).

    @param n_clusters: number of clusters
    @type  n_clusters: int
    @param weight: fuzziness weight
    @type  weight: float (default: 1.13)
    @param converged: stop iteration if min dist changes less than
                      converged (default: 1e-11)
    @type  converged: float
    @param aMask: atom mask applied before clustering
    @type  aMask: [1|0]
    @param force: re-calculate even if parameters haven't changed
                  (default:0)
    @type  force: 1|0
    """
    if aMask is None:
        aMask = N0.ones( self.traj.getRef().lenAtoms() )

    if self.fc is None or force or self.fcWeight != weight \
       or self.n_clusters != n_clusters or N0.any( self.aMask != aMask) \
       or self.fcConverged != converged:

        self.n_clusters = n_clusters
        self.fcWeight = weight
        self.aMask = aMask
        ## remember the requested criterion; without this the value given
        ## by the caller is never used and every call looks "changed"
        self.fcConverged = converged

        self.fc = FuzzyCluster( self.__raveled(), self.n_clusters,
                                self.fcWeight )

        self.fcCenters = self.fc.go( self.fcConverged,
                                     1000, nstep=10,
                                     verbose=self.verbose )
def addDensity( self, radius=6, minasa=None, profName='density' ):
    """
    Count the number of heavy atoms within the given radius.
    Values are only collected for atoms with |minasa| accessible surface
    area; all other atoms get the default value -1.

    @param minasa: relative exposed surface - 0 to 100%
    @type  minasa: float
    @param radius: in Angstrom
    @type  radius: float
    @param profName: name of the atom profile receiving the counts
    @type  profName: str
    """
    ## coordinates of all heavy atoms
    heavyXyz = N0.compress( self.m.maskHeavy(), self.m.getXyz(), 0 )

    if minasa:
        ## surface profile is calculated on demand
        if self.m.profile( 'relAS', 0 ) == 0:
            self.addASA()
        mSurf = self.m.profile2mask( 'relAS', minasa )
    else:
        mSurf = N0.ones( self.m.lenAtoms() )

    ## squared distances are compared against the squared cutoff;
    ## 1 is subtracted from each count (original: "-1")
    cutoff = radius**2
    contacts = [ N0.sum( N0.less( N0.sum( (heavyXyz - self.m.xyz[i])**2, 1 ),
                                  cutoff ) ) - 1
                 for i in N0.nonzero( mSurf ) ]

    self.m.atoms.set( profName, contacts, mSurf, default=-1,
                      comment='atom density radius %3.1fA' % radius,
                      version= T.dateString() + ' ' + self.version() )
def pca( self, atomMask=None, frameMask=None, fit=1 ):
    """
    Calculate principal components of trajectory frames.

    The selected frames are centered on the average structure of all
    frames, flattened to one row per frame and decomposed by SVD.

    :param atomMask: 1 x N_atoms, [111001110..] atoms to consider
                     (default: all)
    :type  atomMask: [1|0]
    :param frameMask: 1 x N_frames, [001111..] frames to consider
                      (default all )
    :type  frameMask: [1|0]
    :param fit: fit all frames (using atomMask) before the analysis;
                this modifies the trajectory (default: 1)
    :type  fit: 1|0

    :return: third SVD factor (principal axes), projection of each frame
             in PC space, eigenvalue of each PC (squared singular values)
    :rtype: array, array, array
    """
    if frameMask is None:
        frameMask = N0.ones( len( self.frames ), N0.Int32 )

    if atomMask is None:
        atomMask = N0.ones( self.getRef().lenAtoms(), N0.Int32 )

    if fit:
        self.fit( atomMask )

    ## center on the average over *all* frames, then select frames / atoms
    refxyz = N0.average( self.frames, 0 )

    data = N0.compress( frameMask, self.frames, 0 )
    data = data - refxyz
    data = N0.compress( atomMask, data, 1 )

    ## reduce to 2D array, one flattened frame per row; a list is required
    ## because map() is a lazy iterator in Python 3 and would end up as a
    ## single object inside a 0-dimensional array
    data = N0.array( [ N0.ravel( frame ) for frame in data ] )

    V, L, U = LA.svd( data )

    return U, V * L, N0.power( L, 2 )
def test_surfaceRacerTools(self):
    """surfaceRacerTools test"""
    from biskit import PDBModel
    import biskit.tools as T

    ## test structure reduced to the heavy atoms of the protein
    pdbFile = T.testRoot() + '/lig/1A19.pdb'
    self.m = PDBModel(pdbFile)
    self.m = self.m.compress(self.m.maskProtein())
    self.m = self.m.compress(self.m.maskHeavy())

    ## fake surface data: the same value (10.0) for every atom
    fakeSurface = N0.ones(self.m.lenAtoms()) * 10.0
    total = N0.sum(relExposure(self.m, fakeSurface))

    self.assertAlmostEqual(total, 44276.86085222386, 8)
def test_surfaceRacerTools(self):
    """surfaceRacerTools test"""
    from biskit import PDBModel
    import biskit.tools as T

    ## load a structure and keep only protein atoms ...
    loaded = PDBModel( T.testRoot()+'/lig/1A19.pdb' )
    self.m = loaded
    proteinOnly = self.m.compress( self.m.maskProtein() )
    self.m = proteinOnly
    ## ... and of those only the heavy atoms
    heavyOnly = self.m.compress( self.m.maskHeavy() )
    self.m = heavyOnly

    ## constant fake surface value for every atom
    surf = 10.0 * N0.ones( self.m.lenAtoms() )

    exposureSum = N0.sum( relExposure( self.m, surf ) )
    self.assertAlmostEqual( exposureSum, 44276.86085222386, 8 )
def getPca( self, aMask=None, fMask=None, fit=1 ):
    """
    Get the results from a principal component analysis. The result of the
    last analysis is cached in self.pc and returned as is if it was
    calculated with the same parameters.

    :param aMask: 1 x N_atoms of 1|0, atom mask, default: all atoms
    :type  aMask: [1|0]
    :param fMask: 1 x N_frames of 1|0, frame mask, default: all
    :type  fMask: [1|0]
    :param fit: fit to average structure before doing the PC analysis
                (default: 1)
    :type  fit: 1|0

    :return: Dictionary with results from the PC analysis::
               dic {'p': projection of each frame in PC space,
                    'e': list of eigen values,
                    'u': first value returned by pca(),
                    'fit':.., 'aMask':.., 'fMask':.. parameters used}
    :rtype: dict
    """
    def sameMask( a, b ):
        """True if both masks are None or contain identical values."""
        if a is None or b is None:
            return a is None and b is None
        a, b = N0.array( a ), N0.array( b )
        return len( a ) == len( b ) and not N0.any( a != b )

    if aMask is None:
        aMask = N0.ones( self.getRef().lenAtoms(), N0.Int32 )

    pc = getattr( self, 'pc', None )

    ## return cached result if parameters haven't changed; masks are
    ## compared explicitly because '==' on arrays is element-wise and
    ## cannot be used as a single truth value
    if pc is not None and pc['fit'] == fit \
       and sameMask( pc['fMask'], fMask ) \
       and sameMask( pc['aMask'], aMask ):
        return pc

    evectors, proj, evalues = self.pca( aMask, fMask, fit )

    pc = { 'aMask': aMask,
           'fMask': fMask,
           'fit': fit,
           'p': proj,
           'e': evalues,
           'u': evectors }

    self.pc = pc

    return pc
def outliers(a, z=5, it=5):
    """
    Iterative detection of outliers in a set of numeric values.
    Requirement: len(a) > 0; outlier detection is only performed if
    len(a) > 2 and at least one iteration is requested. Otherwise an
    all-zero mask is returned with median and SD of all values.

    :param a: array or list of values
    :type  a: [ float ]
    :param z: z-score threshold for iterative refinement of median and SD
    :type  z: float
    :param it: maximum number of iterations
    :type  it: int

    :return: outlier mask, median and standard deviation of last iteration
    :rtype: N0.array( int ), float, float
    """
    assert (len(a) > 0)
    out = N0.zeros(len(a))

    ## too few values or no iteration requested: nothing can be flagged.
    ## (it < 1 used to fall through to undefined median / SD)
    if len(a) < 3 or it < 1:
        return out, N0.median(a), N0.std(a)

    values = N0.array(a)

    for i in range(it):
        ## median and SD of everything not flagged in the previous round
        b = N0.compress(N0.logical_not(out), a)
        me = N0.median(b)
        sd = N0.std(b)

        bz = N0.absolute((values - me) / sd)  # pseudo z-score of each value
        o = bz > z

        ## stop if converged or fewer than 3 values would remain
        n_flagged = N0.sum(o)
        if n_flagged == N0.sum(out) or n_flagged > len(a) - 3:
            return o, me, sd

        out = o

    return out, me, sd
def outliers( a, z=5, it=5 ):
    """
    Iterative detection of outliers in a set of numeric values.
    Requirement: len(a) > 0; outlier detection is only performed if
    len(a) > 2 and it > 0. In all other cases nothing is flagged and median
    and standard deviation of the complete input are returned.

    :param a: array or list of values
    :type  a: [ float ]
    :param z: z-score threshold for iterative refinement of median and SD
    :type  z: float
    :param it: maximum number of iterations
    :type  it: int

    :return: outlier mask, median and standard deviation of last iteration
    :rtype: N0.array( int ), float, float
    """
    assert( len(a) > 0 )

    n_values = len( a )
    out = N0.zeros( n_values )

    ## without this guard it=0 skipped the loop and hit unbound me / sd
    if n_values < 3 or it < 1:
        return out, N0.median( a ), N0.std( a )

    as_array = N0.array( a )

    for i in range( it ):
        ## statistics over the values that are currently not flagged
        kept = N0.compress( N0.logical_not( out ), a )
        me = N0.median( kept )
        sd = N0.std( kept )

        ## pseudo z-score of each value
        o = N0.absolute( (as_array - me) / sd ) > z

        ## stop if converged or reached bottom (< 3 values left)
        flagged = N0.sum( o )
        if flagged == N0.sum( out ) or flagged > n_values - 3:
            return o, me, sd

        out = o

    return out, me, sd
def addDensity(self, radius=6, minasa=None, profName='density'):
    """
    Count the number of heavy atoms within the given radius.
    Values are only collected for atoms with |minasa| accessible surface
    area. The counts are stored in the atom profile |profName|; atoms
    outside the surface mask receive -1.

    @param minasa: relative exposed surface - 0 to 100%
    @type  minasa: float
    @param radius: in Angstrom
    @type  radius: float
    @param profName: name of the resulting atom profile
    @type  profName: str
    """
    mHeavy = self.m.maskHeavy()
    xyzHeavy = N0.compress(mHeavy, self.m.getXyz(), 0)

    ## make sure the relative surface profile exists if it is needed
    if minasa and self.m.profile('relAS', 0) == 0:
        self.addASA()

    if not minasa:
        mSurf = N0.ones(self.m.lenAtoms())
    else:
        mSurf = self.m.profile2mask('relAS', minasa)

    ## loop over all surface atoms
    maxDist2 = radius**2
    contacts = []
    for atom in N0.nonzero(mSurf):
        delta = xyzHeavy - self.m.xyz[atom]
        dist2 = N0.sum(delta**2, 1)
        ## count of heavy atoms closer than radius, minus 1
        contacts.append(N0.sum(N0.less(dist2, maxDist2)) - 1)

    info = 'atom density radius %3.1fA' % radius
    stamp = T.dateString() + ' ' + self.version()

    self.m.atoms.set(profName, contacts, mSurf, default=-1,
                     comment=info, version=stamp)
def conservationScore(self, cons_type='cons_ent', ranNr=150,
                      log=StdLog(), verbose=1):
    """
    Score of conserved residue pairs in the interaction surface.
    Optionally, normalized by random surface contacts.

    @param cons_type: precalculated conservation profile name,
                      see L{Biskit.PDBDope}.
    @type  cons_type: str
    @param ranNr: number of random matrices to use (default: 150);
                  0 normalizes by the number of contacts instead
    @type  ranNr: int
    @param log: log file [STDOUT]
    @type  log: Biskit.LogFile
    @param verbose: give progress report [1]
    @type  verbose: bool | int

    @return: conservation score
    @rtype: float
    """
    def surfaceMask(model):
        """Return the 'surfMask' profile of model, adding it if needed."""
        if model.profile('surfMask'):
            return model.profile('surfMask')
        ## the original never fetched the freshly calculated mask, leaving
        ## the variable unbound; presumably addSurfaceMask() stores it
        ## under 'surfMask' -- TODO confirm
        PDBDope(model).addSurfaceMask()
        return model.profile('surfMask')

    ## conservation profiles; fall back to uniform values if unavailable
    try:
        recCons = self.rec().profile(cons_type, updateMissing=1)
    except Exception:
        if verbose:
            log.add('\n'+'*'*30+'\nNO HHM PROFILE FOR RECEPTOR\n'+\
                    '*'*30+'\n')
        recCons = N0.ones(self.rec().lenResidues())

    try:
        ligCons = self.lig().profile(cons_type, updateMissing=1)
    except Exception:
        if verbose:
            log.add(\
                '\n'+'*'*30+'\nNO HHM PROFILE FOR LIGAND\n'+'*'*30+'\n')
        ligCons = N0.ones(self.lig().lenResidues())

    recSurf = surfaceMask(self.rec())
    ligSurf = surfaceMask(self.lig())

    surfMask = N0.ravel(N0.outerproduct(recSurf, ligSurf))

    ## residue pairs where both partners have a conservation value of 0
    missing = N0.outerproduct(N0.equal(recCons, 0), N0.equal(ligCons, 0))

    cont = self.resContacts() * N0.logical_not(missing)

    consMat = N0.outerproduct(recCons, ligCons)

    score = cont * consMat

    # get a random score
    if ranNr != 0:
        ## NOTE(review): uses self.verbose / self.log rather than the
        ## verbose / log arguments -- kept unchanged, confirm intent
        if self.verbose:
            self.log.write('.')
        ranMat = mathUtils.random2DArray(cont, ranNr, mask=surfMask)
        random_score = N0.sum(N0.sum(ranMat * consMat)) / (ranNr * 1.0)
        return N0.sum(N0.sum(score)) / random_score

    return N0.sum(N0.sum(score)) / N0.sum(N0.sum(cont))
def fit( self, mask=None, ref=None, n_it=1, prof='rms', verbose=1, fit=1,
         **profInfos ):
    """
    Superimpose all coordinate frames on reference coordinates. Put rms
    values in a profile. If n_it > 1, the fraction of atoms considered
    for the fit is put into a profile called |prof|_considered
    (i.e. by default 'rms_considered').

    :param mask: atom mask, atoms to consider default: [all]
    :type  mask: [1|0]
    :param ref: use as reference, default: None, average Structure
    :type  ref: PDBModel
    :param n_it: number of fit iterations, kicking out outliers on the way
                 1 -> classic single fit, 0 -> until convergence
                 (default: 1)
    :type  n_it: int
    :param prof: save rms per frame in profile of this name, ['rms']
    :type  prof: str
    :param verbose: print progress info to STDERR (default: 1)
    :type  verbose: 1|0
    :param fit: transform frames after match, otherwise just calc rms
                (default: 1)
    :type  fit: 1|0
    :param profInfos: additional key=value pairs for rms profile info []
    :type  profInfos: key=value
    """
    refxyz = N0.average( self.frames, 0 ) if ref is None else ref.getXyz()

    if mask is None:
        mask = N0.ones( len( refxyz ), N0.Int32 )

    refxyz = N0.compress( mask, refxyz, 0 )

    if verbose:
        T.errWrite( "rmsd fitting..." )

    rms = []           ## rms value of each frame
    non_outliers = []  ## fraction of atoms considered for rms and fit
    iterations = []    ## number of iterations performed on each frame

    for i in range( len( self.frames ) ):

        xyz = self.frames[i]
        selected = N0.compress( mask, xyz, 0 )

        if n_it == 1:
            ## classic single fit; rms is calculated from the masked atoms
            r, t = rmsFit.findTransformation( refxyz, selected )

            xyz_transformed = N0.dot( xyz, N0.transpose( r ) ) + t

            diff = N0.compress( mask, xyz_transformed, 0 ) - refxyz
            d = N0.sqrt( N0.sum( N0.power( diff, 2 ), 1 ) )

            rms.append( N0.sqrt( N0.average( d**2 ) ) )

        else:
            ## iterative fit; the last trace entry holds fraction and rms
            (r, t), rmsdList = rmsFit.match( refxyz, selected, n_it )

            iterations.append( len( rmsdList ) )
            non_outliers.append( rmsdList[-1][0] )

            xyz_transformed = N0.dot( xyz, N0.transpose( r ) ) + t

            rms.append( rmsdList[-1][1] )

        if fit:
            self.frames[i] = xyz_transformed.astype( N0.Float32 )

        if verbose and i % 100 == 0:
            T.errWrite( '#' )

    self.setProfile( prof, rms, n_iterations=n_it, **profInfos )

    if non_outliers:
        self.setProfile( prof+'_considered', non_outliers,
                         n_iterations=n_it,
                         comment='fraction of atoms considered for iterative fit' )

    if verbose:
        T.errWrite( 'done\n' )
def match(x, y, n_iterations=1, z=2, eps_rmsd=0.5, eps_stdv=0.05):
    """
    Matches two arrays onto each other, while iteratively removing
    outliers. Superimposed array y would be
    C{ N0.dot(y, N0.transpose(r)) + t }.

    :param x: reference coordinates
    :type  x: array
    :param y: coordinates to be matched onto x
    :type  y: array
    :param n_iterations: number of calculations::
                           1 .. no iteration
                           0 .. until convergence
    :type  n_iterations: 1|0
    :param z: number of standard deviations for outlier definition
              (default: 2)
    :type  z: float
    :param eps_rmsd: tolerance in rmsd (default: 0.5)
    :type  eps_rmsd: float
    :param eps_stdv: tolerance in standard deviations (default: 0.05)
    :type  eps_stdv: float

    :return: (r,t), [ [percent_considered, rmsd_for_it, outliers] ]
    :rtype: (array, array), [float, float, int]
    """
    iter_trace = []

    rmsd_old = 0
    stdv_old = 0

    n = 0
    mask = N0.ones(len(y), N0.Int32)

    while True:

        ## best transformation for the rows that are still considered
        r, t = findTransformation(N0.compress(mask, x, 0),
                                  N0.compress(mask, y, 0))

        ## apply it to all rows of y
        xt = N0.dot(y, N0.transpose(r)) + t

        ## row distances, zeroed for rows that were thrown out
        d = N0.sqrt(N0.sum(N0.power(x - xt, 2), 1)) * mask

        ## rmsd and stdv over the considered rows only
        considered = N0.compress(mask, d)
        rmsd = N0.sqrt(N0.average(considered**2))
        stdv = MU.SD(considered)

        ## convergence: both changes fall below their tolerance
        d_rmsd = abs(rmsd - rmsd_old)
        d_stdv = abs(1 - stdv_old / stdv)

        converged = d_rmsd < eps_rmsd and d_stdv < eps_stdv

        if not converged:
            rmsd_old = rmsd
            stdv_old = stdv

        ## fraction of rows used in this round
        perc = round(float(N0.sum(mask)) / float(len(mask)), 2)

        ## throw out non-matching rows
        mask = N0.logical_and(mask, N0.less(d, rmsd + z * stdv))
        outliers = N0.nonzero(N0.logical_not(mask))
        iter_trace.append([perc, round(rmsd, 3), outliers])

        n += 1

        if converged or (n_iterations and n >= n_iterations):
            break

    return (r, t), iter_trace
def conservationScore( self, cons_type='cons_ent', ranNr=150,
                       log=StdLog(), verbose=1 ):
    """
    Score of conserved residue pairs in the interaction surface.
    Optionally, normalized by random surface contacts.

    @param cons_type: precalculated conservation profile name,
                      see L{Biskit.PDBDope}.
    @type  cons_type: str
    @param ranNr: number of random matrices to use (default: 150);
                  with 0 the score is divided by the sum of contacts
    @type  ranNr: int
    @param log: log file [STDOUT]
    @type  log: Biskit.LogFile
    @param verbose: give progress report [1]
    @type  verbose: bool | int

    @return: conservation score
    @rtype: float
    """
    ## receptor conservation, uniform if no profile can be obtained
    try:
        recCons = self.rec().profile( cons_type, updateMissing=1 )
    except Exception:
        if verbose:
            log.add('\n'+'*'*30+'\nNO HHM PROFILE FOR RECEPTOR\n'+\
                    '*'*30+'\n')
        recCons = N0.ones( self.rec().lenResidues() )

    ## ligand conservation, uniform if no profile can be obtained
    try:
        ligCons = self.lig().profile( cons_type, updateMissing=1 )
    except Exception:
        if verbose:
            log.add(\
                '\n'+'*'*30+'\nNO HHM PROFILE FOR LIGAND\n'+'*'*30+'\n')
        ligCons = N0.ones( self.lig().lenResidues() )

    ## surface masks; a missing mask is calculated and then fetched.
    ## (previously recSurf / ligSurf stayed unassigned in that case;
    ## presumably addSurfaceMask() creates 'surfMask' -- TODO confirm)
    surfaces = []
    for model in ( self.rec(), self.lig() ):
        if not model.profile( 'surfMask' ):
            PDBDope( model ).addSurfaceMask()
        surfaces.append( model.profile( 'surfMask' ) )

    recSurf, ligSurf = surfaces

    surfMask = N0.ravel( N0.outerproduct( recSurf, ligSurf ) )

    ## pairs in which both residues have conservation 0 are ignored
    missing = N0.outerproduct( N0.equal( recCons, 0 ),
                               N0.equal( ligCons, 0 ) )

    cont = self.resContacts() * N0.logical_not( missing )

    consMat = N0.outerproduct( recCons, ligCons )

    score = cont * consMat

    # get a random score
    if ranNr != 0:
        ## NOTE(review): progress dot depends on self.verbose / self.log,
        ## not on the verbose / log arguments -- left as is, confirm
        if self.verbose:
            self.log.write('.')
        ranMat = mathUtils.random2DArray( cont, ranNr, mask=surfMask )
        random_score = N0.sum(N0.sum( ranMat * consMat ))/( ranNr*1.0 )
        return N0.sum(N0.sum(score))/random_score

    return N0.sum(N0.sum(score))/ N0.sum(N0.sum(cont))
def getFluct_local( self, mask=None, border_res=1,
                    left_atoms=['C'], right_atoms=['N'], verbose=1 ):
    """
    Get mean displacement of each atom from its average position after
    fitting of each residue to the reference backbone coordinates of
    itself and selected atoms of neighboring residues to the right and
    left.

    :param mask: N_atoms x 1 array of 0||1, atoms for which fluctuation
                 should be calculated
    :type  mask: array
    :param border_res: number of neighboring residues to use for fitting
    :type  border_res: int
    :param left_atoms: atoms (names) to use from these neighbor residues
    :type  left_atoms: [str]
    :param right_atoms: atoms (names) to use from these neighbor residues
    :type  right_atoms: [str]
    :param verbose: print progress info to STDERR (default: 1)
    :type  verbose: 1|0

    :return: fluctuation values, one per atom of each processed residue
    :rtype: [ float ]
    """
    if mask is None:
        mask = N0.ones( len( self.frames[0] ), N0.Int32 )

    if verbose:
        T.errWrite( "rmsd fitting per residue..." )

    residues = N0.nonzero( self.ref.atom2resMask( mask ) )

    ## backbone atoms used for fit
    fit_atoms_right = N0.nonzero( self.ref.mask( right_atoms ) )
    fit_atoms_left = N0.nonzero( self.ref.mask( left_atoms ) )

    ## chain index of each residue
    rchainMap = N0.take( self.ref.chainMap(), self.ref.resIndex() )

    result = []

    for res in residues:

        i_res, i_border = self.__resWindow(res, border_res, rchainMap,
                                           fit_atoms_left, fit_atoms_right)

        try:
            if len( i_res ) == 0:
                raise PDBError('empty residue')

            ## sub-trajectory: residue atoms first, then border atoms
            t_res = self.takeAtoms( i_res + i_border )

            i_center = range( len( i_res ) )

            mask_BB = t_res.ref.maskBB() * t_res.ref.maskHeavy()

            ## fit with border atoms ..
            t_res.fit( ref=t_res.ref, mask=mask_BB, verbose=0 )

            ## .. but calculate only with center residue atoms
            frames = N0.take( t_res.frames, i_center, 1 )

            avg = N0.average( frames )

            displacement = N0.sqrt( N0.sum( N0.power( frames - avg, 2 ), 2 ) )
            rmsd = N0.average( displacement )

            result.extend( rmsd )

            if verbose:
                T.errWrite('#')

        except ZeroDivisionError:
            ## residue could not be fitted: report zeros and flag it
            result.extend( N0.zeros( len(i_res), N0.Float32 ) )
            T.errWrite('?' + str( res ))

    if verbose:
        T.errWriteln( "done" )

    return result
def match(x, y, n_iterations=1, z=2, eps_rmsd=0.5, eps_stdv=0.05):
    """
    Matches two arrays onto each other, while iteratively removing
    outliers. Superimposed array y would be
    C{ N0.dot(y, N0.transpose(r)) + t }.

    :param x: reference coordinates
    :type  x: array
    :param y: coordinates that are superimposed on x
    :type  y: array
    :param n_iterations: number of calculations::
                           1 .. no iteration
                           0 .. until convergence
    :type  n_iterations: 1|0
    :param z: number of standard deviations for outlier definition
              (default: 2)
    :type  z: float
    :param eps_rmsd: tolerance in rmsd (default: 0.5)
    :type  eps_rmsd: float
    :param eps_stdv: tolerance in standard deviations (default: 0.05)
    :type  eps_stdv: float

    :return: (r,t), [ [percent_considered, rmsd_for_it, outliers] ]
    :rtype: (array, array), [float, float, int]
    """
    trace = []
    last_rmsd, last_stdv = 0, 0
    rounds = 0
    done = 0

    keep = N0.ones(len(y), N0.Int32)

    while not done:

        ## find transformation for best match of the rows still kept
        x_kept = N0.compress(keep, x, 0)
        y_kept = N0.compress(keep, y, 0)
        r, t = findTransformation(x_kept, y_kept)

        ## transform coordinates
        moved = N0.dot(y, N0.transpose(r)) + t

        ## calculate row distances (0 for discarded rows)
        dist = N0.sqrt(N0.sum(N0.power(x - moved, 2), 1)) * keep

        ## calculate rmsd and stdv
        rmsd = N0.sqrt(N0.average(N0.compress(keep, dist)**2))
        stdv = MU.SD(N0.compress(keep, dist))

        ## check conditions for convergence
        delta_rmsd = abs(rmsd - last_rmsd)
        delta_stdv = abs(1 - last_stdv / stdv)

        if delta_rmsd < eps_rmsd and delta_stdv < eps_stdv:
            done = 1
        else:
            last_rmsd, last_stdv = rmsd, stdv

        ## store result; the fraction refers to the rows used this round
        fraction = round(float(N0.sum(keep)) / float(len(keep)), 2)

        ## throw out non-matching rows
        keep = N0.logical_and(keep, N0.less(dist, rmsd + z * stdv))
        rejected = N0.nonzero(N0.logical_not(keep))

        trace.append([fraction, round(rmsd, 3), rejected])

        rounds += 1

        ## fixed number of rounds requested and reached
        if n_iterations and rounds >= n_iterations:
            break

    return (r, t), trace