def __collectFrames(self, pdbs, castAll=0):
    """
    Read coordinates from list of pdb files.

    Every file is loaded as a PDBModel. The atom content of the first
    frame (or of every frame if castAll is set) is compared against
    self.ref; atoms may be removed from a frame to make it match the
    reference but never from the reference itself.

    @param pdbs: list of file names
    @type  pdbs: [str]
    @param castAll: analyze atom content of each frame for casting
                    (default: 0)
    @type  castAll: 0|1

    @return: frames x (N x 3) Numpy array (of float)
    @rtype: array

    @raise TrajError: if the reference would have to be reduced to match
                      a frame, or if the atom names of a checked frame
                      differ from the reference atom names
    """
    frameList = []
    atomCast = None

    if self.verbose:
        T.errWrite('reading %i pdbs...' % len(pdbs))

    refNames = self.ref.atomNames()  ## cache for atom checking
    ## explicit list so that the comparison below does not depend on
    ## range() returning a list
    refIndices = list(range(len(self.ref)))

    for i, f in enumerate(pdbs):

        ## Load
        m = PDBModel(f)

        ## compare atom order & content of first frame to reference pdb
        if castAll or i == 0:
            atomCast, castRef = m.compareAtoms(self.ref)

            if list(castRef) != refIndices:
                ## we can take away atoms from each frame but not from ref
                raise TrajError("Reference PDB doesn't match %s."
                                % m.fileName)

            if list(atomCast) == list(range(len(m))):
                atomCast = None  ## no casting necessary
            elif self.verbose:
                T.errWrite(' casting ')

        ## assert that frame fits reference
        ## (None is the only marker for "no casting"; do not rely on the
        ## truth value of the index collection)
        if atomCast is not None:
            m = m.take(atomCast)

        ## additional check on each 100th frame
        if i % 100 == 0 and m.atomNames() != refNames:
            raise TrajError("%s doesn't match reference pdb." % m.fileName)

        frameList.append(m.xyz)

        ## progress mark after every 10th frame read
        if (i + 1) % 10 == 0 and self.verbose:
            T.errWrite('#')

    if self.verbose:
        T.errWrite('done\n')

    ## convert to 3-D Numpy Array
    return N.array(frameList).astype(N.Float32)
def __collectFrames(self, pdbs, castAll=0):
    """
    Read coordinates from list of pdb files.

    Every file is loaded as a PDBModel. The atom content of the first
    frame (or of every frame if castAll is set) is compared against
    self.ref; atoms may be removed from a frame to make it match the
    reference but never from the reference itself.

    @param pdbs: list of file names
    @type  pdbs: [str]
    @param castAll: analyze atom content of each frame for casting
                    (default: 0)
    @type  castAll: 0|1

    @return: frames x (N x 3) Numpy array (of float)
    @rtype: array

    @raise TrajError: if the reference would have to be reduced to match
                      a frame, or if the atom names of a checked frame
                      differ from the reference atom names
    """
    frameList = []
    atomCast = None

    if self.verbose:
        T.errWrite('reading %i pdbs...' % len(pdbs))

    refNames = self.ref.atomNames()  ## cache for atom checking
    ## explicit list so that the comparison below does not depend on
    ## range() returning a list
    refIndices = list(range(len(self.ref)))

    for i, f in enumerate(pdbs):

        ## Load
        m = PDBModel(f)

        ## compare atom order & content of first frame to reference pdb
        if castAll or i == 0:
            atomCast, castRef = m.compareAtoms(self.ref)

            if list(castRef) != refIndices:
                ## we can take away atoms from each frame but not from ref
                raise TrajError("Reference PDB doesn't match %s."
                                % m.fileName)

            if list(atomCast) == list(range(len(m))):
                atomCast = None  ## no casting necessary
            elif self.verbose:
                T.errWrite(' casting ')

        ## assert that frame fits reference
        ## (None is the only marker for "no casting"; do not rely on the
        ## truth value of the index collection)
        if atomCast is not None:
            m = m.take(atomCast)

        ## additional check on each 100th frame
        if i % 100 == 0 and m.atomNames() != refNames:
            raise TrajError("%s doesn't match reference pdb." % m.fileName)

        frameList.append(m.xyz)

        ## progress mark after every 10th frame read
        if (i + 1) % 10 == 0 and self.verbose:
            T.errWrite('#')

    if self.verbose:
        T.errWrite('done\n')

    ## convert to 3-D Numpy Array
    return N.array(frameList).astype(N.Float32)
def go(self, errorthreshold, n_iterations=1e10, nstep=10, verbose=1):
    """
    Start the clustering. Iterate until the relative change of the error
    between two consecutive iterations is no longer above the error
    threshold or until the maximum number of iterations has been run.

    The final centers, membership matrix and distance matrix are stored
    in self.centers, self.msm and self.d2. If no iteration is performed
    at all, self.d2 is set to None.

    @param errorthreshold: threshold value for the relative error change
    @type  errorthreshold: float
    @param n_iterations: threshold value for number of iterations
                         (default: 1e10)
    @type  n_iterations: int
    @param nstep: print information for every n'th step in the iteration
    @type  nstep: int
    @param verbose: print progress info to STDERR (default: 1)
    @type  verbose: 1|0

    @return: array with cluster centers
    @rtype: array('f')
    """
    iteration = 0
    rel_err = 1e10
    error = 1.e10
    ## stays None if the loop below is never entered; avoids an unbound
    ## local when assigning self.d2
    d2 = None

    msm = self.create_membership_matrix()
    centers = self.calc_cluster_center(msm)

    while rel_err > errorthreshold and iteration < n_iterations:
        d2, msm, centers = self.iterate(centers)
        old_error = error
        error = self.error(msm, d2)

        if old_error:
            rel_err = abs(1. - error / old_error)
        elif error:
            ## error rose from exactly 0: relative change is unbounded
            rel_err = float('inf')
        else:
            ## error stayed at exactly 0: nothing changes any more
            rel_err = 0.

        iteration += 1

        ## test verbose first so that nstep is only used when reporting
        if verbose and iteration % nstep == 0:
            tools.errWrite("%i %f\n" % (iteration, error))

    self.centers = centers
    self.msm = msm
    self.d2 = d2

    return centers
def fit(self, mask=None, ref=None, n_it=1, prof='rms', verbose=1, fit=1,
        **profInfos):
    """
    Superimpose all coordinate frames on reference coordinates. Put rms
    values in a profile. If n_it != 1, the fraction of atoms considered
    for the fit is put into a profile called |prof|_considered
    (i.e. by default 'rms_considered').

    @param mask: atom mask, atoms to consider default: [all]
    @type  mask: [1|0]
    @param ref: use as reference, default: None, average Structure
    @type  ref: PDBModel
    @param n_it: number of fit iterations, kicking out outliers on the way
                 1 -> classic single fit, 0 -> until convergence
                 (default: 1)
    @type  n_it: int
    @param prof: save rms per frame in profile of this name, ['rms']
    @type  prof: str
    @param verbose: print progress info to STDERR (default: 1)
    @type  verbose: 1|0
    @param fit: transform frames after match, otherwise just calc rms
                (default: 1)
    @type  fit: 1|0
    @param profInfos: additional key=value pairs for rms profile info []
    @type  profInfos: key=value
    """
    ## identity test: ref is an arbitrary object, not a value to compare
    if ref is None:
        refxyz = N.average(self.frames, 0)
    else:
        refxyz = ref.getXyz()

    if mask is None:
        mask = N.ones(len(refxyz), N.int32)

    refxyz = N.compress(mask, refxyz, 0)

    if verbose:
        T.errWrite("rmsd fitting...")

    rms = []           ## rms value of each frame
    non_outliers = []  ## fraction of atoms considered for rms and fit

    for i in range(len(self.frames)):

        xyz = self.frames[i]
        ## only the masked atoms take part in the superposition ...
        xyz_masked = N.compress(mask, xyz, 0)

        if n_it != 1:
            (r, t), rmsdList = rmsFit.match(refxyz, xyz_masked, n_it)
            non_outliers.append(rmsdList[-1][0])

            ## ... but the transformation is applied to all atoms
            xyz_transformed = N.dot(xyz, N.transpose(r)) + t

            rms.append(rmsdList[-1][1])

        else:
            r, t = rmsFit.findTransformation(refxyz, xyz_masked)

            xyz_transformed = N.dot(xyz, N.transpose(r)) + t

            ## per-atom distance of the masked atoms to the reference
            d = N.sqrt(N.sum(N.power(
                N.compress(mask, xyz_transformed, 0) - refxyz, 2), 1))

            rms.append(N.sqrt(N.average(d**2)))

        if fit:
            self.frames[i] = xyz_transformed.astype(N.float32)

        if verbose and i % 100 == 0:
            T.errWrite('#')

    self.setProfile(prof, rms, n_iterations=n_it, **profInfos)

    ## only filled by the iterative fit
    if non_outliers:
        self.setProfile(
            prof + '_considered', non_outliers,
            n_iterations=n_it,
            comment='fraction of atoms considered for iterative fit')

    if verbose:
        T.errWrite('done\n')
def getFluct_local(self, mask=None, border_res=1,
                   left_atoms=['C'], right_atoms=['N'], verbose=1):
    """
    Get mean displacement of each atom from its average position after
    fitting of each residue to the reference backbone coordinates of
    itself and selected atoms of neighboring residues to the right and
    left.

    @param mask: N_atoms x 1 array of 0||1, atoms for which fluctuation
                 should be calculated
    @type  mask: array
    @param border_res: number of neighboring residues to use for fitting
    @type  border_res: int
    @param left_atoms: atoms (names) to use from these neighbor residues
    @type  left_atoms: [str]
    @param right_atoms: atoms (names) to use from these neighbor residues
    @type  right_atoms: [str]
    @param verbose: print progress info to STDERR (default: 1)
    @type  verbose: 1|0

    @return: one fluctuation value per atom of the processed residues
    @rtype: [float]

    @raise PDBError: if the atom window of a residue is empty
    """
    if mask is None:
        mask = N.ones(len(self.frames[0]), N.int32)

    if verbose:
        T.errWrite("rmsd fitting per residue...")

    ## NOTE(review): the loop below assumes N.nonzero returns a flat index
    ## array (Numeric-style) rather than a tuple of arrays -- confirm
    residues = N.nonzero(self.ref.atom2resMask(mask))

    ## backbone atoms used for fit
    fit_atoms_right = N.nonzero(self.ref.mask(right_atoms))
    fit_atoms_left = N.nonzero(self.ref.mask(left_atoms))
    ## chain index of each residue
    rchainMap = N.take(self.ref.chainMap(), self.ref.resIndex())

    result = []

    for res in residues:

        i_res, i_border = self.__resWindow(res, border_res, rchainMap,
                                           fit_atoms_left, fit_atoms_right)

        try:
            if not len(i_res):
                raise PDBError('empty residue')

            t_res = self.takeAtoms(i_res + i_border)

            ## the center residue atoms come first in t_res
            i_center = list(range(len(i_res)))

            mask_BB = t_res.ref.maskBB() * t_res.ref.maskHeavy()

            ## fit with border atoms ..
            t_res.fit(ref=t_res.ref, mask=mask_BB, verbose=0)
            ## .. but calculate only with center residue atoms
            frames = N.take(t_res.frames, i_center, 1)

            ## average over the frame axis, stated explicitly so that the
            ## result is per atom regardless of the default axis of
            ## N.average
            avg = N.average(frames, 0)

            ## distance of each atom to its mean position in each frame,
            ## then averaged over all frames -> one value per atom
            rmsd = N.average(
                N.sqrt(N.sum(N.power(frames - avg, 2), 2)), 0)

            result.extend(rmsd)

            if verbose:
                T.errWrite('#')

        except ZeroDivisionError:
            ## record zero fluctuation for this residue and flag it
            result.extend(N.zeros(len(i_res), N.Float32))
            T.errWrite('?' + str(res))

    if verbose:
        T.errWriteln("done")

    return result
def fit(self, mask=None, ref=None, n_it=1, prof='rms', verbose=1, fit=1,
        **profInfos):
    """
    Superimpose all coordinate frames on reference coordinates. Put rms
    values in a profile. If n_it != 1, the fraction of atoms considered
    for the fit is put into a profile called |prof|_considered
    (i.e. by default 'rms_considered').

    @param mask: atom mask, atoms to consider default: [all]
    @type  mask: [1|0]
    @param ref: use as reference, default: None, average Structure
    @type  ref: PDBModel
    @param n_it: number of fit iterations, kicking out outliers on the way
                 1 -> classic single fit, 0 -> until convergence
                 (default: 1)
    @type  n_it: int
    @param prof: save rms per frame in profile of this name, ['rms']
    @type  prof: str
    @param verbose: print progress info to STDERR (default: 1)
    @type  verbose: 1|0
    @param fit: transform frames after match, otherwise just calc rms
                (default: 1)
    @type  fit: 1|0
    @param profInfos: additional key=value pairs for rms profile info []
    @type  profInfos: key=value
    """
    ## identity test: ref is an arbitrary object, not a value to compare
    if ref is None:
        refxyz = N.average(self.frames, 0)
    else:
        refxyz = ref.getXyz()

    if mask is None:
        mask = N.ones(len(refxyz), N.int32)

    refxyz = N.compress(mask, refxyz, 0)

    if verbose:
        T.errWrite("rmsd fitting...")

    rms = []           ## rms value of each frame
    non_outliers = []  ## fraction of atoms considered for rms and fit

    for i in range(len(self.frames)):

        xyz = self.frames[i]
        ## only the masked atoms take part in the superposition ...
        xyz_masked = N.compress(mask, xyz, 0)

        if n_it != 1:
            (r, t), rmsdList = rmsFit.match(refxyz, xyz_masked, n_it)
            non_outliers.append(rmsdList[-1][0])

            ## ... but the transformation is applied to all atoms
            xyz_transformed = N.dot(xyz, N.transpose(r)) + t

            rms.append(rmsdList[-1][1])

        else:
            r, t = rmsFit.findTransformation(refxyz, xyz_masked)

            xyz_transformed = N.dot(xyz, N.transpose(r)) + t

            ## per-atom distance of the masked atoms to the reference
            d = N.sqrt(N.sum(N.power(
                N.compress(mask, xyz_transformed, 0) - refxyz, 2), 1))

            rms.append(N.sqrt(N.average(d**2)))

        if fit:
            self.frames[i] = xyz_transformed.astype(N.float32)

        if verbose and i % 100 == 0:
            T.errWrite('#')

    self.setProfile(prof, rms, n_iterations=n_it, **profInfos)

    ## only filled by the iterative fit
    if non_outliers:
        self.setProfile(
            prof + '_considered', non_outliers,
            n_iterations=n_it,
            comment='fraction of atoms considered for iterative fit')

    if verbose:
        T.errWrite('done\n')
def getFluct_local(self, mask=None, border_res=1,
                   left_atoms=['C'], right_atoms=['N'], verbose=1):
    """
    Get mean displacement of each atom from its average position after
    fitting of each residue to the reference backbone coordinates of
    itself and selected atoms of neighboring residues to the right and
    left.

    @param mask: N_atoms x 1 array of 0||1, atoms for which fluctuation
                 should be calculated
    @type  mask: array
    @param border_res: number of neighboring residues to use for fitting
    @type  border_res: int
    @param left_atoms: atoms (names) to use from these neighbor residues
    @type  left_atoms: [str]
    @param right_atoms: atoms (names) to use from these neighbor residues
    @type  right_atoms: [str]
    @param verbose: print progress info to STDERR (default: 1)
    @type  verbose: 1|0

    @return: one fluctuation value per atom of the processed residues
    @rtype: [float]

    @raise PDBError: if the atom window of a residue is empty
    """
    if mask is None:
        mask = N.ones(len(self.frames[0]), N.int32)

    if verbose:
        T.errWrite("rmsd fitting per residue...")

    ## NOTE(review): the loop below assumes N.nonzero returns a flat index
    ## array (Numeric-style) rather than a tuple of arrays -- confirm
    residues = N.nonzero(self.ref.atom2resMask(mask))

    ## backbone atoms used for fit
    fit_atoms_right = N.nonzero(self.ref.mask(right_atoms))
    fit_atoms_left = N.nonzero(self.ref.mask(left_atoms))
    ## chain index of each residue
    rchainMap = N.take(self.ref.chainMap(), self.ref.resIndex())

    result = []

    for res in residues:

        i_res, i_border = self.__resWindow(res, border_res, rchainMap,
                                           fit_atoms_left, fit_atoms_right)

        try:
            if not len(i_res):
                raise PDBError('empty residue')

            t_res = self.takeAtoms(i_res + i_border)

            ## the center residue atoms come first in t_res
            i_center = list(range(len(i_res)))

            mask_BB = t_res.ref.maskBB() * t_res.ref.maskHeavy()

            ## fit with border atoms ..
            t_res.fit(ref=t_res.ref, mask=mask_BB, verbose=0)
            ## .. but calculate only with center residue atoms
            frames = N.take(t_res.frames, i_center, 1)

            ## average over the frame axis, stated explicitly so that the
            ## result is per atom regardless of the default axis of
            ## N.average
            avg = N.average(frames, 0)

            ## distance of each atom to its mean position in each frame,
            ## then averaged over all frames -> one value per atom
            rmsd = N.average(
                N.sqrt(N.sum(N.power(frames - avg, 2), 2)), 0)

            result.extend(rmsd)

            if verbose:
                T.errWrite('#')

        except ZeroDivisionError:
            ## record zero fluctuation for this residue and flag it
            result.extend(N.zeros(len(i_res), N.Float32))
            T.errWrite('?' + str(res))

    if verbose:
        T.errWriteln("done")

    return result