def __add3( self, n_members, excluded, trippleIndex ):
    """
    Extend the list of excluded member trajectories by one more triple
    of member indices (used for error estimation).

    The triple is picked from those members of range( n_members ) that
    are not yet excluded: self.tripples() is asked for *trippleIndex* + 1
    triples and the last one of them is added. The original notes state
    that triples are chosen to have minimal overlap, e.g. (1,2,3),
    (4,5,6), (7,8,9) for the first three triples of 10 trajectories
    (this depends on the implementation of self.tripples).

    @param n_members: number of member trajectories
    @type  n_members: int
    @param excluded: member trajectories that are already excluded
    @type  excluded: [ int ]
    @param trippleIndex: index of the triple to add (0 -> first triple)
    @type  trippleIndex: int

    @return: the indices of all excluded member trajectories
    @rtype: [ int ]
    """
    ## members that are still available for exclusion
    candidates = MU.difference( range( n_members ), excluded )

    ## generate one triple more than the index and take the newest one
    all_triples = self.tripples( candidates, trippleIndex + 1 )
    newest = all_triples[-1]

    return MU.union( excluded, list( newest ) )
def getOutliers( self, traj, outlaws=None ):
    """
    Identify member trajectories that have moved much further than normal.

    @param traj: Trajectory to analyze
    @type  traj: Trajectory
    @param outlaws: members already marked for exclusion
                    (default: None, treated as an empty list)
    @type  outlaws: [int]

    @return: member indices of outlier trajectories (plus outlaws); if no
             z-filter is configured (self.zfilter is false), *outlaws* is
             handed back unchanged
    @rtype: [int]
    """
    ## A fresh list per call. The former mutable default ([]) was returned
    ## to the caller and was therefore shared between unrelated calls.
    if outlaws is None:
        outlaws = []

    if not self.zfilter:
        return outlaws

    outliers = N.nonzero( traj.outliers( z=self.zfilter,
                                         mask=traj.ref.maskCA(),
                                         step=10 ) )

    ## NOTE(review): len(outliers) assumes that N.nonzero returns a flat
    ## index array (Numeric style); plain numpy would return a tuple of
    ## arrays -- confirm which module N refers to.
    self.log.add( 'identified %i outliers with z-threshold %3.1f' %\
                  ( len(outliers), self.zfilter ) )

    return MU.union( outliers, outlaws )
def update(self, model, source, skipRes=None, updateMissing=0, force=0,
           headPatterns=[]):
    """
    Update empty or missing fields of model from the source. The
    model will be connected to the source via model.source.
    Profiles that are derived from the source are labeled 'changed'=0.
    The same holds for coordinates (xyzChanged=0).
    However, existing profiles or coordinates or fields remain untouched.

    @param model: existing model
    @type  model: PDBModel
    @param source: source PDB file
    @type  source: str
    @param skipRes: list residue names that should not be parsed
    @type  skipRes: [ str ]
    @param updateMissing: ignored
    @type  updateMissing: 1|0
    @param force: parse the source even if self.needsUpdate( model ) is
                  false (default: 0)
    @type  force: 1|0
    @param headPatterns: [(putIntoKey, regex)] extract given REMARKS
    @type  headPatterns: [(str, str)]

    @raise PDBParserError - if something is wrong with the source file
    """
    try:
        ## atoms and/or coordinates need to be updated from PDB
        if force or self.needsUpdate(model):

            atoms, xyz, info = self.__collectAll(source, skipRes,
                                                 headPatterns)

            keys = M.union(atoms.keys(), self.DEFAULTS.keys())

            for k in keys:
                a = model.atoms.get(k, default=0, update=False)

                ## 0 is the "missing" marker requested via default=0.
                ## The exact type check replaces the former `a is 0`
                ## (identity test against a literal) without changing
                ## which values match; `a == 0` alone is avoided because
                ## a may be an array.
                missing = (a is None) or (type(a) is int and a == 0)

                if missing:
                    dflt = self.DEFAULTS.get(k, None)
                    model.atoms.set(k, atoms.get(k, dflt), changed=0)

            if model.xyz is None:
                model.xyz = xyz
                model.xyzChanged = 0

            ## reset cached indices
            model._resIndex = None
            model._chainIndex = None

            model.fileName = model.fileName or source

            model.pdbCode = model.pdbCode or info.get('pdb_code', None) or \
                            self.idFromName(model.fileName)

            ## NOTE: creation of a BioUnit from info['BIOMT'] is disabled
            ## in this method; a parsed 'BIOMT' entry stays in info.
            model.info.update(info)

    except Exception:
        ## Exception instead of a bare except, so that KeyboardInterrupt
        ## and SystemExit are no longer converted into a PDBParserError
        msg = self.__xplorAtomIndicesTest(source) or ' '
        ## '\\ERROR: ' spells out the literal backslash that the former
        ## invalid escape sequence produced; the message is unchanged
        raise PDBParserError('Cannot read ' + str(source) + ' as PDB\n'
                             '\\ERROR: ' + T.lastError() + msg)

    model.setSource(source)
def parmSolvated( self, f_out, f_out_crd=None, f_out_pdb=None,
                  hetatm=0, norun=0, cap=0, capN=[], capC=[],
                  fmod=['frcmod.ionsjc_tip3p'], fprep=[],
                  box=10.0, center=True, **kw ):
    """
    Clean up the model, optionally cap chain ends, and run leap (through
    self.__runLeap and the template file self.leap_template) to create
    topology and coordinate files.

    @param f_out: target file for parm (topology)
    @type  f_out: str
    @param f_out_crd: target file for crd (coordinates)
                      (default:|f_out_base|.crd)
    @type  f_out_crd: str
    @param f_out_pdb: target file for pdb (default:|f_out_base|_leap.pdb)
    @type  f_out_pdb: str
    @param hetatm: keep hetero atoms (default: 0)
    @type  hetatm: 1|0
    @param norun: passed on to the leap run; if true, the final
                  conversion of parm + crd into a PDB is skipped
                  (default: 0)
    @type  norun: 1|0
    @param cap: put ACE and NME capping residue on chain breaks
                (default: 0)
    @type  cap: 1|0
    @param capN: indices of chains that should get ACE cap (default: [])
    @type  capN: [int]
    @param capC: indices of chains that should get NME cap (default: [])
    @type  capC: [int]
    @param box: minimal distance of solute from box edge (default: 10.0)
    @type  box: float
    @param center: re-center coordinates (default: True)
    @type  center: bool
    @param fmod: list of files with amber parameter modifications
                 to be loaded into leap with loadAmberParams
                 (default:['frcmod.ionsjc_tip3p'] ... mod file needed for
                 default Amber ff10 ions -- topology saving will fail if
                 this one is missing)
    @type  fmod: [str]
    @param fprep: list of files with amber residue definitions
                  (to be loaded into leap with loadAmberPrep)
                  (default: [])
    @type  fprep: [str]
    @param kw: additional key=value pairs for leap input template
    @type  kw: key=value

    @raise IOError: passed through unchanged from file / leap handling
    """
    f_out = t.absfile( f_out )
    f_out_crd = t.absfile( f_out_crd ) or t.stripSuffix( f_out ) + '.crd'
    f_out_pdb = t.absfile( f_out_pdb ) or t.stripSuffix( f_out ) +\
                '_leap.pdb'

    ## fmod and fprep are deliberately NOT converted to absolute paths
    ## (removed with bugfix 3434136)

    ## IOErrors simply propagate; the former "except IOError, why:
    ## raise IOError, why" only re-raised the same error while throwing
    ## away the original traceback.
    if self.verbose:
        self.log.add( '\nCleaning PDB file for Amber:' )

    m = self.leapModel( hetatm=hetatm, center=center )

    if cap:
        ## chain fragments ending in a break get NME, the following
        ## fragment gets ACE
        end_broken = m.atom2chainIndices( m.chainBreaks() )
        capC = MU.union( capC, end_broken )
        capN = MU.union( capN, N.array( end_broken ) + 1 )

    for i in capN:
        if self.verbose:
            self.log.add( 'Adding ACE cap to chain %i' % i )
        m = self.capACE( m, i )

    for i in capC:
        if self.verbose:
            self.log.add( 'Adding NME cap to chain %i' % i )
        m = self.capNME( m, i )

    m.renumberResidues( addChainId=1 )  ## again, to accommodate capping

    ## close the template file deterministically
    with open( self.leap_template ) as f_template:
        template = f_template.read()

    leap_mod = self.__fLines( 'm = loadAmberParams %s\n', fmod )
    leap_prep = self.__fLines( 'loadAmberPrep %s\n', fprep )

    ss = self.__ssBonds( m, cutoff=4. )
    self.__cys2cyx( m, ss )
    leap_ss = self.__fLines( self.ss_bond, ss )

    if self.verbose:
        self.log.add( 'Found %i disulfide bonds: %s' % (len(ss),str(ss)) )
        self.log.add( 'writing cleaned PDB to %s' % self.leap_pdb )

    m.writePdb( self.leap_pdb, ter=3 )

    self.__runLeap( template, in_pdb=self.leap_pdb,
                    out_parm=f_out, out_crd=f_out_crd,
                    ss_bonds=leap_ss, fmod=leap_mod,
                    fprep=leap_prep, norun=norun,
                    box=box, **kw )

    if not norun:
        self.parm2pdb( f_out, f_out_crd, f_out_pdb )

    if not self.keep_leap_pdb and not self.debug:
        t.tryRemove( self.leap_pdb )
def parmSolvated(self, f_out, f_out_crd=None, f_out_pdb=None,
                 hetatm=0, norun=0, cap=0, capN=[], capC=[],
                 fmod=['frcmod.ionsjc_tip3p'], fprep=[],
                 box=10.0, center=True, **kw):
    """
    Prepare the model for Amber (cleaning, optional ACE / NME capping,
    disulfide detection), write it to self.leap_pdb and hand it to leap
    (self.__runLeap with the template read from self.leap_template) to
    generate topology and coordinates.

    @param f_out: target file for parm (topology)
    @type  f_out: str
    @param f_out_crd: target file for crd (coordinates)
                      (default:|f_out_base|.crd)
    @type  f_out_crd: str
    @param f_out_pdb: target file for pdb (default:|f_out_base|_leap.pdb)
    @type  f_out_pdb: str
    @param hetatm: keep hetero atoms (default: 0)
    @type  hetatm: 1|0
    @param norun: forwarded to the leap run; a true value also skips the
                  parm + crd -> PDB conversion at the end (default: 0)
    @type  norun: 1|0
    @param cap: put ACE and NME capping residue on chain breaks
                (default: 0)
    @type  cap: 1|0
    @param capN: indices of chains that should get ACE cap (default: [])
    @type  capN: [int]
    @param capC: indices of chains that should get NME cap (default: [])
    @type  capC: [int]
    @param box: minimal distance of solute from box edge (default: 10.0)
    @type  box: float
    @param center: re-center coordinates (default: True)
    @type  center: bool
    @param fmod: list of files with amber parameter modifications
                 to be loaded into leap with loadAmberParams
                 (default:['frcmod.ionsjc_tip3p'] ... mod file needed for
                 default Amber ff10 ions -- topology saving will fail if
                 this one is missing)
    @type  fmod: [str]
    @param fprep: list of files with amber residue definitions
                  (to be loaded into leap with loadAmberPrep)
                  (default: [])
    @type  fprep: [str]
    @param kw: additional key=value pairs for leap input template
    @type  kw: key=value

    @raise IOError: propagated unchanged from file / leap handling
    """
    verbose = self.verbose

    f_out = t.absfile(f_out)
    f_out_crd = t.absfile(f_out_crd) or t.stripSuffix(f_out) + '.crd'
    f_out_pdb = t.absfile(f_out_pdb) or t.stripSuffix(f_out) + '_leap.pdb'

    ## fmod / fprep stay as given -- the conversion to absolute file
    ## names was removed with bugfix 3434136

    ## No try/except around the work below: the old handler only did
    ## "raise IOError, why", i.e. it re-raised the very same error but
    ## lost the traceback pointing to its origin.
    if verbose:
        self.log.add('\nCleaning PDB file for Amber:')

    m = self.leapModel(hetatm=hetatm, center=center)

    if cap:
        ## C-terminal cap on every fragment ending in a chain break,
        ## N-terminal cap on the fragment right after it
        end_broken = m.atom2chainIndices(m.chainBreaks())
        capC = MU.union(capC, end_broken)
        capN = MU.union(capN, N.array(end_broken) + 1)

    for i in capN:
        if verbose:
            self.log.add('Adding ACE cap to chain %i' % i)
        m = self.capACE(m, i)

    for i in capC:
        if verbose:
            self.log.add('Adding NME cap to chain %i' % i)
        m = self.capNME(m, i)

    m.renumberResidues(addChainId=1)  ## again, to accommodate capping

    ## context manager closes the template file even on read errors
    with open(self.leap_template) as f_template:
        template = f_template.read()

    leap_mod = self.__fLines('m = loadAmberParams %s\n', fmod)
    leap_prep = self.__fLines('loadAmberPrep %s\n', fprep)

    ss = self.__ssBonds(m, cutoff=4.)
    self.__cys2cyx(m, ss)
    leap_ss = self.__fLines(self.ss_bond, ss)

    if verbose:
        self.log.add('Found %i disulfide bonds: %s' % (len(ss), str(ss)))
        self.log.add('writing cleaned PDB to %s' % self.leap_pdb)

    m.writePdb(self.leap_pdb, ter=3)

    self.__runLeap(template, in_pdb=self.leap_pdb,
                   out_parm=f_out, out_crd=f_out_crd,
                   ss_bonds=leap_ss, fmod=leap_mod,
                   fprep=leap_prep, norun=norun,
                   box=box, **kw)

    if not norun:
        ## result is not needed here (formerly stored in an unused local)
        self.parm2pdb(f_out, f_out_crd, f_out_pdb)

    if not self.keep_leap_pdb and not self.debug:
        t.tryRemove(self.leap_pdb)
def update( self, model, source, skipRes=None, updateMissing=0, force=0,
            headPatterns=[] ):
    """
    Update empty or missing fields of model from the source. The
    model will be connected to the source via model.source.
    Profiles that are derived from the source are labeled 'changed'=0.
    The same holds for coordinates (xyzChanged=0).
    However, existing profiles or coordinates or fields remain untouched.

    @param model: existing model
    @type  model: PDBModel
    @param source: source PDB file
    @type  source: str
    @param skipRes: list residue names that should not be parsed
    @type  skipRes: [ str ]
    @param updateMissing: ignored
    @type  updateMissing: 1|0
    @param force: parse the source even if self.needsUpdate( model ) is
                  false (default: 0)
    @type  force: 1|0
    @param headPatterns: [(putIntoKey, regex)] extract given REMARKS
    @type  headPatterns: [(str, str)]

    @raise PDBParserError - if something is wrong with the source file
    """
    try:
        ## atoms and/or coordinates need to be updated from PDB
        if force or self.needsUpdate( model ):

            atoms, xyz, info = self.__collectAll( source, skipRes,
                                                  headPatterns )

            keys = M.union( atoms.keys(), self.DEFAULTS.keys() )

            for k in keys:
                a = model.atoms.get( k, default=0, update=False )

                ## default=0 marks a missing profile. The exact type
                ## check replaces the former `a is 0` (identity test
                ## against a literal) and matches the same values;
                ## a plain `a == 0` is avoided as a may be an array.
                missing = (a is None) or (type(a) is int and a == 0)

                if missing:
                    dflt = self.DEFAULTS.get( k, None )
                    model.atoms.set( k, atoms.get(k, dflt), changed=0 )

            if model.xyz is None:
                model.xyz = xyz
                model.xyzChanged = 0

            ## reset cached indices
            model._resIndex = None
            model._chainIndex = None

            model.fileName = model.fileName or source

            model.pdbCode = model.pdbCode or info.get('pdb_code', None) or \
                            self.idFromName( model.fileName )

            ## make biounit from the dictionary we have parsed; the
            ## 'BIOMT' record is removed so that it is not copied into
            ## model.info below
            if 'BIOMT' in info:
                biomt = info['BIOMT']
                model.biounit = BU.BioUnit( model, biomt )
                del info['BIOMT']

            model.info.update( info )

    except Exception:
        ## Exception instead of a bare except, so that KeyboardInterrupt
        ## and SystemExit are no longer converted into a PDBParserError
        msg = self.__xplorAtomIndicesTest( source ) or ' '
        ## '\\ERROR: ' spells out the literal backslash that the former
        ## invalid escape sequence produced; the message is unchanged
        raise PDBParserError( 'Cannot read ' + str(source) + ' as PDB\n'
                              '\\ERROR: ' + T.lastError() + msg )

    model.setSource( source )
def cleanPDB(self, hetatm=1, keepwaters=1, cap=1, capN=[], capC=[], **kw):
    """
    Create a cleaned copy of the loaded model (self.m) for use in a
    simulation. Modified method from parmSolvated.

    The model is centered, water is split off, atom names are converted
    with xplor2amber(), chain ends are capped on request, disulfide
    bonds are detected (the leap bond commands are stored as list of
    lines in self.leap_ss) and, finally, non-clashing waters are merged
    back in. The result replaces self.m.

    @param hetatm: not used by this method (default: 1)
    @type  hetatm: 1|0
    @param keepwaters: re-attach those water residues that have no atom
                       closer than 1.5 to the cleaned model (default: 1)
    @type  keepwaters: 1|0
    @param cap: put ACE and NME capping residue on chain breaks and on
                the chains listed in capN / capC (default: 1)
    @type  cap: 1|0
    @param capN: indices of chains that should get ACE cap (default: [])
    @type  capN: [int]
    @param capC: indices of chains that should get NME cap (default: [])
    @type  capC: [int]
    @param kw: accepted but not used by this method
    @type  kw: key=value

    @return: the cleaned model (also assigned to self.m)
    @rtype: PDBModel

    @raise IOError: propagated unchanged
    """
    ## IOErrors simply propagate; the former "except IOError, why:
    ## raise IOError, why" re-raised the same error without traceback.
    if self.verbose:
        self.log.add( 'Cleaning PDB file for Amber:' )

    self.m.setXyz(self.m.xyz - self.m.center())  # center object

    if keepwaters:
        wats = self.m.compress(self.m.maskH2O())

    ## work on a water-free copy
    m = self.m.clone()
    m = m.compress(~m.maskH2O())
    m.xplor2amber()

    if cap:
        ## Length check rather than truth value of the chainBreaks()
        ## result.
        ## NOTE(review): assumes the cap lists are only extended when
        ## there is at least one chain break -- confirm against the
        ## original layout of this method.
        breaks = m.chainBreaks()
        if len(breaks) > 0:
            end_broken = m.atom2chainIndices( breaks )
            capC = MU.union( capC, end_broken ) or [0]
            capN = MU.union( capN, N.array( end_broken ) + 1 ) or [0]

        ## Caps are only reported when they are really added; before,
        ## the log messages were also written with cap switched off.
        for i in capN:
            if self.verbose:
                self.log.add( 'Adding ACE cap to chain %i' % i )
            m = self.capACE( m, i )

        for i in capC:
            if self.verbose:
                self.log.add( 'Adding NME cap to chain %i' % i )
            m = self.capNME( m, i )

    m.renumberResidues( addChainId=1 )  ## again, to accommodate capping

    ss = self._AmberParmBuilder__ssBonds( m, cutoff=4. )
    self._AmberParmBuilder__cys2cyx( m, ss )

    # Corrected by Xiaofeng on 02/12/2016
    # the bond commands are re-targeted from unit 'p' to unit 'sys'
    self.leap_ss = self._AmberParmBuilder__fLines( self.ss_bond, ss )\
                       .replace('p', 'sys').split("\n")

    if keepwaters:
        import scipy.spatial as spat

        # Add only waters that do not clash
        dmat = spat.distance_matrix(wats.xyz, m.xyz)
        clashes = npy.where(dmat < 1.5)[0]   # row index = water atom

        ## Count the hits instead of testing the index values:
        ## npy.any(clashes) was False if the only clash was atom 0.
        if len(clashes) > 0:
            clashres = wats.atom2resIndices(clashes)
            # Remove residues which have clashes
            wats.removeRes(clashres)

        m = m.concat(wats)
        m.renumberResidues( addChainId=1 )

    self.m = m
    return m
def capTerminals(self, auto=False, breaks=False, capN=[], capC=[]):
    """
    Add NME and ACE capping residues to chain breaks or normal N- and
    C-terminals. Note: these capping residues contain hydrogen atoms.

    Chain indices for capN and capC arguments can be interpreted either
    with or without chain break detection enabled. For example, let's
    assume we have a two-chain protein with some missing residues (chain
    break) in the first chain:

    A:   MGSKVSK---FLNAGSK
    B:   FGHLAKSDAK

    Then:
      capTerminals( breaks=False, capN=[1], capC=[1]) will add N-and
      C-terminal caps to chain B.
    However:
      capTerminals( breaks=True, capN=[1], capC=[1]) will add N- and
      C-terminal caps to the second fragment of chain A.

    Note: this operation *replaces* the internal model.

    @param auto: put ACE and NME capping residue on chain breaks
                 and on suspected false N- and C-termini, in addition
                 to the chains requested via capN / capC
                 (default: False)
    @type  auto: bool
    @param breaks: switch on chain break detection before interpreting
                   capN and capC
    @type  breaks: False
    @param capN: indices of chains that should get ACE cap (default: [])
    @type  capN: [int]
    @param capC: indices of chains that should get NME cap (default: [])
    @type  capC: [int]

    @return: the capped model (also assigned to self.model)
    @rtype: PDBModel
    """
    m = self.model
    c_len = m.lenChains()
    i_breaks = m.chainBreaks()

    if auto:
        if not breaks:
            ## translate chain indices into break-aware indices
            capN = self.convertChainIdsNter(m, capN)
            capC = self.convertChainIdsCter(m, capC)

        breaks = True

        ## Merge the automatically detected terminals with the requested
        ## ones. Before, the result of unresolvedTerminals() overwrote
        ## capN / capC, which silently discarded the converted user input.
        auto_n, auto_c = self.unresolvedTerminals(m)
        capN = M.union(capN, auto_n)
        capC = M.union(capC, auto_c)

        ## fragment before a break gets NME, the one after it gets ACE
        end_broken = m.atom2chainIndices(m.chainBreaks(), breaks=1)

        capC = M.union(capC, end_broken)
        capN = M.union(capN, N0.array(end_broken) + 1)

    capN = self.filterProteinChains(m, capN, m.chainIndex(breaks=breaks))
    capC = self.filterProteinChains(m, capC, m.chainEndIndex(breaks=breaks))

    for i in capN:
        m = self.capACE(m, i, breaks=breaks)
        ## sanity checks: capping must neither split nor merge chains
        assert m.lenChains() == c_len, '%i != %i' % \
               (m.lenChains(), c_len)
        assert len(m.chainBreaks(force=True)) == len(i_breaks)
        assert m['serial_number'].dtype == N0.Int32, 'serial_number not int'

    for i in capC:
        m = self.capNME(m, i, breaks=breaks)
        assert m.lenChains() == c_len
        assert len(m.chainBreaks(force=True)) == len(i_breaks)

    self.model = m
    return self.model
def capTerminals( self, auto=False, breaks=False, capN=[], capC=[] ):
    """
    Add NME and ACE capping residues to chain breaks or normal N- and
    C-terminals. Note: these capping residues contain hydrogen atoms.

    Chain indices for capN and capC arguments can be interpreted either
    with or without chain break detection enabled. For example, let's
    assume we have a two-chain protein with some missing residues (chain
    break) in the first chain:

    A:   MGSKVSK---FLNAGSK
    B:   FGHLAKSDAK

    Then:
      capTerminals( breaks=False, capN=[1], capC=[1]) will add N-and
      C-terminal caps to chain B.
    However:
      capTerminals( breaks=True, capN=[1], capC=[1]) will add N- and
      C-terminal caps to the second fragment of chain A.

    Note: this operation *replaces* the internal model.

    @param auto: put ACE and NME capping residue on chain breaks
                 and on suspected false N- and C-termini, in addition
                 to the chains requested via capN / capC
                 (default: False)
    @type  auto: bool
    @param breaks: switch on chain break detection before interpreting
                   capN and capC
    @type  breaks: False
    @param capN: indices of chains that should get ACE cap (default: [])
    @type  capN: [int]
    @param capC: indices of chains that should get NME cap (default: [])
    @type  capC: [int]

    @return: the capped model (also assigned to self.model)
    @rtype: PDBModel
    """
    m = self.model
    c_len = m.lenChains()
    i_breaks = m.chainBreaks()

    if auto:
        if not breaks:
            ## translate chain indices into break-aware indices
            capN = self.convertChainIdsNter( m, capN )
            capC = self.convertChainIdsCter( m, capC )

        breaks = True

        ## Combine requested and automatically detected terminals.
        ## Before, capN / capC were overwritten by the result of
        ## unresolvedTerminals(), so the conversion above had no effect
        ## and explicitly requested caps were dropped in auto mode.
        auto_n, auto_c = self.unresolvedTerminals( m )
        capN = M.union( capN, auto_n )
        capC = M.union( capC, auto_c )

        ## fragment before a break gets NME, the one after it gets ACE
        end_broken = m.atom2chainIndices( m.chainBreaks(), breaks=1 )

        capC = M.union( capC, end_broken )
        capN = M.union( capN, N.array( end_broken ) + 1 )

    capN = self.filterProteinChains(m, capN, m.chainIndex(breaks=breaks))
    capC = self.filterProteinChains(m, capC, m.chainEndIndex(breaks=breaks))

    for i in capN:
        m = self.capACE( m, i, breaks=breaks )
        ## sanity checks: capping must neither split nor merge chains
        assert m.lenChains() == c_len, '%i != %i' % \
               (m.lenChains(), c_len)
        assert len(m.chainBreaks(force=True)) == len(i_breaks)

    for i in capC:
        m = self.capNME( m, i, breaks=breaks )
        assert m.lenChains() == c_len
        assert len(m.chainBreaks(force=True)) == len(i_breaks)

    self.model = m
    return self.model