def split(filename, outdir):
    """ Splits a stream file into an rtf and prm file.

    Lines between a ``read rtf card`` line and the next line starting with ``end`` go to the
    topology file; lines between a ``read para... card`` line and the next ``end`` go to the
    parameter file. Everything else in the stream file is dropped. Matching is case-insensitive.

    Parameters
    ----------
    filename : str
        Stream file name. Its base name has to end in `.str`; an optional leading `toppar_` is
        stripped when naming the outputs.
    outdir : str
        Directory in which `top_<base>.rtf` and `par_<base>.prm` are written.

    Returns
    -------
    outrtf : str
        Path of the written topology file
    outprm : str
        Path of the written parameter file

    Raises
    ------
    IndexError
        If the base name of `filename` does not end in `.str`
    BuildError
        If the stream file contains more than one rtf or more than one parameter section
    """
    # Raw strings: '\.' and '\w' are regex escapes, not Python string escapes
    regex = re.compile(r'^(toppar_)?(.*)\.str$')
    base = os.path.basename(os.path.normpath(filename))
    base = regex.findall(base)[0][1]
    outrtf = os.path.join(outdir, 'top_{}.rtf'.format(base))
    outprm = os.path.join(outdir, 'par_{}.prm'.format(base))

    startrtf = re.compile(r'^read rtf card', flags=re.IGNORECASE)
    startprm = re.compile(r'^read para\w* card', flags=re.IGNORECASE)
    endsection = re.compile(r'^end', flags=re.IGNORECASE)

    rtfsection = 0
    prmsection = 0
    section = 'junk'
    rtflines = []
    prmlines = []

    with open(filename, 'r') as f:
        for line in f:
            if startrtf.match(line):
                rtfsection += 1
                if rtfsection > 1:
                    rtflines.append('! WARNING -- ANOTHER rtf SECTION FOUND\n')
                section = 'rtf'
            elif startprm.match(line):
                prmsection += 1
                if prmsection > 1:
                    prmlines.append('! WARNING -- ANOTHER para SECTION FOUND\n')
                section = 'prm'
            elif endsection.match(line):
                section = 'junk'
            elif section == 'rtf':
                rtflines.append(line)
            elif section == 'prm':
                prmlines.append(line)

    # Duplicate sections are rejected before anything is written to disk
    if rtfsection > 1:
        raise BuildError('Multiple ({}) rtf topology sections found in {} stream file.'.format(rtfsection, filename))
    if prmsection > 1:
        raise BuildError('Multiple ({}) prm parameter sections found in {} stream file.'.format(prmsection, filename))

    with open(outrtf, 'w') as f:
        f.write(''.join(rtflines) + 'END\n')
    with open(outprm, 'w') as f:
        f.write(''.join(prmlines) + 'END\n')

    return outrtf, outprm
def build(mol, topo=None, param=None, stream=None, prefix='structure', outdir='./build', caps=None, ionize=True,
          saltconc=0, saltanion=None, saltcation=None, disulfide=None, patches=None, noregen=None, psfgen=None,
          execute=True, _clean=True):
    """ Builds a system for CHARMM

    Uses VMD and psfgen to build a system for CHARMM. Additionally it allows for ionization and adding of disulfide
    bridges.

    Parameters
    ----------
    mol : :class:`Molecule <htmd.molecule.molecule.Molecule>` object
        The Molecule object containing the system
    topo : list of str
        A list of topology `rtf` files.
        Use :func:`charmm.listFiles <htmd.builder.charmm.listFiles>` to get a list of available topology files.
        Default: ['top/top_all36_prot.rtf', 'top/top_all36_lipid.rtf', 'top/top_water_ions.rtf']
    param : list of str
        A list of parameter `prm` files.
        Use :func:`charmm.listFiles <htmd.builder.charmm.listFiles>` to get a list of available parameter files.
        Default: ['par/par_all36_prot_mod.prm', 'par/par_all36_lipid.prm', 'par/par_water_ions.prm']
    stream : list of str
        A list of stream `str` files containing topologies and parameters.
        Use :func:`charmm.listFiles <htmd.builder.charmm.listFiles>` to get a list of available stream files.
        Default: ['str/prot/toppar_all36_prot_arg0.str']
    prefix : str
        The prefix for the generated pdb and psf files
    outdir : str
        The path to the output directory
        Default: './build'
    caps : dict
        A dictionary with keys segids and values lists of strings describing the caps of that segment.
        e.g. caps['P'] = ['first ACE', 'last CT3'] or caps['P'] = ['first none', 'last none'].
        Default: will apply ACE and CT3 caps to proteins and none caps to the rest.
    ionize : bool
        Enable or disable ionization
    saltconc : float
        Salt concentration (in Molar) to add to the system after neutralization.
    saltanion : {'CLA'}
        The anion type. Please use only CHARMM ion atom names.
    saltcation : {'SOD', 'MG', 'POT', 'CES', 'CAL', 'ZN2'}
        The cation type. Please use only CHARMM ion atom names.
    disulfide : list of :class:`DisulfideBridge <htmd.builder.builder.DisulfideBridge>` objects
        If None it will guess disulfide bonds. Otherwise provide a list of `DisulfideBridge` objects.
    patches : list of str
        Any further patches the user wants to apply
    noregen : list of str
        A list of patches that must not be regenerated (angles and dihedrals)
        Default: ['FHEM', 'PHEM', 'PLOH', 'PLO2', 'PLIG', 'PSUL']
    psfgen : str
        Path to psfgen executable used to build for CHARMM
    execute : bool
        Disable building. Will only write out the input script needed by psfgen. Does not include ionization.
    _clean : bool
        Internal flag. When False the output directory is not passed to `_cleanOutDir`; the ionization pass
        uses this for the second build.

    Returns
    -------
    molbuilt : :class:`Molecule <htmd.molecule.molecule.Molecule>` object
        The built system in a Molecule object. None if `execute` is False.

    Raises
    ------
    FileNotFoundError
        If no psfgen executable is given and none can be found
    BuildError
        If the build log contains errors or the `<prefix>.pdb`/`<prefix>.psf` files were not generated

    Example
    -------
    >>> from htmd import *
    >>> mol = Molecule("3PTB")
    >>> mol.filter("not resname BEN")
    >>> mol.renumberResidues()
    >>> molbuilt = charmm.build(mol, outdir='/tmp/build', ionize=False)  # doctest: +ELLIPSIS +NORMALIZE_WHITESPACE
    Bond between A: [serial 185 resid 42 resname CYS chain A segid 0]
                 B: [serial 298 resid 58 resname CYS chain A segid 0]...
    >>> # More complex example
    >>> topos  = ['top/top_all36_prot.rtf', './benzamidine.rtf', 'top/top_water_ions.rtf']
    >>> params = ['par/par_all36_prot_mod.prm', './benzamidine.prm', 'par/par_water_ions.prm']
    >>> disu = [DisulfideBridge('P', 157, 'P', 13), DisulfideBridge('K', 1, 'K', 25)]
    >>> molbuilt = charmm.build(mol, topo=topos, param=params, outdir='/tmp/build', saltconc=0.15, disulfide=disu)  # doctest: +SKIP
    """
    mol = mol.copy()
    _missingSegID(mol)
    _checkMixedSegment(mol)
    _checkResidueInsertions(mol)
    if psfgen is None:
        psfgen = shutil.which('psfgen', mode=os.X_OK)
        if not psfgen:
            raise FileNotFoundError('Could not find psfgen executable, or no execute permissions are given. '
                                    'Run `conda install psfgen`.')
    if not os.path.isdir(outdir):
        os.makedirs(outdir)
    if _clean:
        _cleanOutDir(outdir)
    if topo is None:
        topo = defaultTopo()
    if param is None:
        param = defaultParam()
    if stream is None:
        stream = defaultStream()
    if caps is None:
        caps = _defaultCaps(mol)
    # patches that must _not_ be regenerated
    if noregen is None:
        noregen = ['FHEM', 'PHEM', 'PLOH', 'PLO2', 'PLIG', 'PSUL']

    # Work on copies so the caller's lists are not extended with the split stream files
    alltopo = topo.copy()
    allparam = param.copy()

    # Splitting the stream files and adding them to the list of parameter and topology files
    charmmdir = path.join(home(), 'builder', 'charmmfiles')
    for s in stream:
        if s[0] != '.' and path.isfile(path.join(charmmdir, s)):
            s = path.join(charmmdir, s)
        outrtf, outprm = _prepareStream(s)
        alltopo.append(outrtf)
        allparam.append(outprm)

    if patches is None:
        patches = []
    if isinstance(patches, str):
        patches = [patches]
    # Find protonated residues and add patches for them
    allpatches = list(patches) + _protonationPatches(mol)

    with open(path.join(outdir, 'build.vmd'), 'w') as f:
        f.write('# psfgen file generated by charmm.build\n')
        f.write('package require psfgen;\n')
        f.write('psfcontext reset;\n\n')

        # Copying and printing out the topologies
        if not path.exists(path.join(outdir, 'topologies')):
            os.makedirs(path.join(outdir, 'topologies'))
        for i, topofile in enumerate(alltopo):
            if topofile[0] != '.' and path.isfile(path.join(charmmdir, topofile)):
                topofile = path.join(charmmdir, topofile)
                # The resolved path is stored back as alltopo is reused by the ionization re-build
                alltopo[i] = topofile
            localname = '{}.'.format(i) + path.basename(topofile)
            shutil.copy(topofile, path.join(outdir, 'topologies', localname))
            f.write('topology ' + path.join('topologies', localname) + '\n')
        f.write('\n')

        _printAliases(f)

        # Printing out segments
        if not path.exists(path.join(outdir, 'segments')):
            os.makedirs(path.join(outdir, 'segments'))
        logger.info('Writing out segments.')
        segments = _getSegments(mol)
        wateratoms = mol.atomselect('water')
        for seg in segments:
            pdbname = 'segment' + seg + '.pdb'
            segatoms = mol.segid == seg
            mol.write(path.join(outdir, 'segments', pdbname), sel=segatoms)

            segwater = wateratoms & segatoms
            f.write('segment ' + seg + ' {\n')
            if np.all(segatoms == segwater):  # If segment only contains waters, set: auto none
                f.write('\tauto none\n')
            f.write('\tpdb ' + path.join('segments', pdbname) + '\n')
            if caps is not None and seg in caps:
                for c in caps[seg]:
                    f.write('\t' + c + '\n')
            f.write('}\n')
            f.write('coordpdb ' + path.join('segments', pdbname) + ' ' + seg + '\n\n')

        # Printing out patches for the disulfide bridges
        if disulfide is None:
            disulfide = detectDisulfideBonds(mol)

        if len(disulfide) != 0:
            for d in disulfide:
                f.write('patch DISU {}:{} {}:{}\n'.format(d.segid1, d.resid1, d.segid2, d.resid2))
            f.write('\n')

        # The second word of a patch line is compared against the noregen names
        noregenpatches = [p for p in allpatches if p.split()[1] in noregen]
        regenpatches = [p for p in allpatches if p.split()[1] not in noregen]

        # Printing regenerable patches
        if len(regenpatches) != 0:
            for p in regenpatches:
                f.write(p + '\n')
            f.write('\n')

        # Regenerate angles and dihedrals
        f.write('regenerate angles dihedrals\n')
        f.write('\n')

        # Printing non-regenerable patches (after the regenerate command)
        if len(noregenpatches) != 0:
            for p in noregenpatches:
                f.write(p + '\n')
            f.write('\n')

        f.write('guesscoord\n')
        f.write('writepsf ' + prefix + '.psf\n')
        f.write('writepdb ' + prefix + '.pdb\n')

    if allparam is not None:
        combine(allparam, path.join(outdir, 'parameters'))

    molbuilt = None
    if execute:
        # Absolute path, computed before changing directory
        logpath = os.path.abspath('{}/log.txt'.format(outdir))
        logger.info('Starting the build.')
        currdir = os.getcwd()
        os.chdir(outdir)
        try:
            with open(logpath, 'w') as logfile:
                call([psfgen, './build.vmd'], stdout=logfile)
            errors = _logParser(logpath)
        finally:
            # Always return to the caller's working directory, even if psfgen could not be run
            os.chdir(currdir)
        if errors:
            raise BuildError(errors + ['Check {} for further information on errors in building.'.format(logpath)])
        logger.info('Finished building.')

        # psfgen was told to write <prefix>.psf/.pdb, so those are the files to look for
        pdbfile = path.join(outdir, prefix + '.pdb')
        psffile = path.join(outdir, prefix + '.psf')
        if path.isfile(pdbfile) and path.isfile(psffile):
            molbuilt = Molecule(pdbfile)
            molbuilt.read(psffile)
        else:
            raise BuildError('No structure pdb/psf file was generated. Check {} for errors in building.'.format(logpath))

        if ionize:
            # List the build products before creating the directory they are moved into
            data = glob(path.join(outdir, '*'))
            os.makedirs(path.join(outdir, 'pre-ionize'))
            for item in data:
                shutil.move(item, path.join(outdir, 'pre-ionize'))
            totalcharge = np.sum(molbuilt.charge)
            nwater = np.sum(molbuilt.atomselect('water and noh'))
            anion, cation, anionatom, cationatom, nanion, ncation = ionizef(totalcharge, nwater, saltconc=saltconc,
                                                                            anion=saltanion, cation=saltcation)
            newmol = ionizePlace(mol, anion, cation, anionatom, cationatom, nanion, ncation)
            # Redo the whole build but now with ions included
            return build(newmol, topo=alltopo, param=allparam, stream=[], prefix=prefix, outdir=outdir, ionize=False,
                         caps=caps, execute=execute, saltconc=saltconc, disulfide=disulfide, patches=patches,
                         noregen=noregen, psfgen=psfgen, _clean=False)

    # molbuilt stays None when execute is False; there is nothing to check in that case
    if molbuilt is not None:
        _checkFailedAtoms(molbuilt)
        _recoverProtonations(molbuilt)
    return molbuilt
def build(mol, ff=None, topo=None, param=None, prefix='structure', outdir='./build', caps=None, ionize=True, saltconc=0,
          saltanion=None, saltcation=None, disulfide=None, tleap=None, execute=True, atomtypes=None,
          offlibraries=None, gbsa=False, igb=2):
    """ Builds a system for AMBER

    Uses tleap to build a system for AMBER. Additionally it allows the user to ionize and add disulfide bridges.

    Parameters
    ----------
    mol : :class:`Molecule <htmd.molecule.molecule.Molecule>` object
        The Molecule object containing the system
    ff : list of str
        A list of leaprc forcefield files.
        Use :func:`amber.listFiles <htmd.builder.amber.listFiles>` to get a list of available forcefield files.
        Default: :func:`amber.defaultFf <htmd.builder.amber.defaultFf>`
    topo : list of str
        A list of topology `prepi/prep/in` files.
        Use :func:`amber.listFiles <htmd.builder.amber.listFiles>` to get a list of available topology files.
        Default: :func:`amber.defaultTopo <htmd.builder.amber.defaultTopo>`
    param : list of str
        A list of parameter `frcmod` files.
        Use :func:`amber.listFiles <htmd.builder.amber.listFiles>` to get a list of available parameter files.
        Default: :func:`amber.defaultParam <htmd.builder.amber.defaultParam>`
    prefix : str
        The prefix for the generated prmtop, crd and pdb files
    outdir : str
        The path to the output directory
        Default: './build'
    caps : dict
        A dictionary with keys segids and values lists of strings describing the caps for a particular protein segment.
        e.g. caps['P'] = ['ACE', 'NME'] or caps['P'] = ['none', 'none']. Default: will apply ACE and NME caps to every
        protein segment.
    ionize : bool
        Enable or disable ionization
    saltconc : float
        Salt concentration to add to the system after neutralization.
    saltanion : {'Cl-'}
        The anion type. Please use only AMBER ion atom names.
    saltcation : {'Na+', 'K+', 'Cs+'}
        The cation type. Please use only AMBER ion atom names.
    disulfide : list of pairs of atomselection strings
        If None it will guess disulfide bonds. Otherwise provide a list pairs of atomselection strings for each pair of
        residues forming the disulfide bridge.
    tleap : str
        Path to tleap executable used to build the system for AMBER
    execute : bool
        Disable building. Will only write out the input script needed by tleap. Does not include ionization.
    atomtypes : list of triplets
        Custom atom types defined by the user as ('type', 'element', 'hybrid') triplets
        e.g. (('C1', 'C', 'sp2'), ('CI', 'C', 'sp3')). Check `addAtomTypes` in AmberTools docs.
    offlibraries : str or list
        A path or a list of paths to OFF library files. Check `loadOFF` in AmberTools docs.
    gbsa : bool
        Modify radii for GBSA implicit water model
    igb : int
        GB model. Select: 1 for mbondi, 2 and 5 for mbondi2, 7 for bondi and 8 for mbondi3.
        Check section 4. The Generalized Born/Surface Area Model of the AMBER manual.

    Returns
    -------
    molbuilt : :class:`Molecule <htmd.molecule.molecule.Molecule>` object
        The built system in a Molecule object. None if `execute` is False.

    Raises
    ------
    NameError
        If the given tleap executable is not found, an input file could not be written or tleap could not be run
    RuntimeError
        If an atom type definition is not a triplet or an OFF library file does not exist
    BuildError
        If the build log contains errors or the `<prefix>.prmtop`/`<prefix>.crd` files were not generated

    Example
    -------
    >>> from htmd.ui import *  # doctest: +SKIP
    >>> mol = Molecule("3PTB")
    >>> molbuilt = amber.build(mol, outdir='/tmp/build')  # doctest: +SKIP
    >>> # More complex example
    >>> disu = [['segid P and resid 157', 'segid P and resid 13'], ['segid K and resid 1', 'segid K and resid 25']]
    >>> molbuilt = amber.build(mol, outdir='/tmp/build', saltconc=0.15, disulfide=disu)  # doctest: +SKIP
    """
    # Remove pdb protein bonds as they can be regenerated by tleap. Keep non-protein bonds i.e. for ligands
    mol = mol.copy()
    _removeProteinBonds(mol)

    if tleap is None:
        tleap = _findTleap()
    elif shutil.which(tleap) is None:
        raise NameError('Could not find executable: `{}` in the PATH. Cannot build for AMBER.'.format(tleap))

    if not os.path.isdir(outdir):
        os.makedirs(outdir)
    _cleanOutDir(outdir)
    if ff is None:
        ff = defaultFf()
    if topo is None:
        topo = defaultTopo()
    if param is None:
        param = defaultParam()
    if caps is None:
        caps = _defaultProteinCaps(mol)

    _missingSegID(mol)
    _checkMixedSegment(mol)

    mol = _charmmLipid2Amber(mol)

    _applyProteinCaps(mol, caps)

    with open(os.path.join(outdir, 'tleap.in'), 'w') as f:
        f.write('# tleap file generated by amber.build\n')

        # Printing out the forcefields
        if isinstance(ff, str):
            ff = [ff]
        for i, force in enumerate(ff):
            if not os.path.isfile(force):
                force = _locateFile(force, 'ff', tleap)
                if force is None:
                    continue
            newname = 'ff{}_{}'.format(i, os.path.basename(force))
            shutil.copy(force, os.path.join(outdir, newname))
            f.write('source {}\n'.format(newname))
        f.write('\n')

        if gbsa:
            gbmodels = {1: 'mbondi', 2: 'mbondi2', 5: 'mbondi2', 7: 'bondi', 8: 'mbondi3'}
            f.write('set default PBradii {}\n\n'.format(gbmodels[igb]))

        # Adding custom atom types
        if atomtypes is not None:
            atomtypes = ensurelist(tocheck=atomtypes[0], tomod=atomtypes)
            f.write('addAtomTypes {\n')
            for at in atomtypes:
                if len(at) != 3:
                    raise RuntimeError('Atom type definitions have to be triplets. Check the AMBER documentation.')
                f.write(' {{ "{}" "{}" "{}" }}\n'.format(at[0], at[1], at[2]))
            f.write('}\n\n')

        # Loading OFF libraries
        if offlibraries is not None:
            offlibraries = ensurelist(offlibraries)
            # Each library gets its own index; the index must not be borrowed from the force-field loop above
            for i, off in enumerate(offlibraries):
                if not os.path.isfile(off):
                    raise RuntimeError('Could not find off-library in location {}'.format(off))
                newname = 'offlib{}_{}'.format(i, os.path.basename(off))
                shutil.copy(off, os.path.join(outdir, newname))
                f.write('loadoff {}\n'.format(newname))

        # Loading frcmod parameters
        f.write('# Loading parameter files\n')
        for i, p in enumerate(param):
            if not os.path.isfile(p):
                p = _locateFile(p, 'param', tleap)
                if p is None:
                    continue
            newname = 'param{}_{}'.format(i, os.path.basename(p))
            shutil.copy(p, os.path.join(outdir, newname))
            f.write('loadamberparams {}\n'.format(newname))
        f.write('\n')

        # Loading prepi topologies
        f.write('# Loading prepi topologies\n')
        for i, t in enumerate(topo):
            if not os.path.isfile(t):
                t = _locateFile(t, 'topo', tleap)
                if t is None:
                    continue
            newname = 'topo{}_{}'.format(i, os.path.basename(t))
            shutil.copy(t, os.path.join(outdir, newname))
            f.write('loadamberprep {}\n'.format(newname))
        f.write('\n')

        f.write('# Loading the system\n')
        f.write('mol = loadpdb input.pdb\n\n')

        # Atoms carrying an atomtype are written per segment as mol2 files and combined with the pdb part
        if np.sum(mol.atomtype != '') != 0:
            logger.debug('Writing mol2 files for input to tleap.')
            segs = np.unique(mol.segid[mol.atomtype != ''])
            combstr = 'mol = combine {mol'
            for s in segs:
                name = 'segment{}'.format(s)
                mol2name = os.path.join(outdir, '{}.mol2'.format(name))
                mol.write(mol2name, (mol.atomtype != '') & (mol.segid == s))
                if not os.path.isfile(mol2name):
                    raise NameError('Could not write a mol2 file out of the given Molecule.')
                f.write('# Loading the rest of the system\n')
                f.write('{} = loadmol2 {}.mol2\n\n'.format(name, name))
                combstr += ' {}'.format(name)
            combstr += '}\n\n'
            f.write(combstr)

        # Write patches for disulfide bonds (only after ionizing)
        if not ionize:
            # TODO: Remove this once we deprecate the class
            from htmd.builder.builder import DisulfideBridge
            from htmd.molecule.molecule import UniqueResidueID
            if disulfide is not None and len(disulfide) != 0 and isinstance(disulfide[0], DisulfideBridge):
                newdisu = []
                for d in disulfide:
                    r1 = UniqueResidueID.fromMolecule(mol, 'resid {} and segname {}'.format(d.resid1, d.segid1))
                    r2 = UniqueResidueID.fromMolecule(mol, 'resid {} and segname {}'.format(d.resid2, d.segid2))
                    newdisu.append([r1, r2])
                disulfide = newdisu
            # TODO: Remove up to here ----------------------

            if disulfide is not None and len(disulfide) != 0 and isinstance(disulfide[0][0], str):
                disulfide = convertDisulfide(mol, disulfide)

            if disulfide is None:
                logger.info('Detecting disulfide bonds.')
                disulfide = detectDisulfideBonds(mol)

            # Fix structure to match the disulfide patching
            if len(disulfide) != 0:
                torem = np.zeros(mol.numAtoms, dtype=bool)
                f.write('# Adding disulfide bonds\n')
                for d in disulfide:
                    # Rename the residues to CYX if there is a disulfide bond
                    atoms1 = d[0].selectAtoms(mol, indexes=False)
                    atoms2 = d[1].selectAtoms(mol, indexes=False)
                    mol.resname[atoms1] = 'CYX'
                    mol.resname[atoms2] = 'CYX'
                    # Remove (eventual) HG hydrogens on these CYS (from proteinPrepare)
                    torem |= (atoms1 & (mol.name == 'HG')) | (atoms2 & (mol.name == 'HG'))
                    # Convert to the sequential residue numbering used in the tleap bond command
                    uqseqid = sequenceID((mol.resid, mol.insertion, mol.segid)) + mol.resid[0]
                    uqres1 = int(np.unique(uqseqid[atoms1]))
                    uqres2 = int(np.unique(uqseqid[atoms2]))
                    f.write('bond mol.{}.SG mol.{}.SG\n'.format(uqres1, uqres2))
                f.write('\n')
                mol.remove(torem, _logger=False)

        f.write('# Writing out the results\n')
        f.write('saveamberparm mol ' + prefix + '.prmtop ' + prefix + '.crd\n')
        f.write('quit')

    # Printing and loading the PDB file. AMBER can work with a single PDB file if the segments are separate by TER
    logger.debug('Writing PDB file for input to tleap.')
    pdbname = os.path.join(outdir, 'input.pdb')

    # mol2 files have atomtype, here we only write parts not coming from mol2
    # We need to write the input.pdb at the end since we modify the resname for disulfide bridges in mol
    mol.write(pdbname, mol.atomtype == '')
    if not os.path.isfile(pdbname):
        raise NameError('Could not write a PDB file out of the given Molecule.')

    molbuilt = None
    if execute:
        # Source paths of extra dirs (our dirs, not amber default)
        htmdamberdir = os.path.abspath(os.path.join(home(), 'builder', 'amberfiles'))
        sourcepaths = [htmdamberdir]
        sourcepaths += [os.path.join(htmdamberdir, os.path.dirname(fname))
                        for fname in ff if os.path.isfile(os.path.join(htmdamberdir, fname))]
        extrasource = []
        for p in sourcepaths:
            extrasource.append('-I')
            extrasource.append('{}'.format(p))

        # Absolute path, computed before changing directory
        logpath = os.path.abspath(os.path.join(outdir, 'log.txt'))
        logger.info('Starting the build.')
        currdir = os.getcwd()
        os.chdir(outdir)
        try:
            with open(logpath, 'w') as logfile:
                cmd = [tleap, '-f', './tleap.in']
                cmd[1:1] = extrasource  # the -I flags go right after the executable
                try:
                    call(cmd, stdout=logfile)
                except Exception as e:
                    raise NameError('tleap failed at execution') from e
            errors = _logParser(logpath)
        finally:
            # Always return to the caller's working directory, even if tleap could not be run
            os.chdir(currdir)
        if errors:
            raise BuildError(errors + ['Check {} for further information on errors in building.'.format(logpath)])
        logger.info('Finished building.')

        # tleap was told to write <prefix>.prmtop/.crd, so those are the files to look for
        crdfile = os.path.join(outdir, prefix + '.crd')
        prmtopfile = os.path.join(outdir, prefix + '.prmtop')
        if os.path.exists(crdfile) and os.path.exists(prmtopfile) and \
                os.path.getsize(crdfile) != 0 and os.path.getsize(prmtopfile) != 0:
            molbuilt = Molecule(prmtopfile)
            molbuilt.read(crdfile)
        else:
            raise BuildError('No structure pdb/prmtop file was generated. Check {} for errors in building.'.format(logpath))

        if ionize:
            # Keep the un-ionized outputs under a different name; the second build rewrites <prefix>.*
            shutil.move(crdfile, os.path.join(outdir, prefix + '.noions.crd'))
            shutil.move(prmtopfile, os.path.join(outdir, prefix + '.noions.prmtop'))
            totalcharge = np.sum(molbuilt.charge)
            nwater = np.sum(molbuilt.atomselect('water and noh'))
            anion, cation, anionatom, cationatom, nanion, ncation = ionizef(totalcharge, nwater, saltconc=saltconc,
                                                                            anion=saltanion, cation=saltcation)
            newmol = ionizePlace(mol, anion, cation, anionatom, cationatom, nanion, ncation)
            # Redo the whole build but now with ions included.
            # gbsa/igb are forwarded so the final tleap script keeps the requested PBradii.
            return build(newmol, ff=ff, topo=topo, param=param, prefix=prefix, outdir=outdir, caps={}, ionize=False,
                         execute=execute, saltconc=saltconc, disulfide=disulfide, tleap=tleap, atomtypes=atomtypes,
                         offlibraries=offlibraries, gbsa=gbsa, igb=igb)

        tmpbonds = molbuilt.bonds
        molbuilt.bonds = []  # Removing the bonds to speed up writing
        molbuilt.write(os.path.join(outdir, prefix + '.pdb'))
        molbuilt.bonds = tmpbonds  # Restoring the bonds
    return molbuilt
def build(
    mol,
    ff=None,
    topo=None,
    param=None,
    prefix="structure",
    outdir="./build",
    caps=None,
    ionize=True,
    saltconc=0,
    saltanion=None,
    saltcation=None,
    disulfide=None,
    teleap=None,
    teleapimports=None,
    execute=True,
    atomtypes=None,
    offlibraries=None,
    gbsa=False,
    igb=2,
):
    """Builds a system for AMBER

    Uses tleap to build a system for AMBER. Additionally it allows the user to ionize and add disulfide bridges.

    Parameters
    ----------
    mol : :class:`Molecule <moleculekit.molecule.Molecule>` object
        The Molecule object containing the system
    ff : list of str
        A list of leaprc forcefield files.
        Use :func:`amber.listFiles <htmd.builder.amber.listFiles>` to get a list of available forcefield files.
        Default: :func:`amber.defaultFf <htmd.builder.amber.defaultFf>`
    topo : list of str
        A list of topology `prepi/prep/in` files.
        Use :func:`amber.listFiles <htmd.builder.amber.listFiles>` to get a list of available topology files.
        Default: :func:`amber.defaultTopo <htmd.builder.amber.defaultTopo>`
    param : list of str
        A list of parameter `frcmod` files.
        Use :func:`amber.listFiles <htmd.builder.amber.listFiles>` to get a list of available parameter files.
        Default: :func:`amber.defaultParam <htmd.builder.amber.defaultParam>`
    prefix : str
        The prefix for the generated prmtop, crd and pdb files
    outdir : str
        The path to the output directory
        Default: './build'
    caps : dict
        A dictionary with keys segids and values lists of strings describing the caps for a particular protein segment.
        e.g. caps['P'] = ['ACE', 'NME'] or caps['P'] = ['none', 'none']. Default: will apply ACE and NME caps to every
        protein segment.
    ionize : bool
        Enable or disable ionization
    saltconc : float
        Salt concentration to add to the system after neutralization.
    saltanion : {'Cl-'}
        The anion type. Please use only AMBER ion atom names.
    saltcation : {'Na+', 'K+', 'Cs+'}
        The cation type. Please use only AMBER ion atom names.
    disulfide : list of pairs of atomselection strings
        If None it will guess disulfide bonds. Otherwise provide a list pairs of atomselection strings for each pair of
        residues forming the disulfide bridge.
    teleap : str
        Path to teLeap executable used to build the system for AMBER
    teleapimports : list
        A list of paths to pass to teLeap '-I' flag, i.e. directories to be searched
        Default: determined from :func:`amber.defaultAmberHome <htmd.builder.amber.defaultAmberHome>` and
        :func:`amber.htmdAmberHome <htmd.builder.amber.htmdAmberHome>`
    execute : bool
        Disable building. Will only write out the input script needed by tleap. Does not include ionization.
    atomtypes : list of triplets
        Custom atom types defined by the user as ('type', 'element', 'hybrid') triplets
        e.g. (('C1', 'C', 'sp2'), ('CI', 'C', 'sp3')). Check `addAtomTypes` in AmberTools docs.
    offlibraries : str or list
        A path or a list of paths to OFF library files. Check `loadOFF` in AmberTools docs.
    gbsa : bool
        Modify radii for GBSA implicit water model
    igb : int
        GB model. Select: 1 for mbondi, 2 and 5 for mbondi2, 7 for bondi and 8 for mbondi3.
        Check section 4. The Generalized Born/Surface Area Model of the AMBER manual.

    Returns
    -------
    molbuilt : :class:`Molecule <moleculekit.molecule.Molecule>` object
        The built system in a Molecule object. None if `execute` is False.

    Raises
    ------
    NameError
        If the given teLeap executable is not found, an input file could not be written or teLeap could not be run
    RuntimeError
        If an atom type definition is not a triplet, an OFF library file does not exist, no import directories
        could be determined or the generated files could not be read
    BuildError
        If the build log contains errors or the `<prefix>.prmtop`/`<prefix>.crd` files were not generated

    Example
    -------
    >>> from htmd.ui import *  # doctest: +SKIP
    >>> mol = Molecule("3PTB")
    >>> molbuilt = amber.build(mol, outdir='/tmp/build')  # doctest: +SKIP
    >>> # More complex example
    >>> disu = [['segid P and resid 157', 'segid P and resid 13'], ['segid K and resid 1', 'segid K and resid 25']]
    >>> molbuilt = amber.build(mol, outdir='/tmp/build', saltconc=0.15, disulfide=disu)  # doctest: +SKIP
    """
    # Remove pdb protein bonds as they can be regenerated by tleap. Keep non-protein bonds i.e. for ligands
    mol = mol.copy()
    _removeProteinBonds(mol)

    if teleap is None:
        teleap = _findTeLeap()
    elif shutil.which(teleap) is None:
        raise NameError(
            f"Could not find executable: `{teleap}` in the PATH. Cannot build for AMBER. Please install it with `conda install ambermini -c acellera`"
        )

    if not os.path.isdir(outdir):
        os.makedirs(outdir)
    _cleanOutDir(outdir)
    if ff is None:
        ff = defaultFf()
    if topo is None:
        topo = defaultTopo()
    if param is None:
        param = defaultParam()
    if caps is None:
        caps = _defaultProteinCaps(mol)

    # Normalise once so every later use sees a list; a bare string would otherwise be iterated
    # character by character when the import directories are collected below.
    # NOTE(review): assumes ensurelist returns list input unchanged - confirm.
    ff = ensurelist(ff)

    _missingSegID(mol)
    _checkMixedSegment(mol)

    mol = _charmmLipid2Amber(mol)

    _applyProteinCaps(mol, caps)

    with open(os.path.join(outdir, "tleap.in"), "w") as f:
        f.write("# tleap file generated by amber.build\n")

        # Printing out the forcefields
        for i, force in enumerate(ff):
            if not os.path.isfile(force):
                force = _locateFile(force, "ff", teleap)
                if force is None:
                    continue
            newname = f"ff{i}_{os.path.basename(force)}"
            shutil.copy(force, os.path.join(outdir, newname))
            f.write(f"source {newname}\n")
        f.write("\n")

        if gbsa:
            gbmodels = {
                1: "mbondi",
                2: "mbondi2",
                5: "mbondi2",
                7: "bondi",
                8: "mbondi3",
            }
            f.write(f"set default PBradii {gbmodels[igb]}\n\n")

        # Adding custom atom types
        if atomtypes is not None:
            atomtypes = ensurelist(tocheck=atomtypes[0], tomod=atomtypes)
            f.write("addAtomTypes {\n")
            for at in atomtypes:
                if len(at) != 3:
                    raise RuntimeError(
                        "Atom type definitions have to be triplets. Check the AMBER documentation."
                    )
                f.write(f' {{ "{at[0]}" "{at[1]}" "{at[2]}" }}\n')
            f.write("}\n\n")

        # Loading OFF libraries
        if offlibraries is not None:
            offlibraries = ensurelist(offlibraries)
            for i, off in enumerate(offlibraries):
                if not os.path.isfile(off):
                    raise RuntimeError(f"Could not find off-library in location {off}")
                newname = f"offlib{i}_{os.path.basename(off)}"
                shutil.copy(off, os.path.join(outdir, newname))
                f.write(f"loadoff {newname}\n")

        # Loading frcmod parameters
        f.write("# Loading parameter files\n")
        for i, p in enumerate(param):
            if not os.path.isfile(p):
                p = _locateFile(p, "param", teleap)
                if p is None:
                    continue
            newname = f"param{i}_{os.path.basename(p)}"
            shutil.copy(p, os.path.join(outdir, newname))
            f.write(f"loadamberparams {newname}\n")
        f.write("\n")

        # Loading prepi topologies
        f.write("# Loading prepi topologies\n")
        for i, t in enumerate(topo):
            if not os.path.isfile(t):
                t = _locateFile(t, "topo", teleap)
                if t is None:
                    continue
            newname = f"topo{i}_{os.path.basename(t)}"
            shutil.copy(t, os.path.join(outdir, newname))
            f.write(f"loadamberprep {newname}\n")
        f.write("\n")

        f.write("# Loading the system\n")
        f.write("mol = loadpdb input.pdb\n\n")

        # Atoms carrying an atomtype are written per segment as mol2 files and combined with the pdb part
        if np.sum(mol.atomtype != "") != 0:
            f.write("# Loading the ligands\n")
            segs = np.unique(mol.segid[mol.atomtype != ""])

            # teLeap crashes if you try to combine too many molecules in a single command so we will do them by 10s
            for k in range(0, len(segs), 10):
                segments_string = ""
                for seg in segs[k:min(k + 10, len(segs))]:
                    name = f"segment{seg}"
                    segments_string += f" {name}"

                    mol2name = os.path.join(outdir, f"{name}.mol2")
                    mol.write(mol2name, (mol.atomtype != "") & (mol.segid == seg))
                    if not os.path.isfile(mol2name):
                        raise NameError("Failed writing ligand mol2 file.")

                    f.write(f"{name} = loadmol2 {name}.mol2\n")
                f.write(f"mol = combine {{mol{segments_string}}}\n\n")

        # Write patches for disulfide bonds (only after ionizing)
        if not ionize:
            # TODO: Remove this once we deprecate the class
            from htmd.builder.builder import DisulfideBridge
            from moleculekit.molecule import UniqueResidueID

            if (disulfide is not None and len(disulfide) != 0
                    and isinstance(disulfide[0], DisulfideBridge)):
                newdisu = []
                for d in disulfide:
                    r1 = UniqueResidueID.fromMolecule(
                        mol, f"resid {d.resid1} and segname {d.segid1}")
                    r2 = UniqueResidueID.fromMolecule(
                        mol, f"resid {d.resid2} and segname {d.segid2}")
                    newdisu.append([r1, r2])
                disulfide = newdisu
            # TODO: Remove up to here ----------------------

            if (disulfide is not None and len(disulfide) != 0
                    and isinstance(disulfide[0][0], str)):
                disulfide = convertDisulfide(mol, disulfide)

            if disulfide is None:
                logger.info("Detecting disulfide bonds.")
                disulfide = detectDisulfideBonds(mol)

            # Fix structure to match the disulfide patching
            if len(disulfide) != 0:
                torem = np.zeros(mol.numAtoms, dtype=bool)
                f.write("# Adding disulfide bonds\n")
                for d in disulfide:
                    # Rename the residues to CYX if there is a disulfide bond
                    atoms1 = d[0].selectAtoms(mol, indexes=False)
                    atoms2 = d[1].selectAtoms(mol, indexes=False)
                    mol.resname[atoms1] = "CYX"
                    mol.resname[atoms2] = "CYX"
                    # Remove (eventual) HG hydrogens on these CYS (from proteinPrepare)
                    torem |= (atoms1 & (mol.name == "HG")) | (atoms2 & (mol.name == "HG"))
                    # Convert to the sequential residue numbering used in the teLeap bond command
                    uqseqid = (sequenceID((mol.resid, mol.insertion, mol.segid)) + mol.resid[0])
                    uqres1 = int(np.unique(uqseqid[atoms1]))
                    uqres2 = int(np.unique(uqseqid[atoms2]))
                    f.write(f"bond mol.{uqres1}.SG mol.{uqres2}.SG\n")
                f.write("\n")
                mol.remove(torem, _logger=False)

        # Calculate the bounding box and store it in the CRD file
        f.write('setBox mol "vdw"\n\n')

        f.write("# Writing out the results\n")
        f.write(f"saveamberparm mol {prefix}.prmtop {prefix}.crd\n")
        f.write("quit")

    # Printing and loading the PDB file. AMBER can work with a single PDB file if the segments are separate by TER
    logger.debug("Writing PDB file for input to tleap.")
    pdbname = os.path.join(outdir, "input.pdb")

    # mol2 files have atomtype, here we only write parts not coming from mol2
    # We need to write the input.pdb at the end since we modify the resname for disulfide bridges in mol
    mol.write(pdbname, mol.atomtype == "")
    if not os.path.isfile(pdbname):
        raise NameError("Could not write a PDB file out of the given Molecule.")

    molbuilt = None
    if execute:
        # The caller's teleapimports is left untouched so it can be forwarded unchanged to the re-build
        importdirs = teleapimports
        if not importdirs:
            importdirs = []
            # Source default Amber (i.e. the same paths tleap imports)
            amberhome = defaultAmberHome(teleap=teleap)
            importdirs += [
                os.path.join(amberhome, s)
                for s in _defaultAmberSearchPaths.values()
            ]
            if len(importdirs) == 0:
                raise RuntimeWarning(
                    f"No default Amber force-field found. Check teLeap location: {teleap}"
                )
            # Source HTMD Amber paths that contain ffs
            htmdamberdir = htmdAmberHome()
            importdirs += [
                os.path.join(htmdamberdir, os.path.dirname(fname))
                for fname in ff
                if os.path.isfile(os.path.join(htmdamberdir, fname))
            ]
            if len(importdirs) == 0:
                raise RuntimeError(
                    "No default Amber force-field imports found. Check "
                    "`htmd.builder.amber.defaultAmberHome()` and `htmd.builder.amber.htmdAmberHome()`"
                )
        # Set import flags for teLeap
        teleapimportflags = []
        for p in importdirs:
            teleapimportflags.append("-I")
            teleapimportflags.append(str(p))

        # Absolute path, computed before changing directory
        logpath = os.path.abspath(os.path.join(outdir, "log.txt"))
        logger.info("Starting the build.")
        currdir = os.getcwd()
        os.chdir(outdir)
        try:
            with open(logpath, "w") as logfile:
                cmd = [teleap, "-f", "./tleap.in"]
                cmd[1:1] = teleapimportflags  # the -I flags go right after the executable
                logger.debug(cmd)
                try:
                    call(cmd, stdout=logfile)
                except Exception as e:
                    raise NameError("teLeap failed at execution") from e
            errors = _logParser(logpath)
        finally:
            # Always return to the caller's working directory, even if teLeap could not be run
            os.chdir(currdir)
        if errors:
            raise BuildError(errors + [
                f"Check {logpath} for further information on errors in building."
            ])
        logger.info("Finished building.")

        # teLeap was told to write <prefix>.prmtop/.crd, so those are the files to look for
        crdfile = os.path.join(outdir, f"{prefix}.crd")
        prmtopfile = os.path.join(outdir, f"{prefix}.prmtop")
        if (os.path.exists(crdfile) and os.path.exists(prmtopfile)
                and os.path.getsize(crdfile) != 0
                and os.path.getsize(prmtopfile) != 0):
            try:
                molbuilt = Molecule(prmtopfile)
                molbuilt.read(crdfile)
            except Exception as e:
                raise RuntimeError(
                    f"Failed at reading {prefix}.prmtop/{prefix}.crd due to error: {e}"
                ) from e
        else:
            raise BuildError(
                f"No structure pdb/prmtop file was generated. Check {logpath} for errors in building."
            )

        if ionize:
            # Keep the un-ionized outputs under a different name; the second build rewrites <prefix>.*
            shutil.move(crdfile, os.path.join(outdir, f"{prefix}.noions.crd"))
            shutil.move(prmtopfile, os.path.join(outdir, f"{prefix}.noions.prmtop"))
            totalcharge = np.sum(molbuilt.charge)
            nwater = np.sum(molbuilt.atomselect("water and noh"))
            anion, cation, anionatom, cationatom, nanion, ncation = ionizef(
                totalcharge,
                nwater,
                saltconc=saltconc,
                anion=saltanion,
                cation=saltcation,
            )
            newmol = ionizePlace(mol, anion, cation, anionatom, cationatom, nanion, ncation)
            # Redo the whole build but now with ions included.
            # teleapimports, gbsa and igb are forwarded so the final build honours the caller's settings.
            return build(
                newmol,
                ff=ff,
                topo=topo,
                param=param,
                prefix=prefix,
                outdir=outdir,
                caps={},
                ionize=False,
                execute=execute,
                saltconc=saltconc,
                disulfide=disulfide,
                teleap=teleap,
                teleapimports=teleapimports,
                atomtypes=atomtypes,
                offlibraries=offlibraries,
                gbsa=gbsa,
                igb=igb,
            )

        tmpbonds = molbuilt.bonds
        molbuilt.bonds = []  # Removing the bonds to speed up writing
        molbuilt.write(os.path.join(outdir, f"{prefix}.pdb"))
        molbuilt.bonds = tmpbonds  # Restoring the bonds
        detectCisPeptideBonds(molbuilt)  # Warn in case of cis bonds
    return molbuilt
def split(filename, outdir):
    """Splits a stream file into an rtf and prm file.

    Lines found between a ``read rtf card`` line and the next line starting
    with ``end`` are collected as topology; lines found between a
    ``read para... card`` line and the next line starting with ``end`` are
    collected as parameters (all matches are case-insensitive and anchored at
    the start of the line). Everything outside those sections is discarded.
    Each output file is terminated with an ``END`` line.

    Parameters
    ----------
    filename : str
        Stream file name
    outdir : str
        Directory in which the two output files are written. It is not
        created by this function.

    Returns
    -------
    outrtf : str
        Path of the written topology file, ``<outdir>/top_<base>.rtf``
    outprm : str
        Path of the written parameter file, ``<outdir>/par_<base>.prm``

    Raises
    ------
    BuildError
        If the stream file contains more than one rtf section or more than
        one parameter section. No output file is written in that case.
    """
    base = os.path.basename(os.path.normpath(filename))
    match = re.match(r"^(toppar_)?(.*)\.str$", base)
    if match:
        # Drop the optional "toppar_" prefix and the ".str" suffix
        base = match.group(2)
    else:
        # Names that do not follow the "[toppar_]<name>.str" convention used
        # to crash with an IndexError; just drop whatever extension they have.
        base = os.path.splitext(base)[0]
    outrtf = os.path.join(outdir, f"top_{base}.rtf")
    outprm = os.path.join(outdir, f"par_{base}.prm")

    startrtf = re.compile(r"^read rtf card", flags=re.IGNORECASE)
    startprm = re.compile(r"^read para\w* card", flags=re.IGNORECASE)
    endsection = re.compile(r"^end", flags=re.IGNORECASE)

    rtfsection = 0
    prmsection = 0
    rtflines = []
    prmlines = []
    current = None  # list receiving the lines of the section being read, if any

    with open(filename, "r") as f:
        for line in f:
            if startrtf.match(line):
                rtfsection += 1
                current = rtflines
            elif startprm.match(line):
                prmsection += 1
                current = prmlines
            elif endsection.match(line):
                current = None
            elif current is not None:
                current.append(line)

    # Validate before touching the output files so a bad stream leaves nothing behind
    if rtfsection > 1:
        raise BuildError(
            f"Multiple ({rtfsection}) rtf topology sections found in {filename} stream file."
        )
    if prmsection > 1:
        raise BuildError(
            f"Multiple ({prmsection}) prm parameter sections found in {filename} stream file."
        )

    with open(outrtf, "w") as f:
        f.write("".join(rtflines) + "END\n")
    with open(outprm, "w") as f:
        f.write("".join(prmlines) + "END\n")

    return outrtf, outprm
def build(
    mol,
    topo=None,
    param=None,
    stream=None,
    prefix="structure",
    outdir="./build",
    caps=None,
    ionize=True,
    saltconc=0,
    saltanion=None,
    saltcation=None,
    disulfide=None,
    regenerate=["angles", "dihedrals"],
    patches=None,
    noregen=None,
    aliasresidues=None,
    psfgen=None,
    execute=True,
    _clean=True,
):
    """Builds a system for CHARMM

    Uses VMD and psfgen to build a system for CHARMM. Additionally it allows for ionization and adding of disulfide bridges.

    Parameters
    ----------
    mol : :class:`Molecule <moleculekit.molecule.Molecule>` object
        The Molecule object containing the system
    topo : list of str
        A list of topology `rtf` files.
        Use :func:`charmm.listFiles <htmd.builder.charmm.listFiles>` to get a list of available topology files.
        Default: ['top/top_all36_prot.rtf', 'top/top_all36_lipid.rtf', 'top/top_water_ions.rtf']
    param : list of str
        A list of parameter `prm` files.
        Use :func:`charmm.listFiles <htmd.builder.charmm.listFiles>` to get a list of available parameter files.
        Default: ['par/par_all36_prot.prm', 'par/par_all36_lipid.prm', 'par/par_water_ions.prm']
    stream : list of str
        A list of stream `str` files containing topologies and parameters.
        Use :func:`charmm.listFiles <htmd.builder.charmm.listFiles>` to get a list of available stream files.
        Default: ['str/prot/toppar_all36_prot_arg0.str']
    prefix : str
        The prefix for the generated pdb and psf files
    outdir : str
        The path to the output directory
        Default: './build'
    caps : dict
        A dictionary with keys segids and values lists of strings describing the caps of that segment.
        e.g. caps['P'] = ['first ACE', 'last CT3'] or caps['P'] = ['first none', 'last none'].
        Default: will apply ACE and CT3 caps to proteins and none caps to the rest.
    ionize : bool
        Enable or disable ionization
    saltconc : float
        Salt concentration (in Molar) to add to the system after neutralization.
    saltanion : {'CLA'}
        The anion type. Please use only CHARMM ion atom names.
    saltcation : {'SOD', 'MG', 'POT', 'CES', 'CAL', 'ZN2'}
        The cation type. Please use only CHARMM ion atom names.
    disulfide : list of pairs of atomselection strings
        If None it will guess disulfide bonds. Otherwise provide a list pairs of atomselection strings for each pair of
        residues forming the disulfide bridge.
    regenerate : None or list of strings of: ['angles', 'dihedrals']
        Disable angle/dihedral regeneration with `regenerate=None`, or enable it with `regenerate=['angles', 'dihedrals']`
        or just one of the two options with `regenerate=['angles']` or `regenerate=['dihedrals']`.
    patches : list of str
        Any further patches the user wants to apply
    noregen : list of str
        A list of patches that must not be regenerated (angles and dihedrals)
        Default: ['FHEM', 'PHEM', 'PLOH', 'PLO2', 'PLIG', 'PSUL']
    aliasresidues : dict of aliases
        A dictionary of key: value pairs of residue names we want to alias
    psfgen : str
        Path to psfgen executable used to build for CHARMM
    execute : bool
        Disable building. Will only write out the input script needed by psfgen. Does not include ionization.
    _clean : bool
        If True, `_cleanOutDir(outdir)` is called before anything is written. The second build pass triggered by
        ionization calls this function again with `_clean=False`.

    Returns
    -------
    molbuilt : :class:`Molecule <moleculekit.molecule.Molecule>` object
        The built system in a Molecule object. None if `execute` is False.

    Raises
    ------
    FileNotFoundError
        If no `psfgen` path is given and none can be found with execute permissions.
    BuildError
        If the psfgen log contains errors or the expected `<prefix>.pdb`/`<prefix>.psf` files were not produced.

    Example
    -------
    >>> from htmd.ui import *
    >>> mol = Molecule("3PTB")
    >>> mol.filter("not resname BEN")
    >>> molbuilt = charmm.build(mol, outdir='/tmp/build', ionize=False)  # doctest: +ELLIPSIS
    Bond between A: [serial 185 resid 42 resname CYS chain A segid 0]
                 B: [serial 298 resid 58 resname CYS chain A segid 0]...
    >>> # More complex example
    >>> topos  = ['top/top_all36_prot.rtf', './BEN.rtf', 'top/top_water_ions.rtf']
    >>> params = ['par/par_all36_prot.prm', './BEN.prm', 'par/par_water_ions.prm']
    >>> disu = [['segid P and resid 157', 'segid P and resid 13'], ['segid K and resid 1', 'segid K and resid 25']]
    >>> ar = {'SAPI24': 'SP24'}  # Alias large resnames to a short-hand version
    >>> molbuilt = charmm.build(mol, topo=topos, param=params, outdir='/tmp/build', saltconc=0.15, disulfide=disu, aliasresidues=ar)  # doctest: +SKIP
    """
    # Work on a copy: resnames are rewritten below when aliases are applied
    mol = mol.copy()
    _missingSegID(mol)
    _checkMixedSegment(mol)
    _checkLongResnames(mol, aliasresidues)
    if psfgen is None:
        psfgen = shutil.which("psfgen", mode=os.X_OK)
        if not psfgen:
            raise FileNotFoundError(
                "Could not find psfgen executable, or no execute permissions are given. "
                "Run `conda install psfgen -c acellera`."
            )
    if not os.path.isdir(outdir):
        os.makedirs(outdir)
    if _clean:
        _cleanOutDir(outdir)
    if topo is None:
        topo = defaultTopo()
    if param is None:
        param = defaultParam()
    if stream is None:
        stream = defaultStream()
    if caps is None:
        caps = _defaultCaps(mol)
    # patches that must _not_ be regenerated
    if noregen is None:
        noregen = ["FHEM", "PHEM", "PLOH", "PLO2", "PLIG", "PSUL"]

    # Copies, so the lists passed in by the caller are never modified
    alltopo = topo.copy()
    allparam = param.copy()

    # Splitting the stream files and adding them to the list of parameter and topology files
    charmmdir = htmdCharmmHome()
    for s in stream:
        # Names not starting with "." are first looked up inside the HTMD CHARMM directory
        if s[0] != "." and path.isfile(path.join(charmmdir, s)):
            s = path.join(charmmdir, s)
        outrtf, outprm = _prepareStream(s)
        alltopo.append(outrtf)
        allparam.append(outprm)

    # _missingChain(mol)
    # _checkProteinGaps(mol)
    if patches is None:
        patches = []
    if isinstance(patches, str):
        patches = [patches]
    allpatches = []
    allpatches += patches
    # Find protonated residues and add patches for them
    allpatches += _protonationPatches(mol)

    with open(path.join(outdir, "build.vmd"), "w") as script:
        script.write("# psfgen file generated by charmm.build\n")
        script.write("package require psfgen;\n")
        script.write("psfcontext reset;\n\n")

        # Copying and printing out the topologies
        if not path.exists(path.join(outdir, "topologies")):
            os.makedirs(path.join(outdir, "topologies"))
        for i, topofile in enumerate(alltopo):
            if topofile[0] != "." and path.isfile(path.join(charmmdir, topofile)):
                topofile = path.join(charmmdir, topofile)
                # Keep the resolved path: alltopo is reused by the ionization rebuild
                alltopo[i] = topofile
            # The index prefix keeps same-named files from different folders apart
            localname = "{}.".format(i) + path.basename(topofile)
            shutil.copy(topofile, path.join(outdir, "topologies", localname))
            script.write("topology " + path.join("topologies", localname) + "\n")
        script.write("\n")

        _printAliases(script)
        if aliasresidues is not None:  # User defined aliases
            for key, val in aliasresidues.items():
                mol.resname[mol.resname == key] = val
                script.write(" pdbalias residue {} {}\n".format(val, key))

        # Printing out segments
        if not path.exists(path.join(outdir, "segments")):
            os.makedirs(path.join(outdir, "segments"))
        logger.info("Writing out segments.")
        segments = _getSegments(mol)
        wateratoms = mol.atomselect("water")
        for seg in segments:
            pdbname = "segment" + seg + ".pdb"
            segatoms = mol.segid == seg
            mol.write(path.join(outdir, "segments", pdbname), sel=segatoms)

            segwater = wateratoms & segatoms
            script.write("segment " + seg + " {\n")
            if np.all(segatoms == segwater):
                # If segment only contains waters, set: auto none
                script.write("\tauto none\n")
            script.write("\tpdb " + path.join("segments", pdbname) + "\n")
            if caps is not None and seg in caps:
                for c in caps[seg]:
                    script.write("\t" + c + "\n")
            script.write("}\n")
            script.write(
                "coordpdb " + path.join("segments", pdbname) + " " + seg + "\n\n"
            )

        # Disulfides given as atomselection strings are converted first;
        # anything else that is not None is passed through unchanged.
        if (
            disulfide is not None
            and len(disulfide) != 0
            and isinstance(disulfide[0][0], str)
        ):
            disulfide = convertDisulfide(mol, disulfide)

        if disulfide is None:
            disulfide = detectDisulfideBonds(mol)

        if len(disulfide) != 0:
            for d in sorted(disulfide, key=lambda x: x[0].segid):
                str0 = f"{d[0].segid}:{d[0].resid}{d[0].insertion}"
                str1 = f"{d[1].segid}:{d[1].resid}{d[1].insertion}"
                script.write(f"patch DISU {str0} {str1}\n")
            script.write("\n")

        # The second word of each patch line is the patch name
        noregenpatches = [p for p in allpatches if p.split()[1] in noregen]
        regenpatches = [p for p in allpatches if p.split()[1] not in noregen]

        # Printing regenerable patches
        if len(regenpatches) != 0:
            for p in regenpatches:
                script.write(p + "\n")
            script.write("\n")

        # Regenerate angles and dihedrals
        if regenerate is not None:
            script.write("regenerate {}\n".format(" ".join(regenerate)))
            script.write("\n")

        # Printing non-regenerable patches (written after the regenerate command)
        if len(noregenpatches) != 0:
            for p in noregenpatches:
                script.write(p + "\n")
            script.write("\n")

        script.write("guesscoord\n")
        script.write("writepsf " + prefix + ".psf\n")
        script.write("writepdb " + prefix + ".pdb\n")
        # script.write('quit\n')

    if allparam is not None:
        combine(allparam, path.join(outdir, "parameters"))

    molbuilt = None
    if execute:
        # Absolute path, resolved before changing directory
        logpath = os.path.abspath("{}/log.txt".format(outdir))
        logger.info("Starting the build.")
        currdir = os.getcwd()
        # The script refers to its inputs with paths relative to outdir
        os.chdir(outdir)
        try:
            with open(logpath, "w") as logfile:
                # call([vmd, '-dispdev', 'text', '-e', './build.vmd'], stdout=f)
                my_env = os.environ.copy()
                my_env["LC_ALL"] = "C"  # run psfgen under the C locale
                call(
                    [psfgen, "./build.vmd"],
                    stdout=logfile,
                    stderr=logfile,
                    env=my_env,
                )
            errors = _logParser(logpath)
        finally:
            # Always return to the caller's working directory, even on failure
            os.chdir(currdir)
        if errors:
            raise BuildError(
                errors
                + [
                    "Check {} for further information on errors in building.".format(
                        logpath
                    )
                ]
            )
        logger.info("Finished building.")

        # The script above writes <prefix>.pdb / <prefix>.psf, so look for those
        # names rather than a hard-coded "structure".
        builtpdb = path.join(outdir, prefix + ".pdb")
        builtpsf = path.join(outdir, prefix + ".psf")
        if path.isfile(builtpdb) and path.isfile(builtpsf):
            molbuilt = Molecule(builtpdb)
            molbuilt.read(builtpsf)
        else:
            raise BuildError(
                "No structure pdb/psf file was generated. Check {} for errors in building.".format(
                    logpath
                )
            )

        if ionize:
            # Set the results of this first pass aside, then rebuild with ions
            os.makedirs(path.join(outdir, "pre-ionize"))
            data = glob(path.join(outdir, "*"))
            for item in data:
                shutil.move(item, path.join(outdir, "pre-ionize"))
            totalcharge = np.sum(molbuilt.charge)
            nwater = np.sum(molbuilt.atomselect("water and noh"))
            anion, cation, anionatom, cationatom, nanion, ncation = ionizef(
                molbuilt,
                totalcharge,
                nwater,
                saltconc=saltconc,
                anion=saltanion,
                cation=saltcation,
            )
            newmol = ionizePlace(
                mol, anion, cation, anionatom, cationatom, nanion, ncation
            )
            # Redo the whole build but now with ions included
            # (streams were already split into alltopo/allparam, hence stream=[])
            return build(
                newmol,
                topo=alltopo,
                param=allparam,
                stream=[],
                prefix=prefix,
                outdir=outdir,
                ionize=False,
                caps=caps,
                execute=execute,
                saltconc=saltconc,
                disulfide=disulfide,
                regenerate=regenerate,
                patches=patches,
                noregen=noregen,
                aliasresidues=aliasresidues,
                psfgen=psfgen,
                _clean=False,
            )

    if molbuilt is None:
        # execute=False: only the psfgen input was written, nothing to validate
        return molbuilt

    _checkFailedAtoms(molbuilt)
    _recoverProtonations(molbuilt)
    detectCisPeptideBonds(molbuilt, respect_bonds=True)  # Warn in case of cis bonds
    return molbuilt