Exemplo n.º 1
0
    def test_tmscore(self):
        """Compare the output of ``molTMscore`` with stored reference values.

        Loads ``filtered.pdb`` plus ``traj.xtc`` and the reference structure
        ``ntl9_2hbb.pdb`` from the ``tmscore`` data directory, scores the
        'protein' selection of the trajectory against the 'protein' selection
        of the reference, and checks both returned arrays with ``np.allclose``.
        """
        from htmd.molecule.molecule import Molecule

        # All input files of this test live in the same data directory
        datadir = home(dataDir='tmscore')

        traj = Molecule(os.path.join(datadir, 'filtered.pdb'))
        traj.read(os.path.join(datadir, 'traj.xtc'))
        reference = Molecule(os.path.join(datadir, 'ntl9_2hbb.pdb'))

        tmscore, rmsd = molTMscore(
            traj,
            reference,
            traj.atomselect('protein'),
            reference.atomselect('protein'),
        )

        # Stored reference results
        wanted_tmscore = np.array([0.21418524, 0.2367377, 0.23433833, 0.21362964, 0.20935164,
                                   0.20279461, 0.27012895, 0.22675238, 0.21230793, 0.2372011])
        wanted_rmsd = np.array([3.70322128, 3.43637027, 3.188193, 3.84455877, 3.53053882,
                                3.46781854, 2.93777629, 2.97978692, 2.70792428, 2.63051318])

        self.assertTrue(np.allclose(tmscore, wanted_tmscore))
        self.assertTrue(np.allclose(rmsd, wanted_rmsd))
Exemplo n.º 2
0
    def __init__(self, sims, refmol, trajalnstr, refalnstr, atomsel, centerstr):
        """Store the reference molecule and selections, pre-computing them if possible.

        Parameters
        ----------
        sims
            Handed to ``_singleMolfile``, which reports whether a single
            molecule file can be used and, if so, which one.
        refmol
            Reference molecule; ``refalnstr`` is evaluated on it immediately.
        trajalnstr, refalnstr, atomsel, centerstr
            Atom selections. ``trajalnstr``, ``atomsel`` and ``centerstr`` are
            stored as given and, when a single molecule file is available,
            also evaluated once on that molecule and cached in the ``_pc_*``
            attributes.
        """
        self._refmol = refmol
        self._refalnsel = self._refmol.atomselect(refalnstr)
        self._trajalnsel, self._centersel, self._atomsel = trajalnstr, centerstr, atomsel

        # pc = Pre-calculated; these stay None unless a single molecule file is found
        self._pc_trajalnsel = self._pc_atomsel = self._pc_centersel = None

        single, molfile = _singleMolfile(sims)
        if not single:
            return

        template = Molecule(molfile)
        self._pc_trajalnsel = template.atomselect(trajalnstr)
        self._pc_atomsel = template.atomselect(atomsel)
        self._pc_centersel = template.atomselect(centerstr)
Exemplo n.º 3
0
    def __init__(self, sims, refmol, trajalnstr, refalnstr, atomsel, centerstr):
        """Keep the reference molecule and selections; cache evaluated selections when possible.

        ``refalnstr`` is evaluated on ``refmol`` right away. The remaining
        selections (``trajalnstr``, ``atomsel``, ``centerstr``) are stored as
        given. If ``_singleMolfile(sims)`` reports a single molecule file,
        that file is loaded and the three selections are evaluated once on it
        and kept in the ``_pc_*`` attributes; otherwise those stay ``None``.
        """
        self._refmol = refmol
        self._refalnsel = self._refmol.atomselect(refalnstr)

        # Selections as supplied by the caller
        self._trajalnsel = trajalnstr
        self._atomsel = atomsel
        self._centersel = centerstr

        # pc = Pre-calculated
        self._pc_trajalnsel = None
        self._pc_atomsel = None
        self._pc_centersel = None

        single, molfile = _singleMolfile(sims)
        if single:
            shared = Molecule(molfile)
            precalc = (shared.atomselect(trajalnstr),
                       shared.atomselect(atomsel),
                       shared.atomselect(centerstr))
            self._pc_trajalnsel, self._pc_atomsel, self._pc_centersel = precalc
Exemplo n.º 4
0
def _filtSim(i, sims, outFolder, filterSel):
    """Write filtered copies of the trajectories of ``sims[i]`` and describe them as a new Sim.

    Parameters
    ----------
    i : int
        Index of the simulation to process in `sims`.
    sims : sequence
        Simulation objects; ``trajectory``, ``molfile`` and ``simid`` are read from ``sims[i]``.
    outFolder : str
        Output folder. Filtered trajectories go into a sub-folder named by ``_simName``.
    filterSel : str
        Atom selection of the atoms to write out.

    Returns
    -------
    Sim or None
        A Sim pointing at the trajectories listed in the output sub-folder and at
        ``<outFolder>/filtered.pdb``, with ``sims[i]`` as parent.
        None if the molecule file of the simulation could not be loaded.
    """
    sim = sims[i]
    name = _simName(sim.trajectory[0])
    directory = path.join(outFolder, name)
    if not path.exists(directory):
        makedirs(directory)

    logger.debug('Processing trajectory ' + name)

    fmolfile = path.join(outFolder, 'filtered.pdb')
    (traj, outtraj) = _renameSims(sim.trajectory, name, outFolder)

    # An empty `traj` means there is nothing to filter; only the Sim is built then.
    if traj:
        try:
            mol = Molecule(sim.molfile)
        except Exception:
            # Best effort: a broken simulation is skipped, but Ctrl-C / SystemExit still propagate
            logger.warning('Error! Skipping simulation ' + name)
            return None

        sel = mol.atomselect(filterSel)

        for j, trajfile in enumerate(traj):
            try:
                mol.read(trajfile)
            except IOError as e:
                # e.strerror can be None, so format the exception itself
                logger.warning('{}, skipping trajectory'.format(e))
                break

            mol.write(outtraj[j], sel)

    ftrajectory = _listXTCs(directory)
    return Sim(simid=sim.simid, parent=sim, input=None, trajectory=ftrajectory, molfile=fmolfile)
Exemplo n.º 5
0
    def __init__(self, sims, sel, simple):
        """Store the selection and the ``simple`` flag; pre-compute the selection if possible.

        When ``_singleMolfile(sims)`` reports a single molecule file, that
        file is loaded and ``sel`` is evaluated on it once; the result is kept
        in ``_pc_sel``. Otherwise ``_pc_sel`` stays ``None``.
        """
        self._sel = sel
        self._simple = simple
        self._pc_sel = None  # pre-calculated selection, filled in below when possible

        single, molfile = _singleMolfile(sims)
        if not single:
            return
        self._pc_sel = Molecule(molfile).atomselect(sel)
Exemplo n.º 6
0
    def __init__(self, sims, sel, simple):
        """Remember ``sel`` and ``simple`` and cache the evaluated selection when possible.

        ``_pc_sel`` holds ``sel`` evaluated on the molecule loaded from the
        file returned by ``_singleMolfile(sims)``, or ``None`` when no single
        molecule file is reported.
        """
        self._pc_sel = None
        self._sel, self._simple = sel, simple

        single, molfile = _singleMolfile(sims)
        if single:
            template = Molecule(molfile)
            # Evaluate the selection a single time on the shared molecule
            self._pc_sel = template.atomselect(sel)
Exemplo n.º 7
0
    def __init__(self, sims, protsel, dih=None, sincos=True):
        """Store the dihedral settings and pre-compute dihedral data when possible.

        Parameters
        ----------
        sims
            Handed to ``_singleMolfile`` to find out whether a single molecule
            file can be used.
        protsel
            Atom selection, evaluated on the loaded molecule for the pre-calculation.
        dih
            Stored as given in ``_dih``.
        sincos : bool
            Stored as given in ``_sincos``.
        """
        self._protsel = protsel
        self._sincos = sincos
        self._dih = dih  # TODO: Calculate the dihedral
        self._pc_dih = None

        single, molfile = _singleMolfile(sims)
        if not single:
            return

        mol = Molecule(molfile)
        selection = mol.atomselect(protsel)
        self._pc_dih = self._dihedralPrecalc(mol, selection)
Exemplo n.º 8
0
    def test_tmscore(self):
        """Run ``molTMscore`` on the bundled test data and compare with stored results.

        The trajectory (``filtered.pdb`` + ``traj.xtc``) is scored against the
        reference ``ntl9_2hbb.pdb`` using the 'protein' selection of each;
        TM-scores and RMSDs must match the stored arrays under ``np.allclose``.
        """
        from htmd.molecule.molecule import Molecule

        def datafile(filename):
            # Resolve a file inside the 'tmscore' test-data directory
            return os.path.join(home(dataDir='tmscore'), filename)

        expected = {
            'tmscore': np.array([
                0.21418524, 0.2367377, 0.23433833, 0.21362964, 0.20935164,
                0.20279461, 0.27012895, 0.22675238, 0.21230793, 0.2372011
            ]),
            'rmsd': np.array([
                3.70322128, 3.43637027, 3.188193, 3.84455877, 3.53053882,
                3.46781854, 2.93777629, 2.97978692, 2.70792428, 2.63051318
            ]),
        }

        mol = Molecule(datafile('filtered.pdb'))
        mol.read(datafile('traj.xtc'))
        ref = Molecule(datafile('ntl9_2hbb.pdb'))

        molsel = mol.atomselect('protein')
        refsel = ref.atomselect('protein')
        tmscore, rmsd = molTMscore(mol, ref, molsel, refsel)

        self.assertTrue(np.allclose(tmscore, expected['tmscore']))
        self.assertTrue(np.allclose(rmsd, expected['rmsd']))
Exemplo n.º 9
0
    def __init__(self, sims, protsel, dih=None, sincos=True):
        """Keep the dihedral settings; cache the dihedral pre-calculation when possible.

        If ``_singleMolfile(sims)`` reports a single molecule file, the file is
        loaded and ``_dihedralPrecalc`` is run on it with ``protsel`` evaluated
        on that molecule; the result is stored in ``_pc_dih``. Otherwise
        ``_pc_dih`` stays ``None``.
        """
        self._protsel, self._sincos = protsel, sincos
        self._dih = dih  # TODO: Calculate the dihedral
        self._pc_dih = None  # pre-calculated dihedral data

        single, molfile = _singleMolfile(sims)
        if single:
            template = Molecule(molfile)
            self._pc_dih = self._dihedralPrecalc(template, template.atomselect(protsel))
Exemplo n.º 10
0
def _filtSim(i, sims, outFolder, filterSel):
    """Write filtered copies of the trajectories of ``sims[i]`` and describe them as a new Sim.

    Parameters
    ----------
    i : int
        Index of the simulation to process in `sims`.
    sims : sequence
        Simulation objects; ``trajectory``, ``molfile`` and ``simid`` are read from ``sims[i]``.
    outFolder : str
        Output folder. Filtered trajectories go into a sub-folder named by ``_simName``.
    filterSel : str
        Atom selection of the atoms to write out.

    Returns
    -------
    Sim or None
        A Sim built from the trajectories detected in the output sub-folder, the frame
        count from ``_getNumFrames`` and ``<outFolder>/filtered.pdb``, with ``sims[i]``
        as parent. None if the molecule of the simulation could not be loaded.
    """
    sim = sims[i]
    name = _simName(sim.trajectory[0])
    directory = path.join(outFolder, name)
    if not path.exists(directory):
        makedirs(directory)

    logger.debug('Processing trajectory ' + name)

    fmolfile = path.join(outFolder, 'filtered.pdb')
    (traj, outtraj) = _renameSims(sim.trajectory, name, outFolder)

    # An empty `traj` means there is nothing to filter; only the Sim is built then.
    if traj:
        try:
            from htmd.molecule.molecule import Molecule
            mol = Molecule(sim.molfile)
        except Exception:
            # Best effort: a broken simulation is skipped, but Ctrl-C / SystemExit still propagate
            logger.warning('Error! Skipping simulation ' + name)
            return None

        sel = mol.atomselect(filterSel)

        for j, trajfile in enumerate(traj):
            try:
                mol.read(trajfile)
            except IOError as e:
                logger.warning('{}, skipping trajectory'.format(e))
                break

            mol.write(outtraj[j], sel)

    ftrajectory = _autoDetectTrajectories(directory)
    numframes = _getNumFrames(sim, ftrajectory)
    return Sim(simid=sim.simid,
               parent=sim,
               input=None,
               trajectory=ftrajectory,
               molfile=fmolfile,
               numframes=numframes)
Exemplo n.º 11
0
def _filtSim(i, sims, outFolder, filterSel):
    """Write filtered copies of the trajectories of ``sims[i]`` and describe them as a new Sim.

    Parameters
    ----------
    i : int
        Index of the simulation to process in `sims`.
    sims : sequence
        Simulation objects; ``trajectory``, ``molfile`` and ``simid`` are read from ``sims[i]``.
    outFolder : str
        Output folder. Filtered trajectories go into a sub-folder named by ``_simName``.
    filterSel : str
        Atom selection of the atoms to write out.

    Returns
    -------
    Sim or None
        A Sim pointing at the trajectories listed in the output sub-folder and at
        ``<outFolder>/filtered.pdb``, with ``sims[i]`` as parent.
        None if the molecule file of the simulation could not be loaded.
    """
    sim = sims[i]
    name = _simName(sim.trajectory[0])
    directory = path.join(outFolder, name)
    if not path.exists(directory):
        makedirs(directory)

    logger.debug('Processing trajectory ' + name)

    fmolfile = path.join(outFolder, 'filtered.pdb')
    (traj, outtraj) = _renameSims(sim.trajectory, name, outFolder)

    # An empty `traj` means there is nothing to filter; only the Sim is built then.
    if traj:
        try:
            mol = Molecule(sim.molfile)
        except Exception:
            # Best effort: a broken simulation is skipped, but Ctrl-C / SystemExit still propagate
            logger.warning('Error! Skipping simulation ' + name)
            return None

        sel = mol.atomselect(filterSel)

        for j, trajfile in enumerate(traj):
            try:
                mol.read(trajfile)
            except IOError as e:
                # e.strerror can be None, so format the exception itself
                logger.warning('{}, skipping trajectory'.format(e))
                break

            mol.write(outtraj[j], sel)

    ftrajectory = _listTrajectories(directory)
    return Sim(simid=sim.simid,
               parent=sim,
               input=None,
               trajectory=ftrajectory,
               molfile=fmolfile)
Exemplo n.º 12
0
def _filtSim(i, sims, outFolder, filterSel):
    """Write filtered copies of the trajectories of ``sims[i]`` and describe them as a new Sim.

    Parameters
    ----------
    i : int
        Index of the simulation to process in `sims`.
    sims : sequence
        Simulation objects; ``trajectory``, ``molfile`` and ``simid`` are read from ``sims[i]``.
    outFolder : str
        Output folder. Filtered trajectories go into a sub-folder named by ``_simName``.
    filterSel : str
        Atom selection of the atoms to write out.

    Returns
    -------
    Sim or None
        A Sim built from the trajectories detected in the output sub-folder, the frame
        count from ``_getNumFrames`` and ``<outFolder>/filtered.pdb``, with ``sims[i]``
        as parent. None if the molecule of the simulation could not be loaded.
    """
    sim = sims[i]
    name = _simName(sim.trajectory[0])
    directory = path.join(outFolder, name)
    if not path.exists(directory):
        makedirs(directory)

    logger.debug('Processing trajectory ' + name)

    fmolfile = path.join(outFolder, 'filtered.pdb')
    (traj, outtraj) = _renameSims(sim.trajectory, name, outFolder)

    # An empty `traj` means there is nothing to filter; only the Sim is built then.
    if traj:
        try:
            from htmd.molecule.molecule import Molecule
            mol = Molecule(sim.molfile)
        except Exception:
            # Best effort: a broken simulation is skipped, but Ctrl-C / SystemExit still propagate
            logger.warning('Error! Skipping simulation ' + name)
            return None

        sel = mol.atomselect(filterSel)

        for j, trajfile in enumerate(traj):
            try:
                mol.read(trajfile)
            except IOError as e:
                logger.warning('{}, skipping trajectory'.format(e))
                break

            mol.write(outtraj[j], sel)

    ftrajectory = _autoDetectTrajectories(directory)
    numframes = _getNumFrames(sim, ftrajectory)
    return Sim(simid=sim.simid, parent=sim, input=None, trajectory=ftrajectory, molfile=fmolfile, numframes=numframes)
Exemplo n.º 13
0
def build(mol, topo=None, param=None, stream=None, prefix='structure', outdir='./build', caps=None, ionize=True, saltconc=0,
          saltanion=None, saltcation=None, disulfide=None, patches=None, noregen=None, psfgen=None, execute=True, _clean=True):
    """ Builds a system for CHARMM

    Uses VMD and psfgen to build a system for CHARMM. Additionally it allows for ionization and adding of disulfide bridges.

    Parameters
    ----------
    mol : :class:`Molecule <htmd.molecule.molecule.Molecule>` object
        The Molecule object containing the system
    topo : list of str
        A list of topology `rtf` files.
        Use :func:`charmm.listFiles <htmd.builder.charmm.listFiles>` to get a list of available topology files.
        Default: ['top/top_all36_prot.rtf', 'top/top_all36_lipid.rtf', 'top/top_water_ions.rtf']
    param : list of str
        A list of parameter `prm` files.
        Use :func:`charmm.listFiles <htmd.builder.charmm.listFiles>` to get a list of available parameter files.
        Default: ['par/par_all36_prot_mod.prm', 'par/par_all36_lipid.prm', 'par/par_water_ions.prm']
    stream : list of str
        A list of stream `str` files containing topologies and parameters.
        Use :func:`charmm.listFiles <htmd.builder.charmm.listFiles>` to get a list of available stream files.
        Default: ['str/prot/toppar_all36_prot_arg0.str']
    prefix : str
        The prefix for the generated pdb and psf files
    outdir : str
        The path to the output directory
        Default: './build'
    caps : dict
        A dictionary with keys segids and values lists of strings describing the caps of that segment.
        e.g. caps['P'] = ['first ACE', 'last CT3'] or caps['P'] = ['first none', 'last none'].
        Default: will apply ACE and CT3 caps to proteins and none caps to the rest.
    ionize : bool
        Enable or disable ionization
    saltconc : float
        Salt concentration (in Molar) to add to the system after neutralization.
    saltanion : {'CLA'}
        The anion type. Please use only CHARMM ion atom names.
    saltcation : {'SOD', 'MG', 'POT', 'CES', 'CAL', 'ZN2'}
        The cation type. Please use only CHARMM ion atom names.
    disulfide : list of :class:`DisulfideBridge <htmd.builder.builder.DisulfideBridge>` objects
        If None it will guess disulfide bonds. Otherwise provide a list of `DisulfideBridge` objects.
    patches : list of str
        Any further patches the user wants to apply
    noregen : list of str
        A list of patches that must not be regenerated (angles and dihedrals)
        Default: ['FHEM', 'PHEM', 'PLOH', 'PLO2', 'PLIG', 'PSUL']
    psfgen : str
        Path to psfgen executable used to build for CHARMM
    execute : bool
        Disable building. Will only write out the input script needed by psfgen. Does not include ionization.
    _clean : bool
        Internal. If True the output directory is cleaned before building. The ionization
        rebuild calls this function again with False so the `pre-ionize` folder is kept.

    Returns
    -------
    molbuilt : :class:`Molecule <htmd.molecule.molecule.Molecule>` object
        The built system in a Molecule object

    Raises
    ------
    FileNotFoundError
        If no `psfgen` path was given and none is found on the PATH.
    BuildError
        If the build log contains errors or the `<prefix>.pdb` / `<prefix>.psf` files were not generated.

    Example
    -------
    >>> from htmd import *
    >>> mol = Molecule("3PTB")
    >>> mol.filter("not resname BEN")
    >>> mol.renumberResidues()
    >>> molbuilt = charmm.build(mol, outdir='/tmp/build', ionize=False)  # doctest: +ELLIPSIS
    Bond between A: [serial 185 resid 42 resname CYS chain A segid 0]
                 B: [serial 298 resid 58 resname CYS chain A segid 0]...
    >>> # More complex example
    >>> topos  = ['top/top_all36_prot.rtf', './benzamidine.rtf', 'top/top_water_ions.rtf']
    >>> params = ['par/par_all36_prot_mod.prm', './benzamidine.prm', 'par/par_water_ions.prm']
    >>> disu = [DisulfideBridge('P', 157, 'P', 13), DisulfideBridge('K', 1, 'K', 25)]
    >>> molbuilt = charmm.build(mol, topo=topos, param=params, outdir='/tmp/build', saltconc=0.15, disulfide=disu)  # doctest: +SKIP
    """

    mol = mol.copy()
    _missingSegID(mol)
    _checkMixedSegment(mol)
    _checkResidueInsertions(mol)
    if psfgen is None:
        psfgen = shutil.which('psfgen', mode=os.X_OK)
        if not psfgen:
            raise FileNotFoundError('Could not find psfgen executable, or no execute permissions are given. '
                                    'Run `conda install psfgen`.')
    if not os.path.isdir(outdir):
        os.makedirs(outdir)
    if _clean:
        _cleanOutDir(outdir)
    if topo is None:
        topo = defaultTopo()
    if param is None:
        param = defaultParam()
    if stream is None:
        stream = defaultStream()
    if caps is None:
        caps = _defaultCaps(mol)
    # patches that must _not_ be regenerated
    if noregen is None:
        noregen = ['FHEM', 'PHEM', 'PLOH', 'PLO2', 'PLIG', 'PSUL']

    # Work on copies so the caller's lists are not extended with the stream-derived files
    alltopo = topo.copy()
    allparam = param.copy()

    # Splitting the stream files and adding them to the list of parameter and topology files
    charmmdir = path.join(home(), 'builder', 'charmmfiles')
    for s in stream:
        if s[0] != '.' and path.isfile(path.join(charmmdir, s)):
            s = path.join(charmmdir, s)
        outrtf, outprm = _prepareStream(s)
        alltopo.append(outrtf)
        allparam.append(outprm)

    if patches is None:
        patches = []
    if isinstance(patches, str):
        patches = [patches]
    allpatches = []
    allpatches += patches
    # Find protonated residues and add patches for them
    allpatches += _protonationPatches(mol)

    # The context manager closes the script even if one of the steps below raises
    with open(path.join(outdir, 'build.vmd'), 'w') as f:
        f.write('# psfgen file generated by charmm.build\n')
        f.write('package require psfgen;\n')
        f.write('psfcontext reset;\n\n')

        # Copying and printing out the topologies
        if not path.exists(path.join(outdir, 'topologies')):
            os.makedirs(path.join(outdir, 'topologies'))
        for i in range(len(alltopo)):
            if alltopo[i][0] != '.' and path.isfile(path.join(charmmdir, alltopo[i])):
                alltopo[i] = path.join(charmmdir, alltopo[i])
            # The index prefix keeps equally named topology files from overwriting each other
            localname = '{}.'.format(i) + path.basename(alltopo[i])
            shutil.copy(alltopo[i], path.join(outdir, 'topologies', localname))
            f.write('topology ' + path.join('topologies', localname) + '\n')
        f.write('\n')

        _printAliases(f)

        # Printing out segments
        if not path.exists(path.join(outdir, 'segments')):
            os.makedirs(path.join(outdir, 'segments'))
        logger.info('Writing out segments.')
        segments = _getSegments(mol)
        wateratoms = mol.atomselect('water')
        for seg in segments:
            pdbname = 'segment' + seg + '.pdb'
            segatoms = mol.segid == seg
            mol.write(path.join(outdir, 'segments', pdbname), sel=segatoms)

            segwater = wateratoms & segatoms
            f.write('segment ' + seg + ' {\n')
            if np.all(segatoms == segwater):  # If segment only contains waters, set: auto none
                f.write('\tauto none\n')
            f.write('\tpdb ' + path.join('segments', pdbname) + '\n')
            if caps is not None and seg in caps:
                for c in caps[seg]:
                    f.write('\t' + c + '\n')
            f.write('}\n')
            f.write('coordpdb ' + path.join('segments', pdbname) + ' ' + seg + '\n\n')

        # Printing out patches for the disulfide bridges
        if disulfide is None:
            disulfide = detectDisulfideBonds(mol)

        if len(disulfide) != 0:
            for d in disulfide:
                f.write('patch DISU {}:{} {}:{}\n'.format(d.segid1, d.resid1, d.segid2, d.resid2))
            f.write('\n')

        # The second word of a patch line is the patch name
        noregenpatches = [p for p in allpatches if p.split()[1] in noregen]
        regenpatches = [p for p in allpatches if p.split()[1] not in noregen]

        # Printing regenerable patches
        if len(regenpatches) != 0:
            for p in regenpatches:
                f.write(p + '\n')
            f.write('\n')

        # Regenerate angles and dihedrals
        f.write('regenerate angles dihedrals\n')
        f.write('\n')

        # Printing non-regenerable patches
        if len(noregenpatches) != 0:
            for p in noregenpatches:
                f.write(p + '\n')
            f.write('\n')

        f.write('guesscoord\n')
        f.write('writepsf ' + prefix + '.psf\n')
        f.write('writepdb ' + prefix + '.pdb\n')

    if allparam is not None:
        combine(allparam, path.join(outdir, 'parameters'))

    molbuilt = None
    if execute:
        logpath = os.path.abspath('{}/log.txt'.format(outdir))
        logger.info('Starting the build.')
        currdir = os.getcwd()
        os.chdir(outdir)
        try:
            with open(logpath, 'w') as logfile:
                call([psfgen, './build.vmd'], stdout=logfile)
            errors = _logParser(logpath)
        finally:
            # Always return to the caller's working directory, also when psfgen could not be run
            os.chdir(currdir)
        if errors:
            raise BuildError(errors + ['Check {} for further information on errors in building.'.format(logpath)])
        logger.info('Finished building.')

        # psfgen was told to write <prefix>.pdb/.psf, so look for exactly those files
        builtpdb = path.join(outdir, prefix + '.pdb')
        builtpsf = path.join(outdir, prefix + '.psf')
        if path.isfile(builtpdb) and path.isfile(builtpsf):
            molbuilt = Molecule(builtpdb)
            molbuilt.read(builtpsf)
        else:
            raise BuildError('No structure pdb/psf file was generated. Check {} for errors in building.'.format(logpath))

        if ionize:
            # Keep the un-ionized build in a sub-folder before rebuilding with ions
            preionizedir = path.join(outdir, 'pre-ionize')
            os.makedirs(preionizedir)
            for item in glob(path.join(outdir, '*')):
                if item == preionizedir:
                    continue  # the destination folder itself stays where it is
                shutil.move(item, preionizedir)
            totalcharge = np.sum(molbuilt.charge)
            nwater = np.sum(molbuilt.atomselect('water and noh'))
            anion, cation, anionatom, cationatom, nanion, ncation = ionizef(totalcharge, nwater, saltconc=saltconc, ff='charmm', anion=saltanion, cation=saltcation)
            newmol = ionizePlace(mol, anion, cation, anionatom, cationatom, nanion, ncation)
            # Redo the whole build but now with ions included
            return build(newmol, topo=alltopo, param=allparam, stream=[], prefix=prefix, outdir=outdir, ionize=False, caps=caps,
                         execute=execute, saltconc=saltconc, disulfide=disulfide, patches=patches, noregen=noregen, psfgen=psfgen, _clean=False)
    _checkFailedAtoms(molbuilt)
    _recoverProtonations(molbuilt)
    return molbuilt
Exemplo n.º 14
0
def build(mol,
          ff=None,
          topo=None,
          param=None,
          prefix='structure',
          outdir='./build',
          caps=None,
          ionize=True,
          saltconc=0,
          saltanion=None,
          saltcation=None,
          disulfide=None,
          tleap=None,
          execute=True,
          atomtypes=None,
          offlibraries=None,
          gbsa=False,
          igb=2):
    """ Builds a system for AMBER

    Uses tleap to build a system for AMBER. Additionally it allows the user to ionize and add disulfide bridges.

    Parameters
    ----------
    mol : :class:`Molecule <htmd.molecule.molecule.Molecule>` object
        The Molecule object containing the system
    ff : list of str
        A list of leaprc forcefield files.
        Use :func:`amber.listFiles <htmd.builder.amber.listFiles>` to get a list of available forcefield files.
        Default: :func:`amber.defaultFf <htmd.builder.amber.defaultFf>`
    topo : list of str
        A list of topology `prepi/prep/in` files.
        Use :func:`amber.listFiles <htmd.builder.amber.listFiles>` to get a list of available topology files.
        Default: :func:`amber.defaultTopo <htmd.builder.amber.defaultTopo>`
    param : list of str
        A list of parameter `frcmod` files.
        Use :func:`amber.listFiles <htmd.builder.amber.listFiles>` to get a list of available parameter files.
        Default: :func:`amber.defaultParam <htmd.builder.amber.defaultParam>`
    prefix : str
        The prefix for the generated prmtop, crd and pdb files
    outdir : str
        The path to the output directory
        Default: './build'
    caps : dict
        A dictionary with keys segids and values lists of strings describing the caps for a particular protein segment.
        e.g. caps['P'] = ['ACE', 'NME'] or caps['P'] = ['none', 'none']. Default: will apply ACE and NME caps to every
        protein segment.
    ionize : bool
        Enable or disable ionization
    saltconc : float
        Salt concentration to add to the system after neutralization.
    saltanion : {'Cl-'}
        The anion type. Please use only AMBER ion atom names.
    saltcation : {'Na+', 'K+', 'Cs+'}
        The cation type. Please use only AMBER ion atom names.
    disulfide : list of pairs of atomselection strings
        If None it will guess disulfide bonds. Otherwise provide a list pairs of atomselection strings for each pair of
        residues forming the disulfide bridge.
    tleap : str
        Path to tleap executable used to build the system for AMBER
    execute : bool
        Disable building. Will only write out the input script needed by tleap. Does not include ionization.
        When False the function returns None.
    atomtypes : list of triplets
        Custom atom types defined by the user as ('type', 'element', 'hybrid') triplets
        e.g. (('C1', 'C', 'sp2'), ('CI', 'C', 'sp3')). Check `addAtomTypes` in AmberTools docs.
    offlibraries : str or list
        A path or a list of paths to OFF library files. Check `loadOFF` in AmberTools docs.
    gbsa : bool
        Modify radii for GBSA implicit water model
    igb : int
        GB model. Select: 1 for mbondi, 2 and 5 for mbondi2, 7 for bondi and 8 for mbondi3.
        Check section 4. The Generalized Born/Surface Area Model of the AMBER manual.

    Returns
    -------
    molbuilt : :class:`Molecule <htmd.molecule.molecule.Molecule>` object
        The built system in a Molecule object

    Raises
    ------
    NameError
        If the given `tleap` is not found on the PATH, an input file could not be written,
        or tleap could not be executed.
    RuntimeError
        If an atom type definition is not a triplet or an OFF library file does not exist.
    BuildError
        If the build log contains errors or the `<prefix>.prmtop` / `<prefix>.crd` files were not generated.

    Example
    -------
    >>> from htmd.ui import *  # doctest: +SKIP
    >>> mol = Molecule("3PTB")
    >>> molbuilt = amber.build(mol, outdir='/tmp/build')  # doctest: +SKIP
    >>> # More complex example
    >>> disu = [['segid P and resid 157', 'segid P and resid 13'], ['segid K and resid 1', 'segid K and resid 25']]
    >>> molbuilt = amber.build(mol, outdir='/tmp/build', saltconc=0.15, disulfide=disu)  # doctest: +SKIP
    """
    # Remove pdb protein bonds as they can be regenerated by tleap. Keep non-protein bonds i.e. for ligands
    mol = mol.copy()
    _removeProteinBonds(mol)

    if tleap is None:
        tleap = _findTleap()
    elif shutil.which(tleap) is None:
        raise NameError(
            'Could not find executable: `{}` in the PATH. Cannot build for AMBER.'
            .format(tleap))

    if not os.path.isdir(outdir):
        os.makedirs(outdir)
    _cleanOutDir(outdir)
    if ff is None:
        ff = defaultFf()
    if topo is None:
        topo = defaultTopo()
    if param is None:
        param = defaultParam()
    if caps is None:
        caps = _defaultProteinCaps(mol)

    _missingSegID(mol)
    _checkMixedSegment(mol)

    mol = _charmmLipid2Amber(mol)

    _applyProteinCaps(mol, caps)

    # The context manager closes the script even if one of the steps below raises
    with open(os.path.join(outdir, 'tleap.in'), 'w') as f:
        f.write('# tleap file generated by amber.build\n')

        # Printing out the forcefields
        if isinstance(ff, str):
            ff = [ff]
        for i, force in enumerate(ff):
            if not os.path.isfile(force):
                force = _locateFile(force, 'ff', tleap)
                if force is None:
                    continue
            newname = 'ff{}_{}'.format(i, os.path.basename(force))
            shutil.copy(force, os.path.join(outdir, newname))
            f.write('source {}\n'.format(newname))
        f.write('\n')

        if gbsa:
            gbmodels = {
                1: 'mbondi',
                2: 'mbondi2',
                5: 'mbondi2',
                7: 'bondi',
                8: 'mbondi3'
            }
            f.write('set default PBradii {}\n\n'.format(gbmodels[igb]))

        # Adding custom atom types
        if atomtypes is not None:
            atomtypes = ensurelist(tocheck=atomtypes[0], tomod=atomtypes)
            f.write('addAtomTypes {\n')
            for at in atomtypes:
                if len(at) != 3:
                    raise RuntimeError(
                        'Atom type definitions have to be triplets. Check the AMBER documentation.'
                    )
                f.write('    {{ "{}" "{}" "{}" }}\n'.format(at[0], at[1], at[2]))
            f.write('}\n\n')

        # Loading OFF libraries
        if offlibraries is not None:
            offlibraries = ensurelist(offlibraries)
            # Number the libraries with their own index (not the leftover one of the
            # forcefield loop) so equally named files get distinct local copies
            for i, off in enumerate(offlibraries):
                if not os.path.isfile(off):
                    raise RuntimeError(
                        'Could not find off-library in location {}'.format(off))
                newname = 'offlib{}_{}'.format(i, os.path.basename(off))
                shutil.copy(off, os.path.join(outdir, newname))
                f.write('loadoff {}\n'.format(newname))

        # Loading frcmod parameters
        f.write('# Loading parameter files\n')
        for i, p in enumerate(param):
            if not os.path.isfile(p):
                p = _locateFile(p, 'param', tleap)
                if p is None:
                    continue
            newname = 'param{}_{}'.format(i, os.path.basename(p))
            shutil.copy(p, os.path.join(outdir, newname))
            f.write('loadamberparams {}\n'.format(newname))
        f.write('\n')

        # Loading prepi topologies
        f.write('# Loading prepi topologies\n')
        for i, t in enumerate(topo):
            if not os.path.isfile(t):
                t = _locateFile(t, 'topo', tleap)
                if t is None:
                    continue
            newname = 'topo{}_{}'.format(i, os.path.basename(t))
            shutil.copy(t, os.path.join(outdir, newname))
            f.write('loadamberprep {}\n'.format(newname))
        f.write('\n')

        f.write('# Loading the system\n')
        f.write('mol = loadpdb input.pdb\n\n')

        # Atoms that carry an atom type are passed to tleap as mol2 files, one per segment
        if np.sum(mol.atomtype != '') != 0:
            logger.debug('Writing mol2 files for input to tleap.')
            segs = np.unique(mol.segid[mol.atomtype != ''])
            combstr = 'mol = combine {mol'
            for s in segs:
                name = 'segment{}'.format(s)
                mol2name = os.path.join(outdir, '{}.mol2'.format(name))
                mol.write(mol2name, (mol.atomtype != '') & (mol.segid == s))
                if not os.path.isfile(mol2name):
                    raise NameError(
                        'Could not write a mol2 file out of the given Molecule.')
                f.write('# Loading the rest of the system\n')
                f.write('{} = loadmol2 {}.mol2\n\n'.format(name, name))
                combstr += ' {}'.format(name)
            combstr += '}\n\n'
            f.write(combstr)

        # Write patches for disulfide bonds (only after ionizing)
        if not ionize:
            # TODO: Remove this once we deprecate the class
            from htmd.builder.builder import DisulfideBridge
            from htmd.molecule.molecule import UniqueResidueID
            if disulfide is not None and len(disulfide) != 0 and isinstance(
                    disulfide[0], DisulfideBridge):
                newdisu = []
                for d in disulfide:
                    r1 = UniqueResidueID.fromMolecule(
                        mol, 'resid {} and segname {}'.format(d.resid1, d.segid1))
                    r2 = UniqueResidueID.fromMolecule(
                        mol, 'resid {} and segname {}'.format(d.resid2, d.segid2))
                    newdisu.append([r1, r2])
                disulfide = newdisu
            # TODO: Remove up to here ----------------------

            if disulfide is not None and len(disulfide) != 0 and isinstance(
                    disulfide[0][0], str):
                disulfide = convertDisulfide(mol, disulfide)

            if disulfide is None:
                logger.info('Detecting disulfide bonds.')
                disulfide = detectDisulfideBonds(mol)

            # Fix structure to match the disulfide patching
            if len(disulfide) != 0:
                torem = np.zeros(mol.numAtoms, dtype=bool)
                f.write('# Adding disulfide bonds\n')
                # Residue numbering used in the `bond` commands below. It depends only on
                # resid/insertion/segid, which the loop does not modify, so compute it once.
                uqseqid = sequenceID(
                    (mol.resid, mol.insertion, mol.segid)) + mol.resid[0]
                for d in disulfide:
                    # Rename the residues to CYX if there is a disulfide bond
                    atoms1 = d[0].selectAtoms(mol, indexes=False)
                    atoms2 = d[1].selectAtoms(mol, indexes=False)
                    mol.resname[atoms1] = 'CYX'
                    mol.resname[atoms2] = 'CYX'
                    # Remove (eventual) HG hydrogens on these CYS (from proteinPrepare)
                    torem |= (atoms1 & (mol.name == 'HG')) | (atoms2 &
                                                              (mol.name == 'HG'))
                    uqres1 = int(np.unique(uqseqid[atoms1]))
                    uqres2 = int(np.unique(uqseqid[atoms2]))
                    f.write('bond mol.{}.SG mol.{}.SG\n'.format(uqres1, uqres2))
                f.write('\n')
                mol.remove(torem, _logger=False)

        f.write('# Writing out the results\n')
        f.write('saveamberparm mol ' + prefix + '.prmtop ' + prefix + '.crd\n')
        f.write('quit')

    # Printing and loading the PDB file. AMBER can work with a single PDB file if the segments are separate by TER
    logger.debug('Writing PDB file for input to tleap.')
    pdbname = os.path.join(outdir, 'input.pdb')

    # mol2 files have atomtype, here we only write parts not coming from mol2
    # We need to write the input.pdb at the end since we modify the resname for disulfide bridges in mol
    mol.write(pdbname, mol.atomtype == '')
    if not os.path.isfile(pdbname):
        raise NameError(
            'Could not write a PDB file out of the given Molecule.')

    molbuilt = None
    if execute:
        # Source paths of extra dirs (our dirs, not amber default)
        htmdamberdir = os.path.abspath(
            os.path.join(home(), 'builder', 'amberfiles'))
        sourcepaths = [htmdamberdir]
        sourcepaths += [
            os.path.join(htmdamberdir, os.path.dirname(fffile)) for fffile in ff
            if os.path.isfile(os.path.join(htmdamberdir, fffile))
        ]
        extrasource = []
        for p in sourcepaths:
            extrasource.append('-I')
            extrasource.append('{}'.format(p))
        logpath = os.path.abspath(os.path.join(outdir, 'log.txt'))
        logger.info('Starting the build.')
        currdir = os.getcwd()
        os.chdir(outdir)
        try:
            with open(logpath, 'w') as logfile:
                try:
                    # tleap -I <dir> ... -f ./tleap.in
                    cmd = [tleap] + extrasource + ['-f', './tleap.in']
                    call(cmd, stdout=logfile)
                except Exception as e:
                    raise NameError('tleap failed at execution') from e
            errors = _logParser(logpath)
        finally:
            # Always return to the caller's working directory, also when tleap could not be run
            os.chdir(currdir)
        if errors:
            raise BuildError(errors + [
                'Check {} for further information on errors in building.'.
                format(logpath)
            ])
        logger.info('Finished building.')

        # tleap was told to write <prefix>.prmtop/.crd, so look for exactly those files
        crdfile = os.path.join(outdir, prefix + '.crd')
        prmtopfile = os.path.join(outdir, prefix + '.prmtop')
        if all(os.path.isfile(built) and os.path.getsize(built) != 0
               for built in (crdfile, prmtopfile)):
            molbuilt = Molecule(prmtopfile)
            molbuilt.read(crdfile)
        else:
            raise BuildError(
                'No structure pdb/prmtop file was generated. Check {} for errors in building.'
                .format(logpath))

        if ionize:
            # Keep the un-ionized build next to the final one
            shutil.move(crdfile,
                        os.path.join(outdir, prefix + '.noions.crd'))
            shutil.move(prmtopfile,
                        os.path.join(outdir, prefix + '.noions.prmtop'))
            totalcharge = np.sum(molbuilt.charge)
            nwater = np.sum(molbuilt.atomselect('water and noh'))
            anion, cation, anionatom, cationatom, nanion, ncation = ionizef(
                totalcharge,
                nwater,
                saltconc=saltconc,
                anion=saltanion,
                cation=saltcation)
            newmol = ionizePlace(mol, anion, cation, anionatom, cationatom,
                                 nanion, ncation)
            # Redo the whole build but now with ions included.
            # caps={} because the caps were already applied to `mol` above.
            return build(newmol,
                         ff=ff,
                         topo=topo,
                         param=param,
                         prefix=prefix,
                         outdir=outdir,
                         caps={},
                         ionize=False,
                         execute=execute,
                         saltconc=saltconc,
                         disulfide=disulfide,
                         tleap=tleap,
                         atomtypes=atomtypes,
                         offlibraries=offlibraries,
                         gbsa=gbsa,
                         igb=igb)

    # With execute=False nothing was built, so there is no structure to write out
    if molbuilt is not None:
        tmpbonds = molbuilt.bonds
        molbuilt.bonds = []  # Removing the bonds to speed up writing
        molbuilt.write(os.path.join(outdir, prefix + '.pdb'))
        molbuilt.bonds = tmpbonds  # Restoring the bonds
    return molbuilt
Exemplo n.º 15
0
def build(mol, ff=None, topo=None, param=None, prefix='structure', outdir='./', caps=None, ionize=True, saltconc=0,
          saltanion=None, saltcation=None, disulfide=None, tleap='tleap', execute=True):
    """ Builds a system for AMBER

    Uses tleap to build a system for AMBER. Additionally it allows the user to ionize the system.

    Parameters
    ----------
    mol : :class:`Molecule <htmd.molecule.molecule.Molecule>` object
        The Molecule object containing the system
    ff : list of str
        A list of leaprc forcefield files. A single string is treated as a one-element list.
        Default: ['leaprc.lipid14', 'leaprc.ff14SB', 'leaprc.gaff']
    topo : list of str
        A list of topology `prepi` files.
    param : list of str
        A list of parameter `frcmod` files.
    prefix : str
        The prefix for the pdb, prmtop and crd files written by tleap
    outdir : str
        The path to the output directory
    caps : dict
        A dictionary with keys segids and values lists of strings describing the caps of that segment.
        e.g. caps['P'] = ['ACE', 'NME']. Default: will apply ACE and NME caps to proteins and no caps
        to the rest.
    ionize : bool
        Enable or disable ionization
    saltconc : float
        Salt concentration to add to the system after neutralization.
    saltanion : {'Cl-'}
        The anion type. Please use only AMBER ion atom names.
    saltcation : {'Na+', 'K+', 'Cs+'}
        The cation type. Please use only AMBER ion atom names.
    disulfide : np.ndarray
        Not used by this version: the disulfide patching code below is disabled. The value is only forwarded
        to the second build pass that follows ionization.
    tleap : str
        Path to tleap executable used to build the system for AMBER
    execute : bool
        Disable building. Will only write out the input script needed by tleap. Does not include ionization.

    Returns
    -------
    molbuilt : :class:`Molecule <htmd.molecule.molecule.Molecule>` object
        The built system in a Molecule object, or None if `execute` is False

    Raises
    ------
    NameError
        If the tleap executable is not in the PATH, the input PDB could not be written, tleap could not be
        executed, or tleap did not produce non-empty pdb/prmtop files.

    Example
    -------
    >>> ffs = ['leaprc.lipid14', 'leaprc.ff14SB', 'leaprc.gaff']
    >>> molbuilt = amber.build(mol, ff=ffs, outdir='/tmp/build', saltconc=0.15)
    """
    # Remove pdb bonds!
    mol = mol.copy()
    mol.bonds = []
    if shutil.which(tleap) is None:
        raise NameError('Could not find executable: `' + tleap + '` in the PATH. Cannot build for AMBER.')
    if not os.path.isdir(outdir):
        os.makedirs(outdir)
    _cleanOutDir(outdir)
    if ff is None:
        ff = ['leaprc.lipid14', 'leaprc.ff14SB', 'leaprc.gaff']
    elif isinstance(ff, str):
        # A bare string would otherwise be iterated character by character
        ff = [ff]
    if topo is None:
        topo = []
    if param is None:
        param = []
    if caps is None:
        caps = _defaultCaps(mol)

    _missingSegID(mol)
    _checkMixedSegment(mol)

    logger.info('Converting CHARMM membranes to AMBER.')
    mol = _charmmLipid2Amber(mol)

    #_checkProteinGaps(mol)
    _applyCaps(mol, caps)

    # The context manager guarantees tleap.in is closed even if one of the steps below raises
    with open(path.join(outdir, 'tleap.in'), 'w') as f:
        f.write('# tleap file generated by amber.build\n')

        # Printing out the forcefields
        for force in ff:
            f.write('source ' + force + '\n')
        f.write('\n')

        # Loading TIP3P water parameters
        f.write('# Loading ions and TIP3P water parameters\n')
        f.write('loadamberparams frcmod.ionsjc_tip3p\n\n')

        # Loading user parameters (they are copied next to tleap.in so tleap finds them by basename)
        f.write('# Loading parameter files\n')
        for p in param:
            shutil.copy(p, outdir)
            f.write('loadamberparams ' + path.basename(p) + '\n')
        f.write('\n')

        # Printing out topologies
        logger.info('Writing prepi files.')
        f.write('# Loading prepi topologies\n')
        for t in topo:
            shutil.copy(t, outdir)
            f.write('loadamberprep ' + path.basename(t) + '\n')
        f.write('\n')

        # Printing and loading the PDB file. AMBER can work with a single PDB file if the segments are separate by TER
        logger.info('Writing PDB file for input to tleap.')
        pdbname = path.join(outdir, 'input.pdb')
        mol.write(pdbname)
        if not os.path.isfile(pdbname):
            raise NameError('Could not write a PDB file out of the given Molecule.')
        f.write('# Loading the system\n')
        f.write('mol = loadpdb input.pdb\n\n')

        # Printing out patches for the disulfide bridges
        # NOTE: disulfide patching is disabled in this version; the block below is an inert string literal.
        '''if disulfide is None and not ionize:
            logger.info('Detecting disulfide bonds.')
            disulfide = detectDisulfideBonds(mol)

        if not ionize and len(disulfide) != 0:  # Only make disu bonds after ionizing!
            f.write('# Adding disulfide bonds\n')
            for d in disulfide:
                # Convert to stupid amber residue numbering
                uqseqid = sequenceID(mol.resid)
                uqres1 = int(np.unique(uqseqid[mol.atomselect('segid {} and resid {}'.format(d.segid1, d.resid1))]))
                uqres2 = int(np.unique(uqseqid[mol.atomselect('segid {} and resid {}'.format(d.segid2, d.resid2))]))
                # Rename the CYS to CYX if there is a disulfide bond
                mol.set('resname', 'CYX', sel='segid {} and resid {}'.format(d.segid1, d.resid1))
                mol.set('resname', 'CYX', sel='segid {} and resid {}'.format(d.segid2, d.resid2))
                f.write('bond mol.{}.SG mol.{}.SG\n'.format(uqres1, uqres2))
            f.write('\n')'''

        f.write('# Writing out the results\n')
        f.write('savepdb mol ' + prefix + '.pdb\n')
        f.write('saveamberparm mol ' + prefix + '.prmtop ' + prefix + '.crd\n')
        f.write('quit')

    molbuilt = None
    if execute:
        logpath = os.path.abspath('{}/log.txt'.format(outdir))
        logger.info('Starting the build.')
        currdir = os.getcwd()
        # tleap.in refers to its inputs by relative name, so tleap has to run inside outdir
        os.chdir(outdir)
        try:
            with open(logpath, 'w') as logfile:
                try:
                    call([tleap, '-f', './tleap.in'], stdout=logfile)
                except Exception as err:
                    raise NameError('tleap failed at execution') from err
        finally:
            # Always restore the caller's working directory, even on failure
            os.chdir(currdir)
        logger.info('Finished building.')

        # tleap writes <prefix>.* (see the savepdb/saveamberparm lines above)
        outpdb = path.join(outdir, prefix + '.pdb')
        outcrd = path.join(outdir, prefix + '.crd')
        outprmtop = path.join(outdir, prefix + '.prmtop')

        # A missing file must produce the informative error below instead of an OSError from getsize
        if all(path.isfile(p) and path.getsize(p) != 0 for p in (outpdb, outprmtop)):
            molbuilt = Molecule(outpdb)
            molbuilt.read(outprmtop)
            molbuilt.bonds = []  # Causes problems in ionization mol.remove and mol._removeBonds
        else:
            raise NameError('No structure pdb/prmtop file was generated. Check {} for errors in building.'.format(logpath))

        if ionize:
            # Keep the un-ionized outputs under a different name before the second pass
            shutil.move(outpdb, path.join(outdir, prefix + '.noions.pdb'))
            shutil.move(outcrd, path.join(outdir, prefix + '.noions.crd'))
            shutil.move(outprmtop, path.join(outdir, prefix + '.noions.prmtop'))
            totalcharge = np.sum(molbuilt.charge)
            nwater = np.sum(molbuilt.atomselect('water and noh'))
            anion, cation, anionatom, cationatom, nanion, ncation = ionizef(totalcharge, nwater, saltconc=saltconc, ff='amber', anion=saltanion, cation=saltcation)
            newmol = ionizePlace(molbuilt, anion, cation, anionatom, cationatom, nanion, ncation)
            # Redo the whole build but now with ions included
            return build(newmol, ff=ff, topo=topo, param=param, prefix=prefix, outdir=outdir, caps={}, ionize=False,
                         execute=execute, saltconc=saltconc, disulfide=disulfide, tleap=tleap)
    return molbuilt
Exemplo n.º 16
0
def build(mol,
          ff=None,
          topo=None,
          param=None,
          prefix='structure',
          outdir='./',
          caps=None,
          ionize=True,
          saltconc=0,
          saltanion=None,
          saltcation=None,
          disulfide=None,
          tleap='tleap',
          execute=True):
    """ Builds a system for AMBER

    Uses tleap to build a system for AMBER. Additionally it allows the user to ionize and add disulfide bridges.

    Parameters
    ----------
    mol : :class:`Molecule <htmd.molecule.molecule.Molecule>` object
        The Molecule object containing the system
    ff : list of str
        A list of leaprc forcefield files. A single string is treated as a one-element list.
        Default: ['leaprc.lipid14', 'leaprc.ff14SB', 'leaprc.gaff']
    topo : list of str
        A list of topology `prepi` files.
    param : list of str
        A list of parameter `frcmod` files.
    prefix : str
        The prefix for the generated prmtop, crd and pdb files
    outdir : str
        The path to the output directory
    caps : dict
        A dictionary with keys segids and values lists of strings describing the caps of that segment.
        e.g. caps['P'] = ['ACE', 'NME']. Default: will apply ACE and NME caps to proteins and no caps
        to the rest.
    ionize : bool
        Enable or disable ionization
    saltconc : float
        Salt concentration to add to the system after neutralization.
    saltanion : {'Cl-'}
        The anion type. Please use only AMBER ion atom names.
    saltcation : {'Na+', 'K+', 'Cs+'}
        The cation type. Please use only AMBER ion atom names.
    disulfide : np.ndarray
        If None it will guess disulfide bonds. Otherwise provide a 2D array where each row is a pair of atom indexes that makes a disulfide bond
    tleap : str
        Path to tleap executable used to build the system for AMBER
    execute : bool
        Disable building. Will only write out the input script needed by tleap. Does not include ionization.

    Returns
    -------
    molbuilt : :class:`Molecule <htmd.molecule.molecule.Molecule>` object
        The built system in a Molecule object, or None if `execute` is False

    Raises
    ------
    NameError
        If the tleap executable is not in the PATH, the input PDB could not be written, tleap could not be
        executed, or tleap did not produce non-empty crd/prmtop files.

    Example
    -------
    >>> ffs = ['leaprc.lipid14', 'leaprc.ff14SB', 'leaprc.gaff']
    >>> molbuilt = amber.build(mol, ff=ffs, outdir='/tmp/build', saltconc=0.15)
    """
    # Remove pdb bonds!
    mol = mol.copy()
    mol.bonds = np.empty((0, 2), dtype=np.uint32)
    if shutil.which(tleap) is None:
        raise NameError('Could not find executable: `' + tleap +
                        '` in the PATH. Cannot build for AMBER.')
    if not os.path.isdir(outdir):
        os.makedirs(outdir)
    _cleanOutDir(outdir)
    if ff is None:
        ff = ['leaprc.lipid14', 'leaprc.ff14SB', 'leaprc.gaff']
    if topo is None:
        topo = []
    if param is None:
        param = []
    if caps is None:
        caps = _defaultCaps(mol)

    _missingSegID(mol)
    _checkMixedSegment(mol)

    logger.info('Converting CHARMM membranes to AMBER.')
    mol = _charmmLipid2Amber(mol)

    #_checkProteinGaps(mol)
    _applyCaps(mol, caps)

    # The context manager guarantees tleap.in is closed even if one of the steps below raises
    with open(path.join(outdir, 'tleap.in'), 'w') as f:
        f.write('# tleap file generated by amber.build\n')

        # Printing out the forcefields
        if isinstance(ff, str):
            ff = [ff]
        for force in ff:
            f.write('source ' + force + '\n')
        f.write('\n')

        # Loading TIP3P water parameters
        f.write('# Loading ions and TIP3P water parameters\n')
        f.write('loadamberparams frcmod.ionsjc_tip3p\n\n')

        # Loading user parameters
        f.write('# Loading parameter files\n')
        for p in param:
            shutil.copy(p, outdir)
            f.write('loadamberparams ' + path.basename(p) + '\n')
        f.write('\n')

        # Printing out topologies
        f.write('# Loading prepi topologies\n')
        for t in topo:
            shutil.copy(t, outdir)
            f.write('loadamberprep ' + path.basename(t) + '\n')
        f.write('\n')

        # Printing and loading the PDB file. AMBER can work with a single PDB file if the segments are separate by TER
        logger.info('Writing PDB file for input to tleap.')
        pdbname = path.join(outdir, 'input.pdb')
        mol.write(pdbname)
        if not os.path.isfile(pdbname):
            raise NameError(
                'Could not write a PDB file out of the given Molecule.')
        f.write('# Loading the system\n')
        f.write('mol = loadpdb input.pdb\n\n')

        # Printing out patches for the disulfide bridges
        if disulfide is None and not ionize:
            logger.info('Detecting disulfide bonds.')
            disulfide = detectDisulfideBonds(mol)

        # Only make disu bonds after ionizing!
        if not ionize and len(disulfide) != 0:
            f.write('# Adding disulfide bonds\n')
            for d in disulfide:
                sel1 = 'segid {} and resid {}'.format(d.segid1, d.resid1)
                sel2 = 'segid {} and resid {}'.format(d.segid2, d.resid2)
                # Convert to stupid amber residue numbering
                uqseqid = sequenceID(
                    (mol.resid, mol.insertion, mol.segid)) + mol.resid[0] - 1
                uqres1 = int(np.unique(uqseqid[mol.atomselect(sel1)]))
                uqres2 = int(np.unique(uqseqid[mol.atomselect(sel2)]))
                # Rename the CYS to CYX if there is a disulfide bond
                mol.set('resname', 'CYX', sel=sel1)
                mol.set('resname', 'CYX', sel=sel2)
                f.write('bond mol.{}.SG mol.{}.SG\n'.format(uqres1, uqres2))
            f.write('\n')

        f.write('# Writing out the results\n')
        f.write('saveamberparm mol ' + prefix + '.prmtop ' + prefix + '.crd\n')
        f.write('quit')

    molbuilt = None
    if execute:
        # Source paths of extra dirs
        htmdamberdir = path.join(home(), 'builder', 'amberfiles')
        sourcepaths = [htmdamberdir]
        sourcepaths += [path.join(htmdamberdir, path.dirname(fname)) for fname in ff]
        # Each option and its value must be a separate argv element; a single
        # space-joined string would reach tleap as one malformed argument.
        extrasource = []
        for p in sourcepaths:
            extrasource.extend(['-I', p])
        logpath = os.path.abspath('{}/log.txt'.format(outdir))
        logger.info('Starting the build.')
        currdir = os.getcwd()
        # tleap.in refers to its inputs by relative name, so tleap has to run inside outdir
        os.chdir(outdir)
        try:
            with open(logpath, 'w') as logfile:
                try:
                    call([tleap] + extrasource + ['-f', './tleap.in'], stdout=logfile)
                except Exception as err:
                    raise NameError('tleap failed at execution') from err
        finally:
            # Always restore the caller's working directory, even on failure
            os.chdir(currdir)
        logger.info('Finished building.')

        # tleap writes <prefix>.prmtop / <prefix>.crd (see the saveamberparm line above)
        outcrd = path.join(outdir, prefix + '.crd')
        outprmtop = path.join(outdir, prefix + '.prmtop')

        # A missing file must produce the informative error below instead of an OSError from getsize
        if all(path.isfile(p) and path.getsize(p) != 0 for p in (outcrd, outprmtop)):
            molbuilt = Molecule(outprmtop)
            molbuilt.read(outcrd)
        else:
            raise NameError(
                'No structure pdb/prmtop file was generated. Check {} for errors in building.'
                .format(logpath))

        if ionize:
            # Keep the un-ionized outputs under a different name before the second pass
            shutil.move(outcrd, path.join(outdir, prefix + '.noions.crd'))
            shutil.move(outprmtop, path.join(outdir, prefix + '.noions.prmtop'))
            totalcharge = np.sum(molbuilt.charge)
            nwater = np.sum(molbuilt.atomselect('water and noh'))
            anion, cation, anionatom, cationatom, nanion, ncation = ionizef(
                totalcharge,
                nwater,
                saltconc=saltconc,
                ff='amber',
                anion=saltanion,
                cation=saltcation)
            newmol = ionizePlace(mol, anion, cation, anionatom, cationatom,
                                 nanion, ncation)
            # Redo the whole build but now with ions included
            return build(newmol,
                         ff=ff,
                         topo=topo,
                         param=param,
                         prefix=prefix,
                         outdir=outdir,
                         caps={},
                         ionize=False,
                         execute=execute,
                         saltconc=saltconc,
                         disulfide=disulfide,
                         tleap=tleap)
    # With execute=False nothing was built, so there is no molecule to write out
    if molbuilt is not None:
        molbuilt.write(path.join(outdir, prefix + '.pdb'))
    return molbuilt
Exemplo n.º 17
0
def build(mol, ff=None, topo=None, param=None, prefix='structure', outdir='./build', caps=None, ionize=True, saltconc=0,
          saltanion=None, saltcation=None, disulfide=None, tleap='tleap', execute=True):
    """ Builds a system for AMBER

    Uses tleap to build a system for AMBER. Additionally it allows the user to ionize and add disulfide bridges.

    Parameters
    ----------
    mol : :class:`Molecule <htmd.molecule.molecule.Molecule>` object
        The Molecule object containing the system
    ff : list of str
        A list of leaprc forcefield files. A single string is treated as a one-element list.
        Default: ['leaprc.lipid14', 'leaprc.ff14SB', 'leaprc.gaff']
    topo : list of str
        A list of topology `prepi` files.
    param : list of str
        A list of parameter `frcmod` files. Entries that cannot be copied into `outdir` are passed to tleap
        by name, assuming they are standard AmberTools files.
    prefix : str
        The prefix for the generated prmtop, crd and pdb files
    outdir : str
        The path to the output directory
        Default: './build'
    caps : dict
        A dictionary with keys segids and values lists of strings describing the caps for a particular protein segment.
        e.g. caps['P'] = ['ACE', 'NME'] or caps['P'] = ['none', 'none']. Default: will apply ACE and NME caps to every
        protein segment.
    ionize : bool
        Enable or disable ionization
    saltconc : float
        Salt concentration to add to the system after neutralization.
    saltanion : {'Cl-'}
        The anion type. Please use only AMBER ion atom names.
    saltcation : {'Na+', 'K+', 'Cs+'}
        The cation type. Please use only AMBER ion atom names.
    disulfide : np.ndarray
        If None it will guess disulfide bonds. Otherwise provide a 2D array where each row is a pair of atom indexes that makes a disulfide bond
    tleap : str
        Path to tleap executable used to build the system for AMBER
    execute : bool
        Disable building. Will only write out the input script needed by tleap. Does not include ionization.

    Returns
    -------
    molbuilt : :class:`Molecule <htmd.molecule.molecule.Molecule>` object
        The built system in a Molecule object, or None if `execute` is False

    Raises
    ------
    NameError
        If the tleap executable is not in the PATH, an input PDB/mol2 file could not be written, tleap could
        not be executed, or tleap did not produce non-empty crd/prmtop files.

    Example
    -------
    >>> ffs = ['leaprc.lipid14', 'leaprc.ff14SB', 'leaprc.gaff']
    >>> molbuilt = amber.build(mol, ff=ffs, outdir='/tmp/build', saltconc=0.15)
    """
    # Remove pdb bonds!
    mol = mol.copy()
    mol.bonds = np.empty((0, 2), dtype=np.uint32)
    if shutil.which(tleap) is None:
        raise NameError('Could not find executable: `' + tleap + '` in the PATH. Cannot build for AMBER.')
    if not os.path.isdir(outdir):
        os.makedirs(outdir)
    _cleanOutDir(outdir)
    if ff is None:
        ff = ['leaprc.lipid14', 'leaprc.ff14SB', 'leaprc.gaff']
    if topo is None:
        topo = []
    if param is None:
        param = []
    if caps is None:
        caps = _defaultProteinCaps(mol)

    _missingSegID(mol)
    _checkMixedSegment(mol)

    logger.info('Converting CHARMM membranes to AMBER.')
    mol = _charmmLipid2Amber(mol)

    #_checkProteinGaps(mol)
    _applyProteinCaps(mol, caps)

    # The context manager guarantees tleap.in is closed even if one of the steps below raises
    with open(path.join(outdir, 'tleap.in'), 'w') as f:
        f.write('# tleap file generated by amber.build\n')

        # Printing out the forcefields
        if isinstance(ff, str):
            ff = [ff]
        for force in ff:
            f.write('source ' + force + '\n')
        f.write('\n')

        # Loading TIP3P water parameters
        f.write('# Loading ions and TIP3P water parameters\n')
        f.write('loadamberparams frcmod.ionsjc_tip3p\n\n')

        # Loading user parameters
        f.write('# Loading parameter files\n')
        for p in param:
            try:
                shutil.copy(p, outdir)
            except OSError:
                # Not a local file: hand the name to tleap unchanged
                f.write('loadamberparams ' + p + '\n')
                logger.info("Path {:s} not found, assuming a standard AmberTools file.".
                            format(p))
            else:
                f.write('loadamberparams ' + path.basename(p) + '\n')
        f.write('\n')

        # Printing out topologies
        f.write('# Loading prepi topologies\n')
        for t in topo:
            shutil.copy(t, outdir)
            f.write('loadamberprep ' + path.basename(t) + '\n')
        f.write('\n')

        # Detect disulfide bonds
        # NOTE(review): the CYX renaming / HG removal below only runs for auto-detected bonds, not for
        # user-provided `disulfide` values - confirm this is intended.
        if disulfide is None and not ionize:
            logger.info('Detecting disulfide bonds.')
            disulfide = detectDisulfideBonds(mol)
            for d in disulfide:
                sel1 = 'segid {} and resid {}'.format(d.segid1, d.resid1)
                sel2 = 'segid {} and resid {}'.format(d.segid2, d.resid2)
                # Rename the CYS to CYX if there is a disulfide bond
                mol.set('resname', 'CYX', sel=sel1)
                mol.set('resname', 'CYX', sel=sel2)
                # Remove (eventual) HG hydrogens on these CYS (from proteinPrepare)
                mol.remove('name HG and ' + sel1, _logger=False)
                mol.remove('name HG and ' + sel2, _logger=False)

        # Printing and loading the PDB file. AMBER can work with a single PDB file if the segments are separate by TER
        logger.info('Writing PDB file for input to tleap.')
        pdbname = path.join(outdir, 'input.pdb')

        # mol2 files have atomtype, here we only write parts not coming from mol2
        mol.write(pdbname, mol.atomtype == '')
        if not os.path.isfile(pdbname):
            raise NameError('Could not write a PDB file out of the given Molecule.')
        f.write('# Loading the system\n')
        f.write('mol = loadpdb input.pdb\n\n')

        if np.sum(mol.atomtype != '') != 0:
            logger.info('Writing mol2 files for input to tleap.')
            segs = np.unique(mol.segid[mol.atomtype != ''])
            # Each typed segment is loaded from its own mol2 file and merged into `mol` with tleap's combine
            combstr = 'mol = combine {mol'
            for s in segs:
                name = 'segment{}'.format(s)
                mol2name = path.join(outdir, '{}.mol2'.format(name))
                mol.write(mol2name, (mol.atomtype != '') & (mol.segid == s))
                if not os.path.isfile(mol2name):
                    raise NameError('Could not write a mol2 file out of the given Molecule.')
                f.write('# Loading the rest of the system\n')
                f.write('{} = loadmol2 {}.mol2\n\n'.format(name, name))
                combstr += ' {}'.format(name)
            combstr += '}\n\n'
            f.write(combstr)

        # Write patches for disulfide bonds (only after ionizing)
        if not ionize and len(disulfide) != 0:
            f.write('# Adding disulfide bonds\n')
            for d in disulfide:
                # Convert to stupid amber residue numbering
                uqseqid = sequenceID((mol.resid, mol.insertion, mol.segid)) + mol.resid[0] - 1
                uqres1 = int(np.unique(uqseqid[mol.atomselect('segid {} and resid {}'.format(d.segid1, d.resid1))]))
                uqres2 = int(np.unique(uqseqid[mol.atomselect('segid {} and resid {}'.format(d.segid2, d.resid2))]))
                f.write('bond mol.{}.SG mol.{}.SG\n'.format(uqres1, uqres2))
            f.write('\n')

        f.write('# Writing out the results\n')
        f.write('saveamberparm mol ' + prefix + '.prmtop ' + prefix + '.crd\n')
        f.write('quit')

    molbuilt = None
    if execute:
        # Source paths of extra dirs (our dirs, not amber default)
        htmdamberdir = path.abspath(path.join(home(), 'builder', 'amberfiles'))
        sourcepaths = [htmdamberdir]
        sourcepaths += [path.join(htmdamberdir, path.dirname(fname))
                        for fname in ff if path.isfile(path.join(htmdamberdir, fname))]
        extrasource = []
        for p in sourcepaths:
            extrasource.extend(['-I', '{}'.format(p)])
        logpath = os.path.abspath(os.path.join(outdir, 'log.txt'))
        logger.info('Starting the build.')
        currdir = os.getcwd()
        # tleap.in refers to its inputs by relative name, so tleap has to run inside outdir
        os.chdir(outdir)
        try:
            with open(logpath, 'w') as logfile:
                try:
                    cmd = [tleap] + extrasource + ['-f', './tleap.in']
                    call(cmd, stdout=logfile)
                except Exception as err:
                    raise NameError('tleap failed at execution') from err
        finally:
            # Always restore the caller's working directory, even on failure
            os.chdir(currdir)
        logger.info('Finished building.')

        # tleap writes <prefix>.prmtop / <prefix>.crd (see the saveamberparm line above)
        outcrd = path.join(outdir, prefix + '.crd')
        outprmtop = path.join(outdir, prefix + '.prmtop')

        # A missing file must produce the informative error below instead of an OSError from getsize
        if all(path.isfile(p) and path.getsize(p) != 0 for p in (outcrd, outprmtop)):
            molbuilt = Molecule(outprmtop)
            molbuilt.read(outcrd)
        else:
            raise NameError('No structure pdb/prmtop file was generated. Check {} for errors in building.'.format(logpath))

        if ionize:
            # Keep the un-ionized outputs under a different name before the second pass
            shutil.move(outcrd, path.join(outdir, prefix + '.noions.crd'))
            shutil.move(outprmtop, path.join(outdir, prefix + '.noions.prmtop'))
            totalcharge = np.sum(molbuilt.charge)
            nwater = np.sum(molbuilt.atomselect('water and noh'))
            anion, cation, anionatom, cationatom, nanion, ncation = ionizef(totalcharge, nwater, saltconc=saltconc, ff='amber', anion=saltanion, cation=saltcation)
            newmol = ionizePlace(mol, anion, cation, anionatom, cationatom, nanion, ncation)
            # Redo the whole build but now with ions included
            return build(newmol, ff=ff, topo=topo, param=param, prefix=prefix, outdir=outdir, caps={}, ionize=False,
                         execute=execute, saltconc=saltconc, disulfide=disulfide, tleap=tleap)
    # With execute=False nothing was built, so there is no molecule to write out
    if molbuilt is not None:
        molbuilt.write(path.join(outdir, prefix + '.pdb'))
    return molbuilt
Exemplo n.º 18
0
    def write(self, inputdir, outputdir):
        """ Write the equilibration protocol

        Sets up the equilibration in `outputdir` from the build files found in `inputdir`: formats the
        TCL script, fills in the cell dimensions if they are not set and writes the constraints
        reference file.

        Parameters
        ----------
        inputdir : str
            Path to a directory containing the files produced by a build process.
        outputdir : str
            Directory where to write the equilibration setup files.

        Raises
        ------
        RuntimeError
            If no constraints have been defined.

        Examples
        --------
        >>> md = Equilibration()
        >>> md.write('./build','./equil')
        """
        self._findFiles(inputdir)
        self._amberFixes()

        from htmd.units import convert
        numsteps = convert(self.timeunits, 'timesteps', self.runtime, timestep=self.acemd.timestep)

        inmol = Molecule(os.path.join(inputdir, self.acemd.coordinates))

        if np.any(inmol.atomselect('lipids')) and not self.useconstantratio:
            logger.warning('Lipids detected in input structure. We highly recommend setting useconstantratio=True '
                           'for membrane simulations.')

        # By default the constraints are applied for half of the run
        constrsteps = int(numsteps / 2) if self.constraintsteps is None else int(self.constraintsteps)

        if not isinstance(self.acemd.TCL, tuple):
            logger.warning('{} default TCL was already formatted.'.format(self.__class__.__name__))
        else:
            parts = list(self.acemd.TCL)
            refindex = ' '.join(map(str, inmol.get('index', self.fb_reference)))
            selindex = ' '.join(map(str, inmol.get('index', self.fb_selection)))
            boxstr = ' '.join(map(str, self.fb_box))
            # Only the first piece is a template; the second one is appended verbatim
            parts[0] = parts[0].format(NUMSTEPS=numsteps, KCONST=self.fb_k, REFINDEX=refindex,
                                       SELINDEX=selindex, BOX=boxstr, NVTSTEPS=self.nvtsteps,
                                       CONSTRAINTSTEPS=constrsteps, TEMPERATURE=self.temperature)
            self.acemd.TCL = parts[0] + parts[1]

        if self.acemd.celldimension is None and self.acemd.extendedsystem is None:
            coords = inmol.get('coords', sel='water')
            vacuum = coords.size == 0  # No water: it's a vacuum simulation
            if vacuum:
                coords = inmol.get('coords', sel='all')
            dim = np.max(coords, axis=0) - np.min(coords, axis=0)
            if vacuum:
                dim = dim + 12.
            self.acemd.celldimension = '{} {} {}'.format(dim[0], dim[1], dim[2])

        if self.useconstantratio:
            self.acemd.useconstantratio = 'on'

        self.acemd.setup(inputdir, outputdir, overwrite=True)

        # Adding constraints by writing them to the consref file
        consrefmol = Molecule(os.path.join(inputdir, self.acemd.consref))
        consrefmol.set('occupancy', 0)
        consrefmol.set('beta', 0)
        if len(self.constraints) == 0:
            raise RuntimeError('You have not defined any constraints for the Equilibration (constraints={}).')
        for selection in self.constraints:
            consrefmol.set('beta', self.constraints[selection], selection)
        consrefmol.write(os.path.join(outputdir, self.acemd.consref))
Exemplo n.º 19
0
    def write(self, inputdir, outputdir):
        """ Write the equilibration protocol

        Writes the equilibration protocol and files into a folder for execution
        using files inside the inputdir directory

        Parameters
        ----------
        inputdir : str
            Path to a directory containing the files produced by a build process.
        outputdir : str
            Directory where to write the equilibration setup files.

        Raises
        ------
        RuntimeError
            If the type of `self.acemd` does not match the protocol version, or (version 2) if no
            constraints have been defined.
        RuntimeWarning
            If restraints are set on a version 2 protocol.

        Examples
        --------
        >>> md = Equilibration()
        >>> md.write('./build','./equil')
        """

        from htmd.molecule.molecule import Molecule

        # Do version consistency check.
        # The two conditions are mutually exclusive, so they have to be combined with `or`:
        # joined with `and` the check could never trigger.
        if (self._version == 2 and not isinstance(self.acemd, Acemd2)) or \
                (self._version == 3 and not isinstance(self.acemd, Acemd)):
            raise RuntimeError('Acemd object version ({}) inconsistent with protocol version at instantiation '
                               '({})'.format(type(self.acemd), self._version))

        self._findFiles(inputdir)
        self._amberFixes()

        from htmd.units import convert
        numsteps = convert(self.timeunits, 'timesteps', self.runtime, timestep=self.acemd.timestep)
        if self._version == 3:
            self.acemd.temperature = self.temperature
            self.acemd.thermostattemp = self.temperature
            self.acemd.run = str(numsteps)

        pdbfile = os.path.join(inputdir, self.acemd.coordinates)
        inmol = Molecule(pdbfile)

        if np.any(inmol.atomselect('lipids')) and not self.useconstantratio:
            logger.warning('Lipids detected in input structure. We highly recommend setting useconstantratio=True '
                           'for membrane simulations.')

        # By default the constraints are applied for half of the run
        if self.constraintsteps is None:
            constrsteps = int(numsteps / 2)
        else:
            constrsteps = int(self.constraintsteps)

        if self._version == 2:
            if self.restraints:
                raise RuntimeWarning('restraints are only available on {}(_version=3)'.format(self.__class__.__name__))
            if isinstance(self.acemd.TCL, tuple):
                # Only the first piece is a template; the second one is appended verbatim
                tcl = list(self.acemd.TCL)
                tcl[0] = tcl[0].format(NUMSTEPS=numsteps, KCONST=self.fb_k,
                                       REFINDEX=' '.join(map(str, inmol.get('index', self.fb_reference))),
                                       SELINDEX=' '.join(map(str, inmol.get('index', self.fb_selection))),
                                       BOX=' '.join(map(str, self.fb_box)),
                                       NVTSTEPS=self.nvtsteps, CONSTRAINTSTEPS=constrsteps, TEMPERATURE=self.temperature)
                self.acemd.TCL = tcl[0] + tcl[1]
            else:
                logger.warning('{} default TCL was already formatted.'.format(self.__class__.__name__))
        elif self._version == 3:
            if self.restraints is not None:
                logger.info('Using user-provided restraints and ignoring constraints and fb_potential')
                self.acemd.restraints = self.restraints
            else:
                logger.warning('Converting constraints and fb_potential to restraints. This is a convenience '
                               'functional conversion. We recommend start using restraints with '
                               '{}(_version=3)'.format(self.__class__.__name__))
                restraints = list()
                # convert constraints to restraints and add them
                if self.constraints is not None:
                    restraints += self._constraints2restraints(constrsteps)
                # convert fb_potential to restraints and add them
                if self.fb_k > 0:
                    restraints += self._fb_potential2restraints(inputdir)
                self.acemd.restraints = restraints

        if self.acemd.celldimension is None and self.acemd.extendedsystem is None:
            coords = inmol.get('coords', sel='water')
            if coords.size == 0:  # It's a vacuum simulation
                coords = inmol.get('coords', sel='all')
                dim = np.max(coords, axis=0) - np.min(coords, axis=0)
                dim += 12.
            else:
                dim = np.max(coords, axis=0) - np.min(coords, axis=0)
            self.acemd.celldimension = '{} {} {}'.format(dim[0], dim[1], dim[2])

        if self.useconstantratio:
            self.acemd.useconstantratio = 'on'

        self.acemd.setup(inputdir, outputdir, overwrite=True)

        if self._version == 2:
            # Adding constraints by writing them to the consref file
            inconsreffile = os.path.join(inputdir, self.acemd.consref)
            consrefmol = Molecule(inconsreffile)
            consrefmol.set('occupancy', 0)
            consrefmol.set('beta', 0)
            if len(self.constraints) == 0:
                raise RuntimeError('You have not defined any constraints for the {} ('
                                   'constraints={{}}).'.format(self.__class__.__name__))
            else:
                for sel in self.constraints:
                    consrefmol.set('beta', self.constraints[sel], sel)
            outconsreffile = os.path.join(outputdir, self.acemd.consref)
            consrefmol.write(outconsreffile)
Exemplo n.º 20
0
class MutualInformation:
    """Mutual information between the chi1 dihedrals of protein residues, weighted by an MSM."""

    def __init__(self, model, mol=None, fstep=0.1, skip=1):
        """ Class that calculates the mutual information of protein residues.

        Parameters
        ----------
        model : :class:`Model <htmd.model.Model>` object
            A Model object with a calculated MSM
        mol : :class:`Molecule <htmd.molecule.molecule.Molecule>` object
            A reference molecule from which to obtain structural information. By default model.data.simlist[0].molfile
            will be used.
        fstep : float
            The frame step of the simulations
        skip : int
            Frame skipping

        Examples
        --------
        >>> from htmd.mutualinformation import MutualInformation
        >>> from htmd.ui import *
        >>>
        >>> sims = simlist(glob('./filtered/*/'), './filtered/filtered.pdb')
        >>> mol = Molecule('./filtered/filtered.pdb')
        >>>
        >>> metr = Metric(sims)
        >>> metr.set(MetricDihedral())
        >>> datadih = metr.project()
        >>> datadih.fstep = 0.1
        >>> datadih.dropTraj()
        >>>
        >>> metr = Metric(datadih.simlist)
        >>> metr.set(MetricSelfDistance('protein and name CA', metric='contacts'))
        >>> dataco = metr.project()
        >>> dataco.fstep = 0.1
        >>> dataco.dropTraj()
        >>>
        >>> tica = TICA(datadih, 20, units='ns')
        >>> datatica = tica.project(3)
        >>> datatica.cluster(MiniBatchKMeans(n_clusters=1500))
        >>> model = Model(datatica)
        >>> model.markovModel(12, 4, units='ns')
        >>>
        >>> mu = MutualInformation(model)
        >>> mu.calculate()
        >>> mu.saveMI('./mi_matrix.npy')
        >>> mu.weightGraph(dataco, 0.005)
        >>> mu.save_graphml('./weightgraph.graphml')
        """
        self.model = model

        self.mol = mol
        if mol is None:
            self.mol = Molecule(self.model.data.simlist[0].molfile)

        # Projects the simulations onto chi1 dihedrals and stores them in self.chi
        self._computeChiDihedrals(fstep=fstep, skip=skip)

        self.bindihcat = self._histogram()  # Lasts two minutes
        # Residue ids of the CA atoms; their order defines the rows/columns of the matrices
        self.resids = self.mol.get('resid', 'name CA')
        # Lookup table resid -> matrix index; -1 marks resids without a CA atom
        self.residmap = np.ones(self.mol.resid.max() + 1, dtype=int) * -1
        self.residmap[self.resids] = np.arange(len(self.resids))

        self.mi_matrix = None  # Filled by calculate() or loadMI()
        self.graph_array = None  # Filled by weightGraph()
        self.graph = None  # Filled by weightGraph()

    def calculate(self):
        """ Calculates the mutual information between all pairs of chi1 dihedrals.

        The result is stored in `self.mi_matrix`, indexed by residue. Only the entries with the first dihedral index
        lower than the second are filled, and entries of residues less than two positions apart are zeroed.
        """
        from htmd.config import _config
        from htmd.parallelprogress import ParallelExecutor
        numchi = self.chi.numDimensions
        statdist = self.model.msm.stationary_distribution
        stconcat = np.concatenate(self.model.data.St)
        microcat = self.model.micro_ofcluster[stconcat]

        aprun = ParallelExecutor(n_jobs=_config['ncpus'])
        res = aprun(total=numchi, desc='Calculating MI')(
            delayed(self._parallelAll)(numchi, dih1, 4, self.model.micronum,
                                       self.bindihcat, microcat, statdist)
            for dih1 in range(numchi))

        atomidx = self.chi.description.atomIndexes
        MI_all = np.zeros((len(self.resids), len(self.resids)))
        for dihcounts, pairs in res:
            for dihc, (dih1, dih2) in zip(dihcounts, pairs):
                if dih1 == dih2:
                    continue
                # Map the first atom of each dihedral to the matrix index of its residue
                resid1 = self.residmap[self.mol.resid[atomidx[dih1][0]]]
                resid2 = self.residmap[self.mol.resid[atomidx[dih2][0]]]
                MI_all[resid1, resid2] = self._calcMutualInfo(dihc)
        self.mi_matrix = self._cleanautocorrelations(MI_all)

    def _computeChiDihedrals(self, fstep=0.1, skip=1):
        """ Projects the simulations of the model on the chi1 dihedrals and stores the data in `self.chi`.

        Parameters
        ----------
        fstep : float
            The frame step assigned to the projected data
        skip : int
            Frame skipping passed to the Metric
        """
        chis = []
        protmol = self.mol.copy()
        protmol.filter('protein')
        caidx = self.mol.atomselect('protein and name CA')
        for residue in self.mol.resid[caidx]:
            ch = Dihedral.chi1(protmol, residue)
            if ch is not None:  # chi1 returned nothing for this residue
                chis.append(ch)

        metr = Metric(self.model.data.simlist, skip=skip)
        metr.set(MetricDihedral(chis, sincos=False))
        data = metr.project()
        data.fstep = fstep
        self.chi = data

    def _histogram(self):
        """ Discretizes all dihedral angles of `self.chi` into four states.

        Returns
        -------
        binneddih : np.ndarray
            Float array of shape (frames, dihedrals) with values 0-3. Angles in [-90, -30) map to 1, angles in
            [30, 90) map to 2, angles in [-180, -150) or >= 150 map to 3 and everything else maps to 0.
        """
        condata = np.concatenate(self.chi.dat)
        bins = np.array([-180, -150, -90, -30, 0, 30, 90, 150, 180])
        # State of each np.digitize bin index (0..len(bins)); index 0 are values below -180
        bin2state = np.array([0, 3, 0, 1, 0, 0, 2, 0, 3, 3])
        return bin2state[np.digitize(condata, bins)].astype(float)

    def _calcMutualInfo(self, contingency):
        """ Calculates the mutual information (natural logarithm) of a contingency matrix.

        Parameters
        ----------
        contingency : np.ndarray
            A 2D matrix of (possibly non-integer) joint counts or probabilities

        Returns
        -------
        mi : float
            The mutual information of the two variables
        """
        # Ripped out of sklearn since it converts floats to integers without warning which breaks our use-case
        from math import log
        nzx, nzy = np.nonzero(contingency)
        nz_val = contingency[nzx, nzy]
        contingency_sum = contingency.sum()
        pi = np.ravel(contingency.sum(axis=1))
        pj = np.ravel(contingency.sum(axis=0))
        log_contingency_nm = np.log(nz_val)
        contingency_nm = nz_val / contingency_sum
        # Don't need to calculate the full outer product, just for non-zeroes
        outer = pi.take(nzx) * pj.take(nzy)
        log_outer = -np.log(outer) + log(pi.sum()) + log(pj.sum())
        mi = (contingency_nm * (log_contingency_nm - log(contingency_sum)) +
              contingency_nm * log_outer)
        return mi.sum()

    def _parallelAll(self, numdih, dih1, numbins, micronum, bindihcat,
                     microcat, stat_dist):
        """ Calculates the weighted joint state counts of dihedral `dih1` with every dihedral `dih2 >= dih1`.

        Parameters
        ----------
        numdih : int
            Total number of dihedrals
        dih1 : int
            Index of the first dihedral of all pairs
        numbins : int
            Number of discrete dihedral states
        micronum : int
            Number of microstates
        bindihcat : np.ndarray
            The discretized dihedrals of all frames, as produced by `_histogram`
        microcat : np.ndarray
            The microstate of each frame, with -1 for frames of dropped clusters
        stat_dist : np.ndarray
            Weight of each microstate

        Returns
        -------
        results : list of np.ndarray
            One (numbins, numbins) matrix per dihedral pair
        resultpairs : list of tuple
            The (dih1, dih2) indexes corresponding to each matrix
        """
        results = []
        resultpairs = []
        for dih2 in range(dih1, numdih):
            dihcounts = np.zeros((numbins, numbins, micronum))

            # Find pairs of dihedrals (keys) and which absolute frames they occur in (vals)
            df = pd.DataFrame(
                {'a': list(zip(bindihcat[:, dih1], bindihcat[:, dih2]))})
            gg = df.groupby(by=df.a).groups

            for pair in gg:
                # Get the microstates of all frames having given dihedral pair
                microsofpairs = microcat[gg[pair]]
                # Delete dropped clusters
                microsofpairs = microsofpairs[microsofpairs != -1]
                # Count number of frames with that pair for each microstate (0, max_micro_seen)
                counts = np.bincount(microsofpairs)
                dihcounts[int(pair[0]), int(pair[1]), :len(counts)] += counts
            # Normalize all slices by total counts in each microstate
            # NOTE(review): a microstate without any frame gives 0/0 = NaN here, which then propagates into the
            # weighted sum - confirm that every microstate is guaranteed to be populated.
            dihcounts /= dihcounts.sum(axis=0).sum(axis=0)
            dihcounts *= stat_dist  # Multiply by stationary distribution of each state
            # Calculate the weighted sum over all states
            results.append(np.sum(dihcounts, axis=2))
            resultpairs.append((dih1, dih2))

        return results, resultpairs

    def _cleanautocorrelations(self, mi):
        """ Zeroes in-place the diagonal and the first off-diagonals of `mi` and returns it. """
        rows, cols = np.indices(mi.shape)
        mi[np.abs(rows - cols) < 2] = 0
        return mi

    def saveMI(self, path):
        """ Saves the mutual information matrix to `path` in numpy format. """
        np.save(path, self.mi_matrix)

    def loadMI(self, path):
        """ Loads a mutual information matrix saved with `saveMI`. """
        self.mi_matrix = np.load(path)

    def weightGraph(self, datacontacts, mi_threshold, time_treshold=0.6):
        """ Builds a residue graph weighted by mutual information and stores it in `self.graph`.

        Two residues are connected if their mutual information is above `mi_threshold` and they are in contact in
        more than a fraction `time_treshold` of all frames. The nodes get the attributes `Segment`, `flowcent`
        (current flow betweenness centrality) and `spectral` (spectral clustering label). Only the largest
        connected component of the graph is kept.

        Parameters
        ----------
        datacontacts : :class:`MetricData <htmd.metricdata.MetricData>` object
            The contact projection of the simulations
        mi_threshold : float
            Minimum mutual information of an edge
        time_treshold : float
            Minimum fraction of frames in which two residues have to be in contact
        """
        if self.mi_matrix is None:
            raise RuntimeError(
                'No mutual information matrix available. Run calculate() or loadMI() first.'
            )
        if len(self.mol.get('resid', 'name CA')) != len(self.resids):
            raise Exception(
                'The length of the protein doesn\'t match the Mutual Information data'
            )
        contactcat = np.concatenate(datacontacts.dat)
        contactidx = datacontacts.description.atomIndexes
        contacts_matrix = np.zeros([len(self.resids), len(self.resids)])
        for i in range(contactcat.shape[1]):
            # Number of frames in which contact i is formed
            counter = np.count_nonzero(contactcat[:, i])
            resid1 = self.residmap[self.mol.resid[contactidx[i][0]]]
            resid2 = self.residmap[self.mol.resid[contactidx[i][1]]]
            contacts_matrix[resid1, resid2] = counter

        self.graph_array = np.zeros(
            [contacts_matrix.shape[0], contacts_matrix.shape[0]])
        mask = (self.mi_matrix > mi_threshold) & \
               (contacts_matrix > (time_treshold * contactcat.shape[0]))
        self.graph_array[mask] = self.mi_matrix[mask]

        # Edge list from the non-zero entries strictly above the diagonal
        rows, cols = np.nonzero(np.triu(self.graph_array, k=1))
        intermed = [[int(self.resids[r]), int(self.resids[c]), float(self.graph_array[r, c])]
                    for r, c in zip(rows, cols)]

        import pandas as pd
        import networkx as nx
        from sklearn.cluster import SpectralClustering

        edges = pd.DataFrame(intermed, columns=['source', 'target', 'weight'])
        # Object dtype keeps the values as native python scalars (presumably because the graph writers
        # do not accept numpy scalars as attribute values - TODO confirm)
        edges[['source', 'target']] = edges[['source', 'target']].astype(object)
        edges['weight'] = edges['weight'].astype(object)
        # from_pandas_dataframe was renamed to from_pandas_edgelist in newer networkx versions
        from_edges = getattr(nx, 'from_pandas_edgelist', None) or nx.from_pandas_dataframe
        G = from_edges(edges, 'source', 'target', ['weight'])

        ## setSegment
        segids = self.mol.get('segid', 'name CA')
        connected = set(edges['source']).union(edges['target'])
        seg_res_dict = {
            key: value
            for (key, value) in zip(self.resids, segids) if key in connected
        }
        # Keyword arguments since the positional order differs between networkx versions
        nx.set_node_attributes(G, name='Segment', values=seg_res_dict)
        ## set
        if not nx.is_connected(G):
            # Keep only the largest connected component
            G = G.subgraph(max(nx.connected_components(G), key=len)).copy()
        flow_cent = nx.current_flow_betweenness_centrality(G, weight='weight')
        nx.set_node_attributes(G, name='flowcent', values=flow_cent)
        # NOTE(review): graph_array is only filled above the diagonal while a precomputed affinity is usually
        # expected to be symmetric - confirm this is intended.
        Spectre = SpectralClustering(n_clusters=10, affinity='precomputed')
        labels = Spectre.fit_predict(self.graph_array).astype(object)
        spectral_dict = {
            key: value
            for (key, value) in zip(self.resids, labels) if key in G
        }
        nx.set_node_attributes(G, name='spectral', values=spectral_dict)
        self.graph = G

    def save_graphml(self, path):
        """ Writes the graph built by `weightGraph` to `path` in GraphML format. """
        import networkx as nx
        nx.write_graphml(self.graph, path)

    # def save_pdf(self, graphpath, outpath):
    #     print(self.graph_array)
    #     self.save_graphml(graphpath)
    #     compile_java('./GephiGraph.java')
    #     compile_java('./GraphTest.java')
    #     execute_java('GraphTest', graphpath, outpath)


# if __name__ == '__main__':
#     from htmd.mutualinformation import MutualInformation
#     from htmd import *
#
#     sims = simlist(glob('/shared/adria/2ov5/adaptive_amber/batches/1/filtered/*/'),
#                    '/shared/adria/2ov5/adaptive_amber/batches/1/filtered/filtered.pdb')
#     mol = Molecule('/shared/adria/2ov5/adaptive_amber/batches/1/filtered/filtered.pdb')
#
#     metr = Metric(sims[0:100])
#     metr.set(MetricDihedral())
#     datadih = metr.project()
#     datadih.fstep = 0.1
#     datadih.dropTraj()
#
#     metr = Metric(datadih.simlist)
#     metr.set(MetricSelfDistance('protein and name CA', metric='contacts', threshold=8))
#     dataco = metr.project()
#     dataco.fstep = 0.1
#     dataco.dropTraj()
#
#     tica = TICA(datadih, 20, units='ns')
#     datatica = tica.project(3)
#     datatica.cluster(MiniBatchKMeans(n_clusters=1500))
#     model = Model(datatica)
#     model.markovModel(12, 4, units='ns')
#
#     mu = MutualInformation(model)
#     mu.calculate()
#     mu.saveMI('/tmp/mi_matrix.npy')
#     mu.weightGraph(dataco, 0.005)
#     mu.save_graphml('/tmp/weightgraph_0-005.graphml')
Exemplo n.º 21
0
def build(mol,
          ff=None,
          topo=None,
          param=None,
          prefix='structure',
          outdir='./build',
          caps=None,
          ionize=True,
          saltconc=0,
          saltanion=None,
          saltcation=None,
          disulfide=None,
          tleap='tleap',
          execute=True,
          atomtypes=None,
          offlibraries=None):
    """ Builds a system for AMBER

    Uses tleap to build a system for AMBER. Additionally it allows the user to ionize and add disulfide bridges.

    Parameters
    ----------
    mol : :class:`Molecule <htmd.molecule.molecule.Molecule>` object
        The Molecule object containing the system
    ff : list of str
        A list of leaprc forcefield files.
        Use :func:`amber.listFiles <htmd.builder.amber.listFiles>` to get a list of available forcefield files.
        Default: :func:`amber.defaultFf <htmd.builder.amber.defaultFf>`
    topo : list of str
        A list of topology `prepi` files.
        Use :func:`amber.listFiles <htmd.builder.amber.listFiles>` to get a list of available topology files.
        Default: :func:`amber.defaultTopo <htmd.builder.amber.defaultTopo>`
    param : list of str
        A list of parameter `frcmod` files.
        Use :func:`amber.listFiles <htmd.builder.amber.listFiles>` to get a list of available parameter files.
        Default: :func:`amber.defaultParam <htmd.builder.amber.defaultParam>`
    prefix : str
        The prefix for the generated prmtop, crd and pdb files
    outdir : str
        The path to the output directory
        Default: './build'
    caps : dict
        A dictionary with keys segids and values lists of strings describing the caps for a particular protein segment.
        e.g. caps['P'] = ['ACE', 'NME'] or caps['P'] = ['none', 'none']. Default: will apply ACE and NME caps to every
        protein segment.
    ionize : bool
        Enable or disable ionization
    saltconc : float
        Salt concentration to add to the system after neutralization.
    saltanion : {'Cl-'}
        The anion type. Please use only AMBER ion atom names.
    saltcation : {'Na+', 'K+', 'Cs+'}
        The cation type. Please use only AMBER ion atom names.
    disulfide : list of :class:`DisulfideBridge <htmd.builder.builder.DisulfideBridge>` objects
        If None it will guess disulfide bonds. Otherwise provide a list of `DisulfideBridge` objects.
    tleap : str
        Path to tleap executable used to build the system for AMBER
    execute : bool
        Disable building. Will only write out the input script needed by tleap. Does not include ionization.
    atomtypes : list of triplets
        Custom atom types defined by the user as ('type', 'element', 'hybrid') triplets
        e.g. (('C1', 'C', 'sp2'), ('CI', 'C', 'sp3')). Check `addAtomTypes` in AmberTools docs.
    offlibraries : str or list
        A path or a list of paths to OFF library files. Check `loadOFF` in AmberTools docs.

    Returns
    -------
    molbuilt : :class:`Molecule <htmd.molecule.molecule.Molecule>` object
        The built system in a Molecule object. None if `execute` is False.

    Raises
    ------
    NameError
        If the tleap executable cannot be found or fails to run, if the input files for tleap cannot be written
        or if tleap does not produce the expected output files.
    RuntimeError
        If an entry of `atomtypes` is not a triplet.

    Example
    -------
    >>> from htmd.ui import *
    >>> mol = Molecule("3PTB")
    >>> molbuilt = amber.build(mol, outdir='/tmp/build')  # doctest: +SKIP
    ...
    >>> # More complex example
    >>> disu = [DisulfideBridge('P', 157, 'P', 13), DisulfideBridge('K', 1, 'K', 25)]
    >>> molbuilt = amber.build(mol, outdir='/tmp/build', saltconc=0.15, disulfide=disu)  # doctest: +SKIP
    """
    # Remove pdb protein bonds as they can be regenerated by tleap. Keep non-protein bonds i.e. for ligands
    mol = mol.copy()
    _removeProteinBonds(mol)

    if shutil.which(tleap) is None:
        raise NameError(
            'Could not find executable: `{}` in the PATH. Cannot build for AMBER.'
            .format(tleap))
    if not os.path.isdir(outdir):
        os.makedirs(outdir)
    _cleanOutDir(outdir)
    if ff is None:
        ff = defaultFf()
    if topo is None:
        topo = defaultTopo()
    if param is None:
        param = defaultParam()
    if caps is None:
        caps = _defaultProteinCaps(mol)

    _missingSegID(mol)
    _checkMixedSegment(mol)
    _checkResidueInsertions(mol)

    mol = _charmmLipid2Amber(mol)

    _applyProteinCaps(mol, caps)

    if isinstance(ff, str):
        ff = [ff]

    # Files which tleap is instructed to write (see the saveamberparm line below)
    prmtopfile = os.path.join(outdir, prefix + '.prmtop')
    crdfile = os.path.join(outdir, prefix + '.crd')

    # The context manager closes tleap.in even if one of the checks below raises
    with open(os.path.join(outdir, 'tleap.in'), 'w') as f:
        f.write('# tleap file generated by amber.build\n')

        # Printing out the forcefields
        for force in ff:
            f.write('source ' + force + '\n')
        f.write('\n')

        # Adding custom atom types
        if atomtypes is not None:
            atomtypes = ensurelist(tocheck=atomtypes[0], tomod=atomtypes)
            f.write('addAtomTypes {\n')
            for at in atomtypes:
                if len(at) != 3:
                    raise RuntimeError(
                        'Atom type definitions have to be triplets. Check the AMBER documentation.'
                    )
                f.write('    {{ "{}" "{}" "{}" }}\n'.format(at[0], at[1], at[2]))
            f.write('}\n\n')

        # Loading OFF libraries
        if offlibraries is not None:
            if not isinstance(offlibraries, (list, tuple)):
                offlibraries = [offlibraries]
            for off in offlibraries:
                f.write('loadoff {}\n\n'.format(off))

        # Loading frcmod parameters
        f.write('# Loading parameter files\n')
        for p in param:
            try:
                shutil.copy(p, outdir)
            except OSError:
                # Not a file we can copy: let tleap resolve the name by itself
                f.write('loadamberparams ' + p + '\n')
                logger.info(
                    "File {:s} not found, assuming its present on the standard Amber location"
                    .format(p))
            else:
                f.write('loadamberparams ' + os.path.basename(p) + '\n')
        f.write('\n')

        # Loading prepi topologies
        f.write('# Loading prepi topologies\n')
        for t in topo:
            shutil.copy(t, outdir)
            f.write('loadamberprep ' + os.path.basename(t) + '\n')
        f.write('\n')

        # Detect disulfide bridges if not defined by user
        if disulfide is None and not ionize:
            logger.info('Detecting disulfide bonds.')
            disulfide = detectDisulfideBonds(mol)

        # Disulfide bonds are only patched in the final (non-ionizing) build
        patchdisulfide = not ionize and len(disulfide) != 0

        # Fix structure to match the disulfide patching
        if patchdisulfide:
            for d in disulfide:
                # Rename the residues to CYX if there is a disulfide bond
                atoms1 = (mol.segid == d.segid1) & (mol.resid == d.resid1)
                atoms2 = (mol.segid == d.segid2) & (mol.resid == d.resid2)
                mol.resname[atoms1] = 'CYX'
                mol.resname[atoms2] = 'CYX'
                # Remove (eventual) HG hydrogens on these CYS (from proteinPrepare). Both residues are handled
                # in a single call because the masks are no longer valid once atoms have been removed.
                mol.remove((atoms1 | atoms2) & (mol.name == 'HG'), _logger=False)

        # Printing and loading the PDB file. AMBER can work with a single PDB file if the segments are separate by TER
        logger.debug('Writing PDB file for input to tleap.')
        pdbname = os.path.join(outdir, 'input.pdb')

        # mol2 files have atomtype, here we only write parts not coming from mol2
        mol.write(pdbname, mol.atomtype == '')
        if not os.path.isfile(pdbname):
            raise NameError(
                'Could not write a PDB file out of the given Molecule.')
        f.write('# Loading the system\n')
        f.write('mol = loadpdb input.pdb\n\n')

        if np.sum(mol.atomtype != '') != 0:
            logger.debug('Writing mol2 files for input to tleap.')
            segs = np.unique(mol.segid[mol.atomtype != ''])
            combstr = 'mol = combine {mol'
            for s in segs:
                name = 'segment{}'.format(s)
                mol2name = os.path.join(outdir, '{}.mol2'.format(name))
                mol.write(mol2name, (mol.atomtype != '') & (mol.segid == s))
                if not os.path.isfile(mol2name):
                    raise NameError(
                        'Could not write a mol2 file out of the given Molecule.')
                f.write('# Loading the rest of the system\n')
                f.write('{} = loadmol2 {}.mol2\n\n'.format(name, name))
                combstr += ' {}'.format(name)
            combstr += '}\n\n'
            f.write(combstr)

        # Write patches for disulfide bonds (only after ionizing)
        if patchdisulfide:
            f.write('# Adding disulfide bonds\n')
            # Convert to stupid amber residue numbering
            uqseqid = sequenceID(
                (mol.resid, mol.insertion, mol.segid)) + mol.resid[0]
            for d in disulfide:
                uqres1 = int(
                    np.unique(uqseqid[(mol.segid == d.segid1)
                                      & (mol.resid == d.resid1)]))
                uqres2 = int(
                    np.unique(uqseqid[(mol.segid == d.segid2)
                                      & (mol.resid == d.resid2)]))
                f.write('bond mol.{}.SG mol.{}.SG\n'.format(uqres1, uqres2))
            f.write('\n')

        f.write('# Writing out the results\n')
        f.write('saveamberparm mol ' + prefix + '.prmtop ' + prefix + '.crd\n')
        f.write('quit')

    if not execute:
        # Only the tleap input script was requested, there is no built system to return
        return None

    # Source paths of extra dirs (our dirs, not amber default)
    htmdamberdir = os.path.abspath(
        os.path.join(home(), 'builder', 'amberfiles'))
    sourcepaths = [htmdamberdir]
    sourcepaths += [
        os.path.join(htmdamberdir, os.path.dirname(fname)) for fname in ff
        if os.path.isfile(os.path.join(htmdamberdir, fname))
    ]
    extrasource = []
    for p in sourcepaths:
        extrasource += ['-I', p]
    logpath = os.path.abspath(os.path.join(outdir, 'log.txt'))
    logger.info('Starting the build.')
    currdir = os.getcwd()
    os.chdir(outdir)
    try:
        with open(logpath, 'w') as logfile:
            try:
                call([tleap] + extrasource + ['-f', './tleap.in'], stdout=logfile)
            except Exception as err:
                raise NameError('tleap failed at execution') from err
    finally:
        # Always go back to the original working directory, also if tleap failed
        os.chdir(currdir)
    logger.info('Finished building.')

    built = all(os.path.isfile(fname) and os.path.getsize(fname) != 0
                for fname in (crdfile, prmtopfile))
    if not built:
        raise NameError(
            'No structure pdb/prmtop file was generated. Check {} for errors in building.'
            .format(logpath))
    molbuilt = Molecule(prmtopfile)
    molbuilt.read(crdfile)

    if ionize:
        shutil.move(crdfile, os.path.join(outdir, prefix + '.noions.crd'))
        shutil.move(prmtopfile, os.path.join(outdir, prefix + '.noions.prmtop'))
        totalcharge = np.sum(molbuilt.charge)
        nwater = np.sum(molbuilt.atomselect('water and noh'))
        anion, cation, anionatom, cationatom, nanion, ncation = ionizef(
            totalcharge,
            nwater,
            saltconc=saltconc,
            ff='amber',
            anion=saltanion,
            cation=saltcation)
        newmol = ionizePlace(mol, anion, cation, anionatom, cationatom,
                             nanion, ncation)
        # Redo the whole build but now with ions included. The caps were already applied to mol above.
        return build(newmol,
                     ff=ff,
                     topo=topo,
                     param=param,
                     prefix=prefix,
                     outdir=outdir,
                     caps={},
                     ionize=False,
                     execute=execute,
                     saltconc=saltconc,
                     disulfide=disulfide,
                     tleap=tleap,
                     atomtypes=atomtypes,
                     offlibraries=offlibraries)

    tmpbonds = molbuilt.bonds
    molbuilt.bonds = []  # Removing the bonds to speed up writing
    molbuilt.write(os.path.join(outdir, prefix + '.pdb'))
    molbuilt.bonds = tmpbonds  # Restoring the bonds
    return molbuilt
Exemplo n.º 22
0
    def write(self, inputdir, outputdir):
        """ Writes the production protocol and files into a folder.

        Parameters
        ----------
        inputdir : str
            Path to a directory containing the files produced by a equilibration process.
        outputdir : str
            Directory where to write the production setup files.

        Raises
        ------
        RuntimeError
            If the class of the Acemd object does not match the protocol version, or if with version 2
            `useconstraints` is enabled but no constraints are defined.
        RuntimeWarning
            If restraints are defined for a version 2 protocol.
        """
        from htmd.molecule.molecule import Molecule

        # Do version consistency check. The two mismatches are mutually exclusive, so either one is an error.
        if (self._version == 2 and not isinstance(self.acemd, Acemd2)) or \
                (self._version == 3 and not isinstance(self.acemd, Acemd)):
            raise RuntimeError(
                'Acemd object version ({}) inconsistent with protocol version at instantiation '
                '({})'.format(type(self.acemd), self._version))

        self._findFiles(inputdir)
        self._amberFixes()

        if self._version == 2:
            self.acemd.temperature = str(self.temperature)
            self.acemd.langevintemp = str(self.temperature)
        elif self._version == 3:
            self.acemd.temperature = self.temperature
            self.acemd.thermostattemp = self.temperature

        from htmd.units import convert
        numsteps = convert(self.timeunits,
                           'timesteps',
                           self.runtime,
                           timestep=self.acemd.timestep)
        if self._version == 3:
            # Version 2 receives the number of steps through the TCL script instead (see below)
            self.acemd.run = str(numsteps)

        pdbfile = os.path.join(inputdir, self.acemd.coordinates)
        inmol = Molecule(pdbfile)

        if np.any(inmol.atomselect('lipids')) and not self.useconstantratio:
            logger.warning(
                'Lipids detected in input structure. We highly recommend setting useconstantratio=True '
                'for membrane simulations.')

        if self._version == 2:
            if self.restraints:
                raise RuntimeWarning(
                    'restraints are only available on {}(_version=3)'.format(
                        self.__class__.__name__))
            if self.fb_k > 0:  # use TCL only for flatbottom
                self.acemd.tclforces = 'on'
                if isinstance(self.acemd.TCL, tuple):
                    # Fill in the template part of the TCL script and join it with the remaining part
                    tcl = list(self.acemd.TCL)
                    tcl[0] = tcl[0].format(
                        NUMSTEPS=numsteps,
                        TEMPERATURE=self.temperature,
                        KCONST=self.fb_k,
                        REFINDEX=' '.join(
                            map(str, inmol.get('index', self.fb_reference))),
                        SELINDEX=' '.join(
                            map(str, inmol.get('index', self.fb_selection))),
                        BOX=' '.join(map(str, self.fb_box)))
                    self.acemd.TCL = tcl[0] + tcl[1]
                else:
                    logger.warning(
                        '{} default TCL was already formatted.'.format(
                            self.__class__.__name__))
            else:
                self.acemd.TCL = 'set numsteps {NUMSTEPS}\n'.format(
                    NUMSTEPS=numsteps)
            if self.useconstraints:
                # Turn on constraints
                self.acemd.constraints = 'on'
                self.acemd.constraintscaling = '1.0'
            else:
                if len(self.constraints) != 0:
                    logger.warning(
                        'You have setup constraints to {} but constraints are turned off. '
                        'If you want to use constraints, define '
                        'useconstraints=True'.format(self.constraints))
        elif self._version == 3:
            if self.restraints is not None:
                logger.info(
                    'Using user-provided restraints and ignoring constraints and fb_potential'
                )
                self.acemd.restraints = self.restraints
            else:
                restraints = list()
                if self.fb_k > 0:
                    logger.warning(
                        'Converting fb_potential to restraints. This is a convenience '
                        'functional conversion. We recommend start using restraints with '
                        '{}(_version=3)'.format(self.__class__.__name__))
                    restraints += self._fb_potential2restraints(inputdir)
                if self.useconstraints:
                    logger.warning(
                        'Converting constraints to restraints. This is a convenience '
                        'functional conversion. We recommend start using restraints with '
                        '{}(_version=3)'.format(self.__class__.__name__))
                    restraints += self._constraints2restraints()
                else:
                    if len(self.constraints) != 0:
                        logger.warning(
                            'You have setup constraints to {} but constraints are turned off. '
                            'If you want to use constraints, define '
                            'useconstraints=True'.format(self.constraints))
                if len(restraints) != 0:
                    self.acemd.restraints = restraints

        if self.useconstantratio:
            self.acemd.useconstantratio = 'on'

        if self.adaptive:
            self.acemd.binvelocities = None

        self.acemd.setup(inputdir, outputdir, overwrite=True)

        if self._version == 2:
            # Adding constraints by writing them to the consref file
            if self.useconstraints:
                inconsreffile = os.path.join(inputdir, self.acemd.consref)
                consrefmol = Molecule(inconsreffile)
                # Reset both fields before writing the constraint values into beta
                consrefmol.set('occupancy', 0)
                consrefmol.set('beta', 0)
                if len(self.constraints) == 0:
                    raise RuntimeError(
                        'You have set the production to use constraints (useconstraints=True), but have '
                        'not defined any constraints (constraints={}).')
                else:
                    for sel in self.constraints:
                        consrefmol.set('beta', self.constraints[sel], sel)
                outconsreffile = os.path.join(outputdir, self.acemd.consref)
                consrefmol.write(outconsreffile)
Exemplo n.º 23
0
def build(mol, ff=None, topo=None, param=None, prefix='structure', outdir='./build', caps=None, ionize=True, saltconc=0,
          saltanion=None, saltcation=None, disulfide=None, tleap=None, execute=True, atomtypes=None,
          offlibraries=None, gbsa=False, igb=2):
    """ Builds a system for AMBER

    Uses tleap to build a system for AMBER. Additionally it allows the user to ionize and add disulfide bridges.

    Parameters
    ----------
    mol : :class:`Molecule <htmd.molecule.molecule.Molecule>` object
        The Molecule object containing the system
    ff : list of str
        A list of leaprc forcefield files.
        Use :func:`amber.listFiles <htmd.builder.amber.listFiles>` to get a list of available forcefield files.
        Default: :func:`amber.defaultFf <htmd.builder.amber.defaultFf>`
    topo : list of str
        A list of topology `prepi/prep/in` files.
        Use :func:`amber.listFiles <htmd.builder.amber.listFiles>` to get a list of available topology files.
        Default: :func:`amber.defaultTopo <htmd.builder.amber.defaultTopo>`
    param : list of str
        A list of parameter `frcmod` files.
        Use :func:`amber.listFiles <htmd.builder.amber.listFiles>` to get a list of available parameter files.
        Default: :func:`amber.defaultParam <htmd.builder.amber.defaultParam>`
    prefix : str
        The prefix for the generated pdb, prmtop and crd files
    outdir : str
        The path to the output directory
        Default: './build'
    caps : dict
        A dictionary with keys segids and values lists of strings describing the caps for a particular protein segment.
        e.g. caps['P'] = ['ACE', 'NME'] or caps['P'] = ['none', 'none']. Default: will apply ACE and NME caps to every
        protein segment.
    ionize : bool
        Enable or disable ionization
    saltconc : float
        Salt concentration to add to the system after neutralization.
    saltanion : {'Cl-'}
        The anion type. Please use only AMBER ion atom names.
    saltcation : {'Na+', 'K+', 'Cs+'}
        The cation type. Please use only AMBER ion atom names.
    disulfide : list of pairs of atomselection strings
        If None it will guess disulfide bonds. Otherwise provide a list pairs of atomselection strings for each pair of
        residues forming the disulfide bridge.
    tleap : str
        Path to tleap executable used to build the system for AMBER
    execute : bool
        Disable building. Will only write out the input script needed by tleap. Does not include ionization.
    atomtypes : list of triplets
        Custom atom types defined by the user as ('type', 'element', 'hybrid') triplets
        e.g. (('C1', 'C', 'sp2'), ('CI', 'C', 'sp3')). Check `addAtomTypes` in AmberTools docs.
    offlibraries : str or list
        A path or a list of paths to OFF library files. Check `loadOFF` in AmberTools docs.
    gbsa : bool
        Modify radii for GBSA implicit water model
    igb : int
        GB model. Select: 1 for mbondi, 2 and 5 for mbondi2, 7 for bondi and 8 for mbondi3.
        Check section 4. The Generalized Born/Surface Area Model of the AMBER manual.

    Returns
    -------
    molbuilt : :class:`Molecule <htmd.molecule.molecule.Molecule>` object
        The built system in a Molecule object. None if `execute` is False, since tleap is never run in that case.

    Raises
    ------
    NameError
        If the tleap executable cannot be found, if the mol2/PDB input files cannot be written or if tleap fails
        to execute.
    RuntimeError
        If an atom type definition is not a triplet or an OFF library file does not exist.
    BuildError
        If errors are found in the tleap log or tleap produced no (or empty) output files.

    Example
    -------
    >>> from htmd.ui import *  # doctest: +SKIP
    >>> mol = Molecule("3PTB")
    >>> molbuilt = amber.build(mol, outdir='/tmp/build')  # doctest: +SKIP
    >>> # More complex example
    >>> disu = [['segid P and resid 157', 'segid P and resid 13'], ['segid K and resid 1', 'segid K and resid 25']]
    >>> molbuilt = amber.build(mol, outdir='/tmp/build', saltconc=0.15, disulfide=disu)  # doctest: +SKIP
    """
    # Remove pdb protein bonds as they can be regenerated by tleap. Keep non-protein bonds i.e. for ligands
    mol = mol.copy()
    _removeProteinBonds(mol)

    if tleap is None:
        tleap = _findTleap()
    elif shutil.which(tleap) is None:
        raise NameError('Could not find executable: `{}` in the PATH. Cannot build for AMBER.'.format(tleap))

    if not os.path.isdir(outdir):
        os.makedirs(outdir)
    _cleanOutDir(outdir)
    if ff is None:
        ff = defaultFf()
    if topo is None:
        topo = defaultTopo()
    if param is None:
        param = defaultParam()
    if caps is None:
        caps = _defaultProteinCaps(mol)
    # A single forcefield may be passed as a plain string
    if isinstance(ff, str):
        ff = [ff]

    _missingSegID(mol)
    _checkMixedSegment(mol)

    mol = _charmmLipid2Amber(mol)

    _applyProteinCaps(mol, caps)

    # The context manager closes the script even if one of the checks below raises
    with open(os.path.join(outdir, 'tleap.in'), 'w') as script:
        script.write('# tleap file generated by amber.build\n')

        # Printing out the forcefields
        for i, force in enumerate(ff):
            if not os.path.isfile(force):
                force = _locateFile(force, 'ff', tleap)
                if force is None:
                    continue
            newname = 'ff{}_{}'.format(i, os.path.basename(force))
            shutil.copy(force, os.path.join(outdir, newname))
            script.write('source {}\n'.format(newname))
        script.write('\n')

        if gbsa:
            gbmodels = {1: 'mbondi', 2: 'mbondi2', 5: 'mbondi2', 7: 'bondi', 8: 'mbondi3'}
            script.write('set default PBradii {}\n\n'.format(gbmodels[igb]))

        # Adding custom atom types
        if atomtypes is not None:
            atomtypes = ensurelist(tocheck=atomtypes[0], tomod=atomtypes)
            script.write('addAtomTypes {\n')
            for at in atomtypes:
                if len(at) != 3:
                    raise RuntimeError('Atom type definitions have to be triplets. Check the AMBER documentation.')
                script.write('    {{ "{}" "{}" "{}" }}\n'.format(at[0], at[1], at[2]))
            script.write('}\n\n')

        # Loading OFF libraries
        if offlibraries is not None:
            offlibraries = ensurelist(offlibraries)
            # Each library gets its own index so that two libraries sharing a basename cannot overwrite each other
            for i, off in enumerate(offlibraries):
                if not os.path.isfile(off):
                    raise RuntimeError('Could not find off-library in location {}'.format(off))
                newname = 'offlib{}_{}'.format(i, os.path.basename(off))
                shutil.copy(off, os.path.join(outdir, newname))
                script.write('loadoff {}\n'.format(newname))

        # Loading frcmod parameters
        script.write('# Loading parameter files\n')
        for i, p in enumerate(param):
            if not os.path.isfile(p):
                p = _locateFile(p, 'param', tleap)
                if p is None:
                    continue
            newname = 'param{}_{}'.format(i, os.path.basename(p))
            shutil.copy(p, os.path.join(outdir, newname))
            script.write('loadamberparams {}\n'.format(newname))
        script.write('\n')

        # Loading prepi topologies
        script.write('# Loading prepi topologies\n')
        for i, t in enumerate(topo):
            if not os.path.isfile(t):
                t = _locateFile(t, 'topo', tleap)
                if t is None:
                    continue
            newname = 'topo{}_{}'.format(i, os.path.basename(t))
            shutil.copy(t, os.path.join(outdir, newname))
            script.write('loadamberprep {}\n'.format(newname))
        script.write('\n')

        script.write('# Loading the system\n')
        script.write('mol = loadpdb input.pdb\n\n')

        # Atoms carrying an atomtype are written to one mol2 file per segment and combined with the PDB part
        if np.sum(mol.atomtype != '') != 0:
            logger.debug('Writing mol2 files for input to tleap.')
            segs = np.unique(mol.segid[mol.atomtype != ''])
            combstr = 'mol = combine {mol'
            for s in segs:
                name = 'segment{}'.format(s)
                mol2name = os.path.join(outdir, '{}.mol2'.format(name))
                mol.write(mol2name, (mol.atomtype != '') & (mol.segid == s))
                if not os.path.isfile(mol2name):
                    raise NameError('Could not write a mol2 file out of the given Molecule.')
                script.write('# Loading the rest of the system\n')
                script.write('{} = loadmol2 {}.mol2\n\n'.format(name, name))
                combstr += ' {}'.format(name)
            combstr += '}\n\n'
            script.write(combstr)

        # Write patches for disulfide bonds (only after ionizing)
        if not ionize:
            # TODO: Remove this once we deprecate the class
            from htmd.builder.builder import DisulfideBridge
            from htmd.molecule.molecule import UniqueResidueID
            if disulfide is not None and len(disulfide) != 0 and isinstance(disulfide[0], DisulfideBridge):
                newdisu = []
                for d in disulfide:
                    r1 = UniqueResidueID.fromMolecule(mol, 'resid {} and segname {}'.format(d.resid1, d.segid1))
                    r2 = UniqueResidueID.fromMolecule(mol, 'resid {} and segname {}'.format(d.resid2, d.segid2))
                    newdisu.append([r1, r2])
                disulfide = newdisu
            # TODO: Remove up to here ----------------------

            if disulfide is not None and len(disulfide) != 0 and isinstance(disulfide[0][0], str):
                disulfide = convertDisulfide(mol, disulfide)

            if disulfide is None:
                logger.info('Detecting disulfide bonds.')
                disulfide = detectDisulfideBonds(mol)

            # Fix structure to match the disulfide patching
            if len(disulfide) != 0:
                torem = np.zeros(mol.numAtoms, dtype=bool)
                # Convert to amber residue numbering. Computed once: it depends only on resid/insertion/segid,
                # none of which are modified inside the loop below.
                uqseqid = sequenceID((mol.resid, mol.insertion, mol.segid)) + mol.resid[0]
                script.write('# Adding disulfide bonds\n')
                for d in disulfide:
                    # Rename the residues to CYX if there is a disulfide bond
                    atoms1 = d[0].selectAtoms(mol, indexes=False)
                    atoms2 = d[1].selectAtoms(mol, indexes=False)
                    mol.resname[atoms1] = 'CYX'
                    mol.resname[atoms2] = 'CYX'
                    # Remove (eventual) HG hydrogens on these CYS (from proteinPrepare)
                    torem |= (atoms1 & (mol.name == 'HG')) | (atoms2 & (mol.name == 'HG'))
                    uqres1 = int(np.unique(uqseqid[atoms1]))
                    uqres2 = int(np.unique(uqseqid[atoms2]))
                    script.write('bond mol.{}.SG mol.{}.SG\n'.format(uqres1, uqres2))
                script.write('\n')
                mol.remove(torem, _logger=False)

        script.write('# Writing out the results\n')
        script.write('saveamberparm mol ' + prefix + '.prmtop ' + prefix + '.crd\n')
        script.write('quit')

    # Printing and loading the PDB file. AMBER can work with a single PDB file if the segments are separate by TER
    logger.debug('Writing PDB file for input to tleap.')
    pdbname = os.path.join(outdir, 'input.pdb')

    # mol2 files have atomtype, here we only write parts not coming from mol2
    # We need to write the input.pdb at the end since we modify the resname for disulfide bridges in mol
    mol.write(pdbname, mol.atomtype == '')
    if not os.path.isfile(pdbname):
        raise NameError('Could not write a PDB file out of the given Molecule.')

    if not execute:
        # Only the tleap inputs were requested; there is no built system to load or return
        return None

    # Source paths of extra dirs (our dirs, not amber default)
    htmdamberdir = os.path.abspath(os.path.join(home(), 'builder', 'amberfiles'))
    sourcepaths = [htmdamberdir]
    sourcepaths += [os.path.join(htmdamberdir, os.path.dirname(fname))
                    for fname in ff if os.path.isfile(os.path.join(htmdamberdir, fname))]
    extrasource = []
    for p in sourcepaths:
        extrasource.append('-I')
        extrasource.append('{}'.format(p))
    cmd = [tleap] + extrasource + ['-f', './tleap.in']

    logpath = os.path.abspath(os.path.join(outdir, 'log.txt'))
    logger.info('Starting the build.')
    currdir = os.getcwd()
    os.chdir(outdir)
    try:
        with open(logpath, 'w') as logfile:
            try:
                call(cmd, stdout=logfile)
            except Exception as err:
                raise NameError('tleap failed at execution') from err
        errors = _logParser(logpath)
    finally:
        # Always go back to the caller's working directory, also when tleap fails
        os.chdir(currdir)
    if errors:
        raise BuildError(errors + ['Check {} for further information on errors in building.'.format(logpath)])
    logger.info('Finished building.')

    # tleap names its output after `prefix` (see the saveamberparm line above), so look for exactly those files
    crdfile = os.path.join(outdir, prefix + '.crd')
    prmtopfile = os.path.join(outdir, prefix + '.prmtop')
    if os.path.exists(crdfile) and os.path.getsize(crdfile) != 0 and os.path.getsize(prmtopfile) != 0:
        molbuilt = Molecule(prmtopfile)
        molbuilt.read(crdfile)
    else:
        raise BuildError('No structure pdb/prmtop file was generated. Check {} for errors in building.'.format(logpath))

    if ionize:
        shutil.move(crdfile, os.path.join(outdir, prefix + '.noions.crd'))
        shutil.move(prmtopfile, os.path.join(outdir, prefix + '.noions.prmtop'))
        totalcharge = np.sum(molbuilt.charge)
        nwater = np.sum(molbuilt.atomselect('water and noh'))
        anion, cation, anionatom, cationatom, nanion, ncation = ionizef(totalcharge, nwater, saltconc=saltconc,
                                                                        anion=saltanion, cation=saltcation)
        newmol = ionizePlace(mol, anion, cation, anionatom, cationatom, nanion, ncation)
        # Redo the whole build but now with ions included. gbsa/igb are forwarded so that the final
        # topology is built with the same radii settings as the first pass.
        return build(newmol, ff=ff, topo=topo, param=param, prefix=prefix, outdir=outdir, caps={}, ionize=False,
                     execute=execute, saltconc=saltconc, disulfide=disulfide, tleap=tleap, atomtypes=atomtypes,
                     offlibraries=offlibraries, gbsa=gbsa, igb=igb)

    tmpbonds = molbuilt.bonds
    molbuilt.bonds = []  # Removing the bonds to speed up writing
    molbuilt.write(os.path.join(outdir, prefix + '.pdb'))
    molbuilt.bonds = tmpbonds  # Restoring the bonds
    return molbuilt
Exemplo n.º 24
0
    def write(self, inputdir, outputdir):
        """ Writes the production protocol and files into a folder.

        Parameters
        ----------
        inputdir : str
            Path to a directory containing the files produced by a equilibration process.
        outputdir : str
            Directory where to write the production setup files.

        Raises
        ------
        RuntimeError
            If useconstraints is True but no constraints have been defined. The check is done before
            anything is written to `outputdir`.
        """
        self._findFiles(inputdir)
        self._amberFixes()

        self.acemd.temperature = self.temperature
        self.acemd.langevintemp = self.temperature

        from htmd.units import convert
        numsteps = convert(self.timeunits, 'timesteps', self.runtime, timestep=self.acemd.timestep)

        pdbfile = os.path.join(inputdir, self.acemd.coordinates)
        inmol = Molecule(pdbfile)

        if np.any(inmol.atomselect('lipids')) and not self.useconstantratio:
            logger.warning('Lipids detected in input structure. We highly recommend setting useconstantratio=True '
                           'for membrane simulations.')

        if self.fb_k > 0:  # use TCL only for flatbottom
            self.acemd.tclforces = 'on'
            if isinstance(self.acemd.TCL, str):
                # A previous write() already collapsed the template into a single string. Indexing it
                # again would pick single characters and corrupt the script, so leave it untouched.
                logger.warning('{} default TCL was already formatted.'.format(self.__class__.__name__))
            else:
                template = self.acemd.TCL
                # Only the first part of the template carries the placeholders
                header = template[0].format(NUMSTEPS=numsteps, TEMPERATURE=self.temperature, KCONST=self.fb_k,
                                            REFINDEX=' '.join(map(str, inmol.get('index', self.fb_reference))),
                                            SELINDEX=' '.join(map(str, inmol.get('index', self.fb_selection))),
                                            BOX=' '.join(map(str, self.fb_box)))
                self.acemd.TCL = header + template[1]
        else:
            self.acemd.TCL = 'set numsteps {NUMSTEPS}\n' \
                             'set temperature {TEMPERATURE}\n'.format(NUMSTEPS=numsteps, TEMPERATURE=self.temperature)

        if self.useconstraints:
            # Fail before acemd.setup() writes anything to outputdir
            if len(self.constraints) == 0:
                raise RuntimeError('You have set the production to use constraints (useconstraints=True), but have not '
                                   'defined any constraints (constraints={}).')
            # Turn on constraints
            self.acemd.constraints = 'on'
            self.acemd.constraintscaling = 1.0
        elif len(self.constraints) != 0:
            logger.warning('You have setup constraints to {} but constraints are turned off. '
                           'If you want to use constraints, define useconstraints=True'.format(self.constraints))

        if self.useconstantratio:
            self.acemd.useconstantratio = 'on'

        if self.adaptive:
            self.acemd.binvelocities = None

        self.acemd.setup(inputdir, outputdir, overwrite=True)

        # Adding constraints by writing them to the consref file
        if self.useconstraints:
            inconsreffile = os.path.join(inputdir, self.acemd.consref)
            consrefmol = Molecule(inconsreffile)
            # Reset both fields so that only the selections below carry a non-zero beta value
            consrefmol.set('occupancy', 0)
            consrefmol.set('beta', 0)
            for sel in self.constraints:
                consrefmol.set('beta', self.constraints[sel], sel)
            outconsreffile = os.path.join(outputdir, self.acemd.consref)
            consrefmol.write(outconsreffile)
Exemplo n.º 25
0
    return angles, dihedrals


# Self-test, executed only when this module is run as a script
if __name__ == "__main__":
    from htmd.molecule.molecule import Molecule
    from htmd.home import home
    import numpy as np
    from os import path
    import doctest
    # doctest.testmod(extraglobs={"mol": Molecule("3PTB")})

    def _tmscorefile(filename):
        # Path of a file inside the 'data/tmscore' folder below home()
        return path.join(home(), 'data', 'tmscore', filename)

    # Reference values the computed scores are compared against
    refTMscore = np.array([0.21418524, 0.2367377, 0.23433833, 0.21362964, 0.20935164,
                           0.20279461, 0.27012895, 0.22675238, 0.21230793, 0.2372011])
    refRMSD = np.array([3.70322128, 3.43637027, 3.188193, 3.84455877, 3.53053882,
                        3.46781854, 2.93777629, 2.97978692, 2.70792428, 2.63051318])

    traj = Molecule(_tmscorefile('filtered.pdb'))
    traj.read(_tmscorefile('traj.xtc'))
    reference = Molecule(_tmscorefile('ntl9_2hbb.pdb'))
    scores, deviations = molTMscore(traj, reference, traj.atomselect('protein'), reference.atomselect('protein'))

    assert np.allclose(scores, refTMscore)
    assert np.allclose(deviations, refRMSD)

    #
    # rhodopsin = Molecule('1F88')
    # d3r = Molecule('3PBL')
    # alnmol = sequenceStructureAlignment(rhodopsin, d3r)
Exemplo n.º 26
0
    def write(self, inputdir, outputdir):
        """ Writes the production protocol and files into a folder.

        Parameters
        ----------
        inputdir : str
            Path to a directory containing the files produced by a equilibration process.
        outputdir : str
            Directory where to write the production setup files.

        Raises
        ------
        RuntimeError
            If the type of the acemd object does not match the protocol version, or if (version 2)
            useconstraints is True but no constraints have been defined.
        RuntimeWarning
            If restraints are set while using protocol version 2.
        """
        from htmd.molecule.molecule import Molecule

        # Do version consistency check. A mismatch in either version is an error, so the two cases
        # are OR-ed: they are mutually exclusive and joined with `and` the check could never fire.
        if (self._version == 2 and not isinstance(self.acemd, Acemd2)) or \
                (self._version == 3 and not isinstance(self.acemd, Acemd)):
            raise RuntimeError('Acemd object version ({}) inconsistent with protocol version at instantiation '
                               '({})'.format(type(self.acemd), self._version))

        self._findFiles(inputdir)
        self._amberFixes()

        if self._version == 2:
            self.acemd.temperature = str(self.temperature)
            self.acemd.langevintemp = str(self.temperature)
        elif self._version == 3:
            self.acemd.temperature = self.temperature
            self.acemd.thermostattemp = self.temperature

        from htmd.units import convert
        numsteps = convert(self.timeunits, 'timesteps', self.runtime, timestep=self.acemd.timestep)
        if self._version == 3:
            self.acemd.run = str(numsteps)

        pdbfile = os.path.join(inputdir, self.acemd.coordinates)
        inmol = Molecule(pdbfile)

        if np.any(inmol.atomselect('lipids')) and not self.useconstantratio:
            logger.warning('Lipids detected in input structure. We highly recommend setting useconstantratio=True '
                           'for membrane simulations.')

        if self._version == 2:
            if self.restraints:
                raise RuntimeWarning('restraints are only available on {}(_version=3)'.format(self.__class__.__name__))
            if self.fb_k > 0:  # use TCL only for flatbottom
                self.acemd.tclforces = 'on'
                if isinstance(self.acemd.TCL, tuple):
                    tcl = list(self.acemd.TCL)
                    tcl[0] = tcl[0].format(NUMSTEPS=numsteps, TEMPERATURE=self.temperature, KCONST=self.fb_k,
                                           REFINDEX=' '.join(map(str, inmol.get('index', self.fb_reference))),
                                           SELINDEX=' '.join(map(str, inmol.get('index', self.fb_selection))),
                                           BOX=' '.join(map(str, self.fb_box)))
                    self.acemd.TCL = tcl[0] + tcl[1]
                else:
                    logger.warning('{} default TCL was already formatted.'.format(self.__class__.__name__))
            else:
                self.acemd.TCL = 'set numsteps {NUMSTEPS}\n'.format(NUMSTEPS=numsteps)
            if self.useconstraints:
                # Fail before acemd.setup() writes anything to outputdir
                if len(self.constraints) == 0:
                    raise RuntimeError('You have set the production to use constraints (useconstraints=True), but have '
                                       'not defined any constraints (constraints={}).')
                # Turn on constraints
                self.acemd.constraints = 'on'
                self.acemd.constraintscaling = '1.0'
            elif len(self.constraints) != 0:
                logger.warning('You have setup constraints to {} but constraints are turned off. '
                               'If you want to use constraints, define '
                               'useconstraints=True'.format(self.constraints))
        elif self._version == 3:
            if self.restraints is not None:
                logger.info('Using user-provided restraints and ignoring constraints and fb_potential')
                self.acemd.restraints = self.restraints
            else:
                # No explicit restraints: derive them from the fb_potential/constraints settings
                restraints = list()
                if self.fb_k > 0:
                    logger.warning('Converting fb_potential to restraints. This is a convenience '
                                   'functional conversion. We recommend start using restraints with '
                                   '{}(_version=3)'.format(self.__class__.__name__))
                    restraints += self._fb_potential2restraints(inputdir)
                if self.useconstraints:
                    logger.warning('Converting constraints to restraints. This is a convenience '
                                   'functional conversion. We recommend start using restraints with '
                                   '{}(_version=3)'.format(self.__class__.__name__))
                    restraints += self._constraints2restraints()
                elif len(self.constraints) != 0:
                    logger.warning('You have setup constraints to {} but constraints are turned off. '
                                   'If you want to use constraints, define '
                                   'useconstraints=True'.format(self.constraints))
                if len(restraints) != 0:
                    self.acemd.restraints = restraints

        if self.useconstantratio:
            self.acemd.useconstantratio = 'on'

        if self.adaptive:
            self.acemd.binvelocities = None

        self.acemd.setup(inputdir, outputdir, overwrite=True)

        # Adding constraints by writing them to the consref file (version 2 only)
        if self._version == 2 and self.useconstraints:
            inconsreffile = os.path.join(inputdir, self.acemd.consref)
            consrefmol = Molecule(inconsreffile)
            # Reset both fields so that only the selections below carry a non-zero beta value
            consrefmol.set('occupancy', 0)
            consrefmol.set('beta', 0)
            for sel in self.constraints:
                consrefmol.set('beta', self.constraints[sel], sel)
            outconsreffile = os.path.join(outputdir, self.acemd.consref)
            consrefmol.write(outconsreffile)
Exemplo n.º 27
0
    def write(self, inputdir, outputdir):
        """ Write the equilibration protocol

        Writes the equilibration protocol and files into a folder for execution
        using files inside the inputdir directory

        Parameters
        ----------
        inputdir : str
            Path to a directory containing the files produced by a build process.
        outputdir : str
            Directory where to write the equilibration setup files.

        Raises
        ------
        RuntimeError
            If no constraints have been defined. The check is done before anything is written to
            `outputdir`.

        Examples
        --------
        >>> md = Equilibration()
        >>> md.write('./build','./equil')
        """
        self._findFiles(inputdir)
        self._amberFixes()

        from htmd.units import convert
        numsteps = convert(self.timeunits,
                           'timesteps',
                           self.runtime,
                           timestep=self.acemd.timestep)

        pdbfile = os.path.join(inputdir, self.acemd.coordinates)
        inmol = Molecule(pdbfile)

        if np.any(inmol.atomselect('lipids')) and not self.useconstantratio:
            logger.warning(
                'Lipids detected in input structure. We highly recommend setting useconstantratio=True '
                'for membrane simulations.')

        # Unless given explicitly, the constraint phase spans half of the run
        if self.constraintsteps is None:
            constrsteps = int(numsteps / 2)
        else:
            constrsteps = int(self.constraintsteps)

        if isinstance(self.acemd.TCL, tuple):
            tcl = list(self.acemd.TCL)
            # Only the first part of the template carries the placeholders
            tcl[0] = tcl[0].format(
                NUMSTEPS=numsteps,
                KCONST=self.fb_k,
                REFINDEX=' '.join(
                    map(str, inmol.get('index', self.fb_reference))),
                SELINDEX=' '.join(
                    map(str, inmol.get('index', self.fb_selection))),
                BOX=' '.join(map(str, self.fb_box)),
                NVTSTEPS=self.nvtsteps,
                CONSTRAINTSTEPS=constrsteps,
                TEMPERATURE=self.temperature)
            self.acemd.TCL = tcl[0] + tcl[1]
        else:
            logger.warning('{} default TCL was already formatted.'.format(
                self.__class__.__name__))

        if self.acemd.celldimension is None and self.acemd.extendedsystem is None:
            # Derive the cell from the extent of the water coordinates
            coords = inmol.get('coords', sel='water')
            vacuum = coords.size == 0
            if vacuum:  # It's a vacuum simulation
                coords = inmol.get('coords', sel='all')
            dim = np.max(coords, axis=0) - np.min(coords, axis=0)
            if vacuum:
                # Without water the cell is padded by 12 in every dimension
                dim = dim + 12.
            self.acemd.celldimension = '{} {} {}'.format(
                dim[0], dim[1], dim[2])

        if self.useconstantratio:
            self.acemd.useconstantratio = 'on'

        # Fail before acemd.setup() writes anything to outputdir
        if len(self.constraints) == 0:
            raise RuntimeError(
                'You have not defined any constraints for the Equilibration (constraints={}).'
            )

        self.acemd.setup(inputdir, outputdir, overwrite=True)

        # Adding constraints by writing them to the consref file
        inconsreffile = os.path.join(inputdir, self.acemd.consref)
        consrefmol = Molecule(inconsreffile)
        # Reset both fields so that only the selections below carry a non-zero beta value
        consrefmol.set('occupancy', 0)
        consrefmol.set('beta', 0)
        for sel in self.constraints:
            consrefmol.set('beta', self.constraints[sel], sel)
        outconsreffile = os.path.join(outputdir, self.acemd.consref)
        consrefmol.write(outconsreffile)
Exemplo n.º 28
0
    def write(self, inputdir, outputdir):
        """ Writes the production protocol and files into a folder.

        Parameters
        ----------
        inputdir : str
            Path to a directory containing the files produced by a equilibration process.
        outputdir : str
            Directory where to write the production setup files.

        Raises
        ------
        RuntimeError
            If useconstraints is True but no constraints have been defined. The check is done before
            anything is written to `outputdir`.
        """
        self._findFiles(inputdir)
        self._amberFixes()

        self.acemd.temperature = self.temperature
        self.acemd.langevintemp = self.temperature

        from htmd.units import convert
        numsteps = convert(self.timeunits,
                           'timesteps',
                           self.runtime,
                           timestep=self.acemd.timestep)

        pdbfile = os.path.join(inputdir, self.acemd.coordinates)
        inmol = Molecule(pdbfile)

        if np.any(inmol.atomselect('lipids')) and not self.useconstantratio:
            logger.warning(
                'Lipids detected in input structure. We highly recommend setting useconstantratio=True '
                'for membrane simulations.')

        if self.fb_k > 0:  # use TCL only for flatbottom
            self.acemd.tclforces = 'on'
            if isinstance(self.acemd.TCL, str):
                # A previous write() already collapsed the template into a single string. Indexing it
                # again would pick single characters and corrupt the script, so leave it untouched.
                logger.warning('{} default TCL was already formatted.'.format(
                    self.__class__.__name__))
            else:
                tcl = list(self.acemd.TCL)
                # Only the first part of the template carries the placeholders
                tcl[0] = tcl[0].format(
                    NUMSTEPS=numsteps,
                    TEMPERATURE=self.temperature,
                    KCONST=self.fb_k,
                    REFINDEX=' '.join(
                        map(str, inmol.get('index', self.fb_reference))),
                    SELINDEX=' '.join(
                        map(str, inmol.get('index', self.fb_selection))),
                    BOX=' '.join(map(str, self.fb_box)))
                self.acemd.TCL = tcl[0] + tcl[1]
        else:
            self.acemd.TCL = 'set numsteps {NUMSTEPS}\n' \
                             'set temperature {TEMPERATURE}\n'.format(NUMSTEPS=numsteps, TEMPERATURE=self.temperature)

        if self.useconstraints:
            # Fail before acemd.setup() writes anything to outputdir
            if len(self.constraints) == 0:
                raise RuntimeError(
                    'You have set the production to use constraints (useconstraints=True), but have not '
                    'defined any constraints (constraints={}).')
            # Turn on constraints
            self.acemd.constraints = 'on'
            self.acemd.constraintscaling = 1.0
        elif len(self.constraints) != 0:
            logger.warning(
                'You have setup constraints to {} but constraints are turned off. '
                'If you want to use constraints, define useconstraints=True'
                .format(self.constraints))

        if self.useconstantratio:
            self.acemd.useconstantratio = 'on'

        if self.adaptive:
            self.acemd.binvelocities = None

        self.acemd.setup(inputdir, outputdir, overwrite=True)

        # Adding constraints by writing them to the consref file
        if self.useconstraints:
            inconsreffile = os.path.join(inputdir, self.acemd.consref)
            consrefmol = Molecule(inconsreffile)
            # Reset both fields so that only the selections below carry a non-zero beta value
            consrefmol.set('occupancy', 0)
            consrefmol.set('beta', 0)
            for sel in self.constraints:
                consrefmol.set('beta', self.constraints[sel], sel)
            outconsreffile = os.path.join(outputdir, self.acemd.consref)
            consrefmol.write(outconsreffile)
Exemplo n.º 29
0
Arquivo: util.py Projeto: jeiros/htmd
    return angles, dihedrals


# Self-test, executed only when this module is run as a script
if __name__ == "__main__":
    from htmd.molecule.molecule import Molecule
    from htmd.home import home
    import numpy as np
    from os import path
    import doctest
    # doctest.testmod(extraglobs={"mol": Molecule("3PTB")})

    def _tmscorefile(filename):
        # Path of a file inside the 'data/tmscore' folder below home()
        return path.join(home(), 'data', 'tmscore', filename)

    # Reference values the computed scores are compared against
    refTMscore = np.array([0.21418524, 0.2367377, 0.23433833, 0.21362964, 0.20935164,
                           0.20279461, 0.27012895, 0.22675238, 0.21230793, 0.2372011])
    refRMSD = np.array([3.70322128, 3.43637027, 3.188193, 3.84455877, 3.53053882,
                        3.46781854, 2.93777629, 2.97978692, 2.70792428, 2.63051318])

    traj = Molecule(_tmscorefile('filtered.pdb'))
    traj.read(_tmscorefile('traj.xtc'))
    reference = Molecule(_tmscorefile('ntl9_2hbb.pdb'))
    scores, deviations = molTMscore(traj, reference, traj.atomselect('protein'), reference.atomselect('protein'))

    assert np.allclose(scores, refTMscore)
    assert np.allclose(deviations, refRMSD)

    #
    # rhodopsin = Molecule('1F88')
    # d3r = Molecule('3PBL')
    # alnmol = sequenceStructureAlignment(rhodopsin, d3r)
Exemplo n.º 30
0
def build(mol,
          topo=None,
          param=None,
          prefix='structure',
          outdir='./',
          caps=None,
          ionize=True,
          saltconc=0,
          saltanion=None,
          saltcation=None,
          disulfide=None,
          patches=None,
          psfgen=None,
          execute=True):
    """ Builds a system for CHARMM

    Uses VMD and psfgen to build a system for CHARMM. Additionally it allows for ionization and adding of disulfide bridges.

    Parameters
    ----------
    mol : :class:`Molecule <htmd.molecule.molecule.Molecule>` object
        The Molecule object containing the system
    topo : list of str
        A list of topology `rtf` files. Default: ['top/top_all22star_prot.rtf', 'top/top_all36_lipid.rtf', 'top/top_water_ions.rtf']
    param : list of str
        A list of parameter `prm` files. Default: ['par/par_all22star_prot.prm', 'par/par_all36_lipid.prm', 'par/par_water_ions.prm']
    prefix : str
        The prefix for the generated pdb and psf files
    outdir : str
        The path to the output directory
    caps : dict
        A dictionary with keys segids and values lists of strings describing the caps of that segment.
        e.g. caps['P'] = ['first ACE', 'last CT3']. Default: will apply ACE and CT3 caps to proteins and none caps
        to the rest
    ionize : bool
        Enable or disable ionization
    saltconc : float
        Salt concentration (in Molar) to add to the system after neutralization.
    saltanion : {'CLA'}
        The anion type. Please use only CHARMM ion atom names.
    saltcation : {'SOD', 'MG', 'POT', 'CES', 'CAL', 'ZN2'}
        The cation type. Please use only CHARMM ion atom names.
    disulfide : list of :class:`DisulfideBridge <htmd.builder.builder.DisulfideBridge>` objects
        If None it will guess disulfide bonds. Otherwise provide a list of `DisulfideBridge` objects.
    patches : list of str
        Any further patches the user wants to apply. The passed list is not modified.
    psfgen : str
        Path to psfgen executable used to build for CHARMM
    execute : bool
        Disable building. Will only write out the input script needed by psfgen. Does not include ionization.

    Returns
    -------
    molbuilt : :class:`Molecule <htmd.molecule.molecule.Molecule>` object
        The built system in a Molecule object. None if `execute` is False.

    Raises
    ------
    FileNotFoundError
        If the system has to be built (`execute=True`) but no psfgen executable could be located.
    NameError
        If psfgen did not produce the `<prefix>.pdb` / `<prefix>.psf` files.

    Example
    -------
    >>> charmm.listFiles()
    >>> topos  = ['top/top_all22star_prot.rtf', './benzamidine.rtf']
    >>> params = ['par/par_all22star_prot.prm', './benzamidine.prm']
    >>> molbuilt = charmm.build(mol, topo=topos, param=params, outdir='/tmp/build', saltconc=0.15)
    """
    mol = mol.copy()
    _missingSegID(mol)
    if psfgen is None:
        # shutil.which reports a missing executable by returning None, it does not raise.
        psfgen = shutil.which('psfgen', mode=os.X_OK)
        # The executable is only required when the script is actually run.
        if not psfgen and execute:
            raise FileNotFoundError(
                'Could not find psfgen executable, or no execute permissions are given. '
                'Run `conda install psfgen`.')
    if not os.path.isdir(outdir):
        os.makedirs(outdir)
    _cleanOutDir(outdir)
    if topo is None:
        topo = _defaultTopo()
    if param is None:
        param = _defaultParam()
    if caps is None:
        caps = _defaultCaps(mol)
    # Work on a private copy: entries are replaced by resolved paths further down
    topo = list(topo)

    #_missingChain(mol)
    #_checkProteinGaps(mol)
    if patches is None:
        patches = []
    if isinstance(patches, str):
        patches = [patches]
    # Keep the user patches separate from the detected ones: the caller's list stays untouched
    # and the ionized rebuild below re-detects the protonation patches instead of getting them twice.
    allpatches = list(patches)
    # Find protonated residues and add patches for them
    allpatches.extend(_protonationPatches(mol))

    charmmdir = path.join(home(), 'builder', 'charmmfiles')
    with open(path.join(outdir, 'build.vmd'), 'w') as f:
        f.write('# psfgen file generated by charmm.build\n')
        f.write('package require psfgen;\n')
        f.write('psfcontext reset;\n\n')

        # Copying and printing out the topologies
        for i, topofile in enumerate(topo):
            # Names not starting with '.' are first looked up among the files shipped with the package
            if topofile[0] != '.' and path.isfile(path.join(charmmdir, topofile)):
                topofile = path.join(charmmdir, topofile)
                topo[i] = topofile
            # The index prefix keeps equally named files from overwriting each other in outdir
            localname = '{}.'.format(i) + path.basename(topofile)
            shutil.copy(topofile, path.join(outdir, localname))
            f.write('topology ' + localname + '\n')
        f.write('\n')

        _printAliases(f)

        # Printing out segments
        logger.info('Writing out segments.')
        segments = _getSegments(mol)
        for seg in segments:
            pdbname = 'segment' + seg + '.pdb'
            mol.write(path.join(outdir, pdbname), sel='segid ' + seg)

            segatoms = mol.atomselect('segid {}'.format(seg))
            segwater = mol.atomselect('segid {} and water'.format(seg))

            f.write('segment ' + seg + ' {\n')
            if np.all(segatoms == segwater):  # If segment only contains waters, set: auto none
                f.write('\tauto none\n')
            f.write('\tpdb ' + pdbname + '\n')
            if caps is not None and seg in caps:
                for c in caps[seg]:
                    f.write('\t' + c + '\n')
            f.write('}\n')
            f.write('coordpdb ' + pdbname + ' ' + seg + '\n\n')

        # Printing out patches for the disulfide bridges
        if disulfide is None:
            disulfide = detectDisulfideBonds(mol)

        if len(disulfide) != 0:
            for d in disulfide:
                f.write('patch DISU {}:{} {}:{}\n'.format(d.segid1, d.resid1,
                                                          d.segid2, d.resid2))
            f.write('\n')

        # Printing out extra patches
        if len(allpatches) != 0:
            for p in allpatches:
                f.write(p + '\n')
            f.write('\n')

        f.write('guesscoord\n')
        f.write('writepsf ' + prefix + '.psf\n')
        f.write('writepdb ' + prefix + '.pdb\n')
        #f.write('quit\n')

    _charmmCombine(param, path.join(outdir, 'parameters'))

    molbuilt = None
    if execute:
        logpath = os.path.abspath('{}/log.txt'.format(outdir))
        logger.info('Starting the build.')
        currdir = os.getcwd()
        # The script references its files by relative name, so psfgen has to run inside outdir
        os.chdir(outdir)
        try:
            with open(logpath, 'w') as logfile:
                #call([vmd, '-dispdev', 'text', '-e', './build.vmd'], stdout=logfile)
                call([psfgen, './build.vmd'], stdout=logfile)
            _logParser(logpath)
        finally:
            # Always return to the original working directory, even if the build fails
            os.chdir(currdir)
        logger.info('Finished building.')

        # psfgen was told to write <prefix>.pdb / <prefix>.psf, so those are the files to look for
        builtpdb = path.join(outdir, prefix + '.pdb')
        builtpsf = path.join(outdir, prefix + '.psf')
        if path.isfile(builtpdb) and path.isfile(builtpsf):
            molbuilt = Molecule(builtpdb)
            molbuilt.read(builtpsf)
        else:
            raise NameError(
                'No structure pdb/psf file was generated. Check {} for errors in building.'
                .format(logpath))

        if ionize:
            # Keep the ion-free build next to the final one
            shutil.move(builtpdb, path.join(outdir, prefix + '.noions.pdb'))
            shutil.move(builtpsf, path.join(outdir, prefix + '.noions.psf'))
            totalcharge = np.sum(molbuilt.charge)
            nwater = np.sum(molbuilt.atomselect('water and noh'))
            anion, cation, anionatom, cationatom, nanion, ncation = ionizef(
                totalcharge,
                nwater,
                saltconc=saltconc,
                ff='charmm',
                anion=saltanion,
                cation=saltcation)
            newmol = ionizePlace(mol, anion, cation, anionatom, cationatom,
                                 nanion, ncation)
            # Redo the whole build but now with ions included
            return build(newmol,
                         topo=topo,
                         param=param,
                         prefix=prefix,
                         outdir=outdir,
                         ionize=False,
                         caps=caps,
                         execute=execute,
                         saltconc=saltconc,
                         disulfide=disulfide,
                         patches=patches,
                         psfgen=psfgen)
    return molbuilt
Exemplo n.º 31
0
class MutualInformation:
    def __init__(self, model, mol=None, fstep=0.1, skip=1):
        """ Class that calculates the mutual information of protein residues.

        Parameters
        ----------
        model : :class:`Model <htmd.model.Model>` object
            A Model object with a calculated MSM
        mol : :class:`Molecule <htmd.molecule.molecule.Molecule>` object
            A reference molecule from which to obtain structural information. By default model.data.simlist[0].molfile
            will be used.
        fstep : float
            The frame step of the simulations
        skip : int
            Frame skipping

        Examples
        --------
        >>> from htmd.mutualinformation import MutualInformation
        >>> from htmd.ui import *
        >>>
        >>> sims = simlist(glob('./filtered/*/'), './filtered/filtered.pdb')
        >>> mol = Molecule('./filtered/filtered.pdb')
        >>>
        >>> metr = Metric(sims)
        >>> metr.set(MetricDihedral())
        >>> datadih = metr.project()
        >>> datadih.fstep = 0.1
        >>> datadih.dropTraj()
        >>>
        >>> metr = Metric(datadih.simlist)
        >>> metr.set(MetricSelfDistance('protein and name CA', metric='contacts'))
        >>> dataco = metr.project()
        >>> dataco.fstep = 0.1
        >>> dataco.dropTraj()
        >>>
        >>> tica = TICA(datadih, 20, units='ns')
        >>> datatica = tica.project(3)
        >>> datatica.cluster(MiniBatchKMeans(n_clusters=1500))
        >>> model = Model(datatica)
        >>> model.markovModel(12, 4, units='ns')
        >>>
        >>> mu = MutualInformation(model)
        >>> mu.calculate()
        >>> mu.saveMI('./mi_matrix.npy')
        >>> mu.weightGraph(dataco, 0.005)
        >>> mu.save_graphml('./weightgraph.graphml')
        """
        self.model = model

        self.mol = mol
        if mol is None:
            self.mol = Molecule(self.model.data.simlist[0].molfile)

        self._computeChiDihedrals(fstep=fstep, skip=skip)

        self.bindihcat = self._histogram()
        self.resids = self.mol.get('resid', 'name CA')
        # residmap[resid] gives the row/column of that residue in the MI matrix; -1 marks resids without a CA atom
        self.residmap = np.ones(self.mol.resid.max() + 1, dtype=int) * -1
        self.residmap[self.resids] = np.arange(len(self.resids))

        self.mi_matrix = None
        self.graph_array = None
        self.graph = None

    def calculate(self):
        """ Calculates the mutual information between all pairs of dihedrals and stores it in `self.mi_matrix`

        The per-dihedral work is distributed with `ParallelExecutor` over the number of CPUs set in the htmd
        configuration. Only dihedral pairs (dih1, dih2) with dih2 > dih1 are written to the matrix.
        """
        from htmd.config import _config
        from htmd.parallelprogress import ParallelExecutor
        numchi = self.chi.numDimensions
        statdist = self.model.msm.stationary_distribution
        stconcat = np.concatenate(self.model.data.St)
        # Microstate of every frame, in the order of the concatenated trajectories
        microcat = self.model.micro_ofcluster[stconcat]

        aprun = ParallelExecutor(n_jobs=_config['ncpus'])
        res = aprun(total=numchi, desc='Calculating MI')(delayed(self._parallelAll)(numchi, dih1, 4, self.model.micronum, self.bindihcat, microcat, statdist) for dih1 in range(numchi))
        MI_all = np.zeros((len(self.resids), len(self.resids)))
        for dihcounts, pairs in res:
            for dihc, (dih1, dih2) in zip(dihcounts, pairs):
                if dih1 == dih2:
                    continue
                # The first atom of each dihedral identifies the residue it belongs to
                resid1 = self.residmap[self.mol.resid[self.chi.description.atomIndexes[dih1][0]]]
                resid2 = self.residmap[self.mol.resid[self.chi.description.atomIndexes[dih2][0]]]
                MI_all[resid1][resid2] = self._calcMutualInfo(dihc)
        self.mi_matrix = self._cleanautocorrelations(MI_all)

    def _computeChiDihedrals(self, fstep=0.1, skip=1):
        """ Projects the simulations of the model on the chi1 dihedrals and stores the result in `self.chi`

        Parameters
        ----------
        fstep : float
            The frame step assigned to the projected data
        skip : int
            Frame skipping passed to the `Metric` projection
        """
        protmol = self.mol.copy()
        protmol.filter('protein')
        caidx = self.mol.atomselect('protein and name CA')
        resids = self.mol.resid[caidx]

        chis = []
        for residue in resids:
            ch = Dihedral.chi1(protmol, residue)
            if ch is not None:  # residues for which chi1 returns None are left out
                chis.append(ch)

        metr = Metric(self.model.data.simlist, skip=skip)
        metr.set(MetricDihedral(chis, sincos=False))
        data = metr.project()
        data.fstep = fstep
        self.chi = data

    def _histogram(self):
        """ Discretizes the dihedral values of all frames into the state labels 0-3

        Returns
        -------
        binneddih : np.ndarray
            Float array with the same shape as the concatenated `self.chi.dat`, containing the state label of
            each dihedral in each frame
        """
        condata = np.concatenate(self.chi.dat)
        bins = np.array([-180, -150, -90, -30, 0, 30, 90, 150, 180])
        # np.digitize returns an index between 0 and len(bins); entry i of this table is the state label
        # assigned to digitize index i (index 0, i.e. values below -180, is labelled 0).
        states = np.array([0, 3, 0, 1, 0, 0, 2, 0, 3, 3])
        return states[np.digitize(condata, bins)].astype(float)

    def _calcMutualInfo(self, contingency):
        """ Calculates the mutual information of a (possibly non-integer) contingency matrix

        Parameters
        ----------
        contingency : np.ndarray
            A 2D contingency matrix

        Returns
        -------
        mi : float
            The mutual information (natural logarithm)
        """
        # Ripped out of sklearn since it converts floats to integers without warning which breaks our use-case
        from math import log
        nzx, nzy = np.nonzero(contingency)
        nz_val = contingency[nzx, nzy]
        contingency_sum = contingency.sum()
        pi = np.ravel(contingency.sum(axis=1))
        pj = np.ravel(contingency.sum(axis=0))
        log_contingency_nm = np.log(nz_val)
        contingency_nm = nz_val / contingency_sum
        # Don't need to calculate the full outer product, just for non-zeroes
        outer = pi.take(nzx) * pj.take(nzy)
        log_outer = -np.log(outer) + log(pi.sum()) + log(pj.sum())
        mi = (contingency_nm * (log_contingency_nm - log(contingency_sum)) +
              contingency_nm * log_outer)
        return mi.sum()

    def _parallelAll(self, numdih, dih1, numbins, micronum, bindihcat, microcat, stat_dist):
        """ Calculates the weighted contingency matrices of dihedral `dih1` with all dihedrals dih2 >= dih1

        Parameters
        ----------
        numdih : int
            Total number of dihedrals
        dih1 : int
            Index of the first dihedral of all pairs
        numbins : int
            Number of states each dihedral was binned into
        micronum : int
            Number of microstates
        bindihcat : np.ndarray
            Binned dihedrals, frames along the first and dihedrals along the second axis
        microcat : np.ndarray
            Microstate of each frame, -1 for frames of dropped clusters
        stat_dist : np.ndarray
            Weight of each microstate

        Returns
        -------
        results : list of np.ndarray
            One (numbins, numbins) matrix per dihedral pair
        resultpairs : list of tuple
            The (dih1, dih2) pair corresponding to each matrix
        """
        results = []
        resultpairs = []
        for dih2 in range(dih1, numdih):
            dihcounts = np.zeros((numbins, numbins, micronum))

            # Find pairs of dihedrals (keys) and which absolute frames they occur in (vals)
            df = pd.DataFrame({'a': list(zip(bindihcat[:, dih1], bindihcat[:, dih2]))})
            gg = df.groupby(by=df.a).groups

            for pair in gg:
                microsofpairs = microcat[gg[pair]]  # Get the microstates of all frames having given dihedral pair
                microsofpairs = np.delete(microsofpairs, np.where(microsofpairs == -1)[0])  # Delete dropped clusters
                counts = np.bincount(microsofpairs)  # Count number of frames with that pair for each microstate (0, max_micro_seen)
                dihcounts[int(pair[0]), int(pair[1]), :len(counts)] += counts  # Add the counts
            # NOTE(review): a microstate without any counted frame gives a division by zero here - confirm it cannot happen
            dihcounts /= dihcounts.sum(axis=0).sum(axis=0)  # Normalize all slices by total counts in each microstate
            dihcounts *= stat_dist  # Multiply by stationary distribution of each state
            dihcounts = np.sum(dihcounts, axis=2)  # Calculate the weighted sum over all states
            results.append(dihcounts)
            resultpairs.append((dih1, dih2))

        return results, resultpairs

    def _cleanautocorrelations(self, mi):
        """ Zeroes, in place, the diagonal and the first off-diagonals of the matrix

        Parameters
        ----------
        mi : np.ndarray
            A 2D matrix. It is modified in place.

        Returns
        -------
        mi : np.ndarray
            The same array object that was passed in
        """
        rows = np.arange(mi.shape[0])[:, np.newaxis]
        cols = np.arange(mi.shape[1])[np.newaxis, :]
        mi[np.abs(rows - cols) < 2] = 0
        return mi

    def saveMI(self, path):
        """ Saves the mutual information matrix to `path` with `np.save` """
        np.save(path, self.mi_matrix)

    def loadMI(self, path):
        """ Loads the mutual information matrix from `path` with `np.load` """
        self.mi_matrix = np.load(path)

    def weightGraph(self, datacontacts, mi_threshold, time_treshold=0.6):
        """ Builds a weighted residue graph from the mutual information matrix and stores it in `self.graph`

        Two residues are connected if their mutual information is above `mi_threshold` and their contact is formed
        in more than a `time_treshold` fraction of all frames. If the graph is not connected only its largest
        connected component is kept.

        Parameters
        ----------
        datacontacts : data object
            Projected contacts. Its `dat` and `description.atomIndexes` attributes are used.
        mi_threshold : float
            Minimum mutual information for an edge
        time_treshold : float
            Minimum fraction of frames in which the contact has to be formed
        """
        if len(self.mol.get('resid', 'name CA')) != len(self.resids):
            raise Exception('The length of the protein doesn\'t match the Mutual Information data')
        contactcat = np.concatenate(datacontacts.dat)
        contacts_matrix = np.zeros([len(self.resids), len(self.resids)])
        for i in range(contactcat.shape[1]):
            counter = np.count_nonzero(contactcat[:, i])  # number of frames in which contact i is formed
            resid1 = self.residmap[self.mol.resid[datacontacts.description.atomIndexes[i][0]]]
            resid2 = self.residmap[self.mol.resid[datacontacts.description.atomIndexes[i][1]]]
            contacts_matrix[resid1][resid2] = counter

        self.graph_array = np.zeros([contacts_matrix.shape[0], contacts_matrix.shape[0]])
        mask = (self.mi_matrix > mi_threshold) & (contacts_matrix > (time_treshold * contactcat.shape[0]))
        self.graph_array[mask] = self.mi_matrix[mask]

        # Edge list (source resid, target resid, weight) from the upper triangle of the graph array
        intermed = []
        for source in range(self.graph_array.shape[0]):
            for target in range(source + 1, self.graph_array.shape[1]):
                if self.graph_array[source, target] != 0:
                    intermed.append(
                        [int(self.resids[source]), int(self.resids[target]), float(self.graph_array[source, target])])
        import pandas as pd
        import networkx as nx
        from sklearn.cluster import SpectralClustering

        edges = pd.DataFrame(intermed, columns=['source', 'target', 'weight'])
        # NOTE(review): the casts to ad-hoc int/float subclasses are kept exactly as they were; presumably they
        # keep plain Python numbers in the graph for the graphml export - confirm before simplifying.
        edges[['source', 'target']] = edges[['source', 'target']].astype(type('int', (int,), {}))
        edges['weight'] = edges['weight'].astype(type('float', (float,), {}))
        G = nx.from_pandas_dataframe(edges, 'source', 'target', ['weight'])
        ## setSegment
        segids = self.mol.get('segid', 'name CA')
        # Every residue taking part in an edge gets its segment. Membership is tested on the values: testing the
        # truthiness of the matching row indexes misses a residue whose only edge is stored in row 0.
        connected = set(edges['source']).union(edges['target'])
        seg_res_dict = {key: value for (key, value) in zip(self.resids, segids) if key in connected}
        nx.set_node_attributes(G, 'Segment', seg_res_dict)
        ## set
        if not nx.is_connected(G):
            G = max(nx.connected_component_subgraphs(G), key=len)
        flow_cent = nx.current_flow_betweenness_centrality(G, weight='weight')
        nx.set_node_attributes(G, 'flowcent', flow_cent)
        Spectre = SpectralClustering(n_clusters=10, affinity='precomputed')
        model = Spectre.fit_predict(self.graph_array)
        model = model.astype(type('float', (float,), {}))
        spectral_dict = {key: value for (key, value) in zip(self.resids, model) if key in G.nodes()}
        nx.set_node_attributes(G, 'spectral', spectral_dict)
        self.graph = G

    def save_graphml(self, path):
        """ Writes the graph built by `weightGraph` to `path` in graphml format """
        import networkx as nx
        nx.write_graphml(self.graph, path)

    # def save_pdf(self, graphpath, outpath):
    #     print(self.graph_array)
    #     self.save_graphml(graphpath)
    #     compile_java('./GephiGraph.java')
    #     compile_java('./GraphTest.java')
    #     execute_java('GraphTest', graphpath, outpath)

# if __name__ == '__main__':
#     from htmd.mutualinformation import MutualInformation
#     from htmd import *
#
#     sims = simlist(glob('/shared/adria/2ov5/adaptive_amber/batches/1/filtered/*/'),
#                    '/shared/adria/2ov5/adaptive_amber/batches/1/filtered/filtered.pdb')
#     mol = Molecule('/shared/adria/2ov5/adaptive_amber/batches/1/filtered/filtered.pdb')
#
#     metr = Metric(sims[0:100])
#     metr.set(MetricDihedral())
#     datadih = metr.project()
#     datadih.fstep = 0.1
#     datadih.dropTraj()
#
#     metr = Metric(datadih.simlist)
#     metr.set(MetricSelfDistance('protein and name CA', metric='contacts', threshold=8))
#     dataco = metr.project()
#     dataco.fstep = 0.1
#     dataco.dropTraj()
#
#     tica = TICA(datadih, 20, units='ns')
#     datatica = tica.project(3)
#     datatica.cluster(MiniBatchKMeans(n_clusters=1500))
#     model = Model(datatica)
#     model.markovModel(12, 4, units='ns')
#
#     mu = MutualInformation(model)
#     mu.calculate()
#     mu.saveMI('/tmp/mi_matrix.npy')
#     mu.weightGraph(dataco, 0.005)
#     mu.save_graphml('/tmp/weightgraph_0-005.graphml')
Exemplo n.º 32
0
def build(mol,
          topo=None,
          param=None,
          stream=None,
          prefix='structure',
          outdir='./build',
          caps=None,
          ionize=True,
          saltconc=0,
          saltanion=None,
          saltcation=None,
          disulfide=None,
          patches=None,
          noregen=None,
          psfgen=None,
          execute=True,
          _clean=True):
    """ Builds a system for CHARMM

    Uses VMD and psfgen to build a system for CHARMM. Additionally it allows for ionization and adding of disulfide bridges.

    Parameters
    ----------
    mol : :class:`Molecule <htmd.molecule.molecule.Molecule>` object
        The Molecule object containing the system
    topo : list of str
        A list of topology `rtf` files.
        Use :func:`charmm.listFiles <htmd.builder.charmm.listFiles>` to get a list of available topology files.
        Default: ['top/top_all36_prot.rtf', 'top/top_all36_lipid.rtf', 'top/top_water_ions.rtf']
    param : list of str
        A list of parameter `prm` files.
        Use :func:`charmm.listFiles <htmd.builder.charmm.listFiles>` to get a list of available parameter files.
        Default: ['par/par_all36_prot_mod.prm', 'par/par_all36_lipid.prm', 'par/par_water_ions.prm']
    stream : list of str
        A list of stream `str` files containing topologies and parameters.
        Use :func:`charmm.listFiles <htmd.builder.charmm.listFiles>` to get a list of available stream files.
        Default: ['str/prot/toppar_all36_prot_arg0.str']
    prefix : str
        The prefix for the generated pdb and psf files
    outdir : str
        The path to the output directory
        Default: './build'
    caps : dict
        A dictionary with keys segids and values lists of strings describing the caps of that segment.
        e.g. caps['P'] = ['first ACE', 'last CT3'] or caps['P'] = ['first none', 'last none']. 
        Default: will apply ACE and CT3 caps to proteins and none caps to the rest.
    ionize : bool
        Enable or disable ionization
    saltconc : float
        Salt concentration (in Molar) to add to the system after neutralization.
    saltanion : {'CLA'}
        The anion type. Please use only CHARMM ion atom names.
    saltcation : {'SOD', 'MG', 'POT', 'CES', 'CAL', 'ZN2'}
        The cation type. Please use only CHARMM ion atom names.
    disulfide : list of :class:`DisulfideBridge <htmd.builder.builder.DisulfideBridge>` objects
        If None it will guess disulfide bonds. Otherwise provide a list of `DisulfideBridge` objects.
    patches : list of str
        Any further patches the user wants to apply
    noregen : list of str
        A list of patches that must not be regenerated (angles and dihedrals)
        Default: ['FHEM', 'PHEM', 'PLOH', 'PLO2', 'PLIG', 'PSUL']
    psfgen : str
        Path to psfgen executable used to build for CHARMM
    execute : bool
        Disable building. Will only write out the input script needed by psfgen. Does not include ionization.
    _clean : bool
        Internal. If False the output directory is not cleaned before building. Used by the ionized rebuild
        to keep the `pre-ionize` folder.

    Returns
    -------
    molbuilt : :class:`Molecule <htmd.molecule.molecule.Molecule>` object
        The built system in a Molecule object

    Raises
    ------
    FileNotFoundError
        If no psfgen executable was given and none could be located.
    BuildError
        If errors were found in the psfgen log or the `<prefix>.pdb` / `<prefix>.psf` files were not generated.

    Example
    -------
    >>> from htmd import *
    >>> mol = Molecule("3PTB")
    >>> mol.filter("not resname BEN")
    >>> mol.renumberResidues()
    >>> molbuilt = charmm.build(mol, outdir='/tmp/build', ionize=False)  # doctest: +ELLIPSIS
    Bond between A: [serial 185 resid 42 resname CYS chain A segid 0]
                 B: [serial 298 resid 58 resname CYS chain A segid 0]...
    >>> # More complex example
    >>> topos  = ['top/top_all36_prot.rtf', './benzamidine.rtf', 'top/top_water_ions.rtf']
    >>> params = ['par/par_all36_prot_mod.prm', './benzamidine.prm', 'par/par_water_ions.prm']
    >>> disu = [DisulfideBridge('P', 157, 'P', 13), DisulfideBridge('K', 1, 'K', 25)]
    >>> molbuilt = charmm.build(mol, topo=topos, param=params, outdir='/tmp/build', saltconc=0.15, disulfide=disu)  # doctest: +SKIP
    """

    mol = mol.copy()
    _missingSegID(mol)
    _checkMixedSegment(mol)
    _checkResidueInsertions(mol)
    if psfgen is None:
        psfgen = shutil.which('psfgen', mode=os.X_OK)
        if not psfgen:
            raise FileNotFoundError(
                'Could not find psfgen executable, or no execute permissions are given. '
                'Run `conda install psfgen`.')
    if not os.path.isdir(outdir):
        os.makedirs(outdir)
    if _clean:
        _cleanOutDir(outdir)
    if topo is None:
        topo = defaultTopo()
    if param is None:
        param = defaultParam()
    if stream is None:
        stream = defaultStream()
    if caps is None:
        caps = _defaultCaps(mol)
    # patches that must _not_ be regenerated
    if noregen is None:
        noregen = ['FHEM', 'PHEM', 'PLOH', 'PLO2', 'PLIG', 'PSUL']

    # Private copies: the lists are extended and rewritten below, the caller's lists stay untouched
    alltopo = topo.copy()
    allparam = param.copy()

    # Splitting the stream files and adding them to the list of parameter and topology files
    charmmdir = path.join(home(), 'builder', 'charmmfiles')
    for s in stream:
        # Names not starting with '.' are first looked up among the files shipped with the package
        if s[0] != '.' and path.isfile(path.join(charmmdir, s)):
            s = path.join(charmmdir, s)
        outrtf, outprm = _prepareStream(s)
        alltopo.append(outrtf)
        allparam.append(outprm)

    #_missingChain(mol)
    #_checkProteinGaps(mol)
    if patches is None:
        patches = []
    if isinstance(patches, str):
        patches = [patches]
    # The user patches are kept separate since only those are handed to the ionized rebuild
    allpatches = []
    allpatches += patches
    # Find protonated residues and add patches for them
    allpatches += _protonationPatches(mol)

    topodir = path.join(outdir, 'topologies')
    segdir = path.join(outdir, 'segments')
    with open(path.join(outdir, 'build.vmd'), 'w') as script:
        script.write('# psfgen file generated by charmm.build\n')
        script.write('package require psfgen;\n')
        script.write('psfcontext reset;\n\n')

        # Copying and printing out the topologies
        if not path.exists(topodir):
            os.makedirs(topodir)
        for i, topofile in enumerate(alltopo):
            if topofile[0] != '.' and path.isfile(path.join(charmmdir, topofile)):
                topofile = path.join(charmmdir, topofile)
                alltopo[i] = topofile
            # The index prefix keeps equally named files from overwriting each other
            localname = '{}.'.format(i) + path.basename(topofile)
            shutil.copy(topofile, path.join(topodir, localname))
            script.write('topology ' + path.join('topologies', localname) + '\n')
        script.write('\n')

        _printAliases(script)

        # Printing out segments
        if not path.exists(segdir):
            os.makedirs(segdir)
        logger.info('Writing out segments.')
        segments = _getSegments(mol)
        wateratoms = mol.atomselect('water')
        for seg in segments:
            pdbname = 'segment' + seg + '.pdb'
            segatoms = mol.segid == seg
            mol.write(path.join(segdir, pdbname), sel=segatoms)

            segwater = wateratoms & segatoms
            script.write('segment ' + seg + ' {\n')
            if np.all(segatoms == segwater):  # If segment only contains waters, set: auto none
                script.write('\tauto none\n')
            script.write('\tpdb ' + path.join('segments', pdbname) + '\n')
            if caps is not None and seg in caps:
                for c in caps[seg]:
                    script.write('\t' + c + '\n')
            script.write('}\n')
            script.write('coordpdb ' + path.join('segments', pdbname) + ' ' + seg +
                         '\n\n')

        # Printing out patches for the disulfide bridges
        if disulfide is None:
            disulfide = detectDisulfideBonds(mol)

        if len(disulfide) != 0:
            for d in disulfide:
                script.write('patch DISU {}:{} {}:{}\n'.format(d.segid1, d.resid1,
                                                               d.segid2, d.resid2))
            script.write('\n')

        # The second word of a patch line is the patch name
        noregenpatches = [p for p in allpatches if p.split()[1] in noregen]
        regenpatches = [p for p in allpatches if p.split()[1] not in noregen]

        # Printing regenerable patches
        if len(regenpatches) != 0:
            for p in regenpatches:
                script.write(p + '\n')
            script.write('\n')

        # Regenerate angles and dihedrals
        script.write('regenerate angles dihedrals\n')
        script.write('\n')

        # Printing non-regenerable patches (after the regeneration so they are not affected by it)
        if len(noregenpatches) != 0:
            for p in noregenpatches:
                script.write(p + '\n')
            script.write('\n')

        script.write('guesscoord\n')
        script.write('writepsf ' + prefix + '.psf\n')
        script.write('writepdb ' + prefix + '.pdb\n')
        #script.write('quit\n')

    combine(allparam, path.join(outdir, 'parameters'))

    molbuilt = None
    if execute:
        logpath = os.path.abspath('{}/log.txt'.format(outdir))
        logger.info('Starting the build.')
        currdir = os.getcwd()
        # The script references its files by relative name, so psfgen has to run inside outdir
        os.chdir(outdir)
        try:
            with open(logpath, 'w') as logfile:
                #call([vmd, '-dispdev', 'text', '-e', './build.vmd'], stdout=logfile)
                call([psfgen, './build.vmd'], stdout=logfile)
            errors = _logParser(logpath)
        finally:
            # Always return to the original working directory, even if the build fails
            os.chdir(currdir)
        if errors:
            raise BuildError(errors + [
                'Check {} for further information on errors in building.'.
                format(logpath)
            ])
        logger.info('Finished building.')

        # psfgen was told to write <prefix>.pdb / <prefix>.psf, so those are the files to look for
        builtpdb = path.join(outdir, prefix + '.pdb')
        builtpsf = path.join(outdir, prefix + '.psf')
        if path.isfile(builtpdb) and path.isfile(builtpsf):
            molbuilt = Molecule(builtpdb)
            molbuilt.read(builtpsf)
        else:
            raise BuildError(
                'No structure pdb/psf file was generated. Check {} for errors in building.'
                .format(logpath))

        if ionize:
            # Archive everything produced so far before rebuilding with ions
            preionize = path.join(outdir, 'pre-ionize')
            os.makedirs(preionize)
            for item in glob(path.join(outdir, '*')):
                if path.basename(item) == 'pre-ionize':
                    continue  # the archive folder itself stays where it is
                shutil.move(item, preionize)
            totalcharge = np.sum(molbuilt.charge)
            nwater = np.sum(molbuilt.atomselect('water and noh'))
            anion, cation, anionatom, cationatom, nanion, ncation = ionizef(
                totalcharge,
                nwater,
                saltconc=saltconc,
                anion=saltanion,
                cation=saltcation)
            newmol = ionizePlace(mol, anion, cation, anionatom, cationatom,
                                 nanion, ncation)
            # Redo the whole build but now with ions included.
            # The stream files are already split into alltopo/allparam, hence stream=[].
            return build(newmol,
                         topo=alltopo,
                         param=allparam,
                         stream=[],
                         prefix=prefix,
                         outdir=outdir,
                         ionize=False,
                         caps=caps,
                         execute=execute,
                         saltconc=saltconc,
                         disulfide=disulfide,
                         patches=patches,
                         noregen=noregen,
                         psfgen=psfgen,
                         _clean=False)
    _checkFailedAtoms(molbuilt)
    _recoverProtonations(molbuilt)
    return molbuilt
Exemplo n.º 33
0
def build(mol, topo=None, param=None, stream=None, prefix='structure', outdir='./', caps=None, ionize=True, saltconc=0,
          saltanion=None, saltcation=None, disulfide=None, patches=None, psfgen=None, execute=True):
    """ Builds a system for CHARMM

    Writes a psfgen input script (`build.vmd`) into `outdir`, runs psfgen on it and reads back the generated
    pdb/psf files. Additionally it allows for ionization and adding of disulfide bridges. When `ionize` is set, the
    system is first built without ions, the ions are placed, and the whole build is repeated once with the ions
    included.

    Parameters
    ----------
    mol : :class:`Molecule <htmd.molecule.molecule.Molecule>` object
        The Molecule object containing the system. It is copied; the caller's object is not modified.
    topo : list of str
        A list of topology `rtf` files.
        Use :func:`charmm.listFiles <htmd.builder.charmm.listFiles>` to get a list of available topology files.
        Default: ['top/top_all36_prot.rtf', 'top/top_all36_lipid.rtf', 'top/top_water_ions.rtf']
    param : list of str
        A list of parameter `prm` files.
        Use :func:`charmm.listFiles <htmd.builder.charmm.listFiles>` to get a list of available parameter files.
        Default: ['par/par_all36_prot_mod.prm', 'par/par_all36_lipid.prm', 'par/par_water_ions.prm']
    stream : list of str
        A list of stream `str` files containing topologies and parameters.
        Use :func:`charmm.listFiles <htmd.builder.charmm.listFiles>` to get a list of available stream files.
        Default: ['str/prot/toppar_all36_prot_arg0.str']
    prefix : str
        The prefix for the generated pdb and psf files
    outdir : str
        The path to the output directory
    caps : dict
        A dictionary with keys segids and values lists of strings describing the caps of that segment.
        e.g. caps['P'] = ['first ACE', 'last CT3']. Default: will apply ACE and CT3 caps to proteins and none caps
        to the rest
    ionize : bool
        Enable or disable ionization
    saltconc : float
        Salt concentration (in Molar) to add to the system after neutralization.
    saltanion : {'CLA'}
        The anion type. Please use only CHARMM ion atom names.
    saltcation : {'SOD', 'MG', 'POT', 'CES', 'CAL', 'ZN2'}
        The cation type. Please use only CHARMM ion atom names.
    disulfide : list of :class:`DisulfideBridge <htmd.builder.builder.DisulfideBridge>` objects
        If None it will guess disulfide bonds. Otherwise provide a list of `DisulfideBridge` objects.
    patches : list of str
        Any further patches the user wants to apply. A single string is also accepted.
    psfgen : str
        Path to psfgen executable used to build for CHARMM. If None, it is looked up on the PATH.
    execute : bool
        Disable building. Will only write out the input script needed by psfgen. Does not include ionization.

    Returns
    -------
    molbuilt : :class:`Molecule <htmd.molecule.molecule.Molecule>` object
        The built system in a Molecule object. It is None when `execute` is False.

    Raises
    ------
    FileNotFoundError
        If `execute` is True, no `psfgen` path was given and no executable `psfgen` is found on the PATH.
    NameError
        If psfgen ran but did not produce the `<prefix>.pdb` and `<prefix>.psf` files.

    Example
    -------
    >>> charmm.listFiles()
    >>> topos  = ['top/top_all36_prot.rtf', './benzamidine.rtf']
    >>> params = ['par/par_all36_prot_mod.prm', './benzamidine.prm']
    >>> molbuilt = charmm.build(mol, topo=topos, param=params, outdir='/tmp/build', saltconc=0.15)
    """
    mol = mol.copy()
    _missingSegID(mol)
    if psfgen is None:
        # shutil.which returns None (it does not raise) when no executable is found
        psfgen = shutil.which('psfgen', mode=os.X_OK)
        # Only fail when psfgen is actually needed; writing the script alone works without it
        if psfgen is None and execute:
            raise FileNotFoundError('Could not find psfgen executable, or no execute permissions are given. '
                                    'Run `conda install psfgen`.')
    if not os.path.isdir(outdir):
        os.makedirs(outdir)
    _cleanOutDir(outdir)
    if topo is None:
        topo = ['top/top_all36_prot.rtf', 'top/top_all36_lipid.rtf', 'top/top_water_ions.rtf']
    if param is None:
        param = ['par/par_all36_prot_mod.prm', 'par/par_all36_lipid.prm', 'par/par_water_ions.prm']
    if stream is None:
        stream = ['str/prot/toppar_all36_prot_arg0.str']
    if caps is None:
        caps = _defaultCaps(mol)

    # Work on private copies: the lists are extended and rewritten below and must not leak into the caller's objects
    topo = list(topo)
    param = list(param)

    # Splitting the stream files and adding them to the list of parameter and topology files
    charmmdir = path.join(home(), 'builder', 'charmmfiles')
    for s in stream:
        if s[0] != '.' and path.isfile(path.join(charmmdir, s)):
            s = path.join(charmmdir, s)
        outrtf, outprm = _prepareStream(s)
        topo.append(outrtf)
        param.append(outprm)

    #_missingChain(mol)
    #_checkProteinGaps(mol)
    if patches is None:
        patches = []
    if isinstance(patches, str):
        patches = [patches]
    # Find protonated residues and add patches for them (on a copy, so the caller's list is left untouched)
    patches = list(patches)
    patches.extend(_protonationPatches(mol))

    # Names of the files psfgen is instructed to write, relative to outdir
    psfname = prefix + '.psf'
    pdbname_out = prefix + '.pdb'

    with open(path.join(outdir, 'build.vmd'), 'w') as f:
        f.write('# psfgen file generated by charmm.build\n')
        f.write('package require psfgen;\n')
        f.write('psfcontext reset;\n\n')

        # Copying and printing out the topologies
        for i, t in enumerate(topo):
            if t[0] != '.' and path.isfile(path.join(charmmdir, t)):
                t = path.join(charmmdir, t)
                topo[i] = t
            # Prefix with the index so that files sharing a basename do not overwrite each other
            localname = '{}.'.format(i) + path.basename(t)
            shutil.copy(t, path.join(outdir, localname))
            f.write('topology ' + localname + '\n')
        f.write('\n')

        _printAliases(f)

        # Printing out segments
        logger.info('Writing out segments.')
        segments = _getSegments(mol)
        for seg in segments:
            pdbname = 'segment' + seg + '.pdb'
            mol.write(path.join(outdir, pdbname), sel='segid '+seg)

            segatoms = mol.atomselect('segid {}'.format(seg))
            segwater = mol.atomselect('segid {} and water'.format(seg))

            f.write('segment ' + seg + ' {\n')
            if np.all(segatoms == segwater):  # If segment only contains waters, set: auto none
                f.write('\tauto none\n')
            f.write('\tpdb ' + pdbname + '\n')
            if caps is not None and seg in caps:
                for c in caps[seg]:
                    f.write('\t' + c + '\n')
            f.write('}\n')
            f.write('coordpdb ' + pdbname + ' ' + seg + '\n\n')

        # Printing out patches for the disulfide bridges
        if disulfide is None:
            disulfide = detectDisulfideBonds(mol)

        if len(disulfide) != 0:
            for d in disulfide:
                f.write('patch DISU {}:{} {}:{}\n'.format(d.segid1, d.resid1, d.segid2, d.resid2))
            f.write('\n')

        # Printing out extra patches
        if len(patches) != 0:
            for p in patches:
                f.write(p + '\n')
            f.write('\n')

        f.write('guesscoord\n')
        f.write('writepsf ' + psfname + '\n')
        f.write('writepdb ' + pdbname_out + '\n')
        #f.write('quit\n')

    _charmmCombine(param, path.join(outdir, 'parameters'))

    molbuilt = None
    if execute:
        logpath = os.path.abspath('{}/log.txt'.format(outdir))
        logger.info('Starting the build.')
        currdir = os.getcwd()
        # psfgen resolves the file names in build.vmd relative to the working directory
        os.chdir(outdir)
        try:
            with open(logpath, 'w') as logfile:
                #call([vmd, '-dispdev', 'text', '-e', './build.vmd'], stdout=logfile)
                call([psfgen, './build.vmd'], stdout=logfile)
            _logParser(logpath)
        finally:
            # Always restore the caller's working directory, even if psfgen or the log parsing fails
            os.chdir(currdir)
        logger.info('Finished building.')

        builtpdb = path.join(outdir, pdbname_out)
        builtpsf = path.join(outdir, psfname)
        if path.isfile(builtpdb) and path.isfile(builtpsf):
            molbuilt = Molecule(builtpdb)
            molbuilt.read(builtpsf)
        else:
            raise NameError('No structure pdb/psf file was generated. Check {} for errors in building.'.format(logpath))

        if ionize:
            # Keep the ion-free build next to the final one under a '.noions' name
            shutil.move(builtpdb, path.join(outdir, prefix + '.noions.pdb'))
            shutil.move(builtpsf, path.join(outdir, prefix + '.noions.psf'))
            totalcharge = np.sum(molbuilt.charge)
            nwater = np.sum(molbuilt.atomselect('water and noh'))
            anion, cation, anionatom, cationatom, nanion, ncation = ionizef(totalcharge, nwater, saltconc=saltconc,
                                                                            ff='charmm', anion=saltanion,
                                                                            cation=saltcation)
            newmol = ionizePlace(mol, anion, cation, anionatom, cationatom, nanion, ncation)
            # Redo the whole build but now with ions included. The stream files have already been split and merged
            # into topo/param above, so pass an empty stream list to avoid adding them a second time.
            return build(newmol, topo=topo, param=param, stream=[], prefix=prefix, outdir=outdir, ionize=False,
                         caps=caps, execute=execute, saltconc=saltconc, disulfide=disulfide, patches=patches,
                         psfgen=psfgen)
    _checkFailedAtoms(molbuilt)
    return molbuilt