Пример #1
0
def removeLipidsInProtein(prot, memb, lipidsel='lipids'):
    """ Calculates the convex hull of the protein. If a lipid lies inside the hull it gets removed.

    The hull is built from the 'name CA' coordinates of `prot`. Every residue of `memb` (as numbered by
    `sequenceID` from resid and segid) is added to those coordinates and the hull is recomputed. If the hull
    vertices stay the same, the residue is considered to be inside the hull. Of the atoms of such residues only
    the ones matching `lipidsel` are removed.

    This does not work well for lipids crossing out of the hull. If even one atom of the lipid is outside it will
    change the hull and will not get removed. I assume it will get removed by the clashes with the protein though.

    Parameters
    ----------
    prot
        Molecule whose CA atoms define the convex hull
    memb
        Membrane molecule. The function works on a copy obtained with `memb.copy()`
    lipidsel : str
        Atom selection of the atoms which are allowed to be removed

    Returns
    -------
    memb
        The copy of the membrane after the removal
    numlipsrem : int
        Number of residues of `memb` which were found inside the hull (counted before applying `lipidsel`)
    """
    # TODO: Do the same with Morphological Snakes
    from scipy.spatial import ConvexHull
    memb = memb.copy()

    # Convex hull of the protein. The vertex list is the reference every enlarged hull is compared to.
    cacoords = prot.get('coords', 'name CA')
    hullvertices = list(ConvexHull(cacoords).vertices)

    sequence = sequenceID((memb.resid, memb.segid))

    toremove = np.zeros(len(sequence), dtype=bool)
    numlipsrem = 0
    for res in np.unique(sequence):  # For each lipid check if its atoms lie within the convex hull
        atoms = np.where(sequence == res)[0]
        # Squeeze only the trailing axis. A bare np.squeeze would also drop the atom axis of a single-atom
        # residue, leaving a 1D array which cannot be appended to the 2D CA coordinates.
        rescoords = np.squeeze(memb.coords[atoms, :, :], axis=2)
        newhull = ConvexHull(np.append(cacoords, rescoords, axis=0))

        # If the hull didn't change by adding the lipid, it lies within convex hull. Remove it.
        if hullvertices == list(newhull.vertices):
            toremove[atoms] = True
            numlipsrem += 1

    lipids = memb.atomselect(lipidsel)  # Only remove lipids, waters are ok
    memb.remove(toremove & lipids)
    return memb, numlipsrem
Пример #2
0
    def _calcarrays(self, mol):
        """ Collect the atom index arrays describing the selected residues.

        Works on a copy of `mol` filtered with `self.sel`, so every index refers to the filtered atoms.

        Returns
        -------
        ca_indices : np.ndarray of int32
            Indexes of the atoms named CA
        nco_indices : np.ndarray of int32
            One row per unique residue with the first atom index matched by the backbone N, C and O selections,
            or -1 where a selection matched nothing
        proline_indices : np.ndarray of int32
            1 for every CA atom whose resname is PRO, 0 otherwise
        chain_ids : np.ndarray of int32
            For every CA atom the position of its chain among the unique chains returned by `np.unique`
        """
        work = mol.copy()
        work.filter(self.sel, _logger=False)

        # Residues are renumbered from resid, chain and insertion before they are used in selections
        work.resid = sequenceID((work.resid, work.chain, work.insertion))

        def first_match(selection):
            # First atom index matched by the selection, -1 if nothing matched
            found = work.atomselect(selection, indexes=True)
            return found[0] if len(found) != 0 else -1

        calphas = work.atomselect('name CA', indexes=True)
        ca_chains = work.get('chain', sel=calphas)
        ca_resnames = work.get('resname', sel=calphas)

        chain_ids = np.zeros(len(ca_chains), dtype=np.int32)
        for number, chain in enumerate(np.unique(ca_chains)):
            chain_ids[ca_chains == chain] = number

        backbone_queries = ('resid {} and backbone and name "N.*"',
                            'resid {} and backbone and name C',
                            'resid {} and backbone and name "O.*"')
        nco_rows = [[first_match(query.format(res)) for query in backbone_queries]
                    for res in np.unique(work.resid)]

        nco_indices = np.array(nco_rows, np.int32)
        proline_indices = np.array(ca_resnames == 'PRO', dtype=np.int32)
        return np.array(calphas, dtype=np.int32), nco_indices, proline_indices, chain_ids
Пример #3
0
    def _processTraj(self, mol):
        """ Run `stride` on the selected atoms of `mol` and return its output.

        The atoms given by `self._getSelections(mol)` are kept in a copy of `mol`. Every entry returned by
        `stride` is converted with `list` into a numpy array and stored in an object array. If `self._simple`
        is set the result is passed through `_ssmap` before it is returned.
        """
        selection = self._getSelections(mol)

        work = mol.copy()
        work.filter(selection, _logger=False)

        # Ugly conversions to feed it to sensitive C code
        residues = np.unique(work.get('resid'))
        nres = len(residues)
        atoms_per_residue = np.bincount(work.resid)[residues]
        resid_strings = [str(r) for r in work.resid]

        # Running index of every atom inside its own residue
        index_in_residue = -np.ones(work.numAtoms, dtype=np.int32)
        for resid in residues:
            members = work.resid == resid
            index_in_residue[members] = np.arange(np.sum(members))

        sslist = stride(
            work.coords.ravel(order='F'),
            np.array([nres], dtype=np.int32),
            atoms_per_residue.astype(np.int32),
            np.array(sequenceID(work.resid) - 1, dtype=np.int32),
            np.zeros(work.numAtoms, dtype=np.int32),
            index_in_residue.astype(np.int32),
            work.chain[0],
            resid_strings,
            work.resname.tolist(),
            work.name.tolist(),
            1,
            work.numFrames,
            work.numAtoms,
        )

        result = np.zeros(len(sslist), dtype=object)
        for pos, entry in enumerate(sslist):
            result[pos] = np.array(list(entry))

        return _ssmap(result) if self._simple else result
Пример #4
0
    def _calcRadiiMapping(self, mol):
        """ Compute the radius and the group index of every selected atom.

        The radius of an atom is looked up from the first character of its name and `self._probeRadius` is
        added to it. A name starting with a character missing from the table raises a KeyError.

        Returns
        -------
        radii : np.ndarray
            Table radius plus probe radius for every atom selected by `self._sel`
        atom_mapping : np.ndarray of int32
            Running atom number in 'atom' mode, `sequenceID` of resid, chain and segid in 'residue' mode
        sel
            The result of `mol.atomselect(self._sel)`

        Raises
        ------
        ValueError
            If `self._mode` is neither 'atom' nor 'residue'
        """
        sel = mol.atomselect(self._sel)

        radius_table = dict(C=1.5, F=1.2, H=0.4, N=1.10, O=1.05, S=1.6, P=1.6)
        first_letters = [atomname[0] for atomname in mol.name[sel]]
        lookup = np.vectorize(radius_table.__getitem__)
        radii = np.array(lookup(first_letters), np.float32) + self._probeRadius

        if self._mode == 'atom':
            return radii, np.arange(np.sum(sel), dtype=np.int32), sel
        if self._mode == 'residue':
            residue_ids = sequenceID((mol.resid[sel], mol.chain[sel], mol.segid[sel]))
            return radii, residue_ids.astype(np.int32), sel
        raise ValueError('mode must be one of "residue", "atom". "{}" supplied'.format(self._mode))
Пример #5
0
def removeLipidsInProtein(prot, memb, lipidsel='lipids'):
    """ Removes the lipids of a membrane which lie inside the convex hull of the protein CA atoms.

    Every residue of `memb` (as numbered by `sequenceID` from resid and segid) is stacked onto the CA coordinates
    and the hull is recomputed. Unchanged hull vertices mean the residue is inside the hull. Of the atoms of such
    residues only the ones matching `lipidsel` are removed from a copy of `memb`.

    A lipid which has even a single atom outside of the hull changes the hull and is therefore kept. The
    assumption is that such lipids get removed later due to their clashes with the protein.

    Returns the modified copy of the membrane and the number of residues which were found inside the hull.
    """
    # TODO: Do the same with Morphological Snakes
    from scipy.spatial import ConvexHull
    membrane = memb.copy()

    # Vertices of the protein-only hull, the reference for every comparison below
    ca_xyz = prot.get('coords', 'name CA')
    reference = list(ConvexHull(ca_xyz).vertices)

    resnumber = sequenceID((membrane.resid, membrane.segid))
    inside = np.zeros(len(resnumber), dtype=bool)
    numlipsrem = 0
    for current in np.unique(resnumber):
        members = np.where(resnumber == current)[0]
        grown = ConvexHull(np.vstack((ca_xyz, membrane.get('coords', sel=members))))
        if list(grown.vertices) != reference:
            continue  # The residue sticks out of the hull, keep it
        inside[members] = True
        numlipsrem += 1

    # Only remove lipids, waters are ok
    removable = membrane.atomselect(lipidsel)
    membrane.remove(inside & removable)
    return membrane, numlipsrem
Пример #6
0
    def _calcarrays(self, mol):
        """ Build the index arrays of the CA atoms and of the backbone N, C, O atoms of every residue.

        `mol` is copied and filtered with `self.sel` first, the returned indexes refer to that filtered copy.
        Returned are, in this order: the CA atom indexes, a (residues x 3) array with the first N, C and O
        backbone atom index of each residue (-1 if missing), a 0/1 array flagging the CA atoms of PRO residues
        and the chain number of every CA atom. All arrays are int32.
        """
        filtered = mol.copy()
        filtered.filter(self.sel, _logger=False)

        def pick(query, res):
            # Index of the first atom matching the query for this residue, -1 without a match
            hits = filtered.atomselect(query.format(res), indexes=True)
            if len(hits) == 0:
                return -1
            return hits[0]

        filtered.resid = sequenceID((filtered.resid, filtered.chain, filtered.insertion))

        ca_atoms = filtered.atomselect('name CA', indexes=True)
        chain_of_ca = filtered.get('chain', sel=ca_atoms)
        resname_of_ca = filtered.get('resname', sel=ca_atoms)

        # Number the chains in the order np.unique returns them
        chain_ids = np.zeros(len(chain_of_ca), dtype=np.int32)
        for label, chain in enumerate(np.unique(chain_of_ca)):
            chain_ids[chain_of_ca == chain] = label

        rows = []
        for res in np.unique(filtered.resid):
            nitrogen = pick('resid {} and backbone and name "N.*"', res)
            carbon = pick('resid {} and backbone and name C', res)
            oxygen = pick('resid {} and backbone and name "O.*"', res)
            rows.append([nitrogen, carbon, oxygen])

        nco_indices = np.array(rows, np.int32)
        proline_indices = np.array(resname_of_ca == 'PRO', dtype=np.int32)
        ca_indices = np.array(ca_atoms, dtype=np.int32)
        return ca_indices, nco_indices, proline_indices, chain_ids
Пример #7
0
def tileMembrane(memb, xmin, ymin, xmax, ymax, buffer=1.5):
    """ Tile a membrane in the X and Y dimensions to reach a specific size.

    The size of one tile is taken from the extent of the water coordinates of `memb`. Copies of the membrane are
    shifted with `moveBy`, trimmed at `xmax`/`ymax` and appended to a new Molecule. Each copy gets its own segid
    ('M0', 'M1', ...).

    Parameters
    ----------
    memb
        The membrane to replicate. It is copied, and the resids of the copy are renumbered with `sequenceID`
    xmin
        X position used for the first column of tiles
    ymin
        Y position used for the first row of tiles
    xmax
        Residues selected by 'same resid as (x > xmax or y > ymax)' are removed from every tile
    ymax
        See `xmax`
    buffer
        Extra distance added to the tile size when computing the position of each following tile. The number
        of repetitions is computed from the tile size alone

    Returns
    -------
    megamemb :
        A big membrane Molecule
    """
    from htmd.progress.progress import ProgressBar
    template = memb.copy()
    template.resid = sequenceID(template.resid)

    watercoords = template.get('coords', 'water')
    lowest = np.min(watercoords, axis=0)
    minmemb = lowest.flatten()
    size = (np.max(watercoords, axis=0) - lowest).flatten()

    xreps = int(np.ceil((xmax - xmin) / size[0]))
    yreps = int(np.ceil((ymax - ymin) / size[1]))

    logger.info('Replicating Membrane {}x{}'.format(xreps, yreps))

    from htmd.molecule.molecule import Molecule
    megamemb = Molecule()
    bar = ProgressBar(xreps * yreps, description='Replicating Membrane')
    outside = 'same resid as (x > {} or y > {})'.format(xmax, ymax)
    for x in range(xreps):
        xshift = -float(minmemb[0]) + (xmin + x * (size[0] + buffer))
        for y in range(yreps):
            yshift = -float(minmemb[1]) + (ymin + y * (size[1] + buffer))

            tile = template.copy()
            tile.moveBy([xshift, yshift, 0])
            tile.remove(outside, _logger=False)
            tile.set('segid', 'M{}'.format(x * yreps + y))  # Tiles are numbered in creation order

            megamemb.append(tile)
            bar.progress()
    bar.stop()
    # Membranes don't tile perfectly. Need to remove waters that clash with lipids of other tiles
    # Some clashes will still occur between periodic images however
    megamemb.remove('same fragment as water and within 1.5 of not water', _logger=False)
    return megamemb
Пример #8
0
def removeAtomsInHull(mol1, mol2, hullsel, removesel):
    """ Removes the atoms of mol2 which are located inside the convex hull of an atom selection of mol1.

    mol2 is processed residue by residue (as numbered by `sequenceID` from resid and segid). A residue counts as
    being inside the hull if adding its coordinates to the hull coordinates leaves the hull vertices unchanged.
    Of the atoms of such residues only the ones matching `removesel` are removed.

    Parameters
    ----------
    mol1 : :class:`Molecule <htmd.molecule.molecule.Molecule>` object
        Molecule from which the convex hull is calculated
    mol2 : :class:`Molecule <htmd.molecule.molecule.Molecule>` object
        Molecule whose atoms are tested against the hull. A copy of it is modified
    hullsel : str
        Atom selection string for the atoms of mol1 which define the convex hull.
        See more `here <http://www.ks.uiuc.edu/Research/vmd/vmd-1.9.2/ug/node89.html>`__
    removesel : str
        Atom selection string for the atoms of mol2 which may be removed if they are within the hull.
        See more `here <http://www.ks.uiuc.edu/Research/vmd/vmd-1.9.2/ug/node89.html>`__

    Returns
    -------
    newmol2 : Molecule
        The copy of mol2 after the removal
    numrem : int
        Number of residues of mol2 which were found inside the hull
    """
    # TODO: Look into Morphological Snakes
    from scipy.spatial import ConvexHull

    newmol2 = mol2.copy()
    # Vertices of the hull before anything is added to it
    hullcoords = mol1.get('coords', hullsel)
    reference = list(ConvexHull(hullcoords).vertices)

    fragment_of = sequenceID((newmol2.resid, newmol2.segid))
    inside = np.zeros(len(fragment_of), dtype=bool)
    numrem = 0
    for fragment in np.unique(fragment_of):
        members = np.where(fragment_of == fragment)[0]
        stacked = np.vstack((hullcoords, newmol2.get('coords', sel=members)))

        # An unchanged hull means that the whole fragment lies within it
        if list(ConvexHull(stacked).vertices) == reference:
            inside[members] = True
            numrem += 1

    candidates = newmol2.atomselect(removesel)
    newmol2.remove(inside & candidates)
    return newmol2, numrem
Пример #9
0
def embed(mol1, mol2, gap=1.3):
    '''Embeds one molecule into another removing overlaps.

    Both molecules are copied and the copy of mol1 is appended to the copy of mol2. The atoms which get removed
    are selected among the atoms coming from mol2 with the selection
    '(occupancy 2) and same beta as exwithin <gap> of (occupancy 1)', where occupancy temporarily tags the
    origin of each atom (1 for mol1, 2 for mol2) and beta temporarily holds the `sequenceID` of the resids.
    The original beta and occupancy values are restored before the removal.

    Parameters
    ----------
    mol1 : :class:`Molecule <htmd.molecule.molecule.Molecule>` object
        The first Molecule object
    mol2 : :class:`Molecule <htmd.molecule.molecule.Molecule>` object
        The second Molecule object
    gap : float
        Minimum space in A between atoms of the two molecules

    Return
    ------
    newmol : :class:`Molecule <htmd.molecule.molecule.Molecule>` object
        The resulting Molecule object

    Example
    -------
    >>> all = embed(memb, prot)
    '''
    first = mol1.copy()
    combined = mol2.copy()

    # Remember the real occupancies, then use occupancy to tag the origin of each atom
    saved_occ1 = first.get('occupancy')
    saved_occ2 = combined.get('occupancy')
    first.set('occupancy', 1)
    combined.set('occupancy', 2)

    combined.append(first)
    from_mol1 = combined.atomselect('occupancy 1')
    from_mol2 = combined.atomselect('occupancy 2')

    # Beta temporarily carries a running residue number so that the selection can work on whole residues
    saved_beta = combined.get('beta')
    combined.set('beta', sequenceID(combined.resid))
    query = '(occupancy 2) and same beta as exwithin ' + str(gap) + ' of (occupancy 1)'
    overlaps = combined.atomselect(query)

    # Put the original values back before removing anything
    combined.set('beta', saved_beta)
    combined.set('occupancy', saved_occ1, from_mol1)
    combined.set('occupancy', saved_occ2, from_mol2)

    combined.remove(overlaps, _logger=False)
    return combined
Пример #10
0
def removeAtomsInHull(mol1, mol2, hullsel, removesel):
    """ Calculates the convex hull of an atom selection in mol1 and removes the atoms of mol2 inside that hull.

    The test is done per residue of mol2 (as numbered by `sequenceID` from resid and segid): the residue
    coordinates are stacked onto the hull coordinates and the hull is recomputed. If the hull vertices did not
    change, the residue lies within the hull. Only atoms which additionally match `removesel` are removed.

    Parameters
    ----------
    mol1 : :class:`Molecule <htmd.molecule.molecule.Molecule>` object
        Molecule for which to calculate the convex hull
    mol2 : :class:`Molecule <htmd.molecule.molecule.Molecule>` object
        Molecule containing the atoms which are checked against the hull. The function works on a copy
    hullsel : str
        Atom selection string for atoms in mol1 from which to calculate the convex hull.
        See more `here <http://www.ks.uiuc.edu/Research/vmd/vmd-1.9.2/ug/node89.html>`__
    removesel : str
        Atom selection string for atoms in mol2 from which to remove the ones which are within the hull.
        See more `here <http://www.ks.uiuc.edu/Research/vmd/vmd-1.9.2/ug/node89.html>`__

    Returns
    -------
    newmol2 : Molecule
        The copy of mol2 after the removal
    numrem : int
        Number of residues of mol2 found within the hull
    """
    # TODO: Look into Morphological Snakes
    from scipy.spatial import ConvexHull

    result = mol2.copy()
    hullcoords = mol1.get('coords', hullsel)
    basevertices = list(ConvexHull(hullcoords).vertices)

    resnumber = sequenceID((result.resid, result.segid))
    within = np.zeros(len(resnumber), dtype=bool)
    count = 0
    for res in np.unique(resnumber):
        atoms = np.where(resnumber == res)[0]
        enlarged = ConvexHull(np.vstack((hullcoords, result.get('coords', sel=atoms))))
        if basevertices != list(enlarged.vertices):
            continue  # Adding the residue changed the hull, so it is not fully inside
        within[atoms] = True
        count += 1

    allowed = result.atomselect(removesel)
    result.remove(within & allowed)
    return result, count
Пример #11
0
    def _calcRadiiMapping(self, mol):
        """ Return the radii, the atom-to-group mapping and the selection used for the calculation.

        Radii come from a table keyed by the first character of the atom name, increased by
        `self._probeRadius`. The mapping is a running atom number when `self._mode` is 'atom' and the
        `sequenceID` of resid, chain and segid when it is 'residue'. Any other mode raises a ValueError, a first
        character missing from the table raises a KeyError.
        """
        sel = mol.atomselect(self._sel)

        table = {
            'C': 1.5,
            'F': 1.2,
            'H': 0.4,
            'N': 1.10,
            'O': 1.05,
            'S': 1.6,
            'P': 1.6,
        }
        initials = [name[0] for name in mol.name[sel]]
        table_radii = np.vectorize(table.__getitem__)(initials)
        radii = np.array(table_radii, np.float32) + self._probeRadius

        mode = self._mode
        if mode not in ('atom', 'residue'):
            raise ValueError('mode must be one of "residue", "atom". "{}" supplied'.format(self._mode))

        if mode == 'atom':
            atom_mapping = np.arange(np.sum(sel), dtype=np.int32)
        else:
            groups = sequenceID((mol.resid[sel], mol.chain[sel], mol.segid[sel]))
            atom_mapping = groups.astype(np.int32)
        return radii, atom_mapping, sel
Пример #12
0
def embed(mol1, mol2, gap=1.3):
    '''Embeds one molecule into another removing overlaps.

    Works on copies of both molecules. The copy of mol1 is appended to the copy of mol2 and the atoms matching
    '(occupancy 2) and same beta as exwithin <gap> of (occupancy 1)' are removed from the merged molecule.
    During the selection occupancy marks the origin of the atoms (1 for mol1, 2 for mol2) and beta holds the
    `sequenceID` of the resids. Both fields get their original values back before the atoms are removed.

    Parameters
    ----------
    mol1 : :class:`Molecule <htmd.molecule.molecule.Molecule>` object
        The first Molecule object
    mol2 : :class:`Molecule <htmd.molecule.molecule.Molecule>` object
        The second Molecule object
    gap : float
        Minimum space in A between atoms of the two molecules

    Return
    ------
    newmol : :class:`Molecule <htmd.molecule.molecule.Molecule>` object
        The resulting Molecule object

    Example
    -------
    >>> all = embed(memb, prot)
    '''
    appended = mol1.copy()
    merged = mol2.copy()

    # Occupancy is borrowed as an origin marker, so keep the real values for later
    original_occupancy = (appended.get('occupancy'), merged.get('occupancy'))
    appended.set('occupancy', 1)
    merged.set('occupancy', 2)

    merged.append(appended)
    origin_masks = (merged.atomselect('occupancy 1'), merged.atomselect('occupancy 2'))

    # Beta is borrowed as a running residue number
    original_beta = merged.get('beta')
    merged.set('beta', sequenceID(merged.resid))
    overlaps = merged.atomselect(
        '(occupancy 2) and same beta as exwithin ' + str(gap) + ' of (occupancy 1)')

    # Restore the borrowed fields
    merged.set('beta', original_beta)
    for occupancy, mask in zip(original_occupancy, origin_masks):
        merged.set('occupancy', occupancy, mask)

    merged.remove(overlaps, _logger=False)
    return merged
Пример #13
0
    def project(self, mol):
        """ Project molecule.

        The squared deviation of the parent class projection from a reference is summed over the columns
        belonging to the same atom. The reference is the mean of the projection over its first axis
        (`_refpos` 'mean') or the coordinate projection of the reference molecule (`_refpos` 'refmol').

        Parameters
        ----------
        mol : :class:`Molecule <htmd.molecule.molecule.Molecule>`
            A :class:`Molecule <htmd.molecule.molecule.Molecule>` object to project.

        Returns
        -------
        data : np.ndarray
            One column per atom if `_groupsel` is None, one column per residue holding the mean over the atoms
            of the residue if `_groupsel` is 'residue'. Nothing is returned for any other `_groupsel`.

        Raises
        ------
        RuntimeError
            If `_refpos` is neither 'mean' nor 'refmol'
        """
        projected = super().project(mol)

        # TODO: Could precalculate some stuff like this to speed it up
        refresids = sequenceID(self._refmol.resid)

        if self._refpos == 'refmol':
            reference = _MetricCoordinate(self._refmol, self._atomsel).project(self._refmol)
        elif self._refpos == 'mean':
            reference = np.mean(projected, axis=0)
        else:
            raise RuntimeError('Wrong refpos option')

        mapping = super().getMapping(self._refmol)
        # Column groups of the projection, one group per atom index, ordered by their first column
        columngroups = sorted(mapping.groupby('atomIndexes').groups.values(), key=lambda cols: cols[0])
        natoms = len(columngroups)
        nrows = projected.shape[0]

        sqdev = (projected - reference)**2
        atomfluct = np.zeros((nrows, natoms))
        atomresids = np.zeros(natoms, dtype=int)
        for col, columns in enumerate(columngroups):
            assert len(np.unique(mapping.atomIndexes[columns])) == 1
            atomfluct[:, col] = sqdev[:, columns].sum(axis=1)
            atomresids[col] = refresids[int(mapping.atomIndexes[columns[0]])]

        if self._groupsel == 'residue':
            residues = np.unique(atomresids)
            meanresfluct = np.zeros((nrows, len(residues)))
            for col, res in enumerate(residues):
                meanresfluct[:, col] = atomfluct[:, atomresids == res].mean(axis=1)
            return meanresfluct
        elif self._groupsel is None:
            return atomfluct
Пример #14
0
    def project(self, mol):
        """ Project molecule.

        Computes, for every row of the parent class projection, the summed squared deviation of each atom from
        a reference. With `_refpos` 'mean' the reference is the mean of the projection over its first axis,
        with 'refmol' it is the coordinate projection of `self._refmol`.

        Parameters
        ----------
        mol : :class:`Molecule <htmd.molecule.molecule.Molecule>`
            A :class:`Molecule <htmd.molecule.molecule.Molecule>` object to project.

        Returns
        -------
        data : np.ndarray
            An array containing the projected data: per atom if `_groupsel` is None, averaged over the atoms of
            each residue if `_groupsel` is 'residue'. Other `_groupsel` values return nothing.
        """
        data = super().project(mol)

        # TODO: Could precalculate some stuff like this to speed it up
        residue_of_atom = sequenceID(self._refmol.resid)

        refpos = self._refpos
        if refpos not in ('mean', 'refmol'):
            raise RuntimeError('Wrong refpos option')
        if refpos == 'mean':
            origin = np.mean(data, axis=0)
        else:
            origin = _MetricCoordinate(self._refmol, self._atomsel).project(self._refmol)

        mapping = super().getMapping(self._refmol)
        groups = mapping.groupby('atomIndexes').groups
        ordered = sorted(groups.values(), key=lambda members: members[0])

        deviation = (data - origin) ** 2
        atomfluct = np.zeros((data.shape[0], len(groups)))
        atomresids = np.zeros(len(groups), dtype=int)
        for position, members in enumerate(ordered):
            # All columns of a group have to belong to one and the same atom
            assert len(np.unique(mapping.atomIndexes[members])) == 1
            atomfluct[:, position] = deviation[:, members].sum(axis=1)
            atomresids[position] = residue_of_atom[int(mapping.atomIndexes[members[0]])]

        if self._groupsel == 'residue':
            uqresidues = np.unique(atomresids)
            meanresfluct = np.zeros((data.shape[0], len(uqresidues)))
            for position, residue in enumerate(uqresidues):
                meanresfluct[:, position] = atomfluct[:, atomresids == residue].mean(axis=1)
            return meanresfluct
        elif self._groupsel is None:
            return atomfluct
Пример #15
0
    def _viewStatesNGL(self, states, statetype, protein, ligand, mols, numsamples, gui=False):
        """ Create an NGL widget with one trajectory per requested state.

        Parameters
        ----------
        states
            The states to show. None means all of `range(self.macronum)`, a single int is wrapped in a list
        statetype
            Passed on to `self.getStates` and `self._nglButtons`
        protein
            If truthy, each trajectory starts from an empty Molecule and gets a cartoon representation
        ligand
            If truthy, used as atom selection: each trajectory starts from the first molecule without the
            selection and the state molecules are filtered down to the selection
        mols
            One molecule per state. If None they are fetched with `self.getStates` (at most 15 samples)
        numsamples
            Number of samples requested from `self.getStates`, capped at 15
        gui
            Passed on to `NGLWidget`

        Returns
        -------
        view
            The NGLWidget

        Raises
        ------
        NameError
            If both `protein` and `ligand` are None

        Notes
        -----
        The molecules in `mols` are modified in place (filter, chain, coords and segid).
        """
        from htmd.molecule.util import sequenceID
        if states is None:
            states = range(self.macronum)
        if isinstance(states, int):
            states = [states]
        if mols is None:
            mols = self.getStates(states, statetype, numsamples=min(numsamples, 15))
        hexcolors = {0: '#0000ff', 1: '#ff0000', 2: '#333333', 3: '#ff6600', 4: '#ffff00', 5: '#4c4d00', 6: '#b2b2cc',
                     7: '#33cc33', 8: '#ffffff', 9: '#ff3399', 10: '#33ccff'}
        if protein is None and ligand is None:
            raise NameError('Please provide either the "protein" or "ligand" parameter for viewStates.')

        from nglview import NGLWidget, HTMDTrajectory
        view = NGLWidget(gui=gui)
        ref = mols[0].copy()
        segoffset = 0  # Last segid handed out so far, keeps the segids increasing over frames and states
        for i, state in enumerate(states):
            statemol = mols[i]
            if protein:
                mol = Molecule()
            if ligand:
                mol = ref.copy()
                mol.remove(ligand, _logger=False)
                mol.dropFrames(keep=0)
                statemol.filter(ligand, _logger=False)
            statemol.set('chain', '{}'.format(state))

            # Append the frames one by one, each as a single-frame molecule
            allframes = statemol.coords
            for j in range(statemol.numFrames):
                statemol.coords = np.atleast_3d(allframes[:, :, j])
                if ligand:
                    statemol.set('segid', sequenceID(statemol.resid)+segoffset)
                    segoffset = int(statemol.segid[-1])
                mol.append(statemol)
            view.add_trajectory(HTMDTrajectory(mol))

            # Setting up representations
            if ligand:
                view[i].add_cartoon('protein', color='sstruc')
                view[i].add_hyperball(':{}'.format(state), color=hexcolors[i % len(hexcolors)])
            if protein:
                view[i].add_cartoon('protein', color='residueindex')

        self._nglButtons(view, statetype, states)
        return view
Пример #16
0
    def _viewStatesNGL(self, states, statetype, protein, ligand, mols, numsamples, gui=False):
        """ Create an NGL widget with one trajectory per requested state.

        Parameters
        ----------
        states
            The states to show. None means all of `range(self.macronum)`, a single int is wrapped in a list
        statetype
            Passed on to `self.getStates` and `self._nglButtons`
        protein
            If truthy, each trajectory starts from an empty Molecule and gets a cartoon representation
        ligand
            If truthy, used as atom selection: each trajectory starts from the first molecule without the
            selection and the state molecules are filtered down to the selection
        mols
            One molecule per state. If None they are fetched with `self.getStates` (at most 15 samples)
        numsamples
            Number of samples requested from `self.getStates`, capped at 15
        gui
            Passed on to `NGLWidget`

        Returns
        -------
        view
            The NGLWidget

        Raises
        ------
        NameError
            If both `protein` and `ligand` are None

        Notes
        -----
        The molecules in `mols` are modified in place (filter, chain, coords and segid).
        """
        from htmd.molecule.util import sequenceID
        if states is None:
            states = range(self.macronum)
        if isinstance(states, int):
            states = [states]
        if mols is None:
            mols = self.getStates(states, statetype, numsamples=min(numsamples, 15))
        hexcolors = {0: '#0000ff', 1: '#ff0000', 2: '#333333', 3: '#ff6600', 4: '#ffff00', 5: '#4c4d00', 6: '#b2b2cc',
                     7: '#33cc33', 8: '#ffffff', 9: '#ff3399', 10: '#33ccff'}
        if protein is None and ligand is None:
            raise NameError('Please provide either the "protein" or "ligand" parameter for viewStates.')

        from nglview import NGLWidget, HTMDTrajectory
        view = NGLWidget(gui=gui)
        ref = mols[0].copy()
        lastsegid = 0  # Offset added to the segids so that they keep increasing over frames and states
        for i, s in enumerate(states):
            current = mols[i]
            if protein:
                mol = Molecule()
            if ligand:
                mol = ref.copy()
                mol.remove(ligand, _logger=False)
                mol.dropFrames(keep=0)
                current.filter(ligand, _logger=False)
            current.set('chain', '{}'.format(s))

            # Split the frames apart and append them one after the other
            framecoords = current.coords
            for j in range(current.numFrames):
                current.coords = np.atleast_3d(framecoords[:, :, j])
                if ligand:
                    current.set('segid', sequenceID(current.resid)+lastsegid)
                    lastsegid = int(current.segid[-1])
                mol.append(current)
            view.add_trajectory(HTMDTrajectory(mol))

            # Setting up representations
            component = view[i]
            if ligand:
                component.add_cartoon('protein', color='sstruc')
                component.add_hyperball(':{}'.format(s), color=hexcolors[i % len(hexcolors)])
            if protein:
                view[i].add_cartoon('protein', color='residueindex')

        self._nglButtons(view, statetype, states)
        return view
Пример #17
0
def tileMembrane(memb, xmin, ymin, xmax, ymax):
    """ Tile the membrane in the X and Y dimensions to reach a specific size.

    The tile size is the extent of the water coordinates of `memb`. Copies of the membrane (with resids
    renumbered by `sequenceID`) are shifted with `moveBy`, stripped of the residues selected by
    'same resid as (x > xmax or y > ymax)', given the segid 'M<tile number>' and appended to a new Molecule.

    Returns
    -------
    megamemb :
        A big membrane Molecule
    """
    from htmd.progress.progress import ProgressBar
    template = memb.copy()
    template.resid = sequenceID(template.resid)

    watercoords = template.get('coords', 'water')
    lowest = np.min(watercoords, axis=0)
    minmemb = lowest.flatten()
    size = (np.max(watercoords, axis=0) - lowest).flatten()

    xreps = int(np.ceil((xmax - xmin) / size[0]))
    yreps = int(np.ceil((ymax - ymin) / size[1]))

    logger.info('Replicating Membrane {}x{}'.format(xreps, yreps))

    from htmd.molecule.molecule import Molecule
    megamemb = Molecule()
    bar = ProgressBar(xreps * yreps, description='Replicating Membrane')
    beyond_limits = 'same resid as (x > {} or y > {})'.format(xmax, ymax)
    for x in range(xreps):
        xpos = xmin + x * size[0]
        for y in range(yreps):
            ypos = ymin + y * size[1]

            tile = template.copy()
            tile.moveBy([-float(minmemb[0]) + xpos, -float(minmemb[1]) + ypos, 0])
            tile.remove(beyond_limits, _logger=False)
            tile.set('segid', 'M{}'.format(x * yreps + y))  # Tiles are numbered in creation order

            megamemb.append(tile)
            bar.progress()
    bar.stop()
    return megamemb
Пример #18
0
    def _processTraj(self, mol):
        """ Call `stride` for the atoms selected by `self._getSelections(mol)`.

        A filtered copy of `mol` is converted into the flat, int32 typed arguments which `stride` is called
        with. Each entry of its output is turned into a numpy array via `list` and collected in an object
        array, which is returned as is or, if `self._simple` is set, after being passed through `_ssmap`.
        """
        sel = self._getSelections(mol)

        filtered = mol.copy()
        filtered.filter(sel, _logger=False)

        # Ugly conversions to feed it to sensitive C code
        uqresid = np.unique(filtered.get('resid'))
        counts = np.bincount(filtered.resid)
        counts = counts[uqresid]
        residstr = [str(resid) for resid in filtered.resid]

        # Number the atoms of each residue starting from 0
        atomids = -np.ones(filtered.numAtoms, dtype=np.int32)
        for resid in uqresid:
            inresidue = filtered.resid == resid
            atomids[inresidue] = np.arange(np.sum(inresidue))

        arguments = (
            filtered.coords.ravel(order='F'),
            np.array([len(uqresid)], dtype=np.int32),
            counts.astype(np.int32),
            np.array(sequenceID(filtered.resid) - 1, dtype=np.int32),
            np.zeros(filtered.numAtoms, dtype=np.int32),
            atomids.astype(np.int32),
            filtered.chain[0],
            residstr,
            filtered.resname.tolist(),
            filtered.name.tolist(),
            1,
            filtered.numFrames,
            filtered.numAtoms,
        )
        sslist = stride(*arguments)

        ss = np.zeros(len(sslist), dtype=object)
        for i, entry in enumerate(sslist):
            ss[i] = np.array(list(entry))

        if not self._simple:
            return ss
        return _ssmap(ss)
Пример #19
0
def tileMembrane(memb, xmin, ymin, xmax, ymax):
    """ Tile the membrane in the X and Y dimensions to reach a specific size.

    One tile is as large as the extent of the water coordinates of `memb`. For every tile a copy of the
    membrane is shifted with `moveBy`, the residues selected by 'same resid as (x > xmax or y > ymax)' are
    removed from it and it is appended to the result under its own segid ('M0', 'M1', ...).

    Returns
    -------
    megamemb :
        A big membrane Molecule
    """
    from htmd.progress.progress import ProgressBar
    source = memb.copy()
    source.resid = sequenceID(source.resid)

    water = source.get('coords', 'water')
    watermin = np.min(water, axis=0)
    watermax = np.max(water, axis=0)
    minmemb = watermin.flatten()
    size = (watermax - watermin).flatten()

    xreps = int(np.ceil((xmax - xmin) / size[0]))
    yreps = int(np.ceil((ymax - ymin) / size[1]))

    logger.info('Replicating Membrane {}x{}'.format(xreps, yreps))

    from htmd.molecule.molecule import Molecule
    megamemb = Molecule()
    bar = ProgressBar(xreps * yreps, description='Replicating Membrane')
    # Grid positions in the order the tiles are created, x being the slow index
    positions = [(x, y) for x in range(xreps) for y in range(yreps)]
    for number, (x, y) in enumerate(positions):
        replica = source.copy()
        xpos = xmin + x * size[0]
        ypos = ymin + y * size[1]

        replica.moveBy([-float(minmemb[0]) + xpos, -float(minmemb[1]) + ypos, 0])
        replica.remove('same resid as (x > {} or y > {})'.format(xmax, ymax), _logger=False)
        replica.set('segid', 'M{}'.format(number))

        megamemb.append(replica)
        bar.progress()
    bar.stop()
    return megamemb
Пример #20
0
    def _calcarrays(self, mol):
        """ Collect the atom index arrays describing the selected residues.

        Works on a copy of `mol` filtered with `self.sel`, so every index refers to the filtered atoms.

        Returns
        -------
        ca_indices : np.ndarray of int32
            Indexes of the atoms named CA
        nco_indices : np.ndarray of int32
            One row per residue number (`sequenceID` of resid, chain and insertion) holding the index of the
            backbone atoms named N, C and O, or -1 where the residue has no such atom
        proline_indices : np.ndarray of int32
            1 for every CA atom whose resname is PRO, 0 otherwise
        chain_ids : np.ndarray of int32
            For every CA atom the position of its chain among the unique chains of the CA atoms
        """
        work = mol.copy()
        work.filter(self.sel, _logger=False)

        residues = sequenceID((work.resid, work.chain, work.insertion))
        backbone = work.atomselect('backbone')

        ca_indices = np.where(work.name == 'CA')[0].astype(np.int32)
        proline_indices = np.array(work.resname[ca_indices] == 'PRO', dtype=np.int32)
        chain_ids = np.unique(work.chain[ca_indices], return_inverse=True)[1]

        # Start from "missing" everywhere, then fill one column per backbone atom name
        nco_indices = -np.ones((residues.max()+1, 3), dtype=np.int32)
        for column, atomname in enumerate(('N', 'C', 'O')):
            found = np.where((work.name == atomname) & backbone)[0]
            nco_indices[residues[found], column] = found

        return ca_indices, nco_indices, proline_indices, chain_ids.astype(np.int32)
Пример #21
0
    def _calcarrays(self, mol):
        """ Build the index arrays of the CA atoms and of the backbone N, C, O atoms of every residue.

        `mol` is copied and filtered with `self.sel` first, the returned indexes refer to that filtered copy.
        Returned are, in this order: the CA atom indexes, an array with one row per residue number holding the
        indexes of its backbone N, C and O atoms (-1 if missing), a 0/1 array flagging the CA atoms of PRO
        residues and the chain number of every CA atom. All arrays are int32.
        """
        filtered = mol.copy()
        filtered.filter(self.sel, _logger=False)

        resnumber = sequenceID((filtered.resid, filtered.chain, filtered.insertion))

        is_backbone = filtered.atomselect('backbone')
        names = filtered.name
        calphas = np.where(names == 'CA')[0].astype(np.int32)
        ca_chains = filtered.chain[calphas]
        ca_resnames = filtered.resname[calphas]
        prolines = np.array(ca_resnames == 'PRO', dtype=np.int32)

        _, chain_numbers = np.unique(ca_chains, return_inverse=True)

        nitrogens = np.where((names == 'N') & is_backbone)[0]
        carbons = np.where((names == 'C') & is_backbone)[0]
        oxygens = np.where((names == 'O') & is_backbone)[0]

        # -1 marks residues which lack the respective backbone atom
        nco = -np.ones((resnumber.max() + 1, 3), dtype=np.int32)
        nco[resnumber[nitrogens], 0] = nitrogens
        nco[resnumber[carbons], 1] = carbons
        nco[resnumber[oxygens], 2] = oxygens

        return calphas, nco, prolines, chain_numbers.astype(np.int32)
Пример #22
0
def autoSegment(mol, sel='all', basename='P', spatial=True, spatialgap=4.0, field="segid"):
    """ Detects resid gaps in a selection and assigns incrementing segid to each fragment

    !!!WARNING!!! If you want to use atom selections like 'protein' or 'fragment',
    use this function on a Molecule containing only protein atoms, otherwise the protein selection can fail.

    The segid of the whole selection is first reset to `basename`, regardless of `field`. Segments are then
    numbered starting from 0.

    Parameters
    ----------
    mol : :class:`Molecule <htmd.molecule.molecule.Molecule>` object
        The Molecule object
    sel : str
        Atom selection string on which to check for gaps.
        See more `here <http://www.ks.uiuc.edu/Research/vmd/vmd-1.9.2/ug/node89.html>`__
    basename : str
        The basename for segment ids. For example if given 'P' it will name the segments 'P0', 'P1', ...
    spatial : bool
        Only considers a discontinuity in resid as a gap if the CA atoms have distance more than `spatialgap` Angstrom
    spatialgap : float
        The size of a spatial gap which validates a discontinuity (A)
    field : str
        Field to fix. Can be "segid" (default), "chain", or "both"

    Returns
    -------
    newmol : :class:`Molecule <htmd.molecule.molecule.Molecule>` object
        A new Molecule object with modified segids

    Raises
    ------
    RuntimeError
        If a generated segid already exists in the molecule, or if chains are requested and there are more
        segments than unused chain symbols.

    Example
    -------
    >>> newmol = autoSegment(mol,'chain B','P')
    """
    mol = mol.copy()

    idx = mol.atomselect(sel, indexes=True)
    rid = mol.get('resid', sel)
    residiff = np.diff(rid)
    gappos = np.where((residiff != 1) & (residiff != 0))[0]  # Points to the index before the gap!

    # Letters to be used for chains, if free: 0123456789abcd...ABCD..., minus chain symbols already used
    used_chains = set(mol.chain)
    chain_alphabet = list(string.digits + string.ascii_letters)
    available_chains = [x for x in chain_alphabet if x not in used_chains]

    idxstartseg = [idx[0]] + idx[gappos + 1].tolist()
    idxendseg = idx[gappos].tolist() + [idx[-1]]

    mol.set('segid', basename, sel)

    if len(gappos) == 0:
        mol.set('segid', basename+'0', sel)
        return mol

    if spatial:
        residbackup = mol.get('resid')
        mol.set('resid', sequenceID(mol.resid))  # Assigning unique resids to be able to do the distance selection

        todelete = []
        for i, (s, e) in enumerate(zip(idxstartseg[1:], idxendseg[:-1])):
            coords = mol.get('coords', sel='resid "{}" "{}" and name CA'.format(mol.resid[e], mol.resid[s]))
            if np.shape(coords) == (2, 3):
                dist = np.sqrt(np.sum((coords[0, :] - coords[1, :]) ** 2))
                if dist < spatialgap:
                    todelete.append(i)
        # Join the non-real gaps into segments.
        # The explicit integer dtype matters: an empty list would otherwise become a float64 array,
        # which np.delete does not accept as an index.
        todelete = np.array(todelete, dtype=int)
        idxstartseg = np.delete(idxstartseg, todelete + 1)
        idxendseg = np.delete(idxendseg, todelete)

        mol.set('resid', residbackup)  # Restoring the original resids

    for i, (s, e) in enumerate(zip(idxstartseg, idxendseg)):
        # Fixup segid
        if field in ['segid', 'both']:
            newsegid = basename + str(i)
            if np.any(mol.segid == newsegid):
                raise RuntimeError('Segid {} already exists in the molecule. Please choose different prefix.'.format(newsegid))
            logger.info('Created segment {} between resid {} and {}.'.format(newsegid, mol.resid[s], mol.resid[e]))
            mol.segid[s:e+1] = newsegid
        # Fixup chain
        if field in ['chain', 'both']:
            if i >= len(available_chains):
                raise RuntimeError('Not enough unused chain symbols to assign a chain to segment {}.'.format(i))
            newchainid = available_chains[i]
            logger.info('Set chain {} between resid {} and {}.'.format(newchainid, mol.resid[s], mol.resid[e]))
            mol.chain[s:e+1] = newchainid

    return mol
Пример #23
0
def autoSegment(mol, sel='all', basename='P', spatial=True, spatialgap=4):
    """ Detects resid gaps in a selection and assigns incrementing segid to each fragment

    !!!WARNING!!! If you want to use atom selections like 'protein' or 'fragment',
    use this function on a Molecule containing only protein atoms, otherwise the protein selection can fail.

    If no gap is found the whole selection simply gets `basename` as segid (without a number).
    Otherwise segments are numbered starting from 0.

    Parameters
    ----------
    mol : :class:`Molecule <htmd.molecule.molecule.Molecule>` object
        The Molecule object
    sel : str
        Atom selection on which to check for gaps.
    basename : str
        The basename for segment ids. For example if given 'P' it will name the segments 'P0', 'P1', ...
    spatial : bool
        Only considers a discontinuity in resid as a gap if the CA atoms have distance more than `spatialgap` Angstrom
    spatialgap : float
        The size of a spatial gap which validates a discontinuity

    Returns
    -------
    newmol : :class:`Molecule <htmd.molecule.molecule.Molecule>` object
        A new Molecule object with modified segids

    Raises
    ------
    RuntimeError
        If a generated segid already exists in the molecule.

    Example
    -------
    >>> newmol = autoSegment(mol,'chain B','P')
    """
    mol = mol.copy()

    idx = mol.atomselect(sel, indexes=True)
    rid = mol.get('resid', sel)
    residiff = np.diff(rid)
    gappos = np.where((residiff != 1) & (residiff != 0))[0]  # Points to the index before the gap!

    idxstartseg = [idx[0]] + idx[gappos + 1].tolist()
    idxendseg = idx[gappos].tolist() + [idx[-1]]

    mol.set('segid', basename, sel)

    if len(gappos) == 0:
        return mol

    if spatial:
        residbackup = mol.get('resid')
        mol.set('resid', sequenceID(mol.resid))  # Assigning unique resids to be able to do the distance selection

        todelete = []
        for i, (s, e) in enumerate(zip(idxstartseg[1:], idxendseg[:-1])):
            coords = mol.get('coords', sel='resid "{}" "{}" and name CA'.format(mol.resid[e], mol.resid[s]))
            if np.shape(coords) == (2, 3):
                dist = np.sqrt(np.sum((coords[0, :] - coords[1, :]) ** 2))
                if dist < spatialgap:
                    todelete.append(i)
        # Join the non-real gaps into segments.
        # The explicit integer dtype matters: an empty list would otherwise become a float64 array,
        # which np.delete does not accept as an index.
        todelete = np.array(todelete, dtype=int)
        idxstartseg = np.delete(idxstartseg, todelete + 1)
        idxendseg = np.delete(idxendseg, todelete)

        mol.set('resid', residbackup)  # Restoring the original resids

    for i, (s, e) in enumerate(zip(idxstartseg, idxendseg)):
        newsegid = basename + str(i)
        if np.any(mol.segid == newsegid):
            raise RuntimeError('Segid {} already exists in the molecule. Please choose different prefix.'.format(newsegid))
        logger.info('Created segment {} between resid {} and {}.'.format(newsegid, mol.resid[s], mol.resid[e]))
        mol.segid[s:e+1] = newsegid

    return mol
Пример #24
0
def tileMembrane(memb, xmin, ymin, xmax, ymax, buffer=1.5):
    """ Replicates a membrane patch on a grid in X and Y until it covers the requested area.

    The size of one tile is taken from the extent of the water coordinates of `memb`. Residues of a tile
    that stick out beyond `xmax` or `ymax` are removed, and tiles left without atoms are dropped.
    Each kept tile gets its own segids: 'M<k>' for non-water atoms and 'MW<k>' for waters.

    Parameters
    ----------
    memb : :class:`Molecule <htmd.molecule.molecule.Molecule>` object
        The membrane to be tiled
    xmin : float
        Minimum x coordinate
    ymin : float
        Minimum y coordinate
    xmax : float
        Maximum x coordinate
    ymax : float
        Maximum y coordinate
    buffer : float
        Buffer distance between tiles

    Returns
    -------
    megamemb :
        A big membrane Molecule
    """
    from tqdm import tqdm
    patch = memb.copy()
    # Give every residue a unique resid so that "same resid as" selections stay within one residue
    patch.resid = sequenceID((patch.resid, patch.insertion, patch.chain, patch.segid))

    watercoords = patch.get('coords', 'water')
    lowcorner = np.min(watercoords, axis=0)
    minmemb = lowcorner.flatten()
    size = (np.max(watercoords, axis=0) - lowcorner).flatten()

    xreps = int(np.ceil((xmax - xmin) / size[0]))
    yreps = int(np.ceil((ymax - ymin) / size[1]))

    logger.info('Replicating Membrane {}x{}'.format(xreps, yreps))

    from htmd.molecule.molecule import Molecule
    megamemb = Molecule()
    bar = tqdm(total=xreps * yreps, desc='Replicating Membrane')
    tileno = 0  # Counts only the tiles that are actually kept
    for ix in range(xreps):
        xpos = xmin + ix * (size[0] + buffer)
        for iy in range(yreps):
            tile = patch.copy()
            ypos = ymin + iy * (size[1] + buffer)

            tile.moveBy([-float(minmemb[0]) + xpos, -float(minmemb[1]) + ypos, 0])
            tile.remove('same resid as (x > {} or y > {})'.format(xmax, ymax), _logger=False)
            if tile.numAtoms != 0:
                tile.set('segid', 'M{}'.format(tileno), sel='not water')
                tile.set('segid', 'MW{}'.format(tileno), sel='water')

                megamemb.append(tile)
                tileno += 1
                bar.update(1)
    bar.close()

    # Tiles do not fit together perfectly: drop the waters that clash with lipids of neighbouring tiles.
    # Clashes between periodic images can still remain.
    megamemb.remove('same resid as water and within 1.5 of not water', _logger=False)
    return megamemb
Пример #25
0
def ionizePlace(mol,
                anion,
                cation,
                anionatom,
                cationatom,
                nanion,
                ncation,
                dfrom=5,
                dbetween=5,
                segname=None):
    """ Replaces randomly chosen water molecules with ions.

    Water oxygens further than `dfrom` from any non-water atom are picked at random, keeping the picked
    positions at least `dbetween` apart. The chosen waters are removed and ions are appended at the end of
    the molecule at their positions. The beta field is used temporarily to tag residues and is restored
    for the pre-existing atoms before returning.

    Parameters
    ----------
    mol : Molecule object
        The solvated system. It is not modified; a copy is returned.
    anion : str
        Resname given to the anions
    cation : str
        Resname given to the cations
    anionatom : str
        Atom name given to the anions
    cationatom : str
        Atom name given to the cations
    nanion : int
        Number of anions to place
    ncation : int
        Number of cations to place
    dfrom : float
        Minimum distance of the ions from non-water atoms
    dbetween : float
        Minimum distance between ions
    segname : str
        Passed to `_getSegname`.
        NOTE(review): the value returned by `_getSegname` is not used afterwards; the ions always get
        chain and segid 'I'. Confirm whether this is intended.

    Returns
    -------
    newmol : Molecule object
        A copy of `mol` with the ions placed

    Raises
    ------
    NameError
        If no suitable water exists, or if the ions could not be placed within the allowed number of attempts.
    """
    newmol = mol.copy()

    logger.info('Min distance of ions from molecule: ' + str(dfrom) + 'A')
    logger.info('Min distance between ions: ' + str(dbetween) + 'A')
    logger.info('Placing ' + str(nanion + ncation) + ' ions.')

    if (nanion + ncation) == 0:
        return newmol

    segname = _getSegname(newmol, segname)
    nions = nanion + ncation

    # Take a real copy: a plain reference would alias the array that is overwritten just below
    betabackup = np.array(newmol.beta)
    newmol.set('beta', sequenceID((newmol.resid, newmol.segid)))

    # Find water oxygens to replace with ions
    ntries = 0
    maxtries = 10
    while True:
        ionlist = np.empty(0, dtype=int)
        watindex = newmol.atomselect('noh and water and not (within ' +
                                     str(dfrom) + ' of not water)',
                                     indexes=True)

        if len(watindex) == 0:
            raise NameError(
                'No waters could be found further than ' + str(dfrom) +
                ' from other molecules to be replaced by ions. You might need to solvate with a bigger box.'
            )

        while len(ionlist) < nions and len(watindex) != 0:
            randwat = np.random.randint(len(watindex))
            thision = watindex[randwat]
            addit = True
            if len(ionlist) != 0:  # Check for distance from previous ions
                ionspos = newmol.get('coords', sel=ionlist)
                thispos = newmol.get('coords', sel=thision)
                dists = distance.cdist(np.atleast_2d(ionspos),
                                       np.atleast_2d(thispos),
                                       metric='euclidean')

                if np.any(dists < dbetween):
                    addit = False
            if addit:
                ionlist = np.append(ionlist, thision)
            # Drop the candidate in both cases. A rejected water stays too close to an already chosen ion
            # for the rest of this attempt, and keeping it in the pool could make this loop spin forever.
            watindex = np.delete(watindex, randwat)
        if len(ionlist) == nions:
            break

        ntries += 1
        if ntries == maxtries:
            raise NameError(
                'Failed to add ions after ' + str(maxtries) +
                ' attempts. Try decreasing the from and between parameters, '
                'decreasing ion concentration or making a larger water box.'
            )

    # Delete waters but keep their coordinates
    waterpos = np.atleast_2d(newmol.get('coords', ionlist))
    # beta currently holds the per-residue sequence index, so this selects the whole chosen water residues
    stringsel = 'beta' + ''.join(' ' + str(int(x)) for x in newmol.beta[ionlist])
    atmrem = np.sum(newmol.atomselect(stringsel))
    atmput = 3 * len(ionlist)  # Three atoms are expected per removed water
    assert atmrem == atmput, 'Removing {} atoms instead of {}. Report this bug.'.format(
        atmrem, atmput)
    sel = newmol.remove(stringsel, _logger=False)
    assert np.size(
        sel
    ) == atmput, 'Removed {} atoms instead of {}. Report this bug.'.format(
        np.size(sel), atmput)
    betabackup = np.delete(betabackup, sel)

    # Add the ions
    randidx = np.random.permutation(np.size(waterpos, 0))
    atom = Molecule()
    atom.record = 'ATOM'
    atom.chain = 'I'
    atom.segid = 'I'
    atom.occupancy = 0
    atom.beta = 0
    atom.insertion = ''
    atom.element = ''
    atom.altloc = ''

    for i in range(nanion):
        atom.name = anionatom
        atom.resname = anion
        atom.resid = newmol.resid[-1] + 1
        atom.coords = waterpos[randidx[i], :]
        newmol.insert(atom, len(newmol.name))
    for i in range(ncation):
        atom.name = cationatom
        atom.resname = cation
        atom.resid = newmol.resid[-1] + 1
        atom.coords = waterpos[randidx[i + nanion], :]
        newmol.insert(atom, len(newmol.name))

    # Restoring the original betas of the atoms that were there before; the appended ions are left out
    sel = np.ones(len(betabackup) + nions, dtype=bool)
    sel[len(betabackup)::] = False
    newmol.set('beta', betabackup, sel=sel)
    return newmol
Пример #26
0
def build(mol,
          ff=None,
          topo=None,
          param=None,
          prefix='structure',
          outdir='./build',
          caps=None,
          ionize=True,
          saltconc=0,
          saltanion=None,
          saltcation=None,
          disulfide=None,
          tleap=None,
          execute=True,
          atomtypes=None,
          offlibraries=None,
          gbsa=False,
          igb=2):
    """ Builds a system for AMBER

    Uses tleap to build a system for AMBER. Additionally it allows the user to ionize and add disulfide bridges.

    When `ionize` is True the system is built once without ions, the ions are placed, and then the whole
    build is repeated with ionization turned off. Disulfide bonds are only written in that final pass.

    Parameters
    ----------
    mol : :class:`Molecule <htmd.molecule.molecule.Molecule>` object
        The Molecule object containing the system
    ff : list of str
        A list of leaprc forcefield files.
        Use :func:`amber.listFiles <htmd.builder.amber.listFiles>` to get a list of available forcefield files.
        Default: :func:`amber.defaultFf <htmd.builder.amber.defaultFf>`
    topo : list of str
        A list of topology `prepi/prep/in` files.
        Use :func:`amber.listFiles <htmd.builder.amber.listFiles>` to get a list of available topology files.
        Default: :func:`amber.defaultTopo <htmd.builder.amber.defaultTopo>`
    param : list of str
        A list of parameter `frcmod` files.
        Use :func:`amber.listFiles <htmd.builder.amber.listFiles>` to get a list of available parameter files.
        Default: :func:`amber.defaultParam <htmd.builder.amber.defaultParam>`
    prefix : str
        The prefix for the generated pdb, prmtop and crd files
    outdir : str
        The path to the output directory
        Default: './build'
    caps : dict
        A dictionary with keys segids and values lists of strings describing the caps for a particular protein segment.
        e.g. caps['P'] = ['ACE', 'NME'] or caps['P'] = ['none', 'none']. Default: will apply ACE and NME caps to every
        protein segment.
    ionize : bool
        Enable or disable ionization
    saltconc : float
        Salt concentration to add to the system after neutralization.
    saltanion : {'Cl-'}
        The anion type. Please use only AMBER ion atom names.
    saltcation : {'Na+', 'K+', 'Cs+'}
        The cation type. Please use only AMBER ion atom names.
    disulfide : list of pairs of atomselection strings
        If None it will guess disulfide bonds. Otherwise provide a list pairs of atomselection strings for each pair of
        residues forming the disulfide bridge.
    tleap : str
        Path to tleap executable used to build the system for AMBER
    execute : bool
        Disable building. Will only write out the input script needed by tleap. Does not include ionization.
    atomtypes : list of triplets
        Custom atom types defined by the user as ('type', 'element', 'hybrid') triplets
        e.g. (('C1', 'C', 'sp2'), ('CI', 'C', 'sp3')). Check `addAtomTypes` in AmberTools docs.
    offlibraries : str or list
        A path or a list of paths to OFF library files. Check `loadOFF` in AmberTools docs.
    gbsa : bool
        Modify radii for GBSA implicit water model
    igb : int
        GB model. Select: 1 for mbondi, 2 and 5 for mbondi2, 7 for bondi and 8 for mbondi3.
        Check section 4. The Generalized Born/Surface Area Model of the AMBER manual.

    Returns
    -------
    molbuilt : :class:`Molecule <htmd.molecule.molecule.Molecule>` object
        The built system in a Molecule object. None if `execute` is False.

    Raises
    ------
    NameError
        If the tleap executable is not found, an input file could not be written, or tleap failed to run.
    RuntimeError
        If an atom type definition is not a triplet or an OFF library file does not exist.
    BuildError
        If the tleap log contains errors or no output files were generated.

    Example
    -------
    >>> from htmd.ui import *  # doctest: +SKIP
    >>> mol = Molecule("3PTB")
    >>> molbuilt = amber.build(mol, outdir='/tmp/build')  # doctest: +SKIP
    >>> # More complex example
    >>> disu = [['segid P and resid 157', 'segid P and resid 13'], ['segid K and resid 1', 'segid K and resid 25']]
    >>> molbuilt = amber.build(mol, outdir='/tmp/build', saltconc=0.15, disulfide=disu)  # doctest: +SKIP
    """
    # Remove pdb protein bonds as they can be regenerated by tleap. Keep non-protein bonds i.e. for ligands
    mol = mol.copy()
    _removeProteinBonds(mol)

    if tleap is None:
        tleap = _findTleap()
    else:
        if shutil.which(tleap) is None:
            raise NameError(
                'Could not find executable: `{}` in the PATH. Cannot build for AMBER.'
                .format(tleap))

    if not os.path.isdir(outdir):
        os.makedirs(outdir)
    _cleanOutDir(outdir)
    if ff is None:
        ff = defaultFf()
    if topo is None:
        topo = defaultTopo()
    if param is None:
        param = defaultParam()
    if caps is None:
        caps = _defaultProteinCaps(mol)

    _missingSegID(mol)
    _checkMixedSegment(mol)

    mol = _charmmLipid2Amber(mol)

    _applyProteinCaps(mol, caps)

    # The context manager makes sure tleap.in is closed even if one of the checks below raises
    with open(os.path.join(outdir, 'tleap.in'), 'w') as f:
        f.write('# tleap file generated by amber.build\n')

        # Printing out the forcefields
        if isinstance(ff, str):
            ff = [ff]
        for i, force in enumerate(ff):
            if not os.path.isfile(force):
                force = _locateFile(force, 'ff', tleap)
                if force is None:
                    continue
            newname = 'ff{}_{}'.format(i, os.path.basename(force))
            shutil.copy(force, os.path.join(outdir, newname))
            f.write('source {}\n'.format(newname))
        f.write('\n')

        if gbsa:
            gbmodels = {
                1: 'mbondi',
                2: 'mbondi2',
                5: 'mbondi2',
                7: 'bondi',
                8: 'mbondi3'
            }
            f.write('set default PBradii {}\n\n'.format(gbmodels[igb]))

        # Adding custom atom types
        if atomtypes is not None:
            atomtypes = ensurelist(tocheck=atomtypes[0], tomod=atomtypes)
            f.write('addAtomTypes {\n')
            for at in atomtypes:
                if len(at) != 3:
                    raise RuntimeError(
                        'Atom type definitions have to be triplets. Check the AMBER documentation.'
                    )
                f.write('    {{ "{}" "{}" "{}" }}\n'.format(at[0], at[1], at[2]))
            f.write('}\n\n')

        # Loading OFF libraries
        if offlibraries is not None:
            offlibraries = ensurelist(offlibraries)
            # Own counter: without it every library would reuse the last index of the forcefield loop
            for i, off in enumerate(offlibraries):
                if not os.path.isfile(off):
                    raise RuntimeError(
                        'Could not find off-library in location {}'.format(off))
                newname = 'offlib{}_{}'.format(i, os.path.basename(off))
                shutil.copy(off, os.path.join(outdir, newname))
                f.write('loadoff {}\n'.format(newname))

        # Loading frcmod parameters
        f.write('# Loading parameter files\n')
        for i, p in enumerate(param):
            if not os.path.isfile(p):
                p = _locateFile(p, 'param', tleap)
                if p is None:
                    continue
            newname = 'param{}_{}'.format(i, os.path.basename(p))
            shutil.copy(p, os.path.join(outdir, newname))
            f.write('loadamberparams {}\n'.format(newname))
        f.write('\n')

        # Loading prepi topologies
        f.write('# Loading prepi topologies\n')
        for i, t in enumerate(topo):
            if not os.path.isfile(t):
                t = _locateFile(t, 'topo', tleap)
                if t is None:
                    continue
            newname = 'topo{}_{}'.format(i, os.path.basename(t))
            shutil.copy(t, os.path.join(outdir, newname))
            f.write('loadamberprep {}\n'.format(newname))
        f.write('\n')

        f.write('# Loading the system\n')
        f.write('mol = loadpdb input.pdb\n\n')

        # Atoms which carry an atomtype are passed to tleap as mol2 files, one per segment
        if np.sum(mol.atomtype != '') != 0:
            logger.debug('Writing mol2 files for input to tleap.')
            segs = np.unique(mol.segid[mol.atomtype != ''])
            combstr = 'mol = combine {mol'
            for s in segs:
                name = 'segment{}'.format(s)
                mol2name = os.path.join(outdir, '{}.mol2'.format(name))
                mol.write(mol2name, (mol.atomtype != '') & (mol.segid == s))
                if not os.path.isfile(mol2name):
                    raise NameError(
                        'Could not write a mol2 file out of the given Molecule.')
                f.write('# Loading the rest of the system\n')
                f.write('{} = loadmol2 {}.mol2\n\n'.format(name, name))
                combstr += ' {}'.format(name)
            combstr += '}\n\n'
            f.write(combstr)

        # Write patches for disulfide bonds (only after ionizing)
        if not ionize:
            # TODO: Remove this once we deprecate the class
            from htmd.builder.builder import DisulfideBridge
            from htmd.molecule.molecule import UniqueResidueID
            if disulfide is not None and len(disulfide) != 0 and isinstance(
                    disulfide[0], DisulfideBridge):
                newdisu = []
                for d in disulfide:
                    r1 = UniqueResidueID.fromMolecule(
                        mol, 'resid {} and segname {}'.format(d.resid1, d.segid1))
                    r2 = UniqueResidueID.fromMolecule(
                        mol, 'resid {} and segname {}'.format(d.resid2, d.segid2))
                    newdisu.append([r1, r2])
                disulfide = newdisu
            # TODO: Remove up to here ----------------------

            if disulfide is not None and len(disulfide) != 0 and isinstance(
                    disulfide[0][0], str):
                disulfide = convertDisulfide(mol, disulfide)

            if disulfide is None:
                logger.info('Detecting disulfide bonds.')
                disulfide = detectDisulfideBonds(mol)

            # Fix structure to match the disulfide patching
            if len(disulfide) != 0:
                torem = np.zeros(mol.numAtoms, dtype=bool)
                f.write('# Adding disulfide bonds\n')
                # Convert to stupid amber residue numbering.
                # Only resnames change inside the loop, so this can be computed once.
                uqseqid = sequenceID(
                    (mol.resid, mol.insertion, mol.segid)) + mol.resid[0]
                for d in disulfide:
                    # Rename the residues to CYX if there is a disulfide bond
                    atoms1 = d[0].selectAtoms(mol, indexes=False)
                    atoms2 = d[1].selectAtoms(mol, indexes=False)
                    mol.resname[atoms1] = 'CYX'
                    mol.resname[atoms2] = 'CYX'
                    # Remove (eventual) HG hydrogens on these CYS (from proteinPrepare)
                    torem |= (atoms1 & (mol.name == 'HG')) | (atoms2 &
                                                              (mol.name == 'HG'))
                    uqres1 = int(np.unique(uqseqid[atoms1]))
                    uqres2 = int(np.unique(uqseqid[atoms2]))
                    f.write('bond mol.{}.SG mol.{}.SG\n'.format(uqres1, uqres2))
                f.write('\n')
                mol.remove(torem, _logger=False)

        f.write('# Writing out the results\n')
        f.write('saveamberparm mol ' + prefix + '.prmtop ' + prefix + '.crd\n')
        f.write('quit')

    # Printing and loading the PDB file. AMBER can work with a single PDB file if the segments are separate by TER
    logger.debug('Writing PDB file for input to tleap.')
    pdbname = os.path.join(outdir, 'input.pdb')

    # mol2 files have atomtype, here we only write parts not coming from mol2
    # We need to write the input.pdb at the end since we modify the resname for disulfide bridges in mol
    mol.write(pdbname, mol.atomtype == '')
    if not os.path.isfile(pdbname):
        raise NameError(
            'Could not write a PDB file out of the given Molecule.')

    molbuilt = None
    if execute:
        # Source paths of extra dirs (our dirs, not amber default)
        htmdamberdir = os.path.abspath(
            os.path.join(home(), 'builder', 'amberfiles'))
        sourcepaths = [htmdamberdir]
        sourcepaths += [
            os.path.join(htmdamberdir, os.path.dirname(fname)) for fname in ff
            if os.path.isfile(os.path.join(htmdamberdir, fname))
        ]
        extrasource = []
        for p in sourcepaths:
            extrasource.append('-I')
            extrasource.append('{}'.format(p))
        logpath = os.path.abspath(os.path.join(outdir, 'log.txt'))
        logger.info('Starting the build.')
        cmd = [tleap, '-f', './tleap.in']
        cmd[1:1] = extrasource
        currdir = os.getcwd()
        os.chdir(outdir)
        try:
            with open(logpath, 'w') as logfile:
                try:
                    call(cmd, stdout=logfile)
                except Exception as err:
                    raise NameError('tleap failed at execution') from err
            errors = _logParser(logpath)
        finally:
            # Always go back to where we started, also when tleap could not be run
            os.chdir(currdir)
        if errors:
            raise BuildError(errors + [
                'Check {} for further information on errors in building.'.
                format(logpath)
            ])
        logger.info('Finished building.')

        # tleap was told to write <prefix>.prmtop and <prefix>.crd, so these are the files to look for
        crdfile = os.path.join(outdir, prefix + '.crd')
        prmtopfile = os.path.join(outdir, prefix + '.prmtop')
        if os.path.isfile(crdfile) and os.path.isfile(prmtopfile) and \
                os.path.getsize(crdfile) != 0 and \
                os.path.getsize(prmtopfile) != 0:
            molbuilt = Molecule(prmtopfile)
            molbuilt.read(crdfile)
        else:
            raise BuildError(
                'No structure pdb/prmtop file was generated. Check {} for errors in building.'
                .format(logpath))

        if ionize:
            shutil.move(crdfile,
                        os.path.join(outdir, prefix + '.noions.crd'))
            shutil.move(prmtopfile,
                        os.path.join(outdir, prefix + '.noions.prmtop'))
            totalcharge = np.sum(molbuilt.charge)
            nwater = np.sum(molbuilt.atomselect('water and noh'))
            anion, cation, anionatom, cationatom, nanion, ncation = ionizef(
                totalcharge,
                nwater,
                saltconc=saltconc,
                anion=saltanion,
                cation=saltcation)
            newmol = ionizePlace(mol, anion, cation, anionatom, cationatom,
                                 nanion, ncation)
            # Redo the whole build but now with ions included.
            # gbsa and igb are forwarded so that the final system keeps the requested radii.
            return build(newmol,
                         ff=ff,
                         topo=topo,
                         param=param,
                         prefix=prefix,
                         outdir=outdir,
                         caps={},
                         ionize=False,
                         execute=execute,
                         saltconc=saltconc,
                         disulfide=disulfide,
                         tleap=tleap,
                         atomtypes=atomtypes,
                         offlibraries=offlibraries,
                         gbsa=gbsa,
                         igb=igb)

    # With execute=False nothing was built, so there is nothing to write out
    if molbuilt is not None:
        tmpbonds = molbuilt.bonds
        molbuilt.bonds = []  # Removing the bonds to speed up writing
        molbuilt.write(os.path.join(outdir, prefix + '.pdb'))
        molbuilt.bonds = tmpbonds  # Restoring the bonds
    return molbuilt
Пример #27
0
def tileMembrane(memb, xmin, ymin, xmax, ymax, buffer=1.5):
    """ Replicates a membrane patch on a grid in X and Y until it covers the requested area.

    The size of one tile is taken from the extent of the water coordinates of `memb`. Residues of a tile
    that stick out beyond `xmax` or `ymax` are removed, and tiles left without atoms are dropped.
    Each kept tile gets its own segids: 'M<k>' for non-water atoms and 'MW<k>' for waters.

    Parameters
    ----------
    memb : :class:`Molecule <htmd.molecule.molecule.Molecule>` object
        The membrane to be tiled
    xmin : float
        Minimum x coordinate
    ymin : float
        Minimum y coordinate
    xmax : float
        Maximum x coordinate
    ymax : float
        Maximum y coordinate
    buffer : float
        Buffer distance between tiles

    Returns
    -------
    megamemb :
        A big membrane Molecule
    """
    from tqdm import tqdm
    patch = memb.copy()
    # Give every residue a unique resid so that "same resid as" selections stay within one residue
    patch.resid = sequenceID((patch.resid, patch.insertion, patch.chain, patch.segid))

    watercoords = patch.get('coords', 'water')
    lowcorner = np.min(watercoords, axis=0)
    minmemb = lowcorner.flatten()
    size = (np.max(watercoords, axis=0) - lowcorner).flatten()

    xreps = int(np.ceil((xmax - xmin) / size[0]))
    yreps = int(np.ceil((ymax - ymin) / size[1]))

    logger.info('Replicating Membrane {}x{}'.format(xreps, yreps))

    from htmd.molecule.molecule import Molecule
    megamemb = Molecule()
    bar = tqdm(total=xreps * yreps, desc='Replicating Membrane')
    tileno = 0  # Counts only the tiles that are actually kept
    for ix in range(xreps):
        xpos = xmin + ix * (size[0] + buffer)
        for iy in range(yreps):
            tile = patch.copy()
            ypos = ymin + iy * (size[1] + buffer)

            tile.moveBy([-float(minmemb[0]) + xpos, -float(minmemb[1]) + ypos, 0])
            tile.remove('same resid as (x > {} or y > {})'.format(xmax, ymax), _logger=False)
            if tile.numAtoms != 0:
                tile.set('segid', 'M{}'.format(tileno), sel='not water')
                tile.set('segid', 'MW{}'.format(tileno), sel='water')

                megamemb.append(tile)
                tileno += 1
                bar.update(1)
    bar.close()

    # Tiles do not fit together perfectly: drop the waters that clash with lipids of neighbouring tiles.
    # Clashes between periodic images can still remain.
    megamemb.remove('same resid as water and within 1.5 of not water', _logger=False)
    return megamemb
Пример #28
0
def autoSegment(mol, sel='all', basename='P', spatial=True, spatialgap=4.0, field="segid"):
    """ Detects resid gaps in a selection and assigns incrementing segid to each fragment

    !!!WARNING!!! If you want to use atom selections like 'protein' or 'fragment',
    use this function on a Molecule containing only protein atoms, otherwise the protein selection can fail.

    The segid of the whole selection is first reset to `basename`, regardless of `field`. Segments are then
    numbered starting from 0.

    Parameters
    ----------
    mol : :class:`Molecule <htmd.molecule.molecule.Molecule>` object
        The Molecule object
    sel : str
        Atom selection on which to check for gaps.
    basename : str
        The basename for segment ids. For example if given 'P' it will name the segments 'P0', 'P1', ...
    spatial : bool
        Only considers a discontinuity in resid as a gap if the CA atoms have distance more than `spatialgap` Angstrom
    spatialgap : float
        The size of a spatial gap which validates a discontinuity (A)
    field : str
        Field to fix. Can be "segid" (default), "chain", or "both"

    Returns
    -------
    newmol : :class:`Molecule <htmd.molecule.molecule.Molecule>` object
        A new Molecule object with modified segids

    Raises
    ------
    RuntimeError
        If a generated segid already exists in the molecule, or if chains are requested and there are more
        segments than unused chain symbols.

    Example
    -------
    >>> newmol = autoSegment(mol,'chain B','P')
    """
    mol = mol.copy()

    idx = mol.atomselect(sel, indexes=True)
    rid = mol.get('resid', sel)
    residiff = np.diff(rid)
    gappos = np.where((residiff != 1) & (residiff != 0))[0]  # Points to the index before the gap!

    # Letters to be used for chains, if free: 0123456789abcd...ABCD..., minus chain symbols already used
    used_chains = set(mol.chain)
    chain_alphabet = list(string.digits + string.ascii_letters)
    available_chains = [x for x in chain_alphabet if x not in used_chains]

    idxstartseg = [idx[0]] + idx[gappos + 1].tolist()
    idxendseg = idx[gappos].tolist() + [idx[-1]]

    mol.set('segid', basename, sel)

    if len(gappos) == 0:
        mol.set('segid', basename+'0', sel)
        return mol

    if spatial:
        residbackup = mol.get('resid')
        mol.set('resid', sequenceID(mol.resid))  # Assigning unique resids to be able to do the distance selection

        todelete = []
        for i, (s, e) in enumerate(zip(idxstartseg[1:], idxendseg[:-1])):
            coords = mol.get('coords', sel='resid "{}" "{}" and name CA'.format(mol.resid[e], mol.resid[s]))
            if np.shape(coords) == (2, 3):
                dist = np.sqrt(np.sum((coords[0, :] - coords[1, :]) ** 2))
                if dist < spatialgap:
                    todelete.append(i)
        # Join the non-real gaps into segments.
        # The explicit integer dtype matters: an empty list would otherwise become a float64 array,
        # which np.delete does not accept as an index.
        todelete = np.array(todelete, dtype=int)
        idxstartseg = np.delete(idxstartseg, todelete + 1)
        idxendseg = np.delete(idxendseg, todelete)

        mol.set('resid', residbackup)  # Restoring the original resids

    for i, (s, e) in enumerate(zip(idxstartseg, idxendseg)):
        # Fixup segid
        if field in ['segid', 'both']:
            newsegid = basename + str(i)
            if np.any(mol.segid == newsegid):
                raise RuntimeError('Segid {} already exists in the molecule. Please choose different prefix.'.format(newsegid))
            logger.info('Created segment {} between resid {} and {}.'.format(newsegid, mol.resid[s], mol.resid[e]))
            mol.segid[s:e+1] = newsegid
        # Fixup chain
        if field in ['chain', 'both']:
            if i >= len(available_chains):
                raise RuntimeError('Not enough unused chain symbols to assign a chain to segment {}.'.format(i))
            newchainid = available_chains[i]
            logger.info('Set chain {} between resid {} and {}.'.format(newchainid, mol.resid[s], mol.resid[e]))
            mol.chain[s:e+1] = newchainid

    return mol
Пример #29
0
def build(mol, ff=None, topo=None, param=None, prefix='structure', outdir='./build', caps=None, ionize=True, saltconc=0,
          saltanion=None, saltcation=None, disulfide=None, tleap='tleap', execute=True):
    """ Builds a system for AMBER

    Uses tleap to build a system for AMBER. Additionally it allows the user to ionize and add disulfide bridges.
    When `ionize` is True the system is built once without ions, the ions are placed, and the whole build is
    repeated (recursively, with `ionize=False`) on the ionized system.

    Parameters
    ----------
    mol : :class:`Molecule <htmd.molecule.molecule.Molecule>` object
        The Molecule object containing the system. It is copied, the input is not modified.
    ff : list of str
        A list of leaprc forcefield files. A single string is also accepted.
        Default: ['leaprc.lipid14', 'leaprc.ff14SB', 'leaprc.gaff']
    topo : list of str
        A list of topology `prepi` files.
    param : list of str
        A list of parameter `frcmod` files. Files which cannot be copied to `outdir` are assumed to be standard
        AmberTools files and are referenced as given.
    prefix : str
        The prefix for the generated prmtop, crd and pdb files
    outdir : str
        The path to the output directory
        Default: './build'
    caps : dict
        A dictionary with keys segids and values lists of strings describing the caps for a particular protein segment.
        e.g. caps['P'] = ['ACE', 'NME'] or caps['P'] = ['none', 'none']. Default: will apply ACE and NME caps to every
        protein segment.
    ionize : bool
        Enable or disable ionization
    saltconc : float
        Salt concentration to add to the system after neutralization.
    saltanion : {'Cl-'}
        The anion type. Please use only AMBER ion atom names.
    saltcation : {'Na+', 'K+', 'Cs+'}
        The cation type. Please use only AMBER ion atom names.
    disulfide : iterable
        If None it will guess disulfide bonds. Otherwise an iterable of bond descriptions; each element is accessed
        through its `segid1`, `resid1`, `segid2` and `resid2` attributes.
    tleap : str
        Path to tleap executable used to build the system for AMBER
    execute : bool
        Disable building. Will only write out the input script needed by tleap. Does not include ionization.

    Returns
    -------
    molbuilt : :class:`Molecule <htmd.molecule.molecule.Molecule>` object
        The built system in a Molecule object, or None if `execute` is False.

    Raises
    ------
    NameError
        If the tleap executable is not found, if the tleap input structures cannot be written, if tleap cannot be
        executed or if tleap did not produce non-empty prmtop/crd files.

    Example
    -------
    >>> ffs = ['leaprc.lipid14', 'leaprc.ff14SB', 'leaprc.gaff']
    >>> molbuilt = amber.build(mol, ff=ffs, outdir='/tmp/build', saltconc=0.15)
    """
    # Remove pdb bonds!
    mol = mol.copy()
    mol.bonds = np.empty((0, 2), dtype=np.uint32)
    if shutil.which(tleap) is None:
        raise NameError('Could not find executable: `' + tleap + '` in the PATH. Cannot build for AMBER.')
    if not os.path.isdir(outdir):
        os.makedirs(outdir)
    _cleanOutDir(outdir)
    if ff is None:
        ff = ['leaprc.lipid14', 'leaprc.ff14SB', 'leaprc.gaff']
    elif isinstance(ff, str):
        ff = [ff]
    if topo is None:
        topo = []
    if param is None:
        param = []
    if caps is None:
        caps = _defaultProteinCaps(mol)

    _missingSegID(mol)
    _checkMixedSegment(mol)

    logger.info('Converting CHARMM membranes to AMBER.')
    mol = _charmmLipid2Amber(mol)

    #_checkProteinGaps(mol)
    _applyProteinCaps(mol, caps)

    # The context manager guarantees that tleap.in is flushed and closed even if one of the steps below raises
    with open(path.join(outdir, 'tleap.in'), 'w') as f:
        f.write('# tleap file generated by amber.build\n')

        # Printing out the forcefields
        for force in ff:
            f.write('source ' + force + '\n')
        f.write('\n')

        # Loading TIP3P water parameters
        f.write('# Loading ions and TIP3P water parameters\n')
        f.write('loadamberparams frcmod.ionsjc_tip3p\n\n')

        # Loading user parameters
        f.write('# Loading parameter files\n')
        for p in param:
            try:
                shutil.copy(p, outdir)
            except OSError:
                # Not a file we can copy: reference it as given and let tleap resolve it
                f.write('loadamberparams ' + p + '\n')
                logger.info("Path {:s} not found, assuming a standard AmberTools file.".
                            format(p))
            else:
                f.write('loadamberparams ' + path.basename(p) + '\n')
        f.write('\n')

        # Printing out topologies
        f.write('# Loading prepi topologies\n')
        for t in topo:
            shutil.copy(t, outdir)
            f.write('loadamberprep ' + path.basename(t) + '\n')
        f.write('\n')

        # Detect disulfide bonds (only in the final pass, i.e. once ionization is done or disabled).
        # This has to happen before input.pdb is written so that the renamed residues end up in the file.
        if disulfide is None and not ionize:
            logger.info('Detecting disulfide bonds.')
            disulfide = detectDisulfideBonds(mol)
            for d in disulfide:
                for segid, resid in ((d.segid1, d.resid1), (d.segid2, d.resid2)):
                    cys = 'segid {} and resid {}'.format(segid, resid)
                    # Rename the CYS to CYX if there is a disulfide bond
                    mol.set('resname', 'CYX', sel=cys)
                    # Remove (eventual) HG hydrogens on these CYS (from proteinPrepare)
                    mol.remove('name HG and ' + cys, _logger=False)

        # Printing and loading the PDB file. AMBER can work with a single PDB file if the segments are separate by TER
        logger.info('Writing PDB file for input to tleap.')
        pdbname = path.join(outdir, 'input.pdb')

        # mol2 files have atomtype, here we only write parts not coming from mol2
        mol.write(pdbname, mol.atomtype == '')
        if not os.path.isfile(pdbname):
            raise NameError('Could not write a PDB file out of the given Molecule.')
        f.write('# Loading the system\n')
        f.write('mol = loadpdb input.pdb\n\n')

        if np.sum(mol.atomtype != '') != 0:
            logger.info('Writing mol2 files for input to tleap.')
            segs = np.unique(mol.segid[mol.atomtype != ''])
            combstr = 'mol = combine {mol'
            for s in segs:
                name = 'segment{}'.format(s)
                mol2name = path.join(outdir, '{}.mol2'.format(name))
                mol.write(mol2name, (mol.atomtype != '') & (mol.segid == s))
                if not os.path.isfile(mol2name):
                    raise NameError('Could not write a mol2 file out of the given Molecule.')
                f.write('# Loading the rest of the system\n')
                f.write('{} = loadmol2 {}.mol2\n\n'.format(name, name))
                combstr += ' {}'.format(name)
            combstr += '}\n\n'
            f.write(combstr)

        # Write patches for disulfide bonds (only after ionizing)
        if not ionize and len(disulfide) != 0:
            f.write('# Adding disulfide bonds\n')
            # Convert to stupid amber residue numbering. Computed once: mol does not change inside the loop.
            uqseqid = sequenceID((mol.resid, mol.insertion, mol.segid)) + mol.resid[0] - 1
            for d in disulfide:
                uqres1 = int(np.unique(uqseqid[mol.atomselect('segid {} and resid {}'.format(d.segid1, d.resid1))]))
                uqres2 = int(np.unique(uqseqid[mol.atomselect('segid {} and resid {}'.format(d.segid2, d.resid2))]))
                f.write('bond mol.{}.SG mol.{}.SG\n'.format(uqres1, uqres2))
            f.write('\n')

        f.write('# Writing out the results\n')
        f.write('saveamberparm mol ' + prefix + '.prmtop ' + prefix + '.crd\n')
        f.write('quit')

    if not execute:
        # Only the tleap input was requested, there is no built system to return
        return None

    # Source paths of extra dirs (our dirs, not amber default)
    htmdamberdir = path.abspath(path.join(home(), 'builder', 'amberfiles'))
    sourcepaths = [htmdamberdir]
    sourcepaths += [path.join(htmdamberdir, path.dirname(force))
                    for force in ff if path.isfile(path.join(htmdamberdir, force))]
    cmd = [tleap]
    for p in sourcepaths:
        cmd += ['-I', '{}'.format(p)]
    cmd += ['-f', './tleap.in']

    logpath = os.path.abspath(os.path.join(outdir, 'log.txt'))
    logger.info('Starting the build.')
    currdir = os.getcwd()
    os.chdir(outdir)
    try:
        with open(logpath, 'w') as logfile:
            try:
                call(cmd, stdout=logfile)
            except Exception as err:
                raise NameError('tleap failed at execution') from err
    finally:
        # Always go back to the directory of the caller, also if tleap could not be started
        os.chdir(currdir)
    logger.info('Finished building.')

    # tleap writes its results using the prefix given in saveamberparm above
    crdfile = path.join(outdir, prefix + '.crd')
    prmtopfile = path.join(outdir, prefix + '.prmtop')
    if not all(path.isfile(x) and path.getsize(x) != 0 for x in (crdfile, prmtopfile)):
        raise NameError('No structure pdb/prmtop file was generated. Check {} for errors in building.'.format(logpath))
    molbuilt = Molecule(prmtopfile)
    molbuilt.read(crdfile)

    if ionize:
        shutil.move(crdfile, path.join(outdir, prefix + '.noions.crd'))
        shutil.move(prmtopfile, path.join(outdir, prefix + '.noions.prmtop'))
        totalcharge = np.sum(molbuilt.charge)
        nwater = np.sum(molbuilt.atomselect('water and noh'))
        anion, cation, anionatom, cationatom, nanion, ncation = ionizef(totalcharge, nwater, saltconc=saltconc, ff='amber', anion=saltanion, cation=saltcation)
        newmol = ionizePlace(mol, anion, cation, anionatom, cationatom, nanion, ncation)
        # Redo the whole build but now with ions included
        return build(newmol, ff=ff, topo=topo, param=param, prefix=prefix, outdir=outdir, caps={}, ionize=False,
                     execute=execute, saltconc=saltconc, disulfide=disulfide, tleap=tleap)
    molbuilt.write(path.join(outdir, prefix + '.pdb'))
    return molbuilt
Пример #30
0
def ionizePlace(mol, anion, cation, anionatom, cationatom, nanion, ncation, dfrom=5, dbetween=5, segname=None):
    """ Replaces randomly chosen water molecules by ions

    Water oxygens further than `dfrom` from any non-water atom are picked at random, keeping a minimum distance of
    `dbetween` between the picked positions. The waters they belong to are removed and single-atom ions are appended
    at the end of the molecule at the positions of the picked oxygens. The ions get chain 'I' and segid 'I'.

    Parameters
    ----------
    mol : :class:`Molecule <htmd.molecule.molecule.Molecule>` object
        The Molecule object. It is copied, the input is not modified.
    anion : str
        Residue name given to the anions
    cation : str
        Residue name given to the cations
    anionatom : str
        Atom name given to the anions
    cationatom : str
        Atom name given to the cations
    nanion : int
        Number of anions to place
    ncation : int
        Number of cations to place
    dfrom : float
        Minimum distance of the ions from any non-water atom
    dbetween : float
        Minimum distance between two ions
    segname : str
        Passed to `_getSegname`.
        NOTE(review): the resulting name is currently not used, the ions are always put in segid 'I'.

    Returns
    -------
    newmol : :class:`Molecule <htmd.molecule.molecule.Molecule>` object
        A new Molecule with the ions placed

    Raises
    ------
    NameError
        If no water is far enough from the rest of the system, or if the requested number of ions could not be
        placed in 10 attempts.
    """
    newmol = mol.copy()

    logger.info('Min distance of ions from molecule: ' + str(dfrom) + 'A')
    logger.info('Min distance between ions: ' + str(dbetween) + 'A')
    logger.info('Placing ' + str(nanion+ncation) + ' ions.')

    nions = nanion + ncation
    if nions == 0:
        return newmol

    segname = _getSegname(newmol, segname)

    # The beta field is temporarily abused to give every residue a unique number.
    # Work on a real copy: a plain reference could be overwritten by the set() below.
    betabackup = np.array(newmol.beta, copy=True)
    newmol.set('beta', sequenceID((newmol.resid, newmol.segid)))

    # Find water oxygens to replace with ions
    watindex = newmol.atomselect('noh and water and not (within ' + str(dfrom) + ' of not water)', indexes=True)
    if len(watindex) == 0:
        raise NameError('No waters could be found further than ' + str(dfrom) + ' from other molecules to be replaced by ions. You might need to solvate with a bigger box or disable the ionize property when building.')

    maxtries = 10
    for _ in range(maxtries):
        ionlist = np.empty(0, dtype=int)
        candidates = watindex
        while len(ionlist) < nions and len(candidates) != 0:
            randwat = np.random.randint(len(candidates))
            thision = candidates[randwat]
            # Every water is tried at most once per attempt. A rejected water stays too close to an already
            # picked one, so drawing it again could never succeed and would only risk an endless loop.
            candidates = np.delete(candidates, randwat)
            if len(ionlist) != 0:  # Check for distance from previous ions
                ionspos = newmol.get('coords', sel=ionlist)
                thispos = newmol.get('coords', sel=thision)
                dists = distance.cdist(np.atleast_2d(ionspos), np.atleast_2d(thispos), metric='euclidean')
                if np.any(dists < dbetween):
                    continue
            ionlist = np.append(ionlist, thision)
        if len(ionlist) == nions:
            break
    else:
        raise NameError('Failed to add ions after ' + str(maxtries) + " attempts. Try decreasing the 'dfrom' and 'dbetween' parameters, decreasing ion concentration or making a larger water box.")

    # Delete waters but keep their coordinates
    waterpos = np.atleast_2d(newmol.get('coords', ionlist))
    # Selecting on the unique residue number removes all atoms of the picked waters, not only the oxygens
    stringsel = 'beta ' + ' '.join(str(int(x)) for x in newmol.beta[ionlist])
    sel = newmol.atomselect(stringsel, indexes=True)
    newmol.remove(sel, _logger=False)
    betabackup = np.delete(betabackup, sel)

    # Add the ions
    randidx = np.random.permutation(np.size(waterpos, 0))
    atom = Molecule()
    atom.empty(1)
    atom.set('chain', 'I')
    atom.set('segid', 'I')

    for i in range(nanion):
        atom.set('name', anionatom)
        atom.set('resname', anion)
        atom.set('resid', newmol.resid[-1] + 1)
        atom.coords = waterpos[randidx[i], :]
        newmol.insert(atom, len(newmol.name))
    for i in range(ncation):
        atom.set('name', cationatom)
        atom.set('resname', cation)
        atom.set('resid', newmol.resid[-1] + 1)
        atom.coords = waterpos[randidx[i+nanion], :]
        newmol.insert(atom, len(newmol.name))

    # Restoring the original betas of all atoms except the newly appended ions
    sel = np.ones(len(betabackup) + nions, dtype=bool)
    sel[len(betabackup)::] = False
    newmol.set('beta', betabackup, sel=sel)
    return newmol
Пример #31
0
def autoSegment(mol, sel='all', basename='P', spatial=True, spatialgap=4):
    """ Detects resid gaps in a selection and assigns incrementing segid to each fragment

    !!!WARNING!!! If you want to use atom selections like 'protein' or 'fragment',
    use this function on a Molecule containing only protein atoms, otherwise the protein selection can fail.

    Parameters
    ----------
    mol : :class:`Molecule <htmd.molecule.molecule.Molecule>` object
        The Molecule object
    sel : str
        Atom selection on which to check for gaps.
    basename : str
        The basename for segment ids. For example if given 'P' it will name the segments 'P0', 'P1', ...
        If no gap is found the whole selection gets the segid `basename` without a number.
    spatial : bool
        Only considers a discontinuity in resid as a gap of the CA atoms have distance more than `spatialgap` Angstrom
    spatialgap : float
        The size of a spatial gap which validates a discontinuity

    Returns
    -------
    newmol : :class:`Molecule <htmd.molecule.molecule.Molecule>` object
        A new Molecule object with modified segids

    Raises
    ------
    RuntimeError
        If one of the new segment ids is already used in the molecule.

    Example
    -------
    >>> newmol = autoSegment(mol,'chain B','P')
    """
    mol = mol.copy()

    idx = mol.atomselect(sel, indexes=True)
    rid = mol.get(
        'resid', sel
    )  # TODO:maybe easier without sel, rid = mol.get('resid') and then no need of idx
    residiff = np.diff(rid)
    gappos = np.where((residiff != 1) & (residiff != 0))[
        0]  # Points to the index before the gap!

    # First and last atom index of every candidate segment
    idxstartseg = [idx[0]] + idx[gappos + 1].tolist()
    idxendseg = idx[gappos].tolist() + [idx[-1]]

    mol.set('segid', basename, sel)

    if len(gappos) == 0:
        return mol

    if spatial:
        residbackup = mol.get('resid')
        mol.set(
            'resid', sequenceID(mol.resid)
        )  # Assigning unique resids to be able to do the distance selection

        todelete = []
        # Gap number i lies between the end of segment i and the start of segment i+1
        for i, (s, e) in enumerate(zip(idxstartseg[1:], idxendseg[:-1])):
            coords = mol.get('coords',
                             sel='resid "{}" "{}" and name CA'.format(
                                 mol.resid[e], mol.resid[s]))
            if np.shape(coords) == (2, 3):
                dist = np.sqrt(np.sum((coords[0, :] - coords[1, :])**2))
                if dist < spatialgap:
                    todelete.append(i)
        # Join the non-real gaps into segments.
        # The explicit integer dtype is needed: an empty list would otherwise give a float array,
        # which np.delete does not accept as indices.
        todelete = np.array(todelete, dtype=int)
        idxstartseg = np.delete(idxstartseg, todelete + 1)
        idxendseg = np.delete(idxendseg, todelete)

        mol.set('resid', residbackup)  # Restoring the original resids

    for i, (s, e) in enumerate(zip(idxstartseg, idxendseg)):
        newsegid = basename + str(i)
        if np.any(mol.segid == newsegid):
            raise RuntimeError(
                'Segid {} already exists in the molecule. Please choose different prefix.'
                .format(newsegid))
        logger.info('Created segment {} between resid {} and {}.'.format(
            newsegid, mol.resid[s], mol.resid[e]))
        mol.segid[s:e + 1] = newsegid

    return mol
Пример #32
0
def build(mol,
          ff=None,
          topo=None,
          param=None,
          prefix='structure',
          outdir='./',
          caps=None,
          ionize=True,
          saltconc=0,
          saltanion=None,
          saltcation=None,
          disulfide=None,
          tleap='tleap',
          execute=True):
    """ Builds a system for AMBER

    Uses tleap to build a system for AMBER. Additionally it allows the user to ionize and add disulfide bridges.
    When `ionize` is True the system is built once without ions, the ions are placed, and the whole build is
    repeated (recursively, with `ionize=False`) on the ionized system.

    Parameters
    ----------
    mol : :class:`Molecule <htmd.molecule.molecule.Molecule>` object
        The Molecule object containing the system. It is copied, the input is not modified.
    ff : list of str
        A list of leaprc forcefield files. A single string is also accepted.
        Default: ['leaprc.lipid14', 'leaprc.ff14SB', 'leaprc.gaff']
    topo : list of str
        A list of topology `prepi` files.
    param : list of str
        A list of parameter `frcmod` files.
    prefix : str
        The prefix for the generated prmtop, crd and pdb files
    outdir : str
        The path to the output directory
    caps : dict
        A dictionary with keys segids and values lists of strings describing the caps of that segment.
        e.g. caps['P'] = ['ACE', 'NME']. Default: will apply ACE and NME caps to proteins and no caps
        to the rest.
    ionize : bool
        Enable or disable ionization
    saltconc : float
        Salt concentration to add to the system after neutralization.
    saltanion : {'Cl-'}
        The anion type. Please use only AMBER ion atom names.
    saltcation : {'Na+', 'K+', 'Cs+'}
        The cation type. Please use only AMBER ion atom names.
    disulfide : iterable
        If None it will guess disulfide bonds. Otherwise an iterable of bond descriptions; each element is accessed
        through its `segid1`, `resid1`, `segid2` and `resid2` attributes.
    tleap : str
        Path to tleap executable used to build the system for AMBER
    execute : bool
        Disable building. Will only write out the input script needed by tleap. Does not include ionization.

    Returns
    -------
    molbuilt : :class:`Molecule <htmd.molecule.molecule.Molecule>` object
        The built system in a Molecule object, or None if `execute` is False.

    Raises
    ------
    NameError
        If the tleap executable is not found, if input.pdb cannot be written, if tleap cannot be executed or if
        tleap did not produce non-empty prmtop/crd files.

    Example
    -------
    >>> ffs = ['leaprc.lipid14', 'leaprc.ff14SB', 'leaprc.gaff']
    >>> molbuilt = amber.build(mol, ff=ffs, outdir='/tmp/build', saltconc=0.15)
    """
    # Remove pdb bonds!
    mol = mol.copy()
    mol.bonds = np.empty((0, 2), dtype=np.uint32)
    if shutil.which(tleap) is None:
        raise NameError('Could not find executable: `' + tleap +
                        '` in the PATH. Cannot build for AMBER.')
    if not os.path.isdir(outdir):
        os.makedirs(outdir)
    _cleanOutDir(outdir)
    if ff is None:
        ff = ['leaprc.lipid14', 'leaprc.ff14SB', 'leaprc.gaff']
    elif isinstance(ff, str):
        ff = [ff]
    if topo is None:
        topo = []
    if param is None:
        param = []
    if caps is None:
        caps = _defaultCaps(mol)

    _missingSegID(mol)
    _checkMixedSegment(mol)

    logger.info('Converting CHARMM membranes to AMBER.')
    mol = _charmmLipid2Amber(mol)

    #_checkProteinGaps(mol)
    _applyCaps(mol, caps)

    # The context manager guarantees that tleap.in is flushed and closed even if one of the steps below raises
    with open(path.join(outdir, 'tleap.in'), 'w') as f:
        f.write('# tleap file generated by amber.build\n')

        # Printing out the forcefields
        for force in ff:
            f.write('source ' + force + '\n')
        f.write('\n')

        # Loading TIP3P water parameters
        f.write('# Loading ions and TIP3P water parameters\n')
        f.write('loadamberparams frcmod.ionsjc_tip3p\n\n')

        # Loading user parameters
        f.write('# Loading parameter files\n')
        for p in param:
            shutil.copy(p, outdir)
            f.write('loadamberparams ' + path.basename(p) + '\n')
        f.write('\n')

        # Printing out topologies
        f.write('# Loading prepi topologies\n')
        for t in topo:
            shutil.copy(t, outdir)
            f.write('loadamberprep ' + path.basename(t) + '\n')
        f.write('\n')

        # Only make disu bonds after ionizing!
        if disulfide is None and not ionize:
            logger.info('Detecting disulfide bonds.')
            disulfide = detectDisulfideBonds(mol)
        patchdisulfide = not ionize and len(disulfide) != 0

        # The bonded cysteines have to be fixed before input.pdb is written, otherwise the changes
        # never reach tleap.
        if patchdisulfide:
            for d in disulfide:
                for segid, resid in ((d.segid1, d.resid1), (d.segid2, d.resid2)):
                    cys = 'segid {} and resid {}'.format(segid, resid)
                    # Rename the CYS to CYX if there is a disulfide bond
                    mol.set('resname', 'CYX', sel=cys)
                    # A bonded cysteine has no HG hydrogen
                    mol.remove('name HG and ' + cys, _logger=False)

        # Printing and loading the PDB file. AMBER can work with a single PDB file if the segments are separate by TER
        logger.info('Writing PDB file for input to tleap.')
        pdbname = path.join(outdir, 'input.pdb')
        mol.write(pdbname)
        if not os.path.isfile(pdbname):
            raise NameError(
                'Could not write a PDB file out of the given Molecule.')
        f.write('# Loading the system\n')
        f.write('mol = loadpdb input.pdb\n\n')

        # Printing out patches for the disulfide bridges
        if patchdisulfide:
            f.write('# Adding disulfide bonds\n')
            # Convert to stupid amber residue numbering. Computed once: mol does not change inside the loop.
            uqseqid = sequenceID(
                (mol.resid, mol.insertion, mol.segid)) + mol.resid[0] - 1
            for d in disulfide:
                uqres1 = int(
                    np.unique(uqseqid[mol.atomselect(
                        'segid {} and resid {}'.format(d.segid1, d.resid1))]))
                uqres2 = int(
                    np.unique(uqseqid[mol.atomselect(
                        'segid {} and resid {}'.format(d.segid2, d.resid2))]))
                f.write('bond mol.{}.SG mol.{}.SG\n'.format(uqres1, uqres2))
            f.write('\n')

        f.write('# Writing out the results\n')
        f.write('saveamberparm mol ' + prefix + '.prmtop ' + prefix + '.crd\n')
        f.write('quit')

    if not execute:
        # Only the tleap input was requested, there is no built system to return
        return None

    # Source paths of extra dirs
    htmdamberdir = path.abspath(path.join(home(), 'builder', 'amberfiles'))
    sourcepaths = [htmdamberdir]
    sourcepaths += [path.join(htmdamberdir, path.dirname(force)) for force in ff]
    # Every flag and every path has to be its own argv entry. A single '-I a -I b' string would
    # reach tleap as one (meaningless) argument.
    cmd = [tleap]
    for p in sourcepaths:
        cmd += ['-I', '{}'.format(p)]
    cmd += ['-f', './tleap.in']

    logpath = os.path.abspath('{}/log.txt'.format(outdir))
    logger.info('Starting the build.')
    currdir = os.getcwd()
    os.chdir(outdir)
    try:
        with open(logpath, 'w') as logfile:
            try:
                call(cmd, stdout=logfile)
            except Exception as err:
                raise NameError('tleap failed at execution') from err
    finally:
        # Always go back to the directory of the caller, also if tleap could not be started
        os.chdir(currdir)
    logger.info('Finished building.')

    # tleap writes its results using the prefix given in saveamberparm above
    crdfile = path.join(outdir, prefix + '.crd')
    prmtopfile = path.join(outdir, prefix + '.prmtop')
    if not all(
            path.isfile(x) and path.getsize(x) != 0
            for x in (crdfile, prmtopfile)):
        raise NameError(
            'No structure pdb/prmtop file was generated. Check {} for errors in building.'
            .format(logpath))
    molbuilt = Molecule(prmtopfile)
    molbuilt.read(crdfile)

    if ionize:
        shutil.move(crdfile, path.join(outdir, prefix + '.noions.crd'))
        shutil.move(prmtopfile, path.join(outdir, prefix + '.noions.prmtop'))
        totalcharge = np.sum(molbuilt.charge)
        nwater = np.sum(molbuilt.atomselect('water and noh'))
        anion, cation, anionatom, cationatom, nanion, ncation = ionizef(
            totalcharge,
            nwater,
            saltconc=saltconc,
            ff='amber',
            anion=saltanion,
            cation=saltcation)
        newmol = ionizePlace(mol, anion, cation, anionatom, cationatom,
                             nanion, ncation)
        # Redo the whole build but now with ions included
        return build(newmol,
                     ff=ff,
                     topo=topo,
                     param=param,
                     prefix=prefix,
                     outdir=outdir,
                     caps={},
                     ionize=False,
                     execute=execute,
                     saltconc=saltconc,
                     disulfide=disulfide,
                     tleap=tleap)
    molbuilt.write(path.join(outdir, prefix + '.pdb'))
    return molbuilt
Пример #33
0
def PDBread(filename, mode='pdb', frame=None, topoloc=None):
    """ Reads a PDB or PDBQT file into a topology and a set of coordinates

    Parameters
    ----------
    filename : str
        Path to the file to read. If no such file exists and the string has exactly four characters it is treated
        as a possible PDB ID and resolved through `_getPDB`.
    mode : {'pdb', 'pdbqt'}
        Selects the fixed-width column layout used for the ATOM/HETATM records
    frame : object
        Not used by this function
    topoloc : object
        Not used by this function

    Returns
    -------
    topo : Topology
        Topology created from the parsed ATOM/HETATM table, with bonds taken from the CONECT records and the
        CRYST1 / REMARK 290 SMTRY information stored in its `crystalinfo` attribute
    traj : Trajectory
        Trajectory created from the coordinates, an array of shape (natoms, 3, nmodels)

    Raises
    ------
    ValueError
        If `mode` is neither 'pdb' nor 'pdbqt'
    """
    from pandas import read_fwf
    import io

    # Fail early with a clear message instead of an UnboundLocalError on the column specs further down
    if mode not in ('pdb', 'pdbqt'):
        raise ValueError('Unsupported mode "{}". Valid modes are "pdb" and "pdbqt".'.format(mode))

    istempfile = False
    if not os.path.isfile(filename) and len(filename) == 4:  # Could be a PDB id. Try to load it from the PDB website
        filename, istempfile = _getPDB(filename)

    """
    COLUMNS        DATA  TYPE    FIELD        DEFINITION
    -------------------------------------------------------------------------------------
     1 -  6        Record name   "ATOM  "
     7 - 11        Integer       serial       Atom  serial number.
    13 - 16        Atom          name         Atom name.
    17             Character     altLoc       Alternate location indicator.
    18 - 20        Residue name  resName      Residue name.
    22             Character     chainID      Chain identifier.
    23 - 26        Integer       resSeq       Residue sequence number.
    27             AChar         iCode        Code for insertion of residues.
    31 - 38        Real(8.3)     x            Orthogonal coordinates for X in Angstroms.
    39 - 46        Real(8.3)     y            Orthogonal coordinates for Y in Angstroms.
    47 - 54        Real(8.3)     z            Orthogonal coordinates for Z in Angstroms.
    55 - 60        Real(6.2)     occupancy    Occupancy.
    61 - 66        Real(6.2)     tempFactor   Temperature  factor.
    77 - 78        LString(2)    element      Element symbol, right-justified.
    79 - 80        LString(2)    charge       Charge  on the atom.
    """
    if mode == 'pdb':
        topocolspecs = [(0, 6), (6, 11), (12, 16), (16, 17), (17, 21), (21, 22), (22, 26), (26, 27),
                        (54, 60), (60, 66), (72, 76), (76, 78), (78, 80)]
        toponames = ('record', 'serial', 'name', 'altloc', 'resname', 'chain', 'resid', 'insertion',
                     'occupancy', 'beta', 'segid', 'element', 'charge')
    elif mode == 'pdbqt':
        # http://autodock.scripps.edu/faqs-help/faq/what-is-the-format-of-a-pdbqt-file
        # The rigid root contains one or more PDBQT-style ATOM or HETATM records. These records resemble their
        # traditional PDB counterparts, but diverge in columns 71-79 inclusive (where the first character in the line
        # corresponds to column 1). The partial charge is stored in columns 71-76 inclusive (in %6.3f format, i.e.
        # right-justified, 6 characters wide, with 3 decimal places). The AutoDock atom-type is stored in columns 78-79
        # inclusive (in %-2.2s format, i.e. left-justified and 2 characters wide..
        topocolspecs = [(0, 6), (6, 11), (12, 16), (16, 17), (17, 21), (21, 22), (22, 26), (26, 27),
                        (54, 60), (60, 66), (70, 76), (77, 79)]
        toponames = ('record', 'serial', 'name', 'altloc', 'resname', 'chain', 'resid', 'insertion',
                     'occupancy', 'beta', 'charge', 'element')
    # Only referenced by the disabled `dtype=` argument of the topology read_fwf call below.
    # The builtin `int` is used because the `np.int` alias no longer exists in recent NumPy releases.
    topodtypes = {
        'record': str,
        'serial': int,
        'name': str,
        'altloc': str,
        'resname': str,
        'chain': str,
        'resid': int,
        'insertion': str,
        'occupancy': np.float32,
        'beta': np.float32,
        'segid': str,
        'element': str,
        'charge': np.float32,
        'chargesign': str,
    }
    coordcolspecs = [(30, 38), (38, 46), (46, 54)]
    coordnames = ('x', 'y', 'z')

    """
    COLUMNS       DATA  TYPE      FIELD        DEFINITION
    -------------------------------------------------------------------------
     1 -  6        Record name    "CONECT"
     7 - 11        Integer        serial       Atom  serial number
    12 - 16        Integer        serial       Serial number of bonded atom
    17 - 21        Integer        serial       Serial  number of bonded atom
    22 - 26        Integer        serial       Serial number of bonded atom
    27 - 31        Integer        serial       Serial number of bonded atom
    """
    bondcolspecs = [(6, 11), (11, 16), (16, 21), (21, 26), (26, 31)]
    bondnames = ('serial1', 'serial2', 'serial3', 'serial4', 'serial5')

    """
    COLUMNS       DATA  TYPE    FIELD          DEFINITION
    -------------------------------------------------------------
     1 -  6       Record name   "CRYST1"
     7 - 15       Real(9.3)     a              a (Angstroms).
    16 - 24       Real(9.3)     b              b (Angstroms).
    25 - 33       Real(9.3)     c              c (Angstroms).
    34 - 40       Real(7.2)     alpha          alpha (degrees).
    41 - 47       Real(7.2)     beta           beta (degrees).
    48 - 54       Real(7.2)     gamma          gamma (degrees).
    56 - 66       LString       sGroup         Space  group.
    67 - 70       Integer       z              Z value.
    """
    cryst1colspecs = [(6, 15), (15, 24), (24, 33), (33, 40), (40, 47), (47, 54), (55, 66), (66, 70)]
    cryst1names = ('a', 'b', 'c', 'alpha', 'beta', 'gamma', 'sGroup', 'z')

    """
    Guessing columns for REMARK 290 SMTRY from the example since the specs don't define them
              1         2         3         4         5         6         7
    01234567890123456789012345678901234567890123456789012345678901234567890
    REMARK 290   SMTRY1   1  1.000000  0.000000  0.000000        0.00000
    REMARK 290   SMTRY2   1  0.000000  1.000000  0.000000        0.00000
    REMARK 290   SMTRY3   1  0.000000  0.000000  1.000000        0.00000
    REMARK 290   SMTRY1   2 -1.000000  0.000000  0.000000       36.30027
    REMARK 290   SMTRY2   2  0.000000 -1.000000  0.000000        0.00000
    REMARK 290   SMTRY3   2  0.000000  0.000000  1.000000       59.50256
    REMARK 290   SMTRY1   3 -1.000000  0.000000  0.000000        0.00000
    REMARK 290   SMTRY2   3  0.000000  1.000000  0.000000       46.45545
    REMARK 290   SMTRY3   3  0.000000  0.000000 -1.000000       59.50256
    REMARK 290   SMTRY1   4  1.000000  0.000000  0.000000       36.30027
    REMARK 290   SMTRY2   4  0.000000 -1.000000  0.000000       46.45545
    REMARK 290   SMTRY3   4  0.000000  0.000000 -1.000000        0.00000

    Guessing columns for REMARK 350   BIOMT from the example since the specs don't define them
    REMARK 350   BIOMT1   1 -0.981559  0.191159  0.000000        0.00000
    REMARK 350   BIOMT2   1 -0.191159 -0.981559  0.000000        0.00000
    REMARK 350   BIOMT3   1  0.000000  0.000000  1.000000      -34.13878
    REMARK 350   BIOMT1   2 -0.838088  0.545535  0.000000        0.00000
    REMARK 350   BIOMT2   2 -0.545535 -0.838088  0.000000        0.00000
    REMARK 350   BIOMT3   2  0.000000  0.000000  1.000000      -32.71633
    """
    symmetrycolspecs = [(20, 23), (23, 33), (33, 43), (43, 53), (53, 68)]
    symmetrynames = ('idx', 'rot1', 'rot2', 'rot3', 'trans')

    def concatCoords(coords, coorddata):
        # Parses the buffered ATOM/HETATM lines of one model and stacks them as a new frame along the third axis
        if coorddata.tell() != 0:  # Not empty
            coorddata.seek(0)
            parsedcoor = read_fwf(coorddata, colspecs=coordcolspecs, names=coordnames, na_values=_NA_VALUES, keep_default_na=False)
            if coords is None:
                coords = np.zeros((len(parsedcoor), 3, 0), dtype=np.float32)
            currcoords = np.vstack((parsedcoor.x, parsedcoor.y, parsedcoor.z)).T
            if coords.shape[0] != currcoords.shape[0]:
                logger.warning('Different number of atoms read in different MODELs in the PDB file. '
                               'Keeping only the first {} model(s)'.format(coords.shape[2]))
                return coords
            coords = np.append(coords, currcoords[:, :, np.newaxis], axis=2)
        return coords

    teridx = []
    currter = 0
    topoend = False

    cryst1data = io.StringIO()
    topodata = io.StringIO()
    conectdata = io.StringIO()
    coorddata = io.StringIO()
    symmetrydata = io.StringIO()

    coords = None

    # The file is only needed while sorting its lines into the per-record buffers. A downloaded temporary file
    # is removed right afterwards, also when reading or parsing raises.
    try:
        with open(filename, 'r') as f:
            for line in f:
                isatom = line.startswith(('ATOM', 'HETATM'))
                if line.startswith('CRYST1'):
                    cryst1data.write(line)
                if isatom:
                    coorddata.write(line)
                if isatom and not topoend:  # The topology is only read from the first model
                    topodata.write(line)
                    teridx.append(str(currter))
                if line.startswith('TER'):
                    currter += 1
                if (mode == 'pdb' and line.startswith('END')) or \
                   (mode == 'pdbqt' and line.startswith('ENDMDL')):  # pdbqt should not stop reading at ENDROOT or ENDBRANCH
                    topoend = True
                if line.startswith('CONECT'):
                    conectdata.write(line)
                if line.startswith('MODEL'):
                    coords = concatCoords(coords, coorddata)
                    coorddata = io.StringIO()
                if line.startswith('REMARK 290   SMTRY'):  # TODO: Support BIOMT fields. It's a bit more complicated. Can't be done with pandas
                    symmetrydata.write(line)
    finally:
        if istempfile:
            os.unlink(filename)

    cryst1data.seek(0)
    topodata.seek(0)
    conectdata.seek(0)
    symmetrydata.seek(0)

    coords = concatCoords(coords, coorddata)

    parsedbonds = read_fwf(conectdata, colspecs=bondcolspecs, names=bondnames, na_values=_NA_VALUES, keep_default_na=False)
    parsedcryst1 = read_fwf(cryst1data, colspecs=cryst1colspecs, names=cryst1names, na_values=_NA_VALUES, keep_default_na=False)
    parsedtopo = read_fwf(topodata, colspecs=topocolspecs, names=toponames, na_values=_NA_VALUES, keep_default_na=False)  #, dtype=topodtypes)
    parsedsymmetry = read_fwf(symmetrydata, colspecs=symmetrycolspecs, names=symmetrynames, na_values=_NA_VALUES, keep_default_na=False)

    # Fixing PDB format charges which can come after the number
    if parsedtopo.charge.dtype == 'object':
        # `== True` is deliberate: str.match gives NaN for non-string entries and those must count as no match
        minuses = np.where(parsedtopo.charge.str.match(r'\d\-') == True)[0]
        pluses = np.where(parsedtopo.charge.str.match(r'\d\+') == True)[0]
        for m in minuses:
            parsedtopo.loc[m, 'charge'] = int(parsedtopo.charge[m][0]) * -1
        for p in pluses:
            parsedtopo.loc[p, 'charge'] = int(parsedtopo.charge[p][0])
        parsedtopo.loc[parsedtopo.charge.isnull(), 'charge'] = 0
    # Fixing hexadecimal index and resids
    # Support for reading hexadecimal
    if parsedtopo.serial.dtype == 'object':
        logger.warning('Non-integer values were read from the PDB "serial" field. Dropping PDB values and assigning new ones.')
        parsedtopo.serial = sequenceID(parsedtopo.serial)
    if parsedtopo.resid.dtype == 'object':
        logger.warning('Non-integer values were read from the PDB "resid" field. Dropping PDB values and assigning new ones.')
        parsedtopo.resid = sequenceID(parsedtopo.resid)

    if parsedtopo.insertion.dtype == np.float64 and not np.all(np.isnan(parsedtopo.insertion)):
        # Trying to minimize damage from resids overflowing into insertion field
        logger.warning('Integer values detected in "insertion" field. Your resids might be overflowing. Take care')
        parsedtopo.insertion = [str(int(x)) if not np.isnan(x) else '' for x in parsedtopo.insertion]

    if len(parsedtopo) > 99999:
        logger.warning('Reading PDB file with more than 99999 atoms. Bond information can be wrong.')

    crystalinfo = {}
    if len(parsedcryst1):
        crystalinfo = parsedcryst1.iloc[0].to_dict()
        if isinstance(crystalinfo['sGroup'], str) or not np.isnan(crystalinfo['sGroup']):
            crystalinfo['sGroup'] = crystalinfo['sGroup'].split()
    if len(parsedsymmetry):
        # Every symmetry operation is stored as three consecutive SMTRY rows
        numcopies = int(len(parsedsymmetry)/3)
        crystalinfo['numcopies'] = numcopies
        crystalinfo['rotations'] = parsedsymmetry[['rot1', 'rot2', 'rot3']].values.reshape((numcopies, 3, 3))
        crystalinfo['translations'] = parsedsymmetry['trans'].values.reshape((numcopies, 3))

    topo = Topology(parsedtopo)

    # Bond formatting part
    # TODO: Speed this up. This is the slowest part for large PDB files. From 700ms to 7s
    serials = parsedtopo.serial.values
    if np.max(parsedbonds.max()) > np.max(serials):
        logger.info('Bond indexes in PDB file exceed atom indexes. For safety we will discard all bond information.')
    else:
        # Lookup table from PDB serial number to 0-based atom index. Serials without an atom stay NaN
        mapserials = np.empty(np.max(serials)+1)
        mapserials[:] = np.nan
        mapserials[serials] = list(range(len(serials)))
        for i in range(len(parsedbonds)):
            row = parsedbonds.loc[i].tolist()
            for b in range(1, 5):
                if not np.isnan(row[b]):
                    topo.bonds.append([int(row[0]), int(row[b])])
        topo.bonds = np.array(topo.bonds, dtype=np.uint32)
        if topo.bonds.size != 0:
            mappedbonds = mapserials[topo.bonds[:]]
            wrongidx, _ = np.where(np.isnan(mappedbonds))  # Some PDBs have bonds to non-existing serials... go figure
            if len(wrongidx):
                logger.info('Discarding {} bonds to non-existing indexes in the PDB file.'.format(len(wrongidx)))
            mappedbonds = np.delete(mappedbonds, wrongidx, axis=0)
            topo.bonds = np.array(mappedbonds, dtype=np.uint32)

    if len(topo.segid) == 0 and currter != 0:  # If no segid was read, use the TER rows to define segments
        topo.segid = teridx

    topo.crystalinfo = crystalinfo
    traj = Trajectory(coords=coords)
    return topo, traj
Пример #34
0
def _charmmLipid2Amber(mol):
    """ Translate the lipids of a CHARMM membrane to AMBER residue names, atom names and atom order

    Parameters
    ----------
    mol : :class:`Molecule <htmd.molecule.molecule.Molecule>` object
        The Molecule object containing the membrane. It is not modified, the work is done on a copy.

    Returns
    -------
    newmol : :class:`Molecule <htmd.molecule.molecule.Molecule>` object
        A new Molecule object with the membrane converted to AMBER
    """
    translation = _readcsvdict(os.path.join(home(), 'builder', 'charmmlipid2amber.csv'))

    natoms = mol.numAtoms
    # Sort key of every atom. Translated atoms get the position given by their rule, offset per residue below
    neworder = np.array(list(range(natoms)))

    isfirst = np.zeros(natoms, dtype=bool)   # first atom of a translated block (with or without ter)
    terstart = np.zeros(natoms, dtype=bool)  # first atom of a block flagged with ter
    terend = np.zeros(natoms, dtype=bool)    # last atom of a block flagged with ter

    mol = mol.copy()
    incrresids = sequenceID((mol.resid, mol.insertion, mol.segid))

    # Walk through the translation table one CHARMM residue name at a time
    for res, atommap in translation.items():
        inresidue = mol.resname == res
        if not np.any(inresidue):
            continue
        # Match against a snapshot of the names, otherwise atoms renamed below could be matched a second time
        names = mol.name.copy()

        for atom, rule in atommap.items():
            hits = np.zeros(len(names), dtype=bool)
            hits[inresidue] = names[inresidue] == atom

            mol.set('resname', rule.replaceresname, sel=hits)
            mol.set('name', rule.replaceatom, sel=hits)
            neworder[hits] = rule.order

            opensblock = rule.order == 0
            closesblock = rule.order == rule.natoms - 1
            if opensblock:
                isfirst[hits] = True
                if rule.ter:
                    terstart[hits] = True
            if closesblock and rule.ter:
                terend[hits] = True

    # Shift the per-residue positions by the index where each original residue starts
    for uq in np.unique(incrresids[isfirst]):
        members = np.where(incrresids == uq)[0]
        beg = members[0]
        fin = members[-1] + 1
        neworder[beg:fin] = neworder[beg:fin] + beg
    idx = np.argsort(neworder)

    _reorderMol(mol, idx)

    # Locations of the ter-delimited blocks after the reordering
    terstart = np.where(terstart[idx])[0]
    terend = np.where(terend[idx])[0]

    for i, first in enumerate(terstart):
        lipid = np.zeros(len(mol.resid), dtype=bool)
        lipid[first:terend[i] + 1] = True
        mol.set('resid', sequenceID(mol.get('resname', sel=lipid)), sel=lipid)
        mol.set('segid', 'L{}'.format(i % 2), sel=lipid)  # consecutive blocks alternate between L0 and L1

    return mol
Пример #35
0
def ionizePlace(mol,
                anion_resname,
                cation_resname,
                anion_name,
                cation_name,
                nanion,
                ncation,
                dfrom=5,
                dbetween=5,
                segname=None):
    """Place a given number of negative and positive ions in the solvent.

    Replaces water molecules as long as they respect the given distance criteria.

    Parameters
    ----------
    mol : :class:`Molecule <htmd.molecule.molecule.Molecule>` object
        The Molecule object
    anion_resname : str
        Resname of the added anions
    cation_resname : str
        Resname of the added cations
    anion_name : str
        Name of the added anions
    cation_name : str
        Name of the added cations
    nanion : int
        Number of anions to add
    ncation : int
        Number of cations to add
    dfrom : float
        Min distance of ions from molecule
    dbetween : float
        Min distance between ions
    segname : str
        Segment name to add

    Returns
    -------
    mol : :class:`Molecule <htmd.molecule.molecule.Molecule>` object
        The molecule with the ions added

    Raises
    ------
    NameError
        If no water further than `dfrom` from the rest of the system exists, or if the ions could not be placed
        within the allowed number of attempts
    """

    newmol = mol.copy()

    logger.info('Min distance of ions from molecule: ' + str(dfrom) + 'A')
    logger.info('Min distance between ions: ' + str(dbetween) + 'A')
    logger.info('Placing {:d} anions and {:d} cations.'.format(
        nanion, ncation))

    if (nanion + ncation) == 0:
        return newmol

    # NOTE(review): the resolved segname is not used below, the ions are always placed in chain/segid 'I'
    segname = _getSegname(newmol, segname)
    nions = nanion + ncation

    # Copy the betas: the field is overwritten right below with per-residue ids and a plain reference would
    # lose the original values if Molecule.set writes into the existing array
    betabackup = newmol.beta.copy()
    newmol.set('beta', sequenceID((newmol.resid, newmol.segid)))

    # Find water oxygens to replace with ions
    ntries = 0
    maxtries = 10
    while True:
        ionlist = []
        watindex = newmol.atomselect('noh and water and not (within ' +
                                     str(dfrom) + ' of not water)',
                                     indexes=True)

        if len(watindex) == 0:
            raise NameError(
                'No waters could be found further than ' + str(dfrom) +
                ' from other molecules to be replaced by ions. You might need to solvate with a bigger box or disable the ionize property when building.'
            )

        while len(ionlist) < nions:
            if len(watindex) == 0:
                break
            randwat = np.random.randint(len(watindex))
            thision = watindex[randwat]
            addit = True
            if len(ionlist) != 0:  # Check for distance from previous ions
                ionspos = newmol.get('coords', sel=ionlist)
                thispos = newmol.get('coords', sel=thision)
                dists = distance.cdist(np.atleast_2d(ionspos),
                                       np.atleast_2d(thispos),
                                       metric='euclidean')

                if np.any(dists < dbetween):
                    addit = False
            if addit:
                ionlist.append(thision)
            # Drop the candidate whether it was accepted or not. A rejected water stays too close to an already
            # placed ion for the rest of this attempt, and keeping it in the pool could make this loop spin
            # forever instead of running out of candidates and triggering a new attempt.
            watindex = np.delete(watindex, randwat)
        if len(ionlist) == nions:
            break

        ntries += 1
        if ntries == maxtries:
            raise NameError(
                'Failed to add ions after ' + str(maxtries) +
                ' attempts. Try decreasing the from and between parameters, decreasing ion concentration or making a larger water box.'
            )

    # Delete waters but keep their coordinates
    waterpos = np.atleast_2d(newmol.get('coords', ionlist))
    # The betas hold the per-residue ids at this point, so this selects every atom of the chosen waters
    betasel = np.zeros(newmol.numAtoms, dtype=bool)
    for b in newmol.beta[ionlist]:
        betasel |= newmol.beta == b
    sel = np.where(betasel)[0]
    newmol.remove(sel, _logger=False)
    betabackup = np.delete(betabackup, sel)

    # Add the ions
    randidx = np.random.permutation(np.size(waterpos, 0))
    atom = Molecule()
    atom.empty(1)
    atom.set('chain', 'I')
    atom.set('segid', 'I')

    for i in range(nanion):
        atom.set('name', anion_name)
        atom.set('resname', anion_resname)
        atom.set('resid', newmol.resid[-1] + 1)
        atom.coords = waterpos[randidx[i], :]
        newmol.insert(atom, len(newmol.name))
    for i in range(ncation):
        atom.set('name', cation_name)
        atom.set('resname', cation_resname)
        atom.set('resid', newmol.resid[-1] + 1)
        atom.coords = waterpos[randidx[i + nanion], :]
        newmol.insert(atom, len(newmol.name))

    # Restoring the original betas of all atoms except the ions appended at the end
    sel = np.ones(len(betabackup) + nions, dtype=bool)
    sel[len(betabackup)::] = False
    newmol.set('beta', betabackup, sel=sel)
    return newmol
Пример #36
0
def build(mol, ff=None, topo=None, param=None, prefix='structure', outdir='./build', caps=None, ionize=True, saltconc=0,
          saltanion=None, saltcation=None, disulfide=None, tleap=None, execute=True, atomtypes=None,
          offlibraries=None, gbsa=False, igb=2):
    """ Builds a system for AMBER

    Uses tleap to build a system for AMBER. Additionally it allows the user to ionize and add disulfide bridges.

    Parameters
    ----------
    mol : :class:`Molecule <htmd.molecule.molecule.Molecule>` object
        The Molecule object containing the system
    ff : list of str
        A list of leaprc forcefield files.
        Use :func:`amber.listFiles <htmd.builder.amber.listFiles>` to get a list of available forcefield files.
        Default: :func:`amber.defaultFf <htmd.builder.amber.defaultFf>`
    topo : list of str
        A list of topology `prepi/prep/in` files.
        Use :func:`amber.listFiles <htmd.builder.amber.listFiles>` to get a list of available topology files.
        Default: :func:`amber.defaultTopo <htmd.builder.amber.defaultTopo>`
    param : list of str
        A list of parameter `frcmod` files.
        Use :func:`amber.listFiles <htmd.builder.amber.listFiles>` to get a list of available parameter files.
        Default: :func:`amber.defaultParam <htmd.builder.amber.defaultParam>`
    prefix : str
        The prefix for the generated prmtop, crd and pdb files
    outdir : str
        The path to the output directory
        Default: './build'
    caps : dict
        A dictionary with keys segids and values lists of strings describing the caps for a particular protein segment.
        e.g. caps['P'] = ['ACE', 'NME'] or caps['P'] = ['none', 'none']. Default: will apply ACE and NME caps to every
        protein segment.
    ionize : bool
        Enable or disable ionization
    saltconc : float
        Salt concentration to add to the system after neutralization.
    saltanion : {'Cl-'}
        The anion type. Please use only AMBER ion atom names.
    saltcation : {'Na+', 'K+', 'Cs+'}
        The cation type. Please use only AMBER ion atom names.
    disulfide : list of pairs of atomselection strings
        If None it will guess disulfide bonds. Otherwise provide a list pairs of atomselection strings for each pair of
        residues forming the disulfide bridge.
    tleap : str
        Path to tleap executable used to build the system for AMBER
    execute : bool
        Disable building. Will only write out the input script needed by tleap. Does not include ionization.
    atomtypes : list of triplets
        Custom atom types defined by the user as ('type', 'element', 'hybrid') triplets
        e.g. (('C1', 'C', 'sp2'), ('CI', 'C', 'sp3')). Check `addAtomTypes` in AmberTools docs.
    offlibraries : str or list
        A path or a list of paths to OFF library files. Check `loadOFF` in AmberTools docs.
    gbsa : bool
        Modify radii for GBSA implicit water model
    igb : int
        GB model. Select: 1 for mbondi, 2 and 5 for mbondi2, 7 for bondi and 8 for mbondi3.
        Check section 4. The Generalized Born/Surface Area Model of the AMBER manual.

    Returns
    -------
    molbuilt : :class:`Molecule <htmd.molecule.molecule.Molecule>` object
        The built system in a Molecule object. None if `execute` is False, since nothing is built in that case.

    Raises
    ------
    NameError
        If the tleap executable cannot be found or fails to run, or if the input files for tleap cannot be written
    RuntimeError
        If an atom type definition is not a triplet or an OFF library file does not exist
    BuildError
        If errors are found in the tleap log or tleap does not produce the prmtop and crd files

    Example
    -------
    >>> from htmd.ui import *  # doctest: +SKIP
    >>> mol = Molecule("3PTB")
    >>> molbuilt = amber.build(mol, outdir='/tmp/build')  # doctest: +SKIP
    >>> # More complex example
    >>> disu = [['segid P and resid 157', 'segid P and resid 13'], ['segid K and resid 1', 'segid K and resid 25']]
    >>> molbuilt = amber.build(mol, outdir='/tmp/build', saltconc=0.15, disulfide=disu)  # doctest: +SKIP
    """
    # Remove pdb protein bonds as they can be regenerated by tleap. Keep non-protein bonds i.e. for ligands
    mol = mol.copy()
    _removeProteinBonds(mol)

    if tleap is None:
        tleap = _findTleap()
    elif shutil.which(tleap) is None:
        raise NameError('Could not find executable: `{}` in the PATH. Cannot build for AMBER.'.format(tleap))

    if not os.path.isdir(outdir):
        os.makedirs(outdir)
    _cleanOutDir(outdir)
    if ff is None:
        ff = defaultFf()
    if topo is None:
        topo = defaultTopo()
    if param is None:
        param = defaultParam()
    if caps is None:
        caps = _defaultProteinCaps(mol)

    _missingSegID(mol)
    _checkMixedSegment(mol)

    mol = _charmmLipid2Amber(mol)

    _applyProteinCaps(mol, caps)

    # The context manager closes tleap.in also when one of the checks below raises
    with open(os.path.join(outdir, 'tleap.in'), 'w') as f:
        f.write('# tleap file generated by amber.build\n')

        # Printing out the forcefields
        if isinstance(ff, str):
            ff = [ff]
        for i, force in enumerate(ff):
            if not os.path.isfile(force):
                force = _locateFile(force, 'ff', tleap)
                if force is None:
                    continue
            newname = 'ff{}_{}'.format(i, os.path.basename(force))
            shutil.copy(force, os.path.join(outdir, newname))
            f.write('source {}\n'.format(newname))
        f.write('\n')

        if gbsa:
            gbmodels = {1: 'mbondi', 2: 'mbondi2', 5: 'mbondi2', 7: 'bondi', 8: 'mbondi3'}
            f.write('set default PBradii {}\n\n'.format(gbmodels[igb]))

        # Adding custom atom types
        if atomtypes is not None:
            atomtypes = ensurelist(tocheck=atomtypes[0], tomod=atomtypes)
            f.write('addAtomTypes {\n')
            for at in atomtypes:
                if len(at) != 3:
                    raise RuntimeError('Atom type definitions have to be triplets. Check the AMBER documentation.')
                f.write('    {{ "{}" "{}" "{}" }}\n'.format(at[0], at[1], at[2]))
            f.write('}\n\n')

        # Loading OFF libraries
        if offlibraries is not None:
            offlibraries = ensurelist(offlibraries)
            # Every library gets its own index so that files with the same basename do not overwrite each other
            for i, off in enumerate(offlibraries):
                if not os.path.isfile(off):
                    raise RuntimeError('Could not find off-library in location {}'.format(off))
                newname = 'offlib{}_{}'.format(i, os.path.basename(off))
                shutil.copy(off, os.path.join(outdir, newname))
                f.write('loadoff {}\n'.format(newname))

        # Loading frcmod parameters
        f.write('# Loading parameter files\n')
        for i, p in enumerate(param):
            if not os.path.isfile(p):
                p = _locateFile(p, 'param', tleap)
                if p is None:
                    continue
            newname = 'param{}_{}'.format(i, os.path.basename(p))
            shutil.copy(p, os.path.join(outdir, newname))
            f.write('loadamberparams {}\n'.format(newname))
        f.write('\n')

        # Loading prepi topologies
        f.write('# Loading prepi topologies\n')
        for i, t in enumerate(topo):
            if not os.path.isfile(t):
                t = _locateFile(t, 'topo', tleap)
                if t is None:
                    continue
            newname = 'topo{}_{}'.format(i, os.path.basename(t))
            shutil.copy(t, os.path.join(outdir, newname))
            f.write('loadamberprep {}\n'.format(newname))
        f.write('\n')

        f.write('# Loading the system\n')
        f.write('mol = loadpdb input.pdb\n\n')

        if np.sum(mol.atomtype != '') != 0:
            logger.debug('Writing mol2 files for input to tleap.')
            segs = np.unique(mol.segid[mol.atomtype != ''])
            combstr = 'mol = combine {mol'
            for s in segs:
                name = 'segment{}'.format(s)
                mol2name = os.path.join(outdir, '{}.mol2'.format(name))
                mol.write(mol2name, (mol.atomtype != '') & (mol.segid == s))
                if not os.path.isfile(mol2name):
                    raise NameError('Could not write a mol2 file out of the given Molecule.')
                f.write('# Loading the rest of the system\n')
                f.write('{} = loadmol2 {}.mol2\n\n'.format(name, name))
                combstr += ' {}'.format(name)
            combstr += '}\n\n'
            f.write(combstr)

        # Write patches for disulfide bonds (only after ionizing)
        if not ionize:
            # TODO: Remove this once we deprecate the class
            from htmd.builder.builder import DisulfideBridge
            from htmd.molecule.molecule import UniqueResidueID
            if disulfide is not None and len(disulfide) != 0 and isinstance(disulfide[0], DisulfideBridge):
                newdisu = []
                for d in disulfide:
                    r1 = UniqueResidueID.fromMolecule(mol, 'resid {} and segname {}'.format(d.resid1, d.segid1))
                    r2 = UniqueResidueID.fromMolecule(mol, 'resid {} and segname {}'.format(d.resid2, d.segid2))
                    newdisu.append([r1, r2])
                disulfide = newdisu
            # TODO: Remove up to here ----------------------

            if disulfide is not None and len(disulfide) != 0 and isinstance(disulfide[0][0], str):
                disulfide = convertDisulfide(mol, disulfide)

            if disulfide is None:
                logger.info('Detecting disulfide bonds.')
                disulfide = detectDisulfideBonds(mol)

            # Fix structure to match the disulfide patching
            if len(disulfide) != 0:
                torem = np.zeros(mol.numAtoms, dtype=bool)
                f.write('# Adding disulfide bonds\n')
                # Convert to stupid amber residue numbering. Only resnames change inside the loop, so the
                # numbering is the same for every bridge and is computed once.
                uqseqid = sequenceID((mol.resid, mol.insertion, mol.segid)) + mol.resid[0]
                for d in disulfide:
                    # Rename the residues to CYX if there is a disulfide bond
                    atoms1 = d[0].selectAtoms(mol, indexes=False)
                    atoms2 = d[1].selectAtoms(mol, indexes=False)
                    mol.resname[atoms1] = 'CYX'
                    mol.resname[atoms2] = 'CYX'
                    # Remove (eventual) HG hydrogens on these CYS (from proteinPrepare)
                    torem |= (atoms1 & (mol.name == 'HG')) | (atoms2 & (mol.name == 'HG'))
                    uqres1 = int(np.unique(uqseqid[atoms1]))
                    uqres2 = int(np.unique(uqseqid[atoms2]))
                    f.write('bond mol.{}.SG mol.{}.SG\n'.format(uqres1, uqres2))
                f.write('\n')
                mol.remove(torem, _logger=False)

        f.write('# Writing out the results\n')
        f.write('saveamberparm mol ' + prefix + '.prmtop ' + prefix + '.crd\n')
        f.write('quit')

    # Printing and loading the PDB file. AMBER can work with a single PDB file if the segments are separate by TER
    logger.debug('Writing PDB file for input to tleap.')
    pdbname = os.path.join(outdir, 'input.pdb')

    # mol2 files have atomtype, here we only write parts not coming from mol2
    # We need to write the input.pdb at the end since we modify the resname for disulfide bridges in mol
    mol.write(pdbname, mol.atomtype == '')
    if not os.path.isfile(pdbname):
        raise NameError('Could not write a PDB file out of the given Molecule.')

    if not execute:
        # Only the tleap input was requested, there is no built system to load or write out
        return None

    # Source paths of extra dirs (our dirs, not amber default)
    htmdamberdir = os.path.abspath(os.path.join(home(), 'builder', 'amberfiles'))
    sourcepaths = [htmdamberdir]
    sourcepaths += [os.path.join(htmdamberdir, os.path.dirname(fffile))
                    for fffile in ff if os.path.isfile(os.path.join(htmdamberdir, fffile))]
    extrasource = []
    for p in sourcepaths:
        extrasource.append('-I')
        extrasource.append('{}'.format(p))
    logpath = os.path.abspath(os.path.join(outdir, 'log.txt'))
    logger.info('Starting the build.')
    currdir = os.getcwd()
    os.chdir(outdir)
    try:
        # tleap runs inside outdir. The log file is closed and the working directory restored even on failure
        with open(logpath, 'w') as logfile:
            try:
                call([tleap] + extrasource + ['-f', './tleap.in'], stdout=logfile)
            except Exception as err:
                raise NameError('tleap failed at execution') from err
        errors = _logParser(logpath)
    finally:
        os.chdir(currdir)
    if errors:
        raise BuildError(errors + ['Check {} for further information on errors in building.'.format(logpath)])
    logger.info('Finished building.')

    # tleap writes its output under the requested prefix (see the saveamberparm line above)
    crdfile = os.path.join(outdir, prefix + '.crd')
    prmtopfile = os.path.join(outdir, prefix + '.prmtop')
    if not all(os.path.isfile(built) and os.path.getsize(built) != 0 for built in (crdfile, prmtopfile)):
        raise BuildError('No structure pdb/prmtop file was generated. Check {} for errors in building.'.format(logpath))
    molbuilt = Molecule(prmtopfile)
    molbuilt.read(crdfile)

    if ionize:
        shutil.move(crdfile, os.path.join(outdir, prefix + '.noions.crd'))
        shutil.move(prmtopfile, os.path.join(outdir, prefix + '.noions.prmtop'))
        totalcharge = np.sum(molbuilt.charge)
        nwater = np.sum(molbuilt.atomselect('water and noh'))
        anion, cation, anionatom, cationatom, nanion, ncation = ionizef(totalcharge, nwater, saltconc=saltconc,
                                                                        anion=saltanion, cation=saltcation)
        newmol = ionizePlace(mol, anion, cation, anionatom, cationatom, nanion, ncation)
        # Redo the whole build but now with ions included. All build options are forwarded, otherwise the
        # final system would be built without the requested GBSA radii.
        return build(newmol, ff=ff, topo=topo, param=param, prefix=prefix, outdir=outdir, caps={}, ionize=False,
                     execute=execute, saltconc=saltconc, saltanion=saltanion, saltcation=saltcation,
                     disulfide=disulfide, tleap=tleap, atomtypes=atomtypes, offlibraries=offlibraries,
                     gbsa=gbsa, igb=igb)

    tmpbonds = molbuilt.bonds
    molbuilt.bonds = []  # Removing the bonds to speed up writing
    molbuilt.write(os.path.join(outdir, prefix + '.pdb'))
    molbuilt.bonds = tmpbonds  # Restoring the bonds
    return molbuilt
Пример #37
0
def build(mol,
          ff=None,
          topo=None,
          param=None,
          prefix='structure',
          outdir='./build',
          caps=None,
          ionize=True,
          saltconc=0,
          saltanion=None,
          saltcation=None,
          disulfide=None,
          tleap='tleap',
          execute=True,
          atomtypes=None,
          offlibraries=None):
    """ Builds a system for AMBER

    Uses tleap to build a system for AMBER. Additionally it allows the user to ionize and add disulfide bridges.

    When `ionize` is True the system is first built without ions to obtain the total charge and the number of
    waters, then ions are placed and the whole build is repeated once with `ionize=False`.

    Parameters
    ----------
    mol : :class:`Molecule <htmd.molecule.molecule.Molecule>` object
        The Molecule object containing the system
    ff : list of str
        A list of leaprc forcefield files.
        Use :func:`amber.listFiles <htmd.builder.amber.listFiles>` to get a list of available forcefield files.
        Default: :func:`amber.defaultFf <htmd.builder.amber.defaultFf>`
    topo : list of str
        A list of topology `prepi` files.
        Use :func:`amber.listFiles <htmd.builder.amber.listFiles>` to get a list of available topology files.
        Default: :func:`amber.defaultTopo <htmd.builder.amber.defaultTopo>`
    param : list of str
        A list of parameter `frcmod` files.
        Use :func:`amber.listFiles <htmd.builder.amber.listFiles>` to get a list of available parameter files.
        Default: :func:`amber.defaultParam <htmd.builder.amber.defaultParam>`
    prefix : str
        The prefix for the generated prmtop, crd and pdb files
    outdir : str
        The path to the output directory
        Default: './build'
    caps : dict
        A dictionary with keys segids and values lists of strings describing the caps for a particular protein segment.
        e.g. caps['P'] = ['ACE', 'NME'] or caps['P'] = ['none', 'none']. Default: will apply ACE and NME caps to every
        protein segment.
    ionize : bool
        Enable or disable ionization
    saltconc : float
        Salt concentration to add to the system after neutralization.
    saltanion : {'Cl-'}
        The anion type. Please use only AMBER ion atom names.
    saltcation : {'Na+', 'K+', 'Cs+'}
        The cation type. Please use only AMBER ion atom names.
    disulfide : list of :class:`DisulfideBridge <htmd.builder.builder.DisulfideBridge>` objects
        If None it will guess disulfide bonds. Otherwise provide a list of `DisulfideBridge` objects.
    tleap : str
        Path to tleap executable used to build the system for AMBER
    execute : bool
        Disable building. Will only write out the input script needed by tleap. Does not include ionization.
    atomtypes : list of triplets
        Custom atom types defined by the user as ('type', 'element', 'hybrid') triplets
        e.g. (('C1', 'C', 'sp2'), ('CI', 'C', 'sp3')). Check `addAtomTypes` in AmberTools docs.
    offlibraries : str or list
        A path or a list of paths to OFF library files. Check `loadOFF` in AmberTools docs.

    Returns
    -------
    molbuilt : :class:`Molecule <htmd.molecule.molecule.Molecule>` object
        The built system in a Molecule object. None if `execute` is False, since nothing was built.

    Raises
    ------
    NameError
        If the tleap executable cannot be found, if tleap fails to execute, if the input PDB/mol2 files cannot be
        written or if tleap did not produce non-empty prmtop and crd files.
    RuntimeError
        If an entry of `atomtypes` is not a triplet.

    Example
    -------
    >>> from htmd.ui import *
    >>> mol = Molecule("3PTB")
    >>> molbuilt = amber.build(mol, outdir='/tmp/build')  # doctest: +SKIP
    ...
    >>> # More complex example
    >>> disu = [DisulfideBridge('P', 157, 'P', 13), DisulfideBridge('K', 1, 'K', 25)]
    >>> molbuilt = amber.build(mol, outdir='/tmp/build', saltconc=0.15, disulfide=disu)  # doctest: +SKIP
    """
    # Remove pdb protein bonds as they can be regenerated by tleap. Keep non-protein bonds i.e. for ligands
    mol = mol.copy()
    _removeProteinBonds(mol)

    if shutil.which(tleap) is None:
        raise NameError(
            'Could not find executable: `{}` in the PATH. Cannot build for AMBER.'
            .format(tleap))
    if not os.path.isdir(outdir):
        os.makedirs(outdir)
    _cleanOutDir(outdir)
    if ff is None:
        ff = defaultFf()
    if topo is None:
        topo = defaultTopo()
    if param is None:
        param = defaultParam()
    if caps is None:
        caps = _defaultProteinCaps(mol)

    _missingSegID(mol)
    _checkMixedSegment(mol)
    _checkResidueInsertions(mol)

    mol = _charmmLipid2Amber(mol)

    _applyProteinCaps(mol, caps)

    if isinstance(ff, str):
        ff = [ff]

    # The context manager guarantees the script is flushed and closed even if one of the checks below raises
    with open(os.path.join(outdir, 'tleap.in'), 'w') as f:
        f.write('# tleap file generated by amber.build\n')

        # Printing out the forcefields
        for force in ff:
            f.write('source ' + force + '\n')
        f.write('\n')

        # Adding custom atom types
        if atomtypes is not None:
            atomtypes = ensurelist(tocheck=atomtypes[0], tomod=atomtypes)
            f.write('addAtomTypes {\n')
            for at in atomtypes:
                if len(at) != 3:
                    raise RuntimeError(
                        'Atom type definitions have to be triplets. Check the AMBER documentation.'
                    )
                f.write('    {{ "{}" "{}" "{}" }}\n'.format(at[0], at[1], at[2]))
            f.write('}\n\n')

        # Loading OFF libraries
        if offlibraries is not None:
            if not isinstance(offlibraries, (list, tuple)):
                offlibraries = [offlibraries]
            for off in offlibraries:
                f.write('loadoff {}\n\n'.format(off))

        # Loading frcmod parameters
        f.write('# Loading parameter files\n')
        for p in param:
            try:
                shutil.copy(p, outdir)
            except OSError:
                # Best effort: files which cannot be copied are referenced by their given name so that tleap
                # can look them up itself
                f.write('loadamberparams ' + p + '\n')
                logger.info(
                    "File {:s} not found, assuming its present on the standard Amber location"
                    .format(p))
            else:
                f.write('loadamberparams ' + os.path.basename(p) + '\n')
        f.write('\n')

        # Loading prepi topologies
        f.write('# Loading prepi topologies\n')
        for t in topo:
            shutil.copy(t, outdir)
            f.write('loadamberprep ' + os.path.basename(t) + '\n')
        f.write('\n')

        # Detect disulfide bridges if not defined by user
        if disulfide is None and not ionize:
            logger.info('Detecting disulfide bonds.')
            disulfide = detectDisulfideBonds(mol)

        # Fix structure to match the disulfide patching
        if not ionize and len(disulfide) != 0:
            for d in disulfide:
                # Rename the residues to CYX if there is a disulfide bond
                atoms1 = (mol.segid == d.segid1) & (mol.resid == d.resid1)
                atoms2 = (mol.segid == d.segid2) & (mol.resid == d.resid2)
                mol.resname[atoms1] = 'CYX'
                mol.resname[atoms2] = 'CYX'
                # Remove (eventual) HG hydrogens on these CYS (from proteinPrepare)
                mol.remove(atoms1 & (mol.name == 'HG'), _logger=False)
                mol.remove(atoms2 & (mol.name == 'HG'), _logger=False)

        # Printing and loading the PDB file. AMBER can work with a single PDB file if the segments are separate by TER
        logger.debug('Writing PDB file for input to tleap.')
        pdbname = os.path.join(outdir, 'input.pdb')

        # mol2 files have atomtype, here we only write parts not coming from mol2
        mol.write(pdbname, mol.atomtype == '')
        if not os.path.isfile(pdbname):
            raise NameError(
                'Could not write a PDB file out of the given Molecule.')
        f.write('# Loading the system\n')
        f.write('mol = loadpdb input.pdb\n\n')

        if np.sum(mol.atomtype != '') != 0:
            logger.debug('Writing mol2 files for input to tleap.')
            segs = np.unique(mol.segid[mol.atomtype != ''])
            combstr = 'mol = combine {mol'
            for s in segs:
                name = 'segment{}'.format(s)
                mol2name = os.path.join(outdir, '{}.mol2'.format(name))
                mol.write(mol2name, (mol.atomtype != '') & (mol.segid == s))
                if not os.path.isfile(mol2name):
                    raise NameError(
                        'Could not write a mol2 file out of the given Molecule.')
                f.write('# Loading the rest of the system\n')
                f.write('{} = loadmol2 {}.mol2\n\n'.format(name, name))
                combstr += ' {}'.format(name)
            combstr += '}\n\n'
            f.write(combstr)

        # Write patches for disulfide bonds (only after ionizing)
        if not ionize and len(disulfide) != 0:
            f.write('# Adding disulfide bonds\n')
            # Convert to stupid amber residue numbering. Does not depend on the bridge so compute it only once
            uqseqid = sequenceID(
                (mol.resid, mol.insertion, mol.segid)) + mol.resid[0]
            for d in disulfide:
                uqres1 = int(
                    np.unique(uqseqid[(mol.segid == d.segid1)
                                      & (mol.resid == d.resid1)]))
                uqres2 = int(
                    np.unique(uqseqid[(mol.segid == d.segid2)
                                      & (mol.resid == d.resid2)]))
                f.write('bond mol.{}.SG mol.{}.SG\n'.format(uqres1, uqres2))
            f.write('\n')

        f.write('# Writing out the results\n')
        f.write('saveamberparm mol ' + prefix + '.prmtop ' + prefix + '.crd\n')
        f.write('quit')

    if not execute:
        # Only the tleap input script was requested, there is no built system to load or write out
        return None

    # Source paths of extra dirs (our dirs, not amber default)
    htmdamberdir = os.path.abspath(
        os.path.join(home(), 'builder', 'amberfiles'))
    sourcepaths = [htmdamberdir]
    sourcepaths += [
        os.path.join(htmdamberdir, os.path.dirname(ffname)) for ffname in ff
        if os.path.isfile(os.path.join(htmdamberdir, ffname))
    ]
    extrasource = []
    for p in sourcepaths:
        extrasource.append('-I')
        extrasource.append('{}'.format(p))
    cmd = [tleap] + extrasource + ['-f', './tleap.in']

    logpath = os.path.abspath(os.path.join(outdir, 'log.txt'))
    logger.info('Starting the build.')
    currdir = os.getcwd()
    os.chdir(outdir)
    try:
        with open(logpath, 'w') as logfile:
            try:
                call(cmd, stdout=logfile)
            except Exception as err:
                raise NameError('tleap failed at execution') from err
    finally:
        # Always go back to the original working directory, also when tleap could not be executed
        os.chdir(currdir)
    logger.info('Finished building.')

    # tleap was told to write <prefix>.prmtop/<prefix>.crd so these are the files to look for
    crdfile = os.path.join(outdir, prefix + '.crd')
    prmtopfile = os.path.join(outdir, prefix + '.prmtop')
    built = all(os.path.isfile(fname) and os.path.getsize(fname) != 0
                for fname in (crdfile, prmtopfile))
    if not built:
        raise NameError(
            'No structure pdb/prmtop file was generated. Check {} for errors in building.'
            .format(logpath))
    molbuilt = Molecule(prmtopfile)
    molbuilt.read(crdfile)

    if ionize:
        shutil.move(crdfile, os.path.join(outdir, prefix + '.noions.crd'))
        shutil.move(prmtopfile, os.path.join(outdir, prefix + '.noions.prmtop'))
        totalcharge = np.sum(molbuilt.charge)
        nwater = np.sum(molbuilt.atomselect('water and noh'))
        anion, cation, anionatom, cationatom, nanion, ncation = ionizef(
            totalcharge,
            nwater,
            saltconc=saltconc,
            ff='amber',
            anion=saltanion,
            cation=saltcation)
        newmol = ionizePlace(mol, anion, cation, anionatom, cationatom,
                             nanion, ncation)
        # Redo the whole build but now with ions included
        return build(newmol,
                     ff=ff,
                     topo=topo,
                     param=param,
                     prefix=prefix,
                     outdir=outdir,
                     caps={},
                     ionize=False,
                     execute=execute,
                     saltconc=saltconc,
                     disulfide=disulfide,
                     tleap=tleap,
                     atomtypes=atomtypes,
                     offlibraries=offlibraries)

    tmpbonds = molbuilt.bonds
    molbuilt.bonds = []  # Removing the bonds to speed up writing
    molbuilt.write(os.path.join(outdir, prefix + '.pdb'))
    molbuilt.bonds = tmpbonds  # Restoring the bonds
    return molbuilt
Пример #38
0
def _charmmLipid2Amber(mol):
    """ Convert a CHARMM lipid membrane to AMBER format

    Residues and atoms are renamed following the rules read from `charmmlipid2amber.csv`, the atoms of each
    converted residue are reordered according to the `order` field of those rules and every lipid delimited by
    atoms flagged with `ter` gets its own segment (`L1`, `L2`, ...) with renumbered resids.

    Parameters
    ----------
    mol : :class:`Molecule <htmd.molecule.molecule.Molecule>` object
        The Molecule object containing the membrane

    Returns
    -------
    newmol : :class:`Molecule <htmd.molecule.molecule.Molecule>` object
        A new Molecule object with the membrane converted to AMBER

    Raises
    ------
    NameError
        If more than 999 lipids are found, as no separate segment could be defined for each of them.
    """
    resdict = _readcsvdict(path.join(home(), 'builder', 'charmmlipid2amber.csv'))

    natoms = mol.numAtoms
    neworder = np.arange(natoms)  # After renaming the atoms and residues I have to reorder them

    begs = np.zeros(natoms, dtype=bool)  # First atom of each converted residue
    begters = np.zeros(natoms, dtype=bool)  # First atom of a lipid (rule flagged with ter)
    finters = np.zeros(natoms, dtype=bool)  # Last atom of a lipid (rule flagged with ter)

    betabackup = mol.beta.copy()

    mol = mol.copy()
    # The beta field is temporarily used to store a per-residue identifier. It is restored before returning
    mol.set('beta', sequenceID(mol.resid))
    for res, atommap in resdict.items():
        molresidx = mol.resname == res
        if not np.any(molresidx):
            continue
        names = mol.name.copy()  # Need to make a copy or I accidentally double-modify atoms

        for atom, rule in atommap.items():
            molatomidx = molresidx & (names == atom)

            mol.set('resname', rule.replaceresname, sel=molatomidx)
            mol.set('name', rule.replaceatom, sel=molatomidx)
            neworder[molatomidx] = rule.order

            if rule.order == 0:  # First atom (with or without ters)
                begs[molatomidx] = True
                if rule.ter:  # First atom with ter
                    begters[molatomidx] = True
            if rule.order == rule.natoms - 1 and rule.ter:  # Last atom with ter
                finters[molatomidx] = True

    # Shift the within-residue order of every converted residue by the index of its first atom
    for beta in np.unique(mol.beta[begs]):
        residx = np.where(mol.beta == beta)[0]
        beg = residx[0]
        fin = residx[-1] + 1
        neworder[beg:fin] = neworder[beg:fin] + beg
    idx = np.argsort(neworder)
    mol.beta = betabackup
    _reorderMol(mol, idx)

    begters = np.where(begters[idx])[0]  # Sort the begs and ters
    finters = np.where(finters[idx])[0]

    if len(begters) > 999:
        raise NameError('More than 999 lipids. Cannot define separate segments for all of them.')

    for i, beg in enumerate(begters):
        lipidsel = np.zeros(len(mol.resid), dtype=bool)
        lipidsel[beg:finters[i] + 1] = True
        mol.set('resid', sequenceID(mol.get('resname', sel=lipidsel)), sel=lipidsel)
        mol.set('segid', 'L' + str(i + 1), sel=lipidsel)

    return mol
Пример #39
0
def PDBread(filename, mode='pdb', frame=None, topoloc=None):
    """ Reads the topology, coordinates, bonds and crystal information out of a PDB or PDBQT file

    Parameters
    ----------
    filename : str
        Path to the file. If no such file exists and the string is 4 characters long it is passed to `_getPDB`
        as a possible PDB ID.
    mode : {'pdb', 'pdbqt'}
        Selects the fixed-width column layout used to parse the ATOM/HETATM records.
    frame
        Not used by this reader.
    topoloc
        Not used by this reader.

    Returns
    -------
    topo : Topology
        Topology built from the atom records of the first model, with bonds taken from the CONECT records and
        `crystalinfo` taken from the CRYST1 and REMARK 290 SMTRY records.
    traj : Trajectory
        Trajectory holding the coordinates, one frame per MODEL with a consistent number of atoms.

    Raises
    ------
    ValueError
        If `mode` is not one of the supported modes.
    """
    from pandas import read_fwf
    import io

    if mode not in ('pdb', 'pdbqt'):
        raise ValueError('Unsupported mode "{}". Valid modes are "pdb" and "pdbqt".'.format(mode))

    tempfile = False
    if not os.path.isfile(filename) and len(filename) == 4:  # Could be a PDB id. Try to load it from the PDB website
        filename, tempfile = _getPDB(filename)

    """
    COLUMNS        DATA  TYPE    FIELD        DEFINITION
    -------------------------------------------------------------------------------------
     1 -  6        Record name   "ATOM  "
     7 - 11        Integer       serial       Atom  serial number.
    13 - 16        Atom          name         Atom name.
    17             Character     altLoc       Alternate location indicator.
    18 - 20        Residue name  resName      Residue name.
    22             Character     chainID      Chain identifier.
    23 - 26        Integer       resSeq       Residue sequence number.
    27             AChar         iCode        Code for insertion of residues.
    31 - 38        Real(8.3)     x            Orthogonal coordinates for X in Angstroms.
    39 - 46        Real(8.3)     y            Orthogonal coordinates for Y in Angstroms.
    47 - 54        Real(8.3)     z            Orthogonal coordinates for Z in Angstroms.
    55 - 60        Real(6.2)     occupancy    Occupancy.
    61 - 66        Real(6.2)     tempFactor   Temperature  factor.
    77 - 78        LString(2)    element      Element symbol, right-justified.
    79 - 80        LString(2)    charge       Charge  on the atom.
    """
    if mode == 'pdb':
        topocolspecs = [(0, 6), (6, 11), (12, 16), (16, 17), (17, 21), (21, 22), (22, 26), (26, 27),
                        (54, 60), (60, 66), (72, 76), (76, 78), (78, 80)]
        toponames = ('record', 'serial', 'name', 'altloc', 'resname', 'chain', 'resid', 'insertion',
                     'occupancy', 'beta', 'segid', 'element', 'charge')
    else:  # mode == 'pdbqt'
        # http://autodock.scripps.edu/faqs-help/faq/what-is-the-format-of-a-pdbqt-file
        # The rigid root contains one or more PDBQT-style ATOM or HETATM records. These records resemble their
        # traditional PDB counterparts, but diverge in columns 71-79 inclusive (where the first character in the line
        # corresponds to column 1). The partial charge is stored in columns 71-76 inclusive (in %6.3f format, i.e.
        # right-justified, 6 characters wide, with 3 decimal places). The AutoDock atom-type is stored in columns 78-79
        # inclusive (in %-2.2s format, i.e. left-justified and 2 characters wide..
        topocolspecs = [(0, 6), (6, 11), (12, 16), (16, 17), (17, 21), (21, 22), (22, 26), (26, 27),
                        (54, 60), (60, 66), (70, 76), (77, 79)]
        toponames = ('record', 'serial', 'name', 'altloc', 'resname', 'chain', 'resid', 'insertion',
                     'occupancy', 'beta', 'charge', 'element')
    # Intended dtypes of the topology columns. Currently not passed to read_fwf.
    # Builtin int is used as the np.int alias does not exist anymore in recent NumPy versions.
    topodtypes = {
        'record': str,
        'serial': int,
        'name': str,
        'altloc': str,
        'resname': str,
        'chain': str,
        'resid': int,
        'insertion': str,
        'occupancy': np.float32,
        'beta': np.float32,
        'segid': str,
        'element': str,
        'charge': np.float32,
        'chargesign': str,
    }
    coordcolspecs = [(30, 38), (38, 46), (46, 54)]
    coordnames = ('x', 'y', 'z')

    """
    COLUMNS       DATA  TYPE      FIELD        DEFINITION
    -------------------------------------------------------------------------
     1 -  6        Record name    "CONECT"
     7 - 11        Integer        serial       Atom  serial number
    12 - 16        Integer        serial       Serial number of bonded atom
    17 - 21        Integer        serial       Serial  number of bonded atom
    22 - 26        Integer        serial       Serial number of bonded atom
    27 - 31        Integer        serial       Serial number of bonded atom
    """
    bondcolspecs = [(6, 11), (11, 16), (16, 21), (21, 26), (26, 31)]
    bondnames = ('serial1', 'serial2', 'serial3', 'serial4', 'serial5')

    """
    COLUMNS       DATA  TYPE    FIELD          DEFINITION
    -------------------------------------------------------------
     1 -  6       Record name   "CRYST1"
     7 - 15       Real(9.3)     a              a (Angstroms).
    16 - 24       Real(9.3)     b              b (Angstroms).
    25 - 33       Real(9.3)     c              c (Angstroms).
    34 - 40       Real(7.2)     alpha          alpha (degrees).
    41 - 47       Real(7.2)     beta           beta (degrees).
    48 - 54       Real(7.2)     gamma          gamma (degrees).
    56 - 66       LString       sGroup         Space  group.
    67 - 70       Integer       z              Z value.
    """
    cryst1colspecs = [(6, 15), (15, 24), (24, 33), (33, 40), (40, 47), (47, 54), (55, 66), (66, 70)]
    cryst1names = ('a', 'b', 'c', 'alpha', 'beta', 'gamma', 'sGroup', 'z')

    """
    Guessing columns for REMARK 290 SMTRY from the example since the specs don't define them
              1         2         3         4         5         6         7
    01234567890123456789012345678901234567890123456789012345678901234567890
    REMARK 290   SMTRY1   1  1.000000  0.000000  0.000000        0.00000
    REMARK 290   SMTRY2   1  0.000000  1.000000  0.000000        0.00000
    REMARK 290   SMTRY3   1  0.000000  0.000000  1.000000        0.00000
    REMARK 290   SMTRY1   2 -1.000000  0.000000  0.000000       36.30027
    REMARK 290   SMTRY2   2  0.000000 -1.000000  0.000000        0.00000
    REMARK 290   SMTRY3   2  0.000000  0.000000  1.000000       59.50256
    REMARK 290   SMTRY1   3 -1.000000  0.000000  0.000000        0.00000
    REMARK 290   SMTRY2   3  0.000000  1.000000  0.000000       46.45545
    REMARK 290   SMTRY3   3  0.000000  0.000000 -1.000000       59.50256
    REMARK 290   SMTRY1   4  1.000000  0.000000  0.000000       36.30027
    REMARK 290   SMTRY2   4  0.000000 -1.000000  0.000000       46.45545
    REMARK 290   SMTRY3   4  0.000000  0.000000 -1.000000        0.00000

    Guessing columns for REMARK 350   BIOMT from the example since the specs don't define them
    REMARK 350   BIOMT1   1 -0.981559  0.191159  0.000000        0.00000
    REMARK 350   BIOMT2   1 -0.191159 -0.981559  0.000000        0.00000
    REMARK 350   BIOMT3   1  0.000000  0.000000  1.000000      -34.13878
    REMARK 350   BIOMT1   2 -0.838088  0.545535  0.000000        0.00000
    REMARK 350   BIOMT2   2 -0.545535 -0.838088  0.000000        0.00000
    REMARK 350   BIOMT3   2  0.000000  0.000000  1.000000      -32.71633
    """
    symmetrycolspecs = [(20, 23), (23, 33), (33, 43), (43, 53), (53, 68)]
    symmetrynames = ('idx', 'rot1', 'rot2', 'rot3', 'trans')

    def concatCoords(coords, coorddata):
        # Parses the atom records buffered so far and appends them as a new frame along the third axis
        if coorddata.tell() != 0:  # Not empty
            coorddata.seek(0)
            parsedcoor = read_fwf(coorddata, colspecs=coordcolspecs, names=coordnames, na_values=_NA_VALUES, keep_default_na=False)
            if coords is None:
                coords = np.zeros((len(parsedcoor), 3, 0), dtype=np.float32)
            currcoords = np.vstack((parsedcoor.x, parsedcoor.y, parsedcoor.z)).T
            if coords.shape[0] != currcoords.shape[0]:
                logger.warning('Different number of atoms read in different MODELs in the PDB file. '
                               'Keeping only the first {} model(s)'.format(coords.shape[2]))
                return coords
            coords = np.append(coords, currcoords[:, :, np.newaxis], axis=2)
        return coords

    teridx = []
    currter = 0
    topoend = False

    cryst1data = io.StringIO()
    topodata = io.StringIO()
    conectdata = io.StringIO()
    coorddata = io.StringIO()
    symmetrydata = io.StringIO()

    coords = None

    try:
        with open(filename, 'r') as f:
            for line in f:
                if line.startswith('CRYST1'):
                    cryst1data.write(line)
                if line.startswith('ATOM') or line.startswith('HETATM'):
                    coorddata.write(line)
                if (line.startswith('ATOM') or line.startswith('HETATM')) and not topoend:
                    topodata.write(line)
                    teridx.append(str(currter))
                if line.startswith('TER'):
                    currter += 1
                if (mode == 'pdb' and line.startswith('END')) or \
                   (mode == 'pdbqt' and line.startswith('ENDMDL')):  # pdbqt should not stop reading at ENDROOT or ENDBRANCH
                    topoend = True
                if line.startswith('CONECT'):
                    conectdata.write(line)
                if line.startswith('MODEL'):
                    coords = concatCoords(coords, coorddata)
                    coorddata = io.StringIO()
                if line.startswith('REMARK 290   SMTRY'):  # TODO: Support BIOMT fields. It's a bit more complicated. Can't be done with pandas
                    symmetrydata.write(line)
    finally:
        # Everything needed is buffered in memory at this point, so a downloaded temporary file can be deleted
        # right away. Doing it here also cleans it up if reading or parsing fails.
        if tempfile:
            os.unlink(filename)

    cryst1data.seek(0)
    topodata.seek(0)
    conectdata.seek(0)
    symmetrydata.seek(0)

    coords = concatCoords(coords, coorddata)

    parsedbonds = read_fwf(conectdata, colspecs=bondcolspecs, names=bondnames, na_values=_NA_VALUES, keep_default_na=False)
    parsedcryst1 = read_fwf(cryst1data, colspecs=cryst1colspecs, names=cryst1names, na_values=_NA_VALUES, keep_default_na=False)
    parsedtopo = read_fwf(topodata, colspecs=topocolspecs, names=toponames, na_values=_NA_VALUES, keep_default_na=False)
    parsedsymmetry = read_fwf(symmetrydata, colspecs=symmetrycolspecs, names=symmetrynames, na_values=_NA_VALUES, keep_default_na=False)

    # Fixing PDB format charges which can come after the number
    if parsedtopo.charge.dtype == 'object':
        # str.match gives NaN for non-string entries, hence the explicit comparison with True
        minuses = np.where(parsedtopo.charge.str.match(r'\d\-') == True)[0]
        pluses = np.where(parsedtopo.charge.str.match(r'\d\+') == True)[0]
        for m in minuses:
            parsedtopo.loc[m, 'charge'] = int(parsedtopo.charge[m][0]) * -1
        for p in pluses:
            parsedtopo.loc[p, 'charge'] = int(parsedtopo.charge[p][0])
        parsedtopo.loc[parsedtopo.charge.isnull(), 'charge'] = 0
    # Fixing hexadecimal index and resids
    # Support for reading hexadecimal
    if parsedtopo.serial.dtype == 'object':
        logger.warning('Non-integer values were read from the PDB "serial" field. Dropping PDB values and assigning new ones.')
        parsedtopo.serial = sequenceID(parsedtopo.serial)
    if parsedtopo.resid.dtype == 'object':
        logger.warning('Non-integer values were read from the PDB "resid" field. Dropping PDB values and assigning new ones.')
        parsedtopo.resid = sequenceID(parsedtopo.resid)

    if parsedtopo.insertion.dtype == np.float64 and not np.all(np.isnan(parsedtopo.insertion)):
        # Trying to minimize damage from resids overflowing into insertion field
        logger.warning('Integer values detected in "insertion" field. Your resids might be overflowing. Take care')
        parsedtopo.insertion = [str(int(x)) if not np.isnan(x) else '' for x in parsedtopo.insertion]

    if len(parsedtopo) > 99999:
        logger.warning('Reading PDB file with more than 99999 atoms. Bond information can be wrong.')

    crystalinfo = {}
    if len(parsedcryst1):
        crystalinfo = parsedcryst1.iloc[0].to_dict()
        if isinstance(crystalinfo['sGroup'], str) or not np.isnan(crystalinfo['sGroup']):
            crystalinfo['sGroup'] = crystalinfo['sGroup'].split()
    if len(parsedsymmetry):
        numcopies = len(parsedsymmetry) // 3  # Three SMTRY rows per symmetry operation
        crystalinfo['numcopies'] = numcopies
        crystalinfo['rotations'] = parsedsymmetry[['rot1', 'rot2', 'rot3']].values.reshape((numcopies, 3, 3))
        crystalinfo['translations'] = parsedsymmetry['trans'].values.reshape((numcopies, 3))

    topo = Topology(parsedtopo)

    # Bond formatting part
    # TODO: Speed this up. This is the slowest part for large PDB files. From 700ms to 7s
    serials = parsedtopo.serial.values
    if np.max(parsedbonds.max()) > np.max(serials):
        logger.info('Bond indexes in PDB file exceed atom indexes. For safety we will discard all bond information.')
    else:
        # Lookup table from PDB serial to atom index. Serials not present in the file stay NaN
        mapserials = np.full(np.max(serials) + 1, np.nan)
        mapserials[serials] = np.arange(len(serials))
        for i in range(len(parsedbonds)):
            row = parsedbonds.loc[i].tolist()
            for b in range(1, 5):
                if not np.isnan(row[b]):
                    topo.bonds.append([int(row[0]), int(row[b])])
        topo.bonds = np.array(topo.bonds, dtype=np.uint32)
        if topo.bonds.size != 0:
            mappedbonds = mapserials[topo.bonds[:]]
            wrongrows, _ = np.where(np.isnan(mappedbonds))  # Some PDBs have bonds to non-existing serials... go figure
            wrongidx = np.unique(wrongrows)  # A bond with two invalid ends must only be counted once
            if len(wrongidx):
                logger.info('Discarding {} bonds to non-existing indexes in the PDB file.'.format(len(wrongidx)))
            mappedbonds = np.delete(mappedbonds, wrongidx, axis=0)
            topo.bonds = np.array(mappedbonds, dtype=np.uint32)

    if len(topo.segid) == 0 and currter != 0:  # If no segid was read, use the TER rows to define segments
        topo.segid = teridx

    topo.crystalinfo = crystalinfo
    traj = Trajectory(coords=coords)
    return topo, traj
Пример #40
0
def ionizePlace(mol, anion_resname, cation_resname, anion_name, cation_name, nanion, ncation, dfrom=5, dbetween=5, segname=None):
    """Place a given number of negative and positive ions in the solvent.

    Replaces water molecules as long as they respect the given distance criteria.
    Candidate waters are drawn at random; a drawn water is accepted as an ion
    position only if it is at least `dbetween` away from all previously accepted
    ones. If an attempt runs out of candidates before all ions are placed, the
    whole placement is retried from scratch (up to 10 attempts).

    Parameters
    ----------
    mol : :class:`Molecule <htmd.molecule.molecule.Molecule>` object
        The Molecule object
    anion_resname : str
        Resname of the added anions
    cation_resname : str
        Resname of the added cations
    anion_name : str
        Name of the added anions
    cation_name : str
        Name of the added cations
    nanion : int
        Number of anions to add
    ncation : int
        Number of cations to add
    dfrom : float
        Min distance of ions from molecule
    dbetween : float
        Min distance between ions
    segname : str
        Segment name to add. NOTE: currently unused by this function; the ions
        are always assigned chain 'I' and segid 'I'.

    Returns
    -------
    mol : :class:`Molecule <htmd.molecule.molecule.Molecule>` object
        A copy of the input molecule with the ions added

    Raises
    ------
    NameError
        If no water further than `dfrom` from the non-water atoms exists, or if
        the requested number of ions could not be placed after 10 attempts.
    """

    newmol = mol.copy()

    logger.debug('Min distance of ions from molecule: ' + str(dfrom) + 'A')
    logger.debug('Min distance between ions: ' + str(dbetween) + 'A')
    logger.debug('Placing {:d} anions and {:d} cations.'.format(nanion, ncation))

    nions = nanion + ncation
    if nions == 0:
        return newmol

    # Temporarily store a per-residue identifier in the beta field so that all atoms of a
    # selected water can be found (and removed) later. The original betas are restored at the end.
    betabackup = newmol.beta.copy()
    newmol.set('beta', sequenceID((newmol.resid, newmol.insertion, newmol.segid)))

    # Find water oxygens to replace with ions. newmol is not modified while choosing
    # positions, so the (expensive) selection only needs to be computed once.
    candidates = newmol.atomselect('noh and water and not (within ' + str(dfrom) + ' of not water)', indexes=True)
    if len(candidates) == 0:
        raise NameError('No waters could be found further than ' + str(dfrom) + ' from other molecules to be replaced by ions. You might need to solvate with a bigger box or disable the ionize property when building.')

    maxtries = 10
    for _ in range(maxtries):
        ionlist = []
        watindex = candidates  # np.delete returns a new array, so `candidates` is never altered
        while len(ionlist) < nions and len(watindex) != 0:
            randwat = np.random.randint(len(watindex))
            thision = watindex[randwat]
            # Discard the drawn candidate whether or not it is accepted. A rejected water can
            # never become valid later in this attempt (the ion list only grows), and keeping
            # it in the pool could make this loop spin forever.
            watindex = np.delete(watindex, randwat)
            if ionlist:  # Check for distance from previous ions
                ionspos = newmol.get('coords', sel=ionlist)
                thispos = newmol.get('coords', sel=thision)
                dists = distance.cdist(np.atleast_2d(ionspos), np.atleast_2d(thispos), metric='euclidean')
                if np.any(dists < dbetween):
                    continue
            ionlist.append(thision)
        if len(ionlist) == nions:
            break
    else:
        raise NameError('Failed to add ions after ' + str(maxtries) + " attempts. Try decreasing the 'dfrom' and 'dbetween' parameters, decreasing ion concentration or making a larger water box.")

    # Delete waters but keep their coordinates
    waterpos = np.atleast_2d(newmol.get('coords', ionlist))
    betasel = np.zeros(newmol.numAtoms, dtype=bool)
    for b in newmol.beta[ionlist]:
        # Every atom sharing the identifier of a chosen oxygen is marked for removal
        betasel |= newmol.beta == b
    sel = np.where(betasel)[0]
    newmol.remove(sel, _logger=False)
    betabackup = np.delete(betabackup, sel)  # Keep the backup aligned with the remaining atoms

    # Add the ions at the freed positions, in random order: first the anions, then the cations
    randidx = np.random.permutation(np.size(waterpos, 0))
    atom = Molecule()
    atom.empty(1)
    atom.set('chain', 'I')
    atom.set('segid', 'I')

    ionspecs = [(anion_name, anion_resname)] * nanion + [(cation_name, cation_resname)] * ncation
    for posidx, (ionname, ionresname) in zip(randidx, ionspecs):
        atom.set('name', ionname)
        atom.set('resname', ionresname)
        atom.set('resid', newmol.resid[-1] + 1)  # Each ion gets the resid following the current last atom
        atom.coords = waterpos[posidx, :]
        newmol.insert(atom, len(newmol.name))  # Append at the end of the molecule

    # Restoring the original betas (the appended ions keep the beta of the inserted atom)
    newmol.beta[:len(betabackup)] = betabackup
    return newmol