Exemplo n.º 1
0
    def __init__(
        self,
        sel1,
        sel2,
        numshells=4,
        shellwidth=3,
        pbc=True,
        gap=None,
        truncate=None,
    ):
        """Configure the shell projection and the distance metric behind it.

        Parameters
        ----------
        sel1
            First atom selection, forwarded to ``MetricDistance`` as ``sel1``.
        sel2
            Second atom selection, forwarded to ``MetricDistance`` as ``sel2``.
        numshells : int, optional
            Stored as ``self.numshells``.
        shellwidth : int, optional
            Stored as ``self.shellwidth``.
        pbc : bool, optional
            Forwarded unchanged to ``MetricDistance``.
        gap : optional
            Accepted but not used by this constructor.
        truncate : optional
            Forwarded unchanged to ``MetricDistance``.
        """
        super().__init__()

        # Imported when the constructor runs rather than at module import.
        from moleculekit.projections.metricdistance import MetricDistance

        # True when both selections are given as equal values.
        self.symmetrical = sel1 == sel2

        # Ungrouped "distances" metric between the two selections.
        distance_metric = MetricDistance(
            sel1=sel1,
            sel2=sel2,
            pbc=pbc,
            truncate=truncate,
            metric="distances",
            threshold=8,
            groupsel1=None,
            groupsel2=None,
        )
        self.metricdistance = distance_metric

        self.numshells = numshells
        self.shellwidth = shellwidth
        # Both start empty; nothing in this constructor fills them in.
        self.description = None
        self.shellcenters = None
Exemplo n.º 2
0
def mygoalfunction(mol):
    """Return the negated, clamped distance projection of ``mol``.

    ``mol`` is projected with a ``MetricDistance`` between the CA atom of
    HID 42 and atom C11 of residue MOL. Every projected value below 20.0 is
    overwritten with 1.0, the values are printed, and their negation is
    returned.
    """
    selections = (
        'protein and resname HID and resid 42 and name CA',
        'resname MOL and name C11',
    )
    projected = MetricDistance(*selections).project(mol)

    # Overwrite, in place, every entry that is below the 20.0 cutoff.
    below_cutoff = projected < 20.0
    projected[below_cutoff] = 1.0

    print('THE PROJECTION VALUES:', projected)
    return -projected  # or even 1/distance
Exemplo n.º 3
0
    def compute_salt_bridges(self):
        """Detect salt bridges, display them and return them.

        Contacts are computed with ``MetricDistance`` between side-chain
        acidic oxygens and side-chain basic nitrogens (``threshold=3.2``,
        ``pbc=False``). All current representations are removed, the protein
        is shown as NewCartoon and every bridged residue as Licorice.

        Returns
        -------
        salts
            A list with one dict per distinct bridge, holding the keys
            ``"residues"`` and ``"chain"``. When ``label_components`` yields
            a non-empty labelling, the value of ``add_networks_salts`` is
            returned instead.

        Raises
        ------
        Exception
            Any error raised while projecting or mapping the molecule is
            logged and re-raised unchanged.
        """
        # Clear every representation, highest index first.
        # NOTE(review): presumably the reverse order keeps the remaining
        # indexes valid while removing - confirm against `reps.remove`.
        for index, _ in reversed(list(enumerate(self.reps.replist))):
            self.reps.remove(index)

        metr = MetricDistance('sidechain and acidic and element O',
                              'sidechain and basic and element N',
                              metric="contacts",
                              threshold=3.2,
                              pbc=False)
        # Only the projection and mapping are guarded, so the log message
        # is not emitted for unrelated failures further down.
        try:
            data = metr.project(self)
            mapping = metr.getMapping(self)
        except Exception:
            logger.error("Molecule has no basic or acidic residues")
            raise

        if len(np.shape(data)) > 1:
            data = data[0].copy()  # handling NMR structures

        self.reps.add(sel='protein', style='NewCartoon', color=8)

        salts = []
        # `salts` holds dicts, so the old test `[resid1, resid2] not in salts`
        # was always true and never removed duplicates. Track the bridges
        # already recorded in a set instead.
        seen = set()
        contacts = mapping[data].atomIndexes.values
        if contacts.any():
            for salt in contacts:
                first_sel = f"same residue as index {salt[0]}"
                second_sel = f"same residue as index {salt[1]}"
                resid1 = self.get("resid", sel=first_sel)[0]
                chain1 = self.get("chain", sel=first_sel)[0]
                resid2 = self.get("resid", sel=second_sel)[0]
                chain2 = self.get("chain", sel=second_sel)[0]

                # Chains are part of the key so that equal residue numbers
                # on different chains are not merged into one bridge.
                bridge = (int(resid1), chain1, int(resid2), chain2)
                if bridge in seen:
                    continue
                seen.add(bridge)

                salts.append({
                    "residues": [int(resid1), int(resid2)],
                    "chain": [chain1, chain2]
                })
                self.reps.add(f"protein and resid {resid1}",
                              style="Licorice",
                              color="1")
                self.reps.add(f"protein and resid {resid2}",
                              style="Licorice",
                              color="0")

        graph = make_graph_salts(salts)
        comp, _ = label_components(graph)
        if comp.a.size != 0:
            salts = add_networks_salts(graph, comp)
        else:
            logger.warning('No salt bridges present in the structure')
        return salts
Exemplo n.º 4
0
    def setUpClass(self):
        """Project the "adaptive" data set with two metrics.

        The contact projection is stored in ``self.data1`` and the dihedral
        projection in ``self.data2``.
        """
        from glob import glob
        from os.path import join

        from htmd.simlist import simlist, simfilter
        from htmd.projections.metric import Metric
        from moleculekit.projections.metricdistance import MetricDistance
        from moleculekit.projections.metricdihedral import MetricDihedral
        from moleculekit.util import tempname
        from htmd.home import home

        # Collect the directories under "data" and the entries under "input".
        data_dirs = glob(join(home(dataDir="adaptive"), "data", "*", ""))
        input_dirs = glob(join(home(dataDir="adaptive"), "input", "*"))

        # Filter the simulations with "not water", writing to a temporary name.
        filtered = simfilter(simlist(data_dirs, input_dirs), tempname(), "not water")

        projector = Metric(filtered)

        contacts = MetricDistance(
            "protein and resid 10 and name CA",
            "resname BEN and noh",
            periodic="selections",
            metric="contacts",
            groupsel1="residue",
            threshold=4,
        )
        projector.set(contacts)
        self.data1 = projector.project()

        projector.set(MetricDihedral())
        self.data2 = projector.project()
Exemplo n.º 5
0
    def setUpClass(self):
        """Project the "adaptive" data set with two metrics.

        The contact projection is stored in ``self.data1`` and the dihedral
        projection in ``self.data2``.
        """
        from glob import glob
        from os.path import join

        from htmd.simlist import simlist, simfilter
        from htmd.projections.metric import Metric
        from moleculekit.projections.metricdistance import MetricDistance
        from moleculekit.projections.metricdihedral import MetricDihedral
        from moleculekit.util import tempname
        from htmd.home import home

        # Collect the directories under 'data' and the entries under 'input'.
        data_dirs = glob(join(home(dataDir='adaptive'), 'data', '*', ''))
        input_dirs = glob(join(home(dataDir='adaptive'), 'input', '*'))

        # Filter the simulations with 'not water', writing to a temporary name.
        filtered = simfilter(simlist(data_dirs, input_dirs), tempname(), 'not water')

        projector = Metric(filtered)

        contacts = MetricDistance(
            'protein and resid 10 and name CA',
            'resname BEN and noh',
            metric='contacts',
            groupsel1='residue',
            threshold=4,
        )
        projector.set(contacts)
        self.data1 = projector.project()

        projector.set(MetricDihedral())
        self.data2 = projector.project()
Exemplo n.º 6
0
def main(argv):
    """Command-line entry point: find chain A salt bridges and save a session.

    Parameters
    ----------
    argv : list of str
        Command-line arguments without the program name. ``-i``/``--ifile``
        gives the input file, ``-o``/``--ofile`` the output file, and ``-h``
        prints the usage line.

    Raises
    ------
    SystemExit
        With status 2 when the options cannot be parsed, and with the
        default status after ``-h``.
    Exception
        Any error raised while projecting the molecule is logged and
        re-raised unchanged.
    """
    usage = 'salt_bridges.py -i <inputfile> -o <outputfile>'
    inputfile = ''
    outputfile = ''
    try:
        opts, _ = getopt.getopt(argv, "hi:o:", ["ifile=", "ofile="])
    except getopt.GetoptError:
        # The clause must name the exception class. The former
        # `getopt.GetoptError("usage:")` instance made Python raise TypeError
        # here instead of printing the usage line.
        print(usage)
        sys.exit(2)
    for opt, arg in opts:
        if opt == '-h':
            print(usage)
            sys.exit()
        elif opt in ("-i", "--ifile"):
            inputfile = arg
        elif opt in ("-o", "--ofile"):
            outputfile = arg

    # 1. Load molecule
    logger.info("Filtering and writing PDB")
    mol = filter_mol(inputfile)

    # 2. Compute distances
    logger.info("Computing distances among all polar residues")
    metr = MetricDistance('chain A and sidechain and acidic and element O',
                          'chain A and sidechain and basic and element N',
                          metric="contacts",
                          threshold=3.2,
                          pbc=False)
    try:
        data = metr.project(mol)
    except Exception:
        logger.error("Molecule has no basic or acidic residues")
        raise

    if len(np.shape(data)) > 1:
        data = data[0].copy()  # handling NMR structures
    mapping = metr.getMapping(mol)

    # 3. Write txt and vmd session out
    write_salt_bridges(data, mapping, mol, outputfile)
    # The last four characters of the input name are dropped before the
    # "-chainA.pdb" suffix is appended.
    # NOTE(review): presumably this matches the file written by filter_mol -
    # confirm.
    inputfile_processed = f"{inputfile[:-4]}-chainA.pdb"
    postprocess_session(inputfile_processed, outputfile)
    logger.info("Saving VMD session")
Exemplo n.º 7
0
    def compute_salt_bridges(self):
        """Detect salt bridges, display them and return the residue pairs.

        Contacts are computed with ``MetricDistance`` between side-chain
        acidic oxygens and side-chain basic nitrogens (``threshold=3.2``,
        ``pbc=False``). All current representations are removed, the protein
        is shown as NewCartoon, and the residues of every contact are added
        as Licorice.

        Returns
        -------
        salts : list
            Distinct ``[resid1, resid2]`` pairs, in the order first seen.
            Empty when no contact is found, in which case a warning is logged.
        """

        def resid_of(atom_index):
            # Residue id of the residue that contains the given atom index.
            return self.get("resid",
                            sel=f"same residue as index {atom_index}")[0]

        # Clear every representation, highest index first.
        for position, _ in reversed(list(enumerate(self.reps.replist))):
            self.reps.remove(position)

        metr = MetricDistance('sidechain and acidic and element O',
                              'sidechain and basic and element N',
                              metric="contacts",
                              threshold=3.2,
                              pbc=False)
        try:
            data = metr.project(self)
        except BaseException:
            logger.error("Molecule has no basic or acidic residues")
            raise

        if len(np.shape(data)) > 1:
            # handling NMR structures: keep only the first row
            data = data[0].copy()
        mapping = metr.getMapping(self)
        self.reps.add(sel='protein', style='NewCartoon', color=8)

        salts = []
        if not mapping[data].atomIndexes.values.any():
            logger.warning("No salt bridges found in this protein")
            return salts

        for bond in mapping[data].atomIndexes.values:
            first = resid_of(bond[0])
            second = resid_of(bond[1])
            if [first, second] not in salts:
                salts.append([first, second])
            # Representations are added for every contact, including
            # contacts whose residue pair was already recorded.
            self.reps.add(f"protein and resid {first}",
                          style="Licorice",
                          color="1")
            self.reps.add(f"protein and resid {second}",
                          style="Licorice",
                          color="0")

        return salts
Exemplo n.º 8
0
    def __init__(
        self,
        sel1,
        sel2,
        periodic,
        numshells=4,
        shellwidth=3,
        pbc=None,
        gap=None,
        truncate=None,
    ):
        """Configure the shell projection and the distance metric behind it.

        Parameters
        ----------
        sel1
            First atom selection, forwarded to ``MetricDistance`` as ``sel1``.
        sel2
            Second atom selection, forwarded to ``MetricDistance`` as ``sel2``.
        periodic
            Forwarded unchanged to ``MetricDistance``.
        numshells : int, optional
            Stored as ``self.numshells``.
        shellwidth : int, optional
            Stored as ``self.shellwidth``.
        pbc : optional
            Deprecated. Any value other than ``None`` raises.
        gap : optional
            Accepted but not used by this constructor.
        truncate : optional
            Forwarded unchanged to ``MetricDistance``.

        Raises
        ------
        DeprecationWarning
            Raised as an exception when ``pbc`` is not ``None``.
        """
        super().__init__()

        # Reject the legacy option outright rather than silently ignoring it.
        pbc_was_given = pbc is not None
        if pbc_was_given:
            raise DeprecationWarning(
                "The `pbc` option is deprecated please use the `periodic` option as described in MetricDistance."
            )

        # Imported when the constructor runs rather than at module import.
        from moleculekit.projections.metricdistance import MetricDistance

        # True when both selections are given as equal values.
        self.symmetrical = sel1 == sel2

        # Ungrouped "distances" metric between the two selections.
        distance_metric = MetricDistance(
            sel1=sel1,
            sel2=sel2,
            periodic=periodic,
            truncate=truncate,
            metric="distances",
            threshold=8,
            groupsel1=None,
            groupsel2=None,
        )
        self.metricdistance = distance_metric

        self.numshells = numshells
        self.shellwidth = shellwidth
        # Both start empty; nothing in this constructor fills them in.
        self.description = None
        self.shellcenters = None
Exemplo n.º 9
0
    def test_adaptive(self):
        """Run a configured ``AdaptiveBandit`` with ``dryrun`` enabled.

        Both random generators are seeded with 0 before the run.
        """
        import random

        import numpy as np
        from sklearn.cluster import MiniBatchKMeans
        from jobqueues.localqueue import LocalCPUQueue
        from moleculekit.projections.metricdistance import MetricDistance

        # Needed for the clustering to always give same results
        np.random.seed(0)
        random.seed(0)

        md = AdaptiveBandit()

        # Queue, paths and filtering.
        md.app = LocalCPUQueue()
        md.generatorspath = 'generators'
        md.inputpath = 'input'
        md.datapath = 'data'
        md.coorname = 'input.coor'
        md.filter = True
        md.filtersel = 'all'

        # Clustering and projection.
        md.clustmethod = MiniBatchKMeans
        ligand_distance = MetricDistance(
            'protein resid 173 and name CA',
            'resname BEN and name C1 C2 C3 C7',
            periodic='selections',
        )
        md.projection = ligand_distance
        md.ticadim = 2

        # Run limits.
        md.nmin = 1
        md.nmax = 2
        md.nepochs = 9999
        md.nframes = 1000000

        # Bandit settings.
        md.reward_method = 'mean'
        md.exploration = 0.01
        md.actionspace = 'tica'
        md.actionpool = 0
        md.recluster = False

        md.save = True
        md.dryrun = True
        md.run()
Exemplo n.º 10
0
if __name__ == "__main__":
    import os
    import shutil

    import htmd.home
    from htmd.util import tempname
    from moleculekit.projections.metricdistance import MetricDistance

    # Copy the adaptive data into a temporary location and work from there.
    tmpdir = tempname()
    shutil.copytree(htmd.home.home() + '/data/adaptive/', tmpdir)
    os.chdir(tmpdir)

    md = AdaptiveMD()
    # md.dryrun = True
    md.nmin = 1
    md.nmax = 2
    md.nepochs = 3
    md.ticalag = 2
    md.ticadim = 3
    md.updateperiod = 5

    # Assign a single projection first, then a list of two projections.
    md.projection = MetricDistance(
        'protein and name CA',
        'resname BEN and noh',
    )
    md.projection = [
        MetricDistance(
            'protein and name CA',
            'resname BEN and noh',
        ),
        MetricDistance(
            'protein and name CA',
            'resname BEN and noh',
        ),
    ]

    # The actual run is left disabled:
    # md.generatorspath = htmd.home()+'/data/dhfr'
    # md.datapath = 'input'
    # md.app = AcemdLocal(inputfile='input.acemd')

    # md.app = AcemdLocal(datadir='data')
    # md.run()  # Takes too long (2 minutes on 780).
Exemplo n.º 11
0
        # Calculating how many timescales are above the lag time to limit number of macrostates
        from pyemma.msm import timescales_msm
        timesc = timescales_msm(data.St.tolist(), lags=self.lag, nits=macronum).get_timescales()
        macronum = min(self.macronum, max(np.sum(timesc > self.lag), 2))
        return macronum


if __name__ == "__main__":
    import os
    import shutil

    import htmd.home
    from htmd.util import tempname
    from moleculekit.projections.metricdistance import MetricDistance

    # Copy the adaptive data into a temporary location and work from there.
    tmpdir = tempname()
    shutil.copytree(htmd.home.home()+'/data/adaptive/', tmpdir)
    os.chdir(tmpdir)

    md = AdaptiveMD()
    # md.dryrun = True
    md.nmin = 1
    md.nmax = 2
    md.nepochs = 3
    md.ticalag = 2
    md.ticadim = 3
    md.updateperiod = 5

    # Assign a single projection first, then a list of two projections.
    md.projection = MetricDistance(
        'protein and name CA',
        'resname BEN and noh',
        periodic='selections',
    )
    md.projection = [
        MetricDistance(
            'protein and name CA',
            'resname BEN and noh',
            periodic='selections',
        ),
        MetricDistance(
            'protein and name CA',
            'resname BEN and noh',
            periodic='selections',
        ),
    ]


Exemplo n.º 12
0
class MetricShell(Projection):
    """Density of one set of atoms in shells around another set of atoms.

    The projection measures how densely a set of interchangeable atoms
    populates concentric spherical shells centered on some other atoms.
    This makes it suitable for identical molecules (such as water or ions),
    for example to follow changes in water density around atoms. The result
    is an n-by-s dimensional vector, with n the number of atoms in the first
    selection and s the number of shells around each of them.

    Parameters
    ----------
    sel1 : str
        Atom selection string for the atoms around which the shells are built.
        See more `here <http://www.ks.uiuc.edu/Research/vmd/vmd-1.9.2/ug/node89.html>`__
    sel2 : str
        Atom selection string for the atoms whose density is measured in the shells around `sel1`.
        See more `here <http://www.ks.uiuc.edu/Research/vmd/vmd-1.9.2/ug/node89.html>`__
    numshells : int, optional
        How many shells to use around the atoms of `sel1`
    shellwidth : int, optional
        Width of each concentric shell in Angstroms
    pbc : bool, optional
        Set to false to disable distance calculations using periodic distances
    gap : int, optional
        Not functional yet
    truncate : float, optional
        Set all distances larger than `truncate` to `truncate`
    """

    def __init__(
        self,
        sel1,
        sel2,
        numshells=4,
        shellwidth=3,
        pbc=True,
        gap=None,
        truncate=None,
    ):
        super().__init__()

        # Imported when the constructor runs rather than at module import.
        from moleculekit.projections.metricdistance import MetricDistance

        # Ungrouped 'distances' metric between the two selections.
        distance_metric = MetricDistance(
            sel1=sel1,
            sel2=sel2,
            pbc=pbc,
            truncate=truncate,
            metric='distances',
            threshold=8,
            groupsel1=None,
            groupsel2=None,
        )
        self.metricdistance = distance_metric

        self.numshells = numshells
        self.shellwidth = shellwidth
        # Both start empty; nothing in this constructor fills them in.
        self.description = None
        self.shellcenters = None

    def _calculateMolProp(self, mol, props='all'):
        """Compute the requested molecule-dependent properties.

        ``'map'`` is the stacked ``atomIndexes`` of the distance mapping and
        ``'shellcenters'`` holds the unique values of its first column.
        Passing ``'all'`` requests both.
        """
        wanted = ('shellcenters', 'map') if props == 'all' else props
        pairs = np.vstack(self.metricdistance.getMapping(mol).atomIndexes)

        computed = {}
        if 'map' in wanted:
            computed['map'] = pairs
        if 'shellcenters' in wanted:
            computed['shellcenters'] = np.unique(pairs[:, 0])
        return computed

    def project(self, mol):
        """ Project molecule.

        Parameters
        ----------
        mol : :class:`Molecule <moleculekit.molecule.Molecule>`
            A :class:`Molecule <moleculekit.molecule.Molecule>` object to project.

        Returns
        -------
        data : np.ndarray
            An array containing the projected data.
        """
        properties = self._getMolProp(mol, 'all')

        dists = self.metricdistance.project(mol)
        if dists.ndim == 1:
            # Promote a one-dimensional result to a single-row 2D array.
            dists = dists[np.newaxis, :]

        first_atoms = properties['map'][:, 0]
        return _shells(
            dists,
            first_atoms,
            properties['shellcenters'],
            self.numshells,
            self.shellwidth,
        )

    def getMapping(self, mol):
        """ Returns the description of each projected dimension.

        Parameters
        ----------
        mol : :class:`Molecule <moleculekit.molecule.Molecule>` object
            A Molecule object which will be used to calculate the descriptions of the projected dimensions.

        Returns
        -------
        map : :class:`DataFrame <pandas.core.frame.DataFrame>` object
            A DataFrame containing the descriptions of each dimension
        """
        center_mask = self.metricdistance._getMolProp(mol, 'sel1')

        from pandas import DataFrame

        width = self.shellwidth
        # One row per (center atom, shell), atoms outermost, shells innermost.
        rows = [
            (atom, shell * width, (shell + 1) * width)
            for atom in np.where(center_mask)[0]
            for shell in range(self.numshells)
        ]
        return DataFrame({
            'type': ['shell' for _ in rows],
            'atomIndexes': [atom for atom, _, _ in rows],
            'description': [
                f'Density of sel2 atoms in shell {inner}-{outer} A centered on atom '
                f'{mol.resname[atom]} {mol.resid[atom]} {mol.name[atom]}'
                for atom, inner, outer in rows
            ],
        })
Exemplo n.º 13
0
if __name__ == '__main__':
    from glob import glob
    from os.path import join

    from htmd.simlist import simlist, simfilter
    from htmd.projections.metric import Metric
    from moleculekit.projections.metricdistance import MetricDistance
    from moleculekit.projections.metricdihedral import MetricDihedral
    from moleculekit.util import tempname
    from htmd.home import home

    testfolder = home(dataDir='adaptive')

    # Build the simulation list and filter it with 'not water'.
    sims = simlist(
        glob(join(testfolder, 'data', '*', '')),
        glob(join(testfolder, 'input', '*', 'structure.pdb')),
    )
    fsims = simfilter(sims, tempname(), 'not water')

    # Project the same simulations with a contact metric and a dihedral metric.
    metr = Metric(fsims)
    metr.set(
        MetricDistance(
            'protein and resid 10 and name CA',
            'resname BEN and noh',
            metric='contacts',
            groupsel1='residue',
            threshold=4,
        )
    )
    data1 = metr.project()
    metr.set(MetricDihedral())
    data2 = metr.project()

    # Testing combining of metrics
    data1.combine(data2)

    # Testing dimensions
    assert np.array_equal(
        data1.description.shape, (897, 3)
    ), 'combine not working correct'
    assert np.array_equal(
        data1.trajectories[0].projection.shape, (6, 897)
    ), 'combine not working correct'
    assert np.array_equal(
        np.where(data1.description.type == 'contact')[0],
        [0, 1, 2, 3, 4, 5, 6, 7, 8],
    ), 'combine not working correct'

    # Testing dimension dropping / keeping
    datatmp = data1.copy()
    data1.dropDimensions(range(9))
Exemplo n.º 14
0
    def rmsdgoal(proj):
        """Return the negation of ``proj``, so smaller inputs score higher."""
        return -proj  # Lower RMSDs should give higher score

    # Copy the adaptive data into a temporary location and work from there.
    tmpdir = tempname()
    shutil.copytree(htmd.home.home() + "/data/adaptive/", tmpdir)
    os.chdir(tmpdir)
    # Configure an AdaptiveGoal with dryrun enabled.
    md = AdaptiveGoal()
    md.dryrun = True
    md.nmin = 1
    md.nmax = 2
    md.nepochs = 3
    md.ticalag = 2
    md.ticadim = 3
    md.updateperiod = 5
    md.projection = MetricDistance("protein and name CA",
                                   "resname BEN and noh",
                                   periodic="selections")
    # Disabled: an RMSD-based goal projection against a reference structure.
    # md.goalprojection = MetricRmsd(Molecule(htmd.home() + '/data/adaptive/generators/1/structure.pdb'),
    #                               'protein and name CA')
    # The goal function negates the projection (see rmsdgoal).
    md.goalfunction = rmsdgoal
    # Disabled: the queue assignment and the run itself.
    # md.app = LocalGPUQueue()
    # md.run()

    # Some real testing now
    from moleculekit.projections.metricsecondarystructure import (
        MetricSecondaryStructure, )
    from moleculekit.projections.metricdistance import MetricSelfDistance

    os.chdir(path.join(home(), "data", "test-adaptive"))

    goalProjectionDict = {
Exemplo n.º 15
0
class MetricShell(Projection):
    """Calculates the density of atoms around other atoms.

    The MetricShell class calculates the density of a set of
    interchangeable atoms in concentric spherical shells around some
    other atoms. Thus it can treat identical molecules (like water or
    ions) and calculate summary values like the changes in water density
    around atoms. It produces a n-by-s dimensional vector where n the
    number of atoms in the first selection and s the number of shells
    around each of the n atoms.

    Parameters
    ----------
    sel1 : str
        Atom selection string for the first set of atoms around which the shells will be calculated.
        See more `here <http://www.ks.uiuc.edu/Research/vmd/vmd-1.9.2/ug/node89.html>`__
    sel2 : str
        Atom selection string for the second set of atoms whose density will be calculated in shells around `sel1`.
        See more `here <http://www.ks.uiuc.edu/Research/vmd/vmd-1.9.2/ug/node89.html>`__
    periodic : str
        See the documentation of MetricDistance class for options.
    numshells : int, optional
        Number of shells to use around atoms of `sel1`
    shellwidth : int, optional
        The width of each concentric shell in Angstroms
    pbc : optional
        Deprecated. Passing anything other than `None` raises
        `DeprecationWarning`; use `periodic` instead.
    gap : int, optional
        Not functional yet; accepted but ignored by the constructor.
    truncate : float, optional
        Set all distances larger than `truncate` to `truncate`
    """

    def __init__(
        self,
        sel1,
        sel2,
        periodic,
        numshells=4,
        shellwidth=3,
        pbc=None,
        gap=None,
        truncate=None,
    ):
        super().__init__()

        # The legacy option is rejected outright rather than silently ignored.
        if pbc is not None:
            raise DeprecationWarning(
                "The `pbc` option is deprecated please use the `periodic` option as described in MetricDistance."
            )

        from moleculekit.projections.metricdistance import MetricDistance

        # True when both selections are given as equal values.
        self.symmetrical = sel1 == sel2
        self.metricdistance = MetricDistance(
            sel1=sel1,
            sel2=sel2,
            periodic=periodic,
            groupsel1=None,
            groupsel2=None,
            metric="distances",
            threshold=8,
            truncate=truncate,
        )

        self.numshells = numshells
        self.shellwidth = shellwidth
        self.description = None
        self.shellcenters = None

    def _calculateMolProp(self, mol, props="all"):
        """Compute the requested molecule-dependent properties.

        Parameters
        ----------
        mol
            Molecule passed to the distance metric's ``getMapping``.
        props : str or collection of str
            ``"all"`` or any of ``"map"``, ``"shellcenters"``,
            ``"shelledges"`` and ``"shellvol"``.

        Returns
        -------
        res : dict
            One entry per requested property. ``"map"`` is the stacked
            ``atomIndexes`` of the distance mapping, ``"shellcenters"`` the
            unique atom indexes acting as shell centers, ``"shelledges"``
            the ``numshells + 1`` shell boundaries and ``"shellvol"`` the
            volume of each shell.
        """
        props = (
            ("map", "shellcenters", "shelledges", "shellvol")
            if props == "all"
            else props
        )
        res = {}

        mapping = np.vstack(self.metricdistance.getMapping(mol).atomIndexes)
        if "map" in props:
            res["map"] = mapping
        if "shellcenters" in props:
            # For a symmetrical metric both columns of the mapping hold
            # center atoms, otherwise only the first column does.
            res["shellcenters"] = (
                np.unique(mapping[:, 0]) if not self.symmetrical else np.unique(mapping)
            )

        want_edges = "shelledges" in props
        want_volumes = "shellvol" in props
        if want_edges or want_volumes:
            # The edges are computed locally so that "shellvol" can be
            # requested on its own. Reading them back from `res` raised
            # KeyError whenever "shelledges" was not requested too.
            shelledges = np.arange(
                self.shellwidth * (self.numshells + 1), step=self.shellwidth
            )
            if want_edges:
                res["shelledges"] = shelledges
            if want_volumes:
                # Volume between two concentric spheres: 4/3 * pi * (R^3 - r^3).
                res["shellvol"] = (
                    4
                    / 3
                    * np.pi
                    * (shelledges[1:] ** 3 - shelledges[:-1] ** 3)
                )

        return res

    def project(self, mol):
        """Project molecule.

        Parameters
        ----------
        mol : :class:`Molecule <moleculekit.molecule.Molecule>`
            A :class:`Molecule <moleculekit.molecule.Molecule>` object to project.

        Returns
        -------
        data : np.ndarray
            An array containing the projected data.
        """
        molprops = self._getMolProp(mol, "all")

        distances = self.metricdistance.project(mol)
        if distances.ndim == 1:
            # Promote a one-dimensional result to a single-row 2D array.
            distances = distances[np.newaxis, :]

        return _shells(
            distances,
            molprops["map"],
            molprops["shellcenters"],
            self.numshells,
            molprops["shelledges"],
            molprops["shellvol"],
            self.symmetrical,
        )

    def getMapping(self, mol):
        """Returns the description of each projected dimension.

        Parameters
        ----------
        mol : :class:`Molecule <moleculekit.molecule.Molecule>` object
            A Molecule object which will be used to calculate the descriptions of the projected dimensions.

        Returns
        -------
        map : :class:`DataFrame <pandas.core.frame.DataFrame>` object
            A DataFrame containing the descriptions of each dimension
        """
        shellcenters = self._getMolProp(mol, "shellcenters")

        from pandas import DataFrame

        types = []
        indexes = []
        description = []
        # One row per (center atom, shell), atoms outermost, shells innermost.
        for i in shellcenters:
            for n in range(self.numshells):
                inner = n * self.shellwidth
                outer = (n + 1) * self.shellwidth
                types.append("shell")
                indexes.append(i)
                description.append(
                    f"Density of sel2 atoms in shell {inner}-{outer} A centered on atom "
                    f"{mol.resname[i]} {mol.resid[i]} {mol.name[i]}"
                )
        return DataFrame(
            {"type": types, "atomIndexes": indexes, "description": description}
        )