def test_lagged_iterator(self):
        """Check time-lagged iteration over several trajectories.

        For each chunk setting tried here, every (X, Y) pair yielded by the
        iterator must have ``trajectory_length - lagtime`` rows, with X equal
        to the leading frames of the trajectory and Y equal to the frames
        starting at ``lagtime``.
        """
        import pyemma.coordinates as coor
        from pyemma.coordinates.tests.util import create_traj, get_top

        trajectory_length = 4720
        lagtime = 1000
        n_trajs = 15
        # number of frames for which a lagged partner exists
        n_lagged = trajectory_length - lagtime

        top = get_top()
        trajs = []
        xyzs = []
        for _ in range(n_trajs):
            created = create_traj(top=top, length=trajectory_length)
            trajs.append(created[0])
            # flatten the coordinates to 9 columns per frame
            xyzs.append(created[1].reshape(-1, 9))

        reader = coor.source(trajs, top=top, chunksize=5000)

        chunk_settings = (
            None,
            0,
            trajectory_length,
            trajectory_length + 1,
            trajectory_length + 1000,
        )
        for chunk in chunk_settings:
            it = reader.iterator(lag=lagtime,
                                 chunk=chunk,
                                 return_trajindex=True)
            with it:
                for itraj, X, Y in it:
                    np.testing.assert_equal(X.shape, Y.shape)
                    np.testing.assert_equal(X.shape[0], n_lagged)
                    reference = xyzs[itraj]
                    np.testing.assert_array_almost_equal(
                        X, reference[:n_lagged])
                    np.testing.assert_array_almost_equal(
                        Y, reference[lagtime:])
    def test_fragmented_reader_random_access(self):
        """Compare random-access ``get_output`` of a fragmented source with mdtraj.

        For every trajectory index present in ``self.stride`` the output must
        equal the frames selected by ``self.stride`` from the same file(s)
        loaded with mdtraj, flattened to 9 columns.
        """
        with TemporaryDirectory() as td:
            created = [
                create_traj(start=i * 10, dir=td, length=20)[0]
                for i in range(3)
            ]
            topfile = get_top()

            # the middle entry is a tuple of two files
            trajfiles = [created[0], (created[0], created[1]), created[2]]

            source = coor.source(trajfiles, top=topfile)
            assert isinstance(source, FragmentedTrajectoryReader)

            for chunksize in [0, 2, 3, 100000]:
                out = source.get_output(stride=self.stride, chunk=chunksize)
                keys = np.unique(self.stride[:, 0])
                for i, coords in enumerate(out):
                    if i not in keys:
                        continue
                    traj = mdtraj.load(trajfiles[i], top=topfile)
                    # second column of the rows whose first column is i
                    selected = self.stride[self.stride[:, 0] == i][:, 1]
                    reference = traj.xyz[np.array(selected)].reshape(-1, 3 * 3)
                    np.testing.assert_equal(coords, reference)
Пример #3
0
    def test_RA_high_stride(self):
        """Ensure a random access pattern is used for high stride/chunksize combinations.

        This avoids memory issues. For every listed file format the test
        checks that

        * with a lowered ``MEMORY_CUTOFF`` the iterator reports ``is_ra_iter``
          and yields the same output as the regular strided read,
        * a stride of ``MAX_STRIDE_SWITCH_TO_RA + 1`` reports ``is_ra_iter``,
          while a stride of exactly ``MAX_STRIDE_SWITCH_TO_RA`` does not.
        """
        # unittest.mock ships with the standard library, so the external
        # ``mock`` backport is not needed.
        from unittest.mock import patch
        from pyemma.coordinates.util.patches import iterload

        n = int(1e5)
        n_bytes = 3 * 3 * 8 * n  # 7.2e6 bytes (~7.2 MB)
        savable_formats_mdtraj_18 = ('.xtc', '.trr', '.dcd', '.h5', '.binpos',
                                     '.nc', '.netcdf', '.ncdf', '.tng')
        for ext in savable_formats_mdtraj_18:
            traj = create_traj(length=n, dir=self.tmpdir, format=ext)[0]

            # temporarily overwrite the memory cutoff with a smaller value,
            # to trigger the switch to RA stride.
            with patch(
                    'pyemma.coordinates.util.patches.iterload.MEMORY_CUTOFF',
                    n_bytes - 1):
                r = coor.source(traj, top=get_top())
                it = r.iterator(stride=1000, chunk=100000)
                assert it._mditer.is_ra_iter

                out_ra = r.get_output(stride=1000, chunk=10000)

            # with the original cutoff restored, stride 1 must not use RA
            it = r.iterator(stride=1)
            assert not it._mditer.is_ra_iter
            out = r.get_output(stride=1000)
            np.testing.assert_equal(out_ra, out)

            # check max stride exceeding
            it = r.iterator(stride=iterload.MAX_STRIDE_SWITCH_TO_RA + 1)
            assert it._mditer.is_ra_iter

            it = r.iterator(stride=iterload.MAX_STRIDE_SWITCH_TO_RA)
            assert not it._mditer.is_ra_iter
Пример #4
0
    def test_trajs_larger_than_frame_index(self):
        """ file list is larger than largest traj file

        Twenty files of length 10 are created and frame ``i`` is requested
        from file ``i``; the request must raise a ``ValueError`` whose message
        matches the pattern checked below.
        """
        import re
        from pyemma.coordinates.tests.util import create_traj, get_top

        files = [create_traj(length=10)[0] for _ in range(20)]
        # rows are (file index, frame index) = (i, i) for i in 0..19
        inds = np.vstack((np.arange(20), np.arange(20))).T

        with self.assertRaises(ValueError) as cm:
            _frames_from_file(files, top=get_top(), frames=inds)

        # Raw string: "\)" and "\=" are regex escapes, not Python string
        # escapes; a non-raw literal triggers an invalid-escape warning.
        matches = re.match(r".*10\).*is larger than trajectory length.*\= 10",
                           cm.exception.args[0])
        assert matches
    def test_lagged_iterator_optimized(self):
        """Check lagged iteration with a stride above ``MAX_STRIDE_SWITCH_TO_RA``.

        ``iterload.MEMORY_CUTOFF`` is set to 8 for the duration of the
        iteration and restored afterwards. Every yielded (X, Y) pair is
        compared against the strided and the lagged-and-strided reference
        coordinates of its trajectory.
        """
        import pyemma.coordinates as coor
        from pyemma.coordinates.tests.util import create_traj, get_top
        from pyemma.coordinates.util.patches import iterload

        trajectory_length = 4720
        lagtime = 20
        n_trajs = 15
        stride = iterload.MAX_STRIDE_SWITCH_TO_RA + 1

        top = get_top()
        trajs_data = [
            create_traj(top=top, length=trajectory_length)
            for _ in range(n_trajs)
        ]
        trajs = [entry[0] for entry in trajs_data]
        flattened = [entry[1].reshape(-1, 9) for entry in trajs_data]
        xyzs = [coords[::stride] for coords in flattened]
        xyzs_lagged = [coords[lagtime::stride] for coords in flattened]

        reader = coor.source(trajs, stride=stride, top=top, chunksize=5000)

        original_cutoff = iterload.MEMORY_CUTOFF
        try:
            iterload.MEMORY_CUTOFF = 8
            it = reader.iterator(stride=stride,
                                 lag=lagtime,
                                 chunk=5000,
                                 return_trajindex=True)
            with it:
                active_traj = 0
                offset = 0  # rows already consumed from the active trajectory
                for itraj, X, Y in it:
                    if itraj != active_traj:
                        # a new trajectory starts: restart the row counter
                        active_traj = itraj
                        offset = 0
                    np.testing.assert_equal(X.shape, Y.shape)
                    n_rows = len(X)
                    upper = offset + n_rows
                    np.testing.assert_array_almost_equal(
                        X, xyzs[itraj][offset:upper])
                    np.testing.assert_array_almost_equal(
                        Y, xyzs_lagged[itraj][offset:upper])
                    offset += n_rows
        finally:
            iterload.MEMORY_CUTOFF = original_cutoff
    def test_fragmented_reader_random_access1(self):
        """Compare trajectory objects from random-access iteration with mdtraj.

        The chunks yielded for each trajectory index are joined and their
        ``xyz`` is compared against the frames selected by ``self.stride``
        from the same file(s) loaded with mdtraj.

        The joined chunks are keyed by the trajectory index reported by the
        iterator. Enumerating a list of joined chunks would yield positions
        instead, which differ from the trajectory indices whenever an index
        is absent from the iteration.
        """
        from collections import defaultdict

        with TemporaryDirectory() as td:
            trajfiles = []
            for i in range(3):
                trajfiles.append(
                    create_traj(start=i * 10, dir=td, length=20)[0])
            topfile = get_top()
            trajfiles = [(trajfiles[0], trajfiles[1]), trajfiles[0],
                         trajfiles[2]]

            source = coor.source(trajfiles, top=topfile)
            assert isinstance(source, FragmentedTrajectoryReader)

            for r in source._readers:
                # NOTE(review): indexing ``r`` when it is NOT a list/tuple
                # looks inverted; kept as in the original, confirm against
                # the structure of ``source._readers``.
                if not isinstance(r, (list, tuple)):
                    r = r[0]
                for _r in r:
                    _r._return_traj_obj = True

            keys = np.unique(self.stride[:, 0])
            for chunksize in [0, 2, 3, 100000]:
                # collect the yielded chunks per trajectory index
                frames = defaultdict(list)
                with source.iterator(chunk=chunksize,
                                     return_trajindex=True,
                                     stride=self.stride) as it:
                    for itraj, t in it:
                        frames[itraj].append(t)

                for itraj, pieces in frames.items():
                    joined = pieces[0]
                    for piece in pieces[1:]:
                        joined = joined.join(piece)

                    if itraj not in keys:
                        continue
                    traj = mdtraj.load(trajfiles[itraj], top=topfile)
                    # second column of the rows whose first column is itraj
                    selected = self.stride[self.stride[:, 0] == itraj][:, 1]
                    np.testing.assert_equal(
                        joined.xyz,
                        traj.xyz[np.array(selected)],
                        err_msg="not equal for chunksize=%s" % chunksize)
Пример #7
0
    def test_cache_miss_same_filename(self):
        """Reproduces issue #1541.

        A source is created from a trajectory/topology pair in a temporary
        directory. Both files are then overwritten under the same names with
        different content, and creating a source again must not raise.
        """
        tmpdir = None
        try:
            fname_pdb = os.path.basename(pdbfile)
            fname_xtc = os.path.basename(xtcfiles[0])
            tmpdir = Path(tempfile.mkdtemp())
            pdb_path = tmpdir / fname_pdb
            xtc_path = tmpdir / fname_xtc

            # first pass: original topology and trajectory
            shutil.copyfile(pdbfile, pdb_path)
            shutil.copyfile(xtcfiles[0], xtc_path)
            _ = pyemma.coordinates.source(xtc_path, top=pdb_path)

            # second pass: same file names, different content
            shutil.copyfile(get_top(), pdb_path)  # overwrite pdb
            replacement = mdtraj.load(pdb_path)
            replacement.xyz = np.zeros(shape=(400, 3, 3))
            replacement.time = np.arange(len(replacement.xyz))
            replacement.save(xtc_path, force_overwrite=True)
            _ = pyemma.coordinates.source(xtc_path, top=pdb_path)
        finally:
            shutil.rmtree(tmpdir, ignore_errors=True)
Пример #8
0
    def test_with_fragmented_reader(self):
        """Check ``save_traj`` on a source containing a fragmented trajectory.

        For chunk sizes 1 to 9 the coordinates of the saved trajectory must
        match the frames selected by ``ra_indices``.
        """
        from pyemma.util.files import TemporaryDirectory
        trajlen = 35
        # trajectory 0 (first trajectory, is trajfiles[2])
        #   -> skipped
        # trajectory 1 (second trajectory, is {trajfiles[0], trajfiles[1]})
        #   fragment 1:
        #       -> frames 0,1,2,10
        #   fragment 2:
        #       -> frames 1 (i.e., 36) and 34 (i.e., 69)
        # trajectory 2 (third trajectory, is trajfiles[2])
        #   -> frame 5
        ra_indices = np.array(
            [
                [1, 0],
                [1, 1],
                [1, 2],
                [1, 10],
                [1, trajlen + 1],
                [1, 2 * trajlen - 1],
                [2, 5],
            ],
            dtype=int)
        with TemporaryDirectory() as td:

            trajfiles = []
            xyzs = []
            for i in range(3):
                path, coords, _ = create_traj(start=i * 10,
                                              dir=td,
                                              length=trajlen)
                trajfiles.append(path)
                xyzs.append(coords)

            topfile = get_top()
            frag_traj = [
                trajfiles[2],
                [trajfiles[0], trajfiles[1]],
                trajfiles[2],
            ]

            # expected frames, in the order given by ra_indices
            first_fragment = xyzs[0][np.array([0, 1, 2, 10]), :]
            second_fragment = xyzs[1][np.array([1, 34])]
            last_traj = np.array([(xyzs[2][5, :])])
            expected = np.vstack((first_fragment, second_fragment, last_traj))

            reader = coor.source(frag_traj, top=topfile)

            for chunksize in range(1, 10):
                traj = save_traj(reader, ra_indices, None,
                                 chunksize=chunksize)
                np.testing.assert_almost_equal(traj.xyz, expected)