def test_lagged_iterator(self):
    """Time-lagged iteration must yield matched (X, Y) pairs whose length is
    trajectory_length - lag, for every chunk-size configuration."""
    import pyemma.coordinates as coor
    from pyemma.coordinates.tests.util import create_traj, get_top

    traj_len = 4720
    lag = 1000
    num_trajs = 15
    top = get_top()

    generated = [create_traj(top=top, length=traj_len) for _ in range(num_trajs)]
    traj_files = [item[0] for item in generated]
    flat_coords = [item[1].reshape(-1, 9) for item in generated]

    reader = coor.source(traj_files, top=top, chunksize=5000)

    # chunk=None/0 means "whole trajectory at once"; chunks larger than the
    # trajectory itself must behave the same way.
    for chunk in (None, 0, traj_len, traj_len + 1, traj_len + 1000):
        it = reader.iterator(lag=lag, chunk=chunk, return_trajindex=True)
        with it:
            for itraj, X, Y in it:
                np.testing.assert_equal(X.shape, Y.shape)
                np.testing.assert_equal(X.shape[0], traj_len - lag)
                np.testing.assert_array_almost_equal(
                    X, flat_coords[itraj][:traj_len - lag])
                np.testing.assert_array_almost_equal(
                    Y, flat_coords[itraj][lag:])
def test_fragmented_reader_random_access(self):
    """Random-access strided output of a fragmented reader must equal the
    frames mdtraj loads directly from the corresponding files."""
    with TemporaryDirectory() as td:
        trajfiles = [create_traj(start=i * 10, dir=td, length=20)[0]
                     for i in range(3)]
        topfile = get_top()
        # middle entry is a fragmented (two-part) trajectory
        trajfiles = [trajfiles[0], (trajfiles[0], trajfiles[1]), trajfiles[2]]
        source = coor.source(trajfiles, top=topfile)
        assert isinstance(source, FragmentedTrajectoryReader)

        # trajectory indices that actually appear in the RA stride array
        selected = np.unique(self.stride[:, 0])
        for chunksize in (0, 2, 3, 100000):
            out = source.get_output(stride=self.stride, chunk=chunksize)
            for i, coords in enumerate(out):
                if i not in selected:
                    continue
                reference = mdtraj.load(trajfiles[i], top=topfile)
                frame_idx = np.array(self.stride[self.stride[:, 0] == i][:, 1])
                np.testing.assert_equal(
                    coords, reference.xyz[frame_idx].reshape(-1, 3 * 3))
def test_RA_high_stride(self):
    """ ensure we use a random access pattern for high strides chunksize
    combinations to avoid memory issues, and that the RA iterator yields the
    same output as the sequential iterator."""
    n = int(1e5)
    n_bytes = 3 * 3 * 8 * n  # ~8Mb: 3 atoms x 3 dims x 8-byte floats per frame
    savable_formats_mdtra_18 = ('.xtc', '.trr', '.dcd', '.h5', '.binpos',
                                '.nc', '.netcdf', '.ncdf', '.tng')
    for ext in savable_formats_mdtra_18:
        traj = create_traj(length=n, dir=self.tmpdir, format=ext)[0]

        # stdlib replacement for the third-party 'mock' backport
        from unittest.mock import patch
        # temporarily overwrite the memory cutoff with a smaller value,
        # to trigger the switch to RA stride.
        with patch('pyemma.coordinates.util.patches.iterload.MEMORY_CUTOFF',
                   n_bytes - 1):
            r = coor.source(traj, top=get_top())
            it = r.iterator(stride=1000, chunk=100000)
            assert it._mditer.is_ra_iter
            # NOTE(review): chunk differs between the iterator above (100000)
            # and get_output here (10000) — presumably intentional, but
            # worth confirming.
            out_ra = r.get_output(stride=1000, chunk=10000)
            it = r.iterator(stride=1)
            assert not it._mditer.is_ra_iter
            out = r.get_output(stride=1000)
            np.testing.assert_equal(out_ra, out)

            # check max stride exceeding: above the threshold RA must be
            # used, at the threshold sequential iteration is kept.
            from pyemma.coordinates.util.patches import iterload
            it = r.iterator(stride=iterload.MAX_STRIDE_SWITCH_TO_RA + 1)
            assert it._mditer.is_ra_iter
            it = r.iterator(stride=iterload.MAX_STRIDE_SWITCH_TO_RA)
            assert not it._mditer.is_ra_iter
def test_trajs_larger_than_frame_index(self):
    """ file list is larger than largest traj file: requesting frames beyond
    a trajectory's length must raise a ValueError naming that length."""
    from pyemma.coordinates.tests.util import create_traj, get_top
    files = [create_traj(length=10)[0] for _ in range(20)]
    # (traj_index, frame_index) pairs; frame indices >= 10 are out of range
    inds = np.vstack((np.arange(20), np.arange(20))).T
    with self.assertRaises(ValueError) as cm:
        _frames_from_file(files, top=get_top(), frames=inds)
    import re
    # raw string: '\)' and '\=' are invalid escape sequences in a plain
    # string literal (SyntaxWarning on Python >= 3.12); the pattern value
    # is byte-identical.
    matches = re.match(r".*10\).*is larger than trajectory length.*\= 10",
                       cm.exception.args[0])
    assert matches
def test_lagged_iterator_optimized(self):
    """Strided + lagged iteration under a tiny MEMORY_CUTOFF (forcing the
    random-access code path) must still deliver correct (X, Y) chunks."""
    import pyemma.coordinates as coor
    from pyemma.coordinates.tests.util import create_traj, get_top
    from pyemma.coordinates.util.patches import iterload

    traj_len = 4720
    lag = 20
    num_trajs = 15
    # a stride this large guarantees the switch to the RA iterator
    stride = iterload.MAX_STRIDE_SWITCH_TO_RA + 1
    top = get_top()

    generated = [create_traj(top=top, length=traj_len) for _ in range(num_trajs)]
    traj_files = [item[0] for item in generated]
    ref = [item[1].reshape(-1, 9)[::stride] for item in generated]
    ref_lagged = [item[1].reshape(-1, 9)[lag::stride] for item in generated]

    reader = coor.source(traj_files, stride=stride, top=top, chunksize=5000)

    saved_cutoff = iterload.MEMORY_CUTOFF
    try:
        # shrink the cutoff so the RA branch is actually exercised
        iterload.MEMORY_CUTOFF = 8
        it = reader.iterator(stride=stride, lag=lag, chunk=5000,
                             return_trajindex=True)
        with it:
            active_traj = 0
            offset = 0
            for itraj, X, Y in it:
                if itraj != active_traj:
                    # new trajectory started: reset the frame offset
                    active_traj = itraj
                    offset = 0
                np.testing.assert_equal(X.shape, Y.shape)
                n_frames = len(X)
                np.testing.assert_array_almost_equal(
                    X, ref[itraj][offset:offset + n_frames])
                np.testing.assert_array_almost_equal(
                    Y, ref_lagged[itraj][offset:offset + n_frames])
                offset += n_frames
    finally:
        iterload.MEMORY_CUTOFF = saved_cutoff
def test_fragmented_reader_random_access1(self):
    """Random access on a fragmented reader whose leaf readers return full
    mdtraj Trajectory objects: the joined per-trajectory frames must equal
    the frames mdtraj loads directly."""
    with TemporaryDirectory() as td:
        trajfiles = []
        for i in range(3):
            trajfiles.append(
                create_traj(start=i * 10, dir=td, length=20)[0])
        topfile = get_top()
        # first entry is a fragmented (two-part) trajectory
        trajfiles = [(trajfiles[0], trajfiles[1]), trajfiles[0],
                     trajfiles[2]]
        source = coor.source(trajfiles, top=topfile)
        assert isinstance(source, FragmentedTrajectoryReader)
        # switch every leaf reader to returning mdtraj.Trajectory objects
        # instead of flat coordinate arrays.
        # NOTE(review): the `not isinstance` guard looks inverted — indexing
        # `r[0]` on a reader that is NOT a list/tuple is surprising; confirm
        # against FragmentedTrajectoryReader._readers' actual layout.
        for r in source._readers:
            if not isinstance(r, (list, tuple)):
                r = r[0]
            for _r in r:
                _r._return_traj_obj = True
        from collections import defaultdict
        for chunksize in [0, 2, 3, 100000]:
            # collect the Trajectory chunks emitted per trajectory index
            frames = defaultdict(list)
            with source.iterator(chunk=chunksize, return_trajindex=True,
                                 stride=self.stride) as it:
                for itraj, t in it:
                    frames[itraj].append(t)
            # join each trajectory's chunks into a single Trajectory
            dest = []
            for itraj in frames.keys():
                dest.append(frames[itraj][0])
                for t in frames[itraj][1:]:
                    dest[-1] = dest[-1].join(t)
            # only compare trajectories that appear in the RA stride array
            keys = np.unique(self.stride[:, 0])
            for i, coords in enumerate(dest):
                if i in keys:
                    traj = mdtraj.load(trajfiles[i], top=topfile)
                    np.testing.assert_equal(
                        coords.xyz,
                        traj.xyz[np.array(
                            self.stride[self.stride[:, 0] == i][:, 1])],
                        err_msg="not equal for chunksize=%s" % chunksize)
def test_cache_miss_same_filename(self):
    """Regression test for issue #1541: overwriting trajectory/topology
    files under identical names must not serve stale cached data."""
    tmpdir = None
    try:
        pdb_name = os.path.basename(pdbfile)
        xtc_name = os.path.basename(xtcfiles[0])
        tmpdir = Path(tempfile.mkdtemp())
        shutil.copyfile(pdbfile, tmpdir / pdb_name)
        shutil.copyfile(xtcfiles[0], tmpdir / xtc_name)

        # first read populates the cache with the original files
        _ = pyemma.coordinates.source(tmpdir / xtc_name,
                                      top=tmpdir / pdb_name)

        # overwrite both files in place, keeping the exact same names
        shutil.copyfile(get_top(), tmpdir / pdb_name)
        t = mdtraj.load(tmpdir / pdb_name)
        t.xyz = np.zeros(shape=(400, 3, 3))
        t.time = np.arange(len(t.xyz))
        t.save(tmpdir / xtc_name, force_overwrite=True)

        # must re-read the new content rather than hit a stale cache entry
        _ = pyemma.coordinates.source(tmpdir / xtc_name,
                                      top=tmpdir / pdb_name)
    finally:
        shutil.rmtree(tmpdir, ignore_errors=True)
def test_with_fragmented_reader(self):
    """save_traj on a fragmented reader: random-access indices that span
    both fragments of trajectory 1 must resolve to the correct frames."""
    from pyemma.util.files import TemporaryDirectory
    trajlen = 35
    # trajectory 0 (first trajectory, is trajfiles[2])  -> skipped
    # trajectory 1 (second, is (trajfiles[0], trajfiles[1])):
    #   fragment 1 -> frames 0, 1, 2, 10
    #   fragment 2 -> frames 1 (i.e. 36) and 34 (i.e. 69)
    # trajectory 2 (third trajectory, is trajfiles[2])  -> frame 5
    ra_indices = np.array(
        [[1, 0], [1, 1], [1, 2], [1, 10],
         [1, trajlen + 1], [1, 2 * trajlen - 1],
         [2, 5]], dtype=int)
    with TemporaryDirectory() as td:
        trajfiles = []
        xyzs = []
        for i in range(3):
            fname, xyz, _ = create_traj(start=i * 10, dir=td, length=trajlen)
            trajfiles.append(fname)
            xyzs.append(xyz)
        topfile = get_top()
        frag_traj = [trajfiles[2], [trajfiles[0], trajfiles[1]],
                     trajfiles[2]]
        expected = np.vstack((
            xyzs[0][np.array([0, 1, 2, 10]), :],  # fragment 1 picks
            xyzs[1][np.array([1, 34])],           # fragment 2 picks
            np.array([(xyzs[2][5, :])]),          # trajectory 2, frame 5
        ))
        reader = coor.source(frag_traj, top=topfile)
        # result must be independent of the save chunk size
        for cs in range(1, 10):
            traj = save_traj(reader, ra_indices, None, chunksize=cs)
            np.testing.assert_almost_equal(traj.xyz, expected)