def test_write_to_csv_propagate_filenames(self):
    """write_to_csv must derive output file names from the reader's input files."""
    from pyerna.coordinates import source, tica
    with TemporaryDirectory() as td:
        arrays = [np.random.random((20, 3))] * 3
        fns = [os.path.join(td, name) for name in ('blah.npy', 'blub.npy', 'foo.npy')]
        for arr, fn in zip(arrays, fns):
            np.save(fn, arr)
        reader = source(fns)
        assert reader.filenames == fns
        transformer = tica(reader, lag=1, dim=2)
        transformer.write_to_csv(extension=".exotic", chunksize=3)
        written = sorted(os.path.abspath(p) for p in glob(td + os.path.sep + '*.exotic'))
        self.assertEqual(len(written), len(fns))
        wanted = sorted(s.replace('.npy', '.exotic') for s in fns)
        self.assertEqual(written, wanted)
        # compare written results against the in-memory transform output
        expected = transformer.get_output()
        actual = source([s.replace('.npy', '.exotic') for s in fns]).get_output()
        assert len(actual) == len(fns)
        for got, ref in zip(actual, expected):
            np.testing.assert_allclose(got, ref)
def test_fragmented_trajs(self):
    """ build two fragmented readers consisting out of two fragments each
    and check if they are merged properly."""
    first_half = np.arange(20)
    second_half = np.arange(20, 40)
    reader_a = source([(first_half, second_half)])
    reader_b = source([(first_half, second_half)])
    merged_out = SourcesMerger((reader_a, reader_b)).get_output()
    # both sources contribute the identical 0..39 column
    col = np.atleast_2d(np.arange(40))
    np.testing.assert_equal(merged_out, [np.concatenate((col, col), axis=0).T])
def test_length_and_content_feature_reader_and_TICA(self):
    """Strided reader and TICA output must agree with reference lengths and data."""
    for stride in range(1, 100, 23):
        reader = coor.source(self.trajnames, top=self.temppdb)
        transformer = coor.tica(data=reader, lag=2, dim=2)

        # subsample data
        out_tica = transformer.get_output(stride=stride)
        out_reader = reader.get_output(stride=stride)

        # get length in different ways
        len_tica = [o.shape[0] for o in out_tica]
        len_reader = [o.shape[0] for o in out_reader]
        len_trajs = transformer.trajectory_lengths(stride=stride)
        len_ref = [(d.shape[0] - 1) // stride + 1 for d in self.data]

        # all length computations must agree
        np.testing.assert_equal(len_trajs, len_ref)
        self.assertTrue(len_ref == len_tica)
        self.assertTrue(len_ref == len_reader)

        # compare content (reader)
        for ref_data, test_data in zip(self.data, out_reader):
            flattened = ref_data.reshape((ref_data.shape[0], ref_data.shape[1] * 3))
            self.assertTrue(np.allclose(flattened[::stride, :], test_data, atol=1E-3))
def test_non_matching_lengths(self):
    """SourcesMerger must reject sources whose trajectory lengths differ."""
    arrays = self.readers[1].data
    # truncate the last trajectory so lengths no longer match
    arrays = [arrays[0], arrays[1], arrays[2][:20]]
    self.readers.append(source(arrays))
    with self.assertRaises(ValueError) as ctx:
        SourcesMerger(self.readers)
    self.assertIn('matching', ctx.exception.args[0])
def partial_fit(self, X):
    """ incrementally update the covariances and mean.

    Parameters
    ----------
    X: array, list of arrays, PyEMMA reader
        input data.

    Notes
    -----
    The projection matrix is first being calculated upon its first access.
    """
    from pyerna.coordinates import source
    iterable = source(X)
    # only a fixed integer output dimension can be validated against the input
    # dimension here (other dim specifications are resolved later)
    if isinstance(self.dim, int):
        indim = iterable.dimension()
        if not self.dim <= indim:
            raise RuntimeError("requested more output dimensions (%i) than dimension"
                               " of input data (%i)" % (self.dim, indim))

    self._covar = self._init_covar(partial=True)
    # accumulate running covariance statistics from this chunk
    self._covar.partial_fit(iterable)
    self.model.update_model_params(mean_0=self._covar.mean,  # TODO: inefficient, fixme
                                   mean_t=self._covar.mean_tau,
                                   C00=self._covar.C00_,
                                   C0t=self._covar.C0t_,
                                   Ctt=self._covar.Ctt_)
    # invalidate derived quantities; they are recomputed on next access
    self._estimated = False

    return self.model
def test_with_save_traj(self):
    """save_traj on a fragmented source must resolve frames across fragment borders."""
    path = pkg_resources.resource_filename(__name__, 'data') + os.path.sep
    pdb_file = os.path.join(path, 'bpti_ca.pdb')
    traj_files = [os.path.join(path, name) for name in
                  ('bpti_001-033.xtc', 'bpti_034-066.xtc', 'bpti_067-100.xtc')]
    source_frag = coor.source([traj_files], top=pdb_file)
    full_data = source_frag.get_output()[0]

    # (trajectory index, frame index) pairs around the fragment boundaries
    last_frame_fragment_0 = [0, 32]
    first_frame_fragment_1 = [0, 33]
    first_frame_fragment_2 = [0, 66]

    def flat(f):
        # collapse (n_frames, n_atoms, 3) into (n_frames, n_atoms * 3)
        return f.xyz.reshape((f.xyz.shape[0], f.xyz.shape[1] * f.xyz.shape[2])).squeeze()

    # Frames in the first fragment:
    frames = coor.save_traj(source_frag, [last_frame_fragment_0], None)
    np.testing.assert_equal(flat(frames), full_data[32])

    # Frames in the first and second fragments
    frames = coor.save_traj(source_frag, [last_frame_fragment_0, first_frame_fragment_1], None)
    np.testing.assert_equal(flat(frames), full_data[np.array([32, 33])])

    # Frames only in the second fragment
    frames = coor.save_traj(source_frag, [first_frame_fragment_1], None)
    np.testing.assert_equal(flat(frames), full_data[33])

    # Frames only in the second and third fragments
    frames = coor.save_traj(source_frag, [first_frame_fragment_1, first_frame_fragment_2], None)
    np.testing.assert_equal(flat(frames), full_data[np.array([33, 66])])
def test_fragmented_xtc(self):
    """Fragmented xtc groups must concatenate in the given fragment order."""
    from pyerna.coordinates.tests.util import create_traj
    top_file = pkg_resources.resource_filename(__name__, 'data/test.pdb')
    trajfiles = [create_traj(top_file)[0] for _ in range(3)]
    try:
        # three trajectories: one consisting of all three, one consisting of the first,
        # one consisting of the first and the last
        reader = coor.source([trajfiles, [trajfiles[0]], [trajfiles[0], trajfiles[2]]],
                             top=top_file)
        reader.chunksize = 1000
        out = reader.get_output(stride=1)
        trajs = [mdtraj.load(fn, top=top_file).xyz.reshape(-1, 9) for fn in trajfiles]
        np.testing.assert_equal(out[0], np.vstack(trajs))
        np.testing.assert_equal(out[1], trajs[0])
        np.testing.assert_equal(out[2], np.vstack((trajs[0], trajs[2])))
    finally:
        # best-effort cleanup of the temporary trajectory files
        for fn in trajfiles:
            try:
                os.unlink(fn)
            except EnvironmentError:
                pass
def _test_fragment_reader(self, file_format, stride, lag, chunksize):
    """Check FragmentedTrajectoryReader iteration, with and without lag."""
    trajs = self.test_trajs[file_format]
    reader = coor.source([trajs], top=self.pdb_file, chunksize=chunksize)
    assert isinstance(reader, FragmentedTrajectoryReader)
    data = np.vstack(self.traj_data)
    itraj = None
    if lag > 0:
        chunks, lagged_chunks = [], []
        for itraj, X, Y in reader.iterator(stride=stride, lag=lag):
            chunks.append(X)
            lagged_chunks.append(Y)
        assert chunks
        assert lagged_chunks
        assert len(chunks) == len(lagged_chunks)
        stacked = np.vstack(chunks)
        stacked_lagged = np.vstack(lagged_chunks)
        # unlagged stream is truncated to the length of the lagged stream
        np.testing.assert_allclose(data[::stride][0:len(stacked_lagged)], stacked,
                                   atol=self.eps,
                                   err_msg="lag={}, stride={}, cs={}".format(lag, stride, chunksize))
        np.testing.assert_allclose(data[lag::stride], stacked_lagged, atol=self.eps)
    else:
        chunks = []
        for itraj, X in reader.iterator(stride=stride):
            chunks.append(X)
        assert chunks
        np.testing.assert_allclose(data[::stride], np.vstack(chunks), atol=self.eps)
    assert itraj == 0  # only one trajectory
def test_lagged_iterator(self):
    """Lagged iteration must yield aligned (X, Y) pairs for every chunk setting."""
    import pyerna.coordinates as coor
    from pyerna.coordinates.tests.util import create_traj, get_top
    trajectory_length = 4720
    lagtime = 1000
    n_trajs = 15

    top = get_top()
    trajs_data = [create_traj(top=top, length=trajectory_length) for _ in range(n_trajs)]
    trajs = [td[0] for td in trajs_data]
    xyzs = [td[1].reshape(-1, 9) for td in trajs_data]

    reader = coor.source(trajs, top=top, chunksize=5000)
    # exercise default, single-pass, exact-length and oversized chunks
    for chunk in (None, 0, trajectory_length, trajectory_length + 1,
                  trajectory_length + 1000):
        it = reader.iterator(lag=lagtime, chunk=chunk, return_trajindex=True)
        with it:
            for itraj, X, Y in it:
                np.testing.assert_equal(X.shape, Y.shape)
                np.testing.assert_equal(X.shape[0], trajectory_length - lagtime)
                np.testing.assert_array_almost_equal(X, xyzs[itraj][:trajectory_length - lagtime])
                np.testing.assert_array_almost_equal(Y, xyzs[itraj][lagtime:])
def setUp(self):
    """Build three readers: md trajectories, random arrays, and a TICA transform."""
    self.readers = []
    data_dir = pkg_resources.resource_filename('pyerna.coordinates.tests', 'data')
    # three md trajs
    trajs = glob(data_dir + "/bpti_0*.xtc")
    top = os.path.join(data_dir, 'bpti_ca.pdb')
    md_reader = source(trajs, top=top)
    self.readers.append(md_reader)
    md_reader.featurizer.add_all()
    ndim = md_reader.ndim
    # three random arrays with the same lengths/dimension as the md data
    arrays = [np.random.random((length, ndim))
              for length in md_reader.trajectory_lengths()]
    self.readers.append(source(arrays))
    self.readers.append(tica(self.readers[-1], dim=20))
def test_MD_data(self):
    """Smoke test: TICA on a tiny md data set must not raise."""
    # this is too little data to get reasonable results. We just test to avoid exceptions
    path = pkg_resources.resource_filename(__name__, 'data') + os.path.sep
    self.pdb_file = os.path.join(path, 'bpti_ca.pdb')
    self.xtc_file = os.path.join(path, 'bpti_mini.xtc')
    reader = source(self.xtc_file, top=self.pdb_file)
    # constructing/estimating must not raise
    tica(reader, lag=1)
def partial_fit(self, X):
    """Incrementally update the estimator with a new chunk of input data.

    Parameters
    ----------
    X : array, list of arrays, or PyEMMA reader
        input data; wrapped into a streaming data source.
    """
    from pyerna.coordinates import source
    iterable = source(X)
    self._estimate(iterable, partial=True)
    # invalidate derived quantities; they are recomputed on next access
    self._estimated = False
    return self
def test_parametrize_with_stride(self):
    """TICA with stride either yields eigenvalues <= 1 or fails only for incompatible strides."""
    for stride in range(1, 100, 23):
        reader = coor.source(self.trajnames, top=self.temppdb)
        tau = 5
        try:
            transform = coor.tica(reader, lag=tau, stride=stride, dim=2)
            # internal consistency check: TICA eigenvalues must not exceed 1
            self.assertTrue(np.all(transform.eigenvalues <= 1.0 + 1.E-12))
        except RuntimeError:
            # estimation may only fail when the stride does not divide the lag
            assert tau % stride != 0
def test_pass_reader(self):
    """_frames_from_file must accept an already-constructed (in-memory) reader."""
    from pyerna.coordinates import source
    reader = source(self.trajfiles, top=self.pdbfile)
    reader.in_memory = True
    # rows of (trajectory index, frame index); first column is always 0 here
    inds = np.vstack((np.random.randint(0, 1), np.random.randint(0, 100))).T
    _frames_from_file(reader.filenames, self.pdbfile, inds, reader=reader)
def _test_base_reader(self, file_format, stride, skip, chunksize, transform):
    """Iterate a reader with given stride/skip/chunksize and compare chunk shapes
    and concatenated content against the reference trajectory data."""
    trajs = self.test_trajs[file_format]
    reader = coor.source(trajs, top=self.pdb_file, chunksize=chunksize)
    if transform == 'identity':
        # wrap the reader in an identity transform to also exercise that code path
        reader = util.create_transform(reader)
    if chunksize is not None:
        np.testing.assert_equal(reader.chunksize, chunksize)
    it = reader.iterator(stride=stride, skip=skip, lag=0, chunk=chunksize)
    assert it.chunksize is not None
    if chunksize is None:
        max_frames = max_chunksize_from_config(reader.output_type().itemsize)
        assert it.chunksize <= max_frames
        # now we set the chunksize to max_frames, to be able to compare the actual shapes of iterator output.
        chunksize = max_frames
    traj_data = [data[skip::stride] for data in self.traj_data]
    # only trajectories that still contain frames after skip/stride are expected
    valid_itraj = [i for i, x in enumerate(traj_data) if len(x) > 0]
    output = defaultdict(list)
    with it:
        current_itraj = None
        t = t_total = 0
        for itraj, chunk in it:
            # reset t upon next trajectory
            if itraj != current_itraj:
                current_itraj = itraj
                t = 0
            assert len(chunk) <= chunksize or chunksize == 0, '%s' % it
            # only the last chunk of a trajectory may be shorter than chunksize
            if chunksize != 0 and len(traj_data[itraj]) - t >= chunksize:
                assert len(chunk) == chunksize
            elif chunksize == 0:
                assert len(chunk) == len(traj_data[itraj])
            output[itraj].append(chunk)
            t += len(chunk)
            t_total += len(chunk)
    # every non-empty trajectory must be visited, and nothing else
    for itraj in valid_itraj:
        assert itraj in output.keys()
    for itraj in output.keys():
        assert itraj in valid_itraj
        output[itraj] = np.vstack(output[itraj])
        np.testing.assert_allclose(output[itraj], traj_data[itraj], atol=self.eps)
    # the total frame count must be consistent with the reader's own accounting
    assert t_total == sum(len(x) for x in output.values())
    assert t_total == reader.n_frames_total(stride=stride, skip=skip)
def setUpClass(cls):
    """Build a small source -> tica -> kmeans pipeline and save it with its chain."""
    from pyerna.datasets import get_bpti_test_data
    bpti = get_bpti_test_data()
    reader = source(bpti['trajs'], top=bpti['top'])
    transform = tica(reader, lag=1)
    clustering = cluster_kmeans(transform)
    # NOTE(review): tempfile.mktemp is deprecated/racy; mkstemp would be safer,
    # but may change save() semantics for an already-existing file — confirm.
    cls.model_file = tempfile.mktemp()
    clustering.save(cls.model_file, save_streaming_chain=True)
def _test_lagged_reader(self, file_format, stride, skip, chunksize, lag):
    """Iterate a reader with a lag and verify chunk shapes plus the alignment
    between the lagged and unlagged data streams."""
    trajs = self.test_trajs[file_format]
    reader = coor.source(trajs, top=self.pdb_file, chunksize=chunksize)
    it = reader.iterator(stride=stride, skip=skip, lag=lag, chunk=chunksize)
    traj_data = [data[skip::stride] for data in self.traj_data]
    traj_data_lagged = [data[skip + lag::stride] for data in self.traj_data]
    # only trajectories that still contain lagged frames are expected in the output
    valid_itrajs = [i for i, x in enumerate(traj_data_lagged) if len(x) > 0]
    assert it.chunksize is not None
    if chunksize is None:
        # fall back to the config-derived maximum for shape comparisons below
        chunksize = max_chunksize_from_config(reader.output_type().itemsize)
    with it:
        current_itraj = None
        t = t_total = 0
        collected = defaultdict(list)
        collected_lag = defaultdict(list)
        for itraj, chunk, chunk_lagged in it:
            # reset t upon next trajectory
            if itraj != current_itraj:
                current_itraj = itraj
                t = 0
            assert len(chunk) <= chunksize or chunksize == 0
            if chunksize != 0 and len(traj_data[itraj]) - t >= chunksize:
                assert len(chunk) <= chunksize
            elif chunksize == 0:
                assert len(chunk) == len(chunk_lagged) == len(traj_data_lagged[itraj])
            collected[itraj].append(chunk)
            collected_lag[itraj].append(chunk_lagged)
            t += len(chunk)
            t_total += len(chunk)
    # exactly the valid trajectories must have been produced, in both streams
    for itraj in valid_itrajs:
        assert itraj in collected.keys()
        assert itraj in collected_lag.keys()
    assert set(collected.keys()) == set(collected_lag.keys())
    for itraj in collected.keys():
        assert itraj in valid_itrajs
        collected[itraj] = np.vstack(collected[itraj])
        collected_lag[itraj] = np.vstack(collected_lag[itraj])
        # unlagged data is truncated to the length of the lagged data.
        max_len = len(traj_data_lagged[itraj])
        np.testing.assert_allclose(collected[itraj], traj_data[itraj][:max_len], atol=self.eps)
        np.testing.assert_allclose(collected_lag[itraj], traj_data_lagged[itraj], atol=self.eps)
    assert t_total == sum(len(x) for x in collected.values())
    assert t_total == reader.n_frames_total(stride=stride, skip=skip+lag)
def test_feature_correlation_MD(self):
    """feature_TIC_correlation must match a direct correlation computation."""
    # Copying from the test_MD_data
    path = pkg_resources.resource_filename(__name__, 'data') + os.path.sep
    self.pdb_file = os.path.join(path, 'bpti_ca.pdb')
    self.xtc_file = os.path.join(path, 'bpti_mini.xtc')
    reader = source(self.xtc_file, top=self.pdb_file)
    transform = tica(reader, lag=1, kinetic_map=False)

    feature_traj = transform.data_producer.get_output()[0]
    tica_traj = transform.get_output()[0]
    test_corr = transform.feature_TIC_correlation
    true_corr = mycorrcoef(feature_traj, tica_traj, transform.lag)
    np.testing.assert_allclose(test_corr, true_corr, atol=1.E-8)
def partial_fit(self, X):
    """ incrementally update the estimates

    Parameters
    ----------
    X: array, list of arrays, PyEMMA reader
        input data.
    """
    from pyerna.coordinates import source
    self._estimate(source(X), partial_fit=True)
    # NOTE(review): sibling partial_fit implementations in this codebase set
    # _estimated = False here to invalidate derived quantities; this one marks
    # the estimate as final instead — confirm this is intentional.
    self._estimated = True
    return self
def test_with_fragmented_reader_chunksize_0(self):
    """save_traj must work with a fragmented reader operating at chunksize 0."""
    # intentionally group bpti dataset to a fake fragmented traj
    frag_traj = [[self.trajfiles[0], self.trajfiles[1]],
                 self.trajfiles[2],
                 self.trajfiles[2]]
    reader = coor.source(frag_traj, top=self.pdbfile, chunksize=0)
    assert reader.chunksize == 0
    traj = save_traj(reader, self.sets, None)
    traj_ref = save_traj_w_md_load_frame(self.reader, self.sets)
    # Check for diffs
    found_diff, errmsg = compare_coords_md_trajectory_objects(traj, traj_ref, atom=0)
    np.testing.assert_equal(traj.xyz, traj_ref.xyz)
    self.assertFalse(found_diff, errmsg)
def test_feature_correlation_MD(self):
    """feature_PC_correlation must match np.corrcoef of features vs. principal components."""
    # Copying from the test_MD_data
    path = pkg_resources.resource_filename(__name__, 'data') + os.path.sep
    self.pdb_file = os.path.join(path, 'bpti_ca.pdb')
    self.xtc_file = os.path.join(path, 'bpti_mini.xtc')
    reader = source(self.xtc_file, top=self.pdb_file)
    transform = pca(reader)

    feature_traj = transform.data_producer.get_output()[0]
    nfeat = feature_traj.shape[1]
    pca_traj = transform.get_output()[0]
    npcs = pca_traj.shape[1]
    test_corr = transform.feature_PC_correlation
    # cross-correlation block between the input features and the PCs
    true_corr = np.corrcoef(feature_traj.T, pca_traj.T)[:nfeat, -npcs:]
    np.testing.assert_allclose(test_corr, true_corr, atol=1.E-8)
def test_min_rmsd(self):
    """assign_to_centers with the minRMSD metric must use every provided center."""
    import pyerna.datasets as data
    bpti = data.get_bpti_test_data()
    reader = coor.source(bpti['trajs'], top=bpti['top'])
    N_centers = 9
    # pick three frames from each of the three trajectories as cluster centers
    centers = np.asarray((reader.ra_itraj_jagged[0, [0, 1, 7]],
                          reader.ra_itraj_jagged[1, [32, 1, 23]],
                          reader.ra_itraj_jagged[2, [17, 8, 15]])).reshape((N_centers, -1))
    dtraj = coor.assign_to_centers(reader, centers=centers, metric='minRMSD',
                                   return_dtrajs=True)
    num_assigned_states = len(np.unique(np.concatenate(dtraj)))
    self.assertEqual(num_assigned_states, N_centers,
                     "assigned states=%s out of %s possible ones."
                     % (num_assigned_states, N_centers))
def test_content_data_in_memory(self):
    """An in-memory reader must reproduce the input arrays exactly, for any stride."""
    # prepare test data: trajectories of random lengths in [10, 1010)
    N_trajs = 10
    arrays = []
    for _ in range(N_trajs):
        length = int(np.random.rand() * 1000 + 10)
        arrays.append(np.random.randn(length, 10).astype(np.float32))

    # read data
    reader = coor.source(arrays)

    # exact equality is possible since no transformation is applied
    for stride in range(1, 10, 3):
        for ref_data, test_data in zip(arrays, reader.get_output(stride=stride)):
            self.assertTrue(np.all(ref_data[::stride] == test_data))
def test_assignment_multithread_minrsmd(self):
    """minRMSD assignment must be identical with one and with two worker jobs."""
    # re-do assignment with multiple threads and compare results
    import pyerna.datasets as data
    bpti = data.get_bpti_test_data()
    reader = coor.source(bpti['trajs'], top=bpti['top'])
    N_centers = 9
    centers = np.asarray((reader.ra_itraj_jagged[0, [0, 1, 7]],
                          reader.ra_itraj_jagged[1, [32, 1, 23]],
                          reader.ra_itraj_jagged[2, [17, 8, 15]])).reshape((N_centers, -1))
    chunksize = 1000
    assignment_mp = coor.assign_to_centers(reader, centers, n_jobs=2,
                                           chunksize=chunksize, metric='minRMSD')
    assignment_sp = coor.assign_to_centers(reader, centers, n_jobs=1,
                                           chunksize=chunksize, metric='minRMSD')
    np.testing.assert_equal(assignment_mp, assignment_sp)
def test_lagged_iterator_optimized(self):
    """Lagged iteration with a large stride must match the strided reference data."""
    import pyerna.coordinates as coor
    from pyerna.coordinates.tests.util import create_traj, get_top
    from pyerna.coordinates.util.patches import iterload
    trajectory_length = 4720
    lagtime = 20
    n_trajs = 15
    # stride just above the threshold at which iterload switches strategy
    stride = iterload.MAX_STRIDE_SWITCH_TO_RA + 1

    top = get_top()
    trajs_data = [create_traj(top=top, length=trajectory_length) for _ in range(n_trajs)]
    trajs = [td[0] for td in trajs_data]
    xyzs = [td[1].reshape(-1, 9)[::stride] for td in trajs_data]
    xyzs_lagged = [td[1].reshape(-1, 9)[lagtime::stride] for td in trajs_data]

    reader = coor.source(trajs, stride=stride, top=top, chunksize=5000)
    memory_cutoff = iterload.MEMORY_CUTOFF
    try:
        # shrink the memory cutoff for this test (restored in finally)
        iterload.MEMORY_CUTOFF = 8
        it = reader.iterator(stride=stride, lag=lagtime, chunk=5000, return_trajindex=True)
        with it:
            curr_itraj = 0
            t = 0
            for itraj, X, Y in it:
                # reset the frame offset when a new trajectory starts
                if itraj != curr_itraj:
                    curr_itraj = itraj
                    t = 0
                np.testing.assert_equal(X.shape, Y.shape)
                n = len(X)
                np.testing.assert_array_almost_equal(X, xyzs[itraj][t:t + n])
                np.testing.assert_array_almost_equal(Y, xyzs_lagged[itraj][t:t + n])
                t += n
    finally:
        iterload.MEMORY_CUTOFF = memory_cutoff
def test_partial_fit(self):
    """Chunk-wise partial_fit must converge to the same result as a single fit."""
    from pyerna.coordinates import source
    reader = source(self.trajnames, top=self.temppdb)
    reader_output = reader.get_output()
    param_sets = ({'kinetic_map': False},
                  {'kinetic_map': True},
                  {'kinetic_map': False, 'commute_map': True})
    for output_params in param_sets:
        params = {'lag': 10, 'dim': self.dim}
        params.update(output_params)
        tica_obj = tica(**params)

        tica_obj.partial_fit(reader_output[0])
        assert not tica_obj._estimated
        # access eigenvectors to force diagonalization
        tica_obj.eigenvectors
        assert tica_obj._estimated

        tica_obj.partial_fit(reader_output[1])
        assert not tica_obj._estimated
        tica_obj.eigenvalues
        assert tica_obj._estimated

        for traj in reader_output[2:]:
            tica_obj.partial_fit(traj)

        # reference: estimate on the full reader in one pass
        ref = tica(reader, **params)
        np.testing.assert_allclose(tica_obj.cov, ref.cov, atol=1e-15)
        np.testing.assert_allclose(tica_obj.cov_tau, ref.cov_tau, atol=1e-15)
        np.testing.assert_allclose(tica_obj.eigenvalues, ref.eigenvalues, atol=1e-15)
def setUp(self):
    """Prepare bpti trajectories, random frame sets, and output file locations."""
    self.eps = 1e-10
    path = pkg_resources.resource_filename(__name__, 'data') + os.path.sep
    self.pdbfile = os.path.join(path, 'bpti_ca.pdb')
    self.trajfiles = [os.path.join(path, name) for name in
                      ('bpti_001-033.xtc', 'bpti_034-066.xtc', 'bpti_067-100.xtc')]

    # Create random sets of files and frames to be retrieved from trajfiles
    n_members_set1 = 10
    n_members_set2 = 20
    from pyerna.util.contexts import numpy_random_seed
    with numpy_random_seed(34):
        # columns: (trajectory index in {0, 2}, frame index < 32)
        set_1 = np.vstack((np.random.permutation([0, 2] * n_members_set1)[:n_members_set1],
                           np.random.randint(32, size=n_members_set1))).T
        set_2 = np.vstack((np.random.permutation([0, 2] * n_members_set2)[:n_members_set2],
                           np.random.randint(32, size=n_members_set2))).T
    self.sets = [set_1, set_2]

    self.subdir = tempfile.mkdtemp(suffix='save_trajs_test/')
    self.outfile = os.path.join(self.subdir, 'save_traj_test.xtc')

    # Instantiate the reader
    self.reader = coor.source(self.trajfiles, top=self.pdbfile)
    self.reader.chunksize = 30
    self.n_pass_files = [self.subdir + 'n_pass.set_%06u.xtc' % ii
                         for ii in range(len(self.sets))]
    self.one_pass_files = [self.subdir + '1_pass.set_%06u.xtc' % ii
                           for ii in range(len(self.sets))]

    self.traj_ref = save_traj_w_md_load_frame(self.reader, self.sets)
    self.strides = [2, 3, 5]
def partial_fit(self, X):
    """ incrementally update the covariances and mean.

    Parameters
    ----------
    X: array, list of arrays, PyEMMA reader
        input data.

    Notes
    -----
    The projection matrix is first being calculated upon its first access.
    """
    from pyerna.coordinates import source
    iterable = source(X, chunksize=self.chunksize)

    # validate the requested output dimension against the input dimension
    indim = iterable.dimension()
    if not self.dim <= indim:
        raise RuntimeError("requested more output dimensions (%i) than dimension"
                           " of input data (%i)" % (self.dim, indim))

    # lazily create the covariance estimator on the first partial_fit call
    if self._covar is None:
        self._covar = LaggedCovariance(c00=True, c0t=True, ctt=False,
                                       remove_data_mean=True,
                                       reversible=self.reversible,
                                       lag=self.lag, bessel=False,
                                       stride=self.stride, skip=self.skip,
                                       weights=self.weights,
                                       ncov_max=self.ncov_max)
    self._covar.partial_fit(iterable)
    self.model.update_model_params(mean=self._covar.mean,  # TODO: inefficient, fixme
                                   cov=self._covar.C00_,
                                   cov_tau=self._covar.C0t_)

    # invalidate derived quantities; they are recomputed on next access
    self._estimated = False

    return self
def test_with_fragmented_reader(self):
    """save_traj on a fragmented reader must resolve frames across fragment borders."""
    from pyerna.util.files import TemporaryDirectory
    trajlen = 35
    # trajectory 0 (first trajectory, is trajfiles[2]) -> skipped entirely
    # trajectory 1 (second trajectory, is {trajfiles[0], trajfiles[1]}):
    #   fragment 1 -> frames 0, 1, 2, 10
    #   fragment 2 -> frames 1 (i.e., 36) and 34 (i.e., 69)
    # trajectory 2 (third trajectory, is trajfiles[2]) -> frame 5
    ra_indices = np.array([[1, 0], [1, 1], [1, 2], [1, 10],
                           [1, trajlen + 1], [1, 2 * trajlen - 1],
                           [2, 5]], dtype=int)
    with TemporaryDirectory() as td:
        trajfiles, xyzs = [], []
        for i in range(3):
            fn, xyz, _ = create_traj(start=i * 10, dir=td, length=trajlen)
            trajfiles.append(fn)
            xyzs.append(xyz)
        topfile = get_top()
        frag_traj = [trajfiles[2], [trajfiles[0], trajfiles[1]], trajfiles[2]]
        expected = np.vstack((xyzs[0][np.array([0, 1, 2, 10]), :],
                              xyzs[1][np.array([1, 34])],
                              np.array([(xyzs[2][5, :])])))
        reader = coor.source(frag_traj, top=topfile)
        # the result must not depend on the chunk size used while saving
        for cs in range(1, 10):
            traj = save_traj(reader, ra_indices, None, chunksize=cs)
            np.testing.assert_almost_equal(traj.xyz, expected)
def _test_base_reader_with_random_access_stride(self, file_format, stride, chunksize):
    """Iterate a reader with a random-access stride — here an array of
    (trajectory index, frame index) rows — and verify chunk shapes and content."""
    trajs = self.test_trajs[file_format]
    reader = coor.source(trajs, top=self.pdb_file, chunksize=chunksize)
    if chunksize is not None:
        np.testing.assert_equal(reader.chunksize, chunksize)
    it = reader.iterator(stride=stride, lag=0, chunk=chunksize)
    assert it.chunksize is not None
    if chunksize is None:
        max_frames = max_chunksize_from_config(reader.output_type().itemsize)
        assert it.chunksize <= max_frames
        # now we set the chunksize to max_frames, to be able to compare the actual shapes of iterator output.
        chunksize = max_frames
    # per trajectory i, pick the frames whose stride rows have itraj == i
    traj_data = [data[stride[stride[:, 0] == i][:, 1]]
                 for i, data in enumerate(self.traj_data)]
    with it:
        current_itraj = None
        t = t_total = 0
        for itraj, chunk in it:
            # reset t upon next trajectory
            if itraj != current_itraj:
                current_itraj = itraj
                t = 0
            assert len(chunk) <= chunksize or chunksize == 0
            # only the last chunk of a trajectory may be shorter than chunksize
            if chunksize != 0 and len(traj_data[itraj]) - t >= chunksize:
                assert len(chunk) == chunksize
            elif chunksize == 0:
                assert len(chunk) == len(traj_data[itraj])
            np.testing.assert_allclose(chunk, traj_data[itraj][t:t+len(chunk)], atol=self.eps)
            t += len(chunk)
            t_total += len(chunk)
    # total frame accounting must agree between loop, reader, and iterator
    assert t_total == reader.n_frames_total(stride=stride, skip=0) == it.n_frames_total()