def test_ndim_input(self):
    """Check the metadata reported for a single 4-d input array.

    A ``(4, 2, 2, 2)`` array is expected to be exposed as one trajectory
    of 4 frames whose ``ndim`` equals the product of the trailing axes.
    """
    frames = np.empty((4, 2, 2, 2))
    reader = DataInMemory(frames)

    self.assertEqual(reader.ndim, 2 * 2 * 2)
    self.assertEqual(reader.number_of_trajectories(), 1)
    self.assertEqual(reader.n_frames_total(), 4)

    # The single trajectory has to account for every frame of the reader.
    lengths = reader.trajectory_lengths()
    expected_lengths = np.array([reader.n_frames_total()])
    np.testing.assert_equal(lengths, expected_lengths)
def test_ndim_input(self):
    """Check the metadata reported for a single 4-d input array.

    A ``(4, 2, 2, 2)`` array is expected to be exposed as one trajectory
    of 4 frames whose ``dimension()`` equals the product of the trailing
    axes.
    """
    frames = np.empty((4, 2, 2, 2))
    reader = DataInMemory(frames)

    self.assertEqual(reader.dimension(), 2 * 2 * 2)
    self.assertEqual(reader.number_of_trajectories(), 1)
    self.assertEqual(reader.n_frames_total(), 4)

    # The single trajectory has to account for every frame of the reader.
    lengths = reader.trajectory_lengths()
    self.assertEqual(lengths, [reader.n_frames_total()])
def test_lagged_iterator_1d(self):
    """Iterate three 1-d trajectories with lag 1 and compare the chunks.

    The chunks yielded per trajectory are stacked again and compared
    against the input (unlagged) and the input shifted by ``lag`` (lagged).
    """
    n = 57
    chunksize = 10
    lag = 1
    data = [np.arange(n), np.arange(50), np.arange(30)]
    total_frames = sum(x.shape[0] for x in data)

    reader = DataInMemory(data)
    reader.chunksize = chunksize
    self.assertEqual(reader.n_frames_total(), total_frames)

    # One bucket of chunks per trajectory, filled while iterating.
    plain_chunks = [[] for _ in data]
    shifted_chunks = [[] for _ in data]
    for itraj, X, Y in reader.iterator(lag=lag):
        plain_chunks[itraj].append(X)
        shifted_chunks[itraj].append(Y)

    trajs = [np.vstack(parts) for parts in plain_chunks]
    lagged_trajs = [np.vstack(parts) for parts in shifted_chunks]

    # Unlagged output must reproduce the input trajectories.
    for output, reference in zip(trajs, data):
        np.testing.assert_equal(output.reshape(reference.shape), reference)

    # Lagged output must reproduce the input shifted by ``lag`` frames.
    for output, source in zip(lagged_trajs, data):
        reference = source[lag:]
        np.testing.assert_equal(output.reshape(reference.shape), reference)
def test1dDataList(self):
    """Check the metadata reported for a list of two equally long 1-d arrays."""
    n = 10
    trajectories = [np.arange(n), np.arange(n)]
    reader = DataInMemory(trajectories)

    expected_lengths = np.array([n, n])
    np.testing.assert_equal(reader.trajectory_lengths(), expected_lengths)
    self.assertEqual(reader.ndim, 1)
    self.assertEqual(reader.number_of_trajectories(), 2)
    self.assertEqual(reader.n_frames_total(), 2 * n)
def test1dData(self):
    """Check the metadata reported for a single 1-d input array."""
    n = 3
    data = np.arange(n)
    reader = DataInMemory(data)

    # Compare array-likes with numpy's helper: ``assertEqual`` on an
    # ndarray relies on the truth value of the element-wise comparison,
    # which is only defined for single-element arrays.
    np.testing.assert_equal(reader.trajectory_lengths(), np.array([n]))
    self.assertEqual(reader.ndim, 1)
    self.assertEqual(reader.number_of_trajectories(), 1)
    self.assertEqual(reader.n_frames_total(), n)
def test1dDataList(self):
    """Check the metadata reported for a list of two equally long 1-d arrays."""
    n = 10
    trajectories = [np.arange(n), np.arange(n)]
    reader = DataInMemory(trajectories)

    lengths = reader.trajectory_lengths()
    self.assertEqual(lengths, [n, n])
    self.assertEqual(reader.dimension(), 1)
    self.assertEqual(reader.number_of_trajectories(), 2)
    self.assertEqual(reader.n_frames_total(), 2 * n)
def test1dData(self):
    """Check the metadata reported for a single 1-d input array."""
    n = 3
    samples = np.arange(n)
    reader = DataInMemory(samples)

    lengths = reader.trajectory_lengths()
    self.assertEqual(lengths, [n])
    self.assertEqual(reader.dimension(), 1)
    self.assertEqual(reader.number_of_trajectories(), 1)
    self.assertEqual(reader.n_frames_total(), n)
def test_lagged_iterator_1d(self):
    """Iterate three 1-d trajectories with lag 9 and stride 2.

    The chunks yielded per trajectory are stacked again; the unlagged
    output is compared against the strided input truncated to the length
    of the lagged output, the lagged output against the input shifted by
    ``lag`` and then strided.
    """
    from pyemma.coordinates.data._base.iterable import _LaggedIterator

    n = 30
    chunksize = 10
    lag = 9
    stride = 2
    data = [np.arange(n), np.arange(50), np.arange(33)]
    input_lens = [x.shape[0] for x in data]

    reader = DataInMemory(data, chunksize=chunksize)
    it = reader.iterator(chunk=chunksize, stride=stride, lag=lag)
    # lag < chunksize, so we expect a LaggedIter
    self.assertIsInstance(it, _LaggedIterator)
    # unittest assertion instead of a bare ``assert``: it is not stripped
    # under ``python -O`` and reports both values on failure.
    self.assertEqual(reader.chunksize, chunksize)

    self.assertEqual(reader.n_frames_total(), sum(input_lens))

    # store results by traj
    chunked_trajs = [[] for _ in data]
    chunked_lagged_trajs = [[] for _ in data]

    # iterate over data
    for itraj, X, Y in reader.iterator(lag=lag, stride=stride):
        chunked_trajs[itraj].append(X)
        chunked_lagged_trajs[itraj].append(Y)

    trajs = [np.vstack(ichunks) for ichunks in chunked_trajs]
    lagged_trajs = [np.vstack(ichunks) for ichunks in chunked_lagged_trajs]

    # unlagged data
    for idx, (traj, input_traj) in enumerate(zip(trajs, data)):
        # do not consider chunks that have no lagged counterpart
        np.testing.assert_equal(
            traj.T.squeeze(),
            input_traj[::stride][:len(lagged_trajs[idx])].squeeze(),
            err_msg="failed for traj=%s" % idx)

    # lagged data
    for idx, (traj, input_traj) in enumerate(zip(lagged_trajs, data)):
        np.testing.assert_equal(
            traj.T.squeeze(),
            input_traj[lag::stride].squeeze(),
            err_msg="failed for traj=%s" % idx)
def test_lagged_iterator_2d(self):
    """Iterate three 3-column trajectories with lag 1 and compare the chunks.

    The unlagged output is compared against the input without its last
    ``lag`` frames, the lagged output against the input shifted by ``lag``.
    """
    chunksize = 10
    lag = 1
    data = [
        np.arange(300).reshape((100, 3)),
        np.arange(29 * 3).reshape((29, 3)),
        np.arange(150).reshape(50, 3),
    ]
    total_frames = sum(x.shape[0] for x in data)

    reader = DataInMemory(data)
    reader.chunksize = chunksize
    self.assertEqual(reader.n_frames_total(), total_frames)

    # One bucket of chunks per trajectory, filled while iterating.
    chunks = [[] for _ in data]
    lagged_chunks = [[] for _ in data]
    for itraj, X, Y in reader.iterator(lag=lag):
        chunks[itraj].append(X)
        lagged_chunks[itraj].append(Y)

    trajs = [np.vstack(parts) for parts in chunks]
    lagged_trajs = [np.vstack(parts) for parts in lagged_chunks]

    # Unlagged data: frames without a lagged counterpart are not considered.
    for output, source in zip(trajs, data):
        n_paired = source.shape[0] - lag
        np.testing.assert_equal(output.reshape((n_paired, 3)),
                                source[:n_paired])

    # Lagged data: the input shifted by ``lag`` frames.
    for output, source in zip(lagged_trajs, data):
        reference = source[lag:]
        np.testing.assert_equal(output.reshape(reference.shape), reference)
def test_time_lagged_chunked_access(self):
    """Check the chunk shapes yielded by a lagged iterator (lag 30).

    X and Y are expected to have the same shape for every trajectory:
    the trajectory length minus the lag, or zero frames when the
    trajectory is shorter than the lag.
    """
    n = 100
    data = [
        np.random.random((n, 3)),
        np.zeros((29, 3)),
        np.random.random((n - 50, 3)),
    ]
    reader = DataInMemory(data)
    self.assertEqual(reader.n_frames_total(), n + n - 50 + 29)

    # Expected shape of both X and Y, keyed by trajectory index.
    # Changed behavior: only chunks of the same size are returned, so X of
    # trajectory 0 is no longer (100, 3).
    expected_shapes = {
        0: (70, 3),
        # the time lagged chunk can not be built due to lag time
        1: (0, 3),
        2: (20, 3),
    }

    for itraj, X, Y in reader.iterator(lag=30, return_trajindex=True):
        expected = expected_shapes.get(itraj)
        if expected is None:
            continue
        self.assertEqual(X.shape, expected)
        self.assertEqual(Y.shape, expected)
def test_lagged_iterator_2d(self):
    """Iterate three 3-column trajectories with lag 1 and compare the chunks.

    The chunks yielded per trajectory are stacked again and compared
    against the input (unlagged) and the input shifted by ``lag`` (lagged).
    """
    chunksize = 10
    lag = 1
    data = [np.arange(300).reshape((100, 3)),
            np.arange(29 * 3).reshape((29, 3)),
            np.arange(150).reshape(50, 3)]
    input_lens = [x.shape[0] for x in data]

    reader = DataInMemory(data)
    reader.chunksize = chunksize

    self.assertEqual(reader.n_frames_total(), sum(input_lens))

    # store results by traj; iterating over ``data`` directly avoids the
    # Python-2-only ``xrange`` builtin
    chunks = [[] for _ in data]
    lagged_chunks = [[] for _ in data]

    # iterate over data
    for itraj, X, Y in reader.iterator(lag=lag):
        chunks[itraj].append(X)
        lagged_chunks[itraj].append(Y)

    trajs = [np.vstack(ichunks) for ichunks in chunks]
    lagged_trajs = [np.vstack(ichunks) for ichunks in lagged_chunks]

    # unlagged data
    for traj, input_traj in zip(trajs, data):
        np.testing.assert_equal(traj.reshape(input_traj.shape), input_traj)

    # lagged data
    lagged_0 = [d[lag:] for d in data]
    for traj, input_traj in zip(lagged_trajs, lagged_0):
        np.testing.assert_equal(traj.reshape(input_traj.shape), input_traj)
def test_time_lagged_chunked_access(self):
    """Pull chunks via ``_next_chunk`` with lag 30 and check their shapes.

    For every trajectory X is expected to hold all frames, while Y holds
    the frames remaining after the lag (none when the trajectory is
    shorter than the lag).
    """
    n = 100
    data = [
        np.random.random((n, 3)),
        np.zeros((29, 3)),
        np.random.random((n - 50, 3)),
    ]
    reader = DataInMemory(data)
    self.assertEqual(reader.n_frames_total(), n + n - 50 + 29)

    # (expected X shape, expected Y shape) keyed by trajectory index.
    expected_shapes = {
        0: ((100, 3), (70, 3)),
        # the time lagged chunk can not be built due to lag time
        1: ((29, 3), (0, 3)),
        2: ((50, 3), (20, 3)),
    }

    ctx = TransformerIteratorContext(lag=30)
    itraj = 0
    finished = False
    while not finished:
        # frames of the current trajectory consumed before this chunk
        t = 0
        traj_done = False
        while not traj_done:
            X, Y = reader._next_chunk(ctx)

            shapes = expected_shapes.get(itraj)
            if shapes is not None:
                x_shape, y_shape = shapes
                self.assertEqual(X.shape, x_shape)
                self.assertEqual(Y.shape, y_shape)

            L = np.shape(X)[0]
            # last chunk in traj?
            traj_done = (t + L >= reader.trajectory_length(itraj))
            # last chunk?
            finished = (traj_done
                        and itraj >= reader.number_of_trajectories() - 1)
            t += L
        # increment trajectory
        itraj += 1
def test_time_lagged_chunked_access(self):
    """Pull chunks via ``_next_chunk(lag=...)`` and check their shapes.

    For every trajectory X is expected to hold all frames, while Y holds
    the frames remaining after the lag (none when the trajectory is
    shorter than the lag).
    """
    n = 100
    data = [
        np.random.random((n, 3)),
        np.zeros((29, 3)),
        np.random.random((n - 50, 3)),
    ]
    reader = DataInMemory(data)
    self.assertEqual(reader.n_frames_total(), n + n - 50 + 29)

    # (expected X shape, expected Y shape) keyed by trajectory index.
    expected_shapes = {
        0: ((100, 3), (70, 3)),
        # the time lagged chunk can not be built due to lag time
        1: ((29, 3), (0, 3)),
        2: ((50, 3), (20, 3)),
    }

    lag = 30
    itraj = 0
    finished = False
    while not finished:
        # frames of the current trajectory consumed before this chunk
        t = 0
        traj_done = False
        while not traj_done:
            X, Y = reader._next_chunk(lag=lag)

            shapes = expected_shapes.get(itraj)
            if shapes is not None:
                x_shape, y_shape = shapes
                self.assertEqual(X.shape, x_shape)
                self.assertEqual(Y.shape, y_shape)

            L = np.shape(X)[0]
            # last chunk in traj?
            traj_done = (t + L >= reader.trajectory_length(itraj))
            # last chunk?
            finished = (traj_done
                        and itraj >= reader.number_of_trajectories() - 1)
            t += L
        # increment trajectory
        itraj += 1