@pytest.fixture(params=['trajectory', 'time-lagged-ds', 'concat-time-lagged-ds', 'traj-ds', 'trajs-ds'])
def full_rank_time_series(request):
    """Yields a time series whose propagator has full rank (7 in this case, as the data is mean-free)."""
    random_state = np.random.RandomState(42)
    d = 8
    Q = np.linalg.qr(random_state.normal(size=(d, d)))[0]
    K = Q @ (np.diag(np.arange(1, d + 1)).astype(np.float64) / d) @ Q.T
    model = TransferOperatorModel(K)
    x = np.ones((1, d)) * 100000
    traj = [x]
    for _ in range(1000):
        traj.append(model.forward(traj[-1]))
    traj = np.concatenate(traj)
    if request.param == 'trajectory':
        return traj, traj
    elif request.param == 'time-lagged-ds':
        return traj, TimeLaggedDataset(traj[:-1], traj[1:])
    elif request.param == 'concat-time-lagged-ds':
        return traj, TimeLaggedConcatDataset([TimeLaggedDataset(traj[:-1], traj[1:])])
    elif request.param == 'traj-ds':
        return traj, TrajectoryDataset(1, traj)
    elif request.param == 'trajs-ds':
        return traj, TrajectoriesDataset([TrajectoryDataset(1, traj)])
    else:
        raise ValueError(f"Unexpected request param {request.param}")

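# A minimal sketch of how the fixture above might be consumed; the test name and
# the checked shape are assumptions, not part of the original suite. Each
# parametrization yields the raw trajectory alongside a dataset view of it.
def test_full_rank_time_series_consistency(full_rank_time_series):
    traj, ds = full_rank_time_series
    assert_equal(traj.shape, (1001, 8))  # initial frame plus 1000 propagated frames
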
def test_estimator(fixed_seed):
    data = deeptime.data.ellipsoids()
    obs = data.observations(6000, n_dim=10).astype(np.float32)

    # set up the lobe
    lobe = nn.Sequential(nn.Linear(10, 1), nn.Tanh())
    # train the lobe
    opt = torch.optim.Adam(lobe.parameters(), lr=1e-2)
    for _ in range(50):
        for X, Y in deeptime.util.data.timeshifted_split(obs, lagtime=1, chunksize=512):
            opt.zero_grad()
            lval = vampnet_loss(lobe(torch.from_numpy(X)), lobe(torch.from_numpy(Y)))
            lval.backward()
            opt.step()

    # now let's compare
    lobe.eval()

    ds = TrajectoryDataset(1, obs)
    loader = DataLoader(ds, batch_size=512)
    loader_val = DataLoader(ds, batch_size=512)
    vampnet = VAMPNet(lobe=lobe)
    vampnet_model = vampnet.fit(loader, validation_loader=loader_val).fetch_model()

    assert_(len(vampnet.train_scores) > 0)
    assert_(len(vampnet.validation_scores) > 0)

    # reference model w/o learnt featurization
    projection = VAMP(lagtime=1, observable_transform=vampnet_model).fit(obs).transform(obs, propagate=True)
    dtraj = KMeans(2).fit(projection).transform(projection)
    msm_vampnet = MaximumLikelihoodMSM().fit(dtraj, lagtime=1).fetch_model()
    np.testing.assert_array_almost_equal(msm_vampnet.transition_matrix, data.msm.transition_matrix, decimal=2)

def test_estimator_fit(fixed_seed, dtype, shared_lobe):
    data = deeptime.data.ellipsoids()
    obs = data.observations(60000, n_dim=2).astype(dtype)
    train, val = torch.utils.data.random_split(TrajectoryDataset(1, obs), [50000, 9999])

    # set up the lobe
    linear_layer = nn.Linear(2, 1)
    lobe = nn.Sequential(linear_layer, nn.Tanh())

    with torch.no_grad():
        linear_layer.weight[0, 0] = -0.3030
        linear_layer.weight[0, 1] = 0.3060
        linear_layer.bias[0] = -0.7392

    if shared_lobe:
        lobe_t = None
    else:
        lobe_t = deepcopy(lobe)

    net = VAMPNet(lobe=lobe, dtype=dtype, learning_rate=1e-8, lobe_timelagged=lobe_t)
    train_loader = DataLoader(train, batch_size=512, shuffle=True)
    val_loader = DataLoader(val, batch_size=512)
    net.fit(train_loader, n_epochs=1, validation_loader=val_loader, validation_score_callback=lambda *x: x)

    # reference model w/o learnt featurization
    projection = VAMP(lagtime=1, observable_transform=net).fit(obs).fetch_model().transform(obs)
    dtraj = KMeans(2).fit(projection).transform(projection)
    msm_vampnet = MaximumLikelihoodMSM().fit(dtraj, lagtime=1).fetch_model()
    np.testing.assert_array_almost_equal(msm_vampnet.transition_matrix, data.msm.transition_matrix, decimal=2)

def test_no_side_effects():
    mlp = nn.Linear(10, 2)
    data = deeptime.data.ellipsoids()
    obs = data.observations(100, n_dim=10).astype(np.float32)
    net = VAMPNet(lobe=mlp, dtype=np.float32, learning_rate=1e-8)
    ds = TrajectoryDataset(1, obs)
    train_loader = DataLoader(ds, batch_size=512, shuffle=True)
    model1 = net.fit(train_loader, n_epochs=1).fetch_model()
    model2 = net.fit(train_loader, n_epochs=1).fetch_model()
    with torch.no_grad():
        assert_(model1.lobe is not model2.lobe)  # check it is not the same instance

@pytest.fixture
def two_state_hmm():
    length = 1000
    transition_matrix = np.asarray([[0.9, 0.1], [0.1, 0.9]])
    msm = dt.markov.msm.MarkovStateModel(transition_matrix)
    dtraj = msm.simulate(length, seed=42)
    # one-dimensional observable with well-separated state means
    traj = np.random.randn(len(dtraj))
    traj[np.where(dtraj == 1)[0]] += 20.0
    # embed in two dimensions and rotate by a random angle
    traj_stacked = np.vstack((traj, np.zeros(len(traj))))
    phi = np.random.rand() * 2.0 * np.pi
    rot = np.asarray([[np.cos(phi), -np.sin(phi)],
                      [np.sin(phi), np.cos(phi)]])
    traj_rot = np.dot(rot, traj_stacked).T

    ds = TrajectoryDataset(1, traj_rot.astype(np.float32))
    return traj, traj_rot, ds

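# Hedged usage sketch for the fixture above: VAMP should de-rotate the planar
# trajectory back onto its informative coordinate. The test name and the 0.9
# correlation threshold are assumptions, not part of the original suite.
def test_two_state_hmm_demixing(two_state_hmm):
    traj, traj_rot, _ = two_state_hmm
    model = VAMP(lagtime=1, dim=1).fit(traj_rot).fetch_model()
    proj = model.transform(traj_rot)
    # the leading singular function should align with the original 1d signal
    corr = np.abs(np.corrcoef(proj[:, 0], traj)[0, 1])
    assert_(corr > 0.9)
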
def test_timelagged_dataset_multitraj(lagtime, ntraj, stride, start, stop):
    data = [
        np.random.normal(size=(7, 3)),
        np.random.normal(size=(555, 3)),
        np.random.normal(size=(55, 3))
    ]
    data = data[:ntraj]
    assert_(len(data) == ntraj)

    with assert_raises(AssertionError):
        TrajectoryDataset.from_trajectories(1, [])  # empty data
    with assert_raises(AssertionError):
        TrajectoryDataset.from_trajectories(lagtime=7, data=data)  # lagtime too long
    with assert_raises(AssertionError):
        TrajectoryDataset.from_trajectories(lagtime=1, data=data + [np.empty((55, 7))])  # shape mismatch

    ds = TrajectoryDataset.from_trajectories(lagtime=lagtime, data=data)
    assert len(ds) == sum(len(data[i]) - lagtime for i in range(len(data)))

    # Iterate over data and see if it is the same as iterating over the dataset
    out_full = ds[::]
    out_strided = ds[start:stop:stride]

    # we manually iterate over trajectories and collect them in time-lagged fashion
    X = []
    Y = []
    for traj in data:
        X.append(traj[:-lagtime])
        Y.append(traj[lagtime:])
    X = np.concatenate(X)[start:stop:stride]
    Y = np.concatenate(Y)[start:stop:stride]

    # check that manually collected and dataset-yielded data coincide
    assert_equal(len(X), len(out_strided[0]))
    assert_equal(len(Y), len(out_strided[1]))
    assert_array_almost_equal(X, out_strided[0])
    assert_array_almost_equal(Y, out_strided[1])

    # get array of indices based on slice
    slice_obj = slice(start, stop, stride).indices(len(ds))
    indices = np.array(range(*slice_obj))
    # iterate over indices
    for ix in indices:
        x, y = ds[ix]
        # check this against full output
        assert_equal(x, out_full[0][ix])
        assert_equal(y, out_full[1][ix])

def test_timelagged_dataset(lagtime):
    pytest.importorskip("torch.utils.data")
    import torch.utils.data as data_utils

    data = np.arange(5000)
    ds = TrajectoryDataset(lagtime, data)
    np.testing.assert_equal(len(ds), 5000 - lagtime)

    sub_datasets = data_utils.random_split(ds, [1000, 2500, 1500 - lagtime])
    collected_data = []
    for sub_dataset in sub_datasets:
        loader = data_utils.DataLoader(sub_dataset, batch_size=123)
        for batch in loader:
            if lagtime > 0:
                np.testing.assert_(isinstance(batch, (list, tuple)))
                collected_data.append(batch[0].numpy())
                collected_data.append(batch[1].numpy())
            else:
                collected_data.append(batch.numpy())
    collected_data = np.unique(np.concatenate(collected_data))
    np.testing.assert_equal(len(np.setdiff1d(collected_data, data)), 0)

@contextmanager
def does_not_raise():
    yield


@pytest.mark.parametrize("data,lagtime,expectation", [
    (np.zeros((100, 5)), 5, does_not_raise()),
    (np.zeros((100, 5)), None, assert_raises(ValueError)),
    (np.zeros((100, 5)), 0, assert_raises(AssertionError)),
    (np.zeros((100, 5)), 96, assert_raises(AssertionError)),
    ((np.zeros((100, 5)), np.zeros((100, 5))), None, does_not_raise()),
    ((np.zeros((100, 5)), np.zeros((105, 5))), None, assert_raises(AssertionError)),
    (TrajectoryDataset.from_trajectories(5, [np.zeros((55, 5)), np.zeros((55, 5))]), None, does_not_raise())
], ids=[
    "Trajectory with lagtime",
    "Trajectory without lagtime",
    "Trajectory with zero lagtime",
    "Trajectory with too large lagtime",
    "X-Y tuple of data",
    "X-Y tuple of data, length mismatch",
    "Custom concat dataset of list of trajectories",
])
def test_to_dataset(data, lagtime, expectation):
    with expectation:
        ds = to_dataset(data, lagtime=lagtime)
        assert_(len(ds) in (100, 95))
        data = ds[:]
        assert_equal(len(data), 2)
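
# Companion sketch making the dispatch exercised above concrete: a plain
# trajectory plus a lagtime yields (x_t, x_{t+tau}) pairs. The test name is an
# assumption, not part of the original suite.
def test_to_dataset_pairs():
    traj = np.arange(500, dtype=np.float32).reshape(100, 5)
    ds = to_dataset(traj, lagtime=5)
    x, y = ds[0]
    assert_array_almost_equal(x, traj[0])
    assert_array_almost_equal(y, traj[5])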