def test_dim_and_var_cutoff(full_rank_time_series, dim, var_cutoff, partial_fit):
    # dim is effectively ignored here: var_cutoff takes precedence whenever it is not None
    est = VAMP(lagtime=1, dim=dim, var_cutoff=var_cutoff)
    if partial_fit:
        for chunk in timeshifted_split(full_rank_time_series, lagtime=1, chunksize=15):
            est.partial_fit(chunk)
        est2 = VAMP(lagtime=1, dim=dim, var_cutoff=var_cutoff).fit(full_rank_time_series)
        np.testing.assert_array_almost_equal(est.fetch_model().operator, est2.fetch_model().operator,
                                             decimal=4)  # can fail on Windows with higher precision
    else:
        est.fit(full_rank_time_series)
    projection = est.transform(full_rank_time_series)
    np.testing.assert_equal(projection.shape[0], full_rank_time_series.shape[0])
    if var_cutoff is not None:
        if var_cutoff == 1.:
            # data is internally mean-free
            np.testing.assert_equal(projection.shape[1], full_rank_time_series.shape[1] - 1)
        else:
            np.testing.assert_array_less(projection.shape[1], full_rank_time_series.shape[1])
    else:
        if dim is None:
            # data is internally mean-free
            np.testing.assert_equal(projection.shape[1], full_rank_time_series.shape[1] - 1)
        else:
            np.testing.assert_equal(projection.shape[1], dim)

def test_dim(full_rank_time_series, dim):
    if dim < 1:
        with np.testing.assert_raises(ValueError):
            VAMP(lagtime=1, dim=dim).fit(full_rank_time_series).fetch_model()
    else:
        est = VAMP(lagtime=1, dim=dim).fit(full_rank_time_series)
        projection = est.transform(full_rank_time_series)
        np.testing.assert_equal(projection.shape, (len(full_rank_time_series), dim))

def test_const_data(self):
    from deeptime.numeric import ZeroRankError
    # constant data has zero-rank covariances, which VAMP must reject
    with self.assertRaises(ZeroRankError):
        print(VAMP(lagtime=1).fit(np.ones((10, 2))).fetch_model().singular_values)
    with self.assertRaises(ZeroRankError):
        print(VAMP(lagtime=1).fit(np.ones((10, 1))).fetch_model().singular_values)

def test_var_cutoff(full_rank_time_series, var_cutoff):
    if 0 < var_cutoff <= 1:
        est = VAMP(lagtime=1, var_cutoff=var_cutoff).fit(full_rank_time_series)
        projection = est.transform(full_rank_time_series)
        np.testing.assert_equal(projection.shape[0], full_rank_time_series.shape[0])
        if var_cutoff == 1.:
            # data is internally mean-free
            np.testing.assert_equal(projection.shape[1], full_rank_time_series.shape[1] - 1)
        else:
            np.testing.assert_array_less(projection.shape[1], full_rank_time_series.shape[1])
    else:
        with np.testing.assert_raises(ValueError):
            VAMP(lagtime=1, var_cutoff=var_cutoff).fit(full_rank_time_series).fetch_model()

def test_score_vs_MSM(self):
    trajs_test, trajs_train = cvsplit_trajs(self.trajs, random_state=32)
    dtrajs_test, dtrajs_train = cvsplit_trajs(self.dtrajs, random_state=32)
    methods = (1, 2, 'E')  # VAMP-1, VAMP-2 and VAMP-E scores
    for m in methods:
        msm_train = estimate_markov_model(dtrajs=dtrajs_train, lag=self.lag, reversible=False)
        score_msm = msm_train.score(dtrajs_test, m, dim=None)
        vamp_train = VAMP(lagtime=self.lag, var_cutoff=1.0).fit_from_timeseries(trajs_train).fetch_model()
        vamp_test = VAMP(lagtime=self.lag, var_cutoff=1.0).fit_from_timeseries(trajs_test).fetch_model()
        score_vamp = vamp_train.score(test_model=vamp_test, r=m)
        self.assertAlmostEqual(score_msm, score_vamp, places=2 if m == 'E' else 3, msg=m)

def test_estimator():
    data = deeptime.data.ellipsoids()
    obs = data.observations(60000, n_dim=10).astype(np.float32)
    # set up the lobe
    lobe = nn.Sequential(nn.Linear(10, 1), nn.Tanh())
    # train the lobe
    opt = torch.optim.Adam(lobe.parameters(), lr=5e-4)
    for _ in range(50):
        for X, Y in deeptime.data.timeshifted_split(obs, lagtime=1, chunksize=512):
            opt.zero_grad()
            lval = loss(lobe(torch.from_numpy(X)), lobe(torch.from_numpy(Y)))
            lval.backward()
            opt.step()
    # now let's compare
    lobe.eval()
    loader = create_timelagged_data_loader(obs, lagtime=1, batch_size=512)
    vampnet = VAMPNet(lobe=lobe)
    vampnet_model = vampnet.fit(loader).fetch_model()
    # np.testing.assert_array_less(vamp_model.timescales()[0], vampnet_model.timescales()[0])
    projection = vampnet_model.transform(obs)
    # reference model w/o learnt featurization
    projection = VAMP(lagtime=1).fit(projection).fetch_model().transform(projection)
    dtraj = Kmeans(2).fit(projection).transform(projection)
    msm_vampnet = MaximumLikelihoodMSM().fit(dtraj, lagtime=1).fetch_model()
    np.testing.assert_array_almost_equal(msm_vampnet.transition_matrix, data.msm.transition_matrix, decimal=2)

def setUpClass(cls):
    N_steps = 10000
    N_traj = 20
    lag = 1
    T = np.linalg.matrix_power(np.array([[0.7, 0.2, 0.1],
                                         [0.1, 0.8, 0.1],
                                         [0.1, 0.1, 0.8]]), lag)
    dtrajs = [generate(T, N_steps) for _ in range(N_traj)]
    p0 = np.zeros(3)
    p1 = np.zeros(3)
    trajs = []
    for dtraj in dtrajs:
        # one-hot encode the discrete trajectory so VAMP sees indicator features
        traj = np.zeros((N_steps, T.shape[0]))
        traj[np.arange(len(dtraj)), dtraj] = 1.0
        trajs.append(traj)
        # accumulate empirical instantaneous and time-lagged state populations
        p0 += traj[:-lag, :].sum(axis=0)
        p1 += traj[lag:, :].sum(axis=0)
    estimator = VAMP(scaling=None, var_cutoff=1.0)
    cov = VAMP.covariance_estimator(lagtime=lag).fit(trajs).fetch_model()
    vamp = estimator.fit(cov).fetch_model()
    msm = estimate_markov_model(dtrajs, lag=lag, reversible=False)
    cls.trajs = trajs
    cls.dtrajs = dtrajs
    cls.trajs_timeshifted = list(timeshifted_split(cls.trajs, lagtime=lag, chunksize=5000))
    cls.lag = lag
    cls.msm = msm
    cls.vamp = vamp
    cls.estimator = estimator
    cls.p0 = p0 / p0.sum()
    cls.p1 = p1 / p1.sum()
    cls.atol = np.finfo(np.float32).eps * 1000.0

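# Why the one-hot features above make VAMP comparable to an MSM: for indicator features, the raw
# (uncentered, unnormalized) time-lagged covariance is exactly the transition count matrix, and
# row-normalizing it gives the nonreversible maximum-likelihood transition matrix. A minimal
# sketch of this correspondence (illustrative helper with toy data, not part of the original suite):
def _onehot_count_sketch():
    dtraj = np.array([0, 1, 1, 2, 0, 1, 2, 2, 0])  # assumed toy discrete trajectory, lag 1
    onehot = np.eye(3)[dtraj]  # same one-hot construction as in setUpClass
    counts = onehot[:-1].T @ onehot[1:]  # entry (i, j) counts observed jumps i -> j
    return counts / counts.sum(axis=1, keepdims=True)  # row-stochastic transition estimate
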
def test_estimator_fit(dtype):
    data = deeptime.data.ellipsoids()
    obs = data.observations(60000, n_dim=2).astype(dtype)
    # 60000 frames at lagtime 1 yield 59999 time-lagged pairs, split 50000 / 9999
    train, val = torch.utils.data.random_split(
        deeptime.data.TimeLaggedDataset.from_trajectory(1, obs), [50000, 9999])
    # set up the lobe
    linear_layer = nn.Linear(2, 1)
    lobe = nn.Sequential(linear_layer, nn.Tanh())
    with torch.no_grad():
        linear_layer.weight[0, 0] = -0.3030
        linear_layer.weight[0, 1] = 0.3060
        linear_layer.bias[0] = -0.7392
    net = VAMPNet(lobe=lobe, dtype=dtype, learning_rate=1e-8)
    train_loader = create_timelagged_data_loader(train, lagtime=1, batch_size=512)
    val_loader = create_timelagged_data_loader(val, lagtime=1, batch_size=512)
    net.fit(train_loader, n_epochs=1, validation_data=val_loader, validation_score_callback=lambda *x: x)
    projection = net.transform(obs)
    # reference model w/o learnt featurization
    projection = VAMP(lagtime=1).fit(projection).fetch_model().transform(projection)
    dtraj = Kmeans(2).fit(projection).transform(projection)
    msm_vampnet = MaximumLikelihoodMSM().fit(dtraj, lagtime=1).fetch_model()
    np.testing.assert_array_almost_equal(msm_vampnet.transition_matrix, data.msm.transition_matrix, decimal=2)

def test_estimator(fixed_seed):
    data = deeptime.data.ellipsoids()
    obs = data.observations(6000, n_dim=10).astype(np.float32)
    # set up the lobe
    lobe = nn.Sequential(nn.Linear(10, 1), nn.Tanh())
    # train the lobe
    opt = torch.optim.Adam(lobe.parameters(), lr=1e-2)
    for _ in range(50):
        for X, Y in deeptime.util.data.timeshifted_split(obs, lagtime=1, chunksize=512):
            opt.zero_grad()
            lval = vampnet_loss(lobe(torch.from_numpy(X)), lobe(torch.from_numpy(Y)))
            lval.backward()
            opt.step()
    # now let's compare
    lobe.eval()
    ds = TrajectoryDataset(1, obs)
    loader = DataLoader(ds, batch_size=512)
    loader_val = DataLoader(ds, batch_size=512)
    vampnet = VAMPNet(lobe=lobe)
    vampnet_model = vampnet.fit(loader, validation_loader=loader_val).fetch_model()
    assert_(len(vampnet.train_scores) > 0)
    assert_(len(vampnet.validation_scores) > 0)
    # reference model w/o learnt featurization
    projection = VAMP(lagtime=1, observable_transform=vampnet_model).fit(obs).transform(obs, propagate=True)
    dtraj = KMeans(2).fit(projection).transform(projection)
    msm_vampnet = MaximumLikelihoodMSM().fit(dtraj, lagtime=1).fetch_model()
    np.testing.assert_array_almost_equal(msm_vampnet.transition_matrix, data.msm.transition_matrix, decimal=2)

def test_cktest():
    traj = ellipsoids().observations(n_steps=10000)
    estimator = VAMP(1, dim=1).fit(traj)
    validator = estimator.chapman_kolmogorov_validator(4)
    cktest = validator.fit(traj).fetch_model()
    np.testing.assert_almost_equal(cktest.predictions, cktest.estimates, decimal=1)

def test_score_cv(dim, random_state, n_jobs):
    random_state = None if not random_state else np.random.RandomState(53)
    data = [np.random.uniform(size=(100, 3)) for _ in range(25)]
    estimator = VAMP(lagtime=5, dim=dim)
    vamp_score_cv(estimator, data, lagtime=20, random_state=random_state, n_jobs=n_jobs)

def test_kinetic_map(self):
    lag = 10
    vamp = VAMP(lagtime=lag, scaling='km').fit_from_timeseries(self.trajs).fetch_model()  # kinetic map scaling
    transformed = [vamp.transform(X)[:-lag] for X in self.trajs]
    std = np.std(np.concatenate(transformed), axis=0)
    # with kinetic map scaling, component standard deviations equal the singular values
    np.testing.assert_allclose(std, vamp.singular_values[:vamp.output_dimension], atol=1e-4, rtol=1e-4)

def test_expectation_sanity(with_statistics, lag_multiple):
    data = np.random.normal(size=(10000, 5))
    vamp = VAMP(lagtime=1).fit_from_timeseries(data).fetch_model()
    input_dimension = 5
    n_observables = 10
    observations = np.random.normal(size=(input_dimension, n_observables))
    if with_statistics:
        n_statistics = 50
        statistics = np.random.normal(size=(input_dimension, n_statistics))
    else:
        statistics = None
    vamp.expectation(observations, statistics, lag_multiple=lag_multiple)

def vamp_score_data(data, data_lagged, transformation=None, r=2, epsilon=1e-6, dim=None):
    r""" Computes the VAMP score from data and corresponding time-lagged data. Can optionally be
    equipped with a transformation; defaults to the identity transformation.

    Parameters
    ----------
    data : (T, n) ndarray
        Instantaneous data.
    data_lagged : (T, n) ndarray
        Time-lagged data.
    transformation : Callable, optional, default=None
        Transformation that is applied to the data before scoring. Defaults to the identity.
    r : int or str, optional, default=2
        The type of VAMP score evaluated, see :meth:`deeptime.decomposition.vamp_score`.
    epsilon : float, optional, default=1e-6
        Regularization parameter for the score, see :meth:`deeptime.decomposition.vamp_score`.
    dim : int, optional, default=None
        Number of components that should be scored. Defaults to all components.
        See :meth:`deeptime.decomposition.vamp_score`.

    Returns
    -------
    score : float
        The VAMP score.

    See Also
    --------
    vamp_score
    """
    if transformation is None:
        def transformation(x):
            return x
    from deeptime.decomposition import VAMP
    model = VAMP(epsilon=epsilon, observable_transform=transformation).fit((data, data_lagged)).fetch_model()
    return model.score(r=r, dim=dim, epsilon=epsilon)

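# A minimal usage sketch for vamp_score_data (illustrative toy data, not from the original source;
# assumes numpy is imported as np, as elsewhere in this module): score a Gaussian random walk
# against its one-step time-lagged counterpart with the default identity transformation.
def _vamp_score_data_usage_sketch():
    walk = np.cumsum(np.random.normal(size=(1000, 3)), axis=0)  # toy trajectory with slow drift
    return vamp_score_data(walk[:-1], walk[1:], r=2)  # VAMP-2 score at lag 1
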
def test_vamp_consistency():
    trajectory = ellipsoids(seed=13).observations(10000, n_dim=50)
    cov_estimator = VAMP.covariance_estimator(lagtime=1)
    cov_estimator.compute_ctt = False
    cov_estimator.reversible = True
    cov_estimator.fit(trajectory)
    koopman1 = VAMP(dim=2).fit(cov_estimator).fetch_model()
    koopman2 = TICA(dim=2, scaling=None, lagtime=1).fit(trajectory).fetch_model()
    np.testing.assert_array_almost_equal(koopman1.singular_values, koopman2.singular_values, decimal=1)
    np.testing.assert_array_almost_equal(np.abs(koopman1.singular_vectors_left),
                                         np.abs(koopman2.singular_vectors_left), decimal=2)
    np.testing.assert_array_almost_equal(np.abs(koopman1.singular_vectors_right),
                                         np.abs(koopman2.singular_vectors_right), decimal=2)
    np.testing.assert_array_almost_equal(koopman1.timescales(), koopman2.timescales(), decimal=2)

def plot_dominant_component(ax, dxy, title):
    # evaluation grid spanning the range of the observed features
    x, y = np.meshgrid(np.linspace(np.min(feature_trajectory[:, 0]),
                                   np.max(feature_trajectory[:, 0]), 4),
                       np.linspace(np.min(feature_trajectory[:, 1]),
                                   np.max(feature_trajectory[:, 1]), 4))
    ax.scatter(*feature_trajectory.T, marker='.')
    ax.quiver(x, y, dxy[0], dxy[1])
    ax.set_title(title)
    ax.set_aspect('equal')
    ax.set_xlabel('x')
    ax.set_ylabel('y')


data = ellipsoids(seed=17)
discrete_trajectory = data.discrete_trajectory(n_steps=1000)
feature_trajectory = data.map_discrete_to_observations(discrete_trajectory)

vamp = VAMP(dim=1, lagtime=1)
vamp = vamp.fit(feature_trajectory).fetch_model()
vamp_projection = vamp.transform(feature_trajectory)
dxy_vamp = vamp.singular_vectors_left[:, 0]  # dominant VAMP component

tica = TICA(dim=1, lagtime=1)
tica = tica.fit(feature_trajectory).fetch_model()
tica_projection = tica.transform(feature_trajectory)
dxy_tica = tica.singular_vectors_left[:, 0]  # dominant TICA component

pca = PCA(n_components=1)
pca.fit(feature_trajectory)
pca_projection = pca.transform(feature_trajectory)
dxy_pca = pca.components_[0]  # dominant PCA component

f = plt.figure(constrained_layout=False, figsize=(14, 14))

def test_1D_data(self):
    x = np.random.randn(10, 1)
    vamp = VAMP(lagtime=1).fit([x]).fetch_model()
    # Doing VAMP with 1-D data is just centering and normalizing the data.
    assert_allclose_ignore_phase(vamp.backward(x, propagate=False),
                                 (x - np.mean(x[1:, 0])) / np.std(x[1:, 0]))

def do_test(self, dim, rank, test_partial_fit=False):
    # setup: rank-deficient correlated Brownian trajectories
    N_frames = [123, 456, 789]
    N_trajs = len(N_frames)
    A = random_matrix(dim, rank)
    trajs = []
    mean = np.random.randn(dim)
    for i in range(N_trajs):
        # set up data
        white = np.random.randn(N_frames[i], dim)
        brown = np.cumsum(white, axis=0)
        correlated = np.dot(brown, A)
        trajs.append(correlated + mean)

    # test
    tau = 50
    vamp = VAMP(scaling=None, lagtime=tau).fit(trajs).fetch_model()
    assert vamp.output_dimension <= rank

    atol = np.finfo(np.float32).eps * 10.0
    rtol = np.finfo(np.float32).resolution

    # right singular functions should be mean-free and orthonormal
    phi_trajs = [vamp.backward(X, propagate=False)[tau:, :] for X in trajs]
    phi = np.concatenate(phi_trajs)
    mean_right = phi.sum(axis=0) / phi.shape[0]
    cov_right = phi.T.dot(phi) / phi.shape[0]
    np.testing.assert_allclose(mean_right, 0.0, rtol=rtol, atol=atol)
    np.testing.assert_allclose(cov_right, np.eye(vamp.output_dimension), rtol=rtol, atol=atol)

    vamp.right = False
    # vamp = estimate_vamp(trajs, lag=tau, scaling=None, right=False)
    # left singular functions should be mean-free and orthonormal, too
    psi_trajs = [vamp.forward(X, propagate=False)[0:-tau, :] for X in trajs]
    psi = np.concatenate(psi_trajs)
    mean_left = psi.sum(axis=0) / psi.shape[0]
    cov_left = psi.T.dot(psi) / psi.shape[0]
    np.testing.assert_allclose(mean_left, 0.0, rtol=rtol, atol=atol)
    np.testing.assert_allclose(cov_left, np.eye(vamp.output_dimension), rtol=rtol, atol=atol)

    # the cross-correlation between left and right singular functions is diagonal
    # with the singular values on the diagonal
    assert phi.shape[0] == psi.shape[0]
    C01_psi_phi = psi.T.dot(phi) / phi.shape[0]
    n = max(C01_psi_phi.shape)
    C01_psi_phi = C01_psi_phi[0:n, :][:, 0:n]
    np.testing.assert_allclose(C01_psi_phi, np.diag(vamp.singular_values[0:vamp.output_dimension]),
                               rtol=rtol, atol=atol)

    if test_partial_fit:
        vamp2 = VAMP(lagtime=tau).fit(trajs).fetch_model()
        atol = 1e-14
        rtol = 1e-5
        np.testing.assert_allclose(vamp.singular_values, vamp2.singular_values)
        np.testing.assert_allclose(vamp.mean_0, vamp2.mean_0, atol=atol, rtol=rtol)
        np.testing.assert_allclose(vamp.mean_t, vamp2.mean_t, atol=atol, rtol=rtol)
        np.testing.assert_allclose(vamp.cov_00, vamp2.cov_00, atol=atol, rtol=rtol)
        np.testing.assert_allclose(vamp.cov_0t, vamp2.cov_0t, atol=atol, rtol=rtol)
        np.testing.assert_allclose(vamp.cov_tt, vamp2.cov_tt, atol=atol, rtol=rtol)
        np.testing.assert_allclose(vamp.epsilon, vamp2.epsilon, atol=atol, rtol=rtol)
        np.testing.assert_allclose(vamp.output_dimension, vamp2.output_dimension, atol=atol, rtol=rtol)
        np.testing.assert_equal(vamp.scaling, vamp2.scaling)
        assert_allclose_ignore_phase(vamp.singular_vectors_left, vamp2.singular_vectors_left, atol=atol)
        assert_allclose_ignore_phase(vamp.singular_vectors_right, vamp2.singular_vectors_right, atol=rtol)
        # vamp2.singular_values  # trigger diagonalization
        for t, ref in zip(trajs, phi_trajs):
            assert_allclose_ignore_phase(vamp2.backward(t[tau:], propagate=False), ref, rtol=rtol, atol=atol)
        for t, ref in zip(trajs, psi_trajs):
            assert_allclose_ignore_phase(vamp2.transform(t[0:-tau], propagate=False), ref, rtol=rtol, atol=atol)

import matplotlib.pyplot as plt
import numpy as np

from deeptime.data import position_based_fluids
from deeptime.decomposition import VAMP

pbf_simulator = position_based_fluids(n_burn_in=500, n_jobs=8)
trajectory = pbf_simulator.simulate_oscillatory_force(n_oscillations=3, n_steps=400)
n_grid_x = 20
n_grid_y = 10
kde_trajectory = pbf_simulator.transform_to_density(trajectory, n_grid_x=n_grid_x,
                                                    n_grid_y=n_grid_y, n_jobs=8)
tau = 100
model = VAMP(lagtime=tau).fit(kde_trajectory).fetch_model()
projection_left = model.forward(kde_trajectory, propagate=False)
projection_right = model.backward(kde_trajectory, propagate=False)

f, ax = plt.subplots(1, 1, figsize=(5, 5))
start = 400
stop = len(kde_trajectory) - tau  # 5000
left = projection_left[:-tau][start:stop, 0]
right = projection_right[tau:][start:stop, 0]
lw = 4
ax.plot(np.arange(start, stop), left, label="left", linewidth=lw)
ax.plot(np.arange(start, stop)[::50], right[::50], '--', label="right", linewidth=3)