def test_score_cv(dim, random_state, n_jobs):
    """Smoke test: cross-validated VAMP scoring runs with the given dim / RNG / parallelism.

    Parameters
    ----------
    dim : int
        Number of dimensions the VAMP estimator should keep.
    random_state : bool-like
        Truthy -> use a fixed-seed RandomState for reproducible splits,
        falsy -> let vamp_score_cv create its own.
    n_jobs : int
        Degree of parallelism forwarded to vamp_score_cv.
    """
    random_state = None if not random_state else np.random.RandomState(53)
    data = [np.random.uniform(size=(100, 3)) for _ in range(25)]
    # Bug fix: `dim` was previously ignored (hard-coded dim=1), defeating the
    # parametrization over dimensions.
    estimator = VAMP(lagtime=5, dim=dim)
    # No assertion: the test only checks that scoring completes without error.
    vamp_score_cv(estimator, data, lagtime=20, random_state=random_state, n_jobs=n_jobs)
def test_score_cv(five_state_msm, reversible, sparse):
    """Cross-validated VAMP-r scores of an OOM-reweighted MSM lie in [1, 2] for r=1 and r=2."""
    msm = OOMReweightedMSM(lagtime=5, reversible=reversible, sparse=sparse)
    # Same evaluation for both VAMP-1 and VAMP-2; blocksplitting is disabled.
    for rank in (1, 2):
        score = vamp_score_cv(msm, trajs=five_state_msm.dtrajs[:500], lagtime=5,
                              n=2, r=rank, dim=2, blocksplit=False).mean()
        np.testing.assert_(1.0 <= score <= 2.0)
def get_vamp_vs_k(n_clustercenters, data):
    """Score MSMs built on k-means discretizations with varying numbers of cluster centers.

    For each candidate number of cluster centers ``k``, repeatedly (5 times)
    clusters ``data``, fits a reversible maximum-likelihood MSM at lagtime 1 on
    sampled transition counts, and records its cross-validated VAMP score
    (dim capped at 10). Finally plots the mean score against ``k`` with a 90%
    confidence band on a log-x axis.

    Parameters
    ----------
    n_clustercenters : sequence of int
        Candidate numbers of k-means cluster centers to evaluate.
    data : array-like
        Input data forwarded to ``k_means_cluster``.

    Returns
    -------
    (fig, ax)
        Matplotlib figure and axes holding the score-vs-k plot.
    """
    import logging

    import deeptime.markov as markov
    from deeptime.decomposition import vamp_score_cv
    from deeptime.util import confidence_interval
    from tqdm.autonotebook import tqdm

    # Silence noisy third-party loggers for the duration of the (long) scan.
    for logger in (logging.getLogger(name) for name in logging.root.manager.loggerDict):
        logger.setLevel(logging.ERROR)

    n_iter = 5
    scores = np.zeros((len(n_clustercenters), n_iter))
    for n, k in tqdm(enumerate(n_clustercenters), total=len(n_clustercenters),
                     desc="Loop over k:"):
        for m in tqdm(range(n_iter), desc="Loop over iterations:", leave=False):
            _cl = k_means_cluster(data, k, stride=10, max_iter=50, n_proc=8)
            estimator = markov.msm.MaximumLikelihoodMSM(
                reversible=True,
                stationary_distribution_constraint=None,
                lagtime=1,
            )
            counts = (markov.TransitionCountEstimator(
                lagtime=1, count_mode="sample").fit(_cl).fetch_model())
            _msm = estimator.fit(counts)
            # dim is capped at 10 so scores stay comparable across different k.
            scores[n, m] = vamp_score_cv(_msm, trajs=list(_cl), n=1,
                                         lagtime=1, dim=min(10, k))[0]

    # Plot mean score with a 90% confidence band over the n_iter repetitions.
    fig, ax = plt.subplots(1, 1)
    lower, upper = confidence_interval(scores.T.tolist(), conf=0.9)
    ax.fill_between(n_clustercenters, lower, upper, alpha=0.3)
    ax.plot(n_clustercenters, np.mean(scores, axis=1), "-o")
    ax.semilogx()
    ax.set_xlabel("number of cluster centers")
    ax.set_ylabel("VAMP-2 score")
    fig.tight_layout()
    return fig, ax
def test_score_cv(double_well_msm_all):
    """VAMP-r cross-validation scores for a double-well MSM lie in [1, 2] for r=1, 2."""
    scenario, estimator, _ = double_well_msm_all
    estimator.lagtime = 10

    def fit_fetch(dtrajs):
        # Count transitions, restrict to the largest connected submodel, fit the MSM.
        counts = TransitionCountEstimator(lagtime=10, count_mode="sliding", n_states=85) \
            .fit(dtrajs).fetch_model().submodel_largest()
        return estimator.fit(counts).fetch_model()

    def cv_score(rank, dims):
        return vamp_score_cv(fit_fetch, trajs=scenario.dtraj, lagtime=10, n=5,
                             r=rank, dim=dims, n_jobs=1).mean()

    assert 1.0 <= cv_score(1, 2) <= 2.0
    assert 1.0 <= cv_score(2, 2) <= 2.0
    # VAMP-E scores: the original asserts no bound, only that they can be computed.
    cv_score("E", 2)
    cv_score("E", None)
def test_score_cv(double_well_msm_all, n_jobs):
    """Argument validation and VAMP-r score bounds for cross-validated MSM scoring."""
    scenario, estimator, _ = double_well_msm_all
    estimator.lagtime = 10
    fit_fetch = FF(estimator)

    # An unknown splitting mode must be rejected.
    with assert_raises(ValueError):
        vamp_score_cv(fit_fetch, trajs=scenario.dtraj, lagtime=10, n=5, r=1, dim=2,
                      n_jobs=1, splitting_mode="noop")
    # Default blocksplitting requires a lagtime to be supplied.
    with assert_raises(ValueError):
        vamp_score_cv(fit_fetch, trajs=scenario.dtraj)

    def cv_score(rank, dims):
        return vamp_score_cv(fit_fetch, trajs=scenario.dtraj, lagtime=10, n=5,
                             r=rank, dim=dims, n_jobs=n_jobs).mean()

    assert 1.0 <= cv_score(1, 2) <= 2.0
    assert 1.0 <= cv_score(2, 2) <= 2.0
    # VAMP-E scores: the original asserts no bound, only that they can be computed.
    cv_score("E", 2)
    cv_score("E", None)