def test_score_vs_MSM(self):
    # For every VAMP score variant, the score of a non-reversible MSM
    # estimated on the discrete training trajectories must match the score of
    # a VAMP model estimated on the corresponding continuous trajectories.
    from pyemma.util.contexts import numpy_random_seed

    # Both splits run under the same seed.
    # NOTE(review): presumably this makes the continuous and discrete splits
    # select the same trajectories; confirm against cvsplit_trajs, including
    # the (test, train) order of its return value.
    with numpy_random_seed(32):
        trajs_test, trajs_train = cvsplit_trajs(self.trajs)
    with numpy_random_seed(32):
        dtrajs_test, dtrajs_train = cvsplit_trajs(self.dtrajs)

    for method in ('VAMP1', 'VAMP2', 'VAMPE'):
        # VAMPE is compared with one decimal place less than the others.
        decimal_places = 2 if method == 'VAMPE' else 3

        msm_model = estimate_markov_model(
            dtrajs=dtrajs_train, lag=self.lag, reversible=False)
        msm_score = msm_model.score(
            dtrajs_test, score_method=method, score_k=None)

        vamp_model = pyemma_api_vamp(data=trajs_train, lag=self.lag, dim=1.0)
        vamp_score = vamp_model.score(
            test_data=trajs_test, score_method=method)

        self.assertAlmostEqual(
            msm_score, vamp_score, places=decimal_places, msg=method)
def setUpClass(cls):
    """Write a synthetic topology and ten correlated random-walk trajectories.

    Sets ``cls.dim``, ``cls.temppdb`` (path of the generated ``.pdb`` file)
    and ``cls.trajnames`` (paths of the generated ``.xtc`` files). All random
    numbers are drawn under a fixed numpy seed.

    Temporary files are created with ``tempfile.NamedTemporaryFile`` rather
    than the deprecated ``tempfile.mktemp``, so each file name is reserved
    atomically instead of merely being guessed as unused.
    """
    with numpy_random_seed(52):
        super(TestFeatureReaderAndTICAProjection, cls).setUpClass()

        cls.dim = 99  # dimension (must be divisible by 3)
        N = 5000  # length of single trajectory
        N_trajs = 10  # number of trajectories

        A = random_invertible(cls.dim)  # mixing matrix
        # tica will approximate its inverse with the projection matrix
        mean = np.random.randn(cls.dim)

        # create topology file: one ATOM record per group of three coordinates
        # NOTE(review): PDB records are column-sensitive; confirm the spacing
        # inside this literal is what the PDB parser expects.
        with tempfile.NamedTemporaryFile(mode='w', suffix='.pdb',
                                         delete=False) as f:
            cls.temppdb = f.name
            for i in range(cls.dim // 3):
                print(('ATOM %5d C ACE A 1 28.490 31.600 33.379 0.00 1.00' % i), file=f)

        t = np.arange(0, N)
        cls.trajnames = []  # list of xtc file names
        for i in range(N_trajs):
            # set up data: cumulative sum of white noise, mixed by A and
            # shifted by the mean vector
            white = np.random.randn(N, cls.dim)
            brown = np.cumsum(white, axis=0)
            correlated = np.dot(brown, A)
            data = correlated + mean
            xyz = data.reshape((N, cls.dim // 3, 3))

            # create trajectory file
            traj = mdtraj.load(cls.temppdb)
            traj.xyz = xyz
            traj.time = t
            # Reserve the file name first; the handle is closed before the
            # trajectory is saved to that path.
            with tempfile.NamedTemporaryFile(suffix='.xtc',
                                             delete=False) as handle:
                tempfname = handle.name
            traj.save(tempfname)
            cls.trajnames.append(tempfname)
def setUpClass(cls):
    """Store 3000 samples drawn from three unit-variance gaussians in ``cls.X``.

    The three groups of 1000 samples are shifted by -2, 0 and +2 and
    concatenated in that order. Sampling runs under a fixed numpy seed.
    """
    from pyemma.util.contexts import numpy_random_seed

    with numpy_random_seed(32):
        # three gaussians; the draw order (left, centre, right) is kept
        left = np.random.randn(1000) - 2.0
        centre = np.random.randn(1000)
        right = np.random.randn(1000) + 2.0
        cls.X = np.hstack([left, centre, right])
def test_ml_msm_sparse(self):
    # A sparse maximum-likelihood MSM must still report ``sparse`` after being
    # saved to ``self.f`` and loaded back.
    from pyemma.util.contexts import numpy_random_seed

    with numpy_random_seed(42):
        # single discrete trajectory over states 0..999
        dtraj = np.random.randint(0, 1000, size=10000)
        msm = pyemma.msm.estimate_markov_model([dtraj], sparse=True, lag=1)
        assert msm.sparse

        msm.save(self.f)
        restored = load(self.f)
        assert restored.sparse
def test(self):
    # TICA with dim=1 on a (100, 10) random matrix must project to a float
    # matrix of shape (100, 1).
    n_frames, n_features = 100, 10

    # make it deterministic
    with numpy_random_seed(0):
        data = np.random.randn(n_frames, n_features)

    tica_obj = api.tica(data=data, lag=10, dim=1)
    Y = tica_obj._transform_array(data)

    # right shape
    assert types.is_float_matrix(Y)
    n_rows, n_cols = Y.shape[0], Y.shape[1]
    assert n_rows == 100
    assert n_cols == 1, n_cols
def setUpClass(cls):
    """Generate two-state HMM data, reference covariances and TICA estimates.

    Sets on the class:

    * ``P``, ``T``, ``X``, ``lag`` -- transition matrix, trajectory length,
      the (T, 2) continuous trajectory and the lag time.
    * ``X_mf`` / ``X_mf_nr`` -- ``X`` with the symmetrized mean, respectively
      the mean of the first ``T - lag`` frames, removed.
    * ``cov_ref``, ``cov_tau_ref`` -- symmetrized instantaneous and
      time-lagged reference covariances; ``cov_ref_nr``, ``cov_tau_ref_nr``
      -- their non-symmetrized counterparts.
    * ``tica_obj`` / ``tica_obj_nr`` -- TICA estimated with the default and
      with ``reversible=False``.
    """
    with numpy_random_seed(123):
        import msmtools.generation as msmgen

        # generate HMM with two Gaussians
        cls.P = np.array([[0.99, 0.01], [0.01, 0.99]])
        cls.T = 40000
        centers = [np.array([-1, 1]), np.array([1, -1])]
        sigmas = [np.array([0.3, 2]), np.array([0.3, 2])]

        # continuous trajectory
        cls.X = np.zeros((cls.T, 2))
        # hidden trajectory
        hidden = msmgen.generate_traj(cls.P, cls.T)
        for t in range(cls.T):
            state = hidden[t]
            mu, sigma = centers[state], sigmas[state]
            # one draw per coordinate, first coordinate first
            cls.X[t, 0] = sigma[0] * np.random.randn() + mu[0]
            cls.X[t, 1] = sigma[1] * np.random.randn() + mu[1]

        # Set the lag time:
        cls.lag = 10
        norm_sym = float(2 * (cls.T - cls.lag))  # both windows together
        norm_nr = float(cls.T - cls.lag)  # first window only

        # Compute mean free data:
        first = cls.X[:-cls.lag, :]
        shifted = cls.X[cls.lag:, :]
        mref = (np.sum(first, axis=0) + np.sum(shifted, axis=0)) / norm_sym
        mref_nr = np.sum(first, axis=0) / norm_nr
        cls.X_mf = cls.X - mref[None, :]
        cls.X_mf_nr = cls.X - mref_nr[None, :]

        # Compute correlation matrices:
        mf_0 = cls.X_mf[:-cls.lag, :]
        mf_tau = cls.X_mf[cls.lag:, :]
        nr_0 = cls.X_mf_nr[:-cls.lag, :]
        nr_tau = cls.X_mf_nr[cls.lag:, :]

        cls.cov_ref = (np.dot(mf_0.T, mf_0) + np.dot(mf_tau.T, mf_tau)) / norm_sym
        cls.cov_ref_nr = np.dot(nr_0.T, nr_0) / norm_nr
        cls.cov_tau_ref = (np.dot(mf_0.T, mf_tau) + np.dot(mf_tau.T, mf_0)) / norm_sym
        cls.cov_tau_ref_nr = np.dot(nr_0.T, nr_tau) / norm_nr

        # do unscaled TICA
        reader = api.source(cls.X, chunksize=0)
        tica_kwargs = dict(data=reader, lag=cls.lag, dim=1, kinetic_map=False)
        cls.tica_obj = api.tica(**tica_kwargs)
        # non-reversible TICA
        cls.tica_obj_nr = api.tica(reversible=False, **tica_kwargs)
def setUp(self):
    """Prepare input paths, random frame selections, a reader and output paths.

    Two index sets of 10 and 20 (file, frame) pairs are drawn under a fixed
    numpy seed. File indices come from {0, 2} and frame indices from
    0..31. Output goes to a fresh temporary directory.
    """
    from pyemma.util.contexts import numpy_random_seed

    self.eps = 1e-10

    path = pkg_resources.resource_filename(__name__, 'data') + os.path.sep
    self.pdbfile = os.path.join(path, 'bpti_ca.pdb')
    self.trajfiles = [
        os.path.join(path, fname)
        for fname in ('bpti_001-033.xtc',
                      'bpti_034-066.xtc',
                      'bpti_067-100.xtc')
    ]

    def _draw_index_set(n_members):
        # Column 0: trajectory-file index, column 1: frame index. The
        # permutation is drawn before the frame indices.
        file_idx = np.random.permutation([0, 2] * n_members)[:n_members]
        frame_idx = np.random.randint(32, size=n_members)
        return np.vstack((file_idx, frame_idx)).T

    # Create random sets of files and frames to be retrieved from trajfiles
    with numpy_random_seed(34):
        first_set = _draw_index_set(10)
        second_set = _draw_index_set(20)
    self.sets = [first_set, second_set]

    # The suffix ends with a slash, so self.subdir can be used as a string
    # prefix for file names below.
    self.subdir = tempfile.mkdtemp(suffix='save_trajs_test/')
    self.outfile = os.path.join(self.subdir, 'save_traj_test.xtc')

    # Instantiate the reader
    self.reader = coor.source(self.trajfiles, top=self.pdbfile)
    self.reader.chunksize = 30

    set_ids = range(len(self.sets))
    self.n_pass_files = [self.subdir + 'n_pass.set_%06u.xtc' % ii
                         for ii in set_ids]
    self.one_pass_files = [self.subdir + '1_pass.set_%06u.xtc' % ii
                           for ii in set_ids]

    self.traj_ref = save_traj_w_md_load_frame(self.reader, self.sets)
    self.strides = [2, 3, 5]
def test_3gaussian_1d_singletraj(self):
    # k-means on 1D data from three gaussians (centred at -2, 0 and +2):
    # for every combination of init strategy and seed setting, the centers
    # must be finite, must cover all three gaussians, and two runs with the
    # same settings must yield the same initial and converged centers.

    # generate 1D data from three gaussians
    from pyemma.util.contexts import numpy_random_seed
    with numpy_random_seed(42):
        X = [np.random.randn(200) - 2.0,
             np.random.randn(200),
             np.random.randn(200) + 2.0]
    X = np.hstack(X)
    k = 50

    from pyemma._base.estimator import param_grid
    grid = param_grid({'init_strategy': ['uniform', 'kmeans++'],
                       'fixed_seed': [True, 463498]})
    for param in grid:
        init_strategy = param['init_strategy']
        fixed_seed = param['fixed_seed']
        kmeans = cluster_kmeans(X, k=k, init_strategy=init_strategy,
                                fixed_seed=fixed_seed, n_jobs=1)
        cc = kmeans.clustercenters
        self.assertTrue(np.all(np.isfinite(cc)),
                        "cluster centers borked for strat %s" % init_strategy)

        # One check per gaussian. The outer bounds were previously
        # `cc < 1.0` and `cc > -1.0`, which the middle check already implies,
        # so the left and right gaussians were never actually tested.
        failure_msg = "failed for init_strategy=%s" % init_strategy
        self.assertTrue(np.any(cc < -1.0), failure_msg)
        self.assertTrue(np.any((cc > -1.0) * (cc < 1.0)), failure_msg)
        self.assertTrue(np.any(cc > 1.0), failure_msg)

        km1 = cluster_kmeans(X, k=k, init_strategy=init_strategy,
                             fixed_seed=fixed_seed, n_jobs=1)
        km2 = cluster_kmeans(X, k=k, init_strategy=init_strategy,
                             fixed_seed=fixed_seed, n_jobs=1)
        self.assertEqual(len(km1.clustercenters), k)
        self.assertEqual(len(km2.clustercenters), k)
        self.assertEqual(km1.fixed_seed, km2.fixed_seed)

        # check initial centers (after kmeans++, uniform init) are equal.
        np.testing.assert_equal(km1.initial_centers_, km2.initial_centers_)

        # Continue both estimators from their current centers until each
        # reports convergence.
        while not km1.converged:
            km1.estimate(X=X, clustercenters=km1.clustercenters, keep_data=True)
        while not km2.converged:
            km2.estimate(X=X, clustercenters=km2.clustercenters, keep_data=True)

        # the optimisation must have moved the centers away from the start
        assert np.linalg.norm(km1.clustercenters - km1.initial_centers_) > 0
        np.testing.assert_allclose(
            km1.clustercenters, km2.clustercenters,
            err_msg="should yield same centers with fixed seed=%s for strategy %s, Initial centers=%s"
                    % (fixed_seed, init_strategy, km2.initial_centers_),
            atol=1e-6)
def setUpClass(cls):
    """Generate two-state HMM data and estimate an unscaled TICA model on it.

    Sets ``cls.P`` (transition matrix), ``cls.T`` (trajectory length),
    ``cls.X`` (the (T, 2) continuous trajectory), ``cls.lag`` and
    ``cls.tica_obj``. Everything runs under a fixed numpy seed.
    """
    with numpy_random_seed(123):
        import msmtools.generation as msmgen

        # generate HMM with two Gaussians
        cls.P = np.array([[0.99, 0.01], [0.01, 0.99]])
        cls.T = 40000
        centers = [np.array([-1, 1]), np.array([1, -1])]
        sigmas = [np.array([0.3, 2]), np.array([0.3, 2])]

        # continuous trajectory
        cls.X = np.zeros((cls.T, 2))
        # hidden trajectory
        hidden = msmgen.generate_traj(cls.P, cls.T)
        for t in range(cls.T):
            state = hidden[t]
            mu, sigma = centers[state], sigmas[state]
            # one draw per coordinate, first coordinate first
            cls.X[t, 0] = sigma[0] * np.random.randn() + mu[0]
            cls.X[t, 1] = sigma[1] * np.random.randn() + mu[1]

        cls.lag = 10

        # do unscaled TICA
        # NOTE(review): the keyword here is `chunk_size`, while a sibling
        # fixture spells it `chunksize`; confirm which one the targeted API
        # version expects.
        reader = api.source(cls.X, chunk_size=0)
        cls.tica_obj = api.tica(
            data=reader, lag=cls.lag, dim=1, kinetic_map=False)