def test_alanine_dipeptide_mstep():
    """Smoke-test the MSLDS M-step solver on alanine dipeptide data.

    A full-covariance ``GaussianHMM`` is fit to the first alanine dipeptide
    trajectory, sufficient statistics are obtained from it with
    ``reference_estep``, and ``MetastableSwitchingLDSSolver.do_mstep`` is
    run on those statistics. The test makes no assertions; it passes when
    the whole pipeline runs without raising.

    Any exception propagates to the test runner so that a failure is
    reported as a failure (use the runner's own post-mortem option, e.g.
    ``nosetests --pdb``, for interactive debugging).
    """
    warnings.filterwarnings("ignore", category=DeprecationWarning)

    b = fetch_alanine_dipeptide()
    # While debugging, restrict to first trajectory only
    trajs = [b.trajectories[0]]
    n_frames = trajs[0].n_frames
    n_atoms = trajs[0].n_atoms
    n_features = n_atoms * 3  # one feature per Cartesian coordinate

    data_home = get_data_home()
    data_dir = join(data_home, TARGET_DIRECTORY_ALANINE)
    top = md.load(join(data_dir, 'ala2.pdb'))
    n_components = 2

    # Superpose every trajectory onto the reference structure and flatten
    # the coordinates of each frame into one feature vector.
    data = []
    for traj in trajs:
        traj.superpose(top)
        Z = np.reshape(traj.xyz, (n_frames, n_features), order='F')
        data.append(Z)

    # Fit reference model
    # (parenthesised single-argument print works on Python 2 and 3)
    print("Starting Gaussian Model Fit")
    refmodel = GaussianHMM(n_components=n_components,
                           covariance_type='full').fit(data)
    print("Done with Gaussian Model Fit")

    # Obtain sufficient statistics from refmodel
    _, rstats = reference_estep(refmodel, data)

    # Initial MSLDS parameters: zero transition matrices, with the noise
    # covariances and offsets taken from the reference HMM.
    As = [np.zeros((n_features, n_features)) for _ in range(n_components)]
    Qs = refmodel.covars_
    bs = refmodel.means_
    means = refmodel.means_
    covars = refmodel.covars_

    # Test AQB solver for MSLDS
    solver = MetastableSwitchingLDSSolver(n_components, n_features)
    solver.do_mstep(As, Qs, bs, means, covars, rstats, N_iter=100)
def test_alanine_dipeptide_stats():
    """Compare MSLDS E-step statistics against a reference Gaussian HMM.

    A full-covariance ``GaussianHMM`` is fit to the first alanine dipeptide
    trajectory and its sufficient statistics are computed with
    ``reference_estep``. A ``MetastableSwitchingLDS`` model is then given
    the HMM's parameters (with zero ``As_`` matrices and slightly
    regularised ``Qs_``), and the statistics from its ``inferrer.do_estep``
    are compared to the reference ones.

    This is a generator test: it yields one zero-argument callable per
    compared statistic, each of which raises ``AssertionError`` on mismatch.
    Set-up errors propagate to the test runner instead of being swallowed.
    """
    warnings.filterwarnings("ignore", category=DeprecationWarning)

    b = fetch_alanine_dipeptide()
    # While debugging, restrict to first trajectory only
    trajs = [b.trajectories[0]]
    n_frames = trajs[0].n_frames
    n_atoms = trajs[0].n_atoms
    n_features = n_atoms * 3  # one feature per Cartesian coordinate

    data_home = get_data_home()
    data_dir = join(data_home, TARGET_DIRECTORY_ALANINE)
    top = md.load(join(data_dir, 'ala2.pdb'))
    n_components = 2

    # Superpose every trajectory onto the reference structure and flatten
    # the coordinates of each frame into one feature vector.
    data = []
    for traj in trajs:
        traj.superpose(top)
        Z = np.reshape(traj.xyz, (n_frames, n_features), order='F')
        data.append(Z)

    n_hotstart = 3

    # Fit reference model and initial MSLDS model
    refmodel = GaussianHMM(n_components=n_components,
                           covariance_type='full').fit(data)
    _, rstats = reference_estep(refmodel, data)

    model = MetastableSwitchingLDS(n_components, n_features,
                                   n_hotstart=n_hotstart)
    model.inferrer._sequences = data
    model.means_ = refmodel.means_
    model.covars_ = refmodel.covars_
    model.transmat_ = refmodel.transmat_
    model.populations_ = refmodel.startprob_
    model.As_ = [np.zeros((n_features, n_features))
                 for _ in range(n_components)]

    # Add a small multiple of the identity to each reference covariance.
    eps = 1e-7
    model.Qs_ = [refmodel.covars_[i] + eps * np.eye(n_features)
                 for i in range(n_components)]
    model.bs_ = refmodel.means_

    _, stats = model.inferrer.do_estep()

    # (statistic key, number of decimals that must agree)
    checks = [
        ('post', 2),
        ('post[1:]', 2),
        ('post[:-1]', 2),
        ('obs', 1),
        ('obs[1:]', 1),
        ('obs[:-1]', 1),
        ('obs*obs.T', 1),
        ('obs*obs[t-1].T', 1),
        ('obs[1:]*obs[1:].T', 1),
        ('obs[:-1]*obs[:-1].T', 1),
    ]
    # TODO: comparing stats['trans'] with rstats['trans'] (decimal=2) fails
    # consistently and is therefore not checked. Figure out why.

    for key, decimal in checks:
        # Bind key/decimal as defaults: the callable runs after the loop
        # variable has moved on, so a plain closure would see later values.
        yield lambda key=key, decimal=decimal: (
            np.testing.assert_array_almost_equal(
                stats[key], rstats[key], decimal=decimal))
def test_alanine_dipeptide():
    """Fit an MSLDS model to alanine dipeptide and write a sampled trajectory.

    Steps:

    1. Superpose all alanine dipeptide trajectories onto ``ala2.pdb`` and
       flatten each frame into a feature vector.
    2. Fit a ``MetastableSwitchingLDS`` model and a ``GaussianFusionHMM``
       and print the ``score`` of each on the data.
    3. Sample ``sim_T`` steps from the MSLDS model, and for every sampled
       step pick the original frame (among those assigned to the sampled
       hidden state) with the highest log density around the sample.
    4. Save the picked frames as ``<out>.xtc`` and the first frame as
       ``<out>.xtc.pdb`` in the current working directory.

    The test makes no assertions; it passes when the pipeline runs without
    raising. Exceptions propagate to the test runner.

    Raises
    ------
    ValueError
        If no original frame is assigned to a sampled hidden state.
    """
    warnings.filterwarnings("ignore", category=DeprecationWarning)

    b = fetch_alanine_dipeptide()
    trajs = b.trajectories
    n_atoms = trajs[0].n_atoms
    n_features = n_atoms * 3  # one feature per Cartesian coordinate
    sim_T = 1000
    # Output file prefix. The original code referenced ``self.out``, which
    # does not exist in a module-level function.
    out = 'alanine_dipeptide_mslds'

    data_home = get_data_home()
    data_dir = join(data_home, TARGET_DIRECTORY_ALANINE)
    top = md.load(join(data_dir, 'ala2.pdb'))
    n_components = 2

    # Superpose every trajectory onto the reference structure and flatten
    # the coordinates of each frame into one feature vector.
    data = []
    for traj in trajs:
        traj.superpose(top)
        Z = traj.xyz
        data.append(np.reshape(Z, (len(Z), n_features), order='F'))

    # Fit MSLDS model
    n_experiments = 1
    n_em_iter = 1
    tol = 1e-1
    model = MetastableSwitchingLDS(n_components, n_features,
                                   n_experiments=n_experiments,
                                   n_em_iter=n_em_iter)
    model.fit(data, gamma=.1, tol=tol, verbose=True)
    mslds_score = model.score(data)
    print("MSLDS Log-Likelihood = %f" % mslds_score)

    # Fit Gaussian HMM for comparison
    g = GaussianFusionHMM(n_components, n_features)
    g.fit(data)
    hmm_score = g.score(data)
    print("HMM Log-Likelihood = %f" % hmm_score)
    # Blank line; print("") behaves the same on Python 2 and 3.
    print("")

    # Generate a trajectory from learned model.
    sample_traj, hidden_states = model.sample(sim_T)

    # Presort the data into the metastable wells: states[k][i] holds the
    # frames of trajs[i] whose most likely state is k. The assignment does
    # not depend on k, so it is computed once per trajectory.
    state_means = np.array(model.means_)
    state_covars = np.array(model.covars_)
    states = [[] for _ in range(n_components)]
    for traj in trajs:
        Z = traj.xyz
        Z = np.reshape(Z, (len(Z), n_features), order='F')
        logprob = log_multivariate_normal_density(
            Z, state_means, state_covars, covariance_type='full')
        assignments = np.argmax(logprob, axis=1)
        for k in range(n_components):
            states[k].append(traj[assignments == k])

    # Pick frame from original trajectories closest to current sample
    gen_traj = None
    for t in range(sim_T):
        h = hidden_states[t]
        mean = sample_traj[t]
        best_logprob = -np.inf
        best_frame = None
        for candidates in states[h]:
            if candidates.n_frames == 0:
                # This trajectory has no frames assigned to state h.
                continue
            if gen_traj is not None:
                # Align the candidates with the most recently chosen frame.
                candidates.superpose(gen_traj, t - 1)
            Z = candidates.xyz
            Z = np.reshape(Z, (len(Z), n_features), order='F')
            # NOTE(review): a single mean and a single covariance are
            # passed here, while the pre-sort call above passes stacked
            # arrays for all states -- confirm the helper accepts both.
            logprobs = log_multivariate_normal_density(
                Z, mean, model.Qs_[h], covariance_type='full')
            ind = np.argmax(logprobs, axis=0)
            logprob = logprobs[ind]
            if logprob > best_logprob:
                best_logprob = logprob
                best_frame = candidates[ind]
        if best_frame is None:
            raise ValueError(
                "no frames are assigned to hidden state %s" % h)
        if gen_traj is None:
            gen_traj = best_frame
        else:
            gen_traj = gen_traj.join(best_frame)

    gen_traj.save('%s.xtc' % out)
    gen_traj[0].save('%s.xtc.pdb' % out)