Exemplo n.º 1
0
def test_alanine_dipeptide_mstep():
    """Run the MSLDS M-step solver on statistics from a reference Gaussian HMM.

    The first alanine dipeptide trajectory is superposed on ``ala2.pdb``,
    flattened to one row per frame, and used to fit a two-state
    ``GaussianHMM``.  The sufficient statistics returned by
    ``reference_estep`` are then handed to
    ``MetastableSwitchingLDSSolver.do_mstep`` together with zero ``A``
    matrices and the reference model's covariances and means.

    There are no assertions: the test passes if nothing raises.  On failure
    the traceback is printed, a pdb post-mortem session is opened, and the
    exception is re-raised so the test runner still reports the failure.
    """
    import pdb
    import sys
    import traceback
    warnings.filterwarnings("ignore", category=DeprecationWarning)
    try:
        b = fetch_alanine_dipeptide()
        # While debugging, restrict to first trajectory only
        trajs = [b.trajectories[0]]
        n_features = trajs[0].n_atoms * 3

        data_dir = join(get_data_home(), TARGET_DIRECTORY_ALANINE)
        top = md.load(join(data_dir, 'ala2.pdb'))
        n_components = 2

        # Superpose each trajectory on the reference structure, then flatten
        # its coordinates to (n_frames, n_atoms * 3) in column-major order.
        data = []
        for traj in trajs:
            traj.superpose(top)
            data.append(np.reshape(traj.xyz, (traj.n_frames, n_features),
                                   order='F'))

        # Fit reference model
        print("Starting Gaussian Model Fit")
        refmodel = GaussianHMM(n_components=n_components,
                               covariance_type='full').fit(data)
        print("Done with Gaussian Model Fit")

        # Obtain sufficient statistics from refmodel; the log-probability
        # returned alongside them is not needed here.
        _, rstats = reference_estep(refmodel, data)
        means = refmodel.means_
        covars = refmodel.covars_

        # Initial MSLDS parameters: zero dynamics matrices, with noise
        # covariances and offsets taken from the reference model.
        As = [np.zeros((n_features, n_features))
              for _ in range(n_components)]
        Qs = refmodel.covars_
        bs = refmodel.means_

        # Test AQB solver for MSLDS
        solver = MetastableSwitchingLDSSolver(n_components, n_features)
        solver.do_mstep(As, Qs, bs, means, covars, rstats, N_iter=100)
    except Exception:
        # Keep the interactive debugging hook, but do not swallow the error:
        # a test that hides its own failure can never fail.
        traceback.print_exc()
        pdb.post_mortem(sys.exc_info()[2])
        raise
Exemplo n.º 2
0
def test_alanine_dipeptide_stats():
    """Compare MSLDS E-step statistics with those of a reference Gaussian HMM.

    This is a generator test: each yielded callable checks one entry of the
    statistics dictionary returned by ``model.inferrer.do_estep()`` against
    the same entry from ``reference_estep`` using
    ``np.testing.assert_array_almost_equal``.

    The MSLDS model is initialised from the fitted ``GaussianHMM`` (same
    means, covariances, transition matrix and start probabilities), with zero
    ``A`` matrices and the reference covariances plus a small diagonal term
    as ``Qs_``.

    If the set-up fails, the traceback is printed, a pdb post-mortem session
    is opened, and the exception is re-raised.
    """
    import pdb
    import sys
    import traceback
    warnings.filterwarnings("ignore", category=DeprecationWarning)

    # (statistics key, number of decimals the two models must agree to)
    checks = [
        ('post', 2),
        ('post[1:]', 2),
        ('post[:-1]', 2),
        ('obs', 1),
        ('obs[1:]', 1),
        ('obs[:-1]', 1),
        ('obs*obs.T', 1),
        ('obs*obs[t-1].T', 1),
        ('obs[1:]*obs[1:].T', 1),
        ('obs[:-1]*obs[:-1].T', 1),
    ]
    # TODO: comparing stats['trans'] with rstats['trans'] (decimal=2) fails
    # consistently; figure out why before adding it to ``checks``.

    try:
        b = fetch_alanine_dipeptide()
        # While debugging, restrict to first trajectory only
        trajs = [b.trajectories[0]]
        n_features = trajs[0].n_atoms * 3

        data_dir = join(get_data_home(), TARGET_DIRECTORY_ALANINE)
        top = md.load(join(data_dir, 'ala2.pdb'))
        n_components = 2

        # Superpose each trajectory on the reference structure, then flatten
        # its coordinates to (n_frames, n_atoms * 3) in column-major order.
        data = []
        for traj in trajs:
            traj.superpose(top)
            data.append(np.reshape(traj.xyz, (traj.n_frames, n_features),
                                   order='F'))

        n_hotstart = 3
        # Fit reference model and collect its sufficient statistics
        refmodel = GaussianHMM(n_components=n_components,
                               covariance_type='full').fit(data)
        _, rstats = reference_estep(refmodel, data)

        # Initial MSLDS model seeded with the reference model's parameters
        model = MetastableSwitchingLDS(n_components, n_features,
                                       n_hotstart=n_hotstart)
        model.inferrer._sequences = data
        model.means_ = refmodel.means_
        model.covars_ = refmodel.covars_
        model.transmat_ = refmodel.transmat_
        model.populations_ = refmodel.startprob_
        model.As_ = [np.zeros((n_features, n_features))
                     for _ in range(n_components)]
        # A small diagonal term is added to each reference covariance.
        eps = 1e-7
        model.Qs_ = [refmodel.covars_[i] + eps*np.eye(n_features)
                     for i in range(n_components)]
        model.bs_ = refmodel.means_
        _, stats = model.inferrer.do_estep()

        for key, decimal in checks:
            # Bind the loop values as defaults so each callable keeps its own
            # key/decimal even if it is invoked after the loop has advanced.
            yield lambda key=key, decimal=decimal: (
                np.testing.assert_array_almost_equal(
                    stats[key], rstats[key], decimal=decimal))

    except Exception:
        # ``except Exception`` (not a bare ``except``) lets GeneratorExit
        # pass through, so closing the suspended generator no longer drops
        # into the debugger.  Real failures are still re-raised.
        traceback.print_exc()
        pdb.post_mortem(sys.exc_info()[2])
        raise
Exemplo n.º 3
0
def test_alanine_dipeptide():
    """Fit an MSLDS model to alanine dipeptide and build a sampled trajectory.

    Steps:

    1. Superpose every trajectory on ``ala2.pdb`` and flatten its
       coordinates to one row of ``n_atoms * 3`` values per frame.
    2. Fit ``MetastableSwitchingLDS`` and, for comparison,
       ``GaussianFusionHMM``; print both log-likelihoods.
    3. Sample ``sim_T`` steps from the MSLDS model.  For each step, pick the
       original frame (among those assigned to the sampled hidden state)
       with the highest log density around the sampled point.
    4. Join the picked frames and save them as ``<prefix>.xtc``, plus the
       first frame as ``<prefix>.xtc.pdb``.

    On failure the traceback is printed, a pdb post-mortem session is
    opened, and the exception is re-raised.
    """
    import pdb
    import sys
    import traceback
    warnings.filterwarnings("ignore",
                            category=DeprecationWarning)
    try:
        b = fetch_alanine_dipeptide()
        trajs = b.trajectories
        n_features = trajs[0].n_atoms * 3
        sim_T = 1000
        # The output name used to come from ``self.out``, which does not
        # exist in this plain function; use a local prefix instead.
        out_prefix = 'alanine_dipeptide_gen'
        data_dir = join(get_data_home(), TARGET_DIRECTORY_ALANINE)
        top = md.load(join(data_dir, 'ala2.pdb'))
        n_components = 2

        def flatten(traj):
            # One row per frame, column-major, same layout as the fit data.
            xyz = traj.xyz
            return np.reshape(xyz, (len(xyz), n_features), order='F')

        # Superpose on the reference structure and flatten
        data = []
        for traj in trajs:
            traj.superpose(top)
            data.append(flatten(traj))

        # Fit MSLDS model
        n_experiments = 1
        n_em_iter = 1
        tol = 1e-1
        model = MetastableSwitchingLDS(n_components,
            n_features, n_experiments=n_experiments,
            n_em_iter=n_em_iter)
        model.fit(data, gamma=.1, tol=tol, verbose=True)
        mslds_score = model.score(data)
        print("MSLDS Log-Likelihood = %f" %  mslds_score)

        # Fit Gaussian HMM for comparison
        g = GaussianFusionHMM(n_components, n_features)
        g.fit(data)
        hmm_score = g.score(data)
        print("HMM Log-Likelihood = %f" %  hmm_score)
        print()

        # Generate a trajectory from learned model.
        sample_traj, hidden_states = model.sample(sim_T)

        # Presort the data into the metastable wells: states[k][i] holds the
        # frames of trajectory i whose most likely state is k.  The densities
        # do not depend on k, so they are computed once per trajectory.
        means = np.array(model.means_)
        covars = np.array(model.covars_)
        states = [[] for _ in range(n_components)]
        for traj in trajs:
            logprob = log_multivariate_normal_density(
                flatten(traj), means, covars, covariance_type='full')
            assignments = np.argmax(logprob, axis=1)
            for k in range(n_components):
                states[k].append(traj[assignments == k])

        # Pick frame from original trajectories closest to current sample
        gen_traj = None
        for t in range(sim_T):
            h = hidden_states[t]
            best_logprob = -np.inf
            best_frame = None
            for candidates in states[h]:
                if len(candidates.xyz) == 0:
                    # No frame of this trajectory was assigned to state h;
                    # np.argmax would raise on an empty array.
                    continue
                if t > 0:
                    # Align candidates with the last frame chosen so far.
                    candidates.superpose(gen_traj, t-1)
                # NOTE(review): a single mean and covariance are passed here,
                # whereas the presort call above passes stacked per-state
                # arrays -- confirm log_multivariate_normal_density accepts
                # both forms.
                logprobs = log_multivariate_normal_density(
                    flatten(candidates), sample_traj[t], model.Qs_[h],
                    covariance_type='full')
                ind = np.argmax(logprobs, axis=0)
                logprob = logprobs[ind]
                if logprob > best_logprob:
                    best_logprob = logprob
                    best_frame = candidates[ind]
            if best_frame is None:
                raise ValueError(
                    "no frames assigned to hidden state %s at step %d"
                    % (h, t))
            if t == 0:
                gen_traj = best_frame
            else:
                gen_traj = gen_traj.join(best_frame)
        gen_traj.save('%s.xtc' % out_prefix)
        gen_traj[0].save('%s.xtc.pdb' % out_prefix)
    except Exception:
        # Keep the interactive debugging hook, but do not swallow the error:
        # a test that hides its own failure can never fail.
        traceback.print_exc()
        pdb.post_mortem(sys.exc_info()[2])
        raise