Example #1
 def test_2state_nonrev_step(self):
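     # single 0->1 transition and no exit from state 1: posterior spread is expected
     # in row 0 of the transition matrix, while row 1 stays (numerically) fixed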
     obs = np.array([0, 0, 0, 0, 0, 1, 1, 1, 1], dtype=int)
     mle = bhmm.estimate_hmm([obs], nstates=2, lag=1)
     sampled = bhmm.bayesian_hmm([obs], mle, reversible=False, nsample=2000,
                                 p0_prior='mixed', transition_matrix_prior='mixed')
     assert np.all(sampled.transition_matrix_std[0] > 0)
     assert np.max(np.abs(sampled.transition_matrix_std[1])) < 1e-3
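The snippets in these examples are test methods lifted out of a larger test module. A minimal sketch of the scaffolding they assume is given below; the imports follow from the code itself, while the class name is illustrative and not taken from the original source.

# Minimal scaffold assumed by the test snippets in these examples.
# The TestCase subclass name is illustrative; numpy, bhmm and the standard
# library provide everything else the snippets reference.
import unittest
from os.path import abspath, join, pardir

import numpy as np

import bhmm


class BhmmExampleTests(unittest.TestCase):
    # the test methods and setUpClass hooks shown in the examples would be
    # defined inside a unittest.TestCase subclass like this one, which provides
    # self.assertRaises and class-level setup
    pass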
Example #2
    @classmethod
    def setUpClass(cls):
        # load observations
        testfile = abspath(join(abspath(__file__), pardir))
        testfile = join(testfile, 'data')
        testfile = join(testfile, '2well_traj_100K.dat')
        obs = np.loadtxt(testfile, dtype=int)

        # don't print
        bhmm.config.verbose = False

        # hidden states
        nstates = 2

        # run with lag 1 and 10
        cls.hmm_lag1 = bhmm.estimate_hmm([obs], nstates, lag=1, type='discrete')
        cls.hmm_lag10 = bhmm.estimate_hmm([obs], nstates, lag=10, type='discrete')
Example #3
 def test_disconnected_2state(self):
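     # the two trajectories visit disjoint observed state sets ({0..4} and {6..10}),
     # so the count matrix is disconnected and estimation is expected to raise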
     dtrajs = [[
         4, 2, 0, 3, 4, 0, 1, 3, 0, 0, 3, 1, 0, 0, 1, 0, 2, 3, 2, 1, 1, 1,
         2, 4, 0, 4, 1, 3, 1, 2, 2, 2, 3, 4, 2, 0, 1, 4, 4, 3, 3, 4, 3, 2,
         2, 2, 2, 4, 0, 4, 2, 4, 4, 3, 3, 0, 4, 4, 3, 2, 0, 1, 1, 3, 3, 3,
         0, 1, 2, 2, 4, 2, 1, 1, 4, 0, 3, 4, 1, 2, 4, 0, 1, 4, 2, 1, 4, 0,
         4, 2, 3, 0, 2, 1, 0, 3, 0, 1, 3, 4
     ],
               [
                   7, 9, 7, 8, 10, 6, 8, 7, 10, 9, 8, 7, 8, 6, 10, 6, 10, 8,
                   9, 6, 8, 9, 10, 7, 6, 10, 6, 9, 6, 7, 7, 9, 10, 6, 6, 6,
                   7, 7, 8, 10, 7, 10, 8, 7, 6, 10, 8, 10, 9, 6, 6, 8, 6, 8,
                   10, 10, 7, 9, 8, 7, 10, 6, 8, 6, 8, 9, 6, 6, 7, 7, 8, 6,
                   7, 10, 8, 10, 8, 10, 6, 6, 10, 10, 8, 9, 10, 10, 9, 8, 9,
                   8, 10, 7, 7, 9, 7, 10, 8, 9, 8, 10
               ]]
     with self.assertRaises(ValueError):
         bhmm.estimate_hmm(dtrajs, 2, lag=5, output='discrete')
Example #4
 def test_1state(self):
     obs = np.array([0, 0, 0, 0, 0], dtype=int)
     hmm = bhmm.estimate_hmm([obs], nstates=1, lag=1, accuracy=1e-6)
     p0_ref = np.array([1.0])
     A_ref = np.array([[1.0]])
     B_ref = np.array([[1.0]])
     assert np.allclose(hmm.initial_distribution, p0_ref)
     assert np.allclose(hmm.transition_matrix, A_ref)
     assert np.allclose(hmm.output_model.output_probabilities, B_ref)
Example #5
    @classmethod
    def setUpClass(cls):
        # load observations
        testfile = abspath(join(abspath(__file__), pardir))
        testfile = join(testfile, 'data')
        testfile = join(testfile, '2well_traj_100K.dat')
        obs = np.loadtxt(testfile, dtype=int)

        # don't print
        bhmm.config.verbose = False

        # hidden states
        nstates = 2

        # run with lag 1 and 10
        cls.hmm_lag1 = bhmm.estimate_hmm([obs],
                                         nstates,
                                         lag=1,
                                         output='discrete')
        cls.hmm_lag10 = bhmm.estimate_hmm([obs],
                                          nstates,
                                          lag=10,
                                          output='discrete')
Example #6
 def test_no_except(self):
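     # smoke test: ML estimation followed by Bayesian sampling on two short
     # discrete trajectories should complete without raising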
     obs = [
         np.array([0, 1, 2, 2, 2, 2, 1, 2, 2, 2, 1, 0, 0, 0, 0, 0, 0, 0],
                  dtype=int),
         np.array([0, 0, 0, 0, 1, 1, 2, 2, 2, 2, 2, 2, 2, 1, 0, 0],
                  dtype=int)
     ]
     lag = 1
     nstates = 2
     nsamples = 2
     hmm_lag10 = bhmm.estimate_hmm(obs, nstates, lag=lag, output='discrete')
     # BHMM
     sampled_hmm_lag10 = bhmm.bayesian_hmm(obs[::lag],
                                           hmm_lag10,
                                           nsample=nsamples)
Example #7
 def test_2state_2step(self):
     obs = np.array([0, 1, 0], dtype=int)
     hmm = bhmm.estimate_hmm([obs], nstates=2, lag=1, accuracy=1e-6)
     p0_ref = np.array([1, 0])
     A_ref = np.array([[0.0, 1.0],
                       [1.0, 0.0]])
     B_ref = np.array([[1, 0],
                       [0, 1]])
     perm = [1, 0]  # permutation
     assert np.allclose(hmm.initial_distribution, p0_ref, atol=1e-5) \
            or np.allclose(hmm.initial_distribution, p0_ref[perm], atol=1e-5)
     assert np.allclose(hmm.transition_matrix, A_ref, atol=1e-5) \
            or np.allclose(hmm.transition_matrix, A_ref[np.ix_(perm, perm)], atol=1e-5)
     assert np.allclose(hmm.output_model.output_probabilities, B_ref, atol=1e-5) \
             or np.allclose(hmm.output_model.output_probabilities, B_ref[perm], atol=1e-5)
Example #8
    @classmethod
    def setUpClass(cls):
        # load observations
        testfile = abspath(join(abspath(__file__), pardir))
        testfile = join(testfile, 'data')
        testfile = join(testfile, '2well_traj_100K.dat')
        obs = np.loadtxt(testfile, dtype=int)

        # don't print
        bhmm.config.verbose = False
        # hidden states
        cls.nstates = 2
        # samples
        cls.nsamples = 100

        # EM with lag 10
        lag = 10
        cls.hmm_lag10 = bhmm.estimate_hmm([obs], cls.nstates, lag=lag, output='discrete')
        # BHMM
        cls.sampled_hmm_lag10 = bhmm.bayesian_hmm([obs[::lag]], cls.hmm_lag10, nsample=cls.nsamples)
Example #9
 def test_2state_rev_2step(self):
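     # the trajectory leaves state 0 and returns to it, so both rows of the count
     # matrix carry transitions and every element of the sampled transition
     # matrices should have nonzero posterior standard deviation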
     obs = np.array([0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 0], dtype=int)
     mle = bhmm.estimate_hmm([obs], nstates=2, lag=1)
     sampled = bhmm.bayesian_hmm([obs], mle, reversible=False, nsample=100,
                                 p0_prior='mixed', transition_matrix_prior='mixed')
     assert np.all(sampled.transition_matrix_std > 0)
Example #10
 def test_2state_rev_step(self):
     obs = np.array([0, 0, 0, 0, 0, 1, 1, 1, 1], dtype=int)
     mle = bhmm.estimate_hmm([obs], nstates=2, lag=1)
     # this will generate disconnected count matrices and should fail:
     with self.assertRaises(NotImplementedError):
         bhmm.bayesian_hmm([obs], mle, reversible=True, p0_prior=None, transition_matrix_prior=None)
Example #11
 def test_1state_fail(self):
     obs = np.array([0, 0, 0, 0, 0], dtype=int)
     with self.assertRaises(NotImplementedError):
         bhmm.estimate_hmm([obs], nstates=2, lag=1, accuracy=1e-6)
Example #12
    def _estimate(self, dtrajs):
        """

        Parameters
        ----------

        Return
        ------
        hmsm : :class:`EstimatedHMSM <pyemma.msm.estimators.hmsm_estimated.EstimatedHMSM>`
            Estimated Hidden Markov state model

        """
        # ensure right format
        dtrajs = _types.ensure_dtraj_list(dtrajs)
        # if no initial MSM is given, estimate it now
        if self.msm_init is None:
            # estimate with sparse=False, because we need to do PCCA which is currently not implemented for sparse
            # estimate with store_data=True, because we need an EstimatedMSM
            msm_estimator = _MSMEstimator(lag=self.lag, reversible=self.reversible, sparse=False,
                                          connectivity=self.connectivity, dt_traj=self.timestep_traj)
            msm_init = msm_estimator.estimate(dtrajs)
        else:
            assert isinstance(self.msm_init, _EstimatedMSM), 'msm_init must be of type EstimatedMSM'
            msm_init = self.msm_init
            self.reversible = msm_init.is_reversible

        # print 'Connected set: ', msm_init.active_set

        # generate lagged observations
        if self.stride == 'effective':
            # by default use lag as stride (=lag sampling), because we currently have no better theory for deciding
            # how many uncorrelated counts we can make
            self.stride = self.lag
            # if we have more than nstates timescales in our MSM, we use the next (neglected) timescale as an
            # estimate of the decorrelation time
            if msm_init.nstates > self.nstates:
                corrtime = int(max(1, msm_init.timescales()[self.nstates-1]))
                # use the smaller of these two pessimistic estimates
                self.stride = min(self.stride, 2*corrtime)
        # TODO: Here we always use the full observation state space for the estimation.
        dtrajs_lagged = _lag_observations(dtrajs, self.lag, stride=self.stride)

        # check input
        assert _types.is_int(self.nstates) and self.nstates > 1 and self.nstates <= msm_init.nstates, \
            'nstates must be an int in [2,msmobj.nstates]'
        # if hmm.nstates = msm.nstates there is no problem. Otherwise, check spectral gap
        if msm_init.nstates > self.nstates:
            timescale_ratios = msm_init.timescales()[:-1] / msm_init.timescales()[1:]
            if timescale_ratios[self.nstates-2] < 2.0:
                self.logger.warn('Requested coarse-grained model with ' + str(self.nstates) + ' metastable states at ' +
                                 'lag=' + str(self.lag) + '. The ratio of relaxation timescales between ' +
                                 str(self.nstates) + ' and ' + str(self.nstates+1) + ' states is only ' +
                                 str(timescale_ratios[self.nstates-2]) + ' while we recommend at least 2. ' +
                                 'It is possible that the resulting HMM is inaccurate. Handle with caution.')

        # set things from MSM
        # TODO: dtrajs_obs is set here, but not used in estimation. Estimation is always done with
        # TODO: respect to the full observation space (see above). This is confusing. Define how we want to do this in general.
        # TODO: observable set is also not used, it is just saved.
        nstates_obs_full = msm_init.nstates_full
        if self.observe_active:
            nstates_obs = msm_init.nstates
            observable_set = msm_init.active_set
            dtrajs_obs = msm_init.discrete_trajectories_active
        else:
            nstates_obs = msm_init.nstates_full
            observable_set = np.arange(nstates_obs_full)
            dtrajs_obs = msm_init.discrete_trajectories_full

        # TODO: this is redundant with BHMM code because that code is currently not easily accessible and
        # TODO: we don't want to re-estimate. Should be reengineered in bhmm.
        # ---------------------------------------------------------------------------------------
        # PCCA-based coarse-graining
        # ---------------------------------------------------------------------------------------
        # pcca- to number of metastable states
        pcca = msm_init.pcca(self.nstates)

        # HMM output matrix
        eps = 0.01 * (1.0/nstates_obs_full)  # default output probability, in order to avoid zero columns
        # Use PCCA distributions, but at least eps to avoid 100% assignment to any state (breaks convergence)
        B_conn = np.maximum(msm_init.metastable_distributions, eps)
        # full state space output matrix
        B = eps * np.ones((self.nstates, nstates_obs_full), dtype=np.float64)
        # expand B_conn to full state space
        # TODO: here we always select the active set, no matter if observe_active=True or False.
        B[:, msm_init.active_set] = B_conn[:, :]
        # TODO: at this point we will have zero observation probabilities for states that are not in the active
        # TODO: set. If these occur in the trajectory, that will mean zero columns in the output probabilities
        # TODO: and crash of forward-backward and sampling algorithms.
        # renormalize B to make it row-stochastic
        B /= B.sum(axis=1)[:, None]

        # coarse-grained transition matrix
        P_coarse = pcca.coarse_grained_transition_matrix
        # take care of unphysical values. First symmetrize
        X = np.dot(np.diag(pcca.coarse_grained_stationary_probability), P_coarse)
        X = 0.5*(X + X.T)
        # if there are values < 0, set to eps
        X = np.maximum(X, eps)
        # turn into coarse-grained transition matrix
        A = X / X.sum(axis=1)[:, None]

        # ---------------------------------------------------------------------------------------
        # Estimate discrete HMM
        # ---------------------------------------------------------------------------------------
        # lazy import bhmm here in order to avoid dependency loops
        import bhmm
        # initialize discrete HMM
        hmm_init = bhmm.discrete_hmm(A, B, stationary=True, reversible=self.reversible)
        # run EM
        hmm = bhmm.estimate_hmm(dtrajs_lagged, self.nstates, lag=1, initial_model=hmm_init,
                                accuracy=self.accuracy, maxit=self.maxit)
        self.hmm = bhmm.DiscreteHMM(hmm)

        # find observable set
        transition_matrix = self.hmm.transition_matrix
        observation_probabilities = self.hmm.output_probabilities
        # TODO: Cutting down... OK, this can be done
        if self.observe_active:  # cut down observation probabilities to active set
            observation_probabilities = observation_probabilities[:, msm_init.active_set]
            observation_probabilities /= observation_probabilities.sum(axis=1)[:,None]  # renormalize

        # parametrize self
        self._dtrajs_full = dtrajs
        self._dtrajs_lagged = dtrajs_lagged
        self._observable_set = observable_set
        self._dtrajs_obs = dtrajs_obs
        self.set_model_params(P=transition_matrix, pobs=observation_probabilities,
                              reversible=self.reversible, dt_model=self.timestep_traj.get_scaled(self.lag))

        return self
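The coarse-graining step in Example #12 (build the flux matrix from the coarse-grained stationary distribution, symmetrize it, floor it at eps and renormalize the rows) can be exercised in isolation. Below is a small sketch with made-up numbers, assuming only numpy; the variable names mirror those used above.

import numpy as np

# Toy illustration of the transition-matrix coarse-graining used in Example #12
# (the numbers are made up and only serve to exercise the arithmetic).
pi_coarse = np.array([0.7, 0.3])          # coarse-grained stationary distribution
P_coarse = np.array([[0.95, 0.05],
                     [0.15, 0.85]])       # PCCA coarse-grained transition matrix
eps = 0.01                                # floor to avoid non-positive entries

X = np.dot(np.diag(pi_coarse), P_coarse)  # unsymmetrized flux matrix
X = 0.5 * (X + X.T)                       # symmetrize
X = np.maximum(X, eps)                    # replace values below eps
A = X / X.sum(axis=1)[:, None]            # renormalize rows -> transition matrix

assert np.allclose(A.sum(axis=1), 1.0)    # A is row-stochastic by construction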