def test_2state_nonrev_step(self):
    """Sample a non-reversible 2-state HMM from a trajectory with a single 0 -> 1 step.

    Checks that every entry in the first row of the sampled transition-matrix
    standard deviation is positive, and that the second row is numerically
    zero (largest absolute entry below 1e-3).
    """
    trajectory = np.array([0] * 5 + [1] * 4, dtype=int)
    ml_model = bhmm.estimate_hmm([trajectory], nstates=2, lag=1)
    posterior = bhmm.bayesian_hmm(
        [trajectory],
        ml_model,
        reversible=False,
        nsample=2000,
        p0_prior='mixed',
        transition_matrix_prior='mixed',
    )
    first_row_std = posterior.transition_matrix_std[0]
    assert np.all(first_row_std > 0)
    second_row_std = posterior.transition_matrix_std[1]
    assert np.max(np.abs(second_row_std)) < 1e-3
def setUpClass(cls):
    """Estimate the discrete 2-state HMMs (lag 1 and lag 10) used by this test class.

    The observations are read from ``data/2well_traj_100K.dat`` located next
    to this module, and the resulting models are stored on the class as
    ``hmm_lag1`` and ``hmm_lag10``.
    """
    # locate the trajectory file relative to this module
    module_dir = abspath(join(abspath(__file__), pardir))
    traj_file = join(module_dir, 'data', '2well_traj_100K.dat')
    observations = np.loadtxt(traj_file, dtype=int)

    # silence bhmm output
    bhmm.config.verbose = False

    # number of hidden states
    n_hidden = 2

    # NOTE(review): other call sites in this file pass output='discrete';
    # confirm which keyword the installed bhmm.estimate_hmm expects.
    cls.hmm_lag1 = bhmm.estimate_hmm(
        [observations], n_hidden, lag=1, type='discrete')
    cls.hmm_lag10 = bhmm.estimate_hmm(
        [observations], n_hidden, lag=10, type='discrete')
def test_disconnected_2state(self):
    """Estimating a 2-state HMM from two disjoint trajectories must raise ValueError.

    The first trajectory only visits states 0..4 and the second only states
    6..10, so the two trajectories share no observed state.
    """
    low_states_traj = [
        4, 2, 0, 3, 4, 0, 1, 3, 0, 0, 3, 1, 0, 0, 1, 0, 2, 3, 2, 1,
        1, 1, 2, 4, 0, 4, 1, 3, 1, 2, 2, 2, 3, 4, 2, 0, 1, 4, 4, 3,
        3, 4, 3, 2, 2, 2, 2, 4, 0, 4, 2, 4, 4, 3, 3, 0, 4, 4, 3, 2,
        0, 1, 1, 3, 3, 3, 0, 1, 2, 2, 4, 2, 1, 1, 4, 0, 3, 4, 1, 2,
        4, 0, 1, 4, 2, 1, 4, 0, 4, 2, 3, 0, 2, 1, 0, 3, 0, 1, 3, 4,
    ]
    high_states_traj = [
        7, 9, 7, 8, 10, 6, 8, 7, 10, 9, 8, 7, 8, 6, 10, 6, 10, 8, 9, 6,
        8, 9, 10, 7, 6, 10, 6, 9, 6, 7, 7, 9, 10, 6, 6, 6, 7, 7, 8, 10,
        7, 10, 8, 7, 6, 10, 8, 10, 9, 6, 6, 8, 6, 8, 10, 10, 7, 9, 8, 7,
        10, 6, 8, 6, 8, 9, 6, 6, 7, 7, 8, 6, 7, 10, 8, 10, 8, 10, 6, 6,
        10, 10, 8, 9, 10, 10, 9, 8, 9, 8, 10, 7, 7, 9, 7, 10, 8, 9, 8, 10,
    ]
    disconnected = [low_states_traj, high_states_traj]
    self.assertRaises(
        ValueError,
        bhmm.estimate_hmm, disconnected, 2, lag=5, output='discrete')
def test_1state(self):
    """A constant trajectory fitted with one hidden state yields the trivial model.

    The initial distribution, transition matrix and output probabilities are
    each expected to equal 1.0 in their single entry.
    """
    constant_traj = np.array([0] * 5, dtype=int)
    model = bhmm.estimate_hmm([constant_traj], nstates=1, lag=1, accuracy=1e-6)

    expected_p0 = np.array([1.0])
    expected_A = np.array([[1.0]])
    expected_B = np.array([[1.0]])

    assert np.allclose(model.initial_distribution, expected_p0)
    assert np.allclose(model.transition_matrix, expected_A)
    assert np.allclose(model.output_model.output_probabilities, expected_B)
def setUpClass(cls):
    """Estimate the discrete 2-state HMMs (lag 1 and lag 10) used by this test class.

    The observations are read from ``data/2well_traj_100K.dat`` located next
    to this module, and the resulting models are stored on the class as
    ``hmm_lag1`` and ``hmm_lag10``.
    """
    # the data directory sits next to this module
    here = abspath(join(abspath(__file__), pardir))
    data_path = join(here, 'data', '2well_traj_100K.dat')
    trajectory = np.loadtxt(data_path, dtype=int)

    # silence bhmm output
    bhmm.config.verbose = False

    # number of hidden states
    n_hidden = 2

    # one maximum-likelihood model per lag time
    cls.hmm_lag1 = bhmm.estimate_hmm(
        [trajectory], n_hidden, lag=1, output='discrete')
    cls.hmm_lag10 = bhmm.estimate_hmm(
        [trajectory], n_hidden, lag=10, output='discrete')
def test_no_except(self):
    """Smoke test: ML estimation followed by Bayesian sampling must not raise.

    Nothing is asserted about the results; the test passes when both bhmm
    calls complete on the two short three-symbol trajectories.
    """
    obs = [
        np.array([0, 1, 2, 2, 2, 2, 1, 2, 2, 2, 1, 0, 0, 0, 0, 0, 0, 0], dtype=int),
        np.array([0, 0, 0, 0, 1, 1, 2, 2, 2, 2, 2, 2, 2, 1, 0, 0], dtype=int),
    ]
    lag = 1
    nstates = 2
    nsamples = 2

    # maximum-likelihood HMM used as the starting point for sampling
    hmm = bhmm.estimate_hmm(obs, nstates, lag=lag, output='discrete')

    # Subsample each trajectory at the lag time. Slicing the outer list
    # (obs[::lag]) would drop whole trajectories for lag > 1 instead of
    # striding the observations; for lag == 1 both forms are equivalent.
    lagged_obs = [traj[::lag] for traj in obs]

    # BHMM sampling; only the absence of an exception is checked
    bhmm.bayesian_hmm(lagged_obs, hmm, nsample=nsamples)
def test_2state_2step(self):
    """Fit a 2-state HMM to the trajectory [0, 1, 0] and compare with the reference.

    Hidden-state labels are arbitrary, so each estimated quantity is accepted
    if it matches the reference either directly or after swapping the two
    hidden states.
    """
    obs = np.array([0, 1, 0], dtype=int)
    hmm = bhmm.estimate_hmm([obs], nstates=2, lag=1, accuracy=1e-6)

    p0_ref = np.array([1, 0])
    A_ref = np.array([[0.0, 1.0],
                      [1.0, 0.0]])
    B_ref = np.array([[1, 0],
                      [0, 1]])
    perm = [1, 0]  # permutation of the hidden states

    assert np.allclose(hmm.initial_distribution, p0_ref, atol=1e-5) \
        or np.allclose(hmm.initial_distribution, p0_ref[perm], atol=1e-5)
    assert np.allclose(hmm.transition_matrix, A_ref, atol=1e-5) \
        or np.allclose(hmm.transition_matrix, A_ref[np.ix_(perm, perm)], atol=1e-5)
    # Rows of B correspond to hidden states, so a relabelling permutes rows.
    # Plain B_ref[perm] is used instead of the nested-list form B_ref[[perm]],
    # whose interpretation differs between NumPy versions.
    assert np.allclose(hmm.output_model.output_probabilities, B_ref, atol=1e-5) \
        or np.allclose(hmm.output_model.output_probabilities, B_ref[perm], atol=1e-5)
def setUpClass(cls):
    """Build the maximum-likelihood and sampled HMMs (lag 10) used by this test class.

    Stores ``nstates``, ``nsamples``, ``hmm_lag10`` and ``sampled_hmm_lag10``
    on the class. The observations come from ``data/2well_traj_100K.dat``
    located next to this module.
    """
    # read the observation trajectory
    data_dir = join(abspath(join(abspath(__file__), pardir)), 'data')
    trajectory = np.loadtxt(join(data_dir, '2well_traj_100K.dat'), dtype=int)

    # silence bhmm output
    bhmm.config.verbose = False

    # number of hidden states and of posterior samples
    cls.nstates = 2
    cls.nsamples = 100

    # EM estimate at lag 10
    lag = 10
    cls.hmm_lag10 = bhmm.estimate_hmm(
        [trajectory], cls.nstates, lag=lag, output='discrete')

    # Bayesian sampling on the trajectory subsampled at the lag time
    strided = trajectory[::lag]
    cls.sampled_hmm_lag10 = bhmm.bayesian_hmm(
        [strided], cls.hmm_lag10, nsample=cls.nsamples)
def test_2state_rev_2step(self):
    """Sample a 2-state HMM from a trajectory that goes 0 -> 1 -> 0.

    Checks that every entry of the sampled transition-matrix standard
    deviation is strictly positive.

    NOTE(review): despite the "rev" in the test name, the sampler is run with
    reversible=False -- confirm this is intended.
    """
    trajectory = np.array([0] * 5 + [1] * 5 + [0], dtype=int)
    ml_model = bhmm.estimate_hmm([trajectory], nstates=2, lag=1)
    posterior = bhmm.bayesian_hmm(
        [trajectory],
        ml_model,
        reversible=False,
        nsample=100,
        p0_prior='mixed',
        transition_matrix_prior='mixed',
    )
    std = posterior.transition_matrix_std
    assert np.all(std > 0)
def test_2state_rev_step(self):
    """Reversible sampling without priors on a single 0 -> 1 step must fail.

    Expects ``bhmm.bayesian_hmm`` to raise NotImplementedError for this input.
    """
    trajectory = np.array([0] * 5 + [1] * 4, dtype=int)
    ml_model = bhmm.estimate_hmm([trajectory], nstates=2, lag=1)
    # this will generate disconnected count matrices and should fail:
    self.assertRaises(
        NotImplementedError,
        bhmm.bayesian_hmm,
        [trajectory],
        ml_model,
        reversible=True,
        p0_prior=None,
        transition_matrix_prior=None,
    )
def test_1state_fail(self):
    """Requesting two hidden states for a constant trajectory must fail.

    Expects ``bhmm.estimate_hmm`` to raise NotImplementedError.
    """
    constant_traj = np.array([0] * 5, dtype=int)
    self.assertRaises(
        NotImplementedError,
        bhmm.estimate_hmm, [constant_traj], nstates=2, lag=1, accuracy=1e-6)
def _estimate(self, dtrajs):
    """Estimate a discrete hidden Markov state model from discrete trajectories.

    An initial MSM (``self.msm_init``, or one estimated here when that is
    ``None``) is coarse-grained with PCCA to ``self.nstates`` metastable
    states. The coarse-grained transition matrix and output probabilities
    initialize a discrete HMM, which is then optimized with
    ``bhmm.estimate_hmm`` on the lagged observations.

    Parameters
    ----------
    dtrajs : discrete trajectory or list of discrete trajectories
        Input data; normalized with ``_types.ensure_dtraj_list``.

    Returns
    -------
    self
        This estimator, with ``self.hmm`` set and the model parameters
        assigned through ``set_model_params``.

    Raises
    ------
    AssertionError
        If ``self.msm_init`` is given but is not an ``_EstimatedMSM``, or if
        ``self.nstates`` is not an int in ``[2, msm_init.nstates]``.

    Notes
    -----
    Estimator attributes modified here besides the results: ``self.stride``
    is replaced by a number when it was ``'effective'``, and
    ``self.reversible`` is overwritten by ``msm_init.is_reversible`` when an
    initial MSM was supplied.
    """
    # ensure right format
    dtrajs = _types.ensure_dtraj_list(dtrajs)

    # if no initial MSM is given, estimate it now
    if self.msm_init is None:
        # estimate with sparse=False, because we need to do PCCA which is
        # currently not implemented for sparse
        msm_estimator = _MSMEstimator(lag=self.lag, reversible=self.reversible, sparse=False,
                                      connectivity=self.connectivity, dt_traj=self.timestep_traj)
        msm_init = msm_estimator.estimate(dtrajs)
    else:
        assert isinstance(self.msm_init, _EstimatedMSM), 'msm_init must be of type EstimatedMSM'
        msm_init = self.msm_init
        self.reversible = msm_init.is_reversible

    # check input before nstates is used for indexing timescales and before
    # any estimator attribute (self.stride) is modified
    assert _types.is_int(self.nstates) and self.nstates > 1 and self.nstates <= msm_init.nstates, \
        'nstates must be an int in [2,msmobj.nstates]'

    # generate lagged observations
    if self.stride == 'effective':
        # by default use lag as stride (=lag sampling), because we currently
        # have no better theory for deciding how many uncorrelated counts we
        # can make
        self.stride = self.lag
        # if we have more than nstates timescales in our MSM, we use the next
        # (neglected) timescale as an estimate of the decorrelation time
        if msm_init.nstates > self.nstates:
            corrtime = int(max(1, msm_init.timescales()[self.nstates-1]))
            # use the smaller of these two pessimistic estimates
            self.stride = min(self.stride, 2*corrtime)
    # TODO: Here we always use the full observation state space for the estimation.
    dtrajs_lagged = _lag_observations(dtrajs, self.lag, stride=self.stride)

    # if hmm.nstates = msm.nstates there is no problem. Otherwise, check spectral gap
    if msm_init.nstates > self.nstates:
        timescale_ratios = msm_init.timescales()[:-1] / msm_init.timescales()[1:]
        if timescale_ratios[self.nstates-2] < 2.0:
            self.logger.warning(
                'Requested coarse-grained model with ' + str(self.nstates) +
                ' metastable states at lag=' + str(self.lag) + '. ' +
                'The ratio of relaxation timescales between ' + str(self.nstates) +
                ' and ' + str(self.nstates+1) + ' states is only ' +
                str(timescale_ratios[self.nstates-2]) + ' while we recommend at least 2. ' +
                'It is possible that the resulting HMM is inaccurate. Handle with caution.')

    # set things from MSM
    # TODO: dtrajs_obs is set here, but not used in estimation. Estimation is always done with
    # TODO: respect to full observation (see above). This is confusing. Define how we want to do this in gen.
    # TODO: observable set is also not used, it is just saved.
    nstates_obs_full = msm_init.nstates_full
    if self.observe_active:
        observable_set = msm_init.active_set
        dtrajs_obs = msm_init.discrete_trajectories_active
    else:
        observable_set = np.arange(nstates_obs_full)
        dtrajs_obs = msm_init.discrete_trajectories_full

    # TODO: this is redundant with BHMM code because that code is currently not easily accessible and
    # TODO: we don't want to re-estimate. Should be reengineered in bhmm.
    # ---------------------------------------------------------------------------------------
    # PCCA-based coarse-graining
    # ---------------------------------------------------------------------------------------
    # pcca- to number of metastable states
    pcca = msm_init.pcca(self.nstates)

    # HMM output matrix
    eps = 0.01 * (1.0/nstates_obs_full)  # default output probability, in order to avoid zero columns
    # Use PCCA distributions, but at least eps to avoid 100% assignment to any state (breaks convergence)
    B_conn = np.maximum(msm_init.metastable_distributions, eps)
    # full state space output matrix
    B = eps * np.ones((self.nstates, nstates_obs_full), dtype=np.float64)
    # expand B_conn to full state space
    # TODO: here we always select the active set, no matter if observe_active=True or False.
    B[:, msm_init.active_set] = B_conn[:, :]
    # TODO: at this point we will have zero observation probabilities for states that are not in the active
    # TODO: set. If these occur in the trajectory, that will mean zero columns in the output probabilities
    # TODO: and crash of forward-backward and sampling algorithms.
    # NOTE(review): B is initialized to eps everywhere, so columns outside the
    # active set appear to stay at eps rather than zero -- confirm whether the
    # TODO above still applies.
    # renormalize B to make it row-stochastic
    B /= B.sum(axis=1)[:, None]

    # coarse-grained transition matrix
    P_coarse = pcca.coarse_grained_transition_matrix
    # take care of unphysical values. First symmetrize
    X = np.dot(np.diag(pcca.coarse_grained_stationary_probability), P_coarse)
    X = 0.5*(X + X.T)
    # if there are values < 0, set to eps
    X = np.maximum(X, eps)
    # turn into coarse-grained transition matrix
    A = X / X.sum(axis=1)[:, None]

    # ---------------------------------------------------------------------------------------
    # Estimate discrete HMM
    # ---------------------------------------------------------------------------------------
    # lazy import bhmm here in order to avoid dependency loops
    import bhmm
    # initialize discrete HMM
    hmm_init = bhmm.discrete_hmm(A, B, stationary=True, reversible=self.reversible)
    # run EM; the observations are already lagged, hence lag=1
    hmm = bhmm.estimate_hmm(dtrajs_lagged, self.nstates, lag=1, initial_model=hmm_init,
                            accuracy=self.accuracy, maxit=self.maxit)
    self.hmm = bhmm.DiscreteHMM(hmm)

    # find observable set
    transition_matrix = self.hmm.transition_matrix
    observation_probabilities = self.hmm.output_probabilities
    # TODO: Cutting down... OK, this can be done
    if self.observe_active:
        # cut down observation probabilities to active set
        observation_probabilities = observation_probabilities[:, msm_init.active_set]
        observation_probabilities /= observation_probabilities.sum(axis=1)[:, None]  # renormalize

    # parametrize self
    self._dtrajs_full = dtrajs
    self._dtrajs_lagged = dtrajs_lagged
    self._observable_set = observable_set
    self._dtrajs_obs = dtrajs_obs
    self.set_model_params(P=transition_matrix, pobs=observation_probabilities,
                          reversible=self.reversible, dt_model=self.timestep_traj.get_scaled(self.lag))

    return self