Example #1
def test_sample_by_observation_probabilities_mapping(self):
    tmat = np.array([[0.9, .1], [.1, .9]])
    # hidden states correspond to observable states
    obs = np.eye(2)
    hmm = HiddenMarkovModel(tmat, obs)
    # dtraj halfway-split between states 0 and 1
    dtrajs = np.repeat([0, 1], 10)
    samples = hmm.sample_by_observation_probabilities(dtrajs, 10)
    # test that all trajectory indices are 0 (only 1 traj)
    np.testing.assert_array_equal(np.unique(np.concatenate(samples)[:, 0]),
                                  [0])
    # test that both hidden states map to correct parts of dtraj
    np.testing.assert_(np.all(samples[0][:, 1] < 10))
    np.testing.assert_(np.all(samples[1][:, 1] >= 10))
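A minimal standalone sketch of the sampling API exercised by this test (hedged: assumes deeptime and numpy are available; the values mirror the test above):

import numpy as np
from deeptime.markov.hmm import HiddenMarkovModel

hmm = HiddenMarkovModel(np.array([[0.9, 0.1], [0.1, 0.9]]), np.eye(2))
dtrajs = np.repeat([0, 1], 10)
# one (trajectory_index, frame_index) array per hidden state
samples = hmm.sample_by_observation_probabilities(dtrajs, 5)
print(samples[0])  # frames attributed to hidden state 0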
Example #2
def random_guess(n_observation_states: int,
                 n_hidden_states: int,
                 seed: Optional[int] = None):
    r"""Initializes a :class:`HMM <deeptime.markov.hmm.HiddenMarkovModel>` with a set number of hidden and
    observable states by setting the transition matrix uniform and drawing a random row-stochastic matrix as
    output probabilities.

    Parameters
    ----------
    n_observation_states : int
        The number of states in observable space.
    n_hidden_states : int
        The number of hidden states.
    seed : int, optional, default=None
        The random seed.

    Returns
    -------
    init_hmm : HiddenMarkovModel
        A randomly initialized hidden Markov state model.
    """
    state = np.random.RandomState(seed=seed)
    P = np.empty((n_hidden_states, n_hidden_states))
    P.fill(1. / n_hidden_states)
    B = state.uniform(size=(n_hidden_states, n_observation_states))
    B /= B.sum(axis=-1, keepdims=True)
    from deeptime.markov.hmm import HiddenMarkovModel
    return HiddenMarkovModel(transition_model=P, output_model=B)
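A hedged usage sketch for random_guess: draw a random initial model and refine it by maximum likelihood (the discrete trajectory below is made-up illustration data, not from the source):

import numpy as np
from deeptime.markov.hmm import MaximumLikelihoodHMM

init_hmm = random_guess(n_observation_states=5, n_hidden_states=2, seed=42)
dtrajs = [np.random.randint(0, 5, size=1000)]  # illustrative random data
hmm = MaximumLikelihoodHMM(init_hmm, lagtime=1).fit(dtrajs).fetch_model()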
Example #3
def test_sample_by_noncrisp_observation_probabilities_mapping(self):
    tmat = np.array([[0.9, .1], [.1, .9]])
    # hidden states correspond to observable states
    obs = np.array([[.9, .1], [.4, .6]])
    hmm = HiddenMarkovModel(tmat, obs)
    # dtraj halfway-split between states 0 and 1
    dtrajs = np.repeat([0, 1], 10)
    n_samples = 500000
    samples = hmm.sample_by_observation_probabilities(dtrajs, n_samples)
    # test that both hidden states map to correct distributions
    probs_hidden1 = np.histogram(dtrajs[samples[0][:, 1]],
                                 bins=2)[0] / n_samples
    probs_hidden2 = np.histogram(dtrajs[samples[1][:, 1]],
                                 bins=2)[0] / n_samples
    assert_array_almost_equal(probs_hidden1, [.9, .1], decimal=2)
    assert_array_almost_equal(probs_hidden2, [.4, .6], decimal=2)
Example #4
def from_data(dtrajs, n_hidden_states, reversible):
    r""" Makes an initial guess :class:`HMM <HiddenMarkovModel>` with Gaussian output model.

    To this end, a Gaussian mixture model is estimated using `scikit-learn <https://scikit-learn.org/>`_.

    Parameters
    ----------
    dtrajs : array_like or list of array_like
        Trajectories which are used for making the initial guess.
    n_hidden_states : int
        Number of hidden states.
    reversible : bool
        Whether the hidden transition matrix is estimated so that it is reversible.

    Returns
    -------
    hmm_init : HiddenMarkovModel
        An initial guess for the HMM

    See Also
    --------
    deeptime.markov.hmm.GaussianOutputModel : The type of output model this heuristic uses.
    deeptime.markov.hmm.init.discrete.metastable_from_data
    deeptime.markov.hmm.init.discrete.metastable_from_msm
    """
    from deeptime.markov.hmm import HiddenMarkovModel, GaussianOutputModel
    from sklearn.mixture import GaussianMixture
    import deeptime.markov.tools.estimation as msmest
    import deeptime.markov.tools.analysis as msmana
    from deeptime.util.types import ensure_timeseries_data

    dtrajs = ensure_timeseries_data(dtrajs)
    collected_observations = np.concatenate(dtrajs)
    gmm = GaussianMixture(n_components=n_hidden_states)
    gmm.fit(collected_observations[:, None])
    output_model = GaussianOutputModel(n_hidden_states, means=gmm.means_[:, 0], sigmas=np.sqrt(gmm.covariances_[:, 0]))

    # Compute fractional state memberships.
    Nij = np.zeros((n_hidden_states, n_hidden_states))
    for o_t in dtrajs:
        # length of trajectory
        T = o_t.shape[0]
        # output probability
        pobs = output_model.to_state_probability_trajectory(o_t)
        # normalize
        pobs /= pobs.sum(axis=1)[:, None]
        # Accumulate fractional transition counts from this trajectory.
        for t in range(T - 1):
            Nij += np.outer(pobs[t, :], pobs[t + 1, :])

    # Compute transition matrix maximum likelihood estimate.
    transition_matrix = msmest.transition_matrix(Nij, reversible=reversible)
    initial_distribution = msmana.stationary_distribution(transition_matrix)
    return HiddenMarkovModel(transition_model=transition_matrix, output_model=output_model,
                             initial_distribution=initial_distribution)
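A minimal sketch of how this initializer might be used (the synthetic data is invented for illustration; fit_fetch is deeptime's shorthand for fit(...).fetch_model()):

import numpy as np
from deeptime.markov.hmm import MaximumLikelihoodHMM

# two well-separated 1d Gaussians as stand-in observations
trajs = [np.concatenate([np.random.normal(-1., .1, 500),
                         np.random.normal(1., .1, 500)])]
hmm_init = from_data(trajs, n_hidden_states=2, reversible=True)
hmm = MaximumLikelihoodHMM(hmm_init, lagtime=1).fit_fetch(trajs)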
Example #5
def test_mlmsm_pipeline(self):
    hmm = HiddenMarkovModel(transition_model=MarkovStateModel([[.8, .2],
                                                               [.1, .9]]),
                            output_model=GaussianOutputModel(
                                n_states=2,
                                means=[-10, 10],
                                sigmas=[.1, .1]))
    htraj, traj = hmm.simulate(10000)
    transition_matrix = hmm.transition_model.transition_matrix
    pipeline = Pipeline(steps=[(
        'tica', TICA(dim=1, lagtime=1)
    ), (
        'cluster', KMeans(n_clusters=2, max_iter=500)
    ), ('counts',
        TransitionCountEstimator(lagtime=1, count_mode="sliding"))])
    pipeline.fit(traj[..., None])
    counts = pipeline[-1].fetch_model().submodel_largest()
    mlmsm = MaximumLikelihoodMSM().fit(counts).fetch_model()
    P = mlmsm.pcca(2).coarse_grained_transition_matrix
    mindist = min(np.linalg.norm(P - transition_matrix),
                  np.linalg.norm(P - transition_matrix.T))
    assert mindist < 0.05
Example #6
def _append_sample(self, models, prior, sample_model):
    # Save a copy of the current model.
    model_copy = deepcopy(sample_model)
    # the Viterbi path is discarded, but is needed to get a new transition matrix for each model.
    if not self.store_hidden:
        model_copy.hidden_trajs.clear()
    # potentially restrict sampled models to observed space
    # since model_copy is defined on full space, observation_symbols are also observation states
    count_model = TransitionCountModel(model_copy.counts,
                                       lagtime=prior.lagtime)
    models.append(
        HiddenMarkovModel(
            transition_model=MarkovStateModel(
                model_copy.transition_matrix,
                stationary_distribution=model_copy.stationary_distribution,
                reversible=self.reversible,
                count_model=count_model),
            output_model=model_copy.output_model,
            initial_distribution=model_copy.initial_distribution,
            hidden_state_trajectories=model_copy.hidden_trajs))
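This helper appears to come from deeptime's BayesianHMM Gibbs sampler, which collects the appended models into a posterior. A hedged sketch of how such a sampler is typically driven (BayesianHMM.default is deeptime's convenience constructor; the data is illustrative):

import numpy as np
from deeptime.markov.hmm import BayesianHMM

dtrajs = [np.random.randint(0, 4, size=1000)]  # illustrative random data
estimator = BayesianHMM.default(dtrajs, n_hidden_states=2, lagtime=1, n_samples=50)
posterior = estimator.fit(dtrajs).fetch_model()
models = posterior.samples  # the list built up by _append_sample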
Example #7
def test_gaussian_prinz():
    system = prinz_potential()
    trajs = system.trajectory(np.zeros((5, 1)), length=10000)
    # this corresponds to a GMM with the means being the correct potential landscape minima
    om = deeptime.markov.hmm.GaussianOutputModel(n_states=4,
                                                 means=system.minima,
                                                 sigmas=[0.1] * 4)
    # this is almost the right hidden transition matrix
    tmat = np.array([[9.59e-1, 0, 4.06e-2, 1 - 9.59e-1 - 4.06e-2],
                     [0, 9.79e-1, 0, 1 - 9.79e-1],
                     [2.64e-2, 0, 9.68e-1, 1 - 9.68e-1 - 2.64e-2],
                     [0, 1.67e-2, 1 - 9.74e-1 - 1.67e-2, 9.74e-1]])
    msm = MarkovStateModel(tmat)
    init_ghmm = HiddenMarkovModel(
        msm, om, initial_distribution=msm.stationary_distribution)

    ghmm = MaximumLikelihoodHMM(init_ghmm, lagtime=1).fit_fetch(trajs)
    gom = ghmm.output_model
    for minimum_ix in range(4):
        x = gom.means[minimum_ix]
        xref = system.minima[np.argmin(np.abs(system.minima - x))]
        assert_allclose(x, xref, atol=1e-1)
Example #8
def metastable_from_msm(msm,
                        n_hidden_states: int,
                        reversible: bool = True,
                        stationary: bool = False,
                        separate_symbols=None,
                        regularize: bool = True):
    r""" Makes an initial guess for an :class:`HMM <deeptime.markov.hmm.HiddenMarkovModel>` with
    discrete output model from an already existing MSM over observable states. The procedure is described in
    :footcite:`noe2013projected` and uses PCCA+ :footcite:`roblitz2013fuzzy` for
    coarse-graining the transition matrix and obtaining membership assignments.

    Parameters
    ----------
    msm : MarkovStateModel
        The Markov state model over the observable state space.
    n_hidden_states : int
        The desired number of hidden states.
    reversible : bool, optional, default=True
        Whether the HMM transition matrix is estimated so that it is reversible.
    stationary : bool, optional, default=False
        If True, the initial distribution of hidden states is self-consistently computed as the stationary
        distribution of the transition matrix. If False, it will be estimated from the starting states.
        Only set this to True if you are sure that the observation trajectories are initiated from a global
        equilibrium distribution.
    separate_symbols : array_like, optional, default=None
        Force the given set of observed states to stay in a separate hidden state.
        The remaining n_hidden_states - 1 states will be assigned by a metastable decomposition.
    regularize : bool, optional, default=True
        If set to True, makes sure that the hidden initial distribution and transition matrix have nonzero probabilities
        by setting them to eps and then renormalizing. Avoids zeros that would cause estimation algorithms to crash or
        get stuck in suboptimal states.

    Returns
    -------
    hmm_init : HiddenMarkovModel
        An initial guess for the HMM

    See Also
    --------
    deeptime.markov.hmm.DiscreteOutputModel
        The type of output model this heuristic uses.

    :func:`metastable_from_data`
        Initial guess from data if no MSM is available yet.

    :func:`deeptime.markov.hmm.init.gaussian.from_data`
        Initial guess with :class:`Gaussian output model <deeptime.markov.hmm.GaussianOutputModel>`.

    References
    ----------
    .. footbibliography::
    """
    from deeptime.markov._transition_matrix import stationary_distribution
    from deeptime.markov._transition_matrix import estimate_P
    from deeptime.markov.msm import MarkovStateModel
    from deeptime.markov import PCCAModel

    count_matrix = msm.count_model.count_matrix
    nonseparate_symbols = np.arange(msm.count_model.n_states_full)
    nonseparate_states = msm.count_model.symbols_to_states(nonseparate_symbols)
    nonseparate_msm = msm
    if separate_symbols is not None:
        separate_symbols = np.asanyarray(separate_symbols)
        if np.max(separate_symbols) >= msm.count_model.n_states_full:
            raise ValueError(f'Separate set has indices that do not exist in '
                             f'full state space: {np.max(separate_symbols)}')
        nonseparate_symbols = np.setdiff1d(nonseparate_symbols,
                                           separate_symbols)
        nonseparate_states = msm.count_model.symbols_to_states(
            nonseparate_symbols)
        nonseparate_count_model = msm.count_model.submodel(nonseparate_states)
        # make reversible
        nonseparate_count_matrix = nonseparate_count_model.count_matrix
        if issparse(nonseparate_count_matrix):
            nonseparate_count_matrix = nonseparate_count_matrix.toarray()
        P_nonseparate = estimate_P(nonseparate_count_matrix, reversible=True)
        pi = stationary_distribution(P_nonseparate, C=nonseparate_count_matrix)
        nonseparate_msm = MarkovStateModel(P_nonseparate,
                                           stationary_distribution=pi)
    if issparse(count_matrix):
        count_matrix = count_matrix.toarray()

    # if #metastable sets == #states, we can stop here
    n_meta = n_hidden_states if separate_symbols is None else n_hidden_states - 1
    if n_meta == nonseparate_msm.n_states:
        pcca = PCCAModel(nonseparate_msm.transition_matrix,
                         nonseparate_msm.stationary_distribution,
                         np.eye(n_meta), np.eye(n_meta))
    else:
        pcca = nonseparate_msm.pcca(n_meta)
    if separate_symbols is not None:
        separate_states = msm.count_model.symbols_to_states(separate_symbols)
        memberships = np.zeros((msm.n_states, n_hidden_states))
        memberships[nonseparate_states, :n_hidden_states -
                    1] = pcca.memberships
        memberships[separate_states, -1] = 1
    else:
        memberships = pcca.memberships
        separate_states = None

    hidden_transition_matrix = _coarse_grain_transition_matrix(
        msm.transition_matrix, memberships)
    if reversible:
        from deeptime.markov._transition_matrix import enforce_reversible_on_closed
        hidden_transition_matrix = enforce_reversible_on_closed(
            hidden_transition_matrix)

    hidden_counts = memberships.T.dot(count_matrix).dot(memberships)
    hidden_pi = stationary_distribution(hidden_transition_matrix,
                                        C=hidden_counts)

    output_probabilities = np.zeros(
        (n_hidden_states, msm.count_model.n_states_full))
    # we might have lost a few symbols, reduce nonsep symbols to the ones actually represented
    nonseparate_symbols = msm.count_model.state_symbols[nonseparate_states]
    if separate_symbols is not None:
        separate_symbols = msm.count_model.state_symbols[separate_states]
        output_probabilities[:n_hidden_states - 1,
                             nonseparate_symbols] = pcca.metastable_distributions
        output_probabilities[
            -1,
            separate_symbols] = msm.stationary_distribution[separate_states]
    else:
        output_probabilities[:,
                             nonseparate_symbols] = pcca.metastable_distributions

    # regularize
    eps_a = 0.01 / n_hidden_states if regularize else 0.
    hidden_pi, hidden_transition_matrix = _regularize_hidden(
        hidden_pi,
        hidden_transition_matrix,
        reversible=reversible,
        stationary=stationary,
        count_matrix=hidden_counts,
        eps=eps_a)
    eps_b = 0.01 / msm.n_states if regularize else 0.
    output_probabilities = _regularize_pobs(output_probabilities,
                                            nonempty=None,
                                            separate=separate_symbols,
                                            eps=eps_b)
    from deeptime.markov.hmm import HiddenMarkovModel
    return HiddenMarkovModel(transition_model=hidden_transition_matrix,
                             output_model=output_probabilities,
                             initial_distribution=hidden_pi)
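A sketch of the intended call pattern: estimate an MSM (with a count model) over the observable states first, then coarse-grain it into an initial HMM (estimator names are the standard deeptime ones; the random trajectory is only illustrative):

import numpy as np
from deeptime.markov import TransitionCountEstimator
from deeptime.markov.msm import MaximumLikelihoodMSM

dtrajs = [np.random.randint(0, 10, size=5000)]  # illustrative random data
counts = TransitionCountEstimator(lagtime=1, count_mode="sliding").fit_fetch(dtrajs)
msm = MaximumLikelihoodMSM(reversible=True).fit_fetch(counts.submodel_largest())
hmm_init = metastable_from_msm(msm, n_hidden_states=2)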
Example #9
def test_model_likelihood(self):
    hmm = HiddenMarkovModel(self.transition_probabilities,
                            self.conditional_probabilities)
    loglik = hmm.compute_observation_likelihood(self.dtraj)
    ref_logprob = -3.3725
    np.testing.assert_array_almost_equal(loglik, ref_logprob, decimal=4)
Example #10
    def fit(self, dtrajs, initial_model=None, **kwargs):
        r""" Fits a new :class:`HMM <HiddenMarkovModel>` to data.

        Parameters
        ----------
        dtrajs : array_like or list of array_like
            Timeseries data.
        initial_model : HiddenMarkovModel, optional, default=None
            Override for :attr:`initial_transition_model`.
        **kwargs
            Ignored kwargs for scikit-learn compatibility.

        Returns
        -------
        self : MaximumLikelihoodHMM
            Reference to self.
        """
        if initial_model is None:
            initial_model = self.initial_transition_model
        if initial_model is None or not isinstance(initial_model, HiddenMarkovModel):
            raise ValueError("For estimation, an initial model of type "
                             "`deeptime.markov.hmm.HiddenMarkovModel` is required.")

        # copy initial model
        transition_matrix = initial_model.transition_model.transition_matrix
        if issparse(transition_matrix):
            # want dense matrix, toarray makes a copy
            transition_matrix = transition_matrix.toarray()
        else:
            # new instance
            transition_matrix = np.copy(transition_matrix)

        hmm_data = MaximumLikelihoodHMM._HMMModelStorage(transition_matrix=transition_matrix,
                                                         output_model=initial_model.output_model.copy(),
                                                         initial_distribution=initial_model.initial_distribution.copy())

        dtrajs = ensure_timeseries_data(dtrajs)
        dtrajs = compute_dtrajs_effective(dtrajs, lagtime=self.lagtime, n_states=initial_model.n_hidden_states,
                                          stride=self.stride)

        max_n_frames = max(len(obs) for obs in dtrajs)
        # pre-construct hidden variables
        N = initial_model.n_hidden_states
        alpha = np.zeros((max_n_frames, N))
        beta = np.zeros((max_n_frames, N))
        gammas = [np.zeros((len(obs), N)) for obs in dtrajs]
        count_matrices = [np.zeros((N, N)) for _ in dtrajs]

        it = 0
        likelihoods = np.empty(self.maxit)
        # flag if connectivity has changed (e.g. state lost) - in that case the likelihood
        # is discontinuous and can't be used as a convergence criterion in that iteration.
        tmatrix_nonzeros = hmm_data.transition_matrix.nonzero()
        converged = False

        while not converged and it < self.maxit:
            loglik = 0.0
            for obs, gamma, counts in zip(dtrajs, gammas, count_matrices):
                loglik_update, _ = self._forward_backward(hmm_data, obs, alpha, beta, gamma, counts)
                loglik += loglik_update
            assert np.isfinite(loglik), it

            # convergence check
            if it > 0:
                dL = loglik - likelihoods[it - 1]
                if dL < self.accuracy:
                    converged = True

            # update model
            self._update_model(hmm_data, dtrajs, gammas, count_matrices, maxiter=self.maxit_reversible)

            # connectivity change check
            tmatrix_nonzeros_new = hmm_data.transition_matrix.nonzero()
            if not np.array_equal(tmatrix_nonzeros, tmatrix_nonzeros_new):
                converged = False  # unset converged
                tmatrix_nonzeros = tmatrix_nonzeros_new

            # end of iteration
            likelihoods[it] = loglik
            it += 1

        likelihoods = np.resize(likelihoods, it)

        transition_counts = self._reduce_transition_counts(count_matrices)

        count_model = TransitionCountModel(count_matrix=transition_counts, lagtime=self.lagtime)
        transition_model = MarkovStateModel(hmm_data.transition_matrix, reversible=self.reversible,
                                            count_model=count_model)
        hidden_state_trajs = [
            viterbi(hmm_data.transition_matrix, hmm_data.output_model.to_state_probability_trajectory(obs),
                    hmm_data.initial_distribution) for obs in dtrajs
        ]
        model = HiddenMarkovModel(
            transition_model=transition_model,
            output_model=hmm_data.output_model,
            initial_distribution=hmm_data.initial_distribution,
            likelihoods=likelihoods,
            state_probabilities=gammas,
            initial_count=self._init_counts(gammas),
            hidden_state_trajectories=hidden_state_trajs,
            stride=self.stride
        )
        self._model = model
        return self
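An end-to-end sketch of this fit entry point (hedged: the data is random and only illustrative; random_guess is the documented deeptime.markov.hmm.init.discrete initializer):

import numpy as np
from deeptime.markov.hmm import MaximumLikelihoodHMM
from deeptime.markov.hmm.init.discrete import random_guess

dtrajs = [np.random.randint(0, 4, size=2000)]  # illustrative random data
initial = random_guess(n_observation_states=4, n_hidden_states=2, seed=17)
hmm = MaximumLikelihoodHMM(initial, lagtime=1).fit(dtrajs).fetch_model()
print(hmm.transition_model.transition_matrix)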