示例#1
0
 def test_2state_2obs_Pgiven(self):
     obs = np.array([0, 0, 1, 1, 0])
     C = msmest.count_matrix(obs, 1).toarray()
     Aref = np.array([[1.0]])
     for rev in [True, False]:  # reversibiliy doesn't matter in this example
         P = msmest.transition_matrix(C, reversible=rev)
         p0, P0, B0 = init_discrete_hmm_spectral(C, 1, reversible=rev, P=P)
         assert(np.allclose(P0, Aref))
         # output must be 1 x 2, and no zeros
         assert(np.array_equal(B0.shape, np.array([1, 2])))
         assert(np.all(B0 > 0.0))
示例#2
0
文件: api.py 项目: ChayaSt/bhmm
def init_discrete_hmm(observations, nstates, lag=1, reversible=True, stationary=True, regularize=True,
                      method='connect-spectral', separate=None):
    """Use a heuristic scheme to generate an initial model.

    Parameters
    ----------
    observations : list of ndarray((T_i))
        list of arrays of length T_i with observation data
    nstates : int
        The number of states.
    lag : int
        Lag time at which the observations should be counted.
    reversible : bool
        Estimate reversible HMM transition matrix.
    stationary : bool
        p0 is the stationary distribution of P. Currently only reversible=True is implemented
    regularize : bool
        Regularize HMM probabilities to avoid 0's.
    method : str
        * 'lcs-spectral' : Does spectral clustering on the largest connected set
            of observed states.
        * 'connect-spectral' : Uses a weak regularization to connect the weakly
            connected sets and then initializes HMM using spectral clustering on
            the nonempty set.
        * 'spectral' : Uses spectral clustering on the nonempty subsets. Separated
            observed states will end up in separate hidden states. This option is
            only recommended for small observation spaces. Use connect-spectral for
            large observation spaces.
    separate : None or iterable of int
        Force the given set of observed states to stay in a separate hidden state.
        The remaining nstates-1 states will be assigned by a metastable decomposition.

    Examples
    --------

    Generate initial model for a discrete output model.

    >>> import bhmm
    >>> [model, observations, states] = bhmm.testsystems.generate_synthetic_observations(output='discrete')
    >>> initial_model = init_discrete_hmm(observations, model.nstates)

    """
    import msmtools.estimation as msmest
    from bhmm.init.discrete import init_discrete_hmm_spectral
    C = msmest.count_matrix(observations, lag).toarray()
    # regularization
    if regularize:
        eps_A = None
        eps_B = None
    else:
        eps_A = 0
        eps_B = 0
    if not stationary:
        raise NotImplementedError('Discrete-HMM initialization with stationary=False is not yet implemented.')

    if method=='lcs-spectral':
        lcs = msmest.largest_connected_set(C)
        p0, P, B = init_discrete_hmm_spectral(C, nstates, reversible=reversible, stationary=stationary,
                                              active_set=lcs, separate=separate, eps_A=eps_A, eps_B=eps_B)
    elif method=='connect-spectral':
        # make sure we're strongly connected
        C += msmest.prior_neighbor(C, 0.001)
        nonempty = _np.where(C.sum(axis=0) + C.sum(axis=1) > 0)[0]
        C[nonempty, nonempty] = _np.maximum(C[nonempty, nonempty], 0.001)
        p0, P, B = init_discrete_hmm_spectral(C, nstates, reversible=reversible, stationary=stationary,
                                              active_set=nonempty, separate=separate, eps_A=eps_A, eps_B=eps_B)
    elif method=='spectral':
        p0, P, B = init_discrete_hmm_spectral(C, nstates, reversible=reversible, stationary=stationary,
                                              active_set=None, separate=separate, eps_A=eps_A, eps_B=eps_B)
    else:
        raise NotImplementedError('Unknown discrete-HMM initialization method ' + str(method))

    hmm0 = discrete_hmm(p0, P, B)
    hmm0._lag = lag
    return hmm0
    def _estimate(self, dtrajs):
        import bhmm
        # ensure right format
        dtrajs = _types.ensure_dtraj_list(dtrajs)

        # CHECK LAG
        trajlengths = [_np.size(dtraj) for dtraj in dtrajs]
        if self.lag >= _np.max(trajlengths):
            raise ValueError('Illegal lag time ' + str(self.lag) + ' exceeds longest trajectory length')
        if self.lag > _np.mean(trajlengths):
            self.logger.warning('Lag time ' + str(self.lag) + ' is on the order of mean trajectory length '
                                + str(_np.mean(trajlengths)) + '. It is recommended to fit four lag times in each '
                                + 'trajectory. HMM might be inaccurate.')

        # EVALUATE STRIDE
        if self.stride == 'effective':
            # by default use lag as stride (=lag sampling), because we currently have no better theory for deciding
            # how many uncorrelated counts we can make
            self.stride = self.lag
            # get a quick estimate from the spectral radius of the nonreversible
            from pyemma.msm import estimate_markov_model
            msm_nr = estimate_markov_model(dtrajs, lag=self.lag, reversible=False, sparse=False,
                                           connectivity='largest', dt_traj=self.timestep_traj)
            # if we have more than nstates timescales in our MSM, we use the next (neglected) timescale as an
            # estimate of the decorrelation time
            if msm_nr.nstates > self.nstates:
                corrtime = max(1, msm_nr.timescales()[self.nstates-1])
                # use the smaller of these two pessimistic estimates
                self.stride = int(min(self.lag, 2*corrtime))

        # LAG AND STRIDE DATA
        dtrajs_lagged_strided = bhmm.lag_observations(dtrajs, self.lag, stride=self.stride)

        # OBSERVATION SET
        if self.observe_nonempty:
            observe_subset = 'nonempty'
        else:
            observe_subset = None

        # INIT HMM
        from bhmm import init_discrete_hmm
        from pyemma.msm.estimators import MaximumLikelihoodMSM
        if self.msm_init=='largest-strong':
            hmm_init = init_discrete_hmm(dtrajs_lagged_strided, self.nstates, lag=1,
                                         reversible=self.reversible, stationary=True, regularize=True,
                                         method='lcs-spectral', separate=self.separate)
        elif self.msm_init=='all':
            hmm_init = init_discrete_hmm(dtrajs_lagged_strided, self.nstates, lag=1,
                                         reversible=self.reversible, stationary=True, regularize=True,
                                         method='spectral', separate=self.separate)
        elif issubclass(self.msm_init.__class__, MaximumLikelihoodMSM):  # initial MSM given.
            from bhmm.init.discrete import init_discrete_hmm_spectral
            p0, P0, pobs0 = init_discrete_hmm_spectral(self.msm_init.count_matrix_full, self.nstates,
                                                       reversible=self.reversible, stationary=True,
                                                       active_set=self.msm_init.active_set,
                                                       P=self.msm_init.transition_matrix, separate=self.separate)
            hmm_init = bhmm.discrete_hmm(p0, P0, pobs0)
            observe_subset = self.msm_init.active_set  # override observe_subset.
        else:
            raise ValueError('Unknown MSM initialization option: ' + str(self.msm_init))

        # ---------------------------------------------------------------------------------------
        # Estimate discrete HMM
        # ---------------------------------------------------------------------------------------

        # run EM
        from bhmm.estimators.maximum_likelihood import MaximumLikelihoodEstimator as _MaximumLikelihoodEstimator
        hmm_est = _MaximumLikelihoodEstimator(dtrajs_lagged_strided, self.nstates, initial_model=hmm_init,
                                              output='discrete', reversible=self.reversible, stationary=self.stationary,
                                              accuracy=self.accuracy, maxit=self.maxit)
        # run
        hmm_est.fit()
        # package in discrete HMM
        self.hmm = bhmm.DiscreteHMM(hmm_est.hmm)

        # get model parameters
        self.initial_distribution = self.hmm.initial_distribution
        transition_matrix = self.hmm.transition_matrix
        observation_probabilities = self.hmm.output_probabilities

        # get estimation parameters
        self.likelihoods = hmm_est.likelihoods  # Likelihood history
        self.likelihood = self.likelihoods[-1]
        self.hidden_state_probabilities = hmm_est.hidden_state_probabilities  # gamma variables
        self.hidden_state_trajectories = hmm_est.hmm.hidden_state_trajectories  # Viterbi path
        self.count_matrix = hmm_est.count_matrix  # hidden count matrix
        self.initial_count = hmm_est.initial_count  # hidden init count
        self._active_set = _np.arange(self.nstates)

        # TODO: it can happen that we loose states due to striding. Should we lift the output probabilities afterwards?
        # parametrize self
        self._dtrajs_full = dtrajs
        self._dtrajs_lagged = dtrajs_lagged_strided
        self._nstates_obs_full = msmest.number_of_states(dtrajs)
        self._nstates_obs = msmest.number_of_states(dtrajs_lagged_strided)
        self._observable_set = _np.arange(self._nstates_obs)
        self._dtrajs_obs = dtrajs
        self.set_model_params(P=transition_matrix, pobs=observation_probabilities,
                              reversible=self.reversible, dt_model=self.timestep_traj.get_scaled(self.lag))

        # TODO: perhaps remove connectivity and just rely on .submodel()?
        # deal with connectivity
        states_subset = None
        if self.connectivity == 'largest':
            states_subset = 'largest-strong'
        elif self.connectivity == 'populous':
            states_subset = 'populous-strong'

        # return submodel (will return self if all None)
        return self.submodel(states=states_subset, obs=observe_subset, mincount_connectivity=self.mincount_connectivity)
示例#4
0
文件: api.py 项目: ongbe/bhmm
def init_discrete_hmm(observations,
                      nstates,
                      lag=1,
                      reversible=True,
                      stationary=True,
                      regularize=True,
                      method='connect-spectral',
                      separate=None):
    """Use a heuristic scheme to generate an initial model.

    Parameters
    ----------
    observations : list of ndarray((T_i))
        list of arrays of length T_i with observation data
    nstates : int
        The number of states.
    lag : int
        Lag time at which the observations should be counted.
    reversible : bool
        Estimate reversible HMM transition matrix.
    stationary : bool
        p0 is the stationary distribution of P. Currently only reversible=True is implemented
    regularize : bool
        Regularize HMM probabilities to avoid 0's.
    method : str
        * 'lcs-spectral' : Does spectral clustering on the largest connected set
            of observed states.
        * 'connect-spectral' : Uses a weak regularization to connect the weakly
            connected sets and then initializes HMM using spectral clustering on
            the nonempty set.
        * 'spectral' : Uses spectral clustering on the nonempty subsets. Separated
            observed states will end up in separate hidden states. This option is
            only recommended for small observation spaces. Use connect-spectral for
            large observation spaces.
    separate : None or iterable of int
        Force the given set of observed states to stay in a separate hidden state.
        The remaining nstates-1 states will be assigned by a metastable decomposition.

    Examples
    --------

    Generate initial model for a discrete output model.

    >>> import bhmm
    >>> [model, observations, states] = bhmm.testsystems.generate_synthetic_observations(output='discrete')
    >>> initial_model = init_discrete_hmm(observations, model.nstates)

    """
    import msmtools.estimation as msmest
    from bhmm.init.discrete import init_discrete_hmm_spectral
    C = msmest.count_matrix(observations, lag).toarray()
    # regularization
    if regularize:
        eps_A = None
        eps_B = None
    else:
        eps_A = 0
        eps_B = 0
    if not stationary:
        raise NotImplementedError(
            'Discrete-HMM initialization with stationary=False is not yet implemented.'
        )

    if method == 'lcs-spectral':
        lcs = msmest.largest_connected_set(C)
        p0, P, B = init_discrete_hmm_spectral(C,
                                              nstates,
                                              reversible=reversible,
                                              stationary=stationary,
                                              active_set=lcs,
                                              separate=separate,
                                              eps_A=eps_A,
                                              eps_B=eps_B)
    elif method == 'connect-spectral':
        # make sure we're strongly connected
        C += msmest.prior_neighbor(C, 0.001)
        nonempty = _np.where(C.sum(axis=0) + C.sum(axis=1) > 0)[0]
        C[nonempty, nonempty] = _np.maximum(C[nonempty, nonempty], 0.001)
        p0, P, B = init_discrete_hmm_spectral(C,
                                              nstates,
                                              reversible=reversible,
                                              stationary=stationary,
                                              active_set=nonempty,
                                              separate=separate,
                                              eps_A=eps_A,
                                              eps_B=eps_B)
    elif method == 'spectral':
        p0, P, B = init_discrete_hmm_spectral(C,
                                              nstates,
                                              reversible=reversible,
                                              stationary=stationary,
                                              active_set=None,
                                              separate=separate,
                                              eps_A=eps_A,
                                              eps_B=eps_B)
    else:
        raise NotImplementedError(
            'Unknown discrete-HMM initialization method ' + str(method))

    hmm0 = discrete_hmm(p0, P, B)
    hmm0._lag = lag
    return hmm0