def regularize_hidden(p0, P, reversible=True, stationary=False, C=None, eps=None):
    """ Regularizes the hidden initial distribution and transition matrix.

    Makes sure that the hidden initial distribution and transition matrix have
    nonzero probabilities by raising small entries to eps and then
    renormalizing. Avoids zeros that would cause estimation algorithms to
    crash or get stuck in suboptimal states.

    The inputs are not modified; new arrays are returned.

    Parameters
    ----------
    p0 : ndarray(n)
        Initial hidden distribution of the HMM
    P : ndarray(n, n)
        Hidden transition matrix
    reversible : bool
        HMM is reversible. Will make sure it is still reversible after
        modification.
    stationary : bool
        p0 is the stationary distribution of P. In this case p0 is not
        regularized separately but recomputed as the stationary distribution
        of the regularized P. If stationary=False, the regularization is
        applied to p0 directly.
    C : ndarray(n, n)
        Hidden count matrix. Only needed for stationary=True and P
        disconnected.
    eps : float or None
        Minimum value of the resulting transition matrix before
        renormalization. Default: evaluates to 0.01 / n. The coarse-graining
        equation can lead to negative elements and thus eps should be set to
        at least 0. Positive settings of eps are similar to a prior and
        enforce minimum positive values for all transition probabilities.

    Return
    ------
    p0 : ndarray(n)
        regularized initial distribution
    P : ndarray(n, n)
        regularized transition matrix

    """
    n = P.shape[0]
    if eps is None:
        # default minimum probability, in order to avoid zero columns
        eps = 0.01 / n

    # REGULARIZE P: lift entries below eps, then renormalize each row.
    # Casting to float first keeps the in-place division valid for integer input.
    P = np.maximum(np.asarray(P, dtype=float), eps)
    P /= P.sum(axis=1)[:, None]

    # ensure reversibility
    if reversible:
        P = _tmatrix_disconnected.enforce_reversible_on_closed(P)

    # REGULARIZE p0
    if stationary:
        # p0 must stay consistent with the regularized P, so recompute it
        # (the result of this call was previously discarded).
        p0 = _tmatrix_disconnected.stationary_distribution(P, C=C)
    else:
        p0 = np.maximum(np.asarray(p0, dtype=float), eps)
        p0 /= p0.sum()

    return p0, P
def _updateTransitionMatrix(self):
    """
    Updates the hidden-state transition matrix and the initial distribution

    Samples a transition matrix from the posterior count matrix (model counts
    plus prior counts) and either derives the initial distribution from it
    (``self.stationary``) or samples it from a Dirichlet posterior over the
    initial counts. The model is then updated with the new sample.

    Raises
    ------
    NotImplementedError
        If reversible sampling is requested and the posterior count matrix is
        not strongly connected.

    """
    # TRANSITION MATRIX
    C = self.model.count_matrix() + self.prior_C  # posterior count matrix

    # check if we work with these options
    if self.reversible and not _tmatrix_disconnected.is_connected(C, strong=True):
        raise NotImplementedError(
            "Encountered disconnected count matrix with sampling option reversible:\n "
            + str(C)
            + "\nUse prior to ensure connectivity or use reversible=False."
        )

    # ensure consistent sparsity pattern (P0 might have additional zeros because of underflows)
    # TODO: these steps work around a bug in msmtools. Should be fixed there
    P0 = msmest.transition_matrix(C, reversible=self.reversible, maxiter=10000, warn_not_converged=False)
    zeros = np.where(P0 + P0.T == 0)
    C[zeros] = 0

    # run sampler
    Tij = msmest.sample_tmatrix(
        C, nsample=1, nsteps=self.transition_matrix_sampling_steps, reversible=self.reversible
    )

    # INITIAL DISTRIBUTION
    if self.stationary:  # p0 is consistent with P
        p0 = _tmatrix_disconnected.stationary_distribution(Tij, C=C)
    else:
        n0 = self.model.count_init().astype(float)
        first_timestep_counts_with_prior = n0 + self.prior_n0
        # The Dirichlet sampler rejects non-positive concentration parameters,
        # so sample only over states with positive pseudo-counts and leave the
        # remaining entries of p0 at zero.
        positive = first_timestep_counts_with_prior > 0
        p0 = np.zeros_like(n0)
        p0[positive] = np.random.dirichlet(first_timestep_counts_with_prior[positive])  # sample p0 from posterior

    # update HMM with new sample
    self.model.update(p0, Tij)
def _update_model(self, gammas, count_matrices, maxiter=10000000):
    """
    Maximization step: re-estimates the HMM parameters from the hidden state
    assignment and the count matrices, then refits the output model.

    Parameters
    ----------
    gammas : [ ndarray(T,N, dtype=float) ]
        list of state probabilities for each trajectory
    count_matrices : [ ndarray(N,N, dtype=float) ]
        list of the Baum-Welch transition count matrices for each hidden
        state trajectory
    maxiter : int
        maximum number of iterations of the transition matrix estimation if
        an iterative method is used.

    """
    initial_counts = self._init_counts(gammas)
    transition_counts = self._transition_counts(count_matrices)

    logger().info("Initial count = \n" + str(initial_counts))
    logger().info("Count matrix = \n" + str(transition_counts))

    # new transition matrix from the accumulated counts
    from bhmm.estimators._tmatrix_disconnected import estimate_P, stationary_distribution
    transition_matrix = estimate_P(
        transition_counts,
        reversible=self._hmm.is_reversible,
        fixed_statdist=self._fixed_stationary_distribution,
        maxiter=maxiter,
        maxerr=1e-12,
        mincount_connectivity=1e-16,
    )

    # a user-fixed distribution always wins; otherwise estimate it
    if not self._stationary:
        start = self._fixed_initial_distribution
        if start is None:
            start = initial_counts / np.sum(initial_counts)
    else:
        start = self._fixed_stationary_distribution
        if start is None:
            start = stationary_distribution(transition_matrix, C=transition_counts, mincount_connectivity=1e-16)

    # push the new parameters into the model
    self._hmm.update(start, transition_matrix)

    logger().info("T: \n" + str(transition_matrix))
    logger().info("pi: \n" + str(start))

    # refit the output model against the observations
    self._hmm.output_model.estimate(self._observations, gammas)
def _update_model(self, gammas, count_matrices):
    """
    Maximization step: re-estimates the HMM parameters from the hidden state
    assignment and the count matrices, then refits the output model.

    Parameters
    ----------
    gammas : [ ndarray(T,N, dtype=float) ]
        list of state probabilities for each trajectory
    count_matrices : [ ndarray(N,N, dtype=float) ]
        list of the Baum-Welch transition count matrices for each hidden
        state trajectory

    """
    n_trajectories = len(self._observations)
    n_states = self._nstates

    # accumulate first-frame state probabilities and transition counts
    transition_counts = np.zeros((n_states, n_states))
    initial_counts = np.zeros(n_states)
    for index in range(n_trajectories):
        initial_counts += gammas[index][0]
        transition_counts += count_matrices[index]

    logger().info("Count matrix = \n" + str(transition_counts))

    # new transition matrix from the accumulated counts
    from bhmm.estimators._tmatrix_disconnected import estimate_P, stationary_distribution
    transition_matrix = estimate_P(
        transition_counts,
        reversible=self._hmm.is_reversible,
        fixed_statdist=self._fixed_stationary_distribution,
    )

    # a user-fixed distribution always wins; otherwise estimate it
    if not self._hmm.is_stationary:
        start = self._fixed_initial_distribution
        if start is None:
            start = initial_counts / np.sum(initial_counts)
    else:
        start = self._fixed_stationary_distribution
        if start is None:
            # NOTE(review): arguments are passed as (counts, transition matrix);
            # confirm this matches the stationary_distribution signature in use.
            start = stationary_distribution(transition_counts, transition_matrix)

    # push the new parameters into the model
    # NOTE(review): update() receives (transition matrix, distribution) here; verify against its signature.
    self._hmm.update(transition_matrix, start)

    logger().info("T: \n" + str(transition_matrix))
    logger().info("pi: \n" + str(start))

    # refit the output model
    # TODO: need to parallelize model fitting. Otherwise we can't gain much speed!
    self._hmm.output_model._estimate_output_model(self._observations, gammas)
def _update_model(self, gammas, count_matrices):
    """
    Maximization step: re-estimates the HMM parameters from the hidden state
    assignment and the count matrices, then refits the output model.

    Parameters
    ----------
    gammas : [ ndarray(T,N, dtype=float) ]
        list of state probabilities for each trajectory
    count_matrices : [ ndarray(N,N, dtype=float) ]
        list of the Baum-Welch transition count matrices for each hidden
        state trajectory

    """
    num_traj = len(self._observations)
    num_hidden = self._nstates

    # sum up the first-frame state probabilities and the transition counts
    count_total = np.zeros((num_hidden, num_hidden))
    gamma0_total = np.zeros(num_hidden)
    for traj in range(num_traj):
        gamma0_total += gammas[traj][0]
        count_total += count_matrices[traj]

    logger().info("Count matrix = \n" + str(count_total))

    # estimate the transition matrix from the summed counts
    from bhmm.estimators._tmatrix_disconnected import estimate_P, stationary_distribution
    T_new = estimate_P(
        count_total,
        reversible=self._hmm.is_reversible,
        fixed_statdist=self._fixed_stationary_distribution,
    )

    # pick the stationary or the initial distribution; fixed values take precedence
    if not self._hmm.is_stationary:
        pi_new = self._fixed_initial_distribution
        if pi_new is None:
            pi_new = gamma0_total / np.sum(gamma0_total)
    else:
        pi_new = self._fixed_stationary_distribution
        if pi_new is None:
            # NOTE(review): called as (counts, transition matrix) - confirm the
            # expected argument order of stationary_distribution.
            pi_new = stationary_distribution(count_total, T_new)

    # hand the new parameters to the model
    # NOTE(review): update() is called as (transition matrix, distribution) - confirm against its signature.
    self._hmm.update(T_new, pi_new)

    logger().info("T: \n" + str(T_new))
    logger().info("pi: \n" + str(pi_new))

    # refit the output model
    # TODO: need to parallelize model fitting. Otherwise we can't gain much speed!
    self._hmm.output_model._estimate_output_model(self._observations, gammas)
def _update_model(self, gammas, count_matrices, maxiter=10000000):
    """
    Maximization step: re-estimates the HMM parameters from the hidden state
    assignment and the count matrices, then refits the output model.

    Parameters
    ----------
    gammas : [ ndarray(T,N, dtype=float) ]
        list of state probabilities for each trajectory
    count_matrices : [ ndarray(N,N, dtype=float) ]
        list of the Baum-Welch transition count matrices for each hidden
        state trajectory
    maxiter : int
        maximum number of iterations of the transition matrix estimation if
        an iterative method is used.

    """
    gamma0_total = self._init_counts(gammas)
    count_total = self._transition_counts(count_matrices)

    logger().info("Initial count = \n" + str(gamma0_total))
    logger().info("Count matrix = \n" + str(count_total))

    # estimate the transition matrix from the summed counts
    from bhmm.estimators._tmatrix_disconnected import estimate_P, stationary_distribution
    T_new = estimate_P(
        count_total,
        reversible=self._hmm.is_reversible,
        fixed_statdist=self._fixed_stationary_distribution,
        maxiter=maxiter,
        maxerr=1e-12,
        mincount_connectivity=1e-16,
    )

    # pick the stationary or the initial distribution; fixed values take precedence
    if not self._stationary:
        pi_new = self._fixed_initial_distribution
        if pi_new is None:
            pi_new = gamma0_total / np.sum(gamma0_total)
    else:
        pi_new = self._fixed_stationary_distribution
        if pi_new is None:
            pi_new = stationary_distribution(T_new, C=count_total, mincount_connectivity=1e-16)

    # hand the new parameters to the model
    self._hmm.update(pi_new, T_new)

    logger().info("T: \n" + str(T_new))
    logger().info("pi: \n" + str(pi_new))

    # refit the output model against the observations
    self._hmm.output_model.estimate(self._observations, gammas)
def _updateTransitionMatrix(self):
    """
    Draws a new hidden-state transition matrix and initial distribution and
    stores both in the model.

    Raises
    ------
    NotImplementedError
        If reversible sampling is requested and the posterior count matrix is
        not strongly connected.

    """
    # posterior counts = observed counts + prior counts
    posterior_counts = self.model.count_matrix() + self.prior_C

    # reversible sampling is only possible on a strongly connected count matrix
    if self.reversible:
        if not _tmatrix_disconnected.is_connected(posterior_counts, strong=True):
            raise NotImplementedError(
                'Encountered disconnected count matrix with sampling option reversible:\n '
                + str(posterior_counts)
                + '\nUse prior to ensure connectivity or use reversible=False.')

    # ensure consistent sparsity pattern (the estimate might have additional zeros because of underflows)
    # TODO: these steps work around a bug in msmtools. Should be fixed there
    estimate = msmest.transition_matrix(
        posterior_counts, reversible=self.reversible, maxiter=10000, warn_not_converged=False)
    vanishing = np.where(estimate + estimate.T == 0)
    posterior_counts[vanishing] = 0

    # draw a single transition matrix sample
    sampled_T = msmest.sample_tmatrix(
        posterior_counts,
        nsample=1,
        nsteps=self.transition_matrix_sampling_steps,
        reversible=self.reversible)

    if not self.stationary:
        # sample the initial distribution from its posterior, restricted to
        # the states with positive pseudo-counts; all others stay at zero
        init_counts = self.model.count_init().astype(float)
        pseudo_counts = init_counts + self.prior_n0
        has_mass = pseudo_counts > 0
        initial = np.zeros_like(init_counts)
        initial[has_mass] = np.random.dirichlet(pseudo_counts[has_mass])
    else:
        # initial distribution is consistent with the sampled transition matrix
        initial = _tmatrix_disconnected.stationary_distribution(sampled_T, C=posterior_counts)

    # update HMM with new sample
    self.model.update(initial, sampled_T)
def init_discrete_hmm_spectral(C_full, nstates, reversible=True, stationary=True, active_set=None, P=None,
                               eps_A=None, eps_B=None, separate=None):
    """Initializes discrete HMM using spectral clustering of observation counts

    Initializes HMM as described in [1]_. First estimates a Markov state model
    on the given observations, then uses PCCA+ to coarse-grain the transition
    matrix [2]_ which initializes the HMM transition matrix. The HMM output
    probabilities are given by Bayesian inversion from the PCCA+ memberships [1]_.

    The regularization parameters eps_A and eps_B are used to guarantee that
    the hidden transition matrix and output probability matrix have no zeros.
    HMM estimation algorithms such as the EM algorithm and the Bayesian
    sampling algorithm cannot recover from zero entries, i.e. once they are
    zero, they will stay zero.

    Parameters
    ----------
    C_full : ndarray(N, N)
        Transition count matrix on the full observable state space
    nstates : int
        The number of hidden states.
    reversible : bool
        Estimate reversible HMM transition matrix.
    stationary : bool
        p0 is the stationary distribution of P. The flag is passed on to
        regularize_hidden together with the coarse-grained count matrix.
    active_set : array-like of int or None
        Index area. Will estimate kinetics only on the given subset of C.
        Default: all states with at least one incoming or outgoing count.
    P : ndarray(n, n)
        Transition matrix estimated from C (with option reversible). Use this
        option if P has already been estimated to avoid estimating it twice.
    eps_A : float or None
        Minimum transition probability. Default: 0.01 / nstates
    eps_B : float or None
        Minimum output probability. Default: 0.01 / nfull
    separate : None or iterable of int
        Force the given set of observed states to stay in a separate hidden
        state. The remaining nstates-1 states will be assigned by a
        metastable decomposition.

    Returns
    -------
    p0 : ndarray(n)
        Hidden state initial distribution
    A : ndarray(n, n)
        Hidden state transition matrix
    B : ndarray(n, N)
        Hidden-to-observable state output probabilities

    Raises
    ------
    ValueError
        If the given active set is illegal, if P does not match the active
        set, or if separate contains indexes outside the full state space.
    NotImplementedError
        If the number of hidden states exceeds the number of observed states.

    Examples
    --------
    Generate initial model for a discrete output model.

    >>> import numpy as np
    >>> C = np.array([[0.5, 0.5, 0.0], [0.4, 0.5, 0.1], [0.0, 0.1, 0.9]])
    >>> initial_model = init_discrete_hmm_spectral(C, 2)

    References
    ----------
    .. [1] F. Noe, H. Wu, J.-H. Prinz and N. Plattner: Projected and hidden
        Markov models for calculating kinetics and metastable states of
        complex molecules. J. Chem. Phys. 139, 184114 (2013)
    .. [2] S. Kube and M. Weber: A coarse graining method for the identification
        of transition rates between molecular conformations.
        J. Chem. Phys. 126, 024103 (2007)

    """
    # MICROSTATE COUNT MATRIX
    nfull = C_full.shape[0]

    # INPUTS
    if eps_A is None:  # default transition probability, in order to avoid zero columns
        eps_A = 0.01 / nstates
    if eps_B is None:  # default output probability, in order to avoid zero columns
        eps_B = 0.01 / nfull

    # Manage sets: a state is nonempty if it has any incoming or outgoing count
    symsum = C_full.sum(axis=0) + C_full.sum(axis=1)
    nonempty = np.where(symsum > 0)[0]
    if active_set is None:
        active_set = nonempty
    else:
        # accept any integer sequence; .size and .copy() below need an ndarray
        active_set = np.asarray(active_set, dtype=int)
        if np.any(symsum[active_set] == 0):
            raise ValueError('Given active set has empty states')  # don't tolerate empty states
    if P is not None:
        if np.shape(P)[0] != active_set.size:  # needs to fit to active
            raise ValueError('Given initial transition matrix P has shape ' + str(np.shape(P))
                             + ' while active set has size ' + str(active_set.size))

    # when using separate states, only keep the nonempty ones (the others don't matter)
    if separate is None:
        separate_idx = None
        active_nonseparate = active_set.copy()
        nmeta = nstates
    else:
        # integer index array, so that any iterable (list, tuple, set) can be used for indexing
        separate_idx = np.asarray(list(separate), dtype=int)
        if np.max(separate_idx) >= nfull:
            raise ValueError('Separate set has indexes that do not exist in full state space: '
                             + str(np.max(separate_idx)))
        # sorted, integer-typed set difference (also well-defined when the result is empty)
        active_nonseparate = np.setdiff1d(active_set, separate_idx)
        nmeta = nstates - 1

    # check if we can proceed
    if active_nonseparate.size < nmeta:
        raise NotImplementedError('Trying to initialize ' + str(nmeta) + '-state HMM from smaller '
                                  + str(active_nonseparate.size) + '-state MSM.')

    # MICROSTATE TRANSITION MATRIX (MSM). This matrix may be disconnected and have transient states
    C_active = C_full[np.ix_(active_set, active_set)]
    if P is None:
        P_active = _tmatrix_disconnected.estimate_P(C_active, reversible=reversible, maxiter=10000)  # short iteration
    else:
        P_active = P

    # MICROSTATE EQUILIBRIUM DISTRIBUTION
    pi_active = _tmatrix_disconnected.stationary_distribution(P_active, C=C_active)
    pi_full = np.zeros(nfull)
    pi_full[active_set] = pi_active

    # NONSEPARATE TRANSITION MATRIX FOR PCCA+
    C_active_nonseparate = C_full[np.ix_(active_nonseparate, active_nonseparate)]
    if reversible and separate is None:  # in this case we already have a reversible estimate with the right size
        P_active_nonseparate = P_active
    else:  # not yet reversible. re-estimate
        P_active_nonseparate = _tmatrix_disconnected.estimate_P(C_active_nonseparate, reversible=True)

    # COARSE-GRAINING WITH PCCA+
    if active_nonseparate.size > nmeta:
        from msmtools.analysis.dense.pcca import PCCA
        pcca_obj = PCCA(P_active_nonseparate, nmeta)
        M_active_nonseparate = pcca_obj.memberships  # memberships
        B_active_nonseparate = pcca_obj.output_probabilities  # output probabilities
    else:  # equal size: every microstate is its own hidden state
        M_active_nonseparate = np.eye(nmeta)
        B_active_nonseparate = np.eye(nmeta)

    # ADD SEPARATE STATE IF NEEDED
    if separate is None:
        M_active = M_active_nonseparate
    else:
        M_full = np.zeros((nfull, nstates))
        M_full[active_nonseparate, :nmeta] = M_active_nonseparate
        M_full[separate_idx, -1] = 1  # separate states all belong to the last hidden state
        M_active = M_full[active_set]

    # COARSE-GRAINED TRANSITION MATRIX
    P_hmm = coarse_grain_transition_matrix(P_active, M_active)
    if reversible:
        P_hmm = _tmatrix_disconnected.enforce_reversible_on_closed(P_hmm)
    C_hmm = M_active.T.dot(C_active).dot(M_active)
    pi_hmm = _tmatrix_disconnected.stationary_distribution(P_hmm, C=C_hmm)  # need C_hmm in case if A is disconnected

    # COARSE-GRAINED OUTPUT DISTRIBUTION
    B_hmm = np.zeros((nstates, nfull))
    B_hmm[:nmeta, active_nonseparate] = B_active_nonseparate
    if separate is not None:  # add separate states
        B_hmm[-1, separate_idx] = pi_full[separate_idx]

    # REGULARIZE SOLUTION
    pi_hmm, P_hmm = regularize_hidden(pi_hmm, P_hmm, reversible=reversible, stationary=stationary, C=C_hmm, eps=eps_A)
    B_hmm = regularize_pobs(B_hmm, nonempty=nonempty, separate=separate, eps=eps_B)

    logger().info('Initial model: ')
    logger().info('initial distribution = \n' + str(pi_hmm))
    logger().info('transition matrix = \n' + str(P_hmm))
    logger().info('output matrix = \n' + str(B_hmm.T))

    return pi_hmm, P_hmm, B_hmm
def submodel(self, states=None, obs=None, mincount_connectivity='1/n'):
    """Restricts this HMM to a subset of hidden and observed states

    Note that the restriction is applied in place: the instance's count
    matrix, initial counts/distribution, observable set and model parameters
    are overwritten, and the instance itself is returned.

    Parameters
    ----------
    states : None, str or int-array
        Hidden states to restrict the model to. In addition to specifying
        the subset, possible options are:

        * None : all states - don't restrict
        * 'populous-strong' : strongly connected subset with maximum counts
        * 'populous-weak' : weakly connected subset with maximum counts
        * 'largest-strong' : strongly connected subset with maximum size
        * 'largest-weak' : weakly connected subset with maximum size
    obs : None, str or int-array
        Observed states to restrict the model to. In addition to specifying
        an array with the state labels to be observed, possible options are:

        * None : all states - don't restrict
        * 'nonempty' : all states with at least one observation in the estimator
    mincount_connectivity : float or '1/n'
        minimum number of counts to consider a connection between two states.
        Counts lower than that will count zero in the connectivity check and
        may thus separate the resulting transition matrix. Default value:
        1/nstates.

    Returns
    -------
    hmm : HMM
        The restricted HMM (this same instance).

    """
    # nothing to restrict: return unchanged
    if states is None and obs is None and mincount_connectivity == 0:
        return self
    # defaults: keep all hidden states / all observed states
    if states is None:
        states = _np.arange(self.nstates)
    if obs is None:
        obs = _np.arange(self.nstates_obs)

    # resolve the symbolic default to 1 / (number of hidden states)
    if str(mincount_connectivity) == '1/n':
        mincount_connectivity = 1.0/float(self.nstates)

    # handle new connectivity
    from bhmm.estimators import _tmatrix_disconnected
    S = _tmatrix_disconnected.connected_sets(self.count_matrix,
                                             mincount_connectivity=mincount_connectivity,
                                             strong=True)
    if len(S) > 1:
        # keep only non-negligible transitions
        C = _np.zeros(self.count_matrix.shape)
        large = _np.where(self.count_matrix >= mincount_connectivity)
        C[large] = self.count_matrix[large]
        for s in S:  # keep all (also small) transition counts within strongly connected subsets
            C[_np.ix_(s, s)] = self.count_matrix[_np.ix_(s, s)]
        # re-estimate transition matrix and stationary distribution on the thresholded counts
        P = _tmatrix_disconnected.estimate_P(C, reversible=self.reversible, mincount_connectivity=0)
        pi = _tmatrix_disconnected.stationary_distribution(P, C)
    else:
        # a single connected set: reuse the current estimates
        C = self.count_matrix
        P = self.transition_matrix
        pi = self.stationary_distribution

    # determine substates
    if isinstance(states, str):
        from bhmm.estimators import _tmatrix_disconnected
        # the option string encodes the connectivity type and the selection criterion
        strong = 'strong' in states
        largest = 'largest' in states
        S = _tmatrix_disconnected.connected_sets(self.count_matrix, mincount_connectivity=mincount_connectivity,
                                                 strong=strong)
        if largest:
            score = [len(s) for s in S]  # size of each connected set
        else:
            score = [self.count_matrix[_np.ix_(s, s)].sum() for s in S]  # total counts within each set
        states = _np.array(S[_np.argmax(score)])
    # NOTE: states was replaced by a default or an index array above, so it is never None here
    if states is not None:  # sub-transition matrix
        self._active_set = states
        C = C[_np.ix_(states, states)].copy()
        P = P[_np.ix_(states, states)].copy()
        P /= P.sum(axis=1)[:, None]  # renormalize rows after dropping columns
        pi = _tmatrix_disconnected.stationary_distribution(P, C)
        self.initial_count = self.initial_count[states]
        self.initial_distribution = self.initial_distribution[states] / self.initial_distribution[states].sum()

    # determine observed states
    if str(obs) == 'nonempty':
        import msmtools.estimation as msmest
        obs = _np.where(msmest.count_states(self.discrete_trajectories_lagged) > 0)[0]
    # NOTE: obs was replaced by a default or an index array above, so it is never None here
    if obs is not None:
        # set observable set
        self._observable_set = obs
        self._nstates_obs = obs.size
        # full2active mapping: -1 marks full-space states outside the observable set
        _full2obs = -1 * _np.ones(self._nstates_obs_full, dtype=int)
        _full2obs[obs] = _np.arange(len(obs), dtype=int)
        # observable trajectories, relabeled to observable-set indexes
        self._dtrajs_obs = []
        for dtraj in self.discrete_trajectories_full:
            self._dtrajs_obs.append(_full2obs[dtraj])
        # observation matrix restricted to the selected states, rows renormalized
        B = self.observation_probabilities[_np.ix_(states, obs)].copy()
        B /= B.sum(axis=1)[:, None]
    else:
        B = self.observation_probabilities

    # set quantities back.
    self.update_model_params(P=P, pobs=B, pi=pi)
    # order matters: count_matrix_EM is taken from the old self.count_matrix before it is overwritten
    self.count_matrix_EM = self.count_matrix[_np.ix_(states, states)]  # unchanged count matrix
    self.count_matrix = C  # count matrix consistent with P
    return self
def init_discrete_hmm_spectral(C_full, nstates, reversible=True, stationary=True, active_set=None, P=None,
                               eps_A=None, eps_B=None, separate=None):
    """Initializes discrete HMM using spectral clustering of observation counts

    Initializes HMM as described in [1]_. First estimates a Markov state model
    on the given observations, then uses PCCA+ to coarse-grain the transition
    matrix [2]_ which initializes the HMM transition matrix. The HMM output
    probabilities are given by Bayesian inversion from the PCCA+ memberships [1]_.

    The regularization parameters eps_A and eps_B are used to guarantee that
    the hidden transition matrix and output probability matrix have no zeros.
    HMM estimation algorithms such as the EM algorithm and the Bayesian
    sampling algorithm cannot recover from zero entries, i.e. once they are
    zero, they will stay zero.

    Parameters
    ----------
    C_full : ndarray(N, N)
        Transition count matrix on the full observable state space
    nstates : int
        The number of hidden states.
    reversible : bool
        Estimate reversible HMM transition matrix.
    stationary : bool
        p0 is the stationary distribution of P. The flag is passed on to
        regularize_hidden together with the coarse-grained count matrix.
    active_set : array-like of int or None
        Index area. Will estimate kinetics only on the given subset of C.
        Default: all states with at least one incoming or outgoing count.
    P : ndarray(n, n)
        Transition matrix estimated from C (with option reversible). Use this
        option if P has already been estimated to avoid estimating it twice.
    eps_A : float or None
        Minimum transition probability. Default: 0.01 / nstates
    eps_B : float or None
        Minimum output probability. Default: 0.01 / nfull
    separate : None or iterable of int
        Force the given set of observed states to stay in a separate hidden
        state. The remaining nstates-1 states will be assigned by a
        metastable decomposition.

    Returns
    -------
    p0 : ndarray(n)
        Hidden state initial distribution
    A : ndarray(n, n)
        Hidden state transition matrix
    B : ndarray(n, N)
        Hidden-to-observable state output probabilities

    Raises
    ------
    ValueError
        If the given active set is illegal, if P does not match the active
        set, or if separate contains indexes outside the full state space.
    NotImplementedError
        If the number of hidden states exceeds the number of observed states.

    Examples
    --------
    Generate initial model for a discrete output model.

    >>> import numpy as np
    >>> C = np.array([[0.5, 0.5, 0.0], [0.4, 0.5, 0.1], [0.0, 0.1, 0.9]])
    >>> initial_model = init_discrete_hmm_spectral(C, 2)

    References
    ----------
    .. [1] F. Noe, H. Wu, J.-H. Prinz and N. Plattner: Projected and hidden
        Markov models for calculating kinetics and metastable states of
        complex molecules. J. Chem. Phys. 139, 184114 (2013)
    .. [2] S. Kube and M. Weber: A coarse graining method for the identification
        of transition rates between molecular conformations.
        J. Chem. Phys. 126, 024103 (2007)

    """
    # MICROSTATE COUNT MATRIX
    nfull = C_full.shape[0]

    # INPUTS
    if eps_A is None:  # default transition probability, in order to avoid zero columns
        eps_A = 0.01 / nstates
    if eps_B is None:  # default output probability, in order to avoid zero columns
        eps_B = 0.01 / nfull

    # Manage sets: a state is nonempty if it has any incoming or outgoing count
    symsum = C_full.sum(axis=0) + C_full.sum(axis=1)
    nonempty = np.where(symsum > 0)[0]
    if active_set is None:
        active_set = nonempty
    else:
        # accept any integer sequence; .size and .copy() below need an ndarray
        active_set = np.asarray(active_set, dtype=int)
        if np.any(symsum[active_set] == 0):
            raise ValueError('Given active set has empty states')  # don't tolerate empty states
    if P is not None:
        if np.shape(P)[0] != active_set.size:  # needs to fit to active
            raise ValueError('Given initial transition matrix P has shape ' + str(np.shape(P))
                             + ' while active set has size ' + str(active_set.size))

    # when using separate states, only keep the nonempty ones (the others don't matter)
    if separate is None:
        separate_idx = None
        active_nonseparate = active_set.copy()
        nmeta = nstates
    else:
        # integer index array, so that any iterable (list, tuple, set) can be used for indexing
        separate_idx = np.asarray(list(separate), dtype=int)
        if np.max(separate_idx) >= nfull:
            raise ValueError('Separate set has indexes that do not exist in full state space: '
                             + str(np.max(separate_idx)))
        # sorted, integer-typed set difference (also well-defined when the result is empty)
        active_nonseparate = np.setdiff1d(active_set, separate_idx)
        nmeta = nstates - 1

    # check if we can proceed
    if active_nonseparate.size < nmeta:
        raise NotImplementedError('Trying to initialize ' + str(nmeta) + '-state HMM from smaller '
                                  + str(active_nonseparate.size) + '-state MSM.')

    # MICROSTATE TRANSITION MATRIX (MSM). This matrix may be disconnected and have transient states
    C_active = C_full[np.ix_(active_set, active_set)]
    if P is None:
        P_active = _tmatrix_disconnected.estimate_P(C_active, reversible=reversible, maxiter=10000)  # short iteration
    else:
        P_active = P

    # MICROSTATE EQUILIBRIUM DISTRIBUTION
    pi_active = _tmatrix_disconnected.stationary_distribution(P_active, C=C_active)
    pi_full = np.zeros(nfull)
    pi_full[active_set] = pi_active

    # NONSEPARATE TRANSITION MATRIX FOR PCCA+
    C_active_nonseparate = C_full[np.ix_(active_nonseparate, active_nonseparate)]
    if reversible and separate is None:  # in this case we already have a reversible estimate with the right size
        P_active_nonseparate = P_active
    else:  # not yet reversible. re-estimate
        P_active_nonseparate = _tmatrix_disconnected.estimate_P(C_active_nonseparate, reversible=True)

    # COARSE-GRAINING WITH PCCA+
    if active_nonseparate.size > nmeta:
        from msmtools.analysis.dense.pcca import PCCA
        pcca_obj = PCCA(P_active_nonseparate, nmeta)
        M_active_nonseparate = pcca_obj.memberships  # memberships
        B_active_nonseparate = pcca_obj.output_probabilities  # output probabilities
    else:  # equal size: every microstate is its own hidden state
        M_active_nonseparate = np.eye(nmeta)
        B_active_nonseparate = np.eye(nmeta)

    # ADD SEPARATE STATE IF NEEDED
    if separate is None:
        M_active = M_active_nonseparate
    else:
        M_full = np.zeros((nfull, nstates))
        M_full[active_nonseparate, :nmeta] = M_active_nonseparate
        M_full[separate_idx, -1] = 1  # separate states all belong to the last hidden state
        M_active = M_full[active_set]

    # COARSE-GRAINED TRANSITION MATRIX
    P_hmm = coarse_grain_transition_matrix(P_active, M_active)
    if reversible:
        P_hmm = _tmatrix_disconnected.enforce_reversible_on_closed(P_hmm)
    C_hmm = M_active.T.dot(C_active).dot(M_active)
    pi_hmm = _tmatrix_disconnected.stationary_distribution(P_hmm, C=C_hmm)  # need C_hmm in case if A is disconnected

    # COARSE-GRAINED OUTPUT DISTRIBUTION
    B_hmm = np.zeros((nstates, nfull))
    B_hmm[:nmeta, active_nonseparate] = B_active_nonseparate
    if separate is not None:  # add separate states
        B_hmm[-1, separate_idx] = pi_full[separate_idx]

    # REGULARIZE SOLUTION
    pi_hmm, P_hmm = regularize_hidden(pi_hmm, P_hmm, reversible=reversible, stationary=stationary, C=C_hmm, eps=eps_A)
    B_hmm = regularize_pobs(B_hmm, nonempty=nonempty, separate=separate, eps=eps_B)

    logger().info('Initial model: ')
    logger().info('initial distribution = \n' + str(pi_hmm))
    logger().info('transition matrix = \n' + str(P_hmm))
    logger().info('output matrix = \n' + str(B_hmm.T))

    return pi_hmm, P_hmm, B_hmm