def test_connected_count_matrix(self):
    """Check ``is_connected`` on the connected and non-connected fixtures."""
    # Directed: default call of is_connected.
    self.assertFalse(is_connected(self.C_not_connected))
    self.assertTrue(is_connected(self.C_connected))

    # Undirected: the fixture that fails the directed check must pass here.
    self.assertTrue(is_connected(self.C_not_connected, directed=False))
def mle_trev(C, maxerr=1.0E-12, maxiter=int(1.0E6), warn_not_converged=True, return_statdist=False, eps_mu=1.0E-15):
    """Estimate a reversible transition matrix from a sparse count matrix.

    The heavy lifting is delegated to ``_bindings.mle_trev_sparse``, which
    works on the symmetrized counts ``C + C.T`` in COO layout.

    Parameters
    ----------
    C : scipy.sparse matrix
        Square count matrix. It must pass ``is_connected(C, directed=True)``.
    maxerr : float, optional
        Must be positive; forwarded to the native solver
        (presumably its convergence tolerance -- confirm in the bindings).
    maxiter : int, optional
        Must be positive; forwarded to the native solver
        (presumably its iteration cap -- confirm in the bindings).
    warn_not_converged : bool, optional
        If True, emit a ``NotConvergedWarning`` when the solver returns
        the status code ``-5``.
    return_statdist : bool, optional
        If True, additionally return the ``mu`` vector filled by the solver.
    eps_mu : float, optional
        Forwarded unchanged to the native solver.

    Returns
    -------
    T : scipy.sparse.csr_matrix
        Transition matrix with the sparsity pattern of ``C + C.T``, passed
        through ``correct_transition_matrix``.
    (T, mu) : tuple
        Returned instead of ``T`` when ``return_statdist`` is True.

    Raises
    ------
    AssertionError
        If ``maxerr`` or ``maxiter`` is not positive, ``C`` is not square,
        or ``C`` is not strongly connected.
    """
    # NOTE: validation intentionally stays as ``assert`` so callers keep
    # seeing AssertionError; be aware these checks vanish under ``python -O``.
    assert maxerr > 0, 'maxerr must be positive'
    assert maxiter > 0, 'maxiter must be positive'
    assert C.shape[0] == C.shape[1], 'C must be a square matrix.'
    from deeptime.markov.tools.estimation import is_connected
    assert is_connected(C, directed=True), 'C must be strongly connected'

    # The native routine only handles floating point data; anything else
    # is promoted to double precision.
    dtype = C.dtype
    if dtype not in (np.float32, np.float64, np.longdouble):
        dtype = np.float64

    # Row sums as a flat array. ``np.asarray(...).ravel()`` yields the same
    # values as ``.A1`` for sparse matrices and additionally works when the
    # sum is already a plain ndarray (sparse arrays).
    C_sum_py = np.asarray(C.sum(axis=1)).ravel()
    C_sum = C_sum_py.astype(dtype, order='C', copy=False)

    CCt = C + C.T
    # convert CCt to coo format
    CCt_coo = CCt.tocoo()
    n_data = CCt_coo.nnz
    CCt_data = CCt_coo.data.astype(dtype, order='C', copy=False)
    i_indices = CCt_coo.row.astype(int, order='C', copy=True)
    j_indices = CCt_coo.col.astype(int, order='C', copy=True)

    # prepare data array of T in coo format; both buffers are written by
    # the native call below
    T_data = np.zeros(n_data, dtype=dtype, order='C')
    mu = np.zeros(C.shape[0], dtype=dtype, order='C')

    code = _bindings.mle_trev_sparse(T_data, CCt_data, i_indices, j_indices, n_data, C_sum,
                                     CCt.shape[0], maxerr, maxiter, mu, eps_mu)
    if code == -5 and warn_not_converged:
        # This estimator has no fixed stationary distribution, so the
        # message must not claim one.
        warnings.warn("Reversible transition matrix estimation didn't converge.",
                      NotConvergedWarning)

    # T matrix has the same shape and positions of nonzero elements as CCt
    T = scipy.sparse.csr_matrix((T_data, (i_indices, j_indices)), shape=CCt.shape)
    from deeptime.markov.tools.estimation.sparse.transition_matrix import correct_transition_matrix
    T = correct_transition_matrix(T)

    if return_statdist:
        return T, mu
    return T
def mle_trev_given_pi(C, mu, maxerr=1.0E-12, maxiter=1000000, warn_not_converged=True):
    """Estimate a reversible transition matrix for a given vector ``mu``.

    The off-diagonal structure is computed by
    ``_bindings.mle_trev_given_pi_sparse`` on the symmetrized counts
    ``C + C.T``; afterwards a diagonal is added so that rows are normalized.

    Parameters
    ----------
    C : scipy.sparse matrix
        Count matrix; must pass ``is_connected(C, directed=False)``.
    mu : ndarray
        Vector whose length must match the dimension of ``C``
        (the fixed stationary distribution, per the warning text).
    maxerr : float, optional
        Must be positive; forwarded to the native solver.
    maxiter : int, optional
        Must be positive; forwarded to the native solver.
    warn_not_converged : bool, optional
        If True, emit a ``NotConvergedWarning`` when the solver returns
        the status code ``-5``.

    Returns
    -------
    scipy.sparse matrix
        Sum of the solver output and the diagonal correction.

    Raises
    ------
    AssertionError
        If an argument check or the dimension check fails.
    """
    assert maxerr > 0, 'maxerr must be positive'
    assert maxiter > 0, 'maxiter must be positive'
    from deeptime.markov.tools.estimation import is_connected
    assert is_connected(C, directed=False), 'C must be (weakly) connected'

    # Only floating point dtypes are handed to the native code as-is.
    float_types = (np.float32, np.float64, np.longdouble)
    dtype = C.dtype if C.dtype in float_types else np.float64

    pi = mu.astype(dtype, order='C', copy=False)
    sym = (C + C.T).tocoo()
    assert sym.shape[0] == sym.shape[1] == pi.shape[0], 'Dimensions of C and mu don\'t agree.'

    nnz = sym.nnz
    sym_values = sym.data.astype(dtype, order='C', copy=False)
    rows = sym.row.astype(np.uint64, order='C', copy=False)
    cols = sym.col.astype(np.uint64, order='C', copy=False)

    # Output buffer in COO layout, filled by the native call.
    offdiag_values = np.zeros(nnz, dtype=dtype, order='C')
    status = _bindings.mle_trev_given_pi_sparse(offdiag_values, sym_values, rows, cols, nnz,
                                                pi, sym.shape[0], maxerr, maxiter)

    if warn_not_converged and status == -5:
        warnings.warn(
            "Reversible transition matrix estimation with fixed stationary distribution didn't converge.",
            NotConvergedWarning)

    # The unnormalized matrix shares shape and sparsity pattern with C + C.T.
    coordinates = (rows.copy(), cols.copy())
    offdiag = scipy.sparse.csr_matrix((offdiag_values, coordinates), shape=sym.shape)

    # Fill the diagonal from the row-normalization constraint, clipped at 0.
    row_totals = offdiag.sum(axis=1).A1
    diagonal = scipy.sparse.diags(np.maximum(1.0 - row_totals, 0.0), 0)
    return offdiag + diagonal
def setUpClass(cls):
    """Build a random single-trajectory data set and its reference matrix.

    A discrete trajectory is drawn in which every second frame (even index)
    holds a random state in ``[1, n_states)`` and all remaining frames stay
    at state 0. Drawing is repeated until the lag-1 count matrix passes
    ``is_connected(..., directed=True)``. The trajectories are stored in
    ``cls.tram_trajs`` and the dense reversible transition matrix of the
    counts in ``cls.T_ref``.
    """
    n_states = 50
    traj_length = 10000
    n_even_frames = (traj_length - 1) // 2 + 1

    # Redraw until the resulting count matrix is connected.
    while True:
        dtraj = np.zeros(traj_length, dtype=int)
        dtraj[::2] = np.random.randint(1, n_states, size=n_even_frames)
        c = count_matrix(dtraj, lag=1)
        if is_connected(c, directed=True):
            break

    # Companion trajectories of matching length, all zeros.
    ttraj = np.zeros(traj_length, dtype=int)
    btraj = np.zeros((traj_length, 1))

    cls.tram_trajs = ([ttraj], [dtraj], [btraj])
    cls.T_ref = transition_matrix(c, reversible=True).toarray()
def _estimate(self, dtrajs):
    """Estimate the MSM from discrete trajectories and return ``self``.

    Steps: collect count statistics, pick the active set according to
    ``self.connectivity``, build the active count matrix, restrict and
    renormalize an optional stationary-distribution constraint, estimate
    the transition matrix and store the result via ``set_model_params``.

    Raises
    ------
    ValueError
        If core set and constraint lengths differ, or if a reversible
        estimate is requested with connectivity ``'none'`` on a count
        matrix that is not connected.
    RuntimeError
        If the active set is empty.
    NotImplementedError
        For connectivity modes other than ``'largest'`` and ``'none'``.
    """
    # Count statistics define the full count matrix and full state space.
    stats = self._get_dtraj_stats(dtrajs)
    self._C_full = stats.count_matrix()
    self._nstates_full = self._C_full.shape[0]

    # A constraint given per core set is expanded onto the full state space.
    if self.core_set is not None and self.statdist_constraint is not None:
        if len(self.core_set) != len(self.statdist_constraint):
            raise ValueError(
                'Number of core sets and stationary distribution '
                'constraints do not match.')
        # TODO: find a more consistent way of dealing with this
        # NOTE(review): this overwrites self.statdist_constraint with the
        # expanded vector -- confirm that calling _estimate twice is supported.
        import copy
        core_constraint = copy.deepcopy(self.statdist_constraint)
        self.statdist_constraint = _np.zeros(self._nstates_full)
        self.statdist_constraint[self.core_set] = core_constraint

    # The active set doubles as the mapping from active to full indices.
    if self.connectivity != 'largest':
        # every other mode treats all visited states as active
        self.active_set = stats.visited_set
    elif self.statdist_constraint is None:
        # no constraint given: largest connected set of all states
        self.active_set = stats.largest_connected_set
    else:
        self.active_set = self._prepare_input_revpi(
            self._C_full, self.statdist_constraint)

    # FIXME: the flag is raised early so that the parameters set above can
    # already be used; this is not clean.
    self._is_estimated = True

    # Nothing can be estimated without active states.
    if _np.size(self.active_set) == 0:
        raise RuntimeError('Active set is empty. Cannot estimate MSM.')

    # Count matrix restricted to the active set.
    self._C_active = stats.count_matrix(subset=self.active_set)

    if not self.sparse:
        # Dense mode: the transition matrix and everything derived from it
        # will be computed with dense arrays.
        self._C_full = self._C_full.toarray()
        self._C_active = self._C_active.toarray()

    self._nstates = self._C_active.shape[0]

    # Back-mapping from full to active indices; -1 marks inactive states.
    self._full2active = -1 * _np.ones(stats.nstates, dtype=int)
    self._full2active[self.active_set] = _np.arange(len(self.active_set))

    # Restrict the constraint to the active set and renormalize it.
    statdist_active = None
    if self.statdist_constraint is not None:
        statdist_active = self.statdist_constraint[self.active_set]
        statdist_active /= statdist_active.sum()

    # TODO: non-rev estimate of msmtools does not comply with its own api...
    request_statdist = statdist_active is None and self.reversible
    opt_args = {'return_statdist': True} if request_statdist else {}

    # Estimate the transition matrix.
    if self.connectivity not in ('largest', 'none'):
        raise NotImplementedError(
            'MSM estimation with connectivity=%s is currently not implemented.'
            % self.connectivity)
    if self.connectivity == 'none':
        # A reversible estimate needs a connected active set; when that
        # holds this mode coincides with 'largest'.
        if self.reversible and not is_connected(self._C_active):
            raise ValueError(
                'Reversible MSM estimation is not possible with connectivity mode "none", '
                'because the set of all visited states is not reversibly connected'
            )
    P = transition_matrix(self._C_active,
                          reversible=self.reversible,
                          mu=statdist_active,
                          maxiter=self.maxiter,
                          maxerr=self.maxerr,
                          **opt_args)

    # A tuple result carries the stationary distribution alongside P.
    if isinstance(P, tuple):
        P, statdist_active = P

    # The estimator is its own model: store the parameters on self.
    self._connected_sets = stats.connected_sets
    self.set_model_params(P=P,
                          pi=statdist_active,
                          reversible=self.reversible,
                          dt_model=self.timestep_traj.get_scaled(self.lag))
    return self