def test_connected_count_matrix(self):
    """Check is_connected on the count-matrix fixtures, directed and undirected."""
    # Directed (default call): one fixture is rejected, the other accepted.
    self.assertFalse(is_connected(self.C_not_connected))
    self.assertTrue(is_connected(self.C_connected))
    # Undirected: the fixture rejected above is accepted once directed=False.
    self.assertTrue(is_connected(self.C_not_connected, directed=False))
示例#2
0
def mle_trev(C,
             maxerr=1.0E-12,
             maxiter=int(1.0E6),
             warn_not_converged=True,
             return_statdist=False,
             eps_mu=1.0E-15):
    """Estimate a reversible transition matrix from a sparse count matrix.

    The symmetrized counts ``C + C.T`` are handed to the native solver
    ``_bindings.mle_trev_sparse`` in COO form; the solver fills the data
    array of ``T`` and the vector ``mu`` in place.

    Parameters
    ----------
    C : sparse matrix
        Square count matrix. Must support ``.sum(axis=1).A1``, ``.T`` and
        addition yielding an object with ``.tocoo()``, and must pass
        ``is_connected(C, directed=True)``.
    maxerr : float, optional
        Tolerance forwarded to the solver. Must be positive.
    maxiter : int, optional
        Iteration limit forwarded to the solver. Must be positive.
    warn_not_converged : bool, optional
        If True, emit a ``NotConvergedWarning`` when the solver returns
        the code ``-5``.
    return_statdist : bool, optional
        If True, return ``(T, mu)`` instead of just ``T``.
    eps_mu : float, optional
        Forwarded unchanged to the solver.
        NOTE(review): presumably a lower bound on entries of ``mu`` --
        confirm against the bindings.

    Returns
    -------
    T : scipy.sparse matrix
        The solver output on the sparsity pattern of ``C + C.T``, passed
        through ``correct_transition_matrix``.
    mu : numpy.ndarray
        Only if ``return_statdist`` is True: the vector filled by the solver.

    Raises
    ------
    AssertionError
        If ``maxerr`` or ``maxiter`` is not positive, ``C`` is not square,
        or ``C`` is not strongly connected.
    """
    assert maxerr > 0, 'maxerr must be positive'
    assert maxiter > 0, 'maxiter must be positive'
    assert C.shape[0] == C.shape[1], 'C must be a square matrix.'
    from deeptime.markov.tools.estimation import is_connected
    assert is_connected(C, directed=True), 'C must be strongly connected'

    # Work in the input's floating type when it is one of the listed ones;
    # anything else (e.g. integer counts) is promoted to float64.
    dtype = C.dtype
    if dtype not in (np.float32, np.float64, np.longdouble):
        dtype = np.float64

    # Row sums of C as a flat, C-contiguous array.
    C_sum = C.sum(axis=1).A1.astype(dtype, order='C', copy=False)

    # Symmetrized counts in COO format: parallel data / row / col arrays.
    CCt = C + C.T
    CCt_coo = CCt.tocoo()
    n_data = CCt_coo.nnz
    CCt_data = CCt_coo.data.astype(dtype, order='C', copy=False)
    i_indices = CCt_coo.row.astype(int, order='C', copy=True)
    j_indices = CCt_coo.col.astype(int, order='C', copy=True)

    # Output buffers written in place by the native solver.
    T_data = np.zeros(n_data, dtype=dtype, order='C')
    mu = np.zeros(C.shape[0], dtype=dtype, order='C')
    code = _bindings.mle_trev_sparse(T_data, CCt_data, i_indices, j_indices,
                                     n_data, C_sum, CCt.shape[0], maxerr,
                                     maxiter, mu, eps_mu)
    # -5 is the solver's non-convergence code. The stationary distribution
    # is estimated here, not fixed, so the message must not claim otherwise.
    if code == -5 and warn_not_converged:
        warnings.warn(
            "Reversible transition matrix estimation didn't converge.",
            NotConvergedWarning)

    # T matrix has the same shape and positions of nonzero elements as CCt
    T = scipy.sparse.csr_matrix((T_data, (i_indices, j_indices)),
                                shape=CCt.shape)
    from deeptime.markov.tools.estimation.sparse.transition_matrix import correct_transition_matrix
    T = correct_transition_matrix(T)
    if return_statdist:
        return T, mu
    return T
示例#3
0
def mle_trev_given_pi(C,
                      mu,
                      maxerr=1.0E-12,
                      maxiter=1000000,
                      warn_not_converged=True):
    """Estimate a reversible transition matrix for a given stationary vector.

    The symmetrized counts ``C + C.T`` and ``mu`` are passed to the native
    solver ``_bindings.mle_trev_given_pi_sparse``; its output is completed
    by adding a diagonal so that rows with sum below one are filled up to one.

    Parameters
    ----------
    C : sparse matrix
        Count matrix; must pass ``is_connected(C, directed=False)``.
    mu : numpy.ndarray
        Stationary vector; its length must equal both dimensions of ``C``.
    maxerr : float, optional
        Tolerance forwarded to the solver. Must be positive.
    maxiter : int, optional
        Iteration limit forwarded to the solver. Must be positive.
    warn_not_converged : bool, optional
        If True, emit a ``NotConvergedWarning`` when the solver returns ``-5``.

    Returns
    -------
    scipy.sparse matrix
        Solver output plus the diagonal correction ``max(1 - rowsum, 0)``.

    Raises
    ------
    AssertionError
        If a precondition on ``maxerr``, ``maxiter``, connectivity or the
        dimensions of ``C`` and ``mu`` is violated.
    """
    assert maxerr > 0, 'maxerr must be positive'
    assert maxiter > 0, 'maxiter must be positive'
    from deeptime.markov.tools.estimation import is_connected
    assert is_connected(C, directed=False), 'C must be (weakly) connected'

    # Keep the input's floating type when it is a listed one, else float64.
    float_types = (np.float32, np.float64, np.longdouble)
    work_dtype = C.dtype
    work_dtype = work_dtype if work_dtype in float_types else np.float64

    pi = mu.astype(work_dtype, order='C', copy=False)
    sym = (C + C.T).tocoo()
    assert sym.shape[0] == sym.shape[1] == pi.shape[
        0], 'Dimensions of C and mu don\'t agree.'

    # COO triplets of the symmetrized counts, laid out for the native call.
    nnz = sym.nnz
    sym_values = sym.data.astype(work_dtype, order='C', copy=False)
    rows = sym.row.astype(np.uint64, order='C', copy=False)
    cols = sym.col.astype(np.uint64, order='C', copy=False)

    # Output buffer, one slot per nonzero of the symmetrized counts.
    solved_values = np.zeros(nnz, dtype=work_dtype, order='C')

    status = _bindings.mle_trev_given_pi_sparse(solved_values, sym_values,
                                                rows, cols, nnz, pi,
                                                sym.shape[0], maxerr, maxiter)

    # -5 is the solver's non-convergence code.
    if status == -5 and warn_not_converged:
        warnings.warn(
            "Reversible transition matrix estimation with fixed stationary distribution didn't converge.",
            NotConvergedWarning)

    # Same shape and sparsity pattern as the symmetrized count matrix.
    T_partial = scipy.sparse.csr_matrix(
        (solved_values, (rows.copy(), cols.copy())), shape=sym.shape)

    # Complete each row via the diagonal; negative corrections are clipped.
    row_totals = T_partial.sum(axis=1).A1
    diagonal_fill = np.maximum(1.0 - row_totals, 0.0)
    return T_partial + scipy.sparse.diags(diagonal_fill, 0)
示例#4
0
    def setUpClass(cls):
        """Create a random trajectory with a connected count matrix as fixture.

        Stores ``cls.tram_trajs`` (single-element lists of the ttraj, dtraj
        and btraj arrays) and ``cls.T_ref``, the dense reversible transition
        matrix estimated from the lag-1 count matrix.
        """
        n_states = 50
        traj_length = 10000
        # Number of even-indexed frames, i.e. len(dtraj[::2]).
        n_even_frames = (traj_length - 1) // 2 + 1

        # Odd frames stay in state 0; even frames get a random state in
        # [1, n_states). Redraw until the count matrix is connected.
        while True:
            dtraj = np.zeros(traj_length, dtype=int)
            dtraj[::2] = np.random.randint(1, n_states, size=n_even_frames)
            c = count_matrix(dtraj, lag=1)
            if is_connected(c, directed=True):
                break

        ttraj = np.zeros(traj_length, dtype=int)
        btraj = np.zeros((traj_length, 1))
        cls.tram_trajs = ([ttraj], [dtraj], [btraj])

        cls.T_ref = transition_matrix(c, reversible=True).toarray()
示例#5
0
    def _estimate(self, dtrajs):
        """Estimate the MSM from ``dtrajs`` and store the result on ``self``.

        Builds the full and active count matrices, chooses the active set
        from ``self.connectivity`` and ``self.statdist_constraint``, estimates
        the transition matrix and passes it to ``set_model_params``.

        Parameters
        ----------
        dtrajs
            Discrete trajectories, forwarded to ``self._get_dtraj_stats``.

        Returns
        -------
        self

        Raises
        ------
        ValueError
            If ``core_set`` and ``statdist_constraint`` differ in length, or
            if reversible estimation is requested with connectivity 'none'
            on an active count matrix that is not connected.
        RuntimeError
            If the active set is empty.
        NotImplementedError
            If ``self.connectivity`` is neither 'largest' nor 'none'.
        """
        # Trajectory statistics give the full count matrix and state count.
        stats = self._get_dtraj_stats(dtrajs)
        self._C_full = stats.count_matrix()
        self._nstates_full = self._C_full.shape[0]

        # Constraints given per core set are expanded onto the full state set.
        if self.core_set is not None and self.statdist_constraint is not None:
            if len(self.core_set) != len(self.statdist_constraint):
                raise ValueError(
                    'Number of core sets and stationary distribution '
                    'constraints do not match.')

            # TODO: find a more consistent way of dealing with this
            import copy
            constraint_on_cores = copy.deepcopy(self.statdist_constraint)
            self.statdist_constraint = _np.zeros(self._nstates_full)
            self.statdist_constraint[self.core_set] = constraint_on_cores

        # The active set doubles as the mapping from active to full indices.
        use_largest = self.connectivity == 'largest'
        if not use_largest:
            # every other mode treats all visited states as active
            self.active_set = stats.visited_set
        elif self.statdist_constraint is None:
            # no constraint given: largest connected set of all states
            self.active_set = stats.largest_connected_set
        else:
            self.active_set = self._prepare_input_revpi(
                self._C_full, self.statdist_constraint)

        # FIXME: flagged as estimated early so the parameters set above can
        # already be used below; this is not clean.
        self._is_estimated = True

        # Nothing can be estimated on an empty active set.
        if _np.size(self.active_set) == 0:
            raise RuntimeError('Active set is empty. Cannot estimate MSM.')

        self._C_active = stats.count_matrix(subset=self.active_set)

        # In dense mode the count matrices become arrays, so the transition
        # matrix and everything derived from it uses dense algebra.
        if not self.sparse:
            self._C_full = self._C_full.toarray()
            self._C_active = self._C_active.toarray()

        self._nstates = self._C_active.shape[0]

        # Back-mapping from full to active indices; -1 marks inactive states.
        self._full2active = _np.full(stats.nstates, -1, dtype=int)
        self._full2active[self.active_set] = _np.arange(len(self.active_set))

        # Restrict the constraint to the active set and renormalize it.
        statdist_active = None
        if self.statdist_constraint is not None:
            statdist_active = self.statdist_constraint[self.active_set]
            statdist_active /= statdist_active.sum()

        # TODO: non-rev estimate of msmtools does not comply with its own api...
        wants_statdist = statdist_active is None and self.reversible
        opt_args = {'return_statdist': True} if wants_statdist else {}

        # Reject unsupported modes before estimating.
        if self.connectivity not in ('largest', 'none'):
            raise NotImplementedError(
                'MSM estimation with connectivity=%s is currently not implemented.'
                % self.connectivity)
        # With 'none', reversible estimation needs a connected active set
        # (in which case the mode coincides with 'largest').
        if (self.connectivity == 'none' and self.reversible
                and not is_connected(self._C_active)):
            raise ValueError(
                'Reversible MSM estimation is not possible with connectivity mode "none", '
                'because the set of all visited states is not reversibly connected'
            )

        P = transition_matrix(self._C_active,
                              reversible=self.reversible,
                              mu=statdist_active,
                              maxiter=self.maxiter,
                              maxerr=self.maxerr,
                              **opt_args)

        # A tuple result carries the stationary distribution alongside P.
        if isinstance(P, tuple):
            P, statdist_active = P

        # The estimator is its own model: store the estimated parameters.
        self._connected_sets = stats.connected_sets
        self.set_model_params(P=P,
                              pi=statdist_active,
                              reversible=self.reversible,
                              dt_model=self.timestep_traj.get_scaled(self.lag))

        return self