Exemplo n.º 1
0
def test_msm_submodel_statdist(disconnected_states, lag, reversible,
                               count_mode):
    """Compare the estimated stationary distribution on each connected set.

    For every connected set of the count model, an MSM is fitted on the
    matching submodel. Its stationary distribution is compared (to one
    decimal) against the normalized left eigenvector of the row-normalized
    count matrix whose eigenvalue is within 1e-3 of one.
    """
    import scipy.linalg as salg

    counting = TransitionCountEstimator(lagtime=lag, count_mode=count_mode)
    count_model = counting.fit(disconnected_states.dtrajs).fetch_model()

    for cset in count_model.connected_sets():
        submodel = count_model.submodel(cset)
        msm = MaximumLikelihoodMSM(reversible=reversible).fit(submodel).fetch_model()

        # reference transition matrix: counts divided by their row sums
        counts = submodel.count_matrix
        row_sums = np.sum(counts, axis=-1)
        transition_matrix = counts / row_sums[:, None]

        eigenvalues, left_eigenvectors = salg.eig(transition_matrix,
                                                  left=True, right=False)
        dominant = np.where(np.real(eigenvalues) > 1. - 1e-3)[0]
        reference = np.real(left_eigenvectors)[:, dominant].squeeze()
        # eigenvectors are only defined up to sign; flip before normalizing
        if np.any(reference < 0):
            reference *= -1.
        reference = reference / np.sum(reference)

        assert_array_almost_equal(
            msm.stationary_distribution, reference, decimal=1,
            err_msg="Failed for cset {} with cmat {}".format(
                cset, submodel.count_matrix))
Exemplo n.º 2
0
def test_weakly_connected_count_matrix():
    """Fit MSMs on a count matrix whose states are only weakly connected.

    An estimator built with default arguments is expected to report three connected MSMs, while a
    non-reversible fit is expected to cover all four states. A submodel on states 1 and 2 is checked last.
    """
    count_matrix = np.array([[10, 1, 0, 0],
                             [0, 1, 1, 0],
                             [0, 1, 1, 1],
                             [0, 0, 0, 1]], dtype=np.float32)

    default_fit = MaximumLikelihoodMSM().fit(count_matrix).fetch_model()
    assert_equal(default_fit.n_connected_msms, 3,
                 err_msg="Count matrix not strongly connected, should decay into three sets.")

    # weak connectivity is enough for the non-reversible estimator
    full_msm = MaximumLikelihoodMSM(reversible=False).fit(count_matrix).fetch_model()
    assert_equal(full_msm.reversible, False)
    assert_equal(full_msm.n_states, 4)
    assert_equal(full_msm.lagtime, 1)
    assert_(full_msm.count_model is not None)
    assert_equal(full_msm.count_model.count_matrix, count_matrix)
    # the last state is a sink, so it carries the whole stationary probability
    assert_equal(full_msm.stationary_distribution, [0, 0, 0, 1])
    expected_transition_matrix = [
        [10. / 11, 1. / 11, 0, 0],
        [0, 0.5, 0.5, 0],
        [0, 1. / 3, 1. / 3, 1. / 3],
        [0, 0, 0, 1],
    ]
    assert_array_almost_equal(full_msm.transition_matrix, expected_transition_matrix)
    assert_equal(full_msm.n_eigenvalues, 4)
    assert_equal(full_msm.sparse, False)

    # restrict to the two middle states
    sub_msm = full_msm.submodel(np.array([1, 2]))
    assert_equal(sub_msm.reversible, False)
    assert_equal(sub_msm.n_states, 2)
    assert_equal(sub_msm.count_model.state_symbols, [1, 2])
    assert_equal(sub_msm.lagtime, 1)
    assert_equal(sub_msm.count_model.count_matrix, [[1, 1], [1, 1]])
    assert_equal(sub_msm.stationary_distribution, [0.5, 0.5])
    assert_array_almost_equal(sub_msm.transition_matrix, [[0.5, 0.5], [0.5, 0.5]])
    assert_equal(sub_msm.n_eigenvalues, 2)
    assert_equal(sub_msm.sparse, False)
Exemplo n.º 3
0
def msm_double_well(lagtime=100,
                    reversible=True,
                    **kwargs) -> MaximumLikelihoodMSM:
    """Return a maximum-likelihood MSM estimator fitted on the discrete double-well data.

    Sliding-window counts are collected at ``lagtime`` and restricted via ``submodel_largest()``
    before fitting. Extra keyword arguments are forwarded to :class:`MaximumLikelihoodMSM`.
    The fitted estimator (not its model) is returned.
    """
    counting = TransitionCountEstimator(lagtime=lagtime, count_mode="sliding")
    dtraj = datasets.double_well_discrete().dtraj
    largest_counts = counting.fit(dtraj).fetch_model().submodel_largest()

    estimator = MaximumLikelihoodMSM(reversible=reversible, **kwargs)
    estimator.fit(largest_counts)
    return estimator
Exemplo n.º 4
0
def test_recover_timescale():
    """The slowest timescale estimated from simulated data stays within 200 of the analytic one."""
    trajs = double_well_discrete().simulate_trajectories(n_trajectories=100,
                                                         n_steps=50000)
    reference_timescale = double_well_discrete().analytic_msm.timescales(1)[0]

    count_model = TransitionCountEstimator(1, 'sliding').fit(trajs).fetch_model()
    largest_counts = count_model.submodel_largest()
    estimated_msm = MaximumLikelihoodMSM().fit(largest_counts).fetch_model()
    recovered_timescale = estimated_msm.timescales(1)[0]

    deviation = np.abs(reference_timescale - recovered_timescale)
    np.testing.assert_(deviation <= 200.)
Exemplo n.º 5
0
def estimate_markov_model(dtrajs, lag, **kw) -> MarkovStateModel:
    """Estimate an MSM from discrete trajectories using sliding-window counts.

    The keywords ``statdist``, ``connectivity_threshold`` and ``sparse`` are consumed here;
    everything else in ``kw`` is forwarded to :class:`MaximumLikelihoodMSM`.
    """
    statdist = kw.pop('statdist', None)
    threshold = kw.pop('connectivity_threshold', 0.)
    use_sparse = kw.pop('sparse', False)

    counting = TransitionCountEstimator(lagtime=lag, count_mode="sliding", sparse=use_sparse)
    largest_counts = counting.fit(dtrajs).fetch_model().submodel_largest(
        probability_constraint=statdist, connectivity_threshold=threshold)

    estimator = MaximumLikelihoodMSM(stationary_distribution_constraint=statdist,
                                     sparse=use_sparse, **kw)
    estimator.fit(largest_counts)
    return estimator.fetch_model()
Exemplo n.º 6
0
def test_reversible_disconnected(disconnected_states, lag, count_mode):
    r"""Reversible estimation on disconnected data yields one sub-MSM per connected set.

    Connectivity of the states: 2 <- 0 <-> 1 <-> 3 | 7 -> 4 <-> 5 | 6
    """
    def fit_reversible(counts):
        return MaximumLikelihoodMSM(reversible=True).fit(counts).fetch_model()

    count_model = TransitionCountEstimator(lagtime=lag, count_mode=count_mode) \
        .fit(disconnected_states.dtrajs).fetch_model()

    full_msm = fit_reversible(count_model)
    expected_sets = disconnected_states.connected_sets
    assert_equal(full_msm.n_connected_msms, len(expected_sets))
    # subsets are ordered in decreasing cardinality, hence indices line up
    for index, expected in enumerate(expected_sets):
        assert_equal(full_msm.state_symbols(index), expected)

    # (selected states, expected symbols per connected MSM):
    # first a set that is connected but not reversibly, then a fully disconnected one
    cases = (
        ([0, 1, 2, 3], ([0, 1, 3], [2])),
        ([6, 2], ([6], [2])),
    )
    for selection, expected_symbols in cases:
        msm = fit_reversible(count_model.submodel(selection))
        assert_equal(msm.n_connected_msms, 2)
        for index, symbols in enumerate(expected_symbols):
            assert_equal(msm.state_symbols(index), symbols)
Exemplo n.º 7
0
def test_empirical_vs_ground_truth_koopman_model():
    """The score (r=2) of an MSM estimated from simulated data matches the ground truth to two decimals."""
    chain = BirthDeathChain([0, .5, .5], [.5, .5, 0.])
    dtraj = chain.msm.simulate(10000)

    estimator = MaximumLikelihoodMSM(reversible=True,
                                     stationary_distribution_constraint=chain.stationary_distribution,
                                     lagtime=1)
    estimated_msm = estimator.fit_fetch(dtraj)

    reference_score = chain.msm.koopman_model.score(r=2)
    estimated_score = estimated_msm.score(r=2)
    assert_almost_equal(reference_score, estimated_score, decimal=2)
Exemplo n.º 8
0
def test_estimator_params(reversible, statdist, sparse, maxiter, maxerr):
    """Constructor arguments are stored on the estimator; out-of-range constraints raise ``ValueError``.

    A stationary distribution constraint with entries above one or below zero must be rejected.
    Otherwise the constraint is expected to be stored normalized to sum one.
    """
    def build():
        return MaximumLikelihoodMSM(reversible=reversible, stationary_distribution_constraint=statdist,
                                    sparse=sparse, maxiter=maxiter, maxerr=maxerr)

    out_of_range = statdist is not None and (np.any(statdist > 1) or np.any(statdist < 0))
    if out_of_range:
        with assert_raises(ValueError):
            build()
        return

    msm = build()
    expected_constraint = None if statdist is None else statdist / np.sum(statdist)
    assert_equal(msm.reversible, reversible)
    assert_equal(msm.stationary_distribution_constraint, expected_constraint)
    assert_equal(msm.sparse, sparse)
    assert_equal(msm.maxiter, maxiter)
    assert_equal(msm.maxerr, maxerr)
Exemplo n.º 9
0
def compute_effective_stride(dtrajs, lagtime, n_states) -> int:
    r"""
    Estimates the stride required to obtain approximately uncorrelated samples.

    The lagtime itself is the default answer (lag sampling). A non-reversible MSM is fitted on sliding-window
    counts; if it has more states than ``n_states``, the stride becomes the minimum of the lagtime and twice
    the correlation time, where the correlation time is the next neglected timescale (but at least 1).

    Parameters
    ----------
    dtrajs : array_like or list of array_like
        Discretized trajectory or list of discretized trajectories
    lagtime : int
        Lagtime
    n_states : int
        Number of resolved states

    Returns
    -------
    stride : int
        Estimated effective stride to produce approximately uncorrelated samples
    """
    import warnings

    from deeptime.markov import TransitionCountEstimator
    from deeptime.markov.msm import MaximumLikelihoodMSM
    from deeptime.util.types import ensure_dtraj_list

    dtrajs = ensure_dtraj_list(dtrajs)

    # quick non-reversible fit, used only for its spectrum
    counting = TransitionCountEstimator(lagtime=lagtime, count_mode="sliding")
    largest_counts = counting.fit(dtrajs).fetch_model().submodel_largest()
    msm_non_rev = MaximumLikelihoodMSM(reversible=False, sparse=False).fit(largest_counts).fetch_model()

    if msm_non_rev.n_states <= n_states:
        # no neglected timescale to look at: fall back to lag sampling, as there is
        # currently no better theory for deciding how many uncorrelated counts can be made
        return lagtime

    # the non-reversible MSM may have complex eigenvalues; silence the corresponding warning
    with warnings.catch_warnings():
        from deeptime.util.exceptions import ImaginaryEigenValueWarning
        warnings.filterwarnings('ignore', category=ImaginaryEigenValueWarning)
        # first neglected timescale serves as estimate of the de-correlation time
        correlation_time = max(1, msm_non_rev.timescales()[n_states - 1])

    # take the smaller of the two pessimistic estimates
    return int(min(lagtime, 2 * correlation_time))
Exemplo n.º 10
0
def test_estimator(fixed_seed):
    """Train a one-output VAMPNet lobe on ellipsoid data and recover the generating MSM.

    The lobe is pre-trained manually, then passed through :class:`VAMPNet`. The fitted model is used as
    observable transform of a VAMP estimator; the projection is clustered into two states and the
    resulting MSM transition matrix must match the data's reference to two decimals.
    """
    data = deeptime.data.ellipsoids()
    obs = data.observations(6000, n_dim=10).astype(np.float32)

    # lobe: one linear unit followed by tanh, trained with Adam for 50 passes
    lobe = nn.Sequential(nn.Linear(10, 1), nn.Tanh())
    optimizer = torch.optim.Adam(lobe.parameters(), lr=1e-2)
    for _epoch in range(50):
        for batch, batch_lagged in deeptime.util.data.timeshifted_split(obs, lagtime=1, chunksize=512):
            optimizer.zero_grad()
            batch_loss = vampnet_loss(lobe(torch.from_numpy(batch)), lobe(torch.from_numpy(batch_lagged)))
            batch_loss.backward()
            optimizer.step()

    # fit the VAMPNet estimator, using the same dataset for training and validation
    lobe.eval()
    dataset = TrajectoryDataset(1, obs)
    train_loader = DataLoader(dataset, batch_size=512)
    validation_loader = DataLoader(dataset, batch_size=512)
    vampnet = VAMPNet(lobe=lobe)
    vampnet_model = vampnet.fit(train_loader, validation_loader=validation_loader).fetch_model()
    assert_(len(vampnet.train_scores) > 0)
    assert_(len(vampnet.validation_scores) > 0)

    # VAMP on top of the learnt featurization
    vamp = VAMP(lagtime=1, observable_transform=vampnet_model).fit(obs)
    projection = vamp.transform(obs, propagate=True)

    dtraj = KMeans(2).fit(projection).transform(projection)
    msm_vampnet = MaximumLikelihoodMSM().fit(dtraj, lagtime=1).fetch_model()

    np.testing.assert_array_almost_equal(msm_vampnet.transition_matrix, data.msm.transition_matrix, decimal=2)
Exemplo n.º 11
0
def test_estimator_fit(dtype):
    """Fit a VAMPNet with preset lobe weights and recover the generating MSM.

    The lobe weights are fixed by hand and the learning rate is tiny (1e-8), then one epoch is run
    with a validation set. The projection is passed through VAMP, clustered into two states and the
    resulting MSM transition matrix must match the data's reference to two decimals.
    """
    data = deeptime.data.ellipsoids()
    obs = data.observations(60000, n_dim=2).astype(dtype)
    lagged_dataset = deeptime.data.TimeLaggedDataset.from_trajectory(1, obs)
    train, val = torch.utils.data.random_split(lagged_dataset, [50000, 9999])

    # lobe with hand-picked parameters
    linear_layer = nn.Linear(2, 1)
    lobe = nn.Sequential(linear_layer, nn.Tanh())
    with torch.no_grad():
        linear_layer.weight[0, 0] = -0.3030
        linear_layer.weight[0, 1] = 0.3060
        linear_layer.bias[0] = -0.7392

    net = VAMPNet(lobe=lobe, dtype=dtype, learning_rate=1e-8)
    train_loader = create_timelagged_data_loader(train, lagtime=1, batch_size=512)
    val_loader = create_timelagged_data_loader(val, lagtime=1, batch_size=512)
    net.fit(train_loader, n_epochs=1, validation_data=val_loader, validation_score_callback=lambda *x: x)

    # network output, post-processed by a plain VAMP model
    network_output = net.transform(obs)
    vamp_model = VAMP(lagtime=1).fit(network_output).fetch_model()
    projection = vamp_model.transform(network_output)

    dtraj = Kmeans(2).fit(projection).transform(projection)
    msm_vampnet = MaximumLikelihoodMSM().fit(dtraj, lagtime=1).fetch_model()

    np.testing.assert_array_almost_equal(msm_vampnet.transition_matrix, data.msm.transition_matrix, decimal=2)
Exemplo n.º 12
0
def test_estimator():
    """Train a one-output VAMPNet lobe on ellipsoid data and recover the generating MSM.

    The lobe is pre-trained manually and then passed through :class:`VAMPNet`. The model's projection
    is post-processed by VAMP, clustered into two states and the resulting MSM transition matrix must
    match the data's reference to two decimals.
    """
    data = deeptime.data.ellipsoids()
    obs = data.observations(60000, n_dim=10).astype(np.float32)

    # lobe: one linear unit followed by tanh, trained with Adam for 50 passes
    lobe = nn.Sequential(nn.Linear(10, 1), nn.Tanh())
    optimizer = torch.optim.Adam(lobe.parameters(), lr=5e-4)
    for _epoch in range(50):
        for batch, batch_lagged in deeptime.data.timeshifted_split(obs, lagtime=1, chunksize=512):
            optimizer.zero_grad()
            batch_loss = loss(lobe(torch.from_numpy(batch)), lobe(torch.from_numpy(batch_lagged)))
            batch_loss.backward()
            optimizer.step()

    # fit the VAMPNet estimator on the pre-trained lobe
    lobe.eval()
    loader = create_timelagged_data_loader(obs, lagtime=1, batch_size=512)
    vampnet = VAMPNet(lobe=lobe)
    vampnet_model = vampnet.fit(loader).fetch_model()

    # network output, post-processed by a plain VAMP model
    network_output = vampnet_model.transform(obs)
    vamp_model = VAMP(lagtime=1).fit(network_output).fetch_model()
    projection = vamp_model.transform(network_output)

    dtraj = Kmeans(2).fit(projection).transform(projection)
    msm_vampnet = MaximumLikelihoodMSM().fit(dtraj, lagtime=1).fetch_model()

    np.testing.assert_array_almost_equal(msm_vampnet.transition_matrix, data.msm.transition_matrix, decimal=2)
Exemplo n.º 13
0
def test_nonreversible_disconnected():
    """Two independent chains on disjoint symbols yield two selectable sub-MSMs.

    A two-state and a three-state chain are simulated and concatenated into one trajectory. Index 0
    must refer to the three-state part (symbols 2, 3, 4), index 1 to the two-state part (symbols 0, 1),
    and selecting index 2 must raise an ``IndexError``.
    """
    two_state = MarkovStateModel([[.7, .3], [.3, .7]])
    three_state = MarkovStateModel([[.9, .05, .05], [.3, .6, .1], [.1, .1, .8]])
    # offset the second chain by 2 so the symbol sets do not overlap
    traj = np.concatenate([two_state.simulate(1000000), 2 + three_state.simulate(1000000)])
    count_estimator = TransitionCountEstimator(lagtime=1, count_mode="sliding").fit(traj)
    msm = MaximumLikelihoodMSM(reversible=True).fit(count_estimator).fetch_model()

    def check_selection(n_states, selected_symbols, other_index, other_symbols):
        assert_equal(msm.transition_matrix.shape, (n_states, n_states))
        assert_equal(msm.stationary_distribution.shape, (n_states,))
        assert_equal(msm.state_symbols(), selected_symbols)
        assert_equal(msm.state_symbols(other_index), other_symbols)

    check_selection(3, [2, 3, 4], 1, [0, 1])
    msm.select(1)
    check_selection(2, [0, 1], 0, [2, 3, 4])

    with assert_raises(IndexError):
        msm.select(2)
Exemplo n.º 14
0
 def __init__(self,
              reversible,
              statdist_constraint,
              sparse,
              count_mode="sliding"):
     """Fit a maximum-likelihood MSM on ``self.counts`` and store reference values.

     The base initializer receives ``statdist_constraint``, ``sparse`` and ``count_mode``. When
     ``statdist_constraint`` is truthy, ``self.stationary_distribution`` is passed to the estimator
     as stationary distribution constraint. The fitted model, the estimator, a reference expectation
     and reference timescales (depending on ``reversible``) are stored on the instance.
     """
     super().__init__(statdist_constraint=statdist_constraint,
                      sparse=sparse,
                      count_mode=count_mode)

     estimator_args = dict(reversible=reversible, maxerr=1e-12, sparse=sparse)
     if statdist_constraint:
         estimator_args['stationary_distribution_constraint'] = self.stationary_distribution
     estimator = MaximumLikelihoodMSM(**estimator_args)
     estimator.fit(self.counts)

     self._msm = estimator.fetch_model()
     self._msm_estimator = estimator
     self._expectation = 31.73
     # reference timescales differ slightly between the two estimation modes
     if reversible:
         self._timescales = np.array([310.87, 8.5, 5.09])
     else:
         self._timescales = np.array([310.49376926, 8.48302712, 5.02649564])
Exemplo n.º 15
0
def test_invalid_arguments():
    """Every invalid estimator input or model-collection argument must raise a ``ValueError``."""
    uniform_2x2 = np.array([[.5, .5], [.5, .5]])
    failing_calls = [
        # negative counts
        lambda: MaximumLikelihoodMSM().fit(-1 * np.ones((5, 5))).fetch_model(),
        # non quadratic count matrix
        lambda: MaximumLikelihoodMSM().fit(np.ones((3, 5))).fetch_model(),
        # stationary distribution not over whole state space
        lambda: MaximumLikelihoodMSM(
            stationary_distribution_constraint=np.array([1 / 3, 1 / 3, 1 / 3])).fit(np.ones((5, 5))),
        # no counts but statdist constraint
        lambda: MaximumLikelihoodMSM(
            stationary_distribution_constraint=np.array([.5, .5])).fit(np.zeros((2, 2))),
        # fit with transition count estimator that hasn't been fit
        lambda: MaximumLikelihoodMSM().fit(TransitionCountEstimator(1, "sliding")),
        # fit with bogus object
        lambda: MaximumLikelihoodMSM().fit(object()),
        # fit from timeseries without lagtime
        lambda: MaximumLikelihoodMSM().fit(np.array([0, 1, 2, 3, 4, 5, 6])),
        # empty collection is not allowed
        lambda: MarkovStateModelCollection([], [], False, [], 1.),
        # number of elements in lists must match
        lambda: MarkovStateModelCollection([uniform_2x2], [], False, [], 1.),
        # number of states in lists must match
        lambda: MarkovStateModelCollection([uniform_2x2], [None], False,
                                           [TransitionCountModel(np.ones((3, 3)))], 1.),
    ]
    for failing_call in failing_calls:
        with assert_raises(ValueError):
            failing_call()
Exemplo n.º 16
0
    def test_mlmsm_pipeline(self):
        """Run TICA -> KMeans -> transition counting as a pipeline and estimate an MSM from its counts.

        The PCCA coarse-grained transition matrix must lie within 0.05 (norm distance) of the
        reference matrix stored with the dataset, or of its transpose.
        """
        path = mdshare.fetch('hmm-doublewell-2d-100k.npz', working_directory='data')
        with np.load(path) as archive:
            data = archive['trajectory']
            transition_matrix = archive['transition_matrix']

        tica = TICA(dim=1, lagtime=1)
        clustering = KMeans(n_clusters=2, max_iter=500)
        counting = TransitionCountEstimator(lagtime=1, count_mode="sliding")
        pipeline = Pipeline(steps=[('tica', tica), ('cluster', clustering), ('counts', counting)])
        pipeline.fit(data)

        # the last pipeline step holds the count model
        counts = pipeline[-1].fetch_model().submodel_largest()
        mlmsm = MaximumLikelihoodMSM().fit(counts).fetch_model()
        coarse_grained = mlmsm.pcca(2).coarse_grained_transition_matrix

        distance = np.linalg.norm(coarse_grained - transition_matrix)
        distance_transposed = np.linalg.norm(coarse_grained - transition_matrix.T)
        mindist = min(distance, distance_transposed)
        assert mindist < 0.05
Exemplo n.º 17
0
def test_msm_invalid_statdist_constraint(disconnected_states, lagtime, reversible, count_mode):
    """A uniform four-state constraint must be rejected with ``ValueError`` on every connected-set submodel."""
    uniform_over_four = np.ones(4) / 4.
    counting = TransitionCountEstimator(lagtime=lagtime, count_mode=count_mode)
    count_model = counting.fit(disconnected_states.dtrajs).fetch_model()

    submodels = (count_model.submodel(cset) for cset in count_model.connected_sets())
    for submodel in submodels:
        # construction stays inside the context: the error may come from either step
        with assert_raises(ValueError):
            MaximumLikelihoodMSM(reversible=reversible,
                                 stationary_distribution_constraint=uniform_over_four).fit(submodel)
Exemplo n.º 18
0
def test_fit_with_invalid_args():
    """Both ``fit`` and ``fit_from_counts`` must reject an unfitted estimator and an arbitrary object."""
    class Bogus:
        pass

    estimator_without_model = TransitionCountEstimator(lagtime=1,
                                                       count_mode='sliding')

    for invalid_input in (estimator_without_model, Bogus()):
        with assert_raises(ValueError):
            MaximumLikelihoodMSM().fit(invalid_input)
        with assert_raises(ValueError):
            MaximumLikelihoodMSM().fit_from_counts(invalid_input)
Exemplo n.º 19
0
 def test_mlmsm_pipeline(self):
     """Run TICA -> KMeans -> transition counting on simulated HMM output and estimate an MSM.

     Data is simulated from a two-state HMM with Gaussian outputs. The PCCA coarse-grained
     transition matrix must lie within 0.05 (norm distance) of the hidden transition matrix
     or of its transpose.
     """
     hidden_msm = MarkovStateModel([[.8, .2], [.1, .9]])
     gaussian_outputs = GaussianOutputModel(n_states=2, means=[-10, 10], sigmas=[.1, .1])
     hmm = HiddenMarkovModel(transition_model=hidden_msm, output_model=gaussian_outputs)
     _hidden_traj, traj = hmm.simulate(10000)
     transition_matrix = hmm.transition_model.transition_matrix

     tica = TICA(dim=1, lagtime=1)
     clustering = KMeans(n_clusters=2, max_iter=500)
     counting = TransitionCountEstimator(lagtime=1, count_mode="sliding")
     pipeline = Pipeline(steps=[('tica', tica), ('cluster', clustering), ('counts', counting)])
     # a trailing axis is appended to the observations before fitting
     pipeline.fit(traj[..., None])

     counts = pipeline[-1].fetch_model().submodel_largest()
     mlmsm = MaximumLikelihoodMSM().fit(counts).fetch_model()
     coarse_grained = mlmsm.pcca(2).coarse_grained_transition_matrix

     distance = np.linalg.norm(coarse_grained - transition_matrix)
     distance_transposed = np.linalg.norm(coarse_grained - transition_matrix.T)
     mindist = min(distance, distance_transposed)
     assert mindist < 0.05
Exemplo n.º 20
0
def test_strongly_connected_count_matrix():
    """Reversible estimation on the largest strongly connected set of a short trajectory.

    Fitting on the full count model is expected to fail; after restriction to the largest directed
    connected set the estimate is a symmetric ring over symbols 1, 2, 3, 4, 6.
    """
    # transitions 6->1->2->3->4->6, disconnected are 0 and 5
    dtraj = np.array([0, 6, 1, 2, 3, 4, 6, 5])
    counts = TransitionCountEstimator(lagtime=1, count_mode="sliding").fit(dtraj).fetch_model()
    assert_equal(counts.n_states, 7)

    directed_sets = counts.connected_sets(directed=True)
    assert_equal(len(directed_sets), 3)
    assert_equal(len(directed_sets[0]), 5)

    with assert_raises(BaseException, msg="count matrix not strongly connected, expected failure in rev. case"):
        MaximumLikelihoodMSM().fit(counts)

    # restrict to the strongly connected part
    counts = counts.submodel_largest(directed=True)
    # due to reversible we get 6<->1<->2<->3<->4<->6
    ring_msm = MaximumLikelihoodMSM(reversible=True).fit(counts).fetch_model()

    # the msm must carry symbols 1, 2, 3, 4, 6
    assert_(all(symbol in ring_msm.count_model.state_symbols for symbol in (1, 2, 3, 4, 6)))
    assert_equal(ring_msm.reversible, True)
    assert_equal(ring_msm.n_states, 5)
    assert_equal(ring_msm.lagtime, 1)
    expected_ring = [
        [0., .5, 0., 0., .5],
        [.5, 0., .5, 0., 0.],
        [0., .5, 0., .5, 0.],
        [0., 0., .5, 0., .5],
        [.5, 0., 0., .5, 0.],
    ]
    assert_array_almost_equal(ring_msm.transition_matrix, expected_ring)
    assert_array_almost_equal(ring_msm.stationary_distribution, [1. / 5] * 5)
    assert_equal(ring_msm.n_eigenvalues, 5)
    assert_equal(ring_msm.sparse, False)

    # states 3 and 4 correspond to symbols 4 and 6
    pair_msm = ring_msm.submodel(np.array([3, 4]))
    assert_equal(pair_msm.reversible, True)
    assert_equal(pair_msm.n_states, 2)
    assert_equal(pair_msm.lagtime, 1)
    assert_array_almost_equal(pair_msm.transition_matrix, [[0, 1.], [1., 0]])
    assert_array_almost_equal(pair_msm.stationary_distribution, [0.5, 0.5])
    assert_equal(pair_msm.n_eigenvalues, 2)
    assert_equal(pair_msm.sparse, False)
    assert_equal(pair_msm.count_model.state_symbols, [4, 6])
Exemplo n.º 21
0
    def fit(self, data, callback: Callable = None):
        """
        Runs the estimation from a count matrix, a transition count model, a Markov state model, or a
        previously fitted estimator producing one of these.

        Count matrices and count models are first turned into a maximum-likelihood MSM using this
        estimator's ``reversible``, ``stationary_distribution_constraint``, ``sparse``, ``maxiter`` and
        ``maxerr`` settings; the resulting (or given) MSM is handed to :meth:`fit_from_msm`.

        Parameters
        ----------
        data : (N,N) count matrix or TransitionCountModel or MaximumLikelihoodMSM or MarkovStateModel
            a count matrix or a transition count model that was estimated from data

        callback: callable, optional, default=None
            Function to be called to indicate progress of sampling.

        Returns
        -------
        self : BayesianMSM
            Reference to self.

        Raises
        ------
        ValueError
            If a count model with a non-effective counting mode is given, if an estimator without a
            model is given, or if the input type is not supported.
        """
        from deeptime.markov import TransitionCountModel

        # NOTE(review): this check only applies to count models passed in directly; a fitted
        # estimator is unwrapped below and its model is not re-checked.
        if isinstance(data, TransitionCountModel):
            counting_mode = data.counting_mode
            if counting_mode is not None and "effective" not in counting_mode:
                raise ValueError(
                    "The transition count model was not estimated using an effective counting method, "
                    "therefore counts are likely to be strongly correlated yielding wrong confidences."
                )

        # replace fitted estimators by the model they produced
        if isinstance(data, Estimator):
            if not data.has_model:
                raise ValueError(
                    "Can only use estimators as input if they have been fit previously."
                )
            data = data.fetch_model()

        if isinstance(data, TransitionCountModel) or is_square_matrix(data):
            ml_estimator = MaximumLikelihoodMSM(
                reversible=self.reversible,
                stationary_distribution_constraint=self.stationary_distribution_constraint,
                sparse=self.sparse,
                maxiter=self.maxiter,
                maxerr=self.maxerr)
            msm = ml_estimator.fit(data).fetch_model()
        elif isinstance(data, MarkovStateModel):
            msm = data
        else:
            raise ValueError(
                "Unsupported input data, can only be count matrix (or TransitionCountModel, "
                "TransitionCountEstimator) or a MarkovStateModel instance or an estimator producing "
                "Markov state models.")

        return self.fit_from_msm(msm, callback=callback)
Exemplo n.º 22
0
def metastable_from_data(dtrajs,
                         n_hidden_states,
                         lagtime,
                         stride=1,
                         mode='largest-regularized',
                         reversible: bool = True,
                         stationary: bool = False,
                         separate_symbols=None,
                         states: Optional[np.ndarray] = None,
                         regularize: bool = True,
                         connectivity_threshold: Union[str, float] = 0.):
    r"""Computes an initial guess :class:`HMM <deeptime.markov.hmm.HiddenMarkovModel>` from discrete
    trajectories.

    The procedure follows :footcite:`noe2013projected`: a
    :class:`MSM <deeptime.markov.msm.MarkovStateModel>` is estimated first and then coarse-grained with
    PCCA+ :footcite:`roblitz2013fuzzy`. Once the MSM is available, the work is delegated to
    :meth:`metastable_from_msm`.

    Parameters
    ----------
    dtrajs : array_like or list of array_like
        A discrete trajectory or a list of discrete trajectories.
    n_hidden_states : int
        Number of hidden states.
    lagtime : int
        The lagtime at which transitions are counted.
    stride : int or str, optional, default=1
        Stride between two lagged trajectories extracted from the input trajectories. Given trajectory
        :code:`s[t]`, stride and lag will result in trajectories

            :code:`s[0], s[lag], s[2 lag], ...`

            :code:`s[stride], s[stride + lag], s[stride + 2 lag], ...`

        With stride = 1 all data is used (useful for a maximum likelihood estimator), whereas a Bayesian
        estimator requires a longer stride in order to have statistically uncorrelated trajectories. Setting
        :code:`stride='effective'` uses the largest neglected timescale as an estimate for the correlation time
        and sets the stride accordingly.
    mode : str, optional, default='largest-regularized'
        The mode at which the Markov state model is estimated. Since the process is assumed to be reversible and
        finite statistics might lead to unconnected regions in state space, a subselection can automatically be
        made and the count matrix can be regularized. The following options are available:

        * 'all': all available states are taken into account
        * 'largest': the largest connected state set is selected, see
          :meth:`TransitionCountModel.submodel_largest <deeptime.markov.TransitionCountModel.submodel_largest>`.
        * 'populous': the connected set with the largest population in the data, see
          :meth:`TransitionCountModel.submodel_largest <deeptime.markov.TransitionCountModel.submodel_largest>`.

        For regularization, each of the options can be suffixed by a '-regularized', e.g., 'largest-regularized'.
        This means that the count matrix has no zero entries and everything is reversibly connected. In
        particular, a prior of the form

        .. math:: b_{ij}=\left \{ \begin{array}{rl}
                     \alpha & \text{, if }c_{ij}+c_{ji}>0, \\
                     0      & \text{, otherwise,}
                     \end{array} \right .

        with :math:`\alpha=10^{-3}` is added and all non-reversibly connected components are artificially
        connected by adding backward paths.
    reversible : bool, optional, default=True
        Whether the HMM transition matrix is estimated so that it is reversible.
    stationary : bool, optional, default=False
        If True, the initial distribution of hidden states is self-consistently computed as the stationary
        distribution of the transition matrix. If False, it will be estimated from the starting states.
        Only set this to true if you're sure that the observation trajectories are initiated from a global
        equilibrium distribution.
    separate_symbols : array_like, optional, default=None
        Force the given set of observed states to stay in a separate hidden state.
        The remaining nstates-1 states will be assigned by a metastable decomposition.
    states : (dtype=int) ndarray, optional, default=None
        Artificially restrict count model to selection of states, even before regularization.
    regularize : bool, optional, default=True
        If set to True, makes sure that the hidden initial distribution and transition matrix have nonzero
        probabilities by setting them to eps and then renormalizing. Avoids zeros that would cause estimation
        algorithms to crash or get stuck in suboptimal states.
    connectivity_threshold : float or '1/n', optional, default=0.
        Connectivity threshold. Counts that are below the specified value are disregarded when finding connected
        sets. In case of '1/n', the threshold gets resolved to :math:`1 / \mathrm{n\_states\_full}`.

    Returns
    -------
    hmm_init : HiddenMarkovModel
        An initial guess for the HMM

    Raises
    ------
    ValueError
        If ``mode`` is neither one of the valid modes nor one of them suffixed by '-regularized'.

    See Also
    --------
    DiscreteOutputModel
        The type of output model this heuristic uses.

    :func:`metastable_from_msm`
        Initial guess from an already existing :class:`MSM <deeptime.markov.msm.MarkovStateModel>`.

    :func:`deeptime.markov.hmm.init.gaussian.from_data`
        Initial guess with :class:`Gaussian output model <deeptime.markov.hmm.GaussianOutputModel>`.


    References
    ----------
    .. footbibliography::
    """
    base_modes = metastable_from_data.VALID_MODES
    if mode not in base_modes + [m + "-regularized" for m in base_modes]:
        raise ValueError("mode can only be one of [{}]".format(", ".join(base_modes)))

    from deeptime.markov import compute_dtrajs_effective, TransitionCountEstimator
    from deeptime.markov.msm import MaximumLikelihoodMSM

    # subsample the trajectories, then count at lag 1 on the subsampled data
    effective_dtrajs = compute_dtrajs_effective(ensure_dtraj_list(dtrajs), lagtime=lagtime,
                                                n_states=n_hidden_states, stride=stride)
    counts = TransitionCountEstimator(1, 'sliding', sparse=False).fit(effective_dtrajs).fetch_model()
    if states is not None:
        counts = counts.submodel(states)

    if '-regularized' in mode:
        import deeptime.markov.tools.estimation as memest
        cmat = counts.count_matrix
        # in-place update so that the count model sees the regularized counts
        cmat[...] += memest.prior_neighbor(cmat, 0.001)
        # states with any incoming or outgoing counts get a diagonal entry of at least 0.001
        nonempty = np.where(cmat.sum(axis=0) + cmat.sum(axis=1) > 0)[0]
        cmat[nonempty, nonempty] = np.maximum(cmat[nonempty, nonempty], 0.001)

    # mode 'all' needs no subselection; the other two differ only in how sets are ranked
    for keyword, by_population in (('largest', False), ('populous', True)):
        if keyword in mode:
            counts = counts.submodel_largest(directed=True,
                                             connectivity_threshold=connectivity_threshold,
                                             sort_by_population=by_population)

    # the intermediate MSM is always estimated reversibly; `reversible` only affects the HMM guess
    msm = MaximumLikelihoodMSM(reversible=True, allow_disconnected=True,
                               maxerr=1e-3, maxiter=10000).fit(counts).fetch_model()
    return metastable_from_msm(msm, n_hidden_states, reversible, stationary,
                               separate_symbols, regularize)
Exemplo n.º 23
0
def test_sanity():
    """An MSM estimated from 100000 swissroll-model steps matches the model's transition matrix to two decimals."""
    dtraj, _traj = swissroll_model(100000)
    estimator = MaximumLikelihoodMSM(lagtime=1)
    estimated_msm = estimator.fit(dtraj).fetch_model()
    assert_almost_equal(estimated_msm.transition_matrix,
                        swissroll_model.transition_matrix,
                        decimal=2)