Exemplo n.º 1
0
def ChapmanKolmogorovTest(assignments,
                          klist=(1, 2, 3, 4, 5),
                          lagtime=50,
                          states=None):
    """Collect the inputs of a Chapman-Kolmogorov comparison for several lags.

    A reference ``MarkovStateModel`` is fitted at ``lagtime``.  For every
    multiplier ``k`` in ``klist`` a second model is fitted at ``k * lagtime``
    and both models are handed to ``CalculateStatesProbability`` (defined
    elsewhere).

    Parameters
    ----------
    assignments
        State-assignment sequences, passed unchanged to
        ``MarkovStateModel.fit``.
    klist : iterable of int
        Multipliers applied to ``lagtime``.  The default is an immutable
        tuple so that it cannot be mutated between calls.
    lagtime : int
        Lag time of the reference model.
    states
        States to evaluate.  ``None`` or ``"all"`` selects every index of the
        reference model's population vector.

    Returns
    -------
    tuple of (list, list)
        ``(prob_tau_all, prob_ktau_all)``; each list holds one entry per
        ``k``, in the order of ``klist``, as returned by
        ``CalculateStatesProbability``.
    """
    reference = MarkovStateModel(lag_time=lagtime, n_timescales=10)
    reference.fit(assignments)
    p_tau = reference.populations_
    T_tau = reference.transmat_
    mapping_tau = reference.mapping_

    if states is None or states == "all":
        states = range(len(p_tau))

    prob_tau_all = []
    prob_ktau_all = []

    for k in klist:
        lagtime_long = k * lagtime
        # Single pre-formatted argument: prints the same text on Python 2
        # and Python 3.
        print("long lagtime: %s" % (lagtime_long,))

        long_model = MarkovStateModel(lag_time=lagtime_long, n_timescales=10)
        long_model.fit(assignments)

        probability_tau, probability_ktau = CalculateStatesProbability(
            T_tau, long_model.transmat_,
            p_tau, long_model.populations_,
            mapping_tau, long_model.mapping_,
            k, states)

        prob_tau_all.append(probability_tau)
        prob_ktau_all.append(probability_ktau)

    return prob_tau_all, prob_ktau_all
Exemplo n.º 2
0
def test_counts_no_trim():
    """Counts matrix and mapping when no ergodic trimming is requested."""
    trajectory = [1, 1, 1, 1, 1, 1, 1, 1, 1]
    msm = MarkovStateModel(reversible_type=None, ergodic_cutoff=0)
    msm.fit([trajectory])

    # A single visited label: 1x1 counts matrix, label 1 mapped to index 0.
    eq(msm.countsmat_, np.array([[8.0]]))
    eq(msm.mapping_, {1: 0})
Exemplo n.º 3
0
def test_1():
    """Fit one constant trajectory without trimming and check the counts."""
    constant_traj = [1, 1, 1, 1, 1, 1, 1, 1, 1]
    fitted = MarkovStateModel(reversible_type=None, ergodic_cutoff=0)
    fitted.fit([constant_traj])

    expected_counts = np.array([[8.0]])
    expected_mapping = {1: 0}
    eq(fitted.countsmat_, expected_counts)
    eq(fitted.mapping_, expected_mapping)
Exemplo n.º 4
0
def test_13():
    """Check the normalisation conventions of the fitted eigenvectors."""
    model = MarkovStateModel(n_timescales=2)
    model.fit([[0, 0, 0, 1, 2, 1, 0, 0, 0, 1, 3, 3, 3, 1, 1, 2, 2, 0, 0]])

    left = model.left_eigenvectors_
    right = model.right_eigenvectors_
    n_vectors = 3

    # left and right eigenvectors must be biorthonormal
    np.testing.assert_array_almost_equal(np.dot(left.T, right),
                                         np.eye(n_vectors))

    # the stationary (first) left eigenvector sums to 1
    np.testing.assert_almost_equal(left[:, 0].sum(), 1)

    # left eigenvectors: <\phi_i, \phi_i>_{\mu^{-1}} = 1
    for i in range(n_vectors):
        phi = left[:, i]
        np.testing.assert_almost_equal(
            np.dot(phi, phi / model.populations_), 1)

    # right eigenvectors: <\psi_i, \psi_i>_{\mu} = 1
    for i in range(n_vectors):
        psi = right[:, i]
        np.testing.assert_almost_equal(
            np.dot(psi, psi * model.populations_), 1)
Exemplo n.º 5
0
def test_both():
    """param_sweep and implied_timescales must match manual per-lag fits."""
    sequences = [np.random.randint(20, size=1000) for _ in range(10)]
    lag_times = [1, 5, 10]

    def _fit_reference(tau):
        # One independently fitted model per lag time.
        ref = MarkovStateModel(reversible_type='mle', lag_time=tau,
                               n_timescales=10)
        ref.fit(sequences)
        return ref

    models_ref = [_fit_reference(tau) for tau in lag_times]
    timescales_ref = [ref.timescales_ for ref in models_ref]

    model = MarkovStateModel(reversible_type='mle', lag_time=1, n_timescales=10)
    models = param_sweep(model, sequences, {'lag_time': lag_times}, n_jobs=2)
    timescales = implied_timescales(sequences, lag_times, msm=model,
                                    n_timescales=10, n_jobs=2)

    print(timescales)
    print(timescales_ref)

    # Sanity check: different lag times must give different matrices,
    # otherwise the comparison below proves nothing.
    difference = np.abs(models[0].transmat_ - models[1].transmat_).sum()
    if difference < 1E-6:
        raise Exception("you wrote a bad test.")

    for i, _ in enumerate(lag_times):
        npt.assert_array_almost_equal(models[i].transmat_,
                                      models_ref[i].transmat_)
        npt.assert_array_almost_equal(timescales_ref[i], timescales[i])
Exemplo n.º 6
0
def test_both():
    """param_sweep and implied_timescales must match manual per-lag fits."""
    # NOTE(review): `model` is constructed but never used below; the sweep
    # is driven by the last reference model instead.
    model = MarkovStateModel(
        reversible_type='mle', lag_time=1, n_timescales=1)

    # note this might break it if we ask for more than 1 timescale
    sequences = np.random.randint(20, size=(10, 1000))
    lag_times = [1, 5, 10]

    def _fit_reference(tau):
        ref = MarkovStateModel(
            reversible_type='mle', lag_time=tau, n_timescales=10)
        ref.fit(sequences)
        return ref

    models_ref = [_fit_reference(tau) for tau in lag_times]
    timescales_ref = [ref.timescales_ for ref in models_ref]

    # The template handed to the sweep is the model fitted at the last lag.
    msm = models_ref[-1]
    models = param_sweep(msm, sequences, {'lag_time' : lag_times}, n_jobs=2)
    timescales = implied_timescales(sequences, lag_times, msm=msm,
                                    n_timescales=10, n_jobs=2)

    print(timescales)
    print(timescales_ref)

    # Guard against a vacuous comparison.
    difference = np.abs(models[0].transmat_ - models[1].transmat_).sum()
    if difference < 1E-6:
        raise Exception("you wrote a bad test.")

    for i, tau in enumerate(lag_times):
        models[i].lag_time = tau
        npt.assert_array_almost_equal(models[i].transmat_,
                                      models_ref[i].transmat_)
        npt.assert_array_almost_equal(timescales_ref[i], timescales[i])
Exemplo n.º 7
0
def test_partial_transform():
    """Exercise ``partial_transform`` in its default, 'clip' and 'fill' modes.

    The dtype checks compare against the builtin ``int`` / ``float``.  The
    ``np.int`` / ``np.float`` names were plain aliases of those builtins and
    no longer exist in current NumPy releases, so using the builtins keeps
    the same comparison while working on every NumPy version.
    """
    model = MarkovStateModel()
    model.fit([['a', 'a', 'b', 'b', 'c', 'c', 'a', 'a']])
    assert model.mapping_ == {'a': 0, 'b': 1, 'c': 2}

    # default mode: every label is known, one contiguous chunk comes back
    v = model.partial_transform(['a', 'b', 'c'])
    assert isinstance(v, list)
    assert len(v) == 1
    assert v[0].dtype == int
    np.testing.assert_array_equal(v[0], [0, 1, 2])

    # 'clip': the unknown trailing label 'd' is dropped
    v = model.partial_transform(['a', 'b', 'c', 'd'], 'clip')
    assert isinstance(v, list)
    assert len(v) == 1
    assert v[0].dtype == int
    np.testing.assert_array_equal(v[0], [0, 1, 2])

    # 'fill': the unknown label becomes NaN, so the result is a float array
    v = model.partial_transform(['a', 'b', 'c', 'd'], 'fill')
    assert isinstance(v, np.ndarray)
    assert len(v) == 4
    assert v.dtype == float
    np.testing.assert_array_equal(v, [0, 1, 2, np.nan])

    # 'clip' with an unknown label in the middle splits the output in two
    v = model.partial_transform(['a', 'a', 'SPLIT', 'b', 'b', 'b'], 'clip')
    assert isinstance(v, list)
    assert len(v) == 2
    assert v[0].dtype == int
    assert v[1].dtype == int
    np.testing.assert_array_equal(v[0], [0, 0])
    np.testing.assert_array_equal(v[1], [1, 1, 1])
def case1():
    """Run 10-fold MSM fits for projects 6383-6390 and save the results.

    For every project id the assignments file
    ``Assignments-<p_id>.fixed.Map40.npy`` is loaded, a ``MarkovStateModel``
    with lag time 50 is refitted on each training fold, and the collected
    populations and implied timescales are written to
    ``Data-<p_id>-macro40/``.

    Populations are only kept for folds whose model has exactly 40 states;
    timescales are kept for every fold.
    """
    map_id = 40
    lagtime = 50
    for p_id in range(6383, 6391):
        assignments = np.load('Assignments-%d.fixed.Map%d.npy' %
                              (p_id, map_id))
        # NOTE(review): this is the pre-0.18 scikit-learn KFold signature
        # (iterable object, `n_folds`) - confirm the pinned version.
        cv = KFold(len(assignments), n_folds=10)
        msm = MarkovStateModel(lag_time=lagtime)

        pops = []
        msmts = []
        for train_index, _ in cv:
            msm.fit(assignments[train_index])
            if len(msm.populations_) == 40:
                pops.append(msm.populations_)
            msmts.append(msm.timescales_)

        output_dir = "Data-%d-macro%d" % (p_id, map_id)
        if not os.path.exists(output_dir):
            os.makedirs(output_dir)

        fn_populations = os.path.join(output_dir, "Populations-10fold.npy")
        fn_msmts = os.path.join(output_dir, "ImpliedTimescales-10fold.npy")

        np.save(fn_populations, pops)
        np.save(fn_msmts, msmts)
        # Single pre-formatted argument: identical output on Python 2 and 3.
        print("Saved: {},{}".format(fn_populations, fn_msmts))
Exemplo n.º 9
0
def test_10():
    """inverse_transform maps state indices back to the original labels."""
    labelled_traj = ['a', 'b', 'c', 'a', 'a', 'b']
    msm = MarkovStateModel(reversible_type=None, ergodic_cutoff=0)
    msm.fit([labelled_traj])

    recovered = msm.inverse_transform([[0, 1, 2]])
    assert len(recovered) == 1
    np.testing.assert_array_equal(recovered[0], ['a', 'b', 'c'])
Exemplo n.º 10
0
def test_10():
    """Round-trip check: indices 0, 1, 2 decode to labels 'a', 'b', 'c'."""
    msm = MarkovStateModel(reversible_type=None, ergodic_cutoff=0)
    msm.fit([['a', 'b', 'c', 'a', 'a', 'b']])

    index_sequences = [[0, 1, 2]]
    decoded = msm.inverse_transform(index_sequences)

    # one input sequence in, one decoded sequence out
    assert len(decoded) == 1
    np.testing.assert_array_equal(decoded[0], ['a', 'b', 'c'])
Exemplo n.º 11
0
def test_fluxes_1():
    """Compare tpt.fluxes / tpt.net_fluxes with a hand-built reference."""
    # depends on tpt.committors

    msm = MarkovStateModel(lag_time=1)
    msm.fit(np.random.randint(3, size=(10, 1000)))

    n_states = 3
    transmat = msm.transmat_
    populations = msm.populations_
    # forward committors
    q_forward = tpt.committors(0, 2, msm)

    ref_fluxes = np.zeros((n_states, n_states))
    for i in range(n_states):
        for j in range(n_states):
            if i == j:
                continue
            # Eq. 2.24 in Metzner et al. Transition Path Theory.
            # Multiscale Model. Simul. 2009, 7, 1192-1219.
            ref_fluxes[i, j] = (populations[i] * transmat[i, j] *
                                (1 - q_forward[i]) * q_forward[j])

    # Net flux keeps only the positive part of the antisymmetric difference.
    ref_net_fluxes = np.zeros((n_states, n_states))
    for i in range(n_states):
        for j in range(n_states):
            excess = ref_fluxes[i, j] - ref_fluxes[j, i]
            ref_net_fluxes[i, j] = np.max([0, excess])

    fluxes = tpt.fluxes(0, 2, msm)
    net_fluxes = tpt.net_fluxes(0, 2, msm)

    npt.assert_array_almost_equal(ref_fluxes, fluxes)
    npt.assert_array_almost_equal(ref_net_fluxes, net_fluxes)
Exemplo n.º 12
0
def test_harder_hubscore():
    """Check tpt.hub_scores against a brute-force triple loop over states."""
    # depends on tpt.committors and tpt.conditional_committors

    assignments = np.random.randint(10, size=(10, 1000))
    msm = MarkovStateModel(lag_time=1)
    msm.fit(assignments)

    hub_scores = tpt.hub_scores(msm)

    n_states = 10
    ref_hub_scores = np.zeros(n_states)
    for A in range(n_states):
        for B in range(n_states):
            committors = tpt.committors(A, B, msm)
            denom = msm.transmat_[A, :].dot(committors)
            for C in range(n_states):
                # the three states must be pairwise distinct
                if len({A, B, C}) < 3:
                    continue
                cond_committors = tpt.conditional_committors(A, B, C, msm)

                # one step out of A into any state other than A and B
                temp = sum(
                    (cond_committors[i] * msm.transmat_[A, i]
                     for i in range(n_states) if i not in (A, B)),
                    0.0)
                temp /= denom

                ref_hub_scores[C] += temp

    # average over the 9 * 8 ordered (A, B) pairs available for each C
    ref_hub_scores /= (9 * 8)

    npt.assert_array_almost_equal(ref_hub_scores, hub_scores)
Exemplo n.º 13
0
def test_harder_hubscore():
    """Check tpt.hub_scores against a brute-force triple loop over states.

    Uses ``range`` rather than the Python-2-only ``xrange`` so the test
    runs on Python 3 as well; the loops are tiny (10 states), so the
    materialised list on Python 2 is irrelevant.
    """
    # depends on tpt.committors and tpt.conditional_committors

    assignments = np.random.randint(10, size=(10, 1000))
    msm = MarkovStateModel(lag_time=1)
    msm.fit(assignments)

    hub_scores = tpt.hub_scores(msm)

    ref_hub_scores = np.zeros(10)
    for A in range(10):
        for B in range(10):
            committors = tpt.committors(A, B, msm)
            denom = msm.transmat_[A, :].dot(committors)
            for C in range(10):
                # A, B and C must be three distinct states
                if A == B or A == C or B == C:
                    continue
                cond_committors = tpt.conditional_committors(A, B, C, msm)

                # accumulate over every intermediate state except A and B
                temp = 0.0
                for i in range(10):
                    if i in [A, B]:
                        continue
                    temp += cond_committors[i] * msm.transmat_[A, i]
                temp /= denom

                ref_hub_scores[C] += temp

    # average over the 9 * 8 ordered (A, B) pairs available for each C
    ref_hub_scores /= (9 * 8)

    npt.assert_array_almost_equal(ref_hub_scores, hub_scores)
Exemplo n.º 14
0
def test_cond_committors():
    """Check tpt.conditional_committors on a random 4-state model."""
    # depends on tpt.committors

    msm = MarkovStateModel(lag_time=1)
    msm.fit(np.random.randint(4, size=(10, 1000)))

    tprob = msm.transmat_

    for_committors = tpt.committors(0, 3, msm)
    cond_committors = tpt.conditional_committors(0, 3, 2, msm)

    # The committor of state 1 splits into paths that do and do not pass
    # through state 2. Paths that avoid state 2 must look like
    # 1, 1, ..., 1, 3, so their total probability is a (truncated)
    # geometric series in tprob[1, 1] times the final jump tprob[1, 3].
    # Subtracting it from the forward committor leaves the component that
    # does visit state 2.
    avoiding_state_2 = (np.power(tprob[1, 1], np.arange(5000)).sum() *
                        tprob[1, 3])
    via_state_2 = for_committors[1] - avoiding_state_2
    ref = [0, via_state_2, for_committors[2], 0]

    npt.assert_array_almost_equal(ref, cond_committors)
Exemplo n.º 15
0
def test_cond_committors():
    """Compare conditional committors with a hand-derived reference."""
    # depends on tpt.committors

    msm = MarkovStateModel(lag_time=1)
    assignments = np.random.randint(4, size=(10, 1000))
    msm.fit(assignments)

    tprob = msm.transmat_
    for_committors = tpt.committors(0, 3, msm)
    cond_committors = tpt.conditional_committors(0, 3, 2, msm)

    # Starting from state 1, a path either visits state 2 or it does not.
    # The ones that do not must stay in 1 and then jump to 3
    # (1, 1, ..., 1, 3); summing tprob[1, 1] ** k over k and multiplying by
    # tprob[1, 3] approximates their probability. The remainder of the
    # forward committor is the part that goes through state 2.
    self_loop_sum = np.power(tprob[1, 1], np.arange(5000)).sum()
    ref_state_1 = for_committors[1] - self_loop_sum * tprob[1, 3]
    ref = [0, ref_state_1, for_committors[2], 0]

    npt.assert_array_almost_equal(ref, cond_committors)
Exemplo n.º 16
0
def test_fluxes():
    """Compare tpt.fluxes / tpt.net_fluxes with a hand-built reference.

    Uses ``range`` rather than the Python-2-only ``xrange`` so the test
    also runs on Python 3.
    """
    # depends on tpt.committors

    msm = MarkovStateModel(lag_time=1)
    assignments = np.random.randint(3, size=(10, 1000))
    msm.fit(assignments)

    tprob = msm.transmat_
    pop = msm.populations_
    # forward committors
    qplus = tpt.committors(0, 2, msm)

    ref_fluxes = np.zeros((3, 3))
    ref_net_fluxes = np.zeros((3, 3))
    for i in range(3):
        for j in range(3):
            if i != j:
                # Eq. 2.24 in Metzner et al. Transition Path Theory.
                # Multiscale Model. Simul. 2009, 7, 1192-1219.
                ref_fluxes[i, j] = (pop[i] * tprob[i, j] * (1 - qplus[i]) *
                                    qplus[j])

    # net flux: positive part of the difference between opposite directions
    for i in range(3):
        for j in range(3):
            ref_net_fluxes[i, j] = np.max(
                [0, ref_fluxes[i, j] - ref_fluxes[j, i]])

    fluxes = tpt.fluxes(0, 2, msm)
    net_fluxes = tpt.net_fluxes(0, 2, msm)

    npt.assert_array_almost_equal(ref_fluxes, fluxes)
    npt.assert_array_almost_equal(ref_net_fluxes, net_fluxes)
Exemplo n.º 17
0
def test_13():
    """Eigenvector normalisation: biorthonormality and weighted norms."""
    trajectory = [0, 0, 0, 1, 2, 1, 0, 0, 0, 1, 3, 3, 3, 1, 1, 2, 2, 0, 0]
    model = MarkovStateModel(n_timescales=2)
    model.fit([trajectory])

    left_vecs = model.left_eigenvectors_
    right_vecs = model.right_eigenvectors_

    # check biorthonormal
    overlap = np.dot(left_vecs.T, right_vecs)
    np.testing.assert_array_almost_equal(overlap, np.eye(3))

    # check that the stationary left eigenvector is normalized to be 1
    stationary = left_vecs[:, 0]
    np.testing.assert_almost_equal(stationary.sum(), 1)

    # the left eigenvectors satisfy <\phi_i, \phi_i>_{\mu^{-1}} = 1
    for i in range(3):
        weighted = left_vecs[:, i] / model.populations_
        np.testing.assert_almost_equal(np.dot(left_vecs[:, i], weighted), 1)

    # and the right eigenvectors satisfy <\psi_i, \psi_i>_{\mu} = 1
    for i in range(3):
        weighted = right_vecs[:, i] * model.populations_
        np.testing.assert_almost_equal(np.dot(right_vecs[:, i], weighted), 1)
Exemplo n.º 18
0
def test_counts_2():
    """With ``ergodic_cutoff=1`` the trailing state 2 is trimmed away."""
    trajectory = [1, 1, 1, 1, 1, 1, 1, 1, 1, 2]
    msm = MarkovStateModel(reversible_type=None, ergodic_cutoff=1)
    msm.fit([trajectory])

    # only label 1 survives, leaving a 1x1 counts matrix
    eq(msm.mapping_, {1: 0})
    eq(msm.countsmat_, np.array([[8]]))
Exemplo n.º 19
0
def test_partial_transform():
    """Exercise ``partial_transform`` in its default, 'clip' and 'fill' modes.

    Dtypes are compared with the builtin ``int`` / ``float``; the former
    ``np.int`` / ``np.float`` aliases of those builtins are gone from
    current NumPy, and the builtins compare identically.
    """
    model = MarkovStateModel()
    model.fit([['a', 'a', 'b', 'b', 'c', 'c', 'a', 'a']])
    assert model.mapping_ == {'a': 0, 'b': 1, 'c': 2}

    # all labels known: a single integer chunk is returned
    v = model.partial_transform(['a', 'b', 'c'])
    assert isinstance(v, list)
    assert len(v) == 1
    assert v[0].dtype == int
    np.testing.assert_array_equal(v[0], [0, 1, 2])

    # 'clip' drops the unknown trailing label
    v = model.partial_transform(['a', 'b', 'c', 'd'], 'clip')
    assert isinstance(v, list)
    assert len(v) == 1
    assert v[0].dtype == int
    np.testing.assert_array_equal(v[0], [0, 1, 2])

    # 'fill' keeps the length and marks the unknown label with NaN
    v = model.partial_transform(['a', 'b', 'c', 'd'], 'fill')
    assert isinstance(v, np.ndarray)
    assert len(v) == 4
    assert v.dtype == float
    np.testing.assert_array_equal(v, [0, 1, 2, np.nan])

    # 'clip' with an unknown label mid-sequence yields two chunks
    v = model.partial_transform(['a', 'a', 'SPLIT', 'b', 'b', 'b'], 'clip')
    assert isinstance(v, list)
    assert len(v) == 2
    assert v[0].dtype == int
    assert v[1].dtype == int
    np.testing.assert_array_equal(v[0], [0, 0])
    np.testing.assert_array_equal(v[1], [1, 1, 1])
Exemplo n.º 20
0
def test_counts_2():
    """Counts matrix and mapping when ergodic trimming is enabled."""
    fitted = MarkovStateModel(reversible_type=None, ergodic_cutoff=1)
    fitted.fit([[1, 1, 1, 1, 1, 1, 1, 1, 1, 2]])

    expected_mapping = {1: 0}
    expected_counts = np.array([[8]])
    eq(fitted.mapping_, expected_mapping)
    eq(fitted.countsmat_, expected_counts)
Exemplo n.º 21
0
def test_both():
    """Swept models and implied timescales must equal per-lag reference fits."""
    # NOTE(review): `model` is never used after construction; the last
    # reference model is what gets passed to the sweep.
    model = MarkovStateModel(reversible_type="mle", lag_time=1, n_timescales=1)

    # note this might break it if we ask for more than 1 timescale
    sequences = np.random.randint(20, size=(10, 1000))
    lag_times = [1, 5, 10]

    models_ref = []
    timescales_ref = []
    for tau in lag_times:
        msm = MarkovStateModel(reversible_type="mle", lag_time=tau,
                               n_timescales=10)
        msm.fit(sequences)
        models_ref.append(msm)
    for ref in models_ref:
        timescales_ref.append(ref.timescales_)

    sweep_grid = {"lag_time": lag_times}
    models = param_sweep(msm, sequences, sweep_grid, n_jobs=2)
    timescales = implied_timescales(sequences, lag_times, msm=msm,
                                    n_timescales=10, n_jobs=2)

    print(timescales)
    print(timescales_ref)

    # the first two swept models must differ, or the test is meaningless
    gap = np.abs(models[0].transmat_ - models[1].transmat_).sum()
    if gap < 1e-6:
        raise Exception("you wrote a bad test.")

    for i, tau in enumerate(lag_times):
        models[i].lag_time = tau
        npt.assert_array_almost_equal(models[i].transmat_,
                                      models_ref[i].transmat_)
        npt.assert_array_almost_equal(timescales_ref[i], timescales[i])
Exemplo n.º 22
0
def test_51():
    """score_ll of a two-frame sequence is the log transition probability."""
    training = ['a', 'a', 'b', 'b', 'a', 'a', 'b', 'b', 'c', 'c', 'c', 'a', 'a']
    msm = MarkovStateModel(reversible_type='mle')
    msm.fit([training])
    assert msm.mapping_ == {'a': 0, 'b': 1, 'c': 2}

    # 'a' -> 'c' corresponds to transition matrix entry [0, 2]
    expected = np.log(msm.transmat_[0, 2])
    score_ac = msm.score_ll([['a', 'c']])
    assert score_ac == expected
Exemplo n.º 23
0
def test_mle_eq():
    """The bootstrap model's MLE member must match a plain MSM fit."""
    seq = [[0, 0, 0, 1, 1, 1, 0, 0, 0, 1, 0, 1, 1]]
    msm_kwargs = {'lag_time': 1}

    plain = MarkovStateModel(lag_time=1)
    bootstrap = BootStrapMarkovStateModel(n_samples=10, n_procs=2,
                                          msm_args=msm_kwargs)
    plain.fit(seq)
    bootstrap.fit(seq)

    # both fits must agree on populations and timescales
    eq(plain.populations_, bootstrap.mle_.populations_)
    eq(plain.timescales_, bootstrap.mle_.timescales_)
Exemplo n.º 24
0
def test_from_msm():
    """Smoke test: PCCA and PCCAPlus can both be built from a fitted MSM."""
    assignments, _ = _metastable_system()

    # Each lumper gets its own freshly fitted model; only the absence of
    # exceptions is checked.
    for lumper in (PCCA, PCCAPlus):
        msm = MarkovStateModel()
        msm.fit(assignments)
        lumper.from_msm(msm, 2)
Exemplo n.º 25
0
def test_51():
    """Check score_ll for a single observed 'a' -> 'c' transition."""
    model = MarkovStateModel(reversible_type='mle')
    model.fit([['a', 'a', 'b', 'b', 'a', 'a', 'b', 'b',
                'c', 'c', 'c', 'a', 'a']])
    assert model.mapping_ == {'a': 0, 'b': 1, 'c': 2}

    probe = [['a', 'c']]
    score_ac = model.score_ll(probe)

    # the score must equal the log of the matching transition probability
    assert score_ac == np.log(model.transmat_[0, 2])
Exemplo n.º 26
0
def test_6():
    """score_ll must be non-finite when the input has an unseen label."""
    model = MarkovStateModel(reversible_type='mle')
    model.fit([['a', 'a', 'b', 'b', 'a', 'a', 'b', 'b']])

    # 'c' never occurs in the training data
    novel_sequences = (['c'], ['c', 'c'], ['a', 'c'])
    for novel in novel_sequences:
        assert not np.isfinite(model.score_ll([novel]))
Exemplo n.º 27
0
def test_from_msm():
    """Build PCCA and PCCAPlus lumpings from independently fitted MSMs."""
    assignments, _ = _metastable_system()

    def _fitted_msm():
        fresh = MarkovStateModel()
        fresh.fit(assignments)
        return fresh

    # Results are discarded: the test passes if neither call raises.
    PCCA.from_msm(_fitted_msm(), 2)
    PCCAPlus.from_msm(_fitted_msm(), 2)
Exemplo n.º 28
0
def test_6():
    """Sequences with a label absent from training score as non-finite."""
    training = ['a', 'a', 'b', 'b', 'a', 'a', 'b', 'b']
    msm = MarkovStateModel(reversible_type='mle')
    msm.fit([training])

    scores = [
        msm.score_ll([['c']]),
        msm.score_ll([['c', 'c']]),
        msm.score_ll([['a', 'c']]),
    ]
    for score in scores:
        assert not np.isfinite(score)
Exemplo n.º 29
0
def at_lagtime(lt):
    """Fit an MSM at lag time ``lt`` and summarise it as a flat dict.

    The model is fitted on the values of the module-level ``ktrajs``
    mapping.  The result holds ``'lag_time'``, ``'percent_retained'`` and
    one ``'timescale_<i>'`` entry per requested timescale.
    """
    msm = MarkovStateModel(lag_time=lt, n_timescales=10, verbose=False)
    msm.fit(list(ktrajs.values()))

    ret = {'lag_time': lt, 'percent_retained': msm.percent_retained_}
    ret.update(
        ('timescale_{}'.format(i), msm.timescales_[i])
        for i in range(msm.n_timescales)
    )
    return ret
Exemplo n.º 30
0
 def test_plot_implied_timescales(self):
     """plot_implied_timescales returns a matplotlib subplot axis."""
     lag_times = [1, 50, 100, 250, 500, 1000, 5000]

     def _fit_at(lag):
         # Construct MSM
         fitted = MarkovStateModel(lag_time=lag, n_timescales=5)
         fitted.fit(data)
         return fitted

     msm_objs = [_fit_at(lag) for lag in lag_times]
     ax = plot_implied_timescales(msm_objs)
     assert isinstance(ax, SubplotBase)
Exemplo n.º 31
0
def test_plot_implied_timescales():
    """Fit one MSM per lag time and check the type of the returned axis."""
    lag_times = [1, 50, 100, 250, 500, 1000, 5000]

    def _fit_at(lag):
        # Construct MSM
        fitted = MarkovStateModel(lag_time=lag, n_timescales=5)
        fitted.fit(data)
        return fitted

    msm_objs = [_fit_at(lag) for lag in lag_times]
    ax = plot_implied_timescales(msm_objs)
    assert isinstance(ax, SubplotBase)
Exemplo n.º 32
0
def test_mfpt_match():
    """Per-sink tpt.mfpts calls must stack up to the all-pairs result."""
    assignments = np.random.randint(10, size=(10, 2000))
    msm = MarkovStateModel(lag_time=1)
    msm.fit(assignments)

    # these two do different things
    per_sink = [tpt.mfpts(msm, sink) for sink in range(10)]
    mfpts0 = np.vstack(per_sink).T
    mfpts1 = tpt.mfpts(msm)

    npt.assert_array_almost_equal(mfpts0, mfpts1)
Exemplo n.º 33
0
def at_lagtime(lt, clustered_trajs):
    """Fit an MSM on ``clustered_trajs`` at lag ``lt``; return a summary dict.

    The dict holds ``'lag_time'``, ``'percent_retained'`` and one
    ``'timescale_<i>'`` entry for each of the model's ``n_timescales``.
    """
    msm = MarkovStateModel(lag_time=lt, n_timescales=20, verbose=False)
    msm.fit(clustered_trajs)

    ret = {'lag_time': lt, 'percent_retained': msm.percent_retained_}
    ret.update(
        ('timescale_{}'.format(i), msm.timescales_[i])
        for i in range(msm.n_timescales)
    )
    return ret
Exemplo n.º 34
0
def at_lagtime(lt):
    """Return lag time, retained percentage and timescales of an MSM fit.

    The model is fitted at lag time ``lt`` on the values of the
    module-level ``ktrajs`` mapping.
    """
    msm = MarkovStateModel(lag_time=lt, n_timescales=10, verbose=False)
    trajectories = list(ktrajs.values())
    msm.fit(trajectories)

    summary = dict(lag_time=lt, percent_retained=msm.percent_retained_)
    for idx in range(msm.n_timescales):
        key = 'timescale_{}'.format(idx)
        summary[key] = msm.timescales_[idx]
    return summary
Exemplo n.º 35
0
def test_score_1():
    """GMRQ on the training data equals the sum of the kept eigenvalues."""
    # Scoring on the same data used for training.
    sequence = [0, 0, 0, 1, 1, 1, 2, 2, 2, 1, 1, 1,
                0, 0, 0, 1, 2, 2, 2, 1, 1, 1, 0, 0]
    dataset = [sequence]

    for n_timescales in range(3):
        model = MarkovStateModel(verbose=False, n_timescales=n_timescales)
        model.fit(dataset)

        assert_approx_equal(model.score(dataset), model.eigenvalues_.sum())
        assert_approx_equal(model.score(dataset), model.score_)
Exemplo n.º 36
0
def test_fit_1():
    """Compare a ContinuousTimeMSM fit with a plain MarkovStateModel fit."""
    sequence = [0, 0, 0, 1, 1, 1, 0, 0, 2, 2, 0, 1, 1, 1, 2, 2, 2, 2, 2]
    dataset = [sequence]

    model = ContinuousTimeMSM(verbose=False)
    model.fit(dataset)

    msm = MarkovStateModel(verbose=False)
    msm.fit(dataset)

    # The two estimators need not agree in general; for this particular
    # input their transition matrices happen to coincide.
    np.testing.assert_array_almost_equal(model.transmat_, msm.transmat_)
Exemplo n.º 37
0
def test_score_1():
    """Training-set GMRQ must equal the eigenvalue sum and ``score_``."""
    sequence = [0, 0, 0, 1, 1, 1, 2, 2, 2, 1, 1, 1,
                0, 0, 0, 1, 2, 2, 2, 1, 1, 1, 0, 0]

    def _check(n):
        # train and score on the very same sequence
        model = MarkovStateModel(verbose=False, n_timescales=n)
        model.fit([sequence])
        assert_approx_equal(model.score([sequence]), model.eigenvalues_.sum())
        assert_approx_equal(model.score([sequence]), model.score_)

    for n in (0, 1, 2):
        _check(n)
Exemplo n.º 38
0
def test_mle_eq():
    """BootStrapMarkovStateModel.mle_ must reproduce a direct MSM fit."""
    seq = [[0, 0, 0, 1, 1, 1, 0, 0, 0, 1, 0, 1, 1]]

    mle_mdl = MarkovStateModel(lag_time=1)
    b_mdl = BootStrapMarkovStateModel(
        n_samples=10, n_procs=2, msm_args={'lag_time': 1})
    mle_mdl.fit(seq)
    b_mdl.fit(seq)

    # the bootstrap's maximum-likelihood member is the reference model
    reference = b_mdl.mle_
    eq(mle_mdl.populations_, reference.populations_)
    eq(mle_mdl.timescales_, reference.timescales_)
Exemplo n.º 39
0
def test_fit_1():
    """Continuous-time and discrete-time fits should agree on this input."""
    trajectories = [[0, 0, 0, 1, 1, 1, 0, 0, 2, 2, 0, 1, 1, 1, 2, 2, 2, 2, 2]]

    model = ContinuousTimeMSM(verbose=False)
    model.fit(trajectories)

    msm = MarkovStateModel(verbose=False)
    msm.fit(trajectories)

    # Equality is not guaranteed in general, but it holds for this data.
    continuous_T = model.transmat_
    discrete_T = msm.transmat_
    np.testing.assert_array_almost_equal(continuous_T, discrete_T)
Exemplo n.º 40
0
def test_11():
    """Sampled state frequencies should be close to the model populations.

    The frequency is computed with an explicit float denominator:
    ``np.bincount`` returns integers, and integer / integer floors on
    Python 2 unless true division is imported, which would silently turn
    every frequency into 0.
    """
    model = MarkovStateModel()
    model.fit([[0, 0, 0, 1, 1, 1, 0, 0, 0, 1, 0, 1, 1, 2, 2, 0, 0]])
    sample = model.sample_discrete(n_steps=1000, random_state=0)
    assert isinstance(sample, np.ndarray)
    assert len(sample) == 1000

    bc = np.bincount(sample)
    frequencies = bc / float(np.sum(bc))
    diff = model.populations_ - frequencies

    # total absolute deviation; the 0.1 tolerance is heuristic
    assert np.sum(np.abs(diff)) < 0.1
Exemplo n.º 41
0
def test_11():
    """Check that sample_discrete roughly reproduces the populations.

    Visit counts are normalised with a float denominator so the result is
    a true frequency on Python 2 as well (integer arrays floor-divide
    there unless true division is imported).
    """
    model = MarkovStateModel()
    model.fit([[0, 0, 0, 1, 1, 1, 0, 0, 0, 1, 0, 1, 1, 2, 2, 0, 0]])

    sample = model.sample_discrete(n_steps=1000, random_state=0)
    assert isinstance(sample, np.ndarray)
    assert len(sample) == 1000

    bc = np.bincount(sample)
    diff = model.populations_ - (bc / float(np.sum(bc)))

    # total absolute deviation; the 0.1 tolerance is heuristic
    assert np.sum(np.abs(diff)) < 0.1
def case2_micro_combined():
    """Save 10-fold implied timescales for the combined assignments.

    Loads ``Assignments.npy``, refits a ``MarkovStateModel`` (lag time 50)
    on each training fold and writes the collected timescales to
    ``Data-combined/ImpliedTimescales-10fold.npy``.  The output directory
    is not created here and must already exist.
    """
    assignments = np.load('Assignments.npy')
    lagtime = 50
    msmts = []
    msm = MarkovStateModel(lag_time=lagtime)
    # NOTE(review): this is the pre-0.18 scikit-learn KFold signature
    # (iterable object, `n_folds`) - confirm the pinned version.
    cv = KFold(len(assignments), n_folds=10)
    for train_index, _ in cv:
        msm.fit(assignments[train_index])
        msmts.append(msm.timescales_)

    fn_msmts = os.path.join('Data-combined', "ImpliedTimescales-10fold.npy")
    np.save(fn_msmts, msmts)
    # Single pre-formatted argument: identical output on Python 2 and 3.
    print("Saved: {}".format(fn_msmts))
Exemplo n.º 43
0
def test_mfpt_match():
    """Per-sink tpt.mfpts calls must stack up to the all-pairs result.

    Uses ``range`` rather than the Python-2-only ``xrange`` so the test
    also runs on Python 3.
    """
    assignments = np.random.randint(10, size=(10, 2000))
    msm = MarkovStateModel(lag_time=1)
    msm.fit(assignments)

    # these two do different things
    mfpts0 = np.vstack([tpt.mfpts(msm, i) for i in range(10)]).T
    mfpts1 = tpt.mfpts(msm)

    npt.assert_array_almost_equal(mfpts0, mfpts1)
Exemplo n.º 44
0
def implied_times():
    """Fit one MSM per entry of ``lag_times`` and collect its eigenvalues.

    Reads the module-level ``lag_times`` and ``sequences``.  Row ``i`` of
    the result holds ``msm.eigenvalues_[1:]`` for ``lag_times[i]``, so each
    fit must yield exactly 20 values after the first eigenvalue is dropped.

    NOTE(review): despite the function name, the stored values are the
    eigenvalues themselves, not implied timescales - confirm intent.

    Returns
    -------
    numpy.ndarray
        Array of shape ``(len(lag_times), 20)``.  The original code built
        this array but dropped it; it is now returned (callers that ignored
        the former ``None`` are unaffected).
    """
    i_times = np.zeros((len(lag_times), 20))
    for i, lag in enumerate(lag_times):
        msm = MarkovStateModel(lag_time=lag,
                               n_timescales=20,
                               reversible_type='transpose',
                               ergodic_cutoff='off',
                               prior_counts=0,
                               sliding_window=True,
                               verbose=True)
        msm.fit(sequences)
        i_times[i] = msm.eigenvalues_[1:]
        # Single pre-formatted argument: identical output on Python 2 and 3.
        print("lag time, msm eigenvalues: %s %s" % (lag, msm.eigenvalues_))
    return i_times
Exemplo n.º 45
0
def test_eigtransform_2():
    """eigtransform output shapes in 'clip' and 'fill' modes."""
    traj = [4, 3, 0, 0, 0, 1, 2, 1, 0, 0, 0, 1, 0, 1, 1, 2, 2, 0, 0]
    model = MarkovStateModel(n_timescales=2)
    model.fit([traj])
    n_frames = len(traj)

    # 'clip' drops the first two frames (states outside the ergodic set)
    clipped = model.eigtransform([traj], mode='clip')[0]
    assert clipped.shape == (n_frames - 2, model.n_timescales)

    # 'fill' keeps every frame and marks the first two rows with NaN
    filled = model.eigtransform([traj], mode='fill')[0]
    assert filled.shape == (n_frames, model.n_timescales)
    assert np.all(np.isnan(filled[:2, :]))
    assert not np.any(np.isnan(filled[2:]))
Exemplo n.º 46
0
def test_12():
    """eigtransform returns entries of the first non-stationary eigenvector."""
    model = MarkovStateModel(n_timescales=1)
    model.fit([[4, 3, 0, 0, 0, 1, 2, 1, 0, 0, 0, 1, 0, 1, 1, 2, 2, 0, 0]])
    assert model.mapping_ == {0: 0, 1: 1, 2: 2}
    assert len(model.eigenvalues_) == 2

    probe = [[0, 1]]

    # right=True projects onto column 1 of the right eigenvectors
    from_right = model.eigtransform(probe, right=True)[0]
    assert from_right[0] == model.right_eigenvectors_[0, 1]
    assert from_right[1] == model.right_eigenvectors_[1, 1]

    # right=False projects onto column 1 of the left eigenvectors
    from_left = model.eigtransform(probe, right=False)[0]
    assert from_left[0] == model.left_eigenvectors_[0, 1]
    assert from_left[1] == model.left_eigenvectors_[1, 1]
Exemplo n.º 47
0
def test_eigtransform_2():
    """Check how eigtransform treats frames in non-ergodic states."""
    model = MarkovStateModel(n_timescales=2)
    traj = [4, 3, 0, 0, 0, 1, 2, 1, 0, 0, 0, 1, 0, 1, 1, 2, 2, 0, 0]
    model.fit([traj])

    # clip mode: the two leading non-ergodic frames are removed
    expected_clipped = (len(traj) - 2, model.n_timescales)
    transformed_0 = model.eigtransform([traj], mode='clip')
    assert transformed_0[0].shape == expected_clipped

    # fill mode: full length, with NaN rows for the two leading frames
    expected_filled = (len(traj), model.n_timescales)
    transformed_1 = model.eigtransform([traj], mode='fill')
    first = transformed_1[0]
    assert first.shape == expected_filled
    assert np.all(np.isnan(first[:2, :]))
    assert not np.any(np.isnan(first[2:]))
Exemplo n.º 48
0
def test_12():
    """Compare eigtransform output with the stored eigenvector entries."""
    trajectory = [4, 3, 0, 0, 0, 1, 2, 1, 0, 0, 0, 1, 0, 1, 1, 2, 2, 0, 0]
    model = MarkovStateModel(n_timescales=1)
    model.fit([trajectory])
    assert model.mapping_ == {0: 0, 1: 1, 2: 2}
    assert len(model.eigenvalues_) == 2

    # states 0 and 1, projected with the right eigenvectors
    t = model.eigtransform([[0, 1]], right=True)
    projected = t[0]
    assert projected[0] == model.right_eigenvectors_[0, 1]
    assert projected[1] == model.right_eigenvectors_[1, 1]

    # same states, projected with the left eigenvectors
    s = model.eigtransform([[0, 1]], right=False)
    projected = s[0]
    assert projected[0] == model.left_eigenvectors_[0, 1]
    assert projected[1] == model.left_eigenvectors_[1, 1]
Exemplo n.º 49
0
def build_msm():
    """Load 64 assignment files and return an MSM fitted on all of them.

    The files are ``macro12_assigns_0.txt`` through
    ``macro12_assigns_63.txt``, read as integer arrays.
    """
    assigns = [
        np.loadtxt('macro12_assigns_%d.txt' % index, dtype=int)
        for index in range(64)
    ]

    # 40 ns
    settings = dict(
        lag_time=500,
        n_timescales=20,
        reversible_type='transpose',
        ergodic_cutoff='off',
        prior_counts=0,
        sliding_window=True,
        verbose=True,
    )
    msm = MarkovStateModel(**settings)
    msm.fit(assigns)
    return msm
Exemplo n.º 50
0
def cluster_msm(sequences, n_states, lag_times):
    """Cluster with KMeans for each state count, then fit MSMs per lag time.

    For every ``n`` in ``n_states`` the fitted clusterer is dumped to
    ``<n>n_cl.pkl``, each fitted MSM to ``<n>n_<lag>lt_msm.pkl``, and the
    stacked timescales (one row per lag time) to ``<n>n_timescales.pkl``.
    Nothing is returned.
    """
    for n in n_states:
        clusterer = KMeans(n_clusters=n)
        clusterer.fit(sequences)
        io.dump(clusterer, str(n)+'n_cl.pkl')

        # Seed row so vstack has something to append to; removed below.
        stacked = np.zeros(5)
        for lag_time in lag_times:
            msm = MarkovStateModel(lag_time=lag_time, verbose=False,
                                   n_timescales=5)
            msm.fit(clusterer.labels_)
            stacked = np.vstack((stacked, msm.timescales_))
            io.dump(msm, str(n)+'n_'+str(lag_time)+'lt_msm.pkl')

        stacked = np.delete(stacked, 0, axis=0)
        io.dump(stacked, str(n)+'n_timescales.pkl')
Exemplo n.º 51
0
class TestModelUtils:
    """Tests for the model-retrieval helpers and apply_percentile_search.

    The ``retrieve_*`` tests operate on a module-level ``model`` object
    defined outside this class.
    """

    def setUp(self):
        """Fit a small MSM on reproducible random data."""
        numpy.random.seed(12)
        self.msm = MarkovStateModel()
        # 20 sequences of 10 random integers drawn from 0..4
        sequences = [numpy.random.randint(5, size=10) for _ in range(20)]
        self.msm.fit(sequences)

    def test_retrieveMSM(self):
        """retrieve_MSM yields a MarkovStateModel."""
        assert isinstance(retrieve_MSM(model), MarkovStateModel)

    def test_retrieve_clusterer(self):
        """retrieve_clusterer yields a MiniBatchKMeans."""
        assert isinstance(retrieve_clusterer(model), MiniBatchKMeans)

    def test_retrieve_feat(self):
        """retrieve_feat yields a DihedralFeaturizer."""
        assert isinstance(retrieve_feat(model), DihedralFeaturizer)

    def test_retrieve_scaler(self):
        """retrieve_scaler yields a MinMaxScaler."""
        assert isinstance(retrieve_scaler(model), MinMaxScaler)

    def test_retrieve_decomposer(self):
        """retrieve_decomposer yields a tICA."""
        assert isinstance(retrieve_decomposer(model), tICA)

    def test_apply_percentile_search1(self):
        """'clusterer' search returns a list of the desired length."""
        wanted = 10
        counts = apply_percentile_search(
            count_array=self.msm.transmat_,
            percentile=0.1,
            desired_length=wanted,
            search_type='clusterer',
            msm=None,
        )
        assert isinstance(counts, list)
        assert len(counts) == wanted

    def test_apply_percentile_search2(self):
        """'msm' search returns a list of the desired length."""
        wanted = 2
        counts = apply_percentile_search(
            count_array=self.msm.transmat_,
            percentile=0.1,
            desired_length=wanted,
            search_type='msm',
            msm=self.msm,
        )
        assert isinstance(counts, list)
        assert len(counts) == wanted
Exemplo n.º 52
0
def test_5():
    """Compare the Bayesian MSM's mean transition matrix with the MLE estimate.

    Both models are fitted on the same NDGrid discretization of the cached
    DoubleWell trajectories; the norm of the difference between the sampled
    mean transition matrix and the MLE transition matrix must stay below 1e-2.
    """
    trajectories = DoubleWell(random_state=0).get_cached().trajectories
    grid = NDGrid(n_bins_per_feature=5)
    mle_model = MarkovStateModel(lag_time=100, verbose=False)
    bayes_model = BayesianMarkovStateModel(
        lag_time=100,
        n_samples=1000,
        n_chains=8,
        n_steps=1000,
        random_state=0,
    )

    discretized = grid.fit_transform(trajectories)
    bayes_model.fit(discretized)
    mle_model.fit(discretized)

    # Admittedly a weak check: the sampled mean need not coincide with the
    # MLE (the likelihood is not "symmetric"), and the 1e-2 cutoff is purely
    # heuristic.
    mean_transmat = bayes_model.all_transmats_.mean(axis=0)
    deviation = np.linalg.norm(mean_transmat - mle_model.transmat_)
    assert deviation < 1e-2
Exemplo n.º 53
0
def test_9():
    """Missing entries in the input must not become states.

    A trailing NaN is fitted on every interpreter; the ``None`` variant is
    exercised only when ``PY3`` is false.
    """
    two_state_mapping = {0: 0, 1: 1}

    # NaN case: only labels 0 and 1 should survive as states.
    nan_model = MarkovStateModel(ergodic_cutoff=0)
    nan_model.fit([0, 1, 0, 1, np.nan])
    assert nan_model.n_states_ == 2
    assert nan_model.mapping_ == two_state_mapping

    if PY3:
        return

    # None case (non-PY3 only): same expectation with a None in the middle.
    none_model = MarkovStateModel()
    none_model.fit([0, 1, 0, None, 0, 1])
    assert none_model.n_states_ == 2
    assert none_model.mapping_ == two_state_mapping
Exemplo n.º 54
0
def test_9():
    """Missing entries in the input must not become states.

    A trailing NaN is fitted on every interpreter; the ``None`` variant is
    exercised only when ``PY3`` is false.
    """
    two_state_mapping = {0: 0, 1: 1}

    # NaN case: only labels 0 and 1 should survive as states.
    nan_model = MarkovStateModel(ergodic_cutoff=0)
    nan_model.fit([0, 1, 0, 1, np.nan])
    assert nan_model.n_states_ == 2
    assert nan_model.mapping_ == two_state_mapping

    if PY3:
        return

    # None case (non-PY3 only): same expectation with a None in the middle.
    none_model = MarkovStateModel()
    none_model.fit([0, 1, 0, None, 0, 1])
    assert none_model.n_states_ == 2
    assert none_model.mapping_ == two_state_mapping
Exemplo n.º 55
0
def test_3():
    """Fit a reversible ('mle') model and round-trip it through dump/load.

    Checks the raw counts matrix for a hand-countable three-state sequence,
    that the populations sum to one, and that ``timescales_`` survives
    serialization.
    """
    # Local imports: only this test needs them.
    import shutil
    import tempfile

    model = MarkovStateModel(reversible_type='mle')
    model.fit([[0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 2, 2, 2, 2, 0, 0, 0]])

    counts = np.array([[8, 1, 1], [1, 3, 0], [1, 0, 3]])
    eq(model.countsmat_, counts)
    # Populations are floats: compare with a tolerance, since an exact
    # ``== 1.0`` can fail on harmless rounding error.
    np.testing.assert_almost_equal(np.sum(model.populations_), 1.0)
    # Bare attribute access: only verifies that reading it does not raise.
    model.timescales_

    # test pickleable
    # Work inside a private temp dir so nothing lands in the CWD, and clean
    # up with rmtree so a failing dump() is not masked by a failing unlink().
    tmp_dir = tempfile.mkdtemp()
    try:
        fn = os.path.join(tmp_dir, 'test-msm-temp.npy')
        dump(model, fn, compress=1)
        model2 = load(fn)
        eq(model2.timescales_, model.timescales_)
    finally:
        shutil.rmtree(tmp_dir, ignore_errors=True)
Exemplo n.º 56
0
def test_3():
    """Fit a reversible ('mle') model and round-trip it through dump/load.

    Checks the raw counts matrix for a hand-countable three-state sequence,
    that the populations sum to one, and that ``timescales_`` survives
    serialization.
    """
    # Local imports: only this test needs them.
    import shutil
    import tempfile

    model = MarkovStateModel(reversible_type='mle')
    model.fit([[0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 2, 2, 2, 2, 0, 0, 0]])

    counts = np.array([[8, 1, 1], [1, 3, 0], [1, 0, 3]])
    eq(model.countsmat_, counts)
    # Populations are floats: compare with a tolerance, since an exact
    # ``== 1.0`` can fail on harmless rounding error.
    np.testing.assert_almost_equal(np.sum(model.populations_), 1.0)
    # Bare attribute access: only verifies that reading it does not raise.
    model.timescales_

    # test pickleable
    # Work inside a private temp dir so nothing lands in the CWD, and clean
    # up with rmtree so a failing dump() is not masked by a failing unlink().
    tmp_dir = tempfile.mkdtemp()
    try:
        fn = os.path.join(tmp_dir, 'test-msm-temp.npy')
        dump(model, fn, compress=1)
        model2 = load(fn)
        eq(model2.timescales_, model.timescales_)
    finally:
        shutil.rmtree(tmp_dir, ignore_errors=True)
Exemplo n.º 57
0
def test_score_ll_1():
    """``score_ll`` of a short path equals the summed log transition probabilities.

    The model is fitted on string labels, so this also checks the label to
    index mapping and ``state_labels_``.
    """
    model = MarkovStateModel(reversible_type='mle')
    sequence = ['a', 'a', 'b', 'b', 'a', 'a', 'b', 'b']
    model.fit([sequence])
    assert model.mapping_ == {'a': 0, 'b': 1}

    # (path, expected log-likelihood) pairs; indices follow mapping_ above.
    cases = [
        (['a', 'a'], np.log(model.transmat_[0, 0])),
        (['b', 'b'], np.log(model.transmat_[1, 1])),
        (['a', 'b'], np.log(model.transmat_[0, 1])),
        (['a', 'b', 'b'], (np.log(model.transmat_[0, 1]) +
                           np.log(model.transmat_[1, 1]))),
    ]
    for path, expected in cases:
        # Floating-point results: compare with a tolerance rather than ``==``,
        # which depends on the exact order of the internal summation.
        np.testing.assert_allclose(model.score_ll([path]), expected)

    assert model.state_labels_ == ['a', 'b']
    np.testing.assert_almost_equal(np.sum(model.populations_), 1.0)
Exemplo n.º 58
0
def test_score_ll_1():
    """``score_ll`` of a short path equals the summed log transition probabilities.

    The model is fitted on string labels, so this also checks the label to
    index mapping and ``state_labels_``.
    """
    model = MarkovStateModel(reversible_type='mle')
    sequence = ['a', 'a', 'b', 'b', 'a', 'a', 'b', 'b']
    model.fit([sequence])
    assert model.mapping_ == {'a': 0, 'b': 1}

    # (path, expected log-likelihood) pairs; indices follow mapping_ above.
    cases = [
        (['a', 'a'], np.log(model.transmat_[0, 0])),
        (['b', 'b'], np.log(model.transmat_[1, 1])),
        (['a', 'b'], np.log(model.transmat_[0, 1])),
        (['a', 'b', 'b'], (np.log(model.transmat_[0, 1]) +
                           np.log(model.transmat_[1, 1]))),
    ]
    for path, expected in cases:
        # Floating-point results: compare with a tolerance rather than ``==``,
        # which depends on the exact order of the internal summation.
        np.testing.assert_allclose(model.score_ll([path]), expected)

    assert model.state_labels_ == ['a', 'b']
    np.testing.assert_almost_equal(np.sum(model.populations_), 1.0)
Exemplo n.º 59
0
def test_3():
    """Fit a reversible ('mle') model and round-trip it through dump/load.

    Checks the raw counts matrix for a hand-countable three-state sequence,
    that the populations sum to one, and that ``timescales_`` survives
    serialization into a temporary directory.
    """
    # Local import: only the cleanup below needs it.
    import shutil

    model = MarkovStateModel(reversible_type='mle')
    model.fit([[0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 2, 2, 2, 2, 0, 0, 0]])

    counts = np.array([[8, 1, 1], [1, 3, 0], [1, 0, 3]])
    eq(model.countsmat_, counts)
    # Populations are floats: compare with a tolerance, since an exact
    # ``== 1.0`` can fail on harmless rounding error.
    np.testing.assert_almost_equal(np.sum(model.populations_), 1.0)
    # Bare attribute access: only verifies that reading it does not raise.
    model.timescales_

    # test pickleable
    # Create the directory *before* the try so the finally clause never sees
    # unbound names, and remove it with rmtree so a failing dump() is not
    # masked by unlink()/rmdir() raising on a missing file.
    tmp_dir = tempfile.mkdtemp()
    try:
        fn = os.path.join(tmp_dir, 'test-msm-temp.npy')
        dump(model, fn, compress=1)
        model2 = load(fn)
        eq(model2.timescales_, model.timescales_)
    finally:
        shutil.rmtree(tmp_dir, ignore_errors=True)
Exemplo n.º 60
0
def test_hessian():
    """Fit a ContinuousTimeMSM and a MarkovStateModel on double-well data and print diagnostics.

    The statements visible here only print results (model summary, MSM
    timescales, and the two uncertainty estimates); they contain no
    assertions.
    """
    # Discretize the double-well trajectories on a 10-bin grid over [-pi, pi].
    grid = NDGrid(n_bins_per_feature=10, min=-np.pi, max=np.pi)
    seqs = grid.fit_transform(load_doublewell(random_state=0)['trajectories'])
    # Keep only the first 10 discretized sequences.
    seqs = [seqs[i] for i in range(10)]

    # Both models are built with the same lag time.
    lag_time = 10
    model = ContinuousTimeMSM(verbose=True, lag_time=lag_time)
    model.fit(seqs)
    # The discrete-time MSM is only constructed here; it is fitted inline in
    # the print call below.
    msm = MarkovStateModel(verbose=False, lag_time=lag_time)
    print(model.summarize())
    print('MSM timescales\n', msm.fit(seqs).timescales_)
    print('Uncertainty K\n', model.uncertainty_K())
    print('Uncertainty pi\n', model.uncertainty_pi())