Example #1
def ChapmanKolmogorovTest(assignments,
                          klist=[1, 2, 3, 4, 5],
                          lagtime=50,
                          states=None):
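    """Chapman-Kolmogorov test sketch: fit a reference MSM at lag time
    `lagtime`, then for each k in `klist` fit a second MSM at lag time
    k * lagtime and compare the state probabilities of the two models via
    the helper CalculateStatesProbability (not shown here)."""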
    msm = MarkovStateModel(lag_time=lagtime, n_timescales=10)
    msm.fit(assignments)
    p_tau = msm.populations_
    T_tau = msm.transmat_
    mapping_tau = msm.mapping_

    prob_tau_all = []
    prob_ktau_all = []

    if states == "all" or states is None:
        states = range(len(p_tau))

    for k in klist:
        lagtime_long = k * lagtime
        print("long lagtime:", lagtime_long)
        msm = MarkovStateModel(lag_time=lagtime_long, n_timescales=10)
        msm.fit(assignments)
        p_ktau = msm.populations_
        T_ktau = msm.transmat_
        mapping_ktau = msm.mapping_
        probability_tau, probability_ktau = CalculateStatesProbability(
            T_tau, T_ktau, p_tau, p_ktau, mapping_tau, mapping_ktau, k, states)

        prob_tau_all.append(probability_tau)
        prob_ktau_all.append(probability_ktau)

    return prob_tau_all, prob_ktau_all
Example #2
def test_both():
    model = MarkovStateModel(
        reversible_type='mle', lag_time=1, n_timescales=1) 

    # note this might break it if we ask for more than 1 timescale
    sequences = np.random.randint(20, size=(10, 1000))
    lag_times = [1, 5, 10]

    models_ref = []
    for tau in lag_times:
        msm = MarkovStateModel(
            reversible_type='mle', lag_time=tau, n_timescales=10)
        msm.fit(sequences)
        models_ref.append(msm)

    timescales_ref = [m.timescales_ for m in models_ref]

    models = param_sweep(msm, sequences, {'lag_time' : lag_times}, n_jobs=2)
    timescales = implied_timescales(sequences, lag_times, msm=msm,
                                    n_timescales=10, n_jobs=2)

    print(timescales)
    print(timescales_ref)

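    # sanity check: the sweep should produce genuinely different models at
    # different lag times; if the transition matrices were identical the
    # comparison below would prove nothing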
    if np.abs(models[0].transmat_ - models[1].transmat_).sum() < 1E-6:
        raise Exception("you wrote a bad test.")

    for i in range(len(lag_times)):
        models[i].lag_time = lag_times[i]
        npt.assert_array_almost_equal(models[i].transmat_, models_ref[i].transmat_)
        npt.assert_array_almost_equal(timescales_ref[i], timescales[i])
Example #3
def test_ergodic_cutoff():
    assert (MarkovStateModel(lag_time=10).ergodic_cutoff ==
            BayesianMarkovStateModel(lag_time=10).ergodic_cutoff)
    assert (MarkovStateModel(lag_time=10)._parse_ergodic_cutoff() ==
            BayesianMarkovStateModel(lag_time=10)._parse_ergodic_cutoff())
    for cut_off in [0.01, 'on', 'off']:
        assert (MarkovStateModel(ergodic_cutoff=cut_off).ergodic_cutoff ==
                BayesianMarkovStateModel(ergodic_cutoff=cut_off).ergodic_cutoff)
Example #4
def test_from_msm():
    assignments, _ = _metastable_system()
    msm = MarkovStateModel()
    msm.fit(assignments)
    pcca = PCCA.from_msm(msm, 2)

    msm = MarkovStateModel()
    msm.fit(assignments)
    pccaplus = PCCAPlus.from_msm(msm, 2)
Example #5
def test_counts_3():
    # test counts matrix scaling
    seq = [1] * 4 + [2] * 4 + [1] * 4

    model1 = MarkovStateModel(reversible_type=None, lag_time=2,
                              sliding_window=True).fit([seq])
    model2 = MarkovStateModel(reversible_type=None, lag_time=2,
                              sliding_window=False).fit([seq])
    model3 = MarkovStateModel(reversible_type=None, lag_time=2,
                              ergodic_cutoff='off').fit([seq])

    eq(model1.countsmat_, model2.countsmat_)
    eq(model1.countsmat_, model3.countsmat_)
    eq(model2.countsmat_, model3.countsmat_)
Example #6
def test_9():
    # what if the input data contains NaN? They should be ignored
    model = MarkovStateModel(ergodic_cutoff=0)

    seq = [0, 1, 0, 1, np.nan]
    model.fit(seq)
    assert model.n_states_ == 2
    assert model.mapping_ == {0: 0, 1: 1}

    if not PY3:
        model = MarkovStateModel()
        seq = [0, 1, 0, None, 0, 1]
        model.fit(seq)
        assert model.n_states_ == 2
        assert model.mapping_ == {0: 0, 1: 1}
Example #7
def plot_timescales(clusterer_dir,
                    n_clusters,
                    tica_dir,
                    main="",
                    lag_times=list(range(1, 50))):
    clusterer = verboseload(clusterer_dir)
    print(clusterer)
    sequences = clusterer.labels_
    #print(sequences)
    #lag_times = list(np.arange(1,150,5))
    n_timescales = 5

    msm_timescales = implied_timescales(sequences,
                                        lag_times,
                                        n_timescales=n_timescales,
                                        msm=MarkovStateModel(
                                            verbose=True,
                                            prior_counts=1e-5,
                                            ergodic_cutoff='off'))
    print(msm_timescales)

    for i in range(n_timescales):
        plt.plot(lag_times, msm_timescales[:, i])
    plt.xlabel("Lag time (ns)")
    plt.ylabel("Implied Timescales (ns)")
    plt.title(main)
    plt.semilogy()
    pp = PdfPages("%s/%s_n_clusters%d_implied_timescales.pdf" %
                  (tica_dir, main, n_clusters))
    pp.savefig()
    pp.close()
    plt.clf()
Example #8
def test_cond_committors():
    # depends on tpt.committors

    msm = MarkovStateModel(lag_time=1)
    assignments = np.random.randint(4, size=(10, 1000))
    msm.fit(assignments)

    tprob = msm.transmat_

    for_committors = tpt.committors(0, 3, msm)
    cond_committors = tpt.conditional_committors(0, 3, 2, msm)

    # The committor for state 1 can be decomposed into paths that do and
    # do not visit state 2 along the way. The paths that do not visit
    # state 2 must look like 1, 1, 1, ..., 1, 1, 3, so we can compute
    # their probability with a simple geometric-series approximation.
    # Since we want the other component of the forward committor, we
    # subtract that probability from the forward committor.
    ref = for_committors[1] - np.power(tprob[1, 1],
                                       np.arange(5000)).sum() * tprob[1, 3]
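    # the truncated geometric series above approximates
    # tprob[1, 3] / (1 - tprob[1, 1]), i.e. the total probability of the
    # direct 1 -> 1 -> ... -> 1 -> 3 paths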
    #print (ref / for_committors[1])
    ref = [0, ref, for_committors[2], 0]

    #print(cond_committors, ref)

    npt.assert_array_almost_equal(ref, cond_committors)
Example #9
def test_harder_hubscore():
    # depends on tpt.committors and tpt.conditional_committors

    assignments = np.random.randint(10, size=(10, 1000))
    msm = MarkovStateModel(lag_time=1)
    msm.fit(assignments)

    hub_scores = tpt.hub_scores(msm)

    ref_hub_scores = np.zeros(10)
    for A in range(10):
        for B in range(10):
            committors = tpt.committors(A, B, msm)
            denom = msm.transmat_[A, :].dot(committors)  #+ msm.transmat_[A, B]
            for C in range(10):
                if A == B or A == C or B == C:
                    continue
                cond_committors = tpt.conditional_committors(A, B, C, msm)

                temp = 0.0
                for i in range(10):
                    if i in [A, B]:
                        continue
                    temp += cond_committors[i] * msm.transmat_[A, i]
                temp /= denom

                ref_hub_scores[C] += temp

    ref_hub_scores /= (9 * 8)
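    # the division averages each hub score over the 9 * 8 ordered (A, B)
    # pairs with A != B and neither equal to C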

    #print(ref_hub_scores, hub_scores)

    npt.assert_array_almost_equal(ref_hub_scores, hub_scores)
Example #10
def test_fluxes():
    # depends on tpt.committors

    msm = MarkovStateModel(lag_time=1)
    assignments = np.random.randint(3, size=(10, 1000))
    msm.fit(assignments)

    tprob = msm.transmat_
    pop = msm.populations_
    # forward committors
    qplus = tpt.committors(0, 2, msm)

    ref_fluxes = np.zeros((3, 3))
    ref_net_fluxes = np.zeros((3, 3))
    for i in range(3):
        for j in range(3):
            if i != j:
                # Eq. 2.24 in Metzner et al. Transition Path Theory.
                # Multiscale Model. Simul. 2009, 7, 1192-1219.
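                # reactive flux: f_ij = pi_i * q-_i * T_ij * q+_j, where the
                # backward committor q-_i equals 1 - q+_i for a reversible MSM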
                ref_fluxes[i, j] = (pop[i] * tprob[i, j] * (1 - qplus[i]) *
                                    qplus[j])

    for i in range(3):
        for j in range(3):
            ref_net_fluxes[i, j] = np.max(
                [0, ref_fluxes[i, j] - ref_fluxes[j, i]])

    fluxes = tpt.fluxes(0, 2, msm)
    net_fluxes = tpt.net_fluxes(0, 2, msm)

    # print(fluxes)
    # print(ref_fluxes)

    npt.assert_array_almost_equal(ref_fluxes, fluxes)
    npt.assert_array_almost_equal(ref_net_fluxes, net_fluxes)
Example #11
def case1():
    map_id = 40
    for p_id in range(6383, 6391):

        assignments = np.load('Assignments-%d.fixed.Map%d.npy' %
                              (p_id, map_id))
        cv = KFold(len(assignments), n_folds=10)
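        # 10-fold split over whole trajectories (older
        # sklearn.cross_validation.KFold signature)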
        lagtime = 50
        msm = MarkovStateModel(lag_time=lagtime)
        pops = []
        msmts = []
        for fold, (train_index, test_index) in enumerate(cv):
            assignments_train = assignments[train_index]
            msm.fit(assignments_train)
            if len(msm.populations_) == 40:
                pops.append(msm.populations_)

            msmts.append(msm.timescales_)

        output_dir = "Data-%d-macro%d" % (p_id, map_id)
        if not os.path.exists(output_dir):
            os.makedirs(output_dir)

        fn_populations = os.path.join(output_dir, "Populations-10fold.npy")
        fn_msmts = os.path.join(output_dir, "ImpliedTimescales-10fold.npy")

        np.save(fn_populations, pops)
        np.save(fn_msmts, msmts)
        print("Saved: {},{}".format(fn_populations, fn_msmts))
Example #12
def test_13():
    model = MarkovStateModel(n_timescales=2)
    model.fit([[0, 0, 0, 1, 2, 1, 0, 0, 0, 1, 3, 3, 3, 1, 1, 2, 2, 0, 0]])
    left_right = np.dot(model.left_eigenvectors_.T, model.right_eigenvectors_)
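    # the trajectory visits 4 states and n_timescales=2 keeps 3 eigenvectors
    # (the stationary one plus two dynamical ones), so left_right is 3 x 3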

    # check biorthonormal
    np.testing.assert_array_almost_equal(
        left_right,
        np.eye(3))

    # check that the stationary left eigenvector is normalized to be 1
    np.testing.assert_almost_equal(model.left_eigenvectors_[:, 0].sum(), 1)

    # the left eigenvectors satisfy <\phi_i, \phi_i>_{\mu^{-1}} = 1
    for i in range(3):
        np.testing.assert_almost_equal(
            np.dot(model.left_eigenvectors_[:, i],
                   model.left_eigenvectors_[:, i] / model.populations_), 1)

    # and that the right eigenvectors satisfy  <\psi_i, \psi_i>_{\mu} = 1
    for i in range(3):
        np.testing.assert_almost_equal(
            np.dot(model.right_eigenvectors_[:, i],
                   model.right_eigenvectors_[:, i] *
                   model.populations_), 1)
Example #13
def test_10():
    # test inverse transform
    model = MarkovStateModel(reversible_type=None, ergodic_cutoff=0)
    model.fit([['a', 'b', 'c', 'a', 'a', 'b']])
    v = model.inverse_transform([[0, 1, 2]])
    assert len(v) == 1
    np.testing.assert_array_equal(v[0], ['a', 'b', 'c'])
Example #14
def test_counts_no_trim():
    # test counts matrix without trimming
    model = MarkovStateModel(reversible_type=None, ergodic_cutoff=0)

    model.fit([[1, 1, 1, 1, 1, 1, 1, 1, 1]])
    eq(model.countsmat_, np.array([[8.0]]))
    eq(model.mapping_, {1: 0})
Example #15
def test_counts_2():
    # test counts matrix with trimming
    model = MarkovStateModel(reversible_type=None, ergodic_cutoff=1)

    model.fit([[1, 1, 1, 1, 1, 1, 1, 1, 1, 2]])
    eq(model.mapping_, {1: 0})
    eq(model.countsmat_, np.array([[8]]))
Example #16
def test_partial_transform():
    model = MarkovStateModel()
    model.fit([['a', 'a', 'b', 'b', 'c', 'c', 'a', 'a']])
    assert model.mapping_ == {'a': 0, 'b': 1, 'c': 2}

    v = model.partial_transform(['a', 'b', 'c'])
    assert isinstance(v, list)
    assert len(v) == 1
    assert v[0].dtype == np.int
    np.testing.assert_array_equal(v[0], [0, 1, 2])

    v = model.partial_transform(['a', 'b', 'c', 'd'], 'clip')
    assert isinstance(v, list)
    assert len(v) == 1
    assert v[0].dtype == np.int
    np.testing.assert_array_equal(v[0], [0, 1, 2])

    v = model.partial_transform(['a', 'b', 'c', 'd'], 'fill')
    assert isinstance(v, np.ndarray)
    assert len(v) == 4
    assert v.dtype == np.float
    np.testing.assert_array_equal(v, [0, 1, 2, np.nan])

    v = model.partial_transform(['a', 'a', 'SPLIT', 'b', 'b', 'b'], 'clip')
    assert isinstance(v, list)
    assert len(v) == 2
    assert v[0].dtype == np.int
    assert v[1].dtype == np.int
    np.testing.assert_array_equal(v[0], [0, 0])
    np.testing.assert_array_equal(v[1], [1, 1, 1])
Example #17
def test_ntimescales_3():
    # see issue #603
    trajs = [np.random.randint(0, 30, size=500) for _ in range(5)]
    msm = MarkovStateModel(n_timescales=10).fit(trajs)

    pccap = PCCAPlus.from_msm(msm, 11)
    lumped_trajs = pccap.transform(trajs)
    assert len(np.unique(lumped_trajs)) == 11
Example #18
def test_pipeline():
    trajs = DoubleWell(random_state=0).get_cached().trajectories
    p = Pipeline([
        ('ndgrid', NDGrid(n_bins_per_feature=100)),
        ('msm', MarkovStateModel(lag_time=100))
    ])

    p.fit(trajs)
    p.named_steps['msm'].summarize()
Example #19
def test_pcca_plus_1():
    assignments, ref_macrostate_assignments = _metastable_system()
    pipeline = Pipeline([('msm', MarkovStateModel()), ('pcca+', PCCAPlus(2))])
    macro_assignments = pipeline.fit_transform(assignments)[0]
    # we need to consider any permutation of the state labels when we
    # test for equality. Since there are only 2 states, this is simple:
    # use logical_not to flip the assignments.
    assert (np.all(macro_assignments == ref_macrostate_assignments) or np.all(
        macro_assignments == np.logical_not(ref_macrostate_assignments)))
Example #20
def test_6():
    # test score_ll with novel entries
    model = MarkovStateModel(reversible_type='mle')
    sequence = ['a', 'a', 'b', 'b', 'a', 'a', 'b', 'b']
    model.fit([sequence])

    assert not np.isfinite(model.score_ll([['c']]))
    assert not np.isfinite(model.score_ll([['c', 'c']]))
    assert not np.isfinite(model.score_ll([['a', 'c']]))
Example #21
def test_51():
    # test score_ll
    model = MarkovStateModel(reversible_type='mle')
    sequence = ['a', 'a', 'b', 'b', 'a', 'a', 'b', 'b', 'c', 'c', 'c', 'a', 'a']
    model.fit([sequence])
    assert model.mapping_ == {'a': 0, 'b': 1, 'c': 2}

    score_ac = model.score_ll([['a', 'c']])
    assert score_ac == np.log(model.transmat_[0, 2])
Example #22
def estimate_mle_populations(matrix, **kwargs):
    if msmb_version == '2.8.2':
        t_matrix = estimate_transition_matrix(matrix)
        populations = get_eigenvectors(t_matrix, 1, **kwargs)[1][:, 0]
        return populations
    elif msmb_version == '3.2.0':
        obj = MarkovStateModel()
        populations = obj._fit_mle(matrix)[1]
        return populations
Example #23
def test_ntimescales_2():
    # see issue #603
    trajs = [random.randint(0, 100, size=500) for _ in range(15)]
    msm = MarkovStateModel().fit(trajs)

    pccap = PCCAPlus.from_msm(msm, 11)
    lumped_trajs = pccap.transform(trajs)
    observed_macros = len(np.unique(lumped_trajs))
    assert observed_macros == 11, observed_macros
Example #24
def at_lagtime(lt):
    msm = MarkovStateModel(lag_time=lt, n_timescales=10, verbose=False)
    msm.fit(list(ktrajs.values()))
    ret = {
        'lag_time': lt,
        'percent_retained': msm.percent_retained_,
    }
    for i in range(msm.n_timescales):
        ret['timescale_{}'.format(i)] = msm.timescales_[i]
    return ret
Example #25
def test_plot_implied_timescales():
    lag_times = [1, 50, 100, 250, 500, 1000, 5000]
    msm_objs = []
    for lag in lag_times:
        # Construct MSM
        msm = MarkovStateModel(lag_time=lag, n_timescales=5)
        msm.fit(data)
        msm_objs.append(msm)
    ax = plot_implied_timescales(msm_objs)
    assert isinstance(ax, SubplotBase)
Example #26
    def generate_msm(self, clustered):
        """
        Generates a MSM from the current cluster data

        Returns: Msm
        """
        # Generate microstate MSM
        self.currtime = time.time()
        msm = MarkovStateModel(lag_time=self.config.getint("model", "msm_lag"),
                               reversible_type="transpose",
                               ergodic_cutoff="off",
                               prior_counts=0.000001)
        msm.fit(clustered)
        print("TIME\tmicromsm:\t%f" % (time.time() - self.currtime))
        utils.dump(msm, "msm_G%d.pkl" % self.generation)

        # Lump into macrostates
        self.currtime = time.time()
        pcca = PCCAPlus.from_msm(msm,
                                 n_macrostates=self.config.getint(
                                     "model", "macrostates"))
        mclustered = pcca.transform(clustered, mode="fill")
        if any(any(np.isnan(x) for x in m) for m in mclustered):  #pylint: disable=no-member
            print(
                "WARNING: Unassignable clusters in PCCA with %d macrostates!" %
                self.config.getint("model", "macrostates"))
        print("TIME\tpccaplus:\t%f" % (time.time() - self.currtime))
        if self.save_extras:
            utils.dump(pcca, "macrostater.pkl")

        # Generate macrostate MSM
        self.currtime = time.time()
        mmsm = MarkovStateModel(lag_time=self.config.getint(
            "model", "msm_lag"),
                                reversible_type="transpose",
                                ergodic_cutoff="off",
                                prior_counts=0.000001)
        mmsm.fit(mclustered)
        print("TIME\tmacromsm\t%f" % (time.time() - self.currtime))
        utils.dump(mmsm, "mmsm_G%d.pkl" % self.generation)

        return mmsm, mclustered
Example #27
def test_bace():
    assignments, ref_macrostate_assignments = _metastable_system()
    pipeline = Pipeline([('msm', MarkovStateModel()),
                         ('bace', BACE(n_macrostates=2))])
    macro_assignments = pipeline.fit_transform(assignments)[0]
    # we need to consider any permutation of the state labels when we
    # test for equality. Since there are only 2 states, this is simple:
    # use logical_not to flip the assignments.
    opposite = np.logical_not(ref_macrostate_assignments)
    assert (np.all(macro_assignments == ref_macrostate_assignments)
            or np.all(macro_assignments == opposite))
Example #28
def test_score_1():
    # test that GMRQ is equal to the sum of the first n eigenvalues,
    # when testing and training on the same dataset.
    sequence = [0, 0, 0, 1, 1, 1, 2, 2, 2, 1, 1, 1,
                0, 0, 0, 1, 2, 2, 2, 1, 1, 1, 0, 0]
    for n in [0, 1, 2]:
        model = MarkovStateModel(verbose=False, n_timescales=n)
        model.fit([sequence])

        assert_approx_equal(model.score([sequence]), model.eigenvalues_.sum())
        assert_approx_equal(model.score([sequence]), model.score_)
Example #29
def test_fit_1():
    # call fit, compare to MSM
    sequence = [0, 0, 0, 1, 1, 1, 0, 0, 2, 2, 0, 1, 1, 1, 2, 2, 2, 2, 2]
    model = ContinuousTimeMSM(verbose=False)
    model.fit([sequence])

    msm = MarkovStateModel(verbose=False)
    msm.fit([sequence])

    # they shouldn't be equal in general, but for this input they seem to be
    np.testing.assert_array_almost_equal(model.transmat_, msm.transmat_)
Example #30
def test_7():
    # test timescales
    model = MarkovStateModel()
    model.fit([[0, 0, 0, 1, 1, 1, 0, 0, 0, 1, 0, 1, 1]])
    assert np.all(np.isfinite(model.timescales_))
    assert len(model.timescales_) == 1

    model.fit([[0, 0, 0, 1, 1, 1, 0, 0, 0, 1, 0, 1, 1, 2, 2, 0, 0]])
    assert np.all(np.isfinite(model.timescales_))
    assert len(model.timescales_) == 2
    assert model.n_states_ == 3

    model = MarkovStateModel(n_timescales=1)
    model.fit([[0, 0, 0, 1, 1, 1, 0, 0, 0, 1, 0, 1, 1, 2, 2, 0, 0]])
    assert len(model.timescales_) == 1

    model = MarkovStateModel(n_timescales=100)
    model.fit([[0, 0, 0, 1, 1, 1, 0, 0, 0, 1, 0, 1, 1, 2, 2, 0, 0]])
    assert len(model.timescales_) == 2
    assert np.sum(model.populations_) == 1.0