Example #1
from numpy.testing import assert_approx_equal
from msmbuilder.msm import MarkovStateModel


def test_score_1():
    # test that the GMRQ score equals the sum of the retained
    # eigenvalues when testing and training on the same dataset.
    sequence = [0, 0, 0, 1, 1, 1, 2, 2, 2, 1, 1, 1,
                0, 0, 0, 1, 2, 2, 2, 1, 1, 1, 0, 0]
    for n in [0, 1, 2]:
        model = MarkovStateModel(verbose=False, n_timescales=n)
        model.fit([sequence])

        assert_approx_equal(model.score([sequence]), model.eigenvalues_.sum())
        assert_approx_equal(model.score([sequence]), model.score_)
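With n_timescales set, score_ holds the training GMRQ (the sum of the retained eigenvalues) and score() evaluates the same quantity on whatever sequences it is given. A minimal sketch of the held-out case, using a made-up second sequence rather than anything from the test suite:

from msmbuilder.msm import MarkovStateModel

# Two made-up discrete trajectories over states {0, 1, 2}.
train_seq = [0, 0, 1, 1, 2, 2, 1, 1, 0, 0, 1, 2, 2, 1, 0]
test_seq = [0, 1, 1, 2, 2, 1, 0, 0, 1, 2, 2, 2, 1, 0, 0]

model = MarkovStateModel(n_timescales=2, verbose=False)
model.fit([train_seq])

print(model.score_)             # training GMRQ = sum of retained eigenvalues
print(model.score([test_seq]))  # GMRQ evaluated on held-out data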
Example #2
def calculate_fitness(population_dihedral, diheds, score_global, i, lock):
    # Evaluate one candidate set of dihedral features: build a
    # tICA -> clustering -> MSM pipeline on the selected columns and store
    # the cross-validated GMRQ score in the shared dict `score_global`.
    import pandas as pd
    import numpy as np
    pop_index = i

    # Keep only the selected dihedral columns in every trajectory.
    new_diheds = [X[:, population_dihedral] for X in diheds]
    from msmbuilder.preprocessing import RobustScaler
    scaler = RobustScaler()
    scaled_diheds = scaler.fit_transform(new_diheds)
    # scaled_diheds = new_diheds  # uncomment to bypass the scaler
    from msmbuilder.decomposition import tICA
    tica_model = tICA(lag_time=2, n_components=5)
    tica_model.fit(scaled_diheds)
    tica_trajs = tica_model.transform(scaled_diheds)
    from msmbuilder.cluster import MiniBatchKMeans
    clusterer = MiniBatchKMeans(n_clusters=200, random_state=42)

    clustered_trajs = clusterer.fit_transform(tica_trajs)
    from msmbuilder.msm import MarkovStateModel
    msm = MarkovStateModel(lag_time=50, n_timescales=5)
    #msm.fit_transform(clustered_trajs)
    from sklearn.cross_validation import KFold  # removed in scikit-learn >= 0.20 (replaced by sklearn.model_selection)
    n_states = [4]
    cv = KFold(len(clustered_trajs), n_folds=5)
    results = []
    for n in n_states:
        # n_states_ is a fitted attribute and is overwritten by fit();
        # assigning it here only labels the results collected below.
        msm.n_states_ = n
        for fold, (train_index, test_index) in enumerate(cv):
            train_data = [clustered_trajs[i] for i in train_index]
            test_data = [clustered_trajs[i] for i in test_index]
            msm.fit(train_data)
            train_score = msm.score(train_data)
            test_score = msm.score(test_data)
            time_score = msm.timescales_[0]
            time_test_score = time_score + test_score
            print(time_score)
            print(test_score)
            av_score = time_test_score / 2
            results.append({
                'train_score': train_score,
                'test_score': test_score,
                'time_score': time_score,
                'av_score': av_score,
                'n_states': n,
                'fold': fold
            })
            print(msm.timescales_)
    results = pd.DataFrame(results)
    avgs = (results.groupby('n_states').aggregate(np.median).drop('fold',
                                                                  axis=1))
    best_nt = avgs['test_score'].idxmax()
    best_n = avgs['av_score'].idxmax()
    best_score = avgs.loc[best_n, 'av_score']
    best_scorent = avgs.loc[best_nt, 'test_score']
    print(best_scorent)
    # Report the best test score back through the shared dict, guarded by the lock.
    lock.acquire()
    score_global.update({pop_index: best_scorent})
    lock.release()
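The function above is written to be run in worker processes that share a result dict and a lock. A usage sketch with synthetic dihedral arrays and a two-member population; the shapes and feature indices are purely illustrative and do not come from the original project:

import numpy as np
from multiprocessing import Process, Manager, Lock

# Synthetic stand-in data: 5 trajectories, 1000 frames, 20 dihedral features.
diheds = [np.random.rand(1000, 20) for _ in range(5)]
# Each population member is a list of selected feature (column) indices.
population = [[0, 2, 5, 7, 11], [1, 3, 4, 9, 13]]

manager = Manager()
score_global = manager.dict()
lock = Lock()

procs = [Process(target=calculate_fitness,
                 args=(member, diheds, score_global, idx, lock))
         for idx, member in enumerate(population)]
for p in procs:
    p.start()
for p in procs:
    p.join()

print(dict(score_global))  # {population index: best median GMRQ test score}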
Example #3
            # Per-fold split: each tICA trajectory is divided frame-wise with
            # K-fold CV, and the block matching `fold` is held out for testing.
            train_data = []
            test_data = []
            for i in range(len(tica_data)):
                cv = KFold(len(tica_data[i]), n_folds=nFolds)
                for current_fold, (train_index, test_index) in enumerate(cv):
                    if current_fold == fold:
                        train_data.append(tica_data[i][train_index])
                        test_data.append(tica_data[i][test_index])
            reduced_train_data = sub_sampling_data(train_data, stride=100)
            kmeans.fit(reduced_train_data)
            assignments_train = kmeans.predict(train_data)
            assignments_test = kmeans.predict(test_data)
            msm = MarkovStateModel(lag_time=lagtime)
            msm.fit(assignments_train)
            train_score = msm.score_
            test_score = msm.score(assignments_test)

            results.append({
                'train_score': train_score,
                'test_score': test_score,
                'n_states': n,
                'fold': fold,
                'timescales': msm.timescales_
            })

        results = pd.DataFrame(results)
        print(results)
        output_fn = "GMRQ_MSMs_score_for_tica_n_%d_%d.pkl" % (n_tIC, n)
        with open(output_fn, 'wb') as result_fn:
            pickle.dump(results, result_fn)  # the with-block closes the file
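The pickled DataFrame can be read back and summarized later. A small sketch, where the file name is just the pattern above with hypothetical values n_tIC=8 and n=100 substituted in:

import pickle

# Hypothetical file produced by the snippet above (n_tIC=8, n=100); pandas
# must be installed so the pickled DataFrame can be loaded.
with open("GMRQ_MSMs_score_for_tica_n_8_100.pkl", 'rb') as f:
    results = pickle.load(f)

# Median GMRQ over folds; the array-valued 'timescales' column is dropped
# before aggregating.
summary = (results.drop(['timescales', 'fold'], axis=1)
                  .groupby('n_states')
                  .median())
print(summary[['train_score', 'test_score']])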
Example #4
kcenters = KCenters(n_clusters=n_Micro,
                    metric='euclidean',
                    random_state=0)
kcenters.fit(train_data_projection)
train_data_sequence = kcenters.predict(train_data_projection)
test_data_sequence = kcenters.predict(test_data_projection)
msm = MarkovStateModel(
    n_timescales=3,
    lag_time=100,
    reversible_type='transpose',
    verbose=False,
    sliding_window=True,
    ergodic_cutoff='on')  # the parameters may change
msm.fit(train_data_sequence)
train_score = msm.score(train_data_sequence)
test_score = msm.score(test_data_sequence)
# Write the GMRQ scores for this fold to a summary file.
with open(
        sub_resultdir +
        '/Fold_%d_tica_lagtime_%d_ntics_%d_nMicro_%d_gmrq.summary'
        % (fold, tica_correlation_time, n_tics, n_Micro), 'w') as f1:
    f1.write('train_score:%f\n' % train_score)
    f1.write('test_score:%f\n' % test_score)
# The x-range used to plot the implied timescales should also change.
print('computing implied timescale for training data')
train_msm_timescales = implied_timescales(
    train_data_sequence,
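The snippet above is cut off mid-call. For reference, a self-contained sketch of how msmbuilder's implied_timescales helper is typically invoked, with made-up discrete trajectories and lag times rather than the original inputs:

import numpy as np
from msmbuilder.msm import MarkovStateModel, implied_timescales

# Made-up discrete trajectories over 5 states.
sequences = [np.random.randint(0, 5, size=2000) for _ in range(3)]
lag_times = [1, 5, 10, 20, 50, 100]

its = implied_timescales(sequences, lag_times, n_timescales=3,
                         msm=MarkovStateModel(reversible_type='transpose',
                                              verbose=False))
# One row of implied timescales per lag time.
print(its)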