def post(self):
    """Build a model from the posted matrix and reply with its JSON form.

    Reads the ``matrix`` request argument, parses it with ``sio.mmread``,
    derives the transition matrix and populations from it with
    ``_transmat_mle_prinz`` and stores them on a fresh ``MarkovStateModel``.
    The ``mode`` request argument is parsed as an integer: a non-zero value
    writes the output of ``make_json_paths`` (TP), zero writes the output
    of ``make_json_graph`` (MSM).
    """
    matrix_text = self.get_argument('matrix')
    counts = sio.mmread(StringIO(matrix_text))

    model = MarkovStateModel()
    model.transmat_, model.populations_ = _transmat_mle_prinz(counts)
    model.n_states_ = model.populations_.shape[0]

    # Non-zero mode -> TP output, zero -> MSM output.
    wants_paths = int(self.get_argument('mode')) != 0
    to_json = make_json_paths if wants_paths else make_json_graph
    self.write(to_json(model, self))
def test_hubscore():
    """The central state of a five-state star must get a hub score of 1.0."""
    # Make an actual hub: states 0, 1, 3 and 4 only move to themselves or
    # to state 2, so state 2 sits between every other pair of states.
    transition_matrix = np.array([
        [0.8, 0.0, 0.2, 0.0, 0.0],
        [0.0, 0.8, 0.2, 0.0, 0.0],
        [0.1, 0.1, 0.6, 0.1, 0.1],
        [0.0, 0.0, 0.2, 0.8, 0.0],
        [0.0, 0.0, 0.2, 0.0, 0.8],
    ])

    # Populate the fitted attributes by hand instead of calling fit().
    model = MarkovStateModel(lag_time=1)
    model.transmat_ = transition_matrix
    model.n_states_ = 5

    hub_scores = tpt.hub_scores(model, 2)
    assert hub_scores[0] == 1.0
def test_hubscore():
    """Verify ``tpt.hub_scores`` returns 1.0 for the hub of a star network."""
    # Make an actual hub!  Every outer state (0, 1, 3, 4) has non-zero
    # transition probability only to itself and to the centre, state 2.
    rows = [
        [0.8, 0.0, 0.2, 0.0, 0.0],
        [0.0, 0.8, 0.2, 0.0, 0.0],
        [0.1, 0.1, 0.6, 0.1, 0.1],
        [0.0, 0.0, 0.2, 0.8, 0.0],
        [0.0, 0.0, 0.2, 0.0, 0.8],
    ]
    hub_model = MarkovStateModel(lag_time=1)
    # The model is never fit; its fitted attributes are set directly.
    hub_model.transmat_ = np.array(rows)
    hub_model.n_states_ = 5

    first_score = tpt.hub_scores(hub_model, 2)[0]
    assert first_score == 1.0
def test_2():
    """Plot sampled versus analytic uncertainties of an MSM.

    A model is set up from a fixed 6x6 count matrix (each entry increased
    by 1/6).  5000 transition matrices are then sampled by drawing every
    row from a Dirichlet distribution parameterised by the matching count
    row, and the eigenvalues / timescales of each sample are recorded.
    For the first three timescales and for eigenvalues 1-3 a kernel density
    estimate of the samples is drawn next to a normal density centred on
    the model's own estimate with the model's reported uncertainty as its
    scale.  The figure is written to ``test_msm_uncertainty_plots.png``;
    nothing is asserted.
    """
    model = MarkovStateModel(verbose=False)
    C = np.array([[4380, 153, 15, 2, 0, 0],
                  [211, 4788, 1, 0, 0, 0],
                  [169, 1, 4604, 226, 0, 0],
                  [3, 13, 158, 4823, 3, 0],
                  [0, 0, 0, 4, 4978, 18],
                  [7, 5, 0, 0, 62, 4926]], dtype=float)
    C = C + 1.0 / 6.0
    model.n_states_ = C.shape[0]
    model.countsmat_ = C
    model.transmat_, model.populations_ = model._fit_mle(C)

    n_trials = 5000
    rng = np.random.RandomState(0)
    all_timescales = np.zeros((n_trials, model.n_states_ - 1))
    all_eigenvalues = np.zeros((n_trials, model.n_states_))
    for trial in range(n_trials):
        # The row variable must not share a name with the trial index:
        # Python 2 list comprehensions leak their loop variable, which
        # would make every sample land in the same output row.
        T = np.vstack([rng.dirichlet(count_row) for count_row in C])
        u = _solve_msm_eigensystem(T, k=6)[0]
        all_eigenvalues[trial] = u
        all_timescales[trial] = -1 / np.log(u[1:])

    pp.figure(figsize=(12, 8))

    # Top row of the 2x3 grid: the first three timescales.
    for i in range(3):
        pp.subplot(2, 3, i + 1)
        pp.title('Timescale %d' % i)
        kde = scipy.stats.gaussian_kde(all_timescales[:, i])
        xx = np.linspace(all_timescales[:, i].min(),
                         all_timescales[:, i].max())
        r = scipy.stats.norm(loc=model.timescales_[i],
                             scale=model.uncertainty_timescales()[i])
        pp.plot(xx, kde.evaluate(xx), c='r', label='Samples')
        pp.plot(xx, r.pdf(xx), c='b', label='Analytic')

    # Bottom row: eigenvalues 1-3 (index 0 is skipped).
    for i in range(1, 4):
        pp.subplot(2, 3, 3 + i)
        pp.title('Eigenvalue %d' % i)
        kde = scipy.stats.gaussian_kde(all_eigenvalues[:, i])
        xx = np.linspace(all_eigenvalues[:, i].min(),
                         all_eigenvalues[:, i].max())
        r = scipy.stats.norm(loc=model.eigenvalues_[i],
                             scale=model.uncertainty_eigenvalues()[i])
        pp.plot(xx, kde.evaluate(xx), c='r', label='Samples')
        pp.plot(xx, r.pdf(xx), c='b', label='Analytic')

    pp.tight_layout()
    pp.legend(loc=4)
    pp.savefig('test_msm_uncertainty_plots.png')
def test_2():
    """Compare sampled and analytic MSM uncertainties in a saved figure.

    The model's count matrix is a fixed 6x6 array with 1/6 added to every
    entry.  Each of the 5000 trials builds a transition matrix whose rows
    are Dirichlet draws parameterised by the corresponding count row, then
    stores that matrix's eigenvalues and the timescales derived from them.
    Six panels (three timescales, then eigenvalues 1-3) overlay a kernel
    density estimate of the samples with a normal density that uses the
    model's estimate as location and its reported uncertainty as scale.
    Output goes to ``test_msm_uncertainty_plots.png``; there are no
    assertions.
    """
    model = MarkovStateModel(verbose=False)
    C = np.array([
        [4380, 153, 15, 2, 0, 0],
        [211, 4788, 1, 0, 0, 0],
        [169, 1, 4604, 226, 0, 0],
        [3, 13, 158, 4823, 3, 0],
        [0, 0, 0, 4, 4978, 18],
        [7, 5, 0, 0, 62, 4926]], dtype=float)
    C = C + 1.0 / 6.0
    model.n_states_ = C.shape[0]
    model.countsmat_ = C
    model.transmat_, model.populations_ = model._fit_mle(C)

    n_trials = 5000
    rng = np.random.RandomState(0)
    all_timescales = np.zeros((n_trials, model.n_states_ - 1))
    all_eigenvalues = np.zeros((n_trials, model.n_states_))
    for trial in range(n_trials):
        # Iterate the count rows under their own name.  Reusing the trial
        # index inside the comprehension overwrites it on Python 2, where
        # list-comprehension variables leak into the enclosing scope.
        T = np.vstack([rng.dirichlet(count_row) for count_row in C])
        u = _solve_msm_eigensystem(T, k=6)[0]
        all_eigenvalues[trial] = u
        all_timescales[trial] = -1 / np.log(u[1:])

    pp.figure(figsize=(12, 8))

    # Panels 1-3: timescales 0, 1 and 2.
    for i in range(3):
        pp.subplot(2, 3, i + 1)
        pp.title('Timescale %d' % i)
        kde = scipy.stats.gaussian_kde(all_timescales[:, i])
        xx = np.linspace(all_timescales[:, i].min(),
                         all_timescales[:, i].max())
        r = scipy.stats.norm(loc=model.timescales_[i],
                             scale=model.uncertainty_timescales()[i])
        pp.plot(xx, kde.evaluate(xx), c='r', label='Samples')
        pp.plot(xx, r.pdf(xx), c='b', label='Analytic')

    # Panels 4-6: eigenvalues 1, 2 and 3.
    for i in range(1, 4):
        pp.subplot(2, 3, 3 + i)
        pp.title('Eigenvalue %d' % i)
        kde = scipy.stats.gaussian_kde(all_eigenvalues[:, i])
        xx = np.linspace(all_eigenvalues[:, i].min(),
                         all_eigenvalues[:, i].max())
        r = scipy.stats.norm(loc=model.eigenvalues_[i],
                             scale=model.uncertainty_eigenvalues()[i])
        pp.plot(xx, kde.evaluate(xx), c='r', label='Samples')
        pp.plot(xx, r.pdf(xx), c='b', label='Analytic')

    pp.tight_layout()
    pp.legend(loc=4)
    pp.savefig('test_msm_uncertainty_plots.png')
def test_mfpt2():
    """In a two-state chain the MFPT to the other state is the escape time."""
    tprob = np.array([[0.90, 0.10],
                      [0.22, 0.78]])

    # Unnormalised weights: state 0 is fixed at 1 and state 1 is scaled by
    # the ratio of the two off-diagonal transition probabilities.
    weight0 = 1
    weight1 = weight0 * tprob[0, 1] / tprob[1, 0]
    populations = np.array([weight0, weight1]) / (weight0 + weight1)

    # Fitted attributes are assigned directly; the model is never fit.
    model = MarkovStateModel(lag_time=1)
    model.transmat_ = tprob
    model.n_states_ = 2
    model.populations_ = populations

    # One tpt.mfpts() result per sink state, stacked and transposed so that
    # the entry at [a, b] is read as "from a to b" in the checks below.
    per_sink = []
    for sink in range(2):
        per_sink.append(tpt.mfpts(model, sink))
    mfpts = np.vstack(per_sink).T

    # since it's a 2x2 the mfpt from 0 -> 1 is the
    # same as the escape time of 0
    for source, target in ((0, 1), (1, 0)):
        escape_time = 1 / (1 - tprob[source, source])
        npt.assert_almost_equal(escape_time, mfpts[source, target])
def test_countsmat():
    """Smoke-test eigenvalue/timescale sampling from a fixed count matrix.

    A model is set up from a 6x6 count matrix (each entry increased by
    1/6).  5000 transition matrices are sampled by drawing each row from a
    Dirichlet distribution parameterised by the matching count row, and the
    eigenvalues and derived timescales of every sample are stored.  The
    test makes no assertions; it only checks that the pipeline runs.
    """
    model = MarkovStateModel(verbose=False)
    C = np.array([[4380, 153, 15, 2, 0, 0],
                  [211, 4788, 1, 0, 0, 0],
                  [169, 1, 4604, 226, 0, 0],
                  [3, 13, 158, 4823, 3, 0],
                  [0, 0, 0, 4, 4978, 18],
                  [7, 5, 0, 0, 62, 4926]], dtype=float)
    C = C + (1.0 / 6.0)
    model.n_states_ = C.shape[0]
    model.countsmat_ = C
    model.transmat_, model.populations_ = model._fit_mle(C)

    n_trials = 5000
    rng = np.random.RandomState(0)
    all_timescales = np.zeros((n_trials, model.n_states_ - 1))
    all_eigenvalues = np.zeros((n_trials, model.n_states_))
    for trial in range(n_trials):
        # The row variable must not share a name with the trial index:
        # Python 2 list comprehensions leak their loop variable, which
        # would make every sample land in the same output row.
        T = np.vstack([rng.dirichlet(count_row) for count_row in C])
        u = _solve_msm_eigensystem(T, k=6)[0]
        # Keeping only the real part quiets a warning.
        # NOTE(review): the original author was unsure whether dropping
        # the imaginary part is legitimate here.
        u = np.real(u)
        all_eigenvalues[trial] = u
        all_timescales[trial] = -1 / np.log(u[1:])
def test_mfpt2():
    """Check two-state mean first passage times against escape times.

    The populations are built by hand from the transition matrix, the
    model's fitted attributes are assigned directly, and ``tpt.mfpts`` is
    evaluated once per sink state.
    """
    tprob = np.array([[0.90, 0.10],
                      [0.22, 0.78]])

    pi0 = 1
    # pi1 T[1, 0] = pi0 T[0, 1]
    pi1 = pi0 * tprob[0, 1] / tprob[1, 0]
    pops = np.array([pi0, pi1]) / (pi0 + pi1)

    msm = MarkovStateModel(lag_time=1)
    msm.transmat_ = tprob
    msm.n_states_ = 2
    msm.populations_ = pops

    # ``range`` instead of ``xrange`` so the test runs on Python 3 as well
    # as Python 2.
    mfpts = np.vstack([tpt.mfpts(msm, sink) for sink in range(2)]).T

    # since it's a 2x2 the mfpt from 0 -> 1 is the
    # same as the escape time of 0
    npt.assert_almost_equal(1 / (1 - tprob[0, 0]), mfpts[0, 1])
    npt.assert_almost_equal(1 / (1 - tprob[1, 1]), mfpts[1, 0])
def test_countsmat():
    """Run the Dirichlet resampling of a fixed count matrix end to end.

    The 6x6 count matrix has 1/6 added to every entry before the model is
    set up from it.  Each of the 5000 trials assembles a transition matrix
    from per-row Dirichlet draws (parameterised by the count rows), solves
    its eigensystem and records the eigenvalues and the timescales derived
    from them.  No assertions are made; the test only exercises the code.
    """
    model = MarkovStateModel(verbose=False)
    C = np.array([
        [4380, 153, 15, 2, 0, 0],
        [211, 4788, 1, 0, 0, 0],
        [169, 1, 4604, 226, 0, 0],
        [3, 13, 158, 4823, 3, 0],
        [0, 0, 0, 4, 4978, 18],
        [7, 5, 0, 0, 62, 4926]], dtype=float)
    C = C + (1.0 / 6.0)
    model.n_states_ = C.shape[0]
    model.countsmat_ = C
    model.transmat_, model.populations_ = model._fit_mle(C)

    n_trials = 5000
    rng = np.random.RandomState(0)
    all_timescales = np.zeros((n_trials, model.n_states_ - 1))
    all_eigenvalues = np.zeros((n_trials, model.n_states_))
    for trial in range(n_trials):
        # Iterate the count rows under their own name.  Reusing the trial
        # index inside the comprehension overwrites it on Python 2, where
        # list-comprehension variables leak into the enclosing scope.
        T = np.vstack([rng.dirichlet(count_row) for count_row in C])
        u = _solve_msm_eigensystem(T, k=6)[0]
        # Keeping only the real part quiets a warning.
        # NOTE(review): the original author was unsure whether dropping
        # the imaginary part is legitimate here.
        u = np.real(u)
        all_eigenvalues[trial] = u
        all_timescales[trial] = -1 / np.log(u[1:])
def calculate_fitness(population_dihedral, diheds, score_global, i, lock):
    """Score one feature subset with a cross-validated MSM pipeline.

    The columns given by ``population_dihedral`` are taken from every array
    in ``diheds``.  The result is passed through tICA (lag 2, 5
    components), MiniBatchKMeans (200 clusters, fixed seed) and a
    MarkovStateModel (lag 50, 5 timescales) evaluated with 5-fold
    cross-validation.  The median test score over the folds is printed and
    stored in ``score_global`` under key ``i``.

    Parameters
    ----------
    population_dihedral
        Column selector applied as ``X[:, population_dihedral]``.
    diheds
        Sequence of arrays that support two-axis indexing.
    score_global
        Mapping updated in place via ``update({i: score})``.
    i
        Key under which the score is stored.
    lock
        Object with ``acquire()`` / ``release()`` guarding ``score_global``.

    Returns
    -------
    None
    """
    import pandas as pd
    import numpy as np

    # The parameter name ``i`` is part of the interface; keep its value
    # under a descriptive name so no later loop can clobber it.
    pop_index = i
    new_diheds = [X[:, population_dihedral] for X in diheds]

    from msmbuilder.preprocessing import RobustScaler
    scaler = RobustScaler()
    # NOTE(review): the original code computed the scaled features and then
    # immediately replaced them with the unscaled ones, so tICA has always
    # been fit on unscaled data.  That behaviour is preserved here; confirm
    # whether scaling was meant to be enabled.
    scaler.fit_transform(new_diheds)
    scaled_diheds = new_diheds

    from msmbuilder.decomposition import tICA
    tica_model = tICA(lag_time=2, n_components=5)
    tica_model.fit(scaled_diheds)
    tica_trajs = tica_model.transform(scaled_diheds)

    from msmbuilder.cluster import MiniBatchKMeans
    clusterer = MiniBatchKMeans(n_clusters=200, random_state=42)
    clustered_trajs = clusterer.fit_transform(tica_trajs)

    from msmbuilder.msm import MarkovStateModel
    msm = MarkovStateModel(lag_time=50, n_timescales=5)

    # NOTE(review): sklearn.cross_validation is the legacy module; newer
    # scikit-learn releases provide KFold in sklearn.model_selection with a
    # different signature.
    from sklearn.cross_validation import KFold
    n_states = [4]
    cv = KFold(len(clustered_trajs), n_folds=5)
    results = []
    for n in n_states:
        # NOTE(review): n_states_ is set by hand before fit(); presumably
        # fit() recomputes it, so this may have no effect -- confirm.
        msm.n_states_ = n
        for fold, (train_index, test_index) in enumerate(cv):
            train_data = [clustered_trajs[idx] for idx in train_index]
            test_data = [clustered_trajs[idx] for idx in test_index]
            msm.fit(train_data)
            train_score = msm.score(train_data)
            test_score = msm.score(test_data)
            time_score = msm.timescales_[0]
            time_test_score = time_score + test_score
            print(time_score)
            print(test_score)
            # Plain average of the first timescale and the test score.
            av_score = time_test_score / 2
            results.append({
                'train_score': train_score,
                'test_score': test_score,
                'time_score': time_score,
                'av_score': av_score,
                'n_states': n,
                'fold': fold
            })
            print(msm.timescales_)

    results = pd.DataFrame(results)
    avgs = (results.groupby('n_states').aggregate(np.median)
            .drop('fold', axis=1))
    best_nt = avgs['test_score'].idxmax()
    best_scorent = avgs.loc[best_nt, 'test_score']
    print(best_scorent)

    # Release the lock even if the update raises, so that other workers
    # sharing it are not blocked forever.
    lock.acquire()
    try:
        score_global.update({pop_index: best_scorent})
    finally:
        lock.release()