def test_2(self):
    """Check the estimate from ``dtraj2`` against the reference within +/- 2.0.

    The reference is element 1 of ``timescales(self.P2)``; the estimate is
    row 0 of the ``timescales`` attribute returned by ``msm.timescales_msm``.
    """
    reference = timescales(self.P2)[1]
    its = msm.timescales_msm([self.dtraj2], lags=[1, 2, 3, 4, 5])
    estimated = its.timescales[0]

    tolerance = 2.0
    # Strict two-sided bound: reference - tol < estimated < reference + tol.
    np.testing.assert_array_less(estimated, reference + tolerance)
    np.testing.assert_array_less(reference - tolerance, estimated)
def test_2(self):
    """Check the estimate from ``dtraj2`` against the reference within +/- 2.0.

    The reference is element 1 of ``timescales(self.P2)``; the estimate is
    row 0 of the ``timescales`` attribute returned by ``msm.timescales_msm``.
    """
    t2 = timescales(self.P2)[1]
    lags = [1, 2, 3, 4, 5]
    its = msm.timescales_msm([self.dtraj2], lags=lags)
    est = its.timescales[0]
    # np.alltrue was deprecated in NumPy 1.25 and removed in NumPy 2.0;
    # np.all is the supported, behaviorally identical spelling.
    assert np.all(est < t2 + 2.0)
    assert np.all(est > t2 - 2.0)
def test_4_2(self):
    """Check the estimate from ``dtraj4_2`` against the reference within +/- 20.0.

    A single lag time is used: the integer part of the reference value taken
    from element 1 of ``timescales(self.P4)``.
    """
    reference = timescales(self.P4)[1]
    its = msm.timescales_msm([self.dtraj4_2], lags=[int(reference)])
    estimated = its.timescales[0]

    tolerance = 20.0
    # Strict two-sided bound: reference - tol < estimated < reference + tol.
    np.testing.assert_array_less(estimated, reference + tolerance)
    np.testing.assert_array_less(reference - tolerance, estimated)
def test_4_2(self):
    """Check the estimate from ``dtraj4_2`` against the reference within +/- 20.0.

    A single lag time is used: the integer part of the reference value taken
    from element 1 of ``timescales(self.P4)``.
    """
    t4 = timescales(self.P4)[1]
    lags = [int(t4)]
    its = msm.timescales_msm([self.dtraj4_2], lags=lags)
    est = its.timescales[0]
    # np.alltrue was deprecated in NumPy 1.25 and removed in NumPy 2.0;
    # np.all is the supported, behaviorally identical spelling.
    assert np.all(est < t4 + 20.0)
    assert np.all(est > t4 - 20.0)
def test_timescales(self):
    """``mincount_connectivity`` passed to ``timescales_msm`` shows up on the estimator."""
    from pyemma.msm import timescales_msm

    options = dict(lags=[1, 2], mincount_connectivity=0, errors=None)
    result = timescales_msm(self.dtraj, **options)
    assert result.estimator.mincount_connectivity == 0
def test_its_msm(self):
    """Compare estimated timescales on the double-well data with stored reference values.

    One reference row per lag time in ``[1, 10, 100, 1000]``; the comparison
    is deliberately loose (``rtol=0.1``, ``atol=10.0``).
    """
    dtraj = self.double_well_data.dtraj_T100K_dt10_n6good
    its = msm.timescales_msm([dtraj], lags=[1, 10, 100, 1000])
    expected = np.array([
        [174.22244263, 3.98335928, 1.61419816, 1.1214093, 0.87692952],
        [285.56862305, 6.66532284, 3.05283223, 2.6525504, 1.9138432],
        [325.35442195, 24.17388446, 20.52185604, 20.10058217, 17.35451648],
        [343.53679359, 255.92796581, 196.26969348, 195.56163418, 170.58422303],
    ])
    # rough agreement with MLE
    agrees = np.allclose(its.timescales, expected, rtol=0.1, atol=10.0)
    assert agrees
def compute_nice(self, reversible):
    """Run the default implied-timescales estimation on every entry of ``self.dtrajs``.

    This is a smoke test: it succeeds as long as no call raises.

    :param reversible: forwarded as the ``reversible`` keyword of
        ``msm.timescales_msm``.
    :return: None
    """
    # Iterate the trajectories directly; the result is intentionally discarded
    # because only error-free execution is being checked.
    for dtraj in self.dtrajs:
        msm.timescales_msm(dtraj, reversible=reversible)
def test_too_large_lagtime(self):
    """Only the lag times 1 and 2 are expected to remain for a 5-frame trajectory."""
    import warnings

    requested_lags = [1, 2, 3, 4, 5, 6, 7, 8]
    # 3, 4 is impossible because no finite timescales.
    expected_lags = [1, 2]
    short_dtraj = [[0, 1, 1, 1, 0]]

    with warnings.catch_warnings(record=True) as w:
        warnings.simplefilter("always")
        its = msm.timescales_msm(short_dtraj, lags=requested_lags, reversible=False)
        # FIXME: we do not trigger a UserWarning, but msmtools.exceptions.SpectralWarning, intended?
        # assert issubclass(w[-1].category, UserWarning)

    np.testing.assert_equal(its.lags, expected_lags)
def _algorithm(self):
    """Post-process the current simulation data and write inputs for new runs.

    Steps, in the order they run:

    1. Build the simulation list from ``self.datapath`` / ``self.inputpath``
       and pass it through ``simfilter``.
    2. Project with ``MetricDistance`` when ``metricsel2`` is set, otherwise
       with ``MetricSelfDistance``.
    3. Optionally apply TICA (``self.ticadim > 0``).
    4. Cluster with a heuristic number of clusters; if fewer than 10 clusters
       remain, cluster again without ``mergesmall`` and sample with
       replacement later on.
    5. Choose the number of macrostates, build the Markov model, compute the
       spawn counts and write the inputs for the sampled frames.
    """
    logger.info('Postprocessing new data')
    datalist = simlist(glob(path.join(self.datapath, '*', '')),
                       glob(path.join(self.inputpath, '*', 'structure.pdb')),
                       glob(path.join(self.inputpath, '*', '')))
    filtlist = simfilter(datalist, self.filteredpath, filtersel=self.filtersel)

    if hasattr(self, 'metricsel2') and self.metricsel2 is not None:
        proj = MetricDistance(self.metricsel1, self.metricsel2, metric=self.metrictype)
    else:
        proj = MetricSelfDistance(self.metricsel1, metric=self.metrictype)
    metr = Metric(filtlist, skip=self.skip)
    metr.projection(proj)
    data = metr.project()

    #if self.contactsym is not None:
    #    contactSymmetry(data, self.contactsym)

    data.dropTraj()
    if self.ticadim > 0:
        tica = TICA(data, int(max(2, np.ceil(20/self.skip))))
        datadr = tica.project(self.ticadim)
    else:
        datadr = data

    K = int(max(np.round(0.6 * np.log10(datadr.numFrames/1000)*1000+50), 100))  # heuristic
    if K > datadr.numFrames / 3:  # Freaking ugly patches ...
        K = int(datadr.numFrames / 3)

    datadr.cluster(self.clustmethod(n_clusters=K), mergesmall=5)
    replacement = False
    if datadr.K < 10:
        # Too few clusters survived the merge: cluster again without
        # mergesmall and allow sampling with replacement below.
        datadr.cluster(self.clustmethod(n_clusters=K))
        replacement = True

    model = Model(datadr)
    macronum = self.macronum
    if datadr.K < macronum:
        # np.ceil returns a float; the value is used as a count (nits) below,
        # so it must be an integer.
        macronum = int(np.ceil(datadr.K / 2))
        logger.warning('Using less macrostates than requested due to lack of microstates. \nmacronum = ' + str(macronum))

    # Limit the number of macrostates by how many timescale entries exceed the lag time.
    from pyemma.msm import timescales_msm
    timesc = timescales_msm(datadr.St.tolist(), lags=self.lag, nits=macronum).get_timescales()
    macronum = min(self.macronum, max(np.sum(timesc > self.lag), 2))

    model.markovModel(self.lag, macronum)
    p_i = self._criteria(model, self.method)
    (spawncounts, prob) = self._spawn(p_i, self.nmax-self.running)
    logger.debug('spawncounts {}'.format(spawncounts))
    stateIdx = np.where(spawncounts > 0)[0]
    _, relFrames = model.sampleStates(stateIdx, spawncounts[stateIdx], statetype='micro', replacement=replacement)
    logger.debug('relFrames {}'.format(relFrames))

    self._writeInputs(datadr.rel2sim(np.concatenate(relFrames)))
def _numMacrostates(self, data):
    """Heuristic for calculating the number of macrostates for the Markov model.

    Starts from ``self.macronum``. When ``data.K`` is smaller than that, half
    of ``data.K`` (rounded up) is used for the timescale estimation and a
    warning is logged. The result is the number of timescale entries above
    ``self.lag``, at least 2 and at most ``self.macronum``.

    :param data: object providing ``K`` and ``St``
        (``St.tolist()`` is handed to ``pyemma.msm.timescales_msm``).
    :return: the number of macrostates to use.
    """
    macronum = self.macronum
    if data.K < macronum:
        # np.ceil returns a float; the value is used as a count (nits) below,
        # so it must be an integer.
        macronum = int(np.ceil(data.K / 2))
        logger.warning('Using less macrostates than requested due to lack of microstates. macronum = ' + str(macronum))

    # Calculating how many timescales are above the lag time to limit number of macrostates
    from pyemma.msm import timescales_msm
    timesc = timescales_msm(data.St.tolist(), lags=self.lag, nits=macronum).get_timescales()
    macronum = min(self.macronum, max(np.sum(timesc > self.lag), 2))
    return macronum
def test_too_large_lagtime(self):
    """Only the lag times 1 and 2 are expected to remain for a 5-frame trajectory."""
    requested_lags = [1, 2, 3, 4, 5, 6, 7, 8]
    # 3, 4 is impossible because no finite timescales.
    expected_lags = [1, 2]

    its = msm.timescales_msm([[0, 1, 1, 1, 0]], lags=requested_lags, reversible=False)
    # TODO: should catch warnings!
    # with warnings.catch_warnings(record=True) as w:
    #     warnings.simplefilter("always")
    #     assert issubclass(w[-1].category, UserWarning)
    got_lags = its.lagtimes

    same_shape = np.shape(got_lags) == np.shape(expected_lags)
    assert same_shape
    same_values = np.allclose(got_lags, expected_lags)
    assert same_values
def test_too_large_lagtime(self):
    """Only the lag times 1 and 2 are expected to remain for a 5-frame trajectory.

    The estimation runs with ``n_jobs=1`` while all warnings are recorded.
    """
    import warnings

    requested_lags = [1, 2, 3, 4, 5, 6, 7, 8]
    # 3, 4 is impossible because no finite timescales.
    expected_lags = [1, 2]
    short_dtraj = [[0, 1, 1, 1, 0]]

    with warnings.catch_warnings(record=True) as w:
        warnings.simplefilter("always")
        its = msm.timescales_msm(short_dtraj, lags=requested_lags,
                                 reversible=False, n_jobs=1)

    np.testing.assert_equal(its.lags, expected_lags)
def test_fraction_of_frames(self):
    """``fraction_of_frames`` is 1 for lags 1 and 2 and drops for lag 3.

    At lag 3 the expected fraction is the number of frames in all but the
    first two trajectories divided by the total number of frames.
    """
    short = [
        [0, 1, 0],  # These two will fail for lag >2
        [1, 0, 1],  # These two will fail for lag >2
    ]
    longer = [
        [0, 1, 1, 1],
        [1, 0, 0, 1],
        [0, 1, 0, 1, 0],
        [1, 0, 1, 0, 1],
    ]
    dtrajs = short + longer
    lengths = list(map(len, dtrajs))

    its = msm.timescales_msm(dtrajs, lags=[1, 2, 3])

    total_frames = np.sum(lengths)
    frames_in_longer = np.sum(lengths[2:])
    expected = np.array([1, 1, frames_in_longer / total_frames])
    assert np.allclose(its.fraction_of_frames, expected)
def test_its_bmsm(self):
    """Bayesian timescale estimates on the double-well data agree with stored values.

    The estimates must match the reference loosely (``rtol=0.1``,
    ``atol=10.0``), and row 0 of the estimate must lie strictly inside the
    sample interval returned by ``get_sample_conf(conf=0.999)``.
    """
    dtraj = self.double_well_data.dtraj_T100K_dt10_n6good
    its = msm.timescales_msm([dtraj], lags=[10, 50, 200], errors='bayes',
                             nsamples=1000, n_jobs=1)
    expected = np.array([
        [284.87479737, 6.68390402, 3.0375248, 2.65314172, 1.93066562],
        [320.08583492, 11.14612743, 10.3450663, 9.42799075, 8.2109752],
        [351.41541961, 42.87427869, 41.17841657, 37.35485197, 23.24254608],
    ])
    # rough agreement with MLE
    assert np.allclose(its.timescales, expected, rtol=0.1, atol=10.0)

    # within left / right intervals. This test should fail only 1 out of 1000 times.
    lower, upper = its.get_sample_conf(conf=0.999)
    # we only test the first timescale, because the second is already ambiguous
    # (deviations after the first place), which makes this test fail stochastically.
    first_row = its.timescales[0]
    np.testing.assert_array_less(lower[0], first_row)
    np.testing.assert_array_less(first_row, upper[0])
# Two-panel figure: cluster centers on the left axes, free-energy plot of the
# first two columns of the stacked Y data.
fig, (ax1, ax2) = pp.subplots(1,2)
ax1.scatter(cc_x, cc_y, marker='o', color='black')
# NOTE(review): ax2 is rebound to the return value of plot_free_energy instead
# of being passed to it -- confirm the plot lands on the intended axes.
ax2 = mplt.plot_free_energy(np.vstack(Y)[:,0], np.vstack(Y)[:,1], cbar_label=None)
if args.save:
    pp.savefig(os.path.join(args.save_destination, 'msm_tica_all.png'))
if args.display:
    pp.show()
# Reset the pyplot state before the next figure.
pp.clf()
pp.close()

###
# Estimate the MSM from the mapped discrete trajectories at the chosen lag time.
msm_from_data = msm.estimate_markov_model(dtrajs=mapped_data, lag=lagtime)

# Plot and/or save implied timescales, if requested on the command line.
if args.timescales:
    its = msm.timescales_msm(dtrajs=mapped_data, lags=500)
    mplt.plot_implied_timescales(its, show_mean=False, ylog=True, dt=25, units='ps', linewidth=2)
    if args.save:
        pp.savefig(os.path.join(args.save_destination, 'msm_its.png'))
    if args.display:
        pp.show()
    pp.clf()
    pp.close()

####
# PCCA clustering using the specified n_sets.
# NOTE(review): pcca(n_sets) is called twice; only the second result is kept.
msm_from_data.pcca(n_sets)
pcca_return = msm_from_data.pcca(n_sets)
# NOTE(review): the three attribute accesses below are bare expressions whose
# values are discarded here -- confirm whether they are still needed.
pcca_return.metastable_sets
pcca_return.metastable_assignment
pcca_return.transition_matrix
# Materialize the mapping as a list: on Python 3 ``map`` returns an iterator,
# which cannot be indexed in the loop below.
lcc_sorted_456 = list(map(int, lcc_sorted_456))

# In[24]:

# Relabel every frame of the three 1D discrete trajectories through the
# corresponding lcc_sorted_* lookup.
dtrajs_1D_234_sorted = []
dtrajs_1D_345_sorted = []
dtrajs_1D_456_sorted = []
# The frame count of the 234 trajectory drives the loop for all three.
for i in range(dtrajs_1D_234[0].shape[0]):
    dtrajs_1D_234_sorted.append(lcc_sorted_234[dtrajs_1D_234[0][i]])
    dtrajs_1D_345_sorted.append(lcc_sorted_345[dtrajs_1D_345[0][i]])
    dtrajs_1D_456_sorted.append(lcc_sorted_456[dtrajs_1D_456[0][i]])

# In[25]:

# Implied timescales for each relabelled trajectory over 200 lag times in [1, 1000].
lags = np.linspace(1,1000,200,dtype='int')
its_234 = msm.timescales_msm(dtrajs_1D_234_sorted, lags=lags, nits=n_clusters)
its_345 = msm.timescales_msm(dtrajs_1D_345_sorted, lags=lags, nits=n_clusters)
its_456 = msm.timescales_msm(dtrajs_1D_456_sorted, lags=lags, nits=n_clusters)

# In[27]:

# Dense sliding-window count matrices at lag tau for each trajectory.
tau = 400
Cmat_234 = pyemma.msm.estimation.count_matrix(dtrajs_1D_234_sorted, tau, sliding=True, sparse_return=False)
Cmat_345 = pyemma.msm.estimation.count_matrix(dtrajs_1D_345_sorted, tau, sliding=True, sparse_return=False)
Cmat_456 = pyemma.msm.estimation.count_matrix(dtrajs_1D_456_sorted, tau, sliding=True, sparse_return=False)

# In[28]:

# Sum of the three count matrices; the bare name displays it in a notebook cell.
Cmat_totind = Cmat_234+Cmat_345+Cmat_456
Cmat_totind
def test_lag_generation(self):
    """Passing the integer ``lags=1000`` must yield the expected generated lag series."""
    expected_lags = [1, 2, 3, 5, 8, 12, 18, 27, 41, 62, 93, 140, 210, 315, 473, 710]
    its = msm.timescales_msm(self.dtraj4_2, lags=1000)
    np.testing.assert_array_equal(its.lags, expected_lags)
# Free-energy plot of the first two columns of the stacked Y data.
# NOTE(review): ax2 is bound to the return value of plot_free_energy -- confirm
# this is the intended use of that name.
ax2 = mplt.plot_free_energy(np.vstack(Y)[:, 0], np.vstack(Y)[:, 1], cbar_label=None)
if args.save:
    pp.savefig(os.path.join(args.save_destination, 'msm_tica_all.png'))
if args.display:
    pp.show()
# Reset the pyplot state before the next figure.
pp.clf()
pp.close()

###
# Estimate the MSM from the mapped discrete trajectories at the chosen lag time.
msm_from_data = msm.estimate_markov_model(dtrajs=mapped_data, lag=lagtime)

# Plot and/or save implied timescales, if requested on the command line.
if args.timescales:
    its = msm.timescales_msm(dtrajs=mapped_data, lags=500)
    mplt.plot_implied_timescales(its, show_mean=False, ylog=True, dt=25, units='ps', linewidth=2)
    if args.save:
        pp.savefig(os.path.join(args.save_destination, 'msm_its.png'))
    if args.display:
        pp.show()
    pp.clf()
    pp.close()

####
# PCCA clustering using the specified n_sets.
# Lag times (in trajectory steps) for the first implied-timescales scan.
# The last entry was 100, which duplicated an earlier value and broke the
# increasing sequence; 1000 continues the 100-step spacing.
lags = [
    1, 5, 10, 20, 35, 50, 75, 100, 150, 200, 300, 400, 500, 600, 700, 800, 900,
    1000,
]
implied_ts = pyemma.msm.its(dtrajs=dtrajs, lags=lags, nits=5)
pyemma.plots.plot_implied_timescales(implied_ts, units='time-steps', ylog=False)
#plt.vlines(2,ymin=0,ymax=350,linestyles='dashed')
#plt.annotate("selected model", xy=(lags[-3], implied_ts.timescales[-3][0]), xytext=(15,250),
#             arrowprops=dict(facecolor='black', shrink=0.001, width=0.1,headwidth=8))
# NOTE(review): plt.figure() opens a new figure, so the ylim below applies to
# that new figure rather than to the plot drawn above -- confirm intent.
plt.figure(figsize=(10, 10), dpi=600)
plt.ylim([0, 150])
print(implied_ts)

# Second scan: an integer ``lags`` argument, 10 timescales, no error estimate.
its = msm.timescales_msm(dtrajs, lags=50, nits=10)
print(its)
mplt.plot_implied_timescales(its, ylog=False, units='steps', linewidth=2)
#plt.xlim(0, 40); plt.ylim(0, 50)

# Third scan: same settings with Bayesian error estimation; n_jobs=-1 is passed through.
its = msm.timescales_msm(dtrajs, lags=50, nits=10, errors='bayes', n_jobs=-1)
plt.figure(figsize=(8, 5))
mplt.plot_implied_timescales(its, show_mean=False, ylog=False, dt=0.1, units='ns', linewidth=2)
#plt.xlim(0, 5); plt.ylim(0.1,60);
def _algorithm(self):
    """Post-process the current simulation data and write inputs for new runs.

    Steps, in the order they run:

    1. Build the simulation list from ``self.datapath`` / ``self.inputpath``
       and pass it through ``simfilter``.
    2. Project with ``MetricDistance`` when ``metricsel2`` is set, otherwise
       with ``MetricSelfDistance``.
    3. Optionally apply TICA (``self.ticadim > 0``).
    4. Cluster with a heuristic number of clusters; if fewer than 10 clusters
       remain, cluster again without ``mergesmall`` and sample with
       replacement later on.
    5. Choose the number of macrostates, build the Markov model, compute the
       spawn counts and write the inputs for the sampled frames.
    """
    logger.info('Postprocessing new data')
    datalist = simlist(
        glob(path.join(self.datapath, '*', '')),
        glob(path.join(self.inputpath, '*', 'structure.pdb')),
        glob(path.join(self.inputpath, '*', '')))
    filtlist = simfilter(datalist,
                         self.filteredpath,
                         filtersel=self.filtersel)

    if hasattr(self, 'metricsel2') and self.metricsel2 is not None:
        proj = MetricDistance(self.metricsel1,
                              self.metricsel2,
                              metric=self.metrictype)
    else:
        proj = MetricSelfDistance(self.metricsel1, metric=self.metrictype)
    metr = Metric(filtlist, skip=self.skip)
    metr.projection(proj)
    data = metr.project()

    #if self.contactsym is not None:
    #    contactSymmetry(data, self.contactsym)

    data.dropTraj()
    if self.ticadim > 0:
        tica = TICA(data, int(max(2, np.ceil(20 / self.skip))))
        datadr = tica.project(self.ticadim)
    else:
        datadr = data

    K = int(
        max(np.round(0.6 * np.log10(datadr.numFrames / 1000) * 1000 + 50),
            100))  # heuristic
    if K > datadr.numFrames / 3:  # Freaking ugly patches ...
        K = int(datadr.numFrames / 3)

    datadr.cluster(self.clustmethod(n_clusters=K), mergesmall=5)
    replacement = False
    if datadr.K < 10:
        # Too few clusters survived the merge: cluster again without
        # mergesmall and allow sampling with replacement below.
        datadr.cluster(self.clustmethod(n_clusters=K))
        replacement = True

    model = Model(datadr)
    macronum = self.macronum
    if datadr.K < macronum:
        # np.ceil returns a float; the value is used as a count (nits) below,
        # so it must be an integer.
        macronum = int(np.ceil(datadr.K / 2))
        logger.warning(
            'Using less macrostates than requested due to lack of microstates. \nmacronum = '
            + str(macronum))

    # Limit the number of macrostates by how many timescale entries exceed the lag time.
    from pyemma.msm import timescales_msm
    timesc = timescales_msm(datadr.St.tolist(),
                            lags=self.lag,
                            nits=macronum).get_timescales()
    macronum = min(self.macronum, max(np.sum(timesc > self.lag), 2))

    model.markovModel(self.lag, macronum)
    p_i = self._criteria(model, self.method)
    (spawncounts, prob) = self._spawn(p_i, self.nmax - self.running)
    logger.debug('spawncounts {}'.format(spawncounts))
    stateIdx = np.where(spawncounts > 0)[0]
    _, relFrames = model.sampleStates(stateIdx,
                                      spawncounts[stateIdx],
                                      statetype='micro',
                                      replacement=replacement)
    logger.debug('relFrames {}'.format(relFrames))

    self._writeInputs(datadr.rel2sim(np.concatenate(relFrames)))
# Label and save the free-energy figure for the IC1/IC2 pair (the plot itself
# is drawn before this fragment).
plt.xlabel("IC 1")
plt.ylabel("IC 2")
plt.title("FES IC1-2")
plt.savefig("fes_IC1-2.png")

# Free-energy figure for xall against column 2 of the stacked Y data, with the
# (cc_x, cc_z) points overlaid as black markers.
plt.figure(figsize=(8, 5))
mplt.plot_free_energy(xall, np.vstack(Y)[:, 2], cmap="Spectral")
plt.plot(cc_x, cc_z, linewidth=0, marker='o', markersize=5, color='black')
plt.xlabel("IC 1")
plt.ylabel("IC 3")
plt.title("FES IC1-3")
plt.savefig("fes_IC1-3.png")

# Implied timescales; lags=None is passed through, leaving the choice of lag
# times to timescales_msm.
lags = None
plt.figure(figsize=(8, 5))
its = msm.timescales_msm(dtrajs, lags=lags, nits=10)
mplt.plot_implied_timescales(its, ylog=True, units='steps', linewidth=2)
plt.savefig("its.png")

# its = msm.timescales_msm(dtrajs, lags=lags, nits=10, errors='bayes', n_jobs=-1)
# plt.figure(figsize=(8, 5))
# mplt.plot_implied_timescales(its, show_mean=False, ylog=False, units='steps', linewidth=2)
# plt.savefig("its_errors.png")

# Estimate the MSM at lag msm_lag and report how much of the data it uses.
M = msm.estimate_markov_model(dtrajs, msm_lag)
print('fraction of states used = ', M.active_state_fraction)
print('fraction of counts used = ', M.active_count_fraction)

# Scatter/contour plot of the stationary distribution at the cluster centers
# that belong to the active set.
f = plt.figure(figsize=(8, 5))
pi = M.stationary_distribution
ax = mplt.scatter_contour(cc_x[M.active_set], cc_y[M.active_set], pi, fig=f)