# Build the trajectory reader that feeds featurized frames into the pipeline.
reader = coor.source(trajnames, features=feat)

# Estimate Markov state model
#tica_lag = 20
#keep_dims = 23
#keep_dims = 23
if not noplots:
    # Diagnostic plots used to choose the TICA lag time and the number of
    # dimensions to keep.
    print("Plotting tica timescales vs lagtime...")
    plot_tica_stuff()

#tica_lag = 50   # lagtime where TICA timescales are converged
#keep_dims = 5   # num dims where cumulative variance reaches ~0.8

# Run TICA at the chosen lag time and project onto the leading components.
tica = coor.tica(lag=lagtime, stride=1)
coor.pipeline([reader, tica])
kept_dimensions = range(keep_dims)
Y = tica.get_output(dimensions=kept_dimensions)

#np.save(msm_savedir + "/tica_ti.npy", tica.timescales)

#print("Saving tica coordinates...")
##if not os.path.exists(msm_savedir + "/run_1_TIC_1.npy"):
#for i in range(keep_dims):
#    for n in range(len(Y)):
#        # save TIC with indices of corresponding traj
#        idx1, idx2 = traj_idxs[n]
#        tic_saveas = msm_savedir + "/run_{}_{}_TIC_{}.npy".format(idx1, idx2, i+1)
#        if not os.path.exists(tic_saveas) or resave_tic:
#            np.save(tic_saveas, Y[n][:,i])

# Stop the script here; nothing after this point is executed.
raise SystemExit
def plot_tica_stuff(tica_lags=None, n_timescales=20):
    """Plot TICA diagnostics over a range of lag times.

    For every lag time a TICA estimator is run on the module-level ``reader``
    and its ``cumvar`` and ``timescales`` attributes are collected. Three
    figures are then written into ``msm_savedir``, each as PDF and PNG and
    each titled with ``f_str``:

    * ``tica_its_vs_lag``: leading timescales as a function of lag time,
      with the area under the line ``t = tau`` shaded in gray.
    * ``tica_cumvar``: cumulative kinetic variance versus component index,
      one curve per lag time.
    * ``tica_its``: leading timescales versus component index, one marker
      series per lag time.

    Parameters
    ----------
    tica_lags : sequence of int, optional
        Lag times to scan. Defaults to
        ``[1, 5, 10, 25, 50, 100, 200, 500, 1000]``.
    n_timescales : int, optional
        Maximum number of leading timescales to draw (default 20). It is
        clipped to the number of timescales actually available, so inputs
        with fewer components no longer raise ``IndexError``.
    """
    if tica_lags is None:
        #tica_lags = np.array(range(1, 11) + [12, 15, 20, 25, 50, 75, 100, 150, 200])
        tica_lags = [1, 5, 10, 25, 50, 100, 200, 500, 1000]
    tica_lags = np.asarray(tica_lags)

    # calculate TICA at different lagtimes
    # Results are kept as per-lag lists rather than stacked into one 2-D
    # array, so lag times that yield different numbers of components are fine.
    all_cumvar = []
    all_tica_ti = []
    for lag in tica_lags:
        tica = coor.tica(lag=lag, stride=1)
        coor.pipeline([reader, tica])
        all_cumvar.append(np.asarray(tica.cumvar))
        all_tica_ti.append(np.asarray(tica.timescales))

    # Only draw timescale indices that exist for every scanned lag time.
    n_shown = min([n_timescales] + [len(ts) for ts in all_tica_ti])

    def _save(basename):
        # Write the current figure as PDF first, then PNG.
        for ext in (".pdf", ".png"):
            plt.savefig(msm_savedir + "/" + basename + ext)

    # times vs lag
    plt.figure()
    for i in range(n_shown):
        plt.plot(tica_lags, [ts[i] for ts in all_tica_ti])
    plt.fill_between(tica_lags, tica_lags, color='gray', lw=2)
    #ymin, ymax = plt.ylim()
    #plt.ylim(ymin, ymax)
    plt.grid(True, alpha=1, color='k', ls='--')
    plt.xlabel(r"Lag time $\tau$")
    plt.ylabel(r"TICA $t_i(\tau)$")
    plt.title(f_str)
    _save("tica_its_vs_lag")

    # cumulative variance
    plt.figure()
    for lag, cumvar in zip(tica_lags, all_cumvar):
        plt.plot(np.arange(1, len(cumvar) + 1), cumvar, label=str(lag))
    plt.legend(loc=4)
    plt.grid(True, alpha=1, color='k', ls='--')
    #ymin, ymax = plt.ylim()
    plt.ylim(0, 1)
    plt.xlabel("Index")
    plt.ylabel("Kinetic Variance")
    plt.title(f_str)
    _save("tica_cumvar")

    # times vs index
    plt.figure()
    for lag, timescales in zip(tica_lags, all_tica_ti):
        plt.plot(timescales[:n_timescales], 'o', label=str(lag))
    plt.legend()
    plt.grid(True, alpha=1, color='k', ls='--')
    #ymin, ymax = plt.ylim()
    #plt.ylim(ymin, ymax)
    plt.xlabel("Index")
    plt.ylabel(r"TICA $t_i$")
    plt.title(f_str)
    _save("tica_its")
# Report the size of the data set before any processing starts.
print("There are %d frames total in %d trajectories." % (
    coordinates_source.n_frames_total(),
    coordinates_source.number_of_trajectories(),
))

################################################################################
# Do tICA
################################################################################

print('tICA...')
running_tica = coor.tica(lag=100, dim=100)

################################################################################
# Cluster
################################################################################

print('Clustering...')
clustering = coor.cluster_kmeans(k=100, stride=50)

# Chain source -> tICA -> k-means in a single pipeline.
coor.pipeline([
    coordinates_source,
    running_tica,
    clustering,
])

dtrajs = clustering.dtrajs

# Save discrete trajectories.
clustering.save_dtrajs(output_format='npy', extension='.npy')

################################################################################
# Make tics plot
################################################################################

# Only the first trajectory's tICA output is histogrammed here.
tics = running_tica.get_output()[0]

# 2-D histogram over the first two tICs.
z, x, y = np.histogram2d(tics[:, 0], tics[:, 1], bins=50)
# The +1 keeps empty bins finite under the logarithm.
F = -np.log(z + 1)
# Axis limits of the histogram: [xmin, xmax, ymin, ymax].
extent = [x[0], x[-1], y[0], y[-1]]
# Define coordinates source
################################################################################

# Collect every HDF5 trajectory in source_directory whose name ends in "0.h5".
trajectory_files = glob(os.path.join(source_directory, '*0.h5'))
coordinates_source = coor.source(trajectory_files, featurizer)
print("There are %d frames total in %d trajectories." % (
    coordinates_source.n_frames_total(),
    coordinates_source.number_of_trajectories()))

################################################################################
# Do tICA
################################################################################

print('tICA...')
running_tica = coor.tica(lag=1600, dim=100)

coor.pipeline([coordinates_source, running_tica])

################################################################################
# Make eigenvalues plot
################################################################################

plt.clf()
# Squared tICA eigenvalues are what gets plotted and summed below.
eigenvalues = (running_tica.eigenvalues)**2

sum_eigenvalues = np.sum(eigenvalues[0:2])
# Parenthesised call form: the bare `print "..."` statement used previously
# is a SyntaxError on Python 3, while this form prints the same text on
# Python 2 and Python 3 alike.
print("This is the sum of the first two eigenvalues: %s." % sum_eigenvalues)

plt.plot(eigenvalues)
plt.xlim(0, 4)
plt.ylim(0, 1.2)
coordinates_source.number_of_trajectories())) ################################################################################ # Do tICA ################################################################################ print('tICA...') running_tica = coor.tica(lag=100, dim=100) ################################################################################ # Cluster ################################################################################ print('Clustering...') clustering = coor.cluster_kmeans(k=100, stride=50) coor.pipeline([coordinates_source, running_tica, clustering]) dtrajs = clustering.dtrajs # Save discrete trajectories. clustering.save_dtrajs(output_format='npy', extension='.npy') ################################################################################ # Make tics plot ################################################################################ tics = running_tica.get_output()[0] z, x, y = np.histogram2d(tics[:, 0], tics[:, 1], bins=50) F = -np.log(z + 1) extent = [x[0], x[-1], y[0], y[-1]]
################################################################################
# Define coordinates source
################################################################################

# Collect every HDF5 trajectory in source_directory whose name ends in "0.h5".
trajectory_files = glob(os.path.join(source_directory, '*0.h5'))
coordinates_source = coor.source(trajectory_files, featurizer)
print("There are %d frames total in %d trajectories." % (
    coordinates_source.n_frames_total(),
    coordinates_source.number_of_trajectories()))

################################################################################
# Do tICA
################################################################################

print('tICA...')
running_tica = coor.tica(lag=1600, dim=100)

coor.pipeline([coordinates_source, running_tica])

################################################################################
# Make eigenvalues plot
################################################################################

plt.clf()
# Squared tICA eigenvalues are what gets plotted and summed below.
eigenvalues = (running_tica.eigenvalues)**2

sum_eigenvalues = np.sum(eigenvalues[0:2])
# Parenthesised call form: the bare `print "..."` statement used previously
# is a SyntaxError on Python 3, while this form prints the same text on
# Python 2 and Python 3 alike.
print("This is the sum of the first two eigenvalues: %s." % sum_eigenvalues)

plt.plot(eigenvalues)
plt.xlim(0, 4)
plt.ylim(0, 1.2)
# NOTE(review): this first call is probably guarded by a condition that sits
# above this excerpt -- confirm before relying on it being unconditional.
feat.add_distances(pair_idxs)
if "invdists" in feature_set:
    feat.add_inverse_distances(pair_idxs)
if "rho" in feature_set:
    # Custom feature with one output column per entry of all_pair_idxs.
    rho_feature = CustomFeature(
        local_density_feature,
        len(all_pair_idxs),
        fun_args=(all_pair_idxs, r0, widths),
    )
    feat.add_custom_feature(rho_feature)
if "rg" in feature_set:
    # Custom feature with a single output column.
    rg_custom_feature = CustomFeature(rg_feature, 1)
    feat.add_custom_feature(rg_custom_feature)

# Featurized reader -> TICA -> clustering, run as one pipeline.
reader = coor.source(trajnames, features=feat)
transform = coor.tica(lag=lagtime, stride=stride)
cluster_pipe = [reader, transform, cluster]
pipeline = coor.pipeline(cluster_pipe)
dtrajs = cluster.dtrajs

# Estimate implied timescales from the discrete trajectories at several lags.
#lags = [1,2,5,10,20,50,100,200,300,400,500,600,700,800,900,1000]
lags = [10, 25, 50, 100, 200, 500, 1000]
its = msm.its(dtrajs, lags=lags)

T = 300
save_markov_state_models(T, its.models)

# save name should have n_clusters
saveas = "msm_its_vs_lag_{}".format(n_clusters)
mplt.plot_implied_timescales(its, ylog=False)
#plt.title("T = " + str(T))

# Write the figure as PDF first, then PNG.
for ext in (".pdf", ".png"):
    plt.savefig(msm_savedir + "/" + saveas + ext)