def main(path_files, path_to_save):
    """Plot pairwise free-energy maps for every ``traj_*.dat`` file in a directory.

    All matching files are loaded with ``utilities.loadtxtfile`` and stacked
    row-wise.  Columns 1, 2 and 3 of the stacked array are plotted against
    each other as x, y and z (column 0 is not used here).  One figure is
    produced per pair: x-y, x-z and y-z.

    Parameters
    ----------
    path_files : str
        Directory that is searched for ``traj_*.dat`` files.
    path_to_save : str or None
        Directory the ``<a>-<b>_decomposition.png`` images are written to.
        When ``None`` the figures are only shown, not saved.

    Raises
    ------
    ValueError
        If no ``traj_*.dat`` file is found in ``path_files``.
    """
    # Sort so the stacking order does not depend on directory listing order.
    files = sorted(glob.glob(os.path.join(path_files, "traj_*.dat")))
    if not files:
        # np.vstack([]) would raise a ValueError anyway, but with a message
        # that does not point at the real problem.
        raise ValueError(
            "no files matching 'traj_*.dat' found in %r" % (path_files,))

    Y_stack = np.vstack([utilities.loadtxtfile(f) for f in files])
    coords = {"x": Y_stack[:, 1], "y": Y_stack[:, 2], "z": Y_stack[:, 3]}

    for first, second in (("x", "y"), ("x", "z"), ("y", "z")):
        plt.figure(figsize=(8, 5))
        mplt.plot_free_energy(coords[first], coords[second], cmap="Spectral")
        plt.xlabel(first)
        plt.ylabel(second)
        if path_to_save is not None:
            plt.savefig(os.path.join(
                path_to_save, "%s-%s_decomposition.png" % (first, second)))
    plt.show()
def plotFES(data, ic, outfile):
    """Draw a free-energy surface of tIC 1 against another tIC and save it.

    data: TICA data (sequence of per-trajectory arrays, stacked row-wise here)
    ic: zero-based index of the second tIC to plot against tIC 1
    outfile: prefix of the output name; ``fes_1v<ic+1>.eps`` is appended

    Four rows of the module-level array ``cl1`` are overlaid as red
    numbered markers 1-4.
    """
    set_pub()
    fig1 = plt.figure(figsize=(8, 6))

    stacked = np.vstack(data)
    second_label = str(ic + 1)
    fig1, ax1 = mplt.plot_free_energy(
        stacked[:, 0],
        stacked[:, ic],
        kT=0.596,
        cbar_label='Free energy (kcal/mol)',
    )
    ax1.set_xlabel('tIC 1', fontsize=24)
    ax1.set_ylabel('tIC ' + second_label, fontsize=24)

    # Rows of cl1 that get the markers "1".."4", in that order.
    for number, row in enumerate((20, 17, 3, 18), start=1):
        plt.scatter(cl1[row, 0], cl1[row, ic],
                    marker="$%d$" % number, s=100, c='r')

    plt.tight_layout()
    fig1.savefig(outfile + 'fes_1v' + second_label + '.eps')
    fig1.clf()
def run(self):
    """Analyse all trajectories so far and pick restart frames for the next iteration.

    Steps, in order:

    1. Parse the command line (``self.create_arg_parser()``) and load the
       settings module named by ``args.Kconfig``.
    2. Collect ``iter<N>_traj*.dcd`` / ``iter<N>_traj*.npy`` under
       ``args.path`` for N = 0, 1, ... until an iteration has no ``.dcd``.
    3. Run TICA on the ``.npy`` arrays, cluster with k-means, estimate an
       MSM; results are saved as ``npy_iter<cur_iter>_*.npy``.
    4. Choose ``args.n_select`` restart microstates with the strategy named
       by ``Kconfig.strategy`` (``'cmicro'`` or ``'cmacro'``), draw one frame
       per pick and write it as ``iter<cur_iter+1>_input<idx>.pdb``.
    5. Compute Rg, RMSD and a native-contact fraction for every
       ``iter*_out*.pdb`` against ``args.ref`` and write diagnostic plots
       named ``plot_iter<cur_iter>_*.png``.

    Returns nothing: all results are files under ``args.path`` plus progress
    messages on stdout.

    NOTE(review): the original source had lost its indentation; the nesting
    below is reconstructed from the control flow and should be confirmed
    against the upstream file.
    """
    time_start = time.time()
    print("start")
    parser = self.create_arg_parser()
    args = parser.parse_args()
    Kconfig = imp.load_source('Kconfig', args.Kconfig)
    # Not used further down, but the [0] makes a missing input structure
    # fail here rather than later.
    pdb_file = glob.glob(args.path + '/iter*_input*.pdb')[0]

    npy_prefix = args.path + '/npy_iter' + str(args.cur_iter)
    plot_prefix = args.path + '/plot_iter' + str(args.cur_iter)

    # ------------------------------------------------------------------
    # Collect trajectories iteration by iteration.
    # ------------------------------------------------------------------
    p_cont = True
    p_iter = 0
    traj_files = []
    traj_files_npy = []
    iter_arr = []
    while p_cont:
        # Sort each per-iteration list so that the .dcd list, the .npy list
        # and iter_arr stay index-aligned.  (Previously only the .dcd list
        # was sorted, and globally, which breaks the alignment as soon as
        # glob order differs or there are 10+ iterations.)
        traj_files_tmp = sorted(
            glob.glob(args.path + '/iter' + str(p_iter) + '_traj*.dcd'))
        traj_files_npy_tmp = sorted(
            glob.glob(args.path + '/iter' + str(p_iter) + '_traj*.npy'))
        if len(traj_files_tmp) == 0:
            p_cont = False
        else:
            print("iter", str(p_iter), " # files", str(len(traj_files_tmp)))
            traj_files = traj_files + traj_files_tmp
            traj_files_npy = traj_files_npy + traj_files_npy_tmp
            iter_arr = iter_arr + [p_iter] * len(traj_files_tmp)
            p_iter = p_iter + 1
    p_iter_max = p_iter - 1
    iter_arr = np.array(iter_arr)

    get_out_arr = [np.load(npy_file) for npy_file in traj_files_npy]

    # ------------------------------------------------------------------
    # TICA
    # ------------------------------------------------------------------
    tica_lag = Kconfig.tica_lag
    tica_dim = Kconfig.tica_dim
    tica_stride = Kconfig.tica_stride
    if Kconfig.koopman == 'yes':
        try:
            tica_obj = pyemma.coordinates.tica(
                get_out_arr, lag=tica_lag, dim=tica_dim, kinetic_map=True,
                stride=tica_stride, weights='koopman')
            print("koopman works")
        except Exception:
            # Deliberate best-effort: fall back to empirical weights.
            tica_obj = pyemma.coordinates.tica(
                get_out_arr, lag=tica_lag, dim=tica_dim, kinetic_map=True,
                stride=tica_stride, weights='empirical')
            print("koopman failed, using normal tica")
    else:
        tica_obj = pyemma.coordinates.tica(
            get_out_arr, lag=tica_lag, dim=tica_dim, kinetic_map=True,
            stride=tica_stride, weights='empirical')
    print("TICA eigenvalues", tica_obj.eigenvalues)
    print("TICA timescales", tica_obj.timescales)
    y = tica_obj.get_output(stride=tica_stride)
    np.save(npy_prefix + '_tica_y.npy', y)
    print('time tica finished', str(time.time() - time_start))

    # ------------------------------------------------------------------
    # Clustering and MSM
    # ------------------------------------------------------------------
    msm_states = Kconfig.msm_states
    msm_stride = Kconfig.msm_stride
    msm_lag = Kconfig.msm_lag
    cl = pyemma.coordinates.cluster_kmeans(
        data=y, k=msm_states, max_iter=10, stride=msm_stride)
    np.save(npy_prefix + '_tica_dtrajs.npy', cl.dtrajs)
    print('time kmeans finished', str(time.time() - time_start))
    m = pyemma.msm.estimate_markov_model(cl.dtrajs, msm_lag)
    np.save(npy_prefix + '_tica_m.npy', m)
    print('time msm finished', str(time.time() - time_start))

    print("MSM eigenvalues", m.eigenvalues(10))
    print("MSM P connected", m.P)  # only connected
    print("MSM timescales", m.timescales(10))
    print("MSM active set", m.active_set)
    print('fraction of states used = ', m.active_state_fraction)
    print('fraction of counts used = ', m.active_count_fraction)

    # ------------------------------------------------------------------
    # Restart-state selection
    # ------------------------------------------------------------------
    c = m.count_matrix_full
    s = np.sum(c, axis=1)
    print("count matrix sums", s)
    # NOTE(review): when any row sum is zero, q is never defined and the
    # code below raises NameError.  Left as-is because the intended
    # handling is not visible here.
    if 0 not in s:
        q = 1.0 / s
    n_states = c.shape[0]
    dtrajs = [t for t in cl.dtrajs]

    # For every microstate, the (trajectory index, frame index) pairs in it.
    frame_state_list = {n: [] for n in range(n_states)}
    for nn, dt in enumerate(dtrajs):
        for mm, state in enumerate(dt):
            frame_state_list[state].append((nn, mm))

    for k in range(n_states):
        if len(frame_state_list[k]) == 0:
            print('removing state ' + str(k) + ' no frames')
            q[k] = 0.0
    # and normalize the remaining ones
    q /= np.sum(q)

    n_pick = int(args.n_select)
    # NOTE(review): state_picks stays unset (NameError below) when the
    # strategy is unknown or when 'cmacro' finds <= 10 connected states.
    if Kconfig.strategy == 'cmicro':
        state_picks = np.random.choice(np.arange(len(q)), size=n_pick, p=q)
    elif Kconfig.strategy == 'cmacro':
        num_eigenvecs_to_compute = 10
        microstate_transitions_used = c
        num_visited_microstates = c.shape[0]
        states_unique = np.arange(num_visited_microstates)
        visited_microstates = states_unique
        largest_visited_set = msmtools.estimation.largest_connected_set(
            microstate_transitions_used)
        C_largest0 = microstate_transitions_used[largest_visited_set, :][:, largest_visited_set]
        rowsum = np.ravel(C_largest0.sum(axis=1))
        # Drop states whose row in the connected sub-matrix is empty.
        largest_visited_set2 = largest_visited_set[rowsum > 0]
        C_largest = microstate_transitions_used[largest_visited_set2, :][:, largest_visited_set2]
        rowsum = C_largest.sum(axis=1)
        if C_largest.shape[0] > 10:
            if np.min(rowsum) == 0.0:
                # Was `rowsoum` (NameError) in the original.
                print("failed because rowsum", rowsum, C_largest)
                # NOTE(review): `cache` is not defined in this method;
                # presumably a module-level dict - confirm.
                cache['small'] = 'True'
            T_largest = msmtools.estimation.transition_matrix(
                C_largest, reversible=True)
            states_largest = largest_visited_set2
            print("largest_connected_set", states_largest.shape[0])
            MSM_largest = pyemma.msm.markov_model(T_largest)
            current_eigenvecs = MSM_largest.eigenvectors_right(num_eigenvecs_to_compute)
            current_timescales = np.real(MSM_largest.timescales())
            current_eigenvals = np.real(MSM_largest.eigenvalues())
            not_connect = np.where(
                np.in1d(states_unique, states_largest, invert=True))[0]
            all_connect = np.where(np.in1d(states_unique, states_largest))[0]
            print("worked timescales", current_timescales[:10])
            print("not_connected states", not_connect)
            projected_microstate_coords_scaled = sklearn.preprocessing.MinMaxScaler(
                feature_range=(-1, 1)).fit_transform(current_eigenvecs[:, 1:])
            projected_microstate_coords_scaled *= np.sqrt(
                current_timescales[:num_eigenvecs_to_compute - 1]
                / current_timescales[0]).reshape(1, num_eigenvecs_to_compute - 1)

            # How many macrostates to lump the microstates into.
            select_n_macro_type = Kconfig.select_n_macro_type
            if select_n_macro_type == 'const':
                par_num_macrostates = int(Kconfig.num_macrostates)
                num_macrostates = min(par_num_macrostates, num_visited_microstates)
            elif select_n_macro_type == 'kin_var':
                frac_kin_var = 0.5
                kin_var = np.cumsum(current_eigenvals ** 2)
                cut = kin_var[kin_var < kin_var.max() * frac_kin_var]
                num_macrostates = min(max(cut.shape[0], 1), num_visited_microstates)
            elif select_n_macro_type == 'kin_content':
                frac_kin_content = 0.5
                kin_cont = np.cumsum(-1. / np.log(np.abs(current_eigenvals[1:]))) / 2.
                cut = kin_cont[kin_cont < kin_cont.max() * frac_kin_content]
                num_macrostates = min(max(cut.shape[0], 1), num_visited_microstates)

            macrostate_method = 'pcca'
            if macrostate_method == 'pcca':
                m.pcca(num_macrostates)
                macrostate_assignments = {k: v for k, v in enumerate(m.metastable_sets)}
                largest_assign = m.metastable_assignments
                print("macrostate assignments", macrostate_assignments)
                print("mismatch", "largest_assign", largest_assign.shape,
                      "num_visited_microstates", num_visited_microstates)
                all_assign = np.zeros(num_visited_microstates)
                all_assign[all_connect] = largest_assign
                # Every disconnected microstate becomes its own macrostate.
                all_assign[not_connect] = (np.arange(not_connect.shape[0])
                                           + largest_assign.max() + 1)
                print('time macrostate pcca finished', str(time.time() - time_start))
            else:
                kmeans_obj = pyemma.coordinates.cluster_kmeans(
                    data=projected_microstate_coords_scaled,
                    k=num_macrostates, max_iter=10)
                largest_assign = kmeans_obj.assign()[0]
                print('time macrostate kmeans finished', str(time.time() - time_start))
                all_assign = np.zeros(num_visited_microstates)
                all_assign[all_connect] = largest_assign
                all_assign[not_connect] = (np.arange(not_connect.shape[0])
                                           + largest_assign.max() + 1)

            macrostate_assignment_of_visited_microstates = all_assign.astype('int')
            np.save(npy_prefix + '_msm_macrostates.npy',
                    macrostate_assignment_of_visited_microstates)
            print("all_assign", all_assign)

            select_macro_type = 'sto_inv_linear'
            if select_macro_type == 'dmdmd':
                # NOTE(review): unreachable with the hard-coded value above;
                # `nparallel` is not defined in this method.
                macrostate_counts = np.array([
                    np.sum(s[states_unique][macrostate_assignment_of_visited_microstates == macrostate_label])
                    for macrostate_label in range(macrostate_assignment_of_visited_microstates.max() + 1)])
                selected_macrostate = select_restart_state(
                    macrostate_counts[macrostate_counts > 0], 'rand',
                    np.arange(macrostate_counts.shape[0])[macrostate_counts > 0],
                    nparallel=nparallel)
            elif select_macro_type == 'sto_inv_linear':
                macrostate_counts = np.array([
                    np.sum(s[states_unique][macrostate_assignment_of_visited_microstates == macrostate_label])
                    for macrostate_label in range(macrostate_assignment_of_visited_microstates.max() + 1)])
                selected_macrostate = select_restart_state(
                    macrostate_counts[macrostate_counts > 0], 'sto_inv_linear',
                    np.arange(macrostate_counts.shape[0])[macrostate_counts > 0],
                    nparallel=n_pick)
            print("macrostate_counts", macrostate_counts)
            print("selected_macrostate", selected_macrostate)

            select_micro_within_macro_type = 'sto_inv_linear'
            restart_state = np.empty((0))
            for i in range(n_pick):
                selected_macrostate_mask = (
                    macrostate_assignment_of_visited_microstates == selected_macrostate[i])
                counts_in_selected_macrostate = s[states_unique][selected_macrostate_mask]
                if select_micro_within_macro_type == 'sto_inv_linear':
                    # within a macrostate, select a microstate based on count
                    add_microstate = select_restart_state(
                        counts_in_selected_macrostate, 'sto_inv_linear',
                        visited_microstates[selected_macrostate_mask], nparallel=1)
                elif select_micro_within_macro_type == 'rand':
                    add_microstate = select_restart_state(
                        counts_in_selected_macrostate, 'rand',
                        visited_microstates[selected_macrostate_mask], nparallel=1)
                restart_state = np.append(restart_state, add_microstate)
            state_picks = restart_state.astype('int')
            print("state_picks", state_picks)
            print("no exceptions")
    else:
        print("didn't recognize strategy")

    print("selected msm restarts", state_picks)
    # One random (trajectory index, frame index) pair per picked state.
    picks = [
        frame_state_list[state][np.random.randint(0, len(frame_state_list[state]))]
        for state in state_picks
    ]
    traj_select = [traj_files[pick[0]] for pick in picks]
    # NOTE(review): converts the dtraj frame index to a frame of the .dcd;
    # whether the msm_stride factor belongs here should be confirmed.
    frame_select = [pick[1] * tica_stride * msm_stride for pick in picks]
    print('traj_select picks', picks)
    print('frame_select', traj_select)
    print('time frame selection finished', str(time.time() - time_start))

    with open(args.path + "/traj_select.txt", "w") as text_file:
        for idx in range(n_pick):
            text_file.write(traj_select[idx] + ' to iter ' + str(args.cur_iter)
                            + ' idx ' + str(idx) + ' \n')

    # write new input files from frames
    for idx in range(n_pick):
        tmp = md.load(args.path + '/iter0_input0.pdb')
        files = md.load(traj_select[idx], top=args.path + '/iter0_input0.pdb')
        tmp.xyz[0, :, :] = files.xyz[frame_select[idx], :, :]
        tmp.save_pdb(args.path + '/iter' + str(args.cur_iter + 1)
                     + '_input' + str(idx) + '.pdb')
    print('time writing new frames finished', str(time.time() - time_start))

    # ------------------------------------------------------------------
    # Rg, RMSD and native-contact fraction of the output structures
    # ------------------------------------------------------------------
    original_file = md.load(args.path + '/' + args.ref)
    out_files = glob.glob(args.path + '/iter*_out*.pdb')
    out_files.sort()
    BETA_CONST = 50  # 1/nm
    LAMBDA_CONST = 1.8
    NATIVE_CUTOFF = 0.45  # nanometers
    heavy = original_file.topology.select_atom_indices('heavy')
    # Heavy-atom pairs whose residues are more than 3 apart in sequence.
    heavy_pairs = np.array([
        (i, j) for (i, j) in combinations(heavy, 2)
        if abs(original_file.topology.atom(i).residue.index
               - original_file.topology.atom(j).residue.index) > 3])
    # compute the distances between these pairs in the native state
    heavy_pairs_distances = md.compute_distances(original_file[0], heavy_pairs)[0]
    # and get the pairs s.t. the distance is less than NATIVE_CUTOFF
    native_contacts = heavy_pairs[heavy_pairs_distances < NATIVE_CUTOFF]
    r0 = md.compute_distances(original_file[0], native_contacts)

    rg_arr = []
    rmsd_arr = []
    q_arr = []
    for out_file in out_files:
        file2 = md.load(out_file)
        rmsd_val = md.rmsd(file2, original_file, atom_indices=heavy)[0]
        rg_arr.append(md.compute_rg(file2)[0])
        rmsd_arr.append(rmsd_val)
        r = md.compute_distances(file2[0], native_contacts)
        # Soft-cutoff fraction of native contacts for the first frame.
        # (Rebinds q; the selection weights are no longer needed.)
        q = np.mean(1.0 / (1 + np.exp(BETA_CONST * (r - LAMBDA_CONST * r0))), axis=1)[0]
        q_arr.append(q)
    rg_arr = np.array(rg_arr)
    np.save(npy_prefix + '_rg_arr.npy', rg_arr)
    rmsd_arr = np.array(rmsd_arr)
    np.save(npy_prefix + '_rmsd_arr.npy', rmsd_arr)
    q_arr = np.array(q_arr)
    np.save(npy_prefix + '_q_arr.npy', q_arr)

    # ------------------------------------------------------------------
    # Plots
    # ------------------------------------------------------------------
    # Iterating the dict yields its keys; works on Python 2 and 3.
    colornames = list(matplotlib.colors.cnames)
    tica0 = np.array([])
    tica1 = np.array([])
    for traj_y in y:
        tica0 = np.append(tica0, traj_y[:, 0])
        tica1 = np.append(tica1, traj_y[:, 1])

    clf()
    fig = figure()
    ax = fig.add_subplot(111)
    ax.scatter(np.arange(tica_obj.timescales.shape[0]), tica_obj.timescales)
    ax.set_ylabel('TICA Timescales (steps)')
    ax.set_xlabel('# TICA eigenvector')
    ax.set_yscale('log')
    savefig(plot_prefix + '_tica_timescales.png', bbox_inches='tight', dpi=200)

    cumvar = np.cumsum(tica_obj.timescales)
    cumvar /= cumvar[-1]
    clf()
    plot(cumvar, linewidth=2)
    for thres in [0.5, 0.8, 0.95]:
        threshold_index = np.argwhere(cumvar > thres)[0][0]
        print("tica thres, thres_idx", thres, threshold_index)
        vlines(threshold_index, 0.0, 1.0, linewidth=2)
        hlines(thres, 0, cumvar.shape[0], linewidth=2)
    xlabel('Eigenvalue Number', fontsize=16)
    ylabel('cumulative kinetic content', fontsize=16)
    savefig(plot_prefix + '_tica_cumulative_kinetic_content.png',
            bbox_inches='tight', dpi=200)

    msm_timescales = m.timescales(100)
    clf()
    fig = figure()
    ax = fig.add_subplot(111)
    ax.scatter(np.arange(msm_timescales.shape[0]), msm_timescales * tica_stride)
    ax.set_ylabel('MSM Timescales (steps)')
    ax.set_xlabel('# MSM eigenvector')
    ax.set_yscale('log')
    savefig(plot_prefix + '_msm_timescales.png', bbox_inches='tight', dpi=200)

    cumvar = np.cumsum(m.timescales(100))
    cumvar /= cumvar[-1]
    clf()
    plot(cumvar, linewidth=2)
    for thres in [0.5, 0.8, 0.95]:
        threshold_index = np.argwhere(cumvar > thres)[0][0]
        print("msm thres, thres_idx", thres, threshold_index)
        vlines(threshold_index, 0.0, 1.0, linewidth=2)
        hlines(thres, 0, cumvar.shape[0], linewidth=2)
    xlabel('Eigenvalue Number', fontsize=16)
    ylabel('cumulative kinetic content', fontsize=16)
    savefig(plot_prefix + '_msm_cumulative_kinetic_content.png',
            bbox_inches='tight', dpi=200)

    clf()
    xlabel("TICA ev0")
    ylabel("TICA ev1")
    cp = scatter(tica0, tica1, s=10, c='blue', marker='o', linewidth=0.,
                 cmap='jet', label='MSM states')
    savefig(plot_prefix + '_tica_evs.png', bbox_inches='tight', dpi=200)

    clf()
    fig, ax = plots.plot_free_energy(tica0, tica1, cmap='Spectral')
    xlabel("TICA ev0")
    ylabel("TICA ev1")
    savefig(plot_prefix + '_tica_evs2.png', bbox_inches='tight', dpi=200)

    clf()
    fig, ax = plots.plot_free_energy(tica0, tica1, cmap='Spectral')
    cp = scatter(cl.clustercenters[:, 0], cl.clustercenters[:, 1], s=10,
                 c='blue', marker='o', linewidth=0., cmap='jet',
                 label='MSM state centers')
    xlabel("TICA ev0")
    ylabel("TICA ev1")
    legend()
    savefig(plot_prefix + '_tica_evs3_centers.png', bbox_inches='tight', dpi=200)

    # MSM right eigenvectors 1 and 2, used by several plots below.
    msm_evecs = m.eigenvectors_right(10)
    msm_ev1 = msm_evecs[:, 1]
    msm_ev2 = msm_evecs[:, 2]

    clf()
    xlabel("MSM ev1")
    ylabel("MSM ev2")
    cp = scatter(msm_ev1, msm_ev2, s=10, c='blue', marker='o', linewidth=0.,
                 cmap='jet', label='MSM states')
    savefig(plot_prefix + '_msm_evs.png', bbox_inches='tight', dpi=200)

    clf()
    fig, ax = plots.plot_free_energy(msm_ev1, msm_ev2, cmap='Spectral',
                                     weights=m.stationary_distribution, nbins=30)
    xlabel("MSM ev1")
    ylabel("MSM ev2")
    savefig(plot_prefix + '_msm_evs2.png', bbox_inches='tight', dpi=200)

    clf()
    xlabel("RMSD")
    ylabel("Rg")
    cp = scatter(rmsd_arr, rg_arr, s=10, c='blue', marker='o', linewidth=0.,
                 cmap='jet', label='MSM states')
    savefig(plot_prefix + '_rgrmsd.png', bbox_inches='tight', dpi=200)

    clf()
    fig, ax = plots.plot_free_energy(rmsd_arr, rg_arr, cmap='Spectral', nbins=30)
    xlabel("RMSD")
    ylabel("Rg")
    savefig(plot_prefix + '_rgrmsd2.png', bbox_inches='tight', dpi=200)

    clf()
    xlabel("Q")
    ylabel("Rg")
    cp = scatter(q_arr, rg_arr, s=10, c='blue', marker='o', linewidth=0.,
                 cmap='jet', label='MSM states')
    savefig(plot_prefix + '_qrg.png', bbox_inches='tight', dpi=200)

    clf()
    fig, ax = plots.plot_free_energy(q_arr, rg_arr, cmap='Spectral', nbins=10)
    xlabel("Q")
    ylabel("Rg")
    savefig(plot_prefix + '_qrg_2.png', bbox_inches='tight', dpi=200)

    # Q 1d free energy
    clf()
    z, x = np.histogram(q_arr, bins=10)
    F = -np.log(z)
    F = F - F.min()
    plot(x[1:], F)
    scatter(x[1:], F)
    xlabel('Q', fontsize=15)
    ylabel('Free Energy [kT]', fontsize=15)
    savefig(plot_prefix + '_free_energy_q.png', bbox_inches='tight', dpi=200)

    # MSM 1d free energy; bin edges are every n_step-th sorted ev1 value
    # plus the maximum.
    clf()
    n_step = int(m.P.shape[0] / 10)
    bins = np.sort(msm_ev1)[::n_step]
    bins = np.append(bins, np.sort(msm_ev1)[-1])
    z, x = np.histogram(msm_ev1, weights=m.stationary_distribution,
                        density=True, bins=bins)
    F = -np.log(z)
    F = F - F.min()
    plot(x[1:], F)
    scatter(x[1:], F)
    xlabel('MSM ev1', fontsize=15)
    ylabel('Free Energy [kT]', fontsize=15)
    savefig(plot_prefix + '_msm_free_energy.png', bbox_inches='tight', dpi=200)

    # Which TICA frames were selected.  y has the same per-trajectory length
    # as the dtrajs, so it is indexed with the dtraj frame index (pick[1]),
    # not with the stride-scaled frame_select value.
    tica0_sel = np.array([])
    tica1_sel = np.array([])
    for i in range(n_pick):
        tica0_sel = np.append(tica0_sel, y[picks[i][0]][picks[i][1], 0])
        tica1_sel = np.append(tica1_sel, y[picks[i][0]][picks[i][1], 1])
    clf()
    xlabel("TICA ev0")
    ylabel("TICA ev1")
    cp = scatter(tica0, tica1, s=10, c='blue', marker='o', linewidth=0.,
                 cmap='jet', label='all frames')
    cp = scatter(tica0_sel, tica1_sel, s=10, c='red', marker='o', linewidth=0.,
                 cmap='jet', label='selected')
    legend()
    savefig(plot_prefix + '_tica_evs4_selected.png', bbox_inches='tight', dpi=200)

    # Chapman-Kolmogorov test
    ck = m.cktest(2)
    clf()
    pyemma.plots.plot_cktest(ck, diag=True, figsize=(7, 7), layout=(2, 2),
                             padding_top=0.1, y01=False, padding_between=0.3,
                             dt=0.1, units='ns')
    savefig(plot_prefix + '_msm_cktest.png')

    its = pyemma.msm.its(dtrajs, errors='bayes', nits=10)
    clf()
    pyemma.plots.plot_implied_timescales(its, ylog=True, units='steps', linewidth=2)
    savefig(plot_prefix + '_msm_its2.png', bbox_inches='tight', dpi=200)

    # Which MSM states were selected.  m only covers the active
    # (connected) set while the picks index the full state space, so map
    # each pick to its position in m.active_set.
    p_picks_active = []
    for i in state_picks:
        if i in m.active_set:
            p_picks_active.append(np.argwhere(i == m.active_set)[0][0])
    p_picks_active = np.unique(np.array(p_picks_active)).astype(int)
    clf()
    xlabel("MSM ev1")
    ylabel("MSM ev2")
    cp = scatter(msm_ev1, msm_ev2, s=10, c='blue', marker='o', linewidth=0.,
                 cmap='jet', label='MSM states')
    cp = scatter(msm_evecs[p_picks_active, 1], msm_evecs[p_picks_active, 2],
                 s=10, c='red', marker='o', linewidth=0., cmap='jet',
                 label='selected')
    legend(loc='center left', bbox_to_anchor=(1, 0.5))
    savefig(plot_prefix + '_msm_evs_4_select.png', bbox_inches='tight', dpi=200)

    # Cumulative number of distinct microstates seen up to each iteration
    # (p_states is carried over between iterations on purpose).
    p_states = np.array([])
    p_unique = []
    for p_iter in range(p_iter_max + 1):
        p_arr = np.argwhere(iter_arr == p_iter)
        for i in p_arr:
            p_states = np.append(p_states, dtrajs[i[0]])
        p_states = np.unique(p_states).astype(int)
        p_unique.append(p_states.shape[0])
    p_unique = np.array(p_unique)
    np.save(npy_prefix + '_p_unique.npy', p_unique)
    clf()
    fig = figure()
    ax = fig.add_subplot(111)
    ax.scatter(np.arange(p_unique.shape[0]), p_unique)
    ax.set_ylabel('# of current msm states explored')
    ax.set_xlabel('iteration')
    savefig(plot_prefix + '_strategy.png', bbox_inches='tight', dpi=200)

    # TICA scatter coloured by iteration, newest drawn first.
    clf()
    xlabel("TICA ev0")
    ylabel("TICA ev1")
    for p_iter in range(p_iter_max, -1, -1):
        p_arr = np.argwhere(iter_arr == p_iter)
        tica0 = np.array([])
        tica1 = np.array([])
        for i in p_arr:
            tica0 = np.append(tica0, y[i[0]][:, 0])
            tica1 = np.append(tica1, y[i[0]][:, 1])
        cp = scatter(tica0, tica1, s=10, marker='o', linewidth=0., cmap='jet',
                     c=colornames[p_iter], label='iter ' + str(p_iter))
    legend(loc='center left', bbox_to_anchor=(1, 0.5))
    savefig(plot_prefix + '_tica_evs5_iters.png', bbox_inches='tight', dpi=200)

    # MSM eigenvector scatter coloured by iteration.
    clf()
    xlabel("MSM ev1")
    ylabel("MSM ev2")
    for p_iter in range(p_iter_max, -1, -1):
        p_arr = np.argwhere(iter_arr == p_iter)
        p_states = np.array([])
        for i in p_arr:
            p_states = np.append(p_states, dtrajs[i[0]])
        p_states = np.unique(p_states).astype(int)
        p_states_active = []
        for i in p_states:
            if i in m.active_set:
                p_states_active.append(np.argwhere(i == m.active_set)[0][0])
        p_states_active = np.unique(np.array(p_states_active)).astype(int)
        cp = scatter(msm_evecs[p_states_active, 1], msm_evecs[p_states_active, 2],
                     s=10, marker='o', linewidth=0., cmap='spectral',
                     c=colornames[p_iter], label='iter ' + str(p_iter))
    legend(loc='center left', bbox_to_anchor=(1, 0.5))
    savefig(plot_prefix + '_msm_evs_3_iter.png', bbox_inches='tight', dpi=200)
    print('time plotting finished', str(time.time() - time_start))
# Free-energy map of radius of gyration vs. RMSD, followed by k-means
# clustering of the same two coordinates.
# NOTE(review): the factor 10 presumably converts nm to Angstrom (the axis
# labels below say "A") - confirm against where gyrateArray is filled.
gyrateArray = gyrateArray * 10
peptide_name = "peptide"
# Convert from a Python list to a numpy array (required)
xmin = round(np.min(gyrateArray), 2) - 0.01
xmax = round(np.max(gyrateArray), 2) + 0.01
#plt.xlim([xmin,xmax]) # bounds
# Fix the bounds so that all plots share the same scales
ymin = round(np.min(rms), 2)
ymax = round(np.max(rms), 2) + 0.05
plt.xlim([xmin, xmax])
# Plot the free-energy map; x axis: radius of gyration, y axis: RMSD
plt.xlim([xmin, xmax])  # bounds
plt.ylim([ymin, ymax])
mplt.plot_free_energy(gyrateArray, rms)
#plt.plot([Refgyrate],[ymin], '+')
plt.ylabel('RMSD (A)')
plt.xlabel('Radius of gyration (A)')
save_figure('free' + peptide_name + '.pdf', PathOut + "/")  # By default the image is saved as PDF
# Set up the k-means clustering; each row of X is one (gyration, rmsd) point
n_clusters = args.kmeans
Y = np.vstack((gyrateArray, rms))
X = np.transpose(Y)
clustering = coor.cluster_kmeans(X, k=n_clusters, max_iter=100)
dtrajs = clustering.dtrajs
# x / y coordinates of the cluster centers
cc_x = clustering.clustercenters[:, 0]
cc_y = clustering.clustercenters[:, 1]
# TRIALS END HERE # optimal parameters according to cktest: n_clusters = 100, lag_time = 380; # optimal numbers of dtrajs parsed to cktest: 3, 4, 5, 7; perhaps 5 is the best print(Y) print((len(Y))) # xall, yall - first and second tica output dimensions xall = np.vstack(Y)[:, 0] yall = np.vstack(Y)[:, 1] zall = np.vstack(Y)[:, 2] W = np.concatenate(msm.trajectory_weights()) mplt.plot_free_energy(xall, yall) cc_x = clustering.clustercenters[:, 0] cc_y = clustering.clustercenters[:, 1] plt.plot(cc_x, cc_y, linewidth=0, marker='o', markersize=5, color='black') print('fraction of states used = {:f}'.format(msm.active_state_fraction)) print('fraction of counts used = {:f}'.format(msm.active_count_fraction)) print(msm.stationary_distribution) print('sum of weights = {:f}'.format(msm.pi.sum())) print(dir(msm)) print(dir(tica_obj)) # stationary distribution fig, ax, misc = pyemma.plots.plot_contour(xall,
# Cluster the first `ics` tICs with mini-batch k-means, pickle the clustering
# object and plot the cluster centers on top of tIC 1 vs. tIC k free-energy
# maps (k = 2 .. ics).

## outfile names
out = 'thirty_100.pkl'
ics = 3  # look at this many ICs
num_clu = 30

# NOTE: pickle.load executes arbitrary code from the file; only use it on
# TICA output you produced yourself.
with open(indir + tica_file, 'rb') as tica_handle:
    tica = pickle.load(tica_handle)
# Flip the sign of the first tIC in place for every trajectory.
for i in tica:
    i[:, 0] *= -1
tica1 = [i[:, :ics] for i in tica]

cl = coor.cluster_mini_batch_kmeans(tica1, k=num_clu, max_iter=100)
with open(out, 'wb') as out_handle:
    pickle.dump(cl, out_handle)

# Stack once instead of once per plotted column.
tica1_stacked = np.vstack(tica1)
for ic in range(1, ics):
    fig1, ax1 = mplt.plot_free_energy(tica1_stacked[:, 0], tica1_stacked[:, ic])
    plt.scatter(cl.clustercenters[:, 0], cl.clustercenters[:, ic], c='k')
    plt.xlabel('tIC 1', fontsize=18)
    plt.ylabel('tIC %d' % (ic + 1), fontsize=18)
    plt.savefig('cluster_centers1v%d.pdf' % (ic + 1))
    plt.clf()
np.hstack(Y6))).T corner.corner(sixtics, labels=[ r"$tic1$", r"$tic2$", r"$tic3$", r"$tic4$", r"$tic5$", r"$tic6$", r"$\Gamma \, [\mathrm{parsec}]$" ], quantiles=[0.16, 0.5, 0.84], show_titles=True, title_kwargs={"fontsize": 12}) plt.savefig('corner.png') plt.clf() plt.figure(figsize=(8, 5)) mplt.plot_free_energy(np.hstack(Y1), np.hstack(Y2)) plt.xlabel('tic 1') plt.ylabel('tic 2') plt.savefig('tic1-tic2.png') plt.clf() plt.figure(figsize=(5, 3)) plt.plot(np.cumsum(tica.eigenvalues), 'o') plt.xlabel('eigenvalue') plt.ylabel('cummulative sum') plt.savefig('cumsum_tica_eigenvalues.png') plt.clf() plt.title('Feature correlation to tIC 1')
# Overlay the k-means cluster centers and one "flipping" trajectory segment
# on the HMM-weighted free-energy map of the first two TICA coordinates.
from glob import glob
filenames = sorted(glob(path_to_trajs))
print('These are our trajectories that flip.')
print(filenames[16], filenames[42], filenames[145], filenames[149])
import seaborn as sns
sns.set_style("ticks")
#We pick one of these trajectories and the frames of interest, as described in the directory 'picking_new_starting'
plt.figure(figsize=(5, 3))
mplt.plot_free_energy(np.hstack(Y1), np.hstack(Y2), weights=np.hstack(HMM.trajectory_weights()), cmap='YlGnBu_r')
# Each center is drawn twice, once per state colour; the per-center alpha
# values come from state_1_colors / state_2_colors.
for i in range(len(clkmeans_clustercenters[:, 0])):
    plt.scatter(clkmeans_clustercenters[i, 0], clkmeans_clustercenters[i, 1], color='red', s=50, alpha=(state_1_colors[i]))
    plt.scatter(clkmeans_clustercenters[i, 0], clkmeans_clustercenters[i, 1], color='violet', s=50, alpha=(state_2_colors[i]))
# Dashed line from frame 30 to frame 520 of trajectory 16.
# NOTE(review): assumed to sit outside the loop above - the original
# indentation was lost; confirm.
plt.plot([Y1[16][30], Y1[16][520]], [Y2[16][30], Y2[16][520]], color='yellow', linestyle='--')
print(
    "the timescale associated with the slowest collective motion in the system is: %d ps"
    % (msm.timescales_[0]))
print(
    "using vmd to open msm-1-dynamic-mode.xtc, to intepret the slowest dynamic mode of the system"
)
# In[161]:
## Draw the potential of mean force (newly added)
# Look up the MSM population of the microstate each frame belongs to.
pi_0 = msm.populations_[np.concatenate(
    microstate_sequences.labels_, axis=0)]  # microstate stationary population
xall = np.concatenate(tica_trajs)[:, 0]
yall = np.concatenate(tica_trajs)[:, 1]
plot_free_energy(
    xall, yall, weights=pi_0, logscale=True, nbins=100,
    cbar=True)  # pass weights=None to leave the histogram unweighted by population
plt.savefig(resultdir + "/microstate_msm_PMF.png")
# In[162]:
# Judging from the implied timescales above, the microstates need to be lumped into 3 macrostates.
# Lump kinetically close microstates into a few macrostates using the PCCA+ algorithm, which
# facilitates the visualization and interpretation of the kinetics of the system.
pcca = PCCAPlus.from_msm(msm, n_macrostates=3)
macro_trajs = pcca.transform(microstate_sequences.labels_)
# Show the macrostates in tICA space
plt.figure()
plot_states_on_tic_space(resultdir, 'macrostate.png', tica_trajs, macro_trajs, 1, 2)
# For alanine dipeptide, the lumping can also be validated using Ramachandran plots
if this_y2 > a * y1 - (clf.intercept_[0]) / w[1]: Y1_DFG_out.append(this_y1) Y2_DFG_out.append(this_y2) weights_DFG_out.append(this_weight) else: Y1_DFG_in.append(this_y1) Y2_DFG_in.append(this_y2) weights_DFG_in.append(this_weight) delG[system] = -np.log(np.sum(weights_DFG_in) / np.sum(weights_DFG_out)) plt.figure(figsize=(5, 3)) mplt.plot_free_energy(np.hstack(Y1), np.hstack(Y2), weights=np.hstack(MSM.trajectory_weights()), cmap='pink', ncountours=11, vmax=9.6, cbar=False) plt.xlabel('TIC 1', fontsize=12) plt.xticks(fontsize=12) plt.xlim((-75, 87.5)) plt.xticks(np.arange(-75, 76, 25), ['', '', '', '', '', '', '']) plt.ylabel('TIC 2', fontsize=12) plt.yticks(fontsize=12) plt.ylim((-60, 60)) plt.yticks(np.arange(-60, 61, 20), ['', '', '', '', '', '', '']) plt.plot(xx, yy, '--', color='0.25') plt.gca().invert_xaxis() ax, _ = mpl.colorbar.make_axes(plt.gca()) cbar = mpl.colorbar.ColorbarBase(ax,
def _landscape_plot(XY, title='', ax=None, xlabel='TIC0', ylabel='TIC1'):
    """Draw a free-energy landscape and label it.

    ``XY`` is unpacked into the positional arguments of
    ``pymplots.plot_free_energy``; ``ax`` is forwarded to it unchanged.
    The axis labels and the title are then set on the axes that call
    returns.  Returns the ``(fig, ax)`` pair from ``plot_free_energy``.
    """
    figure, axes = pymplots.plot_free_energy(*XY, ax=ax)
    labelling = (
        (axes.set_xlabel, xlabel),
        (axes.set_ylabel, ylabel),
        (axes.set_title, title),
    )
    for apply_label, text in labelling:
        apply_label(text)
    return (figure, axes)
# Python 2 script fragment: parametrize the discretization pipeline, fetch
# the TICA output and plot it with the cluster centers on top.
# NOTE(review): the original indentation was lost; everything after `if 1:`
# is assumed to belong to that block - confirm against the full script.
disc.parametrize()
print tica_obj.cumvar
# TICA output is Y
Y = tica_obj.get_output()
print np.shape(Y)
#print 'Y[0]'
#print Y[0]
print 'number of trajetories = ', np.shape(Y)[0]
#
# mapped_data is the TICA clustered data mapped to the microstates (so integer valued)
mapped_data = input_data.dtrajs
# Plot the TICA free-energy histogram
if 1:
    mplt.plot_free_energy(np.vstack(Y)[:, 0], np.vstack(Y)[:, 1])
    # x / y coordinates of the cluster centers
    cc_x = input_data.clustercenters[:, 0]
    cc_y = input_data.clustercenters[:, 1]
    pp.plot(cc_x, cc_y, linewidth=0, marker='o', markersize=5, color='black')
    # Second call draws the same map again, this time with cbar_label=None.
    mplt.plot_free_energy(np.vstack(Y)[:, 0], np.vstack(Y)[:, 1], cbar_label=None)
    if args.save:
        pp.savefig(os.path.join(args.save_destination, 'msm_tica_clusters.png'))
    if args.display:
        pp.show()
    pp.clf()
    pp.close()
    fig, (ax1, ax2) = pp.subplots(1, 2)
    ax1.scatter(cc_x, cc_y, marker='o', color='black')
def FES(MD_trajectories, MD_top, projected_trajectory,
        proj_idxs=None,
        nbins=100, n_sample=100,
        axlabel='proj'):
    r"""
    Return a molecular visualization widget connected with a free energy plot.

    Parameters
    ----------

    MD_trajectories : str, or list of strings with the filename(s) of the molecular dynamics (MD) trajectories.
        Any file extension that :py:obj:`mdtraj` (.xtc, .dcd etc) can read is accepted.

        Alternatively, a single :obj:`mdtraj.Trajectory` object or a list of them can be given as input.

    MD_top : str to topology filename or directly an :obj:`mdtraj.Topology` object

    projected_trajectory : str to a filename or numpy ndarray of shape (n_frames, n_dims)
        Time-series with the projection(s) that want to be explored. If these have been computed externally,
        you can provide .npy-filenames or readable asciis (.dat, .txt etc).
        NOTE: molpx assumes that there is no time column.

    proj_idxs : list or ndarray of length 2, default is None
        Selection of projection idxs (zero-idxd) to visualize.
        ``None`` is equivalent to ``[0, 1]``.

    nbins : int, default 100
        The number of bins per axis to be used in the histogram (FES)

    n_sample : int, default is 100
        The number of geometries that will be used to represent the FES. The higher the number, the higher the spatial
        resolution of the "click"-action.

    axlabel : str, default is 'proj'
        Format of the labels in the FES plot

    Returns
    --------

    ax :
        :obj:`pylab.Axis` object, the current axes at return time (``_plt.gca()``)
    fig :
        the current figure at return time (``_plt.gcf()``)
    iwd :
        :obj:`nglview.NGLWidget`
    data_sample :
        numpy ndarray of shape (n, n_sample) with the position of the dots in the plot
    geoms :
        :obj:`mdtraj.Trajectory` object with the n_sample geometries shown by the nglwidget

    """
    # A fresh list per call avoids sharing one mutable default between calls.
    if proj_idxs is None:
        proj_idxs = [0, 1]

    data_sample, geoms, data = generate.sample(MD_trajectories, MD_top, projected_trajectory,
                                               proj_idxs=proj_idxs,
                                               n_points=n_sample,
                                               return_data=True)
    data = _np.vstack(data)

    _plt.figure()
    # Use PyEMMA's plotting routine
    plot_free_energy(data[:, proj_idxs[0]], data[:, proj_idxs[1]], nbins=nbins)

    ax = _plt.gca()
    # Raw strings: "\m" is not a valid escape sequence in a normal literal.
    ax.set_xlabel(r'$\mathregular{%s_{%u}}$' % (axlabel, proj_idxs[0]))
    ax.set_ylabel(r'$\mathregular{%s_{%u}}$' % (axlabel, proj_idxs[1]))

    # The geometries are superposed onto the first one before being linked to the plot.
    iwd = sample(data_sample, geoms.superpose(geoms[0]), ax)

    return _plt.gca(), _plt.gcf(), iwd, data_sample, geoms
plt.savefig("traj_%d_ICs.png" % (ij + 1)) # if we have many trajectories having them all open might consume a lot of # memory plt.close() else: Y = trajs clustering = coor.cluster_kmeans(Y, k=numClusters, max_iter=100) dtrajs = clustering.dtrajs cc_x = clustering.clustercenters[:, 0] cc_y = clustering.clustercenters[:, 1] cc_z = clustering.clustercenters[:, 2] xall = np.vstack(Y)[:, 0] yall = np.vstack(Y)[:, 1] plt.figure(figsize=(8, 5)) mplt.plot_free_energy(xall, yall, cmap="Spectral") plt.plot(cc_x, cc_y, linewidth=0, marker='o', markersize=5, color='black') plt.xlabel("IC 1") plt.ylabel("IC 2") plt.title("FES IC1-2") plt.savefig("fes_IC1-2.png") plt.figure(figsize=(8, 5)) mplt.plot_free_energy(xall, np.vstack(Y)[:, 2], cmap="Spectral") plt.plot(cc_x, cc_z, linewidth=0, marker='o', markersize=5, color='black') plt.xlabel("IC 1") plt.ylabel("IC 3") plt.title("FES IC1-3") plt.savefig("fes_IC1-3.png") lags = None
# Excerpt of a Python 2 analysis script (it uses print statements).
# NOTE(review): the nesting below was reconstructed from whitespace-collapsed
# text. Confirm against the original script which statements belong to the
# `if 1:` block and whether clf()/close() are meant to run unconditionally.
disc.parametrize()
print tica_obj.cumvar

#TICA output is Y
Y = tica_obj.get_output()
print np.shape(Y)
#print 'Y[0]'
#print Y[0]
# The first dimension of Y is reported as the number of trajectories.
print 'number of trajetories = ', np.shape(Y)[0]

#
#mapped_data is the TICA clustered data mapped to the microstates (so integer valued)
mapped_data =input_data.dtrajs

#plot tica free energy histogram plot
if 1:  # always-true switch
    # Free energy over the first two columns of the stacked TICA output.
    mplt.plot_free_energy(np.vstack(Y)[:,0], np.vstack(Y)[:,1])

    # First two coordinates of the cluster centers, drawn as black dots
    # (linewidth=0 suppresses the connecting line).
    cc_x = input_data.clustercenters[:,0]
    cc_y = input_data.clustercenters[:,1]
    pp.plot(cc_x,cc_y, linewidth=0, marker='o', markersize=5, color='black')

    # NOTE(review): the same free-energy plot is issued a second time (now
    # with cbar_label=None) without creating a new figure in between --
    # confirm this repetition is intentional.
    mplt.plot_free_energy(np.vstack(Y)[:,0], np.vstack(Y)[:,1], cbar_label=None);

    # Optionally write the figure to args.save_destination and/or show it.
    if args.save:
        pp.savefig(os.path.join(args.save_destination, 'msm_tica_clusters.png'))
    if args.display:
        pp.show()
    pp.clf()
    pp.close()

    # New figure with two side-by-side axes; the left one gets the cluster
    # centers as a scatter plot.
    fig, (ax1, ax2) = pp.subplots(1,2)
    ax1.scatter(cc_x, cc_y, marker='o', color='black')

    # NOTE(review): ax2 is rebound to whatever plot_free_energy returns, and
    # neither ax1 nor ax2 is passed to the call -- confirm which axes the
    # free-energy plot is actually drawn on.
    ax2 = mplt.plot_free_energy(np.vstack(Y)[:,0], np.vstack(Y)[:,1], cbar_label=None)
    if args.save:
        pp.savefig(os.path.join(args.save_destination, 'msm_tica_all.png'))