import numpy as np
from emus import usutils as uu
from emus import emus, avar


def run_EMUS(self):
    # Load data
    psis, cv_trajs, neighbors = uu.data_from_meta(
        self.meta_file_name, self.dim, T=self.T, k_B=self.k_B, period=None)
    # Calculate the partition function for each window
    z, F = emus.calculate_zs(psis, neighbors=neighbors)
    # Calculate error in each z value from the first iteration.
    zerr, zcontribs, ztaus = avar.calc_partition_functions(
        psis, z, F, iat_method='acor')
    # Calculate the PMF from EMUS
    domain = ((-0.5, 6))  # Range of dihedral angle values
    pmf, edges = emus.calculate_pmf(
        cv_trajs, psis, domain, z, nbins=self.num_bins, kT=self.kT,
        use_iter=False)
    # Calculate z using the MBAR iteration.
    z_iter_1, F_iter_1 = emus.calculate_zs(psis, n_iter=1)
    z_iter_2, F_iter_2 = emus.calculate_zs(psis, n_iter=2)
    z_iter_5, F_iter_5 = emus.calculate_zs(psis, n_iter=5)
    z_iter_1k, F_iter_1k = emus.calculate_zs(psis, n_iter=1000)
    # Calculate the PMF again using the iterated z values.
    iterpmf, edges = emus.calculate_pmf(
        cv_trajs, psis, domain, nbins=self.num_bins, z=z_iter_1k, kT=self.kT)
    # Get the asymptotic error of each histogram bin.
    pmf_av_mns, pmf_avars = avar.calc_pmf(
        cv_trajs, psis, domain, z, F, nbins=self.num_bins, kT=self.kT,
        iat_method=np.average(ztaus, axis=0))

    ### Data Output Section ###
    # Store the EMUS and iterative EMUS PMFs for plotting.
    pmf_centers = (edges[0][1:] + edges[0][:-1]) / 2.
    self.x = pmf_centers
    self.y = pmf_av_mns / self.tau
    self.err = 1 / self.tau * np.sqrt(pmf_avars)
    self.y_iter = iterpmf / self.tau
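# run_EMUS above is written as a method. A minimal, hypothetical host class is
# sketched below so the method can actually be exercised; the class name and
# the default T, k_B, and tau values are illustrative assumptions, not part of
# the original code.
class EMUSAnalysis:
    def __init__(self, meta_file_name, T=310., k_B=1.9872e-3, dim=1,
                 num_bins=100, tau=1.):
        self.meta_file_name = meta_file_name  # path to the EMUS meta file
        self.T = T                            # temperature (K, assumed)
        self.k_B = k_B                        # Boltzmann constant (kcal/mol/K, assumed)
        self.kT = k_B * T                     # thermal energy
        self.dim = dim                        # dimensionality of the CV space
        self.num_bins = num_bins              # number of PMF histogram bins
        self.tau = tau                        # scale factor applied to the PMF and its error

    run_EMUS = run_EMUS  # bind the module-level function as a method

# Usage (hypothetical):
# analysis = EMUSAnalysis('metadata.dat')
# analysis.run_EMUS()  # results land in analysis.x, .y, .err, .y_iter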
# (Assumes the EMUS imports above; T, k_B, and kT are assumed to be defined
# earlier in the script.)
meta_file = 'metadata.dat'  # Path to Meta File
dim = 1                     # 1 Dimensional CV space.
# period = 360              # Dihedral Angles periodicity

# Load data
# psis, cv_trajs, neighbors = uu.data_from_meta(meta_file, dim, T=T, k_B=k_B, period=period)
psis, cv_trajs, neighbors = uu.data_from_meta(meta_file, dim, T=T, k_B=k_B)
# psis, cv_trajs, neighbors = uu.data_from_meta(meta_file, dim, T=T, k_B=k_B, nsig=6)
nbins = 200                 # Number of Histogram Bins.

# Calculate the partition function for each window
z, F = emus.calculate_zs(psis, neighbors=neighbors)

# Calculate error in each z value from the first iteration.
zerr, zcontribs, ztaus = avar.calc_partition_functions(psis, z, F, iat_method='acor')
# zerr, zcontribs, ztaus = avar.calc_partition_functions(psis, z, F, neighbors, iat_method='acor')

# Calculate the PMF from EMUS
# domain = ((-180.0, 180.))  # Range of dihedral angle values
domain = ((3.0, 25.0))       # Range of length values
pmf, edges = emus.calculate_pmf(cv_trajs, psis, domain, z, nbins=nbins, kT=kT, use_iter=False)

# Calculate z using the MBAR iteration.
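# The script breaks off at the comment above. Following the run_EMUS method
# earlier in this section, the MBAR iteration step would look like the sketch
# below (the n_iter value of 1000 mirrors that method; adjust as needed):
z_iter_1k, F_iter_1k = emus.calculate_zs(psis, n_iter=1000)
iterpmf, edges = emus.calculate_pmf(cv_trajs, psis, domain, nbins=nbins,
                                    z=z_iter_1k, kT=kT)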
import h5py
import numpy as np
from emus import usutils as uu
from emus import emus, avar


def main():
    a = _parse_args()  # Get Dictionary of Arguments
    kT = a['k_B'] * a['T']
    domain = np.reshape(a['domain'], (-1, 2))

    # Load data
    psis, cv_trajs, neighbors = uu.data_from_meta(
        a['meta_file'], a['n_dim'], T=a['T'], k_B=a['k_B'],
        period=a['period'], nsig=a['sigma'])
    if a['fxn_file'] is not None:
        fdata = uu.fxn_data_from_meta(a['fxn_file'])
    else:
        fdata = None

    # Calculate the partition function for each window
    z, F = emus.calculate_zs(psis, neighbors=neighbors, n_iter=a['n_iter'])

    # Calculate the PMF
    pmf, edges = emus.calculate_pmf(
        cv_trajs, psis, domain, z, neighbors=neighbors, nbins=a['nbins'], kT=kT)

    # Calculate any averages of functions.
    if fdata is not None:
        favgs = []
        for n, fdata_i in enumerate(fdata):
            favgs.append(emus.calculate_obs(psis, z, fdata_i, neighbors=neighbors))

    # Perform Error analysis if requested.
    if a['error'] is not None:
        zEMUS, FEMUS = emus.calculate_zs(psis, neighbors=neighbors, n_iter=0)
        zvars, z_contribs, z_iats = avar.calc_partition_functions(
            psis, zEMUS, FEMUS, neighbors=neighbors, iat_method=a['error'])
        z_avg_taus = np.average(z_iats, axis=0)
        pmf_EMUS, pmf_EMUS_avar = avar.calc_pmf(
            cv_trajs, psis, domain, zEMUS, FEMUS, neighbors=neighbors,
            nbins=a['nbins'], kT=kT, iat_method=z_avg_taus)
        # Perform analysis on any provided functions.
        if fdata is not None:
            favgs_EM = []
            ferrs = []
            fcontribs = []
            for fdata_i in fdata:
                iat, mean, variances = avar.calc_avg_ratio(
                    psis, zEMUS, FEMUS, fdata_i, neighbors=neighbors,
                    iat_method=a['error'])
                favgs_EM.append(mean)
                fcontribs.append(variances)
                ferrs.append(np.sum(variances))

    # Save Data
    f = h5py.File(a['output'] + '_out.hdf5', "w")
    # Save PMF
    pmf_grp = f.create_group("PMF")
    pmf_dset = pmf_grp.create_dataset("pmf", pmf.shape, dtype='f')
    dmn_dset = pmf_grp.create_dataset("domain", np.array(domain).shape, dtype='f')
    pmf_dset[...] = pmf
    dmn_dset[...] = np.array(domain)
    for ie, edg in enumerate(edges):
        edg_dset = pmf_grp.create_dataset("edge_%d" % ie, edg.shape, dtype='f')
        edg_dset[...] = edg
    if a['error'] is not None:
        pmf_EMUS_dset = pmf_grp.create_dataset("pmf_no_iter", pmf_EMUS.shape, dtype='f')
        pmf_EMUS_dset[...] = pmf_EMUS
        pmf_EMUS_avar_dset = pmf_grp.create_dataset("pmf_no_iter_avars", pmf_EMUS_avar.shape, dtype='f')
        pmf_EMUS_avar_dset[...] = pmf_EMUS_avar
    # Save partition functions
    z_grp = f.create_group("partition_function")
    z_dset = z_grp.create_dataset("z", z.shape, dtype='f')
    z_dset[...] = z
    F_dset = z_grp.create_dataset("F", F.shape, dtype='f')
    F_dset[...] = F
    if a['error'] is not None:
        zerr_dset = z_grp.create_dataset("z_avars", np.array(zvars).shape, dtype='f')
        zerr_dset[...] = np.array(zvars)
        zEMUS_dset = z_grp.create_dataset("z_no_iter", zEMUS.shape, dtype='f')
        zEMUS_dset[...] = zEMUS
        FEMUS_dset = z_grp.create_dataset("F_no_iter", FEMUS.shape, dtype='f')
        FEMUS_dset[...] = FEMUS
    # Save function data.
    if fdata is not None:
        f_grp = f.create_group('function_averages')
        f_dset = f_grp.create_dataset("f", np.shape(favgs), dtype='f')
        f_dset[...] = np.array(favgs)
        if a['error'] is not None:
            fvar_dset = f_grp.create_dataset("f_avars", np.shape(ferrs), dtype='f')
            fvar_dset[...] = ferrs
    f.close()
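# _parse_args is not shown in this file. A minimal sketch of what it might
# look like, assuming argparse and the argument names used in main() above;
# all defaults here are illustrative assumptions, not the original values.
def _parse_args():
    import argparse
    parser = argparse.ArgumentParser(description="EMUS analysis of umbrella sampling data")
    parser.add_argument('meta_file', help="Path to the meta file.")
    parser.add_argument('n_dim', type=int, help="Number of CV dimensions.")
    parser.add_argument('--T', type=float, default=310., help="Temperature in Kelvin.")
    parser.add_argument('--k_B', type=float, default=1.9872e-3, help="Boltzmann constant (kcal/mol/K).")
    parser.add_argument('--period', type=float, default=None, help="Period of the CV, if periodic.")
    parser.add_argument('--sigma', type=float, default=None, help="Std. devs. used to truncate window overlap (nsig).")
    parser.add_argument('--fxn_file', default=None, help="Meta file for observable function data.")
    parser.add_argument('--n_iter', type=int, default=0, help="Number of MBAR iterations.")
    parser.add_argument('--nbins', type=int, default=100, help="Number of histogram bins for the PMF.")
    parser.add_argument('--domain', type=float, nargs='+', required=True, help="Histogram domain, as min max pairs.")
    parser.add_argument('--error', default=None, help="IAT method for error analysis, e.g. 'acor'.")
    parser.add_argument('--output', default='emus', help="Prefix for the output HDF5 file.")
    return vars(parser.parse_args())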
def main(): """ """ free_energy_cmap = plt.cm.get_cmap('Blues_r') four_pi = 4.*np.pi kT = parameters['k_B']*parameters['temperature'] beta = 1./(2.*kT) # ---------------------------------------- # RUN EMUS ANALYSIS STEP print('Starting EMUS analysis') psis, cv_trajs, neighbors = usutils.data_from_meta(parameters['emus_meta_file'],parameters['num_biased_dimensions'],T=parameters['temperature'],k_B=parameters['k_B']) # psis is a list of arrays containing the relative weights of each window's cv values in all windows. cv_trajs is a list of arrays containing the raw cv values for each window. neighbors is an 2D array containing indices for neighboring windows to be used. # calculate the partition function for each window z, F = emus.calculate_zs(psis,neighbors=neighbors) # z is an 1D array that contains the normalization constant for each window. F is an 2D array (num windows by num windows) that contains the eigenvalues of the iterature EMUS process. zerr, zcontribs, ztaus = avar.calc_partition_functions(psis,z,F,iat_method='acor') np.savetxt(parameters['output_directory'] + 'emus_stitching_constants.dat',np.c_[list(range(int(parameters['nWindows']))),z,zerr], fmt='%15.10f') r0_k_data, file_list = read_meta(parameters['emus_meta_file']) # file_list not being saved... # ---------------------------------------- # FOR REWEIGHTING, ONLY NEED THE Z ARRAY del psis del cv_trajs del neighbors del F del zerr del zcontribs del ztaus # ---------------------------------------- # LOAD IN UNBIASED AND BIASED CV DATA print('Starting to load in cv data (original and new)') nWindows_range = range(int(parameters['nWindows'])) # assumes windows are numbered with zero-indexing data = ['' for i in nWindows_range] for i in nWindows_range: print('loading window', i) temp_biased_data = np.loadtxt(file_list[i])[:,1] temp_unbiased_data = np.loadtxt(parameters['unbiased_data_files_naming']%(i))[:,:2] if temp_biased_data.shape[0] != temp_unbiased_data.shape[0]: print('unbiased data file', i, 'has different number of values than the biased cv data file from the respective window. 
This should not happen.', temp_unbiased_data.shape, temp_biased_data.shape) sys.exit() temp_data = np.c_[temp_biased_data,temp_unbiased_data[:,0],temp_unbiased_data[:,1]] # biased CV data is row's index 0; unbiased CV data is row's index 1 data[i] = temp_data # ---------------------------------------- # prep 2d histogram arrays xMin = float(parameters['xMin']) xMax = float(parameters['xMax']) yMin = float(parameters['yMin']) yMax = float(parameters['yMax']) xBins = int(parameters['xBins']) yBins = int(parameters['yBins']) delta_x = (xMax - xMin)/xBins delta_y = (yMax - yMin)/yBins print('Bin widths:', delta_x, delta_y) x_half_bins = np.array([xMin + delta_x*(i+0.5) for i in range(xBins)]) y_half_bins = np.array([yMin + delta_y*(i+0.5) for i in range(yBins)]) x_edges = np.array([xMin + delta_x*i for i in range(xBins+1)]) y_edges = np.array([yMin + delta_y*i for i in range(yBins+1)]) # ---------------------------------------- # REWEIGHTING BIASED CV FE SURFACE INTO A 2D CV SPACE nValues_total = 0 x_total_fe_counts = np.zeros(xBins) y_total_fe_counts = np.zeros(yBins) td_total_fe_counts = np.zeros((xBins,yBins)) for i in nWindows_range: nValues = len(data[i]) nValues_total += nValues x_window_counts = np.zeros(xBins) y_window_counts = np.zeros(yBins) x_window_fe_counts = np.zeros(xBins) y_window_fe_counts = np.zeros(yBins) with open(parameters['output_directory'] + 'window%03d.frame_weights.dat'%(i),'w') as W: for j in range(nValues): # ---------------------------------------- # HISTOGRAMMING DATA POINT x_index = int((data[i][j][1] - xMin)/delta_x) y_index = int((data[i][j][2] - yMin)/delta_y) if x_index < 0 or x_index > xBins: print('!!! 0 > x_index >= xBins ...', data[i][j][0], x_index, i, 'Histogram bounds are not wide enough in the x-dimension. Job failed.') sys.exit() elif x_index == xBins: x_index = -1 if y_index < 0 or y_index > yBins: print('!!! 0 > y_index >= yBins ...', data[i][j][0], y_index, i, 'Histogram bounds are not wide enough in the y-dimension. Job failed.') sys.exit() elif y_index == yBins: y_index = -1 # ---------------------------------------- # ANALYZING DATA POINT IN CONSIDERATION OF CURRENT WINDOW w = np.exp((-beta*r0_k_data[i][1])*(data[i][j][0] - r0_k_data[i][0])**2)/z[i] # exp((-k/2*k_B*T)(r-r0)^2)/z; no volume correction... #w = (data[i][j][0]**2)*np.exp((-beta*r0_k_data[i][1])*(data[i][j][0] - r0_k_data[i][0])**2)/z[i] # r^2 * exp((-k/2*k_B*T)(r-r0)^2)/z; x_window_counts[x_index] += 1 x_window_fe_counts[x_index] += 1/w y_window_counts[y_index] += 1 y_window_fe_counts[y_index] += 1/w # ---------------------------------------- # ANALYZING DATA POINT IN CONSIDERATION OF ALL WINDOWS w = 0 for k in nWindows_range: w+= np.exp((-beta*r0_k_data[k][1])*(data[i][j][0] - r0_k_data[k][0])**2)/z[k] # exp((-k/2*k_B*T)(r-r0)^2)/z; no volume correction... #w+= (data[i][j][0]**2)*np.exp((-beta*r0_k_data[k][1])*(data[i][j][0] - r0_k_data[k][0])**2)/z[k] # r^2 * exp((-k/2*k_B*T)(r-r0)^2)/z; w /= parameters['nWindows'] # <r^2 * exp((-k/2*k_B*T)(r-r0)^2)/z>; average reciprocal boltzmann weight of data point in all possible windows; weight = 1./w W.write('%15d %15f\n'%(j,weight)) x_total_fe_counts[x_index] += weight y_total_fe_counts[y_index] += weight td_total_fe_counts[x_index][y_index] += weight # ---------------------------------------- # FINISHING ANALYSIS OF THE REWEIGHTED PROB. 
        # ----------------------------------------
        # FINISHING ANALYSIS OF THE REWEIGHTED PROB. DENSITY OF EACH INDIVIDUAL WINDOW - XDATA
        x_window_prob_density = x_window_counts/(nValues*delta_x)
        plt.figure(1)
        plt.plot(x_half_bins[:], x_window_prob_density[:], zorder=3)
        # ----------------------------------------
        # FINISHING ANALYSIS OF THE REWEIGHTED FREE ENERGY OF EACH INDIVIDUAL WINDOW - XDATA
        x_window_fe_counts = -kT*np.log(x_window_fe_counts/(nValues*delta_x))  # no volume correction
        # x_window_fe_counts = np.array([-kT*np.log(x_window_fe_counts[j]/(nValues*delta_x*four_pi)) for j in range(xBins)])
        plt.figure(2)
        plt.plot(x_half_bins[x_window_counts > 10], x_window_fe_counts[x_window_counts > 10], zorder=3)
        # ----------------------------------------
        # FINISHING ANALYSIS OF THE REWEIGHTED PROB. DENSITY OF EACH INDIVIDUAL WINDOW - YDATA
        y_window_prob_density = y_window_counts/(nValues*delta_y)
        plt.figure(3)
        plt.plot(y_half_bins[:], y_window_prob_density[:], zorder=3)
        # ----------------------------------------
        # FINISHING ANALYSIS OF THE REWEIGHTED FREE ENERGY OF EACH INDIVIDUAL WINDOW - YDATA
        y_window_fe_counts = -kT*np.log(y_window_fe_counts/(nValues*delta_y))  # no volume correction
        # y_window_fe_counts = np.array([-kT*np.log(y_window_fe_counts[j]/(nValues*delta_y*four_pi)) for j in range(yBins)])
        plt.figure(4)
        plt.plot(y_half_bins[y_window_counts > 10], y_window_fe_counts[y_window_counts > 10], zorder=3)
        print('Done with window', i)

    # ----------------------------------------
    # FINISHED REWEIGHTING, RUNNING BOOTSTRAP ANALYSIS TO GET ERROR BARS
    if parameters['bootstrap_bool']:
        print('Beginning bootstrap analysis to approximate error in reweighted CVs')
        original_data = np.empty((0, 3))
        for i in nWindows_range:
            original_data = np.concatenate((original_data, np.array(data[i])))
        if original_data.shape != (nValues_total, 3):
            print(original_data.shape, nValues_total, 'something went wrong during bootstrapping')
            sys.exit()
        x_bootstrap_results = []
        y_bootstrap_results = []
        td_bootstrap_results = []
        for i in range(parameters['nIterations']):
            print('Starting step %d of %d in bootstrap analysis' % (i+1, parameters['nIterations']))
            # create bootstrap data
            sample_data = original_data[np.random.randint(nValues_total, size=nValues_total)]
            x_total_fe_bootstrap = np.zeros(xBins)
            y_total_fe_bootstrap = np.zeros(yBins)
            td_total_fe_bootstrap = np.zeros((xBins, yBins))
            # analyze new dataset to get reweighted FE of each bin
            for j in range(nValues_total):
                # ----------------------------------------
                # HISTOGRAMMING DATA POINT
                x_index = int((sample_data[j, 1] - xMin)/delta_x)
                y_index = int((sample_data[j, 2] - yMin)/delta_y)
                if x_index == xBins:
                    x_index = -1
                if y_index == yBins:
                    y_index = -1
                w = 0
                for k in nWindows_range:
                    # exp((-k/(2*k_B*T))*(r-r0)^2)/z; no volume correction...
                    w += np.exp((-beta*r0_k_data[k][1])*(sample_data[j, 0] - r0_k_data[k][0])**2)/z[k]
                # average reciprocal boltzmann weight of the data point over all possible windows
                w /= parameters['nWindows']
                x_total_fe_bootstrap[x_index] += 1/w
                y_total_fe_bootstrap[y_index] += 1/w
                td_total_fe_bootstrap[x_index][y_index] += 1/w
            x_total_fe_bootstrap /= delta_x*nValues_total
            x_total_fe_bootstrap = -kT*np.log(x_total_fe_bootstrap)  # no volume correction
            x_total_fe_bootstrap -= np.ndarray.min(x_total_fe_bootstrap)
            x_bootstrap_results.append(x_total_fe_bootstrap)
            y_total_fe_bootstrap /= delta_y*nValues_total
            y_total_fe_bootstrap = -kT*np.log(y_total_fe_bootstrap)  # no volume correction
            y_total_fe_bootstrap -= np.ndarray.min(y_total_fe_bootstrap)
            y_bootstrap_results.append(y_total_fe_bootstrap)
            # currently a stitched probability density; no volume correction
            td_total_fe_bootstrap /= delta_x*delta_y*nValues_total
            td_total_fe_bootstrap = -kT*np.log(td_total_fe_bootstrap)
            td_total_fe_bootstrap -= np.ndarray.min(td_total_fe_bootstrap)
            td_bootstrap_results.append(td_total_fe_bootstrap)
        ### NOTE: CALCS THE STANDARD DEVIATION OF THE BOOTSTRAPPED DATA
        x_std_error = np.std(np.array(x_bootstrap_results), axis=0)
        y_std_error = np.std(np.array(y_bootstrap_results), axis=0)
        td_std_error = np.std(np.array(td_bootstrap_results), axis=0)
        np.savetxt(parameters['output_directory'] + 'x_axis_error_analysis.dat', x_std_error, fmt='%.10f')
        np.savetxt(parameters['output_directory'] + 'y_axis_error_analysis.dat', y_std_error, fmt='%.10f')
        np.savetxt(parameters['output_directory'] + 'td_axis_error_analysis.dat', td_std_error, fmt='%.10f')
        del sample_data
        del x_total_fe_bootstrap
        del y_total_fe_bootstrap
        del td_total_fe_bootstrap
        del x_bootstrap_results
        del y_bootstrap_results
        del td_bootstrap_results

    # ----------------------------------------
    # FINISHED REWEIGHTING, CLEANING UP VARIABLE SPACE
    del data
    del x_window_counts
    del y_window_counts
    del x_window_prob_density
    del y_window_prob_density
    del x_window_fe_counts
    del y_window_fe_counts

    # ----------------------------------------
    # FINISHING PLOTTING OF THE REWEIGHTED PROB. DENSITY OF EACH INDIVIDUAL WINDOW - XDATA
    finish_plot(1, parameters['output_directory']+parameters['reweighted_x_axis_prob_density_plot_name'],
                parameters['x_axis_label'], 'Probability Density', xlim=(xMin, xMax))
    # ----------------------------------------
    # FINISHING PLOTTING OF THE REWEIGHTED, UNSTITCHED FREE ENERGY - XDATA
    finish_plot(2, parameters['output_directory']+parameters['reweighted_x_axis_unstitched_fe_plot_name'],
                parameters['x_axis_label'], r'Relative Free Energy (kcal mol$^{-1}$)', xlim=(xMin, xMax))
    # ----------------------------------------
    # FINISHING PLOTTING OF THE REWEIGHTED PROB. DENSITY OF EACH INDIVIDUAL WINDOW - YDATA
    finish_plot(3, parameters['output_directory']+parameters['reweighted_y_axis_prob_density_plot_name'],
                parameters['y_axis_label'], 'Probability Density', xlim=(yMin, yMax))
    # ----------------------------------------
    # FINISHING PLOTTING OF THE REWEIGHTED, UNSTITCHED FREE ENERGY - YDATA
    finish_plot(4, parameters['output_directory']+parameters['reweighted_y_axis_unstitched_fe_plot_name'],
                parameters['y_axis_label'], r'Relative Free Energy (kcal mol$^{-1}$)', xlim=(yMin, yMax))

    # ----------------------------------------
    # PLOTTING REWEIGHTED X-DATA FE SURFACE
    x_total_fe_counts /= delta_x*nValues_total  # no volume correction
    # x_total_fe_counts /= four_pi*delta_x*nValues_total
    x_total_fe_counts = -kT*np.log(x_total_fe_counts)  # no volume correction
    x_total_fe_counts -= np.ndarray.min(x_total_fe_counts)
    np.savetxt(parameters['output_directory'] + parameters['reweighted_x_axis_stitched_fe_data_file_name'],
               np.c_[range(xBins), x_half_bins, x_total_fe_counts], fmt='%.10f')
    plt.figure(5)
    if parameters['bootstrap_bool']:
        plt.errorbar(x_half_bins[:], x_total_fe_counts[:], yerr=x_std_error, ecolor='r', elinewidth=0.5, zorder=3)
    else:
        plt.plot(x_half_bins[:], x_total_fe_counts[:], zorder=3)
    finish_plot(5, parameters['output_directory']+parameters['reweighted_x_axis_stitched_fe_plot_name'],
                parameters['x_axis_label'], r'Relative Free Energy (kcal mol$^{-1}$)',
                xlim=(xMin, xMax), ylim=(-0.05, 10))  # NOTE

    # ----------------------------------------
    # PLOTTING REWEIGHTED Y-DATA FE SURFACE
    y_total_fe_counts /= delta_y*nValues_total  # no volume correction
    # y_total_fe_counts /= four_pi*delta_y*nValues_total
    y_total_fe_counts = -kT*np.log(y_total_fe_counts)
    y_total_fe_counts -= np.ndarray.min(y_total_fe_counts)
    np.savetxt(parameters['output_directory'] + parameters['reweighted_y_axis_stitched_fe_data_file_name'],
               np.c_[range(yBins), y_half_bins, y_total_fe_counts], fmt='%.10f')
    plt.figure(6)
    if parameters['bootstrap_bool']:
        plt.errorbar(y_half_bins[:], y_total_fe_counts[:], yerr=y_std_error, ecolor='r', elinewidth=0.5, zorder=3)
    else:
        plt.plot(y_half_bins[:], y_total_fe_counts[:], zorder=3)
    finish_plot(6, parameters['output_directory']+parameters['reweighted_y_axis_stitched_fe_plot_name'],
                parameters['y_axis_label'], r'Relative Free Energy (kcal mol$^{-1}$)',
                xlim=(yMin, yMax), ylim=(-0.05, 10))  # NOTE

    # ----------------------------------------
    # PLOTTING REWEIGHTED 2D FE LANDSCAPE
    # currently a stitched probability density; no volume correction
    td_total_fe_counts /= delta_x*delta_y*nValues_total
    # with volume correction:
    # td_total_fe_counts /= four_pi*delta_x*delta_y*nValues_total
    td_total_fe_counts = -kT*np.log(td_total_fe_counts)
    td_total_fe_counts -= np.ndarray.min(td_total_fe_counts)
    np.savetxt(parameters['output_directory'] + parameters['reweighted_2d_heatmap_data_file_name'],
               td_total_fe_counts, fmt='%.10f')
    masked_fe_counts = ma.masked_where(np.isinf(td_total_fe_counts), td_total_fe_counts)
    fig, ax = plt.subplots(num=7)
    plt.pcolormesh(x_edges, y_edges, masked_fe_counts.T, cmap=free_energy_cmap, zorder=3, vmax=10)
    cb1 = plt.colorbar()  # extend='max'
    cb1.set_label(r'Relative Free Energy (kcal mol$^{-1}$)', size=14)
    ax.set_aspect('equal')
    finish_plot(7, parameters['output_directory']+parameters['reweighted_2d_heatmap_plot_name'],
                parameters['x_axis_label'], parameters['y_axis_label'],
                xlim=(xMin, xMax), ylim=(yMin, yMax))  # NOTE
    plt.close()
    print('Done plotting.')

    # ----------------------------------------
    # OUTPUT SUMMARY FILE
    summary(parameters['output_directory'] + 'reweighting.summary', sys.argv, parameters)
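# The helpers called above (read_meta, finish_plot, summary) are not shown in
# this section. Minimal sketches of plausible implementations follow; the meta
# file format assumed here (one "path r0 k" line per window, WHAM-style) and
# all signatures are inferred from the call sites, not taken from the original.

def read_meta(meta_file):
    """Parse an umbrella-sampling meta file; return (r0, k) pairs and data file paths."""
    r0_k_data = []
    file_list = []
    with open(meta_file, 'r') as f:
        for line in f:
            if line.startswith('#') or not line.strip():
                continue
            fields = line.split()
            file_list.append(fields[0])
            r0_k_data.append([float(fields[1]), float(fields[2])])  # [r0, k]
    return np.array(r0_k_data), file_list


def finish_plot(fig_num, out_name, xlabel, ylabel, xlim=None, ylim=None):
    """Label, trim, save, and close a matplotlib figure by number."""
    plt.figure(fig_num)
    plt.xlabel(xlabel, size=14)
    plt.ylabel(ylabel, size=14)
    if xlim is not None:
        plt.xlim(xlim)
    if ylim is not None:
        plt.ylim(ylim)
    plt.grid(True, zorder=0)
    plt.savefig(out_name, dpi=300, transparent=True)
    plt.close(fig_num)


def summary(out_name, argv, parameters):
    """Write a short record of the command line and parameters used."""
    with open(out_name, 'w') as f:
        f.write('Command: %s\n\n' % ' '.join(argv))
        f.write('Parameters:\n')
        for key, value in sorted(parameters.items()):
            f.write('    %s = %s\n' % (key, value))


if __name__ == '__main__':
    main()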