def subsample(Q_n, localQ):
    print 'Subsampling the data'
    g = timeseries.statisticalInefficiency(Q_n)
    indices = numpy.array(timeseries.subsampleCorrelatedData(Q_n, g))
    print '%i uncorrelated samples found of %i original samples' % (len(indices), len(Q_n))
    localQ = localQ[:, indices]
    return localQ
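# --- Editor's sketch (not from the original sources): the minimal decorrelation
# pattern that every snippet in this collection builds on. Assumes the
# pymbar 3.x-era API used above (timeseries.statisticalInefficiency and
# timeseries.subsampleCorrelatedData); the AR(1) test data and all variable
# names are illustrative only.
import numpy
from pymbar import timeseries

rng = numpy.random.RandomState(0)
n_samples, phi = 5000, 0.95
x = numpy.zeros(n_samples)
for t in range(1, n_samples):
    x[t] = phi * x[t - 1] + rng.normal()  # correlated AR(1) series

g = timeseries.statisticalInefficiency(x)             # statistical inefficiency g >= 1
indices = timeseries.subsampleCorrelatedData(x, g=g)  # indices of effectively uncorrelated samples
x_uncorrelated = x[indices]
print('g = %.1f; kept %d of %d samples' % (g, len(indices), n_samples))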
def getNkandUkln(do_dhdl=False):
    """Identifies uncorrelated samples and updates the arrays of the reduced potential energy
    and dhdlt, retaining data entries of these samples only. Assumes that 'dhdlt' and 'u_klt'
    are in memory, as well as proper values for 'sta' and 'fin', i.e. the starting and final
    snapshot positions to be read; both are arrays of dimension K."""
    u_kln = numpy.zeros([K, K, max(fin - sta)], numpy.float64)  # u_kln[k,m,n] is the reduced potential energy of uncorrelated sample index n from state k evaluated at state m
    N_k = numpy.zeros(K, int)  # N_k[k] is the number of uncorrelated samples from state k
    g = numpy.zeros(K, float)  # autocorrelation times for the data
    if do_dhdl:
        dhdl = numpy.zeros([K, n_components, max(fin - sta)], float)  # dhdl is the value of dhdl for each component in the file at each time
        print "\n\nNumber of correlated and uncorrelated samples:\n\n%6s %12s %12s %12s\n" % ('State', 'N', 'N_k', 'N/N_k')
    for k in range(K):
        # Sum up over the energy components; notice that only the relevant data is being used in the third dimension.
        dhdl_sum = numpy.sum(dhdlt[k, :, sta[k]:fin[k]], axis=0)
        # Determine indices of uncorrelated samples from potential autocorrelation analysis at state k
        # (alternatively, could use the energy differences -- here, we will use total dhdl).
        g[k] = timeseries.statisticalInefficiency(dhdl_sum)
        indices = numpy.array(timeseries.subsampleCorrelatedData(dhdl_sum, g=g[k]))  # indices of uncorrelated samples
        N = len(indices)  # number of uncorrelated samples
        # Handle the case where we end up with too few.
        if N < 50:
            if do_dhdl:
                print "WARNING: Only %s uncorrelated samples found at lambda number %s; proceeding with analysis using correlated samples..." % (N, k)
            indices = numpy.arange(len(dhdl_sum))
            N = len(indices)
        N_k[k] = N  # Store the number of uncorrelated samples from state k.
        for l in range(K):
            u_kln[k, l, 0:N] = u_klt[k, l, indices]
        if do_dhdl:
            print "%6s %12s %12s %12.2f" % (k, fin[k], N_k[k], g[k])
            for n in range(n_components):
                dhdl[k, n, 0:N] = dhdlt[k, n, indices]
    if do_dhdl:
        return (dhdl, N_k, u_kln)
    return (N_k, u_kln)
def subsample_series(series, g_t=None, return_g_t=False):
    if g_t is None:
        g_t = timeseries.statisticalInefficiency(series)
    state_indices = timeseries.subsampleCorrelatedData(series, g=g_t, conservative=True)
    N_k = len(state_indices)
    transfer_series = series[state_indices]
    if return_g_t:
        return state_indices, transfer_series, g_t
    else:
        return state_indices, transfer_series
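# --- Editor's note (a sketch, not from the original sources): conservative=True,
# used in subsample_series above, rounds the inefficiency up to an integer
# stride, giving regularly spaced samples; the default mode allows fractional
# strides and typically retains somewhat more points. Synthetic data below.
import numpy
from pymbar import timeseries

x = numpy.random.RandomState(1).normal(size=2000).cumsum()  # strongly correlated random walk
idx_default = timeseries.subsampleCorrelatedData(x)
idx_conservative = timeseries.subsampleCorrelatedData(x, conservative=True)
print('%d samples (default) vs %d (conservative)' % (len(idx_default), len(idx_conservative)))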
def subsample(N_k, U_kn, V_kn, N_kn, g, type):
    K = len(N_k)
    N_k_sampled = numpy.zeros(K, dtype=numpy.int)
    tempspace = numpy.zeros(numpy.max(N_k))
    for k in range(K):
        if (type != 'volume') and (type != 'number'):
            indices = timeseries.subsampleCorrelatedData(U_kn[k, 0:N_k[k]], g[k])
            tempspace = U_kn[k, indices].copy()
            N_k_sampled[k] = numpy.size(indices)
            U_kn[k, 0:N_k_sampled[k]] = tempspace[0:N_k_sampled[k]]
        if (type in requireV):
            indices = timeseries.subsampleCorrelatedData(V_kn[k, 0:N_k[k]], g[k])
            tempspace = V_kn[k, indices].copy()
            N_k_sampled[k] = numpy.size(indices)
            V_kn[k, 0:N_k_sampled[k]] = tempspace[0:N_k_sampled[k]]
        if (type in requireN):
            indices = timeseries.subsampleCorrelatedData(N_kn[k, 0:N_k[k]], g[k])
            tempspace = N_kn[k, indices].copy()
            N_k_sampled[k] = numpy.size(indices)
            N_kn[k, 0:N_k_sampled[k]] = tempspace[0:N_k_sampled[k]]
        print "data has been subsampled using the statistical inefficiencies"
        g[k] = 1.0
        N_k[k] = N_k_sampled[k]
def subsample(U_kn, Q_kn, K, N_max):
    assume_uncorrelated = False
    if assume_uncorrelated:
        print 'Assuming data is uncorrelated'
        N_k = numpy.zeros(K, numpy.int32)
        N_k[:] = N_max
    else:
        print 'Subsampling the data...'
        N_k = numpy.zeros(K, numpy.int32)
        g = numpy.zeros(K, numpy.float64)
        for k in range(K):
            # Subsample the energies.
            g[k] = timeseries.statisticalInefficiency(Q_kn[k])  # ,suppress_warning=True)
            indices = numpy.array(timeseries.subsampleCorrelatedData(Q_kn[k], g=g[k]))  # indices of uncorrelated samples
            N_k[k] = len(indices)  # number of uncorrelated samples
            U_kn[k, 0:N_k[k]] = U_kn[k, indices]
            Q_kn[k, 0:N_k[k]] = Q_kn[k, indices]
    return U_kn, Q_kn, N_k
def subsample1D(pos_kn, N_k, ineff):
    ''' Modifies pos_kn and N_k in place '''
    logger.info("Subsampling using given ICATS")
    K = pos_kn.shape[0]
    for i in range(K):
        indices = timeseries.subsampleCorrelatedData(pos_kn[i, 0:N_k[i]], g=ineff[i])
        newN = len(indices)
        pos_kn[i, 0:newN] = pos_kn[i, indices]
        logger.debug("Original %s New %s", N_k[i], newN)
        N_k[i] = newN
        if newN < 10:
            logger.warn("Very few independent samples %s", newN)
    logger.info("Subsampled using given ICATS")
    return pos_kn, N_k
def subsample(observ, maxIneff):
    '''
    Subsample according to the largest inefficiency.

    Parameters
    ----------
    observ: list of arrays
    maxIneff: array with the inefficiency for each element of observ

    Returns
    -------
    newObserv: list of arrays subsampled according to maxIneff
    '''
    logger.info("Subsampling using given ICATS")
    newObserv = []
    for i, sim in enumerate(observ):
        indices = timeseries.subsampleCorrelatedData(sim[:, 0], g=maxIneff[i])
        newsim = sim[indices, ...]
        newObserv.append(newsim)
    logger.debug("Original %s \nNew %s", [i.shape[0] for i in observ], [i.shape[0] for i in newObserv])
    logger.info("Subsampled using given ICATS")
    return newObserv
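# --- Editor's sketch (not from the original sources): the "largest
# inefficiency" strategy of the function above, shown end to end for one
# simulation with two observables. Taking g as the maximum over the columns
# and reusing a single index set keeps the retained frames aligned across
# observables. Data and names are illustrative.
import numpy
from pymbar import timeseries

rng = numpy.random.RandomState(2)
n_frames = 3000
obs = numpy.zeros([n_frames, 2])
for t in range(1, n_frames):
    obs[t] = 0.9 * obs[t - 1] + rng.normal(size=2)  # two correlated columns

g_max = max(timeseries.statisticalInefficiency(obs[:, j]) for j in range(2))
indices = timeseries.subsampleCorrelatedData(obs[:, 0], g=g_max)
obs_sub = obs[indices, :]  # same frames kept for every column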
# infile.close()
# Parse data.
# n = 0
# for line in lines:
#     if line[0] != '#' and line[0] != '@':
#         tokens = line.split()
#         u_kn[k,n] = beta_k[k] * (float(tokens[2]) - float(tokens[1]))  # reduced potential energy without umbrella restraint
#         n += 1

# Compute correlation times for potential energy and val
# timeseries. If the temperatures differ, use energies to determine samples; otherwise, use the cosine of val
if (DifferentTemperatures):
    g_k[k] = timeseries.statisticalInefficiency(u_kn[k,:], u_kn[k,:])
    print "Correlation time for set %5d is %10.3f" % (k, g_k[k])
    indices = timeseries.subsampleCorrelatedData(u_kn[k,:])
else:
    #g_k[k] = timeseries.statisticalInefficiency(val_kn[k,:], val_kn[k,:])
    #print "Correlation time for set %5d is %10.3f" % (k,g_k[k])
    indices = timeseries.subsampleCorrelatedData(val_kn[k, 0:N_k[k]], fast=True, verbose=True)

# Subsample data.
N_k[k] = len(indices)
u_kn[k, 0:N_k[k]] = u_kn[k, indices]
val_kn[k, 0:N_k[k]] = val_kn[k, indices]
# print val_kn[k,0:N_k[k]]

# Set zero of u_kn -- this is arbitrary.
u_kn -= u_kn.min()

val_min = numpy.min([numpy.min(val_kn[k, 0:N_k[k]]) for k in range(K)])
val_max = numpy.max([numpy.max(val_kn[k, 0:N_k[k]]) for k in range(K)])
infile.close()

# Parse data.
n = 0
for line in lines:
    if line[0] != '#' and line[0] != '@':
        tokens = line.split()
        u_kn[k,n] = beta_k[k] * (float(tokens[2]) - float(tokens[1]))  # reduced potential energy without umbrella restraint
        n += 1

# Compute correlation times for potential energy and chi
# timeseries. If the temperatures differ, use energies to determine samples; otherwise, use the cosine of chi.
if (DifferentTemperatures):
    g_k[k] = timeseries.statisticalInefficiency(u_kn[k,:], u_kn[k,:])
    print "Correlation time for set %5d is %10.3f" % (k, g_k[k])
    indices = timeseries.subsampleCorrelatedData(u_kn[k,:])
else:
    g_k[k] = timeseries.statisticalInefficiency(numpy.cos(chi_kn[k,:] / (180.0 / numpy.pi)), numpy.cos(chi_kn[k,:] / (180.0 / numpy.pi)))
    print "Correlation time for set %5d is %10.3f" % (k, g_k[k])
    indices = timeseries.subsampleCorrelatedData(numpy.cos(chi_kn[k,:] / (180.0 / numpy.pi)))

# Subsample data.
N_k[k] = len(indices)
u_kn[k, 0:N_k[k]] = u_kn[k, indices]
chi_kn[k, 0:N_k[k]] = chi_kn[k, indices]

# Set zero of u_kn -- this is arbitrary.
u_kn -= u_kn.min()

# Construct torsion bins.
print "Binning data..."
delta = (chi_max - chi_min) / float(nbins)
raise "pymbar [https://simtk.org/home/pymbar] must be installed to complete analysis of free energies." # ============================================================================= # Subsample correlated samples to generate uncorrelated subsample. # ============================================================================= print "Subsampling data to remove correlation..." K = nlambda # number of states N_k = nprod_iterations * numpy.ones( [K], numpy.int32) # N_k[k] is the number of uncorrelated samples at state k u_kln_subsampled = numpy.zeros([K, K, nprod_iterations], numpy.float64) # subsampled data for k in range(K): # Get indices of uncorrelated samples. indices = subsampleCorrelatedData(u_kln[k, k, :]) # Store only uncorrelated data. N_k[k] = len(indices) for l in range(K): u_kln_subsampled[k, l, 0:len(indices)] = u_kln[k, l, indices] print "Number of uncorrelated samples per state:" print N_k # ============================================================================= # Analyze with MBAR to compute free energy differences and statistical errors. # ============================================================================= print "Analyzing with MBAR..." mbar = MBAR(u_kln_subsampled, N_k) [Deltaf_ij, dDeltaf_ij] = mbar.getFreeEnergyDifferences() print "Free energy differences (in kT)"
if len(fep_columns) > 0:
    for i in range(len(fep_columns)):
        reduced_fep_data.append(numpy.zeros([K, N_samples], numpy.float64))

for k in range(K):
    # Extract timeseries.
    A_t = biasing_variable_kt[0][k,:]

    # Compute statistical inefficiency.
    try:
        g = timeseries.statisticalInefficiency(A_t)
    except Exception as e:
        print str(e)
        print A_t

    # Subsample data.
    if subsample_trajectories:
        indices = timeseries.subsampleCorrelatedData(A_t, g=g)
    else:
        indices = timeseries.subsampleCorrelatedData(A_t, g=1)
    N = len(indices)  # number of uncorrelated samples
    print "k = %5d : g = %.1f, N = %d" % (k, g, N)
    for i in range(nbiases):
        biasing_variable_kn[i][k, 0:N] = biasing_variable_kt[i][k, indices]
    for i in range(nperturbations + 1):
        U_kn[i][k, 0:N] = U_kt[i][k, indices]
    if not cluster_binning:
        pmf_variable_kn_1[k, 0:N] = pmf_variable_kt_1[k, indices]
        if ndim == 2:
            pmf_variable_kn_2[k, 0:N] = pmf_variable_kt_2[k, indices]
    if cluster_binning:
        cluster_bin_kn[k, 0:N] = cluster_bin_kt[k, indices]

if len(expectation_columns) > 0:
def _subsample_kln(self, u_kln):
    # Try to load in the data
    if self.save_equil_data:  # Check if we want to save/load equilibration data
        try:
            equil_data = numpy.load(os.path.join(self.source_directory, self.save_prefix + self.phase + '_equil_data_%s.npz' % self.subsample_method))
            if self.nequil is None:
                self.nequil = equil_data['nequil']
            elif type(self.nequil) is int and self.subsample_method == 'per-state':
                print "WARNING: Per-state subsampling requested with only a single value for equilibration..."
                try:
                    self.nequil = equil_data['nequil']
                    print "Loading equilibration from file with %i states read" % self.nstates
                except:
                    print "Assuming equal equilibration per state of %i" % self.nequil
                    self.nequil = numpy.array([self.nequil] * self.nstates)
            self.g_t = equil_data['g_t']
            Neff_max = equil_data['Neff_max']
            # Do equilibration if we have not already
            if self.subsample_method == 'per-state' and (len(self.g_t) < self.nstates or len(self.nequil) < self.nstates):
                equil_loaded = False
                raise IndexError
            else:
                equil_loaded = True
        except:
            if self.subsample_method == 'per-state':
                self.nequil = numpy.zeros([self.nstates], dtype=numpy.int32)
                self.g_t = numpy.zeros([self.nstates])
                Neff_max = numpy.zeros([self.nstates])
                for k in xrange(self.nstates):
                    if self.verbose:
                        print "Computing timeseries for state %i/%i" % (k, self.nstates - 1)
                    self.nequil[k] = 0
                    self.g_t[k] = timeseries.statisticalInefficiency(u_kln[k, k, :])
                    Neff_max[k] = (u_kln[k, k, :].size + 1) / self.g_t[k]
                    #[self.nequil[k], self.g_t[k], Neff_max[k]] = self._detect_equilibration(u_kln[k,k,:])
            else:
                if self.nequil is None:
                    [self.nequil, self.g_t, Neff_max] = self._detect_equilibration(self.u_n)
                else:
                    [self.nequil_timeseries, self.g_t, Neff_max] = self._detect_equilibration(self.u_n)
            equil_loaded = False
        if not equil_loaded:
            numpy.savez(os.path.join(self.source_directory, self.save_prefix + self.phase + '_equil_data_%s.npz' % self.subsample_method),
                        nequil=self.nequil, g_t=self.g_t, Neff_max=Neff_max)
    elif self.nequil is None:
        if self.subsample_method == 'per-state':
            self.nequil = numpy.zeros([self.nstates], dtype=numpy.int32)
            self.g_t = numpy.zeros([self.nstates])
            Neff_max = numpy.zeros([self.nstates])
            for k in xrange(self.nstates):
                [self.nequil[k], self.g_t[k], Neff_max[k]] = self._detect_equilibration(u_kln[k, k, :])
                if self.verbose:
                    print "State %i equilibrated with %i samples" % (k, int(Neff_max[k]))
        else:
            [self.nequil, self.g_t, Neff_max] = self._detect_equilibration(self.u_n)
            if self.verbose:
                print [self.nequil, Neff_max]
    # 1) Discard equilibration data
    # 2) Subsample data to obtain uncorrelated samples
    self.N_k = numpy.zeros(self.nstates, numpy.int32)
    if self.subsample_method == 'per-state':
        # Discard samples
        nsamples_equil = self.niterations - self.nequil
        self.u_kln = numpy.zeros([self.nstates, self.nstates, nsamples_equil.max()])
        for k in xrange(self.nstates):
            self.u_kln[k, :, :nsamples_equil[k]] = u_kln[k, :, self.nequil[k]:]
        # Subsample
        transfer_retained_indices = numpy.zeros([self.nstates, nsamples_equil.max()], dtype=numpy.int32)
        for k in xrange(self.nstates):
            state_indices = timeseries.subsampleCorrelatedData(self.u_kln[k, k, :], g=self.g_t[k])
            self.N_k[k] = len(state_indices)
            transfer_retained_indices[k, :self.N_k[k]] = state_indices
        transfer_kln = numpy.zeros([self.nstates, self.nstates, self.N_k.max()])
        self.retained_indices = numpy.zeros([self.nstates, self.N_k.max()], dtype=numpy.int32)
        for k in xrange(self.nstates):
            self.retained_indices[k, :self.N_k[k]] = transfer_retained_indices[k, :self.N_k[k]]  # Memory reduction
            transfer_kln[k, :, :self.N_k[k]] = self.u_kln[k, :, self.retained_indices[k, :self.N_k[k]]].T  # Have to transpose since indexing in this way causes issues
        # Cut down on memory; once this function is done, transfer_kln should be released
        self.u_kln = transfer_kln
        self.retained_iters = self.N_k
    else:
        # Discard samples
        self.u_kln = u_kln[:, :, self.nequil:]
        self.u_n = self.u_n[self.nequil:]
        # Subsample
        indices = timeseries.subsampleCorrelatedData(self.u_n, g=self.g_t)  # indices of uncorrelated samples
        self.u_kln = self.u_kln[:, :, indices]
        self.N_k[:] = len(indices)
        self.retained_indices = indices
        self.retained_iters = len(indices)
    return
    from timeseries import subsampleCorrelatedData
    from pymbar import MBAR
except:
    raise ImportError("pymbar [https://simtk.org/home/pymbar] must be installed to complete analysis of free energies.")

# =============================================================================
# Subsample correlated samples to generate uncorrelated subsample.
# =============================================================================
print "Subsampling data to remove correlation..."
K = nlambda  # number of states
N_k = nprod_iterations * numpy.ones([K], numpy.int32)  # N_k[k] is the number of uncorrelated samples at state k
u_kln_subsampled = numpy.zeros([K, K, nprod_iterations], numpy.float64)  # subsampled data
for k in range(K):
    # Get indices of uncorrelated samples.
    indices = subsampleCorrelatedData(u_kln[k, k, :])
    # Store only uncorrelated data.
    N_k[k] = len(indices)
    for l in range(K):
        u_kln_subsampled[k, l, 0:len(indices)] = u_kln[k, l, indices]
print "Number of uncorrelated samples per state:"
print N_k

# =============================================================================
# Analyze with MBAR to compute free energy differences and statistical errors.
# =============================================================================
print "Analyzing with MBAR..."
mbar = MBAR(u_kln_subsampled, N_k)
[Deltaf_ij, dDeltaf_ij] = mbar.getFreeEnergyDifferences()
print "Free energy differences (in kT)"
g_k = zeros([K], float64)
for k in range(K):
    # Compute statistical inefficiency for extension timeseries.
    g = timeseries.statisticalInefficiency(x_kt[k, 0:T_k[k]], x_kt[k, 0:T_k[k]])
    # Store statistical inefficiency.
    g_k[k] = g
    print "timeseries %d : g = %.1f, %.0f uncorrelated samples (of %d total samples)" % (k + 1, g, floor(T_k[k] / g), T_k[k])
    N_max = max(N_max, ceil(T_k[k] / g) + 1)

# Subsample trajectory position data.
x_kn = zeros([K, N_max], float64)
bin_kn = zeros([K, N_max], int32)
N_k = zeros([K], int32)
for k in range(K):
    # Compute correlation times for potential energy and chi timeseries.
    indices = timeseries.subsampleCorrelatedData(x_kt[k, 0:T_k[k]])
    # Store subsampled positions.
    N_k[k] = len(indices)
    x_kn[k, 0:N_k[k]] = x_kt[k, indices]
    bin_kn[k, 0:N_k[k]] = bin_kt[k, indices]

# Set arbitrary zeros for external biasing potential.
x0_k = zeros([K], float64)  # x position corresponding to zero of potential
for k in range(K):
    x0_k[k] = x_kn[k, 0:N_k[k]].mean()
print "x0_k = "
print x0_k

# Compute bias energies in units of kT.
u_kln = zeros([K, K, N_max], float64)  # u_kln[k,l,n] is the reduced (dimensionless) relative potential energy of snapshot n from umbrella simulation k evaluated at umbrella l
for k in range(K):
def estimate_enthalpies(ncfile, ndiscard=0, nuse=None):
    """Estimate enthalpies of all alchemical states.

    ARGUMENTS
      ncfile (NetCDF) - input YANK netcdf file

    OPTIONAL ARGUMENTS
      ndiscard (int) - number of iterations to discard to equilibration
      nuse (int) - number of iterations to use (after discarding)

    TODO: Automatically determine 'ndiscard'.
    TODO: Combine some functions with estimate_free_energies.
    """

    # Get current dimensions.
    niterations = ncfile.variables['energies'].shape[0]
    nstates = ncfile.variables['energies'].shape[1]
    natoms = ncfile.variables['energies'].shape[2]

    # Extract energies.
    print "Reading energies..."
    energies = ncfile.variables['energies']
    u_kln_replica = zeros([nstates, nstates, niterations], float64)
    for n in range(niterations):
        u_kln_replica[:, :, n] = energies[n, :, :]
    print "Done."

    # Deconvolute replicas.
    print "Deconvoluting replicas..."
    u_kln = zeros([nstates, nstates, niterations], float64)
    for iteration in range(niterations):
        state_indices = ncfile.variables['states'][iteration, :]
        u_kln[state_indices, :, iteration] = energies[iteration, :, :]
    print "Done."

    # Compute total negative log probability over all iterations.
    u_n = zeros([niterations], float64)
    for iteration in range(niterations):
        u_n[iteration] = sum(diagonal(u_kln[:, :, iteration]))
    #print u_n  # DEBUG
    # outfile = open('u_n.out', 'w')
    # for iteration in range(niterations):
    #     outfile.write("%8d %24.3f\n" % (iteration, u_n[iteration]))
    # outfile.close()

    # Discard initial data to equilibration.
    u_kln_replica = u_kln_replica[:, :, ndiscard:]
    u_kln = u_kln[:, :, ndiscard:]
    u_n = u_n[ndiscard:]

    # Truncate to the number of specified conformations to use.
    if (nuse):
        u_kln_replica = u_kln_replica[:, :, 0:nuse]
        u_kln = u_kln[:, :, 0:nuse]
        u_n = u_n[0:nuse]

    # Subsample data to obtain uncorrelated samples.
    N_k = zeros(nstates, int32)
    indices = timeseries.subsampleCorrelatedData(u_n)  # indices of uncorrelated samples
    #indices = range(0,u_n.size)  # DEBUG - assume samples are uncorrelated
    N = len(indices)  # number of uncorrelated samples
    N_k[:] = N
    u_kln[:, :, 0:N] = u_kln[:, :, indices]
    print "number of uncorrelated samples:"
    print N_k
    print ""

    # Compute average enthalpies.
    H_k = zeros([nstates], float64)  # H_k[k] is the estimated enthalpy of state k
    dH_k = zeros([nstates], float64)
    for k in range(nstates):
        H_k[k] = u_kln[k, k, :].mean()
        dH_k[k] = u_kln[k, k, :].std() / sqrt(N)

    return (H_k, dH_k)
# Estimate the statistical inefficiency of the simulation by analyzing the timeseries of interest.
# We use the max of cos and sin of the phi and psi timeseries because they are periodic angles.
print "Computing statistical inefficiencies..."
g_cosphi = timeseries.statisticalInefficiencyMultiple(numpy.cos(phi_kt_replica * numpy.pi / 180.0))
print "g_cos(phi) = %.1f" % g_cosphi
g_sinphi = timeseries.statisticalInefficiencyMultiple(numpy.sin(phi_kt_replica * numpy.pi / 180.0))
print "g_sin(phi) = %.1f" % g_sinphi
g_cospsi = timeseries.statisticalInefficiencyMultiple(numpy.cos(psi_kt_replica * numpy.pi / 180.0))
print "g_cos(psi) = %.1f" % g_cospsi
g_sinpsi = timeseries.statisticalInefficiencyMultiple(numpy.sin(psi_kt_replica * numpy.pi / 180.0))
print "g_sin(psi) = %.1f" % g_sinpsi

# Subsample data with the maximum of all correlation times.
print "Subsampling data..."
g = numpy.max(numpy.array([g_cosphi, g_sinphi, g_cospsi, g_sinpsi]))
indices = timeseries.subsampleCorrelatedData(U_kt[k,:], g=g)
print "Using g = %.1f to obtain %d uncorrelated samples per temperature" % (g, len(indices))
N_max = int(numpy.ceil(T / g))  # max number of samples per temperature
U_kn = numpy.zeros([K, N_max], numpy.float64)
phi_kn = numpy.zeros([K, N_max], numpy.float64)
psi_kn = numpy.zeros([K, N_max], numpy.float64)
N_k = N_max * numpy.ones([K], numpy.int32)
for k in range(K):
    U_kn[k,:] = U_kt[k, indices]
    phi_kn[k,:] = phi_kt[k, indices]
    psi_kn[k,:] = psi_kt[k, indices]
print "%d uncorrelated samples per temperature" % N_max

#===================================================================================================
# Generate a list of indices of all configurations in kn-indexing
#===================================================================================================
def estimate_free_energies(ncfile, ndiscard=0, nuse=None):
    """Estimate free energies of all alchemical states.

    ARGUMENTS
      ncfile (NetCDF) - input YANK netcdf file

    OPTIONAL ARGUMENTS
      ndiscard (int) - number of iterations to discard to equilibration
      nuse (int) - maximum number of iterations to use (after discarding)

    TODO: Automatically determine 'ndiscard'.
    """

    # Get current dimensions.
    niterations = ncfile.variables['energies'].shape[0]
    nstates = ncfile.variables['energies'].shape[1]
    natoms = ncfile.variables['energies'].shape[2]

    # Extract energies.
    print "Reading energies..."
    energies = ncfile.variables['energies']
    u_kln_replica = zeros([nstates, nstates, niterations], float64)
    for n in range(niterations):
        u_kln_replica[:, :, n] = energies[n, :, :]
    print "Done."

    # Deconvolute replicas.
    print "Deconvoluting replicas..."
    u_kln = zeros([nstates, nstates, niterations], float64)
    for iteration in range(niterations):
        state_indices = ncfile.variables['states'][iteration, :]
        u_kln[state_indices, :, iteration] = energies[iteration, :, :]
    print "Done."

    # Compute total negative log probability over all iterations.
    u_n = zeros([niterations], float64)
    for iteration in range(niterations):
        u_n[iteration] = sum(diagonal(u_kln[:, :, iteration]))
    #print u_n  # DEBUG
    outfile = open('u_n.out', 'w')
    for iteration in range(niterations):
        outfile.write("%8d %24.3f\n" % (iteration, u_n[iteration]))
    outfile.close()

    # Discard initial data to equilibration.
    u_kln_replica = u_kln_replica[:, :, ndiscard:]
    u_kln = u_kln[:, :, ndiscard:]
    u_n = u_n[ndiscard:]

    # Truncate to the number of specified conformations to use.
    if (nuse):
        u_kln_replica = u_kln_replica[:, :, 0:nuse]
        u_kln = u_kln[:, :, 0:nuse]
        u_n = u_n[0:nuse]

    # Subsample data to obtain uncorrelated samples.
    N_k = zeros(nstates, int32)
    indices = timeseries.subsampleCorrelatedData(u_n)  # indices of uncorrelated samples
    #indices = range(0,u_n.size)  # DEBUG - assume samples are uncorrelated
    N = len(indices)  # number of uncorrelated samples
    N_k[:] = N
    u_kln[:, :, 0:N] = u_kln[:, :, indices]
    print "number of uncorrelated samples:"
    print N_k
    print ""

    #===================================================================================================
    # Estimate free energy difference with MBAR.
    #===================================================================================================

    # Initialize MBAR (computing free energy estimates, which may take a while).
    print "Computing free energy differences..."
    mbar = MBAR(u_kln, N_k, verbose=False, method='adaptive', maximum_iterations=50000)
    #mbar = MBAR(u_kln, N_k, verbose = False, method = 'self-consistent-iteration', maximum_iterations = 50000)  # use slow self-consistent-iteration (the default)
    #mbar = MBAR(u_kln, N_k, verbose = True, method = 'Newton-Raphson')  # use faster Newton-Raphson solver

    # Get matrix of dimensionless free energy differences and uncertainty estimate.
    print "Computing covariance matrix..."
    (Deltaf_ij, dDeltaf_ij) = mbar.getFreeEnergyDifferences(uncertainty_method='svd-ew')

    # Matrix of free energy differences.
    print "Deltaf_ij:"
    for i in range(nstates):
        for j in range(nstates):
            print "%8.3f" % Deltaf_ij[i, j],
        print ""

    # Matrix of uncertainties in free energy difference (expected standard deviations of the estimator about the true free energy).
    print "dDeltaf_ij:"
    for i in range(nstates):
        for j in range(nstates):
            print "%8.3f" % dDeltaf_ij[i, j],
        print ""

    # Return free energy differences and an estimate of the covariance.
    return (Deltaf_ij, dDeltaf_ij)
#------------------------------------------------------------------------
# Read Data From File
#------------------------------------------------------------------------
print("")
print("Preparing data:")
T_from_file = read_simulation_temps(simulation, NumTemps)
E_from_file = read_total_energies(simulation, TE_COL_NUM)
K = len(T_from_file)
N_k = numpy.zeros(K, numpy.int32)
g = numpy.zeros(K, numpy.float64)

for k in range(K):  # subsample the energies
    g[k] = timeseries.statisticalInefficiency(E_from_file[k])
    indices = numpy.array(timeseries.subsampleCorrelatedData(E_from_file[k], g=g[k]))  # indices of uncorrelated samples
    N_k[k] = len(indices)  # number of uncorrelated samples
    E_from_file[k, 0:N_k[k]] = E_from_file[k, indices]

#------------------------------------------------------------------------
# Insert Intermediate T's and corresponding blank U's and E's
#------------------------------------------------------------------------
Temp_k = T_from_file
minT = T_from_file[0]
maxT = T_from_file[len(T_from_file) - 1]
#beta = 1/(kB*T)
#T = 1/(kB*beta)
if dtype == 'temperature':
    minv = minT
    maxv = maxT
elif dtype == 'beta':  # actually going in the opposite direction as beta for logistical reasons
infile.close()

# Parse data.
n = 0
for line in lines:
    if line[0] != '#' and line[0] != '@':
        tokens = line.split()
        u_kn[k,n] = beta_k[k] * (float(tokens[2]) - float(tokens[1]))  # reduced potential energy without umbrella restraint
        n += 1

# Compute correlation times for potential energy and chi
# timeseries. If the temperatures differ, use energies to determine samples; otherwise, use the cosine of chi.
if (DifferentTemperatures):
    g_k[k] = timeseries.statisticalInefficiency(u_kn[k,:], u_kn[k, 0:N_k[k]])
    print "Correlation time for set %5d is %10.3f" % (k, g_k[k])
    indices = timeseries.subsampleCorrelatedData(u_kn[k, 0:N_k[k]])
else:
    chi_radians = chi_kn[k, 0:N_k[k]] / (180.0 / numpy.pi)
    g_cos = timeseries.statisticalInefficiency(numpy.cos(chi_radians))
    g_sin = timeseries.statisticalInefficiency(numpy.sin(chi_radians))
    print "g_cos = %.1f | g_sin = %.1f" % (g_cos, g_sin)
    g_k[k] = max(g_cos, g_sin)
    print "Correlation time for set %5d is %10.3f" % (k, g_k[k])
    indices = timeseries.subsampleCorrelatedData(chi_radians, g=g_k[k])

# Subsample data.
N_k[k] = len(indices)
u_kn[k, 0:N_k[k]] = u_kn[k, indices]
chi_kn[k, 0:N_k[k]] = chi_kn[k, indices]

N_max = numpy.max(N_k)  # shorten the array size
u_kln = numpy.zeros([K, K, N_max], numpy.float64)  # u_kln[k,l,n] is the reduced potential energy of snapshot n from umbrella simulation k evaluated at umbrella l
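# --- Editor's sketch (not from the original sources): the periodic-observable
# pattern used above, isolated. For an angle, the inefficiency is estimated on
# cos and sin (both well defined across the wrap-around) and the larger value
# is used to subsample. Synthetic data; names are illustrative.
import numpy
from pymbar import timeseries

rng = numpy.random.RandomState(3)
chi_deg = numpy.cumsum(rng.normal(scale=5.0, size=4000)) % 360.0  # correlated angle in degrees
chi_rad = chi_deg * numpy.pi / 180.0
g_cos = timeseries.statisticalInefficiency(numpy.cos(chi_rad))
g_sin = timeseries.statisticalInefficiency(numpy.sin(chi_rad))
g = max(g_cos, g_sin)
indices = timeseries.subsampleCorrelatedData(chi_rad, g=g)
chi_sub = chi_deg[indices]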
u_t_singlestate = zeros([T], float64)
for state in range(K):
    # Construct timeseries.
    Nstate = 0
    for t in range(T):
        if state_t[t] == state:
            #u_t_singlestate[Nstate] = u_tk[t,state]
            u_t_singlestate[Nstate] = u_t[t]
            Nstate += 1
    if Nstate > 0:
        g_state = timeseries.statisticalInefficiency(u_t_singlestate[0:Nstate], u_t_singlestate[0:Nstate])
        print "state %5d : g = %16.8f, N = %6d" % (state, g_state, Nstate)

# Analyze timeseries to determine effectively uncorrelated snapshots.
indices = timeseries.subsampleCorrelatedData(u_t)  # indices of uncorrelated samples
N = len(indices)  # number of uncorrelated samples
print "%d uncorrelated samples of %d snapshots." % (N, T)

# DEBUG: assume all samples are uncorrelated
# indices = range(0,T,20)
# for t in range(T):
#     print "%8d %16.8f" % (t, u_t[t])
# N = len(indices)

# Count number of uncorrelated samples in each state.
N_k = zeros(K, int32)
for n in range(N):
    t = indices[n]
    state = state_t[t]
    N_k[state] += 1
def main():
    options = parse_args()
    kB = 0.00831447 / 4.184  # Boltzmann constant (gas constant), converted to kcal/(mol*K)
    dT = 2.5  # Temperature increment for calculating Cv(T)

    T = numpy.loadtxt(options.tfile)
    print 'Initial temperature states are', T
    K = len(T)

    U_kn, Q_kn, N_max = read_data(options, T, K)

    print 'Subsampling Q...'
    N_k = numpy.zeros(K, numpy.int32)
    g = numpy.zeros(K, numpy.float64)
    for k in range(K):  # subsample the energies
        g[k] = timeseries.statisticalInefficiency(Q_kn[k])  # ,suppress_warning=True)
        indices = numpy.array(timeseries.subsampleCorrelatedData(Q_kn[k], g=g[k]))  # indices of uncorrelated samples
        N_k[k] = len(indices)  # number of uncorrelated samples
        print '%i uncorrelated samples out of %i total samples' % (len(indices), options.N_max / options.skip)
        U_kn[k, 0:N_k[k]] = U_kn[k, indices]
        Q_kn[k, 0:N_k[k]] = Q_kn[k, indices]

    insert = True
    if insert:
        #------------------------------------------------------------------------
        # Insert Intermediate T's and corresponding blank U's and E's
        #------------------------------------------------------------------------
        # Set up variables
        Temp_k = T
        currentT = T[0] + dT
        maxT = T[-1]
        i = 1

        print("--Inserting intermediate temperatures...")

        # Loop, inserting T's at which we are interested in the properties
        while (currentT < maxT):
            if (currentT < Temp_k[i]):
                Temp_k = numpy.insert(Temp_k, i, currentT)
                currentT = currentT + dT
            else:
                currentT = Temp_k[i] + dT
            i = i + 1

        # Update number of states
        K = len(Temp_k)

        print("--Inserting blank energies to match up with inserted temperatures...")

        # Loop, inserting E's into blank matrix (leaving blanks only where new Ts are inserted)
        Q_fromfile = Q_kn
        Nall_k = numpy.zeros([K], numpy.int32)  # Number of samples (n) for each state (k) = number of iterations/energies
        E_kn = numpy.zeros([K, N_max], numpy.float64)
        Q_kn = numpy.zeros([K, N_max], numpy.float64)
        i = 0
        for k in range(K):
            if (Temp_k[k] == T[i]):
                E_kn[k, 0:N_k[i]] = U_kn[i, 0:N_k[i]]
                Q_kn[k, 0:N_k[i]] = Q_fromfile[i, 0:N_k[i]]
                Nall_k[k] = N_k[i]
                i = i + 1
    else:
        print 'Not inserting intermediate temperatures'
        Temp_k = T
        E_kn = U_kn
        Nall_k = N_k

    #------------------------------------------------------------------------
    # Compute inverse temperatures
    #------------------------------------------------------------------------
    beta_k = 1 / (kB * Temp_k)

    #------------------------------------------------------------------------
    # Compute reduced potential energies
    #------------------------------------------------------------------------
    print "--Computing reduced energies..."
    u_kln = numpy.zeros([K, K, N_max], numpy.float64)  # u_kln is the reduced potential energy of segment n of temp k evaluated at temp l
    for k in range(K):
        for l in range(K):
            u_kln[k, l, 0:Nall_k[k]] = beta_k[l] * E_kn[k, 0:Nall_k[k]]

    #------------------------------------------------------------------------
    # Initialize MBAR
    #------------------------------------------------------------------------
    # Initialize MBAR with Newton-Raphson
    print ""
    print "Initializing MBAR:"
    print "--K = number of Temperatures"
    print "--L = number of Temperatures"
    print "--N = number of Energies per Temperature"

    # Use the adaptive method (both Newton-Raphson and self-consistent iteration, testing which is better)
    if insert:
        mbar = pymbar.MBAR(u_kln, Nall_k, method='adaptive', verbose=True, relative_tolerance=1e-12)
    else:
        f_k = wham.histogram_wham(beta_k, U_kn, Nall_k, relative_tolerance=1.0e-4)
        mbar = pymbar.MBAR(u_kln, Nall_k, initial_f_k=f_k, verbose=True)

    #------------------------------------------------------------------------
    # Compute Expectations for E_kt and E2_kt as E_expect and E2_expect
    #------------------------------------------------------------------------
    print ""
    print "Computing Expectations for E..."
    (E_expect, dE_expect) = mbar.computeExpectations(u_kln) * (beta_k)**(-1)
    print "Computing Expectations for E^2..."
    (E2_expect, dE2_expect) = mbar.computeExpectations(u_kln * u_kln) * (beta_k)**(-2)
    print "Computing Expectations for Q..."
    (Q, dQ) = mbar.computeExpectations(Q_kn)

    #------------------------------------------------------------------------
    # Compute Cv for NVT simulations as ( <E^2> - <E>^2 ) / ( kB * T^2 )
    #------------------------------------------------------------------------
    #print ""
    #print "Computing Heat Capacity as ( <E^2> - <E>^2 ) / ( R*T^2 )..."
    Cv_expect = numpy.zeros([K], numpy.float64)
    dCv_expect = numpy.zeros([K], numpy.float64)
    for i in range(K):
        Cv_expect[i] = (E2_expect[i] - (E_expect[i] * E_expect[i])) / (kB * Temp_k[i] * Temp_k[i])
        dCv_expect[i] = 2 * dE_expect[i]**2 / (kB * Temp_k[i] * Temp_k[i])  # from propagation of error

    #print "Temperature  dA  <E> +/- d<E>  <E^2> +/- d<E^2>  Cv +/- dCv"
    #print "-------------------------------------------------------------------------------"
    #for k in range(K):
    #    print "%8.3f %8.3f %9.3f +/- %5.3f %9.1f +/- %5.1f %7.4f +/- %6.4f" % (Temp_k[k], mbar.f_k[k], E_expect[k], dE_expect[k], E2_expect[k], dE2_expect[k], Cv_expect[k], dCv_expect[k])
    #numpy.savetxt('/home/edz3fz/Qsurf_int.txt', Q)
    #numpy.savetxt('/home/edz3fz/dQsurf_int.txt', dQ)
    #numpy.savetxt('/home/edz3fz/dQsol.txt', dQ)
    #numpy.savetxt('/home/edz3fz/Qtemp.tt', Temp_k)

    import matplotlib.pyplot as plt
    #ncavg = numpy.average(Q_fromfile, axis=1)
    plt.figure(1)
    #plt.plot(T, ncavg, 'ko')
    plt.plot(Temp_k, Q, 'k')
    plt.errorbar(Temp_k, Q, yerr=dQ)
    plt.xlabel('Temperature (K)')
    plt.ylabel('Q fraction native contacts')
    #plt.title('Heat Capacity from Go like model MC simulation of 1BSQ')
    plt.savefig(options.direc + '/foldingcurve.png')
    numpy.save(options.direc + '/foldingcurve', numpy.array([Temp_k, Q, dQ]))
    numpy.save(options.direc + '/heatcap', numpy.array([Temp_k, Cv_expect, dCv_expect]))
    if options.show:
        plt.show()
files = ['%s/energy%i.npy' % (direc, T[-2]), '%s/energy%i.npy' % (direc, T[-1])]
#file=['/home/edz3fz/checkensemble_high/CE_high.txt','/home/edz3fz/checkensemble_low/CE_low.txt']
#file=[direc+'/energy426.txt',direc+'/energy442.txt']
#file = ['/home/edz3fz/surface_replica_exchange/replica0/energy300.txt', '/home/edz3fz/surface_replica_exchange/replica3/energy356.txt']
down = load(files[0])
up = load(files[1])
length = len(down)
down = down[length / 2::]
up = up[length / 2::]
#up=up[-50000::]
#down=down[-50000::]
#up=up[::100]
#down=down[::100]

g_up = timeseries.statisticalInefficiency(up)
indices_up = numpy.array(timeseries.subsampleCorrelatedData(up, g=g_up))
print len(indices_up), 'samples'
g_down = timeseries.statisticalInefficiency(down)
indices_down = numpy.array(timeseries.subsampleCorrelatedData(down, g=g_down))  # subsample 'down', not 'up'
print len(indices_down), 'samples'

type = 'total'
U_kn = zeros([2, len(up)])
U_kn[0, 0:len(indices_down)] = down[indices_down]
U_kn[1, 0:len(indices_up)] = up[indices_up]
#T_k=array([300.,336.8472786])
#T_k=array([426.81933819,442.13650313])
#T_k=array([424.67492585,450])
#T_k=array([437.99897735,450])
N_k = [len(indices_down), len(indices_up)]  # order matches the U_kn rows (down, up)
# Calculate Reduced Potential
if aur == 'o':
    if rfc[k, 0] == 0:
        tmp = np.ones([R], np.float64) * 0.001
        u[0:N[k]] = np.sum(beta * tmp[0:R] * ((val[0:N[k], k, 0:R])**2), axis=1)
    else:
        u[0:N[k]] = np.sum(beta * rfc[k, 0:R] * ((val[0:N[k], k, 0:R])**2), axis=1)
else:
    if rfc[k, 0] == 0:
        tmp = np.ones([R], np.float64) * 0.001
        u[0:N[k]] = np.sum(beta * tmp[0:R] * ((val[0:N[k], k, 0:R] - req[k, 0:R])**2), axis=1)
    else:
        u[0:N[k]] = np.sum(beta * rfc[k, 0:R] * ((val[0:N[k], k, 0:R] - req[k, 0:R])**2), axis=1)

g[k] = calcg(u[0:N[k]])
subs = timeseries.subsampleCorrelatedData(np.zeros([N[k]]), g=g[k])
Nind[k] = len(subs)
if Nind[k] > 100000:
    Neff[k] = 100000
else:
    Neff[k] = Nind[k]

print "Processed Window %5.0f.  N= %12.0f.  g= %10.3f   Nind= %12.0f   Neff= %12.0f" % (k, N[k], g[k], Nind[k], Neff[k])

print "Max Neff= %.0f" % (np.max(Neff))
Upot = np.zeros([K, K, np.max(Neff)], np.float64)

# Calculate Restraint Energy
for k in range(K):
    subs = timeseries.subsampleCorrelatedData(np.zeros([N[k]]), g=g[k])
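# --- Editor's note (a sketch, not from the original sources): when g is
# supplied explicitly, subsampleCorrelatedData does not need the values of its
# input, only its length, from which it builds a strided index list -- which
# appears to be why the snippet above passes numpy.zeros([N[k]]). A minimal
# equivalent, with an illustrative length and inefficiency:
import numpy
from pymbar import timeseries

n, g = 1000, 7.3  # hypothetical series length and statistical inefficiency
indices = timeseries.subsampleCorrelatedData(numpy.zeros(n), g=g)
print('%d indices at a stride of roughly %.1f' % (len(indices), g))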
def call_mbar(aur, temp, phase):
    ### Arguments
    aur = aur  # t or u or r or d
    temp = float(temp)  # temperature
    kB = 1.381e-23 * 6.022e23 / (4.184 * 1000.0)  # Boltzmann constant (gas constant), converted to kcal/mol/K
    beta = 1 / (kB * temp)  # beta
    N_max = 2000000  # Max frames for any simulation window; you should check this if you did some long runs

    sys.stdout = open('subs-' + aur + '.log', 'w')

    ### Determine Number of umbrellas
    K = 0
    filename = './' + aur + '%02.0f/restraints.dat' % K
    while os.path.isfile(filename):
        K = K + 1
        filename = './' + aur + '%02.0f/restraints.dat' % K
    R = 1
    print "K= %5.0f  R= %5.0f" % (K, R)

    ### Allocate storage for simulation data
    N = np.zeros([K], np.int32)  # N_k[k] is the number of snapshots to be used from umbrella simulation k
    Neff = np.zeros([K], np.int32)
    Nind = np.zeros([K], np.int32)
    Nprg = np.zeros([K], np.int32)
    rty = ['d'] * R  # restraint type (distance or angle)
    rfc = np.zeros([K, R], np.float64)  # restraint force constant
    rfc2 = np.zeros([K, R], np.float64)  # restraint force constant
    fcmax = np.zeros([R], np.float64)  # full force constant value used during umbrella portion of work
    req = np.zeros([K, R], np.float64)  # restraint target value
    req2 = np.zeros([K, R], np.float64)  # restraint target value
    val = np.zeros([N_max, K, R], np.float64)  # value of the restrained variable at each frame n
    val2 = np.zeros([N_max, K, R], np.float64)  # value of the restrained variable at each frame n
    g = np.zeros([K], np.float64)
    ### Tmp type arrays for energy and spline fitting/integration
    u = np.zeros([N_max], np.float64)
    x = np.zeros([K], np.float64)
    y = np.zeros([K], np.float64)
    m = np.zeros([K], np.float64)
    s = np.zeros([K], np.float64)

    print "Done with array setup\n"

    ### Read the simulation data
    r = 0
    for k in range(K):
        # Read Equilibrium Value and Force Constant
        if aur == 't':
            with open('./' + aur + '%02.0f/colvar.in' % k, 'r') as f:
                for line in f:
                    if 'posit2' in line:
                        for line in f:
                            cols = line.split()
                            if len(cols) != 0 and (cols[0] == "centers"):
                                req[k, r] = float(cols[1])
                            if len(cols) != 0 and (cols[0] == "forceConstant"):
                                rfc[k, r] = float(cols[1]) / 2
                                break
                    if 'posit3' in line:
                        for line in f:
                            cols = line.split()
                            if len(cols) != 0 and (cols[0] == "centers"):
                                req2[k, r] = float(cols[1])
                            if len(cols) != 0 and (cols[0] == "forceConstant"):
                                rfc2[k, r] = float(cols[1]) / 2
                                break
        elif aur == 'o':
            with open('./' + aur + '%02.0f/colvar.in' % k, 'r') as f:
                for line in f:
                    if 'orient2' in line:
                        for line in f:
                            cols = line.split()
                            if len(cols) != 0 and (cols[0] == "centers"):
                                str = cols[1][1:-1]
                                req[k, r] = float(str)
                            if len(cols) != 0 and (cols[0] == "forceConstant"):
                                rfc[k, r] = float(cols[1]) / 2
                                break
        elif aur == 'r' or aur == 'l':
            with open('./' + aur + '%02.0f/colvar.in' % k, 'r') as f:
                for line in f:
                    if 'rmsd2' in line:
                        for line in f:
                            cols = line.split()
                            if len(cols) != 0 and (cols[0] == "centers"):
                                req[k, r] = float(cols[1])
                            if len(cols) != 0 and (cols[0] == "forceConstant"):
                                rfc[k, r] = float(cols[1]) / 2
                                break
        elif aur == 'p' or aur == 'b':
            with open('./' + aur + '%02.0f/colvar.in' % k, 'r') as f:
                for line in f:
                    if 'rmsd1' in line:
                        for line in f:
                            cols = line.split()
                            if len(cols) != 0 and (cols[0] == "centers"):
                                req[k, r] = float(cols[1])
                            if len(cols) != 0 and (cols[0] == "forceConstant"):
                                rfc[k, r] = float(cols[1]) / 2
                                break
        elif aur == 'u':
            with open('./' + aur + '%02.0f/colvar.in' % k, 'r') as f:
                for line in f:
                    if 'posit3' in line:
                        for line in f:
                            cols = line.split()
                            if len(cols) != 0 and (cols[0] == "centers"):
                                req[k, r] = float(cols[1])
                            if len(cols) != 0 and (cols[0] == "forceConstant"):
                                rfc[k, r] = float(cols[1]) / 2
                                break
        else:
            sys.exit("not sure about restraint type!")

        # Read in Values for restrained variables for each simulation
        filename = './' + aur + '%02.0f/restraints.dat' % k
        infile = open(filename, 'r')
        restdat = infile.readlines()  # to slice off the first 20 lines: readlines()[20:]
        infile.close()

        # Parse Data
        n = 0
        s = 0
        from_line = 0
        if int(phase) == 0:
            from_line = 500
        for line in restdat:
            s += 1  # only analyze the file from line 500 onward!
            if line[0] != '#' and line[0] != '@' and s > from_line:
                cols = line.split()
                if aur == 'o':
                    val[n, k, r] = math.acos(float(cols[2]))
                elif aur == 'u' or aur == 'l':
                    val[n, k, r] = float(cols[2])
                elif aur == 't':
                    val[n, k, r] = float(cols[1])
                    val2[n, k, r] = float(cols[2])
                else:
                    val[n, k, r] = float(cols[1])
                n += 1
        N[k] = n

        # Calculate Reduced Potential
        if aur == 'o':
            if rfc[k, 0] == 0:
                tmp = np.ones([R], np.float64) * 0.001
                u[0:N[k]] = np.sum(beta * tmp[0:R] * ((val[0:N[k], k, 0:R])**2), axis=1)  # -> slicing syntax [0:N[k]]
            else:
                u[0:N[k]] = np.sum(beta * rfc[k, 0:R] * ((val[0:N[k], k, 0:R])**2), axis=1)
        elif aur == 't':
            if rfc[k, 0] == 0 and rfc2[k, 0] != 0:
                tmp = np.ones([R], np.float64) * 0.001
                u[0:N[k]] = np.sum(beta * (tmp[0:R] * (((val[0:N[k], k, 0:R] - req[k, 0:R])**2)) + rfc2[k, 0:R] * ((val2[0:N[k], k, 0:R] - req2[k, 0:R])**2)), axis=1)  # -> (1/k_BT)*K*x**2
            elif rfc[k, 0] != 0 and rfc2[k, 0] == 0:
                tmp = np.ones([R], np.float64) * 0.001
                u[0:N[k]] = np.sum(beta * (rfc[k, 0:R] * (((val[0:N[k], k, 0:R] - req[k, 0:R])**2)) + tmp[0:R] * ((val2[0:N[k], k, 0:R] - req2[k, 0:R])**2)), axis=1)
            elif rfc[k, 0] == 0 and rfc2[k, 0] == 0:
                tmp = np.ones([R], np.float64) * 0.001
                u[0:N[k]] = np.sum(beta * (tmp[0:R] * (((val[0:N[k], k, 0:R] - req[k, 0:R])**2)) + tmp[0:R] * ((val2[0:N[k], k, 0:R] - req2[k, 0:R])**2)), axis=1)
            else:
                u[0:N[k]] = np.sum(beta * (rfc[k, 0:R] * (((val[0:N[k], k, 0:R] - req[k, 0:R])**2)) + rfc2[k, 0:R] * ((val2[0:N[k], k, 0:R] - req2[k, 0:R])**2)), axis=1)
        else:
            if rfc[k, 0] == 0:
                tmp = np.ones([R], np.float64) * 0.001
                u[0:N[k]] = np.sum(beta * tmp[0:R] * ((val[0:N[k], k, 0:R] - req[k, 0:R])**2), axis=1)  # -> (1/k_BT)*K*x**2
            else:
                u[0:N[k]] = np.sum(beta * rfc[k, 0:R] * ((val[0:N[k], k, 0:R] - req[k, 0:R])**2), axis=1)

        g[k] = calcg(u[0:N[k]])
        subs = timeseries.subsampleCorrelatedData(np.zeros([N[k]]), g=g[k])
        Nind[k] = len(subs)
        if Nind[k] > 100000:
            Neff[k] = 100000
        else:
            Neff[k] = Nind[k]

        print "Processed Window %5.0f.  N= %12.0f.  g= %10.3f   Nind= %12.0f   Neff= %12.0f" % (k, N[k], g[k], Nind[k], Neff[k])

    print "Max Neff= %.0f" % (np.max(Neff))
    Upot = np.zeros([K, K, np.max(Neff)], np.float64)

    # Calculate Restraint Energy
    for k in range(K):
        # subs = timeseries.subsampleCorrelatedData(np.zeros([N[k]]),g=g[k])
        for l in range(K):
            if aur == 'o':
                Upot[k, l, 0:Neff[k]] = np.sum(beta * rfc[l, 0:R] * ((val[0:Neff[k], k, 0:R])**2), axis=1)
            elif aur == 't':
                Upot[k, l, 0:Neff[k]] = np.sum(beta * (rfc[l, 0:R] * ((val[0:Neff[k], k, 0:R] - req[l, 0:R])**2) + rfc2[l, 0:R] * ((val2[0:Neff[k], k, 0:R] - req2[l, 0:R])**2)), axis=1)
            else:
                Upot[k, l, 0:Neff[k]] = np.sum(beta * rfc[l, 0:R] * ((val[0:Neff[k], k, 0:R] - req[l, 0:R])**2), axis=1)

    val = []
    prg = [100]
    for p in range(len(prg)):
        Nprg = Neff * prg[p] / 100  ## Test integers out only
        print "Running MBAR on %.0f percent of the data ... " % (prg[p])
        mbar = pymbar.MBAR(Upot, Nprg, verbose=True, method='adaptive', initialize='BAR')

        print "Calculate Free Energy Differences Between States"
        [Deltaf, dDeltaf] = mbar.getFreeEnergyDifferences()
        min = np.argmin(Deltaf[0])

        # Write to file
        print "Free Energy Differences (in units of kcal/mol)"
        print "%9s %8s %8s %12s %12s" % ('bin', 'f', 'df', 'deq', 'dfc')
        datfile = open('subs-' + aur + '.%03.0f.dat' % prg[p], 'w')
        for k in range(K):
            if aur == 'r' or aur == 'o' or aur == 'p' or aur == 'b' or aur == 'l':
                print "%10.5f %10.5f %10.5f %12.7f %12.7f" % (rfc[k, 0] / rfc[-1, 0], Deltaf[0, k] / beta, dDeltaf[0, k] / beta, req[k, 0], rfc[k, 0])
                datfile.write("%10.5f %10.5f %10.5f %12.7f %12.7f\n" % (rfc[k, 0] / rfc[-1, 0], Deltaf[0, k] / beta, dDeltaf[0, k] / beta, req[k, 0], rfc[k, 0]))
            elif aur == 't':
                print "%10.5f %10.5f %10.5f %12.7f %12.7f %12.7f %12.7f" % (rfc[k, 0] / rfc[-1, 0], Deltaf[0, k] / beta, dDeltaf[0, k] / beta, req[k, 0], req2[k, 0], rfc[k, 0], rfc2[k, 0])
                datfile.write("%10.5f %10.5f %10.5f %12.7f %12.7f\n" % (rfc[k, 0] / rfc[-1, 0], Deltaf[0, k] / beta, dDeltaf[0, k] / beta, req[k, 0], rfc[k, 0]))
            elif aur == 'd':
                print "%9.0f %10.5f %10.5f %12.7f %12.7f" % (k, Deltaf[0, k] / beta, dDeltaf[0, k] / beta, req[k, 0], rfc[k, 0] / rfc[-1, 0])
                datfile.write("%9.0f %10.5f %10.5f %12.7f %12.7f\n" % (k, Deltaf[0, k] / beta, dDeltaf[0, k] / beta, req[k, 0], rfc[k, 0] / rfc[-1, 0]))
            else:  # 'u'
                print "%10.5f %10.5f %10.5f %12.7f %12.7f" % (req[k, 0], Deltaf[0, k] / beta, dDeltaf[0, k] / beta, req[k, 0], rfc[k, 0])
                datfile.write("%10.5f %10.5f %10.5f %12.7f %12.7f\n" % (req[k, 0], Deltaf[0, k] / beta, dDeltaf[0, k] / beta, req[k, 0], rfc[k, 0]))
        datfile.close()
        print "\n\n"
for iteration in range(niterations):
    state_indices = ncfile.variables['states'][iteration,:]
    u_kln[state_indices,:,iteration] = ncfile.variables['energies'][iteration,:,:]
ncfile.close()

# Extract log probability history.
u_n = numpy.zeros([niterations], numpy.float64)
for iteration in range(niterations):
    u_n[iteration] = 0.0
    for state in range(nstates):
        u_n[iteration] += u_kln[state,state,iteration]

# Detect equilibration.
[nequil, g, Neff] = detect_equilibration(u_n)
u_n = u_n[nequil:]
u_kln = u_kln[:,:,nequil:]

# Subsample data.
indices = timeseries.subsampleCorrelatedData(u_n, g=g)
u_n = u_n[indices]
u_kln = u_kln[:,:,indices]
N_k = len(indices) * numpy.ones([nstates], numpy.int32)

# Analyze with MBAR.
mbar = pymbar.MBAR(u_kln, N_k)
[Delta_f_ij, dDelta_f_ij] = mbar.getFreeEnergyDifferences()

# Compare with the analytical result.
f_i_analytical = numpy.zeros([nstates], numpy.float64)
for (state_index, state) in enumerate(simulation.states):
    values = computeHarmonicOscillatorExpectations(K, mass, state.temperature)
    f_i_analytical[state_index] = values['free energies']['potential']
Delta_f_ij_analytical = numpy.zeros([nstates, nstates], numpy.float64)
for i in range(nstates):
    for j in range(nstates):
        Delta_f_ij_analytical[i,j] = f_i_analytical[j] - f_i_analytical[i]
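# Hedged sketch of the equilibration-detection step above. The bare
# detect_equilibration() call presumably comes from pymbar's timeseries
# module, where it is exposed as detectEquilibration (the exact name varies
# between pymbar versions); it returns the equilibration end point t0, the
# statistical inefficiency g of the production region, and the effective
# number of uncorrelated samples Neff. The series below is synthetic.
import numpy
from pymbar import timeseries

numpy.random.seed(3)
burnin = numpy.linspace(5.0, 0.0, 500) + numpy.random.normal(0.0, 1.0, 500)  # decaying transient
production = numpy.random.normal(0.0, 1.0, 2000)                             # equilibrated tail
u_n = numpy.concatenate([burnin, production])
[t0, g, Neff] = timeseries.detectEquilibration(u_n)
print "discard first %d samples; g = %.1f; Neff = %.0f" % (t0, g, Neff)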
def analyze_data(store_filename, phipsi_outfile=None):
    """
    Analyze output from parallel tempering simulations.

    """
    temperature = 300.0 * units.kelvin  # temperature
    ndiscard = 100  # number of samples to discard to equilibration

    # Allocate storage for results.
    results = dict()

    # Compute kappa.
    nbins = 10
    kB = units.BOLTZMANN_CONSTANT_kB * units.AVOGADRO_CONSTANT_NA  # Boltzmann constant
    kT = (kB * temperature)  # thermal energy
    beta = 1.0 / kT  # inverse temperature
    delta = 360.0 / float(nbins) * units.degrees  # bin spacing
    sigma = delta/3.0  # standard deviation
    kappa = (sigma / units.radians)**(-2)  # kappa parameter (unitless)

    # Open NetCDF file.
    ncfile = netcdf.Dataset(store_filename, 'r', version=2)

    # Get dimensions.
    [niterations, nstates, natoms, ndim] = ncfile.variables['positions'][:,:,:,:].shape
    print "%d iterations, %d states, %d atoms" % (niterations, nstates, natoms)

    # Discard initial configurations to equilibration.
    print "First %d iterations will be discarded to equilibration." % ndiscard
    niterations -= ndiscard

    # Print summary statistics about mixing in state space.
    [tau2, dtau2] = show_mixing_statistics_with_error(ncfile)

    # Compute correlation time of state index.
    states = ncfile.variables['states'][:,:].copy()
    A_kn = [ states[:,k].copy() for k in range(nstates) ]
    g_states = timeseries.statisticalInefficiencyMultiple(A_kn)
    tau_states = (g_states-1.0)/2.0

    # Compute statistical error by block analysis.
    nblocks = 10
    blocksize = int(niterations) / int(nblocks)
    g_states_i = numpy.zeros([nblocks], numpy.float64)
    tau_states_i = numpy.zeros([nblocks], numpy.float64)
    for block_index in range(nblocks):
        # Extract block.
        states = ncfile.variables['states'][(blocksize*block_index):(blocksize*(block_index+1)),:].copy()
        A_kn = [ states[:,k].copy() for k in range(nstates) ]
        g_states_i[block_index] = timeseries.statisticalInefficiencyMultiple(A_kn)
        tau_states_i[block_index] = (g_states_i[block_index]-1.0)/2.0
    dg_states = g_states_i.std() / numpy.sqrt(float(nblocks))
    dtau_states = tau_states_i.std() / numpy.sqrt(float(nblocks))

    # Print.
    print "g_states = %.3f+-%.3f iterations" % (g_states, dg_states)
    print "tau_states = %.3f+-%.3f iterations" % (tau_states, dtau_states)
    del states, A_kn

    # Compute end-to-end time.
    states = ncfile.variables['states'][:,:].copy()
    [tau_end, dtau_end] = average_end_to_end_time(states)

    # Compute statistical inefficiency for reduced potential.
    energies = ncfile.variables['energies'][ndiscard:,:,:].copy()
    states = ncfile.variables['states'][ndiscard:,:].copy()
    u_n = numpy.zeros([niterations], numpy.float64)
    for iteration in range(niterations):
        u_n[iteration] = 0.0
        for replica in range(nstates):
            state = states[iteration,replica]
            u_n[iteration] += energies[iteration,replica,state]
    del energies, states
    g_u = timeseries.statisticalInefficiency(u_n)
    print "g_u = %8.1f iterations" % g_u

    # Compute x and y umbrellas.
    print "Computing torsions..."
    positions = ncfile.variables['positions'][ndiscard:,:,:,:]
    coordinates = units.Quantity(numpy.zeros([natoms,ndim], numpy.float32), units.angstroms)
    phi_it = units.Quantity(numpy.zeros([nstates,niterations], numpy.float32), units.radians)
    psi_it = units.Quantity(numpy.zeros([nstates,niterations], numpy.float32), units.radians)
    for iteration in range(niterations):
        for replica in range(nstates):
            coordinates[:,:] = units.Quantity(positions[iteration,replica,:,:].copy(), units.angstroms)
            phi_it[replica,iteration] = compute_torsion(coordinates, 4, 6, 8, 14)
            psi_it[replica,iteration] = compute_torsion(coordinates, 6, 8, 14, 16)

    # Run MBAR.
print "Grouping torsions by state..." phi_state_it = numpy.zeros([nstates,niterations], numpy.float32) psi_state_it = numpy.zeros([nstates,niterations], numpy.float32) states = ncfile.variables['states'][ndiscard:,:].copy() for iteration in range(niterations): replicas = numpy.argsort(states[iteration,:]) for state in range(1,nstates): replica = replicas[state] phi_state_it[state,iteration] = phi_it[replica,iteration] / units.radians psi_state_it[state,iteration] = psi_it[replica,iteration] / units.radians print "Evaluating reduced potential energies..." N_k = numpy.ones([nstates], numpy.int32) * niterations u_kln = numpy.zeros([nstates, nstates, niterations], numpy.float32) for l in range(1,nstates): phi0 = ((numpy.floor((l-1)/nbins) + 0.5) * delta - 180.0 * units.degrees) / units.radians psi0 = ((numpy.remainder((l-1), nbins) + 0.5) * delta - 180.0 * units.degrees) / units.radians u_kln[:,l,:] = - kappa * numpy.cos(phi_state_it[:,:] - phi0) - kappa * numpy.cos(psi_state_it[:,:] - psi0) # print "Running MBAR..." # #mbar = pymbar.MBAR(u_kln, N_k, verbose=True, method='self-consistent-iteration') # mbar = pymbar.MBAR(u_kln[1:,1:,:], N_k[1:], verbose=True, method='adaptive', relative_tolerance=1.0e-2) # only use biased samples # f_k = mbar.f_k # mbar = pymbar.MBAR(u_kln[1:,1:,:], N_k[1:], verbose=True, method='Newton-Raphson', initial_f_k=f_k) # only use biased samples # #mbar = pymbar.MBAR(u_kln, N_k, verbose=True, method='Newton-Raphson', initialize='BAR') # print "Getting free energy differences..." # [df_ij, ddf_ij] = mbar.getFreeEnergyDifferences(uncertainty_method='svd-ew') # print df_ij # print ddf_ij # print "ln(Z_ij / Z_55):" # reference_bin = 4*nbins+4 # for psi_index in range(nbins): # print " [,%2d]" % (psi_index+1), # print "" # for phi_index in range(nbins): # print "[%2d,]" % (phi_index+1), # for psi_index in range(nbins): # print "%8.3f" % (-df_ij[reference_bin, phi_index*nbins+psi_index]), # print "" # print "" # print "dln(Z_ij / Z_55):" # reference_bin = 4*nbins+4 # for psi_index in range(nbins): # print " [,%2d]" % (psi_index+1), # print "" # for phi_index in range(nbins): # print "[%2d,]" % (phi_index+1), # for psi_index in range(nbins): # print "%8.3f" % (ddf_ij[reference_bin, phi_index*nbins+psi_index]), # print "" # print "" # Compute statistical inefficiencies of various functions of the timeseries data. print "Computing statistical infficiencies of cos(phi), sin(phi), cos(psi), sin(psi)..." cosphi_kn = [ numpy.cos(phi_it[replica,:] / units.radians).copy() for replica in range(1,nstates) ] sinphi_kn = [ numpy.sin(phi_it[replica,:] / units.radians).copy() for replica in range(1,nstates) ] cospsi_kn = [ numpy.cos(psi_it[replica,:] / units.radians).copy() for replica in range(1,nstates) ] sinpsi_kn = [ numpy.sin(psi_it[replica,:] / units.radians).copy() for replica in range(1,nstates) ] g_cosphi = timeseries.statisticalInefficiencyMultiple(cosphi_kn) g_sinphi = timeseries.statisticalInefficiencyMultiple(sinphi_kn) g_cospsi = timeseries.statisticalInefficiencyMultiple(cospsi_kn) g_sinpsi = timeseries.statisticalInefficiencyMultiple(sinpsi_kn) tau_cosphi = (g_cosphi-1.0)/2.0 tau_sinphi = (g_sinphi-1.0)/2.0 tau_cospsi = (g_cospsi-1.0)/2.0 tau_sinpsi = (g_sinpsi-1.0)/2.0 # Compute relaxation times in each torsion. 
print "Relaxation times for transitions among phi or psi bins alone:" phibin_it = ((phi_it + 180.0 * units.degrees) / (delta + 0.1*units.degrees)).astype(numpy.int16) tau_phi = compute_relaxation_time(phibin_it, nbins) psibin_it = ((psi_it + 180.0 * units.degrees) / (delta + 0.1*units.degrees)).astype(numpy.int16) tau_psi = compute_relaxation_time(psibin_it, nbins) print "tau_phi = %8.1f iteration" % tau_phi print "tau_psi = %8.1f iteration" % tau_psi # Compute statistical error. nblocks = 10 blocksize = int(niterations) / int(nblocks) g_cosphi_i = numpy.zeros([nblocks], numpy.float64) g_sinphi_i = numpy.zeros([nblocks], numpy.float64) g_cospsi_i = numpy.zeros([nblocks], numpy.float64) g_sinpsi_i = numpy.zeros([nblocks], numpy.float64) tau_cosphi_i = numpy.zeros([nblocks], numpy.float64) tau_sinphi_i = numpy.zeros([nblocks], numpy.float64) tau_cospsi_i = numpy.zeros([nblocks], numpy.float64) tau_sinpsi_i = numpy.zeros([nblocks], numpy.float64) for block_index in range(nblocks): # Extract block slice_indices = range(blocksize*block_index,blocksize*(block_index+1)) cosphi_kn = [ numpy.cos(phi_it[replica,slice_indices] / units.radians).copy() for replica in range(1,nstates) ] sinphi_kn = [ numpy.sin(phi_it[replica,slice_indices] / units.radians).copy() for replica in range(1,nstates) ] cospsi_kn = [ numpy.cos(psi_it[replica,slice_indices] / units.radians).copy() for replica in range(1,nstates) ] sinpsi_kn = [ numpy.sin(psi_it[replica,slice_indices] / units.radians).copy() for replica in range(1,nstates) ] g_cosphi_i[block_index] = timeseries.statisticalInefficiencyMultiple(cosphi_kn) g_sinphi_i[block_index] = timeseries.statisticalInefficiencyMultiple(sinphi_kn) g_cospsi_i[block_index] = timeseries.statisticalInefficiencyMultiple(cospsi_kn) g_sinpsi_i[block_index] = timeseries.statisticalInefficiencyMultiple(sinpsi_kn) tau_cosphi_i[block_index] = (g_cosphi_i[block_index]-1.0)/2.0 tau_sinphi_i[block_index] = (g_sinphi_i[block_index]-1.0)/2.0 tau_cospsi_i[block_index] = (g_cospsi_i[block_index]-1.0)/2.0 tau_sinpsi_i[block_index] = (g_sinpsi_i[block_index]-1.0)/2.0 dtau_cosphi = tau_cosphi_i.std() / numpy.sqrt(float(nblocks)) dtau_sinphi = tau_sinphi_i.std() / numpy.sqrt(float(nblocks)) dtau_cospsi = tau_cospsi_i.std() / numpy.sqrt(float(nblocks)) dtau_sinpsi = tau_sinpsi_i.std() / numpy.sqrt(float(nblocks)) del cosphi_kn, sinphi_kn, cospsi_kn, sinpsi_kn print "Integrated autocorrelation times" print "tau_cosphi = %8.1f+-%.1f iterations" % (tau_cosphi, dtau_cosphi) print "tau_sinphi = %8.1f+-%.1f iterations" % (tau_sinphi, dtau_sinphi) print "tau_cospsi = %8.1f+-%.1f iterations" % (tau_cospsi, dtau_cospsi) print "tau_sinpsi = %8.1f+-%.1f iterations" % (tau_sinpsi, dtau_sinpsi) # Print LaTeX line. 
print "" print "%(store_filename)s & %(tau2).2f $\pm$ %(dtau2).2f & %(tau_states).2f $\pm$ %(dtau_states).2f & %(tau_end).2f $\pm$ %(dtau_end).2f & %(tau_cosphi).2f $\pm$ %(dtau_cosphi).2f & %(tau_sinphi).2f $\pm$ %(dtau_sinphi).2f & %(tau_cospsi).2f $\pm$ %(dtau_cospsi).2f & %(tau_sinpsi).2f $\pm$ %(dtau_sinpsi).2f \\\\" % vars() print "" if phipsi_outfile is not None: # Write uncorrelated (phi,psi) data outfile = open(phipsi_outfile, 'w') outfile.write('# alanine dipeptide 2d umbrella sampling data\n') # Write umbrella restraints nbins = 10 # number of bins per torsion outfile.write('# %d x %d grid of restraints\n' % (nbins, nbins)) outfile.write('# Each state was sampled from p_i(x) = Z_i^{-1} q(x) q_i(x) where q_i(x) = exp[kappa*cos(phi(x)-phi_i) + kappa*cos(psi(x)-psi_i)]\n') outfile.write('# phi(x) and psi(x) are periodic torsion angles on domain [-180, +180) degrees.\n') outfile.write('# kappa = %f\n' % kappa) outfile.write('# phi_i = [-180 + (floor(i / nbins) + 0.5) * delta] degrees\n') outfile.write('# psi_i = [-180 + ( (i % nbins) + 0.5) * delta] degrees\n') outfile.write('# where i = 0...%d, nbins = %d, and delta = %f degrees\n' % (nbins*nbins-1, nbins, delta / units.degrees)) outfile.write('# Data has been subsampled to generate approximately uncorrelated samples.\n') outfile.write('#\n') # write data header outfile.write('# ') for replica in range(nstates): outfile.write('state %06d ' % replica) outfile.write('\n') # write data indices = timeseries.subsampleCorrelatedData(u_n, g=g_u) # indices of uncorrelated iterations states = ncfile.variables['states'][ndiscard:,:].copy() for iteration in indices: outfile.write(' ') replicas = numpy.argsort(states[iteration,:]) for state in range(1,nstates): replica = replicas[state] outfile.write('%+6.1f %+6.1f ' % (phi_it[replica,iteration] / units.degrees, psi_it[replica,iteration] / units.degrees)) outfile.write('\n') outfile.close() return results