def train(self, refB, xData, uData, saveParams=True):
    """Trains and returns a pymbar MBAR object as the model "parameters."

    Args:
        refB: Reference beta value(s), one per state point (scalar or 1-D
            array-like).
        xData: Observable samples. Indexed as (state, sample) for a scalar
            observable or (state, sample, component) for a vector one. Every
            state must have the same number of samples (no ragged data).
        uData: Potential energies with the same first dimension as xData and
            one entry per sample.
        saveParams: If True, store refB, the (possibly reshaped) xData, uData
            and the MBAR object on ``self``.

    Returns:
        The ``mbar.MBAR`` object built from the outer product of refB with
        the flattened potential energies.

    Raises:
        ValueError: If the leading dimensions of xData, uData and refB do not
            agree, if xData has fewer than two dimensions, or if uData does
            not hold exactly one energy per sample.
    """
    #Accept lists/scalars as well as arrays
    refB = np.atleast_1d(np.array(refB))
    xData = np.asarray(xData)
    uData = np.asarray(uData)

    if xData.ndim < 2:
        raise ValueError('xData must be indexed as (state, sample[, component]); '
                         'got %i dimension(s)' % xData.ndim)

    if xData.shape[0] != uData.shape[0]:
        raise ValueError('x and U must have same shape in first dimension'
                         ' (got %i and %i)' % (xData.shape[0], uData.shape[0]))

    if (xData.shape[0] != refB.shape[0]) or (uData.shape[0] != refB.shape[0]):
        raise ValueError('For interpolation, first dimension of xData, uData, and refB must match.'
                         ' (got %i, %i and %i)'
                         % (xData.shape[0], uData.shape[0], refB.shape[0]))

    nStates = xData.shape[0]
    nSamples = xData.shape[1]

    #Ragged data is not supported: every state contributes nSamples energies
    if uData.size != nStates * nSamples:
        raise ValueError('uData must contain one potential energy per sample'
                         ' (expected %i values, got %i)'
                         % (nStates * nSamples, uData.size))

    #Want to be able to handle vector-value observables
    #So make sure x has 3 dimensions, even if technically observable is scalar
    if xData.ndim == 2:
        xData = np.reshape(xData, (nStates, nSamples, 1))

    #Same (integer) number of samples at every state point
    allN = np.full(nStates, nSamples, dtype=int)
    allU = uData.flatten()
    #Reduced potential of every sample evaluated at every reference beta
    Ukn = np.tensordot(refB, allU, axes=0)
    mbarObj = mbar.MBAR(Ukn, allN)

    if saveParams:
        self.refB = refB
        self.x = xData
        self.U = uData
        self.params = mbarObj

    return mbarObj
def calculate_dG_using_mbar(self, u_kn: np.ndarray, N_k: dict, env: str):
    """Log pairwise free-energy estimates and return the full MBAR object.

    For every pair of neighbouring states ``d`` and ``d + 1`` an MBAR
    estimate is built from the corresponding two rows of ``u_kn`` and the
    columns belonging to those two states; the resulting ``Delta_f`` and
    ``dDelta_f`` values are written to the debug log.

    Args:
        u_kn: 2-D array of reduced potentials; rows are states and columns
            are samples ordered state by state.
        N_k: Mapping from environment name to the per-state sample counts.
        env: Key into ``N_k`` selecting the environment to analyse.

    Returns:
        ``mbar.MBAR`` built from the complete ``u_kn`` and ``N_k[env]``,
        initialised with BAR.
    """
    banner = "#######################################"
    logger.debug(banner)
    logger.debug("Pairwise Free Energy Estimate")
    logger.debug(banner)

    samples_per_state = N_k[env]
    # Column offset of the first sample that belongs to state d
    start = 0
    for d in range(u_kn.shape[0] - 1):
        nr_of_snapshots = samples_per_state[d] + samples_per_state[d + 1]
        pair_u_kn = u_kn[d:d + 2, start:start + nr_of_snapshots]
        m = mbar.MBAR(pair_u_kn, samples_per_state[d:d + 2])
        # Evaluate once and reuse for both the estimate and its uncertainty
        pair_result = m.getFreeEnergyDifferences(return_dict=True)
        logger.debug(pair_result["Delta_f"][0, 1])
        logger.debug(pair_result["dDelta_f"][0, 1])
        start += samples_per_state[d]

    logger.debug(banner)
    return mbar.MBAR(u_kn, N_k[env], initialize="BAR", verbose=True)
def predict(self, B, params=None, refB=None, useMBAR=False):
    """Performs perturbation at state of interest.

    Args:
        B: Beta value(s) to predict at; a Python or NumPy scalar, or an
            array-like of values.
        params: Indexable pair ``(x, U)`` where ``x`` holds the observable
            for each sample (one row per sample) and ``U`` the potential
            energy of each sample. Defaults to ``self.params``.
        refB: Reference beta at which the samples were generated. Defaults
            to ``self.refB``.
        useMBAR: If True, reweight through ``mbar.MBAR`` instead of the
            direct exponential average.

    Returns:
        Array with one row per requested beta and one column per observable
        component.

    Raises:
        TypeError: If params/refB are not given and the model has none stored.
    """
    #Check if have parameters
    if params is None:
        #Use trained parameters if you have them
        if self.params is None:
            raise TypeError('self.params is None - need to train model before predicting')
        params = self.params

    if refB is None:
        if self.refB is None:
            raise TypeError('self.refB is None - need to specify reference beta')
        refB = self.refB

    #Specify "parameters" as desired data to use
    x = np.asarray(params[0])
    U = np.asarray(params[1])

    #A 1-D observable is treated as a single column so results stay (len(B), 1)
    if x.ndim == 1:
        x = x[:, np.newaxis]

    #Make sure B is an array, even if just has one element
    #(atleast_1d also covers NumPy scalars and 0-d arrays)
    B = np.atleast_1d(B)

    if useMBAR:
        mbarObj = mbar.MBAR(np.array([refB*U]), [U.shape[0]])
        predictVals = np.zeros((len(B), x.shape[1]))
        for i in range(len(B)):
            predictVals[i, :] = mbarObj.computeMultipleExpectations(x.T, B[i]*U)[0]
    else:
        #Compute what goes in the exponent and subtract out the maximum
        #Don't need to bother storing for later because compute ratio
        dBeta = B - refB
        dBetaU = (-1.0)*np.tensordot(dBeta, U, axes=0)
        dBetaUdiff = dBetaU - np.max(dBetaU, axis=1, keepdims=True)
        expVals = np.exp(dBetaUdiff)

        #And perform averaging
        numer = np.dot(expVals, x) / float(x.shape[0])
        denom = np.average(expVals, axis=1)
        predictVals = numer / denom[:, np.newaxis]

    return predictVals
def perturbWithSamples(B, refB, x, U, useMBAR=False):
    """Computes observable x (can be a vector) at a set of perturbed temperatures
    of B (array) from the original refB using potential energies at each config
    and standard reweighting. Uses MBAR code instead of mine if desired.

    Args:
        B: Beta value(s) to perturb to; a Python or NumPy scalar, or an
            array-like of values.
        refB: Reference beta at which the samples were generated.
        x: Observable for each sample; 1-D (scalar observable) or 2-D with
            one row per sample.
        U: Potential energy of each sample (same first dimension as x).
        useMBAR: If True, reweight through ``mbar.MBAR`` instead of the
            direct exponential average.

    Returns:
        Array with one row per requested beta and one column per observable
        component.

    Raises:
        ValueError: If x and U disagree in their first dimension.
    """
    x = np.asarray(x)
    U = np.asarray(U)

    if x.shape[0] != U.shape[0]:
        raise ValueError('x and U must have same shape in first dimension'
                         ' (got %i and %i)' % (x.shape[0], U.shape[0]))

    #Check shape of observables and add dimension if needed
    #Note that for observables with more than 1 dimension, things won't work
    if x.ndim == 1:
        x = x[:, np.newaxis]

    #While we're at it, also make B into an array if it isn't, just for convenience
    #(atleast_1d also covers NumPy scalars and 0-d arrays)
    B = np.atleast_1d(B)

    if useMBAR:
        mbarObj = mbar.MBAR(np.array([refB * U]), [U.shape[0]])
        outval = np.zeros((len(B), x.shape[1]))
        for i in range(len(B)):
            outval[i, :] = mbarObj.computeMultipleExpectations(x.T, B[i] * U)[0]
    else:
        #Compute what goes in the exponent and subtract out the maximum
        #Don't need to bother storing for later because compute ratio
        dBeta = B - refB
        dBetaU = (-1.0) * np.tensordot(dBeta, U, axes=0)
        dBetaUdiff = dBetaU - np.max(dBetaU, axis=1, keepdims=True)
        expVals = np.exp(dBetaUdiff)

        #And perform averaging
        numer = np.dot(expVals, x) / float(x.shape[0])
        denom = np.average(expVals, axis=1)
        outval = numer / denom[:, np.newaxis]

    return outval
def __initialize__(self, mixture):
    """Run MBAR on the mixture's samples and cache the derived quantities.

    Writes ``f``, ``u0``, ``Theta`` and ``Overlap`` onto ``mixture`` and
    ``MBAR`` and ``P`` onto ``self``. Progress is reported through ``info``
    when ``mics.verbose`` is truthy.
    """
    num_states = mixture.m
    sample_counts = mixture.n

    solver = mbar.MBAR(
        np.hstack(mixture.u),
        sample_counts,
        relative_tolerance=self.tol,
        initial_f_k=mixture.f,
        verbose=mics.verbose,
    )
    self.MBAR = solver

    mixture.f = solver.f_k
    if mics.verbose:
        info("Free energies after convergence:", mixture.f)

    # f_k + ln(n_k / N) as a column, so it broadcasts against each u block
    log_prefactor = (mixture.f + np.log(sample_counts/sum(sample_counts)))[:, np.newaxis]
    mixture.u0 = [-logsumexp(log_prefactor - block) for block in mixture.u]
    self.P = [
        np.exp(log_prefactor - mixture.u[idx] + mixture.u0[idx])
        for idx in range(num_states)
    ]

    weights = np.exp(solver.Log_W_nk)
    covariance = solver._computeAsymptoticCovarianceMatrix(weights, solver.N_k)
    mixture.Theta = np.array(covariance)
    if mics.verbose:
        info("Free-energy covariance matrix:", mixture.Theta)

    weight_products = np.matmul(solver.W_nk.T, solver.W_nk)
    mixture.Overlap = solver.N_k*weight_products
    if mics.verbose:
        info("Overlap matrix:", mixture.Overlap)
#Now need to construct matrix
#For each umbrella, need energy of all sampled configurations in that umbrella
Umat = np.zeros((len(umbDirs), len(allU)))

#Make potential energies dimensionless (note: scales allU in place)
allU *= beta

#Set reference for potential energies
#allU -= np.min(allU)

#Row i = unbiased energy plus the harmonic bias centred on the i-th reference
for i, aref in enumerate(allRef):
    Umat[i, :] = allU + beta * ((0.5 * springConst) * ((allDist - aref)**2))

#And use mbar to get pmf!
mbarobj = mbar.MBAR(Umat, numSamples)
deltaGs, deltaGerr, thetaStuff = mbarobj.getFreeEnergyDifferences()

#Single-argument print(...) calls give the same output on Python 2 and 3
print("\nFree energies between states:")
print(deltaGs[0])
print("\nwith uncertainties:")
print(deltaGerr[0])

#Now also calculate pmf
binsize = 0.05 #nm
#Shift the first edge just below the minimum distance
pmfBins = np.arange(np.min(allDist) - (1E-06), np.max(allDist), binsize)
pmfBinInds = np.digitize(allDist, pmfBins) - 1
pmfBinCents = 0.5 * (pmfBins[:-1] + pmfBins[1:])
nBins = len(pmfBinCents)
#Loop over alchemical states with this restraint and add energy for j in range(numStates): Ukn[i * numStates + j, :] = allPots[:, j] + (xyEnergy / kBT) #Print some info about numbers of samples and effective numbers of samples print('\n') print(np.sum(nSamps, axis=1)) print(nSamps.tolist()) print(' ') #Now should be set to run MBAR #Note we ignore the pV term, as it won't matter here because all states have same V for given configuration #Also note that we haven't actually sampled the states we're interested in (i.e. unrestrained) #So we have to reweight, or us perturbation to get the free energies we're interested in mbarObj = mbar.MBAR(Ukn, nSamps.flatten()) dG, dGerr = mbarObj.computePerturbedFreeEnergies(allPots.T) print(dG[0].tolist()) print('\n') #Would also be nice to generate 2D PMF based on x and y coordinates #To do this, can actually use computePMF from mbar, but need to map back into 2D #Actually, have to do for each solute separately, then combine via Boltzmann weights of bins #First define bins xWidth = xyBox[0] / 10.0 yWidth = xyBox[1] / 10.0 xBins = np.arange(-0.5 * xyBox[0], 0.5 * xyBox[0] + 0.01, xWidth) yBins = np.arange(-0.5 * xyBox[1], 0.5 * xyBox[1] + 0.01, yWidth) #And initiate PMFs and unweighted histograms
def perform_mbar(self):
    """Build an MBAR object and store it on ``self._mbar``.

    The inputs are the matrix returned by
    ``_calc_decorrelated_rpots_for_all_conditions()`` and the per-condition
    step counts reported by ``self._decor_outs``.
    """
    print('Performing MBAR')
    reduced_potentials = self._calc_decorrelated_rpots_for_all_conditions()
    steps_per_condition = self._decor_outs.get_num_steps_per_condition()
    estimator = mbar.MBAR(reduced_potentials, steps_per_condition)
    self._mbar = estimator
def getConfigWeightsSurf(kB=0.008314459848, T=298.15):
    """Computes and returns the configuration weights for simulations with a solute at an interface.
    Mostly replicates calcdGsolv in genetic_lib, but returns config weights in both the fully
    coupled and decoupled states (also includes pV term - won't matter very much for free energy
    differences, but maybe matters for weighting configurations, even though also probably not too much).

    Reads, for each of the four hard-coded 'Quad_*' directories, the topology
    '<dir>/../sol_surf.top', the trajectory '<dir>/prod.nc' and the alchemical
    output '<dir>/alchemical_output.txt', then runs MBAR over every
    (x-y restraint, alchemical state) combination.

    Args:
        kB: Boltzmann constant (default presumably in kJ/(mol*K) - confirm).
        T: Temperature used to form kBT = kB*T.

    Returns:
        (wCoupled, wDecoupled): per-configuration weights, each normalized to
        sum to one, obtained from the first and last columns of the potential
        energy array respectively.
    """
    #First define directory structure, spring constants, etc.
    simDirs = ['Quad_0.25X_0.25Y', 'Quad_0.25X_0.75Y', 'Quad_0.75X_0.25Y', 'Quad_0.75X_0.75Y']
    kXY = [10.0, 10.0, 10.0, 10.0] #spring constant in kJ/mol*A^2
    refX = [7.4550, 7.4550, 22.3650, 22.3650]
    refY = [8.6083, 25.8249, 8.6083, 25.8249]
    distRefX = [7.4550, 7.4550, 7.4550, 7.4550]
    distRefY = [8.6083, 8.6083, 8.6083, 8.6083]
    numStates = 19

    #And some constants
    kBT = kB*T
    beta = 1.0 / kBT

    #First make sure all the input arrays have the same dimensions
    #NOTE(review): numSims and allLens are assigned here but never used below,
    #so no consistency check is actually performed
    numSims = len(simDirs)
    allLens = np.array([len(a) for a in [kXY, refX, refY, distRefX, distRefY]])

    #Want to loop over all trajectories provided, storing solute position information to calculate restraints
    xyPos = None #X and Y coordinates of first heavy atom for all solutes - get shape later
    nSamps = np.zeros((len(simDirs), numStates), dtype=int) #Have as many x-y restraints as sims and same number of lambda states for each
    allPots = np.array([[]]*numStates).T #Potential energies, EXCLUDING RESTRAINT, for each simulation frame and lambda state
                                         #Will also include pV term because may matter for configurations
    xyBox = np.zeros(2)

    for i, adir in enumerate(simDirs):

        topFile = "%s/../sol_surf.top"%adir
        trajFile = "%s/prod.nc"%adir
        alchemicalFile = "%s/alchemical_output.txt"%adir

        #First load in topology and get atom indices
        top = pmd.load_file(topFile)

        #Get solute heavy atoms for each solute
        #Also get indices of surface atoms to use as references later
        #Only taking last united atoms of first SAM molecule we find
        heavyIndices = []
        for res in top.residues:
            if res.name not in ['OTM', 'CTM', 'STM', 'NTM', 'SOL']: #Assumes working with SAM surface...
                thisheavyinds = []
                for atom in res.atoms:
                    #Heavy atom = atom whose name does not start with 'H'
                    if not atom.name[0] == 'H':
                        thisheavyinds.append(atom.idx)
                heavyIndices.append(thisheavyinds)

        #Make into arrays for easier referencing
        #NOTE(review): the [:,0] indexing below needs a 2-D array, i.e. the same
        #number of heavy atoms in every solute residue - confirm
        heavyIndices = np.array(heavyIndices)

        #Load in the potential energies, INCLUDING RESTRAINT, at all states for this simulation to figure out frames to skip
        alcDat = np.loadtxt(alchemicalFile)
        startTime = alcDat[0, 1]
        startFrame = int(startTime) - 1 #Be careful here... need write frequency in alchemical file to match exactly with positions
                                        #AND assuming that have written in 1 ps increments...
                                        #Also, first frame in trajectory is NOT at time zero, so subtract 1
        #Columns 3..-2 are the per-state potentials, last column is pV
        #(thisPot is a slice of alcDat and is modified in place below)
        thisPot = alcDat[:, 3:-1]
        thispV = alcDat[:, -1]

        #Next load in the trajectory and get all solute coordinates that matter
        top.rb_torsions = pmd.TrackedList([])
        top = pt.load_parmed(top, traj=False)
        traj = pt.iterload(trajFile, top, frame_slice=(startFrame, -1))
        nFrames = len(traj)
        xyBox = np.array(traj[0].box.values)[:2] #A little lazy, but all boxes should be same and fixed in X and Y dimensions
        thisxyPos = np.zeros((nFrames, len(heavyIndices), 2))
        thisnSamps = np.zeros(numStates, dtype=int)

        #Reference x and y coordinates for this restraint
        thisRefXY = np.array([refX[i], refY[i]])

        for j, frame in enumerate(traj):

            thisPos = np.array(frame.xyz)
            thisXY = thisPos[heavyIndices[:,0]][:, :2] #Takes XY coords for first heavy atom from each solute
            thisxyPos[j,:] = thisXY
            thisnSamps[int(alcDat[j, 2])] += 1 #Lambda states must be indexed starting at 0

            #Also get wrapped positions relative to each reference face
            #AND calculate xy restraint energy to remove by adding this for each solute
            xyEnergy = 0.0
            for k in range(len(heavyIndices)):
                xy = thisXY[k]
                #Then separately reimage around the restraint reference positions to calculate energy
                xy = wl.reimage([xy], thisRefXY, xyBox)[0] - thisRefXY
                #The 0.5*(sign(...)+1) factor switches each harmonic term on only
                #when the displacement exceeds distRef
                xyEnergy += ( 0.5*kXY[i]*(0.5*(np.sign(xy[0] - distRefX[i]) + 1))*((xy[0] - distRefX[i])**2)
                              + 0.5*kXY[i]*(0.5*(np.sign(xy[1] - distRefY[i]) + 1))*((xy[1] - distRefY[i])**2) )

            #Remove the restraint energy (only for x-y restraint... z is the same in all simulations)
            #NOTE(review): restraint energy is divided by kBT but pV is added as-is;
            #presumably the file's potentials and pV are already in kBT units - confirm
            thisPot[j,:] -= (xyEnergy / kBT)

            #And also add in pV contribution
            thisPot[j,:] += thispV[j]

        #Add to other things we're keeping track of
        if xyPos is None:
            xyPos = copy.deepcopy(thisxyPos)
        else:
            xyPos = np.vstack((xyPos, thisxyPos))
        nSamps[i,:] = thisnSamps
        #NOTE(review): thisPot has one row per line of alcDat while thisxyPos has
        #nFrames rows; these must agree for the arrays below to line up - confirm
        allPots = np.vstack((allPots, thisPot))

    #Now should have all the information we need
    #Next, put it into the format that MBAR wants, adding energies as needed
    Ukn = np.zeros((len(simDirs)*numStates, int(np.sum(nSamps))))

    for i in range(len(simDirs)):

        #First get energy of ith type of x-y restraint for all x-y positions
        thisRefXY = np.array([refX[i], refY[i]])
        #Must do by looping over each solute
        #(heavyIndices here is whatever the last simulation directory produced)
        xyEnergy = np.zeros(xyPos.shape[0])
        for k in range(len(heavyIndices)):
            xy = wl.reimage(xyPos[:,k,:], thisRefXY, xyBox) - thisRefXY
            xyEnergy += ( 0.5*kXY[i]*(0.5*(np.sign(xy[:,0] - distRefX[i]) + 1))*((xy[:,0] - distRefX[i])**2)
                          + 0.5*kXY[i]*(0.5*(np.sign(xy[:,1] - distRefY[i]) + 1))*((xy[:,1] - distRefY[i])**2) )

        #Loop over alchemical states with this restraint and add energy
        for j in range(numStates):
            Ukn[i*numStates+j, :] = allPots[:,j] + (xyEnergy / kBT)

    #Now should be set to run MBAR
    mbarObj = mbar.MBAR(Ukn, nSamps.flatten())

    #Following computePMF in MBAR to get configuration weights with desired potential of interest
    logwCoupled = mbarObj._computeUnnormalizedLogWeights(allPots[:,0])
    logwDecoupled = mbarObj._computeUnnormalizedLogWeights(allPots[:,-1])

    #Also report average solute-system LJ and coulombic potential energies in the fully coupled ensemble
    #(with restraints removed)
    #Just printing these values
    #Column 4 is used throughout as the state separating the electrostatic and LJ legs
    avgQ, stdQ = mbarObj.computeExpectations(allPots[:,0] - allPots[:,4], allPots[:,0])
    avgLJ, stdLJ = mbarObj.computeExpectations(allPots[:,4] - allPots[:,-1], allPots[:,0])
    print("\nAverage solute-system electrostatic potential energy: %f +/- %f"%(avgQ, stdQ))
    print("Average solute-system LJ potential energy: %f +/- %f\n"%(avgLJ, stdLJ))

    #Also print information that can be used to break free energy into components
    #Start by just printing all of the free energies between states
    alldGs, alldGerr = mbarObj.computePerturbedFreeEnergies(allPots.T)
    print("\nAll free energies relative to first (coupled) state:")
    print(alldGs.tolist())
    print(alldGerr.tolist())
    #And the free energy changes associated with just turning on LJ and electrostatics separately
    dGq = alldGs[0][0] - alldGs[0][4]
    dGqErr = np.sqrt((alldGerr[0][0]**2) + (alldGerr[0][4])**2)
    print("\nElectrostatic dG (with LJ on): %f +/- %f"%(dGq, dGqErr))
    dGlj = alldGs[4][4] - alldGs[4][-1]
    dGljErr = np.sqrt((alldGerr[4][4]**2) + (alldGerr[4][-1])**2)
    print("\nLJ dG (no charges): %f +/- %f"%(dGlj, dGljErr))
    #Now calculate average potential energy differences needed for computing relative entropies
    #(the dUq call repeats the avgQ call above with identical arguments)
    dUq, dUqErr = mbarObj.computeExpectations(allPots[:,0] - allPots[:,4], allPots[:,0])
    print("\nAverage electrostatic potential energy in fully coupled state: %f +/- %f"%(dUq, dUqErr))
    dUlj, dUljErr = mbarObj.computeExpectations(allPots[:,4] - allPots[:,-1], allPots[:,4])
    print("\nAverage LJ potential energy (no charges) in uncharged state: %f +/- %f"%(dUlj, dUljErr))

    #And return weights after exponentiating log weights and normalizing
    wCoupled = np.exp(logwCoupled)
    wCoupled /= np.sum(wCoupled)
    wDecoupled = np.exp(logwDecoupled)
    wDecoupled /= np.sum(wDecoupled)

    return wCoupled, wDecoupled
def getConfigWeightsBulk(alchfile='alchemical_output.txt', kB=0.008314459848, T=298.15):
    """Given an alchemical output file, computes and returns configuration weights
    in both the fully coupled and decoupled ensembles of the solute.

    Args:
        alchfile: Text file readable by ``np.loadtxt``. Column 2 holds the
            sampled state index, columns 3..-2 the potential energy at each
            state and the last column the pV term.
        kB: Boltzmann constant, in the energy units of the file per kelvin.
        T: Temperature; energies are divided by kB*T before running MBAR.

    Returns:
        (wCoupled, wDecoupled): per-configuration weights, each normalized to
        sum to one, for the first and last state respectively.
    """
    #ndmin=2 keeps the column indexing valid even for a single-row file
    rawdat = np.loadtxt(alchfile, ndmin=2)
    lstates = rawdat[:,2]
    Ukn = rawdat[:,3:-1]
    pV = rawdat[:,-1] #pV term is in last column

    #Number of samples drawn from each state
    numStates = Ukn.shape[1]
    Nsamps = np.array([np.sum(lstates == i) for i in range(numStates)], dtype=int)

    #pV term doesn't matter for free energy differences
    #But does matter for configuration weights (even though it's a small contribution)
    Ukn += pV[:, np.newaxis]
    Ukn /= (kB*T)

    mbarObj = mbar.MBAR(Ukn.T, Nsamps)

    #Following computePMF in MBAR to get configuration weights with desired potential of interest
    logwCoupled = mbarObj._computeUnnormalizedLogWeights(Ukn[:,0])
    logwDecoupled = mbarObj._computeUnnormalizedLogWeights(Ukn[:,-1])

    #Also report average solute-system LJ and coulombic potential energies in the fully coupled ensemble
    #Just printing these values
    #Column 4 is used throughout as the state separating the electrostatic and LJ legs
    avgQ, stdQ = mbarObj.computeExpectations(Ukn[:,0] - Ukn[:,4], Ukn[:,0])
    avgLJ, stdLJ = mbarObj.computeExpectations(Ukn[:,4] - Ukn[:,-1], Ukn[:,0])
    print("\nAverage solute-water electrostatic potential energy: %f +/- %f"%(avgQ, stdQ))
    print("Average solute-water LJ potential energy: %f +/- %f\n"%(avgLJ, stdLJ))

    #Also print information that can be used to break free energy into components
    #Start by just printing all of the free energies between states
    alldGs, alldGerr = mbarObj.computePerturbedFreeEnergies(Ukn.T)
    print("\nAll free energies relative to first (coupled) state:")
    print(alldGs.tolist())
    print(alldGerr.tolist())

    #And the free energy changes associated with just turning on LJ and electrostatics separately
    dGq = alldGs[0][0] - alldGs[0][4]
    dGqErr = np.sqrt((alldGerr[0][0]**2) + (alldGerr[0][4])**2)
    print("\nElectrostatic dG (with LJ on): %f +/- %f"%(dGq, dGqErr))
    dGlj = alldGs[4][4] - alldGs[4][-1]
    dGljErr = np.sqrt((alldGerr[4][4]**2) + (alldGerr[4][-1])**2)
    print("\nLJ dG (no charges): %f +/- %f"%(dGlj, dGljErr))

    #Now calculate average potential energy differences needed for computing relative entropies
    #Same observable and ensemble as avgQ/stdQ above, so reuse instead of recomputing
    dUq, dUqErr = avgQ, stdQ
    print("\nAverage electrostatic potential energy in fully coupled state: %f +/- %f"%(dUq, dUqErr))
    dUlj, dUljErr = mbarObj.computeExpectations(Ukn[:,4] - Ukn[:,-1], Ukn[:,4])
    print("\nAverage LJ potential energy (no charges) in uncharged state: %f +/- %f"%(dUlj, dUljErr))

    #And return weights after exponentiating log weights
    #Shift by the maximum first: the normalized result is unchanged, but exp()
    #can no longer overflow (or underflow every entry to zero)
    wCoupled = np.exp(logwCoupled - np.max(logwCoupled))
    wCoupled /= np.sum(wCoupled)
    wDecoupled = np.exp(logwDecoupled - np.max(logwDecoupled))
    wDecoupled /= np.sum(wDecoupled)

    return wCoupled, wDecoupled
def _load_mbar_results(file: str):
    """Rebuild an MBAR object from a pickled results dictionary.

    Args:
        file: Path to a pickle file containing a mapping with the keys
            ``"u_kn"`` and ``"N_k"``.

    Returns:
        ``mbar.MBAR`` constructed from the stored ``u_kn`` and ``N_k``,
        initialised with BAR.
    """
    # NOTE: unpickling can execute arbitrary code - only load trusted files.
    # The context manager closes the handle even if unpickling fails.
    with open(file, "rb") as handle:
        results = pickle.load(handle)
    return mbar.MBAR(results["u_kn"], results["N_k"], initialize="BAR", verbose=True)
interp_i_cross = i # Evaluate the interpolation functions at this set of points ls = fs(mu_interp) lspl1 = fspl1(mu_interp) # -------------------------------------------------- # # Use MBAR to shift subdomains by calculating free # # energy difference between data in overlap region # # -------------------------------------------------- # print "Joining subdomains %d and %d using MBAR" % (s, s + 1) # See pymbar documentation for detailed explanation U_kn = np.array([ls, lspl1]) N_k = [interp_i_cross, Ninterp - interp_i_cross] mbar_obj = mbar.MBAR(U_kn, N_k) matrix = mbar_obj.getFreeEnergyDifferences(compute_uncertainty=True) # Shift and error, output by pymbar shift = matrix[0][1, 0] sigma = matrix[1][1, 0] print "shift: ", shift, "+/-", sigma # Minimise free energy difference between subdomains by shifting the higher-mu subdomain data_list[s + 1] += shift # Sample shift error from a mean 0 Gaussian with standard deviation 'sigma' for j in range(Nsamples_join): uncertainty = np.random.normal(loc=0.0, scale=sigma) data_list_err[j][s + 1] += ( shift + uncertainty
#Count samples per state and add the pV term to every state's potential
Nsamps = np.zeros(Ukn.shape[1])
for i in range(Ukn.shape[1]):
    gU = timeseries.statisticalInefficiency(Ukn[:, i])
    print("Correlation time if using state %i: %f" % (i, gU))
    Nsamps[i] = np.sum((lstates == i))
    print(Nsamps[i])
    Ukn[:, i] += PVdat

#Don't need to order them - it just makes things more complicated when computing averages
#neworder = np.argsort(lstates)
#Ukn = Ukn[neworder]

#Make energies dimensionless
Ukn /= kBT

mbarObj = mbar.MBAR(Ukn.T, Nsamps)

dG, dGerr, thetaStuff = mbarObj.getFreeEnergyDifferences()

print(dG[0])
print(dGerr[0])

#Sign flipped: dG[0][-1] is the first-to-last-state difference
print("dGsolv = %f" % (-1.0 * dG[0][-1]))
print("dGsolvError = %f" % (dGerr[0][-1]))

#pickle writes bytes, so the file must be opened in binary mode
with open('mbar_object.pkl', 'wb') as outfile:
    pickle.dump(mbarObj, outfile)

np.savetxt('alchemical_U.txt', Ukn, header='Potential energies at all interaction states (kBT)')
#Command-line options: input .npz archive, PMF bin size and plot title
parser = argparse.ArgumentParser()
#The input file is mandatory; without it open() below would fail on None
parser.add_argument('-f', '--infile', type=str, required=True)
parser.add_argument('-b', '--binsize', type=float, default=0.5)
parser.add_argument('-t', '--title', type=str, default='noTitleProvided')
args = parser.parse_args()

with open(args.infile, 'rb') as f:
    npzfile = np.load(f)
    #Read data file (inside the with-block, while the file is still open)
    Umat = npzfile['Umat']
    numSamples = npzfile['numSamples']
    allDists1D = npzfile['allDists1D']

#Now create and plot PMF
mbarobj = mbar.MBAR(Umat, numSamples, verbose=True)
deltaGs, deltaGerr, thetaStuff = mbarobj.getFreeEnergyDifferences()
print("\nFree energies between states:")
print(deltaGs[0])
print("\nwith uncertainties:")
print(deltaGerr[0])

#Now also calculate pmf
#Bin edges start at the smallest distance and advance by the requested bin size
pmfBins = np.arange(
    np.min(allDists1D) - (1E-20), np.max(allDists1D), args.binsize)
pmfBinInds = np.digitize(allDists1D, pmfBins) - 1
pmfBinCents = 0.5 * (pmfBins[:-1] + pmfBins[1:])
nBins = len(pmfBinCents)