def GetOperatorStats(self, RunPath_):
    '''Collect operator statistics from ``operators.dat`` in a run directory.

    TODO: Generalize to MD.

    Args:
        RunPath_: directory that contains ``operators.dat``.

    Returns:
        A list of ``[value, error, nsamples]`` entries.

        * ``JobType == 'CL'`` and ``Program == 'polyFTS'``: one entry per
          analysed column, taken from ``stats.doStats``.  Columns
          1, 3, 5, ... are analysed (the step column and every second
          column, the imaginary parts, are skipped).  Analysis stops at
          the first column for which ``stats.autoWarmupMSER`` fails.
        * ``JobType == 'SCFT'`` and ``Program == 'polyFTS'``: the values of
          columns ``1 .. Nspecies + 2`` of the last row, each reported with
          error ``0.`` and sample count ``1``.
        * Anything else: an empty list.
    '''
    operator_statistics = []

    if self.JobType == 'CL' and self.Program == 'polyFTS':
        number_columns = self.Nspecies * 2 + 4
        # The context manager guarantees the handle is released even if
        # the statistics helpers raise.
        with open(os.path.join(RunPath_, 'operators.dat'), 'r') as datafile:
            # NOTE(review): other call sites rewind the handle with
            # datafile.seek(0) between autoWarmupMSER calls; whether that is
            # required here depends on the stats module -- confirm.
            for c in range(0, number_columns, 2):
                try:
                    warmup, Data, _nwarmup = stats.autoWarmupMSER(
                        datafile, c + 1)
                except Exception:
                    # Best effort: keep whatever was gathered so far.
                    break
                (nsamples, (_lowest, _highest), Val, Valerr, _kappa,
                 _unbiasedvar, _autocor) = stats.doStats(warmup, Data, False)
                operator_statistics.append([Val, Valerr, nsamples])

    elif self.JobType == 'SCFT' and self.Program == 'polyFTS':
        number_columns = self.Nspecies + 2
        # ndmin=2 keeps a one-row file two-dimensional, so [-1] is always
        # the last row rather than the last scalar of a 1-D array.
        data = np.loadtxt(os.path.join(RunPath_, 'operators.dat'),
                          ndmin=2)[-1]
        for c in range(number_columns):
            operator_statistics.append([data[c + 1], 0., 1])

    return operator_statistics
ini=re.sub('__NumBlocks__',str(numBlocks),ini) ini=re.sub('__ReadField__','Yes',ini) runfile = open("run.in","w") runfile.write(ini) runfile.close() #call(["PolyFTSGPU.x","run.in"]) call('{} run.in > run.out'.format(FTS), shell=True) # Data analysis: # Partition 1 #datafile = open('model1_operators.dat','r') datafile = open('operators.dat','r') # ReP1 try: warmup, Data, nwarmup = stats.autoWarmupMSER(datafile,3) except: print("Failed on ReP1") break (nsamples,(min,max),P1,P1err,kappa,unbiasedvar,autocor)=stats.doStats(warmup,Data,False) datafile.seek(0) # ImP1 try: warmup, Data, nwarmup = stats.autoWarmupMSER(datafile,4) except: print("Failed on ImP1") break (nsamples,(min,max),imP1,imP1err,kappa,unbiasedvar,autocor)=stats.doStats(warmup,Data,False) datafile.seek(0) # mu1 try:
def GetThermo(ThermoLog, fi='lammps', obs=None, cols=None, autowarmup=True,
              nwarmup=100, plot=False, plotDir='Thermo_plots'):
    """Compute statistics for selected columns of a thermodynamic log.

    Args:
        ThermoLog: path of the log file.
        fi: log file format.  'openmm' and 'lammps' logs are first converted
            through ``log2txt``; any other value uses ``ThermoLog`` as is.
        obs: observable names to analyse.  They are looked up in the
            ``#``-prefixed header line of the (converted) log.
        cols: column indices to analyse.  Give either ``obs`` or ``cols``,
            never both.
        autowarmup: when true the warm-up length comes from
            ``stats.autoWarmupMSER``; otherwise ``nwarmup`` is used with
            ``stats.extractData``.
        nwarmup: number of warm-up samples when ``autowarmup`` is false.
        plot: when true, save one PNG per observable into ``plotDir``.
        plotDir: directory for the plots (created if missing).

    Returns:
        ``(obsID, Stats)``: the observable names and, for each one,
        ``[mean, sqrt(variance), correlation time, standard error,
        nsamples / correlation time]`` as reported by ``stats.doStats``.

    Raises:
        ValueError: if both or neither of ``obs``/``cols`` are given, or if
            ``obs`` is given and the log has no ``#`` header line.
    """
    if obs is not None and cols is not None:
        # The original built this exception without raising it.
        raise ValueError(
            'Read data either by observable name or column index but not both!'
        )
    if obs is None and cols is None:
        raise ValueError('Either obs or cols must be provided.')

    if plot:
        try:
            os.mkdir(plotDir)
        except OSError:
            # Typically the directory exists already; stay best-effort.
            pass
        print('...Thermo plots will be saved in {}...\n'.format(plotDir))

    # convert log file:
    if fi == 'openmm':
        ThermoLog = log2txt.log2txt_openmm([ThermoLog])[0]
    elif fi == 'lammps':
        section = 'PRODUCTION RUNS'
        ThermoLog = log2txt.log2txt_lammps([ThermoLog], section,
                                           'production')[0]
    print('new log file: {}'.format(ThermoLog))

    obsNames = None
    obsID = []
    Stats = []

    # do stats
    with open(ThermoLog, 'r') as log_file:
        if obs is not None:
            # Single pass over the file; the last header line wins, as in
            # the original.  A missing header now raises instead of
            # spinning forever.
            for line in log_file:
                if line.startswith('#'):
                    obsNames = line.split()[1:]
                    print('obsNames {}'.format(obsNames))
                    cols = [
                        obsNames.index(val) for val in obsNames if val in obs
                    ]
            if cols is None:
                raise ValueError(
                    'No header line starting with "#" found in {}'.format(
                        ThermoLog))
        print('cols {}'.format(cols))

        for col in cols:
            if autowarmup:
                warmup, Data, nwarmup = stats.autoWarmupMSER(log_file, col)
                print("Auto warmup detection with MSER-5 => ", nwarmup)
            else:
                warmup, Data = stats.extractData(log_file, col, nwarmup)
            (nsamples, (lowest, highest), mean, semcc, kappa, unbiasedvar,
             _autocor) = stats.doStats(
                 warmup, Data, False, False,
                 '_{0}_mol{1}'.format(log_file.name, col))

            try:
                obsName = obsNames[col]
            except (TypeError, IndexError):
                # No header was parsed (column mode) or index out of range.
                obsName = 'col{}'.format(col)

            # Note that there is no unbiased estimator for the population
            # standard deviation; sqrt(var) is used as an indicative
            # estimator.  ddof=0 gives the biased (1/N) estimate.
            lines = "".join([
                '\n==== {} ===='.format(obsName),
                "\n - Mean = {} +/- {}".format(mean, semcc),
                "\n - Equilibrated samples = {}".format(nsamples),
                "\n - Correlation time = {}".format(kappa),
                "\n - Effective # samples = {}".format(nsamples / kappa),
                "\n - Reduced-bias variance = {}".format(unbiasedvar),
                "\n - S.D. (unbiased, biased) = {} {}".format(
                    np.sqrt(unbiasedvar), np.std(Data, ddof=0)),
                "\n - Min, Max = {} {}\n".format(lowest, highest),
            ])

            if plot:
                # Shade the warm-up region, then draw the full series.
                plt.axvspan(0, nwarmup, alpha=0.5, color='#6495ED')
                plt.plot(np.hstack((warmup, Data)))
                plt.xlim(0)
                plt.xlabel('timestep')
                plt.ylabel(obsName)
                plt.savefig("{}/{}.png".format(plotDir, obsName),
                            bbox_inches='tight')
                plt.close()

            print(lines)

            Stats.append([mean, np.sqrt(unbiasedvar), kappa, semcc,
                          nsamples / kappa])
            obsID.append(obsName)

    return obsID, Stats
def _rgree_atom_masses(topology, atom_ids, mass_table):
    """Return the masses of ``atom_ids``; unknown elements count as 1.0."""
    return np.array([
        mass_table.get(str(topology.atom(index).element), 1.)
        for index in atom_ids
    ])


def _rgree_series_stats(path, col, autowarmup, nwarmup):
    """Run the warm-up/statistics helpers on column ``col`` of ``path``."""
    # The handle is closed as soon as the statistics have been computed.
    with open(path, 'r') as handle:
        if autowarmup:
            warmup, data, nwarmup = stats.autoWarmupMSER(handle, col)
        else:
            warmup, data = stats.extractData(handle, col, nwarmup)
        (nsamples, (lowest, highest), mean, semcc, kappa, unbiasedvar,
         _autocor) = stats.doStats(warmup, data, False, False,
                                   '_{0}_mol{1}'.format(handle.name, col))
    # Keep every kappa-th sample to get decorrelated samples.
    decorrelated = data[::int(np.max([1., kappa]))]
    return {
        'nwarmup': nwarmup,
        'samples': decorrelated,
        'nsamples': nsamples,
        'min': lowest,
        'max': highest,
        'mean': mean,
        'sem': semcc,
        'kappa': kappa,
        'var': unbiasedvar,
        # [Avg, Std, CorrTime, Err, NUncorrSamples]
        'row': [mean, np.sqrt(unbiasedvar), kappa, semcc, nsamples / kappa],
    }


def _rgree_report(title, result):
    """Format the per-molecule statistics paragraph for the report file."""
    # Note that there is no unbiased estimator for the population standard
    # deviation; sqrt(var) is used as an indicative estimator.  ddof=0 gives
    # the biased (1/N) estimate, evaluated on the decorrelated samples.
    return "".join([
        '\n==== {} ===='.format(title),
        "\n - Mean = {} +/- {}".format(result['mean'], result['sem']),
        "\n - Equilibrated samples = {}".format(result['nsamples']),
        "\n - Correlation time = {}".format(result['kappa']),
        "\n - Effective # samples = {}".format(
            result['nsamples'] / result['kappa']),
        "\n - Reduced-bias variance = {}".format(result['var']),
        "\n - S.D. (unbiased, biased) = {} {}".format(
            np.sqrt(result['var']), np.std(result['samples'], ddof=0)),
        "\n - Min, Max = {} {}\n".format(result['min'], result['max']),
    ])


def _rgree_plot(series, nwarmup, ylabel, path):
    """Save a time-series plot with the warm-up region shaded."""
    plt.axvspan(0, nwarmup, alpha=0.5, color='#6495ED')
    plt.plot(series, "k-")
    plt.xlim(0)
    plt.xlabel('timestep')
    plt.ylabel(ylabel)
    plt.savefig(path, bbox_inches='tight')
    plt.close()


def _rgree_rms_summary(sq_samples, chain_rows):
    """Reduce pooled squared samples and per-chain rows to RMS statistics.

    Returns ``(rms, rms_err, rms_std, corr_time, corr_time_err, n_uncorr)``.
    """
    sq_samples = np.array(sq_samples)
    rms = np.sqrt(np.mean(sq_samples))
    # Propagate SEM / Std of the squared quantity to its square root.
    rms_err = 1. / 2. * scipy.stats.sem(sq_samples) / rms
    rms_std = 1. / 2. * np.std(sq_samples, ddof=1) / rms
    chain_rows = np.array(chain_rows)
    corr_times = chain_rows[:, 2]
    corr_time = np.mean(corr_times)
    corr_time_err = np.sqrt(np.var(corr_times) / len(corr_times))
    n_uncorr = np.mean(chain_rows[:, 4])
    return rms, rms_err, rms_std, corr_time, corr_time_err, n_uncorr


def GetRgRee(traj, DOP, NP, NAtomsPerChain=None, plotDir='RgRee_plots',
             RgDatName='RgTimeSeries', ReeDatName='ReeTimeSeries',
             RgStatOutName='RgReeStats', Ext='.dat', res0Id=0,
             autowarmup=True, nwarmup=100, plot=False):
    """Compute RMS radius of gyration and end-to-end distance of NP chains.

    NAtomsPerChain: used if running CG system, if provided will assume there
    is one residue per chain.  Multiply coordinates by 10 if input traj was
    generated by lammps and unit is nonDim.

    Args:
        traj: trajectory object; its topology, ``n_frames`` and
            ``atom_slice`` are used together with ``md.compute_rg`` and
            ``md.compute_distances``.
        DOP: number of residues per chain (used when ``NAtomsPerChain`` is
            not given).
        NP: number of chains.
        NAtomsPerChain: number of atoms per chain for CG systems.
        plotDir: directory for the per-chain plots when ``plot`` is true.
        RgDatName, ReeDatName: base names of the Rg / Ree time-series files.
        RgStatOutName: base name of the text report.
        Ext: extension appended to every data file name.
        res0Id: index of the first residue of the first chain.
        autowarmup: use ``stats.autoWarmupMSER`` instead of a fixed
            ``nwarmup``.
        nwarmup: fixed warm-up length when ``autowarmup`` is false.
        plot: save per-chain Rg and Ree plots.

    Side effects:
        Writes ``RgDatName + Ext``, ``'RgSqTimeSeries' + Ext``,
        ``ReeDatName + Ext``, ``'ReeSqTimeSeries' + Ext`` and
        ``RgStatOutName + Ext`` in the working directory, uses a scratch
        file ``tmp.dat`` for diblock chains, and prints progress.

    Returns:
        ``(RgRMS, ReeRMS, RgRMSErr, ReeRMSErr, RgRMSCorrTime,
        RgRMSCorrTimeErr, RgRMSNUncorrSamples, ReeRMSCorrTime,
        ReeRMSCorrTimeErr, ReeRMSNUncorrSamples, RgRMSStd, ReeRMSStd,
        RgRMS_b, RgRMSErr_b, RgRMSStd_b, RgRMSCorrTime_b,
        RgRMSCorrTimeErr_b, RgRMSNUncorrSamples_b, BlockResName)``; the
        ``*_b`` lists and ``BlockResName`` are empty unless a diblock was
        detected.
    """
    ElementDictionary = {
        "carbon": 12.01,
        "hydrogen": 1.008,
        "oxygen": 16.00,
        "nitrogen": 14.001,
        "virtual site": 1.0,
        "sodium": 23.0,
        "chloride": 35.5
    }
    topology = traj.topology

    if plot:
        try:
            os.mkdir(plotDir)
        except OSError:
            # Typically the directory exists already; stay best-effort.
            pass
        print('...Rg and Ree plots will be saved in {}...\n'.format(plotDir))

    # Column 0 of every time-series file is the frame index.
    RgTimeseries = [range(traj.n_frames)]
    RgSqTimeseries = [range(traj.n_frames)]
    ReeTimeseries = [range(traj.n_frames)]
    ReeSqTimeseries = [range(traj.n_frames)]
    Rgheader = "Frame "
    RgSqheader = "Frame "
    Reeheader = "Frame "
    ReeSqheader = "Frame "
    RgSqStats = []
    ReeSqStats = []
    RgSqList = []
    ReeSqList = []
    txtRg = ""

    # get indices of residues in all chains
    MoleculeResidueList = []
    BlockResName = []
    RgSqList_b = [[], []]
    RgSqStats_b = [[], []]
    block = False
    if not NAtomsPerChain:
        # number residues per chain = DOP (for AA systems)
        for j in range(NP):
            resId = range(res0Id + j * DOP, res0Id + (j + 1) * DOP)
            MoleculeResidueList.append(resId)
            if j != 0:
                continue
            # check if diblock (only the first chain is inspected)
            resname = [
                res.name for res in topology.residues if res.index in resId
            ]
            resname1 = resname[0]
            resname2 = resname[-1]
            i1 = np.where(np.array(resname) == resname1)[0]
            i2 = np.where(np.array(resname) == resname2)[0]
            # NOTE(review): i1/i2 are positions inside `resname` while resId
            # holds topology residue indices; the two only coincide when the
            # first chain starts at residue 0 -- confirm intended behaviour
            # for res0Id != 0.
            if (np.min(i1) == np.min(resId)
                    and int(np.min(i2) - np.max(i1)) == 1
                    and np.max(i2) == np.max(resId)):
                block = True
                BlockResName = [resname1, resname2]
                print(
                    'Detect diblock:\n block 1: {} {}-mer, block 2: {} {}-mer'
                    .format(resname1, len(i1), resname2, len(i2)))
    else:
        # 1 residue per chain (for CG system)
        a0Id = np.min([atom.index for atom in topology.residue(res0Id).atoms])
        for i in range(NP):
            atomId_per_chain = range(a0Id + i * NAtomsPerChain,
                                     a0Id + (i + 1) * NAtomsPerChain)
            resId_tmp = [
                topology.atom(aId).residue.index for aId in atomId_per_chain
            ]
            MoleculeResidueList.append(np.unique(resId_tmp))

    for j, resId in enumerate(MoleculeResidueList):
        chain_no = j + 1
        resIdLow = np.min(resId)
        resIdUp = np.max(resId)
        atom_indices = topology.select('resid {} to {}'.format(
            resIdLow, resIdUp))
        mass_list = _rgree_atom_masses(topology, atom_indices,
                                       ElementDictionary)
        if j == 0:
            print('Indices of atoms in chain {} \n{}'.format(
                chain_no, atom_indices))
            print('Mass of atoms in a chain {}'.format(mass_list))
        print('Evaluate Rg and Ree of chain {}/{}'.format(
            chain_no, len(MoleculeResidueList)))

        # === Compute Rg ===
        Rg = md.compute_rg(traj.atom_slice(atom_indices), masses=mass_list)
        RgTimeseries.append(Rg.tolist())
        Rgheader += 'Rg{} '.format(chain_no)
        np.savetxt(RgDatName + Ext, np.transpose(RgTimeseries), fmt='%5.5f',
                   header=Rgheader)
        RgSq = Rg**2.
        RgSqTimeseries.append(RgSq.tolist())
        # The squared-series label belongs to the squared-series header
        # (it was previously appended to the Rg header by mistake).
        RgSqheader += 'Rg{}^2 '.format(chain_no)
        np.savetxt('RgSqTimeSeries' + Ext, np.transpose(RgSqTimeseries),
                   fmt='%5.5f', header=RgSqheader)

        # do stats on Rg^2
        result = _rgree_series_stats('RgSqTimeSeries' + Ext, chain_no,
                                     autowarmup, nwarmup)
        nwarmup = result['nwarmup']
        RgSqList.extend(result['samples'])
        txtRg += _rgree_report('Rg^2 for molecule {}'.format(chain_no),
                               result)
        RgSqStats.append(result['row'])

        if plot:
            _rgree_plot(Rg, nwarmup, 'Radius-of-gryation',
                        "{}/Rg{}.png".format(plotDir, chain_no))

        # Rg of blocks
        if block:
            for i, name in enumerate(BlockResName):
                block_atoms = topology.select(
                    "resid {} to {} and resname '{}'".format(
                        resIdLow, resIdUp, name))
                block_masses = _rgree_atom_masses(topology, block_atoms,
                                                  ElementDictionary)
                RgSq_block = md.compute_rg(traj.atom_slice(block_atoms),
                                           masses=block_masses)**2.
                data = [range(0, len(RgSq_block)), RgSq_block.tolist()]
                np.savetxt('tmp.dat', np.transpose(data), fmt='%5.5f')
                # do stats on Rg^2 of this block
                result = _rgree_series_stats('tmp.dat', 1, autowarmup,
                                             nwarmup)
                nwarmup = result['nwarmup']
                RgSqList_b[i].extend(result['samples'])
                RgSqStats_b[i].append(result['row'])
            os.remove("tmp.dat")

        # === Compute Ree ===
        atom_pairs = [np.min(atom_indices), np.max(atom_indices)]
        distances = md.compute_distances(traj, atom_pairs=[atom_pairs],
                                         periodic=False, opt=True)
        Ree = [frame[0] for frame in distances.tolist()]
        ReeTimeseries.append(Ree)
        Reeheader += 'Ree{} '.format(chain_no)
        np.savetxt(ReeDatName + Ext, np.transpose(ReeTimeseries),
                   fmt='%5.5f', header=Reeheader)
        ReeSq = np.array(Ree)**2.
        ReeSqTimeseries.append(ReeSq.tolist())
        # Same header fix as for Rg^2 above.
        ReeSqheader += 'Ree{}^2 '.format(chain_no)
        np.savetxt('ReeSqTimeSeries' + Ext, np.transpose(ReeSqTimeseries),
                   fmt='%5.5f', header=ReeSqheader)

        # do stats on Ree^2
        result = _rgree_series_stats('ReeSqTimeSeries' + Ext, chain_no,
                                     autowarmup, nwarmup)
        nwarmup = result['nwarmup']
        ReeSqList.extend(result['samples'])
        txtRg += _rgree_report('Ree^2 for molecule {}'.format(chain_no),
                               result)
        ReeSqStats.append(result['row'])

        if plot:
            _rgree_plot(Ree, nwarmup, 'End-to-end distance',
                        "{}/Ree{}.png".format(plotDir, chain_no))

    # get RMS Rg and Ree
    (RgRMS, RgRMSErr, RgRMSStd, RgRMSCorrTime, RgRMSCorrTimeErr,
     RgRMSNUncorrSamples) = _rgree_rms_summary(RgSqList, RgSqStats)

    # Rg of blocks
    RgRMS_b = []
    RgRMSErr_b = []
    RgRMSStd_b = []
    RgRMSCorrTime_b = []
    RgRMSCorrTimeErr_b = []
    RgRMSNUncorrSamples_b = []
    if block:
        for i, _name in enumerate(BlockResName):
            (rms, rms_err, rms_std, corr_time, corr_time_err,
             n_uncorr) = _rgree_rms_summary(RgSqList_b[i], RgSqStats_b[i])
            RgRMS_b.append(rms)
            RgRMSErr_b.append(rms_err)
            RgRMSStd_b.append(rms_std)
            RgRMSCorrTime_b.append(corr_time)
            RgRMSCorrTimeErr_b.append(corr_time_err)
            RgRMSNUncorrSamples_b.append(n_uncorr)

    # Ree
    (ReeRMS, ReeRMSErr, ReeRMSStd, ReeRMSCorrTime, ReeRMSCorrTimeErr,
     ReeRMSNUncorrSamples) = _rgree_rms_summary(ReeSqList, ReeSqStats)

    lines = ""
    lines += '\n\n====================='
    lines += '\n\nRMS of Rg is: {0:2.4f} +/- {1:2.5f}'.format(RgRMS, RgRMSErr)
    lines += '\nRMS Rg correlation time: {0:5.4f} +/- {1:5.6f}'.format(
        RgRMSCorrTime, RgRMSCorrTimeErr)
    lines += '\n\nRMS of Ree is: {0:2.4f} +/- {1:2.5f}'.format(
        ReeRMS, ReeRMSErr)
    lines += '\nRMS Ree correlation time: {0:5.4f} +/- {1:5.6f}'.format(
        ReeRMSCorrTime, ReeRMSCorrTimeErr)
    if block:
        for i, name in enumerate(BlockResName):
            lines += '\n\nRMS of Rg for block %i-%s is: %2.4f +/- %2.5f' % (
                i + 1, name, RgRMS_b[i], RgRMSErr_b[i])
            lines += '\nRMS Rg correlation time: {0:5.4f} +/- {1:5.6f}'.format(
                RgRMSCorrTime_b[i], RgRMSCorrTimeErr_b[i])
    print(lines + '\n')
    txtRg += lines

    # Close the report explicitly so it is flushed before returning.
    with open(RgStatOutName + Ext, 'w') as report:
        report.write(txtRg)

    return RgRMS, ReeRMS, RgRMSErr, ReeRMSErr, RgRMSCorrTime, RgRMSCorrTimeErr, RgRMSNUncorrSamples, ReeRMSCorrTime, ReeRMSCorrTimeErr, ReeRMSNUncorrSamples, RgRMSStd, ReeRMSStd, RgRMS_b, RgRMSErr_b, RgRMSStd_b, RgRMSCorrTime_b, RgRMSCorrTimeErr_b, RgRMSNUncorrSamples_b, BlockResName
help='Filename containing scalar statistical data.') parser.add_argument('-tol', '--tolerance', default=1e-6, type=float, help='Tolerance for Optimizer') args = parser.parse_args() # in dH file dH model0 Re Im | dH model1 Re Im #ie. Delta H_{BA} = H_B(w^A)-H_A(w^A) #ie. Delta H_{AB} = H_A(w^B)-H_B(w^B) columns = [1, 2, 3, 4] prodlist = [] for col in columns: warmup, proddata, nwarmup = autoWarmupMSER(args.file, col) prodlist.append(proddata) Refun = CostStuff() Refun.H_AB = prodlist[0] Refun.H_BA = prodlist[2] Re_dict = minimize(Refun.CostFunction, x0=1, tol=args.tolerance) Imfun = CostStuff() Imfun.H_AB = prodlist[1] Imfun.H_BA = prodlist[3] Im_dict = minimize(Imfun.CostFunction, x0=1, tol=args.tolerance) if Re_dict.status + Im_dict.status == 0: print(f'Free Energy: {Re_dict.x[0]} (Re) {Im_dict.x[0]} (Im)') else: print('Optimizer Problems')
else: ini = re.sub('__NumBlocks__', str(numBlocks), ini) ini = re.sub('__ReadField__', 'Yes', ini) runfile = open("run.in", "w") runfile.write(ini) runfile.close() call('{} run.in > run.out'.format(fts), shell=True) # Data analysis: # Partition 1 #datafile = open('model1_operators.dat','r') datafile = open('operators.dat', 'r') # ReP1 try: warmup, Data, nwarmup = stats.autoWarmupMSER(datafile, 3) except: print("Failed on ReP1") break (nsamples, (min, max), P1, P1err, kappa, unbiasedvar, autocor) = stats.doStats(warmup, Data, False) datafile.seek(0) # ImP1 try: warmup, Data, nwarmup = stats.autoWarmupMSER(datafile, 4) except: print("Failed on ImP1") break (nsamples, (min, max), imP1, imP1err, kappa, unbiasedvar, autocor) = stats.doStats(warmup, Data, False) datafile.seek(0)