-
Notifications
You must be signed in to change notification settings - Fork 5
/
compute-pmf-externalevs.py
831 lines (786 loc) · 43.8 KB
/
compute-pmf-externalevs.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
# This script can be used to compute 1D and 2D PMFs, as well as the free energy and expectation
# values of predefined clusters.
# 1/30/14 Extending to be able to perform free energy perturbation calculations.
# This script uses the pymbar package: https://github.com/choderalab/pymbar
# To learn how to use the script: $ python ./compute-pmf.py -help
import numpy
numpy.set_printoptions(threshold=numpy.nan) # output full matrices when printing
import math
import sys
import os
import pickle # used to save files so that the initialization calculations don't have to be repeated
#import pymbar # used to do the computations of free energy
#sys.path.append('C:\\Users\\Dell\\Anaconda\\Lib\\site-packages\\pymbar\\')
#sys.path.append('/home/hht1/programs/anaconda/pkgs/pymbar-3.0.0.beta2-np19py27_1/lib/python2.7/site-packages/pymbar-3.0.0.dev0-py2.7-linux-x86_64.egg/pymbar')
#sys.path.append('/home/hht1/programs/anaconda/lib/python2.7/site-packages/pymbar-3.0.0.dev0-py2.7-linux-x86_64.egg/pymbar/')
#sys.path.append('/home/hht1/CODES/pymbar-2.1.0-beta/build/lib.linux-x86_64-2.7/pymbar')
#sys.path.append('/home/wl45/pymbar/build/lib.linux-ppc64-2.7')
sys.path.append('/home/wl45/python/lib/python2.7/site-packages/pymbar-3.0.0.dev0-py2.7-linux-ppc64.egg/pymbar')
import pymbar
import timeseries # used to subsample data so that the samples are uncorrelated
# Flags
# debug_flag enables extra diagnostic printing (e.g. the parsed metadata).
debug_flag = False
# When True, an existing ./mbar.pkl may be reused instead of redoing the initialization.
check_for_pickle_files = True
# Command line arguments
# With no arguments at all, tell the user that the defaults below will be used.
if len(sys.argv) == 1:
print "#####################################################################################"
print "# If you don't enter any command line arguments, the script will run with defaults. #"
print "# You can use $ python ./compute-pmf.py -help to see the command line arguments. #"
print "#####################################################################################"
# Set default command line arguments
# Each value below may be overridden by the command line parsing that follows.
submit_to_cluster = True # flag for submitting calculation to a cluster (eliminates interactivity)
precision_threshold = 10e-13 # threshold to determine whether or not to attempt to compute expectation values (note: 10e-13 == 1e-12)
subsample_trajectories = False # flag for using pymbar's built-in subsampling features
compute_per_bin_quantities = False # flag for computing per-bin expectation values
kB = 1.381e-23 * 6.022e23 / 4184.0 # Boltzmann constant in kcal/mol/K
ndim = 2 # dimension of the pmf, can be 1 or 2
metadata_file = 'metadatafile' # name of the metadata file
biasing_variable_column = 5 # column in the trajectory file that contains the biasing variable information
energy_column = 4 # column in the trajectory file that contains the total (unbiased) potential energy
pmf_variable_column_1 = 1 # column in the trajectory file that contains the first pmf variable
pmf_variable_column_2 = 2 # column in the trajectory file that contains the second pmf variable
nbins1 = 50 # number of bins for the first pmf variable
nbins2 = 50 # number of bins for the second pmf variable
start_temperature = 200 # temperature at which to start the free energy calculations
end_temperature = 400 # temperature at which to stop the free energy calculations
temperature_increment = 20 # how often (in degrees) to compute the free energy
N_samples = 2000 # number of correlated samples per simulation
expectation_columns = [] # an array of column numbers for which to compute expectation values
expectation_files = [] # an array containing the names of the files that the expectation value data should be read from
# 'DEFAULT' means "read this pmf variable from the trajectory file itself".
pmf_variable_column_1_file = 'DEFAULT'
pmf_variable_column_2_file = 'DEFAULT'
expectation_data = [] # an array of numpy arrays to hold data used to compute expectation values
fep_columns = [] # an array of column numbers for which to compute free energy perturbations
perturb_from_columns = [] # an array of column numbers which tell MBAR how to perturb away from a particular energy column
fep_data = [] # an array of numpy arrays to hold data used to compute free energy perturbations
cluster_binning = False # True if you are using pre-assigned clusters to bin the data
cluster_bin_map = [] # Used to map cluster indices
nbiases = 1 # Number of biases applied during the simulations
nperturbations = 0 # Number of perturbed Hamiltonians to evaluate
biasing_variable_columns = [] # List of biasing variable columns in data file
# The default biasing column stays in the list unless "-b" is given explicitly.
biasing_variable_columns.append(biasing_variable_column)
specified_a_biasing_variable_column = False
perturbation_columns = [] # List of columns in data file with perturbed Hamiltonian energies
biasing_variable_kt = [] # List of numpy arrays containing biasing variables for all snapshots
biasing_variable_kn = [] # List of numpy arrays containing biasing variables for all snapshots (used to store subsampled data)
U_kt = [] # List of numpy arrays containing perturbed energies for all snapshots
U_kn = [] # List of numpy arrays containing perturbed energies for all snapshots (used to store subsampled data)
u_kn = [] # List of numpy arrays containing reduced perturbed energies for all snapshots (used to store subsampled data)
# Read command line arguments.
# sys.argv is scanned as (flag, value) pairs: flags are expected at the odd
# positions and each flag's value at the position right after it. "-help" is
# the only recognized flag that takes no value.
flags_taking_a_value = ("-kb", "-m", "-ev", "-fep", "-b", "-e", "-d", "-v1", "-v1n",
                        "-v2", "-v2n", "-st", "-et", "-ti", "-nsamples", "-ss", "-pb",
                        "-submit", "-clustering", "-p")
for arg in range(1, len(sys.argv), 2):
    if sys.argv[arg] in flags_taking_a_value and arg + 1 >= len(sys.argv):
        # Without this check a trailing flag dies with a bare IndexError.
        print("Argument %s requires a value. Use -help to see the proper usage." % sys.argv[arg])
        sys.exit()
    if sys.argv[arg] == "-kb":
        kB = float(sys.argv[arg+1])
    elif sys.argv[arg] == "-m":
        metadata_file = sys.argv[arg+1]
    elif sys.argv[arg] == "-ev":
        # Accepted forms: "COL", "START-END", "FILE:COL" and "FILE:START-END".
        ev_argument = sys.argv[arg+1]
        if ':' in ev_argument:
            ev_file, ev_columns = ev_argument.split(':')
        else:
            ev_file = 'DEFAULT'
            ev_columns = ev_argument
        if '-' in ev_columns:
            # Inclusive column range.
            ev_columns = ev_columns.split('-')
            starting_column = int(ev_columns[0])
            ending_column = int(ev_columns[1])
            for i in range(starting_column, ending_column+1):
                expectation_columns.append(i)
                expectation_files.append(ev_file)
        else:
            expectation_columns.append(int(ev_columns))
            expectation_files.append(ev_file)
    elif sys.argv[arg] == "-fep":
        # Expected form: "FEP_COLUMN,PERTURB_FROM_COLUMN".
        argument_to_split = sys.argv[arg+1]
        fep_column, perturb_from_column = argument_to_split.split(",")
        fep_columns.append(int(fep_column))
        perturb_from_columns.append(int(perturb_from_column))
    elif sys.argv[arg] == "-b":
        # Clear the default biasing variable column if you explicitly specify a column
        if not specified_a_biasing_variable_column:
            specified_a_biasing_variable_column = True
            biasing_variable_columns = []
        biasing_variable_column = int(sys.argv[arg+1])
        biasing_variable_columns.append(biasing_variable_column)
    elif sys.argv[arg] == "-e":
        energy_column = int(sys.argv[arg+1])
    elif sys.argv[arg] == "-d":
        ndim = int(sys.argv[arg+1])
        if ndim not in (1, 2):
            print("ndim must be either 1 or 2")
            sys.exit()
    elif sys.argv[arg] == "-v1":
        # Accepted forms: "COL" (trajectory file) or "FILE:COL" (external file).
        pmf_variable_column_1 = sys.argv[arg+1]
        if ':' in pmf_variable_column_1:
            pmf_variable_column_1_file, pmf_variable_column_1 = pmf_variable_column_1.split(':')
        else:
            pmf_variable_column_1_file = 'DEFAULT'
        pmf_variable_column_1 = int(pmf_variable_column_1)
    elif sys.argv[arg] == "-v1n":
        nbins1 = int(sys.argv[arg+1])
    elif sys.argv[arg] == "-v2":
        # Same forms as "-v1".
        pmf_variable_column_2 = sys.argv[arg+1]
        if ':' in pmf_variable_column_2:
            pmf_variable_column_2_file, pmf_variable_column_2 = pmf_variable_column_2.split(':')
        else:
            pmf_variable_column_2_file = 'DEFAULT'
        pmf_variable_column_2 = int(pmf_variable_column_2)
    elif sys.argv[arg] == "-v2n":
        nbins2 = int(sys.argv[arg+1])
    elif sys.argv[arg] == "-st":
        start_temperature = int(sys.argv[arg+1])
    elif sys.argv[arg] == "-et":
        end_temperature = int(sys.argv[arg+1])
    elif sys.argv[arg] == "-ti":
        temperature_increment = int(sys.argv[arg+1])
    elif sys.argv[arg] == "-nsamples":
        N_samples = int(sys.argv[arg+1])
    elif sys.argv[arg] == "-ss":
        ss = sys.argv[arg+1]
        if ss not in ('y', 'n'):
            print("subsample_trajectories must be either y or n")
            sys.exit()
        subsample_trajectories = (ss == 'y')
    elif sys.argv[arg] == "-pb":
        pb = sys.argv[arg+1]
        if pb not in ('y', 'n'):
            print("compute_per_bin_quantities must be either y or n")
            sys.exit()
        compute_per_bin_quantities = (pb == 'y')
    elif sys.argv[arg] == "-submit":
        submit = sys.argv[arg+1]
        if submit not in ('y', 'n'):
            print("submit must be either y or n")
            sys.exit()
        submit_to_cluster = (submit == 'y')
    elif sys.argv[arg] == "-clustering":
        # Only 'y' switches cluster binning on; any other value leaves it unchanged.
        if sys.argv[arg+1] == 'y':
            cluster_binning = True
    elif sys.argv[arg] == "-p":
        perturbation_columns.append(int(sys.argv[arg+1]))
    elif sys.argv[arg] == "-help":
        print("Usage: $ python ./compute-pmf.py \\ \n -kb boltzmann_constant (default=%f)\\ \n -m metadatafile (default=%s)\\ \n -b biasing_variable_column (default=%d)\\ \n -e energy_column (default=%d)\\ \n -d pmf_dimension (default=%d)\\ \n -v1 pmf_variable_column_1 (default=%d)\\ \n -v1n pmf_variable_nbins_1 (default=%d)\\ \n -v2 pmf_variable_column_2 (default=%d)\\ \n -v2n pmf_variable_nbins_2 (default=%d)\\ \n -st start_temperature (default=%d)\\ \n -et end_temperature (default=%d)\\ \n -ti temperature_increment (default=%d)\\ \n -nsamples N_samples (default=%d)\\ \n -ev column_number_to_compute_expection_value (no default) \\ \n -pb compute_per_bin_quantities (default=%s)\\ \n -ss subsample_trajectories (default=%s)\\ \n -submit submit_to_cluster (default=%s)\\ \n -clustering cluster_binning (default=%s)\\ \n -p perturbation_column (default=None)\\ \n" % (kB, metadata_file, biasing_variable_column, energy_column, ndim, pmf_variable_column_1, nbins1, pmf_variable_column_2, nbins2, start_temperature, end_temperature, temperature_increment, N_samples, compute_per_bin_quantities, subsample_trajectories, submit_to_cluster, cluster_binning))
        print("All command line arguments are optional and can be specified in an arbitrary order on a single line. To change the defaults, use a text editor to edit the script.")
        sys.exit()
    else:
        print("I don't recognize %s as an argument. Use -help to see the proper usage." % sys.argv[arg])
        sys.exit()
# Derive the bias / perturbation counts from the parsed column lists, then
# echo every setting so the run is self-documenting in its log.
nbiases = len(biasing_variable_columns)
nperturbations = len(perturbation_columns)

print("Using Boltzmann constant value kB=%f" % kB)
print("Using metadata file %s" % metadata_file)
print("Getting %d biasing variable(s) data from column(s) %s" % (nbiases, biasing_variable_columns))
print("Performing %d perturbation calculations using data from column(s) %s" % (nperturbations, perturbation_columns))
print("Getting energy data from column %d" % (energy_column))
print("Getting pmf variable 1 data from column %d" % (pmf_variable_column_1))
print("Using %d bins to bin pmf variable 1" % nbins1)
# The second pmf variable is only relevant for a 2D pmf.
if ndim == 2:
    print("Getting pmf variable 2 data from column %d" % (pmf_variable_column_2))
    print("Using %d bins to bin pmf variable 2" % nbins2)
print("Starting from a temperature %d K and ending at a temperature %d K and using a temperature increment of %d K." % (start_temperature, end_temperature, temperature_increment))

print("#############################################################")
print("# Warning: you must have exactly %d samples per simulation #" % N_samples)
print("#############################################################")

if subsample_trajectories:
    print("Subsampling of trajectories is ON.")
else:
    print("Subsampling of trajectories is OFF.")

# Only mention expectation values / free energy perturbations when requested.
if expectation_columns:
    print("Calculating expectation values for the following columns: %s \n From files: %s" % (str(expectation_columns), str(expectation_files)))
if fep_columns:
    print("Calculating free energy perturbation values for the following columns: %s" % str(fep_columns))

if compute_per_bin_quantities:
    print("Computing per bin quantities is ON.")
else:
    print("Computing per bin quantities is OFF.")
# Decide whether a previously written mbar.pkl is reused (load_pickle).
# Interactive runs ask the user; cluster runs decide automatically.
if not submit_to_cluster:
    # Keep asking until the user answers 'y' (continue) or 'n' (quit).
    answer = None
    while answer != 'y':
        answer = raw_input("Would you like to continue? (y/n)\n")
        if answer == 'n':
            print("Okay. Exiting.")
            sys.exit()
    # Check to see if you've already generated an mbar.pkl file and offer to reuse it to save time
    load_pickle = False
    if check_for_pickle_files and os.path.exists('./mbar.pkl'):
        answer = None
        while answer not in ('y', 'n'):
            answer = raw_input("mbar.pkl exists. Would you like to load the file (and skip the initialization of mbar)? y/n\n")
        load_pickle = (answer == 'y')
else:
    # Check to see if you've already generated an mbar.pkl file and reuse it to save time
    load_pickle = False
    if check_for_pickle_files:
        load_pickle = os.path.exists('./mbar.pkl')
        if load_pickle:
            print("Using the existing mbar.pkl; skipping the initialization of the calculation.")
        else:
            print("Could not find an existing mbar.pkl; starting the initialization of the calculation.")
# If not loading pickle files, start the initialization calculation
if load_pickle == False:
# Lists filled from the metadata file; entry k describes simulation k.
files = [] # list of trajectory files
temperatures = [] # list of sampling temperatures
biasing_strengths = [[] for _ in range(nbiases)] # biasing_strengths[i][k]: strength of bias i in simulation k
biasing_values = [[] for _ in range(nbiases)] # biasing_values[i][k]: center value of bias i in simulation k
# Read metadata file.
# Each data line is: trajectory_file temperature [strength center] * nbiases
with open(metadata_file, 'r') as metadata:
    for line in metadata:
        # Skip comment lines and blank lines (a blank line used to raise IndexError).
        if line.lstrip().startswith('#'):
            continue
        fields = line.split()
        if not fields:
            continue
        files.append(fields[0])
        temperatures.append(float(fields[1]))
        column_index = 2
        for i in range(nbiases):
            biasing_strengths[i].append(float(fields[column_index]))
            biasing_values[i].append(float(fields[column_index+1]))
            column_index += 2
if debug_flag:
    print("Information read from metadata file:")
    print("Files:")
    print(files)
    print("Temperatures:")
    print(temperatures)
    print("Biasing strengths:")
    print(biasing_strengths)
    print("Biasing values:")
    print(biasing_values)
# Parameters
K = len(files) # number of simulations
sample_shape = [K, N_samples] # every per-snapshot array is (simulation, snapshot)
# Allocate storage for simulation data
N_k = numpy.zeros([K], numpy.int32) # N_k[k] is the number of snapshots from umbrella simulation k
# One biasing-variable array per bias.
biasing_variable_kt.extend([numpy.zeros(sample_shape, numpy.float64) for _ in range(nbiases)])
# One energy array for the original energy plus one per perturbed Hamiltonian.
U_kt.extend([numpy.zeros(sample_shape, numpy.float64) for _ in range(nperturbations+1)])
pmf_variable_kt_1 = numpy.zeros(sample_shape, numpy.float64) # pmf_variable_kt_1[k,t] is the value of the first pmf variable from snapshot t of simulation k
if ndim == 2:
    pmf_variable_kt_2 = numpy.zeros(sample_shape, numpy.float64) # pmf_variable_kt_2[k,t] is the value of the second pmf variable from snapshot t of simulation k
cluster_bin_kt = -numpy.ones(sample_shape, numpy.int32) # cluster_bin_kt[k,t] is the bin of snapshot t of umbrella simulation k; -1 until filled
# One array per requested expectation-value column and per free energy perturbation column.
expectation_data.extend([numpy.zeros(sample_shape, numpy.float64) for _ in expectation_columns])
fep_data.extend([numpy.zeros(sample_shape, numpy.float64) for _ in fep_columns])
# Read the correlated simulation data.
# Columns given on the command line are 1-based, hence the "-1" when indexing.
print("Reading simulation data from default files...")
for k, trajectory_file in enumerate(files): # k is the simulation index
    print("Reading file %s ..." % trajectory_file)
    t = 0 # time step index
    with open(trajectory_file, 'r') as trajectory:
        for line in trajectory:
            # Skip comment lines and blank lines.
            if line.lstrip().startswith('#'):
                continue
            line = line.split()
            if not line:
                continue
            if t >= N_samples:
                # The storage arrays hold N_samples snapshots per simulation;
                # stop with a clear message instead of an IndexError.
                print("File %s contains more than %d samples; set the correct number with -nsamples. Exiting." % (trajectory_file, N_samples))
                sys.exit()
            # Store order parameters.
            for i in range(nbiases):
                biasing_variable_kt[i][k,t] = float(line[biasing_variable_columns[i]-1])
            # pmf variables kept in an external file are read in a later pass.
            if pmf_variable_column_1_file == 'DEFAULT':
                pmf_variable_kt_1[k,t] = float(line[pmf_variable_column_1-1])
            if ndim == 2 and pmf_variable_column_2_file == 'DEFAULT':
                pmf_variable_kt_2[k,t] = float(line[pmf_variable_column_2-1])
            # U_kt[0] holds the original energy, U_kt[1:] the perturbed energies.
            U_kt[0][k,t] = float(line[energy_column-1])
            for i in range(nperturbations):
                U_kt[i+1][k,t] = float(line[perturbation_columns[i]-1])
            for i in range(len(expectation_columns)):
                if expectation_files[i] == 'DEFAULT':
                    expectation_data[i][k,t] = float(line[expectation_columns[i]-1])
            # Stored as (perturb-from column) minus (fep column).
            for i in range(len(fep_columns)):
                fep_data[i][k,t] = float(line[perturb_from_columns[i]-1])-float(line[fep_columns[i]-1])
            t += 1 # increment time step index
    if t < N_samples:
        # The remaining entries of the storage arrays keep their initial zeros.
        print("WARNING: file %s contains only %d samples (expected %d)." % (trajectory_file, t, N_samples))
# Read the correlated simulation data from other files.
# "Other files" are looked up by name in the directory of each trajectory file.
def _external_rows(directory, file_name):
    """Yield the whitespace-split rows of directory/file_name, skipping its first line and blank lines."""
    with open(os.path.join(directory, file_name), 'r') as handle:
        for line_index, line in enumerate(handle):
            if line_index == 0:
                print("WARNING: Skipping first line of %s by default." % file_name)
                continue
            fields = line.split()
            if fields:
                yield fields

print("Reading simulation data from other files...")
expectation_files_set = list(set(expectation_files))
for k, trajectory_file in enumerate(files): # k is the simulation index
    directory = os.path.dirname(os.path.realpath(trajectory_file))
    print("Reading extra files for the trajectory file %s ..." % trajectory_file)
    if pmf_variable_column_1_file != 'DEFAULT':
        for t, fields in enumerate(_external_rows(directory, pmf_variable_column_1_file)):
            pmf_variable_kt_1[k,t] = float(fields[pmf_variable_column_1-1])
    # pmf_variable_kt_2 only exists for a 2D pmf.
    if ndim == 2 and pmf_variable_column_2_file != 'DEFAULT':
        for t, fields in enumerate(_external_rows(directory, pmf_variable_column_2_file)):
            pmf_variable_kt_2[k,t] = float(fields[pmf_variable_column_2-1])
    for expectation_file in expectation_files_set:
        # 'DEFAULT' is the placeholder for "the trajectory file itself", which
        # was already read in the previous pass; it is not a real file name.
        if expectation_file == 'DEFAULT':
            continue
        # (index into expectation_data, 1-based column) for every column taken from this file.
        columns_in_file = [(i, expectation_columns[i])
                           for i in range(len(expectation_columns))
                           if expectation_files[i] == expectation_file]
        for t, fields in enumerate(_external_rows(directory, expectation_file)):
            for i, column in columns_in_file:
                expectation_data[i][k,t] = float(fields[column-1])
# If using clusters, read the correlated cluster bin indices.
# Each trajectory file is expected to have a "cluster.dat" next to it whose
# first column is the cluster index of the corresponding snapshot.
if cluster_binning:
    for k, trajectory_file in enumerate(files): # k == trajectory index
        # os.path handles a trajectory path without a directory part; joining
        # the split path by hand produced "/cluster.dat" in that case.
        cluster_file = os.path.join(os.path.dirname(trajectory_file), "cluster.dat")
        if not os.path.exists(cluster_file):
            print("%s does not exist. Exiting." % cluster_file)
            sys.exit()
        n = 0 # n == snapshot index
        with open(cluster_file, "r") as f:
            for line in f:
                line = line.split()
                # Skip blank lines and comment lines ("# text" as well as "#text").
                if not line or line[0].startswith("#"):
                    continue
                cluster_bin_kt[k,n] = int(line[0])
                n += 1
# Subsample data.
# The *_kt arrays hold every snapshot; the *_kn arrays receive, for each
# simulation k, the selected snapshots in their first N_k[k] entries.
print("Subsampling data...")
for i in range(nbiases):
    biasing_variable_kn.append(numpy.zeros([K,N_samples], numpy.float64)) # biasing_variable_kn[i][k,n] is the value of biasing variable i from snapshot n of simulation k
for i in range(nperturbations+1):
    U_kn.append(numpy.zeros([K,N_samples], numpy.float64)) # U_kn[i][k,n] is energy i (0 = original) of snapshot n of simulation k
if not cluster_binning:
    pmf_variable_kn_1 = numpy.zeros([K,N_samples], numpy.float64) # pmf_variable_kn_1[k,n] is the value of the first pmf variable from snapshot n of simulation k
    if ndim == 2:
        pmf_variable_kn_2 = numpy.zeros([K,N_samples], numpy.float64) # pmf_variable_kn_2[k,n] is the value of the second pmf variable from snapshot n of simulation k
else:
    cluster_bin_kn = -1*numpy.ones([K,N_samples], numpy.int32) # cluster_bin_kn[k,n] is the cluster bin index of snapshot n of umbrella simulation k
N_k = numpy.zeros([K], numpy.int32) # N_k[k] is the number of uncorrelated samples from simulation index k
reduced_expectation_data = [numpy.zeros([K,N_samples], numpy.float64) for _ in expectation_columns]
reduced_fep_data = [numpy.zeros([K,N_samples], numpy.float64) for _ in fep_columns]
for k in range(K):
    # Extract timeseries (the first biasing variable drives the subsampling).
    A_t = biasing_variable_kt[0][k,:]
    # Compute statistical inefficiency.
    try:
        g = timeseries.statisticalInefficiency(A_t)
    except Exception as e:
        print(str(e))
        print(A_t)
        if subsample_trajectories:
            # g is required to pick the samples; continuing would reuse the
            # value from the previous simulation or fail on an undefined name.
            raise
        # Without subsampling g is only reported, so flag it as unavailable.
        g = numpy.nan
    # Subsample data.
    if subsample_trajectories:
        indices = timeseries.subsampleCorrelatedData(A_t, g=g)
    else:
        indices = timeseries.subsampleCorrelatedData(A_t, g=1)
    N = len(indices) # number of uncorrelated samples
    print("k = %5d : g = %.1f, N = %d" % (k, g, N))
    for i in range(nbiases):
        biasing_variable_kn[i][k,0:N] = biasing_variable_kt[i][k,indices]
    for i in range(nperturbations+1):
        U_kn[i][k,0:N] = U_kt[i][k,indices]
    if not cluster_binning:
        pmf_variable_kn_1[k,0:N] = pmf_variable_kt_1[k,indices]
        if ndim == 2:
            pmf_variable_kn_2[k,0:N] = pmf_variable_kt_2[k,indices]
    if cluster_binning:
        cluster_bin_kn[k,0:N] = cluster_bin_kt[k,indices]
    for i in range(len(expectation_columns)):
        reduced_expectation_data[i][k,0:N] = expectation_data[i][k,indices]
    for i in range(len(fep_columns)):
        reduced_fep_data[i][k,0:N] = fep_data[i][k,indices]
    N_k[k] = N # number of uncorrelated samples
# Find unique cluster indices
if cluster_binning:
# Collect the distinct values stored in cluster_bin_kn (including the -1 fill value).
cluster_bin_map = numpy.unique(cluster_bin_kn)
# Keep strictly positive indices only: this removes the -1 fill value and also 0.
# NOTE(review): presumably cluster indices in cluster.dat start at 1; a cluster
# labelled 0 would be excluded here -- confirm against the clustering tool.
cluster_bin_map = cluster_bin_map[cluster_bin_map > 0]
print "Clusters found: " + str(cluster_bin_map)
# Compute reduced potentials from all simulations in all thermodynamic states.
# State l is defined by the temperature and the bias parameters of simulation l.
print("Computing reduced potentials...")
u_kln = numpy.zeros([K,K,N_samples], numpy.float64) # u_kln[k,l,n] is reduced biased potential of uncorrelated snapshot n from simulation k in thermodynamic state l
for k in range(K):
    N = N_k[k] # number of uncorrelated snapshots
    energies = U_kn[0][k,0:N] # unperturbed potential energies of simulation k
    for l in range(K):
        kT = kB * temperatures[l] # thermal energy of state l
        beta = 1.0 / kT # inverse temperature of state l
        # Start from the reduced unbiased energy, then add each bias in turn.
        reduced = beta * energies
        for i in range(nbiases):
            displacement = biasing_variable_kn[i][k,0:N] - biasing_values[i][l]
            U_bias = (biasing_strengths[i][l]/2.0) * displacement**2 # biasing potential for this sample
            reduced += beta * (U_bias)
        u_kln[k,l,0:N] = reduced
# Bin data.
# Without cluster binning, each pmf variable range [min, max] is cut into
# equal-width bins; with cluster binning the cluster index itself is the bin.
print("Binning data...")
bin_kn = -1 * numpy.ones([K,N_samples], numpy.int32) # bin_kn[k,n] is bin index of sample n from simulation k; otherwise -1
if not cluster_binning:
    # Construct PMF bins in pmf_variable
    pmf_variable_min_1 = pmf_variable_kt_1.min()
    pmf_variable_max_1 = pmf_variable_kt_1.max()
    pmf_variable_bin_width_1 = (pmf_variable_max_1 - pmf_variable_min_1) / nbins1
    if ndim == 2:
        pmf_variable_min_2 = pmf_variable_kt_2.min()
        pmf_variable_max_2 = pmf_variable_kt_2.max()
        pmf_variable_bin_width_2 = (pmf_variable_max_2 - pmf_variable_min_2) / nbins2
    # Bin centers
    bin_centers = list() # bin_centers[i] is the center of bin i
    if ndim == 1:
        for i in range(nbins1):
            bin_center_1 = pmf_variable_min_1 + pmf_variable_bin_width_1 * (i + 0.5)
            bin_centers.append( bin_center_1 )
    elif ndim == 2:
        # Flattened index of bin (i, j) is i * nbins2 + j.
        for i in range(nbins1):
            for j in range(nbins2):
                bin_center_1 = pmf_variable_min_1 + pmf_variable_bin_width_1 * (i + 0.5)
                bin_center_2 = pmf_variable_min_2 + pmf_variable_bin_width_2 * (j + 0.5)
                bin_centers.append( (bin_center_1, bin_center_2) )
    # Bin assignment.
    # A sample equal to the maximum maps to index nbins; clamp it into the last
    # bin. Unclamped, it was dropped in 1D and, in 2D, bin_1 * nbins2 + nbins2
    # pointed at a bin of the next row.
    for k in range(K):
        N = N_k[k]
        bin_1 = numpy.int32((pmf_variable_kn_1[k,0:N] - pmf_variable_min_1) / pmf_variable_bin_width_1) # pmf bin index 0..(nbins-1)
        bin_1 = numpy.minimum(bin_1, nbins1 - 1)
        if ndim == 1:
            bin_kn[k,0:N] = bin_1
        elif ndim == 2:
            bin_2 = numpy.int32((pmf_variable_kn_2[k,0:N] - pmf_variable_min_2) / pmf_variable_bin_width_2) # pmf bin index 0..(nbins-1)
            bin_2 = numpy.minimum(bin_2, nbins2 - 1)
            bin_kn[k,0:N] = bin_1 * nbins2 + bin_2
else:
    for k in range(K):
        N = N_k[k]
        for n in range(N):
            # Look the sample's cluster index up in cluster_bin_map; an index
            # that is not in the map raises IndexError here.
            bin_kn[k,n] = cluster_bin_map[numpy.where(cluster_bin_map == cluster_bin_kn[k,n])[0][0]]
# Make sure all bins are populated.
print("Counting number of samples per bin...")
if cluster_binning:
    # One bin per cluster; bin_kn stores the cluster index itself.
    nbins = cluster_bin_map.size
    bin_labels = [cluster_bin_map[i] for i in range(nbins)]
else:
    if ndim == 1:
        nbins = nbins1
    elif ndim == 2:
        nbins = nbins1*nbins2
    bin_labels = range(nbins)
bin_counts = numpy.array([(bin_kn == label).sum() for label in bin_labels], numpy.int32)
print("Number of samples in each bin:")
print(bin_counts)
# Map to reduced bin counts to eliminate empty bins.
nonempty_bins = numpy.where(bin_counts > 0)[0]
print("Non-empty bins:")
print(nonempty_bins)
nreduced_bins = len(nonempty_bins)
reduced_bin_kn = -1 * numpy.ones([K,N_samples], numpy.int32) # reduced_bin_kn[k,n] is bin index of sample n from simulation k; otherwise -1
bin_map = [] # save original bin number so that you can map the free energy back to its original place
for (i,j) in enumerate(nonempty_bins):
    bin_map.append(j)
    # j indexes bin_counts; with cluster binning the value stored in bin_kn is
    # the cluster index found at position j of cluster_bin_map.
    if cluster_binning:
        label = cluster_bin_map[j]
    else:
        label = j
    reduced_bin_kn[bin_kn == label] = i
# Build in_this_bin arrays, used for computing free energies and expectation values:
# in_this_bin[i][k,n] is 1.0 when sample n of simulation k is in reduced bin i, else 0.0.
in_this_bin = [numpy.where(reduced_bin_kn == i, 1.0, 0.0) for i in range(nreduced_bins)]
# Initialize MBAR.
print("Running MBAR...")
mbar = pymbar.MBAR(u_kln, N_k, verbose=True)

# Pickle files for later use.
# Every object needed after the initialization is written to its own file in
# the current directory so that a later run can skip the initialization.
def _dump_pickle(obj, file_name):
    """Pickle obj into file_name, closing the file even if pickling fails."""
    with open(file_name, "wb") as handle:
        pickle.dump(obj, handle)

pickle_outputs = [
    (mbar, "mbar.pkl"),
    (U_kn, "Ukn.pkl"),
    (nperturbations, "nperturbations.pkl"),
    (reduced_bin_kn, "reducedbinkn.pkl"),
    (nreduced_bins, "nreducedbins.pkl"),
    (K, "k.pkl"),
    (N_k, "nk.pkl"),
    (bin_map, "binmap.pkl"),
    (temperatures, "temperatures.pkl"),
    (bin_counts, "bincounts.pkl"),
    (reduced_expectation_data, "reducedexpectationdata.pkl"),
    (reduced_fep_data, "reducedfepdata.pkl"),
    (in_this_bin, "inthisbin.pkl"),
    (fep_columns, "fepcolumns.pkl"),
    (perturb_from_columns, "perturbfromcolumns.pkl"),
]
# The binning-specific objects depend on how the data was binned.
if not cluster_binning:
    pickle_outputs.append((bin_centers, "bincenters.pkl"))
    pickle_outputs.append((pmf_variable_kn_1, "pmfvariablekn1.pkl"))
    if ndim == 2:
        pickle_outputs.append((pmf_variable_kn_2, "pmfvariablekn2.pkl"))
else:
    pickle_outputs.append((cluster_bin_map, "clusterbinmap.pkl"))
for pickled_object, pickle_file_name in pickle_outputs:
    _dump_pickle(pickled_object, pickle_file_name)
else:
# Load files to save time.
# NOTE: pickle.load executes whatever the file encodes; only reuse .pkl files
# that were written by this script in a directory you trust.
def _load_pickle(file_name):
    """Return the object pickled in file_name, closing the file afterwards."""
    with open(file_name, "rb") as handle:
        return pickle.load(handle)

mbar = _load_pickle("mbar.pkl")
U_kn = _load_pickle("Ukn.pkl")
nperturbations = _load_pickle("nperturbations.pkl")
reduced_bin_kn = _load_pickle("reducedbinkn.pkl")
nreduced_bins = _load_pickle("nreducedbins.pkl")
K = _load_pickle("k.pkl")
N_k = _load_pickle("nk.pkl")
bin_map = _load_pickle("binmap.pkl")
temperatures = _load_pickle("temperatures.pkl")
bin_counts = _load_pickle("bincounts.pkl")
reduced_expectation_data = _load_pickle("reducedexpectationdata.pkl")
reduced_fep_data = _load_pickle("reducedfepdata.pkl")
in_this_bin = _load_pickle("inthisbin.pkl")
fep_columns = _load_pickle("fepcolumns.pkl")
perturb_from_columns = _load_pickle("perturbfromcolumns.pkl")
# Which binning-specific files are read follows the current command line
# (cluster_binning, ndim), not the settings of the run that wrote them.
if not cluster_binning:
    bin_centers = _load_pickle("bincenters.pkl")
    pmf_variable_kn_1 = _load_pickle("pmfvariablekn1.pkl")
    if ndim == 2:
        pmf_variable_kn_2 = _load_pickle("pmfvariablekn2.pkl")
else:
    cluster_bin_map = _load_pickle("clusterbinmap.pkl")
# NOTE(review): the nesting below was reconstructed from a listing that had
# lost its indentation.  Two placements are assumptions to confirm:
#   (1) the banner is printed only for perturbed runs (inside the "else");
#   (2) the "other per-bin" expectation / FEP sections run regardless of
#       compute_per_bin_quantities.


def _bin_row_prefix(bin_index):
    """Return the leading "bin / bin center" columns for one reduced bin.

    With cluster binning the second column is the integer entry of
    cluster_bin_map; otherwise one (ndim == 1) or two (ndim == 2)
    floating-point bin centers are written.  Returns None when neither
    layout applies (regular binning with ndim other than 1 or 2).
    """
    original_bin = bin_map[bin_index]
    if cluster_binning:
        return " %12d %12d " % (original_bin, cluster_bin_map[bin_index])
    if ndim == 1:
        return " %12d %12.3f " % (original_bin, bin_centers[original_bin])
    if ndim == 2:
        center = bin_centers[original_bin]
        return " %12d %12.3f %12.3f " % (original_bin, center[0], center[1])
    return None


# Loop over original energy and any perturbed energies, if available.
# print(...) is always called with a single argument so the output is the same
# under the Python 2 print statement and the Python 3 print function.
for perturbation_index in range(nperturbations+1):
    # If this is the calculation using the original energies, do not attach a prefix to the files
    if perturbation_index == 0:
        file_prefix = ""
    # If this is a calculation using perturbed energies, attach a prefix
    else:
        file_prefix = "perturbation-%s-" % str(perturbation_index)
        print(" ############################################################# ")
        print(" # Computing pmfs and expectation values for perturbation %s #" % str(perturbation_index))
        print(" ############################################################# ")

    # Summary files hold one row per temperature for this perturbation.
    temperature_tag = "%s-%s-%s.dat" % (str(start_temperature), str(end_temperature), str(temperature_increment))
    cv_file = file_prefix + "cv-" + temperature_tag
    expectation_file = file_prefix + "ev-" + temperature_tag
    fep_file = file_prefix + "fep-" + temperature_tag

    # The with-statement closes all three files (including the fep file,
    # which was previously never closed) even if a computation fails.
    with open(cv_file, 'w') as cv_output_file, \
            open(expectation_file, 'w') as ev_output_file, \
            open(fep_file, 'w') as fep_output_file:
        cv_output_file.write("# %10s %10s\n" % ('temperature', 'cv'))
        ev_output_file.write("# %10s %10s\n" % ('temperature', 'expectation values'))
        fep_output_file.write("# %10s %10s\n" % ('temperature', 'fep values'))

        # Compute perturbed reduced potential at each temperature of interest in the absence of a biasing potential.
        for target_temperature in range(start_temperature, end_temperature+1, temperature_increment):
            pmf_file = file_prefix + "pmf-" + str(target_temperature) + ".dat"
            print("Computing perturbed reduced potential at %d K ..." % target_temperature)
            # u_kn[k,n] is the unbiased reduced potential energy of snapshot n of umbrella simulation k at conditions of interest
            # NOTE(review): a new array is appended on every temperature step, yet only
            # u_kn[perturbation_index] is filled and read below -- confirm this growth is intended.
            u_kn.append(numpy.zeros([K,N_samples], numpy.float64))
            kT = kB * target_temperature
            beta = 1.0 / kT # reduced temperature
            for k in range(K):
                N = N_k[k]
                u_kn[perturbation_index][k,0:N] = beta * U_kn[perturbation_index][k,0:N] # unbiased reduced potential at desired temperature
            reduced_potential = u_kn[perturbation_index]
            energies = U_kn[perturbation_index]

            print("Computing PMF...")
            # Compute PMF in unbiased potential (in units of kT).
            f_i = numpy.zeros(nreduced_bins, numpy.float64)
            # Uncertainties are not requested (compute_uncertainty=False), so df stays zero.
            df_i = numpy.zeros(nreduced_bins, numpy.float64)
            bin_expectation = numpy.zeros(nreduced_bins, numpy.float64)
            for i in range(nreduced_bins):
                bin_expectation[i] = mbar.computePerturbedExpectation(reduced_potential, in_this_bin[i], compute_uncertainty=False)[0]
                if bin_expectation[i] > precision_threshold:
                    f_i[i] = -numpy.log(bin_expectation[i])
                else:
                    # Bin weight too small to take a meaningful logarithm.
                    f_i[i] = numpy.nan
            # Shift the PMF so its lowest finite value is zero.  The minimum must be
            # taken once, before any element is modified; recomputing it inside an
            # element-wise loop sees already-shifted values and corrupts the result.
            if nreduced_bins > 0:
                f_i -= numpy.nanmin(f_i)

            print("Computing CV...")
            # Compute <E> and <E^2> to get Cv = (<E^2> - <E>^2) / (kB T^2)
            mean_energy = mbar.computePerturbedExpectation(reduced_potential, energies, compute_uncertainty=False)[0]
            mean_squared_energy = mbar.computePerturbedExpectation(reduced_potential, energies**2, compute_uncertainty=False)[0]
            heat_capacity = (mean_squared_energy-math.pow(mean_energy,2))/(kT*target_temperature)
            if debug_flag:
                print("%s %s %s" % (mean_energy, mean_squared_energy, heat_capacity))
            cv_output_file.write("%10d %10.3f\n" % (target_temperature, heat_capacity))

            if len(reduced_expectation_data) > 0:
                print("Computing expectation values as a function of temperature...")
                ev_output_file.write("%10d " % target_temperature)
                for observable in reduced_expectation_data:
                    value = mbar.computePerturbedExpectation(reduced_potential, observable, compute_uncertainty=False)[0]
                    ev_output_file.write("%10.3f" % value)
                ev_output_file.write("\n")

            # if len(reduced_fep_data) > 0:
            # print "Computing free energy perturbation values as a function of temperature..."
            # fep_values = []
            # fep_output_file.write("%10d " % target_temperature)
            # for i in range(len(reduced_expectation_data)):
            # fep_values.append(numpy.exp(beta)*mbar.computePerturbedExpectation(u_kn[perturbation_index], reduced_fep_data[i], compute_uncertainty=False)[0])
            # fep_output_file.write("%10.3f" % fep_values[i])
            # fep_output_file.write("\n")

            # Compute expectation value of the energy and entropy for each bin
            # (both stay zero when compute_per_bin_quantities is off).
            E_i = numpy.zeros(nreduced_bins, numpy.float64)
            S_i = numpy.zeros(nreduced_bins, numpy.float64)
            if compute_per_bin_quantities:
                print("Computing per-bin expectation values of energy and entropy...")
                for i in range(nreduced_bins):
                    # Check to make sure you will actually be able to compute an expectation value
                    if bin_expectation[i] > precision_threshold:
                        bin_energy = mbar.computePerturbedExpectation(reduced_potential, energies*in_this_bin[i], compute_uncertainty=False)[0]/bin_expectation[i]
                    else:
                        bin_energy = numpy.nan
                    E_i[i] = bin_energy/(kB*target_temperature)
                    S_i[i] = E_i[i]-f_i[i]

            # Compute other per-bin expectation values
            if len(reduced_expectation_data) > 0:
                print("Computing other per-bin expectation values...")
                ev_pb_file = file_prefix + "evpb-" + str(target_temperature) + ".dat"
                with open(ev_pb_file, 'w') as ev_pb_output_file:
                    ev_pb_output_file.write("# expectation values for reduced (non-empty) bins\n")
                    for i in range(nreduced_bins):
                        row_prefix = _bin_row_prefix(i)
                        if row_prefix is not None:
                            ev_pb_output_file.write(row_prefix)
                        # if bin clustering, include pmf information in evpb file
                        if cluster_binning:
                            ev_pb_output_file.write("%12.3f %12.3f %12.3f %12.3f " % (f_i[i], df_i[i], E_i[i], S_i[i]))
                        # compute each expectation value in list
                        for observable in reduced_expectation_data:
                            # Check to make sure you will actually be able to compute an expectation value
                            if bin_expectation[i] > precision_threshold:
                                value = mbar.computePerturbedExpectation(reduced_potential, observable*in_this_bin[i], compute_uncertainty=False)[0]/bin_expectation[i]
                            else:
                                value = float('nan')
                            ev_pb_output_file.write("%12.3f " % value)
                        ev_pb_output_file.write("\n")

            # Compute per-bin free energy perturbation values
            if len(reduced_fep_data) > 0:
                print("Computing per-bin free energy perturbation values...")
                fep_pb_file = file_prefix + "feppb-" + str(target_temperature) + ".dat"
                nfep = len(reduced_fep_data)
                fep_values = numpy.zeros([nreduced_bins, nfep], numpy.float64)
                for i in range(nreduced_bins):
                    # compute each expectation value in list
                    for j in range(nfep):
                        # Check to make sure you will actually be able to compute an expectation value
                        if bin_expectation[i] > precision_threshold:
                            data_in_this_bin = numpy.exp(reduced_fep_data[j])*in_this_bin[i]
                            fep = numpy.log(numpy.exp(beta)*mbar.computePerturbedExpectation(reduced_potential, data_in_this_bin, compute_uncertainty=False)[0]/bin_expectation[i])
                            if perturb_from_columns[j] == energy_column:
                                # Perturbing from the energy column: reference is the PMF itself.
                                fep = f_i[i]-fep
                            else:
                                # find the column index where fep_columns == perturb_from_columns[j]
                                column_to_perturb_from = fep_columns.index(perturb_from_columns[j])
                                fep = fep_values[i][column_to_perturb_from]-fep
                            fep_values[i][j] = fep
                        else:
                            fep_values[i][j] = float('nan')
                # Shift every column so that its lowest finite value is zero.
                if nreduced_bins > 0:
                    for j in range(nfep):
                        fep_values[:, j] -= numpy.nanmin(fep_values[:, j])
                with open(fep_pb_file, 'w') as fep_pb_output_file:
                    fep_pb_output_file.write("# free energy perturbation values for reduced (non-empty) bins\n")
                    for i in range(nreduced_bins):
                        row_prefix = _bin_row_prefix(i)
                        if row_prefix is not None:
                            fep_pb_output_file.write(row_prefix)
                        # if bin clustering, include pmf information in feppb file
                        if cluster_binning:
                            fep_pb_output_file.write("%12.3f %12.3f %12.3f %12.3f " % (f_i[i], df_i[i], E_i[i], S_i[i]))
                        for j in range(nfep):
                            fep_pb_output_file.write("%12.3f " % fep_values[i][j])
                        fep_pb_output_file.write("\n")

            # Write out PMF for reduced bins.
            with open(pmf_file, 'w') as pmf_output_file:
                pmf_output_file.write("# PMF, energy and entropy for reduced (non-empty) bins (in units of kT)\n")
                if ndim == 1:
                    pmf_output_file.write("# %12s %12s %12s %12s %12s %12s\n" % ('bin', 'bin_center_1', 'f', 'df', 'e', 's'))
                elif ndim == 2:
                    pmf_output_file.write("# %12s %12s %12s %12s %12s %12s %12s\n" % ('bin', 'bin_center_1', 'bin_center_2', 'f', 'df', 'e', 's'))
                for i in range(nreduced_bins):
                    row_prefix = _bin_row_prefix(i)
                    # A row is only written when a bin layout applies.
                    if row_prefix is not None:
                        pmf_output_file.write(row_prefix + "%12.3f %12.3f %12.3f %12.3f\n" % (f_i[i], df_i[i], E_i[i], S_i[i]))