def gmx_water_particle_without_energy():
    """Return the combined u_nk of the water-particle (no energy) dataset.

    Every file listed under ``['data']['AllStates']`` of the dataset is parsed
    with ``gmx.extract_u_nk`` at ``T=300`` and the resulting frames are joined
    with ``pd.concat``.
    """
    all_state_files = (
        alchemtest.gmx.load_water_particle_without_energy()['data']['AllStates'])
    frames = []
    for path in all_state_files:
        frames.append(gmx.extract_u_nk(path, T=300))
    return pd.concat(frames)
def gmx_benzene_coul_u_nk():
    """Return the combined u_nk of the benzene dataset's ``'Coulomb'`` files.

    Each file is parsed with ``gmx.extract_u_nk`` at ``T=300``; the frames are
    joined with ``pd.concat``.
    """
    coulomb_files = alchemtest.gmx.load_benzene()['data']['Coulomb']
    frames = []
    for path in coulomb_files:
        frames.append(gmx.extract_u_nk(path, T=300))
    return pd.concat(frames)
def gmx_expanded_ensemble_case_3():
    """Return the combined u_nk of the expanded-ensemble case 3 dataset.

    Each file under ``['data']['AllStates']`` is parsed with
    ``gmx.extract_u_nk`` at ``T=300``; the frames are joined with
    ``pd.concat``.
    """
    all_state_files = (
        alchemtest.gmx.load_expanded_ensemble_case_3()['data']['AllStates'])
    frames = []
    for path in all_state_files:
        frames.append(gmx.extract_u_nk(path, T=300))
    return pd.concat(frames)
def gmx_benzene_vdw_u_nk():
    """Return the combined u_nk of the benzene dataset's ``'VDW'`` files.

    Each file is parsed with ``gmx.extract_u_nk`` at ``T=300``; the frames are
    joined with ``alchemlyb.concat``.
    """
    vdw_files = alchemtest.gmx.load_benzene()['data']['VDW']
    frames = []
    for path in vdw_files:
        frames.append(gmx.extract_u_nk(path, T=300))
    return alchemlyb.concat(frames)
def estimaters():
    """Return a fitted ``(TI, MBAR)`` pair for the benzene ``'Coulomb'`` files.

    The TI estimator is fitted on the concatenated dH/dl frames and the MBAR
    estimator on the concatenated u_nk frames, both extracted at ``T=300``.
    """
    coulomb_files = load_benzene().data['Coulomb']

    dhdl_frames = [extract_dHdl(path, T=300) for path in coulomb_files]
    ti_estimator = TI().fit(alchemlyb.concat(dhdl_frames))

    u_nk_frames = [extract_u_nk(path, T=300) for path in coulomb_files]
    mbar_estimator = MBAR().fit(alchemlyb.concat(u_nk_frames))

    return ti_estimator, mbar_estimator
def test_u_nk():
    """Check index names and shape of u_nk parsed from each benzene file.
    """
    # Expected (rows, columns) for the legs that have a shape assertion.
    expected_shape = {'Coulomb': (4001, 5), 'VDW': (4001, 16)}

    legs = load_benzene()['data']
    for leg in legs:
        for filename in legs[leg]:
            frame = extract_u_nk(filename, T=300)

            assert frame.index.names == ['time', 'fep-lambda']

            if leg in expected_shape:
                assert frame.shape == expected_shape[leg]
def test_u_nk_with_potential_energy():
    """Check the reduced potential when the potential energy is present.
    """
    dataset = load_water_particle_with_potential_energy()

    # The diagonal sum over all files serves as a checksum of the parsing.
    assert_almost_equal(_diag_sum(dataset), 16674040406778.867, decimal=2)

    # Spot-check a single entry taken from the first row of the first file.
    first_file = dataset['data']['AllStates'][0]
    first_row = extract_u_nk(first_file, T=300).iloc[0]
    assert_almost_equal(first_row[0], -15656.557252200757, decimal=6)
def test_u_nk_with_total_energy():
    """Check the reduced potential when the total energy is present.
    """
    dataset = load_water_particle_with_total_energy()

    # The diagonal sum over all files serves as a checksum of the parsing.
    assert_almost_equal(_diag_sum(dataset), 47611374980.34574, decimal=4)

    # Spot-check a single entry taken from the first row of the first file.
    first_file = dataset['data']['AllStates'][0]
    first_row = extract_u_nk(first_file, T=300).iloc[0]
    assert_almost_equal(first_row[0], -11211.577658852531, decimal=6)
def test_u_nk_case2():
    """Check index names and shape of u_nk for expanded ensemble case 2.

    Files are parsed with ``filter=False``.
    """
    expected_names = [
        'time',
        'fep-lambda',
        'coul-lambda',
        'vdw-lambda',
        'restraint-lambda',
    ]

    legs = load_expanded_ensemble_case_2()['data']
    for leg in legs:
        for filename in legs[leg]:
            frame = extract_u_nk(filename, T=300, filter=False)

            assert frame.index.names == expected_names
            assert frame.shape == (25001, 28)
def test_u_nk_without_energy():
    """Check the reduced potential when no energy column is present.
    """
    dataset = load_water_particle_without_energy()

    # The diagonal sum over all files serves as a checksum of the parsing.
    assert_almost_equal(_diag_sum(dataset), 20572986867158.184, decimal=2)

    # Spot-check a single entry taken from the first row of the first file.
    first_file = dataset['data']['AllStates'][0]
    first_row = extract_u_nk(first_file, T=300).iloc[0]
    assert_almost_equal(first_row[0], 0.0, decimal=6)
def _diag_sum(dataset):
    """Sum the (i, i) entries of the u_nk parsed from every file of *dataset*.

    For each file, ``i`` runs from 0 up to (but excluding) the number of
    files in that file's leg.
    """
    total = 0.0
    legs = dataset['data']

    for leg in legs:
        n_files = len(legs[leg])
        for filename in legs[leg]:
            frame = extract_u_nk(filename, T=300)

            # Accumulate one term at a time so the floating-point summation
            # order stays fixed.
            for idx in range(n_files):
                row = frame.iloc[idx]
                total += row[idx]

    return total
def test_plot_mbar_omatrix():
    '''Smoke test: plot_mbar_overlap_matrix returns a matplotlib Axes.'''
    coulomb_files = load_benzene().data['Coulomb']
    frames = [extract_u_nk(xvg, T=300) for xvg in coulomb_files]

    estimator = MBAR()
    estimator.fit(pd.concat(frames))

    ax = plot_mbar_overlap_matrix(estimator.overlap_matrix)
    assert isinstance(ax, matplotlib.axes.Axes)

    ax = plot_mbar_overlap_matrix(estimator.overlap_matrix, [1, ])
    assert isinstance(ax, matplotlib.axes.Axes)

    # Bump up coverage by overwriting the two corner entries before plotting.
    matrix = estimator.overlap_matrix
    matrix[0, 0] = 0.0025
    matrix[-1, -1] = 0.9975
    ax = plot_mbar_overlap_matrix(matrix)
    assert isinstance(ax, matplotlib.axes.Axes)
def test_plot_convergence():
    """Smoke test: plot_convergence returns a matplotlib Axes.

    MBAR is fitted on growing leading ("forward") and trailing ("backward")
    portions of each benzene ``'Coulomb'`` u_nk frame, and the first-to-last
    state estimate and its error are collected for plotting.
    """
    coulomb_files = load_benzene().data['Coulomb']
    data_list = [extract_u_nk(xvg, T=300) for xvg in coulomb_files]

    num_points = 10
    forward, forward_error = [], []
    backward, backward_error = [], []

    for step in range(1, num_points + 1):
        n_rows = int(len(data_list[0]) / num_points * step)

        # Forward: the first n_rows rows of every frame.
        head_fit = MBAR().fit(
            alchemlyb.concat([frame[:n_rows] for frame in data_list]))
        forward.append(head_fit.delta_f_.iloc[0, -1])
        forward_error.append(head_fit.d_delta_f_.iloc[0, -1])

        # Backward: the last n_rows rows of every frame.
        tail_fit = MBAR().fit(
            alchemlyb.concat([frame[-n_rows:] for frame in data_list]))
        backward.append(tail_fit.delta_f_.iloc[0, -1])
        backward_error.append(tail_fit.d_delta_f_.iloc[0, -1])

    ax = plot_convergence(forward, forward_error, backward, backward_error)
    assert isinstance(ax, matplotlib.axes.Axes)
    plt.close(ax.figure)
def u_nk():
    """Return the concatenated benzene ``'Coulomb'`` u_nk with ``attrs`` set.

    The ``attrs`` of the result are replaced by those of a fresh extraction
    of the first ``'Coulomb'`` file.
    """
    coulomb_files = load_benzene().data['Coulomb']
    frames = [extract_u_nk(xvg, T=300) for xvg in coulomb_files]
    combined = alchemlyb.concat(frames)

    first_file = load_benzene().data['Coulomb'][0]
    combined.attrs = extract_u_nk(first_file, T=300).attrs
    return combined
def n_uk_list(self):
    """Return one u_nk frame per ``'complex'`` file of the ABFE dataset.

    Each file is parsed with ``gmx.extract_u_nk`` at ``T=300``.
    """
    frames = []
    for dhdl in load_ABFE()['data']['complex']:
        frames.append(gmx.extract_u_nk(dhdl, T=300))
    return frames
def gmx_benzene_u_nk_full():
    """Return the u_nk of all benzene ``'Coulomb'`` files joined by pd.concat.

    Each file is parsed with ``gmx.extract_u_nk`` at ``T=300``.
    """
    coulomb_files = alchemtest.gmx.load_benzene()['data']['Coulomb']
    frames = []
    for path in coulomb_files:
        frames.append(gmx.extract_u_nk(path, T=300))
    return pd.concat(frames)
def gmx_benzene_u_nk():
    """Return the u_nk of the first benzene ``'Coulomb'`` file at ``T=300``."""
    first_coulomb_file = alchemtest.gmx.load_benzene()['data']['Coulomb'][0]
    return gmx.extract_u_nk(first_coulomb_file, T=300)
def test_plot_dF_state():
    '''Smoke test: plot_dF_state returns a Figure or raises ValueError.'''

    def _check_and_close(fig):
        # Every successful call must return a Figure, which is then closed.
        assert isinstance(fig, matplotlib.figure.Figure)
        plt.close(fig)

    bz = load_benzene().data
    coulomb_files = bz['Coulomb']
    vdw_files = bz['VDW']

    u_nk_coul = alchemlyb.concat(
        [extract_u_nk(xvg, T=300) for xvg in coulomb_files])
    dHdl_coul = alchemlyb.concat(
        [extract_dHdl(xvg, T=300) for xvg in coulomb_files])
    u_nk_vdw = alchemlyb.concat(
        [extract_u_nk(xvg, T=300) for xvg in vdw_files])
    dHdl_vdw = alchemlyb.concat(
        [extract_dHdl(xvg, T=300) for xvg in vdw_files])

    ti_coul = TI().fit(dHdl_coul)
    ti_vdw = TI().fit(dHdl_vdw)
    bar_coul = BAR().fit(u_nk_coul)
    bar_vdw = BAR().fit(u_nk_vdw)
    mbar_coul = MBAR().fit(u_nk_coul)
    mbar_vdw = MBAR().fit(u_nk_vdw)

    dhdl_data = [
        (ti_coul, ti_vdw),
        (bar_coul, bar_vdw),
        (mbar_coul, mbar_vdw),
    ]

    # Both supported orientations.
    _check_and_close(plot_dF_state(dhdl_data, orientation='portrait'))
    _check_and_close(plot_dF_state(dhdl_data, orientation='landscape'))

    # Labels: three labels are accepted, two must be rejected.
    _check_and_close(plot_dF_state(dhdl_data, labels=['MBAR', 'TI', 'BAR']))
    with pytest.raises(ValueError):
        plot_dF_state(dhdl_data, labels=[
            'MBAR',
            'TI',
        ])

    # Colors: three colors are accepted, two must be rejected.
    _check_and_close(
        plot_dF_state(dhdl_data, colors=['#C45AEC', '#33CC33', '#F87431']))
    with pytest.raises(ValueError):
        plot_dF_state(dhdl_data, colors=['#C45AEC', '#33CC33'])

    # An unknown orientation must be rejected.
    with pytest.raises(ValueError):
        plot_dF_state(dhdl_data, orientation='xxx')

    # Other accepted input shapes: a single estimator, a flat list of
    # estimators, and a list holding a single tuple.
    _check_and_close(plot_dF_state(ti_coul, orientation='landscape'))
    _check_and_close(plot_dF_state(ti_coul, orientation='portrait'))
    _check_and_close(plot_dF_state([ti_coul, bar_coul]))
    _check_and_close(plot_dF_state([(ti_coul, ti_vdw)]))
def gmx_ABFE_u_nk():
    """Return the u_nk of the last ABFE ``'complex'`` file at ``T=300``."""
    last_complex_file = alchemtest.gmx.load_ABFE()['data']['complex'][-1]
    return gmx.extract_u_nk(last_complex_file, T=300)
def extract_data(self, dir, temp, dt):
    """Parse the ``*dhdl.xvg*`` files in ``dir`` and return the last state's data.

    Files are visited in natural-sort order. Each file is copied without its
    final line into ``temporary.xvg`` in the current working directory and
    parsed with ``extract_dHdl`` and ``extract_u_nk``. When consecutive files
    of the same state overlap in time, the overlapping trailing frames of the
    earlier file are dropped. A file whose first time stamp is 0 is treated
    as the start of a new state.

    Parameters
    ----------
    dir : str
        Directory searched for files matching ``*dhdl.xvg*``.
    temp : float
        Temperature forwarded as ``T`` to ``extract_dHdl``/``extract_u_nk``.
    dt : float
        Time spacing between consecutive frames, in the same unit as the
        ``time`` index of the parsed frames. Used to convert a time overlap
        into a number of frames.

    Returns
    -------
    tuple
        ``(dHdl_data, u_nk_data)``: the concatenation of the frames collected
        since the most recent state boundary. Despite the inline comment
        mentioning equilibrium_detection, these two frames are returned
        without it being applied.

    Notes
    -----
    ``self.n_state`` is reset to 0 and incremented once per state seen.
    ``temporary.xvg`` is left on disk after the call.
    """
    # extract and subsample dHdl using equilibrium_detection
    dHdl_state = []  # dHdl_state is for collecting data for a single state
    u_nk_state = []  # u_nk_state is for collecting data for a single state

    # Start from a clean scratch file (portable replacement for `rm`).
    if os.path.isfile('temporary.xvg'):
        os.remove('temporary.xvg')

    files = glob.glob(os.path.join(dir, '*dhdl.xvg*'))
    files = natsort.natsorted(files, reverse=False)

    n = 0  # counter for the number of files of a specific state
    self.n_state = 0  # counter for the number of states

    for path in track(files):
        n += 1
        logger(f"Parsing {path} and collecting data ...")

        # Delete the last line in case it is incomplete. This is done in
        # Python instead of `head -n-1 ... > temporary.xvg` so that it does
        # not depend on GNU head and is safe for paths containing spaces or
        # shell metacharacters.
        with open(path, 'rb') as src:
            raw_lines = src.readlines()
        with open('temporary.xvg', 'wb') as dst:
            dst.writelines(raw_lines[:-1])

        dHdl_state.append(extract_dHdl('temporary.xvg', T=temp))
        u_nk_state.append(extract_u_nk('temporary.xvg', T=temp))

        if n > 1:  # for discard the overlapped time frames of the previous file
            # the last time frame of file n
            upper_t = dHdl_state[-2].iloc[-1].name[0]
            # the first time frame of file n + 1
            lower_t = dHdl_state[-1].iloc[0].name[0]
            # upper_t and lower_t should be the same for both dHdl and u_nk

            if lower_t != 0:
                # Number of data frames to discard in file n. The ratio is
                # rounded before adding 1 so that floating-point noise in the
                # time stamps cannot truncate it to one frame too few.
                n_discard = int(round((upper_t - lower_t) / dt)) + 1

                # Only trim when the files really overlap: `iloc[:-0]` would
                # empty the frame, and a negative count would keep only its
                # first few rows.
                if n_discard > 0:
                    dHdl_state[-2] = dHdl_state[-2].iloc[:-n_discard]
                    u_nk_state[-2] = u_nk_state[-2].iloc[:-n_discard]
            else:
                # lower_t == 0: file n+1 is the first file of the next
                # replica, so we have gathered all data of the previous state
                self.n_state += 1
                dHdl_data = pd.concat(dHdl_state[:-1])
                u_nk_data = pd.concat(u_nk_state[:-1])

                # NOTE(review): `dHdl` and `u_nk` are not defined anywhere in
                # this function. Unless they exist at module level these two
                # appends raise NameError. Confirm where the per-state
                # results are meant to be stored.
                dHdl.append(
                    equilibrium_detection(dHdl_data, dHdl_data.iloc[:, 0]))
                dHdl_state = [dHdl_state[-1]]
                logger(
                    f'Subsampling dHdl data of the {ordinal(self.n_state)} state ...'
                )

                u_nk.append(
                    equilibrium_detection(u_nk_data, u_nk_data.iloc[:, 0]))
                u_nk_state = [u_nk_state[-1]]
                logger(
                    f'Subsampling u_nk data of the {ordinal(self.n_state)} state ...'
                )

                n = 1  # now there is only one file loaded in dHdl_state/u_nk_state

    # dealing with the last state with equilibrium_detection
    self.n_state += 1
    dHdl_data = pd.concat(dHdl_state)
    u_nk_data = pd.concat(u_nk_state)

    return dHdl_data, u_nk_data
def test_extract_u_nk_unit():
    '''Check the attrs that extract_u_nk sets on the returned frame.'''
    first_coulomb_file = load_benzene()['data']['Coulomb'][0]
    frame = extract_u_nk(first_coulomb_file, 310)

    attrs = frame.attrs
    assert attrs['temperature'] == 310
    assert attrs['energy_unit'] == 'kT'
def gmx_benzene():
    """Return ``(dHdl_list, u_nk_list)`` for the benzene ``'Coulomb'`` files.

    Each list holds one frame per file, extracted at ``T=300``.
    """
    dataset = load_benzene()

    dhdl_frames = []
    for dhdl in dataset['data']['Coulomb']:
        dhdl_frames.append(gmx.extract_dHdl(dhdl, T=300))

    u_nk_frames = []
    for dhdl in dataset['data']['Coulomb']:
        u_nk_frames.append(gmx.extract_u_nk(dhdl, T=300))

    return dhdl_frames, u_nk_frames