def gmx_benzene_vdw_dHdl():
    """Return the concatenated dH/dl data of the benzene VDW files.

    Every file listed under ``data['VDW']`` of the alchemtest benzene
    dataset is parsed with ``gmx.extract_dHdl`` at ``T=300`` and the
    results are joined with ``pd.concat``.
    """
    vdw_files = alchemtest.gmx.load_benzene()['data']['VDW']
    frames = [gmx.extract_dHdl(xvg, T=300) for xvg in vdw_files]
    return pd.concat(frames)
def test_nounit():
    """Check that ``to_kT`` raises ``TypeError`` without ``energy_unit``."""
    coulomb_files = load_benzene()['data']['Coulomb']
    frame = extract_dHdl(coulomb_files[0], 310)
    # Remove the unit entry (if any) from the metadata before converting.
    frame.attrs.pop('energy_unit', None)
    with pytest.raises(TypeError):
        to_kT(frame)
def test_noT():
    """Check that ``to_kT`` raises ``TypeError`` without ``temperature``."""
    coulomb_files = load_benzene()['data']['Coulomb']
    frame = extract_dHdl(coulomb_files[0], 310)
    # Remove the temperature entry (if any) from the metadata before converting.
    frame.attrs.pop('temperature', None)
    with pytest.raises(TypeError):
        to_kT(frame)
def gmx_water_particle_without_energy_dHdl():
    """Return the concatenated dH/dl data of the water-particle (no energy) set.

    Every file under ``data['AllStates']`` is parsed with
    ``gmx.extract_dHdl`` at ``T=300`` and joined with ``alchemlyb.concat``.
    """
    state_files = alchemtest.gmx.load_water_particle_without_energy()['data']['AllStates']
    per_file = [gmx.extract_dHdl(xvg, T=300) for xvg in state_files]
    return alchemlyb.concat(per_file)
def gmx_expanded_ensemble_case_3_dHdl():
    """Return the concatenated dH/dl data of expanded-ensemble case 3.

    Every file under ``data['AllStates']`` is parsed with
    ``gmx.extract_dHdl`` at ``T=300`` and joined with ``pd.concat``.
    """
    state_files = alchemtest.gmx.load_expanded_ensemble_case_3()['data']['AllStates']
    per_file = [gmx.extract_dHdl(xvg, T=300) for xvg in state_files]
    return pd.concat(per_file)
def test_equilibrium_detection(self, dhdl):
    """Check that ``equilibrium_detection`` output carries the expected attrs."""
    # NOTE(review): the ``dhdl`` argument is rebound right below, so the
    # value passed in is never the data under test -- confirm this is intended.
    coulomb_files = load_benzene()['data']['Coulomb']
    dhdl = extract_dHdl(coulomb_files[0], 310)
    subsampled = equilibrium_detection(dhdl)
    assert subsampled.attrs['temperature'] == 310
    assert subsampled.attrs['energy_unit'] == 'kT'
def gmx_benzene_coul_dHdl():
    """Return the concatenated dH/dl data of the benzene Coulomb files.

    Every file under ``data['Coulomb']`` is parsed with
    ``gmx.extract_dHdl`` at ``T=300`` and joined with ``alchemlyb.concat``.
    """
    coulomb_files = alchemtest.gmx.load_benzene()['data']['Coulomb']
    per_file = [gmx.extract_dHdl(xvg, T=300) for xvg in coulomb_files]
    return alchemlyb.concat(per_file)
def gmx_expanded_ensemble_case_2_dHdl():
    """Return the concatenated dH/dl data of expanded-ensemble case 2.

    Every file under ``data['AllStates']`` is parsed with
    ``gmx.extract_dHdl`` at ``T=300`` with ``filter=False`` and joined with
    ``alchemlyb.concat``.
    """
    state_files = alchemtest.gmx.load_expanded_ensemble_case_2()['data']['AllStates']
    per_file = [
        gmx.extract_dHdl(xvg, T=300, filter=False) for xvg in state_files
    ]
    return alchemlyb.concat(per_file)
def test_sanity(self, data, tmp_path):
    """Confirm the unmodified text parses to the expected number of rows."""
    content, expected_rows = data
    xvg_path = tmp_path / 'text.xvg'
    xvg_path.write_text(content)
    parsed = extract_dHdl(xvg_path, 310)
    assert len(parsed) == expected_rows
def test_statistical_inefficiency(self, dhdl):
    """Check that ``statistical_inefficiency`` output carries the expected attrs."""
    # NOTE(review): the ``dhdl`` argument is rebound right below, so the
    # value passed in is never the data under test -- confirm this is intended.
    coulomb_files = load_benzene()['data']['Coulomb']
    dhdl = extract_dHdl(coulomb_files[0], 310)
    subsampled = statistical_inefficiency(dhdl)
    assert subsampled.attrs['temperature'] == 310
    assert subsampled.attrs['energy_unit'] == 'kT'
def test_truncated_row(self, data, tmp_path):
    """Check that a cut-off final row does not change the parsed row count."""
    content, expected_rows = data
    xvg_path = tmp_path / 'text.xvg'
    # Append a row that stops after two values.
    xvg_path.write_text(content + '40010.0 27.0\n')
    parsed = extract_dHdl(xvg_path, 310, filter=True)
    assert len(parsed) == expected_rows
def test_plot_dF_state():
    """Smoke-test ``plot_dF_state`` with accepted and rejected arguments."""
    benzene = load_benzene().data

    # Parse and concatenate both legs (same order as the estimators use them).
    u_nk_coul = pd.concat([extract_u_nk(f, T=300) for f in benzene['Coulomb']])
    dHdl_coul = pd.concat([extract_dHdl(f, T=300) for f in benzene['Coulomb']])
    u_nk_vdw = pd.concat([extract_u_nk(f, T=300) for f in benzene['VDW']])
    dHdl_vdw = pd.concat([extract_dHdl(f, T=300) for f in benzene['VDW']])

    ti_coul = TI().fit(dHdl_coul)
    ti_vdw = TI().fit(dHdl_vdw)
    bar_coul = BAR().fit(u_nk_coul)
    bar_vdw = BAR().fit(u_nk_vdw)
    mbar_coul = MBAR().fit(u_nk_coul)
    mbar_vdw = MBAR().fit(u_nk_vdw)

    estimator_pairs = [
        (ti_coul, ti_vdw),
        (bar_coul, bar_vdw),
        (mbar_coul, mbar_vdw),
    ]

    # Both orientations on the full list of estimator pairs.
    for layout in ('portrait', 'landscape'):
        figure = plot_dF_state(estimator_pairs, orientation=layout)
        assert isinstance(figure, matplotlib.figure.Figure)

    # Labels: three are accepted, two are rejected.
    figure = plot_dF_state(estimator_pairs, labels=['MBAR', 'TI', 'BAR'])
    assert isinstance(figure, matplotlib.figure.Figure)
    with pytest.raises(ValueError):
        plot_dF_state(estimator_pairs, labels=['MBAR', 'TI'])

    # Colors: three are accepted, two are rejected.
    figure = plot_dF_state(estimator_pairs,
                           colors=['#C45AEC', '#33CC33', '#F87431'])
    assert isinstance(figure, matplotlib.figure.Figure)
    with pytest.raises(ValueError):
        plot_dF_state(estimator_pairs, colors=['#C45AEC', '#33CC33'])

    # An unknown orientation is rejected with NameError.
    with pytest.raises(NameError):
        plot_dF_state(estimator_pairs, orientation='xxx')

    # A bare estimator, in both orientations.
    for layout in ('landscape', 'portrait'):
        figure = plot_dF_state(ti_coul, orientation=layout)
        assert isinstance(figure, matplotlib.figure.Figure)

    # A flat list of estimators, then a single-pair list.
    figure = plot_dF_state([ti_coul, bar_coul])
    assert isinstance(figure, matplotlib.figure.Figure)
    figure = plot_dF_state([(ti_coul, ti_vdw)])
    assert isinstance(figure, matplotlib.figure.Figure)
def gmx_water_particle_with_total_energy_dHdl():
    """Return a list with the dH/dl data of each water-particle file.

    Every file under ``data['AllStates']`` is parsed with
    ``gmx.extract_dHdl`` at ``T=300``; the per-file results are returned
    as a plain list (they are not concatenated).
    """
    state_files = alchemtest.gmx.load_water_particle_with_total_energy()['data']['AllStates']
    per_file = []
    for xvg in state_files:
        per_file.append(gmx.extract_dHdl(xvg, T=300))
    return per_file
def test_too_many_cols(self, data, tmp_path):
    """Check that an over-long final row does not change the parsed row count."""
    content, expected_rows = data
    # Extra row appended after the regular content.
    extra_row = '40010.0 27.0 0.0 6.7 13.5 20.2 27.0 0.7 27.0 0.0 6.7 13.5 20.2 27.0 0.7\n'
    xvg_path = tmp_path / 'text.xvg'
    xvg_path.write_text(content + extra_row)
    parsed = extract_dHdl(xvg_path, 310, filter=True)
    assert len(parsed) == expected_rows
def estimaters():
    """Return a fitted ``TI`` and a fitted ``MBAR`` for the benzene Coulomb leg.

    Returns
    -------
    tuple
        ``(ti, mbar)`` where ``ti`` was fitted on the concatenated dH/dl
        data and ``mbar`` on the concatenated u_nk data (both at ``T=300``).
    """
    coulomb_files = load_benzene().data['Coulomb']

    dHdl_frames = [extract_dHdl(xvg, T=300) for xvg in coulomb_files]
    ti = TI().fit(alchemlyb.concat(dHdl_frames))

    u_nk_frames = [extract_u_nk(xvg, T=300) for xvg in coulomb_files]
    mbar = MBAR().fit(alchemlyb.concat(u_nk_frames))

    return ti, mbar
def test_weirdnumber(self, data, tmp_path):
    """Check that a final row with a malformed number is not counted."""
    content, expected_rows = data
    # The second field, 27.040010.0, is 27.0 and 40010.0 run together.
    malformed_row = (
        '40010.0 27.040010.0 27.0 0.0 6.7 13.5 20.2 27.0 0.7 27.0 0.0 6.7 '
        '13.5 20.2 27.0 0.7\n'
    )
    xvg_path = tmp_path / 'text.xvg'
    xvg_path.write_text(content + malformed_row)
    parsed = extract_dHdl(xvg_path, 310, filter=True)
    assert len(parsed) == expected_rows
def test_dHdl():
    """Check index names and shape of dH/dl data for every benzene file."""
    legs = load_benzene()['data']
    # Walk every file of every leg, in the dataset's own order.
    all_files = (xvg for leg in legs for xvg in legs[leg])
    for xvg in all_files:
        frame = extract_dHdl(xvg, T=300)
        assert frame.index.names == ['time', 'fep-lambda']
        assert frame.shape == (4001, 1)
def test_plot_ti_dhdl():
    """Smoke-test ``plot_ti_dhdl`` with several argument combinations."""
    benzene = load_benzene().data

    coul_frames = [extract_dHdl(f, T=300) for f in benzene['Coulomb']]
    ti_coul = TI()
    ti_coul.fit(pd.concat(coul_frames))

    # Default call.
    result = plot_ti_dhdl(ti_coul)
    assert isinstance(result, matplotlib.axes.Axes)

    # Drawing onto a caller-supplied axes.
    fig, ax = plt.subplots(figsize=(8, 6))
    result = plot_ti_dhdl(ti_coul, ax=ax)
    assert isinstance(result, matplotlib.axes.Axes)

    # Custom labels, then custom labels plus colors.
    result = plot_ti_dhdl(ti_coul, labels=['Coul'])
    assert isinstance(result, matplotlib.axes.Axes)
    result = plot_ti_dhdl(ti_coul, labels=['Coul'], colors=['r'])
    assert isinstance(result, matplotlib.axes.Axes)

    # A list of two estimators.
    vdw_frames = [extract_dHdl(f, T=300) for f in benzene['VDW']]
    ti_vdw = TI().fit(pd.concat(vdw_frames))
    result = plot_ti_dhdl([ti_coul, ti_vdw])
    assert isinstance(result, matplotlib.axes.Axes)

    # Replace the estimator's dhdl with a synthetic 100-row, one-column table.
    synthetic = pd.DataFrame.from_dict(
        {'fep': range(100)},
        orient='index',
        columns=np.arange(100) / 100,
    )
    ti_coul.dhdl = synthetic.T
    result = plot_ti_dhdl(ti_coul)
    assert isinstance(result, matplotlib.axes.Axes)
def test_dHdl_case1():
    """Check index names and shape of dH/dl data for expanded-ensemble case 1."""
    expected_index = [
        'time',
        'fep-lambda',
        'coul-lambda',
        'vdw-lambda',
        'restraint-lambda',
    ]
    legs = load_expanded_ensemble_case_1()['data']
    # Walk every file of every leg, in the dataset's own order.
    all_files = (xvg for leg in legs for xvg in legs[leg])
    for xvg in all_files:
        frame = extract_dHdl(xvg, T=300, filter=False)
        assert frame.index.names == expected_index
        assert frame.shape == (50001, 4)
def get_dHdl_XVG_delayed(xvg):
    """Pre-read part of an XVG file, extract its dH/dl data and log timings.

    Two comma-separated records are sent to ``logging.info``, one per phase
    (``bread`` and ``_extract_dHdl``), with the fields
    ``function,phase,path,elapsed,end,start``.

    Parameters
    ----------
    xvg
        Object exposing the file location as ``xvg.abspath``.

    Returns
    -------
    The value returned by ``extract_dHdl(xvg.abspath, T=T)``.

    Notes
    -----
    ``T`` is not a parameter of this function; it is looked up in the
    enclosing (presumably module) scope -- verify it is defined there.
    """
    # TODO
    # apply extract_dHdl_updated3
    # merge get_header, extract_state
    # cython for this?
    # don't forget cache read by bytesio
    path = xvg.abspath
    fsize = os.path.getsize(path)
    bufsize = 8192
    # Half the number of ``bufsize``-sized chunks in the file (a float).
    stopat = fsize / bufsize / 2

    s0 = time.time()
    bread(path, bsize=bufsize, stopat=stopat)
    s1 = time.time()
    msg = "{},{},{},{},{},{}".format(
        'get_dHdl_XVG_delayed', 'bread', path, s1 - s0, s1, s0)
    logging.info(msg)

    dHdl = extract_dHdl(path, T=T)
    s2 = time.time()
    # Report this phase's own end/start stamps (s2, s1); the record used to
    # repeat the bread phase's stamps (s1, s0), contradicting its elapsed field.
    msg = "{},{},{},{},{},{}".format(
        'get_dHdl_XVG_delayed', '_extract_dHdl', path, s2 - s1, s2, s1)
    logging.info(msg)

    return dHdl
def test_extract_dHdl_unit():
    """Check that ``extract_dHdl`` output carries the expected attrs."""
    coulomb_files = load_benzene()['data']['Coulomb']
    frame = extract_dHdl(coulomb_files[0], 310)
    assert frame.attrs['temperature'] == 310
    assert frame.attrs['energy_unit'] == 'kT'
def dhdl():
    """Return the concatenated dH/dl data of ABFE complex files 0 through 29.

    Each of the 30 entries of ``load_ABFE()['data']['complex']`` is parsed
    with ``extract_dHdl`` at 300 and joined with ``alchemlyb.concat``.
    """
    complex_files = load_ABFE()['data']['complex']
    frames = []
    for idx in range(30):
        frames.append(extract_dHdl(complex_files[idx], 300))
    return alchemlyb.concat(frames)
def dhdl():
    """Return the concatenated dH/dl data of the benzene Coulomb files.

    Every file under ``load_benzene().data['Coulomb']`` is parsed with
    ``extract_dHdl`` at ``T=300`` and joined with ``alchemlyb.concat``.
    """
    coulomb_files = load_benzene().data['Coulomb']
    frames = [extract_dHdl(path, T=300) for path in coulomb_files]
    return alchemlyb.concat(frames)
def data():
    """Return the raw text of the first benzene Coulomb file and its row count.

    Returns
    -------
    tuple
        ``(text, length)`` where ``text`` is the bz2-decompressed file content
        read in text mode and ``length`` is ``len()`` of the result of
        ``extract_dHdl`` on the same file at 310.
    """
    # Resolve the path once; it was previously looked up through two
    # separate load_benzene() calls.
    first_coulomb = load_benzene()['data']['Coulomb'][0]
    dhdl = extract_dHdl(first_coulomb, 310)
    with bz2.open(first_coulomb, "rt") as bz_file:
        text = bz_file.read()
    return text, len(dhdl)
def gmx_benzene():
    """Return per-file dH/dl and u_nk data for the benzene Coulomb files.

    Returns
    -------
    tuple
        ``(dHdl_list, u_nk_list)``: two lists holding, for each file under
        ``data['Coulomb']``, the result of ``gmx.extract_dHdl`` and of
        ``gmx.extract_u_nk`` at ``T=300``.
    """
    coulomb_files = load_benzene()['data']['Coulomb']
    dHdl_list = [gmx.extract_dHdl(xvg, T=300) for xvg in coulomb_files]
    u_nk_list = [gmx.extract_u_nk(xvg, T=300) for xvg in coulomb_files]
    return dHdl_list, u_nk_list
def gmx_ABFE_dhdl():
    """Return the dH/dl data of the first ABFE complex file at ``T=300``."""
    complex_files = alchemtest.gmx.load_ABFE()['data']['complex']
    first_file = complex_files[0]
    return gmx.extract_dHdl(first_file, T=300)
def gmx_benzene_dHdl():
    """Return the dH/dl data of the first benzene Coulomb file at ``T=300``."""
    coulomb_files = alchemtest.gmx.load_benzene()['data']['Coulomb']
    first_file = coulomb_files[0]
    return gmx.extract_dHdl(first_file, T=300)
def dhdl():
    """Return the dH/dl data of the first benzene Coulomb file parsed at 310."""
    coulomb_files = load_benzene()['data']['Coulomb']
    return extract_dHdl(coulomb_files[0], 310)
def gmx_benzene_dHdl_full():
    """Return the concatenated dH/dl data of all benzene Coulomb files.

    Every file under ``data['Coulomb']`` is parsed with
    ``gmx.extract_dHdl`` at ``T=300`` and joined with ``pd.concat``.
    """
    coulomb_files = alchemtest.gmx.load_benzene()['data']['Coulomb']
    frames = []
    for xvg in coulomb_files:
        frames.append(gmx.extract_dHdl(xvg, T=300))
    return pd.concat(frames)
def extract_data(self, dir, temp, dt):
    """Parse the ``*dhdl.xvg*`` files in ``dir`` and group them by state.

    Files are processed in natural-sort order. Each file is copied, minus
    its last line, to ``temporary.xvg`` in the current working directory and
    parsed with ``extract_dHdl`` and ``extract_u_nk``. When consecutive
    files overlap in time, the trailing frames of the earlier file are
    dropped. When a file starts at time 0, the files gathered so far are
    concatenated, passed through ``equilibrium_detection`` and appended to
    ``dHdl`` / ``u_nk``.

    Parameters
    ----------
    dir
        Directory searched with the glob pattern ``*dhdl.xvg*``.
    temp
        Passed as ``T`` to ``extract_dHdl`` and ``extract_u_nk``.
    dt
        Divisor converting a time overlap into a number of frames to drop
        (presumably the output time step -- confirm with callers).

    Returns
    -------
    tuple
        ``(dHdl_data, u_nk_data)``: the concatenated, *not* subsampled data
        of the files still held in ``dHdl_state`` / ``u_nk_state`` after the
        loop.

    Notes
    -----
    Sets ``self.n_state``. Runs ``rm`` and ``head`` through ``os.system``.

    NOTE(review): ``dHdl`` and ``u_nk`` (the targets of ``.append`` below)
    are not defined in this function; they must come from an outer scope --
    verify. NOTE(review): despite the closing comment, the final state is
    returned without calling ``equilibrium_detection`` -- confirm intent.
    """
    # extract and subsample dHdl using equilibrium_detection
    dHdl_state = []  # dHdl_state is for collecting data for a single state
    u_nk_state = []  # u_nk_state is for collecting data for a single state

    # Remove a stale temporary file left in the working directory.
    if os.path.isfile('temporary.xvg') is True:
        os.system("rm temporary.xvg")
    files = glob.glob(os.path.join(dir, '*dhdl.xvg*'))
    files = natsort.natsorted(files, reverse=False)

    file_idx = -1
    n = 0  # counter for the number of files of a specific state
    self.n_state = 0  # counter for the number of states

    for i in track(files):
        n += 1
        file_idx += 1
        logger(f"Parsing {files[file_idx]} and collecting data ...")
        # NOTE(review): the file name is interpolated into a shell command
        # unquoted; names containing spaces or shell metacharacters will break.
        os.system(f"head -n-1 {i} > temporary.xvg"
                  )  # delete the last line in case it is incomplete
        dHdl_state.append(extract_dHdl('temporary.xvg', T=temp))
        u_nk_state.append(extract_u_nk('temporary.xvg', T=temp))

        if n > 1:  # for discarding the overlapped time frames of the previous file
            upper_t = dHdl_state[-2].iloc[
                dHdl_state[-2].shape[0] - 1].name[0]  # the last time frame of file n
            lower_t = dHdl_state[-1].iloc[0].name[
                0]  # the first time frame of file n + 1
            # upper_t and lower_t should be the same for both dHdl and u_nk

            if lower_t != 0:  # file n+1 does not start at time 0: trim the overlap from file n
                n_discard = int(
                    (upper_t - lower_t) / dt + 1)  # number of data frames to discard in file n
                dHdl_state[-2] = dHdl_state[-2].iloc[:-n_discard]
                u_nk_state[-2] = u_nk_state[-2].iloc[:-n_discard]
            else:  # lower_t == 0 means that we have gathered dHdl for the previous state
                self.n_state += 1
                # Everything except the newest file belongs to the finished state.
                dHdl_data = pd.concat(dHdl_state[:-1])
                u_nk_data = pd.concat(u_nk_state[:-1])

                dHdl.append(
                    equilibrium_detection(dHdl_data, dHdl_data.iloc[:, 0]))
                dHdl_state = [dHdl_state[-1]]
                logger(
                    f'Subsampling dHdl data of the {ordinal(self.n_state)} state ...'
                )

                u_nk.append(
                    equilibrium_detection(u_nk_data, u_nk_data.iloc[:, 0]))
                u_nk_state = [u_nk_state[-1]]
                logger(
                    f'Subsampling u_nk data of the {ordinal(self.n_state)} state ...'
                )

                n = 1  # now there is only one file loaded in dHdl_state/u_nk_state

    # dealing with the last state with equilibrium_detection
    # (only the concatenation happens here; see NOTE(review) in the docstring)
    self.n_state += 1
    dHdl_data = pd.concat(dHdl_state)
    u_nk_data = pd.concat(u_nk_state)

    return dHdl_data, u_nk_data