def test_load_save_fitresult(self, tmp_path):
    """Round-trip a single fit result through csv and through a fit-result directory.

    Checks that metadata survives ``to_file``, that ``save_fitresult`` appends the
    extra log lines, and that a reloaded result is callable and reproduces losses.
    """
    # todo missing read batch result test
    fpath = Path(tmp_path) / 'fit_result_single.csv'
    self.fit_result.to_file(fpath)
    df = csv_to_dataframe(fpath)
    # metadata is carried in DataFrame.attrs and must survive the csv round-trip
    assert df.attrs['metadata'] == self.fit_result.metadata

    fit_result_dir = Path(tmp_path) / 'fit_result'
    save_fitresult(fit_result_dir, self.fit_result, log_lines=['test123'])
    # user-supplied log lines end up as the last line(s) of log.txt
    log_lines = Path(fit_result_dir / 'log.txt').read_text().split('\n')
    assert log_lines[-1] == 'test123'

    fit_result_loaded = load_fitresult(fit_result_dir)
    assert isinstance(fit_result_loaded.losses, pd.DataFrame)
    assert isinstance(fit_result_loaded.hdxm_set, HDXMeasurementSet)

    # the loaded result is callable; evaluating it at timepoints yields d_calc
    # with shape (Ns, Np, Nt) — one sample here
    timepoints = np.linspace(0, 30*60, num=100)
    d_calc = fit_result_loaded(timepoints)
    assert d_calc.shape == (1, self.hdxm.Np, len(timepoints))

    losses = csv_to_dataframe(fit_result_dir / 'losses.csv')
    fr_load_with_hdxm_and_losses = load_fitresult(fit_result_dir)
    assert len(fr_load_with_hdxm_and_losses.losses) == 100
    # total_loss metadata equals the sum of the last row of the losses table
    assert fr_load_with_hdxm_and_losses.metadata['total_loss'] == losses.iloc[-1].sum()
def test_batch_fit_delta(self, tmp_path):
    """Batch global fit on a measurement set built from a yaml spec with deltas.

    Compares fitted dG per state against stored reference values and checks the
    shape and validity of the squared-error array.
    """
    # Hoisted: this import was previously re-executed on every loop iteration
    from pandas.testing import assert_series_equal

    yaml_file = input_dir / 'data_states_deltas.yaml'
    yaml_dict = yaml.safe_load(yaml_file.read_text())
    hdxm_set = yaml_to_hdxmset(yaml_dict, data_dir=input_dir)

    guess_output = csv_to_dataframe(output_dir / 'ecSecB_guess.csv')
    gibbs_guess = hdxm_set[0].guess_deltaG(guess_output['rate'])  # broadcast single guess over samples
    fr_global = fit_gibbs_global_batch(hdxm_set, gibbs_guess, epochs=200)

    output = fr_global.output
    check = csv_to_dataframe(output_dir / 'ecsecb_delta_batch' / 'fit_result.csv')
    # reference table columns: (state, quantity) MultiIndex; compare dG per state
    states = check.columns.unique(level=0)
    for state in states:
        result = output[state]['dG']
        test = check[state]['dG']
        assert_series_equal(result, test, rtol=0.1)

    errors = fr_global.get_squared_errors()
    assert errors.shape == (hdxm_set.Ns, hdxm_set.Np, hdxm_set.Nt)
    assert not np.any(np.isnan(errors))
def test_batch_fit(self, tmp_path):
    """Batch fit on the apo/dimer set, plus an alignment-regularized batch fit.

    Both fits are compared against stored reference dG values per state; the plain
    batch result is additionally round-tripped through csv to check metadata.
    """
    # Hoisted: this import was previously re-executed inside both comparison loops
    from pandas.testing import assert_series_equal

    hdx_set = HDXMeasurementSet([self.hdxm_apo, self.hdxm_dimer])
    guess = csv_to_dataframe(output_dir / 'ecSecB_guess.csv')

    # Create rates dataframe: the same single guess broadcast to every measurement
    rates_df = pd.DataFrame({name: guess['rate'] for name in hdx_set.names})
    gibbs_guess = hdx_set.guess_deltaG(rates_df)
    fr_global = fit_gibbs_global_batch(hdx_set, gibbs_guess, epochs=1000)

    # Round-trip the result through csv; metadata must survive
    fpath = Path(tmp_path) / 'fit_result_batch.csv'
    fr_global.to_file(fpath)
    df = csv_to_dataframe(fpath)
    assert df.attrs['metadata'] == fr_global.metadata

    # Single states list (was duplicated verbatim before each comparison loop)
    states = ['SecB WT apo', 'SecB his dimer apo']

    output = fr_global.output
    check_protein = csv_to_protein(output_dir / 'ecSecB_batch.csv')
    for state in states:
        result = output[state]['dG']
        test = check_protein[state]['dG']
        assert_series_equal(result, test, rtol=0.1)

    errors = fr_global.get_squared_errors()
    assert errors.shape == (hdx_set.Ns, hdx_set.Np, hdx_set.Nt)

    mock_alignment = {
        'apo': 'MSEQNNTEMTFQIQRIYTKDI------------SFEAPNAPHVFQKDWQPEVKLDLDTASSQLADDVYEVVLRVTVTASLG-------------------EETAFLCEVQQGGIFSIAGIEGTQMAHCLGAYCPNILFPYARECITSMVSRG----TFPQLNLAPVNFDALFMNYLQQQAGEGTEEHQDA',
        'dimer': 'MSEQNNTEMTFQIQRIYTKDISFEAPNAPHVFQKDWQPEVKLDLDTASSQLADDVY--------------EVVLRVTVTASLGEETAFLCEVQQGGIFSIAGIEGTQMAHCLGA----YCPNILFPAARECIASMVARGTFPQLNLAPVNFDALFMNYLQQQAGEGTEEHQDA-----------------',
    }
    hdx_set.add_alignment(list(mock_alignment.values()))

    gibbs_guess = hdx_set[0].guess_deltaG(guess['rate'])  # Guesses from first measurement
    aligned_result = fit_gibbs_global_batch_aligned(hdx_set, gibbs_guess, r1=2, r2=5, epochs=1000)

    output = aligned_result.output
    check_protein = csv_to_protein(output_dir / 'ecSecB_batch_aligned.csv')
    for state in states:
        result = output[state]['dG']
        test = check_protein[state]['dG']
        assert_series_equal(result, test, rtol=0.1)
def reload_tables():
    """Reload the cached test tables into the main source and refresh the protein view."""
    source = ctrl.sources['main']
    # Same tables, same order as before — filenames are '<table>.csv'
    for table in ('peptides', 'rfu_residues', 'dG_fits', 'ddG_comparison', 'rates'):
        source.tables[table] = csv_to_dataframe(test_dir / f'{table}.csv')
    source.param.trigger('updated')
    ctrl.views['protein'].object = pdb_string
def reload_dashboard():
    """Repopulate the dashboard from cached test data: data objects, result tables,
    fit-control defaults and the NGL protein view."""
    data_objs = {k: load_from_yaml(v, data_dir=data_dir) for k, v in yaml_dicts.items()}
    for k, v in data_objs.items():
        v.metadata['name'] = k
    ctrl.data_objects = data_objs

    rates = csv_to_protein(test_dir / 'rates.txt', column_depth=3).df
    # NOTE(review): 'fit' is loaded but its add_df call below is commented out —
    # presumably intentional; confirm before removing
    fit = csv_to_protein(test_dir / 'global_fit.txt', column_depth=3).df
    colors = csv_to_protein(test_dir / 'colors.txt', column_depth=3).df
    peptides = csv_to_dataframe(test_dir / 'peptides.txt', column_depth=2, index_col=0)

    source = ctrl.sources['dataframe']
    source.add_df(rates, 'rates')
    source.add_df(peptides, 'peptides')
    #source.add_df(fit, 'global_fit')
    source.add_df(colors, 'colors')
    # signal downstream consumers that the source contents changed
    ctrl.sources['dataframe'].updated = True

    # preset the fit controls for a quick single fit
    fit_control = ctrl.control_panels['FitControl']
    fit_control.epochs = 100
    fit_control.fit_mode = 'Single'
    fit_control.fit_name = 'new_global_fit_test_123'

    ngl = ctrl.views['protein']
    ngl.ngl_view.pdb_string = Path(test_dir / '1qyn.pdb').read_text()
def reload_tables():
    """Best-effort reload of all cached test tables into the main source.

    Tables that fail to load are reported and skipped so one bad file does not
    abort the whole reload.
    """
    # Removed: a dead initial read of 'peptides.csv' whose result was immediately
    # overwritten in the loop below, plus commented-out dtype-conversion code.
    test_dir = cwd / 'test_data'
    src = ctrl.sources['main']
    table_names = [
        'rfu_residues.csv', 'rates.csv', 'peptides.csv', 'dG_fits.csv',
        'ddG_comparison.csv', 'd_calc.csv', 'loss.csv', 'peptide_mse.csv'
    ]
    for name in table_names:
        try:
            df = csv_to_dataframe(test_dir / name)
            df.columns = fix_multiindex_dtypes(df.columns)
            # table key is the filename without extension
            src.tables[name.split('.')[0]] = df
        except Exception as e:  # deliberate best-effort: report and continue
            print(e)
            print('not loaded:', name)
    src.param.trigger('updated')
def setup_class(cls):
    """Build a single HDX measurement for 'SecB WT apo' and run a short dG fit.

    Exposes ``fpath``, ``temperature``, ``pH``, ``hdxm`` and ``fit_result`` as
    class attributes for the tests.
    """
    cls.fpath = input_dir / 'ecSecB_apo.csv'
    cls.temperature, cls.pH = 273.15 + 30, 8.

    peptide_data = read_dynamx(cls.fpath)
    master_table = PeptideMasterTable(
        peptide_data, drop_first=1, ignore_prolines=True, remove_nan=False)
    master_table.set_control(('Full deuteration control', 0.167*60))
    cls.hdxm = HDXMeasurement(
        master_table.get_state('SecB WT apo'),
        temperature=cls.temperature, pH=cls.pH)

    # Stored guess rates -> dG initial values -> quick (100 epoch) global fit
    guess_rates = csv_to_dataframe(output_dir / 'ecSecB_guess.csv')
    deltaG_guess = cls.hdxm.guess_deltaG(guess_rates['rate'])
    cls.fit_result = fit_gibbs_global(cls.hdxm, deltaG_guess, epochs=100, r1=2)
def test_read_write_tables(self, tmp_path):
    """csv round-trips for single- and multi-index dataframes, including metadata
    in ``DataFrame.attrs`` and the pprint output format."""
    # Single-index columns
    df = pd.DataFrame(np.random.randn(25, 4), columns=list('ABCD'))
    df.index.name = 'singlecolumnindex'
    sio = StringIO()
    dataframe_to_stringio(df, sio)
    sio.seek(0)
    df_read = csv_to_dataframe(sio)
    pd.testing.assert_frame_equal(df, df_read)

    fpath = Path(tmp_path) / 'single_index.csv'
    dataframe_to_file(fpath, df)
    # FIX: the read-back was previously discarded, so the *file* round-trip was
    # never checked — the assert silently re-tested the StringIO result above
    df_read = csv_to_dataframe(fpath)
    pd.testing.assert_frame_equal(df, df_read)

    # multi-index column
    cols = pd.MultiIndex.from_product([('a', 'b'), ('x', 'y')])
    df = pd.DataFrame(np.random.randn(25, 4), columns=cols)
    df.index.name = 'multicolumnindex'
    sio = StringIO()
    dataframe_to_stringio(df, sio)
    sio.seek(0)
    df_read = csv_to_dataframe(sio)
    pd.testing.assert_frame_equal(df, df_read)

    fpath = Path(tmp_path) / 'multi_index.csv'
    dataframe_to_file(fpath, df)
    df_read = csv_to_dataframe(fpath)
    pd.testing.assert_frame_equal(df, df_read)

    protein = csv_to_protein(fpath)
    assert protein.index.name == 'r_number'
    assert isinstance(protein, Protein)

    metadata = {
        'instrumuent': 'LCMS',  # NOTE(review): typo for 'instrument'; harmless, key only compared to itself
        'settings': {'pressure': '5 kPa', 'temperature': '400K'}
    }
    df.attrs['metadata'] = metadata
    fpath = Path(tmp_path) / 'multi_index_with_metadata.csv'
    dataframe_to_file(fpath, df)
    df_read = csv_to_dataframe(fpath)
    pd.testing.assert_frame_equal(df, df_read)
    assert df_read.attrs['metadata'] == metadata

    # pprint format with version header: fixed line count, first line is version
    fpath = Path(tmp_path) / 'multi_index_with_metadata.txt'
    dataframe_to_file(fpath, df, fmt='pprint', include_version=True)
    lines = Path(fpath).read_text().split('\n')
    assert len(lines) == 38
    assert lines[0].strip() == pyhdx.VERSION_STRING
def test_dtype_cuda(self):
    """Fit on CUDA when available (expect a CUDA-related AssertionError otherwise),
    then fit on cpu/float32 and verify the model dtype; restores float64 at the end.

    NOTE(review): cfg changes are not wrapped in try/finally, so a failing fit
    leaves the global config altered — confirm whether later tests depend on it.
    """
    check_deltaG = csv_to_protein(output_dir / 'ecSecB_torch_fit.csv')
    initial_rates = csv_to_dataframe(output_dir / 'ecSecB_guess.csv')

    cfg.set('fitting', 'device', 'cuda')
    gibbs_guess = self.hdxm_apo.guess_deltaG(initial_rates['rate']).to_numpy()

    if torch.cuda.is_available():
        fr_global = fit_gibbs_global(self.hdxm_apo, gibbs_guess, epochs=1000, r1=2)
        out_deltaG = fr_global.output
        for field in ['dG', 'k_obs', 'covariance']:
            assert_series_equal(check_deltaG[field], out_deltaG[self.hdxm_apo.name, field],
                                rtol=0.01, check_dtype=False, check_names=False)
    else:
        # without CUDA the fit is expected to fail with a CUDA-related assertion
        with pytest.raises(AssertionError, match=r".* CUDA .*"):
            fr_global = fit_gibbs_global(self.hdxm_apo, gibbs_guess, epochs=1000, r1=2)

    # cpu/float32 path: model parameters must come out as float32
    cfg.set('fitting', 'device', 'cpu')
    cfg.set('fitting', 'dtype', 'float32')
    fr_global = fit_gibbs_global(self.hdxm_apo, gibbs_guess, epochs=1000, r1=2)
    dg = fr_global.model.dG
    assert dg.dtype == torch.float32

    out_deltaG = fr_global.output
    for field in ['dG', 'k_obs']:
        assert_series_equal(check_deltaG[field], out_deltaG[self.hdxm_apo.name, field],
                            rtol=0.01, check_dtype=False, check_names=False)

    # restore the default dtype for subsequent tests
    cfg.set('fitting', 'dtype', 'float64')
def test_global_fit_extended(self):
    """20k-epoch global fit reproduces the stored reference within 1% rtol and
    finishes within the (very crude) 50-second budget."""
    reference = csv_to_protein(output_dir / 'ecSecB_torch_fit_epochs_20000.csv')
    rates = csv_to_dataframe(output_dir / 'ecSecB_guess.csv')
    guess = self.hdxm_apo.guess_deltaG(rates['rate'])

    start = time.time()  # Very crude benchmarks
    fit_result = fit_gibbs_global(self.hdxm_apo, guess, epochs=20000, r1=2)
    elapsed = time.time() - start
    assert elapsed < 50

    result = fit_result.output
    name = self.hdxm_apo.name
    for field in ('dG', 'k_obs', 'covariance'):
        assert_series_equal(reference[name, field], result[name, field],
                            rtol=0.01, check_dtype=False)

    squared_errors = fit_result.get_squared_errors()
    assert squared_errors.shape == (1, self.hdxm_apo.Np, self.hdxm_apo.Nt)
def test_global_fit_extended_cuda(self):
    """Same 20k-epoch fit on CUDA with float32; restores cpu/float64 afterwards."""
    reference = csv_to_protein(output_dir / 'ecSecB_torch_fit_epochs_20000.csv')
    rates = csv_to_dataframe(output_dir / 'ecSecB_guess.csv')
    guess = self.hdxm_apo.guess_deltaG(rates['rate'])

    # todo allow contextmanger?
    cfg.set('fitting', 'device', 'cuda')
    cfg.set('fitting', 'dtype', 'float32')

    fit_result = fit_gibbs_global(self.hdxm_apo, guess, epochs=20000, r1=2)
    result = fit_result.output
    name = self.hdxm_apo.name
    for field in ('dG', 'k_obs'):
        assert_series_equal(reference[name, field], result[name, field],
                            rtol=0.01, check_dtype=False)

    # restore defaults for subsequent tests
    cfg.set('fitting', 'device', 'cpu')
    cfg.set('fitting', 'dtype', 'float64')
def test_rfu(self):
    """Per-residue RFU values match the stored per-exposure reference table."""
    expected = csv_to_dataframe(output_dir / 'ecSecB_rfu_per_exposure.csv')
    # csv round-trip loses the column dtype and name; restore both before comparing
    expected.columns = expected.columns.astype(float)
    expected.columns.name = 'exposure'
    assert_frame_equal(self.hdxm.rfu_residues, expected)
import numpy as np
from pathlib import Path

# Instantiate the application; ctrl is the main controller used below
tmpl, ctrl = _main_app()
directory = Path(__file__).parent
fpath = directory / 'test_data' / 'ecSecB_apo.csv'

# Input settings dict for the app: file, normalization and state selection
dic = {}
dic['file_path'] = directory / 'test_data' / 'ecSecB_apo.csv'
dic['norm_mode'] = 'Exp'
dic['fd_state'] = 'Full deuteration control'
dic['fd_exposure'] = 0.167
dic['exp_state'] = 'SecB WT apo'

# Load a stored torch fit result and turn it into a plottable DataSource
src_file = directory / 'test_data' / 'ecSecB_torch_fit.txt'
df = csv_to_dataframe(src_file)
data_dict = df.to_dict(orient='series')
print(data_dict)  # NOTE(review): looks like leftover debug output — confirm
# one color string per residue entry ('<U7' fits '#rrggbb')
data_dict['color'] = np.full_like(data_dict['r_number'], fill_value=DEFAULT_COLORS['pfact'], dtype='<U7')
data_source = DataSource(data_dict, x='r_number', tags=['mapping', 'pfact', 'deltaG'],
                         renderer='circle', size=10, name='global_fit')
dic['sources'] = {}
# Load the data of two Dynamx files, and combine the result to one table data = read_dynamx(input_dir / 'ecSecB_apo.csv', input_dir / 'ecSecB_dimer.csv') pmt = PeptideMasterTable(data, drop_first=1, ignore_prolines=True, remove_nan=False) pmt.set_control(('Full deuteration control', 0.167*60)) temperature, pH = 273.15 + 30, 8. hdxm = HDXMeasurement(pmt.get_state('SecB WT apo'), temperature=temperature, pH=pH) #%% if guess: client = default_client() wt_avg_result = fit_rates_weighted_average(hdxm, client=client) init_guess = wt_avg_result.output else: init_guess = csv_to_dataframe(test_data_dir / 'output' / 'ecSecB_guess.csv') gibbs_guess = hdxm.guess_deltaG(init_guess['rate']) #%% fr_torch = fit_gibbs_global(hdxm, gibbs_guess, **fit_kwargs) #Human readable output fr_torch.to_file(output_dir / 'SecB_fit_result.txt', fmt='pprint') #Machine readable output fr_torch.to_file(output_dir / 'SecB_fit_result.csv', fmt='csv') save_fitresult(output_dir / 'SecB_fit', fr_torch)
"""
This script checks 'fixed' (stored) values of dG fitting and compares to new results generated in
`generate_test_fit_results.py'
"""
import pandas as pd
from pathlib import Path
from pyhdx.fileIO import csv_to_dataframe

test_data_dir = Path(__file__).parent / 'test_data'

# label -> (newly generated results file in output/, stored 'fixed' results file)
_cases = {
    'single_fit': ('ecSecB_torch_fit.csv', 'ecSecB_torch_fit_fixed.csv'),
    'single_20k': ('ecSecB_torch_fit_epochs_20000.csv', 'ecSecB_torch_fit_epochs_20000_fixed.csv'),
    'batch': ('ecSecB_batch.csv', 'ecSecB_batch_fixed.csv'),
}

comparisons = {}
for label, (new_name, fixed_name) in _cases.items():
    new_result = csv_to_dataframe(test_data_dir / 'output' / new_name)
    # stored results still use the legacy 'deltaG' column name
    fixed_result = csv_to_dataframe(test_data_dir / fixed_name).rename(columns={'deltaG': 'dG'})
    comparisons[label] = (new_result, fixed_result)
"""Obtain ΔG for ecSecB tetramer and dimer"""
from pathlib import Path
from pyhdx.batch_processing import yaml_to_hdxmset
from pyhdx.fileIO import csv_to_dataframe, save_fitresult
from pyhdx.fitting import fit_gibbs_global_batch
import yaml

cwd = Path(__file__).parent
data_dir = cwd / 'test_data' / 'input'
output_dir = cwd / 'test_data' / 'output'

# Assemble the measurement set from its yaml state specification
state_spec = yaml.safe_load((data_dir / 'data_states.yaml').read_text())
hdx_set = yaml_to_hdxmset(state_spec, data_dir=data_dir)

# Initial guesses: stored exchange rates, converted to dG via the first measurement
guess_rates = csv_to_dataframe(output_dir / 'ecSecB_guess.csv')
dG_guesses = hdx_set[0].guess_deltaG(guess_rates['rate'])

# Fit settings are stored alongside the input data
fit_settings = yaml.safe_load((data_dir / 'fit_settings.yaml').read_text())
batch_result = fit_gibbs_global_batch(hdx_set, dG_guesses, **fit_settings)
save_fitresult(output_dir / 'ecsecb_tetramer_dimer', batch_result)
fmt='pprint')  # NOTE(review): continuation of a call opened before this chunk

# Fit the reduced measurement and store the result directory
gibbs_guess = hdxm_reduced.guess_deltaG(reduced_guess['rate'])
fr_torch = fit_gibbs_global(hdxm_reduced, gibbs_guess, epochs=epochs, r1=2)
save_fitresult(output_dir / 'ecsecb_reduced', fr_torch)

# Either regenerate initial guess rates or reuse the stored ones
if guess:
    wt_avg_result = fit_rates_weighted_average(hdxm, bounds=(1e-2 / 60., 800 / 60.))
    guess_output = wt_avg_result.output
    dataframe_to_file(output_dir / 'ecSecB_guess.csv', guess_output)
    dataframe_to_file(output_dir / 'ecSecB_guess.txt', guess_output, fmt='pprint')
else:
    guess_output = csv_to_dataframe(output_dir / 'ecSecB_guess.csv')

# Export protein sequence and intrinsic rate of exchange
hdxm.coverage.protein.to_file(output_dir / 'ecSecB_info.csv')
hdxm.coverage.protein.to_file(output_dir / 'ecSecB_info.txt', fmt='pprint')

# Per-residue RFU reference tables (csv + human-readable)
rfu_df = hdxm.rfu_residues
dataframe_to_file(output_dir / 'ecSecB_rfu_per_exposure.csv', rfu_df)
dataframe_to_file(output_dir / 'ecSecB_rfu_per_exposure.txt', rfu_df, fmt='pprint')

# Full-measurement global fit reference output
gibbs_guess = hdxm.guess_deltaG(guess_output['rate'])
fr_torch = fit_gibbs_global(hdxm, gibbs_guess, epochs=epochs, r1=2)

dataframe_to_file(output_dir / f'ecSecB_torch_fit.csv', fr_torch.output)