def test_ML_MSM_estimated(self):
    """Round-trip a maximum-likelihood MSM through save/load and compare it with the original.

    The model is saved twice: once right after estimation, and once more
    (under the model name ``'new'``) after ``active_state_indexes`` has been
    accessed, to check that this attribute is restored as well.
    """
    params = {
        'dtrajs': [[0, 1, 2, 2, 2, 2, 1, 2, 2, 2, 1, 0, 0, 0, 0, 0, 0, 0],
                   [0, 0, 0, 0, 1, 1, 2, 2, 2, 2, 2, 2, 2, 1, 0, 0]],
        'lag': 2,
    }
    ml_msm = pyerna.msm.estimate_markov_model(**params)
    assert isinstance(ml_msm, pyerna.msm.MaximumLikelihoodMSM)

    ml_msm.save(self.f)
    new_obj = load(self.f)

    # Compare against the estimator that was saved; comparing the restored
    # object with itself could never fail.
    self.assertEqual(new_obj._estimated, ml_msm._estimated)
    for attr in ('transition_matrix', 'count_matrix_active', 'active_set',
                 'ncv', 'discrete_trajectories_full'):
        np.testing.assert_equal(getattr(new_obj, attr), getattr(ml_msm, attr))

    # access the attribute before saving again
    ml_msm.active_state_indexes
    ml_msm.save(self.f, 'new')
    restored = load(self.f, 'new')

    assert len(ml_msm.active_state_indexes) == len(restored.active_state_indexes)
    for x, y in zip(ml_msm.active_state_indexes, restored.active_state_indexes):
        np.testing.assert_equal(x, y)
def test_recent_model_with_old_version(self):
    """Loading a model saved by a newer class version must fail.

    There is no backward compatibility: the class version is temporarily set
    to 0 so that the file written beforehand looks "too recent" on load.
    """
    instance = test_cls_v3()
    instance.save(self.fn)
    from pyerna._base.serialization.serialization import OldVersionUnsupported

    def _set_version(cls, val):
        # write the name-mangled private version attribute of the class
        setattr(cls, '_%s__serialize_version' % cls.__name__, val)

    previous_version = SerializableMixIn._get_version(instance.__class__)
    _set_version(test_cls_v3, 0)
    try:
        with self.assertRaises(OldVersionUnsupported) as ctx:
            pyerna.load(self.fn)
        expected = "need at least version {version}".format(version=pyerna.version)
        self.assertIn(expected, ctx.exception.args[0])
    finally:
        # always put the real version back
        _set_version(test_cls_v3, previous_version)
def tearDown(self):
    """Teardown check: ``self.ck`` must survive a save/load round trip.

    Only runs when the module-level ``py3`` flag is truthy. The temporary
    file is created with ``delete=False`` and is therefore removed explicitly
    afterwards so that no file is left behind.
    """
    if not py3:
        return

    import os
    import tempfile

    fh = tempfile.NamedTemporaryFile(delete=False)
    try:
        with fh:
            self.ck.save(fh.name)
            restored = pyerna.load(fh.name)
            assert hasattr(restored, 'has_errors')
    finally:
        # delete=False means nobody else cleans this file up
        os.unlink(fh.name)
def _check_serialize(vamp):
    """Save ``vamp`` to a temporary file, load it back and compare both objects.

    Parameters
    ----------
    vamp : object
        Estimator providing ``save``, ``model``, ``cumvar``, the singular
        values/vectors and ``dimension()``.

    Returns
    -------
    The restored estimator, or ``vamp`` itself under Python 2, where the
    check is skipped.
    """
    import six
    if six.PY2:
        return vamp

    import os
    import tempfile
    import pyerna

    # Create the file before entering ``try`` so that ``ntf`` is always bound
    # when the ``finally`` clause runs.
    ntf = tempfile.NamedTemporaryFile(delete=False)
    try:
        with ntf:
            vamp.save(ntf.name)
            restored = pyerna.load(ntf.name)

        for name in ('C00', 'C0t', 'Ctt'):
            np.testing.assert_allclose(getattr(restored.model, name), getattr(vamp.model, name))
        np.testing.assert_allclose(restored.cumvar, vamp.cumvar)

        for name in ('singular_values', 'singular_vectors_left', 'singular_vectors_right'):
            assert_allclose_ignore_phase(getattr(restored, name), getattr(vamp, name))

        np.testing.assert_equal(restored.dimension(), vamp.dimension())
        assert restored.model._svd_performed == vamp.model._svd_performed
        return restored
    finally:
        os.remove(ntf.name)
def test_ml_msm_sparse(self):
    """The ``sparse`` flag of an MSM must be preserved by save/load."""
    from pyerna.util.contexts import numpy_random_seed

    with numpy_random_seed(42):
        dtraj = np.random.randint(0, 1000, size=10000)
        original = pyerna.msm.estimate_markov_model([dtraj], sparse=True, lag=1)
        assert original.sparse

        original.save(self.f)
        reloaded = load(self.f)
        assert reloaded.sparse
def test_interpolation_with_map(self):
    """A saved ``test_cls_v1`` is restored as ``to_interpolate_with_functions``.

    The restored attribute ``y`` has to equal the result of the target
    class' ``map_y`` function.
    """
    test_cls_v1().save(self.fn)

    with patch_old_location(test_cls_v1, to_interpolate_with_functions):
        restored = pyerna.load(self.fn)

    self.assertIsInstance(restored, to_interpolate_with_functions)
    expected_y = to_interpolate_with_functions.map_y(None)
    self.assertEqual(restored.y, expected_y)
def test_its(self):
    """Implied timescales keep estimator parameters, lags and timescales after save/load."""
    original = pyerna.msm.timescales_msm(self.obs_micro, lags=[1, 2, 3])
    original.save(self.f)
    reloaded = load(self.f)

    self.assertEqual(reloaded.estimator.get_params(deep=False),
                     original.estimator.get_params(deep=False))
    for name in ('lags', 'timescales'):
        np.testing.assert_equal(getattr(reloaded, name), getattr(original, name))
def test_msm_save_load(self):
    """A restored MSM equals the saved one, attribute by attribute and as a whole."""
    reference = self.msm
    reference.save(self.f)
    reloaded = load(self.f)

    np.testing.assert_equal(reloaded.transition_matrix, reference.transition_matrix)
    for name in ('nstates', 'is_sparse', 'is_reversible'):
        self.assertEqual(getattr(reloaded, name), getattr(reference, name))
    # finally rely on the model's own equality
    self.assertEqual(reloaded, reference)
def test_updated_class_v2_to_v3(self):
    """A file written by ``test_cls_v2`` loads as ``test_cls_v3``.

    After the upgrade ``z`` is 23 and the attribute ``y`` no longer exists.
    """
    test_cls_v2().save(self.fn)

    with patch_old_location(test_cls_v2, test_cls_v3):
        upgraded = pyerna.load(self.fn)

    self.assertIsInstance(upgraded, test_cls_v3)
    self.assertEqual(upgraded.z, 23)
    self.assertFalse(hasattr(upgraded, 'y'))
def test_renamed_class(self):
    """ ensure a removed class gets properly remapped to an existing one """
    _deleted_in_old_version().save(self.fn)

    # Declare test_cls_with_old_locations as the successor of the removed
    # class, then load the file while that mapping is active.
    with patch_old_location(_deleted_in_old_version, test_cls_with_old_locations):
        remapped = pyerna.load(self.fn)

    self.assertIsInstance(remapped, test_cls_with_old_locations)
def test_its_sampled_only_ts(self):
    """Round-trip implied timescales estimated with ``errors='bayes'`` and ``only_timescales=True``."""
    original = pyerna.msm.timescales_msm(self.obs_micro, lags=[1, 3], errors='bayes',
                                         nsamples=2, only_timescales=True)
    original.save(self.f)
    reloaded = load(self.f)

    self.assertEqual(reloaded.estimator.get_params(deep=False),
                     original.estimator.get_params(deep=False))
    for name in ('lags', 'timescales', 'sample_mean'):
        np.testing.assert_equal(getattr(reloaded, name), getattr(original, name))
def test_cktest(self):
    """The result of ``cktest`` keeps lag times, predictions and estimates after save/load."""
    original = self.bmsm_rev.cktest(nsets=2, mlags=[1, 3])
    original.save(self.f)
    reloaded = load(self.f)

    compared = ('lagtimes', 'predictions', 'predictions_conf', 'estimates', 'estimates_conf')
    for name in compared:
        np.testing.assert_equal(getattr(reloaded, name), getattr(original, name))
def test_oom(self):
    """An MSM estimated with ``weights='oom'`` keeps its OOM attributes after save/load."""
    original = pyerna.msm.estimate_markov_model(self.obs_macro, self.lag, weights='oom')
    original.save(self.f)
    reloaded = load(self.f)

    oom_attrs = ('eigenvalues_OOM', 'timescales_OOM', 'OOM_rank', 'OOM_omega', 'OOM_sigma')
    for name in oom_attrs:
        np.testing.assert_equal(getattr(original, name), getattr(reloaded, name))
def test_updated_class_v1_to_v2(self):
    """A file written by ``test_cls_v1`` loads as ``test_cls_v2``.

    After the upgrade ``z`` is 42 and the old attribute ``a`` is available
    as ``b``.
    """
    source = test_cls_v1()
    source.save(self.fn)

    with patch_old_location(test_cls_v1, test_cls_v2):
        upgraded = pyerna.load(self.fn)

    self.assertIsInstance(upgraded, test_cls_v2)
    self.assertEqual(upgraded.z, 42)
    np.testing.assert_equal(upgraded.b, source.a)
def test_hmsm(self):
    """Round-trip a hidden Markov model and compare it via ``_compare_MLHMM``."""
    dtrajs = [[0, 1, 2, 2, 2, 2, 1, 2, 2, 2, 1, 0, 0, 0, 0, 0, 0, 0],
              [0, 0, 0, 0, 1, 1, 2, 2, 2, 2, 2, 2, 2, 1, 0, 0]]
    original = pyerna.msm.estimate_hidden_markov_model(dtrajs=dtrajs, lag=2, nstates=2)

    original.save(self.f)
    reloaded = load(self.f)
    self._compare_MLHMM(reloaded, original)
def test_serialization(self):
    ''' check if the test still hold for a restored model.

    The model is saved into a private temporary directory, loaded again and
    swapped in as ``self.msm``; afterwards the eigenvector, ``f`` and ``pi``
    tests are executed against the restored model.
    '''
    import os
    import shutil
    import tempfile

    # mkdtemp creates a directory only we can access; tempfile.mktemp is
    # deprecated because the returned name can be taken by someone else.
    tmp_dir = tempfile.mkdtemp()
    f = os.path.join(tmp_dir, 'msm')
    try:
        self.msm.save(f)
        from pyerna import load
        restored = load(f)

        self.msm = restored
        self.test_eigenvectors()
        self.test_f()
        self.test_pi()
    finally:
        # also succeeds if save() failed before the file was written, so the
        # original error is not replaced by a "file not found" error
        shutil.rmtree(tmp_dir, ignore_errors=True)
def test_evil_things_not_allowed(self):
    """Loading must refuse a file whose pickling was hijacked.

    ``__reduce__`` of the instance's class is replaced by a function that
    returns ``subprocess.Popen`` with ``/bin/sh`` as argument, and
    ``SerializableMixIn.__getstate__`` is removed for the duration of the
    test. Saving has to go through the replaced ``__reduce__`` and loading
    has to raise ``UnpicklingError`` with a 'not allowed' message.
    """
    import subprocess
    from pickle import UnpicklingError
    import types

    called = {'result': False}

    def evil(self):
        # record that the hijacked reduce really ran
        called['result'] = True
        return subprocess.Popen, ('/bin/sh', )

    inst = np_container(np.empty(0))
    old = SerializableMixIn.__getstate__
    old2 = inst.__class__.__reduce__
    try:
        del SerializableMixIn.__getstate__
        inst.__class__.__reduce__ = types.MethodType(evil, inst)
        inst.save(self.fn)

        with self.assertRaises(UnpicklingError) as e:
            pyerna.load(self.fn)
        self.assertIn('not allowed', str(e.exception))
        # Check the flag, not the dict: a non-empty dict is always truthy.
        self.assertTrue(called['result'], 'hack not executed')
    finally:
        # undo both patches so other tests see the unmodified classes
        SerializableMixIn.__getstate__ = old
        np_container.__reduce__ = old2
def test_bhmm(self):
    """Round-trip a Bayesian hidden Markov model, including its samples."""
    dtrajs = [[0, 1, 2, 2, 2, 2, 1, 2, 2, 2, 1, 0, 0, 0, 0, 0, 0, 0],
              [0, 0, 0, 0, 1, 1, 2, 2, 2, 2, 2, 2, 2, 1, 0, 0]]
    original = pyerna.msm.bayesian_hidden_markov_model(dtrajs=dtrajs, lag=2,
                                                       nstates=2, nsamples=2)
    original.save(self.f)
    reloaded = load(self.f)

    self._compare_MLHMM(reloaded, original)
    # the sampled models have to match as well
    self.assertEqual(reloaded.samples, original.samples)
def test_sampled_MSM_save_load(self):
    """A restored ``bmsm_rev`` matches the saved one in samples, matrix and scalar settings."""
    reference = self.bmsm_rev
    reference.save(self.f)
    reloaded = load(self.f)

    # array-like content
    for name in ('samples', 'transition_matrix'):
        np.testing.assert_equal(getattr(reloaded, name), getattr(reference, name))

    # plain attributes
    plain_attrs = ('nstates', 'is_sparse', 'is_reversible', 'nsamples',
                   'nsteps', 'conf', 'show_progress')
    for name in plain_attrs:
        self.assertEqual(getattr(reloaded, name), getattr(reference, name))
def test_serialization(self):
    """Save ``self.tpt2``, load it back and compare its public attributes.

    Array-valued attributes are compared with ``np.testing.assert_equal``,
    everything else with ``assertEqual``.
    """
    import os
    import pyerna
    import tempfile

    # Create the file before entering ``try`` so that ``ntf`` is always bound
    # when the ``finally`` clause runs.
    ntf = tempfile.NamedTemporaryFile(delete=False)
    try:
        with ntf:
            self.tpt2.save(ntf.name)
            restored = pyerna.load(ntf.name)

        public_attrs = ('stationary_distribution', 'flux', 'gross_flux', 'committor',
                        'backward_committor', 'dt_model', 'total_flux')
        for attr in public_attrs:
            value = getattr(self.tpt2, attr)
            if isinstance(value, np.ndarray):
                np.testing.assert_equal(value, getattr(restored, attr))
            else:
                self.assertEqual(value, getattr(restored, attr))
    finally:
        # delete=False means the file has to be removed by hand
        os.unlink(ntf.name)
def test_serialize(self):
    """Save ``self.amm``, load it back and compare parameters and estimated quantities."""
    import os
    import shutil
    import tempfile
    import pyerna

    # mkdtemp creates a directory only we can access; tempfile.mktemp is
    # deprecated because the returned name can be taken by someone else.
    tmp_dir = tempfile.mkdtemp()
    f = os.path.join(tmp_dir, 'amm')
    try:
        self.amm.save(f)
        restored = pyerna.load(f)

        # check estimation parameters
        estimation_params = ('lag', 'count_mode', 'connectivity', 'dt_traj', 'E', 'm', 'w',
                             'eps', 'support_ci', 'maxiter', 'max_cache',
                             'mincount_connectivity')
        # ensure we got the estimated quantities right
        estimated = ('E_active', 'E_min', 'E_max', 'mhat', 'lagrange', 'sigmas',
                     'count_inside', 'count_outside')
        # derived from msm_estimator
        derived = ('P', 'pi')

        for attr in estimation_params + estimated + derived:
            np.testing.assert_equal(getattr(self.amm, attr), getattr(restored, attr))
    finally:
        # also succeeds if save() failed before the file was written, so the
        # original error is not replaced by a "file not found" error
        shutil.rmtree(tmp_dir, ignore_errors=True)
def main(argv=None):
    """Inspect model files and print a description of the models they contain.

    Parameters
    ----------
    argv : list of str, optional
        Command line arguments; ``None`` lets argparse read ``sys.argv``.

    Returns
    -------
    int
        0 on success, 1 as soon as one of the files could not be inspected.
    """
    import argparse
    import sys
    from collections import defaultdict

    from pyerna import load
    from pyerna._base.serialization.h5file import H5File

    parser = argparse.ArgumentParser()
    parser.add_argument('--json', action='store_true', default=False)
    parser.add_argument('files', metavar='files', nargs='+', help='files to inspect')
    # NOTE(review): args.recursive is parsed but not consulted below - confirm
    # whether loading the input chain was meant to depend on it.
    parser.add_argument('--recursive', action='store_true', default=False,
                        help='If the pipeline of the stored estimator was stored, '
                             'gather these information as well. This will require to load the model, '
                             'so it could take a while, if the pipeline contains lots of data.')
    parser.add_argument('-v', '--verbose', action='store_true', default=False)
    args = parser.parse_args(argv)

    # store found models by filename
    models = defaultdict(dict)

    for f in args.files:
        try:
            with H5File(f) as fh:
                m = fh.models_descriptive
            models[f].update(m)
            for model_name, values in m.items():
                if values['saved_streaming_chain']:
                    # load the model this entry describes, not just the default one
                    restored = load(f, model_name)
                    models[f][model_name]['input_chain'] = [repr(x) for x in restored._data_flow_chain()]
        except Exception as e:
            # Exception instead of BaseException: Ctrl-C and sys.exit() must
            # not be reported as an invalid model file.
            print('{} did not contain a valid PyEMMA model. Error was {err}. '
                  'If you are sure, that it does, please post an issue on Github'.format(f, err=e))
            if args.verbose:
                import traceback
                traceback.print_exc()
            return 1

    if args.json:
        import json
        json.dump(models, fp=sys.stdout)
        return 0

    from io import StringIO
    buff = StringIO()
    buff.write('PyEMMA models\n')
    # underline the heading; tell() - 1 excludes the trailing newline
    buff.write('=' * (buff.tell() - 1))
    buff.write('\n' * 2)
    for f in models:
        buff.write('file: {}'.format(f))
        buff.write('\n')
        buff.write('-' * 80)
        buff.write('\n')
        model_file = models[f]
        for i, model_name in enumerate(model_file):
            attrs = model_file[model_name]
            buff.write('{index}. name: {key}\n'
                       'created: {created}\n'
                       '{repr}\n'.format(key=model_name, index=i+1,
                                         created=attrs['created_readable'],
                                         repr=attrs['class_str']))
            if attrs['saved_streaming_chain']:
                buff.write('\n---------Input chain---------\n')
                for j, x in enumerate(attrs['input_chain']):
                    buff.write('{index}. {repr}\n'.format(index=j+1, repr=x))
        buff.write('-' * 80)
        buff.write('\n')
    buff.seek(0)
    print(buff.read())
    return 0
def check_serialization(estimator):
    """Check that a saved and restored estimator still holds the derived quantities.

    Parameters
    ----------
    estimator : WHAM, TRAM, DTRAM or MBAR
        Estimator to save, load and compare attribute by attribute. It has to
        be a ``SerializableMixIn``.

    Raises
    ------
    ValueError
        If ``estimator`` is none of the four supported estimator types.
    AssertionError
        If a compared attribute differs, or if an attribute is missing that
        is not one of the known optional ones.
    """
    import os
    import shutil
    import tempfile

    import pyerna
    from pyerna.thermo import WHAM, TRAM, DTRAM, MBAR
    from pyerna._base.serialization.serialization import SerializableMixIn

    assert isinstance(estimator, SerializableMixIn)

    def check(a, b):
        # arrays: numeric comparison; sequences: element-wise; otherwise ==
        if isinstance(a, np.ndarray):
            np.testing.assert_allclose(a, b)
        elif isinstance(a, (list, tuple)):
            for x, y in zip(a, b):
                check(x, y)
        elif hasattr(a, '__eq__'):
            assert a == b
        else:
            raise ValueError('dunno how to compare %s' % a)

    if isinstance(estimator, WHAM):
        to_compare = [
            'active_set', 'bias_energies', 'bias_energies_full', 'conf_energies', 'dt_traj',
            'increments', 'loglikelihoods', 'maxerr', 'maxiter', 'nstates_full', 'nthermo',
            'save_convergence_info', 'state_counts', 'state_counts_full', 'stride',
            'therm_energies', 'timestep_traj',
        ]
    elif isinstance(estimator, TRAM):
        to_compare = [
            'active_set', 'biased_conf_energies', 'btrajs', 'connectivity', 'count_matrices',
            'csets', 'dtrajs', 'equilibrium_btrajs', 'equilibrium_dtrajs',
            'equilibrium_state_counts', 'equilibrium_state_counts_full', 'increments',
            'log_lagrangian_mult', 'loglikelihoods', 'mbar_biased_conf_energies',
            'mbar_therm_energies', 'mbar_unbiased_conf_energies', 'nthermo', 'state_counts',
            'therm_energies', 'therm_state_counts_full', 'timestep_traj',
        ]
    elif isinstance(estimator, DTRAM):
        to_compare = [
            'active_set', 'bias_energies', 'conf_energies', 'count_matrices',
            'count_matrices_full', 'increments', 'log_lagrangian_mult', 'loglikelihoods',
            'nstates_full', 'nthermo', 'state_counts', 'state_counts_full', 'therm_energies',
            'timestep_traj',
        ]
    elif isinstance(estimator, MBAR):
        to_compare = [
            'active_set', 'biased_conf_energies_full', 'btrajs', 'conf_energies', 'increments',
            'loglikelihoods', 'nstates_full', 'nthermo', 'state_counts', 'state_counts_full',
            'therm_energies', 'therm_state_counts_full', 'unbiased_conf_energies_full',
        ]
    else:
        raise ValueError('unknown estimator')

    # base attrs
    to_compare.extend(('umbrella_centers', 'force_constants', 'temperatures', 'dt_traj'))
    # inherited from SubSet
    to_compare.extend(['active_set', 'nstates_full'])
    # inherited from MultiThermModel
    to_compare.extend(['models', 'f_therm', 'pi', 'f', 'label'])
    # estimator parameters
    to_compare.extend(estimator.get_params(deep=False).keys())

    # mkdtemp creates a directory only we can access; tempfile.mktemp is
    # deprecated because the returned name can be taken by someone else.
    tmp_dir = tempfile.mkdtemp()
    f = os.path.join(tmp_dir, 'estimator')
    try:
        estimator.save(f)
        restored = pyerna.load(f)

        for k in to_compare:
            try:
                check(getattr(restored, k), getattr(estimator, k))
            except AttributeError:
                # some fields are only set upon certain input parameter choices.
                assert k in ('callback', 'mbar_unbiased_conf_energies', 'mbar_biased_conf_energies')
    finally:
        # the saved model is no longer needed; do not leave it on disk
        shutil.rmtree(tmp_dir, ignore_errors=True)
def test_private(self):
    """A restored ``private_attr`` instance still reports its private attribute."""
    private_attr().save(self.fn)
    reloaded = pyerna.load(self.fn)
    assert reloaded.has_private_attr()
def test_msm_coarse_grain(self):
    """Metastable memberships and distributions set by ``pcca(2)`` survive save/load."""
    coarse = self.msm.pcca(2)

    self.msm.save(self.f)
    reloaded = load(self.f)

    expected = (
        ('metastable_memberships', coarse.memberships),
        ('metastable_distributions', coarse.output_probabilities),
    )
    for attr_name, reference in expected:
        np.testing.assert_equal(getattr(reloaded, attr_name), reference)