def testDtraj(self):
    """Cluster random data, check dtraj dtype, and verify save_dtrajs output files."""
    self.k = 5
    self.dim = 100
    self.data = [np.random.random((30, self.dim)),
                 np.random.random((37, self.dim))]
    self.kmeans = cluster_kmeans(data=self.data, k=self.k, max_iter=100)
    assert self.kmeans.dtrajs[0].dtype == self.kmeans.output_type()
    prefix = "test"
    extension = ".dtraj"
    with TemporaryDirectory() as outdir:
        self.kmeans.save_dtrajs(trajfiles=None, prefix=prefix,
                                output_dir=outdir, extension=extension)
        n_traj = self.kmeans.data_producer.number_of_trajectories()
        expected_files = [os.path.join(outdir, "%s_%i%s" % (prefix, i, extension))
                          for i in range(n_traj)]
        # check files with given patterns are there (os.stat raises if missing)
        for fname in expected_files:
            os.stat(fname)
def test_fragmented_reader(self):
    """A serialized+restored fragmented reader must reproduce the original output."""
    from pyemma.coordinates.tests.util import create_traj
    from pyemma.util.files import TemporaryDirectory
    top_file = pkg_resources.resource_filename(__name__, 'data/test.pdb')
    trajfiles = []
    with TemporaryDirectory() as d:
        for _ in range(3):
            fname, _, _ = create_traj(top_file, dir=d)
            trajfiles.append(fname)
        # three trajectories: one consisting of all three, one consisting of the first,
        # one consisting of the first and the last
        frag_trajs = [trajfiles, [trajfiles[0]], [trajfiles[0], trajfiles[2]]]
        chunksize = 232
        source = coor.source(frag_trajs, top=top_file, chunksize=chunksize)
        params = {'chunksize': chunksize,
                  'ndim': source.ndim,
                  '_trajectories': trajfiles}
        restored = self.compare(source, params)
        np.testing.assert_equal(source.get_output(), restored.get_output())
def test_write_to_csv_propagate_filenames(self):
    """write_to_csv must derive its output names from the reader's input files,
    and the written CSV data must round-trip through a new source."""
    from pyemma.coordinates import source, tica
    with TemporaryDirectory() as td:
        data = [np.random.random((20, 3))] * 3
        fns = [os.path.join(td, name)
               for name in ('blah.npy', 'blub.npy', 'foo.npy')]
        for arr, fn in zip(data, fns):
            np.save(fn, arr)
        reader = source(fns)
        assert reader.filenames == fns
        tica_obj = tica(reader, lag=1, dim=2)
        tica_obj.write_to_csv(extension=".exotic", chunksize=3)
        res = sorted([os.path.abspath(p)
                      for p in glob(td + os.path.sep + '*.exotic')])
        self.assertEqual(len(res), len(fns))
        desired_fns = sorted([fn.replace('.npy', '.exotic') for fn in fns])
        self.assertEqual(res, desired_fns)
        # compare written results against the in-memory transform output
        expected = tica_obj.get_output()
        actual = source([fn.replace('.npy', '.exotic') for fn in fns]).get_output()
        assert len(actual) == len(fns)
        for a, e in zip(actual, expected):
            np.testing.assert_allclose(a, e)
def test_fragmented_reader_random_access(self):
    """Strided random access through a fragmented reader must match frames
    loaded directly with mdtraj."""
    with TemporaryDirectory() as td:
        trajfiles = []
        for i in range(3):
            trajfiles.append(create_traj(start=i * 10, dir=td, length=20)[0])
        topfile = get_top()
        # second entry is itself a fragmented trajectory (files 0 + 1)
        trajfiles = [trajfiles[0], (trajfiles[0], trajfiles[1]), trajfiles[2]]
        source = coor.source(trajfiles, top=topfile)
        assert isinstance(source, FragmentedTrajectoryReader)
        for chunksize in [0, 2, 3, 100000]:
            out = source.get_output(stride=self.stride, chunk=chunksize)
            keys = np.unique(self.stride[:, 0])
            for i, coords in enumerate(out):
                if i not in keys:
                    continue
                traj = mdtraj.load(trajfiles[i], top=topfile)
                frame_idx = np.array(self.stride[self.stride[:, 0] == i][:, 1])
                np.testing.assert_equal(coords,
                                        traj.xyz[frame_idx].reshape(-1, 3 * 3))
def test_np_reader_in_pipeline(self):
    """A .npy file source must be usable in a pipeline with stride and chunksize."""
    with TemporaryDirectory() as td:
        file_name = os.path.join(td, "test.npy")
        np.save(file_name, np.random.random((100, 3)))
        reader = api.source(file_name)
        pipe = api.pipeline(reader, run=False, stride=2, chunksize=5)
        pipe.parametrize()
def test_non_writeable_cfg_dir(self):
    """Assigning a non-writeable directory to cfg_dir must raise
    ConfigDirectoryException with a message containing 'is not writeable'."""
    with TemporaryDirectory() as tmp:
        # make cfg dir non-writeable.
        # Fix: the original passed the hex literal 0x300 (== 0o1400, sticky bit
        # + owner read) -- almost certainly a typo for an octal mode. Use an
        # explicit octal r-x mode, which keeps the directory readable and
        # traversable but removes the write bit, as the assertion requires.
        os.chmod(tmp, 0o500)
        assert not os.access(tmp, os.W_OK)
        with self.assertRaises(ConfigDirectoryException) as cm:
            self.config_inst.cfg_dir = tmp
        self.assertIn("is not writeable", str(cm.exception))
def test_numpy_reader(self):
    """Serialization round-trip of NumPyFileReader over two .npy files."""
    arr = np.random.random(10)
    from pyemma.util.files import TemporaryDirectory
    with TemporaryDirectory() as d:
        files = [os.path.join(d, name) for name in ('1.npy', '2.npy')]
        for fn in files:
            np.save(fn, arr)
        params = {'filenames': files, 'chunksize': 23}
        reader = NumPyFileReader(**params)
        self.compare(reader, params)
def test_non_writeable_cfg_dir(self):
    """_create_cfg_dir must raise RuntimeError when PYEMMA_CFG_DIR points at a
    directory without write permission."""
    with TemporaryDirectory() as tmp:
        os.environ['PYEMMA_CFG_DIR'] = tmp
        # make cfg dir non-writeable.
        # Fix: the original passed the hex literal 0x300 (== 0o1400, sticky bit
        # + owner read) -- almost certainly a typo for an octal mode. Use an
        # explicit octal r-x mode, which keeps the directory readable and
        # traversable but removes the write bit, as the assertion requires.
        os.chmod(tmp, 0o500)
        assert not os.access(tmp, os.W_OK)
        with self.assertRaises(RuntimeError) as cm:
            self.config_inst._create_cfg_dir()
        self.assertIn("is not writeable", str(cm.exception))
def test_load(self):
    """A value saved to a config file must survive a load() after the runtime
    value has been changed again."""
    with TemporaryDirectory() as td:
        new_file = os.path.join(td, "test.cfg")
        # flip the flag so we save a non-default state
        self.config_inst.show_progress_bars = not self.config_inst.show_progress_bars
        old_val = self.config_inst.show_progress_bars
        self.config_inst.save(new_file)
        # set a runtime value, differing from what used to be state before save
        self.config_inst.show_progress_bars = not self.config_inst.show_progress_bars
        # loading must restore the saved (pre-flip) value
        self.config_inst.load(new_file)
        self.assertEqual(self.config_inst.show_progress_bars, old_val)
def test_max_n_entries(self):
    """The trajectory-info cache must honor config.traj_info_max_entries."""
    data = [np.random.random((10, 3)) for _ in range(20)]
    max_entries = 10
    config.traj_info_max_entries = max_entries
    files = []
    with TemporaryDirectory() as td:
        for i, arr in enumerate(data):
            fname = os.path.join(td, "%s.npy" % i)
            np.save(fname, arr)
            files.append(fname)
        pyemma.coordinates.source(files)
        # cache must be capped at max_entries but not empty
        self.assertLessEqual(self.db.num_entries, max_entries)
        self.assertGreater(self.db.num_entries, 0)
def test_save_dtrajs(self):
    """save_dtrajs must create one file per trajectory using the given
    prefix/extension naming pattern."""
    c = self.ass
    prefix = "test"
    extension = ".dtraj"
    with TemporaryDirectory() as outdir:
        c.save_dtrajs(trajfiles=None, prefix=prefix,
                      output_dir=outdir, extension=extension)
        n_traj = c.data_producer.number_of_trajectories()
        expected = [os.path.join(outdir, "%s_%i%s" % (prefix, i, extension))
                    for i in range(n_traj)]
        # check files with given patterns are there (os.stat raises if missing)
        for fname in expected:
            os.stat(fname)
def test_fragmented_reader(self):
    """Fragmented-source trajectory lengths must equal the sums of their parts."""
    top_file = pkg_resources.resource_filename(__name__, 'data/test.pdb')
    trajfiles, nframes = [], []
    with TemporaryDirectory() as wd:
        for _ in range(3):
            fname, _, length = create_traj(top_file, dir=wd)
            trajfiles.append(fname)
            nframes.append(length)
        # three trajectories: one consisting of all three, one consisting of the first,
        # one consisting of the first and the last
        frag = [trajfiles, [trajfiles[0]], [trajfiles[0], trajfiles[2]]]
        reader = api.source(frag, top=top_file)
        np.testing.assert_equal(
            reader.trajectory_lengths(),
            [sum(nframes), nframes[0], nframes[0] + nframes[2]])
def test_csv_reader(self):
    """Serialization round-trip of PyCSVReader over two space-delimited files."""
    arr = np.random.random(10).reshape(-1, 2)
    from pyemma.util.files import TemporaryDirectory
    delimiter = ' '
    with TemporaryDirectory() as d:
        files = [os.path.join(d, name) for name in ('1.csv', '2.csv')]
        for fn in files:
            np.savetxt(fn, arr, delimiter=delimiter)
        params = {'filenames': files, 'chunksize': 23}
        from pyemma.coordinates.data import PyCSVReader
        # sniffing the delimiter does not aid in the 1-column case:
        # https://bugs.python.org/issue2078
        # but also specifying it does not help...
        reader = PyCSVReader(delimiter=delimiter, **params)
        self.compare(reader, params)
def test_config_vals_match_properties_in_wrapper(self):
    """Options stored in the written pyemma.cfg must match the config wrapper's
    exposed keys()."""
    with TemporaryDirectory() as td:
        # NOTE(review): assumes assigning cfg_dir copies the default pyemma.cfg
        # into td -- confirm; otherwise reader.read() below finds no file and
        # options('pyemma') would raise NoSectionError.
        self.config_inst.cfg_dir = td
        self.assertEqual(self.config_inst.cfg_dir, td)
        from pyemma import config as config_module
        assert hasattr(config_module, 'default_config_file')
        my_cfg = os.path.join(td, 'pyemma.cfg')
        # the packaged pyemma.cfg is the advertised default config file
        self.assertEqual(pkg_resources.resource_filename('pyemma', 'pyemma.cfg'),
                         config_module.default_config_file)
        reader = configparser.ConfigParser()
        reader.read(my_cfg)
        opts = sorted(reader.options('pyemma'))
        actual = sorted(config_module.keys())
        self.assertEqual(opts, actual)
def test_save_load_no_cfg_file_given(self): """ test that in case no cfg dir has been set, the default location is being used and values changed at runtime are used afterwards.""" # replace a value with a non default value: with TemporaryDirectory() as td: os.environ['PYEMMA_CFG_DIR'] = td self.config_inst = pyemma.config() self.config_inst.show_progress_bars = not self.config_inst.show_progress_bars self.config_inst.save() supposed_to_use_cfg = os.path.join(td, self.config_inst.DEFAULT_CONFIG_FILE_NAME) cfg = configparser.RawConfigParser() cfg.read(supposed_to_use_cfg) self.assertEqual(cfg.getboolean('pyemma', 'show_progress_bars'), self.config_inst.show_progress_bars)
def test_max_size(self):
    """The trajectory-info cache database must stay below traj_info_max_size."""
    data = [np.random.random((150, 10)) for _ in range(150)]
    max_size = 1
    files = []
    config.show_progress_bars = False
    with TemporaryDirectory() as td, settings(traj_info_max_size=max_size):
        for i, arr in enumerate(data):
            f = os.path.join(td, "%s.txt" % i)
            # save as txt to enforce creation of offsets
            np.savetxt(f, arr)
            files.append(f)
        pyemma.coordinates.source(files)
        # NOTE(review): left side converts bytes to KiB; assumes
        # traj_info_max_size is expressed in KiB as well -- confirm against
        # the config documentation.
        self.assertLessEqual(os.stat(self.db.database_filename).st_size / 1024,
                             config.traj_info_max_size)
        self.assertGreater(self.db.num_entries, 0)
def test_h5_mdtraj_vs_plain(self):
    """An mdtraj-format .h5 must yield a FeatureReader, while a plain HDF5
    dataset file must yield an H5Reader."""
    with TemporaryDirectory() as td:
        mdtraj_h5 = convert_traj(self.traj_files[0], format='h5', dir=td,
                                 top=self.pdb_file)
        reader = api.source(mdtraj_h5, top=self.pdb_file)
        from pyemma.coordinates.data import FeatureReader
        self.assertIsInstance(reader, FeatureReader)
        import h5py
        from pyemma.coordinates.data.h5_reader import H5Reader
        plain_h5_file = os.path.join(td, 'f.h5')
        with h5py.File(plain_h5_file, mode='a') as fh:
            fh.create_dataset('test', data=np.random.random((100, 3)))
        reader = api.source(plain_h5_file)
        self.assertIsInstance(reader, H5Reader)
def test_fragmented_reader_random_access1(self):
    """Strided iteration over a fragmented reader, collected as Trajectory
    chunks and re-joined, must match frames loaded directly with mdtraj."""
    with TemporaryDirectory() as td:
        trajfiles = []
        for i in range(3):
            trajfiles.append(
                create_traj(start=i * 10, dir=td, length=20)[0])
        topfile = get_top()
        # first source entry is itself fragmented (files 0 + 1)
        trajfiles = [(trajfiles[0], trajfiles[1]), trajfiles[0], trajfiles[2]]
        source = coor.source(trajfiles, top=topfile)
        assert isinstance(source, FragmentedTrajectoryReader)
        # make the underlying readers yield mdtraj.Trajectory objects instead
        # of plain coordinate arrays, so chunks can be join()-ed below.
        # NOTE(review): the condition looks inverted -- it indexes r when it is
        # NOT a list/tuple -- confirm the container type of source._readers.
        for r in source._readers:
            if not isinstance(r, (list, tuple)):
                r = r[0]
            for _r in r:
                _r._return_traj_obj = True
        from collections import defaultdict
        for chunksize in [0, 2, 3, 100000]:
            # collect all chunks per trajectory index
            frames = defaultdict(list)
            with source.iterator(chunk=chunksize, return_trajindex=True,
                                 stride=self.stride) as it:
                for itraj, t in it:
                    frames[itraj].append(t)
            # stitch the per-trajectory chunks back into one Trajectory each
            dest = []
            for itraj in frames.keys():
                dest.append(frames[itraj][0])
                for t in frames[itraj][1:]:
                    dest[-1] = dest[-1].join(t)
            keys = np.unique(self.stride[:, 0])
            for i, coords in enumerate(dest):
                if i in keys:
                    traj = mdtraj.load(trajfiles[i], top=topfile)
                    np.testing.assert_equal(
                        coords.xyz,
                        traj.xyz[np.array(
                            self.stride[self.stride[:, 0] == i][:, 1])],
                        err_msg="not equal for chunksize=%s" % chunksize)
def test_npy_reader(self):
    """Cached trajectory info (length, ndim, offsets) for .npy files must
    match the arrays that were saved."""
    lengths_and_dims = [(7, 3), (23, 3), (27, 3)]
    data = [np.empty((n, dim)) for n, dim in lengths_and_dims]
    files = []
    with TemporaryDirectory() as td:
        for i, x in enumerate(data):
            fn = os.path.join(td, "%i.npy" % i)
            np.save(fn, x)
            files.append(fn)
        reader = NumPyFileReader(files)
        # cache it and compare
        results = {f: (self.db[f, reader].length,
                       self.db[f, reader].ndim,
                       self.db[f, reader].offsets)
                   for f in files}
        expected = {f: (len(data[i]), data[i].shape[1], [])
                    for i, f in enumerate(files)}
        np.testing.assert_equal(results, expected)
def test_non_writeable_cfg_dir(self):
    """readConfiguration must emit a UserWarning containing 'is not writeable'
    when ~/.pyemma exists but cannot be written to."""
    with TemporaryDirectory() as tmp:
        cfg_dir = os.path.join(tmp, '.pyemma')
        os.mkdir(cfg_dir)
        os.environ['HOME'] = tmp
        # make cfg dir non-writeable.
        # Bug fix: the original passed the decimal literal 444 (== 0o674),
        # which leaves the owner write bit set, so the directory stayed
        # writeable and the warning under test could never fire. 0o444
        # (r--r--r--) is the intended read-only mode.
        os.chmod(cfg_dir, 0o444)
        exp_homedir = os.path.expanduser('~')
        assert exp_homedir == tmp
        with warnings.catch_warnings(record=True) as w:
            # Cause all warnings to always be triggered.
            warnings.simplefilter("always")
            # Trigger a warning.
            readConfiguration()
            assert len(w) == 1
            assert issubclass(w[-1].category, UserWarning)
            assert "is not writeable" in str(w[-1].message)
def test_write_to_csv_propagate_filenames(self):
    """write_to_csv must name its output files after the reader's input files."""
    from pyemma.coordinates import source, tica
    with TemporaryDirectory() as td:
        data = [np.random.random((20, 3))] * 3
        fns = [os.path.join(td, name)
               for name in ('blah.npy', 'blub.npy', 'foo.npy')]
        for arr, fn in zip(data, fns):
            np.save(fn, arr)
        reader = source(fns)
        assert reader.filenames == fns
        tica_obj = tica(reader, lag=1)
        tica_obj.write_to_csv(extension=".exotic")
        res = sorted([os.path.abspath(p)
                      for p in glob(td + os.path.sep + '*.exotic')])
        self.assertEqual(len(res), len(fns))
        desired_fns = sorted([fn.replace('.npy', '.exotic') for fn in fns])
        self.assertEqual(res, desired_fns)
def test_with_fragmented_reader(self):
    """save_traj over a fragmented reader with random-access indices must
    return exactly the selected frames, for a range of chunk sizes."""
    from pyemma.util.files import TemporaryDirectory
    trajlen = 35
    # trajectory 0 (first trajectory, is trajfiles[2])
    # -> skipped
    # trajectory 1 (second trajectory, is {trajfiles[0], trajfiles[1]})
    # fragment 1:
    # -> frames 0,1,2,10
    # fragment 2:
    # -> frames 1 (i.e., 36) and 34 (i.e., 69)
    # trajectory 2 (third trajectory, is trajfiles[2])
    # -> frame 5
    ra_indices = np.array([[1, 0], [1, 1], [1, 2], [1, 10],
                           [1, trajlen + 1], [1, 2 * trajlen - 1],
                           [2, 5]], dtype=int)
    with TemporaryDirectory() as td:
        trajfiles = []
        xyzs = []
        for i in range(3):
            tf, xyz, _ = create_traj(start=i * 10, dir=td, length=trajlen)
            trajfiles.append(tf)
            xyzs.append(xyz)
        topfile = get_top()
        frag_traj = [trajfiles[2],
                     [trajfiles[0], trajfiles[1]],
                     trajfiles[2]]
        # expected frames, in ra_indices order: fragment 1 of trajectory 1,
        # fragment 2 of trajectory 1, then frame 5 of trajectory 2
        expected = xyzs[0][np.array([0, 1, 2, 10]), :], xyzs[1][np.array(
            [1, 34])], np.array([(xyzs[2][5, :])])
        expected = np.vstack(expected)
        reader = coor.source(frag_traj, top=topfile)
        # result must be independent of the iteration chunk size
        for cs in range(1, 10):
            traj = save_traj(reader, ra_indices, None, chunksize=cs)
            np.testing.assert_almost_equal(traj.xyz, expected)