def test_use_case_example(tmp_path):
    """End-to-end example: follow a live file, reduce each frame, write results.

    Reads frames from a test input file via ``DataSource``, sums each frame
    along axis 1, appends the reductions to an output file, then reopens the
    output and checks its shape and a couple of known values.
    """
    f = str(tmp_path / "f.h5")
    o = str(tmp_path / "o.h5")
    create_test_file(f)
    output_path = "result"
    with h5py.File(f, "r") as fh, h5py.File(o, "w") as oh:
        data_paths = ["/data"]
        key_paths = ["/key"]
        df = DataSource(fh, key_paths, data_paths, timeout=1)
        output = None
        for dset in df:
            d = dset["/data"]
            d = d.squeeze()
            r = d.sum(axis=1)
            assert dset.maxshape == (2, 3)
            if output is None:
                # First slice: create the (growable) output dataset.
                output = df.create_dataset(r, oh, output_path)
            else:
                # Subsequent slices: append at the position described by
                # the slice metadata.
                df.append_data(r, dset.slice_metadata, output)
    # Reopen the output read-only and verify the accumulated result.
    with h5py.File(o, "r") as oh:
        out = oh["/result"]
        assert out.shape == (2, 3, 4)
        assert out.maxshape == (2, 3, 4)
        # Last element is the axis-1 sum of the final frame's last row.
        assert 119 + 118 + 117 + 116 + 115 == out[1, 2, 3]
        assert out[0, 1, 0] != 0
def test_correct_return_shape():
    """Each yielded slice keeps one unit-length entry per scan dimension."""
    mock_file = {
        "keys": {
            "four_dimensional": Dataset.four_dimensional_dataset_keys(),
            "three_dimensional": Dataset.three_dimensional_dataset_keys(),
        },
        "data/four_dimensional": Dataset.four_dimensional_dataset_data(),
        "data/three_dimensional": Dataset.three_dimensional_dataset_data(),
    }
    follower = DataSource.DataFollower(
        mock_file,
        ["keys"],
        ["data/four_dimensional", "data/three_dimensional"],
        timeout=0.1,
    )
    for slice_pair in follower:
        assert slice_pair[0].shape == (1, 1, 1, 10)
        assert slice_pair[1].shape == (1, 1, 10)
def test_correct_return_data_complete():
    """Concatenating every yielded slice reproduces the full dataset."""
    mock_file = {
        "keys": {"complete": Dataset.complete_dataset_keys()},
        "data/complete": Dataset.complete_dataset_data(),
    }
    follower = DataSource.DataFollower(
        mock_file, ["keys"], ["data/complete"], timeout=0.1
    )
    collected = np.array([])
    for slices in follower:
        collected = np.concatenate((collected, slices[0].flatten()))
    expected = Dataset.complete_dataset_data().flatten()
    assert (expected == collected.flatten()).all()
def test_iterates_incomplete_dataset():
    """A partially written dataset yields exactly as many slices as keys set."""
    mock_file = {
        "keys": {"incomplete": Dataset.incomplete_dataset_keys()},
        "data/incomplete": Dataset.incomplete_dataset_data(),
    }
    follower = DataSource.DataFollower(
        mock_file, ["keys"], ["data/incomplete"], timeout=0.1
    )
    # Count how many slices the follower produces before timing out.
    seen = sum(1 for _ in follower)
    assert seen == 40
def test_reset_method_iterates_correct_length():
    """After reset(), a second pass yields the full dataset again."""
    mock_file = {
        "keys": {"complete": Dataset.complete_dataset_keys()},
        "data/complete": Dataset.complete_dataset_data(),
    }
    follower = DataSource.DataFollower(
        mock_file, ["keys"], ["data/complete"], timeout=0.1
    )
    total = 0
    for _ in follower:
        total += 1
    follower.reset()
    for _ in follower:
        total += 1
    # Two complete passes of 50 slices each.
    assert total == 100
def inner_data_read(tmp_path, cache):
    """Shared body: verify each yielded frame's values, with/without caching.

    Creates a test file and checks that frame *i* equals a 4x5 ramp offset
    by ``20 * i``.
    """
    path = str(tmp_path / "f.h5")
    create_test_file(path)
    with h5py.File(path, "r") as fh:
        data_paths = ["/data"]
        key_paths = ["/key"]
        follower = DataSource(
            fh, key_paths, data_paths, timeout=1, cache_datasets=cache
        )
        expected = np.arange(4 * 5).reshape((4, 5))
        for index, dset in enumerate(follower):
            frame = dset["/data"]
            assert np.all(frame == expected + (20 * index))
def test_mock_scan(tmp_path):
    """Follow a file while a child process writes it (SWMR live-scan test).

    Spawns ``mock_scan`` in a subprocess, opens the file in SWMR read mode,
    and checks that frames arrive with sequentially increasing values.
    """
    f = str(tmp_path / "scan.h5")
    # Use get_context instead of set_start_method: set_start_method raises
    # RuntimeError if the start method was already set (e.g. by another test
    # in the same session), while a context is always safe to create.
    ctx = mp.get_context("spawn")
    p = ctx.Process(target=mock_scan, args=(f,))
    p.start()
    try:
        utils.check_file_readable(f, ["/data", "/key"], timeout=5)
        with h5py.File(f, "r", libver="latest", swmr=True) as fh:
            data_paths = ["/data"]
            key_paths = ["/key"]
            df = DataSource(fh, key_paths, data_paths, timeout=1)
            count = 1
            # The writer must still be running while we start following.
            assert p.is_alive()
            for dset in df:
                d = dset["/data"]
                assert d[0, 0, 0].item() == count
                count = count + 1
    finally:
        # Always reap the child, even if an assertion above fails,
        # so a failing test does not leak a live process.
        p.join()