def test_group_indexing_simultaneous(self):
    """Write two 'Test_' groups in one tree and check they become Test_000 and Test_001.

    Both groups share the base name 'Test_' and sit under the same root
    group, interleaved with two unrelated groups. The whole tree is written
    with a single ``writer.write`` call, after which the name and the
    attributes of each written group are verified.
    """
    file_path = 'test.h5'
    self.__delete_existing_file(file_path)
    try:
        # Explicit mode: h5py >= 3.0 opens files read-only when no mode is given,
        # which cannot create the file that was just deleted.
        with h5py.File(file_path, mode='w') as h5_f:
            micro_group_0 = VirtualGroup('Test_', attrs={'att_1': 'string_val', 'att_2': 1.2345})
            micro_group_1 = VirtualGroup('Test_', attrs={'att_3': [1, 2, 3, 4],
                                                         'att_4': ['str_1', 'str_2', 'str_3']})
            root_group = VirtualGroup('', children=[VirtualGroup('blah'), micro_group_0,
                                                    VirtualGroup('meh'), micro_group_1])

            writer = HDFwriter(h5_f)
            h5_refs_list = writer.write(root_group)

            [h5_group_1] = get_h5_obj_refs(['Test_001'], h5_refs_list)
            [h5_group_0] = get_h5_obj_refs(['Test_000'], h5_refs_list)

            # The first 'Test_' child must have received index 000 ...
            self.assertIsInstance(h5_group_0, h5py.Group)
            self.assertEqual(h5_group_0.name, '/Test_000')
            for key, expected_val in micro_group_0.attrs.items():
                self.assertTrue(np.all(get_attr(h5_group_0, key) == expected_val))

            # ... and the second one index 001.
            self.assertIsInstance(h5_group_1, h5py.Group)
            self.assertEqual(h5_group_1.name, '/Test_001')
            for key, expected_val in micro_group_1.attrs.items():
                self.assertTrue(np.all(get_attr(h5_group_1, key) == expected_val))
    finally:
        # Remove the scratch file even when an assertion above fails.
        if os.path.exists(file_path):
            os.remove(file_path)
def test_write_dset_under_root(self):
    """Write a single dataset with attributes and region references and verify it.

    The 'labels' attribute is given as a dictionary of slices. The test
    expects one attribute per simple entry, one 'labels' attribute and one
    region-reference attribute per label, and checks that dereferencing
    'even_rows' / 'odd_rows' returns the matching rows of the data.
    """
    file_path = 'test.h5'
    self.__delete_existing_file(file_path)
    try:
        # Explicit mode: h5py >= 3.0 opens files read-only when no mode is given.
        with h5py.File(file_path, mode='w') as h5_f:
            writer = HDFwriter(h5_f)
            data = np.random.rand(5, 7)
            attrs = {'att_1': 'string_val',
                     'att_2': 1.2345,
                     'att_3': [1, 2, 3, 4],
                     'att_4': ['str_1', 'str_2', 'str_3'],
                     'labels': {'even_rows': (slice(0, None, 2), slice(None)),
                                'odd_rows': (slice(1, None, 2), slice(None))}}
            micro_dset = VirtualDataset('test', data)
            micro_dset.attrs = attrs.copy()
            [h5_dset] = writer.write(micro_dset)
            self.assertIsInstance(h5_dset, h5py.Dataset)

            # Separate the region-reference spec from the plain attributes.
            reg_ref = attrs.pop('labels')

            # plain attributes + 'labels' + one attribute per region reference
            self.assertEqual(len(h5_dset.attrs), len(attrs) + 1 + len(reg_ref))

            for key, expected_val in attrs.items():
                self.assertTrue(np.all(get_attr(h5_dset, key) == expected_val))

            expected_labels = list(reg_ref.keys())
            self.assertTrue(np.all([x in expected_labels for x in get_attr(h5_dset, 'labels')]))

            expected_data = [data[::2], data[1::2]]
            written_data = [h5_dset[h5_dset.attrs['even_rows']],
                            h5_dset[h5_dset.attrs['odd_rows']]]

            for exp, act in zip(expected_data, written_data):
                self.assertTrue(np.allclose(exp, act))
    finally:
        # Remove the scratch file even when an assertion above fails.
        if os.path.exists(file_path):
            os.remove(file_path)
def __validate_aux_dset_pair(self, h5_group, h5_inds, h5_vals, dim_names, dim_units, inds_matrix,
                             vals_matrix=None, base_name=None, h5_main=None, is_spectral=True):
    """Check a pair of ancillary (indices / values) datasets against reference data.

    Parameters
    ----------
    h5_group : expected parent of both datasets
    h5_inds, h5_vals : the indices and values datasets to validate
    dim_names, dim_units : expected entries of the 'labels' and 'units' attributes.
        Each name must also be an attribute usable as a region reference.
    inds_matrix : reference contents of the indices dataset
    vals_matrix : reference contents of the values dataset; defaults to ``inds_matrix``
    base_name : prefix of the dataset names ('<base_name>_Indices' / '<base_name>_Values').
        Defaults to 'Spectroscopic' or 'Position' depending on ``is_spectral``.
    h5_main : optional main dataset; when it is an ``h5py.Dataset`` its attribute
        named after each ancillary dataset must resolve to that dataset
    is_spectral : when True each dimension is a row of the reference matrix,
        otherwise each dimension is a column
    """
    if vals_matrix is None:
        vals_matrix = inds_matrix

    if base_name is None:
        base_name = 'Spectroscopic' if is_spectral else 'Position'
    else:
        # NOTE(review): `unicode` must be defined at module level on Python 3 - confirm.
        self.assertIsInstance(base_name, (str, unicode))

    for h5_dset, exp_dtype, exp_name, ref_data in zip(
            [h5_inds, h5_vals],
            [write_utils.INDICES_DTYPE, write_utils.VALUES_DTYPE],
            [base_name + '_Indices', base_name + '_Values'],
            [inds_matrix, vals_matrix]):
        if isinstance(h5_main, h5py.Dataset):
            # The main dataset must link to this ancillary dataset by name.
            self.assertEqual(h5_main.file[h5_main.attrs[exp_name]], h5_dset)
        self.assertIsInstance(h5_dset, h5py.Dataset)
        self.assertEqual(h5_dset.parent, h5_group)
        self.assertEqual(h5_dset.name.split('/')[-1], exp_name)
        self.assertTrue(np.allclose(ref_data, h5_dset[()]))
        self.assertEqual(h5_dset.dtype, exp_dtype)

        self.assertTrue(np.all([attr_name in h5_dset.attrs.keys()
                                for attr_name in ['labels', 'units']]))
        self.assertTrue(np.all([x == y for x, y in
                                zip(dim_names, hdf_utils.get_attr(h5_dset, 'labels'))]))
        self.assertTrue(np.all([x == y for x, y in
                                zip(dim_units, hdf_utils.get_attr(h5_dset, 'units'))]))

        # assert region references
        for dim_ind, curr_name in enumerate(dim_names):
            # Only take the slice matching the orientation. Evaluating the
            # column slice for spectral data would raise IndexError whenever
            # there are more dimensions (rows) than columns.
            if is_spectral:
                expected = np.squeeze(ref_data[dim_ind])
            else:
                expected = np.squeeze(ref_data[:, dim_ind])
            self.assertTrue(np.allclose(expected,
                                        np.squeeze(h5_dset[h5_dset.attrs[curr_name]])))
def test_write_reg_ref_slice_dim_larger_than_data(self):
    """Write region references whose slice stop (15) exceeds the data extent (5 rows).

    The references are expected to behave like open-ended slices:
    dereferencing 'even_rows' / 'odd_rows' must return every second row of
    the data starting at row 0 / row 1.
    """
    file_path = 'test.h5'
    self.__delete_existing_file(file_path)
    try:
        # Explicit mode: h5py >= 3.0 opens files read-only when no mode is given.
        with h5py.File(file_path, mode='w') as h5_f:
            writer = HDFwriter(h5_f)
            data = np.random.rand(5, 7)
            h5_dset = writer._create_simple_dset(h5_f, VirtualDataset('test', data))
            self.assertIsInstance(h5_dset, h5py.Dataset)

            attrs = {'labels': {'even_rows': (slice(0, 15, 2), slice(None)),
                                'odd_rows': (slice(1, 15, 2), slice(None))}}

            writer._write_dset_attributes(h5_dset, attrs.copy())
            h5_f.flush()

            # two attributes point to region references, plus one for 'labels'
            self.assertEqual(len(h5_dset.attrs), 1 + len(attrs['labels']))

            # check if the labels attribute was written:
            expected_labels = list(attrs['labels'].keys())
            self.assertTrue(np.all([x in expected_labels for x in get_attr(h5_dset, 'labels')]))

            expected_data = [data[::2], data[1::2]]
            written_data = [h5_dset[h5_dset.attrs['even_rows']],
                            h5_dset[h5_dset.attrs['odd_rows']]]

            for exp, act in zip(expected_data, written_data):
                self.assertTrue(np.allclose(exp, act))
    finally:
        # Remove the scratch file even when an assertion above fails.
        if os.path.exists(file_path):
            os.remove(file_path)
def test_generate_and_write_reg_ref_legal(self):
    """Write 'labels' given as a plain list of names and check the resulting region references.

    The data has two rows and two labels. Dereferencing 'row_1' / 'row_2'
    is expected to return row 0 / row 1 of the data. On Python 3 the write
    is additionally expected to emit a ``UserWarning``.
    """
    file_path = 'test.h5'
    self.__delete_existing_file(file_path)
    try:
        # Explicit mode: h5py >= 3.0 opens files read-only when no mode is given.
        with h5py.File(file_path, mode='w') as h5_f:
            writer = HDFwriter(h5_f)
            data = np.random.rand(2, 7)
            h5_dset = writer._create_simple_dset(h5_f, VirtualDataset('test', data))
            self.assertIsInstance(h5_dset, h5py.Dataset)

            attrs = {'labels': ['row_1', 'row_2']}

            if sys.version_info.major == 3:
                # assertWarns only exists on Python 3.
                with self.assertWarns(UserWarning):
                    writer._write_dset_attributes(h5_dset, attrs.copy())
            else:
                writer._write_dset_attributes(h5_dset, attrs.copy())
            h5_f.flush()

            # two attributes point to region references, plus one for 'labels'
            self.assertEqual(len(h5_dset.attrs), 1 + len(attrs['labels']))

            # check if the labels attribute was written:
            expected_labels = list(attrs['labels'])
            self.assertTrue(np.all([x in expected_labels for x in get_attr(h5_dset, 'labels')]))

            expected_data = [data[0], data[1]]
            written_data = [h5_dset[h5_dset.attrs['row_1']],
                            h5_dset[h5_dset.attrs['row_2']]]

            for exp, act in zip(expected_data, written_data):
                self.assertTrue(np.allclose(np.squeeze(exp), np.squeeze(act)))
    finally:
        # Remove the scratch file even when an assertion above fails.
        if os.path.exists(file_path):
            os.remove(file_path)
def test_group_indexing_sequential(self):
    """Write two 'Test_' groups in separate calls and check they become Test_000 and Test_001.

    An unrelated group ('blah') is written between the two indexed groups.
    The name and attributes of each indexed group are verified right after
    it is written.
    """
    file_path = 'test.h5'
    self.__delete_existing_file(file_path)
    try:
        # Explicit mode: h5py >= 3.0 opens files read-only when no mode is given.
        with h5py.File(file_path, mode='w') as h5_f:
            writer = HDFwriter(h5_f)

            micro_group_0 = VirtualGroup('Test_', attrs={'att_1': 'string_val', 'att_2': 1.2345})
            [h5_group_0] = writer.write(micro_group_0)

            _ = writer.write(VirtualGroup('blah'))

            self.assertIsInstance(h5_group_0, h5py.Group)
            self.assertEqual(h5_group_0.name, '/Test_000')
            for key, expected_val in micro_group_0.attrs.items():
                self.assertTrue(np.all(get_attr(h5_group_0, key) == expected_val))

            micro_group_1 = VirtualGroup('Test_', attrs={'att_3': [1, 2, 3, 4],
                                                         'att_4': ['str_1', 'str_2', 'str_3']})
            [h5_group_1] = writer.write(micro_group_1)

            self.assertIsInstance(h5_group_1, h5py.Group)
            self.assertEqual(h5_group_1.name, '/Test_001')
            for key, expected_val in micro_group_1.attrs.items():
                self.assertTrue(np.all(get_attr(h5_group_1, key) == expected_val))
    finally:
        # Remove the scratch file even when an assertion above fails.
        if os.path.exists(file_path):
            os.remove(file_path)
def test_write_single_group(self):
    """Write one group carrying string, float and list attributes and read them back."""
    file_path = 'test.h5'
    self.__delete_existing_file(file_path)
    try:
        # Explicit mode: h5py >= 3.0 opens files read-only when no mode is given.
        with h5py.File(file_path, mode='w') as h5_f:
            attrs = {'att_1': 'string_val',
                     'att_2': 1.2345,
                     'att_3': [1, 2, 3, 4],
                     'att_4': ['str_1', 'str_2', 'str_3']}

            micro_group = VirtualGroup('Test_')
            micro_group.attrs = attrs

            writer = HDFwriter(h5_f)
            [h5_group] = writer.write(micro_group)

            for key, expected_val in attrs.items():
                self.assertTrue(np.all(get_attr(h5_group, key) == expected_val))
    finally:
        # Remove the scratch file even when an assertion above fails.
        if os.path.exists(file_path):
            os.remove(file_path)
def test_write_simple_tree(self):
    """Write a two-level tree and verify every written object.

    The tree is an un-indexed outer group that holds an indexed inner group
    (with one dataset) and a second dataset. For each written object the
    test checks its type, its parent, its attributes and, for the datasets,
    the 'even_rows' / 'odd_rows' region references.
    """
    file_path = 'test.h5'
    self.__delete_existing_file(file_path)
    try:
        # Explicit mode: h5py >= 3.0 opens files read-only when no mode is given.
        with h5py.File(file_path, mode='w') as h5_f:
            inner_dset_data = np.random.rand(5, 7)
            inner_dset_attrs = {'att_1': 'string_val',
                                'att_2': 1.2345,
                                'att_3': [1, 2, 3, 4],
                                'att_4': ['str_1', 'str_2', 'str_3'],
                                'labels': {'even_rows': (slice(0, None, 2), slice(None)),
                                           'odd_rows': (slice(1, None, 2), slice(None))}}
            inner_dset = VirtualDataset('inner_dset', inner_dset_data)
            inner_dset.attrs = inner_dset_attrs.copy()

            attrs_inner_grp = {'att_1': 'string_val',
                               'att_2': 1.2345,
                               'att_3': [1, 2, 3, 4],
                               'att_4': ['str_1', 'str_2', 'str_3']}
            inner_group = VirtualGroup('indexed_inner_group_')
            inner_group.attrs = attrs_inner_grp
            inner_group.add_children(inner_dset)

            outer_dset_data = np.random.rand(5, 7)
            outer_dset_attrs = {'att_1': 'string_val',
                                'att_2': 1.2345,
                                'att_3': [1, 2, 3, 4],
                                'att_4': ['str_1', 'str_2', 'str_3'],
                                'labels': {'even_rows': (slice(0, None, 2), slice(None)),
                                           'odd_rows': (slice(1, None, 2), slice(None))}}
            outer_dset = VirtualDataset('test', outer_dset_data, parent='/test_group')
            outer_dset.attrs = outer_dset_attrs.copy()

            attrs_outer_grp = {'att_1': 'string_val',
                               'att_2': 1.2345,
                               'att_3': [1, 2, 3, 4],
                               'att_4': ['str_1', 'str_2', 'str_3']}
            outer_group = VirtualGroup('unindexed_outer_group')
            outer_group.attrs = attrs_outer_grp
            outer_group.add_children([inner_group, outer_dset])

            writer = HDFwriter(h5_f)
            h5_refs_list = writer.write(outer_group)

            # I don't know of a more elegant way to do this:
            [h5_outer_dset] = get_h5_obj_refs([outer_dset.name], h5_refs_list)
            [h5_inner_dset] = get_h5_obj_refs([inner_dset.name], h5_refs_list)
            [h5_outer_group] = get_h5_obj_refs([outer_group.name], h5_refs_list)
            [h5_inner_group] = get_h5_obj_refs(['indexed_inner_group_000'], h5_refs_list)

            self.assertIsInstance(h5_outer_dset, h5py.Dataset)
            self.assertIsInstance(h5_inner_dset, h5py.Dataset)
            self.assertIsInstance(h5_outer_group, h5py.Group)
            self.assertIsInstance(h5_inner_group, h5py.Group)

            # check assertions for the inner dataset first
            self.assertEqual(h5_inner_dset.parent, h5_inner_group)

            reg_ref = inner_dset_attrs.pop('labels')

            # plain attributes + 'labels' + one attribute per region reference
            self.assertEqual(len(h5_inner_dset.attrs), len(inner_dset_attrs) + 1 + len(reg_ref))

            for key, expected_val in inner_dset_attrs.items():
                self.assertTrue(np.all(get_attr(h5_inner_dset, key) == expected_val))

            expected_labels = list(reg_ref.keys())
            self.assertTrue(np.all([x in expected_labels
                                    for x in get_attr(h5_inner_dset, 'labels')]))

            expected_data = [inner_dset_data[::2], inner_dset_data[1::2]]
            written_data = [h5_inner_dset[h5_inner_dset.attrs['even_rows']],
                            h5_inner_dset[h5_inner_dset.attrs['odd_rows']]]

            for exp, act in zip(expected_data, written_data):
                self.assertTrue(np.allclose(exp, act))

            # check assertions for the inner data group next:
            self.assertEqual(h5_inner_group.parent, h5_outer_group)
            for key, expected_val in attrs_inner_grp.items():
                self.assertTrue(np.all(get_attr(h5_inner_group, key) == expected_val))

            # check the outer dataset next:
            self.assertEqual(h5_outer_dset.parent, h5_outer_group)

            reg_ref = outer_dset_attrs.pop('labels')

            self.assertEqual(len(h5_outer_dset.attrs), len(outer_dset_attrs) + 1 + len(reg_ref))

            for key, expected_val in outer_dset_attrs.items():
                self.assertTrue(np.all(get_attr(h5_outer_dset, key) == expected_val))

            expected_labels = list(reg_ref.keys())
            self.assertTrue(np.all([x in expected_labels
                                    for x in get_attr(h5_outer_dset, 'labels')]))

            expected_data = [outer_dset_data[::2], outer_dset_data[1::2]]
            written_data = [h5_outer_dset[h5_outer_dset.attrs['even_rows']],
                            h5_outer_dset[h5_outer_dset.attrs['odd_rows']]]

            for exp, act in zip(expected_data, written_data):
                self.assertTrue(np.allclose(exp, act))

            # Finally check the outer group:
            self.assertEqual(h5_outer_group.parent, h5_f)
            for key, expected_val in attrs_outer_grp.items():
                self.assertTrue(np.all(get_attr(h5_outer_group, key) == expected_val))
    finally:
        # Remove the scratch file even when an assertion above fails.
        if os.path.exists(file_path):
            os.remove(file_path)
def test_legal_translation(self):
    """Translate a (15, 14) array with NumpyTranslator and verify the resulting file layout.

    The input has two position dimensions (5 x 3) and two spectroscopic
    dimensions (7 x 2). The test expects a single 'Measurement_000' group
    carrying the supplied parameters, holding a single 'Channel_000' group
    that contains 'Raw_Data', the four ancillary datasets and the extra
    datasets.
    """
    data_name = 'TestDataType'
    attrs = {'att_1': 'string_val',
             'att_2': 1.2345,
             'att_3': [1, 2, 3, 4],
             'att_4': ['str_1', 'str_2', 'str_3']}

    extra_dsets = {'dset_1': np.random.rand(5),
                   'dset_2': np.arange(25)}

    file_path = 'test_numpy_translator.h5'
    self.__delete_existing_file(file_path)
    try:
        main_data = np.random.rand(15, 14)
        main_data_name = 'Test_Main'
        quantity = 'Current'
        units = 'nA'

        pos_sizes = [5, 3]
        pos_names = ['X', 'Y']
        pos_units = ['nm', 'um']
        pos_dims = [write_utils.Dimension(name, unit, np.arange(length))
                    for name, unit, length in zip(pos_names, pos_units, pos_sizes)]
        # One row per position, one column per position dimension.
        pos_data = np.vstack((np.tile(np.arange(5), 3),
                              np.repeat(np.arange(3), 5))).T

        spec_sizes = [7, 2]
        spec_names = ['Bias', 'Cycle']
        spec_units = ['V', '']
        spec_dims = [write_utils.Dimension(name, unit, np.arange(length))
                     for name, unit, length in zip(spec_names, spec_units, spec_sizes)]
        # One row per spectroscopic dimension, one column per spectral step.
        spec_data = np.vstack((np.tile(np.arange(7), 2),
                               np.repeat(np.arange(2), 7)))

        translator = NumpyTranslator()
        _ = translator.translate(file_path, data_name, main_data, quantity, units, pos_dims, spec_dims,
                                 parm_dict=attrs, extra_dsets=extra_dsets)

        with h5py.File(file_path, mode='r') as h5_f:
            # we are not interested in most of the attributes under root besides two:
            self.assertEqual(data_name, hdf_utils.get_attr(h5_f, 'data_type'))
            self.assertEqual('NumpyTranslator', hdf_utils.get_attr(h5_f, 'translator'))

            # First level should have absolutely nothing besides one group
            self.assertEqual(len(h5_f.items()), 1)
            self.assertIn('Measurement_000', h5_f.keys())
            h5_meas_grp = h5_f['Measurement_000']
            self.assertIsInstance(h5_meas_grp, h5py.Group)

            # check the attributes under this group
            self.assertEqual(len(h5_meas_grp.attrs), len(attrs))
            for key, expected_val in attrs.items():
                self.assertTrue(np.all(hdf_utils.get_attr(h5_meas_grp, key) == expected_val))

            # Again, this group should only have one group - Channel_000
            self.assertEqual(len(h5_meas_grp.items()), 1)
            self.assertIn('Channel_000', h5_meas_grp.keys())
            h5_chan_grp = h5_meas_grp['Channel_000']
            self.assertIsInstance(h5_chan_grp, h5py.Group)

            # This channel group is not expected to have any attributes but it will contain the main dataset
            self.assertEqual(len(h5_chan_grp.items()), 5 + len(extra_dsets))
            for dset_name in ['Raw_Data', 'Position_Indices', 'Position_Values',
                              'Spectroscopic_Indices', 'Spectroscopic_Values']:
                self.assertIn(dset_name, h5_chan_grp.keys())
                h5_dset = h5_chan_grp[dset_name]
                self.assertIsInstance(h5_dset, h5py.Dataset)

            pycro_main = PycroDataset(h5_chan_grp['Raw_Data'])

            self.assertIsInstance(pycro_main, PycroDataset)
            self.assertEqual(pycro_main.name.split('/')[-1], 'Raw_Data')
            self.assertEqual(pycro_main.parent, h5_chan_grp)
            self.assertTrue(np.allclose(main_data, pycro_main[()]))

            self.__validate_aux_dset_pair(h5_chan_grp, pycro_main.h5_pos_inds, pycro_main.h5_pos_vals,
                                          pos_names, pos_units, pos_data,
                                          h5_main=pycro_main, is_spectral=False)

            self.__validate_aux_dset_pair(h5_chan_grp, pycro_main.h5_spec_inds, pycro_main.h5_spec_vals,
                                          spec_names, spec_units, spec_data,
                                          h5_main=pycro_main, is_spectral=True)

            # Now validate each of the extra datasets:
            for key, val in extra_dsets.items():
                self.assertIn(key, h5_chan_grp.keys())
                h5_dset = h5_chan_grp[key]
                self.assertIsInstance(h5_dset, h5py.Dataset)
                self.assertTrue(np.allclose(val, h5_dset[()]))
    finally:
        # Remove the translated file even when translation or an assertion fails.
        if os.path.exists(file_path):
            os.remove(file_path)