Пример #1
0
    def test_group_indexing_simultaneous(self):
        """Write two 'Test_' groups within a single tree and check their index suffixes.

        Both virtual groups share the name 'Test_' and are written in one
        ``writer.write`` call together with two unrelated groups. The written
        groups are expected at '/Test_000' and '/Test_001', each carrying the
        attributes of the virtual group it was created from.
        """
        file_path = 'test.h5'
        self.__delete_existing_file(file_path)
        try:
            # Explicit write mode: h5py >= 3.0 opens read-only by default, which
            # fails because the file was just removed.
            with h5py.File(file_path, mode='w') as h5_f:
                micro_group_0 = VirtualGroup('Test_', attrs={'att_1': 'string_val', 'att_2': 1.2345})
                micro_group_1 = VirtualGroup('Test_', attrs={'att_3': [1, 2, 3, 4],
                                                             'att_4': ['str_1', 'str_2', 'str_3']})
                root_group = VirtualGroup('', children=[VirtualGroup('blah'), micro_group_0,
                                                        VirtualGroup('meh'), micro_group_1])

                writer = HDFwriter(h5_f)
                h5_refs_list = writer.write(root_group)

                [h5_group_1] = get_h5_obj_refs(['Test_001'], h5_refs_list)
                [h5_group_0] = get_h5_obj_refs(['Test_000'], h5_refs_list)

                checks = [(h5_group_0, '/Test_000', micro_group_0),
                          (h5_group_1, '/Test_001', micro_group_1)]
                for h5_group, expected_name, virtual_group in checks:
                    self.assertIsInstance(h5_group, h5py.Group)
                    self.assertEqual(h5_group.name, expected_name)
                    for key, expected_val in virtual_group.attrs.items():
                        self.assertTrue(np.all(get_attr(h5_group, key) == expected_val))
        finally:
            # Remove the scratch file even when an assertion above fails.
            if os.path.exists(file_path):
                os.remove(file_path)
Пример #2
0
    def test_write_dset_under_root(self):
        """Write a dataset with simple and region-reference attributes via ``writer.write``.

        The 'labels' attribute is given as a dict of slice tuples. After
        writing, the dataset is expected to carry every simple attribute, a
        'labels' attribute listing the reference names, and one attribute per
        named region that selects the matching rows of the data.
        """
        file_path = 'test.h5'
        self.__delete_existing_file(file_path)
        try:
            # Explicit write mode: h5py >= 3.0 opens read-only by default, which
            # fails because the file was just removed.
            with h5py.File(file_path, mode='w') as h5_f:
                writer = HDFwriter(h5_f)
                data = np.random.rand(5, 7)
                attrs = {'att_1': 'string_val',
                         'att_2': 1.2345,
                         'att_3': [1, 2, 3, 4],
                         'att_4': ['str_1', 'str_2', 'str_3'],
                         'labels': {'even_rows': (slice(0, None, 2), slice(None)),
                                    'odd_rows': (slice(1, None, 2), slice(None))}
                         }
                micro_dset = VirtualDataset('test', data)
                micro_dset.attrs = attrs.copy()
                [h5_dset] = writer.write(micro_dset)
                self.assertIsInstance(h5_dset, h5py.Dataset)

                # Region references are validated separately from the simple attributes.
                reg_ref = attrs.pop('labels')

                # Expected count: simple attributes + 'labels' + one per named region.
                self.assertEqual(len(h5_dset.attrs), len(attrs) + 1 + len(reg_ref))

                for key, expected_val in attrs.items():
                    self.assertTrue(np.all(get_attr(h5_dset, key) == expected_val))

                expected_labels = list(reg_ref.keys())
                self.assertTrue(np.all([x in expected_labels for x in get_attr(h5_dset, 'labels')]))

                expected_data = [data[:None:2], data[1:None:2]]
                written_data = [h5_dset[h5_dset.attrs['even_rows']], h5_dset[h5_dset.attrs['odd_rows']]]

                for exp, act in zip(expected_data, written_data):
                    self.assertTrue(np.allclose(exp, act))
        finally:
            # Remove the scratch file even when an assertion above fails.
            if os.path.exists(file_path):
                os.remove(file_path)
    def __validate_aux_dset_pair(self,
                                 h5_group,
                                 h5_inds,
                                 h5_vals,
                                 dim_names,
                                 dim_units,
                                 inds_matrix,
                                 vals_matrix=None,
                                 base_name=None,
                                 h5_main=None,
                                 is_spectral=True):
        """Validate an indices/values dataset pair against reference data and metadata.

        Parameters
        ----------
        h5_group : object
            Expected parent of both datasets.
        h5_inds, h5_vals : object
            The indices and values datasets; each must be an ``h5py.Dataset``.
        dim_names, dim_units : iterable
            Expected entries of the 'labels' and 'units' attributes. Each name
            in ``dim_names`` must also be an attribute usable to index the
            dataset.
        inds_matrix : array-like
            Reference contents of the indices dataset.
        vals_matrix : array-like, optional
            Reference contents of the values dataset. Defaults to ``inds_matrix``.
        base_name : str, optional
            Prefix of the dataset names ('<base_name>_Indices' / '_Values').
            Defaults to 'Spectroscopic' or 'Position' depending on ``is_spectral``.
        h5_main : object, optional
            When this is an ``h5py.Dataset``, its attributes named after the
            expected dataset names must resolve to the datasets being checked.
        is_spectral : bool, optional
            Selects the default base name and whether each dimension is read
            from a row (True) or a column (False) of the reference matrix.
        """
        if vals_matrix is None:
            vals_matrix = inds_matrix
        if base_name is None:
            base_name = 'Spectroscopic' if is_spectral else 'Position'
        else:
            # NOTE(review): `unicode` is not a Python 3 builtin - presumably it is
            # aliased elsewhere in this module; confirm.
            self.assertIsInstance(base_name, (str, unicode))

        for h5_dset, exp_dtype, exp_name, ref_data in zip(
                [h5_inds, h5_vals],
                [write_utils.INDICES_DTYPE, write_utils.VALUES_DTYPE],
                [base_name + '_Indices', base_name + '_Values'],
                [inds_matrix, vals_matrix]):
            if isinstance(h5_main, h5py.Dataset):
                self.assertEqual(h5_main.file[h5_main.attrs[exp_name]],
                                 h5_dset)
            self.assertIsInstance(h5_dset, h5py.Dataset)
            self.assertEqual(h5_dset.parent, h5_group)
            self.assertEqual(h5_dset.name.split('/')[-1], exp_name)
            self.assertTrue(np.allclose(ref_data, h5_dset[()]))
            self.assertEqual(h5_dset.dtype, exp_dtype)
            self.assertTrue(
                np.all([
                    attr_name in h5_dset.attrs.keys()
                    for attr_name in ['labels', 'units']
                ]))
            self.assertTrue(
                np.all([
                    x == y for x, y in zip(
                        dim_names, hdf_utils.get_attr(h5_dset, 'labels'))
                ]))
            self.assertTrue(
                np.all([
                    x == y for x, y in zip(
                        dim_units, hdf_utils.get_attr(h5_dset, 'units'))
                ]))
            # assert region references
            for dim_ind, curr_name in enumerate(dim_names):
                # Pick the slice for the active layout only: taking a column of a
                # spectral matrix first could raise IndexError when the matrix
                # has fewer columns than dimensions.
                if is_spectral:
                    expected = np.squeeze(ref_data[dim_ind])
                else:
                    expected = np.squeeze(ref_data[:, dim_ind])
                self.assertTrue(
                    np.allclose(expected,
                                np.squeeze(h5_dset[h5_dset.attrs[curr_name]])))
Пример #4
0
    def test_write_reg_ref_slice_dim_larger_than_data(self):
        """Write region references whose slice stop (15) exceeds the number of rows (5).

        The references are written with ``writer._write_dset_attributes`` and
        are expected to select the same rows as open-ended even/odd row slices
        of the data.
        """
        file_path = 'test.h5'
        self.__delete_existing_file(file_path)
        try:
            # Explicit write mode: h5py >= 3.0 opens read-only by default, which
            # fails because the file was just removed.
            with h5py.File(file_path, mode='w') as h5_f:
                writer = HDFwriter(h5_f)
                data = np.random.rand(5, 7)
                h5_dset = writer._create_simple_dset(h5_f, VirtualDataset('test', data))
                self.assertIsInstance(h5_dset, h5py.Dataset)

                attrs = {'labels': {'even_rows': (slice(0, 15, 2), slice(None)),
                                    'odd_rows': (slice(1, 15, 2), slice(None))}}

                writer._write_dset_attributes(h5_dset, attrs.copy())
                h5_f.flush()

                # two atts point to region references. one for labels
                self.assertEqual(len(h5_dset.attrs), 1 + len(attrs['labels']))

                # check if the labels attribute was written:
                expected_labels = list(attrs['labels'].keys())
                self.assertTrue(np.all([x in expected_labels for x in get_attr(h5_dset, 'labels')]))

                expected_data = [data[:None:2], data[1:None:2]]
                written_data = [h5_dset[h5_dset.attrs['even_rows']], h5_dset[h5_dset.attrs['odd_rows']]]

                for exp, act in zip(expected_data, written_data):
                    self.assertTrue(np.allclose(exp, act))
        finally:
            # Remove the scratch file even when an assertion above fails.
            if os.path.exists(file_path):
                os.remove(file_path)
Пример #5
0
    def test_generate_and_write_reg_ref_legal(self):
        """Write 'labels' given only as a list of names for a (2, 7) dataset.

        On Python 3 the write is expected to emit a ``UserWarning``. Afterwards
        the dataset must carry a 'labels' attribute plus 'row_1' and 'row_2'
        attributes that select the first and second row of the data.
        """
        file_path = 'test.h5'
        self.__delete_existing_file(file_path)
        try:
            # Explicit write mode: h5py >= 3.0 opens read-only by default, which
            # fails because the file was just removed.
            with h5py.File(file_path, mode='w') as h5_f:
                writer = HDFwriter(h5_f)
                data = np.random.rand(2, 7)
                h5_dset = writer._create_simple_dset(h5_f, VirtualDataset('test', data))
                self.assertIsInstance(h5_dset, h5py.Dataset)

                attrs = {'labels': ['row_1', 'row_2']}
                if sys.version_info.major == 3:
                    # assertWarns is only used on the Python 3 branch
                    with self.assertWarns(UserWarning):
                        writer._write_dset_attributes(h5_dset, attrs.copy())
                else:
                    writer._write_dset_attributes(h5_dset, attrs.copy())
                h5_f.flush()

                # two atts point to region references. one for labels
                self.assertEqual(len(h5_dset.attrs), 1 + len(attrs['labels']))

                # check if the labels attribute was written:
                expected_labels = list(attrs['labels'])
                self.assertTrue(np.all([x in expected_labels for x in get_attr(h5_dset, 'labels')]))

                expected_data = [data[0], data[1]]
                written_data = [h5_dset[h5_dset.attrs['row_1']], h5_dset[h5_dset.attrs['row_2']]]

                for exp, act in zip(expected_data, written_data):
                    self.assertTrue(np.allclose(np.squeeze(exp), np.squeeze(act)))
        finally:
            # Remove the scratch file even when an assertion above fails.
            if os.path.exists(file_path):
                os.remove(file_path)
Пример #6
0
    def test_group_indexing_sequential(self):
        """Write two 'Test_' groups in separate calls and check their index suffixes.

        The first group is expected at '/Test_000'. After an unrelated group
        'blah' has been written, a second 'Test_' group is expected at
        '/Test_001'. Each written group must carry the attributes of its
        virtual group.
        """
        file_path = 'test.h5'
        self.__delete_existing_file(file_path)
        try:
            # Explicit write mode: h5py >= 3.0 opens read-only by default, which
            # fails because the file was just removed.
            with h5py.File(file_path, mode='w') as h5_f:
                writer = HDFwriter(h5_f)
                micro_group_0 = VirtualGroup('Test_', attrs={'att_1': 'string_val', 'att_2': 1.2345})
                [h5_group_0] = writer.write(micro_group_0)

                _ = writer.write(VirtualGroup('blah'))

                self.assertIsInstance(h5_group_0, h5py.Group)
                self.assertEqual(h5_group_0.name, '/Test_000')
                for key, expected_val in micro_group_0.attrs.items():
                    self.assertTrue(np.all(get_attr(h5_group_0, key) == expected_val))

                micro_group_1 = VirtualGroup('Test_', attrs={'att_3': [1, 2, 3, 4],
                                                             'att_4': ['str_1', 'str_2', 'str_3']})
                [h5_group_1] = writer.write(micro_group_1)

                self.assertIsInstance(h5_group_1, h5py.Group)
                self.assertEqual(h5_group_1.name, '/Test_001')
                for key, expected_val in micro_group_1.attrs.items():
                    self.assertTrue(np.all(get_attr(h5_group_1, key) == expected_val))
        finally:
            # Remove the scratch file even when an assertion above fails.
            if os.path.exists(file_path):
                os.remove(file_path)
Пример #7
0
    def test_write_single_group(self):
        """Write one virtual group and check that each of its attributes can be read back."""
        file_path = 'test.h5'
        self.__delete_existing_file(file_path)
        try:
            # Explicit write mode: h5py >= 3.0 opens read-only by default, which
            # fails because the file was just removed.
            with h5py.File(file_path, mode='w') as h5_f:
                attrs = {'att_1': 'string_val',
                         'att_2': 1.2345,
                         'att_3': [1, 2, 3, 4],
                         'att_4': ['str_1', 'str_2', 'str_3']}

                micro_group = VirtualGroup('Test_')
                micro_group.attrs = attrs
                writer = HDFwriter(h5_f)
                [h5_group] = writer.write(micro_group)

                for key, expected_val in attrs.items():
                    self.assertTrue(np.all(get_attr(h5_group, key) == expected_val))
        finally:
            # Remove the scratch file even when an assertion above fails.
            if os.path.exists(file_path):
                os.remove(file_path)
Пример #8
0
    def test_write_simple_tree(self):
        """Write a two-level tree and validate every written object.

        The tree is an outer group holding an inner group (name ending in '_',
        expected to be written as 'indexed_inner_group_000') and a dataset;
        the inner group holds a second dataset. The test checks object types,
        parent relationships, simple attributes, and the region references
        generated from each dataset's 'labels' attribute.
        """
        file_path = 'test.h5'
        self.__delete_existing_file(file_path)
        try:
            # Explicit write mode: h5py >= 3.0 opens read-only by default, which
            # fails because the file was just removed.
            with h5py.File(file_path, mode='w') as h5_f:

                inner_dset_data = np.random.rand(5, 7)
                inner_dset_attrs = {'att_1': 'string_val',
                                    'att_2': 1.2345,
                                    'att_3': [1, 2, 3, 4],
                                    'att_4': ['str_1', 'str_2', 'str_3'],
                                    'labels': {'even_rows': (slice(0, None, 2), slice(None)),
                                               'odd_rows': (slice(1, None, 2), slice(None))}
                                    }
                inner_dset = VirtualDataset('inner_dset', inner_dset_data)
                inner_dset.attrs = inner_dset_attrs.copy()

                attrs_inner_grp = {'att_1': 'string_val',
                                   'att_2': 1.2345,
                                   'att_3': [1, 2, 3, 4],
                                   'att_4': ['str_1', 'str_2', 'str_3']}
                inner_group = VirtualGroup('indexed_inner_group_')
                inner_group.attrs = attrs_inner_grp
                inner_group.add_children(inner_dset)

                outer_dset_data = np.random.rand(5, 7)
                outer_dset_attrs = {'att_1': 'string_val',
                                    'att_2': 1.2345,
                                    'att_3': [1, 2, 3, 4],
                                    'att_4': ['str_1', 'str_2', 'str_3'],
                                    'labels': {'even_rows': (slice(0, None, 2), slice(None)),
                                               'odd_rows': (slice(1, None, 2), slice(None))}
                                    }
                outer_dset = VirtualDataset('test', outer_dset_data, parent='/test_group')
                outer_dset.attrs = outer_dset_attrs.copy()

                attrs_outer_grp = {'att_1': 'string_val',
                                   'att_2': 1.2345,
                                   'att_3': [1, 2, 3, 4],
                                   'att_4': ['str_1', 'str_2', 'str_3']}
                outer_group = VirtualGroup('unindexed_outer_group')
                outer_group.attrs = attrs_outer_grp
                outer_group.add_children([inner_group, outer_dset])

                writer = HDFwriter(h5_f)
                h5_refs_list = writer.write(outer_group)

                # Look up each written object by name in the returned references.
                [h5_outer_dset] = get_h5_obj_refs([outer_dset.name], h5_refs_list)
                [h5_inner_dset] = get_h5_obj_refs([inner_dset.name], h5_refs_list)
                [h5_outer_group] = get_h5_obj_refs([outer_group.name], h5_refs_list)
                [h5_inner_group] = get_h5_obj_refs(['indexed_inner_group_000'], h5_refs_list)

                self.assertIsInstance(h5_outer_dset, h5py.Dataset)
                self.assertIsInstance(h5_inner_dset, h5py.Dataset)
                self.assertIsInstance(h5_outer_group, h5py.Group)
                self.assertIsInstance(h5_inner_group, h5py.Group)

                # check assertions for the inner dataset first
                self.assertEqual(h5_inner_dset.parent, h5_inner_group)

                reg_ref = inner_dset_attrs.pop('labels')

                # Expected count: simple attributes + 'labels' + one per named region.
                self.assertEqual(len(h5_inner_dset.attrs), len(inner_dset_attrs) + 1 + len(reg_ref))

                for key, expected_val in inner_dset_attrs.items():
                    self.assertTrue(np.all(get_attr(h5_inner_dset, key) == expected_val))

                self.assertTrue(np.all([x in list(reg_ref.keys()) for x in get_attr(h5_inner_dset, 'labels')]))

                expected_data = [inner_dset_data[:None:2], inner_dset_data[1:None:2]]
                written_data = [h5_inner_dset[h5_inner_dset.attrs['even_rows']],
                                h5_inner_dset[h5_inner_dset.attrs['odd_rows']]]

                for exp, act in zip(expected_data, written_data):
                    self.assertTrue(np.allclose(exp, act))

                # check assertions for the inner data group next:
                self.assertEqual(h5_inner_group.parent, h5_outer_group)
                for key, expected_val in attrs_inner_grp.items():
                    self.assertTrue(np.all(get_attr(h5_inner_group, key) == expected_val))

                # check the outer dataset next:
                self.assertEqual(h5_outer_dset.parent, h5_outer_group)

                reg_ref = outer_dset_attrs.pop('labels')

                self.assertEqual(len(h5_outer_dset.attrs), len(outer_dset_attrs) + 1 + len(reg_ref))

                for key, expected_val in outer_dset_attrs.items():
                    self.assertTrue(np.all(get_attr(h5_outer_dset, key) == expected_val))

                self.assertTrue(np.all([x in list(reg_ref.keys()) for x in get_attr(h5_outer_dset, 'labels')]))

                expected_data = [outer_dset_data[:None:2], outer_dset_data[1:None:2]]
                written_data = [h5_outer_dset[h5_outer_dset.attrs['even_rows']],
                                h5_outer_dset[h5_outer_dset.attrs['odd_rows']]]

                for exp, act in zip(expected_data, written_data):
                    self.assertTrue(np.allclose(exp, act))

                # Finally check the outer group:
                self.assertEqual(h5_outer_group.parent, h5_f)
                for key, expected_val in attrs_outer_grp.items():
                    self.assertTrue(np.all(get_attr(h5_outer_group, key) == expected_val))
        finally:
            # Remove the scratch file even when an assertion above fails.
            if os.path.exists(file_path):
                os.remove(file_path)
    def test_legal_translation(self):
        """Translate in-memory arrays with ``NumpyTranslator`` and validate the resulting file.

        The test passes a (15, 14) main array, two position dimensions
        (sizes 5 and 3), two spectroscopic dimensions (sizes 7 and 2), a
        parameter dict and two extra datasets to ``translate``. It then opens
        the file read-only and checks the root attributes, the
        'Measurement_000' / 'Channel_000' group layout, the main dataset, both
        ancillary dataset pairs and the extra datasets.
        """
        data_name = 'TestDataType'
        attrs = {
            'att_1': 'string_val',
            'att_2': 1.2345,
            'att_3': [1, 2, 3, 4],
            'att_4': ['str_1', 'str_2', 'str_3']
        }

        extra_dsets = {'dset_1': np.random.rand(5), 'dset_2': np.arange(25)}

        file_path = 'test_numpy_translator.h5'
        main_data = np.random.rand(15, 14)
        main_data_name = 'Test_Main'
        quantity = 'Current'
        units = 'nA'

        pos_sizes = [5, 3]
        pos_names = ['X', 'Y']
        pos_units = ['nm', 'um']
        pos_dims = [
            write_utils.Dimension(name, unit, np.arange(length))
            for name, unit, length in zip(pos_names, pos_units, pos_sizes)
        ]
        # Reference position matrix: one column per dimension (transposed stack).
        pos_data = np.vstack((np.tile(np.arange(5), 3),
                              np.repeat(np.arange(3), 5))).T

        spec_sizes = [7, 2]
        spec_names = ['Bias', 'Cycle']
        spec_units = ['V', '']
        spec_dims = [
            write_utils.Dimension(name, unit, np.arange(length))
            for name, unit, length in zip(spec_names, spec_units, spec_sizes)
        ]
        # Reference spectroscopic matrix: one row per dimension.
        spec_data = np.vstack((np.tile(np.arange(7), 2),
                               np.repeat(np.arange(2), 7)))

        self.__delete_existing_file(file_path)
        try:
            translator = NumpyTranslator()
            _ = translator.translate(file_path,
                                     data_name,
                                     main_data,
                                     quantity,
                                     units,
                                     pos_dims,
                                     spec_dims,
                                     parm_dict=attrs,
                                     extra_dsets=extra_dsets)

            with h5py.File(file_path, mode='r') as h5_f:
                # we are not interested in most of the attributes under root besides two:
                self.assertEqual(data_name, hdf_utils.get_attr(h5_f, 'data_type'))
                self.assertEqual('NumpyTranslator',
                                 hdf_utils.get_attr(h5_f, 'translator'))

                # First level should have absolutely nothing besides one group
                self.assertEqual(len(h5_f.items()), 1)
                self.assertTrue('Measurement_000' in h5_f.keys())
                h5_meas_grp = h5_f['Measurement_000']
                self.assertIsInstance(h5_meas_grp, h5py.Group)

                # check the attributes under this group
                self.assertEqual(len(h5_meas_grp.attrs), len(attrs))
                for key, expected_val in attrs.items():
                    self.assertTrue(
                        np.all(
                            hdf_utils.get_attr(h5_meas_grp, key) == expected_val))

                # Again, this group should only have one group - Channel_000
                self.assertEqual(len(h5_meas_grp.items()), 1)
                self.assertTrue('Channel_000' in h5_meas_grp.keys())
                h5_chan_grp = h5_meas_grp['Channel_000']
                self.assertIsInstance(h5_chan_grp, h5py.Group)

                # The channel group must hold the main dataset, the four ancillary
                # datasets and every extra dataset - nothing else.
                self.assertEqual(len(h5_chan_grp.items()), 5 + len(extra_dsets))
                for dset_name in [
                        'Raw_Data', 'Position_Indices', 'Position_Values',
                        'Spectroscopic_Indices', 'Spectroscopic_Values'
                ]:
                    self.assertTrue(dset_name in h5_chan_grp.keys())
                    h5_dset = h5_chan_grp[dset_name]
                    self.assertIsInstance(h5_dset, h5py.Dataset)

                pycro_main = PycroDataset(h5_chan_grp['Raw_Data'])

                self.assertIsInstance(pycro_main, PycroDataset)
                self.assertEqual(pycro_main.name.split('/')[-1], 'Raw_Data')
                self.assertEqual(pycro_main.parent, h5_chan_grp)
                self.assertTrue(np.allclose(main_data, pycro_main[()]))

                self.__validate_aux_dset_pair(h5_chan_grp,
                                              pycro_main.h5_pos_inds,
                                              pycro_main.h5_pos_vals,
                                              pos_names,
                                              pos_units,
                                              pos_data,
                                              h5_main=pycro_main,
                                              is_spectral=False)

                self.__validate_aux_dset_pair(h5_chan_grp,
                                              pycro_main.h5_spec_inds,
                                              pycro_main.h5_spec_vals,
                                              spec_names,
                                              spec_units,
                                              spec_data,
                                              h5_main=pycro_main,
                                              is_spectral=True)

                # Now validate each of the extra datasets:
                for key, val in extra_dsets.items():
                    self.assertTrue(key in h5_chan_grp.keys())
                    h5_dset = h5_chan_grp[key]
                    self.assertIsInstance(h5_dset, h5py.Dataset)
                    self.assertTrue(np.allclose(val, h5_dset[()]))
        finally:
            # Remove the translated file even when translation or an assertion fails.
            if os.path.exists(file_path):
                os.remove(file_path)