示例#1
0
def write_enum_datasets(f):
    """Write 1D and 2D enum datasets for each unsigned-integer base type.

    Creates datasets named ``enum_<base>_data`` (1D, values 0..3) and
    ``2d_enum_<base>_data`` (2x2, same values) for base types
    uint8/uint16/uint32/uint64, then flushes and closes *f*.

    :param f: an open, writable ``h5py.File`` (closed on return)
    """
    members = {"RED": 0, "GREEN": 1, "BLUE": 2, "YELLOW": 3}
    basetypes = (np.uint8, np.uint16, np.uint32, np.uint64)

    # 1D datasets: one per base type, all sharing the same member mapping.
    data = np.arange(4)
    for basetype in basetypes:
        enum_type = h5py.enum_dtype(members, basetype=basetype)
        f.create_dataset("enum_{}_data".format(np.dtype(basetype).name),
                         data=data, dtype=enum_type)

    # 2D datasets: identical values reshaped to 2x2.
    data = np.arange(4).reshape(2, 2)
    for basetype in basetypes:
        enum_type = h5py.enum_dtype(members, basetype=basetype)
        f.create_dataset("2d_enum_{}_data".format(np.dtype(basetype).name),
                         data=data, dtype=enum_type)

    f.flush()
    f.close()
示例#2
0
 def test_create(self):
     """Creating an enum dataset preserves the enum mapping on read-back."""
     enum_dt = h5py.enum_dtype(self.EDICT, basetype='i')
     dset = self.f.create_dataset('x', (100, 100), dtype=enum_dt)
     recovered = h5py.check_enum_dtype(dset.dtype)
     self.assertEqual(recovered, self.EDICT)
示例#3
0
def write_spikes(filepath):
    """Write a spikes file with two populations under /spikes.

    Population 0 is sorted by timestamp (sorting enum value 2),
    population 1 by node id (sorting enum value 1).
    """
    population_names = ["default", "default2"]
    timestamps_base = (0.3, 0.1, 0.2, 1.3, 0.7)
    node_ids_base = (1, 2, 0, 0, 2)

    sorting_type = h5py.enum_dtype({"none": 0, "by_id": 1, "by_time": 2})

    with h5py.File(filepath, "w") as h5f:
        h5f.create_group("spikes")

        def _write_population(name, sorting, times, ids):
            # One population group with its sorting attribute and datasets.
            grp = h5f.create_group("/spikes/" + name)
            grp.attrs.create("sorting", data=sorting, dtype=sorting_type)
            grp.create_dataset("timestamps", data=times, dtype=np.double)
            grp.create_dataset("node_ids", data=ids, dtype=np.uint64)

        times, ids = zip(*sorted(zip(timestamps_base, node_ids_base)))
        _write_population(population_names[0], 2, times, ids)

        ids, times = zip(*sorted(zip(node_ids_base, timestamps_base)))
        _write_population(population_names[1], 1, times, ids)
示例#4
0
    def test_compound_vlen(self):
        """Compound dtypes mixing vlen and enum members keep member offsets."""
        vlen_u8 = h5py.vlen_dtype(np.uint8)
        enum_u8 = h5py.enum_dtype({'OFF': 0, 'ON': 1}, basetype=np.uint8)

        for aligned in (False, True):
            cdt = np.dtype(
                [('a', enum_u8), ('foo', vlen_u8), ('bar', vlen_u8),
                 ('switch', enum_u8)],
                align=aligned)
            expected_offsets = [cdt.fields[name][1] for name in cdt.names]

            for logical in (False, True):
                if logical and aligned:
                    # Vlen types have different size in the numpy struct
                    self.assertRaises(TypeError, h5py.h5t.py_create, cdt,
                                      logical=logical)
                else:
                    htype = h5py.h5t.py_create(cdt, logical=logical)
                    actual_offsets = [htype.get_member_offset(m)
                                      for m in range(htype.get_nmembers())]
                    if aligned:
                        self.assertEqual(expected_offsets, actual_offsets)
示例#5
0
def write_spikes(filepath):
    """Write a spikes file with two populations, one sorted each way."""
    population_names = ['default', 'default2']
    timestamps_base = (0.3, 0.1, 0.2, 1.3, 0.7)
    node_ids_base = (1, 2, 0, 0, 2)

    sorting_type = h5py.enum_dtype({"none": 0, "by_id": 1, "by_time": 2})

    with h5py.File(filepath, 'w') as h5f:
        h5f.create_group('spikes')

        # First population: sorted by timestamp (sorting enum value 2).
        pairs = sorted(zip(timestamps_base, node_ids_base))
        timestamps = tuple(p[0] for p in pairs)
        node_ids = tuple(p[1] for p in pairs)
        pop = h5f.create_group('/spikes/' + population_names[0])
        pop.attrs.create('sorting', data=2, dtype=sorting_type)
        pop.create_dataset('timestamps', data=timestamps, dtype=np.double)
        pop.create_dataset('node_ids', data=node_ids, dtype=np.uint64)

        # Second population: sorted by node id (sorting enum value 1).
        pairs = sorted(zip(node_ids_base, timestamps_base))
        node_ids = tuple(p[0] for p in pairs)
        timestamps = tuple(p[1] for p in pairs)
        pop2 = h5f.create_group('/spikes/' + population_names[1])
        pop2.attrs.create('sorting', data=1, dtype=sorting_type)
        pop2.create_dataset('timestamps', data=timestamps, dtype=np.double)
        pop2.create_dataset('node_ids', data=node_ids, dtype=np.uint64)
示例#6
0
def main():
    """Create /group/dataset and attach attributes of several HDF5 types."""
    with h5py.File(file_path, 'w') as f:
        dset = f.create_dataset('/group/dataset', shape=(3, 4), dtype='i')
        dset[:] = [[1, 2, 3, 4], [5, 6, 7, 8], [9, 10, 11, 12]]

        # Scalar float attribute.
        dset.attrs['double'] = math.pi

        # Variable-length UTF-8 string attribute.
        greeting = '早上好!'
        dset.attrs['string-vlen'] = greeting

        greeting_utf8 = greeting.encode('utf-8')
        ascii_text = 'Hello, world!'

        # Fixed-length ASCII string attribute.
        dset.attrs.create('string-ascii', ascii_text, None,
                          '<S{0}'.format(len(ascii_text)))

        fixed_utf8 = h5py.string_dtype('utf-8', len(greeting_utf8))
        # HDFView can not display the value of this attribute correctly, ViTables can.
        dset.attrs.create('string', greeting_utf8, None, fixed_utf8)

        dset.attrs['boolean'] = True

        # Enum attribute with a non-contiguous member value.
        color_dt = h5py.enum_dtype({"RED": 0, "GREEN": 1, "BLUE": 42},
                                   basetype='i')
        dset.attrs.create('color', 42, dtype=color_dt)
示例#7
0
 def test_readwrite(self):
     """Enum dataset elements can be written and read as plain integers."""
     enum_dt = h5py.enum_dtype(self.EDICT, basetype='i4')
     dset = self.f.create_dataset('x', (100, 100), dtype=enum_dt)
     dset[35, 37] = 42
     dset[1, :] = 1
     self.assertEqual(dset[35, 37], 42)
     self.assertArrayEqual(dset[1, :], np.array((1,) * 100, dtype='i4'))
示例#8
0
    def write_simple_attributes(self, group_object):
        """Write this object's simple-typed attributes onto an HDF5 node.

        Iterates ``self._attributes`` and writes each value as an HDF5
        attribute on *group_object*:

        - sequence / group-writer / S1xxObject values are skipped for now,
        - dates/times are written as ISO character strings
          (per S100, section 10C-7 table 10C-1),
        - Enum values are written with an HDF5 enumerated type,
        - everything else is written as a plain attribute.

        Also records ``group_object.name`` in ``self._hdf5_path``.
        """
        self._hdf5_path = group_object.name

        for key, val in self._attributes.items():
            if isinstance(val, s1xx_sequence_types):
                continue  # skip these types for now
            elif isinstance(val, S1xxWritesGroupObjects):
                continue  # skip these types for now
            elif isinstance(val, S1xxObject):
                continue  # skip these types for now
            elif isinstance(val,
                            (datetime.date, datetime.datetime, datetime.time)):
                # Fix: the original passed a "{}" placeholder to logging,
                # which uses lazy %-style formatting, so the value was never
                # substituted (and triggered an internal formatting error).
                logging.debug("%s datetime: %s", key, val)
                group_object.attrs[key] = val.isoformat()
            elif isinstance(val, Enum):
                logging.debug("%s enumeration: %s", key, val)
                enum_as_dict = collections.OrderedDict(
                    [[item.name, item.value] for item in type(val)])
                int_type = numpy.uint8
                try:  # enum_dtype is added in h5py 2.10
                    enumtype = h5py.enum_dtype(enum_as_dict, int_type)
                except AttributeError:  # special_dtype is for h5py <= 2.9
                    enumtype = h5py.special_dtype(enum=(int_type,
                                                        enum_as_dict))
                try:
                    group_object.attrs.create(key, val.value, dtype=enumtype)
                except TypeError:  # h5py isn't accepting OrderedDict, convert to dict
                    try:
                        enumtype = h5py.enum_dtype(dict(enum_as_dict),
                                                   int_type)
                    except AttributeError:
                        enumtype = h5py.special_dtype(
                            enum=(int_type, dict(enum_as_dict)))
                    group_object.attrs.create(key, val.value, dtype=enumtype)

            else:
                logging.debug("%s simple type: %s", key, val)
                group_object.attrs[key] = val
示例#9
0
def write_params(output_path, pop_params_dict):
    """Append per-gid synaptic parameter values to an HDF5 'Parameters' group.

    Flattens ``pop_params_dict[population][gid]`` (a list of parameter
    record dicts) into ``population.source.sec_type.syn_name.param_path``
    keys, registers an enum type mapping those keys to uint16 codes (once),
    and writes one compound (parameter, value) dataset per gid.

    :param output_path: HDF5 file path, opened in append mode
    :param pop_params_dict: {population: {gid: [param record dicts]}}
    """
    output_pop_parameters = {}
    param_key_list = []
    for population, gid_dict in pop_params_dict.items():
        this_pop_output_parameters = {}
        for gid, param_dicts in gid_dict.items():
            this_output_params = {}
            for pd in param_dicts:
                param_key = f'{pd["population"]}.{pd["source"]}.{pd["sec_type"]}.{pd["syn_name"]}.{pd["param_path"]}'
                param_key_list.append(param_key)
                this_output_params[param_key] = pd["param_val"]
            this_pop_output_parameters[f'{gid}'] = this_output_params
        output_pop_parameters[population] = this_pop_output_parameters

    param_keys = set(param_key_list)
    # NOTE: iteration over a set makes the code assignment order arbitrary;
    # the enum stored in the file records the actual mapping used.
    param_mapping = {name: idx for (idx, name) in enumerate(param_keys)}

    # Context manager guarantees the file is closed even if a write fails.
    with h5py.File(output_path, 'a') as output_file:
        parameters_grp = h5_get_group(output_file, 'Parameters')
        if 'parameters_type' not in parameters_grp:
            dt = h5py.enum_dtype(param_mapping, basetype=np.uint16)
            parameters_grp['parameter_enum'] = dt
            dt = np.dtype([("parameter", parameters_grp['parameter_enum']),
                           ("value", np.float32)])
            parameters_grp['parameters_type'] = dt
        for population, this_pop_output_parameters in output_pop_parameters.items():
            pop_grp = h5_get_group(parameters_grp, population)
            for id_str, this_output_params in this_pop_output_parameters.items():
                n_params = len(this_output_params)
                dset = h5_get_dataset(
                    pop_grp,
                    id_str,
                    maxshape=(n_params, ),
                    dtype=parameters_grp['parameters_type'].dtype)
                dset.resize((n_params, ))
                a = np.zeros(n_params,
                             dtype=parameters_grp['parameters_type'].dtype)
                # viewitems (py2 compatibility helper) replaced by .items();
                # the f-strings above already require Python 3.6+.
                for idx, (parm, val) in enumerate(this_output_params.items()):
                    a[idx]["parameter"] = param_mapping[parm]
                    a[idx]["value"] = val
                dset[:] = a
示例#10
0
 def write_group(self, dict, hf_group):
     """Write each key/value of *dict* onto *hf_group* as an attribute or dataset."""
     # NOTE: the parameter name shadows the builtin ``dict``; kept for API compatibility.
     for key, value in dict.items():
         if isinstance(value, str):
             # strings are stored as fixed ASCII bytes
             hf_group.attrs[key] = bytes(value, encoding="ascii")
         elif isinstance(value, (float, bool, int)):
             hf_group.attrs[key] = value
         elif isinstance(value, list):
             hf_group.create_dataset(key, data=value)
         elif value is None:
             # NOTE(review): this stores the ``np.empty`` *function object*,
             # not an empty array — looks unintended; confirm what a None
             # value should serialize to.
             hf_group.attrs[key] = np.empty
         elif value.__module__ == "alt_core" and "__members__" in dir(value):
             # alt_core enum-like object: store as an HDF5 enum attribute.
             members = {name: member.__int__()
                        for name, member in type(value).__members__.items()}
             dt = h5py.enum_dtype(members, basetype='i')
             hf_group.attrs.create(key, value.__int__(), dtype=dt)
         else:
             pass  # unsupported types are silently ignored
示例#11
0
    def test_vlen_enum(self):
        """Vlen-of-enum data and its enum mapping survive a write/read cycle."""
        fname = self.mktemp()
        written = [[1], [1, 2]]
        vlen_enum_dt = h5py.vlen_dtype(h5py.enum_dtype(dict(foo=1, bar=2), 'i'))

        with h5py.File(fname, 'w') as f:
            dset = f.create_dataset('test', (len(written), ),
                                    dtype=vlen_enum_dt)
            dset[:] = np.array(written)

        with h5py.File(fname, 'r') as f:
            stored = f['test']
            stored_dt = stored.dtype
            read_back = [row.tolist() for row in stored[:]]

        self.assertEqual(written, read_back)
        self.assertEqual(
            h5py.check_enum_dtype(h5py.check_vlen_dtype(vlen_enum_dt)),
            h5py.check_enum_dtype(h5py.check_vlen_dtype(stored_dt)))
示例#12
0
    def test_vlen_enum(self):
        """Round-trip a variable-length enum dataset and compare contents."""
        path = self.mktemp()
        expected = [[1], [1, 2]]
        base_enum = h5py.enum_dtype(dict(foo=1, bar=2), 'i')
        dt1 = h5py.vlen_dtype(base_enum)

        with h5py.File(path, 'w') as handle:
            ds = handle.create_dataset('test', (len(expected), ), dtype=dt1)
            ds[:] = np.array(expected)

        with h5py.File(path, 'r') as handle:
            ds_in = handle['test']
            dt2 = ds_in.dtype
            actual = [entry.tolist() for entry in ds_in[:]]

        self.assertEqual(expected, actual)
        self.assertEqual(h5py.check_enum_dtype(h5py.check_vlen_dtype(dt1)),
                         h5py.check_enum_dtype(h5py.check_vlen_dtype(dt2)))
示例#13
0
def write_spikes(filepath):
    """Write a spikes file with four populations under /spikes.

    'All' is sorted by time, 'spikes1' by node id, 'spikes2' is unsorted
    (its timestamps carry a 'units' attribute), and 'empty' has no spikes.
    """
    population_names = ['All', 'spikes1', 'spikes2', 'empty']
    timestamps_base = (0.3, 0.1, 0.2, 1.3, 0.7)
    node_ids_base = (3, 5, 2, 3, 2)

    sorting_type = h5py.enum_dtype({"none": 0, "by_id": 1, "by_time": 2})
    string_dtype = h5py.special_dtype(vlen=get_vlen_str_type())

    with h5py.File(filepath, 'w') as h5f:
        h5f.create_group('spikes')

        # 'All': sorted by timestamp (sorting enum value 2).
        gpop_all = h5f.create_group('/spikes/' + population_names[0])
        gpop_all.attrs.create('sorting', data=2, dtype=sorting_type)
        timestamps, node_ids = zip(
            *sorted(zip(timestamps_base, node_ids_base)))
        # Fix: the original bound this dataset to a local named ``set``,
        # shadowing the builtin; the binding was never used.
        gpop_all.create_dataset('timestamps',
                                data=timestamps,
                                dtype=np.double)
        gpop_all.create_dataset('node_ids', data=node_ids, dtype=np.uint64)

        # 'spikes1': sorted by node id (sorting enum value 1).
        gpop_spikes1 = h5f.create_group('/spikes/' + population_names[1])
        gpop_spikes1.attrs.create('sorting', data=1, dtype=sorting_type)
        node_ids, timestamps = zip(
            *sorted(zip(node_ids_base, timestamps_base)))
        gpop_spikes1.create_dataset('timestamps',
                                    data=timestamps,
                                    dtype=np.double)
        gpop_spikes1.create_dataset('node_ids', data=node_ids, dtype=np.uint64)

        # 'spikes2': unsorted; timestamps carry a vlen-string 'units' attribute.
        gpop_spikes2 = h5f.create_group('/spikes/' + population_names[2])
        gpop_spikes2.attrs.create('sorting', data=0, dtype=sorting_type)
        dtimestamps = gpop_spikes2.create_dataset('timestamps',
                                                  data=timestamps_base,
                                                  dtype=np.double)
        dtimestamps.attrs.create('units', data="ms", dtype=string_dtype)
        gpop_spikes2.create_dataset('node_ids',
                                    data=node_ids_base,
                                    dtype=np.uint64)

        # 'empty': declared sorted by id but contains no spikes.
        gpop_empty = h5f.create_group('/spikes/' + population_names[3])
        gpop_empty.attrs.create('sorting', data=1, dtype=sorting_type)
        gpop_empty.create_dataset('timestamps',
                                  data=[],
                                  dtype=np.double)
        gpop_empty.create_dataset('node_ids', data=[], dtype=np.uint64)
示例#14
0
 def save_to_h5(self, hf):
     """Persist the collected token columns into an open HDF5 file.

     Each entry of ``self._tokens`` maps a dataset name to a dict with a
     'dtype' tag ('int', 'enum' or 'str') and its 'data'. Enum columns
     are stored as uint8 codes with an HDF5 enum type; string columns as
     lzf-compressed UTF-8 with None mapped to ''.

     :raises ValueError: for an unknown dtype tag, or an enum column with
         more distinct values than uint8 can encode
     """
     for name, column in self._tokens.items():
         kind = column['dtype']
         data = column['data']
         if kind == 'int':
             hf.create_dataset(name, data=data)
         elif kind == 'enum':
             # NOTE: set iteration order is arbitrary, so code assignment
             # varies between runs; the stored enum records the mapping.
             mapping = {label: code for code, label in enumerate(set(data))}
             # Fix: was an ``assert``, which is stripped under ``python -O``;
             # validate explicitly instead.
             if len(mapping) > 0xff:
                 raise ValueError(
                     "too many distinct enum values for uint8: %d" % len(mapping))
             dt = h5py.enum_dtype(mapping, basetype=np.uint8)
             hf.create_dataset(name, dtype=dt,
                               data=[mapping[x] for x in data])
         elif kind == 'str':
             dt = h5py.string_dtype(encoding='utf8')
             strs = ['' if x is None else str(x) for x in data]
             hf.create_dataset(name,
                               dtype=dt,
                               data=[s.encode("utf8") for s in strs],
                               compression='lzf')
         else:
             raise ValueError(kind)
示例#15
0
    def test_compound_vlen_enum(self):
        """Compound data combining vlen and enum fields round-trips intact."""
        switch_dt = h5py.enum_dtype({'OFF': 0, 'ON': 1}, basetype=np.uint8)
        ragged_dt = h5py.vlen_dtype(np.uint8)

        def u8(items):
            return np.array(items, dtype=np.uint8)

        f = self.f

        compound_dt = np.dtype([('foo', ragged_dt),
                                ('bar', ragged_dt),
                                ('switch', switch_dt)])
        dset = f.create_dataset('dt_vve', shape=(2,), dtype=compound_dt)
        expected = np.array([(u8([1, 2, 3]), u8([1, 2]), 1),
                             (u8([]), u8([2, 4, 6]), 0)],
                            dtype=compound_dt)
        dset[:] = expected
        actual = dset[:]
        self.assertVlenArrayEqual(expected['foo'], actual['foo'])
        self.assertVlenArrayEqual(expected['bar'], actual['bar'])
        self.assertArrayEqual(expected['switch'], actual['switch'])
示例#16
0
    def test_compound_vlen_enum(self):
        """Writing then reading a vlen+enum compound dataset preserves every field."""
        onoff = h5py.enum_dtype({'OFF': 0, 'ON': 1}, basetype=np.uint8)
        bytes_vlen = h5py.vlen_dtype(np.uint8)

        def u8(items):
            return np.array(items, dtype=np.uint8)

        handle = self.f

        record_dt = np.dtype(
            [('foo', bytes_vlen), ('bar', bytes_vlen), ('switch', onoff)])
        vve = handle.create_dataset('dt_vve', shape=(2, ), dtype=record_dt)
        rows = [(u8([1, 2, 3]), u8([1, 2]), 1),
                (u8([]), u8([2, 4, 6]), 0)]
        written = np.array(rows, dtype=record_dt)
        vve[:] = written
        read_back = vve[:]
        self.assertVlenArrayEqual(written['foo'], read_back['foo'])
        self.assertVlenArrayEqual(written['bar'], read_back['bar'])
        self.assertArrayEqual(written['switch'], read_back['switch'])
示例#17
0
    def test_compound_vlen(self):
        """py_create keeps numpy member offsets for vlen/enum compounds."""
        ragged = h5py.vlen_dtype(np.uint8)
        onoff = h5py.enum_dtype({'OFF': 0, 'ON': 1}, basetype=np.uint8)

        fields = [('a', onoff), ('foo', ragged), ('bar', ragged),
                  ('switch', onoff)]
        for use_align in (False, True):
            record = np.dtype(fields, align=use_align)
            numpy_offsets = [record.fields[name][1] for name in record.names]

            for logical in (False, True):
                if logical and use_align:
                    # Vlen types have different size in the numpy struct
                    self.assertRaises(TypeError, h5py.h5t.py_create,
                                      record, logical=logical)
                    continue
                hdf_type = h5py.h5t.py_create(record, logical=logical)
                hdf_offsets = [hdf_type.get_member_offset(idx)
                               for idx in range(hdf_type.get_nmembers())]
                if use_align:
                    self.assertEqual(numpy_offsets, hdf_offsets)
示例#18
0
def write_samples(group, dataset, class_dict, class_index, image_shape, chunk_size, num_chunks, compression,
                  compression_opts):
    """Stream class-balanced image/label chunks from *dataset* into HDF5.

    Allocates 'image' and 'label' datasets under *group* (labels use an
    enum dtype built from *class_dict*), then fills them chunk by chunk
    from a DataLoader driven by a fixed-alpha balancing sampler.

    Note: *image_shape* is accepted for interface compatibility but the
    shapes actually used come from ``dataset.image_shape``.
    """
    sampler = SamplerFactory().get(
        class_idxs=class_index,
        batch_size=chunk_size,
        n_batches=num_chunks,
        alpha=0.5,
        kind='fixed'
    )

    # Fix: the original bound this to ``len``, shadowing the builtin.
    num_samples = chunk_size * num_chunks

    group.create_dataset('image',
                         shape=(num_samples, *dataset.image_shape),
                         chunks=(chunk_size, *dataset.image_shape),
                         dtype=dataset.image_dtype,
                         compression=compression,
                         compression_opts=compression_opts,
                         shuffle=False
                         )
    group.create_dataset('label',
                         shape=(num_samples,),
                         chunks=(chunk_size,),
                         dtype=h5.enum_dtype(class_dict, basetype=np.int64),
                         compression=compression,
                         compression_opts=compression_opts,
                         shuffle=False
                         )

    with Progress() as p:
        task = p.add_task(description=f'[red] writing {group.name}', total=num_chunks)
        for i, (image, cls) in enumerate(DataLoader(dataset, batch_sampler=sampler, num_workers=0)):
            offset = i * chunk_size
            group['image'][offset:offset + chunk_size] = image.numpy()
            group['label'][offset:offset + chunk_size] = cls.numpy()
            p.update(task, total=num_chunks, advance=1)
示例#19
0
def h5_init_types(f,
                  opt_id,
                  feature_dtypes,
                  constraint_names,
                  param_names,
                  problem_parameters,
                  spec,
                  metadata=None):
    """Register the HDF5 enum/compound types for an optimization run.

    Under group *opt_id* of file *f*, creates named types for the
    objective, optional features and constraints, and the parameters,
    and fills the 'objective_spec', 'feature_spec', 'constraint_spec',
    'problem_parameters' and 'parameter_spec' datasets from
    *problem_parameters* and *spec* (bounds and integrality).
    *metadata* is accepted for interface compatibility but unused here.
    """
    opt_grp = h5_get_group(f, opt_id)

    param_keys = set(param_names)
    param_keys.update(problem_parameters.keys())
    # create an HDF5 enumerated type for the parameter label
    param_mapping = {name: idx for (idx, name) in enumerate(param_keys)}

    feature_keys = None
    if feature_dtypes is not None:
        feature_keys = [feature_dtype[0] for feature_dtype in feature_dtypes]

    # create HDF5 types for features, if any
    feature_mapping = None
    if feature_keys is not None:
        feature_mapping = {
            name: idx
            for (idx, name) in enumerate(feature_keys)
        }

    constraint_mapping = None
    if constraint_names is not None:
        constraint_mapping = {
            name: idx
            for (idx, name) in enumerate(constraint_names)
        }

    # Objective: a single named value 'y'.
    objective_names = ['y']
    objective_mapping = {
        name: idx
        for (idx, name) in enumerate(objective_names)
    }
    dt = h5py.enum_dtype(objective_mapping, basetype=np.uint16)
    opt_grp['objective_enum'] = dt
    dt = np.dtype({'names': objective_names, 'formats': [np.float32]})
    opt_grp['objective_type'] = dt
    dt = np.dtype([("objective", opt_grp['objective_enum'])])
    opt_grp['objective_spec_type'] = dt
    dset = h5_get_dataset(opt_grp,
                          'objective_spec',
                          maxshape=(len(objective_names), ),
                          dtype=opt_grp['objective_spec_type'].dtype)
    dset.resize((len(objective_names), ))
    a = np.zeros(len(objective_names),
                 dtype=opt_grp['objective_spec_type'].dtype)
    for idx, parm in enumerate(objective_names):
        a[idx]["objective"] = objective_mapping[parm]
    dset[:] = a

    if feature_mapping is not None:
        dt = h5py.enum_dtype(feature_mapping, basetype=np.uint16)
        opt_grp['feature_enum'] = dt

        dt = np.dtype([("feature", opt_grp['feature_enum'])])
        opt_grp['feature_spec_type'] = dt

        dt = np.dtype(feature_dtypes)
        opt_grp['feature_type'] = dt

        dset = h5_get_dataset(opt_grp,
                              'feature_spec',
                              maxshape=(len(feature_keys), ),
                              dtype=opt_grp['feature_spec_type'].dtype)
        dset.resize((len(feature_keys), ))
        a = np.zeros(len(feature_keys),
                     dtype=opt_grp['feature_spec_type'].dtype)
        for idx, parm in enumerate(feature_keys):
            a[idx]["feature"] = feature_mapping[parm]
        dset[:] = a

    if constraint_mapping is not None:
        dt = h5py.enum_dtype(constraint_mapping, basetype=np.uint16)
        opt_grp['constraint_enum'] = dt

        dt = np.dtype([("constraint", opt_grp['constraint_enum'])])
        opt_grp['constraint_spec_type'] = dt

        dt = np.dtype({'names': constraint_names, 'formats': [np.int8]})
        opt_grp['constraint_type'] = dt

        dset = h5_get_dataset(opt_grp,
                              'constraint_spec',
                              maxshape=(len(constraint_names), ),
                              dtype=opt_grp['constraint_spec_type'].dtype)
        dset.resize((len(constraint_names), ))
        a = np.zeros(len(constraint_names),
                     dtype=opt_grp['constraint_spec_type'].dtype)
        for idx, parm in enumerate(constraint_names):
            a[idx]["constraint"] = constraint_mapping[parm]
        dset[:] = a

    dt = h5py.enum_dtype(param_mapping, basetype=np.uint16)
    opt_grp['parameter_enum'] = dt

    dt = np.dtype([("parameter", opt_grp['parameter_enum']),
                   ("value", np.float32)])
    opt_grp['problem_parameters_type'] = dt

    dset = h5_get_dataset(opt_grp,
                          'problem_parameters',
                          maxshape=(len(param_mapping), ),
                          dtype=opt_grp['problem_parameters_type'].dtype)
    dset.resize((len(param_mapping), ))
    a = np.zeros(len(param_mapping),
                 dtype=opt_grp['problem_parameters_type'].dtype)
    for idx, (parm, val) in enumerate(problem_parameters.items()):
        a[idx]["parameter"] = param_mapping[parm]
        a[idx]["value"] = val
    dset[:] = a

    # Fix: np.bool was deprecated in NumPy 1.20 and removed in 1.24;
    # np.bool_ is the supported spelling with identical semantics here.
    dt = np.dtype([("parameter", opt_grp['parameter_enum']),
                   ("is_integer", np.bool_), ("lower", np.float32),
                   ("upper", np.float32)])
    opt_grp['parameter_spec_type'] = dt

    is_integer = np.asarray(spec.is_integer_variable, dtype=np.bool_)
    upper = np.asarray(spec.upper, dtype=np.float32)
    lower = np.asarray(spec.lower, dtype=np.float32)

    dset = h5_get_dataset(opt_grp,
                          'parameter_spec',
                          maxshape=(len(param_names), ),
                          dtype=opt_grp['parameter_spec_type'].dtype)
    dset.resize((len(param_names), ))
    a = np.zeros(len(param_names), dtype=opt_grp['parameter_spec_type'].dtype)
    for idx, (parm, is_int, hi,
              lo) in enumerate(zip(param_names, is_integer, upper, lower)):
        a[idx]["parameter"] = param_mapping[parm]
        a[idx]["is_integer"] = is_int
        a[idx]["lower"] = lo
        a[idx]["upper"] = hi
    dset[:] = a

    dt = np.dtype({
        'names': param_names,
        'formats': [np.float32] * len(param_names)
    })
    opt_grp['parameter_space_type'] = dt
0
import os
import h5py
from pytz import timezone
from astral import LocationInfo

# Project-relative paths resolved from this file's location (two levels up).
PROJECT_PATH = os.path.join(os.path.dirname(os.path.realpath(__file__)),
                            "../..")
RAW_DATA_PATH = os.path.join(PROJECT_PATH, "data/raw/davos")
DATASET_PATH = os.path.join(PROJECT_PATH, "data/datasets")
MODELS_PATH = os.path.join(PROJECT_PATH, "pretrained_models")
AVAILABLE_MODELS_FILE = os.path.join(PROJECT_PATH,
                                     "cloudseg/inference/models.yaml")

# strftime/strptime formats at increasing precision, plus a display format.
TIMESTAMP_FORMAT_DAY = "%Y%m%d"
TIMESTAMP_FORMAT_MINUTE = "%Y%m%d%H%M"
TIMESTAMP_FORMAT = "%Y%m%d%H%M%S"
PRETTY_FORMAT = "%d.%m.%Y %H:%M:%S"

# Observation site (Davos, CH) and its local timezone.
LOCATION = LocationInfo("Davos", "Switzerland", "Europe/Zurich", 46.813492,
                        9.844433)
TIMEZONE = timezone("Europe/Zurich")

# HDF5 enum dtype for per-pixel labels; -1 marks masked pixels
# (valid because the base type 'i' is signed).
LABEL_DATATYPE = h5py.enum_dtype({
    "CLOUD": 0,
    "SKY": 1,
    "MASK": -1
},
                                 basetype="i")
示例#21
0
.hdf5 file containing two datasets.
"""

from pathlib import Path
import sys

import h5py
import numpy as np

# Create enumerated type for labels
# CoNLL-style BIO tags; the enum codes are the list positions.
labels = [
    "O", "B-MISC", "I-MISC", "B-PER", "I-PER", "B-ORG", "I-ORG", "B-LOC",
    "I-LOC"
]
label_dict = {l: i for i, l in enumerate(labels)}
label_enum = h5py.enum_dtype(label_dict, basetype='i')

# Vectorized tag-string -> enum-code mapper for numpy label arrays.
label_map = np.vectorize(label_dict.get)

# Dataset splits to convert (testa/testb are the CoNLL dev/test splits).
DATASETS = ["testa", "testb", "train"]

print(f"Will convert {DATASETS} to hdf5")
for ds in DATASETS:
    print(f"Processing {ds}")
    representation = f"representation.{ds}.npy"
    true_labels = f"true_labels.{ds}.npy"
    out = f"{ds}.hdf5"

    with h5py.File(out, "w") as f:
        f.create_dataset("representation", data=np.load(representation))
        f.create_dataset("true_labels",
示例#22
0
 def water_level_trend_dtype(self) -> Type[WaterLevelTrend]:
     """Return an HDF5 enum dtype mapping WaterLevelTrend names to values.

     Uses a dict comprehension instead of the original
     ``dict([... for ...])`` list-of-pairs construction (same result).
     """
     return h5py.enum_dtype(
         {water_level_trend.name: water_level_trend.value
          for water_level_trend in WaterLevelTrend})
示例#23
0
def write_compound_datasets(f):
    """Write a suite of compound-typed datasets (contiguous and chunked
    variants of each): a mixed-field record, a 2D complex-number record,
    compounds containing ragged (vlen) arrays, compounds with arrays of
    vlen strings, and nested compounds. Flushes and closes *f* on return.
    """
    utf8 = h5py.special_dtype(vlen=str)
    gender_enum_dtype = h5py.enum_dtype({"MALE": 0, "FEMALE": 1}, basetype=np.uint8)
    dt = np.dtype([
        ('firstName', utf8), # variable length utf8
        ('surname', 'S20'), # fixed length ASCII
        ('gender', gender_enum_dtype), # enum type
        ('age', np.uint8), # uint
        ('fav_number', np.float32), # float
        ('vector', np.float32, (3,))]) # array

    data = np.zeros(4, dtype=dt)

    # Set the example data
    data[0] = ('Bob', 'Smith', 0, 32, 1.0, [1, 2, 3])
    data[1] = ('Peter', 'Fletcher', 0, 43, 2.0, [16.2, 2.2, -32.4])
    data[2] = ('James', 'Mudd', 0, 12, 3.0, [-32.1,-774.1,-3.0])
    data[3] = ('Ellie', 'Kyle', 1, 22, 4.0, [2.1,74.1,-3.8])

    f.create_dataset('contiguous_compound', data=data)
    f.create_dataset('chunked_compound', data=data, chunks=(1,), compression="gzip")

    # 2d compound use img number example
    imgdt = np.dtype([
        ('real', np.float32),
        ('img', np.float32)
    ])
    data = np.zeros((3, 3), dtype=imgdt)
    data[0][0] = (2.3, -7.3)
    data[0][1] = (12.3, -17.3)
    data[0][2] = (-32.3, -0.3)
    data[1][0] = (2.3, -7.3)
    data[1][1] = (12.3, -17.3)
    data[1][2] = (-32.3, -0.3)
    data[2][0] = (2.3, -7.3)
    data[2][1] = (12.3, -17.3)
    data[2][2] = (-32.3, -0.3)

    f.create_dataset('2d_contiguous_compound', data=data)
    f.create_dataset('2d_chunked_compound', data=data, chunks=(1,2), compression="gzip")

    # Compound dataset containing ragged arrays
    uint8_vlen_type = h5py.vlen_dtype(np.uint8)
    compound_vlen_dtype = np.dtype([
        ('one', uint8_vlen_type),
        ('two', uint8_vlen_type)
    ])
    data = np.zeros(3, dtype=compound_vlen_dtype)
    data[0] = (np.array([1]), np.array([2]))
    data[1] = (np.array([1,1]), np.array([2,2]))
    data[2] = (np.array([1,1,1]), np.array([2,2,2]))

    f.create_dataset('vlen_contiguous_compound', data=data, dtype=compound_vlen_dtype)
    f.create_dataset('vlen_chunked_compound', data=data, dtype=compound_vlen_dtype, chunks=(1,), compression="gzip")

    # Compound dataset arrays of vlen type
    compound_vlen_dtype = np.dtype([
        ('name', utf8, 2)
    ])
    pointData = np.zeros(2, dtype=utf8)
    pointData[0] = "James"
    pointData[1] = "Ellie"
    data = np.zeros(1, dtype=compound_vlen_dtype)
    data['name'] = np.array(pointData)

    f.create_dataset('array_vlen_contiguous_compound', data=data, dtype=compound_vlen_dtype)
    f.create_dataset('array_vlen_chunked_compound', data=data, dtype=compound_vlen_dtype, chunks=(1,), compression="gzip")

    # Nested compound datasets use 2 img numbers as an example
    nested_dt = np.dtype([
        ('firstNumber', imgdt),
        ('secondNumber', imgdt),
    ])

    # Element 0 is intentionally left as the zero-initialized record.
    data = np.zeros(3, dtype=nested_dt)
    data[1] = ((1,1), (1,1))
    data[2] = ((2,2), (2,2))
    f.create_dataset('nested_contiguous_compound', data=data, dtype=nested_dt)
    f.create_dataset('nested_chunked_compound', data=data, dtype=nested_dt, chunks=(2,), compression="gzip")

    f.flush()
    f.close()
示例#24
0
def main():
    """Write a single 'boolean' attribute encoded as a False/True enum."""
    with h5py.File(file_path, 'w') as file:
        # HDF5 has no native boolean type; model it as a 2-member int enum.
        boolean_type = h5py.enum_dtype({"False": 0, "True": 1}, basetype='i')
        file.attrs.create('boolean', 0, None, boolean_type)