def __read_dataset_and_convert_to_h5data(k, v, data_attrs, dflt_ax_unit, timestamp, run_attrs):
    """Wrap one mesh dataset in an H5Data object with axes rebuilt from its attributes.

    The axis-describing entries are popped out of ``data_attrs`` (the caller's
    dict is modified in place) so that only the remaining entries are handed
    to H5Data as data attributes.

    Args:
        k: label of the dataset. Not used by this function; kept so the
            signature stays unchanged.
        v: dataset-like object; ``v.shape`` and ``v[()]`` are read.
        data_attrs: dict of attributes. 'axisLabels' and 'gridSpacing' are
            required; 'gridGlobalOffset' and 'position' default to 0 and
            'unitSI' defaults to 1.
        dflt_ax_unit: value stored under 'UNITS' for every axis.
        timestamp: forwarded to H5Data.
        run_attrs: forwarded to H5Data.

    Returns:
        H5Data built from ``v[()]`` with one DataAxis per dimension.

    Raises:
        KeyError: if 'axisLabels' or 'gridSpacing' is missing.
    """
    ax_label = data_attrs.pop('axisLabels')
    ax_off = data_attrs.pop('gridGlobalOffset', 0)
    g_spacing = data_attrs.pop('gridSpacing')
    ax_pos = data_attrs.pop('position', 0)
    unitsi = data_attrs.pop('unitSI', 1.)

    # lower edge = (global offset + in-cell position * spacing) scaled by unitSI;
    # upper edge = lower edge + number of points * spacing, in the same scale
    ax_min = (ax_off + ax_pos * g_spacing) * unitsi
    ax_max = ax_min + v.shape * g_spacing * unitsi

    # prepare the axes data
    axes = []
    for label, amin, amax, anp in zip(ax_label, ax_min, ax_max, v.shape):
        # Labels may arrive as bytes or already as text depending on how the
        # attribute was stored/read; only decode real byte strings.
        label_text = label.decode('utf-8') if isinstance(label, bytes) else label
        ax_attrs = {
            'LONG_NAME': label_text,
            'NAME': label_text,
            'UNITS': dflt_ax_unit,
        }
        axes.append(DataAxis(amin, amax, anp, attrs=ax_attrs))

    return H5Data(v[()], timestamp=timestamp, data_attrs=data_attrs,
                  run_attrs=run_attrs, axes=axes)
def read_zdf(filename, path=None):
    """
    Read a ZDF file through ``zdf.read`` and wrap the result in an H5Data object.

    Args:
        filename: name of the file to read.
        path: optional directory; when given the file is read from
            ``path + '/' + filename``.

    Returns:
        H5Data object.
    """
    fname = filename if not path else path + '/' + filename
    # Read from the combined location; `path` used to be silently ignored.
    data, info = zdf.read(fname)

    run_attrs, data_attrs = {}, {}

    # axes and their point counts are both stored in reversed order
    nx = list(reversed(info.grid.nx))
    axes = [
        DataAxis(ax.min, ax.max, nx[i],
                 attrs={
                     'LONG_NAME': ax.label,
                     'NAME': ax.label.replace('_', ''),
                     'UNITS': OSUnits(ax.units),
                 })
        for i, ax in enumerate(reversed(info.grid.axis))
    ]

    # Same fallback as read_h5 for file names that carry no timestamp.
    try:
        timestamp = fn_rule.findall(os.path.basename(filename))[0]
    except IndexError:
        timestamp = '000000'

    run_attrs['NX'] = info.grid.nx
    run_attrs['TIME UNITS'] = OSUnits(info.iteration.tunits)
    run_attrs['TIME'] = np.array([info.iteration.t])
    run_attrs['TIMESTAMP'] = timestamp

    data_attrs['LONG_NAME'] = info.grid.label
    data_attrs['NAME'] = info.grid.label.replace('_', '')
    data_attrs['UNITS'] = OSUnits(info.grid.units)

    return H5Data(data, timestamp=timestamp, data_attrs=data_attrs,
                  run_attrs=run_attrs, axes=axes)
def _decode_h5_attr(value):
    """Return an HDF5 attribute value with byte strings turned into str.

    * a bytes value is decoded as UTF-8;
    * an indexable value whose first element is bytes yields that first
      element decoded (this strips the one-element array wrapper);
    * anything else (numbers, text, empty or non-indexable values) is
      returned unchanged.
    """
    if isinstance(value, bytes):
        # Indexing bytes gives an int, so scalar byte strings must be handled
        # before looking at value[0].
        return value.decode('utf-8')
    try:
        first = value[0]
    except (IndexError, TypeError):
        # scalar, empty, or not indexable at all
        return value
    return first.decode('utf-8') if isinstance(first, bytes) else value


def read_h5(filename, path=None, axis_name="AXIS/AXIS"):
    """
    HDF reader for Osiris/Visxd compatible HDF files... This will slurp in the data
    and the attributes that describe the data (e.g. title, units, scale).

    Usage:
            diag_data = read_h5('e1-000006.h5')      # diag_data is a subclass of numpy.ndarray with extra attributes

            print(diag_data)                          # print the meta data
            print(diag_data.view(numpy.ndarray))      # print the raw data
            print(diag_data.shape)                    # prints the dimension of the raw data
            print(diag_data.run_attrs['TIME'])        # prints the simulation time associated with the hdf5 file
            diag_data.data_attrs['UNITS']             # print units of the dataset points
            list(diag_data.data_attrs)                # lists all attributes related to the data array
            list(diag_data.run_attrs)                 # lists all attributes related to the run
            print(diag_data.axes[0].attrs['UNITS'])   # prints units of X-axis
            list(diag_data.axes[0].attrs)             # lists all variables of the X-axis

            diag_data[slice(3)]

    Byte strings stored in the h5 file are converted to str, which is easier
    to deal with when writing codes.

    Args:
        filename: name of the file; also used to extract the timestamp.
        path: optional directory; the file is opened at ``path + '/' + filename``.
        axis_name: prefix of the axis objects, which are looked up as
            ``axis_name + '1'``, ``axis_name + '2'``, ... until one is missing.

    Returns:
        A single H5Data when exactly one data array is found in the file,
        otherwise a list of H5Data (one per data array).

    see also write_h5() function in this file
    """
    fname = filename if not path else path + '/' + filename

    try:
        timestamp = fn_rule.findall(os.path.basename(filename))[0]
    except IndexError:
        timestamp = '000000'

    axes, data_bundle = [], []

    # The context manager guarantees the file is closed even if reading fails.
    with h5py.File(fname, 'r') as data_file:
        n_data = scan_hdf5_file_for_main_data_array(data_file)

        axis_number = 1
        while True:
            # try to open up another AXIS object in the HDF's attribute directory
            # (they are named /AXIS/AXIS1, /AXIS/AXIS2, /AXIS/AXIS3 ...)
            try:
                axis = data_file[axis_name + str(axis_number)]
            except KeyError:
                break
            attrs = {k: _decode_h5_attr(v) for k, v in axis.attrs.items()}
            # AXIS<n> describes the n-th dimension counted from the end
            axis_numberpoints = n_data[0].shape[-axis_number]
            axes.insert(0, DataAxis(axis[0], axis[-1], axis_numberpoints, attrs=attrs))
            axis_number += 1

        # attributes of the ROOT of the hdf5 are the same for every data
        # array, so decode them once
        run_attrs = {key: _decode_h5_attr(value)
                     for key, value in data_file.attrs.items()}

        # we need a loop here primarily (I think) for n_ene_bin phasespace data
        for the_data_hdf_object in n_data:
            # A fresh dict per data array keeps attributes of one array from
            # leaking into the next one.
            data_attrs = {key: _decode_h5_attr(value)
                          for key, value in the_data_hdf_object.attrs.items()}

            # convert unit string to osunit object
            try:
                data_attrs['UNITS'] = OSUnits(data_attrs['UNITS'])
            except KeyError:
                data_attrs['UNITS'] = OSUnits('a.u.')
            data_attrs['NAME'] = the_data_hdf_object.name[1:]  # ignore the beginning '/'

            # H5Data is built while the file is still open
            data_bundle.append(
                H5Data(the_data_hdf_object, timestamp=timestamp,
                       data_attrs=data_attrs, run_attrs=run_attrs, axes=axes))

    if len(data_bundle) == 1:
        return data_bundle[0]
    return data_bundle
def read_h5_openpmd(filename, path=None):
    """
    HDF reader for OpenPMD compatible HDF files... This will slurp in the data
    and the attributes that describe the data (e.g. title, units, scale).

    Usage:
            fields = read_h5_openpmd('EandB000006.h5')  # dict: mesh label -> H5Data
            diag_data = fields['E1']                     # assuming the file has a mesh called 'E1'

            print(diag_data)                          # print the meta data
            print(diag_data.view(numpy.ndarray))      # print the raw data
            print(diag_data.shape)                    # prints the dimension of the raw data
            diag_data.data_attrs['UNITS']             # print units of the dataset points
            list(diag_data.data_attrs)                # lists all attributes related to the data array
            list(diag_data.run_attrs)                 # lists all attributes related to the run
            print(diag_data.axes[0].attrs['UNITS'])   # prints units of X-axis
            list(diag_data.axes[0].attrs)             # lists all variables of the X-axis

    Args:
        filename: name of the file; also used to extract the timestamp that
            replaces '%T' in the file's 'basePath' attribute.
        path: optional directory; the file is opened at ``path + '/' + filename``.

    Returns:
        dict mapping every entry found under the meshes path to an H5Data.

    see also write_h5() function in this file
    """
    def _as_text(raw):
        # path attributes may be stored as bytes or as text
        return raw.decode('utf-8') if isinstance(raw, bytes) else raw

    fname = filename if not path else path + '/' + filename

    try:
        timestamp = fn_rule.findall(os.path.basename(filename))[0]
    except IndexError:
        timestamp = '00000000'

    # long (display) names for the field labels we know about
    lname_dict = {
        'E1': 'E_x', 'E2': 'E_y', 'E3': 'E_z',
        'B1': 'B_x', 'B2': 'B_y', 'B3': 'B_z',
        'jx': 'J_x', 'jy': 'J_y', 'jz': 'J_z',
        'rho': r'\rho',
    }
    fldl = {}

    with h5py.File(fname, 'r') as data_file:
        basePath = _as_text(data_file.attrs['basePath']).replace('%T', timestamp)
        meshPath = basePath + _as_text(data_file.attrs['meshesPath'])

        run_attrs = {k.upper(): v for k, v in data_file[basePath].attrs.items()}
        run_attrs.setdefault('TIME UNITS', r'1 / \omega_p')

        # read field data
        # k is the field label and v is the field dataset
        for k, v in data_file[meshPath].items():
            # openPMD doesn't enforce attrs that are required in OSIRIS dataset,
            # so start from defaults and let the stored attributes override them
            data_attrs = {
                'UNITS': OSUnits(r'm_e c \omega_p e^{-1} '),
                'LONG_NAME': lname_dict.get(k, k),
                'NAME': k,
            }
            data_attrs.update(dict(v.attrs.items()))
            dflt_ax_unit = r'c \omega_p^{-1}'

            # Shared conversion: pops the axis-related attributes, builds the
            # axes and wraps the data (missing optional attributes get defaults).
            fldl[k] = __read_dataset_and_convert_to_h5data(
                k, v, data_attrs, dflt_ax_unit, timestamp, run_attrs)

    return fldl
# # fill in any values we have stored in the Axis object # for key, value in data_object.axes[i].attrs.items(): # if key == 'UNITS': # try: # axis_data.attrs['UNITS'] = np.array([str(data_object.axes[i].attrs['UNITS']).encode('utf-8')]) # except: # axis_data.attrs['UNITS'] = np.array([b'a.u.']) # else: # axis_data.attrs[key] = np.array([value.encode('utf-8')]) if isinstance(value, str) else value h5file.close() if __name__ == '__main__': import osh5utils as ut a = np.arange(6.0).reshape(2, 3) ax, ay = DataAxis(0, 3, 3, attrs={'UNITS': '1 / \omega_p' }), DataAxis(10, 11, 2, attrs={'UNITS': 'c / \omega_p'}) da = { 'UNITS': 'n_0', 'NAME': 'test', } h5d = H5Data(a, timestamp='123456', data_attrs=da, axes=[ay, ax]) write_h5(h5d, './test-123456.h5') rw = read_h5('./test-123456.h5') h5d = read_h5( './test-123456.h5') # read from file to get all default attrs print("rw is h5d: ", rw is h5d, '\n') print(repr(rw))