def read_array(self, arrpath, arrnum):
    if arrpath not in self._dataset:
        raise LookupError("Array data not found in the dataset.")

    # ismrmrd complex data is stored as pairs named real and imag
    # TODO do we need to store and reset, or make the config local to the module?
    cplxcfg = h5py.get_config().complex_names
    h5py.get_config().complex_names = ('real', 'imag')
    arr = np.copy(self._dataset[arrpath][arrnum])
    h5py.get_config().complex_names = cplxcfg
    return arr
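One way to resolve the TODO above is to scope the override in a context manager so the global config is always restored, even on error. This is a sketch, not part of the original module; the helper name is invented:

from contextlib import contextmanager

@contextmanager
def complex_names(real='real', imag='imag'):
    # Save, override, and always restore the global complex_names pair
    cfg = h5py.get_config()
    saved = cfg.complex_names
    cfg.complex_names = (real, imag)
    try:
        yield
    finally:
        cfg.complex_names = saved

# Hypothetical usage inside read_array:
#     with complex_names('real', 'imag'):
#         arr = np.copy(self._dataset[arrpath][arrnum])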
def export_to_hdf5(self, filename, step):
    """Export results to an HDF5 file

    Parameters
    ----------
    filename : str
        The filename to write to
    step : int
        What step is this?

    """
    # Write new file if first time step, else add to existing file
    kwargs = {'mode': "w" if step == 0 else "a"}

    if h5py.get_config().mpi and comm.size > 1:
        # Write results in parallel
        kwargs['driver'] = 'mpio'
        kwargs['comm'] = comm
        with h5py.File(filename, **kwargs) as handle:
            self._to_hdf5(handle, step, parallel=True)
    else:
        # Gather results at root process
        all_results = comm.gather(self)

        # Only root process writes results
        if comm.rank == 0:
            with h5py.File(filename, **kwargs) as handle:
                for res in all_results:
                    res._to_hdf5(handle, step, parallel=False)
def mpi(self):
    """
    Returns a named tuple with ``comm``, ``rank``, ``size``, and ``MPI``
    if run with MPI and False otherwise.
    """
    if hasattr(self, "__is_mpi"):
        return self.__is_mpi

    self.__is_mpi = is_mpi_env()

    # If it actually is an MPI environment, set the communicator and the
    # rank.
    if self.__is_mpi:
        # Check if HDF5 has been compiled with parallel I/O.
        if not h5py.get_config().mpi:
            msg = ("Running under MPI requires HDF5/h5py to be compiled "
                   "with support for parallel I/O.")
            raise RuntimeError(msg)

        import mpi4py.MPI

        if not mpi4py.MPI.Is_initialized():
            mpi4py.MPI.Init()

        # Set mpi tuple for easy class-wide access.
        mpi_ns = collections.namedtuple("mpi_ns", ["comm", "rank",
                                                   "size", "MPI"])
        comm = mpi4py.MPI.COMM_WORLD
        self.__is_mpi = mpi_ns(comm=comm, rank=comm.rank,
                               size=comm.size, MPI=mpi4py.MPI)

    return self.__is_mpi
class MaxwellFile(H5File):

    description = "maxwell"
    extension = [".h5", ".hdf5"]
    parallel_write = h5py.get_config().mpi

    _required_fields = {}
    _default_values = {'well': '0000'}

    def _read_from_header(self):
        header = {}

        self.my_file = h5py.File(self.file_name, mode='r')
        header['version'] = self.my_file['version'][0]

        if header['version'] == b'20160704':
            header['h5_key'] = 'sig'
            header['sampling_rate'] = 20000
            header['dtype_offset'] = 512
            header['gain'] = self.my_file.get('settings/lsb')[0]
        elif header['version'] == b'20190530':
            if 'data%s' % self.params['well'] not in list(self.my_file['data_store'].keys()):
                print_and_log(['Well %s not found!' % self.params['well']], 'error', logger)
                sys.exit(0)
            header['h5_key'] = '/data_store/data%s/groups/routed/raw' % self.params['well']
            header['sampling_rate'] = self.my_file.get('/data_store/data%s/settings/sampling' % self.params['well'])[0]
            header['dtype_offset'] = 512
            header['gain'] = self.my_file.get('/data_store/data%s/settings/lsb' % self.params['well'])[0]

        header['data_dtype'] = self.my_file.get(header['h5_key']).dtype
        self.compression = self.my_file.get(header['h5_key']).compression
        self._check_compression()

        nb_channels, n_frames = self.my_file.get(header['h5_key']).shape
        self.size = nb_channels * n_frames
        header['nb_channels'] = nb_channels
        self._shape = (n_frames, header['nb_channels'])

        self.my_file.close()
        return header

    def _read_chunk(self, do_slice, t_start, t_stop, nodes):
        if do_slice:
            local_chunk = self.data[nodes, t_start:t_stop].T
        else:
            local_chunk = self.data[:, t_start:t_stop].T
        return local_chunk

    def write_chunk(self, time, data):
        data = self._unscale_data_from_float32(data)
        self.data[:, time:time + data.shape[0]] = data.T
def __init__(self, intg, basedir, basename, prefix, *, extn='.pyfrs'):
    # Base output directory and file name
    self.basedir = basedir
    self.basename = basename

    # Data prefix
    self.prefix = prefix

    # Our physical rank
    self.prank = intg.rallocs.prank

    # Append the relevant extension
    if not self.basename.endswith(extn):
        self.basename += extn

    # Output counter (incremented each time write() is called)
    self.nout = self._restore_nout() if intg.isrestart else 0

    # MPI info
    comm, rank, root = get_comm_rank_root()

    # Parallel I/O
    if (h5py.get_config().mpi and
        'PYFR_FORCE_SERIAL_HDF5' not in os.environ):
        self._write = self._write_parallel
    # Serial I/O
    else:
        self._write = self._write_serial
def read_image(self, impath, imnum):
    if impath not in self._dataset:
        raise LookupError("Image data not found in the dataset.")

    # Create an image and fill it with the header and attribute string
    # for this image
    im = ismrmrd.Image(self._dataset[impath]['header'][imnum],
                       self._dataset[impath]['attributes'][imnum])

    # Copy the data
    # ismrmrd complex data is stored as pairs named real and imag
    # TODO do we need to store and reset, or make the config local to the module?
    cplxcfg = h5py.get_config().complex_names
    h5py.get_config().complex_names = ('real', 'imag')
    im.data[:] = self._dataset[impath]['data'][imnum]
    h5py.get_config().complex_names = cplxcfg
    return im
def open_output(self, tag, wrapper=False, **kwargs):
    """
    Find and open an output file with the given tag, in write mode.

    For general files this will simply return a standard python file object.
    For specialized file types like FITS or HDF5 it will return a more
    specific object - see the types.py file for more info.

    This is an extended version of the parent class method which also saves
    configuration information.  Putting this here right now for testing.
    """
    path = self.get_output(tag)
    output_class = self.get_output_type(tag)

    # HDF files can be opened for parallel writing under MPI.
    # This checks if:
    # - we have been told to open in parallel
    # - we are actually running under MPI
    # and adds the flags required if all these are true
    run_parallel = kwargs.pop('parallel', False) and self.is_mpi()
    if run_parallel:
        if not output_class.supports_parallel_write:
            raise ValueError(f"Tried to open file for parallel output, but not"
                             f" supported for type {output_class}. Tag was {tag} and"
                             f" path was {path}")
        kwargs['driver'] = 'mpio'
        kwargs['comm'] = self.comm

        # XXX: This is also not a dependency, but it should be.
        # Or even better would be to make it a dependency of descformats
        # where it is actually used.
        import h5py

        if not h5py.get_config().mpi:
            print(dedent("""\
            Your h5py installation is not MPI-enabled.
            Options include:
              1) Set nprocess to 1 for all stages
              2) Upgrade h5py to use mpi.  See instructions here:
                 http://docs.h5py.org/en/latest/build.html#custom-installation
            Note: If using conda, the most straightforward way to enable it is
               conda install -c spectraldns h5py-parallel
            """))
            raise RuntimeError("h5py module is not MPI-enabled.")

    extra_provenance = self.gather_provenance()

    # Return an opened object representing the file
    obj = output_class(path, 'w', extra_provenance=extra_provenance, **kwargs)
    if wrapper:
        return obj
    else:
        return obj.file
def load_mat_file(_file):
    # Load mat file using either h5py or scipy.io.loadmat
    h5py.get_config().default_file_mode = 'r'
    for fun in (h5py.File, scipy.io.loadmat):
        try:
            return fun(_file)
        except Exception:
            pass
    raise Exception('ERROR: File {} does not exist!'.format(_file))
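A hedged usage sketch; the path below is invented. MATLAB v7.3 files are HDF5 containers and open via h5py, while older formats fall through to scipy.io.loadmat:

# Hypothetical usage; 'results.mat' is a made-up path
data = load_mat_file('results.mat')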
def _to_hdf5_serial(self, filename, dataset, create=False):
    """Write into an HDF5 dataset.

    This explicitly serialises the IO so that it works when h5py does not
    support MPI-IO.

    Parameters
    ----------
    filename : str
        File to write dataset into.
    dataset : string
        Name of dataset to write into. Should not exist.
    """
    ## Naive non-parallel implementation to start

    import h5py

    if h5py.get_config().mpi:
        import warnings
        warnings.warn("h5py has parallel support. "
                      "Use the parallel `.to_hdf5` routine instead.")

    if self.comm is None or self.comm.rank == 0:
        with h5py.File(filename, "a" if create else "r+") as fh:
            if dataset in fh:
                raise Exception("Dataset should not exist.")

            fh.create_dataset(dataset, self.global_shape, dtype=self.dtype)
            fh[dataset][:] = np.array(0.0).astype(self.dtype)

    # wait until all processes see the created file
    while not os.path.exists(filename):
        time.sleep(1)

    self.comm.Barrier()

    if self.axis == 0:
        dist_arr = self
    else:
        dist_arr = self.redistribute(axis=0)

    size = 1 if self.comm is None else self.comm.size

    for ri in range(size):
        rank = 0 if self.comm is None else self.comm.rank

        if ri == rank:
            with h5py.File(filename, "r+") as fh:
                start = dist_arr.local_offset[0]
                end = start + dist_arr.local_shape[0]
                fh[dataset][start:end] = dist_arr

        dist_arr.comm.Barrier()
def _test_b8(self, arr1):
    fname = self.mktemp()
    with tables.open_file(fname, 'a') as f:
        if arr1.dtype.names:
            f.create_table('/', 'test', obj=arr1)
        else:
            f.create_array('/', 'test', obj=arr1)

    with h5py.File(fname, 'r') as f:
        assert h5py.get_config().b8_to_bool == False
        with self.assertRaises(TypeError):
            f['test'][:]

        h5py.get_config().b8_to_bool = True
        arr2 = f['test'][:]
        self.assertArrayEqual(arr1, arr2)

        h5py.get_config().b8_to_bool = False
        with self.assertRaises(TypeError):
            f['test'][:]
def __enter__(self):
    # Call parent __enter__. This will effect the
    # sequential/collective behaviour.
    super(h5File, self).__enter__()

    h5py_mpi = h5py.get_config().mpi and (self.pattern == "collective")
    if h5py_mpi:
        self.kwargs.update({"driver": 'mpio', "comm": MPI.COMM_WORLD})

    self.h5f = h5py.File(*self.args, **self.kwargs)
    return self.h5f
def get_filename(self, basename, size=1, rank=0, mpiok=True, mode='r+'):
    """Get name of file to be opened by this process

    self.filename is set to the name of the HDF5 file to be opened. This is
    also returned as the function value. In addition, the following flags
    are set:

    self.creating: True if creating a new file.
    self.rank_owns_file: True if the file will be exclusively owned by this
        process.
    """
    self.usempi = mpiok and h5py.get_config().mpi
    name_nompi = '{name}s{size}r{rank}.h5'.format(name=basename,
                                                  size=size, rank=rank)
    name_mpi = '{name}MPI.h5'.format(name=basename)
    name_seq = '{name}s1r0.h5'.format(name=basename)
    self.driver = None

    if self.usempi and os.path.isfile(name_mpi):
        self.creating = mode[0] == 'w' or mode[0] == 'x'
        self.rank_owns_file = size == 1
        self.filename = name_mpi
    elif self.usempi and (mode[0] == 'w' or mode[0] == 'x'):
        self.creating = True
        self.rank_owns_file = size == 1
        self.filename = name_mpi
    elif os.path.isfile(name_nompi):
        self.creating = mode[0] == 'w' or mode[0] == 'x'
        self.rank_owns_file = True
        self.filename = name_nompi
    elif (mode == 'r' or mode == 'a') and os.path.isfile(name_seq):
        self.creating = False
        self.rank_owns_file = size == 1
        self.filename = name_seq
    # Allow reading from MPI file even if we're not using MPI:
    elif (mode == 'r' or mode == 'a') and os.path.isfile(name_mpi):
        self.creating = False
        self.rank_owns_file = size == 1
        self.filename = name_mpi
    else:
        self.creating = mode != 'r'
        self.rank_owns_file = not self.usempi
        self.filename = name_mpi if self.usempi else name_nompi

    if self.creating and not self.rank_owns_file and self.usempi:
        self.driver = 'mpio'
    if self.creating:
        os.makedirs(os.path.dirname(self.filename), exist_ok=True)

    logSERIES('self.filename', self.filename)
    logSERIES('self.creating', self.creating)
    logSERIES('self.rank_owns_file', self.rank_owns_file)
    logSERIES('self.driver', self.driver)
    logSERIES('self.usempi', self.usempi)
    return self.filename
def __init__(self, intg, mdata, basedir, basename, *, extn='.pyfrs'):
    # Base output directory and file name
    self.basedir = basedir
    self.basename = basename

    # Append the relevant extension
    if not self.basename.endswith(extn):
        self.basename += extn

    # Output counter (incremented each time write() is called)
    self.nout = self._restore_nout() if intg.isrestart else 0

    # MPI info
    comm, rank, root = get_comm_rank_root()

    # Gather the output metadata across all ranks
    mdata = comm.allgather(mdata)

    # Parallel I/O
    if (h5py.get_config().mpi and
        'PYFR_FORCE_SERIAL_HDF5' not in os.environ):
        self._write = self._write_parallel
        self._loc_names = loc_names = []
        self._global_shape_list = []

        for mrank, mfields in enumerate(mdata):
            prank = intg.rallocs.mprankmap[mrank]

            # Loop over all element types across all ranks
            for fname, fshape, fdtype in mfields:
                name = f'{fname}_p{prank}'

                self._global_shape_list.append((name, fshape, fdtype))

                if rank == mrank:
                    loc_names.append(name)
    # Serial I/O
    else:
        self._write = self._write_serial

        if rank == root:
            self._loc_info = loc_info = []
            self._mpi_info = mpi_info = []

            for mrank, mfields in enumerate(mdata):
                prank = intg.rallocs.mprankmap[mrank]

                for fname, fshape, fdtype in mfields:
                    name = f'{fname}_p{prank}'

                    if mrank == root:
                        loc_info.append(name)
                    else:
                        mpi_info.append((name, mrank, fshape, fdtype))
def convert_to_np_dtype(dset):
    """
    Given an HDF5 dataset, return the values in a numpy-builtin datatype

    Parameters
    ----------
    dset : h5py.Dataset
        HDF5 (h5py) dataset

    Returns
    -------
    out : numpy.ndarray (dtype = numpy built-in)

    Note
    ----
    The software accounts for big-/little-endianness, and the inability of
    hdf5 to natively store complex numbers.

    Software Info
    -------------
    Original Python branch: Feb 16 2015

    author: ("Charles H Camp Jr")

    email: ("*****@*****.**")

    version: ("16.02.18")
    """
    assert isinstance(dset, _h5py.Dataset), 'Input is not of type h5py.Dataset'

    # Single datatype
    if len(dset.dtype) == 0:
        converted = _np.ndarray(dset.shape, dtype=dset.dtype.newbyteorder('='))
        dset.read_direct(converted)
        if issubclass(converted.dtype.type, _np.integer):  # Integer to float
            converted = converted.astype(_np.float64)
        return converted
    # Compound datatype of length 2 -- assumed ('Re', 'Im')
    elif len(dset.dtype) == 2:
        print('Warning: h5py.complex_names set incorrectly using \'{}\' and \'{}\' '
              'for Re and Im, respectively'.format(dset.dtype.names[0],
                                                   dset.dtype.names[1]))
        _h5py.get_config().complex_names = (dset.dtype.names[0],
                                            dset.dtype.names[1])
        dset = dset.file[dset.name]
        converted = _np.ndarray(dset.shape, dtype=dset.dtype.newbyteorder('='))
        dset.read_direct(converted)
    # Unknown datatype
    else:
        print('Warning: Unknown datatype. Returning dataset values as is.')
        return dset.value
    return converted
def to_hdf5(self, f, dataset, create=False):
    """Parallel write into a contiguous HDF5 dataset.

    Parameters
    ----------
    f : str, h5py.File or h5py.Group
        File to write dataset into.
    dataset : string
        Name of dataset to write into. Should not exist.
    """
    ## Naive non-parallel implementation to start

    import h5py

    if not h5py.get_config().mpi:
        if isinstance(f, basestring):
            self._to_hdf5_serial(f, dataset, create)
            return
        else:
            raise ValueError(
                "Argument must be a filename if h5py does not have MPI support"
            )

    mode = 'a' if create else 'r+'
    fh = misc.open_h5py_mpi(f, mode, self.comm)

    dset = fh.create_dataset(dataset, shape=self.global_shape,
                             dtype=self.dtype)

    start = self.local_offset[self.axis]
    end = start + self.local_shape[self.axis]

    # Construct slices for axis
    sl = [slice(None, None)] * self.axis
    sl += [slice(start, end)]
    sl = tuple(sl)

    # Check that there are no null slices, otherwise we need to turn off
    # collective IO to work around an h5py issue (#965)
    no_null_slices = self.global_shape[self.axis] >= self.comm.size

    if fh.is_mpi and no_null_slices:
        with dset.collective:
            dset[sl] = self[:]
    else:
        dset[sl] = self[:]

    if fh.opened:
        fh.close()
def open_h5py_mpi(f, mode, use_mpi=True, comm=None):
    """Ensure that we have an h5py File object. Opens with MPI-IO if possible.

    The returned file handle is annotated with two attributes: `.is_mpi`
    which says whether the file was opened as an MPI file and `.opened` which
    says whether it was opened in this call.

    Parameters
    ----------
    f : string, h5py.File or h5py.Group
        Filename to open, or already open file object. If already open this
        is just returned as is.
    mode : string
        Mode to open file in.
    use_mpi : bool, optional
        Whether to use MPI-IO or not (default True)
    comm : mpi4py.Comm, optional
        MPI communicator to use. Uses `COMM_WORLD` if not set.

    Returns
    -------
    fh : h5py.File
        File handle for h5py.File, with two extra attributes `.is_mpi` and
        `.opened`.
    """
    import h5py

    has_mpi = h5py.get_config().mpi

    if isinstance(f, basestring):
        # Open using MPI-IO if we can
        if has_mpi and use_mpi:
            from mpi4py import MPI
            comm = comm if comm is not None else MPI.COMM_WORLD
            fh = h5py.File(f, mode, driver="mpio", comm=comm)
        else:
            fh = h5py.File(f, mode)
        fh.opened = True
    elif isinstance(f, (h5py.File, h5py.Group)):
        fh = f
        fh.opened = False
    else:
        raise ValueError("Did not receive a h5py.File or filename")

    fh.is_mpi = fh.file.driver == "mpio"

    return fh
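A minimal usage sketch for open_h5py_mpi as defined above; the filename and dataset name are invented, and whether the mpio driver is actually engaged depends on how h5py was built:

# Hypothetical usage of open_h5py_mpi ('example.h5' and 'x' are made up)
fh = open_h5py_mpi('example.h5', 'a')
fh.create_dataset('x', shape=(16,), dtype='f8')
if fh.opened:   # only close handles that this call opened
    fh.close()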
class NixFile(H5File):

    description = "nix"
    extension = [".nix", ".h5", ".hdf5"]
    parallel_write = h5py.get_config().mpi
    is_writable = True

    _required_fields = {'block': str, 'data_array': str}
    _default_values = {'dtype_offset': 'auto', 'gain': 1.}

    def _read_from_header(self):
        nix_name = 'data/%s/data_arrays/%s' % (self.params['block'],
                                               self.params['data_array'])
        self.params['h5_key'] = '%s/data' % nix_name
        self.__check_valid_key__(self.h5_key)
        self._open()

        header = {}
        header['data_dtype'] = self.my_file.get(self.h5_key).dtype

        for key in self.my_file.get('%s/dimensions' % nix_name).keys():
            tmp = dict(self.my_file.get('%s/dimensions/%s' % (nix_name, key)).attrs.items())
            if tmp['label'] == 'time':
                header['sampling_rate'] = 1. / tmp['sampling_interval']

        self.compression = self.my_file.get(self.h5_key).compression

        # HDF5 does not support parallel writes with compression
        if self.compression != '':
            self._parallel_write = False

        self.size = self.my_file.get(self.h5_key).shape

        if self.size[0] > self.size[1]:
            self.time_axis = 0
            self._shape = (self.size[0], self.size[1])
        else:
            self.time_axis = 1
            self._shape = (self.size[1], self.size[0])

        header['nb_channels'] = self._shape[1]
        self._close()

        return header
def write_file(file, l, n, omega):
    # Write the file
    h5.get_config().complex_names = ('re', 'im')

    f = h5.File(file, 'w')

    f.attrs['label'] = np.string_(' ' * 256)

    f.create_dataset('l', data=l)
    f.create_dataset('n_pg', data=n)
    f.create_dataset('omega', data=omega)

    f.close()
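A complementary read-back sketch under the same assumptions (the h5 alias and the dataset names from write_file above; the filename is invented). The ('re', 'im') pair must match what was used at write time for complex datasets to round-trip:

# Hypothetical read-back of a file written by write_file above
h5.get_config().complex_names = ('re', 'im')
with h5.File('modes.h5', 'r') as f:   # 'modes.h5' is a made-up filename
    l = f['l'][...]
    n = f['n_pg'][...]
    omega = f['omega'][...]           # complex values decoded via ('re', 'im')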
def __enter__(self):
    # Call parent __enter__. This will effect the
    # sequential/collective behaviour.
    super(h5File, self).__enter__()

    h5py_mpi = h5py.get_config().mpi and (self.pattern == "collective")
    if h5py_mpi:
        self.kwargs.update({"driver": 'mpio', "comm": MPI.COMM_WORLD})
    else:
        # If writing sequentially, non-root procs should use `append` mode.
        if uw.mpi.rank != 0:
            self.kwargs.update({"mode": 'a'})

    self.h5f = h5py.File(*self.args, **self.kwargs)
    return self.h5f
def open_h5py_mpi(f, mode, comm=None):
    """Ensure that we have an h5py File object. Opens with MPI-IO if possible.

    The returned file handle is annotated with two attributes: `.is_mpi`
    which says whether the file was opened as an MPI file and `.opened` which
    says whether it was opened in this call.

    Parameters
    ----------
    f : string, h5py.File or h5py.Group
        Filename to open, or already open file object. If already open this
        is just returned as is.
    mode : string
        Mode to open file in.
    comm : mpi4py.Comm, optional
        MPI communicator to use. Uses `COMM_WORLD` if not set.

    Returns
    -------
    fh : h5py.File
        File handle for h5py.File, with two extra attributes `.is_mpi` and
        `.opened`.
    """
    import h5py

    has_mpi = h5py.get_config().mpi

    if isinstance(f, basestring):
        # Open using MPI-IO if we can
        if has_mpi:
            from mpi4py import MPI
            comm = comm if comm is not None else MPI.COMM_WORLD
            fh = h5py.File(f, mode, driver='mpio', comm=comm)
        else:
            fh = h5py.File(f, mode)
        fh.opened = True
    elif isinstance(f, (h5py.File, h5py.Group)):
        fh = f
        fh.opened = False
    else:
        raise ValueError("Did not receive a h5py.File or filename")

    fh.is_mpi = (fh.file.driver == 'mpio')

    return fh
def _open(self, mode):
    """Open the h5 file"""
    if self.mpi_comm is not None and self.mpi_comm.Get_size() > 1:
        if not h5py.get_config().mpi:
            raise RuntimeError("ERROR: h5py is lacking MPI support, aborting!")
        self.h5file = h5py.File(self.filename, mode, driver='mpio',
                                comm=self.mpi_comm)
    else:
        self.h5file = h5py.File(self.filename, mode)

    self.bbic = self.h5file.require_group('bbic')
    self._read_attrs()

    if mode != 'r':
        self.version = BBIC_CURRENT_VERSION
        self._write_attrs()
def get_parallel_hdf5_flag(params):
    '''
    Get parallel HDF5 flag.

    Argument
    --------
    params: dict
        Dictionary of parameters.

    Return
    ------
    flag: bool
        True if parallel HDF5 is available and the user wants to use it.
    '''
    flag = h5py.get_config().mpi and params.getboolean('data', 'parallel_hdf5')
    return flag
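A usage sketch for get_parallel_hdf5_flag; it assumes params behaves like configparser.ConfigParser, which the getboolean call above implies:

# Hypothetical usage with a ConfigParser-style params object
import configparser

params = configparser.ConfigParser()
params.read_dict({'data': {'parallel_hdf5': 'True'}})

# True only if h5py was built with MPI support and the user asked for it
use_parallel = get_parallel_hdf5_flag(params)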
def open_output(self, tag, wrapper=False, **kwargs):
    """
    Find and open an output file with the given tag, in write mode.

    For general files this will simply return a standard python file object.
    For specialized file types like FITS or HDF5 it will return a more
    specific object - see the types.py file for more info.
    """
    path = self.get_output(tag)
    output_class = self.get_output_type(tag)

    # HDF files can be opened for parallel writing under MPI.
    # This checks if:
    # - we have been told to open in parallel
    # - we are actually running under MPI
    # and adds the flags required if all these are true
    run_parallel = kwargs.pop('parallel', False) and self.is_mpi()
    if run_parallel:
        kwargs['driver'] = 'mpio'
        kwargs['comm'] = self.comm

        # XXX: This is also not a dependency, but it should be.
        import h5py

        if not h5py.get_config().mpi:
            print(dedent("""\
            Your h5py installation is not MPI-enabled.
            Options include:
              1) Set nprocess to 1 for all stages
              2) Upgrade h5py to use mpi.  See instructions here:
                 http://docs.h5py.org/en/latest/build.html#custom-installation
            Note: If using conda, the most straightforward way to enable it is
               conda install -c spectraldns h5py-parallel
            """))
            raise RuntimeError("h5py module is not MPI-enabled.")

    # Return an opened object representing the file
    obj = output_class(path, 'w', **kwargs)
    if wrapper:
        return obj
    else:
        return obj.file
def get_watermark():
    """
    Return information about the current system relevant for pyasdf.
    """
    vendor = MPI.get_vendor() if MPI else None

    c = h5py.get_config()
    if not hasattr(c, "mpi") or not c.mpi:
        is_parallel = False
    else:
        is_parallel = True

    watermark = {
        "python_implementation": platform.python_implementation(),
        "python_version": platform.python_version(),
        "python_compiler": platform.python_compiler(),
        "platform_system": platform.system(),
        "platform_release": platform.release(),
        "platform_version": platform.version(),
        "platform_machine": platform.machine(),
        "platform_processor": platform.processor(),
        "platform_processor_count": cpu_count(),
        "platform_architecture": platform.architecture()[0],
        "platform_hostname": gethostname(),
        "date": strftime("%d/%m/%Y"),
        "time": strftime("%H:%M:%S"),
        "timezone": strftime("%Z"),
        "hdf5_version": h5py.version.hdf5_version,
        "parallel_h5py": is_parallel,
        "mpi_vendor": vendor[0] if vendor else None,
        "mpi_vendor_version": ".".join(map(str, vendor[1])) if vendor else None,
        "problematic_multiprocessing": is_multiprocessing_problematic(),
    }

    watermark["module_versions"] = {
        module: get_distribution(module).version for module in modules
    }
    if MPI is None:
        watermark["module_versions"]["mpi4py"] = None

    return watermark
def export_to_hdf5(self, filename, step):
    """Export results to an HDF5 file

    Parameters
    ----------
    filename : str
        The filename to write to
    step : int
        What step is this?

    """
    if have_mpi and h5py.get_config().mpi:
        kwargs = {'driver': 'mpio', 'comm': comm}
    else:
        kwargs = {}

    # Write new file if first time step, else add to existing file
    kwargs['mode'] = "w" if step == 0 else "a"

    with h5py.File(filename, **kwargs) as handle:
        self._to_hdf5(handle, step)
def _get_backup_file(ds):
    backup_filename = ds.backup_filename
    if os.path.exists(backup_filename):
        # backup file already exists, open it. We use parallel
        # h5py if it is available
        if communication_system.communicators[-1].size > 1 and \
                h5py.get_config().mpi is True:
            mpi4py_communicator = communication_system.communicators[-1].comm
            f = h5py.File(backup_filename, "r+", driver='mpio',
                          comm=mpi4py_communicator)
        else:
            f = h5py.File(backup_filename, "r+")
        yield f
        f.close()
    else:
        # backup file does not exist, create it
        with _create_new_gdf(ds, backup_filename, data_author=None,
                             data_comment=None,
                             particle_type_name="dark_matter") as f:
            yield f
def _convert_to_np_dtype(dset):
    """
    Given an HDF5 dataset, return the values in a numpy-builtin datatype

    Parameters
    ----------
    dset : h5py.Dataset
        HDF5 (h5py) dataset

    Returns
    -------
    out : numpy.ndarray (dtype = numpy built-in)

    Notes
    -----
    The software accounts for big-/little-endianness, and the inability of
    hdf5 to natively store complex numbers.
    """
    assert isinstance(dset, _h5py.Dataset), 'Input is not of type h5py.Dataset'

    # Single datatype
    if len(dset.dtype) == 0:
        converted = _np.ndarray(dset.shape, dtype=dset.dtype.newbyteorder('='))
        dset.read_direct(converted)
        if issubclass(converted.dtype.type, _np.integer):  # Integer to float
            converted = converted.astype(_np.float64)
        return converted
    # Compound datatype of length 2 -- assumed ('Re', 'Im')
    elif len(dset.dtype) == 2:
        print('Warning: h5py.complex_names set incorrectly using \'{}\' and \'{}\' '
              'for Re and Im, respectively'.format(dset.dtype.names[0],
                                                   dset.dtype.names[1]))
        _h5py.get_config().complex_names = (dset.dtype.names[0],
                                            dset.dtype.names[1])
        dset = dset.file[dset.name]
        converted = _np.ndarray(dset.shape, dtype=dset.dtype.newbyteorder('='))
        dset.read_direct(converted)
    # Unknown datatype
    else:
        print('Warning: Unknown datatype. Returning dataset values as is.')
        return dset.value
    return converted
def write_results(result, filename, index):
    """Outputs result to an .hdf5 file.

    Parameters
    ----------
    result : Results
        Object to be stored in a file.
    filename : str
        Target filename.
    index : int
        What step is this?

    """
    if have_mpi and h5py.get_config().mpi:
        kwargs = {'driver': 'mpio', 'comm': comm}
    else:
        kwargs = {}

    kwargs['mode'] = "w" if index == 0 else "a"

    with h5py.File(filename, **kwargs) as handle:
        result.to_hdf5(handle, index)
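The driver-selection pattern shared by write_results and export_to_hdf5 above, in isolation; a minimal standalone sketch assuming mpi4py is installed and using an invented filename:

import h5py

kwargs = {}
if h5py.get_config().mpi:
    # Parallel build: open with MPI-IO so all ranks can write collectively
    from mpi4py import MPI
    kwargs.update(driver='mpio', comm=MPI.COMM_WORLD)

with h5py.File('out.h5', 'w', **kwargs) as handle:   # 'out.h5' is made up
    handle.create_dataset('step_0', data=[1, 2, 3])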
def generate_parameters(elements):
    """Generate the atomic parameters of the elements and store them in an
    HDF5 container."""
    config = h5py.get_config()
    config.track_order = True

    root = os.path.dirname(os.path.abspath(__file__))
    calculations = Calculations(os.path.join(root, "calculations.json"))

    if elements == "all":
        elements = calculations.get_all_elements()

    for element in elements:
        element = Element(element)
        confs = calculations.unique_configurations(element)

        filename = "{:s}.h5".format(element.symbol)
        path = os.path.join(root, "atomic_parameters", filename)

        with h5py.File(path, "w") as h5:
            for conf in confs:
                cowan = Cowan(element, conf)
                conf.energy, conf.atomic_parameters = cowan.get_parameters()
                cowan.remove_calculation_files()

                # Write the parameters to the HDF5 file. Use a group name
                # distinct from the directory root above so it is not
                # shadowed on subsequent iterations.
                group = "/{:s}".format(conf.name)
                h5[group + "/energy"] = conf.energy
                for parameter, value in conf.atomic_parameters.items():
                    h5[group + "/parameters/{:s}".format(parameter)] = value

                logging.info("%-2s %-8s", element.symbol, conf)
                logging.info("E = %-.4f eV", conf.energy)
                for parameter, value in conf.atomic_parameters.items():
                    logging.debug("%-s = %-.4f eV", parameter, value)
                logging.info("")
def __init__(self, *args, **kwargs):
    global PATTERN

    # figure out if we need to run collective or sequential
    h5py_mpi = h5py.get_config().mpi
    if h5py_mpi:
        pattern = 'collective'
    else:
        if (uw.mpi.size > 1) and (uw.mpi.rank == 0):
            import warnings
            warnings.warn("H5py not available in parallel mode. Read/write will be "
                          "performed sequentially. Note that this may be slow for "
                          "parallel simulations.")
        pattern = 'sequential'
        if PATTERN == 2:
            import warnings
            warnings.warn("Collective IO not possible as h5py not available "
                          "in parallel mode. Switching to sequential.")
            PATTERN = 1

    if PATTERN != 0:
        if PATTERN == 1:
            pattern = "sequential"
        elif PATTERN == 2:
            pattern = "collective"
        else:
            raise ValueError("PATTERN must be set to 0:auto, 1:sequential "
                             "or 2:collective.")

    # Record these for later. Note we shouldn't open the file here, as we
    # may need to open/close sequentially.
    self.args = args
    self.kwargs = kwargs
    self.pattern = pattern

    super(h5File, self).__init__(pattern=pattern)
oldnames = config.complex_names
try:
    for name in names:
        config.complex_names = name
        for ctype in complex_types:
            dt = dtype(ctype)
            htype = h5t.py_create(dt)
            self.assertEqual(type(htype), h5t.TypeCompoundID)
            self.assertEqual(htype.get_nmembers(), 2)
            self.assertEqual(htype.get_member_name(0), name[0])
            self.assertEqual(htype.get_member_name(1), name[1])
finally:
    config.complex_names = oldnames


import h5py
cfg = h5py.get_config()

bytemap = {'<': h5t.ORDER_LE, '>': h5t.ORDER_BE, '=': h5t.ORDER_NATIVE}


class TestPyCreate(TestCasePlus):
    """ Tests the translation from Python dtypes to HDF5 datatypes """

    def test_complex(self):
        """ Complex type translation

        - TypeComplexID
        - 8, 16 bytes
        - LE and BE
        - 2 members
def __init__(self, intg, nvars, basedir, basename, *, prefix, extn='.pyfrs'):
    # Base output directory and file name
    self.basedir = basedir
    self.basename = basename

    # Append the relevant extension
    if not self.basename.endswith(extn):
        self.basename += extn

    # Prefix given to each data array in the output file
    self.prefix = prefix

    # Output counter (incremented each time write() is called)
    self.nout = self._restore_nout() if intg.isrestart else 0

    # Copy the float type
    self.fpdtype = intg.backend.fpdtype

    # MPI info
    comm, rank, root = get_comm_rank_root()

    # Get the type and shape of each element in the partition
    etypes = intg.system.ele_types
    shapes = [(nupts, nvars, neles)
              for nupts, _, neles in intg.system.ele_shapes]

    # Gather
    eleinfo = comm.allgather(zip(etypes, shapes))

    # Parallel I/O
    if (h5py.get_config().mpi and
        'PYFR_FORCE_SERIAL_HDF5' not in os.environ):
        self._write = self._write_parallel
        self._loc_names = loc_names = []
        self._global_shape_list = []

        for mrank, meleinfo in enumerate(eleinfo):
            prank = intg.rallocs.mprankmap[mrank]

            # Loop over all element types across all ranks
            for etype, shape in meleinfo:
                name = self._get_name_for_data(etype, prank)

                self._global_shape_list.append((name, shape))

                if rank == mrank:
                    loc_names.append(name)
    # Serial I/O
    else:
        self._write = self._write_serial

        if rank == root:
            self._mpi_rbufs = mpi_rbufs = []
            self._mpi_rreqs = mpi_rreqs = []
            self._mpi_names = mpi_names = []
            self._loc_names = loc_names = []

            for mrank, meleinfo in enumerate(eleinfo):
                prank = intg.rallocs.mprankmap[mrank]

                for tag, (etype, shape) in enumerate(meleinfo):
                    name = self._get_name_for_data(etype, prank)

                    if mrank == root:
                        loc_names.append(name)
                    else:
                        rbuf = np.empty(shape, dtype=self.fpdtype)
                        rreq = comm.Recv_init(rbuf, mrank, tag)

                        mpi_rbufs.append(rbuf)
                        mpi_rreqs.append(rreq)
                        mpi_names.append(name)
import h5py

if not h5py.get_config().mpi:
    import warnings
    warnings.warn("h5py not MPI enabled. Discontinuing test.")
    import sys
    sys.exit(0)

import underworld as uw
import numpy as np

mesh = uw.mesh.FeMesh_Cartesian(elementRes=(128, 128))
swarm = uw.swarm.Swarm(mesh)

# create some variables to track
origOwningEl = swarm.add_variable('int', 1)
origCreatingProc = swarm.add_variable('int', 1)
origParticleIndex = swarm.add_variable('int', 1)
randomNumber = swarm.add_variable('int', 1)

swarm.populate_using_layout(uw.swarm.layouts.PerCellSpaceFillerLayout(swarm, 20))

# init variables
origOwningEl.data[:] = mesh.data_elgId[swarm.owningCell.data[:, 0]]  # global elementId where created
origCreatingProc.data[:] = uw.mpi.rank  # rank where created
origParticleIndex.data[:, 0] = range(swarm.particleLocalCount)  # local index where created

from random import randint

for index in range(0, swarm.particleLocalCount):  # add random numbers to this variable
from __future__ import absolute_import

import numpy as np
import pytest

# pytest.importorskip('mpi4py', 'mpi4py unavailable')
from seismic.traveltime.mpiops import rank, size, comm, run_once

try:
    import h5py
    H5PY = h5py.get_config().mpi
except Exception:
    H5PY = False


def test_helloworld():
    ranks = comm.allgather(rank)
    assert len(ranks) == size


@pytest.mark.skipif(not H5PY, reason='Skipped as parallel h5py is not available')
def test_h5py(random_filename):
    hdf = run_once(random_filename, ext='.hdf5')

    f = h5py.File(hdf, 'w', driver='mpio', comm=comm)
    dset = f.create_dataset('test', (size,), dtype='i')
    dset[rank] = rank
    f.close()

    f = h5py.File(hdf, 'r', libver='latest')
    b = f['test'][:]
def __init__(self, *args, **kwargs):
    super().__init__(*args, **kwargs)

    # Base output directory and file name
    self._basedir = self.cfg.getpath('soln-output', 'basedir', '.')
    self._basename = self.cfg.get('soln-output', 'basename', raw=True)

    # Output counter (incremented each time output() is called)
    self.nout = 0

    # MPI info
    comm, rank, root = get_comm_rank_root()

    # Get the type and shape of each element in the partition
    etypes, shapes = self.system.ele_types, self.system.ele_shapes

    # Gather this information onto the root rank
    eleinfo = comm.gather(zip(etypes, shapes), root=root)

    # Deciding if parallel
    parallel = (h5py.get_config().mpi and
                h5py.version.version_tuple >= (2, 5) and
                not self.cfg.getbool('soln-output', 'serial-h5', False))

    if parallel:
        self._write = self._write_parallel

        if rank == root:
            sollist = []
            for mrank, meleinfo in enumerate(eleinfo):
                prank = self.rallocs.mprankmap[mrank]
                sollist.extend(
                    (self._get_name_for_soln(etype, prank), dims)
                    for etype, dims in meleinfo
                )
        else:
            sollist = None

        self.sollist = comm.bcast(sollist, root=root)
    else:
        self._write = self._write_serial

        if rank == root:
            self._mpi_rbufs = mpi_rbufs = []
            self._mpi_rreqs = mpi_rreqs = []
            self._mpi_names = mpi_names = []
            self._loc_names = loc_names = []

            for mrank, meleinfo in enumerate(eleinfo):
                prank = self.rallocs.mprankmap[mrank]
                for tag, (etype, dims) in enumerate(meleinfo):
                    name = self._get_name_for_soln(etype, prank)

                    if mrank == root:
                        loc_names.append(name)
                    else:
                        rbuf = np.empty(dims, dtype=self.backend.fpdtype)
                        rreq = comm.Recv_init(rbuf, mrank, tag)

                        mpi_rbufs.append(rbuf)
                        mpi_rreqs.append(rreq)
                        mpi_names.append(name)
import os

import h5py._proxy
import h5py.defs
import h5py.h5
import h5py.h5a
import h5py.h5d
import h5py.h5f
import h5py.h5fd
import h5py.h5g
import h5py.h5i
import h5py.h5l
import h5py.h5o
import h5py.h5p
import h5py.h5r
import h5py.h5s
import h5py.h5t
import h5py.h5z
import h5py.utils

# verify that mpi builds are built with mpi
should_have_mpi = os.getenv('mpi', 'nompi') != 'nompi'
have_mpi = h5py.get_config().mpi
assert have_mpi == should_have_mpi, \
    "Expected mpi=%r, got %r" % (should_have_mpi, have_mpi)

# skip TestDrivers.test_mpio
import h5py.tests.old.test_file
delattr(h5py.tests.old.test_file.TestDrivers, 'test_mpio')

from sys import exit
exit(0) if h5py.run_tests().wasSuccessful() else exit(1)
""" Created on Thu May 26 13:16:12 2016 @author: chc """ import h5py as _h5py _h5py.get_config().complex_names = ('Re', 'Im') from crikit.io.meta_configs import (special_nist_bcars2 as _snb, special_nist_bcars1_sample_scan as _snb1ss) from crikit.io.meta_process import meta_process as _meta_process from crikit.io.hdf5 import hdf_import_data as _hdf_import_data from crikit.io.csv_nist import csv_nist_import_data as _csv_nist_import_data __all__ = [] def import_hdf_nist_special(pth, filename, dset, output_cls_instance): """ Import data from HDF File as specified by NIST-specific settings Returns ------- Success : bool Whether import was successful """ print('\n') try: import_success = _hdf_import_data(pth, filename, dset,
def _create_new_gdf(ds, gdf_path, data_author=None, data_comment=None,
                    dataset_units=None, particle_type_name="dark_matter",
                    clobber=False):
    # Make sure we have the absolute path to the file first
    gdf_path = os.path.abspath(gdf_path)

    # Is the file already there? If so, are we allowing clobbering?
    if os.path.exists(gdf_path) and not clobber:
        raise YTGDFAlreadyExists(gdf_path)

    ###
    # Create and open the file with h5py. We use parallel
    # h5py if it is available.
    ###
    if communication_system.communicators[-1].size > 1 and \
            h5py.get_config().mpi is True:
        mpi4py_communicator = communication_system.communicators[-1].comm
        f = h5py.File(gdf_path, "w", driver='mpio',
                      comm=mpi4py_communicator)
    else:
        f = h5py.File(gdf_path, "w")

    ###
    # "gridded_data_format" group
    ###
    g = f.create_group("gridded_data_format")
    g.attrs["data_software"] = "yt"
    g.attrs["data_software_version"] = yt_version
    if data_author is not None:
        g.attrs["data_author"] = data_author
    if data_comment is not None:
        g.attrs["data_comment"] = data_comment

    ###
    # "simulation_parameters" group
    ###
    g = f.create_group("simulation_parameters")
    g.attrs["refine_by"] = ds.refine_by
    g.attrs["dimensionality"] = ds.dimensionality
    g.attrs["domain_dimensions"] = ds.domain_dimensions
    g.attrs["current_time"] = ds.current_time
    g.attrs["domain_left_edge"] = ds.domain_left_edge
    g.attrs["domain_right_edge"] = ds.domain_right_edge
    g.attrs["unique_identifier"] = ds.unique_identifier
    g.attrs["cosmological_simulation"] = ds.cosmological_simulation
    # @todo: Where is this in the yt API?
    g.attrs["num_ghost_zones"] = 0
    # @todo: Where is this in the yt API?
    g.attrs["field_ordering"] = 0
    # @todo: not yet supported by yt.
    g.attrs["boundary_conditions"] = np.array([0, 0, 0, 0, 0, 0], 'int32')

    if ds.cosmological_simulation:
        g.attrs["current_redshift"] = ds.current_redshift
        g.attrs["omega_matter"] = ds.omega_matter
        g.attrs["omega_lambda"] = ds.omega_lambda
        g.attrs["hubble_constant"] = ds.hubble_constant

    if dataset_units is None:
        dataset_units = {}

    g = f.create_group("dataset_units")
    for u in ["length", "time", "mass", "velocity", "magnetic"]:
        unit_name = u + "_unit"
        if unit_name in dataset_units:
            value, units = dataset_units[unit_name]
        else:
            attr = getattr(ds, unit_name)
            value = float(attr)
            units = str(attr.units)
        d = g.create_dataset(unit_name, data=value)
        d.attrs["unit"] = units

    ###
    # "field_types" group
    ###
    g = f.create_group("field_types")

    ###
    # "particle_types" group
    ###
    g = f.create_group("particle_types")

    # @todo: Particle type iterator
    sg = g.create_group(particle_type_name)
    sg["particle_type_name"] = np.string_(particle_type_name)

    ###
    # root datasets -- info about the grids
    ###
    f["grid_dimensions"] = ds.index.grid_dimensions
    f["grid_left_index"] = np.array(
        [grid.get_global_startindex() for grid in ds.index.grids]
    ).reshape(ds.index.grid_dimensions.shape[0], 3)
    f["grid_level"] = ds.index.grid_levels.flat
    # @todo: Fill with proper values
    f["grid_parent_id"] = -np.ones(ds.index.grid_dimensions.shape[0])
    f["grid_particle_count"] = ds.index.grid_particle_count

    ###
    # "data" group -- where we should spend the most time
    ###
    g = f.create_group("data")
    for grid in ds.index.grids:
        # add group for this grid
        grid_group = g.create_group("grid_%010i" % (grid.id - grid._id_offset))
        # add group for the particles on this grid
        particles_group = grid_group.create_group("particles")
        pt_group = particles_group.create_group(particle_type_name)

    yield f

    # close the file when done
    f.close()
"""
Created on Mon May 23 10:17:16 2016

@author: chc
"""

import os as _os

from crikit.data.spectrum import Spectrum as _Spectrum
from crikit.data.spectra import Spectra as _Spectra
from crikit.data.hsi import Hsi as _Hsi

import h5py as _h5py
_h5py.get_config().complex_names = ('Re', 'Im')

import numpy as _np

__all__ = ['hdf_dset_list_rep', 'hdf_is_valid_dsets',
           'hdf_attr_to_dict', 'hdf_import_data']


def hdf_dset_list_rep(prefix, suffixes):
    """
    Create a list of dataset names
    """
    dset_list = []

    assert isinstance(prefix, str)

    for sfx in suffixes:
import six
from six.moves import cPickle

import pytest

from ..common import ut, TestCase, UNICODE_FILENAMES, closed_tempfile
from h5py import File
from h5py.h5py_warnings import H5pyDeprecationWarning
import h5py

try:
    import pathlib
except ImportError:
    pathlib = None


mpi = h5py.get_config().mpi


class TestFileOpen(TestCase):
    """ Feature: Opening files with Python-style modes. """

    def test_default(self):
        """ Default semantics in the presence or absence of a file """
        fname = self.mktemp()

        # No existing file; create a new file and open RW
        with pytest.warns(H5pyDeprecationWarning):
            with File(fname) as f:
                self.assertTrue(f)
import sys
import os
import math

import numpy as np

havehdf5 = False
try:
    import h5py
    havehdf5 = True
except ImportError:
    pass

if not havehdf5:
    print('Warning: h5py not found')

if havehdf5:
    h5py.get_config().complex_names = ('real', 'imag')

filename = sys.argv[1]
if os.path.isdir(filename):
    filename = os.path.join(filename, 'Geometry.hdf5')

# Sniff the first four bytes to decide between HDF5/MAT and plain text
with open(filename, 'rb') as fh:
    magic = fh.read(4)

hdf = len(magic) == 4 and ((magic[0] == 137 and magic[1:4] == b'HDF')
                           or magic[0:3] == b'MAT')

if not hdf:
    # data = np.recfromtxt(filename, names=True)
    data = np.recfromtxt(filename)
else:
    import h5py
    file = h5py.File(filename, 'r')