def write_to_h5(infile, h5file, h5path='/', mode="a",
                overwrite_data=False, link_type="soft",
                create_dataset_args=None, min_size=500):
    """Write content of a h5py-like object into a HDF5 file.

    :param infile: Path of input file, or :class:`commonh5.File` object
        or :class:`commonh5.Group` object.
    :param h5file: Path of output HDF5 file or HDF5 file handle
        (`h5py.File` object)
    :param str h5path: Target path in HDF5 file in which scan groups are
        created. Default is root (``"/"``)
    :param str mode: Can be ``"r+"`` (read/write, file must exist),
        ``"w"`` (write, existing file is lost), ``"w-"`` (write, fail if
        exists) or ``"a"`` (read/write if exists, create otherwise).
        This parameter is ignored if ``h5file`` is a file handle.
    :param bool overwrite_data: If ``True``, existing groups and datasets
        can be overwritten, if ``False`` they are skipped. This parameter
        is only relevant if ``mode`` is ``"r+"`` or ``"a"``.
    :param str link_type: *"soft"* (default) or *"hard"*
    :param dict create_dataset_args: Dictionary of args you want to pass
        to ``h5py.File.create_dataset``. This allows you to specify
        filters and compression parameters. Don't specify ``name`` and
        ``data``. These arguments are only applied to datasets with at
        least ``min_size`` elements.
    :param int min_size: Minimum number of elements in a dataset to
        apply chunking and compression. Default is 500.

    The structure of the spec data in an HDF5 file is described in the
    documentation of :mod:`silx.io.spech5`.
    """
    writer = Hdf5Writer(h5path=h5path,
                        overwrite_data=overwrite_data,
                        link_type=link_type,
                        create_dataset_args=create_dataset_args,
                        min_size=min_size)

    # both infile and h5file can be either a file handle or a file name:
    # 4 cases
    if not isinstance(h5file, h5py.File) and not is_group(infile):
        with silx.io.open(infile) as h5pylike:
            if not _is_commonh5_group(h5pylike):
                raise IOError("Cannot convert HDF5 file %s to HDF5" % infile)
            with h5py.File(h5file, mode) as h5f:
                writer.write(h5pylike, h5f)
    elif isinstance(h5file, h5py.File) and not is_group(infile):
        with silx.io.open(infile) as h5pylike:
            if not _is_commonh5_group(h5pylike):
                raise IOError("Cannot convert HDF5 file %s to HDF5" % infile)
            writer.write(h5pylike, h5file)
    elif is_group(infile) and not isinstance(h5file, h5py.File):
        if not _is_commonh5_group(infile):
            raise IOError("Cannot convert HDF5 file %s to HDF5"
                          % infile.file.name)
        with h5py.File(h5file, mode) as h5f:
            writer.write(infile, h5f)
    else:
        if not _is_commonh5_group(infile):
            raise IOError("Cannot convert HDF5 file %s to HDF5"
                          % infile.file.name)
        writer.write(infile, h5file)
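# --- Usage sketch (editor's addition) ---------------------------------
# A minimal, hedged example of calling write_to_h5, assuming it is
# exposed as silx.io.convert.write_to_h5 (as in the silx library).
# "scan.dat" and "scan.h5" are hypothetical file names.
def _example_write_to_h5():
    from silx.io.convert import write_to_h5

    write_to_h5(
        "scan.dat",            # input file (hypothetical path)
        "scan.h5",             # output HDF5 file
        h5path="/",            # create scan groups at the file root
        mode="w",              # any existing output file is lost
        create_dataset_args={"compression": "gzip", "chunks": True},
        min_size=500,          # datasets below 500 elements skip these args
    )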
def append_member_to_h5(self, h5like_name, obj):
    """Add one group or one dataset to :attr:`h5f`"""
    h5_name = self.h5path + h5like_name.lstrip("/")
    if is_softlink(obj):
        # links to be created after all groups and datasets
        h5_target = self.h5path + obj.path.lstrip("/")
        self._links.append((h5_name, h5_target))

    elif is_dataset(obj):
        _logger.debug("Saving dataset: " + h5_name)

        member_initially_exists = h5_name in self._h5f

        if self.overwrite_data and member_initially_exists:
            _logger.warning("Overwriting dataset: " + h5_name)
            del self._h5f[h5_name]

        if self.overwrite_data or not member_initially_exists:
            if fabioh5 is not None and \
                    isinstance(obj, fabioh5.FrameData) and \
                    len(obj.shape) > 2:
                # special case of multiframe data:
                # write frame by frame to keep memory usage low
                ds = self._h5f.create_dataset(h5_name,
                                              shape=obj.shape,
                                              dtype=obj.dtype,
                                              **self.create_dataset_args)
                for i, frame in enumerate(obj):
                    ds[i] = frame
            else:
                # fancy arguments don't apply to small datasets
                if obj.size < self.min_size:
                    ds = self._h5f.create_dataset(h5_name, data=obj.value)
                else:
                    ds = self._h5f.create_dataset(h5_name, data=obj.value,
                                                  **self.create_dataset_args)
        else:
            ds = self._h5f[h5_name]

        # add HDF5 attributes
        for key in obj.attrs:
            if self.overwrite_data or key not in ds.attrs:
                ds.attrs.create(key, _attr_utf8(obj.attrs[key]))

        if not self.overwrite_data and member_initially_exists:
            _logger.warning("Not overwriting existing dataset: " + h5_name)

    elif is_group(obj):
        if h5_name not in self._h5f:
            _logger.debug("Creating group: " + h5_name)
            grp = self._h5f.create_group(h5_name)
        else:
            grp = self._h5f[h5_name]

        # add HDF5 attributes
        for key in obj.attrs:
            if self.overwrite_data or key not in grp.attrs:
                grp.attrs.create(key, _attr_utf8(obj.attrs[key]))
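# --- Illustration (editor's addition) ---------------------------------
# The size threshold above follows a common h5py pattern: chunking and
# compression filters only pay off beyond some element count. A
# self-contained sketch of the same idea; MIN_SIZE, the argument dict
# and the file name are assumptions, not part of the original code.
def _example_size_threshold():
    import h5py
    import numpy as np

    MIN_SIZE = 500  # assumed threshold, matching the default above
    create_dataset_args = {"compression": "gzip", "chunks": True}

    data = np.arange(10000, dtype=np.float64)
    with h5py.File("threshold_demo.h5", "w") as h5f:
        if data.size < MIN_SIZE:
            # small dataset: plain contiguous storage
            h5f.create_dataset("data", data=data)
        else:
            # large dataset: apply chunking and compression filters
            h5f.create_dataset("data", data=data, **create_dataset_args)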
def isGroup(item):
    return (isinstance(item, Group) or
            hasattr(item, "keys") or
            is_group(item))
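# --- Illustration (editor's addition) ---------------------------------
# A quick check of the duck-typing branch above, assuming only h5py:
# groups expose a mapping interface ("keys"), datasets do not.
# "demo.h5" is a hypothetical file name.
def _example_is_group_duck_typing():
    import h5py
    import numpy as np

    with h5py.File("demo.h5", "w") as h5f:
        grp = h5f.create_group("entry")
        ds = grp.create_dataset("data", data=np.zeros(3))
        print(hasattr(grp, "keys"))  # True: groups behave like mappings
        print(hasattr(ds, "keys"))   # False: datasets are array-like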
def __init__(self, ffile, node, parent=None, path=None):
    self.__sorting = False
    self.__sorting_list = None
    self.__sorting_order = qt.Qt.AscendingOrder
    # originally wrapped in "with ffile.plock:" (currently disabled)
    self._file = ffile
    self._parent = parent
    if hasattr(node, '_posixPath'):
        self._name = node._posixPath
    else:
        self._name = node.name
    self._type = type(node).__name__
    self._hasChildren = is_group(node)
    self._color = qt.QColor(qt.Qt.black)
    if hasattr(node, 'attrs'):
        attrs = list(node.attrs)
        for cname in ['class', 'NX_class']:
            if cname in attrs:
                nodeattr = node.attrs[cname]
                if sys.version_info < (3,):
                    _type = "%s" % nodeattr
                elif hasattr(nodeattr, "decode"):
                    _type = nodeattr.decode('utf-8')
                else:
                    _type = "%s" % nodeattr
                self._type = _type
                if _type in ["NXdata"]:
                    self._color = qt.QColor(qt.Qt.blue)
                elif "default" in attrs:
                    self._color = qt.QColor(qt.Qt.blue)
                break
    self._children = []
    if hasattr(node, 'dtype'):
        self._dtype = safe_str(node.dtype)
    else:
        self._dtype = ""
    if hasattr(node, 'shape'):
        self._shape = node.shape
    else:
        self._shape = ""
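# --- Illustration (editor's addition) ---------------------------------
# The decode branch above normalizes attribute values that h5py may
# return as bytes. A standalone sketch of that normalization; the
# helper name and the file are assumptions, not part of the original.
def _example_decode_nx_class():
    import h5py

    def attr_to_str(value):
        """Return value as str, UTF-8 decoding bytes-like values."""
        if hasattr(value, "decode"):
            return value.decode("utf-8")
        return "%s" % value

    with h5py.File("demo.h5", "w") as h5f:
        grp = h5f.create_group("entry")
        grp.attrs["NX_class"] = b"NXdata"          # stored as bytes
        print(attr_to_str(grp.attrs["NX_class"]))  # -> "NXdata"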
def __init__(self, ffile, node, parent=None, path=None):
    # originally wrapped in "with ffile.plock:" (currently disabled)
    self._file = ffile
    self._parent = parent
    if hasattr(node, '_posixPath'):
        self._name = node._posixPath
    else:
        self._name = node.name
    self._type = type(node).__name__
    self._hasChildren = is_group(node)
    self._color = qt.QColor(qt.Qt.black)
    if hasattr(node, 'attrs'):
        attrs = list(node.attrs)
        for cname in ['class', 'NX_class']:
            if cname in attrs:
                nodeattr = node.attrs[cname]
                if sys.version_info < (3,):
                    _type = "%s" % nodeattr
                elif hasattr(nodeattr, "decode"):
                    _type = nodeattr.decode('utf-8')
                else:
                    _type = "%s" % nodeattr
                self._type = _type
                if _type in ["NXdata"]:
                    self._color = qt.QColor(qt.Qt.blue)
                elif "default" in attrs:
                    self._color = qt.QColor(qt.Qt.blue)
                break
    self._children = []
    if hasattr(node, 'dtype'):
        self._dtype = safe_str(node.dtype)
    else:
        self._dtype = ""
    if hasattr(node, 'shape'):
        self._shape = node.shape
    else:
        self._shape = ""
def append_member_to_h5(self, h5like_name, obj):
    """Add one group or one dataset to :attr:`h5f`"""
    h5_name = self.h5path + h5like_name.lstrip("/")
    if is_softlink(obj):
        # links to be created after all groups and datasets
        h5_target = self.h5path + obj.path.lstrip("/")
        self._links.append((h5_name, h5_target))

    elif is_dataset(obj):
        _logger.debug("Saving dataset: " + h5_name)

        member_initially_exists = h5_name in self._h5f

        if self.overwrite_data and member_initially_exists:
            _logger.warning("Overwriting dataset: " + h5_name)
            del self._h5f[h5_name]

        if self.overwrite_data or not member_initially_exists:
            # fancy arguments don't apply to small datasets
            if obj.size < self.min_size:
                ds = self._h5f.create_dataset(h5_name, data=obj.value)
            else:
                ds = self._h5f.create_dataset(h5_name, data=obj.value,
                                              **self.create_dataset_args)
        else:
            ds = self._h5f[h5_name]

        # add HDF5 attributes
        for key in obj.attrs:
            if self.overwrite_data or key not in ds.attrs:
                ds.attrs.create(key, _attr_utf8(obj.attrs[key]))

        if not self.overwrite_data and member_initially_exists:
            _logger.warning("Not overwriting existing dataset: " + h5_name)

    elif is_group(obj):
        if h5_name not in self._h5f:
            _logger.debug("Creating group: " + h5_name)
            grp = self._h5f.create_group(h5_name)
        else:
            grp = self._h5f[h5_name]

        # add HDF5 attributes
        for key in obj.attrs:
            if self.overwrite_data or key not in grp.attrs:
                grp.attrs.create(key, _attr_utf8(obj.attrs[key]))
def __init__(self, ffile, node, parent=None, path=None):
    # originally wrapped in "with ffile.plock:" (currently disabled)
    self._file = ffile
    self._parent = parent
    if hasattr(node, '_posixPath'):
        self._name = node._posixPath
    else:
        self._name = node.name
    self._type = type(node).__name__
    self._hasChildren = is_group(node)
    if hasattr(node, 'attrs'):
        attrs = list(node.attrs)
        for cname in ['class', 'NX_class']:
            if cname in attrs:
                nodeattr = node.attrs[cname]
                if sys.version_info < (3,):
                    _type = "%s" % nodeattr
                elif hasattr(nodeattr, "decode"):
                    _type = nodeattr.decode('utf-8')
                else:
                    _type = "%s" % nodeattr
                self._type = _type
                break
    self._children = []
    if hasattr(node, 'dtype'):
        self._dtype = safe_str(node.dtype)
    else:
        self._dtype = ""
    if hasattr(node, 'shape'):
        self._shape = node.shape
    else:
        self._shape = ""
def _is_commonh5_group(grp):
    """Return True if grp is a commonh5 group.

    (h5py.Group objects are not commonh5 groups)
    """
    return is_group(grp) and not isinstance(grp, h5py.Group)
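# --- Illustration (editor's addition) ---------------------------------
# What the check above distinguishes, assuming silx is installed and
# that is_group lives in silx.io.utils (its usual location): a
# commonh5 group is h5py-like but not an h5py.Group instance.
# File names here are hypothetical.
def _example_commonh5_vs_h5py():
    import h5py
    from silx.io import commonh5
    from silx.io.utils import is_group

    cfile = commonh5.File("in-memory.h5", mode="w")  # commonh5 file (a group)
    print(is_group(cfile), isinstance(cfile, h5py.Group))  # True False

    with h5py.File("native.h5", "w") as h5f:
        print(is_group(h5f), isinstance(h5f, h5py.Group))  # True True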