def _save_image_as_h5(self, h5group: Group, image_key: ImageKey, params: SavingParameters):
    if image_key.name in h5group.keys():
        del h5group[image_key.name]
    img_group = h5group.create_group(image_key.name)
    img_group.attrs[IMAGE_PROJECT_KEY] = True

    polar_image = self._load_data.load_image_data(image_key, ImageDataFlags.POLAR_IMAGE).polar_image

    if params.save_image:
        self._fm.images.set_h5(img_group, image_key, image_key.get_image())
    if params.save_polar_image and polar_image is not None:
        self._fm.polar_images.set_h5(img_group, image_key, polar_image)

    roi_data = self._fm.rois_data[image_key]
    if roi_data:
        self._fm.rois_data.set_h5(img_group, image_key, roi_data)

    geometry = self._fm.geometries[image_key]
    if geometry and params.save_geometries:
        self._fm.geometries.set_h5(img_group, image_key, geometry)

def write_dict_hierarchy(group: h5py.Group, d: dict, scalars_as_attribs: bool = True):
    """Write a nested dictionary structure to an HDF5 group.

    Entries that are `dict`s become HDF5 groups. All other entries need to be
    numbers, numeric arrays, lists, or strings.

    This function is adapted from https://stackoverflow.com/a/44077610.

    Parameters
    ----------
    group
        HDF5 group where to save the data.
    d
        The data to save.
    scalars_as_attribs
        If True, single numbers (and strings) are stored as attributes instead
        of datasets.
    """
    for key, value in d.items():
        if isinstance(value, dict):
            sub_group = group.create_group(key)
            # Propagate the scalar-handling choice to nested groups.
            write_dict_hierarchy(sub_group, value, scalars_as_attribs)
        else:
            is_seq = hasattr(value, "__len__")
            is_str = isinstance(value, str)
            if is_str:
                value = np.string_(value)

            if (is_seq and not is_str) or not scalars_as_attribs:
                group.create_dataset(key, data=np.atleast_1d(value))
            else:
                group.attrs.create(key, value)

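# Hedged usage sketch for write_dict_hierarchy above: writes a small nested
# dict to an HDF5 file. The file name and dict contents are illustrative only;
# h5py is assumed to be available as in the function itself.
def _example_write_dict_hierarchy():
    import h5py

    settings = {"run": {"n_steps": 100, "tolerances": [1e-6, 1e-8]}, "label": "demo"}
    with h5py.File("example_settings.h5", "w") as f:
        write_dict_hierarchy(f, settings)
        # "run" becomes a sub-group, "tolerances" a dataset, and the scalar
        # "n_steps" and string "label" become attributes (the default).
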
def get_dataset_paths_in_group(group: h5py.Group) -> List[str]:
    paths = []
    for k in group.keys():
        class_ = group.get(k, getclass=True)
        if class_ is h5py.Dataset:
            paths.append(group.get(k).name)
    return paths

def to_hdf5_via_pickle(x: object, y: h5py.Group, key: str,
                       compression: Optional[str] = None) -> None:
    """Pickle, convert to numpy array and write to HDF5 dataset."""
    data = np.frombuffer(pickle.dumps(x), dtype=np.byte)
    y.create_dataset(key, data=data, compression=compression)

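# Hedged round-trip sketch for to_hdf5_via_pickle above: since the pickled
# object is stored as a byte array, it can be recovered with pickle.loads on
# the raw buffer. The file name, key, and object are illustrative only.
def _example_pickle_roundtrip():
    import h5py
    import pickle

    obj = {"weights": [1.0, 2.5], "label": "demo"}
    with h5py.File("example_pickle.h5", "w") as f:
        to_hdf5_via_pickle(obj, f, "my_object", compression="gzip")
        restored = pickle.loads(f["my_object"][()].tobytes())
        assert restored == obj
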
def add_datasets(json_object: dict, stream_group: h5py.Group):
    for field_name, field_value in json_object.items():
        if isinstance(field_value, dict):
            new_group = stream_group.create_group(field_name)
            add_datasets(field_value, new_group)
        else:
            stream_group.create_dataset(name=field_name, data=field_value)

def _write_static_data(self, static_group: h5py.Group) -> None:
    super()._write_static_data(static_group)

    # Write the Flex container info.
    container_group = static_group.create_group("container")
    for key in self._flex_container_command:
        if key == "$type":
            continue
        container_group.create_dataset(
            key, data=[self._flex_container_command[key]])

    # Flatten the actor data and write it.
    for actors, group_name in zip(
            [self._solid_actors, self._soft_actors, self._cloth_actors, self._fluid_actors],
            ["solid_actors", "soft_actors", "cloth_actors", "fluid_actors"]):
        actor_data = dict()
        for actor in actors:
            for key in actor.__dict__:
                if key not in actor_data:
                    actor_data.update({key: []})
                actor_data[key].append(actor.__dict__[key])
        # Write the data.
        actors_group = static_group.create_group(group_name)
        for key in actor_data:
            actors_group.create_dataset(key, data=actor_data[key])

def from_hdf5_data(cls, group: h5py.Group):
    """Rebuild the blocks data stored in an HDF5 group.

    :param group: HDF5 group holding the stored blocks.
    :return: A new instance assembled from the stored blocks.
    """
    energy_group = group.get('energy')
    energy_blocks = EnergyBlocks.from_hdf5_data(energy_group)

    weight_group = group.get('weight')
    weight_blocks = WeightBlocks.from_hdf5_data(weight_group)

    num_walkers_group = group.get('num_walkers')
    num_walkers_blocks = NumWalkersBlocks.from_hdf5_data(num_walkers_group)

    density_group = group.get('density')
    if density_group is not None:
        density_blocks = DensityBlocks.from_hdf5_data(density_group)
    else:
        density_blocks = None

    ssf_group = group.get('ss_factor')
    if ssf_group is not None:
        ssf_block = SSFBlocks.from_hdf5_data(ssf_group)
    else:
        ssf_block = None

    return cls(energy_blocks, weight_blocks, num_walkers_blocks,
               density_blocks, ssf_block)

def _create_or_extend(self, parent_group: h5py.Group, name: str, shape: Tuple,
                      data: np.ndarray, **kwargs):
    """Internal helper function that either creates a dataset if it doesn't exist
    or extends it by using the h5py resize function.

    :param parent_group: chromosome group
    :param name: name of the dataset in the group
    :param shape: shape of the added data (not new shape after extending)
    :param data: data to be added
    :param kwargs: passed on to create_dataset only if it doesn't exist
    """
    if name not in parent_group.keys():
        # Note: for later resizing to work, kwargs must make the dataset
        # chunked with an unlimited maxshape along the first axis.
        parent_group.create_dataset(name=name, shape=shape, data=data, **kwargs)
    else:
        num_data = data.shape[0] if hasattr(data, "shape") else len(data)
        ds = parent_group[name]
        old_shape = ds.shape
        new_shape = tuple(old_shape[i] + (num_data if i == 0 else 0)
                          for i in range(len(old_shape)))
        ds.resize(new_shape)
        ds[old_shape[0]:] = data
        self.log.debug("Extended from %s to %s" % (old_shape, ds.shape))

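# Hedged standalone sketch of the create-or-extend pattern used by
# _create_or_extend above: the dataset is created chunked with an unlimited
# first axis (maxshape=(None,)) so later writes can resize and append to it.
# File, group, and dataset names are illustrative only.
def _example_create_or_extend_pattern():
    import h5py
    import numpy as np

    with h5py.File("example_extend.h5", "w") as f:
        grp = f.create_group("chr1")
        first = np.arange(5)
        grp.create_dataset("values", shape=first.shape, data=first,
                           maxshape=(None,), chunks=True)
        more = np.arange(5, 10)
        ds = grp["values"]
        old_len = ds.shape[0]
        ds.resize((old_len + more.shape[0],))
        ds[old_len:] = more  # dataset now holds 0..9
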
def _compareAuxData(
    out: OutputWriter,
    refGroup: h5py.Group,
    srcGroup: h5py.Group,
    diffResults: DiffResults,
    exclusions: Sequence[Pattern],
):
    """
    Compare auxiliary datasets, which aren't stored as Parameters on the Composite model.

    Some parts of ARMI directly create HDF5 groups under the time step group to store
    arbitrary data. These still need to be compared. Missing datasets will be treated
    as structure differences and reported.
    """
    data = dict()

    def visitor(name, obj):
        if isinstance(obj, h5py.Dataset):
            data[name] = obj

    refGroup.visititems(visitor)
    refData = data
    data = dict()
    srcGroup.visititems(visitor)
    srcData = data

    n = _compareSets(set(srcData.keys()), set(refData.keys()), out, name="auxiliary dataset")
    diffResults.addStructureDiffs(n)
    matchedSets = set(srcData.keys()) & set(refData.keys())
    for name in matchedSets:
        _diffSimpleData(refData[name], srcData[name], out, diffResults)

def current_h5_metadata(fid: h5py.Group, dataset_path: str = ""):
    """
    Read metadata entrypoint from h5 collection

    :param fid: A h5py.Group that includes the dataset metadata
    :param dataset_path: An optional reference (string) to the dataset location

    :raises MetadataError: when a metadata document couldn't be found for the dataset provided

    :return: A dictionary representation of the dataset metadata
    """
    metadata = fid.get('/{}/{}/{}'.format(
        DatasetName.METADATA.value,
        dataset_path.lstrip('/'),
        DatasetName.CURRENT_METADATA.value))
    if not metadata:
        # assume h5 collection represents 1 dataset
        metadata = fid.get('/{}/{}'.format(
            DatasetName.METADATA.value,
            DatasetName.CURRENT_METADATA.value))
    if not metadata:
        raise MetadataError(
            "Unable to find metadata entry for dataset: {}:{}".format(
                fid.filename, dataset_path))

    return yaml.load(metadata[()].item())

def load_proc(self, group: h5py.Group):
    """Load the procedure results from the file.

    :param group: HDF5 group holding the procedure configuration.
    :return: The procedure built from the loaded configuration.
    """
    model_spec_group = group.get('model_spec')
    model_spec_config = dict(model_spec_group.attrs.items())

    density_spec_group: h5py.Group = group.get('density_spec')
    if density_spec_group is not None:
        density_spec_config = dict(density_spec_group.attrs.items())
    else:
        density_spec_config = None

    ssf_spec_group: h5py.Group = group.get('ssf_spec')
    if ssf_spec_group is not None:
        ssf_spec_config = dict(ssf_spec_group.attrs.items())
    else:
        ssf_spec_config = None

    # Build a config object.
    proc_config = {
        'model_spec': model_spec_config,
        'density_spec': density_spec_config,
        'ssf_spec': ssf_spec_config
    }
    proc_config.update(group.attrs.items())

    return self.build_proc(proc_config)

def save_model_to_hdf5_group(grp: h5py.Group, net: torch.nn.Module):
    # Works whether or not some parameters were already written: existing
    # entries are left untouched.
    for x, y in net.named_parameters():
        if x not in grp:
            # Currently the model is not big, so no compression is used, for speed.
            grp.create_dataset(x, data=y.data.cpu().numpy())
    grp.file.flush()

def from_hdf5(h5py_group: h5py.Group) -> ElectronicEnergy:
    """Constructs a new instance from the data stored in the provided HDF5 group.

    See also :func:`~qiskit_nature.hdf5.HDF5Storable.from_hdf5` for more details.

    Args:
        h5py_group: the HDF5 group from which to load the data.

    Returns:
        A new instance of this class.
    """
    integral_property = IntegralProperty.from_hdf5(h5py_group)

    ret = ElectronicEnergy(list(integral_property), energy_shift=integral_property._shift)

    ret.nuclear_repulsion_energy = h5py_group.attrs.get("nuclear_repulsion_energy", None)
    ret.reference_energy = h5py_group.attrs.get("reference_energy", None)
    ret.orbital_energies = h5py_group.attrs.get("orbital_energies", None)

    if "kinetic" in h5py_group.keys():
        ret.kinetic = ElectronicIntegrals.from_hdf5(
            h5py_group["kinetic"]["OneBodyElectronicIntegrals"]
        )
    if "overlap" in h5py_group.keys():
        ret.overlap = ElectronicIntegrals.from_hdf5(
            h5py_group["overlap"]["OneBodyElectronicIntegrals"]
        )

    return ret

def _append_data_to_existing_file(inh5: h5py.Group, outh5: h5py.Group, track_order: bool = True):
    """
    Internal function to handle appending new data to an existing h5 file

    :param inh5: Group to read from
    :param outh5: Group to write to
    :param track_order: Add insertion order tracking to created groups
    """
    if track_order:
        _traversal_step = -1
    else:
        _traversal_step = 1

    def _traverse(root: h5py.Group, offset: str):
        if isinstance(root[offset], h5py.Dataset):
            yield root[offset].name
        elif isinstance(root[offset], h5py.Group):
            for k in list(root[offset].keys())[::_traversal_step]:
                if isinstance(root[os.path.join(offset, k)], h5py.Dataset):
                    yield root[os.path.join(offset, k)].name
                else:
                    for name in _traverse(root, os.path.join(offset, k)):
                        yield name

    md_docs = []
    md_names = []

    # Confirm new non-metadata datasets are missing in output file
    for k in list(inh5.keys()):
        if k in (PUBLIC_NAMESPACE, PRIVATE_NAMESPACE):
            continue
        else:
            for ds_path in _traverse(inh5, k):
                if outh5.get(ds_path):
                    raise RuntimeError(
                        "Dataset {} already exists in file: {}".format(
                            ds_path, outh5.filename))

    for k in list(inh5.keys())[::_traversal_step]:
        if k == PUBLIC_NAMESPACE:
            continue
        elif k == PRIVATE_NAMESPACE:
            for _md in _traverse(inh5, k):
                md_docs.append(YAML.load(inh5[_md][()].item()))
                md_names.append("/".join(_md.split("/")[2:-1]))
        else:
            for ds_path in _traverse(inh5, k):
                if track_order:
                    create_groups(inh5, ds_path.rsplit("/", 1)[0],
                                  track_order=track_order)
                outh5.create_dataset(ds_path, data=inh5[ds_path])

    write_h5_md(outh5, md_docs, md_names)

def _write_static_data(self, static_group: h5py.Group) -> None:
    """
    Write static data to disk after assembling the trial initialization commands.

    :param static_group: The static data group.
    """
    static_group.create_dataset("object_ids", data=self.object_ids)

def _create_dataset_from_spinner(stream_group: h5py.Group,
                                 nexus_to_spinner_dict: Dict[str, QSpinBox]):
    for (nexus_string, ui_element) in nexus_to_spinner_dict.items():
        if ui_element.value() > 0:
            stream_group.create_dataset(nexus_string, dtype=int, data=ui_element.value())

def _compareH5Groups(
    out: OutputWriter, ref: h5py.Group, src: h5py.Group, name: str
) -> Tuple[Sequence[str], int]:
    refGroups = set(ref.keys())
    srcGroups = set(src.keys())
    n = _compareSets(srcGroups, refGroups, out, name)
    return sorted(refGroups & srcGroups), n

def _save_node(self, node_group: h5py.Group):
    """Method to save a node to hdf5.

    Args:
        node_group: h5py group where data is saved
    """
    super()._save_node(node_group)
    node_group.create_dataset('tensor', data=self._tensor)

def set_h5_orbit(group: h5py.Group, orbit: isce3.core.Orbit):
    orbit.save_to_h5(group)

    # orbitType and acceleration not used/contained in Orbit object
    group.create_dataset('orbitType', data=numpy.string_('DOE'))

    dset = group.create_dataset("acceleration", data=numpy.zeros_like(orbit.velocity))
    dset.attrs["units"] = numpy.string_("meters per second squared")
    dset.attrs["description"] = numpy.string_("GPS state vector acceleration")

def hdf5_export(self, group: h5py.Group):
    """Export the data to an HDF5 group object.

    :param group: HDF5 group where the data is exported.
    """
    # Create the necessary data sets and export data.
    group.create_dataset('totals', data=self.totals)

def to_hdf5(self, group: h5py.Group) -> None:
    if isinstance(self.basis, QCBasisSet):
        basis_group = group.require_group("basis")
        self.basis.to_hdf5(basis_group)
    else:
        group.attrs["basis"] = self.basis

    group.attrs["method"] = self.method

def _write_static_data(self, static_group: h5py.Group) -> None:
    super()._write_static_data(static_group)

    ## color and scales of primitive objects
    # static_group.create_dataset("target_type", data=self.target_type)
    static_group.create_dataset("drop_type", data=self.drop_type)
    static_group.create_dataset("drop_position", data=xyz_to_arr(self.drop_position))
    static_group.create_dataset("drop_rotation", data=xyz_to_arr(self.drop_rotation))

def hdf5_export(self, group: h5py.Group):
    """Export the data to an HDF5 group object.

    :param group:
    :return:
    """
    # Array data go to a dataset.
    group.create_dataset('totals', data=self.totals)

def save_model_to_hdf5_group(grp: h5py.Group, saved_params: dict):
    # Works for arbitrarily nested parameter dicts: nested dicts become
    # sub-groups, everything else becomes a dataset.
    for x, y in saved_params.items():
        if isinstance(y, dict):
            grp_this = grp.create_group(x)
            save_model_to_hdf5_group(grp_this, y)
        else:
            grp.create_dataset(x, data=y)

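# Hedged usage sketch for the dict-based save_model_to_hdf5_group above:
# nested dicts become nested groups, array leaves become datasets. File name
# and parameter names are illustrative only.
def _example_save_params_dict():
    import h5py
    import numpy as np

    params = {"encoder": {"weight": np.zeros((4, 4)), "bias": np.zeros(4)},
              "step": np.array(100)}
    with h5py.File("example_params.h5", "w") as f:
        save_model_to_hdf5_group(f, params)
        # f["encoder"]["weight"], f["encoder"]["bias"] and f["step"] now exist.
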
def _give_h5_name(h5group: Group, name):
    """Return `name` if it is free in `h5group`, otherwise the first free
    numbered variant `name_<num>`."""
    if name not in h5group.keys():
        return name
    num = 0
    while True:
        new_name = '_'.join((name, str(num)))
        if new_name not in h5group.keys():
            return new_name
        num += 1

def __write_process_dataset(self, current_group: h5py.Group, df: pd.DataFrame, dataset_level: int):
    primary_keys = self.primary_keys()
    primary_key_fields = primary_keys[dataset_level:]

    # Make every column the same length
    padding_dataset, max_len = self.__check_fill_dataset_group_alignment(current_group)

    if len(primary_key_fields) == 0 or max_len == 0:
        # Just insert
        insert_df = df
    else:
        update_df = df.dropna(subset=primary_key_fields)
        exists_df = self.__dataset_group_to_dataframe(current_group)

        upsert_df = pd.concat([exists_df, update_df], axis=0)
        upsert_df = upsert_df.drop_duplicates(primary_key_fields, keep='last')

        # upsert_df = exists_df.reindex(columns=update_df.columns | exists_df.columns)
        # upsert_df.update(update_df)
        # upsert_df = exists_df.merge(update_df, on=primary_key_fields, how='outer', indicator=True)

        # Remove all old data and re-insert
        self.__delete_all_dataset_in_group(current_group)
        max_len = 0
        insert_df = upsert_df

    for column in insert_df.columns:
        s = insert_df[column]
        np_arr = s.to_numpy()

        if column not in current_group.keys():
            # https://stackoverflow.com/a/40312924/12929244
            if np_arr.dtype.kind == 'O':
                if max_len > 0:
                    np_arr = np.pad(np_arr, (max_len, 0), pad_with)
                string_dt = h5py.special_dtype(vlen=str)
                current_group.create_dataset(column, data=np_arr, dtype=string_dt,
                                             maxshape=(None,), chunks=True)
            else:
                if max_len > 0:
                    np_arr = np.pad(np_arr, (max_len, 0))
                current_group.create_dataset(column, data=np_arr,
                                             maxshape=(None,), chunks=True)
            print('Create dataset %s, length = %s' % (column, np_arr.shape[0]))
        else:
            append_len = np_arr.shape[0]
            exists_len = current_group[column].shape[0]
            current_group[column].resize((exists_len + append_len,))
            current_group[column][-append_len:] = np_arr
            print('Append dataset %s, length %s -> %s' % (column, exists_len, (exists_len + append_len)))

def populate_group(self, group: h5py.Group):
    # FIXME! what about other workflows?
    group["Role Names"] = ["Raw Data".encode("utf8"), "Prediciton Mask".encode("utf8")]
    group["StorageVersion"] = "0.2"
    infos_group = group.create_group("infos")
    for lane_index, lane in enumerate(self.lanes):
        lane.populate_group(infos_group.create_group(f"lane{lane_index:04}"))
    _ = group.create_group("local_data")

def save_create_h5_subgroup(group: h5py.Group, name: str, data=None):
    """Create a dataset (if `data` is given) or a sub-group under `group`,
    appending a numeric suffix to `name` if it is already taken."""
    init_name = name
    i = 1
    while name in group.keys():
        name = f'{init_name}_{i}'
        i += 1
    if data is not None:
        return group.create_dataset(name, data=data)
    else:
        return group.create_group(name)

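# Hedged usage sketch for save_create_h5_subgroup above: repeated saves under
# the same name get numeric suffixes instead of overwriting. The file and
# names are illustrative only.
def _example_save_create_h5_subgroup():
    import h5py
    import numpy as np

    with h5py.File("example_subgroups.h5", "w") as f:
        save_create_h5_subgroup(f, "scan", data=np.arange(3))  # dataset "scan"
        save_create_h5_subgroup(f, "scan", data=np.arange(3))  # dataset "scan_1"
        save_create_h5_subgroup(f, "results")                  # group "results"
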
def _write_static_data(self, static_group: h5py.Group) -> None:
    super()._write_static_data(static_group)

    # positions and rotations of objects
    static_group.create_dataset(
        "initial_position",
        data=np.stack([xyz_to_arr(p) for p in self.initial_positions], 0))
    static_group.create_dataset(
        "initial_rotation",
        data=np.stack([xyz_to_arr(r) for r in self.initial_rotations], 0))

def duplicate_nx_group(self, group_to_duplicate: h5py.Group, new_group_name: str) -> h5py.Group:
    group_to_duplicate.copy(
        dest=group_to_duplicate.parent,
        source=group_to_duplicate,
        name=new_group_name,
    )
    self._emit_file()
    return group_to_duplicate.parent[new_group_name]

def get_dataset(file: h5py.Group, name: str, rows: int,
                data_shape: Tuple[int, ...] = (),
                data_type: np.dtype = np.dtype(float)):
    return file.require_dataset(name, (rows,) + data_shape, data_type)

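# Hedged usage sketch for get_dataset above: require_dataset opens the dataset
# if it already exists with a compatible shape/dtype and creates it otherwise.
# File, dataset name, and shapes are illustrative only.
def _example_get_dataset():
    import h5py
    import numpy as np

    with h5py.File("example_rows.h5", "w") as f:
        ds = get_dataset(f, "positions", rows=100, data_shape=(3,),
                         data_type=np.dtype(np.float32))
        ds[0] = np.array([1.0, 2.0, 3.0], dtype=np.float32)
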