Example No. 1
    def _save_image_as_h5(self,
                          h5group: Group,
                          image_key: ImageKey,
                          params: SavingParameters
                          ):

        if image_key.name in h5group.keys():
            del h5group[image_key.name]

        img_group = h5group.create_group(image_key.name)
        img_group.attrs[IMAGE_PROJECT_KEY] = True

        polar_image = self._load_data.load_image_data(image_key, ImageDataFlags.POLAR_IMAGE).polar_image

        if params.save_image:
            self._fm.images.set_h5(img_group, image_key, image_key.get_image())

        if params.save_polar_image and polar_image is not None:
            self._fm.polar_images.set_h5(img_group, image_key, polar_image)

        roi_data = self._fm.rois_data[image_key]

        if roi_data:
            self._fm.rois_data.set_h5(img_group, image_key, roi_data)

        geometry = self._fm.geometries[image_key]

        if geometry and params.save_geometries:
            self._fm.geometries.set_h5(img_group, image_key, geometry)
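
The delete-then-recreate step at the top of this method is a common idiom for making HDF5 writes idempotent. A minimal standalone sketch of just that idiom (the function name is hypothetical):

import h5py

def fresh_group(parent: h5py.Group, name: str) -> h5py.Group:
    # Unlink any existing group so the name can be reused. Note that
    # HDF5 does not reclaim the freed space until the file is repacked
    # (e.g. with the h5repack tool).
    if name in parent:
        del parent[name]
    return parent.create_group(name)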
Example No. 2
def write_dict_hierarchy(group: h5py.Group,
                         d: dict,
                         scalars_as_attribs: bool = True):
    """ Write a nested dictionary structure to an HDF file.

    This turns entries that are `dict`s into HDF groups. All other entries need to be
    numbers, numeric arrays, or lists.

    This function is adapted from https://stackoverflow.com/a/44077610.

    Parameters
    ----------
    group
        HDF group where to save the data.
    d
        The data to save.
    scalars_as_attribs
        If True (the default), scalar values are stored as group attributes
        instead of one-element datasets.
    """
    for key, value in d.items():
        if isinstance(value, dict):
            sub_group = group.create_group(key)
            # Propagate the flag so nested scalars are handled consistently.
            write_dict_hierarchy(sub_group, value, scalars_as_attribs)
        else:
            is_seq = hasattr(value, "__len__")
            is_str = isinstance(value, str)
            if is_str:
                value = np.string_(value)
            if (is_seq and not is_str) or not scalars_as_attribs:
                group.create_dataset(key, data=np.atleast_1d(value))
            else:
                group.attrs.create(key, value)
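
A short usage sketch for write_dict_hierarchy (the file name and data are illustrative):

import h5py

with h5py.File("results.h5", "w") as f:
    write_dict_hierarchy(f, {
        "config": {"n_steps": 100, "label": "run-1"},  # dict -> subgroup
        "samples": [0.1, 0.2, 0.3],                    # sequence -> dataset
    })
    # With scalars_as_attribs=True (the default), n_steps and label end
    # up as attributes on the "config" group rather than as datasets.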
Example No. 3
def get_dataset_paths_in_group(group: h5py.Group) -> List[str]:
    paths = []
    for k in group.keys():
        class_ = group.get(k, getclass=True)
        if class_ is h5py.Dataset:
            paths.append(group.get(k).name)
    return paths
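
This helper only inspects the immediate children of the group. If nested datasets should be collected too, h5py's visititems can walk the whole subtree; a possible recursive variant (hypothetical name):

def get_all_dataset_paths(group: h5py.Group) -> List[str]:
    paths = []
    # visititems calls the visitor once per object below `group`.
    def visitor(name, obj):
        if isinstance(obj, h5py.Dataset):
            paths.append(obj.name)
    group.visititems(visitor)
    return paths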
Example No. 4
def to_hdf5_via_pickle(x: object,
                       y: h5py.Group,
                       key: str,
                       compression: Optional[str] = None) -> None:
    """Pickle, convert to numpy array and write to HDF5 dataset."""
    data = np.frombuffer(pickle.dumps(x), dtype=np.byte)
    y.create_dataset(key, data=data, compression=compression)
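
The matching read path is not shown here; a minimal sketch of the inverse, assuming the dataset was written by the function above:

def from_hdf5_via_pickle(y: h5py.Group, key: str) -> object:
    # Read the raw bytes back and unpickle. Only do this with files
    # from trusted sources: unpickling can execute arbitrary code.
    return pickle.loads(y[key][()].tobytes())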
Example No. 5
def add_datasets(json_object: dict, stream_group: h5py.Group):
    for field_name, field_value in json_object.items():
        if isinstance(field_value, dict):
            new_group = stream_group.create_group(field_name)
            add_datasets(field_value, new_group)
        else:
            stream_group.create_dataset(name=field_name, data=field_value)
Example No. 6
    def _write_static_data(self, static_group: h5py.Group) -> None:
        super()._write_static_data(static_group)

        # Write the Flex container info.
        container_group = static_group.create_group("container")
        for key in self._flex_container_command:
            if key == "$type":
                continue
            container_group.create_dataset(
                key, data=[self._flex_container_command[key]])

        # Flatten the actor data and write it.
        for actors, group_name in zip([
                self._solid_actors, self._soft_actors, self._cloth_actors,
                self._fluid_actors
        ], ["solid_actors", "soft_actors", "cloth_actors", "fluid_actors"]):
            actor_data = dict()
            for actor in actors:
                for key in actor.__dict__:
                    if key not in actor_data:
                        actor_data.update({key: []})
                    actor_data[key].append(actor.__dict__[key])
            # Write the data.
            actors_group = static_group.create_group(group_name)
            for key in actor_data:
                actors_group.create_dataset(key, data=actor_data[key])
Example No. 7
    def from_hdf5_data(cls, group: h5py.Group):
        """

        :param group:
        :return:
        """
        energy_group = group.get('energy')
        energy_blocks = EnergyBlocks.from_hdf5_data(energy_group)

        weight_group = group.get('weight')
        weight_blocks = WeightBlocks.from_hdf5_data(weight_group)

        num_walkers_group = group.get('num_walkers')
        num_walkers_blocks = NumWalkersBlocks.from_hdf5_data(num_walkers_group)

        density_group = group.get('density')
        if density_group is not None:
            density_blocks = DensityBlocks.from_hdf5_data(density_group)
        else:
            density_blocks = None

        ssf_group = group.get('ss_factor')
        if ssf_group is not None:
            ssf_block = SSFBlocks.from_hdf5_data(ssf_group)
        else:
            ssf_block = None

        return cls(energy_blocks, weight_blocks, num_walkers_blocks,
                   density_blocks, ssf_block)
Example No. 8
    def _create_or_extend(self, parent_group: h5py.Group, name: str,
                          shape: Tuple, data: np.ndarray, **kwargs):
        """Internal helper function that either creates a dataframe if
        it doesn't exist or it extends it by using the h5py resize
        function.

        :param parent_group: chromosome group
        :param name: name of the dataframe in the group
        :param shape: shape of the added data (not new shape after extending)
        :param data: data to be added
        :param kwargs: passed on to create_dataset only if it doesn't exist
        """
        if name not in parent_group.keys():
            parent_group.create_dataset(name=name,
                                        shape=shape,
                                        data=data,
                                        **kwargs)
        else:
            num_data = data.shape[0] if hasattr(data, "shape") else len(data)
            ds = parent_group[name]
            old_shape = ds.shape
            new_shape = (old_shape[i] + (num_data if i == 0 else 0)
                         for i in range(len(old_shape)))
            ds.resize(new_shape)

            ds[old_shape[0]:] = data

            self.log.debug("Extended from %s to %s" % (old_shape, ds.shape))
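
One detail this helper leaves to the caller: h5py can only resize chunked datasets, so the kwargs of the first call must include a maxshape that is unlimited along axis 0. A hedged usage sketch (grp stands for some open chromosome group):

batch1 = np.arange(10.0)
batch2 = np.arange(5.0)
# The first call must make the dataset extendable along axis 0,
# otherwise the later resize() raises a TypeError.
self._create_or_extend(grp, "values", batch1.shape, batch1,
                       maxshape=(None,), chunks=True)
self._create_or_extend(grp, "values", batch2.shape, batch2)  # appends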
Example No. 9
def _compareAuxData(
    out: OutputWriter,
    refGroup: h5py.Group,
    srcGroup: h5py.Group,
    diffResults: DiffResults,
    exclusions: Sequence[Pattern],
):
    """
    Compare auxiliary datasets, which aren't stored as Parameters on the Composite model.

    Some parts of ARMI directly create HDF5 groups under the time step group to store
    arbitrary data. These still need to be compared. Missing datasets will be treated as
    structure differences and reported.
    """
    data = dict()

    def visitor(name, obj):
        if isinstance(obj, h5py.Dataset):
            data[name] = obj

    refGroup.visititems(visitor)
    refData = data
    data = dict()
    srcGroup.visititems(visitor)
    srcData = data

    n = _compareSets(set(srcData.keys()),
                     set(refData.keys()),
                     out,
                     name="auxiliary dataset")
    diffResults.addStructureDiffs(n)
    matchedSets = set(srcData.keys()) & set(refData.keys())
    for name in matchedSets:
        _diffSimpleData(refData[name], srcData[name], out, diffResults)
Example No. 10
def current_h5_metadata(fid: h5py.Group, dataset_path: str = ""):
    """
    Read metadata entrypoint from h5 collection

    :param fid:
        A h5py.Group that includes the dataset metadata

    :param dataset_path:
        An optional reference (string) to the dataset location

    :raises:
        :MetadataError: Returned when a metadata document couldn't be found for
            dataset provided

    :return:
        A dictionary representation of the dataset metadata
    """

    metadata = fid.get('/{}/{}/{}'.format(DatasetName.METADATA.value,
                                          dataset_path.lstrip('/'),
                                          DatasetName.CURRENT_METADATA.value))

    if not metadata:  # assume h5 collection represents 1 dataset
        metadata = fid.get('/{}/{}'.format(DatasetName.METADATA.value,
                                           DatasetName.CURRENT_METADATA.value))
        if not metadata:
            raise MetadataError(
                "Unable to find metadata entry for dataset: {}:{}".format(
                    fid.filename, dataset_path))

    return yaml.load(metadata[()].item())
Example No. 11
    def load_proc(self, group: h5py.Group):
        """Load the procedure results from the file.

        :param group: HDF5 group holding the saved procedure data.
        :return: The procedure built from the stored configuration.
        """
        model_spec_group = group.get('model_spec')
        model_spec_config = dict(model_spec_group.attrs.items())

        density_spec_group: h5py.Group = group.get('density_spec')
        if density_spec_group is not None:
            density_spec_config = dict(density_spec_group.attrs.items())
        else:
            density_spec_config = None

        ssf_spec_group: h5py.Group = group.get('ssf_spec')
        if ssf_spec_group is not None:
            ssf_spec_config = dict(ssf_spec_group.attrs.items())
        else:
            ssf_spec_config = None

        # Build a config object.
        proc_config = {
            'model_spec': model_spec_config,
            'density_spec': density_spec_config,
            'ssf_spec': ssf_spec_config
        }
        proc_config.update(group.attrs.items())
        return self.build_proc(proc_config)
Example No. 12
def save_model_to_hdf5_group(grp: h5py.Group, net: torch.nn.Module):
    # This will work regardless of whether grp is an h5py.File or a sub-Group.
    for x, y in net.named_parameters():
        if x not in grp:
            # currently, it's not big. So I don't do compression at all, for speed.
            grp.create_dataset(x, data=y.data.cpu().numpy())
            grp.file.flush()
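
A possible read-back counterpart (hypothetical, assuming the group was written by the function above):

def load_model_from_hdf5_group(grp: h5py.Group, net: torch.nn.Module):
    # Copy each stored array back into the matching parameter in place.
    with torch.no_grad():
        for name, param in net.named_parameters():
            if name in grp:
                param.copy_(torch.from_numpy(grp[name][()]))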
Example No. 13
    def from_hdf5(h5py_group: h5py.Group) -> ElectronicEnergy:
        """Constructs a new instance from the data stored in the provided HDF5 group.

        See also :func:`~qiskit_nature.hdf5.HDF5Storable.from_hdf5` for more details.

        Args:
            h5py_group: the HDF5 group from which to load the data.

        Returns:
            A new instance of this class.
        """
        integral_property = IntegralProperty.from_hdf5(h5py_group)

        ret = ElectronicEnergy(list(integral_property), energy_shift=integral_property._shift)

        ret.nuclear_repulsion_energy = h5py_group.attrs.get("nuclear_repulsion_energy", None)
        ret.reference_energy = h5py_group.attrs.get("reference_energy", None)
        ret.orbital_energies = h5py_group.attrs.get("orbital_energies", None)

        if "kinetic" in h5py_group.keys():
            ret.kinetic = ElectronicIntegrals.from_hdf5(
                h5py_group["kinetic"]["OneBodyElectronicIntegrals"]
            )

        if "overlap" in h5py_group.keys():
            ret.overlap = ElectronicIntegrals.from_hdf5(
                h5py_group["overlap"]["OneBodyElectronicIntegrals"]
            )

        return ret
Example No. 14
def _append_data_to_existing_file(inh5: h5py.Group,
                                  outh5: h5py.Group,
                                  track_order: bool = True):
    """
    Internal function to handle appending new data to an existing h5File

    :param inh5:
        Group to read from
    :param outh5:
        Group to write to
    :param track_order:
        Add insertion order tracking to created groups
    """

    if track_order:
        _traversal_step = -1
    else:
        _traversal_step = 1

    def _traverse(root: h5py.Group, offset: str):
        if isinstance(root[offset], h5py.Dataset):
            yield root[offset].name
        elif isinstance(root[offset], h5py.Group):
            for k in list(root[offset].keys())[::_traversal_step]:
                if isinstance(root[os.path.join(offset, k)], h5py.Dataset):
                    yield root[os.path.join(offset, k)].name
                else:
                    for name in _traverse(root, os.path.join(offset, k)):
                        yield name

    md_docs = []
    md_names = []

    # Confirm new non-metadata datasets are missing in output file
    for k in list(inh5.keys()):
        if k in (PUBLIC_NAMESPACE, PRIVATE_NAMESPACE):
            continue
        else:
            for ds_path in _traverse(inh5, k):
                if outh5.get(ds_path):
                    raise RuntimeError(
                        "Dataset {} already exists in file: {}".format(
                            ds_path, outh5.filename))
    for k in list(inh5.keys())[::_traversal_step]:
        if k == PUBLIC_NAMESPACE:
            continue
        elif k == PRIVATE_NAMESPACE:
            for _md in _traverse(inh5, k):
                md_docs.append(YAML.load(inh5[_md][()].item()))
                md_names.append("/".join(_md.split("/")[2:-1]))
        else:
            for ds_path in _traverse(inh5, k):
                if track_order:
                    create_groups(inh5,
                                  ds_path.rsplit("/", 1)[0],
                                  track_order=track_order)
                outh5.create_dataset(ds_path, data=inh5[ds_path])

    write_h5_md(outh5, md_docs, md_names)
Example No. 15
    def _write_static_data(self, static_group: h5py.Group) -> None:
        """
        Write static data to disk after assembling the trial initialization commands.

        :param static_group: The static data group.
        """

        static_group.create_dataset("object_ids", data=self.object_ids)
Example No. 16
def _create_dataset_from_spinner(stream_group: h5py.Group,
                                 nexus_to_spinner_dict: Dict[str,
                                                             QSpinBox]):
    for (nexus_string, ui_element) in nexus_to_spinner_dict.items():
        if ui_element.value() > 0:
            stream_group.create_dataset(nexus_string,
                                        dtype=int,
                                        data=ui_element.value())
Example No. 17
def _compareH5Groups(out: OutputWriter, ref: h5py.Group, src: h5py.Group,
                     name: str) -> Tuple[Sequence[str], int]:
    refGroups = set(ref.keys())
    srcGroups = set(src.keys())

    n = _compareSets(srcGroups, refGroups, out, name)

    return sorted(refGroups & srcGroups), n
Example No. 18
    def _save_node(self, node_group: h5py.Group):
        """Method to save a node to hdf5.

    Args:
      node_group: h5py group where data is saved
    """
        super()._save_node(node_group)
        node_group.create_dataset('tensor', data=self._tensor)
Example No. 19
def set_h5_orbit(group: h5py.Group, orbit: isce3.core.Orbit):
    orbit.save_to_h5(group)
    # orbitType and acceleration not used/contained in Orbit object
    group.create_dataset('orbitType', data=numpy.string_('DOE'))
    dset = group.create_dataset("acceleration",
                                data=numpy.zeros_like(orbit.velocity))
    dset.attrs["units"] = numpy.string_("meters per second squared")
    dset.attrs["description"] = numpy.string_("GPS state vector acceleration")
Example No. 20
    def hdf5_export(self, group: h5py.Group):
        """

        :param group:
        :return:
        """
        # Create the necessary data sets and export data.
        group.create_dataset('totals', data=self.totals)
Example No. 21
    def to_hdf5(self, group: h5py.Group) -> None:
        if isinstance(self.basis, QCBasisSet):
            basis_group = group.require_group("basis")
            self.basis.to_hdf5(basis_group)
        else:
            group.attrs["basis"] = self.basis

        group.attrs["method"] = self.method
Example No. 22
    def _write_static_data(self, static_group: h5py.Group) -> None:
        super()._write_static_data(static_group)

        ## color and scales of primitive objects
        # static_group.create_dataset("target_type", data=self.target_type)
        static_group.create_dataset("drop_type", data=self.drop_type)
        static_group.create_dataset("drop_position", data=xyz_to_arr(self.drop_position))
        static_group.create_dataset("drop_rotation", data=xyz_to_arr(self.drop_rotation))
Example No. 23
    def hdf5_export(self, group: h5py.Group):
        """Export the data to an HDF5 group object.

        :param group:
        :return:
        """
        # Array data go to a dataset.
        group.create_dataset('totals', data=self.totals)
Example No. 24
def save_model_to_hdf5_group(grp: h5py.Group, saved_params: dict):
    # This will work regardless of whether grp is an h5py.File or a sub-Group.
    for x, y in saved_params.items():
        if isinstance(y, dict):
            grp_this = grp.create_group(x)
            save_model_to_hdf5_group(grp_this, y)
        else:
            grp.create_dataset(x, data=y)
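
A sketch of the inverse (hypothetical name), reading the hierarchy back into a nested dict:

def load_params_from_hdf5_group(grp: h5py.Group) -> dict:
    out = {}
    for key, item in grp.items():
        # Subgroups came from nested dicts; datasets hold the leaves.
        if isinstance(item, h5py.Group):
            out[key] = load_params_from_hdf5_group(item)
        else:
            out[key] = item[()]
    return out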
Example No. 25
def _give_h5_name(h5group: Group, name):
    if name not in h5group.keys():
        return name
    num = 0
    while True:
        new_name = '_'.join((name, str(num)))
        if new_name not in h5group.keys():
            return new_name
        num += 1
Example No. 26
    def __write_process_dataset(self, current_group: h5py.Group,
                                df: pd.DataFrame, dataset_level: int):
        primary_keys = self.primary_keys()
        primary_key_fields = primary_keys[dataset_level:]

        # Pad every column to the same length
        padding_dataset, max_len = self.__check_fill_dataset_group_alignment(
            current_group)

        if len(primary_key_fields) == 0 or max_len == 0:
            # Just insert
            insert_df = df
        else:
            update_df = df.dropna(subset=primary_key_fields)
            exists_df = self.__dataset_group_to_dataframe(current_group)
            upsert_df = pd.concat([exists_df, update_df], axis=0)
            upsert_df = upsert_df.drop_duplicates(primary_key_fields,
                                                  keep='last')

            # upsert_df = exists_df.reindex(columns=update_df.columns | exists_df.columns)
            # upsert_df.update(update_df)
            # upsert_df = exists_df.merge(update_df, on=primary_key_fields, how='outer', indicator=True)
            # Remove all old data and re-insert
            self.__delete_all_dataset_in_group(current_group)
            max_len = 0
            insert_df = upsert_df

        for column in insert_df.columns:
            s = insert_df[column]
            np_arr = s.to_numpy()
            if column not in current_group.keys():
                # https://stackoverflow.com/a/40312924/12929244
                if np_arr.dtype.kind == 'O':
                    if max_len > 0:
                        np_arr = np.pad(np_arr, (max_len, 0), pad_with)
                    string_dt = h5py.special_dtype(vlen=str)
                    current_group.create_dataset(column,
                                                 data=np_arr,
                                                 dtype=string_dt,
                                                 maxshape=(None, ),
                                                 chunks=True)
                else:
                    if max_len > 0:
                        np_arr = np.pad(np_arr, (max_len, 0))
                    current_group.create_dataset(column,
                                                 data=np_arr,
                                                 maxshape=(None, ),
                                                 chunks=True)
                print('Create dataset %s, length = %s' %
                      (column, np_arr.shape[0]))
            else:
                append_len = np_arr.shape[0]
                exists_len = current_group[column].shape[0]
                current_group[column].resize((exists_len + append_len, ))
                current_group[column][-append_len:] = np_arr
                print('Append dataset %s, length %s -> %s' %
                      (column, exists_len, (exists_len + append_len)))
Example No. 27
    def populate_group(self, group: h5py.Group):
        group["Role Names"] = ["Raw Data".encode("utf8"), "Prediciton Mask".encode("utf8")] # FIXME! what about other workflows?
        group["StorageVersion"] = "0.2"

        infos_group = group.create_group("infos")
        for lane_index, lane in enumerate(self.lanes):
            lane.populate_group(infos_group.create_group(f"lane{lane_index:04}"))

        _ = group.create_group("local_data")
Example No. 28
def save_create_h5_subgroup(group: h5py.Group, name: str, data=None):
    init_name = name
    i = 1
    while name in group.keys():
        name = f'{init_name}_{i}'
        i += 1
    if data is not None:
        return group.create_dataset(name, data=data)
    else:
        return group.create_group(name)
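
Repeated calls with the same name show the suffixing behavior (a minimal sketch, assuming f is an open h5py.File):

a = save_create_h5_subgroup(f, "scan")               # group "scan"
b = save_create_h5_subgroup(f, "scan")               # group "scan_1"
c = save_create_h5_subgroup(f, "scan", data=[1, 2])  # dataset "scan_2"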
Example No. 29
    def _write_static_data(self, static_group: h5py.Group) -> None:
        super()._write_static_data(static_group)

        # positions and rotations of objects
        static_group.create_dataset(
            "initial_position",
            data=np.stack([xyz_to_arr(p) for p in self.initial_positions], 0))
        static_group.create_dataset(
            "initial_rotation",
            data=np.stack([xyz_to_arr(r) for r in self.initial_rotations], 0))
Example No. 30
    def duplicate_nx_group(self, group_to_duplicate: h5py.Group,
                           new_group_name: str) -> h5py.Group:

        group_to_duplicate.copy(
            dest=group_to_duplicate.parent,
            source=group_to_duplicate,
            name=new_group_name,
        )
        self._emit_file()
        return group_to_duplicate.parent[new_group_name]
Example No. 31
def get_dataset(file: h5py.Group, name: str, rows: int, data_shape: Tuple[int, ...] = (),
                data_type: np.dtype = np.dtype(float)):
    return file.require_dataset(name, (rows,) + data_shape, data_type)
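
require_dataset returns the existing dataset if the name is already present with a matching shape and dtype, and raises a TypeError on a conflict, so this helper is idempotent. A short usage sketch (file name illustrative):

import h5py
import numpy as np

with h5py.File("log.h5", "a") as f:
    # The first call creates the dataset; later calls with the same
    # shape and dtype return the existing one unchanged.
    ds = get_dataset(f, "positions", rows=1000, data_shape=(3,))
    ds[0] = np.array([1.0, 2.0, 3.0])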