def test_crystaldata2phase(self):
    """A Phase object is correctly returned.

    The crystal data group of the EMsoft test file is converted to a
    dictionary and passed to ``_crystaldata2phase()``. The returned
    phase's name, space group number, color and structure (lattice
    parameters, atom positions, occupancies, isotropic displacement
    parameters and elements) are compared against expected values.
    """
    with File(EMSOFT_FILE, mode="r") as f:
        xtal_dict = hdf5group2dict(f["CrystalData"])
        phase = _crystaldata2phase(xtal_dict)

    assert phase.name == ""
    assert phase.space_group.number == 140
    assert phase.color == "tab:blue"

    structure = phase.structure
    assert np.allclose(
        structure.lattice.abcABG(), [0.5949, 0.5949, 0.5821, 90, 90, 90]
    )
    assert np.allclose(
        structure.xyz, [[0.1587, 0.6587, 0], [0, 0, 0.25]], atol=1e-4
    )
    assert np.allclose(structure.occupancy, [1, 1])
    assert np.allclose(structure.Bisoequiv, [0.5] * 2)

    # np.compare_chararrays was removed from NumPy's main namespace in
    # NumPy 2.0; np.char.compare_chararrays is the same function and is
    # available in both NumPy 1.x and 2.x.
    expected_elements = np.array(["13", "29"], dtype="|S2")
    assert np.char.compare_chararrays(
        structure.element,
        expected_elements,
        "==",
        rstrip=False,
    ).all()
def test_crystaldata2phase_single_atom(self):
    """A Phase object with a one-atom structure is returned when the
    crystal data describes a single atom only.
    """
    with File(EMSOFT_FILE, mode="r") as f:
        xtal_dict = hdf5group2dict(f["CrystalData"])

        # Reduce the crystal data to one atom: keep only the first
        # column of the atom data (as a trailing length-1 axis) and the
        # first entry of the atom types
        first_atom_column = xtal_dict["AtomData"][:, 0]
        xtal_dict["AtomData"] = first_atom_column[..., np.newaxis]
        xtal_dict["Atomtypes"] = xtal_dict["Atomtypes"][0]
        xtal_dict["Natomtypes"] = 1

        n_atoms = len(_crystaldata2phase(xtal_dict).structure)

    assert n_atoms == 1
def file_reader(
    filename: str,
    scan_size: Union[None, int, Tuple[int, ...]] = None,
    lazy: bool = False,
    **kwargs,
) -> List[dict]:
    """Read dynamically simulated electron backscatter diffraction
    patterns from EMsoft's format produced by their EMEBSD.f90 program.

    Parameters
    ----------
    filename
        Full file path of the HDF file.
    scan_size
        Scan size in number of patterns in width and height. If an
        integer is passed, the patterns are reshaped to
        (scan_size, sy, sx). If a sequence is passed, they are reshaped
        to scan_size + (sy, sx). If None (default), the patterns keep
        the shape they have in the file.
    lazy
        Open the data lazily without actually reading the data from disk
        until requested. Allows opening datasets larger than available
        memory. Default is False.
    kwargs :
        Keyword arguments passed to h5py.File. ``mode`` defaults to
        "r".

    Returns
    -------
    signal_dict_list: list of dicts
        A list with one dictionary with the keys "data", "axes",
        "metadata", "original_metadata" and "xmap".

    Notes
    -----
    The file is closed before returning unless ``lazy`` is True, in
    which case it is left open. If reading fails, the file is always
    closed before the exception propagates.
    """
    mode = kwargs.pop("mode", "r")
    f = File(filename, mode=mode, **kwargs)

    # Only a successful lazy read needs the file handle after returning
    keep_open = False
    try:
        _check_file_format(f)

        # Read original metadata
        omd = hdf5group2dict(
            f["/"], data_dset_names=["EBSDPatterns"], recursive=True
        )

        # Set metadata and original metadata dictionaries.
        # os.path.basename() also handles the platform's own path
        # separator, unlike splitting on "/" only.
        original_filename = os.path.basename(f.filename)
        md = _get_metadata(omd)
        md.update({
            "Signal": {"signal_type": "EBSD", "record_by": "image"},
            "General": {
                "title": original_filename.split(".")[0],
                "original_filename": original_filename,
            },
        })
        scan = {"metadata": md, "original_metadata": omd}

        # Read patterns
        dataset = f["EMData/EBSD/EBSDPatterns"]
        if lazy:
            chunks = "auto" if dataset.chunks is None else dataset.chunks
            patterns = da.from_array(dataset, chunks=chunks)
        else:
            patterns = np.asanyarray(dataset)

        # Reshape data if desired
        ebsd_nml = omd["NMLparameters"]["EBSDNameList"]
        sy = ebsd_nml["numsy"]
        sx = ebsd_nml["numsx"]
        if scan_size is not None:
            if isinstance(scan_size, (int, np.integer)):
                new_shape = (scan_size, sy, sx)
            else:
                # tuple() so that lists are accepted as well as tuples
                new_shape = tuple(scan_size) + (sy, sx)
            patterns = patterns.reshape(new_shape)

        scan["data"] = patterns

        # Set navigation and signal axes
        pixel_size = ebsd_nml["delta"]
        units = ["px", "um", "um"]
        names = ["x", "dy", "dx"]
        scales = np.array([1, pixel_size, pixel_size])
        if patterns.ndim == 4:
            # A second navigation axis is present
            units = ["px"] + units
            names = ["y"] + names
            scales = np.append([1], scales)
        scan["axes"] = [
            {
                "size": patterns.shape[i],
                "index_in_array": i,
                "name": names[i],
                "scale": scales[i],
                "offset": 0,
                "units": units[i],
            }
            for i in range(patterns.ndim)
        ]

        # Get crystal map. The phase is named after the crystal file,
        # without its directory and extension.
        phase = _crystaldata2phase(hdf5group2dict(f["CrystalData"]))
        xtal_fname = f["EMData/EBSD/xtalname"][()][0].decode().split("/")[-1]
        phase.name, _ = os.path.splitext(xtal_fname)
        scan["xmap"] = CrystalMap(
            rotations=Rotation.from_euler(f["EMData/EBSD/EulerAngles"][()]),
            phase_list=PhaseList(phase),
        )

        keep_open = lazy
    finally:
        if not keep_open:
            f.close()

    return [scan]
def test_hdf5group2dict_raises_deprecation_warning(self):
    """Passing ``lazy=True`` to ``hdf5group2dict()`` emits a
    VisibleDeprecationWarning with a message matching "The 'lazy' ".
    """
    # Open the file in a context manager so the handle is closed even if
    # the expected warning is not raised
    with File(KIKUCHIPY_FILE, mode="r") as f:
        with pytest.warns(VisibleDeprecationWarning, match="The 'lazy' "):
            _ = hdf5group2dict(group=f["/"], lazy=True)
def file_reader(
    filename: str,
    energy_range: Optional[range] = None,
    projection: str = "spherical",
    hemisphere: str = "north",
    lazy: bool = False,
    **kwargs,
) -> List[dict]:
    """Read electron backscatter diffraction master patterns from
    EMsoft's HDF5 file format [Callahan2013]_.

    Parameters
    ----------
    filename
        Full file path of the HDF file.
    energy_range
        Range of beam energies for patterns to read. If None is passed
        (default), all available energies are read.
    projection
        Projection(s) to read. Options are "spherical" (default) or
        "lambert".
    hemisphere
        Projection hemisphere(s) to read. Options are "north" (default),
        "south" or "both". If "both", these will be stacked in the
        vertical navigation axis.
    lazy
        Open the data lazily without actually reading the data from disk
        until requested. Allows opening datasets larger than available
        memory. Default is False.
    kwargs :
        Keyword arguments passed to h5py.File. ``mode`` defaults to
        "r".

    Returns
    -------
    signal_dict_list: list of dicts
        A list with one dictionary with the keys "axes", "data",
        "metadata" and "original_metadata" (an empty dictionary).

    Notes
    -----
    The file is closed before returning unless ``lazy`` is True, in
    which case it is left open. If reading fails, the file is always
    closed before the exception propagates.

    References
    ----------
    .. [Callahan2013] P. G. Callahan and M. De Graef, "Dynamical
        Electron Backscatter Diffraction Patterns. Part I: Pattern
        Simulations," *Microscopy and Microanalysis* **19** (2013), doi:
        https://doi.org/10.1017/S1431927613001840.
    """
    # Function-scope import so this block does not rely on a module
    # level import of os
    import os

    mode = kwargs.pop("mode", "r")
    f = File(filename, mode=mode, **kwargs)

    # Only a successful lazy read needs the file handle after returning
    keep_open = False
    try:
        # Check if the file is valid
        _check_file_format(f)

        # Set metadata dictionary.
        # os.path.basename() also handles the platform's own path
        # separator, unlike splitting on "/" only.
        original_filename = os.path.basename(f.filename)
        md = {
            "Signal": {
                "signal_type": "EBSDMasterPattern",
                "record_by": "image",
            },
            "General": {
                "title": original_filename.split(".")[0],
                "original_filename": original_filename,
            },
            "Simulation": {
                "EBSD_master_pattern": _namelist_params_2_metadata(
                    hdf5group2dict(f["NMLparameters"], recursive=True)
                )
            },
            "Sample": {
                "Phases": {
                    "1": _crystal_data_2_metadata(
                        hdf5group2dict(f["CrystalData"])
                    )
                },
            },
        }

        # Get data shape and slices
        data_group = f["EMData/EBSDmaster"]
        energies = data_group["EkeVs"][()]
        data_shape, data_slices = _get_data_shape_slices(
            npx=f["NMLparameters/EBSDMasterNameList/npx"][()],
            energies=energies,
            energy_range=energy_range,
        )
        i_min = data_slices[0].start
        i_min = 0 if i_min is None else i_min
        min_energy = energies[i_min]

        # Account for the Lambert projections being stored as having a
        # 1-dimension before the energy dimension
        # TODO: Figure out why EMsoft v4.3 have two Lambert projections
        #  in both northern and southern hemisphere.
        if projection.lower() == "lambert":
            data_slices = (slice(0, 1),) + data_slices

        # Get HDF5 data sets
        datasets = _get_datasets(
            data_group=data_group,
            projection=projection,
            hemisphere=hemisphere,
        )
        data_shape = (len(datasets),) + data_shape

        # Set up data reading
        data_kwargs = {}
        if lazy:
            first = datasets[0]
            if first.chunks is None or first.shape != data_shape:
                data_kwargs["chunks"] = "auto"
            else:
                data_kwargs["chunks"] = first.chunks
            data_read_func = da.from_array
            data_stack_func = da.stack
        else:
            data_read_func = np.asanyarray
            data_stack_func = np.stack

        # Read data
        data = data_read_func(datasets[0][data_slices], **data_kwargs)
        if data_shape[0] == 2:
            data = data_stack_func(
                [
                    data,
                    data_read_func(datasets[1][data_slices], **data_kwargs),
                ],
                axis=0,
            )

        # Remove 1-dimensions
        data = data.squeeze()

        # Axes scales. A file with a single energy has no energy step to
        # measure, so fall back to a scale of 1 instead of raising an
        # IndexError.
        if len(energies) > 1:
            energy_scale = energies[1] - energies[0]
        else:
            energy_scale = 1
        scales = np.array([1, energy_scale, 1, 1])
        ny, nx, sy, sx = data_shape
        names = ["y", "energy", "height", "width"]
        units = ["hemisphere", "keV", "px", "px"]
        # NOTE: unary minus binds tighter than //, so these offsets are
        # (-sy) // 2 and (-sx) // 2 (floor division of the negative
        # size). Kept as is to not change the returned axes.
        offsets = [0, min_energy, -sy // 2, -sx // 2]

        # Skip the hemisphere and energy axes if they have size 1, since
        # they were squeezed out of the data above
        dim_idx = []
        if ny != 1:
            dim_idx.append(0)
        if nx != 1:
            dim_idx.append(1)
        dim_idx += [2, 3]

        # Create axis object
        axes = [
            {
                "size": data.shape[i],
                "index_in_array": i,
                "name": names[j],
                "scale": scales[j],
                "offset": offsets[j],
                "units": units[j],
            }
            for i, j in zip(range(data.ndim), dim_idx)
        ]

        md["Simulation"]["EBSD_master_pattern"]["Master_pattern"].update(
            {"projection": projection, "hemisphere": hemisphere}
        )

        output = {
            "axes": axes,
            "data": data,
            "metadata": md,
            "original_metadata": {},
        }

        keep_open = lazy
    finally:
        if not keep_open:
            f.close()

    return [output]
def file_reader(
    filename: str,
    energy: Optional[range] = None,
    projection: str = "stereographic",
    hemisphere: str = "north",
    lazy: bool = False,
    **kwargs,
) -> List[dict]:
    """Read electron backscatter diffraction master patterns from
    EMsoft's HDF5 file format :cite:`callahan2013dynamical`.

    Parameters
    ----------
    filename
        Full file path of the HDF file.
    energy
        Desired beam energy or energy range. If None is passed
        (default), all available energies are read.
    projection
        Projection(s) to read. Options are "stereographic" (default) or
        "lambert".
    hemisphere
        Projection hemisphere(s) to read. Options are "north" (default),
        "south" or "both". If "both", these will be stacked in the
        vertical navigation axis.
    lazy
        Open the data lazily without actually reading the data from disk
        until requested. Allows opening datasets larger than available
        memory. Default is False.
    kwargs :
        Keyword arguments passed to h5py.File. ``mode`` defaults to
        "r".

    Returns
    -------
    signal_dict_list: list of dicts
        A list with one dictionary with the keys "axes", "data",
        "metadata", "original_metadata", "phase", "projection" and
        "hemisphere".

    Notes
    -----
    The file is closed before returning unless ``lazy`` is True, in
    which case it is left open. If reading fails, the file is always
    closed before the exception propagates.
    """
    mode = kwargs.pop("mode", "r")
    f = File(filename, mode=mode, **kwargs)

    # Only a successful lazy read needs the file handle after returning
    keep_open = False
    try:
        # Check if the file is valid
        _check_file_format(f)

        # Set metadata and original metadata dictionary.
        # os.path.basename() also handles the platform's own path
        # separator, unlike splitting on "/" only.
        original_filename = os.path.basename(f.filename)
        md = {
            "Signal": {
                "signal_type": "EBSDMasterPattern",
                "record_by": "image",
            },
            "General": {
                "title": original_filename.split(".")[0],
                "original_filename": original_filename,
            },
        }
        nml_params = hdf5group2dict(f["NMLparameters"], recursive=True)

        # Get phase information and add it to both the original metadata
        # and a Phase object
        crystal_data = hdf5group2dict(f["CrystalData"])
        nml_params["CrystalData"] = crystal_data
        phase = _crystaldata2phase(crystal_data)

        # Get the phase name from the crystal file name, i.e. the last
        # path component without its extension. (Index 0 of
        # os.path.split() is the directory part, not the file name.)
        try:
            xtal_name = os.path.basename(nml_params["MCCLNameList"]["xtalname"])
            phase_name = os.path.splitext(xtal_name)[0]
        except KeyError:
            phase_name = None
        phase.name = phase_name

        # Get data shape and slices
        data_group = f["EMData/EBSDmaster"]
        energies = data_group["EkeVs"][()]
        data_shape, data_slices = _get_data_shape_slices(
            npx=nml_params["EBSDMasterNameList"]["npx"],
            energies=energies,
            energy=energy,
        )
        i_min = data_slices[0].start
        i_min = 0 if i_min is None else i_min
        min_energy = energies[i_min]

        # Get HDF5 data sets
        datasets = _get_datasets(
            data_group=data_group,
            projection=projection,
            hemisphere=hemisphere,
        )

        # TODO: Data shape and slices are easier to handle if the reader
        #  was a class (in addition to file_reader()) instead of a
        #  series of function
        is_lambert = projection.lower() == "lambert"
        dataset_shape = data_shape
        if is_lambert:
            # Lambert data sets have an extra leading dimension of size
            # "numset", which is summed over after reading
            data_slices = (slice(None, None),) + data_slices
            data_shape = (data_group["numset"][:][0],) + data_shape
        data_shape = (len(datasets),) + data_shape

        # Set up data reading
        data_kwargs = {}
        if lazy:
            first = datasets[0]
            if first.chunks is None or first.shape != dataset_shape:
                data_kwargs["chunks"] = "auto"
            else:
                data_kwargs["chunks"] = first.chunks
            data_read_func = da.from_array
            data_stack_func = da.stack
        else:
            data_read_func = np.asanyarray
            data_stack_func = np.stack

        # Read data
        data = data_read_func(datasets[0][data_slices], **data_kwargs)
        if data_shape[0] == 2:
            data = data_stack_func(
                [
                    data,
                    data_read_func(datasets[1][data_slices], **data_kwargs),
                ],
                axis=0,
            )

        if is_lambert:
            # Sum over the "numset" dimension, which comes after the
            # hemisphere dimension if both hemispheres were stacked, and
            # drop it from the data shape
            if hemisphere.lower() == "both":
                sum_axis = 1
                data_shape = (data_shape[0],) + data_shape[2:]
            else:
                sum_axis = 0
                data_shape = data_shape[1:]
            # Cast back since summing may promote the data type
            data = data.sum(axis=sum_axis).astype(data.dtype)

        # Remove 1-dimensions
        data = data.squeeze()

        # Axes scales
        energy_scale = nml_params["MCCLNameList"]["Ebinsize"]
        scales = np.array([1, energy_scale, 1, 1])
        ny, nx, sy, sx = data_shape
        names = ["hemisphere", "energy", "height", "width"]
        units = ["", "keV", "px", "px"]
        # NOTE: unary minus binds tighter than //, so these offsets are
        # (-sy) // 2 and (-sx) // 2 (floor division of the negative
        # size). Kept as is to not change the returned axes.
        offsets = [0, min_energy, -sy // 2, -sx // 2]

        # Skip the hemisphere and energy axes if they have size 1, since
        # they were squeezed out of the data above
        dim_idx = []
        if ny != 1:
            dim_idx.append(0)
        if nx != 1:
            dim_idx.append(1)
        dim_idx += [2, 3]

        # Create axis object
        axes = [
            {
                "size": data.shape[i],
                "index_in_array": i,
                "name": names[j],
                "scale": scales[j],
                "offset": offsets[j],
                "units": units[j],
            }
            for i, j in zip(range(data.ndim), dim_idx)
        ]

        output = {
            "axes": axes,
            "data": data,
            "metadata": md,
            "original_metadata": nml_params,
            "phase": phase,
            "projection": projection,
            "hemisphere": hemisphere,
        }

        keep_open = lazy
    finally:
        if not keep_open:
            f.close()

    return [output]