def test_write_fresh(save_path):
    signal = hs.signals.Signal2D(
        (255 * np.random.rand(10, 3, 5, 5)).astype(np.uint8))
    signal.save(save_path, overwrite=True)
    sig_reload = hs.load(save_path)
    np.testing.assert_equal(signal.data, sig_reload.data)
    header = sarray2dict(get_default_header())
    header.update({
        'NX': 3, 'NY': 10,
        'DP_SZ': 5,
        'SX': 1, 'SY': 1,
        'SDP': 100,
        'Data_offset_2': 10 * 3 + header['Data_offset_1'],
        'Note': '',
    })
    header['Data_offset_2'] += header['Data_offset_2'] % 16
    assert (sig_reload.original_metadata.blockfile_header.as_dictionary() ==
            header)
def _read_label(cls, unf_file):
    # Unpacking function for 4 byte floats:
    unpack = partial(unpack_from_intbytes, '<f')
    rec_length = np.fromfile(unf_file, dtype='<i', count=1)[0]  # label length
    label = sarray2dict(
        np.fromfile(unf_file, dtype=cls.LABEL_DTYPES, count=1))
    label['SEMPER'] = ''.join([str(chr(l)) for l in label['SEMPER']])
    assert label['SEMPER'] == 'Semper'
    # Process dimensions:
    for key in ['NCOL', 'NROW', 'NLAY', 'ICCOLN', 'ICROWN', 'ICLAYN']:
        value = 256 ** 2 * label.pop(key + 'H') + \
            256 * label[key][0] + label[key][1]
        label[key] = value
    # Process date:
    date = '{}-{}-{} {}:{}:{}'.format(label['DATE'][0] + 1900,
                                      *label['DATE'][1:])
    label['DATE'] = date
    # Process range:
    if label['NCRANG'] == 255:
        range_min = unpack(label['RANGE'][:4])
        range_max = unpack(label['RANGE'][4:8])
        range_string = '{:.6g},{:.6g}'.format(range_min, range_max)
    else:
        range_string = ''.join(
            [str(chr(l)) for l in label['RANGE'][:label['NCRANG']]])
    label['RANGE'] = range_string
    # Process real coordinates:
    x0 = unpack(label.pop('X0V0'))
    dx = unpack(label.pop('DXV1'))
    y0 = unpack(label.pop('Y0V2'))
    dy = unpack(label.pop('DYV3'))
    z0 = unpack(label.pop('Z0V4'))
    dz = unpack(label.pop('DZV5'))
    if label['REALCO'] == 1:
        label.update({'X0V0': x0, 'Y0V2': y0, 'Z0V4': z0,
                      'DXV1': dx, 'DYV3': dy, 'DZV5': dz})
    # Process additional commands (unused, not sure about the purpose):
    label['DATAV6'] = unpack(label['DATAV6'])
    label['DATAV7'] = unpack(label['DATAV7'])
    # Process title:
    label['TITLE'] = ''.join(
        [str(chr(l)) for l in label['TITLE'][:label['NTITLE']]])
    # Process units:
    for key in ['XUNIT', 'YUNIT', 'ZUNIT']:
        label[key] = ''.join([chr(l) for l in label[key]]).replace('\x00', '')
    # Sanity check: the trailing record length must match the leading one.
    assert np.fromfile(unf_file, dtype='<i4', count=1)[0] == rec_length
    return label
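# Illustrative sketch, not part of the reader: the packed SEMPER dimension
# fields handled in the loop above combine a high byte (stored under
# key + 'H') with two further bytes. The byte values here are made up.
high, b0, b1 = 1, 2, 3
ncol = 256 ** 2 * high + 256 * b0 + b1
assert ncol == 66051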
def ser_reader(filename, objects=None, lazy=False, only_valid_data=False):
    """Reads the information from the file and returns it in the HyperSpy
    required format.

    """
    header, data = load_ser_file(filename)
    record_by = guess_record_by(header['DataTypeID'])
    ndim = int(header['NumberDimensions'])
    date, time = None, None
    if objects is not None:
        objects_dict = convert_xml_to_dict(objects[0])
        date, time = _get_date_time(objects_dict.ObjectInfo.AcquireDate)
    if "PositionY" in data.dtype.names and len(data['PositionY']) > 1 and \
            (data['PositionY'][0] == data['PositionY'][1]):
        # The spatial dimensions are stored in F order, i.e. X, Y, ...
        order = "F"
    else:
        # The spatial dimensions are stored in C order, i.e. ..., Y, X
        order = "C"
    if ndim == 0 and header["ValidNumberElements"] != 0:
        # The calibration of the axes is not stored in the header, so we
        # try to guess it from the position coordinates.
        array_shape, axes = get_axes_from_position(header=header, data=data)
    else:
        axes = []
        array_shape = [None, ] * int(ndim)
        spatial_axes = ["x", "y"][:ndim]
        for i in range(ndim):
            idim = 1 + i if order == "C" else ndim - i
            if (record_by == "spectrum" or
                    header['Dim-%i_DimensionSize' % (i + 1)][0] != 1):
                units = (header['Dim-%i_Units' % (idim)][0].decode('utf-8')
                         if header['Dim-%i_UnitsLength' % (idim)] > 0
                         else t.Undefined)
                if units == "meters":
                    name = (spatial_axes.pop() if order == "F"
                            else spatial_axes.pop(-1))
                else:
                    name = t.Undefined
                axes.append({
                    'offset': header['Dim-%i_CalibrationOffset' % idim][0],
                    'scale': header['Dim-%i_CalibrationDelta' % idim][0],
                    'units': units,
                    'size': header['Dim-%i_DimensionSize' % idim][0],
                    'name': name,
                })
                array_shape[i] = header['Dim-%i_DimensionSize' % idim][0]
    # Deal with the case where TotalNumberElements does not equal
    # ValidNumberElements for ndim == 1.
    if ndim == 1 and (header['TotalNumberElements'] !=
                      header['ValidNumberElements'][0]) and only_valid_data:
        if header['ValidNumberElements'][0] == 1:
            # No need for a navigation dimension
            array_shape = []
            axes = []
        else:
            array_shape[0] = header['ValidNumberElements'][0]
            axes[0]['size'] = header['ValidNumberElements'][0]
    # Spectral dimension
    if record_by == "spectrum":
        axes.append({
            'offset': data['CalibrationOffset'][0],
            'scale': data['CalibrationDelta'][0],
            'size': data['ArrayLength'][0],
            'index_in_array': header['NumberDimensions'][0]
        })
        # FEI seems to use the international system of units (SI) for the
        # energy scale (eV).
        axes[-1]['units'] = 'eV'
        axes[-1]['name'] = 'Energy'
        array_shape.append(data['ArrayLength'][0])
    elif record_by == 'image':
        if objects is not None:
            units = _guess_units_from_mode(objects_dict, header)
        else:
            units = "meters"
        # Y axis
        axes.append({
            'name': 'y',
            'offset': data['CalibrationOffsetY'][0] -
            data['CalibrationElementY'][0] * data['CalibrationDeltaY'][0],
            'scale': data['CalibrationDeltaY'][0],
            'units': units,
            'size': data['ArraySizeY'][0],
        })
        array_shape.append(data['ArraySizeY'][0])
        # X axis
        axes.append({
            'name': 'x',
            'offset': data['CalibrationOffsetX'][0] -
            data['CalibrationElementX'][0] * data['CalibrationDeltaX'][0],
            'scale': data['CalibrationDeltaX'][0],
            'size': data['ArraySizeX'][0],
            'units': units,
        })
        array_shape.append(data['ArraySizeX'][0])
    # FEI seems to use the international system of units (SI) for the
    # spatial scale. However, we prefer to work in nm.
    for axis in axes:
        if axis['units'] == 'meters':
            axis['units'] = 'nm'
            axis['scale'] *= 10 ** 9
        elif axis['units'] == '1/meters':
            axis['units'] = '1 / nm'
            axis['scale'] /= 10 ** 9
    # Remove Nones from array_shape caused by squeezing size 1 dimensions
    array_shape = [dim for dim in array_shape if dim is not None]
    if lazy:
        from dask import delayed
        from dask.array import from_delayed
        val = delayed(load_only_data, pure=True)(
            filename, array_shape, record_by, len(axes),
            only_valid_data=only_valid_data)
        dc = from_delayed(val, shape=array_shape, dtype=data['Array'].dtype)
    else:
        dc = load_only_data(filename, array_shape, record_by, len(axes),
                            data=data, header=header,
                            only_valid_data=only_valid_data)
    original_metadata = OrderedDict()
    header_parameters = sarray2dict(header)
    sarray2dict(data, header_parameters)
    # We remove the Array key to save memory by avoiding duplication
    del header_parameters['Array']
    original_metadata['ser_header_parameters'] = header_parameters
    metadata = {'General': {
        'original_filename': os.path.split(filename)[1]},
        "Signal": {
            'signal_type': "",
            'record_by': record_by},
    }
    if date is not None and time is not None:
        metadata['General']['date'] = date
        metadata['General']['time'] = time
    dictionary = {
        'data': dc,
        'metadata': metadata,
        'axes': axes,
        'original_metadata': original_metadata,
        'mapping': mapping}
    return dictionary
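# Illustrative usage sketch for the reader above; "map_1.ser" is a
# hypothetical filename. With lazy=True the data comes back as a dask
# array built from the delayed load_only_data call in the function.
result = ser_reader("map_1.ser", lazy=True, only_valid_data=True)
print(result["axes"])      # axis calibrations, already converted to nm / eV
arr = result["data"].compute()  # materialize the dask array when needed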
def file_reader(filename, endianess='<', load_to_memory=True, mmap_mode='c',
                **kwds):
    _logger.debug("Reading blockfile: %s" % filename)
    metadata = {}
    # Makes sure we open in the right mode:
    if '+' in mmap_mode or ('write' in mmap_mode and
                            'copyonwrite' != mmap_mode):
        f = open(filename, 'r+b')
    else:
        f = open(filename, 'rb')
    _logger.debug("File opened")
    # Get header
    header = np.fromfile(f, dtype=get_header_dtype_list(endianess), count=1)
    if header['MAGIC'][0] not in magics:
        warnings.warn("Blockfile has unrecognized header signature. "
                      "Will attempt to read, but correctness is not "
                      "guaranteed!")
    header = sarray2dict(header)
    note = f.read(header['Data_offset_1'] - f.tell())
    note = note.strip(b'\x00')
    header['Note'] = note.decode()
    _logger.debug("File header: " + str(header))
    NX, NY = int(header['NX']), int(header['NY'])
    DP_SZ = int(header['DP_SZ'])
    if header['SDP']:
        SDP = 100. / header['SDP']
    else:
        SDP = Undefined
    original_metadata = {'blockfile_header': header}
    # Get data:
    # A virtual BF/DF image is stored first
    # offset1 = header['Data_offset_1']
    # f.seek(offset1)
    # data_pre = np.array(f.read(NX * NY), dtype=endianess + 'u1'
    #                     ).squeeze().reshape((NX, NY), order='C').T
    # Then comes the actual blockfile
    offset2 = header['Data_offset_2']
    if load_to_memory:
        f.seek(offset2)
        data = np.fromfile(f, dtype=endianess + 'u1')
    else:
        data = np.memmap(f, mode=mmap_mode, offset=offset2,
                         dtype=endianess + 'u1')
    try:
        data = data.reshape((NY, NX, DP_SZ * DP_SZ + 6))
    except ValueError:
        warnings.warn(
            'Blockfile header dimensions larger than file size! '
            'Will attempt to load by zero padding incomplete frames.')
        # Data is stored DP by DP:
        pw = [(0, NX * NY * (DP_SZ * DP_SZ + 6) - data.size)]
        data = np.pad(data, pw, mode='constant')
        data = data.reshape((NY, NX, DP_SZ * DP_SZ + 6))
    # Every frame is preceded by a 6 byte sequence (AA 55, and then a 4 byte
    # integer specifying the frame number)
    data = data[:, :, 6:]
    data = data.reshape((NY, NX, DP_SZ, DP_SZ), order='C').squeeze()
    units = ['nm', 'nm', 'cm', 'cm']
    names = ['y', 'x', 'dy', 'dx']
    scales = [header['SY'], header['SX'], SDP, SDP]
    date, time = _from_serial_date(header['Acquisition_time'])
    metadata = {'General': {'original_filename': os.path.split(filename)[1],
                            'date': date,
                            'time': time,
                            'notes': header['Note']},
                "Signal": {'signal_type': "diffraction",
                           'record_by': 'image'},
                }
    # Create the axis objects for each axis
    dim = data.ndim
    axes = [
        {
            'size': data.shape[i],
            'index_in_array': i,
            'name': names[i],
            'scale': scales[i],
            'offset': 0.0,
            'units': units[i],
        }
        for i in range(dim)]
    dictionary = {'data': data,
                  'axes': axes,
                  'metadata': metadata,
                  'original_metadata': original_metadata,
                  'mapping': mapping, }
    f.close()
    return [dictionary, ]
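# Illustrative size bookkeeping behind the reshape/zero-padding logic above:
# each of the NX * NY frames stores DP_SZ * DP_SZ one-byte pixels preceded
# by a 6 byte marker (AA 55 plus a 4 byte frame number). Header values here
# are made up.
NX, NY, DP_SZ = 150, 150, 144
payload = NX * NY * (DP_SZ * DP_SZ + 6)
print(payload)  # bytes expected after Data_offset_2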
def file_reader(filename, scan_shape=None, scan_start_frame=0,
                winding_scan_axis=None, hysteresis=0, lazy=True,
                rechunking="auto", **kwds):
    """
    TVIPS stream file reader for in-situ and 4D STEM data.

    Parameters
    ----------
    filename : str
        Path to the first file of the recorder series (the one ending in
        _000.tvips).
    scan_shape : str or tuple of int
        By default the data is loaded as an image stack (1 navigation
        axis). If it concerns a 4D-STEM dataset, the (..., scan_y, scan_x)
        dimension can be provided. "auto" can also be selected, in which
        case the rotidx information in the frame headers will be used to
        try to reconstruct the scan. Additional navigation axes must be
        prepended.
    scan_start_frame : int
        First frame where the scan starts. Ignored if scan_shape = "auto".
    winding_scan_axis : str
        "x" or "y" if the scan was performed with winding scan along an
        axis, as opposed to flyback scan. By default (None), flyback scan
        is assumed with "x" the fast and "y" the slow scan directions.
    hysteresis : int
        Only applicable if winding_scan_axis is not None. This parameter
        allows every second column or row to be shifted to correct for
        hysteresis that occurs during a winding scan.
    lazy : bool
        Whether to load the data as a dask array (default) or directly
        into memory.
    rechunking : bool, str, or dict
        If set to False, each tvips file is a single chunk. For a better
        experience working with the dataset, an automatic rechunking is
        recommended (default). If set to anything else, e.g. a dictionary,
        the value will be passed to the chunks argument of
        dask.array.rechunk.
    """
    # check whether we start at the first tvips file
    _is_valid_first_tvips_file(filename)
    # get all other filenames in case they exist
    other_files = []
    basename = filename[:-9]  # last bit: 000.tvips
    file_index = 1
    _, ext = os.path.splitext(filename)
    while True:
        fn = basename + "{:03d}{}".format(file_index, ext)
        if not os.path.exists(fn):
            break
        other_files.append(fn)
        file_index += 1
    # parse the header from the first file
    with open(filename, "rb") as f:
        f.seek(0)
        # read the main header in file 0
        header = np.fromfile(f, dtype=TVIPS_RECORDER_GENERAL_HEADER, count=1)
        dtype = np.dtype(f"u{header['bitsperpixel'][0]//8}")
        dimx = header["dimx"][0]
        dimy = header["dimy"][0]
        # the size of the frame header varies with version
        if header["version"][0] == 1:
            increment = 12  # pragma: no cover
        elif header["version"][0] == 2:
            increment = header["frameheaderbytes"][0]
        else:
            raise NotImplementedError(
                f"This version {header['version'][0]} is not yet supported"
                " in HyperSpy. Please report this as an issue at "
                "https://github.com/hyperspy/hyperspy/issues."
            )  # pragma: no cover
        frame_header_dt = np.dtype(TVIPS_RECORDER_FRAME_HEADER)
        # the record must consume fewer bytes than reported in the main
        # header
        if increment < frame_header_dt.itemsize:
            raise ValueError(
                "The frame header record consumes more bytes than stated "
                "in the main header"
            )  # pragma: no cover
        # save metadata
        original_metadata = {'tvips_header': sarray2dict(header)}
        # create a custom dtype for the memmap, padding the frame header as
        # required
        extra_bytes = increment - frame_header_dt.itemsize
        record_dtype = TVIPS_RECORDER_FRAME_HEADER.copy()
        if extra_bytes > 0:
            record_dtype.append(("extra", bytes, extra_bytes))
        record_dtype.append(("data", dtype, (dimy, dimx)))
    # memmap the data
    records_000 = np.memmap(filename, mode="r", dtype=record_dtype,
                            offset=header["size"][0])
    # the array data
    all_array_data = [records_000["data"]]
    # in case we also want the frame header metadata later
    metadata_keys = np.array(TVIPS_RECORDER_FRAME_HEADER)[:, 0]
    metadata_000 = records_000[metadata_keys]
    all_metadata = [metadata_000]
    # also load data from the other files
    for i in other_files:
        # no offset on the other files
        records = np.memmap(i, mode="r", dtype=record_dtype)
        all_metadata.append(records[metadata_keys])
        all_array_data.append(records["data"])
    if lazy:
        data_stack = da.concatenate(all_array_data, axis=0)
    else:
        data_stack = np.concatenate(all_array_data, axis=0)
    # extract some units/scales/offsets of the DPs or images
    mode = all_metadata[0]["mode"][0]
    DPU = "1/nm" if mode == 2 else "nm"
    SDP = header["pixelsize"][0]
    offsetx = header["offsetx"][0]
    offsety = header["offsety"][0]
    # modify the data if there is scan information; we construct a 2D array
    # of indices to slice the data_stack
    if scan_shape is not None:
        # try to deduce start and stop of the scan based on the rotator
        # index
        if scan_shape == "auto":
            record_idxs = np.concatenate([i["rotidx"] for i in all_metadata])
            scan_start_frame, scan_stop_frame = _find_auto_scan_start_stop(
                record_idxs)
            if scan_start_frame is None or scan_stop_frame is None:
                raise ValueError(
                    "Scan start and stop information could not be "
                    "automatically determined. Please supply a scan_shape "
                    "and scan_start_frame."
                )  # pragma: no cover
            total_scan_frames = record_idxs[scan_stop_frame]  # last rotator
            scan_dim = int(np.sqrt(total_scan_frames))
            if not np.allclose(scan_dim, np.sqrt(total_scan_frames)):
                raise ValueError(
                    "Scan was not square, please supply a scan_shape and "
                    "start_frame.")
            scan_shape = (scan_dim, scan_dim)
            # there may be discontinuities which must be filled up
            indices = _guess_scan_index_grid(
                record_idxs, scan_start_frame,
                scan_stop_frame).reshape(scan_shape)
        # scan shape and start are provided
        else:
            total_scan_frames = np.prod(scan_shape)
            max_frame_index = np.prod(data_stack.shape[:-2])
            final_frame = scan_start_frame + total_scan_frames
            if final_frame > max_frame_index:
                raise ValueError(
                    f"Shape {scan_shape} requires image index "
                    f"{final_frame - 1} which is out of bounds. Final "
                    f"frame index: {max_frame_index - 1}.")
            indices = np.arange(scan_start_frame,
                                final_frame).reshape(scan_shape)
        # with winding scan, every second column or row must be inverted;
        # due to hysteresis there is also a predictable offset
        if winding_scan_axis is not None:
            if winding_scan_axis in ["x", 0]:
                indices[..., ::2, :] = indices[..., ::2, :][..., :, ::-1]
                indices[..., ::2, :] = np.roll(indices[..., ::2, :],
                                               hysteresis, axis=-1)
            elif winding_scan_axis in ["y", 1]:
                indices[..., :, ::2] = indices[..., :, ::2][..., ::-1, :]
                indices[..., :, ::2] = np.roll(indices[..., :, ::2],
                                               hysteresis, axis=-2)
            else:
                raise ValueError("Invalid winding scan axis")
        with dask.config.set(**{'array.slicing.split_large_chunks': True}):
            data_stack = data_stack[indices.ravel()]
            data_stack = data_stack.reshape(*indices.shape, dimy, dimx)
        units = (indices.ndim - 2) * [""] + ['nm', 'nm', DPU, DPU]
        names = (indices.ndim - 2) * [""] + ['y', 'x', 'dy', 'dx']
        # no scale information is stored in the scan!
        scales = (indices.ndim - 2) * [1] + [1, 1, SDP, SDP]
        offsets = (indices.ndim - 2) * [0] + [0, 0, offsety, offsetx]
        # Create the axis objects for each axis
        dim = data_stack.ndim
        axes = [{
            'size': data_stack.shape[i],
            'index_in_array': i,
            'name': names[i],
            'scale': scales[i],
            'offset': offsets[i],
            'units': units[i],
            'navigate': True if i < len(scan_shape) else False,
        } for i in range(dim)]
    else:
        # we load as a regular image stack
        units = ['s', DPU, DPU]
        names = ['time', 'dy', 'dx']
        times = np.concatenate(
            [i["timestamp"] + i["ms"] / 1000 for i in all_metadata])
        timescale = 1 if times.shape[0] <= 0 else times[1] - times[0]
        scales = [timescale, SDP, SDP]
        offsets = [times[0], offsety, offsetx]
        # Create the axis objects for each axis
        dim = data_stack.ndim
        axes = [{
            'size': data_stack.shape[i],
            'index_in_array': i,
            'name': names[i],
            'scale': scales[i],
            'offset': offsets[i],
            'units': units[i],
            'navigate': True if i == 0 else False,
        } for i in range(dim)]
    dtobj = datetime.fromtimestamp(all_metadata[0]["timestamp"][0])
    date = dtobj.date().isoformat()
    time = dtobj.time().isoformat()
    current = all_metadata[0]["fcurrent"][0]
    stagex = all_metadata[0]["stagex"][0]
    stagey = all_metadata[0]["stagey"][0]
    stagez = all_metadata[0]["stagez"][0]
    stagealpha = all_metadata[0]["stagea"][0]
    stagebeta = all_metadata[0]["stageb"][0]
    # mag = all_metadata[0]["mag"][0]  # TODO: it is unclear what this
    # value is
    focus = all_metadata[0]["objective"][0]
    metadata = {
        'General': {
            'original_filename': os.path.split(filename)[1],
            'date': date,
            'time': time,
            'time_zone': "UTC",
        },
        "Acquisition_instrument": {
            "TEM": {
                "magnification": header["magtotal"][0],
                "beam_energy": header["ht"][0],
                "beam_current": current,
                "defocus": focus,
                "Stage": {
                    "tilt_alpha": stagealpha,
                    "tilt_beta": stagebeta,
                    "x": stagex,
                    "y": stagey,
                    "z": stagez,
                },
            },
        }
    }
    if lazy:
        if rechunking:
            if rechunking == "auto":
                navdims = data_stack.ndim - 2
                chunks = {ax_index: "auto" for ax_index in range(navdims)}
                chunks[navdims] = None
                chunks[navdims + 1] = None
            else:
                chunks = rechunking
            data_stack = data_stack.rechunk(chunks)
    if mode == 2:
        metadata["Signal"] = {"signal_type": "diffraction"}
    # TODO: at the moment HyperSpy doesn't have a signal type for mode==1,
    # imaging
    dictionary = {
        'data': data_stack,
        'axes': axes,
        'metadata': metadata,
        'original_metadata': original_metadata,
        'mapping': {},
    }
    return [dictionary, ]
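# Illustrative usage sketch for the TVIPS reader above; "scan_000.tvips"
# is a hypothetical first file of a recorder series. With
# scan_shape="auto" the scan grid is reconstructed from the rotator
# indices stored in the frame headers.
dicts = file_reader("scan_000.tvips", scan_shape="auto", lazy=True)
data = dicts[0]["data"]   # dask array, (scan_y, scan_x, dy, dx)
axes = dicts[0]["axes"]   # axis dictionaries, including 'navigate' flags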
def spc_reader(filename, endianess='<', load_all_spc=False, **kwargs):
    """
    Read data from an SPC spectrum specified by filename.

    Parameters
    ----------
    filename : str
        Name of SPC file to read
    endianess : char
        Byte-order of data to read
    load_all_spc : bool
        Switch to control if all of the .spc header is read, or just the
        important parts for import into HyperSpy
    **kwargs
        Remaining arguments are passed to the Numpy ``memmap`` function

    Returns
    -------
    list
        list with dictionary of signal information to be passed back to
        hyperspy.io.load_with_reader
    """
    with open(filename, 'rb') as f:
        _logger.debug(' Reading {}'.format(filename))
        spc_header = __get_spc_header(f, endianess, load_all_spc)
        spc_dict = sarray2dict(spc_header)
        original_metadata = {'spc_header': spc_dict}
        nz = original_metadata['spc_header']['numPts']
        data_offset = original_metadata['spc_header']['dataStart']
        mode = kwargs.pop('mode', 'c')
        lazy = kwargs.pop('lazy', False)
        if lazy:
            mode = 'r'
        # Read data from file into a numpy memmap object
        data = np.memmap(f, mode=mode, offset=data_offset, dtype='u4',
                         shape=(1, nz), **kwargs).squeeze()
    # Create the energy axis dictionary:
    energy_axis = {
        'size': data.shape[0],
        'index_in_array': 0,
        'name': 'Energy',
        'scale': original_metadata['spc_header']['evPerChan'] / 1000.0,
        'offset': original_metadata['spc_header']['startEnergy'],
        'units': 'keV',
    }
    # Assign metadata for spectrum:
    metadata = {'General': {'original_filename': os.path.split(filename)[1],
                            'title': 'EDS Spectrum'},
                "Signal": {'signal_type': "EDS_SEM",
                           'record_by': 'spectrum'},
                }
    metadata = _add_spc_metadata(metadata, spc_dict)
    dictionary = {'data': data,
                  'axes': [energy_axis],
                  'metadata': metadata,
                  'original_metadata': original_metadata}
    return [dictionary, ]
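# Illustrative usage sketch for spc_reader above; "spectrum.spc" is a
# hypothetical filename for an EDS spectrum exported from EDAX TEAM.
d = spc_reader("spectrum.spc", load_all_spc=True)[0]
print(d["axes"][0]["scale"], d["axes"][0]["units"])  # keV per channel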
def file_reader(filename, endianess='<', **kwds):
    metadata = {}
    f = open(filename, 'rb')
    std_header = np.fromfile(f, dtype=get_std_dtype_list(endianess),
                             count=1)
    fei_header = None
    if std_header['NEXT'] / 1024 == 128:
        print("It seems to contain an extended FEI header")
        fei_header = np.fromfile(f, dtype=get_fei_dtype_list(endianess),
                                 count=1024)
    if f.tell() == 1024 + std_header['NEXT']:
        print("The FEI header was correctly loaded")
    else:
        print("There was a problem reading the extended header")
        f.seek(1024 + std_header['NEXT'])
        fei_header = None
    NX, NY, NZ = std_header['NX'], std_header['NY'], std_header['NZ']
    data = np.memmap(f, mode='c', offset=f.tell(),
                     dtype=get_data_type(std_header['MODE'], endianess)
                     ).squeeze().reshape((NX, NY, NZ), order='F').T
    original_metadata = {'std_header': sarray2dict(std_header)}
    if fei_header is not None:
        fei_dict = sarray2dict(fei_header,)
        del fei_dict['empty']
        original_metadata['fei_header'] = fei_dict
    dim = len(data.shape)
    if fei_header is None:
        # The scale is in Angstroms; we convert it to nm
        scales = [10 * float(std_header['Zlen'] / std_header['MZ'])
                  if float(std_header['MZ']) != 0 else 1,
                  10 * float(std_header['Ylen'] / std_header['MY'])
                  if float(std_header['MY']) != 0 else 1,
                  10 * float(std_header['Xlen'] / std_header['MX'])
                  if float(std_header['MX']) != 0 else 1, ]
        offsets = [10 * float(std_header['ZORIGIN']),
                   10 * float(std_header['YORIGIN']),
                   10 * float(std_header['XORIGIN']), ]
    else:
        # FEI does not use the standard header to store the scale.
        # It does store the spatial scale in pixel_size, one per angle,
        # in meters.
        scales = [1, ] + [fei_header['pixel_size'][0] * 10 ** 9, ] * 2
        offsets = [0, ] * 3
    units = [Undefined, 'nm', 'nm']
    names = ['z', 'y', 'x']
    metadata = {'General': {'original_filename': os.path.split(filename)[1]},
                "Signal": {'signal_type': "",
                           'record_by': 'image'},
                }
    # Create the axis objects for each axis
    axes = [
        {
            'size': data.shape[i],
            'index_in_array': i,
            'name': names[i + 3 - dim],
            'scale': scales[i + 3 - dim],
            'offset': offsets[i + 3 - dim],
            'units': units[i + 3 - dim],
        }
        for i in range(dim)]
    dictionary = {'data': data,
                  'axes': axes,
                  'metadata': metadata,
                  'original_metadata': original_metadata, }
    return [dictionary, ]
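# Illustrative usage sketch for the MRC reader above; "tomo.mrc" is a
# hypothetical filename. Axes come back as z, y, x with nm scales derived
# from the standard or extended FEI header.
d = file_reader("tomo.mrc")[0]
print(d["data"].shape, [ax["name"] for ax in d["axes"]])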
def ser_reader(filename, objects=None, *args, **kwds):
    """Reads the information from the file and returns it in the HyperSpy
    required format.

    """
    header, data = load_ser_file(filename)
    record_by = guess_record_by(header['DataTypeID'])
    ndim = int(header['NumberDimensions'])
    date, time = None, None
    if objects is not None:
        objects_dict = convert_xml_to_dict(objects[0])
        date, time = _get_date_time(objects_dict.ObjectInfo.AcquireDate)
    if "PositionY" in data.dtype.names and len(data['PositionY']) > 1 and \
            (data['PositionY'][0] == data['PositionY'][1]):
        # The spatial dimensions are stored in F order, i.e. X, Y, ...
        order = "F"
    else:
        # The spatial dimensions are stored in C order, i.e. ..., Y, X
        order = "C"
    if ndim == 0 and header["ValidNumberElements"] != 0:
        # The calibration of the axes is not stored in the header, so we
        # try to guess it from the position coordinates.
        array_shape, axes = get_axes_from_position(header=header, data=data)
    else:
        axes = []
        array_shape = [None, ] * int(ndim)
        spatial_axes = ["x", "y"][:ndim]
        for i in range(ndim):
            idim = 1 + i if order == "C" else ndim - i
            if (record_by == "spectrum" or
                    header['Dim-%i_DimensionSize' % (i + 1)][0] != 1):
                units = (header['Dim-%i_Units' % (idim)][0].decode('utf-8')
                         if header['Dim-%i_UnitsLength' % (idim)] > 0
                         else t.Undefined)
                if units == "meters":
                    name = (spatial_axes.pop() if order == "F"
                            else spatial_axes.pop(-1))
                else:
                    name = t.Undefined
                axes.append({
                    'offset': header['Dim-%i_CalibrationOffset' % idim][0],
                    'scale': header['Dim-%i_CalibrationDelta' % idim][0],
                    'units': units,
                    'size': header['Dim-%i_DimensionSize' % idim][0],
                    'name': name,
                })
                array_shape[i] = header['Dim-%i_DimensionSize' % idim][0]
    # Spectral dimension
    if record_by == "spectrum":
        axes.append({
            'offset': data['CalibrationOffset'][0],
            'scale': data['CalibrationDelta'][0],
            'size': data['ArrayLength'][0],
            'index_in_array': header['NumberDimensions'][0]
        })
        # FEI seems to use the international system of units (SI) for the
        # energy scale (eV).
        axes[-1]['units'] = 'eV'
        axes[-1]['name'] = 'Energy'
        array_shape.append(data['ArrayLength'][0])
    elif record_by == 'image':
        if objects is not None:
            units = _guess_units_from_mode(objects_dict, header)
        else:
            units = "meters"
        # Y axis
        axes.append({
            'name': 'y',
            'offset': data['CalibrationOffsetY'][0] -
            data['CalibrationElementY'][0] * data['CalibrationDeltaY'][0],
            'scale': data['CalibrationDeltaY'][0],
            'units': units,
            'size': data['ArraySizeY'][0],
        })
        array_shape.append(data['ArraySizeY'][0])
        # X axis
        axes.append({
            'name': 'x',
            'offset': data['CalibrationOffsetX'][0] -
            data['CalibrationElementX'][0] * data['CalibrationDeltaX'][0],
            'scale': data['CalibrationDeltaX'][0],
            'size': data['ArraySizeX'][0],
            'units': units,
        })
        array_shape.append(data['ArraySizeX'][0])
    # FEI seems to use the international system of units (SI) for the
    # spatial scale. However, we prefer to work in nm.
    for axis in axes:
        if axis['units'] == 'meters':
            axis['units'] = 'nm'
            axis['scale'] *= 10 ** 9
        elif axis['units'] == '1/meters':
            axis['units'] = '1/nm'
            axis['scale'] /= 10 ** 9
    # Remove Nones from array_shape caused by squeezing size 1 dimensions
    array_shape = [dim for dim in array_shape if dim is not None]
    lazy = kwds.pop('lazy', False)
    if lazy:
        from dask import delayed
        from dask.array import from_delayed
        val = delayed(load_only_data, pure=True)(filename, array_shape,
                                                 record_by, len(axes))
        dc = from_delayed(val, shape=array_shape, dtype=data['Array'].dtype)
    else:
        dc = load_only_data(filename, array_shape, record_by, len(axes),
                            data=data)
    if ordict:
        original_metadata = OrderedDict()
    else:
        original_metadata = {}
    header_parameters = sarray2dict(header)
    sarray2dict(data, header_parameters)
    # We remove the Array key to save memory by avoiding duplication
    del header_parameters['Array']
    original_metadata['ser_header_parameters'] = header_parameters
    metadata = {'General': {
        'original_filename': os.path.split(filename)[1]},
        "Signal": {
            'signal_type': "",
            'record_by': record_by},
    }
    if date is not None and time is not None:
        metadata['General']['date'] = date
        metadata['General']['time'] = time
    dictionary = {
        'data': dc,
        'metadata': metadata,
        'axes': axes,
        'original_metadata': original_metadata,
        'mapping': mapping}
    return dictionary
def file_reader(filename, endianess="<", **kwds): metadata = {} f = open(filename, "rb") std_header = np.fromfile(f, dtype=get_std_dtype_list(endianess), count=1) fei_header = None if std_header["NEXT"] / 1024 == 128: print "It seems to contain an extended FEI header" fei_header = np.fromfile(f, dtype=get_fei_dtype_list(endianess), count=1024) if f.tell() == 1024 + std_header["NEXT"]: print "The FEI header was correctly loaded" else: print "There was a problem reading the extended header" f.seek(1024 + std_header["NEXT"]) fei_header = None NX, NY, NZ = std_header["NX"], std_header["NY"], std_header["NZ"] data = ( np.memmap(f, mode="c", offset=f.tell(), dtype=get_data_type(std_header["MODE"], endianess)) .squeeze() .reshape((NX, NY, NZ), order="F") .T ) original_metadata = {"std_header": sarray2dict(std_header)} if fei_header is not None: fei_dict = sarray2dict(fei_header) del fei_dict["empty"] original_metadata["fei_header"] = fei_dict dim = len(data.shape) if fei_header is None: # The scale is in Amstrongs, we convert it to nm scales = [ 10 * float(std_header["Zlen"] / std_header["MZ"]) if float(std_header["MZ"]) != 0 else 1, 10 * float(std_header["Ylen"] / std_header["MY"]) if float(std_header["MY"]) != 0 else 1, 10 * float(std_header["Xlen"] / std_header["MX"]) if float(std_header["MX"]) != 0 else 1, ] offsets = [ 10 * float(std_header["ZORIGIN"]), 10 * float(std_header["YORIGIN"]), 10 * float(std_header["XORIGIN"]), ] else: # FEI does not use the standard header to store the scale # It does store the spatial scale in pixel_size, one per angle in # meters scales = [1] + [fei_header["pixel_size"][0] * 10 ** 9] * 2 offsets = [0] * 3 units = [Undefined, "nm", "nm"] names = ["z", "y", "x"] metadata = { "General": {"original_filename": os.path.split(filename)[1]}, "Signal": {"signal_type": "", "record_by": "image"}, } # create the axis objects for each axis axes = [ { "size": data.shape[i], "index_in_array": i, "name": names[i + 3 - dim], "scale": scales[i + 3 - dim], "offset": offsets[i + 3 - dim], "units": units[i + 3 - dim], } for i in xrange(dim) ] dictionary = {"data": data, "axes": axes, "metadata": metadata, "original_metadata": original_metadata} return [dictionary]
def file_reader(filename, endianess='<', **kwds):
    mapped_parameters = {}
    dtype_list = get_std_dtype_list(endianess) + get_fei_dtype_list(endianess)
    f = open(filename, 'rb')
    std_header = np.fromfile(f, dtype=get_std_dtype_list(endianess),
                             count=1)
    fei_header = None
    if std_header['NEXT'] / 1024 == 128:
        print("It seems to contain an extended FEI header")
        fei_header = np.fromfile(f, dtype=get_fei_dtype_list(endianess),
                                 count=1024)
    if f.tell() == 1024 + std_header['NEXT']:
        print("The FEI header was correctly loaded")
    else:
        print("There was a problem reading the extended header")
        f.seek(1024 + std_header['NEXT'])
        fei_header = None
    NX, NY, NZ = std_header['NX'], std_header['NY'], std_header['NZ']
    data = np.memmap(f, mode='c', offset=f.tell(),
                     dtype=get_data_type(std_header['MODE'], endianess)
                     ).squeeze().reshape((NX, NY, NZ), order='F').T
    original_parameters = {'std_header': sarray2dict(std_header)}
    if fei_header is not None:
        fei_dict = sarray2dict(fei_header, )
        del fei_dict['empty']
        original_parameters['fei_header'] = fei_dict
    dim = len(data.shape)
    if fei_header is None:
        # The scale is in Angstroms; we convert it to nm
        scales = [10 * float(std_header['Zlen'] / std_header['MZ'])
                  if float(std_header['MZ']) != 0 else 1,
                  10 * float(std_header['Ylen'] / std_header['MY'])
                  if float(std_header['MY']) != 0 else 1,
                  10 * float(std_header['Xlen'] / std_header['MX'])
                  if float(std_header['MX']) != 0 else 1, ]
        offsets = [10 * float(std_header['ZORIGIN']),
                   10 * float(std_header['YORIGIN']),
                   10 * float(std_header['XORIGIN']), ]
    else:
        # FEI does not use the standard header to store the scale.
        # It does store the spatial scale in pixel_size, one per angle,
        # in meters.
        scales = [1, ] + [fei_header['pixel_size'][0] * 10 ** 9, ] * 2
        offsets = [0, ] * 3
    units = [Undefined, 'nm', 'nm']
    names = ['z', 'y', 'x']
    mapped_parameters = {'original_filename': os.path.split(filename)[1],
                         'record_by': 'image',
                         'signal_type': "", }
    # Create the axis objects for each axis
    axes = [{
        'size': data.shape[i],
        'index_in_array': i,
        'name': names[i + 3 - dim],
        'scale': scales[i + 3 - dim],
        'offset': offsets[i + 3 - dim],
        'units': units[i + 3 - dim],
    } for i in range(dim)]
    dictionary = {'data': data,
                  'axes': axes,
                  'mapped_parameters': mapped_parameters,
                  'original_parameters': original_parameters, }
    return [dictionary, ]
def ser_reader(filename, objects=None, *args, **kwds):
    """Reads the information from the file and returns it in the HyperSpy
    required format.

    """
    header, data = load_ser_file(filename)
    record_by = guess_record_by(header['DataTypeID'])
    ndim = int(header['NumberDimensions'])
    date, time = None, None
    if objects is not None:
        objects_dict = convert_xml_to_dict(objects[0])
        date, time = _get_date_time(objects_dict.ObjectInfo.AcquireDate)
    if "PositionY" in data.dtype.names and len(data['PositionY']) > 1 and \
            (data['PositionY'][0] == data['PositionY'][1]):
        # The spatial dimensions are stored in F order, i.e. X, Y, ...
        order = "F"
    else:
        # The spatial dimensions are stored in C order, i.e. ..., Y, X
        order = "C"
    if ndim == 0 and header["ValidNumberElements"] != 0:
        # The calibration of the axes is not stored in the header, so we
        # try to guess it from the position coordinates.
        array_shape, axes = get_axes_from_position(header=header, data=data)
    else:
        axes = []
        array_shape = [None, ] * int(ndim)
        spatial_axes = ["x", "y"]
        for i in range(ndim):
            idim = 1 + i if order == "C" else ndim - i
            if (record_by == "spectrum" or
                    header['Dim-%i_DimensionSize' % (i + 1)][0] != 1):
                units = (header['Dim-%i_Units' % (idim)][0].decode('utf-8')
                         if header['Dim-%i_UnitsLength' % (idim)] > 0
                         else t.Undefined)
                if units == "meters":
                    name = (spatial_axes.pop() if order == "F"
                            else spatial_axes.pop(-1))
                else:
                    name = t.Undefined
                axes.append({
                    'offset': header['Dim-%i_CalibrationOffset' % idim][0],
                    'scale': header['Dim-%i_CalibrationDelta' % idim][0],
                    'units': units,
                    'size': header['Dim-%i_DimensionSize' % idim][0],
                    'name': name,
                })
                array_shape[i] = header['Dim-%i_DimensionSize' % idim][0]
    # Spectral dimension
    if record_by == "spectrum":
        axes.append({
            'offset': data['CalibrationOffset'][0],
            'scale': data['CalibrationDelta'][0],
            'size': data['ArrayLength'][0],
            'index_in_array': header['NumberDimensions'][0]
        })
        # FEI seems to use the international system of units (SI) for the
        # energy scale (eV).
        axes[-1]['units'] = 'eV'
        axes[-1]['name'] = 'Energy'
        array_shape.append(data['ArrayLength'][0])
    elif record_by == 'image':
        if objects is not None:
            units = _guess_units_from_mode(objects_dict, header)
        else:
            units = "meters"
        # Y axis
        axes.append({
            'name': 'y',
            'offset': data['CalibrationOffsetY'][0] -
            data['CalibrationElementY'][0] * data['CalibrationDeltaY'][0],
            'scale': data['CalibrationDeltaY'][0],
            'units': units,
            'size': data['ArraySizeY'][0],
        })
        array_shape.append(data['ArraySizeY'][0])
        # X axis
        axes.append({
            'name': 'x',
            'offset': data['CalibrationOffsetX'][0] -
            data['CalibrationElementX'][0] * data['CalibrationDeltaX'][0],
            'scale': data['CalibrationDeltaX'][0],
            'size': data['ArraySizeX'][0],
            'units': units,
        })
        array_shape.append(data['ArraySizeX'][0])
    # FEI seems to use the international system of units (SI) for the
    # spatial scale. However, we prefer to work in nm.
    for axis in axes:
        if axis['units'] == 'meters':
            axis['units'] = 'nm'
            axis['scale'] *= 10 ** 9
        elif axis['units'] == '1/meters':
            axis['units'] = '1/nm'
            axis['scale'] /= 10 ** 9
    # Remove Nones from array_shape caused by squeezing size 1 dimensions
    array_shape = [dim for dim in array_shape if dim is not None]
    # If the acquisition stops before finishing the job, the stored file
    # will report the requested size even though no values are recorded.
    # Therefore, if the shape of the retrieved array does not match that
    # of the data dimensions, we must fill the rest with zeros or (better)
    # NaNs if the dtype is float.
    if np.prod(array_shape) != np.prod(data['Array'].shape):
        dc = np.zeros(np.prod(array_shape), dtype=data['Array'].dtype)
        if dc.dtype is np.dtype('f') or dc.dtype is np.dtype('f8'):
            dc[:] = np.nan
        dc[:data['Array'].ravel().shape[0]] = data['Array'].ravel()
    else:
        dc = data['Array']
    dc = dc.reshape(array_shape)
    if record_by == 'image':
        dc = dc[..., ::-1, :]
    if ordict:
        original_metadata = OrderedDict()
    else:
        original_metadata = {}
    header_parameters = sarray2dict(header)
    sarray2dict(data, header_parameters)
    if len(axes) != len(dc.shape):
        dc = dc.squeeze()
    if len(axes) != len(dc.shape):
        raise IOError("Please report this issue to the HyperSpy developers.")
    # We remove the Array key to save memory by avoiding duplication
    del header_parameters['Array']
    original_metadata['ser_header_parameters'] = header_parameters
    metadata = {'General': {
        'original_filename': os.path.split(filename)[1]},
        "Signal": {
            'signal_type': "",
            'record_by': record_by},
    }
    if date is not None and time is not None:
        metadata['General']['date'] = date
        metadata['General']['time'] = time
    dictionary = {
        'data': dc,
        'metadata': metadata,
        'axes': axes,
        'original_metadata': original_metadata,
        'mapping': mapping}
    return dictionary
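# Illustrative, standalone numpy sketch of the truncated-acquisition
# recovery used above: allocate the full requested shape, mark missing
# values as NaN (float data only), then copy in whatever was recorded.
# Shapes and values here are made up.
import numpy as np

recorded = np.arange(6, dtype='f8')        # only 6 of 3 * 4 values recorded
full = np.zeros(3 * 4, dtype=recorded.dtype)
full[:] = np.nan
full[:recorded.size] = recorded
full = full.reshape((3, 4))                # last half-row stays NaN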
def file_reader(filename, endianess='<', mmap_mode=None, lazy=False, **kwds):
    _logger.debug("Reading blockfile: %s" % filename)
    metadata = {}
    if mmap_mode is None:
        mmap_mode = 'r' if lazy else 'c'
    # Makes sure we open in the right mode:
    if '+' in mmap_mode or ('write' in mmap_mode and
                            'copyonwrite' != mmap_mode):
        if lazy:
            raise ValueError("Lazy loading does not support in-place "
                             "writing")
        f = open(filename, 'r+b')
    else:
        f = open(filename, 'rb')
    _logger.debug("File opened")
    # Get header
    header = np.fromfile(f, dtype=get_header_dtype_list(endianess), count=1)
    if header['MAGIC'][0] not in magics:
        warnings.warn("Blockfile has unrecognized header signature. "
                      "Will attempt to read, but correctness is not "
                      "guaranteed!")
    header = sarray2dict(header)
    note = f.read(header['Data_offset_1'] - f.tell())
    # It seems it uses "\x00" for padding, so we remove it
    try:
        header['Note'] = note.decode("latin1").strip("\x00")
    except BaseException:
        # Not sure about the encoding, so if it fails, we carry on
        _logger.warning(
            "Reading the Note metadata of this file failed. "
            "You can help improving "
            "HyperSpy by reporting the issue in "
            "https://github.com/hyperspy/hyperspy")
    _logger.debug("File header: " + str(header))
    NX, NY = int(header['NX']), int(header['NY'])
    DP_SZ = int(header['DP_SZ'])
    if header['SDP']:
        SDP = 100. / header['SDP']
    else:
        SDP = Undefined
    original_metadata = {'blockfile_header': header}
    # Get data:
    # A virtual BF/DF image is stored first
    # offset1 = header['Data_offset_1']
    # f.seek(offset1)
    # data_pre = np.array(f.read(NX*NY), dtype=endianess+'u1'
    #                     ).squeeze().reshape((NX, NY), order='C').T
    # Then comes the actual blockfile
    offset2 = header['Data_offset_2']
    if not lazy:
        f.seek(offset2)
        data = np.fromfile(f, dtype=endianess + 'u1')
    else:
        data = np.memmap(f, mode=mmap_mode, offset=offset2,
                         dtype=endianess + 'u1')
    try:
        data = data.reshape((NY, NX, DP_SZ * DP_SZ + 6))
    except ValueError:
        warnings.warn(
            'Blockfile header dimensions larger than file size! '
            'Will attempt to load by zero padding incomplete frames.')
        # Data is stored DP by DP:
        pw = [(0, NX * NY * (DP_SZ * DP_SZ + 6) - data.size)]
        data = np.pad(data, pw, mode='constant')
        data = data.reshape((NY, NX, DP_SZ * DP_SZ + 6))
    # Every frame is preceded by a 6 byte sequence (AA 55, and then a 4 byte
    # integer specifying the frame number)
    data = data[:, :, 6:]
    data = data.reshape((NY, NX, DP_SZ, DP_SZ), order='C').squeeze()
    units = ['nm', 'nm', 'cm', 'cm']
    names = ['y', 'x', 'dy', 'dx']
    scales = [header['SY'], header['SX'], SDP, SDP]
    date, time, time_zone = serial_date_to_ISO_format(
        header['Acquisition_time'])
    metadata = {'General': {'original_filename': os.path.split(filename)[1],
                            'date': date,
                            'time': time,
                            'time_zone': time_zone,
                            'notes': header['Note']},
                "Signal": {'signal_type': "diffraction",
                           'record_by': 'image'},
                }
    # Create the axis objects for each axis
    dim = data.ndim
    axes = [
        {
            'size': data.shape[i],
            'index_in_array': i,
            'name': names[i],
            'scale': scales[i],
            'offset': 0.0,
            'units': units[i],
        }
        for i in range(dim)]
    dictionary = {'data': data,
                  'axes': axes,
                  'metadata': metadata,
                  'original_metadata': original_metadata,
                  'mapping': mapping, }
    f.close()
    return [dictionary, ]
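# Illustrative usage sketch for the newer blockfile reader above;
# "scan.blo" is a hypothetical filename. With lazy=True the diffraction
# patterns are memory-mapped rather than read into RAM.
d = file_reader("scan.blo", lazy=True)[0]
print(d["metadata"]["General"]["notes"])   # the decoded Note header field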
def ser_reader(filename, objects=None, verbose=False, *args, **kwds):
    """Reads the information from the file and returns it in the HyperSpy
    required format.

    """
    header, data = load_ser_file(filename, verbose=verbose)
    record_by = guess_record_by(header['DataTypeID'])
    axes = []
    ndim = int(header['NumberDimensions'])
    if record_by == 'spectrum':
        array_shape = [None, ] * int(ndim)
        i_array = list(range(ndim))
        if len(data['PositionY']) > 1 and \
                (data['PositionY'][0] == data['PositionY'][1]):
            # The spatial dimensions are stored in the reversed order,
            # so we reverse the shape
            i_array.reverse()
        # Extra dimensions
        for i in range(ndim):
            if i_array[i] == ndim - 1:
                name = 'x'
            elif i_array[i] == ndim - 2:
                name = 'y'
            else:
                name = t.Undefined
            axes.append({
                'name': name,
                'offset': header['Dim-%i_CalibrationOffset' % (i + 1)][0],
                'scale': header['Dim-%i_CalibrationDelta' % (i + 1)][0],
                'units': header['Dim-%i_Units' % (i + 1)][0],
                'size': header['Dim-%i_DimensionSize' % (i + 1)][0],
                'index_in_array': i_array[i]
            })
            array_shape[i_array[i]] = \
                header['Dim-%i_DimensionSize' % (i + 1)][0]
        # FEI seems to use the international system of units (SI) for the
        # spatial scale. However, we prefer to work in nm.
        for axis in axes:
            if axis['units'] == 'meters':
                axis['units'] = 'nm'
                axis['scale'] *= 10 ** 9
        # Spectral dimension
        axes.append({
            'offset': data['CalibrationOffset'][0],
            'scale': data['CalibrationDelta'][0],
            'size': data['ArrayLength'][0],
            'index_in_array': header['NumberDimensions'][0]
        })
        # FEI seems to use the international system of units (SI) for the
        # energy scale (eV); we convert it to keV.
        axes[-1]['units'] = 'keV'
        axes[-1]['name'] = 'Energy'
        axes[-1]['scale'] *= 0.001
        array_shape.append(data['ArrayLength'][0])
    elif record_by == 'image':
        array_shape = []
        # Extra dimensions
        for i in range(ndim):
            if header['Dim-%i_DimensionSize' % (i + 1)][0] != 1:
                axes.append({
                    'offset': header['Dim-%i_CalibrationOffset' % (i + 1)][0],
                    'scale': header['Dim-%i_CalibrationDelta' % (i + 1)][0],
                    'units': header['Dim-%i_Units' % (i + 1)][0],
                    'size': header['Dim-%i_DimensionSize' % (i + 1)][0],
                })
                array_shape.append(
                    header['Dim-%i_DimensionSize' % (i + 1)][0])
        # Y axis
        axes.append({
            'name': 'y',
            'offset': data['CalibrationOffsetY'][0] -
            data['CalibrationElementY'][0] * data['CalibrationDeltaY'][0],
            'scale': data['CalibrationDeltaY'][0],
            'units': 'Unknown',
            'size': data['ArraySizeY'][0],
        })
        array_shape.append(data['ArraySizeY'][0])
        # X axis
        axes.append({
            'name': 'x',
            'offset': data['CalibrationOffsetX'][0] -
            data['CalibrationElementX'][0] * data['CalibrationDeltaX'][0],
            'scale': data['CalibrationDeltaX'][0],
            'size': data['ArraySizeX'][0],
        })
        array_shape.append(data['ArraySizeX'][0])
    # If the acquisition stops before finishing the job, the stored file
    # will report the requested size even though no values are recorded.
    # Therefore, if the shape of the retrieved array does not match that
    # of the data dimensions, we must fill the rest with zeros or (better)
    # NaNs if the dtype is float.
    if np.cumprod(array_shape)[-1] != np.cumprod(data['Array'].shape)[-1]:
        dc = np.zeros(np.cumprod(array_shape)[-1],
                      dtype=data['Array'].dtype)
        if dc.dtype is np.dtype('f') or dc.dtype is np.dtype('f8'):
            dc[:] = np.nan
        dc[:data['Array'].ravel().shape[0]] = data['Array'].ravel()
    else:
        dc = data['Array']
    dc = dc.reshape(array_shape)
    if record_by == 'image':
        dc = dc[::-1]
    if ordict:
        original_metadata = OrderedDict()
    else:
        original_metadata = {}
    header_parameters = sarray2dict(header)
    sarray2dict(data, header_parameters)
    if len(axes) != len(dc.shape):
        dc = dc.squeeze()
    if len(axes) != len(dc.shape):
        raise IOError("Please report this issue to the HyperSpy developers.")
    # We remove the Array key to save memory by avoiding duplication
    del header_parameters['Array']
    original_metadata['ser_header_parameters'] = header_parameters
    dictionary = {
        'data': dc,
        'metadata': {'General': {
            'original_filename': os.path.split(filename)[1]},
            "Signal": {'signal_type': "",
                       'record_by': record_by},
        },
        'axes': axes,
        'original_metadata': original_metadata,
        'mapping': mapping}
    return dictionary
def spd_reader(filename, endianess='<', nav_units=None, spc_fname=None,
               ipr_fname=None, load_all_spc=False, **kwargs):
    """
    Read data from an SPD spectral map specified by filename.

    Parameters
    ----------
    filename : str
        Name of SPD file to read
    endianess : char
        Byte-order of data to read
    nav_units : 'nm', 'um', or None
        Default navigation units for EDAX data is microns, so this is the
        default unit to save in the signal. Can also be specified as 'nm',
        which will output a signal with nm scale instead.
    spc_fname : None or str
        Name of file from which to read the spectral calibration. If data
        was exported fully from EDAX TEAM software, an .spc file with the
        same name as the .spd should be present. If `None`, the default
        filename will be searched for. Otherwise, the name of the .spc
        file to use for calibration can be explicitly given as a string.
    ipr_fname : None or str
        Name of file from which to read the spatial calibration. If data
        was exported fully from EDAX TEAM software, an .ipr file with the
        same name as the .spd (plus a "_Img" suffix) should be present.
        If `None`, the default filename will be searched for. Otherwise,
        the name of the .ipr file to use for spatial calibration can be
        explicitly given as a string.
    load_all_spc : bool
        Switch to control if all of the .spc header is read, or just the
        important parts for import into HyperSpy
    **kwargs
        Remaining arguments are passed to the Numpy ``memmap`` function

    Returns
    -------
    list
        list with dictionary of signal information to be passed back to
        hyperspy.io.load_with_reader
    """
    with open(filename, 'rb') as f:
        spd_header = np.fromfile(f, dtype=get_spd_dtype_list(endianess),
                                 count=1)
        original_metadata = {'spd_header': sarray2dict(spd_header)}
        # dimensions of map data:
        nx = original_metadata['spd_header']['nPoints']
        ny = original_metadata['spd_header']['nLines']
        nz = original_metadata['spd_header']['nChannels']
        data_offset = original_metadata['spd_header']['dataOffset']
        data_type = {'1': 'u1',
                     '2': 'u2',
                     '4': 'u4'}[str(original_metadata['spd_header'][
                         'countBytes'])]
        mode = kwargs.pop('mode', 'c')
        # Read data from file into a numpy memmap object
        data = np.memmap(f, mode=mode, offset=data_offset, dtype=data_type,
                         **kwargs).squeeze().reshape((nz, nx, ny),
                                                     order='F').T
    # Convert char arrays to strings:
    original_metadata['spd_header']['tag'] = \
        spd_header['tag'][0].view('S16')[0]
    # fName is the name of the .bmp (and .ipr) file of the map
    original_metadata['spd_header']['fName'] = \
        spd_header['fName'][0].view('S120')[0]
    # Get name of .spc file from the .spd map (if not explicitly given):
    if spc_fname is None:
        spc_path = os.path.dirname(filename)
        spc_basename = os.path.splitext(
            os.path.basename(filename))[0] + '.spc'
        spc_fname = os.path.join(spc_path, spc_basename)
    # Get name of .ipr file from the bitmap image (if not explicitly given):
    if ipr_fname is None:
        ipr_basename = os.path.splitext(os.path.basename(
            original_metadata['spd_header']['fName']))[0].decode() + '.ipr'
        ipr_path = os.path.dirname(filename)
        ipr_fname = os.path.join(ipr_path, ipr_basename)
    # Flags to control reading of files
    read_spc = os.path.isfile(spc_fname)
    read_ipr = os.path.isfile(ipr_fname)
    # Read the .ipr header (if possible)
    if read_ipr:
        with open(ipr_fname, 'rb') as f:
            ipr_header = np.fromfile(f, dtype=get_ipr_dtype_list(endianess),
                                     count=1)
            original_metadata['ipr_header'] = sarray2dict(ipr_header)
    else:
        _logger.warning('Could not find .ipr file named {}.\n'
                        'No spatial calibration will be loaded.'
                        '\n'.format(ipr_fname))
    # Read the .spc header (if possible)
    if read_spc:
        with open(spc_fname, 'rb') as f:
            spc_header = np.fromfile(f,
                                     dtype=get_spc_dtype_list(
                                         load_all=load_all_spc,
                                         endianess=endianess),
                                     count=1)
            spc_dict = sarray2dict(spc_header)
            original_metadata['spc_header'] = spc_dict
    else:
        _logger.warning('Could not find .spc file named {}.\n'
                        'No spectral metadata will be loaded.'
                        '\n'.format(spc_fname))
    # Create the energy axis dictionary:
    energy_axis = {
        'size': data.shape[2],
        'index_in_array': 2,
        'name': 'Energy',
        'scale': original_metadata['spc_header']['evPerChan'] / 1000.0
        if read_spc else 1,
        'offset': original_metadata['spc_header']['startEnergy']
        if read_spc else 1,
        'units': 'keV' if read_spc else t.Undefined,
    }
    # Handle navigation units input:
    scale = 1000 if nav_units == 'nm' else 1
    if nav_units != 'nm':
        if nav_units not in [None, 'um']:
            _logger.warning('Did not understand nav_units input "{}". '
                            'Defaulting to microns.\n'.format(nav_units))
        nav_units = '$\\mu m$'
    # Create navigation axes dictionaries:
    x_axis = {
        'size': data.shape[1],
        'index_in_array': 1,
        'name': 'x',
        'scale': original_metadata['ipr_header']['mppX'] * scale
        if read_ipr else 1,
        'offset': 0,
        'units': nav_units if read_ipr else t.Undefined,
    }
    y_axis = {
        'size': data.shape[0],
        'index_in_array': 0,
        'name': 'y',
        'scale': original_metadata['ipr_header']['mppY'] * scale
        if read_ipr else 1,
        'offset': 0,
        'units': nav_units if read_ipr else t.Undefined,
    }
    # Assign metadata for spectrum image:
    metadata = {'General': {'original_filename': os.path.split(filename)[1],
                            'title': 'EDS Spectrum Image'},
                "Signal": {'signal_type': "EDS_SEM",
                           'record_by': 'spectrum'},
                }
    # Add spectral calibration and elements (if present):
    if read_spc:
        metadata = _add_spc_metadata(metadata, spc_dict)
    # Define navigation and signal axes:
    axes = [y_axis, x_axis, energy_axis]
    dictionary = {'data': data,
                  'axes': axes,
                  'metadata': metadata,
                  'original_metadata': original_metadata}
    return [dictionary, ]
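# Illustrative usage sketch for spd_reader above; "map.spd" is a
# hypothetical filename. The matching .spc/.ipr calibration files are
# discovered automatically next to the .spd file when present.
d = spd_reader("map.spd", nav_units="nm")[0]
print(d["data"].shape)   # (y, x, energy)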
def load_from_unf(cls, filename, lazy=False):
    """Load a `.unf`-file into a :class:`~.SemperFormat` object.

    Parameters
    ----------
    filename : string
        The name of the unf-file from which to load the data. Standard
        format is '\*.unf'.

    Returns
    -------
    semper : :class:`~.SemperFormat` (N=1)
        SEMPER file format object containing the loaded information.

    """
    metadata = OrderedDict()
    with open(filename, 'rb') as f:
        # Read header:
        rec_length = np.fromfile(f, dtype='<i4', count=1)[0]  # header length
        header = np.fromfile(f, dtype=cls.HEADER_DTYPES[:rec_length // 2],
                             count=1)
        metadata.update(sarray2dict(header))
        assert np.frombuffer(f.read(4), dtype=np.int32)[0] == rec_length, \
            'Error while reading the header (length is not correct)!'
        data_format = cls.IFORM_DICT[metadata['IFORM']]
        iversn, remain = divmod(metadata['IFLAG'], 10000)
        ilabel, ntitle = divmod(remain, 1000)
        metadata.update({'IVERSN': iversn, 'ILABEL': ilabel,
                         'NTITLE': ntitle})
        # Read title:
        title = ''
        if ntitle > 0:
            assert np.fromfile(f, dtype='<i4',
                               count=1)[0] == ntitle  # length of title
            title = b''.join(np.fromfile(f, dtype='c', count=ntitle))
            title = title.decode()
            metadata['TITLE'] = title
            assert np.fromfile(f, dtype='<i4', count=1)[0] == ntitle
        if ilabel:
            try:
                metadata.update(cls._read_label(f))
            except Exception as e:
                warning = ('Could not read label, trying to proceed '
                           'without it!')
                warning += ' (Error message: {})'.format(str(e))
                warnings.warn(warning)
        # Read picture data:
        pos = f.tell()
        shape = metadata['NLAY'], metadata['NROW'], metadata['NCOL']
        if lazy:
            from dask.array import from_delayed
            from dask import delayed
            task = delayed(_read_data)(f, filename, pos, data_format, shape)
            data = from_delayed(task, shape=shape, dtype=data_format)
        else:
            data = _read_data(f, filename, pos, data_format, shape)
    offsets = (metadata.get('X0V0', 0.),
               metadata.get('Y0V2', 0.),
               metadata.get('Z0V4', 0.))
    scales = (metadata.get('DXV1', 1.),
              metadata.get('DYV3', 1.),
              metadata.get('DZV5', 1.))
    units = (metadata.get('XUNIT', Undefined),
             metadata.get('YUNIT', Undefined),
             metadata.get('ZUNIT', Undefined))
    return cls(data, title, offsets, scales, units, metadata)
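# Illustrative usage sketch for the classmethod above; SemperFormat is the
# class named in the docstring and "image.unf" is a hypothetical filename.
# With lazy=True the picture data is wrapped in a dask array until accessed.
semper = SemperFormat.load_from_unf("image.unf", lazy=True)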
def load_from_unf(cls, filename):
    """Load a `.unf`-file into a :class:`~.SemperFormat` object.

    Parameters
    ----------
    filename : string
        The name of the unf-file from which to load the data. Standard
        format is '\*.unf'.

    Returns
    -------
    semper : :class:`~.SemperFormat` (N=1)
        SEMPER file format object containing the loaded information.

    """
    cls._log.debug('Calling load_from_file')
    if ordict:
        metadata = OrderedDict()
    else:
        cls._log.warning(
            'OrderedDict is not available, using a standard dictionary.\n')
        metadata = {}
    with open(filename, 'rb') as f:
        # Read header:
        rec_length = np.fromfile(f, dtype='<i4', count=1)[0]  # header length
        header = np.fromfile(f, dtype=cls.HEADER_DTYPES[:rec_length // 2],
                             count=1)
        metadata.update(sarray2dict(header))
        assert np.frombuffer(f.read(4), dtype=np.int32)[0] == rec_length, \
            'Error while reading the header (length is not correct)!'
        data_format = cls.IFORM_DICT[metadata['IFORM']]
        iversn, remain = divmod(metadata['IFLAG'], 10000)
        ilabel, ntitle = divmod(remain, 1000)
        metadata.update({'IVERSN': iversn, 'ILABEL': ilabel,
                         'NTITLE': ntitle})
        # Read title:
        title = ''
        if ntitle > 0:
            assert np.fromfile(f, dtype='<i4',
                               count=1)[0] == ntitle  # length of title
            title = ''.join(np.fromfile(f, dtype='c', count=ntitle))
            metadata['TITLE'] = title
            assert np.fromfile(f, dtype='<i4', count=1)[0] == ntitle
        if ilabel:
            try:
                metadata.update(cls._read_label(f))
            except Exception as e:
                warning = ('Could not read label, trying to proceed '
                           'without it!')
                warning += ' (Error message: {})'.format(str(e))
                cls._log.warning(warning)
        # Read picture data:
        nlay, nrow, ncol = metadata['NLAY'], metadata['NROW'], \
            metadata['NCOL']
        data = np.empty((nlay, nrow, ncol), dtype=data_format)
        for k in range(nlay):
            for j in range(nrow):
                rec_length = np.fromfile(f, dtype='<i4', count=1)[0]
                # Not always ncol, see below
                count = rec_length // np.dtype(data_format).itemsize
                row = np.fromfile(f, dtype=data_format, count=count)
                # [:ncol] is used because Semper always writes an even
                # number of bytes, which is a problem when reading in
                # single bytes (IFORM = 0, np.byte). If ncol is odd, an
                # empty byte (0) is added which has to be skipped during
                # read in:
                data[k, j, :] = row[:ncol]
                test = np.fromfile(f, dtype='<i4', count=1)[0]
                assert test == rec_length
    offsets = (metadata.get('X0V0', 0.),
               metadata.get('Y0V2', 0.),
               metadata.get('Z0V4', 0.))
    scales = (metadata.get('DXV1', 1.),
              metadata.get('DYV3', 1.),
              metadata.get('DZV5', 1.))
    units = (metadata.get('XUNIT', Undefined),
             metadata.get('YUNIT', Undefined),
             metadata.get('ZUNIT', Undefined))
    return cls(data, title, offsets, scales, units, metadata)
def ser_reader(filename, objects=None, verbose=False, *args, **kwds): """Reads the information from the file and returns it in the HyperSpy required format. """ header, data = load_ser_file(filename, verbose=verbose) record_by = guess_record_by(header['DataTypeID']) axes = [] ndim = int(header['NumberDimensions']) if record_by == 'spectrum': array_shape = [ None, ] * int(ndim) i_array = range(ndim) if len(data['PositionY']) > 1 and \ (data['PositionY'][0] == data['PositionY'][1]): # The spatial dimensions are stored in the reversed order # We reverse the shape i_array.reverse() # Extra dimensions for i in xrange(ndim): if i_array[i] == ndim - 1: name = 'x' elif i_array[i] == ndim - 2: name = 'y' else: name = t.Undefined axes.append({ 'name': name, 'offset': header['Dim-%i_CalibrationOffset' % (i + 1)][0], 'scale': header['Dim-%i_CalibrationDelta' % (i + 1)][0], 'units': header['Dim-%i_Units' % (i + 1)][0], 'size': header['Dim-%i_DimensionSize' % (i + 1)][0], 'index_in_array': i_array[i] }) array_shape[i_array[i]] = \ header['Dim-%i_DimensionSize' % (i + 1)][0] # FEI seems to use the international system of units (SI) for the # spatial scale. However, we prefer to work in nm for axis in axes: if axis['units'] == 'meters': axis['units'] = 'nm' axis['scale'] *= 10**9 # Spectral dimension axes.append({ 'offset': data['CalibrationOffset'][0], 'scale': data['CalibrationDelta'][0], 'size': data['ArrayLength'][0], 'index_in_array': header['NumberDimensions'][0] }) # FEI seems to use the international system of units (SI) for the # energy scale (eV). axes[-1]['units'] = 'eV' axes[-1]['name'] = 'Energy' array_shape.append(data['ArrayLength'][0]) elif record_by == 'image': array_shape = [] # Extra dimensions for i in xrange(ndim): if header['Dim-%i_DimensionSize' % (i + 1)][0] != 1: axes.append({ 'offset': header['Dim-%i_CalibrationOffset' % (i + 1)][0], 'scale': header['Dim-%i_CalibrationDelta' % (i + 1)][0], 'units': header['Dim-%i_Units' % (i + 1)][0], 'size': header['Dim-%i_DimensionSize' % (i + 1)][0], }) array_shape.append(header['Dim-%i_DimensionSize' % (i + 1)][0]) # Y axis axes.append({ 'name': 'y', 'offset': data['CalibrationOffsetY'][0] - data['CalibrationElementY'][0] * data['CalibrationDeltaY'][0], 'scale': data['CalibrationDeltaY'][0], 'units': 'Unknown', 'size': data['ArraySizeY'][0], }) array_shape.append(data['ArraySizeY'][0]) # X axis axes.append({ 'name': 'x', 'offset': data['CalibrationOffsetX'][0] - data['CalibrationElementX'][0] * data['CalibrationDeltaX'][0], 'scale': data['CalibrationDeltaX'][0], 'size': data['ArraySizeX'][0], }) array_shape.append(data['ArraySizeX'][0]) # If the acquisition stops before finishing the job, the stored file will # report the requested size even though no values are recorded. 
    # Therefore, if the shape of the retrieved array does not match that of
    # the data dimensions, we must fill the rest with zeros, or (better) with
    # NaNs if the dtype is float.
    if np.cumprod(array_shape)[-1] != np.cumprod(data['Array'].shape)[-1]:
        dc = np.zeros(np.cumprod(array_shape)[-1],
                      dtype=data['Array'].dtype)
        if dc.dtype is np.dtype('f') or dc.dtype is np.dtype('f8'):
            dc[:] = np.nan
        dc[:data['Array'].ravel().shape[0]] = data['Array'].ravel()
    else:
        dc = data['Array']
    dc = dc.reshape(array_shape)
    if record_by == 'image':
        dc = dc[::-1]
    if ordict:
        original_metadata = OrderedDict()
    else:
        original_metadata = {}
    header_parameters = sarray2dict(header)
    sarray2dict(data, header_parameters)
    if len(axes) != len(dc.shape):
        dc = dc.squeeze()
    if len(axes) != len(dc.shape):
        raise IOError("Please report this issue to the HyperSpy developers.")
    # We remove the Array key to save memory by avoiding duplication.
    del header_parameters['Array']
    original_metadata['ser_header_parameters'] = header_parameters
    dictionary = {
        'data': dc,
        'metadata': {
            'General': {
                'original_filename': os.path.split(filename)[1]
            },
            "Signal": {
                'signal_type': "",
                'record_by': record_by,
            },
        },
        'axes': axes,
        'original_metadata': original_metadata,
        'mapping': mapping
    }
    return dictionary
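# The zero/NaN padding branch above is easiest to see on a synthetic
# example. A minimal sketch in plain NumPy, with made-up shapes standing in
# for ``array_shape`` and ``data['Array']``:

import numpy as np

# Pretend the header promised a 4 x 3 map of 8-channel spectra, but the
# acquisition stopped after only 7 spectra were written.
array_shape = [4, 3, 8]
recorded = np.random.rand(7, 8)                  # float64, 7 of 12 spectra

expected = np.cumprod(array_shape)[-1]           # 4 * 3 * 8 = 96 values
if expected != recorded.size:
    dc = np.zeros(expected, dtype=recorded.dtype)
    if dc.dtype.kind == 'f':
        dc[:] = np.nan                           # missing floats become NaN
    dc[:recorded.size] = recorded.ravel()        # copy what was recorded
else:
    dc = recorded
dc = dc.reshape(array_shape)                     # last 5 spectra are all NaN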
def spd_reader(filename, endianess='<', spc_fname=None, ipr_fname=None,
               load_all_spc=False, **kwargs):
    """Read data from an SPD spectral map specified by filename.

    Parameters
    ----------
    filename : str
        Name of SPD file to read
    endianess : char
        Byte-order of data to read
    spc_fname : None or str
        Name of file from which to read the spectral calibration. If data
        was exported fully from EDAX TEAM software, an .spc file with the
        same name as the .spd should be present.
        If `None`, the default filename will be searched for.
        Otherwise, the name of the .spc file to use for calibration can
        be explicitly given as a string.
    ipr_fname : None or str
        Name of file from which to read the spatial calibration. If data
        was exported fully from EDAX TEAM software, an .ipr file with the
        same name as the .spd (plus a "_Img" suffix) should be present.
        If `None`, the default filename will be searched for.
        Otherwise, the name of the .ipr file to use for spatial
        calibration can be explicitly given as a string.
    load_all_spc : bool
        Switch to control if all of the .spc header is read, or just the
        important parts for import into HyperSpy.
    **kwargs
        Remaining arguments are passed to the Numpy ``memmap`` function.

    Returns
    -------
    list
        list with dictionary of signal information to be passed back to
        hyperspy.io.load_with_reader
    """
    with open(filename, 'rb') as f:
        spd_header = np.fromfile(f,
                                 dtype=get_spd_dtype_list(endianess),
                                 count=1)

        original_metadata = {'spd_header': sarray2dict(spd_header)}

        # dimensions of map data:
        nx = original_metadata['spd_header']['nPoints']
        ny = original_metadata['spd_header']['nLines']
        nz = original_metadata['spd_header']['nChannels']
        data_offset = original_metadata['spd_header']['dataOffset']
        data_type = {'1': 'u1',
                     '2': 'u2',
                     '4': 'u4'}[str(original_metadata['spd_header'][
                         'countBytes'])]
        lazy = kwargs.pop('lazy', False)
        mode = kwargs.pop('mode', 'c')
        if lazy:
            mode = 'r'

        # Read data from file into a numpy memmap object
        data = np.memmap(f, mode=mode, offset=data_offset, dtype=data_type,
                         **kwargs).squeeze().reshape((nz, nx, ny),
                                                     order='F').T

    # Convert char arrays to strings:
    original_metadata['spd_header']['tag'] = \
        spd_header['tag'][0].view('S16')[0]
    # fName is the name of the .bmp (and .ipr) file of the map
    original_metadata['spd_header']['fName'] = \
        spd_header['fName'][0].view('S120')[0]

    # Get name of .spc file from the .spd map (if not explicitly given):
    if spc_fname is None:
        spc_path = os.path.dirname(filename)
        spc_basename = os.path.splitext(os.path.basename(filename))[
            0] + '.spc'
        spc_fname = os.path.join(spc_path, spc_basename)

    # Get name of .ipr file from bitmap image (if not explicitly given):
    if ipr_fname is None:
        ipr_basename = os.path.splitext(
            os.path.basename(
                original_metadata['spd_header'][
                    'fName']))[0].decode() + '.ipr'
        ipr_path = os.path.dirname(filename)
        ipr_fname = os.path.join(ipr_path, ipr_basename)

    # Flags to control reading of files
    read_spc = os.path.isfile(spc_fname)
    read_ipr = os.path.isfile(ipr_fname)

    # Read the .ipr header (if possible)
    if read_ipr:
        with open(ipr_fname, 'rb') as f:
            _logger.debug(' From .spd reader - '
                          'reading .ipr {}'.format(ipr_fname))
            ipr_header = __get_ipr_header(f, endianess)
            original_metadata['ipr_header'] = sarray2dict(ipr_header)

            # Workaround for type error when saving hdf5:
            # save as list of strings instead of numpy unicode array;
            # see https://github.com/hyperspy/hyperspy/pull/2007 and
            # https://github.com/h5py/h5py/issues/289 for context
            original_metadata['ipr_header']['charText'] = \
                [np.string_(i) for i in
                 original_metadata['ipr_header']['charText']]
    else:
        _logger.warning('Could not find .ipr file named {}.\n'
                        'No spatial calibration will be loaded.'
                        '\n'.format(ipr_fname))

    # Read the .spc header (if possible)
    if read_spc:
        with open(spc_fname, 'rb') as f:
            _logger.debug(' From .spd reader - '
                          'reading .spc {}'.format(spc_fname))
            spc_header = __get_spc_header(f, endianess, load_all_spc)
            spc_dict = sarray2dict(spc_header)
            original_metadata['spc_header'] = spc_dict
    else:
        _logger.warning('Could not find .spc file named {}.\n'
                        'No spectral metadata will be loaded.'
                        '\n'.format(spc_fname))

    # create the energy axis dictionary:
    energy_axis = {
        'size': data.shape[2],
        'index_in_array': 2,
        'name': 'Energy',
        'scale': original_metadata['spc_header']['evPerChan'] / 1000.0
        if read_spc else 1,
        'offset': original_metadata['spc_header']['startEnergy']
        if read_spc else 1,
        'units': 'keV' if read_spc else t.Undefined,
    }

    nav_units = 'µm'
    # Create navigation axes dictionaries:
    x_axis = {
        'size': data.shape[1],
        'index_in_array': 1,
        'name': 'x',
        'scale': original_metadata['ipr_header']['mppX'] if read_ipr else 1,
        'offset': 0,
        'units': nav_units if read_ipr else t.Undefined,
    }
    y_axis = {
        'size': data.shape[0],
        'index_in_array': 0,
        'name': 'y',
        'scale': original_metadata['ipr_header']['mppY'] if read_ipr else 1,
        'offset': 0,
        'units': nav_units if read_ipr else t.Undefined,
    }

    # Assign metadata for spectrum image:
    metadata = {'General': {'original_filename': os.path.split(filename)[1],
                            'title': 'EDS Spectrum Image'},
                "Signal": {'signal_type': "EDS_SEM",
                           'record_by': 'spectrum', }, }

    # Add spectral calibration and elements (if present):
    if read_spc:
        metadata = _add_spc_metadata(metadata, spc_dict)

    # Define navigation and signal axes:
    axes = [y_axis, x_axis, energy_axis]

    dictionary = {'data': data,
                  'axes': axes,
                  'metadata': metadata,
                  'original_metadata': original_metadata}

    return [dictionary, ]
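# In practice this reader is rarely called directly; HyperSpy dispatches to
# it by file extension. A minimal usage sketch ('map.spd' is a hypothetical
# filename for a map exported from EDAX TEAM):

import hyperspy.api as hs

s = hs.load('map.spd')   # picks up map.spc / the _Img .ipr automatically
print(s)                 # EDS spectrum image: (y, x) navigation, Energy axis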
def ser_reader(filename, objects=None, *args, **kwds):
    """Reads the information from the file and returns it in the HyperSpy
    required format.

    """
    header, data = load_ser_file(filename)
    record_by = guess_record_by(header["DataTypeID"])
    ndim = int(header["NumberDimensions"])
    if record_by == "spectrum":
        if ndim == 0 and header["ValidNumberElements"] != 0:
            # The calibration of the axes is not stored in the header, so
            # we try to guess it from the position coordinates.
            array_shape, axes = get_axes_from_position(header=header,
                                                       data=data)
        else:
            axes = []
            array_shape = [None] * int(ndim)
            if len(data["PositionY"]) > 1 and (
                    data["PositionY"][0] == data["PositionY"][1]):
                # The spatial dimensions are stored in F order,
                # i.e. X, Y, ...
                order = "F"
            else:
                # The spatial dimensions are stored in C order,
                # i.e. ..., Y, X
                order = "C"
            # Extra dimensions
            for i in range(ndim):
                if i == ndim - 1:
                    name = "x"
                elif i == ndim - 2:
                    name = "y"
                else:
                    name = t.Undefined
                idim = 1 + i if order == "C" else ndim - i
                axes.append(
                    {
                        "name": name,
                        "offset": header[
                            "Dim-%i_CalibrationOffset" % idim][0],
                        "scale": header["Dim-%i_CalibrationDelta" % idim][0],
                        "units": header[
                            "Dim-%i_Units" % idim][0].decode("utf-8"),
                        "size": header["Dim-%i_DimensionSize" % idim][0],
                        "index_in_array": i,
                    }
                )
                array_shape[i] = header["Dim-%i_DimensionSize" % idim][0]
        # Spectral dimension
        axes.append(
            {
                "offset": data["CalibrationOffset"][0],
                "scale": data["CalibrationDelta"][0],
                "size": data["ArrayLength"][0],
                "index_in_array": header["NumberDimensions"][0],
            }
        )
        # FEI seems to use the international system of units (SI) for the
        # energy scale (eV).
        axes[-1]["units"] = "eV"
        axes[-1]["name"] = "Energy"
        array_shape.append(data["ArrayLength"][0])
    elif record_by == "image":
        # Extra dimensions
        if ndim == 0 and header["ValidNumberElements"] != 0:
            # The calibration of the axes is not stored in the header, so
            # we try to guess it from the position coordinates.
            array_shape, axes = get_axes_from_position(header=header,
                                                       data=data)
        else:
            axes = []
            array_shape = []
            for i in range(ndim):
                if header["Dim-%i_DimensionSize" % (i + 1)][0] != 1:
                    axes.append(
                        {
                            "offset": header[
                                "Dim-%i_CalibrationOffset" % (i + 1)][0],
                            "scale": header[
                                "Dim-%i_CalibrationDelta" % (i + 1)][0],
                            # For image stacks, the UnitsLength is 0
                            # (no units):
                            "units": header[
                                "Dim-%i_Units" % (i + 1)][0].decode("utf-8")
                            if header["Dim-%i_UnitsLength" % (i + 1)] > 0
                            else "Unknown",
                            "size": header[
                                "Dim-%i_DimensionSize" % (i + 1)][0],
                        }
                    )
                    array_shape.append(
                        header["Dim-%i_DimensionSize" % (i + 1)][0])
        if objects is not None:
            objects_dict = convert_xml_to_dict(objects[0])
            units = guess_units_from_mode(objects_dict, header)
        else:
            units = "meters"
        # Y axis
        axes.append(
            {
                "name": "y",
                "offset": data["CalibrationOffsetY"][0]
                - data["CalibrationElementY"][0]
                * data["CalibrationDeltaY"][0],
                "scale": data["CalibrationDeltaY"][0],
                "units": units,
                "size": data["ArraySizeY"][0],
            }
        )
        array_shape.append(data["ArraySizeY"][0])
        # X axis
        axes.append(
            {
                "name": "x",
                "offset": data["CalibrationOffsetX"][0]
                - data["CalibrationElementX"][0]
                * data["CalibrationDeltaX"][0],
                "scale": data["CalibrationDeltaX"][0],
                "size": data["ArraySizeX"][0],
                "units": units,
            }
        )
        array_shape.append(data["ArraySizeX"][0])
    # FEI seems to use the international system of units (SI) for the
    # spatial scale.
    # However, we prefer to work in nm:
    for axis in axes:
        if axis["units"] == "meters":
            axis["units"] = "nm"
            axis["scale"] *= 10 ** 9
        elif axis["units"] == "1/meters":
            axis["units"] = "1/nm"
            axis["scale"] /= 10 ** 9
    # If the acquisition stops before finishing the job, the stored file will
    # report the requested size even though no values are recorded.
    # Therefore, if the shape of the retrieved array does not match that of
    # the data dimensions, we must fill the rest with zeros, or (better) with
    # NaNs if the dtype is float.
    if np.cumprod(array_shape)[-1] != np.cumprod(data["Array"].shape)[-1]:
        dc = np.zeros(np.cumprod(array_shape)[-1], dtype=data["Array"].dtype)
        if dc.dtype is np.dtype("f") or dc.dtype is np.dtype("f8"):
            dc[:] = np.nan
        dc[: data["Array"].ravel().shape[0]] = data["Array"].ravel()
    else:
        dc = data["Array"]
    dc = dc.reshape(array_shape)
    if record_by == "image":
        dc = dc[..., ::-1, :]
    if ordict:
        original_metadata = OrderedDict()
    else:
        original_metadata = {}
    header_parameters = sarray2dict(header)
    sarray2dict(data, header_parameters)
    if len(axes) != len(dc.shape):
        dc = dc.squeeze()
    if len(axes) != len(dc.shape):
        raise IOError("Please report this issue to the HyperSpy developers.")
    # We remove the Array key to save memory by avoiding duplication.
    del header_parameters["Array"]
    original_metadata["ser_header_parameters"] = header_parameters
    dictionary = {
        "data": dc,
        "metadata": {
            "General": {"original_filename": os.path.split(filename)[1]},
            "Signal": {"signal_type": "", "record_by": record_by},
        },
        "axes": axes,
        "original_metadata": original_metadata,
        "mapping": mapping,
    }
    return dictionary
def file_reader(filename, endianess='<', **kwds):
    metadata = {}
    f = open(filename, 'rb')
    std_header = np.fromfile(f, dtype=get_std_dtype_list(endianess),
                             count=1)
    fei_header = None
    if std_header['NEXT'] / 1024 == 128:
        _logger.info("%s seems to contain an extended FEI header", filename)
        fei_header = np.fromfile(f, dtype=get_fei_dtype_list(endianess),
                                 count=1024)
        if f.tell() == 1024 + std_header['NEXT']:
            _logger.debug("The FEI header was correctly loaded")
        else:
            _logger.warning("There was a problem reading the extended "
                            "header")
            f.seek(1024 + std_header['NEXT'])
            fei_header = None
    NX, NY, NZ = std_header['NX'], std_header['NY'], std_header['NZ']
    mmap_mode = kwds.pop('mmap_mode', 'c')
    lazy = kwds.pop('lazy', False)
    if lazy:
        mmap_mode = 'r'
    data = np.memmap(f, mode=mmap_mode, offset=f.tell(),
                     dtype=get_data_type(std_header['MODE'], endianess)
                     ).squeeze().reshape((NX, NY, NZ), order='F').T

    original_metadata = {'std_header': sarray2dict(std_header)}
    # Convert bytes to unicode
    for key in ["CMAP", "STAMP", "LABELS"]:
        original_metadata["std_header"][key] = \
            original_metadata["std_header"][key].decode()
    if fei_header is not None:
        fei_dict = sarray2dict(fei_header,)
        del fei_dict['empty']
        original_metadata['fei_header'] = fei_dict
    dim = len(data.shape)
    if fei_header is None:
        # The scale is in Angstroms; we convert it to nm
        scales = [
            10 * float(std_header['Zlen'] / std_header['MZ'])
            if float(std_header['MZ']) != 0 else 1,
            10 * float(std_header['Ylen'] / std_header['MY'])
            if float(std_header['MY']) != 0 else 1,
            10 * float(std_header['Xlen'] / std_header['MX'])
            if float(std_header['MX']) != 0 else 1,
        ]
        offsets = [
            10 * float(std_header['ZORIGIN']),
            10 * float(std_header['YORIGIN']),
            10 * float(std_header['XORIGIN']),
        ]
    else:
        # FEI does not use the standard header to store the scale.
        # It does store the spatial scale in pixel_size, one per angle,
        # in meters.
        scales = [1, ] + [fei_header['pixel_size'][0] * 10 ** 9, ] * 2
        offsets = [0, ] * 3
    units = [Undefined, 'nm', 'nm']
    names = ['z', 'y', 'x']
    metadata = {'General': {'original_filename': os.path.split(filename)[1]},
                "Signal": {'signal_type': "",
                           'record_by': 'image', },
                }
    # create the axis objects for each axis
    axes = [{
        'size': data.shape[i],
        'index_in_array': i,
        'name': names[i + 3 - dim],
        'scale': scales[i + 3 - dim],
        'offset': offsets[i + 3 - dim],
        'units': units[i + 3 - dim],
    } for i in range(dim)]
    dictionary = {'data': data,
                  'axes': axes,
                  'metadata': metadata,
                  'original_metadata': original_metadata,
                  'mapping': mapping}
    return [dictionary, ]
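# ``get_data_type`` is not shown here. For orientation, the MRC ``MODE``
# field conventionally maps to dtypes as sketched below; this is a sketch
# based on the MRC format convention, and the exact table used by
# ``get_data_type`` may differ.

import numpy as np

def mode_to_dtype(mode, endianess='<'):
    # Conventional MRC MODE values (MRC2014); not hyperspy's exact table.
    table = {
        0: 'i1',   # signed 8-bit integer
        1: 'i2',   # signed 16-bit integer
        2: 'f4',   # 32-bit float
        4: 'c8',   # complex (two 32-bit floats)
        6: 'u2',   # unsigned 16-bit integer
    }
    return np.dtype(endianess + table[int(mode)])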
def file_reader(filename, endianess='<', load_to_memory=True, mmap_mode='c',
                **kwds):
    _logger.debug("Reading blockfile: %s" % filename)
    metadata = {}
    # Makes sure we open in the right mode:
    if '+' in mmap_mode or ('write' in mmap_mode and
                            'copyonwrite' != mmap_mode):
        f = open(filename, 'r+b')
    else:
        f = open(filename, 'rb')
    _logger.debug("File opened")
    # Get header
    header = np.fromfile(f, dtype=get_header_dtype_list(endianess), count=1)
    if header['MAGIC'][0] not in magics:
        warnings.warn("Blockfile has unrecognized header signature. "
                      "Will attempt to read, but correctness is not "
                      "guaranteed!")
    header = sarray2dict(header)
    note = f.read(header['Data_offset_1'] - f.tell())
    note = note.strip(b'\x00')
    header['Note'] = note.decode()
    _logger.debug("File header: " + str(header))
    NX, NY = int(header['NX']), int(header['NY'])
    DP_SZ = int(header['DP_SZ'])
    if header['SDP']:
        SDP = 100. / header['SDP']
    else:
        SDP = Undefined
    original_metadata = {'blockfile_header': header}
    # Get data:
    # A Virtual BF/DF is stored first
    # offset1 = header['Data_offset_1']
    # f.seek(offset1)
    # data_pre = np.array(f.read(NX*NY), dtype=endianess+'u1'
    #                     ).squeeze().reshape((NX, NY), order='C').T
    # Then comes the actual blockfile
    offset2 = header['Data_offset_2']
    if load_to_memory:
        f.seek(offset2)
        data = np.fromfile(f, dtype=endianess + 'u1')
    else:
        data = np.memmap(f, mode=mmap_mode, offset=offset2,
                         dtype=endianess + 'u1')
    try:
        data = data.reshape((NY, NX, DP_SZ * DP_SZ + 6))
    except ValueError:
        warnings.warn(
            'Blockfile header dimensions larger than file size! '
            'Will attempt to load by zero padding incomplete frames.')
        # Data is stored DP by DP:
        pw = [(0, NX * NY * (DP_SZ * DP_SZ + 6) - data.size)]
        data = np.pad(data, pw, mode='constant')
        data = data.reshape((NY, NX, DP_SZ * DP_SZ + 6))
    # Every frame is preceded by a 6 byte sequence (AA 55, and then a 4 byte
    # integer specifying the frame number)
    data = data[:, :, 6:]
    data = data.reshape((NY, NX, DP_SZ, DP_SZ), order='C').squeeze()
    units = ['nm', 'nm', 'cm', 'cm']
    names = ['y', 'x', 'dy', 'dx']
    scales = [header['SY'], header['SX'], SDP, SDP]
    date, time = _from_serial_date(header['Acquisition_time'])
    metadata = {'General': {'original_filename': os.path.split(filename)[1],
                            'date': date,
                            'time': time,
                            'notes': header['Note']},
                "Signal": {'signal_type': "diffraction",
                           'record_by': 'image', },
                }
    # Create the axis objects for each axis
    dim = data.ndim
    axes = [{
        'size': data.shape[i],
        'index_in_array': i,
        'name': names[i],
        'scale': scales[i],
        'offset': 0.0,
        'units': units[i],
    } for i in range(dim)]
    dictionary = {'data': data,
                  'axes': axes,
                  'metadata': metadata,
                  'original_metadata': original_metadata,
                  'mapping': mapping, }
    f.close()
    return [dictionary, ]
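# The 6-byte frame preamble (magic bytes AA 55 followed by a little-endian
# uint32 frame number) and the ``[:, :, 6:]`` strip above can be checked on
# synthetic data. A minimal sketch building a single frame as stored on
# disk:

import numpy as np

DP_SZ = 4
frame_no = 7
pattern = np.arange(DP_SZ * DP_SZ, dtype=np.uint8)

# Build one frame: AA 55, the uint32 frame number, then the pixel data.
preamble = np.frombuffer(b'\xaa\x55' + np.uint32(frame_no).tobytes(),
                         dtype=np.uint8)
raw = np.concatenate([preamble, pattern])

assert raw.size == DP_SZ * DP_SZ + 6
dp = raw[6:].reshape((DP_SZ, DP_SZ))      # same slicing as the reader
np.testing.assert_array_equal(dp.ravel(), pattern)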
def spc_reader(filename, endianess='<', load_all_spc=False, **kwargs):
    """Read data from an SPC spectrum specified by filename.

    Parameters
    ----------
    filename : str
        Name of SPC file to read
    endianess : char
        Byte-order of data to read
    load_all_spc : bool
        Switch to control if all of the .spc header is read, or just the
        important parts for import into HyperSpy.
    **kwargs
        Remaining arguments are passed to the Numpy ``memmap`` function.

    Returns
    -------
    list
        list with dictionary of signal information to be passed back to
        hyperspy.io.load_with_reader
    """
    with open(filename, 'rb') as f:
        _logger.debug(' Reading {}'.format(filename))
        spc_header = __get_spc_header(f, endianess, load_all_spc)
        spc_dict = sarray2dict(spc_header)
        original_metadata = {'spc_header': spc_dict}
        nz = original_metadata['spc_header']['numPts']
        data_offset = original_metadata['spc_header']['dataStart']
        mode = kwargs.pop('mode', 'c')
        lazy = kwargs.pop('lazy', False)
        if lazy:
            mode = 'r'
        # Read data from file into a numpy memmap object
        data = np.memmap(f, mode=mode, offset=data_offset, dtype='u4',
                         shape=(1, nz), **kwargs).squeeze()

    # create the energy axis dictionary:
    energy_axis = {
        'size': data.shape[0],
        'index_in_array': 0,
        'name': 'Energy',
        'scale': original_metadata['spc_header']['evPerChan'] / 1000.0,
        'offset': original_metadata['spc_header']['startEnergy'],
        'units': 'keV'
    }

    # Assign metadata for spectrum:
    metadata = {'General': {'original_filename': os.path.split(filename)[1],
                            'title': 'EDS Spectrum'},
                "Signal": {'signal_type': "EDS_SEM",
                           'record_by': 'spectrum', }, }
    metadata = _add_spc_metadata(metadata, spc_dict)

    dictionary = {'data': data,
                  'axes': [energy_axis],
                  'metadata': metadata,
                  'original_metadata': original_metadata}

    return [dictionary, ]
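# As with the map reader, the usual entry point is ``hs.load``. A sketch
# ('spectrum.spc' is a hypothetical filename):

import hyperspy.api as hs

s = hs.load('spectrum.spc')
print(s.axes_manager['Energy'].units)   # 'keV', scale from evPerChan / 1000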
def spd_reader(filename, endianess='<', spc_fname=None, ipr_fname=None,
               load_all_spc=False, **kwargs):
    """Read data from an SPD spectral map specified by filename.

    Parameters
    ----------
    filename : str
        Name of SPD file to read
    endianess : char
        Byte-order of data to read
    spc_fname : None or str
        Name of file from which to read the spectral calibration. If data
        was exported fully from EDAX TEAM software, an .spc file with the
        same name as the .spd should be present.
        If `None`, the default filename will be searched for.
        Otherwise, the name of the .spc file to use for calibration can
        be explicitly given as a string.
    ipr_fname : None or str
        Name of file from which to read the spatial calibration. If data
        was exported fully from EDAX TEAM software, an .ipr file with the
        same name as the .spd (plus a "_Img" suffix) should be present.
        If `None`, the default filename will be searched for.
        Otherwise, the name of the .ipr file to use for spatial
        calibration can be explicitly given as a string.
    load_all_spc : bool
        Switch to control if all of the .spc header is read, or just the
        important parts for import into HyperSpy.
    **kwargs
        Remaining arguments are passed to the Numpy ``memmap`` function.

    Returns
    -------
    list
        list with dictionary of signal information to be passed back to
        hyperspy.io.load_with_reader
    """
    with open(filename, 'rb') as f:
        spd_header = np.fromfile(f,
                                 dtype=get_spd_dtype_list(endianess),
                                 count=1)

        original_metadata = {'spd_header': sarray2dict(spd_header)}

        # dimensions of map data:
        nx = original_metadata['spd_header']['nPoints']
        ny = original_metadata['spd_header']['nLines']
        nz = original_metadata['spd_header']['nChannels']
        data_offset = original_metadata['spd_header']['dataOffset']
        data_type = {'1': 'u1',
                     '2': 'u2',
                     '4': 'u4'}[str(original_metadata['spd_header'][
                         'countBytes'])]
        lazy = kwargs.pop('lazy', False)
        mode = kwargs.pop('mode', 'c')
        if lazy:
            mode = 'r'

        # Read data from file into a numpy memmap object
        data = np.memmap(f, mode=mode, offset=data_offset, dtype=data_type,
                         **kwargs).squeeze().reshape((nz, nx, ny),
                                                     order='F').T

    # Convert char arrays to strings:
    original_metadata['spd_header']['tag'] = \
        spd_header['tag'][0].view('S16')[0]
    # fName is the name of the .bmp (and .ipr) file of the map
    original_metadata['spd_header']['fName'] = \
        spd_header['fName'][0].view('S120')[0]

    # Get name of .spc file from the .spd map (if not explicitly given):
    if spc_fname is None:
        spc_path = os.path.dirname(filename)
        spc_basename = os.path.splitext(
            os.path.basename(filename))[0] + '.spc'
        spc_fname = os.path.join(spc_path, spc_basename)

    # Get name of .ipr file from bitmap image (if not explicitly given):
    if ipr_fname is None:
        ipr_basename = os.path.splitext(
            os.path.basename(
                original_metadata['spd_header'][
                    'fName']))[0].decode() + '.ipr'
        ipr_path = os.path.dirname(filename)
        ipr_fname = os.path.join(ipr_path, ipr_basename)

    # Flags to control reading of files
    read_spc = os.path.isfile(spc_fname)
    read_ipr = os.path.isfile(ipr_fname)

    # Read the .ipr header (if possible)
    if read_ipr:
        with open(ipr_fname, 'rb') as f:
            _logger.debug(' From .spd reader - '
                          'reading .ipr {}'.format(ipr_fname))
            ipr_header = __get_ipr_header(f, endianess)
            original_metadata['ipr_header'] = sarray2dict(ipr_header)

            # Workaround for type error when saving hdf5:
            # save as list of strings instead of numpy unicode array;
            # see https://github.com/hyperspy/hyperspy/pull/2007 and
            # https://github.com/h5py/h5py/issues/289 for context
            original_metadata['ipr_header']['charText'] = \
                [np.string_(i) for i in
                 original_metadata['ipr_header']['charText']]
    else:
        _logger.warning('Could not find .ipr file named {}.\n'
                        'No spatial calibration will be loaded.'
                        '\n'.format(ipr_fname))

    # Read the .spc header (if possible)
    if read_spc:
        with open(spc_fname, 'rb') as f:
            _logger.debug(' From .spd reader - '
                          'reading .spc {}'.format(spc_fname))
            spc_header = __get_spc_header(f, endianess, load_all_spc)
            spc_dict = sarray2dict(spc_header)
            original_metadata['spc_header'] = spc_dict
    else:
        _logger.warning('Could not find .spc file named {}.\n'
                        'No spectral metadata will be loaded.'
                        '\n'.format(spc_fname))

    # create the energy axis dictionary:
    energy_axis = {
        'size': data.shape[2],
        'index_in_array': 2,
        'name': 'Energy',
        'scale': original_metadata['spc_header']['evPerChan'] / 1000.0
        if read_spc else 1,
        'offset': original_metadata['spc_header']['startEnergy']
        if read_spc else 1,
        'units': 'keV' if read_spc else t.Undefined,
    }

    nav_units = 'µm'
    # Create navigation axes dictionaries:
    x_axis = {
        'size': data.shape[1],
        'index_in_array': 1,
        'name': 'x',
        'scale': original_metadata['ipr_header']['mppX'] if read_ipr else 1,
        'offset': 0,
        'units': nav_units if read_ipr else t.Undefined,
    }
    y_axis = {
        'size': data.shape[0],
        'index_in_array': 0,
        'name': 'y',
        'scale': original_metadata['ipr_header']['mppY'] if read_ipr else 1,
        'offset': 0,
        'units': nav_units if read_ipr else t.Undefined,
    }

    # Assign metadata for spectrum image:
    metadata = {
        'General': {
            'original_filename': os.path.split(filename)[1],
            'title': 'EDS Spectrum Image'
        },
        "Signal": {
            'signal_type': "EDS_SEM",
            'record_by': 'spectrum',
        },
    }

    # Add spectral calibration and elements (if present):
    if read_spc:
        metadata = _add_spc_metadata(metadata, spc_dict)

    # Define navigation and signal axes:
    axes = [y_axis, x_axis, energy_axis]

    dictionary = {
        'data': data,
        'axes': axes,
        'metadata': metadata,
        'original_metadata': original_metadata
    }

    return [dictionary, ]
def ser_reader(filename, objects=None, *args, **kwds):
    """Reads the information from the file and returns it in the HyperSpy
    required format.

    """
    header, data = load_ser_file(filename)
    record_by = guess_record_by(header['DataTypeID'])
    ndim = int(header['NumberDimensions'])
    if record_by == 'spectrum':
        if ndim == 0 and header["ValidNumberElements"] != 0:
            # The calibration of the axes is not stored in the header, so
            # we try to guess it from the position coordinates.
            array_shape, axes = get_axes_from_position(header=header,
                                                       data=data)
        else:
            axes = []
            array_shape = [None, ] * int(ndim)
            if len(data['PositionY']) > 1 and \
                    (data['PositionY'][0] == data['PositionY'][1]):
                # The spatial dimensions are stored in F order,
                # i.e. X, Y, ...
                order = "F"
            else:
                # The spatial dimensions are stored in C order,
                # i.e. ..., Y, X
                order = "C"
            # Extra dimensions
            for i in range(ndim):
                if i == ndim - 1:
                    name = 'x'
                elif i == ndim - 2:
                    name = 'y'
                else:
                    name = t.Undefined
                idim = 1 + i if order == "C" else ndim - i
                axes.append({
                    'name': name,
                    'offset': header['Dim-%i_CalibrationOffset' % idim][0],
                    'scale': header['Dim-%i_CalibrationDelta' % idim][0],
                    'units': header['Dim-%i_Units' % idim][0].decode('utf-8'),
                    'size': header['Dim-%i_DimensionSize' % idim][0],
                    'index_in_array': i
                })
                array_shape[i] = \
                    header['Dim-%i_DimensionSize' % idim][0]
        # Spectral dimension
        axes.append({
            'offset': data['CalibrationOffset'][0],
            'scale': data['CalibrationDelta'][0],
            'size': data['ArrayLength'][0],
            'index_in_array': header['NumberDimensions'][0]
        })
        # FEI seems to use the international system of units (SI) for the
        # energy scale (eV).
        axes[-1]['units'] = 'eV'
        axes[-1]['name'] = 'Energy'
        array_shape.append(data['ArrayLength'][0])
    elif record_by == 'image':
        # Extra dimensions
        if ndim == 0 and header["ValidNumberElements"] != 0:
            # The calibration of the axes is not stored in the header, so
            # we try to guess it from the position coordinates.
            array_shape, axes = get_axes_from_position(header=header,
                                                       data=data)
        else:
            axes = []
            array_shape = []
            for i in range(ndim):
                if header['Dim-%i_DimensionSize' % (i + 1)][0] != 1:
                    axes.append({
                        'offset': header[
                            'Dim-%i_CalibrationOffset' % (i + 1)][0],
                        'scale': header[
                            'Dim-%i_CalibrationDelta' % (i + 1)][0],
                        # For image stacks, the UnitsLength is 0 (no units):
                        'units': header['Dim-%i_Units' % (i + 1)][0].decode(
                            'utf-8') if header['Dim-%i_UnitsLength' %
                                               (i + 1)] > 0 else 'Unknown',
                        'size': header['Dim-%i_DimensionSize' % (i + 1)][0],
                    })
                    array_shape.append(
                        header['Dim-%i_DimensionSize' % (i + 1)][0])
        if objects is not None:
            objects_dict = convert_xml_to_dict(objects[0])
            units = guess_units_from_mode(objects_dict, header)
        else:
            units = "meters"
        # Y axis
        axes.append({
            'name': 'y',
            'offset': data['CalibrationOffsetY'][0] -
            data['CalibrationElementY'][0] * data['CalibrationDeltaY'][0],
            'scale': data['CalibrationDeltaY'][0],
            'units': units,
            'size': data['ArraySizeY'][0],
        })
        array_shape.append(data['ArraySizeY'][0])
        # X axis
        axes.append({
            'name': 'x',
            'offset': data['CalibrationOffsetX'][0] -
            data['CalibrationElementX'][0] * data['CalibrationDeltaX'][0],
            'scale': data['CalibrationDeltaX'][0],
            'size': data['ArraySizeX'][0],
            'units': units,
        })
        array_shape.append(data['ArraySizeX'][0])
    # FEI seems to use the international system of units (SI) for the
    # spatial scale.
    # However, we prefer to work in nm:
    for axis in axes:
        if axis['units'] == 'meters':
            axis['units'] = 'nm'
            axis['scale'] *= 10 ** 9
        elif axis['units'] == '1/meters':
            axis['units'] = '1/nm'
            axis['scale'] /= 10 ** 9
    # If the acquisition stops before finishing the job, the stored file will
    # report the requested size even though no values are recorded.
    # Therefore, if the shape of the retrieved array does not match that of
    # the data dimensions, we must fill the rest with zeros, or (better) with
    # NaNs if the dtype is float.
    if np.cumprod(array_shape)[-1] != np.cumprod(data['Array'].shape)[-1]:
        dc = np.zeros(np.cumprod(array_shape)[-1],
                      dtype=data['Array'].dtype)
        if dc.dtype is np.dtype('f') or dc.dtype is np.dtype('f8'):
            dc[:] = np.nan
        dc[:data['Array'].ravel().shape[0]] = data['Array'].ravel()
    else:
        dc = data['Array']
    dc = dc.reshape(array_shape)
    if record_by == 'image':
        dc = dc[..., ::-1, :]
    if ordict:
        original_metadata = OrderedDict()
    else:
        original_metadata = {}
    header_parameters = sarray2dict(header)
    sarray2dict(data, header_parameters)
    if len(axes) != len(dc.shape):
        dc = dc.squeeze()
    if len(axes) != len(dc.shape):
        raise IOError("Please report this issue to the HyperSpy developers.")
    # We remove the Array key to save memory by avoiding duplication.
    del header_parameters['Array']
    original_metadata['ser_header_parameters'] = header_parameters
    dictionary = {
        'data': dc,
        'metadata': {
            'General': {
                'original_filename': os.path.split(filename)[1]},
            "Signal": {
                'signal_type': "",
                'record_by': record_by,
            },
        },
        'axes': axes,
        'original_metadata': original_metadata,
        'mapping': mapping}
    return dictionary
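# A minimal sketch of the SI -> nm rescaling applied above, run on a fake
# axis dictionary (the values are illustrative):

axis = {'units': 'meters', 'scale': 1e-9}   # 1 nm pixels, stored in meters
if axis['units'] == 'meters':
    axis['units'] = 'nm'
    axis['scale'] *= 10 ** 9                # scale is now 1.0 (nm)
elif axis['units'] == '1/meters':
    axis['units'] = '1/nm'
    axis['scale'] /= 10 ** 9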
def load_from_unf(cls, filename):
    r"""Load a `.unf`-file into a :class:`~.SemperFormat` object.

    Parameters
    ----------
    filename : string
        The name of the unf-file from which to load the data. Standard
        format is '\*.unf'.

    Returns
    -------
    semper : :class:`~.SemperFormat` (N=1)
        SEMPER file format object containing the loaded information.

    """
    if ordict:
        metadata = OrderedDict()
    else:
        _logger.warning(
            'OrderedDict is not available, using a standard dictionary.\n')
        metadata = {}
    with open(filename, 'rb') as f:
        # Read header:
        rec_length = np.fromfile(
            f, dtype='<i4', count=1)[0]  # length of header
        header = np.fromfile(
            f, dtype=cls.HEADER_DTYPES[:rec_length // 2], count=1)
        metadata.update(sarray2dict(header))
        assert np.frombuffer(f.read(4), dtype=np.int32)[0] == rec_length, \
            'Error while reading the header (length is not correct)!'
        data_format = cls.IFORM_DICT[metadata['IFORM']]
        iversn, remain = divmod(metadata['IFLAG'], 10000)
        ilabel, ntitle = divmod(remain, 1000)
        metadata.update(
            {'IVERSN': iversn, 'ILABEL': ilabel, 'NTITLE': ntitle})
        # Read title:
        title = ''
        if ntitle > 0:
            assert np.fromfile(
                f, dtype='<i4', count=1)[0] == ntitle  # length of title
            title = b''.join(np.fromfile(f, dtype='c', count=ntitle))
            title = title.decode()
            metadata['TITLE'] = title
            assert np.fromfile(f, dtype='<i4', count=1)[0] == ntitle
        if ilabel:
            try:
                metadata.update(cls._read_label(f))
            except Exception as e:
                warning = ('Could not read label, trying to proceed '
                           'without it!')
                warning += ' (Error message: {})'.format(str(e))
                warnings.warn(warning)
        # Read picture data:
        nlay, nrow, ncol = (metadata['NLAY'], metadata['NROW'],
                            metadata['NCOL'])
        data = np.empty((nlay, nrow, ncol), dtype=data_format)
        for k in range(nlay):
            for j in range(nrow):
                rec_length = np.fromfile(f, dtype='<i4', count=1)[0]
                # Not always ncol, see below:
                count = rec_length // np.dtype(data_format).itemsize
                row = np.fromfile(f, dtype=data_format, count=count)
                # [:ncol] is used because Semper always writes an even
                # number of bytes, which is a problem when reading in single
                # bytes (IFORM = 0, np.byte). If ncol is odd, an empty
                # byte (0) is added which has to be skipped during read in:
                data[k, j, :] = row[:ncol]
                test = np.fromfile(f, dtype='<i4', count=1)[0]
                assert test == rec_length
    offsets = (metadata.get('X0V0', 0.),
               metadata.get('Y0V2', 0.),
               metadata.get('Z0V4', 0.))
    scales = (metadata.get('DXV1', 1.),
              metadata.get('DYV3', 1.),
              metadata.get('DZV5', 1.))
    units = (metadata.get('XUNIT', Undefined),
             metadata.get('YUNIT', Undefined),
             metadata.get('ZUNIT', Undefined))
    return cls(data, title, offsets, scales, units, metadata)
def load_from_unf(cls, filename, lazy=False):
    r"""Load a `.unf`-file into a :class:`~.SemperFormat` object.

    Parameters
    ----------
    filename : string
        The name of the unf-file from which to load the data. Standard
        format is '\*.unf'.
    lazy : bool, optional
        If True, the data is loaded lazily as a dask array instead of
        being read into memory. Default is False.

    Returns
    -------
    semper : :class:`~.SemperFormat` (N=1)
        SEMPER file format object containing the loaded information.

    """
    metadata = OrderedDict()
    with open(filename, 'rb') as f:
        # Read header:
        rec_length = np.fromfile(f, dtype='<i4',
                                 count=1)[0]  # length of header
        header = np.fromfile(f, dtype=cls.HEADER_DTYPES[:rec_length // 2],
                             count=1)
        metadata.update(sarray2dict(header))
        assert np.frombuffer(f.read(4), dtype=np.int32)[0] == rec_length, \
            'Error while reading the header (length is not correct)!'
        data_format = cls.IFORM_DICT[metadata['IFORM']]
        iversn, remain = divmod(metadata['IFLAG'], 10000)
        ilabel, ntitle = divmod(remain, 1000)
        metadata.update({
            'IVERSN': iversn,
            'ILABEL': ilabel,
            'NTITLE': ntitle
        })
        # Read title:
        title = ''
        if ntitle > 0:
            assert np.fromfile(f, dtype='<i4',
                               count=1)[0] == ntitle  # length of title
            title = b''.join(np.fromfile(f, dtype='c', count=ntitle))
            title = title.decode()
            metadata['TITLE'] = title
            assert np.fromfile(f, dtype='<i4', count=1)[0] == ntitle
        if ilabel:
            try:
                metadata.update(cls._read_label(f))
            except Exception as e:
                warning = ('Could not read label, trying to proceed '
                           'without it!')
                warning += ' (Error message: {})'.format(str(e))
                warnings.warn(warning)
        # Read picture data:
        pos = f.tell()
        shape = metadata['NLAY'], metadata['NROW'], metadata['NCOL']
        if lazy:
            from dask.array import from_delayed
            from dask import delayed
            task = delayed(_read_data)(f, filename, pos, data_format, shape)
            data = from_delayed(task, shape=shape, dtype=data_format)
        else:
            data = _read_data(f, filename, pos, data_format, shape)
    offsets = (metadata.get('X0V0', 0.),
               metadata.get('Y0V2', 0.),
               metadata.get('Z0V4', 0.))
    scales = (metadata.get('DXV1', 1.),
              metadata.get('DYV3', 1.),
              metadata.get('DZV5', 1.))
    units = (metadata.get('XUNIT', Undefined),
             metadata.get('YUNIT', Undefined),
             metadata.get('ZUNIT', Undefined))
    return cls(data, title, offsets, scales, units, metadata)
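# The lazy branch above wraps the eager ``_read_data`` call in a dask
# delayed task, so no pixels are touched until ``.compute()`` is called.
# The same pattern works for any file-backed reader; a self-contained
# sketch with a stand-in reader function and a hypothetical 'demo.raw':

import numpy as np
from dask import delayed
from dask.array import from_delayed

def read_block(filename, shape, dtype):
    # Stand-in for _read_data: eagerly read a flat binary file.
    return np.fromfile(filename, dtype=dtype).reshape(shape)

shape, dtype = (2, 3, 4), np.float32
np.arange(24, dtype=dtype).tofile('demo.raw')

task = delayed(read_block)('demo.raw', shape, dtype)   # nothing read yet
data = from_delayed(task, shape=shape, dtype=dtype)    # lazy dask array
print(data.sum().compute())                            # file read here: 276.0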
def file_reader(filename, endianess="<", mmap_mode=None, lazy=False, **kwds): _logger.debug("Reading blockfile: %s" % filename) metadata = {} if mmap_mode is None: mmap_mode = "r" if lazy else "c" # Makes sure we open in right mode: if "+" in mmap_mode or ("write" in mmap_mode and "copyonwrite" != mmap_mode): if lazy: raise ValueError("Lazy loading does not support in-place writing") f = open(filename, "r+b") else: f = open(filename, "rb") _logger.debug("File opened") # Get header header = np.fromfile(f, dtype=get_header_dtype_list(endianess), count=1) if header["MAGIC"][0] not in magics: warnings.warn( "Blockfile has unrecognized header signature. " "Will attempt to read, but correcteness not guaranteed!", UserWarning, ) header = sarray2dict(header) note = f.read(header["Data_offset_1"] - f.tell()) # It seems it uses "\x00" for padding, so we remove it try: header["Note"] = note.decode("latin1").strip("\x00") except BaseException: # Not sure about the encoding so, if it fails, we carry on _logger.warning("Reading the Note metadata of this file failed. " "You can help improving " "HyperSpy by reporting the issue in " "https://github.com/hyperspy/hyperspy") _logger.debug("File header: " + str(header)) NX, NY = int(header["NX"]), int(header["NY"]) DP_SZ = int(header["DP_SZ"]) if header["SDP"]: SDP = 100.0 / header["SDP"] else: SDP = Undefined original_metadata = {"blockfile_header": header} # Get data: # TODO A Virtual BF/DF is stored first, may be loaded as navigator in future # offset1 = header['Data_offset_1'] # f.seek(offset1) # navigator = np.fromfile(f, dtype=endianess+"u1", shape=(NX, NY)).T # Then comes actual blockfile offset2 = header["Data_offset_2"] if not lazy: f.seek(offset2) data = np.fromfile(f, dtype=endianess + "u1") else: data = np.memmap(f, mode=mmap_mode, offset=offset2, dtype=endianess + "u1") try: data = data.reshape((NY, NX, DP_SZ * DP_SZ + 6)) except ValueError: warnings.warn( "Blockfile header dimensions larger than file size! " "Will attempt to load by zero padding incomplete frames.") # Data is stored DP by DP: pw = [(0, NX * NY * (DP_SZ * DP_SZ + 6) - data.size)] data = np.pad(data, pw, mode="constant") data = data.reshape((NY, NX, DP_SZ * DP_SZ + 6)) # Every frame is preceeded by a 6 byte sequence (AA 55, and then a 4 byte # integer specifying frame number) data = data[:, :, 6:] data = data.reshape((NY, NX, DP_SZ, DP_SZ), order="C").squeeze() units = ["nm", "nm", "cm", "cm"] names = ["y", "x", "dy", "dx"] scales = [header["SY"], header["SX"], SDP, SDP] date, time, time_zone = serial_date_to_ISO_format( header["Acquisition_time"]) metadata = { "General": { "original_filename": os.path.split(filename)[1], "date": date, "time": time, "time_zone": time_zone, "notes": header["Note"], }, "Signal": { "signal_type": "diffraction", "record_by": "image", }, } # Create the axis objects for each axis dim = data.ndim axes = [{ "size": data.shape[i], "index_in_array": i, "name": names[i], "scale": scales[i], "offset": 0.0, "units": units[i], } for i in range(dim)] dictionary = { "data": data, "axes": axes, "metadata": metadata, "original_metadata": original_metadata, "mapping": mapping, } f.close() return [ dictionary, ]