def file_reader(filename, encoding='latin-1', **kwds):
    """Read an EMSA/MAS (.msa) spectrum file.

    Header lines (those starting with ``#``) are collected as keywords until
    the ``SPECTRUM`` keyword is found. The remaining non-comment, non-blank
    lines are parsed as data according to the ``DATATYPE`` keyword
    (``'XY'``: second column of each line, ``'Y'``: every number of each
    line).

    Parameters
    ----------
    filename : str
        Path of the file to read.
    encoding : str
        Text encoding handed to ``codecs.open``; undecodable bytes are
        replaced instead of raising.
    **kwds
        Accepted for interface compatibility and ignored.

    Returns
    -------
    list
        A one-element list holding a dictionary with the keys ``'data'``,
        ``'axes'``, ``'mapped_parameters'`` and ``'original_parameters'``.

    Raises
    ------
    KeyError
        If data lines are present but the header has no ``DATATYPE`` keyword.
    """
    parameters = {}
    mapped = DictionaryBrowser({})
    y = []
    with codecs.open(filename, encoding=encoding,
                     errors='replace') as spectrum_file:
        in_data_section = False
        for line in spectrum_file.readlines():
            if not in_data_section:
                # Read the keywords
                if not line.startswith('#'):
                    continue
                # Split on the first separator only, so that values which
                # themselves contain ': ' are kept instead of being dropped.
                key, sep, value = line.partition(': ')
                value = value.strip() if sep else None
                key = key.strip('#').strip()
                if key == 'SPECTRUM':
                    in_data_section = True
                else:
                    parameters[key] = value
            elif not line.startswith('#') and line.strip():
                # Read the data
                fields = line.replace(',', ' ').split()
                if parameters['DATATYPE'] == 'XY':
                    y.append(float(fields[1]))
                elif parameters['DATATYPE'] == 'Y':
                    y.extend([float(field) for field in fields])

    # We rewrite the format value to be sure that it complies with the
    # standard, because it will be used by the writer routine
    parameters['FORMAT'] = "EMSA/MAS Spectral Data File"

    # Convert the parameters to the right type and map some of them.
    # Iterate over a snapshot because the values are rewritten in the loop.
    for parameter, value in list(parameters.items()):
        # Some parameter names can contain the units information
        # e.g. #AZIMANGLE-dg: 90.
        # Only the first hyphen separates the name from the units, so names
        # with several hyphens no longer raise ValueError.
        clean_par, sep, units = parameter.partition('-')
        if sep:
            clean_par, units = clean_par.strip(), units.strip()
        else:
            units = None
        if clean_par not in keywords:
            continue
        spec = keywords[clean_par]
        try:
            parameters[parameter] = spec['dtype'](value)
        except Exception:
            # Normally the offending misspelling is a space in the
            # scientific notation, e.g. 2.0 E-06, so we try to correct it
            try:
                parameters[parameter] = spec['dtype'](value.replace(' ', ''))
            except Exception:
                print("The %s keyword value, %s " % (parameter, value) +
                      "could not be converted to the right type")
        if spec['mapped_to'] is not None:
            mapped.set_item(spec['mapped_to'], parameters[parameter])
            if units is not None:
                mapped.set_item(spec['mapped_to'] + '_units', units)

    # The date parameter needs some extra care.
    # It is necessary to change the locale to US english to read the date
    # keyword
    loc = locale.getlocale(locale.LC_TIME)
    # Setting locale can raise an exception because
    # their name depends on library versions, platform etc.
    try:
        if os_name == 'posix':
            locale.setlocale(locale.LC_TIME, ('en_US', 'utf8'))
        elif os_name == 'windows':
            locale.setlocale(locale.LC_TIME, 'english')
        if parameters.get('TIME'):
            try:
                H, M = time.strptime(parameters['TIME'], "%H:%M")[3:5]
                mapped['time'] = datetime.time(H, M)
            except Exception:
                print('The time information could not be retrieved')
        if parameters.get('DATE'):
            try:
                Y, M, D = time.strptime(parameters['DATE'], "%d-%b-%Y")[0:3]
                mapped['date'] = datetime.date(Y, M, D)
            except Exception:
                print('The date information could not be retrieved')
    except Exception:
        warnings.warn("I couldn't write the date information due to "
                      "an unexpected error. Please report this error to "
                      "the developers")
    finally:
        # Always restore the saved locale, whatever happened above
        locale.setlocale(locale.LC_TIME, loc)

    axes = [{
        'size': len(y),
        'index_in_array': 0,
        'name': parameters.get('XLABEL', ''),
        'scale': parameters.get('XPERCHAN', 1),
        'offset': parameters.get('OFFSET', 0),
        'units': parameters.get('XUNITS', ''),
    }]

    mapped['original_filename'] = filename
    mapped['record_by'] = 'spectrum'
    if mapped.has_item('signal_type'):
        if mapped.signal_type == 'ELS':
            mapped.signal_type = 'EELS'
    else:
        # Defaulting to EELS looks reasonable
        mapped.signal_type = 'EELS'

    dictionary = {
        'data': np.array(y),
        'axes': axes,
        'mapped_parameters': mapped.as_dictionary(),
        'original_parameters': parameters,
    }
    return [dictionary, ]
def file_reader(filename, rpl_info=None, encoding="latin-1",
                mmap_mode='c', *args, **kwds):
    """Parses a Lispix (http://www.nist.gov/lispix/) ripple (.rpl) file
    and reads the data from the corresponding raw (.raw) file;
    or, read a raw file if the dictionary rpl_info is provided.

    This format is often used in EDS/EDX experiments.

    Images and spectral images or data cubes that are written in the
    (Lispix) raw file format are just a continuous string of numbers.

    Data cubes can be stored image by image, or spectrum by spectrum.
    Single images are stored row by row, vector cubes are stored row by row
    (each row spectrum by spectrum), image cubes are stored image by image.

    All of the numbers are in the same format, such as 16 bit signed integer,
    IEEE 8-byte real, 8-bit unsigned byte, etc.

    The "raw" file should be accompanied by text file with the same name and
    ".rpl" extension. This file lists the characteristics of the raw file so
    that it can be loaded without human intervention.

    Alternatively, dictionary 'rpl_info' containing the information can
    be given.

    Some keys are specific to Hyperspy and will be ignored by other software.

    RPL stands for "Raw Parameter List", an ASCII text, tab delimited file in
    which Hyperspy reads the image parameters for a raw file.

                    TABLE OF RPL PARAMETERS
        key             type     description
      ----------   ------------ --------------------
      # Mandatory      keys:
      width            int      # pixels per row
      height           int      # number of rows
      depth            int      # number of images or spectral pts
      offset           int      # bytes to skip
      data-type        str      # 'signed', 'unsigned', or 'float'
      data-length      str      # bytes per pixel  '1', '2', '4', or '8'
      byte-order       str      # 'big-endian', 'little-endian', or 'dont-care'
      record-by        str      # 'image', 'vector', or 'dont-care'
      # X-ray keys:
      ev-per-chan      int      # optional, eV per channel
      detector-peak-width-ev  int   # optional, FWHM for the Mn K-alpha line
      # Hyperspy-specific keys
      depth-origin    int      # energy offset in pixels
      depth-scale     float    # energy scaling (units per pixel)
      depth-units     str      # energy units, usually eV
      depth-name      str      # Name of the magnitude stored as depth
      width-origin    int      # column offset in pixels
      width-scale     float    # column scaling (units per pixel)
      width-units     str      # column units, usually nm
      width-name      str      # Name of the magnitude stored as width
      height-origin   int      # row offset in pixels
      height-scale    float    # row scaling (units per pixel)
      height-units    str      # row units, usually nm
      height-name     str      # Name of the magnitude stored as height
      signal          str      # Name of the signal stored, e.g. HAADF
      convergence-angle float  # TEM convergence angle in mrad
      collection-angle  float  # EELS spectrometer collection angle in mrad
      beam-energy       float  # TEM beam energy in keV

    NOTES

    When 'data-length' is 1, the 'byte order' is not relevant as there is only
    one byte per datum, and 'byte-order' should be 'dont-care'.

    When 'depth' is 1, the file has one image, 'record-by' is not relevant and
    should be 'dont-care'. For spectral images, 'record-by' is 'vector'.
    For stacks of images, 'record-by' is 'image'.

    Floating point numbers can be IEEE 4-byte, or IEEE 8-byte. Therefore if
    data-type is float, data-length MUST be 4 or 8.

    The rpl file is read in a case-insensitive manner. However, when providing
    a dictionary as input, the keys MUST be lowercase.

    Comment lines, beginning with a semi-colon ';' are allowed anywhere.

    The first non-comment in the rpl file line MUST have two column names:
    'name_1'<TAB>'name_2'; any name would do e.g. 'key'<TAB>'value'.

    Parameters can be in ANY order.

    In the rpl file, the parameter name is followed by ONE tab (spaces are
    ignored) e.g.: 'data-length'<TAB>'2'

    In the rpl file, other data and more tabs can follow the two items on
    each row, and are ignored.

    Other keys and values can be included and are ignored.

    Any number of spaces can go along with each tab.

    Parameters
    ----------
    filename : str
        Path of the .rpl file. The raw file name is built by replacing the
        last three characters with 'raw' or 'RAW'.
    rpl_info : dict, optional
        Already parsed parameters; when given, the .rpl file is not read.
        A missing 'signal' key is added to this dictionary in place.
    encoding : str
        Text encoding used to open the .rpl file.
    mmap_mode : str
        Forwarded to ``read_raw``.

    Returns
    -------
    list
        A one-element list holding a dictionary with the keys ``'data'``,
        ``'axes'``, ``'mapped_parameters'`` and ``'original_parameters'``.
        Only dimensions whose size is greater than one get an axis.

    Raises
    ------
    IOError
        If no ``rpl_info`` is given and the extension is not a known one, or
        if the companion raw file cannot be found.
    KeyError
        If 'record-by', 'width', 'height' or 'depth' is missing.
    """
    if not rpl_info:
        if filename[-3:] in file_extensions:
            with codecs.open(filename, encoding=encoding,
                             errors='replace') as f:
                rpl_info = parse_ripple(f)
        else:
            raise IOError('File has wrong extension: "%s"' % filename[-3:])

    candidates = [filename[:-3] + ext for ext in ('raw', 'RAW')]
    rawfname = ''
    for candidate in candidates:
        if os.path.exists(candidate):
            rawfname = candidate
            break
    if not rawfname:
        # Name the files that were looked for; the previous message always
        # formatted an empty string.
        raise IOError('RAW file "%s" (or "%s") does not exist'
                      % tuple(candidates))
    data = read_raw(rpl_info, rawfname, mmap_mode=mmap_mode)

    if rpl_info['record-by'] == 'vector':
        print('Loading as spectrum')
        record_by = 'spectrum'
    elif rpl_info['record-by'] == 'image':
        print('Loading as Image')
        record_by = 'image'
    elif len(data.shape) == 1:
        print('Loading as spectrum')
        record_by = 'spectrum'
    else:
        print('Loading as image')
        record_by = 'image'

    if rpl_info['record-by'] == 'vector':
        idepth, iheight, iwidth = 2, 0, 1
        names = ['height', 'width', 'depth']
    else:
        idepth, iheight, iwidth = 0, 1, 2
        names = ['depth', 'height', 'width']

    scales = [1, 1, 1]
    origins = [0, 0, 0]
    units = ['', '', '']
    # The sizes must be looked up before the names are overridden below
    sizes = [rpl_info[name] for name in names]

    if 'signal' not in rpl_info:
        rpl_info['signal'] = ""

    # NOTE: 'detector-peak-width-ev' needs no special handling: rpl_info is
    # returned whole as 'original_parameters'. The former assignment to an
    # undefined ``original_parameters`` name raised NameError.

    calibration = (('origin', origins), ('scale', scales),
                   ('units', units), ('name', names))
    for axis, index in (('depth', idepth), ('width', iwidth),
                        ('height', iheight)):
        for suffix, target in calibration:
            key = '%s-%s' % (axis, suffix)
            if key in rpl_info:
                target[index] = rpl_info[key]
    # ev-per-chan is the only calibration supported by the original ripple
    # format; it is used only when no depth-scale is given
    if 'depth-scale' not in rpl_info and 'ev-per-chan' in rpl_info:
        scales[idepth] = rpl_info['ev-per-chan']

    mp = DictionaryBrowser({
        'record_by': record_by,
        'original_filename': os.path.split(filename)[1],
        'signal_type': rpl_info['signal'],
    })
    for key, item_path in (
            ('convergence-angle', 'TEM.convergence_angle'),
            ('collection-angle', 'TEM.EELS.collection_angle'),
            ('beam-energy', 'TEM.beam_energy')):
        if key in rpl_info:
            mp.set_item(item_path, rpl_info[key])

    axes = []
    for size, name, scale, origin, unit in zip(sizes, names, scales,
                                               origins, units):
        if size > 1:
            axes.append({
                'size': size,
                # Singleton dimensions are squeezed out of the data, so the
                # index counts only the axes kept so far
                'index_in_array': len(axes),
                'name': name,
                'scale': scale,
                'offset': origin,
                'units': unit,
            })

    dictionary = {
        'data': data.squeeze(),
        'axes': axes,
        'mapped_parameters': mp.as_dictionary(),
        'original_parameters': rpl_info,
    }
    return [dictionary, ]
def file_reader(filename, encoding='latin-1', **kwds):
    """Read an EMSA/MAS (.msa) spectrum file.

    Header lines (those starting with ``#``) are collected as keywords until
    the ``SPECTRUM`` keyword is found. The remaining non-comment, non-blank
    lines are parsed as data according to the ``DATATYPE`` keyword
    (``'XY'``: second column of each line, ``'Y'``: every number of each
    line).

    Parameters
    ----------
    filename : str
        Path of the file to read.
    encoding : str
        Text encoding handed to ``codecs.open``; undecodable bytes are
        replaced instead of raising.
    **kwds
        Accepted for interface compatibility and ignored.

    Returns
    -------
    list
        A one-element list holding a dictionary with the keys ``'data'``,
        ``'axes'``, ``'mapped_parameters'`` and ``'original_parameters'``.

    Raises
    ------
    KeyError
        If data lines are present but the header has no ``DATATYPE`` keyword.
    """
    parameters = {}
    mapped = DictionaryBrowser({})
    y = []
    with codecs.open(filename, encoding=encoding,
                     errors='replace') as spectrum_file:
        in_data_section = False
        for line in spectrum_file.readlines():
            if not in_data_section:
                # Read the keywords
                if not line.startswith('#'):
                    continue
                # Split on the first separator only, so that values which
                # themselves contain ': ' are kept instead of being dropped.
                key, sep, value = line.partition(': ')
                value = value.strip() if sep else None
                key = key.strip('#').strip()
                if key == 'SPECTRUM':
                    in_data_section = True
                else:
                    parameters[key] = value
            elif not line.startswith('#') and line.strip():
                # Read the data
                fields = line.replace(',', ' ').split()
                if parameters['DATATYPE'] == 'XY':
                    y.append(float(fields[1]))
                elif parameters['DATATYPE'] == 'Y':
                    y.extend([float(field) for field in fields])

    # We rewrite the format value to be sure that it complies with the
    # standard, because it will be used by the writer routine
    parameters['FORMAT'] = "EMSA/MAS Spectral Data File"

    # Convert the parameters to the right type and map some of them.
    # Iterate over a snapshot because the values are rewritten in the loop.
    for parameter, value in list(parameters.items()):
        # Some parameter names can contain the units information
        # e.g. #AZIMANGLE-dg: 90.
        # Only the first hyphen separates the name from the units, so names
        # with several hyphens no longer raise ValueError.
        clean_par, sep, units = parameter.partition('-')
        if sep:
            clean_par, units = clean_par.strip(), units.strip()
        else:
            units = None
        if clean_par not in keywords:
            continue
        spec = keywords[clean_par]
        try:
            parameters[parameter] = spec['dtype'](value)
        except Exception:
            # Normally the offending misspelling is a space in the
            # scientific notation, e.g. 2.0 E-06, so we try to correct it
            try:
                parameters[parameter] = spec['dtype'](value.replace(' ', ''))
            except Exception:
                print("The %s keyword value, %s " % (parameter, value) +
                      "could not be converted to the right type")
        if spec['mapped_to'] is not None:
            mapped.set_item(spec['mapped_to'], parameters[parameter])
            if units is not None:
                mapped.set_item(spec['mapped_to'] + '_units', units)

    # The date parameter needs some extra care.
    # It is necessary to change the locale to US english to read the date
    # keyword
    loc = locale.getlocale(locale.LC_TIME)
    # Setting locale can raise an exception because
    # their name depends on library versions, platform etc.
    try:
        if os_name == 'posix':
            locale.setlocale(locale.LC_TIME, ('en_US', 'utf8'))
        elif os_name == 'windows':
            locale.setlocale(locale.LC_TIME, 'english')
        if parameters.get('TIME'):
            try:
                H, M = time.strptime(parameters['TIME'], "%H:%M")[3:5]
                mapped['time'] = datetime.time(H, M)
            except Exception:
                print('The time information could not be retrieved')
        if parameters.get('DATE'):
            try:
                Y, M, D = time.strptime(parameters['DATE'], "%d-%b-%Y")[0:3]
                mapped['date'] = datetime.date(Y, M, D)
            except Exception:
                print('The date information could not be retrieved')
    except Exception:
        warnings.warn("I couldn't write the date information due to "
                      "an unexpected error. Please report this error to "
                      "the developers")
    finally:
        # Always restore the saved locale, whatever happened above
        locale.setlocale(locale.LC_TIME, loc)

    axes = [{
        'size': len(y),
        'index_in_array': 0,
        'name': parameters.get('XLABEL', ''),
        'scale': parameters.get('XPERCHAN', 1),
        'offset': parameters.get('OFFSET', 0),
        'units': parameters.get('XUNITS', ''),
    }]

    mapped['original_filename'] = filename
    mapped['record_by'] = 'spectrum'
    if mapped.has_item('signal_type'):
        if mapped.signal_type == 'ELS':
            mapped.signal_type = 'EELS'
    else:
        # Defaulting to EELS looks reasonable
        mapped.signal_type = 'EELS'

    dictionary = {
        'data': np.array(y),
        'axes': axes,
        'mapped_parameters': mapped.as_dictionary(),
        'original_parameters': parameters,
    }
    return [dictionary, ]
def file_reader(filename, rpl_info=None, encoding="latin-1",
                mmap_mode='c', *args, **kwds):
    """Parses a Lispix (http://www.nist.gov/lispix/) ripple (.rpl) file
    and reads the data from the corresponding raw (.raw) file;
    or, read a raw file if the dictionary rpl_info is provided.

    This format is often used in EDS/EDX experiments.

    Images and spectral images or data cubes that are written in the
    (Lispix) raw file format are just a continuous string of numbers.

    Data cubes can be stored image by image, or spectrum by spectrum.
    Single images are stored row by row, vector cubes are stored row by row
    (each row spectrum by spectrum), image cubes are stored image by image.

    All of the numbers are in the same format, such as 16 bit signed integer,
    IEEE 8-byte real, 8-bit unsigned byte, etc.

    The "raw" file should be accompanied by text file with the same name and
    ".rpl" extension. This file lists the characteristics of the raw file so
    that it can be loaded without human intervention.

    Alternatively, dictionary 'rpl_info' containing the information can
    be given.

    Some keys are specific to Hyperspy and will be ignored by other software.

    RPL stands for "Raw Parameter List", an ASCII text, tab delimited file in
    which Hyperspy reads the image parameters for a raw file.

                    TABLE OF RPL PARAMETERS
        key             type     description
      ----------   ------------ --------------------
      # Mandatory      keys:
      width            int      # pixels per row
      height           int      # number of rows
      depth            int      # number of images or spectral pts
      offset           int      # bytes to skip
      data-type        str      # 'signed', 'unsigned', or 'float'
      data-length      str      # bytes per pixel  '1', '2', '4', or '8'
      byte-order       str      # 'big-endian', 'little-endian', or 'dont-care'
      record-by        str      # 'image', 'vector', or 'dont-care'
      # X-ray keys:
      ev-per-chan      int      # optional, eV per channel
      detector-peak-width-ev  int   # optional, FWHM for the Mn K-alpha line
      # Hyperspy-specific keys
      depth-origin    int      # energy offset in pixels
      depth-scale     float    # energy scaling (units per pixel)
      depth-units     str      # energy units, usually eV
      depth-name      str      # Name of the magnitude stored as depth
      width-origin    int      # column offset in pixels
      width-scale     float    # column scaling (units per pixel)
      width-units     str      # column units, usually nm
      width-name      str      # Name of the magnitude stored as width
      height-origin   int      # row offset in pixels
      height-scale    float    # row scaling (units per pixel)
      height-units    str      # row units, usually nm
      height-name     str      # Name of the magnitude stored as height
      signal          str      # Name of the signal stored, e.g. HAADF
      convergence-angle float  # TEM convergence angle in mrad
      collection-angle  float  # EELS spectrometer collection angle in mrad
      beam-energy       float  # TEM beam energy in keV
      elevation-angle   float  # Elevation angle of the EDS detector
      azimuth-angle     float  # Azimuth angle of the EDS detector
      live-time         float  # Live time per spectrum
      energy-resolution float  # Resolution of the EDS (FWHM of MnKa)
      tilt-stage        float  # The tilt of the stage

    NOTES

    When 'data-length' is 1, the 'byte order' is not relevant as there is only
    one byte per datum, and 'byte-order' should be 'dont-care'.

    When 'depth' is 1, the file has one image, 'record-by' is not relevant and
    should be 'dont-care'. For spectral images, 'record-by' is 'vector'.
    For stacks of images, 'record-by' is 'image'.

    Floating point numbers can be IEEE 4-byte, or IEEE 8-byte. Therefore if
    data-type is float, data-length MUST be 4 or 8.

    The rpl file is read in a case-insensitive manner. However, when providing
    a dictionary as input, the keys MUST be lowercase.

    Comment lines, beginning with a semi-colon ';' are allowed anywhere.

    The first non-comment in the rpl file line MUST have two column names:
    'name_1'<TAB>'name_2'; any name would do e.g. 'key'<TAB>'value'.

    Parameters can be in ANY order.

    In the rpl file, the parameter name is followed by ONE tab (spaces are
    ignored) e.g.: 'data-length'<TAB>'2'

    In the rpl file, other data and more tabs can follow the two items on
    each row, and are ignored.

    Other keys and values can be included and are ignored.

    Any number of spaces can go along with each tab.

    Parameters
    ----------
    filename : str
        Path of the .rpl file. The raw file name is built by replacing the
        last three characters with 'raw' or 'RAW'.
    rpl_info : dict, optional
        Already parsed parameters; when given, the .rpl file is not read.
        A missing 'signal' key is added to this dictionary in place.
    encoding : str
        Text encoding used to open the .rpl file.
    mmap_mode : str
        Forwarded to ``read_raw``.

    Returns
    -------
    list
        A one-element list holding a dictionary with the keys ``'data'``,
        ``'axes'``, ``'mapped_parameters'`` and ``'original_parameters'``.
        Only dimensions whose size is greater than one get an axis.

    Raises
    ------
    IOError
        If no ``rpl_info`` is given and the extension is not a known one, or
        if the companion raw file cannot be found.
    KeyError
        If 'record-by', 'width', 'height' or 'depth' is missing.
    """
    if not rpl_info:
        if filename[-3:] in file_extensions:
            with codecs.open(filename, encoding=encoding,
                             errors='replace') as f:
                rpl_info = parse_ripple(f)
        else:
            raise IOError('File has wrong extension: "%s"' % filename[-3:])

    candidates = [filename[:-3] + ext for ext in ('raw', 'RAW')]
    rawfname = ''
    for candidate in candidates:
        if os.path.exists(candidate):
            rawfname = candidate
            break
    if not rawfname:
        # Name the files that were looked for; the previous message always
        # formatted an empty string.
        raise IOError('RAW file "%s" (or "%s") does not exist'
                      % tuple(candidates))
    data = read_raw(rpl_info, rawfname, mmap_mode=mmap_mode)

    if rpl_info['record-by'] == 'vector':
        print('Loading as spectrum')
        record_by = 'spectrum'
    elif rpl_info['record-by'] == 'image':
        print('Loading as Image')
        record_by = 'image'
    elif len(data.shape) == 1:
        print('Loading as spectrum')
        record_by = 'spectrum'
    else:
        print('Loading as image')
        record_by = 'image'

    if rpl_info['record-by'] == 'vector':
        idepth, iheight, iwidth = 2, 0, 1
        names = ['height', 'width', 'depth']
    else:
        idepth, iheight, iwidth = 0, 1, 2
        names = ['depth', 'height', 'width']

    scales = [1, 1, 1]
    origins = [0, 0, 0]
    units = ['', '', '']
    # The sizes must be looked up before the names are overridden below
    sizes = [rpl_info[name] for name in names]

    if 'signal' not in rpl_info:
        rpl_info['signal'] = ""

    # NOTE: 'detector-peak-width-ev' needs no special handling: rpl_info is
    # returned whole as 'original_parameters'. The former assignment to an
    # undefined ``original_parameters`` name raised NameError.

    calibration = (('origin', origins), ('scale', scales),
                   ('units', units), ('name', names))
    for axis, index in (('depth', idepth), ('width', iwidth),
                        ('height', iheight)):
        for suffix, target in calibration:
            key = '%s-%s' % (axis, suffix)
            if key in rpl_info:
                target[index] = rpl_info[key]
    # ev-per-chan is the only calibration supported by the original ripple
    # format; it is used only when no depth-scale is given
    if 'depth-scale' not in rpl_info and 'ev-per-chan' in rpl_info:
        scales[idepth] = rpl_info['ev-per-chan']

    mp = DictionaryBrowser({
        'record_by': record_by,
        'original_filename': os.path.split(filename)[1],
        'signal_type': rpl_info['signal'],
    })
    for key, item_path in (
            ('convergence-angle', 'TEM.convergence_angle'),
            ('tilt-stage', 'TEM.tilt_stage'),
            ('collection-angle', 'TEM.EELS.collection_angle'),
            ('beam-energy', 'TEM.beam_energy'),
            ('elevation-angle', 'TEM.EDS.elevation_angle'),
            ('azimuth-angle', 'TEM.EDS.azimuth_angle'),
            ('energy-resolution', 'TEM.EDS.energy_resolution_MnKa'),
            ('live-time', 'TEM.EDS.live_time')):
        if key in rpl_info:
            mp.set_item(item_path, rpl_info[key])

    axes = []
    for size, name, scale, origin, unit in zip(sizes, names, scales,
                                               origins, units):
        if size > 1:
            axes.append({
                'size': size,
                # Singleton dimensions are squeezed out of the data, so the
                # index counts only the axes kept so far
                'index_in_array': len(axes),
                'name': name,
                'scale': scale,
                'offset': origin,
                'units': unit,
            })

    dictionary = {
        'data': data.squeeze(),
        'axes': axes,
        'mapped_parameters': mp.as_dictionary(),
        'original_parameters': rpl_info,
    }
    return [dictionary, ]