def __init__(self, file, header_fields=None):
    """Set up an MRC reader for the given file and header layout.

    Parameters
    ----------
    file : file-like or str
        Stream or filename from which to read the data. The stream
        is allowed to be already opened in ``'rb'`` mode.
    header_fields : sequence of dicts, optional
        Per-row definition of the header fields. Every dictionary
        holds key-value pairs for these keys:

        - ``'name'`` : Label for the field.
        - ``'offset'`` : Start of the field in bytes.
        - ``'size'`` : Size of the field in bytes.
        - ``'dtype'`` : Data type in Numpy- or Numpy-readable format.
        - ``'dshape'`` (optional) : The array of values is reshaped to
          this shape.
        - ``'description'`` (optional) : A human-readable description
          of the field.

        If ``None`` is given (the default), the field definitions are
        generated from the MRC2014 specification table, see
        `print_mrc2014_spec`.
    """
    if header_fields is not None:
        fields = header_fields
    else:
        # No custom layout requested: derive it from the MRC2014 table.
        fields = header_fields_from_table(
            spec_table=MRC_2014_SPEC_TABLE,
            keys=MRC_SPEC_KEYS,
            dtype_map=MRC_DTYPE_TO_NPY_DTYPE)

    # `MRCHeaderProperties` has no `__init__`, hence this resolves to
    # `FileReaderRawBinaryWithHeader.__init__`.
    super(FileReaderMRC, self).__init__(file, fields)
def mrc_header_from_params(shape, dtype, kind, **kwargs):
    """Create a minimal MRC2014 header from the given parameters.

    Parameters
    ----------
    shape : 3-sequence of ints
        3D shape of the stored data. The values are used as
        ``'nx', 'ny', 'nz'`` header entries, in this order. Note that
        this is different from the actual data storage shape for
        non-trivial ``axis_order``.
    dtype : {'int8', 'int16', 'float32', 'uint16'}
        Data type specifier as understood by `numpy.dtype`. It is
        translated to a ``'mode'`` header entry. See `this page
        <http://www.ccpem.ac.uk/mrc_format/mrc2014.php>`_ for valid
        modes.
    kind : {'volume', 'projections'}
        Interpretation of the 3D data, either as single 3D volume or as
        a stack of 2D projections. The value is used for the ``'ispg'``
        header entry (1 for ``'volume'``, 0 for ``'projections'``).
    extent : 3-sequence of floats, optional
        Size of the 3D volume in meters. The values are used for
        the ``'cella'`` header entry.
        Default: ``shape``, resulting in ``(1, 1, 1)`` unit cells
    axis_order : permutation of ``(0, 1, 2)``, optional
        Order of the data axes as they should appear in the stored
        file. The values (plus 1) are used for the
        ``'mapc', 'mapr', 'maps'`` header entries.
        Default: ``(0, 1, 2)``
    dmin, dmax : float, optional
        Minimum and maximum values of the data, used for header entries
        ``'dmin'`` and ``'dmax'``, resp.
        Default: 1.0, 0.0. These values indicate according to
        [Che+2015] that the values are considered as undetermined.
    dmean, rms : float, optional
        Mean of the data and RMS deviation from the mean, used for
        header entries ``'dmean'`` and ``'rms'``, resp.
        Default: ``min(dmin, dmax) - 1, -1.0``. These values indicate
        according to [Che+2015] that the values are considered as
        undetermined.
    mrc_version : 2-tuple of int, optional
        Version identifier for the MRC file, used for the ``'nversion'``
        header entry (stored as ``10 * mrc_version[0] + mrc_version[1]``).
        Default: ``(2014, 0)``
    text_labels : sequence of strings, optional
        Maximal 10 strings with 80 characters each, used for the
        ``'nlabl'`` and ``'label'`` header entries. Shorter labels are
        padded with spaces to 80 characters, unused labels are filled
        with ``'\\x00'``. Labels must be ASCII-encodable.
        Default: ``[]``

    Returns
    -------
    header : `OrderedDict`
        Header stored in an ordered dictionary, where each entry has the
        following form::

            'name': {'offset': offset_in_bytes,
                     'value': value_as_array}

        All ``'value'``'s are `numpy.ndarray`'s with at least one
        dimension. The entries appear in the order of the fields in
        the MRC2014 specification table.

    Raises
    ------
    ValueError
        If ``shape`` does not have 3 entries, ``kind`` is not one of the
        supported strings, ``axis_order`` is not a permutation of
        ``(0, 1, 2)``, ``mrc_version`` does not have length 2, more
        than 10 labels are given, or a label is longer than 80
        characters or not ASCII-encodable.

    References
    ----------
    [Che+2015] Cheng, A et al. *MRC2014: Extensions to the MRC format
    header for electron cryo-microscopy and tomography*. Journal of
    Structural Biology, 192 (2015), pp 146--150.
    """
    # Positional args
    shape = [int(n) for n in shape]
    if len(shape) != 3:
        raise ValueError('`shape` must have 3 entries, got {} entries'
                         ''.format(len(shape)))
    kind, kind_in = str(kind).lower(), kind
    if kind not in ('volume', 'projections'):
        raise ValueError("`kind` '{}' not understood".format(kind_in))

    # Keyword args
    extent = kwargs.pop('extent', shape)
    axis_order = kwargs.pop('axis_order', (0, 1, 2))
    if tuple(axis_order) not in permutations((0, 1, 2)):
        raise ValueError('`axis_order` must be a permutation of (0, 1, 2), '
                         'got {}'.format(axis_order))
    dmin = kwargs.pop('dmin', 1.0)
    dmax = kwargs.pop('dmax', 0.0)
    dmean = kwargs.pop('dmean', min(dmin, dmax) - 1.0)
    rms = kwargs.pop('rms', -1.0)
    mrc_version = kwargs.pop('mrc_version', (2014, 0))
    if len(mrc_version) != 2:
        raise ValueError('`mrc_version` must be a sequence of length 2, got '
                         '{}'.format(mrc_version))

    # Text labels: fill each label up with whitespace to 80 characters.
    # The remaining labels stay at 80 * '\x00' (see `label_arr` below).
    text_labels_in = kwargs.pop('text_labels', [])
    num_labels = len(text_labels_in)
    if num_labels > 10:
        raise ValueError('expected maximum of 10 labels, got {} labels'
                         ''.format(num_labels))
    text_labels = [str(label).ljust(80) for label in text_labels_in]
    if any(len(label) > 80 for label in text_labels):
        raise ValueError('labels cannot have more than 80 characters each')

    def int32_scalar(value):
        """Return ``value`` as an int32 array of shape ``(1,)``."""
        return np.array(value, dtype='int32').reshape([1])

    def char_array(raw):
        """Return the byte string ``raw`` as a writable 'S1' array."""
        # `frombuffer` yields a read-only view of `raw`, hence the copy.
        return np.frombuffer(raw, dtype='S1').copy()

    # Convert to header-friendly form.
    nx, ny, nz = [int32_scalar(n) for n in shape]
    mode = int32_scalar(NPY_DTYPE_TO_MRC_MODE[np.dtype(dtype)])
    cella = np.array(extent).reshape([3]).astype('float32')
    # Header axis indices are 1-based
    mapc, mapr, maps = [int32_scalar(m) + 1 for m in axis_order]
    dmin, dmax, dmean, rms = [
        np.array(x, dtype='float32').reshape([1])
        for x in (dmin, dmax, dmean, rms)
    ]
    ispg = np.array(1 if kind == 'volume' else 0, dtype='int32', ndmin=1)
    nsymbt = np.array([0], dtype='int32')
    # 'exttype' is a 4-byte field in MRC2014; blank means "no extended
    # header type". Written as a product so the width is explicit.
    exttype = char_array(b' ' * 4)
    nversion = int32_scalar(10 * mrc_version[0] + mrc_version[1])
    origin = np.zeros(3, dtype='int32')
    map_id = char_array(b'MAP ')
    # TODO: no idea how to properly choose the machine stamp
    # 4-byte field: two stamp bytes followed by two padding bytes.
    machst = char_array(b'DD' + b' ' * 2)
    nlabl = int32_scalar(num_labels)
    label_arr = np.zeros((10, 80), dtype='S1')  # ensure correct size
    for i, label_i in enumerate(text_labels):
        # Non-ASCII labels raise `UnicodeEncodeError`, a `ValueError`.
        label_arr[i] = np.frombuffer(label_i.encode('ascii'), dtype='S1')

    # Explicit mapping from header field names to values. The keys are
    # required to match exactly the field names in the specification,
    # and every field in the specification must have an entry here.
    values = {
        'nx': nx, 'ny': ny, 'nz': nz,
        'mode': mode,
        # Sampling equals the grid size for a minimal header
        'mx': nx, 'my': ny, 'mz': nz,
        'cella': cella,
        'mapc': mapc, 'mapr': mapr, 'maps': maps,
        'dmin': dmin, 'dmax': dmax, 'dmean': dmean,
        'ispg': ispg,
        'nsymbt': nsymbt,
        'exttype': exttype,
        'nversion': nversion,
        'origin': origin,
        'map': map_id,
        'machst': machst,
        'rms': rms,
        'nlabl': nlabl,
        'label': label_arr,
    }

    # Make the header
    # We use again the specification to set the offsets and the order
    header_fields = header_fields_from_table(MRC_2014_SPEC_TABLE,
                                             MRC_SPEC_KEYS,
                                             MRC_DTYPE_TO_NPY_DTYPE)
    header = OrderedDict()
    for field in header_fields:
        name = field['name']
        header[name] = {'offset': field['offset'], 'value': values[name]}

    return header
def read_extended_header(self, groupby='field', force_type=''):
    """Read the extended header according to `extended_header_type`.

    Currently, only the FEI extended header format is supported.
    See `print_fei_ext_header_spec` or `this homepage`_ for the format
    specification.

    The extended header usually has one header section per
    image (slice), in case of the FEI header 128 bytes each, with
    a total of 1024 sections.

    Note that this method moves the position of the underlying file
    stream and does not restore it afterwards.

    Parameters
    ----------
    groupby : {'field', 'section'}, optional
        How to group the values in the extended header sections.

        ``'field'`` : make an array per section field, e.g.::

            'defocus': [dval1, dval2, ..., dval1024],
            'exp_time': [tval1, tval2, ..., tval1024],
            ...

        ``'section'`` : make a dictionary for each section, e.g.::

            {'defocus': dval1, 'exp_time': tval1},
            {'defocus': dval2, 'exp_time': tval2},
            ...

        If the number of images is smaller than 1024, the last values
        are all set to zero.

    force_type : string, optional
        If given, this value overrides the `extended_header_type`
        from `header`. The value is compared case-insensitively.

        Currently supported: ``'FEI1'``

    Returns
    -------
    ext_header: `OrderedDict` or tuple
        For ``groupby == 'field'``, a dictionary with the field names
        as keys, like in the example.
        For ``groupby == 'section'``, a tuple of dictionaries as
        shown above.

        The returned data structures store no offsets, in contrast
        to the regular header.

    Raises
    ------
    ValueError
        If the (possibly forced) extended header type is not
        supported, if ``groupby`` is not one of the valid options,
        if the extended header length given by the ``'nsymbt'`` header
        entry is not a multiple of the section size, or if the
        resulting number of sections differs from the expected one.

    See Also
    --------
    print_fei_ext_header_spec

    References
    ----------
    .. _this homepage:
        http://www.2dx.unibas.ch/documentation/mrc-software/fei-\
extended-mrc-format-not-used-by-2dx
    """
    ext_header_type = str(force_type).upper() or self.extended_header_type
    if ext_header_type != 'FEI1':
        # Report the type that was actually requested, which may come
        # from `force_type` rather than from the file header.
        raise ValueError("extended header type '{}' not supported"
                         "".format(ext_header_type))

    # Validate `groupby` up front so that an invalid value fails
    # before any file access is made.
    groupby, groupby_in = str(groupby).lower(), groupby
    if groupby not in ('field', 'section'):
        raise ValueError("`groupby` '{}' not understood"
                         "".format(groupby_in))

    ext_header_len = int(self.header['nsymbt']['value'])
    if ext_header_len % MRC_FEI_SECTION_SIZE:
        raise ValueError('extended header length {} from header is '
                         'not divisible by extended header section size '
                         '{}'.format(ext_header_len, MRC_FEI_SECTION_SIZE))

    num_sections = ext_header_len // MRC_FEI_SECTION_SIZE
    if num_sections != MRC_FEI_NUM_SECTIONS:
        raise ValueError('calculated number of sections ({}) not equal to '
                         'expected number of sections ({})'
                         ''.format(num_sections, MRC_FEI_NUM_SECTIONS))

    section_fields = header_fields_from_table(
        MRC_FEI_EXT_HEADER_SECTION, keys=MRC_SPEC_KEYS,
        dtype_map=MRC_DTYPE_TO_NPY_DTYPE)

    # Make a list for each field and append the values for that
    # field. Then create an array from that list and store it
    # under the field name.
    ext_header = OrderedDict()
    for field in section_fields:
        field_offset = field['offset']
        field_dtype = field['dtype']
        field_dshape = field['dshape']

        # Number of items and bytes per field entry, and the
        # corresponding `struct` format string
        num_items = int(np.prod(field_dshape))
        size_bytes = num_items * field_dtype.itemsize
        fmt = '{}{}'.format(num_items, field_dtype.char)

        value_list = []
        for section in range(num_sections):
            # Get the bytestring from the right position in the file,
            # unpack it and append the value to the list.
            start = section * MRC_FEI_SECTION_SIZE + field_offset
            self.file.seek(start)
            packed_value = self.file.read(size_bytes)
            value_list.append(struct.unpack(fmt, packed_value))

        ext_header[field['name']] = np.array(value_list, dtype=field_dtype)

    if groupby == 'field':
        return ext_header

    # groupby == 'section': transpose the data and return as tuple.
    return tuple({key: ext_header[key][i] for key in ext_header}
                 for i in range(num_sections))