def _check_slice_size(self, slicer):
    """
    Guard against reads that would exceed the configured memory limit.

    Does nothing when ``self._raise_on_big_slice`` is false. Otherwise the
    output shape for ``slicer`` is requested from the data buffer (shape only)
    and converted to a byte count using the buffer's dtype item size.

    Parameters
    ----------
    slicer:
        The slice object handed to ``self.data_buffer.get_output_array``.

    Raises
    ------
    MemoryBlowOutError
        If the byte count exceeds ``load_params().memory_limit`` and
        ``self._allow_big_slicing`` is not set.

    """
    if not self._raise_on_big_slice:
        return
    shape = self.data_buffer.get_output_array(slicer, only_shape=True)
    # Accumulate the element count in 64 bits. With the platform default
    # integer (32-bit on some systems) the product of a large shape can wrap
    # around and silently defeat this check.
    n_items = np.prod(shape, dtype=np.int64)
    size = n_items * self.data_buffer.dtype.itemsize
    state = self._allow_big_slicing
    if size > load_params().memory_limit and not state:
        raise MemoryBlowOutError('A read with shape {} will be *very* large. Use the big_slices context to '
                                 'proceed'.format(shape))
def __init__(self, array, units_scale=None, real_slices=False, raise_bad_write=True):
    """
    Wrap a file-mapped array and record its write access, units conversion and dtype.

    Parameters
    ----------
    array: file-mapped array
        A memory-mapped array supporting slice syntax. This class assumes
        numpy.memmap. If using HDF5, construct with `HDF5Buffer`.
    units_scale: float or 2-tuple
        Either the scaling value or (offset, scaling) values such that
        signal = (memmap + offset) * scaling
    real_slices: bool
        If False, a scaling equal to 1 is dropped (``units_scale`` is stored
        as None) and the buffer reports the mapped array's own dtype. If
        True, a scaling of 1 is kept, so the buffer reports the configured
        floating point dtype.
    raise_bad_write: bool
        Stored as ``_raise_bad_write``. Presumably controls whether an
        illegal write raises -- confirm against the write methods.

    """
    super(MappedBuffer, self).__init__()
    self._array = array

    # Find a file mode on the array itself, or else on its file handle.
    # Anything other than 'r' indicates some kind of write access.
    if hasattr(array, 'mode'):
        self.__writeable = array.mode != 'r'
    elif hasattr(array, 'file'):
        self.__writeable = array.file.mode != 'r'
    else:
        print('Unknown array type -- assuming not writeable')
        self.__writeable = False

    self._current_slice = None
    self._current_seg = ()

    # Work out the units conversion: the trivial parts of a (0, 1)
    # conversion are ignored to save cycles.
    offset = None
    scale = None
    if units_scale is not None:
        if np.iterable(units_scale):
            if units_scale[0] != 0:
                offset = units_scale[0]
            scale = float(units_scale[1])
        else:
            scale = float(units_scale)
        # A unit scale is only kept when real-valued slices are requested
        if scale == 1 and not real_slices:
            scale = None
    self._raw_offset = offset
    self.units_scale = scale

    # Without scaling the output dtype is the mapped dtype; with scaling it
    # is the configured floating point precision.
    if self.units_scale is None:
        self.dtype = array.dtype
    else:
        use_single = load_params().floating_point.lower() == 'single'
        self.dtype = np.dtype('f' if use_single else 'd')
    self.map_dtype = array.dtype
    self.shape = array.shape
    self._raise_bad_write = raise_bad_write
def to_map(self):
    """
    Creates a temp HDF5 file and returns a MappedSource for this datasource.

    The data buffer and every aligned array are written as chunked datasets
    cast to the configured floating point precision ('f' for "single",
    otherwise 'd'). The open HDF5 handle is registered with the temp file
    object via ``register_to_close``.

    Returns
    -------
    MappedSource
        Built with ``MappedSource.from_hdf_sources`` on the new file, with
        'data' as the electrode dataset.

    """
    from .memmap import TempFilePool, MappedSource

    # Only the name of the temp file is needed from this context
    with TempFilePool(mode='ab') as tf:
        filename = str(tf)

    use_single = load_params().floating_point.lower() == 'single'
    float_code = 'f' if use_single else 'd'

    # Not opened as a context manager: the handle is returned inside the
    # MappedSource.
    hdf = h5py.File(filename, 'w', libver='latest')
    tf.register_to_close(hdf)

    hdf.create_dataset('data', data=self.data_buffer.astype(float_code), chunks=True)
    for name in self.aligned_arrays:
        aligned = getattr(self, name)
        hdf.create_dataset(name, data=aligned.astype(float_code), chunks=True)
    return MappedSource.from_hdf_sources(hdf, 'data', aligned_arrays=self.aligned_arrays)
def _auto_block_length(self):
    """
    Choose a block length for iteration from the buffer chunking and the memory limit.

    The starting point is the chunk length along the blocked axis. If the
    configured memory limit allows, the block grows to a whole multiple of
    that chunk length (at most 100 chunks). The result never exceeds the
    length of the blocked axis. A RuntimeWarning is issued when the memory
    limit cannot hold even one chunk-length block; the chunk length is
    still used in that case.

    Returns
    -------
    block_size:
        The block length along ``self.shape[1]`` (or ``self.shape[0]`` when
        transposed).

    """
    # a bit hacky now... non-HDF5 buffers keep their chunk shape one level down
    buffer = self.data_buffer
    if isinstance(buffer, HDF5Buffer):
        chunks = buffer.chunks
    else:
        chunks = buffer.chunks[0]

    # This is the block size recommended by chunking
    chunk_size = chunks[0] if self._transpose else chunks[1]
    block_size = chunk_size

    # If memory supports, grow the block to a multiple of the chunk size
    mem = load_params().memory_limit
    block_rows = self.shape[1] if self._transpose else self.shape[0]
    bytes_per_chunk_block = block_rows * block_size * self.dtype.itemsize
    num_blocks = min(mem // bytes_per_chunk_block, 100)
    if num_blocks < 1:
        warnings.warn('Memory limit is too low to support minimum block size', RuntimeWarning)
    else:
        block_size = num_blocks * chunk_size

    # Don't return a block size that's longer than the long axis
    long_axis = self.shape[0] if self._transpose else self.shape[1]
    return min(block_size, long_axis)
def iter_channels(self, chans_per_block=None, use_max_memory=False, return_slice=False, **kwargs):
    """
    Yield data channels.

    Parameters
    ----------
    chans_per_block: int
        Number of channels per iteration. The default value is either 16
        or based on memory limit if use_max_memory is set. The value is
        always clipped to the number of channels.
    use_max_memory: bool or number
        If True, set the number of channels based on the "memory_limit"
        config value. A non-bool, truthy value is itself used as the memory
        budget. Only consulted when chans_per_block is None.
    return_slice: bool
        If True return the ndarray block followed by the array slice to
        yield this block. Helpful for pairing the yielded blocks with the
        same position in a follower array, or writing back transformed data
        to this datasource (if writeable).
    kwargs: dict
        Arguments for ElectrodeDataSource.cache_slice

    Returns
    -------
    DataSourceBlockIter
        Block iterator over axis 0 of this source.

    """
    n_chan, n_samp = self.shape
    if chans_per_block is None:
        if not use_max_memory:
            chans_per_block = 16
        else:
            if isinstance(use_max_memory, bool):
                budget = load_params()['memory_limit']
            else:
                budget = use_max_memory
            if self._transpose:
                # compensate for the necessary copy-to-transpose
                budget /= 2
            sample_bytes = self.dtype.itemsize
            chans_per_block = max(1, int(budget / n_samp / sample_bytes))
    chans_per_block = min(chans_per_block, n_chan)
    return DataSourceBlockIter(self, axis=0, block_length=chans_per_block,
                               return_slice=return_slice, **kwargs)
def hdf5_open_ephys_channels(exp_path, test, hdf5_name, rec_num='auto', quantized=False,
                             data_chans='all', downsamp=1, load_chans=None):
    """Load HDF5-mapped arrays of the full band timeseries.

    This option provides a way to load massive multi-channel datasets
    sampled at 20 kS/s. Down-sampling is supported.

    The output file receives a scalar dataset 'Fs' (sampling rate divided by
    the downsample factor), a (channels, samples) dataset 'chdata', and --
    when such channel files are found -- 'adc' and 'aux' datasets.

    Parameters
    ----------
    exp_path, test, rec_num:
        Passed to ``prepare_paths`` to locate the recording.
    hdf5_name: str
        Path of the HDF5 file to create (any existing file is overwritten).
    quantized: bool
        If True, store the raw int16 samples. Otherwise samples are scaled
        by the "bitVolts" header value of the first data channel and stored
        in the configured floating point precision. Forced to False when
        downsampling.
    data_chans:
        Channel selection passed to ``OE.get_filelist``.
    downsamp: int
        Downsample factor. Data channels are downsampled with ``downsample``
        (anti-aliased); ADC/AUX channels are plainly decimated.
    load_chans: int or None
        Number of channels to load per pass. By default it is derived from
        the memory limit. Always at least 1.

    Raises
    ------
    IOError
        If no data channels are found.

    """
    downsamp = int(downsamp)
    if downsamp > 1:
        quantized = False

    rec_path, rec_num = prepare_paths(exp_path, test, rec_num)

    chan_names = OE.get_filelist(rec_path, ctype='CH', channels=data_chans, source=rec_num[0])
    n_chan = len(chan_names)
    if not n_chan:
        raise IOError('no channels found')

    from ecogdata.expconfig import params
    # load channel at a time to be able to downsample
    first_chan_file = os.path.join(rec_path, chan_names[0])
    bytes_per_channel = OE.get_channel_bytes(first_chan_file)
    if not quantized:
        bytes_per_channel *= 4
    if load_chans is None:
        load_chans = int(float(params.memory_limit) // (2 * bytes_per_channel))
    # A batch size below one would never advance the loading loops below
    # (this happens when the memory limit is smaller than two channels).
    load_chans = max(1, int(load_chans))

    # get test channel for shape info
    ch_record = OE.loadContinuous(first_chan_file, dtype=np.int16, verbose=False)
    ch_data = ch_record['data']
    header = OE.get_header_from_folder(rec_path, source=rec_num[0])
    trueFs = get_robust_samplingrate(rec_path)
    if trueFs is None:
        trueFs = header['sampleRate']

    if downsamp > 1:
        d_len = ch_data[..., ::downsamp].shape[-1]
    else:
        d_len = ch_data.shape[-1]

    if quantized:
        arr_dtype = 'h'
    else:
        # lower-case the setting, as done wherever else it is consulted
        single = load_params().floating_point.lower() == 'single'
        arr_dtype = 'f' if single else 'd'

    # NOTE(review): the first data channel's bitVolts is applied to every
    # block, including ADC/AUX -- confirm this is intended.
    bit_volts = ch_record['header']['bitVolts']

    def _proc_block(block, antialias=True):
        # Convert to real units (unless quantized) and reduce the rate
        if not quantized:
            block = block * bit_volts
        if downsamp > 1:
            if antialias:
                block, _ = downsample(block, trueFs, r=downsamp)
            else:
                block = block[:, ::downsamp]
        return block

    with h5py.File(hdf5_name, 'w', libver='latest') as h5:
        h5.create_dataset('Fs', data=trueFs / downsamp)
        chans = h5.create_dataset('chdata', dtype=arr_dtype, chunks=True, shape=(n_chan, d_len))

        # Pack in channel data: the first channel is already in memory
        chans[0] = _proc_block(ch_data)
        start_chan = 1
        # Loop on the condition so that a single-channel recording does not
        # trigger an empty load.
        while start_chan < n_chan:
            stop_chan = min(n_chan, start_chan + load_chans)
            print('load chan', start_chan, 'to', stop_chan)
            ch_data = OE.loadFolderToTransArray(rec_path, dtype=np.int16, verbose=False,
                                                start_chan=start_chan, stop_chan=stop_chan,
                                                ctype='CH', channels=data_chans,
                                                source=rec_num[0])
            chans[start_chan:stop_chan] = _proc_block(ch_data)
            start_chan += load_chans

    for arr in ('ADC', 'AUX'):
        n_extra = len(OE.get_filelist(rec_path, ctype=arr, source=rec_num[0]))
        if not n_extra:
            continue
        with h5py.File(hdf5_name, 'r+', libver='latest') as h5:
            chans = h5.create_dataset(arr.lower(), dtype=arr_dtype, chunks=True,
                                      shape=(n_extra, d_len))
            start_chan = 0
            while start_chan < n_extra:
                stop_chan = min(n_extra, start_chan + load_chans)
                ch_data = OE.loadFolderToTransArray(rec_path, dtype=np.int16, verbose=False,
                                                    source=rec_num[0], start_chan=start_chan,
                                                    stop_chan=stop_chan, ctype=arr)
                chans[start_chan:stop_chan] = _proc_block(ch_data, antialias=False)
                start_chan += load_chans
def mirror(self, new_rate_ratio=None, writeable=True, mapped=True, channel_compatible=False, filename='',
           copy='', new_sources=None, **map_args):
    """
    Create an empty ElectrodeDataSource based on the current source, possibly with a new sampling rate and
    new access permissions.

    Parameters
    ----------
    new_rate_ratio: int or None
        Ratio of old to new sample rate for the mirrored array (> 1).
    writeable: bool
        Make any new MappedSource arrays writeable. This implies 1) datatype casting to floats, and
        2) there is no more units conversion on the primary array.
    mapped: bool
        If False, mirror to a PlainArraySource (in memory). Else mirror into a new MappedSource.
    channel_compatible: bool
        If True, preserve the same number of raw data channels in a MappedSource. Otherwise, reduce the
        channels to just the set of active channels. Currently not supported for non-mapped mirrors.
    filename: str
        Name of the new MappedSource. If empty, use a self-destroying temporary file.
    copy: str
        Code whether to copy any arrays, which is only valid when new_rate_ratio is None or 1. 'aligned'
        copies aligned arrays. 'electrode' copies electrode data: only valid if channel_compatible is False.
        'all' copies all arrays. By default, nothing is copied.
    new_sources: dict or None
        If mapped=False, then pre-allocated arrays can be provided for each source (i.e. 'data_buffer' and
        any aligned arrays). None (the default) means no arrays are provided.
    map_args: dict
        Any other MappedSource arguments

    Returns
    -------
    datasource: ElectrodeDataSource subtype

    """
    # TODO: channel order permutation in mirrored source

    # Avoid a shared mutable default argument
    if new_sources is None:
        new_sources = dict()

    T = self.shape[1]
    if new_rate_ratio:
        T = calc_new_samples(T, new_rate_ratio)

    # if casting to floating point, check for preferred precision
    fp_precision = load_params().floating_point.lower()
    fp_dtype = np.dtype('f' if fp_precision == 'single' else 'd')

    # unpack copy mode
    copy_mode = copy.lower()
    copy_electrodes_coded = copy_mode in ('all', 'electrode')
    copy_aligned_coded = copy_mode in ('all', 'aligned')
    diff_rate = T != self.shape[1]
    copy_electrodes = False
    if copy_electrodes_coded:
        if diff_rate or channel_compatible:
            print('Not copying electrode channels. Diff rate '
                  '({}) or indirect channel map ({})'.format(diff_rate, channel_compatible))
        else:
            copy_electrodes = True
    copy_aligned = False
    if copy_aligned_coded:
        if diff_rate:
            print('Not copying aligned arrays: different sample rate')
        else:
            copy_aligned = True

    def aligned_dims(arr):
        # Shape of a mirrored (un-transposed) aligned array with T samples.
        # Kind of tricky with aligned fields -- assume that transpose means
        # the same thing for them? One-dimensional arrays mirror to (T,).
        if len(arr.shape) > 1:
            return (arr.shape[1], T) if self._transpose else (arr.shape[0], T)
        return (T,)

    if mapped:
        if channel_compatible:
            electrode_channels = self._electrode_channels
            C = self.data_buffer.shape[1] if self._transpose else self.data_buffer.shape[0]
            channel_mask = self.binary_channel_mask
        else:
            C = self.shape[0]
            electrode_channels = None
            channel_mask = None
        if writeable:
            new_dtype = fp_dtype
            reopen_mode = 'r+'
            units_scale = None
        else:
            new_dtype = self.data_buffer.map_dtype
            reopen_mode = 'r'
            units_scale = self.data_buffer.units_scale
        use_tempfile = not filename
        if use_tempfile:
            with TempFilePool(mode='ab') as f:
                # punt on the unlink-on-close issue for now with "delete=False"
                filename = f
        # open as string just in case its a TempFilePool
        # TODO: (need to figure out how to make it work seamless as a string)
        with h5py.File(str(filename), 'w', libver='latest') as fw:
            # Create all new datasets as non-transposed
            fw.create_dataset(self._electrode_field, shape=(C, T), dtype=new_dtype, chunks=True)
            if copy_electrodes:
                for block, sl in self.iter_blocks(return_slice=True, sharedmem=False):
                    fw[self._electrode_field][sl] = block
            for name in self.aligned_arrays:
                arr = getattr(self, name)
                # Same dims rule as the in-memory branch, so 1-D aligned
                # arrays also get a well-defined shape here.
                dims = aligned_dims(arr)
                # NOTE(review): original author questioned this dtype choice
                dtype = fp_dtype if writeable else arr.dtype
                fw.create_dataset(name, shape=dims, dtype=dtype, chunks=True)
                if copy_aligned:
                    aligned = getattr(self, name)[:]
                    fw[name][:] = aligned.T if self._transpose else aligned
            # set single write, multiple read mode AFTER datasets are created
            # Skip for now -- SWMR issue pending
        print(str(filename), 'reopen mode', reopen_mode)
        f_mapped = h5py.File(str(filename), reopen_mode, libver='latest')
        if isinstance(filename, TempFilePool):
            filename.register_to_close(f_mapped)
        # If the original buffer's unit scaling is 1.0 then we should keep real slices on
        real_slices = units_scale == 1.0
        return MappedSource.from_hdf_sources(f_mapped, self._electrode_field, units_scale=units_scale,
                                             real_slices=real_slices, aligned_arrays=self.aligned_arrays,
                                             transpose=False, electrode_channels=electrode_channels,
                                             channel_mask=channel_mask, **map_args)

    # In-memory mirror
    if channel_compatible:
        print('RAM mirrors are not channel compatible--use mapped=True')
    self._check_slice_size(np.s_[:, :T])
    C = self.shape[0]
    if 'data_buffer' in new_sources:
        new_source = new_sources['data_buffer']
    else:
        new_source = shm.shared_ndarray((C, T), fp_dtype.char)
    if copy_electrodes:
        for block, sl in self.iter_blocks(return_slice=True, sharedmem=False):
            new_source[sl] = block
    # Aligned arrays are un-transposed on this mirroring step
    aligned_arrays = dict()
    for name in self.aligned_arrays:
        arr = getattr(self, name)
        dims = aligned_dims(arr)
        if name in new_sources:
            aligned_arrays[name] = new_sources[name]
        else:
            aligned_arrays[name] = shm.shared_ndarray(dims, fp_dtype.char)
        if copy_aligned:
            aligned = getattr(self, name)[:]
            aligned_arrays[name][:] = aligned.T if self._transpose else aligned
    return PlainArraySource(new_source, **aligned_arrays)