Example #1
 def _check_slice_size(self, slicer):
     if not self._raise_on_big_slice:
         return
     shape = self.data_buffer.get_output_array(slicer, only_shape=True)
     size = np.prod(shape) * self.data_buffer.dtype.itemsize
     state = self._allow_big_slicing
     if size > load_params().memory_limit and not state:
         raise MemoryBlowOutError('A read with shape {} will be *very* large. Use the big_slices context to '
                                  'proceed'.format(shape))
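
The same guard can be shown in isolation. A minimal sketch, assuming a plain memory_limit value in bytes and using the built-in MemoryError in place of the library's MemoryBlowOutError:

import numpy as np

def check_slice_size(shape, dtype, memory_limit, allow_big=False):
    # Estimated size of the read: number of elements times bytes per element
    size = np.prod(shape) * np.dtype(dtype).itemsize
    if size > memory_limit and not allow_big:
        raise MemoryError('A read with shape {} needs {} bytes; '
                          'the limit is {}'.format(shape, size, memory_limit))

# A (200, 1000000) float64 read is ~1.6 GB and trips a 1 GB limit
try:
    check_slice_size((200, 1000000), 'd', memory_limit=10**9)
except MemoryError as err:
    print(err)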
Example #2
    def __init__(self,
                 array,
                 units_scale=None,
                 real_slices=False,
                 raise_bad_write=True):
        """

        Parameters
        ----------
        array: file-mapped array
            A memory-mapped array supporting slice syntax. This class assumes numpy.memmap. If using HDF5,
            construct with `HDF5Buffer`.
        units_scale: float or 2-tuple
            Either the scaling value or (offset, scaling) values such that signal = (memmap + offset) * scaling
        real_slices: bool
            If True, return scaled floating-point slices even when the units scaling is 1.
        raise_bad_write: bool
            If True, raise an error when a write is attempted while the underlying map is not writeable.
        """
        super(MappedBuffer, self).__init__()
        self._array = array
        # Anything other than 'r' indicates some kind of write access
        if hasattr(array, 'mode'):
            mode = array.mode
            self.__writeable = mode != 'r'
        elif hasattr(array, 'file'):
            mode = array.file.mode
            self.__writeable = mode != 'r'
        else:
            print('Unknown array type -- assuming not writeable')
            self.__writeable = False
        self._current_slice = None
        self._current_seg = ()
        self._raw_offset = None
        self.units_scale = None
        if units_scale is not None:
            # Ignore the (0, 1) scaling to save cycles
            if np.iterable(units_scale):
                self._raw_offset = units_scale[0] if units_scale[0] != 0 else None
                self.units_scale = float(units_scale[1])
            else:
                self.units_scale = float(units_scale)
            # Can set back to None if real slices aren't needed
            if self.units_scale == 1 and not real_slices:
                self.units_scale = None
        if self.units_scale is None:
            self.dtype = array.dtype
        else:
            fp_precision = load_params().floating_point.lower()
            typecode = 'f' if fp_precision == 'single' else 'd'
            self.dtype = np.dtype(typecode)
        self.map_dtype = array.dtype
        self.shape = array.shape
        self._raise_bad_write = raise_bad_write
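
The units_scale handling above can be illustrated on its own. This sketch (hypothetical helper name) shows how a scalar or an (offset, scaling) pair maps raw integer samples to physical units via signal = (raw + offset) * scaling:

import numpy as np

def normalize_units_scale(units_scale):
    # Return an (offset, scaling) pair; a bare scalar means zero offset
    if np.iterable(units_scale):
        offset, scaling = units_scale
    else:
        offset, scaling = 0.0, units_scale
    return float(offset), float(scaling)

raw = np.array([-512, 0, 511], dtype='h')       # quantized ADC codes
offset, scaling = normalize_units_scale((2.5, 1e-3))
signal = (raw.astype('d') + offset) * scaling   # floating point, physical units
print(signal)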
Example #3
 def to_map(self):
     """Creates a temp HDF5 file and returns a MappedSource for this datasource."""
     from .memmap import TempFilePool, MappedSource
     with TempFilePool(mode='ab') as tf:
         filename = str(tf)
     fp_precision = load_params().floating_point.lower()
     typecode = 'f' if fp_precision == 'single' else 'd'
     hdf = h5py.File(filename, 'w', libver='latest')
     tf.register_to_close(hdf)
     hdf.create_dataset('data',
                        data=self.data_buffer.astype(typecode),
                        chunks=True)
     for name in self.aligned_arrays:
         hdf.create_dataset(name,
                            data=getattr(self, name).astype(typecode),
                            chunks=True)
     return MappedSource.from_hdf_sources(
         hdf, 'data', aligned_arrays=self.aligned_arrays)
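
For reference, here is a minimal sketch of the same idea without the library's TempFilePool and MappedSource helpers: cast the data to the preferred float type, write it into a chunked dataset in a temporary HDF5 file, and reopen the file as a read-only map:

import os
import tempfile
import numpy as np
import h5py

data = np.random.randn(8, 1000)
typecode = 'f'  # assuming single precision is preferred

fd, filename = tempfile.mkstemp(suffix='.h5')
os.close(fd)
with h5py.File(filename, 'w', libver='latest') as hdf:
    hdf.create_dataset('data', data=data.astype(typecode), chunks=True)

# Reopen read-only and use the dataset as a mapped array
hdf = h5py.File(filename, 'r')
print(hdf['data'].shape, hdf['data'].dtype)
hdf.close()
os.remove(filename)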
Example #4
 def _auto_block_length(self):
     # a bit hacky now...
     if isinstance(self.data_buffer, HDF5Buffer):
         chunks = self.data_buffer.chunks
     else:
         chunks = self.data_buffer.chunks[0]
     # This is the block size recommended by chunking
     block_size = chunk_size = chunks[0] if self._transpose else chunks[1]
     # If memory supports, then increase the block size to a multiple of the chunk size
     mem = load_params().memory_limit
     block_rows = self.shape[1] if self._transpose else self.shape[0]
     num_blocks = mem // (block_rows * block_size * self.dtype.itemsize)
     num_blocks = min(num_blocks, 100)
     if num_blocks < 1:
         warnings.warn('Memory limit is too low to support minimum block size', RuntimeWarning)
     else:
         block_size = num_blocks * chunk_size
     # Don't return a block size that's longer than the long axis
     block_size = min(block_size, (self.shape[0] if self._transpose else self.shape[1]))
     return block_size
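
The rule above reduces to simple arithmetic: start from the chunk length along the iterated axis, then grow the block to a multiple of the chunk size that still fits the memory limit (capped at 100 chunks and at the axis length). A standalone sketch with hypothetical numbers:

def auto_block_length(chunk_size, n_rows, axis_len, itemsize, memory_limit):
    # Number of whole chunks of (n_rows x chunk_size) samples that fit in the limit
    num_blocks = memory_limit // (n_rows * chunk_size * itemsize)
    num_blocks = min(num_blocks, 100)
    block_size = chunk_size if num_blocks < 1 else num_blocks * chunk_size
    # Never return a block longer than the iterated axis
    return min(block_size, axis_len)

# 128 channels of float64 samples, 10000-sample chunks, 200 MB limit
print(auto_block_length(10000, 128, 2000000, 8, 200 * 2**20))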
Example #5
    def iter_channels(self,
                      chans_per_block=None,
                      use_max_memory=False,
                      return_slice=False,
                      **kwargs):
        """
        Yield data channels.

        Parameters
        ----------
        chans_per_block: int
            Number of channels per iteration. Defaults to 16, or to a memory-limit based value if
            use_max_memory=True.
        use_max_memory: bool
            Set the number of channels based on the "memory_limit" config value.
        return_slice: bool
            If True, yield the ndarray block followed by the array slice that produces it. Helpful for pairing
            the yielded blocks with the same position in a follower array, or for writing transformed data back
            to this datasource (if writeable).
        kwargs: dict
            Arguments for ElectrodeDataSource.cache_slice

        """
        C, T = self.shape
        if chans_per_block is None:
            if use_max_memory:
                max_memory = load_params()['memory_limit'] if isinstance(
                    use_max_memory, bool) else use_max_memory
                if self._transpose:
                    # compensate for the necessary copy-to-transpose
                    max_memory /= 2
                bytes_per_samp = self.dtype.itemsize
                chans_per_block = max(1, int(max_memory / T / bytes_per_samp))
            else:
                chans_per_block = 16
        chans_per_block = min(chans_per_block, C)
        return DataSourceBlockIter(self,
                                   axis=0,
                                   block_length=chans_per_block,
                                   return_slice=return_slice,
                                   **kwargs)
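
A plain-numpy stand-in for this iteration pattern (hypothetical, in place of DataSourceBlockIter) yields consecutive groups of channels, optionally with the slice used to read them, sized either by a fixed count or by a memory limit:

import numpy as np

def iter_channel_blocks(data, chans_per_block=None, memory_limit=None, return_slice=False):
    C, T = data.shape
    if chans_per_block is None:
        if memory_limit is not None:
            chans_per_block = max(1, int(memory_limit / T / data.dtype.itemsize))
        else:
            chans_per_block = 16
    chans_per_block = min(chans_per_block, C)
    for start in range(0, C, chans_per_block):
        sl = np.s_[start:min(start + chans_per_block, C), :]
        yield (data[sl], sl) if return_slice else data[sl]

data = np.random.randn(40, 5000)
for block, sl in iter_channel_blocks(data, memory_limit=2**20, return_slice=True):
    print(block.shape, sl)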
Example #6
def hdf5_open_ephys_channels(exp_path,
                             test,
                             hdf5_name,
                             rec_num='auto',
                             quantized=False,
                             data_chans='all',
                             downsamp=1,
                             load_chans=None):
    """Load HDF5-mapped arrays of the full band timeseries.

    This option provides a way to load massive multi-channel datasets
    sampled at 20 kS/s. Down-sampling is supported.

    """

    downsamp = int(downsamp)
    if downsamp > 1:
        quantized = False

    rec_path, rec_num = prepare_paths(exp_path, test, rec_num)

    chan_names = OE.get_filelist(rec_path,
                                 ctype='CH',
                                 channels=data_chans,
                                 source=rec_num[0])
    n_chan = len(chan_names)
    if not n_chan:
        raise IOError('no channels found')
    from ecogdata.expconfig import params
    # load channel at a time to be able to downsample
    bytes_per_channel = OE.get_channel_bytes(
        os.path.join(rec_path, chan_names[0]))
    if not quantized:
        bytes_per_channel *= 4

    if load_chans is None:
        load_chans = int(float(params.memory_limit) // (2 * bytes_per_channel))
    # get test channel for shape info
    ch_record = OE.loadContinuous(os.path.join(rec_path, chan_names[0]),
                                  dtype=np.int16,
                                  verbose=False)
    ch_data = ch_record['data']
    header = OE.get_header_from_folder(rec_path, source=rec_num[0])
    trueFs = get_robust_samplingrate(rec_path)
    if trueFs is None:
        trueFs = header['sampleRate']

    if downsamp > 1:
        d_len = ch_data[..., ::downsamp].shape[-1]
    else:
        d_len = ch_data.shape[-1]
    if quantized:
        arr_dtype = 'h'
    else:
        arr_dtype = 'f' if load_params().floating_point == 'single' else 'd'

    def _proc_block(block, antialias=True):
        if not quantized:
            block = block * ch_record['header']['bitVolts']
        if downsamp > 1:
            if antialias:
                block, _ = downsample(block, trueFs, r=downsamp)
            else:
                block = block[:, ::downsamp]
        return block

    with h5py.File(hdf5_name, 'w', libver='latest') as h5:
        h5.create_dataset('Fs', data=trueFs / downsamp)
        chans = h5.create_dataset('chdata',
                                  dtype=arr_dtype,
                                  chunks=True,
                                  shape=(n_chan, d_len))

        # Pack in channel data
        chans[0] = _proc_block(ch_data)
        start_chan = 1
        while True:
            stop_chan = min(len(chan_names), start_chan + load_chans)
            print('load chan', start_chan, 'to', stop_chan)
            ch_data = OE.loadFolderToTransArray(rec_path,
                                                dtype=np.int16,
                                                verbose=False,
                                                start_chan=start_chan,
                                                stop_chan=stop_chan,
                                                ctype='CH',
                                                channels=data_chans,
                                                source=rec_num[0])
            chans[start_chan:stop_chan] = _proc_block(ch_data)
            start_chan += load_chans
            if start_chan >= len(chan_names):
                break

    for arr in ('ADC', 'AUX'):
        n_extra = len(OE.get_filelist(rec_path, ctype=arr, source=rec_num[0]))
        if not n_extra:
            continue
        with h5py.File(hdf5_name, 'r+', libver='latest') as h5:
            chans = h5.create_dataset(arr.lower(),
                                      dtype=arr_dtype,
                                      chunks=True,
                                      shape=(n_extra, d_len))
            start_chan = 0
            while True:
                stop_chan = min(n_extra, start_chan + load_chans)
                ch_data = OE.loadFolderToTransArray(rec_path,
                                                    dtype=np.int16,
                                                    verbose=False,
                                                    source=rec_num[0],
                                                    start_chan=start_chan,
                                                    stop_chan=stop_chan,
                                                    ctype=arr)
                chans[start_chan:stop_chan] = _proc_block(ch_data,
                                                          antialias=False)
                start_chan += load_chans
                if start_chan >= n_extra:
                    break
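
The per-block processing (scale to volts, then decimate) and the channel-wise packing into an HDF5 dataset can be sketched without the Open Ephys readers. Names and numbers here are hypothetical, and the decimation is plain slicing with no anti-alias filter:

import numpy as np
import h5py

n_chan, n_samp, downsamp, bit_volts = 8, 20000, 4, 0.195

def proc_block(block):
    block = block * bit_volts      # quantized codes -> microvolts
    return block[:, ::downsamp]    # naive decimation (no anti-aliasing)

with h5py.File('example.h5', 'w', libver='latest') as h5:
    h5.create_dataset('Fs', data=20000.0 / downsamp)
    chans = h5.create_dataset('chdata', dtype='f', chunks=True,
                              shape=(n_chan, n_samp // downsamp))
    # Write two channels per pass, as if constrained by a memory limit
    for start in range(0, n_chan, 2):
        raw = np.random.randint(-512, 512, size=(2, n_samp)).astype('h')
        chans[start:start + 2] = proc_block(raw)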
Example #7
    def mirror(self, new_rate_ratio=None, writeable=True, mapped=True, channel_compatible=False, filename='',
               copy='', new_sources=dict(), **map_args):
        # TODO: channel order permutation in mirrored source
        """
        Create an empty ElectrodeDataSource based on the current source, possibly with a new sampling rate and new
        access permissions.

        Parameters
        ----------
        new_rate_ratio: int or None
            Ratio of old to new sample rate for the mirrored array (> 1).
        writeable: bool
            Make any new MappedSource arrays writeable. This implies 1) casting the datatype to floating point,
            and 2) no further units conversion on the primary array.
        mapped: bool
            If False, mirror to a PlainArraySource (in memory). Else mirror into a new MappedSource.
        channel_compatible: bool
            If True, preserve the same number of raw data channels in a MappedSource. Otherwise, reduce the channels
            to just the set of active channels. Currently not supported for non-mapped mirrors.
        filename: str
            Name of the new MappedSource. If empty, use a self-destroying temporary file.
        copy: str
            Selects which arrays to copy; only valid when new_rate_ratio is None or 1. 'aligned' copies the
            aligned arrays. 'electrode' copies the electrode data (only valid if channel_compatible is False).
            'all' copies all arrays. By default, nothing is copied.
        new_sources: dict
            If mapped=False, then pre-allocated arrays can be provided for each source (i.e. 'data_buffer' and any
            aligned arrays).
        map_args: dict
            Any other MappedSource arguments

        Returns
        -------
        datasource: ElectrodeDataSource subtype

        """

        T = self.shape[1]
        if new_rate_ratio:
            T = calc_new_samples(T, new_rate_ratio)

        # if casting to floating point, check for preferred precision
        fp_precision = load_params().floating_point.lower()
        fp_dtype = 'f' if fp_precision == 'single' else 'd'
        fp_dtype = np.dtype(fp_dtype)

        # unpack copy mode
        copy_electrodes_coded = copy.lower() in ('all', 'electrode')
        copy_aligned_coded = copy.lower() in ('all', 'aligned')
        diff_rate = T != self.shape[1]
        if copy_electrodes_coded:
            if diff_rate or channel_compatible:
                copy_electrodes = False
                print('Not copying electrode channels. Diff rate '
                      '({}) or indirect channel map ({})'.format(diff_rate, channel_compatible))
            else:
                copy_electrodes = True
        else:
            copy_electrodes = False
        if copy_aligned_coded:
            if diff_rate:
                copy_aligned = False
                print('Not copying aligned arrays: different sample rate')
            else:
                copy_aligned = True
        else:
            copy_aligned = False

        if mapped:
            if channel_compatible:
                electrode_channels = self._electrode_channels
                C = self.data_buffer.shape[1] if self._transpose else self.data_buffer.shape[0]
                channel_mask = self.binary_channel_mask
            else:
                C = self.shape[0]
                electrode_channels = None
                channel_mask = None
            if writeable:
                new_dtype = fp_dtype
                reopen_mode = 'r+'
                units_scale = None
            else:
                new_dtype = self.data_buffer.map_dtype
                reopen_mode = 'r'
                units_scale = self.data_buffer.units_scale
            tempfile = not filename
            if tempfile:
                with TempFilePool(mode='ab') as f:
                    # punt on the unlink-on-close issue for now with "delete=False"
                    # f.file.close()
                    filename = f
            # open as string just in case its a TempFilePool
            # TODO: (need to figure out how to make it work seamless as a string)
            with h5py.File(str(filename), 'w', libver='latest') as fw:
                # Create all new datasets as non-transposed
                fw.create_dataset(self._electrode_field, shape=(C, T), dtype=new_dtype, chunks=True)
                if copy_electrodes:
                    for block, sl in self.iter_blocks(return_slice=True, sharedmem=False):
                        fw[self._electrode_field][sl] = block
                for name in self.aligned_arrays:
                    arr = getattr(self, name)
                    if len(arr.shape) > 1:
                        dims = (arr.shape[1], T) if self._transpose else (arr.shape[0], T)
                    else:
                        dims = (T,)
                    # aligned arrays keep their own dtype unless the mirror is writeable
                    dtype = fp_dtype if writeable else arr.dtype
                    fw.create_dataset(name, shape=dims, dtype=dtype, chunks=True)
                    if copy_aligned:
                        aligned = getattr(self, name)[:]
                        fw[name][:] = aligned.T if self._transpose else aligned
                # set single write, multiple read mode AFTER datasets are created
                # Skip for now -- SWMR issue pending
                # fw.swmr_mode = True
            print(str(filename), 'reopen mode', reopen_mode)
            f_mapped = h5py.File(str(filename), reopen_mode, libver='latest')
            # Skip for now -- SWMR issue pending
            # if writeable:
            #     f_mapped.swmr_mode = True
            if isinstance(filename, TempFilePool):
                filename.register_to_close(f_mapped)
            # If the original buffer's unit scaling is 1.0 then we should keep real slices on
            real_slices = units_scale == 1.0
            return MappedSource.from_hdf_sources(f_mapped, self._electrode_field, units_scale=units_scale,
                                                 real_slices=real_slices, aligned_arrays=self.aligned_arrays,
                                                 transpose=False, electrode_channels=electrode_channels,
                                                 channel_mask=channel_mask, **map_args)
            # return MappedSource(f_mapped, self._electrode_field, electrode_channels=electrode_channels,
            #                     channel_mask=channel_mask, aligned_arrays=self.aligned_arrays,
            #                     transpose=False, **map_args)
        else:
            if channel_compatible:
                print('RAM mirrors are not channel compatible--use mapped=True')
            self._check_slice_size(np.s_[:, :T])
            C = self.shape[0]
            if 'data_buffer' in new_sources:
                new_source = new_sources['data_buffer']
            else:
                new_source = shm.shared_ndarray((C, T), fp_dtype.char)
            if copy_electrodes:
                for block, sl in self.iter_blocks(return_slice=True, sharedmem=False):
                    new_source[sl] = block
            # Kind of tricky with aligned fields -- assume that transpose means the same thing for them?
            # But also un-transpose them on this mirroring step
            aligned_arrays = dict()
            for name in self.aligned_arrays:
                arr = getattr(self, name)
                if len(arr.shape) > 1:
                    dims = (arr.shape[1], T) if self._transpose else (arr.shape[0], T)
                else:
                    dims = (T,)
                if name in new_sources:
                    aligned_arrays[name] = new_sources[name]
                else:
                    aligned_arrays[name] = shm.shared_ndarray(dims, fp_dtype.char)
                if copy_aligned:
                    aligned = getattr(self, name)[:]
                    aligned_arrays[name][:] = aligned.T if self._transpose else aligned
            return PlainArraySource(new_source, **aligned_arrays)
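
The mapped branch of mirror() boils down to: create an HDF5 file with an empty, non-transposed dataset of the target shape, optionally copy the data over in blocks, and reopen with the requested access mode. A minimal standalone sketch with hypothetical shapes, skipping the channel-map and units-scale bookkeeping:

import numpy as np
import h5py

source = np.random.randn(16, 100000)
C, T = source.shape
block = 10000

with h5py.File('mirror.h5', 'w', libver='latest') as fw:
    fw.create_dataset('data', shape=(C, T), dtype='f', chunks=True)
    # copy_electrodes: fill the new dataset block by block along the time axis
    for start in range(0, T, block):
        sl = np.s_[:, start:start + block]
        fw['data'][sl] = source[sl]

# Reopen writable ('r+') for a writeable mirror, or 'r' for read-only
f_mapped = h5py.File('mirror.h5', 'r+', libver='latest')
print(f_mapped['data'].shape)
f_mapped.close()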