Пример #1
0
def subsample_volume(input_name, factor=2, output_name=None):
    '''Subsample every frame of a timeseries volume h5 file.

    Reads the ``timeseries_volume`` and ``vol_shape`` datasets from
    `input_name`, passes each frame through ``volume.subsample_volume`` and
    writes the results, together with the new ``vol_shape``, to a new h5 file.

    :param input_name: Input h5 filename.
    :param factor: Subsampling factor handed to ``volume.subsample_volume``.
    :param output_name: Output h5 filename. Defaults to
                        ``<input basename>_subsampled_<factor>x.h5``.
    :returns: The output filename. If that file already exists it is returned
              as-is and nothing is recomputed.
    :raises ValueError: If the input contains no frames.
    '''
    # Context managers guarantee both files are closed on every path,
    # including the early return and any exception raised mid-way.
    with h5py.File(input_name, 'r') as input_h5:
        input_volume = input_h5['timeseries_volume']
        vol_shape = input_h5['vol_shape'][...]
        # Setup output
        if output_name is None:
            output_name = '%s_subsampled_%dx.h5' % (
                os.path.splitext(input_name)[0], factor)
        if os.path.exists(output_name):
            return output_name
        ntimesteps = input_volume.shape[0]
        if ntimesteps == 0:
            # Without frames there is no subsampled shape to record, so fail
            # before an empty output file is left behind.
            raise ValueError('%s contains no frames to subsample' % input_name)
        with h5py.File(output_name, 'w') as output_h5:
            output_volume = None
            new_vol_shape = None
            for t in xrange(ntimesteps):
                x, new_vol_shape = volume.subsample_volume(
                    input_volume[t, :], vol_shape, factor)
                if output_volume is None:
                    # The subsampled frame length is only known once the
                    # first frame has been processed.
                    output_volume = output_h5.create_dataset(
                        'timeseries_volume', (ntimesteps, x.shape[0]), 'f')
                output_volume[t, :] = x
                simple_progress_bar(t, ntimesteps)
            print('Saving volume to', output_h5)
            output_h5.create_dataset('vol_shape', data=new_vol_shape[...])
            output_h5.flush()
    return output_name
Пример #2
0
def matlabv7_to_h5(input_filename, data_name, output_filename=None):
    '''Copy a dataset from an h5py-readable file into the timeseries layout.

    The dataset `data_name` is read frame by frame along its first axis and
    written to `output_filename` as a 2D ``timeseries_volume`` dataset
    (frames x voxels), together with a ``vol_shape`` dataset of
    ``(1,) + reversed frame shape``.

    :param input_filename: Input file; it is opened with h5py
                           (presumably a MATLAB v7.3 file, going by the
                           function name -- confirm with callers).
    :param data_name: Name of the dataset to convert.
    :param output_filename: Output h5 filename. Defaults to the input
                            filename with its extension replaced by ``.h5``.
    '''
    if output_filename is None:
        output_filename = os.path.splitext(input_filename)[0] + '.h5'
    # Context managers close both files even if a frame fails to convert.
    with h5py.File(input_filename, 'r') as f:
        matdata = f[data_name]
        ntimesteps = matdata.shape[0]
        # Frame axes are stored in reverse order relative to vol_shape
        img_shape = matdata.shape[1:][::-1]
        vol_shape = np.array((1,) + img_shape)
        nvoxels = np.prod(vol_shape)
        with h5py.File(output_filename, 'w') as fout:
            fout.create_dataset('vol_shape', data=vol_shape)
            output_volume = fout.create_dataset('timeseries_volume',
                                                (ntimesteps, nvoxels), 'f')
            for t in xrange(ntimesteps):
                simple_progress_bar(t, ntimesteps)
                # Restore the stored axis order, then flatten in Fortran order
                frame = np.reshape(matdata[t], vol_shape[::-1])
                output_volume[t] = np.reshape(frame, nvoxels, order='f')
Пример #3
0
def chunked_map_ooc(datareader,
                    fn,
                    targs=None,
                    chunk_args=None,
                    chunks=20,
                    output_filename=None,
                    n_jobs=-1,
                    aout=True,
                    preproc=None,
                    vol_shape_arg=True,
                    overwite=True,
                    transpose=False,
                    subsamp_factor=1,
                    **kwargs):
    '''Apply a function to chunks of the vectorized dataset.

    :param datareader: DataReader providing ``ntimesteps``, ``nvoxels``,
                       ``vol_shape`` and ``read``.
    :param fn: Function to apply to each row of a chunk. If None, each chunk
               is passed through unchanged (after `preproc`).
    :param targs: Argument for each frame of dataset (indexed by the chunk's
                  index array). Takes precedence over `chunk_args`.
    :param chunk_args: Argument for each temporal chunk
    :param chunks: Number of chunks to split data into
    :param output_filename: h5 file to write output if aout=True
    :param n_jobs: Number of jobs to use (forced to 1 on Mac)
    :param aout: Array output. Reduces the output of `fn` to an array
                 either in memory or to h5 file output_filename if not None.
                 If aout is True, then we return a DataReader, otherwise
                 we return a list containing the function outputs.
    :param preproc: Optional function applied to each chunk before `fn`.
                    Called as ``preproc(data, vol_shape)`` and expected to
                    return ``(data, vol_shape)`` when `vol_shape_arg` is
                    True, otherwise called as ``preproc(data)``.
    :param vol_shape_arg: If True, pass the volume shape to `fn` as its
                          second positional argument.
    :param overwite: Unused; kept for interface compatibility.
    :param transpose: If True, chunk over voxels instead of timesteps and
                      write chunks along the second axis of the output.
    :param subsamp_factor: The array output is allocated with
                           ``floor(ntimesteps / subsamp_factor)`` rows when
                           not transposed (presumably `preproc`/`fn` reduce
                           the number of frames by this factor -- confirm).
    :param **kwargs:  Additional kwargs to be passed to `fn`
    '''

    # Using numpy within multiprocessing segfaults on Mac.
    # See: http://mail.python.org/pipermail/python-ideas/2012-November/017932.html
    if sys.platform == 'darwin':
        n_jobs = 1
    from io import FileDataReader, MemoryDataReader
    from joblib import Parallel, delayed
    out = None
    f = None  # h5 output file, created lazily once the output shape is known
    ntimesteps = datareader.ntimesteps
    nvoxels = datareader.nvoxels
    ntimesteps_out = int(np.floor(ntimesteps / float(subsamp_factor)))
    vol_shape = datareader.vol_shape
    if transpose:
        input_tchunks = np.array_split(np.arange(nvoxels), chunks)
    else:
        input_tchunks = np.array_split(np.arange(ntimesteps), chunks)

    tidx = 0
    try:
        for t, tchunk in enumerate(input_tchunks):
            simple_progress_bar(t, len(input_tchunks))
            if tchunk.size:
                # Select appropriate parameters
                if targs is not None:
                    args = targs[tchunk]
                elif chunk_args is not None:
                    args = [chunk_args[t]] * len(tchunk)
                else:
                    args = None
                # Read chunk of data
                if transpose:
                    data = datareader.read(None, tchunk)
                else:
                    data = datareader.read(tchunk, None)
                if preproc:
                    if vol_shape_arg:
                        data, vol_shape = preproc(data, datareader.vol_shape)
                    else:
                        data = preproc(data).astype(data.dtype)
                # Compute fn in parallel over the rows of the chunk
                if fn is not None:
                    # Positional arguments that follow each datum:
                    # [vol_shape] [per-frame arg]
                    shape_args = (vol_shape, ) if vol_shape_arg else ()
                    if args is not None:
                        jobs = (delayed(fn)(datum, *(shape_args + (arg, )),
                                            **kwargs)
                                for datum, arg in zip(data, args))
                    else:
                        jobs = (delayed(fn)(datum, *shape_args, **kwargs)
                                for datum in data)
                    tout = Parallel(n_jobs=n_jobs)(jobs)
                else:
                    tout = [data]
                if aout:
                    # Compress output into array
                    tout = np.vstack(tout)
                    if out is None:
                        if transpose:
                            shape = (tout.shape[0], nvoxels)
                        else:
                            shape = (ntimesteps_out, ) + tout.shape[1:]
                        if output_filename is None:
                            out = np.zeros(shape, dtype=tout.dtype)
                        else:
                            import h5py
                            f = h5py.File(output_filename, 'w')
                            f.create_dataset('vol_shape', data=vol_shape)
                            out = f.create_dataset('timeseries_volume',
                                                   shape,
                                                   dtype=tout.dtype)
                    tchunk_out = np.arange(tidx, tidx + tout.shape[0])
                    if transpose:
                        out[:, tchunk_out] = tout
                    else:
                        out[tchunk_out, :] = tout
                    tidx += tout.shape[0]
                else:
                    # Keep output in a big list
                    if out is None:
                        out = []
                    out.extend(tout)
            # NOTE(review): progress is also reported at the top of the
            # loop; confirm whether both calls are needed.
            simple_progress_bar(t, chunks)
    finally:
        # Close the output file on success and on failure so a failing chunk
        # does not leave the h5 file open.
        if f is not None:
            f.close()
    if not aout:
        return out
    if output_filename is not None:
        return FileDataReader(output_filename)
    return MemoryDataReader(out, vol_shape)
Пример #4
0
def load_data(input_filename,
              time_window=None,
              crops=None,
              big_data=False,
              subsample_time=1,
              output_filename=None,
              chunks=100,
              min_sub=False,
              max_proj=False,
              smooth=False):
    '''Load dataset into DataReader object. Can be read into memory using .read()

    :param input_filename: Input h5 filename
    :param time_window: tuple of [start_time, end_time]. Defaults to the
                        whole recording; an end_time of -1 means "up to the
                        last timestep".
    :param crops: crop volumes according to list of 3 tuples:
                    [(xmin, xmax), (ymin, ymax), (zmin, zmax)]
                  Cropping is skipped when no entry is greater than 0.
    :param big_data: Write output to h5 file if cropped, and return FileDataReader
    :param subsample_time: Subsampling factor in time. Note this does not do smoothing
                           before subsampling unless `smooth` is set.
    :param output_filename: Output file to write if using big_data (optional).
                            Defaults to ``<input basename>_cropped.h5``.
    :param chunks: Read dataset in this many different chunks.
    :param min_sub: Min subtract dataset using the ``vol_min`` dataset of the
                    input file.
    :param max_proj: Replace each volume by its maximum along axis 2 of the
                     array returned by ``vector_to_volume``; the first entry
                     of the output vol_shape is set to 1.
    :param smooth: Whether to gaussian-filter frames along time
                   (sigma = subsample_time / 4) before subsampling. Only has
                   an effect when subsample_time != 1.
    :returns: A DataReader. When nothing needs to change, the input reader
              itself (big_data) or an in-memory copy of it; otherwise a
              FileDataReader on `output_filename` (big_data) or a
              MemoryDataReader.
    :raises ValueError: If min_sub is set and the input file has no
                        ``vol_min`` dataset.
    '''
    if big_data and output_filename is None:
        output_filename = os.path.splitext(input_filename)[0] + '_cropped.h5'
    input_data = FileDataReader(input_filename)
    vol_shape = input_data.vol_shape
    if time_window is None:
        time_window = [0, input_data.ntimesteps]
    time_window = np.array(time_window)
    if time_window[1] == -1:
        time_window[1] = input_data.ntimesteps
    time_list = np.arange(time_window[0], time_window[1], subsample_time)
    #XXX: ben throwing away data!
    ntimesteps = len(time_list)
    # Coerce to an array first: comparing a plain list of tuples against 0
    # is not an element-wise test.
    do_crop = crops is not None and np.any(np.asarray(crops) > 0)
    do_time = ntimesteps != input_data.ntimesteps
    if min_sub:
        if 'vol_min' not in input_data.file:
            raise ValueError('Run update_timeseries_stats on %s' %
                             input_filename)
        vol_min = input_data.file['vol_min'][...]
    if not (do_crop or do_time or min_sub or max_proj):
        # Nothing to transform: hand back the data untouched
        if big_data:
            return input_data
        return MemoryDataReader(input_data.read(), input_data.vol_shape)
    if do_crop:
        idx, new_vol_shape = grid_to_idx(vol_shape, crops[0], crops[1],
                                         crops[2])
    else:
        idx = slice(None)
        new_vol_shape = vol_shape
    if max_proj:
        new_vol_shape = np.array(new_vol_shape)
        # Keep the unprojected shape to rebuild each volume before projecting
        new_vol_shape_full = new_vol_shape.copy()
        new_vol_shape[0] = 1

    nvoxels = np.prod(new_vol_shape)
    smoothing = subsample_time != 1 and smooth
    output_file = None
    if big_data:
        print('Writing output to ', output_filename)
        output_file = h5py.File(output_filename, 'w')
    try:
        # Allocate memory or file
        if big_data:
            output_file.create_dataset('vol_shape', data=new_vol_shape)
            data = output_file.create_dataset('timeseries_volume',
                                              (ntimesteps, nvoxels),
                                              dtype=input_data.data.dtype)
        else:
            data = np.zeros((ntimesteps, nvoxels),
                            dtype=input_data.data.dtype)

        if smoothing:
            # Read every frame of the window. The leading part is split into
            # `chunks` pieces whose lengths are multiples of subsample_time;
            # the remainder becomes one extra chunk.
            ntimesteps_last = (time_window[1] - time_window[0]) % (
                subsample_time * chunks)
            tlist_full = np.arange(time_window[0], time_window[1])
            input_tchunks = np.array_split(
                tlist_full[:len(tlist_full) - ntimesteps_last], chunks)
            if ntimesteps_last > 0:
                input_tchunks.append(tlist_full[-ntimesteps_last:])
                chunks += 1
        else:
            input_tchunks = np.array_split(time_list, chunks)
        tidx = 0
        sigma = subsample_time / 4.0
        for i, input_tchunk in enumerate(input_tchunks):
            simple_progress_bar(i, chunks)
            if not input_tchunk.size:  # array_split can have empty arrays
                continue
            # h5 only supports one indexing vector
            dchunk = input_data.read(input_tchunk, slice(None))
            if dchunk.ndim == 1:
                dchunk = dchunk.reshape(1, -1)
            dchunk = dchunk[:, idx]
            if smoothing:
                dchunk = gaussian_filter1d(dchunk, sigma=sigma,
                                           axis=0)[::subsample_time]
            if min_sub:
                dchunk -= vol_min[idx]
            output_tchunk = np.arange(tidx, tidx + dchunk.shape[0])
            tidx += dchunk.shape[0]
            if max_proj:
                for t in xrange(dchunk.shape[0]):
                    projected = vector_to_volume(
                        dchunk[t], new_vol_shape_full).max(2)
                    data[output_tchunk[t]] = volume_to_vector(
                        projected[:, :, np.newaxis])
            else:
                data[output_tchunk, :] = dchunk
    finally:
        # Close the output file on success and on failure so an error while
        # processing does not leave the h5 file open.
        if output_file is not None:
            output_file.close()
    # Return data as DataReader objects
    if big_data:
        return FileDataReader(output_filename)
    return MemoryDataReader(data, new_vol_shape)