Пример #1
0
def process(function,
            source,
            sink=None,
            axes=None,
            size_max=None,
            size_min=None,
            overlap=None,
            optimization=True,
            optimization_fix='all',
            neighbours=False,
            function_type=None,
            as_memory=False,
            return_result=False,
            return_blocks=False,
            processes=None,
            verbose=False,
            **kwargs):
    """Create blocks and process a function on them in parallel.

    Arguments
    ---------
    function : function
      The main data processing script.
    source : str, Source, or list
      The source or list of sources to apply a function to.
    sink : str, Source, list, or None
      The sink or list of sinks to write the result to.
      If None, no sink is initialized and each block call receives an
      empty list of sink blocks.
    axes : int, list of ints, or None
      Axes along which to split the source. If None, the splitting is
      determined by :func:`block_axes` from the first source.
    size_max : int, list of ints or None
      Maximal size of a block along the axes.
      If None, :const:`default_size_max` is used.
    size_min : int or list of ints
      Minimal size of a block along the axes.
      If None, :const:`default_size_min` is used.
    overlap : int, list of ints or None
      Minimal overlap between blocks along the axes.
      If None, :const:`default_overlap` is used.
    optimization : bool or list of bools
      If True, optimize block sizes to best fit number of processes.
    optimization_fix : 'increase', 'decrease', 'all' or None or list
      Increase, decrease or optimally change the block size when
      optimization is active.
    neighbours : bool
      If True, also include information about the neighbourhood in the
      blocks.
    function_type : 'array', 'source', 'block' or None
      The function type passed. If None, 'array' is used.

      * 'array'
        Reading and writing the valid slices from the blocks is automatic
        and the function gets passed numpy arrays.
      * 'source'
        Reading and writing the valid slices from the blocks is automatic
        and the function gets passed Source classes as inputs.
      * 'block'
        The function is assumed to act on and update blocks itself.

    as_memory : bool
      If True, load full blocks into memory before applying the function.
      Can be useful to reduce frequent reading and writing operations of
      memmaps.
    return_result : bool
      If True, return the results of the processing functions instead of
      the sink.
    return_blocks : bool
      If True, also return the block information used to distribute the
      processing.
    processes : int, 'serial' or None
      The number of parallel processes. If 'serial', the blocks are
      processed one after the other in the calling process. Any other
      non-integer value (e.g. None) selects ``mp.cpu_count()`` processes.
    verbose : bool
      If True, print timing information; the flag is also forwarded to the
      block processing function. The block splitting itself is always
      called with ``verbose=False``.
    **kwargs
      Additional keyword arguments forwarded to the block processing
      function.

    Returns
    -------
    result : object
      The list of per-block results if `return_result` is True, otherwise
      the `sink` argument as passed in. If `return_blocks` is True, a tuple
      ``(result, [source_blocks, sink_blocks])`` is returned instead.

    Raises
    ------
    ValueError
      If `function_type` is not 'array', 'source', 'block' or None.

    Note
    ----
    This implementation only supports processing into sinks with the same
    shape as the source.
    """
    # Validate the function type up front so that an invalid value fails
    # before any source or sink is opened or initialized.
    if function_type is None:
        function_type = 'array'
    if function_type not in ('array', 'source', 'block'):
        raise ValueError(
            "function type %r not 'array', 'source', 'block' or None!" %
            (function_type, ))

    # sources and sinks
    if isinstance(source, list):
        sources = source
    else:
        sources = [source]
    sources = [io.as_source(s).as_virtual() for s in sources]

    if isinstance(sink, list):
        sinks = sink
    elif sink is None:
        sinks = []
    else:
        sinks = [sink]

    # sinks are initialized using the first source as a hint
    sinks = [io.initialize(s, hint=sources[0]) for s in sinks]
    sinks = [io.as_source(s).as_virtual() for s in sinks]

    axes = block_axes(sources[0], axes=axes)

    split = ft.partial(split_into_blocks,
                       processes=processes,
                       axes=axes,
                       size_max=size_max,
                       size_min=size_min,
                       overlap=overlap,
                       optimization=optimization,
                       optimization_fix=optimization_fix,
                       neighbours=neighbours,
                       verbose=False)

    source_blocks = [split(s) for s in sources]
    sink_blocks = [split(s) for s in sinks]
    n_blocks = len(source_blocks[0])

    # regroup from per-source lists of blocks to per-block lists of sources
    source_blocks = [[blocks[i] for blocks in source_blocks]
                     for i in range(n_blocks)]
    sink_blocks = [[blocks[i] for blocks in sink_blocks]
                   for i in range(n_blocks)]

    if function_type == 'block':
        func = ft.partial(process_block_block,
                          function=function,
                          as_memory=as_memory,
                          return_result=return_result,
                          verbose=verbose,
                          **kwargs)
    elif function_type == 'source':
        func = ft.partial(process_block_source,
                          function=function,
                          as_memory=as_memory,
                          as_array=False,
                          verbose=verbose,
                          **kwargs)
    else:  # 'array'
        func = ft.partial(process_block_source,
                          function=function,
                          as_memory=as_memory,
                          as_array=True,
                          verbose=verbose,
                          **kwargs)

    if not isinstance(processes, int) and processes != "serial":
        processes = mp.cpu_count()

    if verbose:
        # callables such as functools.partial objects carry no __name__
        function_name = getattr(function, '__name__', repr(function))
        timer = tmr.Timer()
        print("Processing %d blocks with function %r." %
              (n_blocks, function_name))

    if isinstance(processes, int):
        with cf.ProcessPoolExecutor(max_workers=processes) as executor:
            futures = [
                executor.submit(func, *args)
                for args in zip(source_blocks, sink_blocks)
            ]
            # f.result() re-raises any exception from the worker
            result = [f.result() for f in futures]
    else:
        result = [func(*args) for args in zip(source_blocks, sink_blocks)]

    if verbose:
        timer.print_elapsed_time("Processed %d blocks with function %r" %
                                 (n_blocks, function_name))

    if return_result:
        ret = result
    else:
        ret = sink
    if return_blocks:
        ret = (ret, [source_blocks, sink_blocks])
    return ret
Пример #2
0
def resample_inverse(source, sink = None, 
                     resample_source = None, resample_sink = None,
                     orientation = None, 
                     source_shape = None, source_resolution = None, 
                     sink_shape = None, sink_resolution = None, 
                     axes_order = None, method = 'memmap',
                     interpolation = 'linear', 
                     processes = None, verbose = True, **args):
  """Resample data inversely to :func:`resample` routine.
  
  Arguments
  ---------
  source : str, array
    Source to be inversely resampled (e.g. sink in :func:`resample`).
  sink : str or None
    Sink to write the inversely resampled image to.
  resample_source : str, array or None
    Optional source in :func:`resample`. Used to determine `source_shape`
    if that is not given.
  resample_sink : str, array or None
    Optional sink used in :func:`resample`. Used to determine `sink_shape`
    if neither `sink_shape` nor `sink_resolution` is given.
  orientation : tuple
    Orientation as specified as in :func:`resample`.
  source_shape : tuple or None
    Optional value of source_shape as in :func:`resample`.
  source_resolution : tuple or None
    Optional value of source_resolution as in :func:`resample`.
  sink_shape : tuple or None
    Optional value of sink_shape as in :func:`resample`. If neither this
    nor `sink_resolution` is given, the shape of `resample_sink` or, 
    failing that, of `source` is used.
  sink_resolution : tuple or None
    Optional value of sink_resolution as in :func:`resample`.
  axes_order : list of tuples of int or None
    The axes pairs along which to resample the data as in :func:`resample`.
  method : 'shared' or 'memmap'
    Method to handle intermediate resampling results. If 'shared' use shared 
    memory, otherwise use a memory map on disk.
  interpolation : str
    Method to use for interpolating to the resampled image. 
  processes : int, None or 'serial'
    Number of processes to use for parallel resampling. If 'serial', 
    resample in the calling process; any other non-integer value (e.g. 
    None) uses all available processes.
  verbose : bool
    If True, print progress information.
  **args
    Accepted but not used by this function.
   
  Returns
  -------
  resampled : Source
     The inversely resampled data, as returned by ``as_real()`` of the 
     final resampling sink.

  Raises
  ------
  ValueError
    If none of `source_shape`, `source_resolution` and `resample_source` 
    is given.

  Notes
  -----
  * All arguments, except source and sink should be passed as :func:`resample`
    to invert the resampling.
  """   
  source = io.as_source(source)
  ndim = source.ndim
  dtype = source.dtype
  
  #orientation
  orientation = format_orientation(orientation)
  orientation_inverse = inverse_orientation(orientation)
  
  #original source info
  if source_shape is None:
    if source_resolution is None and resample_source is None:
      raise ValueError('Either source_shape, source_resolution or resample_source must to be given!')
    if resample_source is not None:
      source_shape = io.shape(resample_source)
  
  #original sink info
  if sink_shape is None and sink_resolution is None: 
    if resample_sink is None:
      sink_shape = io.shape(source)
    else:
      sink_shape = io.shape(resample_sink)
  
  source_shape, sink_shape, source_resolution, sink_resolution = \
      resample_shape(source_shape=source_shape, sink_shape=sink_shape, 
                     source_resolution=source_resolution, sink_resolution=sink_resolution, 
                     orientation=orientation)
  
  sink_shape_in_source_orientation = orient_shape(sink_shape, orientation, inverse=True)
  
  # NOTE(review): resample() calls _axes_order with three positional 
  # arguments plus order=...; here four positional arguments are passed. 
  # Confirm against the definition of _axes_order.
  axes_order, shape_order = _axes_order(axes_order, source, source_shape, sink_shape_in_source_orientation)
 
  interpolation = _interpolation_to_cv2(interpolation)

  # keep an explicit integer process count; only fall back to the cpu count
  # for non-integer values other than 'serial' (same rule as in resample)
  if not isinstance(processes, int) and processes != 'serial':
    processes = io.mp.cpu_count()
  
  #reversed orientation
  if orientation is not None:
    #reverse axes
    slicing = [slice(None)] * ndim
    reslice = False
    for d, o in enumerate(orientation):
      if o < 0:
        slicing[d] = slice(None, None, -1)
        reslice = True
    if reslice:
      # index with a tuple: a list of slices is not a valid multi-axis index
      source = source[tuple(slicing)]
    
    #re-orient
    per = orientation_to_permuation(orientation_inverse)
    source = io.read(source)
    source = source.transpose(per)
    source = io.sma.as_shared(source)
 
  #reverse resampling steps
  axes_order = axes_order[::-1]
  
  shape_order = shape_order[:-1]
  shape_order = shape_order[::-1]
  shape_order = shape_order + [source_shape]
  
  #reverse resampling
  # NOTE(review): if axes_order is empty no step runs and `resampled` is 
  # never assigned; resample() has an explicit early return for that case.
  n_steps = len(axes_order)
  last_source = source
  delete_files = []
  try:
    for step, (axes, shape) in enumerate(zip(axes_order, shape_order)):
      if step == n_steps-1:
        resampled = io.initialize(source=sink, shape=shape, dtype=dtype, memory='shared', as_source=True)
      else:
        if method == 'shared':
          resampled = io.sma.create(shape, dtype=dtype, order='C', as_source=True)
        else:
          # NOTE(review): tempfile.mktemp is deprecated because the name can
          # be claimed by another process before the file is created.
          location = tempfile.mktemp() + '.npy'
          resampled = io.mmp.create(location, shape=shape, dtype=dtype, order='C', as_source=True)
          delete_files.append(location)

      #indices for non-resampled axes
      indices = [range(s) for d, s in enumerate(shape) if d not in axes]
      indices = list(itertools.product(*indices))
      n_indices = len(indices)
      
      #resample step
      last_source_virtual = last_source.as_virtual()
      resampled_virtual = resampled.as_virtual()
      _resample = ft.partial(_resample_2d, source=last_source_virtual, sink=resampled_virtual, axes=axes, shape=shape, 
                                           interpolation=interpolation, n_indices=n_indices, verbose=verbose)                       
      
      if processes == 'serial': 
        for index in indices:
          _resample(index=index)
      else:
        with concurrent.futures.ProcessPoolExecutor(processes) as executor:
          # consume the iterator so that exceptions raised in the workers 
          # propagate instead of being silently dropped
          list(executor.map(_resample, indices))
          
      last_source = resampled
  finally:
    # remove intermediate memmap files even if a resampling step failed
    for f in delete_files:
      io.delete_file(f)
  
  sink = resampled.as_real()
      
  return sink
Пример #3
0
def smooth_by_configuration(source, sink = None, iterations = 1, 
                            processing_parameter = None,
                            processes = None, verbose = False):
  """Smooth a binary source block-wise via its local pixel configuration.
  
  Arguments
  ---------
  source : array or Source
    The binary source to smooth.
  sink : array, Source or None
    The sink to write the smoothing result to. It is initialized with the 
    shape and order of the source and dtype bool.
  iterations : int
    Number of smoothing iterations. Also determines the default block 
    overlap (2 + 2 * iterations) and minimal block size 
    (2 + 2 * iterations + 1).
  processing_parameter : None or dict
    Parameter overriding the defaults passed to 
    :func:`ClearMap.ParallelProcessing.BlockProcessing.process`.
  processes : int or None
    Number of processes to use.
  verbose : bool
    If True, print progress information.
    
  Returns
  -------
  smoothed : array or Source
    The initialized sink the block processing was run on.

  Note
  ----
  The algorithm is based on a topological smoothing operation defined by adding
  or removing foreground pixels based on the local topology of the binary array.
  """
  if verbose:
    print('Binary smoothing: initialized!')
    timer = tmr.Timer()
  
  # per-block smoothing function; the partial gets an explicit __name__ 
  # since functools.partial objects do not carry one
  block_function = functools.partial(smooth_by_configuration_block, iterations=iterations, verbose=False)
  block_function.__name__ = 'smooth_by_configuration'
  
  # sources and sinks
  source = io.as_source(source)
  sink = io.initialize(sink, shape=source.shape, dtype=bool, order=source.order)
  
  # defaults for the block processing, optionally overridden by the caller
  parameter = {
    'axes': bp.block_axes(source),
    'as_memory': True,
    'overlap': None,
    'function_type': 'source',
    'processes': processes,
    'verbose': verbose,
  }
  if processing_parameter is not None:
    parameter.update(processing_parameter)
  
  # fill in values that are still missing or None after the override
  if parameter.get('overlap') is None:
    parameter['overlap'] = 2 + 2 * iterations
  if parameter.get('size_min') is None:
    parameter['size_min'] = 2 + 2 * iterations + 1
  if parameter.get('axes') is None:
    parameter['axes'] = bp.block_axes(source)
  
  # run the smoothing on all blocks
  bp.process(block_function, source, sink, **parameter)
  
  if verbose:
    timer.print_elapsed_time('Binary smoothing: done')
  
  return sink
Пример #4
0
def resample(source, sink = None, orientation = None, 
             sink_shape = None, source_resolution = None, sink_resolution = None, 
             interpolation = 'linear', axes_order = None, method = 'shared',
             processes = None, verbose = True):
  """Resample data of source in new shape/resolution and orientation.
  
  Arguments
  ---------
  source : str or array
    The source to be resampled.
  sink : str or None
    The sink for the resampled image.
  orientation : tuple or None
    The orientation specified by permutation and change in sign of (1,2,3).
  sink_shape : tuple or None
    The target shape of the resampled sink.
  source_resolution : tuple or None
    The resolution of the source (in length per pixel).
  sink_resolution : tuple or None
    The resolution of the resampled source (in length per pixel).
  interpolation : str 
    The method to use for interpolating to the resampled array.
  axes_order : str, list of tuples of int or None
    The axes pairs along which to resample the data at each step.
    If None, this is determined automatically. For a FileList source, 
    setting the first tuple should point to axis not indicating files.
    If 'size' the axis order is determined automatically to maximally reduce 
    the size of the array in each resampling step.
    If 'order' the axis order is chosen automatically to optimize io speed.
  method : 'shared' or 'memmap'
    Method to handle intermediate resampling results. If 'shared' use shared 
    memory, otherwise use a memory map on disk.
  processes : int, None or 'serial'
    Number of processes to use for parallel resampling, if None use maximal 
    processes available, if 'serial' process in serial.
  verbose : bool
    If True, display progress information.
  
  Returns
  -------
  sink : array or str
    The data or filename of resampled sink.

  Notes
  -----
  * Resolutions are assumed to be given for the axes of the intrinsic 
    orientation of the data and reference (as when viewed by ImageJ).
  * Orientation: permutation of 1,2,3 with potential sign, indicating which 
    axes map onto the reference axes, a negative sign indicates reversal 
    of that particular axes.
  * Only a minimal set of information to determine the resampling parameter 
    has to be given, e.g. source_shape and sink_shape.
  * The resampling is done by iterating two dimensional resampling steps.
  """
  #TODO: write full nd resampling routine extending cv2 lib.
  if verbose:
    timer = tmr.Timer()
  
  source = io.as_source(source)
  source_shape = source.shape
  ndim = len(source_shape)
  dtype = source.dtype
  order = source.order
  
  orientation = format_orientation(orientation)
  
  source_shape, sink_shape, source_resolution, sink_resolution = \
     resample_shape(source_shape=source_shape, sink_shape=sink_shape, 
                    source_resolution=source_resolution, sink_resolution=sink_resolution, 
                    orientation=orientation)
  
  sink_shape_in_source_orientation = orient_shape(sink_shape, orientation, inverse=True)
                                   
  interpolation = _interpolation_to_cv2(interpolation)

  if not isinstance(processes, int) and processes != 'serial':
    processes = io.mp.cpu_count()
  
  #determine order of resampling
  axes_order, shape_order = _axes_order(axes_order, source, sink_shape_in_source_orientation, order=order)
  
  if len(axes_order) == 0:
    if verbose:
      print('resampling: no resampling necessary, source has same size as sink!')
    if sink != source:
      return io.write(sink, source)
    else:
      return source
  
  #resample
  n_steps = len(axes_order)
  last_source = source
  delete_files = []
  try:
    for step, (axes, shape) in enumerate(zip(axes_order, shape_order)):
      # the final step writes straight into the sink unless a 
      # re-orientation still has to be applied afterwards
      if step == n_steps-1 and orientation is None:
        resampled = io.initialize(source=sink, shape=sink_shape, dtype=dtype, as_source=True)
      else:
        if method == 'shared':
          resampled = io.sma.create(shape, dtype=dtype, order=order, as_source=True)
        else:
          # NOTE(review): tempfile.mktemp is deprecated because the name can
          # be claimed by another process before the file is created.
          location = tempfile.mktemp() + '.npy'
          resampled = io.mmp.create(location, shape=shape, dtype=dtype, order=order, as_source=True)
          delete_files.append(location)

      #indices for non-resampled axes
      indices = [range(s) for d, s in enumerate(shape) if d not in axes]
      indices = list(itertools.product(*indices))
      n_indices = len(indices)
      
      #resample step
      last_source_virtual = last_source.as_virtual()
      resampled_virtual = resampled.as_virtual()
      _resample = ft.partial(_resample_2d, source=last_source_virtual, sink=resampled_virtual, axes=axes, shape=shape, 
                                           interpolation=interpolation, n_indices=n_indices, verbose=verbose)                       
      
      if processes == 'serial': 
        for index in indices:
          _resample(index=index)
      else:
        with concurrent.futures.ProcessPoolExecutor(processes) as executor:
          # consume the iterator so that exceptions raised in the workers 
          # propagate instead of being silently dropped
          list(executor.map(_resample, indices))
          
      last_source = resampled
    
    #fix orientation
    if orientation is not None:
      #permute
      per = orientation_to_permuation(orientation)
      resampled = resampled.transpose(per)

      #reverse axes
      reslice = False
      slicing = [slice(None)] * ndim
      for d, o in enumerate(orientation):
        if o < 0:
          slicing[d] = slice(None, None, -1)
          reslice = True
      if reslice:
        # index with a tuple: a list of slices is not a valid multi-axis index
        resampled = resampled[tuple(slicing)]
        
      if verbose:
        print("resample: re-oriented shape %r!" % (resampled.shape,))
    
      sink = io.write(sink, resampled)
    else: 
      sink = resampled
  finally:
    # remove intermediate memmap files even if a resampling step failed
    for f in delete_files:
      io.delete_file(f)
  
  if verbose:
    timer.print_elapsed_time('Resampling')
    
  return sink
Пример #5
0
def initialize_sink(sink=None, shape=None, dtype=None, order=None,
                    memory=None, location=None, mode=None, source=None,
                    return_buffer=True, as_1d=False,
                    return_shape=False, return_strides=False):
    """Initialize or create a sink and optionally return views of it.

    Arguments
    ---------
    sink : sink specification
      The sink to initialize.
    shape : tuple of int
      Optional shape of the sink. If None, inferred from the source.
    dtype : dtype
      Optional dtype of the sink. If None, inferred from the source.
    order : 'C', 'F' or None
      Optional order of the sink. If None, inferred from the source.
    memory : 'shared' or None
      If 'shared' create a shared memory sink.
    location : str
      Optional location specification of the sink.
    mode : object
      Forwarded unchanged to ``io.initialize``.
    source : Source or None
      Optional source to infer sink specifications from; passed to
      ``io.initialize`` as ``like``.
    return_buffer : bool
      If True, also return a buffer compatible with cython memory views.
    as_1d : bool
      If True, reshape the returned buffer to one dimension (order 'A').
      Only has an effect when `return_buffer` is True.
    return_shape : bool
      If True, also return shape of the sink.
    return_strides : bool
      If True, also return the element strides of the sink.

    Returns
    -------
    sink : Source
      The initialized sink. Returned on its own when no extra output is
      requested, otherwise as first element of a tuple.
    buffer : array
      Buffer of the sink (only if `return_buffer`).
    shape : array of int
      Shape of the sink (only if `return_shape`).
    strides : array of int
      Element strides of the sink (only if `return_strides`).
    """
    sink = io.initialize(sink, shape=shape, dtype=dtype, order=order,
                         memory=memory, location=location, mode=mode,
                         like=source, as_source=True)

    # collect the requested outputs in their fixed return order
    outputs = [sink]

    if return_buffer:
        buffer = sink.as_buffer()
        if buffer.dtype == bool:
            # boolean data is exposed as uint8
            # NOTE(review): the view is taken from `sink`, not from
            # `buffer` - confirm this is intended.
            buffer = sink.view('uint8')
        if as_1d:
            buffer = buffer.reshape(-1, order='A')
        outputs.append(buffer)

    if return_shape:
        outputs.append(np.array(sink.shape, dtype=int))

    if return_strides:
        outputs.append(np.array(sink.element_strides, dtype=int))

    # a lone sink is returned bare instead of as a 1-tuple
    if len(outputs) == 1:
        return outputs[0]
    return tuple(outputs)