Example #1
def main(argv):
    del argv  # Unused.
    tzero = time()
    path, dataset = FLAGS.input_volume.split(':')
    if _rank == 0:
        logging.info('Read hdf5 file {}'.format(path))
    with h5py.File(path, 'r') as f:
        segmentation = f[dataset]
        if _rank == 0:
            logging.info('Done reading.')
        bboxes = []
        for name, v in segmentation.attrs.items():
            if name.startswith('bounding_boxes'):
                for bbox in v:
                    bboxes.append(bounding_box.BoundingBox(bbox[0], bbox[1]))

        if not bboxes:
            bboxes.append(
                bounding_box.BoundingBox(start=(0, 0, 0),
                                         size=segmentation.shape[::-1]))

        shape = segmentation.shape
        lom_radius = [int(x) for x in FLAGS.lom_radius]
        if _rank == 0:
            logging.info('Compute partitions')
        logging.info('Segmentation shape: {}'.format(shape))
            logging.info('Bounding boxes: {}'.format(bboxes))
        corner, partitions = compute_partitions(
            segmentation[...], [float(x) for x in FLAGS.thresholds],
            lom_radius, FLAGS.id_whitelist, FLAGS.exclusion_regions,
            FLAGS.mask_configs, FLAGS.min_size)

    bboxes = adjust_bboxes(bboxes, np.array(lom_radius))

    path, dataset = FLAGS.output_volume.split(':')
    if _rank == 0:
        logging.info('Partition shape : {}'.format(partitions.shape))
        logging.info('Bounding boxes  : {}'.format(bboxes))
        logging.info('Corner          : {}'.format(corner))
        logging.info('Creating hdf5 file for the partitions...')
        with h5py.File(path, 'w') as f:
            ds = f.create_dataset(dataset,
                                  shape=shape,
                                  dtype=np.uint8,
                                  fillvalue=255,
                                  chunks=True,
                                  compression='gzip')
            s = partitions.shape
            ds[corner[2]:corner[2] + s[0], corner[1]:corner[1] + s[1],
               corner[0]:corner[0] + s[2]] = partitions
            ds.attrs['bounding_boxes'] = [(b.start, b.size) for b in bboxes]
            ds.attrs['partition_counts'] = np.array(
                np.unique(partitions, return_counts=True))
        logging.info('Finished in {} seconds.'.format(time() - tzero))
    return 0
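
The snippet above relies on absl FLAGS and an app.run entry point that are not shown. A minimal sketch of that scaffolding, with flag names taken from the code and all defaults/help strings assumed (the remaining flags follow the same pattern):

# Hypothetical absl scaffolding for the example above; flag names come from
# the snippet, defaults and help text are assumptions.
from absl import app
from absl import flags

FLAGS = flags.FLAGS

flags.DEFINE_string('input_volume', None, 'Input as <hdf5 path>:<dataset>.')
flags.DEFINE_string('output_volume', None, 'Output as <hdf5 path>:<dataset>.')
flags.DEFINE_list('thresholds', None, 'Active-fraction thresholds.')
flags.DEFINE_list('lom_radius', None, 'LOM radii as x,y,z.')
flags.DEFINE_integer('min_size', 10000, 'Dust threshold in voxels.')

if __name__ == '__main__':
    app.run(main)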
Example #2
def h5_sequential_chunk_writer(prediction_generator,
                               output_volumes,
                               output_shapes,
                               num_classes,
                               chunk_shape=(32, 64, 64),
                               overlap=(0, 0, 0),
                               min_logit=-0.5,
                               mpi=False):
    '''Sequentially write chunks (with overlap) from volumes.'''
    chunk_shape = np.array(chunk_shape)
    chunk_offset = chunk_shape // 2
    overlap = np.array(overlap)
    step_shape = chunk_shape - overlap
    output_volume_map = {}

    for vol in output_volumes.split(','):
        volname, path, dataset = vol.split(':')
        if not mpi:
            f = h5py.File(path, 'w')
        else:
            f = h5py.File(path, 'w', driver='mpio', comm=MPI.COMM_WORLD)

        output_shape = output_shapes[volname]
        output_volume_map[volname] = f.create_dataset(name=dataset,
                                                      shape=output_shape,
                                                      dtype='float32')
        logits_ds = f.create_dataset(name='logits',
                                     shape=list(output_shape) + [num_classes],
                                     fillvalue=min_logit,
                                     dtype='float32')
        max_bbox = bounding_box.BoundingBox(start=(0, 0, 0),
                                            size=output_shapes[volname])
        for p in prediction_generator:
            center, logits, pred = (p['center'], p['logits'],
                                    p['class_prediction'])
            pad_w_start = center - chunk_shape // 2 + overlap // 2
            pad_w_end = center + (chunk_shape + 1) // 2 - overlap // 2
            coord_offset = overlap // 2
            w_start = pad_w_start - coord_offset
            w_end = pad_w_end - coord_offset

            write_bbox = bounding_box.BoundingBox(start=w_start, end=w_end)

            write_bbox = bounding_box.intersection(write_bbox, max_bbox)
            read_bbox = bounding_box.BoundingBox(start=coord_offset,
                                                 size=write_bbox.size)

            write_slices = write_bbox.to_slice()
            read_slices = read_bbox.to_slice()
            write_slices = tuple([write_slices[i] for i in [2, 1, 0]])
            read_slices = tuple([read_slices[i] for i in [2, 1, 0]])
            output_volume_map[volname][write_slices] = pred[read_slices]
            logits_ds[write_slices] = logits[read_slices]
        f.close()
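
A hedged usage sketch for the writer: it consumes a generator of dicts with 'center', 'logits' and 'class_prediction' keys. The paths, shapes and dummy generator below are illustrative only; a cubic chunk is used so the xyz/zyx slice reversal is shape-safe.

# Illustrative driver for h5_sequential_chunk_writer; all values are made up.
import numpy as np

def dummy_predictions(chunk=(64, 64, 64), num_classes=2):
    for z in (32, 96):
        yield {
            'center': np.array((z, 64, 64)),  # chunk center
            'logits': np.full(chunk + (num_classes,), -0.5, np.float32),
            'class_prediction': np.zeros(chunk, np.float32),
        }

h5_sequential_chunk_writer(dummy_predictions(),
                           output_volumes='vol:/tmp/pred.h5:class_prediction',
                           output_shapes={'vol': (128, 128, 128)},
                           num_classes=2,
                           chunk_shape=(64, 64, 64))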
Example #3
def main(argv):
    del argv  # Unused.
    path, dataset = FLAGS.input_volume.split(':')

    logging.info(' dataset name: {}'.format(dataset))

    with h5py.File(path, 'r') as f:
        segmentation = f[dataset]
        bboxes = []
        for name, v in segmentation.attrs.items():
            if name.startswith('bounding_boxes'):
                for bbox in v:
                    bboxes.append(bounding_box.BoundingBox(bbox[0], bbox[1]))

        if not bboxes:
            bboxes.append(
                bounding_box.BoundingBox(start=(0, 0, 0),
                                         size=segmentation.shape[::-1]))

        shape = segmentation.shape
        lom_radius = [int(x) for x in FLAGS.lom_radius]

        logging.info('bounding box: {}, shape: {}, type: {}'.format(
            bboxes, shape, segmentation.dtype))
        logging.info('minv: {}, maxv: {}'.format(np.min(segmentation),
                                                 np.max(segmentation)))

        corner, partitions = compute_partitions(
            segmentation[...], [float(x) for x in FLAGS.thresholds],
            lom_radius, FLAGS.id_whitelist, FLAGS.exclusion_regions,
            FLAGS.mask_configs, FLAGS.min_size)

    bboxes = adjust_bboxes(bboxes, np.array(lom_radius))

    logging.info(' partition shape: {}, bboxes: {}'.format(
        partitions.shape, bboxes))

    path, dataset = FLAGS.output_volume.split(':')
    with h5py.File(path, 'w') as f:
        ds = f.create_dataset(dataset,
                              shape=shape,
                              dtype=np.uint8,
                              fillvalue=255,
                              chunks=True,
                              compression='gzip')
        s = partitions.shape
        ds[corner[2]:corner[2] + s[0], corner[1]:corner[1] + s[1],
           corner[0]:corner[0] + s[2]] = partitions
        ds.attrs['bounding_boxes'] = [(b.start, b.size) for b in bboxes]
        ds.attrs['partition_counts'] = np.array(
            np.unique(partitions, return_counts=True))
Example #4
def get_bboxes(union_bbox,
               chunk_size,
               overlap=(0, 0, 0),
               back_shift_small=False,
               backend='cloudvolume'):
    '''Use ffn subbox calculator to generate sequential overlapping bboxes'''
    if isinstance(union_bbox, Bbox):
        ffn_style_bbox = bounding_box.BoundingBox(np.array(union_bbox.minpt),
                                                  np.array(union_bbox.size3()))
    else:
        ffn_style_bbox = union_bbox

    calc = bounding_box.OrderlyOverlappingCalculator(
        outer_box=ffn_style_bbox,
        sub_box_size=chunk_size,
        overlap=overlap,
        include_small_sub_boxes=True,
        back_shift_small_sub_boxes=back_shift_small)
    bbs = list(calc.generate_sub_boxes())
    if backend == 'ffn':
        pass
    elif backend == 'cloudvolume':
        bbs = [Bbox(a=bb.start, b=bb.start + bb.size) for bb in bbs]
    else:
        raise ValueError('Use either ffn or cloudvolume')
    return bbs
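
Assuming the cloudvolume-style Bbox used in the isinstance check above, a minimal call might look like this (region and chunk sizes are illustrative):

# Tile a 512^3 region into overlapping 128^3 chunks; illustrative values.
from cloudvolume.lib import Bbox  # assumed import for the Bbox above

region = Bbox((0, 0, 0), (512, 512, 512))
chunks = get_bboxes(region, chunk_size=(128, 128, 128), overlap=(32, 32, 32))
print(len(chunks), chunks[0])  # cloudvolume Bbox objects by default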
Example #5
def h5_sequential_chunk_writer_v2(prediction_generator,
                                  output_volumes,
                                  output_shapes,
                                  num_classes,
                                  chunk_shape=(32, 64, 64),
                                  label_shape=(32, 64, 64),
                                  overlap=(0, 0, 0),
                                  sub_bbox=None,
                                  axes='zyx',
                                  mpi=False):
    '''Sequentially write chunks (with overlap) from volumes.'''
    chunk_shape = np.array(chunk_shape)
    label_shape = np.array(label_shape)
    overlap = np.array(overlap)
    output_volume_map = {}

    for vol in output_volumes.split(','):
        volname, path, dataset = vol.split(':')
        if not mpi:
            f = h5py.File(path, 'w')
        else:
            f = h5py.File(path, 'w', driver='mpio', comm=MPI.COMM_WORLD)

        output_shape = output_shapes[volname]
        logging.warning('output_shape %s', output_shape)
        logits_ds = f.create_dataset(name='logits',
                                     shape=list(output_shape) + [num_classes],
                                     dtype='float32')
        class_prediction_ds = f.create_dataset(name='class_prediction',
                                               shape=output_shape,
                                               dtype='float32')
        max_bbox = bounding_box.BoundingBox(start=(0, 0, 0),
                                            size=output_shapes[volname][::-1])
        logging.warning('bbox %s', max_bbox)
        for p in prediction_generator:
            center, logits, class_prediction = p['center'][0], p['logits'], p[
                'class_prediction']

            # deal with initial borders
            if (center - label_shape // 2 == 0).any():
                r_start = np.array([0, 0, 0])
                w_start = center - label_shape // 2
                r_size = label_shape
                w_size = label_shape
            else:
                r_start = overlap // 2
                w_start = center - label_shape // 2 + overlap // 2
                r_size = label_shape - overlap // 2
                w_size = label_shape - overlap // 2

            r_slc = np.s_[r_start[2]:r_start[2] + r_size[2],
                          r_start[1]:r_start[1] + r_size[1],
                          r_start[0]:r_start[0] + r_size[0], ]
            w_slc = np.s_[w_start[2]:w_start[2] + w_size[2],
                          w_start[1]:w_start[1] + w_size[1],
                          w_start[0]:w_start[0] + w_size[0], ]
            logits_ds[w_slc] = logits[r_slc]
            class_prediction_ds[w_slc] = class_prediction[r_slc]

        f.close()
Example #6
def get_num_of_bbox(input_offset, input_size, chunk_shape, overlap):
    union_bbox = bounding_box.BoundingBox(start=input_offset, size=input_size)
    sub_bboxes = get_bboxes(union_bbox,
                            chunk_size=chunk_shape,
                            overlap=overlap,
                            back_shift_small=True,
                            backend='ffn')
    return len(sub_bboxes)
Example #7
def _load_from_numpylike_v2(coord,
                            volume,
                            start_offset,
                            chunk_shape,
                            axes='zyx'):
    starts = np.array(coord) - start_offset
    slc = bounding_box.BoundingBox(start=starts, size=chunk_shape).to_slice()
    data = volume[slc[2], slc[1], slc[0], :]
    return data
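
Following the zyx convention of the surrounding code (an assumption), volume is channel-last (z, y, x, c) and coord is a chunk center in zyx:

# Illustrative call to _load_from_numpylike_v2; all values are made up.
import numpy as np

volume = np.zeros((64, 128, 128, 1), dtype=np.float32)  # (z, y, x, c)
chunk_shape = np.array((16, 32, 32))                    # zyx
coord = np.array((32, 64, 64))                          # chunk center, zyx
patch = _load_from_numpylike_v2(coord, volume,
                                start_offset=chunk_shape // 2,
                                chunk_shape=chunk_shape)
print(patch.shape)  # (16, 32, 32, 1)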
Example #8
def infer_bbox_from_empty_directory(input_dir):
    cpoints = glob.glob(join(input_dir, '**/seg-*/*/*/'), recursive=True)
    bboxes = []
    for cp in cpoints:
        src_dir = re.search(r'(.*)/.*/.*/', cp).groups()[0]
        res = re.search(r'seg-(\d+)_(\d+)_(\d+)_(\d+)_(\d+)_(\d+)',
                        src_dir).groups()
        ox, oy, oz, sx, sy, sz = [int(i) for i in res]
        bbox = bounding_box.BoundingBox(start=(ox, oy, oz), size=(sx, sy, sz))
        bboxes.append(bbox)
    return bboxes
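
The parser assumes checkpoint directories named seg-<ox>_<oy>_<oz>_<sx>_<sy>_<sz>; a quick illustration of the regex on a made-up path stem:

import re

src_dir = 'run0/seg-0_0_0_512_512_512'  # illustrative
ox, oy, oz, sx, sy, sz = map(int, re.search(
    r'seg-(\d+)_(\d+)_(\d+)_(\d+)_(\d+)_(\d+)', src_dir).groups())
print((ox, oy, oz), (sx, sy, sz))  # (0, 0, 0) (512, 512, 512)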
Example #9
def _load_from_numpylike(coord):
    starts = np.array(coord[0]) - (chunk_shape - 1) // 2
    slc = bounding_box.BoundingBox(start=starts,
                                   size=chunk_shape).to_slice()
    logging.warning('loading from %s %s, %s, %s', starts, chunk_shape, slc,
                    volume.shape)
    if volume_axes == 'zyx':
        data = volume[slc[0], slc[1], slc[2], :]
    elif volume_axes == 'xyz':
        data = volume[slc[2], slc[1], slc[0], :]
    else:
        raise ValueError('volume_axes must be either "zyx" or "xyz"')
    return data
Example #10
def _load_from_numpylike(coord):
    starts = np.array(coord[0]) - chunk_shape // 2
    slc = bounding_box.BoundingBox(start=starts,
                                   size=chunk_shape).to_slice()
    if volume_axes == 'zyx':
        data = volume[slc[0], slc[1], slc[2], :]
    elif volume_axes == 'xyz':
        data = volume[slc[2], slc[1], slc[0], :]
        data = data.transpose([2, 1, 0, 3])
        logging.warning('data shape %s', data.shape)
    else:
        raise ValueError('volume_axes must be either "zyx" or "xyz"')
    return data
Example #11
def get_chunk_bboxes(union_bbox, chunk_size):
    ffn_style_bbox = bounding_box.BoundingBox(
        np.array(union_bbox.minpt), np.array(union_bbox.size3()))

    calc = bounding_box.OrderlyOverlappingCalculator(
        outer_box=ffn_style_bbox,
        sub_box_size=chunk_size,
        overlap=[0, 0, 0],
        include_small_sub_boxes=True,
        back_shift_small_sub_boxes=False)
    bbs = list(calc.generate_sub_boxes())
    for ffn_bb in bbs:
        logging.warning('sub_bb: %s', ffn_bb)
    return bbs
Example #12
def _load_from_numpylike_with_pad(coord,
                                  volume,
                                  pad_start,
                                  pad_end,
                                  chunk_shape,
                                  sample_start=None,
                                  sample_size=None):
    '''load from numpy like with padding, all in zyx order.'''
    real_volume_shape = volume.shape
    pad_bbox = bounding_box.BoundingBox(start=(0, 0, 0),
                                        size=pad_start + real_volume_shape +
                                        pad_end)
    if sample_start is None and sample_size is None:
        real_bbox = bounding_box.BoundingBox(start=pad_start,
                                             size=real_volume_shape)
    else:
        real_bbox = bounding_box.BoundingBox(start=pad_start + sample_start,
                                             size=sample_size)
    tentative_bbox = bounding_box.BoundingBox(start=coord - chunk_shape // 2,
                                              size=chunk_shape)
    actual_bbox = bounding_box.intersection(tentative_bbox, real_bbox)
    if not actual_bbox:
        return None
    read_bbox = actual_bbox.adjusted_by(start=-pad_start, end=-pad_start)
    write_bbox = bounding_box.BoundingBox(start=actual_bbox.start -
                                          tentative_bbox.start,
                                          size=actual_bbox.size)
    output = np.zeros(chunk_shape, dtype=np.uint8)
    w_slc = write_bbox.to_slice()
    w_slc = tuple([w_slc[i] for i in [2, 1, 0]])
    r_slc = read_bbox.to_slice()
    r_slc = tuple([r_slc[i] for i in [2, 1, 0]])

    output[w_slc] = volume[r_slc]
    output = np.expand_dims(output, -1)  # append channel dim
    return output
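
A usage sketch: request a chunk whose footprint partially falls outside the (padded) volume; the out-of-range part stays zero-filled. Values below are illustrative.

# Illustrative call to _load_from_numpylike_with_pad; all values are made up.
import numpy as np

volume = np.random.randint(0, 2, size=(64, 64, 64)).astype(np.uint8)  # zyx
pad = np.array((8, 8, 8))
chunk = _load_from_numpylike_with_pad(coord=np.array((4, 4, 4)),
                                      volume=volume,
                                      pad_start=pad,
                                      pad_end=pad,
                                      chunk_shape=np.array((32, 32, 32)))
print(chunk.shape)  # (32, 32, 32, 1)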
Example #13
def divide_bounding_box(bbox, subvolume_size, overlap):
    """divide up into valid subvolumes."""
    # deal with parsed bbox missing "end" attr
    start = geom_utils.ToNumpy3Vector(bbox.start)
    size = geom_utils.ToNumpy3Vector(bbox.size)

    bbox = bounding_box.BoundingBox(start, size)

    calc = bounding_box.OrderlyOverlappingCalculator(
        outer_box=bbox,
        sub_box_size=subvolume_size,
        overlap=overlap,
        include_small_sub_boxes=True,
        back_shift_small_sub_boxes=False)

    return list(calc.generate_sub_boxes())
Example #14
def get_chunk_bboxes(union_bbox,
                     chunk_size,
                     overlap,
                     include_small_sub_boxes=True,
                     back_shift_small_sub_boxes=False):
    ffn_style_bbox = bounding_box.BoundingBox(np.array(union_bbox.minpt),
                                              np.array(union_bbox.size3()))

    calc = bounding_box.OrderlyOverlappingCalculator(
        outer_box=ffn_style_bbox,
        sub_box_size=chunk_size,
        overlap=overlap,
        include_small_sub_boxes=include_small_sub_boxes,
        back_shift_small_sub_boxes=back_shift_small_sub_boxes)

    bbs = [ffn_to_cv(ffn_bb) for ffn_bb in calc.generate_sub_boxes()]

    return bbs
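
ffn_to_cv is not defined in these examples; a plausible one-liner consistent with the inline conversion in Example #4 (an assumption, not the project's actual helper):

from cloudvolume.lib import Bbox  # assumed import

def ffn_to_cv(ffn_bb):
    # ffn BoundingBox -> cloudvolume Bbox via min corner and size (assumed).
    return Bbox(a=ffn_bb.start, b=ffn_bb.start + ffn_bb.size)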
Example #15
    def _load_from_numpylike(coord, volname):
        """Load from coord and volname, handling 3d or 4d volumes."""
        volume = volume_map[volname.decode(
            'ascii')]  # fetch volume with shape (X, Y)
        volume = np.expand_dims(volume,
                                axis=-1)  # volume now has shape (X, Y, 1)
        starts = np.array(coord) - start_offset

        # Verify that coordinates are within the size of the data region.
        assert np.all(np.array(coord) + start_offset <= volume.shape), \
            "Coordinate {} with offset {} (= {}) is beyond image dimensions of {}" \
            " for volume {}; ".format(coord, start_offset, np.array(coord) +
                                      start_offset, volume.shape, volname
                                      )
        assert np.all(np.array(coord) - start_offset >= 0), \
            "Coordinate {} with offset {} (= {}) is <= 0 with image dimensions of {} " \
            "for volume {}".format(coord, start_offset, np.array(coord) - start_offset,
                                   volume.shape, volname
                                   )

        slc_zyx = bounding_box.BoundingBox(start=starts, size=shape).to_slice()
        # if volume.ndim == 4:
        #     slc_zyx = np.index_exp[:] + slc_zyx

        data = volume[slc_zyx[::-1]]

        # Verify that data is non-empty and print data warning if it is. Useful for
        # debugging since TensorFlow won't produce an informative error in this case.
        assert data.ndim != 0 and data.size != 0, \
            "empty data at coord {} in volume {}".format(coord, volname)

        # If 4d, move channels to back.  Otherwise, just add flat channels dim.
        # if data.ndim == 4:
        #     data = np.rollaxis(data, 0, start=4)
        # else:
        data = np.expand_dims(data, -1)  # shape (X, Y, Z, n_channels)

        # Add flat batch dim and return shape (batch_size, X, Y, Z, n_channels)
        data = np.expand_dims(data, 0).astype(np.float32)
        return data
Example #16
def compute_partitions(seg_array,
                       thresholds,
                       lom_radius,
                       id_whitelist=None,
                       exclusion_regions=None,
                       mask_configs=None,
                       min_size=10000):
  """Computes quantized fractions of active voxels in a local object mask.

  Args:
    thresholds: list of activation voxel fractions to use for partitioning.
    lom_radius: LOM radii as [x, y, z]
    id_whitelist: (optional) whitelist of object IDs for which to compute the
        partition numbers
    exclusion_regions: (optional) list of x, y, z, r tuples specifying regions
        to mark as excluded (with 255). The regions are spherical, with
        (x, y, z) definining the center of the sphere and 'r' specifying its
        radius. All values are in voxels.
    mask_configs: (optional) MaskConfigs proto; any locations where at least
        one voxel of the LOM is masked will be marked as excluded (255).

  Returns:
    tuple of:
      corner of output subvolume as (x, y, z)
      uint8 ndarray of active fraction voxels
  """
  seg_array = segmentation.clear_dust(seg_array, min_size=min_size)
  assert seg_array.ndim == 3

  lom_radius = np.array(lom_radius)
  lom_radius_zyx = lom_radius[::-1]
  lom_diam_zyx = 2 * lom_radius_zyx + 1

  def _sel(i):
    if i == 0:
      return slice(None)
    else:
      return slice(i, -i)

  valid_sel = tuple(_sel(x) for x in lom_radius_zyx)
  output = np.zeros(seg_array[valid_sel].shape, dtype=np.uint8)
  logging.info('Output shape: %s', output.shape)
  corner = lom_radius

  if exclusion_regions is not None:
    sz, sy, sx = output.shape
    hz, hy, hx = np.mgrid[:sz, :sy, :sx]

    hz += corner[2]
    hy += corner[1]
    hx += corner[0]

    for x, y, z, r in exclusion_regions:
      mask = (hx - x)**2 + (hy - y)**2 + (hz - z)**2 <= r**2
      output[mask] = 255

  labels = set(np.unique(seg_array))
  logging.info('Labels to process: %d', len(labels))

  if id_whitelist is not None:
    labels &= set(id_whitelist)

  mask = load_mask(mask_configs,
                   bounding_box.BoundingBox(
                       start=(0, 0, 0), size=seg_array.shape[::-1]),
                   lom_diam_zyx)
  if mask is not None:
    output[mask] = 255

  fov_volume = np.prod(lom_diam_zyx)
  label_groups = np.array_split(np.asarray(list(labels)), comm.size)
  logging.info('Labels for this rank: %s', label_groups[rank])
  gather_output = None
  if rank == 0:
    # gather_output = np.empty((comm.size,)+output.shape, dtype=output.dtype)
    gather_output = np.empty(output.shape, dtype=output.dtype)

  for l in label_groups[rank]:
    # Don't create a mask for the background component.
    if l == 0:
      continue

    object_mask = (seg_array == l)

    svt = _summed_volume_table(object_mask)
    active_fraction = _query_summed_volume(svt, lom_diam_zyx) / fov_volume
    assert active_fraction.shape == output.shape

    # Drop context that is only necessary for computing the active fraction
    # (i.e. one LOM radius in every direction).
    object_mask = object_mask[valid_sel]

    # TODO(mjanusz): Use np.digitize here.
    for i, th in enumerate(thresholds):
      output[object_mask & (active_fraction < th) & (output == 0)] = i + 1

    output[object_mask & (active_fraction >= thresholds[-1]) &
           (output == 0)] = len(thresholds) + 1

    logging.info('Done processing %d', l)

  comm.Reduce(output, gather_output, MPI.SUM, root=0)
  # comm.Gather(output, gather_output, root=0)
  if rank == 0:
    # gather_output = np.sum(gather_output, axis=0)
    logging.info('Nonzero values: %d', np.sum(gather_output > 0))
    return corner, gather_output
  else:
    return None, None
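
_summed_volume_table and _query_summed_volume are not shown here; the standard 3-D integral-image construction they imply looks roughly like this (a sketch, not necessarily the exact implementation):

import numpy as np

def _summed_volume_table(val):
    # 3-D integral image: cumulative sums along each axis, zero-padded in
    # front so border boxes need no special-casing.
    svt = val.astype(np.int64)
    svt = svt.cumsum(axis=0).cumsum(axis=1).cumsum(axis=2)
    return np.pad(svt, [[1, 0], [1, 0], [1, 0]], mode='constant')

def _query_summed_volume(svt, diam):
    # Box sums via inclusion-exclusion over the 8 corners; 'VALID' mode, so
    # only voxels with full diam context produce a value.
    return (svt[diam[0]:, diam[1]:, diam[2]:]
            - svt[diam[0]:, diam[1]:, :-diam[2]]
            - svt[diam[0]:, :-diam[1], diam[2]:]
            - svt[:-diam[0], diam[1]:, diam[2]:]
            + svt[:-diam[0], :-diam[1], diam[2]:]
            + svt[:-diam[0], diam[1]:, :-diam[2]]
            + svt[diam[0]:, :-diam[1], :-diam[2]]
            - svt[:-diam[0], :-diam[1], :-diam[2]])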
Example #17
def predict_input_fn_h5(input_volume, input_offset, input_size, chunk_shape,
                        label_shape, overlap, batch_size, offset, scale,
                        var_threshold):
    """MPI inference of h5.
  
  For incoming h5 volume, break down into sub bboxes, and use subsets according
  to mpi rank
  """
    # volname, path, dataset = input_volume.split(':')
    path, dataset = input_volume.split(':')
    if input_offset is None or input_size is None:
        with h5py.File(path, 'r') as f:
            input_size = f[dataset].shape[::-1]
        input_offset = (0, 0, 0)
    slc = np.s_[input_offset[2]:input_offset[2] + input_size[2],
                input_offset[1]:input_offset[1] + input_size[1],
                input_offset[0]:input_offset[0] + input_size[0], ]
    logging.warning('slc: %s', slc)

    f_in = h5py.File(path, 'r')
    data = f_in[dataset]

    logging.warning('data_shape %s', data.shape)
    # this bbox coord is relative to offset

    if mpi_rank == 0:
        union_bbox = bounding_box.BoundingBox(start=input_offset,
                                              size=input_size)
        sub_bboxes = get_bboxes(union_bbox,
                                chunk_size=chunk_shape,
                                overlap=overlap,
                                back_shift_small=True,
                                backend='ffn')
        ranked_sub_bboxes = np.array_split(sub_bboxes, mpi_size)
    else:
        ranked_sub_bboxes = None

    ranked_sub_bboxes = mpi_comm.scatter(ranked_sub_bboxes, 0)

    logging.warning('num_sub_bbox %d %s', len(ranked_sub_bboxes),
                    ranked_sub_bboxes[0])
    logging.warning('bbox %s %s', ranked_sub_bboxes[0].start,
                    ranked_sub_bboxes[0].end)

    def sub_bbox_iterator():
        for sb in ranked_sub_bboxes:
            yield [(sb.start + sb.end) // 2]

    ds = tf.data.Dataset.from_generator(generator=sub_bbox_iterator,
                                        output_types=(tf.int64),
                                        output_shapes=(tf.TensorShape((1, 3))))
    ds = ds.map(lambda coord: (coord, load_from_h5(coord, data, chunk_shape)),
                num_parallel_calls=tf.data.experimental.AUTOTUNE)
    ds = ds.map(lambda coord, image:
                (coord, preprocess_image(image, offset, scale)),
                num_parallel_calls=tf.data.experimental.AUTOTUNE)
    ds = ds.map(lambda coord, image: {
        'center': coord,
        'image': image
    },
                num_parallel_calls=tf.data.experimental.AUTOTUNE)
    ds = ds.batch(batch_size)
    return ds
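
load_from_h5 is assumed by the pipeline above; one plausible shape for it wraps the h5 read in tf.py_function so it can run inside Dataset.map (a hypothetical helper; the zyx coordinate convention is an assumption):

import numpy as np
import tensorflow as tf

def load_from_h5(coord, data, chunk_shape):
    # Hypothetical: read a chunk_shape block centered at coord (zyx) from the
    # open h5 dataset 'data' and append a channel dimension.
    chunk_shape = np.asarray(chunk_shape)

    def _load(coord_t):
        start = coord_t.numpy()[0] - chunk_shape // 2  # coord has shape (1, 3)
        z, y, x = start
        dz, dy, dx = chunk_shape
        block = data[z:z + dz, y:y + dy, x:x + dx]
        return block.astype(np.float32)[..., np.newaxis]

    image = tf.py_function(_load, [coord], tf.float32)
    image.set_shape(list(chunk_shape) + [1])
    return image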