Example #1
def test_realign_bricks_to_new_grid_WITH_HALO():
    grid = Grid( (10,20), (12,3) )
    bounding_box = np.array([(15,30), (95,290)])
    volume = np.random.randint(0,10, (100,300) )

    original_bricks, _num_bricks = generate_bricks_from_volume_source( bounding_box, grid, partial(extract_subvol, volume), DebugClient() )

    halo = 1
    halo_shape = np.array([1,1])
    new_grid = Grid((20,10), (0,0), halo)
    new_bricks = realign_bricks_to_new_grid(new_grid, original_bricks).compute()

    new_logical_boxes = list(brick.logical_box for brick in new_bricks)

    assert len(new_bricks) == 5 * 26, f"{len(new_bricks)}" # from (0,30) -> (100,290)
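    # (With new_grid block shape (20,10) and offset (0,0), the logical boxes tile
    #  5 blocks of 20 along axis 0 spanning [0,100) and 26 blocks of 10 along axis 1 spanning [30,290).)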
    
    for logical_box, brick in zip(new_logical_boxes, new_bricks):
        assert isinstance( brick, Brick ), f"Got {type(brick)}"
        assert (brick.logical_box == logical_box).all()

        # logical_box must be exactly one block
        assert ((brick.logical_box[1] - brick.logical_box[0]) == new_grid.block_shape).all()
        
        # Must be grid-aligned
        assert ((brick.logical_box - new_grid.offset) % new_grid.block_shape == 0).all()
        
        # Should match logical_box+halo, except for edges
        assert (brick.physical_box == box_intersection( brick.logical_box + (-halo_shape, halo_shape), bounding_box )).all()
        
        # Volume shape must match
        assert (brick.volume.shape == brick.physical_box[1] - brick.physical_box[0]).all()
        
        # Volume data must match
        assert (brick.volume == extract_subvol( volume, brick.physical_box )).all()
Example #2
def test_split_brick_WITH_HALO():
    halo = 1
    grid = Grid( (10,20), (12,3), halo )
    volume = np.random.randint(0,10, (100,300) )
    
    # Test with the first brick in the grid
    physical_start = np.array(grid.offset)
    logical_start = physical_start // grid.block_shape * grid.block_shape
    logical_stop = logical_start + grid.block_shape
    
    physical_stop = logical_stop+halo # Not always true, but happens to be true in this case.
    
    logical_box = np.array([logical_start, logical_stop])
    physical_box = np.array([physical_start, physical_stop])
    
    assert (logical_box == [(10,0), (20,20)]).all()
    assert (physical_box == [(12,3), (21,21)]).all()

    original_brick = Brick( logical_box, physical_box, extract_subvol(volume, physical_box) )

    # New grid scheme
    new_grid = Grid((2,10), (0,0))
    
    try:
        _fragments = split_brick(new_grid, original_brick)
    except AssertionError:
        pass # Expected failure: Forbidden to split bricks that have a halo
    else:
        assert False, "Did not encounter the expected assertion.  split_brick() should fail for bricks that have a halo."
Example #3
    def _consolidate_and_pad(self, slab_index, input_wall, scale, output_service, align=True, pad=True):
        """
        Consolidate (align) and pad the given BrickWall.

        Note: UNPERSISTS the input data and returns the new, realigned/padded data.

        Args:
            scale: The pyramid scale of the data.
            
            output_service: The output_service to align to and pad from
            
            align: If False, skip the alignment step.
                  (Only use this if the bricks are already aligned.)
            
            pad: If False, skip the padding step
        
        Returns a pre-executed and persisted BrickWall.
        """
        output_writing_grid = Grid(output_service.preferred_message_shape)

        if not align or output_writing_grid.equivalent_to(input_wall.grid):
            realigned_wall = input_wall
            realigned_wall.persist_and_execute(f"Slab {slab_index}: Scale {scale}: Persisting pre-aligned bricks", logger)
        else:
            # Consolidate bricks to full-size, aligned blocks (shuffles data)
            realigned_wall = input_wall.realign_to_new_grid( output_writing_grid )
            realigned_wall.persist_and_execute(f"Slab {slab_index}: Scale {scale}: Shuffling bricks into alignment", logger)

            # Discard original
            input_wall.unpersist()
        
        if not pad:
            return realigned_wall

        # Pad from previously-existing pyramid data until
        # we have full storage blocks, e.g. (64,64,64),
        # but not necessarily full bricks, e.g. (64,64,6400)
        storage_block_width = output_service.block_width
        output_padding_grid = Grid( (storage_block_width, storage_block_width, storage_block_width), output_writing_grid.offset )
        output_accessor_func = partial(output_service.get_subvolume, scale=scale)
        
        padded_wall = realigned_wall.fill_missing(output_accessor_func, output_padding_grid)
        padded_wall.persist_and_execute(f"Slab {slab_index}: Scale {scale}: Padding", logger)

        # Discard old
        realigned_wall.unpersist()

        return padded_wall
Example #4
    def _partition_input(self):
        """
        Map the input segmentation
        volume from DVID into an RDD of (volumePartition, data),
        using the config's bounding-box setting for the full volume region,
        using the input 'message-block-shape' as the partition size.

        Returns: (RDD, bounding_box_zyx, partition_grid)
            where:
                - RDD is (volumePartition, data)
                - bounding box is a tuple (start_zyx, stop_zyx)
                - partition_grid is the Grid used to partition the volume
        """
        input_config = self.config_data["input"]
        options = self.config_data["options"]

        # repartition to be z=blksize, y=blksize, x=runlength
        brick_shape_zyx = input_config["message-block-shape"][::-1]
        input_grid = Grid(brick_shape_zyx, (0,0,0))
        
        input_bb_zyx = np.array(input_config["bounding-box"])[:,::-1]

        # Aim for 2 GB RDD partitions
        GB = 2**30
        target_partition_size_voxels = 2 * GB // np.uint64().nbytes
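        # 2 * 2**30 bytes / 8 bytes per uint64 voxel == 268,435,456 voxels per partition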

        sparkdvid_input_context = sparkdvid(self.sc, input_config["server"], input_config["uuid"], self)
        bricks = sparkdvid_input_context.parallelize_bounding_box( input_config["segmentation-name"], input_bb_zyx, input_grid, target_partition_size_voxels )
        return bricks, input_bb_zyx, input_grid
Example #5
def test_sparse_boxes_WITH_OFFSET():
    block_mask = np.zeros((5, 6, 7), dtype=bool)

    # since mask offset is 20, this spans 3 bricks (physical: 20-70, logical: 0-90)
    block_mask[0, 0, 0:5] = True

    # spans a single brick (physical: 30-60, logical: 30-60)
    block_mask[0, 1, 1:4] = True

    block_mask_resolution = 10

    # MASK STARTS AT OFFSET
    mask_box_start = np.array([0, 10, 20])
    mask_box_stop = mask_box_start + 10 * np.array(block_mask.shape)
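    # mask_box_stop == [50, 70, 90]; the mask covers [0,50) x [10,70) x [20,90) in voxel coordinates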

    block_mask_box = (mask_box_start, mask_box_stop)
    brick_grid = Grid((10, 10, 30), (0, 0, 0))

    sparse_block_mask = SparseBlockMask(block_mask, block_mask_box,
                                        block_mask_resolution)
    logical_boxes = sparse_block_mask.sparse_boxes(brick_grid,
                                                   return_logical_boxes=True)

    assert (logical_boxes == [[[0, 10, 0], [10, 20, 30]],
                              [[0, 10, 30], [10, 20, 60]],
                              [[0, 10, 60], [10, 20, 90]],
                              [[0, 20, 30], [10, 30, 60]]]).all()

    physical_boxes = sparse_block_mask.sparse_boxes(brick_grid,
                                                    return_logical_boxes=False)

    assert (physical_boxes == [[[0, 10, 20], [10, 20, 30]],
                               [[0, 10, 30], [10, 20, 60]],
                               [[0, 10, 60], [10, 20, 70]],
                               [[0, 20, 30], [10, 30, 60]]]).all()
Example #6
def test_sparse_boxes_NO_OFFSET():
    block_mask = np.zeros((5, 6, 7), dtype=bool)

    block_mask[0, 0, 0:5] = True

    block_mask[0, 1, 1:4] = True

    block_mask_resolution = 10

    # MASK STARTS AT ORIGIN (NO OFFSET)
    mask_box_start = np.array([0, 0, 0])
    mask_box_stop = mask_box_start + 10 * np.array(block_mask.shape)

    block_mask_box = (mask_box_start, mask_box_stop)
    brick_grid = Grid((10, 10, 30))

    sparse_block_mask = SparseBlockMask(block_mask, block_mask_box,
                                        block_mask_resolution)
    logical_boxes = sparse_block_mask.sparse_boxes(brick_grid,
                                                   return_logical_boxes=True)
    assert (logical_boxes == [[[0, 0, 0], [10, 10, 30]],
                              [[0, 0, 30], [10, 10, 60]],
                              [[0, 10, 0], [10, 20, 30]],
                              [[0, 10, 30], [10, 20, 60]]]).all()

    physical_boxes = sparse_block_mask.sparse_boxes(brick_grid,
                                                    return_logical_boxes=False)
    assert (physical_boxes == [[[0, 0, 0], [10, 10, 30]],
                               [[0, 0, 30], [10, 10, 50]],
                               [[0, 10, 10], [10, 20, 30]],
                               [[0, 10, 30], [10, 20, 40]]]).all()
Example #7
def test_generate_bricks_WITH_HALO():
    halo = 1
    halo_shape = np.array([1,1])
    grid = Grid( (10,20), (12,3), halo )
    bounding_box = np.array([(15,30), (95,290)])
    volume = np.random.randint(0,10, (100,300) )

    bricks, num_bricks = generate_bricks_from_volume_source( bounding_box, grid, partial(extract_subvol, volume), DebugClient() )
    bricks = bricks.compute()

    assert len(bricks) == 9 * 14 == num_bricks
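    # 9 blocks of 10 span [12,102) along axis 0; 14 blocks of 20 span [23,303) along axis 1.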
    
    for brick in bricks:
        assert isinstance( brick, Brick )
        assert brick.logical_box.shape == (2,2)
        assert brick.physical_box.shape == (2,2)

        # logical_box must be exactly one block
        assert ((brick.logical_box[1] - brick.logical_box[0]) == grid.block_shape).all()
        
        # Must be grid-aligned
        assert ((brick.logical_box - grid.offset) % grid.block_shape == 0).all()
        
        # Physical == logical+halo, except for bounding-box edges
        assert (brick.physical_box == box_intersection( brick.logical_box + (-halo_shape, halo_shape), bounding_box )).all()
        
        # Volume shape must match
        assert (brick.volume.shape == brick.physical_box[1] - brick.physical_box[0]).all()
        
        # Volume data must match
        assert (brick.volume == extract_subvol( volume, brick.physical_box )).all()
Example #8
def test_generate_bricks():
    grid = Grid( (10,20), (12,3) )
    bounding_box = np.array([(15,30), (95,290)])
    volume = np.random.randint(0,10, (100,300) )

    bricks, num_bricks = generate_bricks_from_volume_source( bounding_box, grid, partial(extract_subvol, volume), DebugClient() )

    bricks = bricks.compute()
    assert len(bricks) == 9 * 14 == num_bricks
    
    for brick in bricks:
        assert isinstance( brick, Brick )
        assert brick.logical_box.shape == (2,2)
        assert brick.physical_box.shape == (2,2)

        # logical_box must be exactly one block
        assert ((brick.logical_box[1] - brick.logical_box[0]) == grid.block_shape).all()
        
        # Must be grid-aligned
        assert ((brick.logical_box - grid.offset) % grid.block_shape == 0).all()
        
        # Must not exceed bounding box
        assert (brick.physical_box == box_intersection( brick.logical_box, bounding_box )).all()
        
        # Volume shape must match
        assert (brick.volume.shape == brick.physical_box[1] - brick.physical_box[0]).all()
        
        # Volume data must match
        assert (brick.volume == extract_subvol( volume, brick.physical_box )).all()

        # __sizeof__ must include the volume
        assert sys.getsizeof(brick) > sys.getsizeof(brick.volume)
Example #9
    def translate(self, offset_zyx):
        """
        Translate all bricks by the given offset.
        Does not change the brick data, just the logical/physical boxes.
        
        Also, translates the bounding box and grid.
        """
        new_bounding_box = None
        if self.bounding_box is not None:
            new_bounding_box = self.bounding_box + offset_zyx

        new_grid = Grid(self.grid.block_shape, self.grid.offset + offset_zyx)

        def translate_brick(brick):
            # FIXME: This is needlessly inefficient for compressed bricks,
            #        since it uncompresses and recompresses the volume,
            #        but currently the Brick constructor doesn't allow me to
            #        provide the compressed form directly.
            return Brick(brick.logical_box + offset_zyx,
                         brick.physical_box + offset_zyx,
                         brick.volume,
                         location_id=tuple(brick.logical_box[0] //
                                           new_grid.block_shape),
                         compression=brick.compression)

        translated_bricks = self.bricks.map(translate_brick)

        return BrickWall(new_bounding_box, new_grid, translated_bricks,
                         self.num_bricks)
Example #10
    def _hotknife_destripe(self, bricked_slab_wall, slab_index):
        options = self.config["copygrayscale"]
        assert options["slab-axis"] == 'z', \
            "To use hotknife-destripe, processing slabs must be cut across the Z axis"

        wall_shape = self.output_service.bounding_box_zyx[1] - self.output_service.bounding_box_zyx[0]
        z_slice_shape = (1,) + (*wall_shape[1:],)
        z_slice_grid = Grid( z_slice_shape )
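        # e.g. if the output wall spans (Z, Y, X) == (64, 2048, 1024), each grid box is a single slice of shape (1, 2048, 1024)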

        z_slice_slab = bricked_slab_wall.realign_to_new_grid( z_slice_grid )
        z_slice_slab.persist_and_execute(f"Slab {slab_index}: Constructing slices of shape {z_slice_shape}", logger)

        # This assertion could be lifted if we adjust seams as needed before calling destripe(),
        # but for now I have no use-case for volumes that don't start at (0,0)
        assert (bricked_slab_wall.bounding_box[0, 1:] == (0,0)).all(), \
            "Input bounding box must start at YX == (0,0)"

        seams = options["hotknife-seams"]
        def destripe_brick(brick):
            assert brick.volume.shape[0] == 1
            adjusted_slice = destripe(brick.volume[0], seams)
            return Brick(brick.logical_box, brick.physical_box, adjusted_slice[None], location_id=brick.location_id)

        adjusted_bricks = z_slice_slab.bricks.map(destripe_brick)
        adjusted_wall = BrickWall( bricked_slab_wall.bounding_box,
                                   bricked_slab_wall.grid,
                                   adjusted_bricks )

        adjusted_wall.persist_and_execute(f"Slab {slab_index}: Destriping slices", logger)
        return adjusted_wall
Example #11
def main():
    # Hard-coded parameters
    prod = 'emdata4:8900'
    master = (prod, find_master(prod))
    master_seg = (*master, 'segmentation')

    # I accidentally corrupted the labelindex of bodies in this region
    patch_box = 20480 + np.array([[0, 0, 0], [1024, 1024, 1024]])
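    # i.e. patch_box == [[20480, 20480, 20480], [21504, 21504, 21504]] (zyx)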

    with Timer("Fetching supervoxels", logger):
        boxes = boxes_from_grid(patch_box, Grid((64, 64, 6400)), clipped=True)
        sv_sets = compute_parallel(partial(_fetch_svs, master_seg),
                                   boxes,
                                   processes=32,
                                   ordered=False,
                                   leave_progress=True)
        svs = set(chain(*sv_sets)) - set([0])

    bodies = set(fetch_mapping(*master_seg, svs))

    with Timer(f"Repairing {len(bodies)} labelindexes", logger):
        compute_parallel(partial(_repair_index, master_seg),
                         bodies,
                         processes=32,
                         ordered=False,
                         leave_progress=True)

    print("DONE.")
Example #12
def test_boxes_from_grid_0():
    # Simple: bounding_box starts at zero, no offset
    grid = Grid( (10,20), (0,0) )
    bounding_box = [(0,0), (100,300)]
    boxes = np.array(list(boxes_from_grid(bounding_box, grid)))
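    # 10 blocks along axis 0 and 15 along axis 1 -> 150 boxes, each a (2,2) [start, stop] pair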
    assert boxes.shape == (np.prod( np.array(bounding_box[1]) / grid.block_shape ), 2, 2)
    assert (boxes % grid.block_shape == 0).all()
    assert (boxes[:, 1, :] - boxes[:, 0, :] == grid.block_shape).all()
Example #13
def test_pad_brick_data_from_volume_source_NO_PADDING_NEEDED():
    source_volume = np.random.randint(0,10, (100,300) )
    logical_box = [(1,0), (11,20)]
    physical_box = [(6,10), (11, 15)]
    brick = Brick( logical_box, physical_box, extract_subvol(source_volume, physical_box) )
    
    padding_grid = Grid( (5,5), offset=(1,0) )
    padded_brick = pad_brick_data_from_volume_source( padding_grid, partial(extract_subvol, source_volume), brick )

    assert padded_brick is brick, "Expected to get the same brick back."
Example #14
def test_pad_brick_data_from_volume_source():
    source_volume = np.random.randint(0,10, (100,300) )
    logical_box = [(1,0), (11,20)]
    physical_box = [(3,8), (7, 13)]
    brick = Brick( logical_box, physical_box, extract_subvol(source_volume, physical_box) )
    
    padding_grid = Grid( (5,5), offset=(1,0) )
    padded_brick = pad_brick_data_from_volume_source( padding_grid, partial(extract_subvol, source_volume), brick )
    
    assert (padded_brick.logical_box == brick.logical_box).all()
    assert (padded_brick.physical_box == [(1,5), (11, 15)]).all()
    assert (padded_brick.volume == extract_subvol(source_volume, padded_brick.physical_box)).all()
Example #15
def test_realign_bricks_to_same_grid():
    """
    The realign function has a special optimization to
    avoid realigning bricks that are already aligned.
    """
    grid = Grid( (10,20), (12,3) )
    bounding_box = np.array([(15,30), (95,290)])
    def assert_if_called(box):
        assert False, ("Shouldn't get here, since the bricks were generated with lazy=True "
                       "and realignment shouldn't have attempted to split any bricks.")

    original_bricks, _num_bricks = generate_bricks_from_volume_source( bounding_box, grid, assert_if_called, DebugClient(), lazy=True )
    new_bricks = realign_bricks_to_new_grid(grid, original_bricks)
    
    import dask.bag
    assert isinstance(new_bricks, dask.bag.Bag)
    
    # If we attempt to realign to a different grid,
    # we'll get an assertion because it will have to call create_brick_volume, above.
    with pytest.raises(AssertionError):
        realign_bricks_to_new_grid(Grid((20,10)), original_bricks).compute()
Example #16
def test_boxes_from_grid_1():
    # Set a non-aligned bounding box
    grid = Grid( (10,20), (0,0) )
    bounding_box = np.array([(15,30), (95,290)])
    
    aligned_bounding_box = (  bounding_box[0]                          // grid.block_shape * grid.block_shape,
                             (bounding_box[1] + grid.block_shape - 1 ) // grid.block_shape * grid.block_shape )
    
    aligned_bb_shape = aligned_bounding_box[1] - aligned_bounding_box[0]
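    # Here the aligned bounding box is [(10,20), (100,300)], so its shape is (90,280) -> 9*14 == 126 boxes.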
    
    boxes = np.array(list(boxes_from_grid(bounding_box, grid)))
    assert boxes.shape == (np.prod( aligned_bb_shape / grid.block_shape ), 2, 2)
    assert (boxes % grid.block_shape == 0).all()
    assert (boxes[:, 1, :] - boxes[:, 0, :] == grid.block_shape).all()
Example #17
def test_split_brick():
    grid = Grid( (10,20), (12,3) )
    volume = np.random.randint(0,10, (100,300) )
    
    # Test with the first brick in the grid
    physical_start = np.array(grid.offset)
    logical_start = physical_start // grid.block_shape * grid.block_shape
    logical_stop = logical_start + grid.block_shape
    
    physical_stop = logical_stop # Not always true, but happens to be true in this case.
    
    logical_box = np.array([logical_start, logical_stop])
    physical_box = np.array([physical_start, physical_stop])
    
    assert (logical_box == [(10,0), (20,20)]).all()
    assert (physical_box == [(12,3), (20,20)]).all()

    original_brick = Brick( logical_box, physical_box, extract_subvol(volume, physical_box) )

    # New grid scheme
    new_grid = Grid((2,10), (0,0))
    fragments = split_brick(new_grid, original_brick)
    boxes = list(box_as_tuple(frag.logical_box) for frag in fragments)
    
    assert boxes == [ # ((10, 0), (14, 10)),  # <--- Not present. These new boxes intersect with the original logical_box,
                      # ((10, 10), (14, 20)), # <--- but there is no physical data for them in the original brick.
                      ((12, 0), (14, 10)),
                      ((12, 10), (14, 20)),
                      ((14, 0), (16, 10)),
                      ((14, 10), (16, 20)),
                      ((16, 0), (18, 10)),
                      ((16, 10), (18, 20)),
                      ((18, 0), (20, 10)),
                      ((18, 10), (20, 20)) ]
    
    for frag in fragments:
        assert (frag.volume == extract_subvol(volume, frag.physical_box)).all()
Example #18
def test_boxes_from_grid_2():
    # Use a grid offset
    grid = Grid( (10,20), (2,3) )
    bounding_box = np.array([(5,10), (95,290)])
    
    aligned_bounding_box = (  bounding_box[0]                          // grid.block_shape * grid.block_shape,
                             (bounding_box[1] + grid.block_shape - 1 ) // grid.block_shape * grid.block_shape )
    
    aligned_bb_shape = aligned_bounding_box[1] - aligned_bounding_box[0]
    
    boxes = np.array(list(boxes_from_grid(bounding_box, grid)))
    assert boxes.shape == (np.prod( aligned_bb_shape / grid.block_shape ), 2, 2)
    
    # Boxes should be offset by grid.offset.
    assert ((boxes - grid.offset) % grid.block_shape == 0).all()
    assert (boxes[:, 1, :] - boxes[:, 0, :] == grid.block_shape).all()
Example #19
    def downsample(self, block_shape, method):
        """
        See util.downsample for available methods
        
        Note:
            If the downsampling block_shape does not
            perfectly divide into the brick's physical_box start or stop,
            voxels on the edge of the volume will be discarded before downsampling. 
        """
        assert block_shape[0] == block_shape[1] == block_shape[2], \
            "Currently, downsampling must be isotropic"

        factor = block_shape[0]
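        # e.g. with block_shape (2,2,2), factor == 2; a physical_box of [(1,2,4), (9,11,16)]
        # would be clipped to [(2,2,4), (8,10,16)] before downsampling.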

        def downsample_brick(brick):
            assert (brick.logical_box % factor == 0).all()

            # If the factor doesn't perfectly divide into
            # the brick's physical dimensions,
            # then chop off the edges until it does.
            if (brick.physical_box % factor != 0).any():
                clipped_box = round_box(brick.physical_box, factor, 'in')
                volume = extract_subvol(brick.volume,
                                        clipped_box - brick.physical_box[0])
            else:
                clipped_box = brick.physical_box
                volume = brick.volume

            downsampled_volume = downsample(volume, factor, method)
            downsampled_logical_box = brick.logical_box // factor
            downsampled_physical_box = clipped_box // factor

            return Brick(downsampled_logical_box,
                         downsampled_physical_box,
                         downsampled_volume,
                         compression=brick.compression)

        new_bounding_box = None
        if self.bounding_box is not None:
            new_bounding_box = self.bounding_box // factor
        new_grid = Grid(self.grid.block_shape // factor,
                        self.grid.offset // factor)
        new_bricks = self.bricks.map(downsample_brick)

        return BrickWall(new_bounding_box, new_grid, new_bricks,
                         self.num_bricks)
Example #20
    def init_brickwall(self):
        input_config = self.config["input"]
        mask_input_config = self.config["mask-input"]
        mgr_config = self.config["resource-manager"]
        options = self.config["sparseblockstats"]
        
        resource_mgr_client = ResourceManagerClient( mgr_config["server"], mgr_config["port"] )
        input_service = VolumeService.create_from_config( input_config, resource_mgr_client )
        mask_service = VolumeService.create_from_config( mask_input_config, resource_mgr_client )
        
        assert (input_service.preferred_message_shape == mask_service.preferred_message_shape).all(), \
            "This workflow assumes that the input and the mask-input use the same brick grid."

        assert not (input_service.preferred_message_shape % input_service.block_width).any(), \
            "input brick grid spacing must be a multiple of the input's block-width"
        assert not (mask_service.preferred_message_shape % mask_service.block_width).any(), \
            "mask brick grid spacing must be a multiple of the mask's block-width"

        is_supervoxels = False
        if isinstance(mask_service.base_service, DvidVolumeService):
            is_supervoxels = mask_service.base_service.supervoxels

        # Load body list and eliminate duplicates
        subset_labels = load_body_list(options["mask-labels"], is_supervoxels)
        subset_labels = set(subset_labels)

        if not subset_labels:
            raise RuntimeError("You didn't specify any mask subset labels. "
                               "If you want to compute block stats for an entire segmentation volume, use the CopySegmentation workflow.")

        sbm = mask_service.sparse_block_mask_for_labels(subset_labels)
        if ((sbm.box[1] - sbm.box[0]) == 0).any():
            raise RuntimeError("Could not find sparse masks for any of the mask-labels")

        with Timer("Initializing BrickWall", logger):
            # Aim for 2 GB RDD partitions when loading segmentation
            GB = 2**30
            target_partition_size_voxels = 2 * GB // np.uint64().nbytes
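            # == 268,435,456 voxels per partition (assuming 8-byte uint64 labels)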
            brickwall = BrickWall.from_volume_service(input_service, 0, None, self.client, target_partition_size_voxels, 0, sbm)

            # Pad if necessary to ensure that all fetched bricks are block-aligned
            block_shape = 3*(input_service.block_width,)
            brickwall = brickwall.fill_missing(input_service.get_subvolume, Grid(block_shape))

        return brickwall
Example #21
def test_extract_halos_subsets():
    halo = 1
    grid = Grid( (10,20), (0,0), halo )
    bounding_box = np.array([(15,30), (95,290)])
    volume = np.random.randint(0,10, (100,300) )

    bricks, _num_bricks = generate_bricks_from_volume_source( bounding_box, grid, partial(extract_subvol, volume), DebugClient() )

    def bricks_to_df(bricks):
        rows = []
        for brick in bricks:
            rows.append([*brick.physical_box.flat, brick.volume])
        df = pd.DataFrame(rows, columns=['y0', 'x0', 'y1', 'x1', 'brickvol'])
        df = df.sort_values(['y0', 'x0', 'y1', 'x1']).reset_index(drop=True)
        return df

    def check(all_halos, lower_halos, upper_halos):
        all_df = bricks_to_df(all_halos)
        lower_df = bricks_to_df(lower_halos)
        upper_df = bricks_to_df(upper_halos)
        
        combined_df = pd.concat([lower_df, upper_df], ignore_index=True).sort_values(['y0', 'x0', 'y1', 'x1'])
        combined_df.reset_index(drop=True, inplace=True)
    
        assert (all_df[['y0', 'x0', 'y1', 'x1']] == combined_df[['y0', 'x0', 'y1', 'x1']]).all().all()
        for a, b in zip(all_df['brickvol'].values, combined_df['brickvol'].values):
            assert (a == b).all()
    
    # Check that 'all' is the same as combining 'lower' and 'upper'
    all_outer_halos = extract_halos(bricks, grid, 'outer', 'all').compute()
    lower_outer_halos = extract_halos(bricks, grid, 'outer', 'lower').compute()
    upper_outer_halos = extract_halos(bricks, grid, 'outer', 'upper').compute()

    all_inner_halos = extract_halos(bricks, grid, 'inner', 'all').compute()
    lower_inner_halos = extract_halos(bricks, grid, 'inner', 'lower').compute()
    upper_inner_halos = extract_halos(bricks, grid, 'inner', 'upper').compute()

    check(all_outer_halos, lower_outer_halos, upper_outer_halos)
    check(all_inner_halos, lower_inner_halos, upper_inner_halos)
Example #22
def test_compression():
    vol_box = [(0,0,0), (100,100,120)]
    volume = np.random.randint(10, size=vol_box[1], dtype=np.uint64)
    
    for method in COMPRESSION_METHODS:
        wall = BrickWall.from_accessor_func(vol_box, Grid((64,64,128)), lambda box: extract_subvol(volume, box), compression=method)

        # Compress them all
        wall.bricks.map(Brick.compress).compute()
        
        def check_pickle(brick):
            pickle.dumps(brick)

        # Check that each brick can be pickled
        wall.bricks.map(check_pickle).compute()
        
        def check_brick(brick):
            assert (brick.volume.shape == (brick.physical_box[1] - brick.physical_box[0])).all()
            assert (brick.volume == extract_subvol(volume, brick.physical_box)).all()
        
        # Check them all (implicit decompression)
        wall.bricks.map(check_brick).compute()
Example #23
        def timed_fetch_blocks_from_box(box):
            """
            Fetch the blocks for a given box and return the time it took to fetch them.
            Do not bother decompressing the blocks or combining them into a single volume.
            """
            assert not (box % block_shape).any(), \
                "For this test, all requests must be block-aligned"
            block_boxes = list(boxes_from_grid(box, Grid(block_shape)))
            block_coords_xyz = np.array(block_boxes)[:, 0, ::-1] // block_shape
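            # e.g. a block starting at zyx == (128, 64, 0) with 64-px blocks maps to xyz coordinate (0, 1, 2)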
            block_coords_str = ','.join(map(str, block_coords_xyz.flat))

            voxel_count = np.prod(box[1] - box[0])

            session = default_dvid_session()
            url = f'{server}/api/node/{uuid}/{instance}/specificblocks?blocks={block_coords_str}'

            with resource_mgr_client.access_context(server, True, 1,
                                                    voxel_count):
                timestamp = datetime.now()
                with Timer() as timer:
                    r = session.get(url)

            r.raise_for_status()
            return timestamp, voxel_count, len(r.content), timer.seconds
Example #24
    def _consolidate_and_pad(self, slab_index, input_wall, scale,
                             output_service):
        """
        Consolidate (align) and pad the given BrickWall.

        Args:
            scale: The pyramid scale of the data.

            output_service: The output_service to align to and pad from

        Returns a pre-executed and persisted BrickWall.
        """
        options = self.config["copysegmentation"]

        # We'll pad from previously-existing pyramid data until
        # we have full storage blocks, e.g. (64,64,64),
        # but not necessarily full bricks, e.g. (64,64,6400)
        output_writing_grid = Grid(output_service.preferred_message_shape)
        storage_block_width = output_service.block_width
        output_padding_grid = Grid(
            (storage_block_width, storage_block_width, storage_block_width),
            output_writing_grid.offset)
        output_accessor_func = partial(output_service.get_subvolume,
                                       scale=scale)

        with Timer(
                f"Slab {slab_index}: Scale {scale}: Shuffling bricks into alignment",
                logger):
            # Consolidate bricks to full-size, aligned blocks (shuffles data)
            realigned_wall = input_wall.realign_to_new_grid(
                output_writing_grid, output_accessor_func)
            del input_wall
            realigned_wall.persist_and_execute()

        input_mask_labels = self.input_mask_labels
        output_mask_labels = self.output_mask_labels

        # If no masks are involved, we merely need to pad the existing data on the edges.
        # (No need to fetch the entire output.)
        # Similarly, if scale > 0, then the masks were already applied and the input/output data was
        # already combined, we can simply write the (padded) downsampled data.
        if scale == 0 and (input_mask_labels or output_mask_labels) \
                and not options["skip-masking-step"]:
            # If masks are involved, we must fetch the ALL the output
            # (unless skip-masking-step was given),
            # and select data from input or output according to the masks.
            output_service = self.output_service
            translation_offset_zyx = self.translation_offset_zyx

            def combine_with_output(input_brick):
                output_box = input_brick.physical_box + translation_offset_zyx
                output_vol = output_service.get_subvolume(output_box, scale=0)
                output_vol = np.asarray(output_vol, order='C')

                mask = None
                if input_mask_labels:
                    mask = mask_for_labels(input_brick.volume,
                                           input_mask_labels)

                if output_mask_labels:
                    output_mask = mask_for_labels(output_vol,
                                                  output_mask_labels)

                    if mask is None:
                        mask = output_mask
                    else:
                        mask[:] &= output_mask

                # Start with the complete output, then
                # change voxels that fall within both masks.
                output_vol[mask] = input_brick.volume[mask]
                input_brick.compress()
                return output_vol

            combined_wall = realigned_wall.map_brick_volumes(
                combine_with_output)
            combined_wall.persist_and_execute(
                f"Slab {slab_index}: Scale {scale}: Combining masked bricks",
                logger)
            realigned_wall = combined_wall

        padded_wall = realigned_wall.fill_missing(output_accessor_func,
                                                  output_padding_grid)
        del realigned_wall
        padded_wall.persist_and_execute(
            f"Slab {slab_index}: Scale {scale}: Padding", logger)
        return padded_wall
Example #25
    def _process_slab(self, scale, slab_fullres_box_zyx, slab_index, num_slabs, upscale_slab_wall, min_scale):
        options = self.config["copygrayscale"]
        pyramid_source = options["pyramid-source"]
        downsample_method = options["downsample-method"]
        output_service = self.output_service

        if scale < min_scale and pyramid_source == "copy":
            logger.info(f"Slab {slab_index}: Skipping scale {scale}")
            return

        slab_voxels = np.prod(slab_fullres_box_zyx[1] - slab_fullres_box_zyx[0]) // (2**scale)**3
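        # Each scale-s voxel covers 2**s full-res voxels along each axis, hence the (2**scale)**3 divisor.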
        voxels_per_thread = slab_voxels // self.total_cores()
        partition_voxels = voxels_per_thread // 2
        logging.info(f"Slab {slab_index}: Aiming for partitions of {partition_voxels} voxels")

        if pyramid_source == "copy" or scale == 0:
            # Copy from input source
            bricked_slab_wall = BrickWall.from_volume_service(self.input_service, scale, slab_fullres_box_zyx, self.client, partition_voxels)
            bricked_slab_wall.persist_and_execute(f"Slab {slab_index}: Downloading scale {scale}", logger)
        else:
            # Downsample from previous scale
            bricked_slab_wall = upscale_slab_wall.downsample( (2,2,2), downsample_method )
            bricked_slab_wall.persist_and_execute(f"Slab {slab_index}: Downsampling to scale {scale}", logger)
            del upscale_slab_wall

        if scale == 0:
            bricked_slab_wall = self.adjust_contrast(bricked_slab_wall, slab_index)

        # Remap to output bricks
        with Timer(f"Slab {slab_index}: Realigning to output grid", logger):
            output_grid = Grid(output_service.preferred_message_shape)
            output_slab_wall = bricked_slab_wall.realign_to_new_grid( output_grid )

        if options["fill-blocks"]:
            # Pad from previously-existing pyramid data until
            # we have full storage blocks, e.g. (64,64,64),
            # but not necessarily full bricks, e.g. (64,64,6400)
            output_accessor_func = partial(output_service.get_subvolume, scale=scale)

            # But don't bother fetching real data for scale 0:
            # the input slabs are already block-aligned, and the edges of each slice will be zeros anyway.
            if scale == 0:
                output_accessor_func = lambda _box: 0

            if isinstance( output_service.base_service, DvidVolumeService):
                # For DVID, we use minimum padding (just pad up to the
                # nearest block boundary, not the whole brick boundary).
                padding_grid = Grid( 3*(output_service.block_width,), output_grid.offset )
            else:
                padding_grid = output_slab_wall.grid

            output_slab_wall = output_slab_wall.fill_missing(output_accessor_func, padding_grid)
            output_slab_wall.persist_and_execute(f"Slab {slab_index}: Assembling scale {scale} bricks", logger)

        # Discard original bricks
        del bricked_slab_wall

        if scale < min_scale:
            logger.info(f"Slab {slab_index}: Not writing scale {scale}")
            return output_slab_wall

        def _write(brick):
            write_brick(output_service, scale, brick)

        with Timer(f"Slab {slab_index}: Writing scale {scale}"):
            output_slab_wall.bricks.map(_write).compute()

        return output_slab_wall
Example #26
    def execute(self):
        self._init_services()
        self._sanitize_config()

        options = self.config_data["options"]

        output_service = self.output_service
        logger.info(
            f"Output bounding box: {output_service.bounding_box_zyx[:,::-1]}")

        # Data is processed in Z-slabs
        slab_depth = options["slices-per-slab"]

        input_bb_zyx = self.input_service.bounding_box_zyx
        _, slice_start_y, slice_start_x = input_bb_zyx[0]

        slab_shape_zyx = input_bb_zyx[1] - input_bb_zyx[0]
        slab_shape_zyx[0] = slab_depth

        slice_shape_zyx = slab_shape_zyx.copy()
        slice_shape_zyx[0] = 1

        # This grid outlines the slabs -- each grid box is a full slab
        slab_grid = Grid(slab_shape_zyx, (0, slice_start_y, slice_start_x))
        slab_boxes = list(clipped_boxes_from_grid(input_bb_zyx, slab_grid))

        for slab_index, slab_box_zyx in enumerate(slab_boxes):
            # Construct BrickWall from input bricks
            num_threads = num_worker_nodes() * cpus_per_worker()
            slab_voxels = np.prod(slab_box_zyx[1] - slab_box_zyx[0])
            voxels_per_thread = slab_voxels / num_threads

            bricked_slab_wall = BrickWall.from_volume_service(
                self.input_service, 0, slab_box_zyx, self.sc,
                voxels_per_thread / 2)

            # Force download
            bricked_slab_wall.persist_and_execute(
                f"Downloading slab {slab_index}/{len(slab_boxes)}: {slab_box_zyx[:,::-1]}",
                logger)

            # Remap to slice-sized "bricks"
            sliced_grid = Grid(slice_shape_zyx, offset=slab_box_zyx[0])
            sliced_slab_wall = bricked_slab_wall.realign_to_new_grid(
                sliced_grid)
            sliced_slab_wall.persist_and_execute(
                f"Assembling slab {slab_index}/{len(slab_boxes)} slices",
                logger)

            # Discard original bricks
            bricked_slab_wall.unpersist()
            del bricked_slab_wall

            def write_slice(brick):
                assert (brick.physical_box == brick.logical_box).all()
                output_service.write_subvolume(brick.volume,
                                               brick.physical_box[0])

            # Export to PNG or TIFF, etc. (automatic via slice path extension)
            with Timer() as timer:
                logger.info(f"Exporting slab {slab_index}/{len(slab_boxes)}",
                            extra={
                                "status":
                                f"Exporting {slab_index}/{len(slab_boxes)}"
                            })
                rt.foreach(write_slice, sliced_slab_wall.bricks)
            logger.info(
                f"Exporting slab {slab_index}/{len(slab_boxes)} took {timer.timedelta}",
                extra={"status": f"Done: {slab_index}/{len(slab_boxes)}"})

            # Discard slice data
            sliced_slab_wall.unpersist()
            del sliced_slab_wall

        logger.info(f"DONE exporting {len(slab_boxes)} slabs.",
                    extra={'status': "DONE"})
Example #27
    def execute(self):
        from pyspark import StorageLevel

        self._sanitize_config()
        config = self.config_data
        options = config["options"]

        resource_mgr_client = ResourceManagerClient(options["resource-server"],
                                                    options["resource-port"])
        total_cpus = 16 * num_worker_nodes()

        concurrent_threads = total_cpus
        if options["resource-server"]:
            concurrent_threads = options["resource-server-config"]["read_reqs"]
            if concurrent_threads > total_cpus:
                msg = "You're attempting to use the resource manager to constrain concurrency, but you "\
                      "aren't running with a large enough cluster to saturate the resource manager settings"
                raise RuntimeError(msg)

        # We instantiate a VolumeService as an easy way to plug in missing config values as necessary.
        # (We won't actually use it.)
        volume_service = VolumeService.create_from_config(
            config["input"], self.config_dir)

        server = volume_service.server
        uuid = volume_service.uuid
        instance = volume_service.instance_name
        block_shape = 3 * (volume_service.block_width, )

        def timed_fetch_blocks_from_box(box):
            """
            Fetch the blocks for a given box and return the time it took to fetch them.
            Do not bother decompressing the blocks or combining them into a single volume.
            """
            assert not (box % block_shape).any(), \
                "For this test, all requests must be block-aligned"
            block_boxes = list(boxes_from_grid(box, Grid(block_shape)))
            block_coords_xyz = np.array(block_boxes)[:, 0, ::-1] // block_shape
            block_coords_str = ','.join(map(str, block_coords_xyz.flat))

            voxel_count = np.prod(box[1] - box[0])

            session = default_dvid_session()
            url = f'{server}/api/node/{uuid}/{instance}/specificblocks?blocks={block_coords_str}'

            with resource_mgr_client.access_context(server, True, 1,
                                                    voxel_count):
                timestamp = datetime.now()
                with Timer() as timer:
                    r = session.get(url)

            r.raise_for_status()
            return timestamp, voxel_count, len(r.content), timer.seconds

        # This hash-related hackery is to ensure uniform partition lengths, which Spark is bad at by default.
        boxes = list(
            clipped_boxes_from_grid(
                volume_service.bounding_box_zyx,
                Grid(volume_service.preferred_message_shape)))
        indexed_boxes = list(map(rt.tuple_with_hash, (enumerate(boxes))))
        for i_box in indexed_boxes:
            i_box.set_hash(i_box[0])

        rdd_boxes = self.sc.parallelize(indexed_boxes).values()
        timestamps_voxels_sizes_times = rdd_boxes.map(
            timed_fetch_blocks_from_box)

        # The only reason I'm persisting this is to see the partition distribution in the log.
        rt.persist_and_execute(timestamps_voxels_sizes_times,
                               "Fetching blocks", logger,
                               StorageLevel.MEMORY_ONLY)  #@UndefinedVariable

        # Execute the workload
        timestamps, voxels, sizes, times = zip(
            *timestamps_voxels_sizes_times.collect())

        # Process the results
        self.dump_stats(timestamps, voxels, sizes, times, block_shape,
                        concurrent_threads)
Example #28
    def execute(self):
        self._init_services()
        options = self.config["labelmapcopy"]

        input_service = self.input_service
        output_service = self.output_service
        mgr_client = self.mgr_client

        record_labels = options["record-label-sets"]
        record_only = options["record-only"]
        check_existing = options["dont-overwrite-identical-blocks"]

        if record_only:
            assert options["min-scale"] == 0 and options["max-scale"] == 0, \
                ("In record-only mode, there is no reason to process any scales other than 0. "
                "Set min-scale and max-scale to 0.")

        def copy_box(box, scale):
            assert not record_only or scale == 0
            box = round_box(box, 64, 'out')
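            # Expand the box outward to a multiple of 64 so it covers whole labelmap blocks.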
            box_shape = (box[1] - box[0])

            # Read input blocks
            with mgr_client.access_context(input_service.server, True, 1,
                                           np.prod(box_shape)):
                input_raw_blocks = fetch_labelmap_voxels(
                    *input_service.instance_triple,
                    box,
                    scale,
                    False,
                    input_service.supervoxels,
                    format='raw-response')

            # If we're just recording, parse and return
            if scale == 0 and record_only:
                _input_spans, input_labels = parse_labelarray_data(
                    input_raw_blocks, extract_labels=True)
                return list(set(chain(*input_labels.values())))

            # If not checking the output, just copy input to output
            if not check_existing:
                with mgr_client.access_context(output_service.server, False, 1,
                                               np.prod(box_shape)):
                    post_labelmap_blocks(*output_service.instance_triple,
                                         None,
                                         input_raw_blocks,
                                         scale,
                                         output_service.enable_downres,
                                         output_service.disable_indexing,
                                         False,
                                         is_raw=True)

                if scale == 0 and record_labels:
                    _input_spans, input_labels = parse_labelarray_data(
                        input_raw_blocks, extract_labels=True)
                    return list(set(chain(*input_labels.values())))
                return []

            # Read from output
            with mgr_client.access_context(output_service.server, True, 1,
                                           np.prod(box_shape)):
                output_raw_blocks = fetch_labelmap_voxels(
                    *output_service.instance_triple,
                    box,
                    scale,
                    False,
                    output_service.supervoxels,
                    format='raw-response')

            # If no differences, no need to parse
            if (input_raw_blocks == output_raw_blocks):
                return []

            input_spans = parse_labelarray_data(input_raw_blocks,
                                                extract_labels=False)
            output_spans = parse_labelarray_data(output_raw_blocks,
                                                 extract_labels=False)

            # Compare block IDs
            input_ids = set(input_spans.keys())
            output_ids = set(output_spans.keys())

            missing_from_output = input_ids - output_ids
            missing_from_input = output_ids - input_ids
            common_ids = input_ids & output_ids

            for block_id in missing_from_input:
                # FIXME: We should pass this in the result so it can be logged in the client, not the worker.
                logger.error(
                    f"Not overwriting block-id: {block_id}.  It doesn't exist in the input."
                )

            # Filter the input blocks so only the new/different ones remain
            filtered_input_list = []
            for block_id in missing_from_output:
                start, stop = input_spans[block_id]
                filtered_input_list.append(
                    (block_id, input_raw_blocks[start:stop]))

            filtered_output_list = []
            for block_id in common_ids:
                in_start, in_stop = input_spans[block_id]
                out_start, out_stop = output_spans[block_id]

                in_buf = input_raw_blocks[in_start:in_stop]
                out_buf = output_raw_blocks[out_start:out_stop]

                if in_buf != out_buf:
                    filtered_input_list.append((block_id, in_buf))
                    filtered_output_list.append((block_id, out_buf))

            # Sort filtered blocks so they appear in the same order in which we received them.
            filtered_input_list = sorted(
                filtered_input_list, key=lambda k_v: input_spans[k_v[0]][0])

            # Post them
            filtered_input_buf = b''.join(
                [buf for (_, buf) in filtered_input_list])
            with mgr_client.access_context(output_service.server, False, 1,
                                           np.prod(box_shape)):
                post_labelmap_blocks(*output_service.instance_triple,
                                     None,
                                     filtered_input_buf,
                                     scale,
                                     output_service.enable_downres,
                                     output_service.disable_indexing,
                                     False,
                                     is_raw=True)

            if scale == 0 and record_labels:
                filtered_output_buf = b''.join(
                    [buf for (_, buf) in filtered_output_list])

                _, filtered_input_labels = parse_labelarray_data(
                    filtered_input_buf, extract_labels=True)
                _, filtered_output_labels = parse_labelarray_data(
                    filtered_output_buf, extract_labels=True)

                input_set = set(chain(*filtered_input_labels.values()))
                output_set = set(chain(*filtered_output_labels.values()))
                return list(input_set - output_set)

            return []

        all_labels = set()
        try:
            for scale in range(options["min-scale"], 1 + options["max-scale"]):
                scaled_bounding_box = input_service.bounding_box_zyx // (2**scale)
                slab_boxes = clipped_boxes_from_grid(
                    scaled_bounding_box, options["slab-shape"][::-1])
                logger.info(f"Scale {scale}: Copying {len(slab_boxes)} slabs")
                for slab_index, slab_box in enumerate(slab_boxes):
                    brick_boxes = clipped_boxes_from_grid(
                        slab_box,
                        Grid(self.input_service.preferred_message_shape))
                    with Timer(
                            f"Scale {scale} slab {slab_index}: Copying {slab_box[:,::-1].tolist()} ({len(brick_boxes)} bricks)",
                            logger):
                        brick_labels = db.from_sequence(brick_boxes).map(
                            lambda box: copy_box(box, scale)).compute()
                        slab_labels = chain(*brick_labels)
                        all_labels |= set(slab_labels)
        finally:
            if record_labels:
                name = 'sv' if input_service.supervoxels else 'body'
                pd.Series(sorted(all_labels),
                          name=name).to_csv('recorded-labels.csv',
                                            index=False,
                                            header=True)
Example #29
    def from_volume_service(cls,
                            volume_service,
                            scale=0,
                            bounding_box_zyx=None,
                            client=None,
                            target_partition_size_voxels=None,
                            halo=0,
                            sparse_block_mask=None,
                            lazy=False,
                            compression=None):
        """
        Convenience constructor, initialized from a VolumeService object.
        
        Args:
            volume_service:
                An instance of a VolumeService
        
            bounding_box_zyx:
                (start, stop) Optional.
                Bounding box to restrict the region of fetched blocks, always
                specified in FULL-RES coordinates, even if you are passing scale > 0
                If not provided, volume_service.bounding_box_zyx is used.
     
            scale:
                Brick data will be fetched at this scale.
                (Note: The bricks' sizes will still be the full volume_service.preferred_message_shape,
                       but the overall bounding-box of the BrickWall will be scaled down.)
     
            client:
                dask distributed.Client
     
            target_partition_size_voxels:
                Optional. If provided, the RDD partition lengths (i.e. the number of bricks per RDD partition)
                will be chosen to have (approximately) this many total voxels in each partition.
            
            halo:
                If provided, add a halo to the brick grid that will be used to fetch the data.
                Depending on your use-case and/or input source, this can be faster than applying
                a halo after-the-fact, which involves shuffling data across the cluster.
            
            sparse_block_mask:
                Instance of SparseBlockMask
            
            lazy:
                If True, the bricks' data will not be created until their 'volume' member is first accessed.
            
            compression:
                If provided, the brick volume data will be serialized/stored in a compressed format.
                See ``flyemflows.util.compressed_volume.COMPRESSION_METHODS``
        """
        grid = Grid(volume_service.preferred_message_shape, (0, 0, 0), halo)

        if bounding_box_zyx is None:
            bounding_box_zyx = volume_service.bounding_box_zyx

        bounding_box_zyx = np.asarray(bounding_box_zyx)

        if scale == 0:
            downsampled_box = bounding_box_zyx
        else:
            full_box = bounding_box_zyx
            downsampled_box = np.zeros((2, 3), dtype=int)
            downsampled_box[0] = full_box[0] // 2**scale  # round down

            # Proper downsampled bounding-box would round up here...
            #downsampled_box[1] = (full_box[1] + 2**scale - 1) // 2**scale

            # ...but some services probably don't do that, so we'll
            # round down to avoid out-of-bounds errors for higher scales.
            downsampled_box[1] = full_box[1] // 2**scale
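            # e.g. a full-res stop of (1000, 1000, 1000) at scale 2 becomes (250, 250, 250)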

        sparse_boxes = None
        if sparse_block_mask is not None:
            # FIXME:
            #
            #   It would save a lot of time in generate_bricks_from_volume_source() if we implemented
            #   a faster way to filter boxes in SparseBlockMask,
            #   and called it here.  Right now, workflows that process data in "slabs"
            #   end up passing the same SparseBlockMask for every slab, which gets processed from
            #   scratch in generate_bricks_from_volume_source() to filter boxes for each slab's bounding box.
            assert isinstance(sparse_block_mask, SparseBlockMask)
            assert scale == 0, "FIXME: I don't think the sparse feature works with scales other than 0."
            sparse_boxes = sparse_block_mask.sparse_boxes(grid)
            if len(sparse_boxes) == 0:
                # Some workflows check for this message; if you change it, change those checks!
                raise RuntimeError("SparseBlockMask selects no blocks at all!")

        return BrickWall.from_accessor_func(
            downsampled_box,
            grid,
            lambda box: volume_service.get_subvolume(box, scale),
            client,
            target_partition_size_voxels,
            sparse_boxes,
            lazy,
            compression=compression)
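For reference, here is a minimal sketch of how this constructor might be invoked, assuming a volume_service and a dask client have already been created elsewhere (the variable names below are illustrative, not taken from the examples on this page):

# Hypothetical usage sketch: volume_service and client are assumed to exist.
wall = BrickWall.from_volume_service(
    volume_service,
    scale=0,                                    # fetch data at full resolution
    bounding_box_zyx=None,                      # default to the service's own bounding box
    client=client,
    target_partition_size_voxels=2*(2**30)//8,  # aim for ~2 GB partitions of 8-byte voxels
    halo=0,
    lazy=False,
    compression=None)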
Example #30
    def execute(self):
        self._init_service()
        mgr_client = self.mgr_client

        options = self.config["stitchedmeshes"]

        server, uuid, instance = self.input_service.base_service.instance_triple
        is_supervoxels = self.input_service.base_service.supervoxels
        bodies = load_body_list(options["bodies"], is_supervoxels)

        logger.info(f"Input is {len(bodies)} bodies")
        os.makedirs(options["output-directory"], exist_ok=True)

        def make_bricks(coord_and_block):
            coord_zyx, block_vol = coord_and_block
            logical_box = np.array((coord_zyx, coord_zyx + block_vol.shape))
            return Brick(logical_box,
                         logical_box,
                         block_vol,
                         location_id=(logical_box // 64))

        rescale = (2**options["scale"]) * options["extra-rescale"]

        def create_brick_mesh(brick):
            mesh = Mesh.from_binary_vol(brick.volume, brick.physical_box)
            if rescale != 1.0:
                mesh.vertices_zyx *= rescale
            return mesh

        def create_combined_mesh(meshes):
            mesh = concatenate_meshes(meshes, False)
            if options["stitch"]:
                mesh.stitch_adjacent_faces(drop_unused_vertices=True,
                                           drop_duplicate_faces=True)
            mesh.laplacian_smooth(options["smoothing-iterations"])
            mesh.simplify(options["decimation-fraction"], in_memory=True)
            return mesh

        in_flight = 0

        # Support synchronous testing with a fake 'as_completed' object
        if hasattr(self.client, 'DEBUG'):
            result_futures = as_completed_synchronous()
        else:
            result_futures = as_completed()

        def pop_result():
            nonlocal in_flight
            r = next(result_futures)
            in_flight -= 1

            try:
                return r.result()
            except Exception as ex:
                if options["error-mode"] == "raise":
                    raise
                body = int(r.key)
                return (body, 0, 'error', str(ex))

        USER = getpass.getuser()
        results = []
        try:
            for i, body in enumerate(bodies):
                logger.info(f"Mesh #{i}: Body {body}: Starting")

                def fetch_sparsevol():
                    with mgr_client.access_context(server, True, 1, 0):
                        ns = default_node_service(server, uuid,
                                                  'flyemflows-stitchedmeshes',
                                                  USER)
                        coords_zyx, blocks = ns.get_sparselabelmask(
                            body, instance, options["scale"], is_supervoxels)
                        return list(coords_zyx.copy()), list(blocks.copy())

                # This leaves all blocks and bricks in a single partition,
                # but we're about to do a shuffle anyway when the bricks are realigned.
                coords, blocks = delayed(fetch_sparsevol, nout=2)()
                coords, blocks = db.from_delayed(coords), db.from_delayed(
                    blocks)
                bricks = db.zip(coords, blocks).map(make_bricks)

                mesh_grid = Grid((64, 64, 64), halo=options["block-halo"])
                wall = BrickWall(None, (64, 64, 64), bricks)
                wall = wall.realign_to_new_grid(mesh_grid)

                brick_meshes = wall.bricks.map(create_brick_mesh)
                consolidated_brick_meshes = brick_meshes.repartition(1)
                combined_mesh = delayed(create_combined_mesh)(
                    consolidated_brick_meshes)

                def write_mesh(mesh):
                    output_dir = options["output-directory"]
                    fmt = options["format"]
                    output_path = f'{output_dir}/{body}.{fmt}'
                    mesh.serialize(output_path)
                    return (body, len(mesh.vertices_zyx), 'success', '')

                # We hide the body ID in the task name, so that we can record it in pop_result
                task = delayed(write_mesh)(combined_mesh,
                                           dask_key_name=f'{body}')
                result_futures.add(self.client.compute(task))
                in_flight += 1

                assert in_flight <= options["concurrent-bodies"]
                while in_flight == options["concurrent-bodies"]:
                    body, vertices, result, msg = pop_result()
                    if result == "error":
                        logger.warning(
                            f"Body {body}: Failed to generate mesh: {msg}")
                    results.append((body, vertices, result, msg))

            # Flush the last batch of tasks
            while in_flight > 0:
                body, vertices, result, msg = pop_result()
                if result == "error":
                    logger.warning(
                        f"Body {body}: Failed to generate mesh: {msg}")
                results.append((body, vertices, result, msg))
        finally:
            stats_df = pd.DataFrame(
                results, columns=['body', 'vertices', 'result', 'msg'])
            stats_df.to_csv('mesh-stats.csv', index=False, header=True)

            failed_df = stats_df.query("result != 'success'")
            if len(failed_df) > 0:
                logger.warning(
                    f"Failed to create meshes for {len(failed_df)} bodies.  See mesh-stats.csv"
                )
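Below is a minimal standalone sketch of the throttling pattern used above, added for illustration only. `work`, `run_throttled`, and the key format are hypothetical (items are assumed to be distinct integers so the keys stay unique), but `Client.submit`, `as_completed().add()`, and `Future.key` are real dask.distributed APIs:

from distributed import Client, as_completed

def work(x):
    return x * x

def run_throttled(items, max_in_flight=2):
    client = Client(processes=False)      # small local cluster, just for the sketch
    result_futures = as_completed()       # starts empty; futures are added below
    results = []
    in_flight = 0

    def pop_result():
        nonlocal in_flight
        f = next(result_futures)          # blocks until some future finishes
        in_flight -= 1
        # The item's id is hidden in the task key, as with dask_key_name above.
        return int(f.key.split('-')[0]), f.result()

    for item in items:
        result_futures.add(client.submit(work, item, key=f"{item}-work"))
        in_flight += 1
        while in_flight >= max_in_flight:
            results.append(pop_result())

    while in_flight > 0:                  # flush the remaining tasks
        results.append(pop_result())

    client.close()
    return results

print(run_throttled([1, 2, 3, 4, 5]))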
Exemplo n.º 31
0
def test_extract_halos():
    halo = 1
    grid = Grid( (10,20), (0,0), halo )
    bounding_box = np.array([(15,30), (95,290)])
    volume = np.random.randint(0,10, (100,300) )

    bricks, _num_bricks = generate_bricks_from_volume_source( bounding_box, grid, partial(extract_subvol, volume), DebugClient() )

    outer_halos = extract_halos(bricks, grid, 'outer').compute()
    inner_halos = extract_halos(bricks, grid, 'inner').compute()

    for halo_type, halo_bricks in zip(('outer', 'inner'), (outer_halos, inner_halos)):
        for hb in halo_bricks:
            # Even bricks on the edge of the volume
            # (which have smaller physical boxes than logical boxes)
            # return halos which correspond to the original
            # logical box (except for the halo axis).
            # (Each halo's "logical box" still corresponds to
            # the brick it was extracted from.)
            if halo_type == 'outer':
                assert (hb.physical_box[0] != hb.logical_box[0]).sum() == 1
                assert (hb.physical_box[1] != hb.logical_box[1]).sum() == 1
            else:
                assert (hb.physical_box != hb.logical_box).sum() == 1

            # The bounding box above is not grid aligned,
            # so blocks on the volume edge will only have partial data
            # (i.e. a smaller physical_box than logical_box).
            # However, halos are always produced to correspond to the logical_box size,
            # and zero-padded if necessary to achieve that size.
            # Therefore, only compare the actually valid portion of the halo here with the expected volume.
            # The other voxels should be zeros.
            valid_box = box_intersection(bounding_box, hb.physical_box)
            halo_vol = extract_subvol(hb.volume, valid_box - hb.physical_box[0])
            expected_vol = extract_subvol(volume, valid_box)
            assert (halo_vol == expected_vol).all()
            
            # Other voxels should be zero
            full_halo_vol = hb.volume.copy()
            overwrite_subvol(full_halo_vol, valid_box - hb.physical_box[0], 0)
            assert (full_halo_vol == 0).all()

    rows = []
    for hb in chain(outer_halos):
        rows.append([*hb.physical_box.flat, hb, 'outer'])

    for hb in chain(inner_halos):
        rows.append([*hb.physical_box.flat, hb, 'inner'])
    
    halo_df = pd.DataFrame(rows, columns=['y0', 'x0', 'y1', 'x1', 'brick', 'halo_type'])
    
    halo_counts = halo_df.groupby(['y0', 'x0', 'y1', 'x1']).size()

    # Since the bricks' physical boxes are all clipped to the overall bounding-box,
    # every outer halo should have a matching inner halo from a neighboring brick.
    # (This would not necessarily be true for Bricks that are initialized from a sparse mask.)
    assert halo_counts.min() == 2
    assert halo_counts.max() == 2
    
    for _box, halos_df in halo_df.groupby(['y0', 'x0', 'y1', 'x1']):
        assert set(halos_df['halo_type']) == set(['outer', 'inner'])

        brick0 = halos_df.iloc[0]['brick']
        brick1 = halos_df.iloc[1]['brick']
        assert (brick0.volume == brick1.volume).all()
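Finally, a self-contained sketch of the zero-padding check performed above, using simplified 2D stand-ins for box_intersection/extract_subvol (the stand-ins are assumptions for illustration, not the originals):

import numpy as np

def box_intersection_2d(box_a, box_b):
    # Simplified stand-in: intersection of two [start, stop] boxes.
    return np.array([np.maximum(box_a[0], box_b[0]),
                     np.minimum(box_a[1], box_b[1])])

def extract_subvol_2d(volume, box):
    # Simplified stand-in: crop the volume to the given [start, stop] box.
    (y0, x0), (y1, x1) = box
    return volume[y0:y1, x0:x1]

volume = np.arange(20).reshape(4, 5)
bounding_box = np.array([(0, 0), (4, 5)])

# A halo whose physical_box pokes one column past the right edge of the volume:
physical_box = np.array([(1, 3), (3, 6)])
halo_vol = np.zeros((2, 3), dtype=volume.dtype)           # zero-padded halo
valid_box = box_intersection_2d(bounding_box, physical_box)
halo_vol[:, :2] = extract_subvol_2d(volume, valid_box)    # fill only the valid part

# The valid portion matches the source; the padded column stays zero.
assert (extract_subvol_2d(halo_vol, valid_box - physical_box[0])
        == extract_subvol_2d(volume, valid_box)).all()
assert (halo_vol[:, 2:] == 0).all()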