Exemplo n.º 1
0
def test_assemble_brick_fragments():
    """
    Assemble three adjacent fragments that share one logical box, then check
    the logical box, physical box, volume shape and voxel data of the result.
    """
    volume = np.random.randint(0, 10, (100, 300))
    logical_box = np.array([(10, 20), (20, 120)])

    # The logical box spans columns 20..120, i.e. five 20-wide boxes.
    # Only the three middle ones (starting at columns 40, 60 and 80) are used;
    # the first and last are omitted to prove that the final physical box
    # ends up smaller than the logical box.
    fragment_boxes = [np.array([(10, x0), (20, x0 + 20)]) for x0 in (40, 60, 80)]
    fragments = [Brick(logical_box, box, extract_subvol(volume, box))
                 for box in fragment_boxes]

    assembled = assemble_brick_fragments(fragments)

    assert (assembled.logical_box == logical_box).all()

    # The physical box must run from the start of the first fragment
    # to the stop of the last one.
    first_box, last_box = fragment_boxes[0], fragment_boxes[-1]
    assert (assembled.physical_box == [first_box[0], last_box[1]]).all()

    expected_shape = assembled.physical_box[1] - assembled.physical_box[0]
    assert (assembled.volume.shape == expected_shape).all()

    expected_data = extract_subvol(volume, assembled.physical_box)
    assert (assembled.volume == expected_data).all()
Exemplo n.º 2
0
def test_sparseblocksstats(setup_sparseblockstats, disable_auto_retry):
    """
    Run the workflow and validate the block statistics it writes to
    'block-statistics.h5'.

    Checks performed:
      - the 'stats' dataset has the dtype described by BLOCK_STATS_DTYPES
      - every (block, segment_id) row reports the voxel count that is
        actually found in the corresponding 64-wide block of the input volume
      - no statistics were produced for the block at (0, 0, 0)
      - the set of blocks covered is exactly the three expected blocks
    """
    template_dir, _config, input_volume, _mask_volume, _dvid_address, _repo_uuid = setup_sparseblockstats

    execution_dir, workflow = launch_flow(template_dir, 1)
    _final_config = workflow.config

    with h5py.File(f'{execution_dir}/block-statistics.h5', 'r') as f:
        assert f['stats'].dtype == np.dtype(list(BLOCK_STATS_DTYPES.items()))
        stats_df = pd.DataFrame(f['stats'][:])

    # Verify each reported count against the input volume.
    # (This check used to be repeated verbatim at the end of the test;
    # one pass is sufficient since neither the table nor the volume changes.)
    for row in stats_df.itertuples():
        corner = np.array((row.z, row.y, row.x))
        block_box = np.array([corner, corner + 64])
        block = extract_subvol(input_volume, block_box)
        assert (block == row.segment_id).sum() == row.count

    assert len(stats_df.query('z == 0 and y == 0 and x == 0')) == 0, \
        "Was not supposed to compute stats for the first block!"

    # The unique block corners, in sorted order, must be exactly these three.
    block_coords = stats_df[['z', 'y', 'x']].sort_values(
        ['z', 'y', 'x']).drop_duplicates(['z', 'y', 'x']).values
    assert (block_coords == 64*np.array([[0,0,1], [0,1,0], [0,1,1]])).all(), \
        "Did not cover blocks for the selected labels!"
def test_masksegmentation_resume(setup_dvid_segmentation_input,
                                 disable_auto_retry):
    """
    Run the workflow with a "resume-at" setting that skips roughly the first
    half of the batches, then check that the first 128 slices of the output
    are identical to the input while the remaining slices differ somewhere.
    """
    (template_dir, config, volume, dvid_address, repo_uuid, roi_mask_s5,
     _input_segmentation_name, output_segmentation_name) = setup_dvid_segmentation_input

    brick_shape = config["input"]["geometry"]["message-block-shape"]
    batch_size = config["masksegmentation"]["batch-size"]

    # This counts every brick in the volume, which is not necessarily the
    # number of bricks that get *processed*, but it's close enough.
    bricks_per_axis = np.array(volume.shape) / brick_shape
    total_bricks = np.ceil(np.prod(bricks_per_axis)).astype(int)
    total_batches = int(np.ceil(total_bricks / batch_size))

    # Resume just past the midpoint, skipping over half of the original bricks.
    resume_batch = 1 + (total_batches // 2)
    config["masksegmentation"]["resume-at"] = {
        "scale": 0,
        "batch-index": resume_batch
    }

    # Write the modified config back out so the workflow picks it up.
    dumper = YAML()
    dumper.default_flow_style = False
    with open(f"{template_dir}/workflow.yaml", 'w') as config_file:
        dumper.dump(config, config_file)

    _execution_dir, workflow = launch_flow(template_dir, 1)
    final_config = workflow.config

    # Bounding boxes are stored in XYZ order; reverse the columns to get ZYX.
    input_box_zyx = np.array(
        final_config['input']['geometry']['bounding-box'])[:, ::-1]

    roi_mask = upsample(roi_mask_s5, 2**5)
    roi_mask = extract_subvol(roi_mask, input_box_zyx)

    # NOTE(review): masked_vol is computed but never compared against the
    # output below -- confirm whether a check against it was intended.
    masked_vol = extract_subvol(volume.copy(), input_box_zyx)
    masked_vol[roi_mask] = 0

    output_box_zyx = np.array(
        final_config['output']['geometry']['bounding-box'])[:, ::-1]
    output_vol = fetch_labelmap_voxels(dvid_address,
                                       repo_uuid,
                                       output_segmentation_name,
                                       output_box_zyx,
                                       scale=0,
                                       supervoxels=True)

    # The skipped (first) part must be untouched...
    untouched = (output_vol[:128] == volume[:128])
    assert untouched.all()

    # ...while the processed (last) part must have been modified somewhere.
    modified = (output_vol[128:] != volume[128:])
    assert modified.any()
Exemplo n.º 4
0
def test_pad_brick_data_from_volume_source():
    """
    Pad a brick whose physical box is not aligned to the padding grid and
    check that the physical box grows to the expected box, that the logical
    box is unchanged, and that the padded data matches the source volume.
    """
    source_volume = np.random.randint(0, 10, (100, 300))

    logical_box = [(1, 0), (11, 20)]
    physical_box = [(3, 8), (7, 13)]
    brick = Brick(logical_box,
                  physical_box,
                  extract_subvol(source_volume, physical_box))

    padding_grid = Grid((5, 5), offset=(1, 0))
    read_source = partial(extract_subvol, source_volume)
    padded_brick = pad_brick_data_from_volume_source(padding_grid, read_source, brick)

    # Padding must not alter the logical box.
    assert (padded_brick.logical_box == brick.logical_box).all()

    # The physical box (3,8)-(7,13) is expected to expand to (1,5)-(11,15).
    expected_physical_box = [(1, 5), (11, 15)]
    assert (padded_brick.physical_box == expected_physical_box).all()

    expected_data = extract_subvol(source_volume, padded_brick.physical_box)
    assert (padded_brick.volume == expected_data).all()
Exemplo n.º 5
0
    def _extract_subbrick(brick, box):
        """
        Return a new Brick covering ``box``, built from the data in ``brick``.

        The result keeps the logical_box (and compression setting) of the
        original brick, but its physical_box is the given ``box``.

        Where ``box`` extends beyond the brick's physical_box, the returned
        volume is zero-padded to fill the entirety of ``box``.

        Returns None if ``box`` does not overlap the brick's physical_box.
        """
        overlap = box_intersection(box, brick.physical_box)
        overlap_shape = overlap[1] - overlap[0]
        if (overlap_shape <= 0).any():
            # No overlap along at least one axis: nothing to extract.
            return None

        # brick.volume is indexed relative to the brick's physical origin,
        # so shift the overlap box into those local coordinates.
        local_overlap = overlap - brick.physical_box[0]
        overlap_vol = extract_subvol(brick.volume, local_overlap)
        padded_vol = zero_fill(overlap_vol, overlap, box)

        # FIXME: Should we bother with location_id?
        #        (If we don't, realign operations won't work,
        #        but it's not clear what that would mean for halos anyway)
        return Brick(brick.logical_box,
                     box,
                     padded_vol,
                     compression=brick.compression)
Exemplo n.º 6
0
def test_realign_bricks_to_new_grid_WITH_HALO():
    """
    Generate bricks on one grid, realign them to a second grid that has a
    halo of 1, and validate every resulting brick.
    """
    grid = Grid((10, 20), (12, 3))
    bounding_box = np.array([(15, 30), (95, 290)])
    volume = np.random.randint(0, 10, (100, 300))

    original_bricks, _num_bricks = generate_bricks_from_volume_source(
        bounding_box, grid, partial(extract_subvol, volume), DebugClient())

    halo = 1
    halo_shape = np.array([1, 1])
    new_grid = Grid((20, 10), (0, 0), halo)
    new_bricks = realign_bricks_to_new_grid(new_grid, original_bricks).compute()

    new_logical_boxes = [brick.logical_box for brick in new_bricks]

    # from (0,30) -> (100,290)
    assert len(new_bricks) == 5 * 26, f"{len(new_bricks)}"

    for logical_box, brick in zip(new_logical_boxes, new_bricks):
        assert isinstance(brick, Brick), f"Got {type(brick)}"
        assert (brick.logical_box == logical_box).all()

        # The logical box is exactly one block of the new grid...
        logical_shape = brick.logical_box[1] - brick.logical_box[0]
        assert (logical_shape == new_grid.block_shape).all()

        # ...and it sits on the new grid.
        misalignment = (brick.logical_box - new_grid.offset) % new_grid.block_shape
        assert (misalignment == 0).all()

        # The physical box is the logical box grown by the halo,
        # clipped where it would leave the bounding box.
        haloed_box = brick.logical_box + (-halo_shape, halo_shape)
        assert (brick.physical_box == box_intersection(haloed_box, bounding_box)).all()

        # The stored volume has the shape of the physical box...
        physical_shape = brick.physical_box[1] - brick.physical_box[0]
        assert (brick.volume.shape == physical_shape).all()

        # ...and holds the corresponding voxels of the source volume.
        assert (brick.volume == extract_subvol(volume, brick.physical_box)).all()
Exemplo n.º 7
0
def test_split_brick_WITH_HALO():
    """
    split_brick() must refuse (via AssertionError) to split a brick whose
    physical box extends beyond its logical box, i.e. a brick with a halo.
    """
    halo = 1
    grid = Grid((10, 20), (12, 3), halo)
    volume = np.random.randint(0, 10, (100, 300))

    # Build the first brick in the grid by hand.
    physical_start = np.array(grid.offset)
    logical_start = physical_start // grid.block_shape * grid.block_shape
    logical_stop = logical_start + grid.block_shape

    # Not always true, but happens to be true in this case.
    physical_stop = logical_stop + halo

    logical_box = np.array([logical_start, logical_stop])
    physical_box = np.array([physical_start, physical_stop])

    # Sanity-check the hand-computed boxes.
    assert (logical_box == [(10, 0), (20, 20)]).all()
    assert (physical_box == [(12, 3), (21, 21)]).all()

    original_brick = Brick(logical_box,
                           physical_box,
                           extract_subvol(volume, physical_box))

    # New grid scheme
    new_grid = Grid((2, 10), (0, 0))

    try:
        _fragments = split_brick(new_grid, original_brick)
    except AssertionError:
        # Expected failure: Forbidden to split bricks that have a halo
        return

    assert False, "Did not encounter the expected assertion.  split_brick() should fail for bricks that have a halo."
Exemplo n.º 8
0
def test_generate_bricks():
    """
    Generate bricks (no halo) over a bounding box and validate the brick
    count plus the boxes, shape, data and reported size of every brick.
    """
    grid = Grid((10, 20), (12, 3))
    bounding_box = np.array([(15, 30), (95, 290)])
    volume = np.random.randint(0, 10, (100, 300))

    bricks, num_bricks = generate_bricks_from_volume_source(
        bounding_box, grid, partial(extract_subvol, volume), DebugClient())
    bricks = bricks.compute()

    expected_count = 9 * 14
    assert len(bricks) == expected_count == num_bricks

    for brick in bricks:
        assert isinstance(brick, Brick)
        assert brick.logical_box.shape == (2, 2)
        assert brick.physical_box.shape == (2, 2)

        # The logical box is exactly one grid block...
        logical_shape = brick.logical_box[1] - brick.logical_box[0]
        assert (logical_shape == grid.block_shape).all()

        # ...and it sits on the grid.
        misalignment = (brick.logical_box - grid.offset) % grid.block_shape
        assert (misalignment == 0).all()

        # The physical box is the logical box clipped to the bounding box.
        clipped_box = box_intersection(brick.logical_box, bounding_box)
        assert (brick.physical_box == clipped_box).all()

        # The stored volume has the shape of the physical box...
        physical_shape = brick.physical_box[1] - brick.physical_box[0]
        assert (brick.volume.shape == physical_shape).all()

        # ...and holds the corresponding voxels of the source volume.
        assert (brick.volume == extract_subvol(volume, brick.physical_box)).all()

        # __sizeof__ must include the volume
        assert sys.getsizeof(brick) > sys.getsizeof(brick.volume)
Exemplo n.º 9
0
def test_generate_bricks_WITH_HALO():
    """
    Generate bricks on a grid with a halo of 1 and validate the brick count
    plus the boxes, shape and data of every brick.
    """
    halo = 1
    halo_shape = np.array([1, 1])
    grid = Grid((10, 20), (12, 3), halo)
    bounding_box = np.array([(15, 30), (95, 290)])
    volume = np.random.randint(0, 10, (100, 300))

    bricks, num_bricks = generate_bricks_from_volume_source(
        bounding_box, grid, partial(extract_subvol, volume), DebugClient())
    bricks = bricks.compute()

    expected_count = 9 * 14
    assert len(bricks) == expected_count == num_bricks

    for brick in bricks:
        assert isinstance(brick, Brick)
        assert brick.logical_box.shape == (2, 2)
        assert brick.physical_box.shape == (2, 2)

        # The logical box is exactly one grid block...
        logical_shape = brick.logical_box[1] - brick.logical_box[0]
        assert (logical_shape == grid.block_shape).all()

        # ...and it sits on the grid.
        misalignment = (brick.logical_box - grid.offset) % grid.block_shape
        assert (misalignment == 0).all()

        # Physical == logical+halo, except where clipped at bounding-box edges
        haloed_box = brick.logical_box + (-halo_shape, halo_shape)
        assert (brick.physical_box == box_intersection(haloed_box, bounding_box)).all()

        # The stored volume has the shape of the physical box...
        physical_shape = brick.physical_box[1] - brick.physical_box[0]
        assert (brick.volume.shape == physical_shape).all()

        # ...and holds the corresponding voxels of the source volume.
        assert (brick.volume == extract_subvol(volume, brick.physical_box)).all()
Exemplo n.º 10
0
    def get_voxels( cls, server, uuid, instance_name, scale,
                    instance_type, is_labels,
                    volume_shape, offset,
                    resource_server="", resource_port=0, throttle="auto", supervoxels=False, node_service=None):
        """
        Fetch a subvolume of voxels from the given data instance.

        The fetch strategy depends on the instance:
          - 'labelarray' / 'labelmap' instances are read through a
            block-aligned (64-voxel) request, and the requested region is
            then cropped out of the aligned result.
          - other label instances (is_labels=True) are read with get_labels3D().
          - everything else is read with get_gray3D().
        The latter two paths only support scale 0.

        Args:
            server, uuid, resource_server, resource_port:
                Used to obtain a node service when node_service is None.
            instance_name: Name of the data instance to read from.
            scale: Scale to read. Must be 0 unless the instance type is
                'labelarray' or 'labelmap'.
            instance_type: Instance type name, used to select the fetch strategy.
            is_labels: Whether a non-labelarray/labelmap instance holds labels.
            volume_shape: Shape of the requested subvolume.
            offset: Start coordinate of the requested subvolume.
            throttle: If "auto", throttling is enabled exactly when no
                resource_server is given. Otherwise passed through as-is.
            supervoxels: Forwarded to the label fetch calls.
            node_service: Optional pre-built node service to use.

        Returns:
            The fetched voxel data, as returned by the node service
            (cropped to the requested region in the block-aligned case).
        """
        if node_service is None:
            node_service = retrieve_node_service(server, uuid, resource_server, resource_port)

        if throttle == "auto":
            throttle = (resource_server == "")

        if instance_type in ('labelarray', 'labelmap'):
            # Labelarray data can be fetched very efficiently if the request is block-aligned
            # So, block-align the request no matter what.
            start = np.array(offset)
            aligned_start = start // 64 * 64
            aligned_stop = (start + volume_shape + 64-1) // 64 * 64
            aligned_volume = node_service.get_labelarray_blocks3D(
                instance_name, aligned_stop - aligned_start, aligned_start, throttle, scale, supervoxels )

            # Crop the originally requested region out of the aligned volume.
            start_within_aligned = start - aligned_start
            requested_box_within_aligned = ( start_within_aligned,
                                             start_within_aligned + volume_shape )
            return extract_subvol(aligned_volume, requested_box_within_aligned )

        if is_labels:
            assert scale == 0, "FIXME: get_labels3D() doesn't support scale yet!"
            # labelblk (or non-aligned labelarray) must be fetched the old-fashioned way
            return node_service.get_labels3D( instance_name, volume_shape, offset, throttle, compress=True, supervoxels=supervoxels )

        assert scale == 0, "FIXME: get_gray3D() doesn't support scale yet!"
        return node_service.get_gray3D( instance_name, volume_shape, offset, throttle, compress=False )
def test_copysegmentation_from_hdf5_to_dvid_input_mask(
        setup_hdf5_segmentation_input, disable_auto_retry):
    """
    Copy a segmentation to DVID while restricting the copy to even-valued
    labels ("input-mask-labels") and adding an ID offset, then let
    _run_to_dvid() compare the written volume against the expected one.
    """
    (template_dir, config, volume, dvid_address, repo_uuid,
     _output_segmentation_name) = setup_hdf5_segmentation_input

    # Write to a dedicated instance so we get a fresh output.
    output_segmentation_name = 'copyseg-with-input-mask'
    config["output"]["dvid"]["segmentation-name"] = output_segmentation_name

    # The mask consists of every even label ID present in the volume.
    unique_labels = pd.unique(volume.reshape(-1))
    is_even = (unique_labels % 2 == 0)
    config["copysegmentation"]["input-mask-labels"] = unique_labels[is_even].tolist()

    # Add an offset, which is added to both the input volume AND the mask labels
    offset = 2000
    config["copysegmentation"]["add-offset-to-ids"] = offset

    # Bounding box is stored in XYZ order; reverse the columns to get ZYX.
    input_box = np.array(config["input"]["geometry"]["bounding-box"])[:, ::-1]

    # Expected result: even voxels get the offset, odd voxels become 0,
    # and everything outside the input box stays 0.
    volume = np.where((volume % 2) == 0, volume + offset, 0)
    expected_vol = np.zeros_like(volume)
    overwrite_subvol(expected_vol, input_box, extract_subvol(volume, input_box))

    setup = (template_dir, config, expected_vol, dvid_address, repo_uuid,
             output_segmentation_name)
    _box_zyx, _expected_vol, _output_vol = _run_to_dvid(setup)
def test_copysegmentation_from_dvid_to_dvid_input_mask(
        setup_dvid_segmentation_input, disable_auto_retry):
    """
    Copy a segmentation from DVID to DVID while restricting the copy to a
    handful of labels ("input-mask-labels") and adding an ID offset, then let
    _run_to_dvid() compare the written volume against the expected one.
    """
    (template_dir, config, volume, dvid_address, repo_uuid,
     _output_segmentation_name) = setup_dvid_segmentation_input

    # Write to a dedicated instance so we get a fresh output.
    output_segmentation_name = 'copyseg-with-input-mask-from-dvid'
    config["output"]["dvid"]["segmentation-name"] = output_segmentation_name

    # Add an offset, which is added to both the input volume AND the mask labels
    offset = 2000
    config["copysegmentation"]["add-offset-to-ids"] = offset

    # Select some labels that don't extend throughout the whole volume
    selected_labels = pd.unique(volume[150, 64:128, 64:128].reshape(-1))
    assert 0 not in selected_labels

    # Bounding box of all voxels that carry one of the selected labels.
    label_mask = mask_for_labels(volume, selected_labels)
    selected_coords = np.array(label_mask.nonzero()).transpose()
    selected_box = np.array([selected_coords.min(axis=0),
                             1 + selected_coords.max(axis=0)])

    # Bounding box is stored in XYZ order; reverse the columns to get ZYX.
    input_box = np.array(config["input"]["geometry"]["bounding-box"])[:, ::-1]

    # Only the part of the selection that lies inside the input box matters.
    subvol_box = box_intersection(input_box, selected_box)
    selected_subvol = extract_subvol(volume, subvol_box).copy()
    selected_subvol = apply_mask_for_labels(selected_subvol, selected_labels)
    config["copysegmentation"]["input-mask-labels"] = selected_labels.tolist()

    # Expected result: non-zero voxels in the selection get the offset,
    # and everything else is 0.
    selected_subvol = np.where(selected_subvol, selected_subvol + offset, 0)
    expected_vol = np.zeros(volume.shape, np.uint64)
    overwrite_subvol(expected_vol, subvol_box, selected_subvol)

    setup = (template_dir, config, expected_vol, dvid_address, repo_uuid,
             output_segmentation_name)
    _box_zyx, _expected_vol, _output_vol = _run_to_dvid(setup)
def _run_to_dvid(setup, check_scale_0=True):
    """
    Dump the given config, run the workflow, and fetch what it wrote.

    Args:
        setup: Tuple of (template_dir, config, volume, dvid_address,
            repo_uuid, output_segmentation_name).
        check_scale_0: If True, assert that the fetched output equals the
            part of ``volume`` inside the workflow's input bounding box.

    Returns:
        (input_box_zyx, expected_vol, output_vol)

    Side effects:
        Rewrites '<template_dir>/workflow.yaml' and saves both volumes
        as .npy files under /tmp.
    """
    (template_dir, config, volume, dvid_address, repo_uuid,
     output_segmentation_name) = setup

    # re-dump config in case it's been changed by a specific test
    dumper = YAML()
    dumper.default_flow_style = False
    with open(f"{template_dir}/workflow.yaml", 'w') as config_file:
        dumper.dump(config, config_file)

    _execution_dir, workflow = launch_flow(template_dir, 1)
    final_config = workflow.config

    # Bounding boxes are stored in XYZ order; reverse the columns to get ZYX.
    input_box_zyx = np.array(
        final_config['input']['geometry']['bounding-box'])[:, ::-1]
    expected_vol = extract_subvol(volume, input_box_zyx)

    output_box_zyx = np.array(
        final_config['output']['geometry']['bounding-box'])[:, ::-1]
    output_vol = fetch_raw(dvid_address,
                           repo_uuid,
                           output_segmentation_name,
                           output_box_zyx,
                           dtype=np.uint64)

    # Keep copies of both volumes on disk.
    np.save('/tmp/output_vol.npy', output_vol)
    np.save('/tmp/expected_vol.npy', expected_vol)

    if check_scale_0:
        volumes_match = (output_vol == expected_vol).all()
        assert volumes_match, \
            "Written vol does not match expected"

    return input_box_zyx, expected_vol, output_vol
def test_dvid_volume_service_grayscale(setup_dvid_repo, disable_auto_retry):
    """
    Create a grayscale volume service from a config, verify that the
    expected instances were created, then write data at every scale and
    read a sub-box back to check the round trip.

    For each scale, the downsampled volume is placed into a zero-filled
    array whose shape is rounded up to a multiple of 64, and that aligned
    array is what gets written and later compared against.
    """
    server, uuid = setup_dvid_repo
    instance_name = 'test-dvs-grayscale'

    volume = np.random.randint(100, size=(256, 192, 128), dtype=np.uint8)
    max_scale = 2
    voxel_dimensions = [4.0, 4.0, 32.0]

    config_text = textwrap.dedent(f"""\
        dvid:
          server: {server}
          uuid: {uuid}
          grayscale-name: {instance_name}

          create-if-necessary: true
          creation-settings:
            max-scale: {max_scale}
            voxel-size: {voxel_dimensions}

        geometry:
          bounding-box: [[0,0,0], {list(volume.shape[::-1])}]
    """)

    yaml = YAML()
    with StringIO(config_text) as f:
        volume_config = yaml.load(f)

    # The instance must not exist until the service creates it.
    assert instance_name not in fetch_repo_instances(server, uuid)

    service = VolumeService.create_from_config(volume_config)

    repo_instances = fetch_repo_instances(server, uuid)

    info = fetch_instance_info(server, uuid, instance_name)
    assert info["Extended"]["VoxelSize"] == voxel_dimensions

    scaled_volumes = {}
    for scale in range(max_scale + 1):
        # Scale 0 uses the bare instance name; other scales use a "_<scale>" suffix.
        if scale == 0:
            assert instance_name in repo_instances
            assert repo_instances[instance_name] == 'uint8blk'
        else:
            assert f"{instance_name}_{scale}" in repo_instances
            assert repo_instances[f"{instance_name}_{scale}"] == 'uint8blk'

        vol = downsample(volume, 2**scale,
                         'label')  # label downsampling is easier to test with
        aligned_shape = (np.ceil(np.array(vol.shape) / 64) * 64).astype(int)
        aligned_vol = np.zeros(aligned_shape, np.uint8)

        # Copy the downsampled data into the corner of the aligned array.
        # (Previously aligned_vol was copied onto itself, so only zeros were
        # ever written and the read-back check below could never fail.)
        overwrite_subvol(aligned_vol, [(0, 0, 0), vol.shape], vol)

        service.write_subvolume(aligned_vol, (0, 0, 0), scale)
        scaled_volumes[scale] = aligned_vol

    # Read an arbitrary sub-box back at each scale and compare with what was written.
    box = np.array([[40, 80, 40], [240, 160, 100]])
    for scale in range(max_scale + 1):
        scaled_box = box // 2**scale
        vol = service.get_subvolume(scaled_box, scale)
        assert (vol == extract_subvol(scaled_volumes[scale], scaled_box)).all()
Exemplo n.º 15
0
def test_labelindex(labelmap_setup):
    """
    Post random single-label data, then check that the label index DVID
    generates matches one computed locally -- both as a pandas table and as
    a protobuf -- and that posting the index and fetching it again
    round-trips.
    """
    dvid_server, dvid_repo, _merge_table_path, _mapping_path, _supervoxel_vol = labelmap_setup

    # Need an unlocked node to test these posts
    uuid = post_branch(dvid_server, dvid_repo, 'test_labelindex',
                       'test_labelindex')
    instance_info = (dvid_server, uuid, 'segmentation-scratch')

    # Random data: every voxel is either 0 or the single supervoxel ID.
    sv = 99
    vol = sv * np.random.randint(2, size=(128, 128, 128), dtype=np.uint64)
    offset = np.array((64, 64, 64))

    # DVID will generate the index.
    post_labelmap_voxels(*instance_info, offset, vol)

    # Build the expected labelindex table ourselves,
    # one row per 64-wide block that the posted volume covers.
    rows = []
    for corner in ndrange(offset, offset + vol.shape, (64, 64, 64)):
        corner = np.array(corner)
        global_box = np.array((corner, corner + 64))

        # vol is indexed relative to the offset it was posted at.
        block = extract_subvol(vol, global_box - offset)
        rows.append([*corner, sv, (block == sv).sum()])

    index_df = pd.DataFrame(rows, columns=['z', 'y', 'x', 'sv', 'count'])

    # Check DVID's generated labelindex table against expected
    dvid_index = fetch_labelindex(*instance_info, sv, format='pandas')
    assert dvid_index.label == sv

    dvid_index.blocks.sort_values(['z', 'y', 'x', 'sv'], inplace=True)
    dvid_index.blocks.reset_index(drop=True, inplace=True)
    assert (dvid_index.blocks == index_df).all().all()

    # Check our protobuf against DVID's
    index_tuple = PandasLabelIndex(index_df, sv, 1,
                                   datetime.datetime.now().isoformat(),
                                   'someuser')
    labelindex = create_labelindex(index_tuple)

    def same_blocks(left, right):
        # Since labelindex block entries are not required to be sorted,
        # dvid might return them in a different order.
        # Hence both sides are sorted before comparing.
        return sorted(left.blocks.items()) == sorted(right.blocks.items())

    fetched = fetch_labelindex(*instance_info, sv, format='protobuf')
    assert same_blocks(labelindex, fetched)

    # Check post/get roundtrip
    post_labelindex(*instance_info, sv, labelindex)
    fetched = fetch_labelindex(*instance_info, sv, format='protobuf')
    assert same_blocks(labelindex, fetched)
Exemplo n.º 16
0
def test_pad_brick_data_from_volume_source_NO_PADDING_NEEDED():
    """
    When no padding is needed for the given padding grid, the very same
    brick object must be returned.
    """
    source_volume = np.random.randint(0, 10, (100, 300))

    logical_box = [(1, 0), (11, 20)]
    physical_box = [(6, 10), (11, 15)]
    brick = Brick(logical_box,
                  physical_box,
                  extract_subvol(source_volume, physical_box))

    padding_grid = Grid((5, 5), offset=(1, 0))
    read_source = partial(extract_subvol, source_volume)
    padded_brick = pad_brick_data_from_volume_source(padding_grid, read_source, brick)

    # Identity, not mere equality: no new brick may be created.
    assert padded_brick is brick, "Expected to get the same brick back."
Exemplo n.º 17
0
def split_brick(new_grid, original_brick):
    """
    Split a single brick into fragments according to a new grid.

    Each fragment is itself a Brick, but with a relatively small volume and
    physical_box: it holds only the part of the original brick's data that
    falls into one box of the new grid, and its logical_box is the NEW
    grid box it belongs to.

    Note: It is probably a mistake to call this function for Bricks which have
          a larger physical_box than logical_box, so that is currently forbidden
          (an assertion fails). It would work here, but it implies that you
          will end up with some voxels represented multiple times in a given
          RDD of Bricks, with undefined results as to which ones are kept
          after you consolidate them into a new alignment.

          However, the reverse is permitted, i.e. it is permitted for the
          DESTINATION grid to use a halo, in which case some pixels in the
          original brick will be duplicated to multiple destinations.

    Returns: [(box, Brick), (box, Brick), ....],
            where each Brick is a fragment (to be assembled later into the new grid's bricks),
            and 'box' is the logical_box of the Brick into which this fragment should be assembled.
    """
    def make_keyed_fragment(destination_box):
        # Physical intersection of the original brick with the new box
        fragment_box = box_intersection(destination_box, original_brick.physical_box)

        # The brick's volume is indexed relative to its physical origin,
        # so shift the fragment box into those local coordinates.
        local_box = fragment_box - original_brick.physical_box[0]
        fragment_vol = extract_subvol(original_brick.volume, local_box)

        # The destination box includes the halo; strip it to get the logical box.
        new_logical_box = destination_box - (-new_grid.halo_shape, new_grid.halo_shape)

        fragment = Brick(new_logical_box, fragment_box, fragment_vol)
        fragment.compress()

        # The key is the new logical box, but with a special type and hash,
        # to avoid bad collisions with the default spark hash function.
        key = rt.tuple_with_hash( box_as_tuple(new_logical_box) )
        key.set_hash( hash(tuple(new_logical_box[0] / new_grid.block_shape)) )
        return (key, fragment)

    # Forbid out-of-bounds physical_boxes. (See note above.)
    starts_inside = (original_brick.physical_box[0] >= original_brick.logical_box[0]).all()
    assert (starts_inside and
            (original_brick.physical_box[1] <= original_brick.logical_box[1]).all())

    # One fragment per new box that intersects the original brick,
    # to be assembled into the final bricks in a later stage.
    destination_boxes = boxes_from_grid(original_brick.physical_box, new_grid, include_halos=True)
    return [make_keyed_fragment(destination_box) for destination_box in destination_boxes]
Exemplo n.º 18
0
def test_split_brick():
    """
    Split the first brick of a grid onto a finer grid and check both the
    logical boxes of the resulting fragments and their voxel data.
    """
    grid = Grid((10, 20), (12, 3))
    volume = np.random.randint(0, 10, (100, 300))

    # Build the first brick in the grid by hand.
    physical_start = np.array(grid.offset)
    logical_start = physical_start // grid.block_shape * grid.block_shape
    logical_stop = logical_start + grid.block_shape

    # Not always true, but happens to be true in this case.
    physical_stop = logical_stop

    logical_box = np.array([logical_start, logical_stop])
    physical_box = np.array([physical_start, physical_stop])

    # Sanity-check the hand-computed boxes.
    assert (logical_box == [(10, 0), (20, 20)]).all()
    assert (physical_box == [(12, 3), (20, 20)]).all()

    original_brick = Brick(logical_box,
                           physical_box,
                           extract_subvol(volume, physical_box))

    # New grid scheme
    new_grid = Grid((2, 10), (0, 0))
    fragments = split_brick(new_grid, original_brick)
    boxes = [box_as_tuple(frag.logical_box) for frag in fragments]

    # The new boxes ((10, 0), (14, 10)) and ((10, 10), (14, 20)) intersect
    # with the original logical_box, but there is no physical data for them
    # in the original brick, so they must not be present.
    expected_boxes = [
        ((12, 0), (14, 10)),
        ((12, 10), (14, 20)),
        ((14, 0), (16, 10)),
        ((14, 10), (16, 20)),
        ((16, 0), (18, 10)),
        ((16, 10), (18, 20)),
        ((18, 0), (20, 10)),
        ((18, 10), (20, 20)),
    ]
    assert boxes == expected_boxes

    # Every fragment must carry exactly the source voxels of its physical box.
    for frag in fragments:
        assert (frag.volume == extract_subvol(volume, frag.physical_box)).all()
Exemplo n.º 19
0
    def _execute_scale(self, scale, starting_batch, mask_s5, mask_box_s5):
        """
        Process all bricks of one scale, in batches.

        The bounding box of the input is scaled down to the given scale and
        divided into bricks. Bricks whose portion of the mask is entirely
        False are dropped. The remaining bricks are grouped into batches of
        the configured "batch-size", and each batch from ``starting_batch``
        onward is handed to self._execute_batch().

        Args:
            scale: The scale to process.
            starting_batch: Index of the first batch to process.
                Must be smaller than the total number of batches when non-zero.
            mask_s5: Mask array. Judging by the name and the 2**(5 - scale)
                factor used below, it is stored at scale 5.
            mask_box_s5: Box of mask_s5; only its start corner is used here,
                to translate brick boxes into mask_s5 array coordinates.
        """
        options = self.config["masksegmentation"]
        block_width = self.output_service.block_width

        def scale_box(box, scale):
            # Scale down, then round up to the nearest multiple of the block width
            box = np.ceil(box / 2**scale).astype(np.int32)
            return round_box(box, block_width)

        # bounding box of the segmentation at the current scale.
        bounding_box = scale_box(self.input_service.bounding_box_zyx, scale)

        # Don't make bricks that are wider than the bounding box at this scale
        brick_shape = np.minimum(self.input_service.preferred_message_shape,
                                 bounding_box[1])
        assert not (brick_shape % block_width).any()

        brick_boxes = boxes_from_grid(bounding_box, brick_shape, clipped=True)

        with Timer(f"Scale {scale}: Preparing bricks", logger):
            boxes_and_masks = []
            for box in brick_boxes:
                # Convert the brick box to the mask's resolution and
                # translate it into mask_s5 array coordinates.
                mask_block_box = ((box // 2**(5 - scale)) - mask_box_s5[0])
                mask_block_box = mask_block_box.astype(
                    np.int32)  # necessary when scale is > 5

                # (A zero-filled placeholder used to be allocated here and
                # then immediately overwritten; that dead store is removed.)
                mask_block_s5 = extract_subvol(mask_s5, mask_block_box)

                # Only keep bricks that the mask actually touches.
                if mask_block_s5.any():
                    boxes_and_masks.append((box, mask_block_s5))

        batches = [*iter_batches(boxes_and_masks, options["batch-size"])]

        if starting_batch == 0:
            logger.info(f"Scale {scale}: Processing {len(batches)} batches")
        else:
            logger.info(
                f"Scale {scale}: Processing {len(batches) - starting_batch} "
                f"remaining batches from {len(batches)} original batches")

            assert starting_batch < len(batches), \
                f"Can't start at batch {starting_batch}; there are only {len(batches)} in total."
            batches = batches[starting_batch:]

        # Number the batches by their original index, so that a resumed run
        # reports the same batch indexes as an uninterrupted one.
        for batch_index, batch_boxes_and_masks in enumerate(
                batches, start=starting_batch):
            with Timer(f"Scale {scale}: Batch {batch_index:02d}", logger):
                self._execute_batch(scale, batch_index, batch_boxes_and_masks)
Exemplo n.º 20
0
        def downsample_brick(brick):
            """
            Return a new Brick holding the given brick's data downsampled by
            ``factor`` using ``method`` (both taken from the enclosing scope).

            The brick's logical_box must be divisible by the factor.
            If its physical_box is not, the volume is first cropped to the
            largest box inside it that is.
            """
            assert (brick.logical_box % factor == 0).all()

            physical_box = brick.physical_box
            needs_clipping = (physical_box % factor != 0).any()

            if needs_clipping:
                # If the factor doesn't perfectly divide into
                # the brick's physical dimensions,
                # then chop off the edges until it does.
                clipped_box = round_box(physical_box, factor, 'in')
                volume = extract_subvol(brick.volume,
                                        clipped_box - brick.physical_box[0])
            else:
                clipped_box, volume = physical_box, brick.volume

            # Both boxes shrink by the same factor as the data.
            return Brick(brick.logical_box // factor,
                         clipped_box // factor,
                         downsample(volume, factor, method),
                         compression=brick.compression)
Exemplo n.º 21
0
def test_compression():
    """
    For every supported compression method, build a wall of bricks,
    compress them, and check that they can be pickled and that their
    volume data still matches the source afterwards.
    """
    vol_box = [(0, 0, 0), (100, 100, 120)]
    volume = np.random.randint(10, size=vol_box[1], dtype=np.uint64)

    def check_pickle(brick):
        # Only verifies that pickling does not raise.
        pickle.dumps(brick)

    def check_brick(brick):
        physical_shape = brick.physical_box[1] - brick.physical_box[0]
        assert (brick.volume.shape == physical_shape).all()
        assert (brick.volume == extract_subvol(volume, brick.physical_box)).all()

    for method in COMPRESSION_METHODS:
        wall = BrickWall.from_accessor_func(
            vol_box,
            Grid((64, 64, 128)),
            lambda box: extract_subvol(volume, box),
            compression=method)

        # Compress them all
        wall.bricks.map(Brick.compress).compute()

        # Pickle them all
        wall.bricks.map(check_pickle).compute()

        # Check them all (implicit decompression)
        wall.bricks.map(check_brick).compute()
def mitos_in_neighborhood(mito_roi_source, neighborhood_origin_xyz,
                          neighborhood_id, mito_res_scale_diff):
    """
    Tabulate the non-trivial mito objects that overlap with the given
    "neighborhood object", along with their sizes.

    Procedure:

    1. Download the neighborhood segmentation around the given origin and
       select the voxels belonging to ``neighborhood_id``.
    2. Erode that mask by 1 px (see the note in the comment above this function).
    3. Fetch the mito segmentation for the same box and keep only the voxels
       that fall within the eroded neighborhood mask.
    4. Fetch (from dvid) the size of each mito object that was found.
    5. Discard the mitos that are smaller than ``MIN_MITO_SIZE``.
    6. Just for additional info, label the connected components formed
       by the remaining mito objects.
    7. Return the mito IDs, sizes, and connected-component info as a DataFrame.

    Args:
        mito_roi_source:
            String of the form ``<protocol>://<server>/<uuid>/<instance>``
            naming the neighborhood segmentation.
        neighborhood_origin_xyz:
            Center of the neighborhood, in XYZ order.
        neighborhood_id:
            Label of the neighborhood object within the neighborhood segmentation.
        mito_res_scale_diff:
            Difference in scale between the mito segmentation and the
            neighborhood segmentation.  The mito segmentation is fetched at scale
            ``ANALYSIS_SCALE - mito_res_scale_diff`` and the fetched sizes are
            multiplied by ``(2**mito_res_scale_diff)**3``.

    Returns:
        DataFrame indexed by 'mito', holding each mito's size joined (left join)
        with the 'cc' and 'cc_size' columns from the connected-components table.
    """
    # Parse the neighborhood segmentation source
    protocol, url = mito_roi_source.split('://')[-2:]
    server, uuid, instance = url.split('/')
    server = f'{protocol}://{server}'

    downscale = 2**ANALYSIS_SCALE
    center_zyx = np.array(neighborhood_origin_xyz[::-1])

    # Align the box to the analysis scale first, then express it at that scale.
    box = round_box([center_zyx - RADIUS, 1 + center_zyx + RADIUS], downscale)
    box //= downscale

    neighborhood_seg = fetch_labelmap_voxels(server, uuid, instance, box,
                                             scale=ANALYSIS_SCALE)
    neighborhood_mask = (neighborhood_seg == neighborhood_id)

    # Removing the inner edge voxels is equivalent to a 1-px erosion.
    # See note above for why we do this.
    neighborhood_mask ^= binary_edge_mask(neighborhood_mask, 'inner')

    mito_scale = ANALYSIS_SCALE - mito_res_scale_diff
    mito_seg = fetch_labelmap_voxels(*MITO_SEG, box,
                                     supervoxels=True,
                                     scale=mito_scale)
    assert neighborhood_mask.shape == mito_seg.shape
    mito_seg = np.where(neighborhood_mask, mito_seg, 0)

    # The mito segmentation includes little scraps and slivers
    # that were filtered out of the "real" mito set.
    # Fetch every object's size so those scraps can be dropped below.
    found_ids = set(pd.unique(mito_seg.ravel())) - {0}
    mito_sizes = fetch_sizes(*MITO_SEG, [*found_ids], supervoxels=True)
    mito_sizes = mito_sizes.rename_axis('mito')
    mito_sizes *= (2**mito_res_scale_diff)**3

    # Main result: the mito IDs (and sizes) that pass the size threshold
    mito_sizes = mito_sizes.loc[mito_sizes >= MIN_MITO_SIZE]

    # Extra info: group the surviving mitos into connected components,
    # working only within the bounding box of the surviving voxels.
    kept_mask = mask_for_labels(mito_seg, mito_sizes.index)
    kept_box = compute_nonzero_box(kept_mask)
    kept_mask = extract_subvol(kept_mask, kept_box)
    mito_seg = extract_subvol(mito_seg, kept_box)
    mito_cc = label(kept_mask, connectivity=1)

    column_names = {
        'left': 'mito',
        'right': 'cc',
        'voxel_count': 'cc_size'
    }
    cc_table = contingency_table(mito_seg, mito_cc).reset_index()
    cc_table = cc_table.rename(columns=column_names).set_index('mito')

    return pd.DataFrame(mito_sizes).merge(cc_table,
                                          'left',
                                          left_index=True,
                                          right_index=True)
def test_dvid_volume_service_labelmap(setup_dvid_repo, random_segmentation,
                                      disable_auto_retry):
    """
    Round-trip a labelmap volume through a DVID-backed VolumeService.

    The test creates a new labelmap instance from a YAML config, writes a
    downsample pyramid into it, and then verifies that:

    - the instance was created with the requested type and voxel size,
    - subvolumes read back at every scale match what was written,
    - ``sparse_brick_coords_for_labels()`` lists every (brick corner, label)
      combination present in the volume,
    - ``sample_labels()`` returns the label stored under each sampled point.
    """
    server, uuid = setup_dvid_repo
    instance_name = 'test-dvs-labelmap'

    volume = random_segmentation[:256, :192, :128]
    max_scale = 2
    voxel_dimensions = [4.0, 4.0, 32.0]

    config_text = textwrap.dedent(f"""\
        dvid:
          server: {server}
          uuid: {uuid}
          segmentation-name: {instance_name}
          supervoxels: true

          create-if-necessary: true
          creation-settings:
            max-scale: {max_scale}
            voxel-size: {voxel_dimensions}

        geometry:
          bounding-box: [[0,0,0], {list(volume.shape[::-1])}]
          message-block-shape: [64,64,64]
    """)

    yaml = YAML()
    with StringIO(config_text) as f:
        volume_config = yaml.load(f)

    # The instance must be created by the service, not left over from another test.
    assert instance_name not in fetch_repo_instances(server, uuid)

    service = VolumeService.create_from_config(volume_config)

    repo_instances = fetch_repo_instances(server, uuid)

    assert instance_name in repo_instances
    assert repo_instances[instance_name] == 'labelmap'

    info = fetch_instance_info(server, uuid, instance_name)
    assert info["Extended"]["VoxelSize"] == voxel_dimensions

    # Write each scale, padded with zeros up to a multiple of the 64px block size.
    scaled_volumes = {}
    for scale in range(max_scale + 1):
        vol = downsample(volume, 2**scale, 'label')
        aligned_shape = (np.ceil(np.array(vol.shape) / 64) * 64).astype(int)
        aligned_vol = np.zeros(aligned_shape, np.uint64)
        overwrite_subvol(aligned_vol, [(0, 0, 0), vol.shape], vol)

        service.write_subvolume(aligned_vol, (0, 0, 0), scale)
        scaled_volumes[scale] = aligned_vol

    # Read back a non-aligned box at every scale.
    box = np.array([[40, 80, 40], [240, 160, 100]])
    for scale in range(max_scale + 1):
        scaled_box = box // 2**scale
        vol = service.get_subvolume(scaled_box, scale)
        assert (vol == extract_subvol(scaled_volumes[scale], scaled_box)).all()

    #
    # Check sparse coords function
    #
    labels = list({*pd.unique(volume.reshape(-1))} - {0})
    brick_coords_df = service.sparse_brick_coords_for_labels(labels)

    assert brick_coords_df.columns.tolist() == ['z', 'y', 'x', 'label']
    assert set(brick_coords_df['label'].values) == set(labels), \
        "Some labels were missing from the sparse brick coords!"

    def ndi(shape):
        # All voxel coordinates of an array of the given shape, one row per voxel.
        return np.indices(shape).reshape(len(shape), -1).transpose()

    expected_df = pd.DataFrame(ndi(volume.shape), columns=[*'zyx'])

    # Snap each voxel to the corner of its 64px brick,
    # and keep one row per (brick corner, label) combination.
    expected_df['label'] = volume.reshape(-1)
    expected_df['z'] //= 64
    expected_df['y'] //= 64
    expected_df['x'] //= 64
    expected_df = expected_df.drop_duplicates()
    expected_df['z'] *= 64
    expected_df['y'] *= 64
    expected_df['x'] *= 64

    # Sort into new frames rather than in place: 'expected_df' comes out of
    # query() (in-place edits of such results can trigger pandas' chained
    # assignment warning) and 'brick_coords_df' belongs to the service.
    sort_cols = ['z', 'y', 'x', 'label']
    expected_df = (expected_df.query('label != 0')
                              .sort_values(sort_cols)
                              .reset_index(drop=True))
    brick_coords_df = (brick_coords_df.sort_values(sort_cols)
                                      .reset_index(drop=True))

    assert expected_df.shape == brick_coords_df.shape
    assert (brick_coords_df == expected_df).all().all()

    #
    # Check sample_labels()
    #
    # Draw the sample points from the whole volume.  (Using the shape of the
    # leftover loop variable 'vol' would restrict the samples to the extents of
    # the last, downscaled subvolume fetched above.)
    points = [np.random.randint(d, size=(10, )) for d in volume.shape]
    points = np.transpose(points)
    labels = service.sample_labels(points)
    assert (labels == volume[(*points.transpose(), )]).all()
def test_masksegmentation_basic(setup_dvid_segmentation_input, invert_mask,
                                roi_dilation, disable_auto_retry):
    """
    Run the masksegmentation workflow and verify everything it produces.

    Checks, in order:

    - the scale-0 output equals the input with the ROI voxels zeroed out,
    - the downsampled output scales match a locally computed pyramid
      (exactly up to scale 5; only the zero/nonzero pattern beyond that),
    - the exported 'erased-block-statistics.h5' table matches the block
      statistics of the erased voxels,
    - after ``erase_from_labelindexes()``, the deleted-supervoxel list,
      the per-supervoxel counts in the label indexes, and the mapping
      are all consistent with the expected volume.

    When ``roi_dilation`` is nonzero, the test returns early, since the
    voxel-accuracy of the dilation is not verified yet.
    """
    template_dir, config, volume, dvid_address, repo_uuid, roi_mask_s5, input_segmentation_name, output_segmentation_name = setup_dvid_segmentation_input

    if invert_mask:
        roi_mask_s5 = ~roi_mask_s5

    config["masksegmentation"]["invert-mask"] = invert_mask
    config["masksegmentation"]["dilate-roi"] = roi_dilation

    # re-dump config
    yaml = YAML()
    yaml.default_flow_style = False
    with open(f"{template_dir}/workflow.yaml", 'w') as f:
        yaml.dump(config, f)

    execution_dir, workflow = launch_flow(template_dir, 1)
    final_config = workflow.config

    input_box_xyz = np.array(final_config['input']['geometry']['bounding-box'])
    input_box_zyx = input_box_xyz[:, ::-1]

    # The ROI mask is stored at scale 5; bring it to scale 0 before using it.
    roi_mask = upsample(roi_mask_s5, 2**5)
    roi_mask = extract_subvol(roi_mask, input_box_zyx)

    expected_vol = extract_subvol(volume.copy(), input_box_zyx)
    expected_vol[roi_mask] = 0

    output_box_xyz = np.array(
        final_config['output']['geometry']['bounding-box'])
    output_box_zyx = output_box_xyz[:, ::-1]
    output_vol = fetch_labelmap_voxels(dvid_address,
                                       repo_uuid,
                                       output_segmentation_name,
                                       output_box_zyx,
                                       scale=0,
                                       supervoxels=True)

    # Create a copy of the volume that contains only the voxels we removed
    erased_vol = volume.copy()
    erased_vol[~roi_mask] = 0

    if EXPORT_DEBUG_FILES:
        original_vol = fetch_labelmap_voxels(dvid_address,
                                             repo_uuid,
                                             input_segmentation_name,
                                             output_box_zyx,
                                             scale=0,
                                             supervoxels=True)
        original_agglo_vol = fetch_labelmap_voxels(dvid_address,
                                                   repo_uuid,
                                                   input_segmentation_name,
                                                   output_box_zyx,
                                                   scale=0)
        output_agglo_vol = fetch_labelmap_voxels(dvid_address,
                                                 repo_uuid,
                                                 output_segmentation_name,
                                                 output_box_zyx,
                                                 scale=0)
        np.save('/tmp/original-svs.npy', original_vol)
        np.save('/tmp/original-agglo.npy', original_agglo_vol)
        np.save('/tmp/output.npy', output_vol)
        np.save('/tmp/output-agglo.npy', output_agglo_vol)
        np.save('/tmp/expected.npy', expected_vol)
        np.save('/tmp/erased.npy', erased_vol)

        shutil.copyfile(f'{execution_dir}/roi-mask.h5', '/tmp/roi-mask.h5')
        if roi_dilation:
            shutil.copyfile(f'{execution_dir}/dilated-roi-mask.h5',
                            '/tmp/dilated-roi-mask.h5')
        if invert_mask:
            shutil.copyfile(f'{execution_dir}/segmentation-mask.h5',
                            '/tmp/segmentation-mask.h5')
        shutil.copyfile(f'{execution_dir}/final-mask.h5', '/tmp/final-mask.h5')

    if roi_dilation > 0:
        # FIXME: We don't yet verify voxel-accuracy of ROI dilation.
        return

    assert (output_vol == expected_vol).all(), \
        "Written vol does not match expected"

    # Build the expected pyramid one level at a time and compare each level.
    scaled_expected_vol = expected_vol
    for scale in range(1, 1 + MAX_SCALE):
        scaled_expected_vol = downsample(scaled_expected_vol, 2,
                                         'labels-numba')
        scaled_output_vol = fetch_labelmap_voxels(dvid_address,
                                                  repo_uuid,
                                                  output_segmentation_name,
                                                  output_box_zyx // 2**scale,
                                                  scale=scale,
                                                  supervoxels=True)

        if EXPORT_DEBUG_FILES:
            np.save(f'/tmp/expected-{scale}.npy', scaled_expected_vol)
            np.save(f'/tmp/output-{scale}.npy', scaled_output_vol)

        if scale <= 5:
            assert (scaled_output_vol == scaled_expected_vol).all(), \
                f"Written vol does not match expected at scale {scale}"
        else:
            # For scale 6 and 7, some blocks are not even changed,
            # but that means we would be comparing DVID's label
            # downsampling method to our method ('labels-numba').
            # The two don't necessarily give identical results in the case of 'ties',
            # so we'll just verify that the nonzero voxels match, at least.
            assert ((scaled_output_vol == 0) == (scaled_expected_vol == 0)).all(), \
                f"Written vol does not match expected at scale {scale}"

    block_stats_path = f'{execution_dir}/erased-block-statistics.h5'
    with h5py.File(block_stats_path, 'r') as f:
        stats_df = pd.DataFrame(f['stats'][:])

    #
    # Check the exported block statistics
    #
    stats_cols = [*BLOCK_STATS_DTYPES.keys()]
    assert stats_df.columns.tolist() == stats_cols

    # Drop the old index when renumbering.  Otherwise each table's pre-sort row
    # order would be kept as an extra 'index' column and take part in the
    # comparison below, making the test depend on the original row order.
    stats_df = stats_df.sort_values(stats_cols).reset_index(drop=True)

    expected_stats_df = block_stats_for_volume((64, 64, 64), erased_vol,
                                               input_box_zyx)
    expected_stats_df = expected_stats_df.sort_values(stats_cols).reset_index(drop=True)

    assert len(stats_df) == len(expected_stats_df)
    assert (stats_df == expected_stats_df).all().all()

    #
    # Try updating the labelindexes
    #
    src_info = (dvid_address, repo_uuid, input_segmentation_name)
    dest_info = (dvid_address, repo_uuid, output_segmentation_name)
    with switch_cwd(execution_dir):
        erase_from_labelindexes(src_info,
                                dest_info,
                                block_stats_path,
                                batch_size=10,
                                threads=4)

    # Verify deleted supervoxels
    assert os.path.exists(f'{execution_dir}/deleted-supervoxels.csv')
    deleted_svs = set(
        pd.read_csv(f'{execution_dir}/deleted-supervoxels.csv')['sv'])

    orig_svs = {*pd.unique(volume.reshape(-1))} - {0}
    remaining_svs = {*pd.unique(expected_vol.reshape(-1))} - {0}
    expected_deleted_svs = orig_svs - remaining_svs
    assert deleted_svs == expected_deleted_svs

    # Verify remaining sizes
    expected_sv_counts = (pd.Series(
        expected_vol.reshape(-1),
        name='sv').value_counts().drop(0).sort_index().rename('count'))

    # Pass the supervoxels as a list, like every other fetch_mapping() call in this test.
    index_dfs = []
    for body in np.unique(fetch_mapping(*dest_info, [*remaining_svs])):
        index_df = fetch_labelindex(*dest_info, body, format='pandas').blocks
        index_dfs.append(index_df)

    sv_counts = (pd.concat(index_dfs, ignore_index=True)[[
        'sv', 'count'
    ]].groupby('sv')['count'].sum().sort_index())
    assert set(sv_counts.index.values) == set(expected_sv_counts.index.values)
    assert (sv_counts == expected_sv_counts).all(), \
        pd.DataFrame({'stored_count': sv_counts, 'expected_count': expected_sv_counts}).query('stored_count != expected_count')

    # Verify mapping
    # Deleted supervoxels exist in the mapping, but they map to 0.
    assert (fetch_mapping(*dest_info, [*deleted_svs]) == 0).all()

    # Remaining supervoxels still map to their original bodies
    assert (fetch_mapping(*dest_info, [*remaining_svs]) == fetch_mapping(
        *src_info, [*remaining_svs])).all()
Exemplo n.º 25
0
    def init_boxes(self, volume_service, subset_labels, roi):
        """
        Determine the list of brick boxes to process.

        Args:
            volume_service:
                The input service.  Its ``bounding_box_zyx`` and
                ``preferred_message_shape`` define the brick grid.
            subset_labels:
                Optional collection of labels.  If given (and no ``roi`` is given),
                only the bricks listed by the service's sparse block mask for these
                labels are returned.  If the service does not implement sparse
                masks (raises NotImplementedError), the dense grid is used instead.
            roi:
                Optional name of a DVID ROI.  If given, only the bricks that
                intersect the ROI are returned.  Requires a DVID-backed service,
                optionally wrapped in a rescale adapter of at most 5 levels.

        Returns:
            Array of boxes, indexed as ``boxes[i, 0, :]`` (start) and
            ``boxes[i, 1, :]`` (stop), clipped to the service's bounding box.

        Raises:
            RuntimeError: if the sparse mask for ``subset_labels`` is empty.
        """
        # Used by the ROI branch AND by the sparse-box computation at the end,
        # so it must be defined no matter which branch produces the mask.
        brick_shape = volume_service.preferred_message_shape

        sbm = None
        if roi:
            base_service = volume_service.base_service
            assert isinstance(base_service, DvidVolumeService), \
                "Can't specify an ROI unless you're using a dvid input"

            assert isinstance(volume_service, (ScaledVolumeService, DvidVolumeService)), \
                "The 'roi' option doesn't support adapters other than 'rescale-level'"
            scale = 0
            if isinstance(volume_service, ScaledVolumeService):
                scale = volume_service.scale_delta
                assert scale <= 5, \
                    "The 'roi' option doesn't support volumes downscaled beyond level 5"

            server, uuid, _seg_instance = base_service.instance_triple

            # One ROI (scale-5) voxel spans this many voxels of the input service.
            roi_voxel_width = 2**(5 - scale)

            assert not (brick_shape % roi_voxel_width).any(), \
                "If using an ROI, select a brick shape that is divisible by 32"

            seg_box = volume_service.bounding_box_zyx
            seg_box = round_box(seg_box, brick_shape)
            seg_box_s5 = seg_box // roi_voxel_width

            with Timer(f"Fetching mask for ROI '{roi}'", logger):
                roi_mask_s5, roi_box_s5 = fetch_roi(server,
                                                    uuid,
                                                    roi,
                                                    format='mask')

            # Restrict to input bounding box
            clipped_roi_box_s5 = box_intersection(seg_box_s5, roi_box_s5)
            clipped_roi_mask_s5 = extract_subvol(
                roi_mask_s5, clipped_roi_box_s5 - roi_box_s5[0])

            # Align to brick grid.
            # The brick shape is given in service voxels, so its scale-5 size is
            # brick_shape // 2**(5-scale).  (Dividing by 2**5 is only correct for
            # scale 0, and can even yield 0 for small bricks at higher scales.)
            aligned_roi_box_s5 = round_box(clipped_roi_box_s5,
                                           brick_shape // roi_voxel_width, 'out')
            padding = (aligned_roi_box_s5 - clipped_roi_box_s5)
            padding[0] *= -1  # The start was moved outward, i.e. by a negative amount
            aligned_roi_mask_s5 = np.pad(clipped_roi_mask_s5,
                                         padding.transpose())

            # At the service native scale
            aligned_roi_box = (roi_voxel_width * aligned_roi_box_s5)
            logger.info(
                f"Brick-aligned ROI '{roi}' has bounding-box {aligned_roi_box[:, ::-1].tolist()}"
            )

            # SBM 'full-res' corresponds to the input service voxels, not necessarily scale-0.
            sbm = SparseBlockMask.create_from_highres_mask(
                aligned_roi_mask_s5, roi_voxel_width, aligned_roi_box,
                brick_shape)
        elif subset_labels:
            try:
                sbm = volume_service.sparse_block_mask_for_labels(
                    [*subset_labels])
                if ((sbm.box[1] - sbm.box[0]) == 0).any():
                    raise RuntimeError(
                        "Could not find sparse masks for any of the subset-labels"
                    )
            except NotImplementedError:
                # This service can't produce sparse masks; fall back to the dense grid.
                sbm = None

        if sbm is None:
            boxes = boxes_from_grid(volume_service.bounding_box_zyx,
                                    brick_shape,
                                    clipped=True)
            return np.array([*boxes])

        boxes = np.array(sbm.sparse_boxes(brick_shape))

        # Clip to the service's bounding box
        boxes[:, 0, :] = np.maximum(volume_service.bounding_box_zyx[0],
                                    boxes[:, 0, :])
        boxes[:, 1, :] = np.minimum(volume_service.bounding_box_zyx[1],
                                    boxes[:, 1, :])
        assert (boxes[:,0,:] < boxes[:,1,:]).all(), \
            "After cropping to input volume, some bricks disappeared."

        return boxes
Exemplo n.º 26
0
    def _init_masks(self):
        """
        Initialize the mask-related members from the "copysegmentation" config:

        - ``self.sbm``: the sparse block mask used to limit which bricks are
          processed, or None.  It comes from (in order of precedence) the pickled
          'sparse-block-mask' file, or the sparse masks of the input and/or output
          mask labels.  When both label masks are available, their intersection
          is used.
        - ``self.output_mask_labels``: set of labels loaded from 'output-mask-labels'.
        - ``self.input_mask_labels``: set of labels loaded from 'input-mask-labels',
          shifted by 'add-offset-to-ids' if that is nonzero.

        The output mask is only used if the input and output services share the
        same brick shape and bounding box; otherwise a warning is logged.
        Services that raise NotImplementedError for sparse masks are tolerated.
        """
        options = self.config["copysegmentation"]
        self.sbm = None

        if options["sparse-block-mask"]:
            # In theory, we could just take the intersection of the masks involved.
            # But I'm too lazy to think about that right now.
            assert not options["input-mask-labels"] and not options["output-mask-labels"], \
                "Not Implemented: Can't use sparse-block-mask in conjunction with input-mask-labels or output-mask-labels"

            # NOTE(review): pickle.load() can execute arbitrary code contained in
            # the file.  Only point 'sparse-block-mask' at files you trust.
            with open(options["sparse-block-mask"], 'rb') as f:
                self.sbm = pickle.load(f)

        is_supervoxels = False
        if isinstance(self.input_service.base_service, DvidVolumeService):
            is_supervoxels = self.input_service.base_service.supervoxels

        output_mask_labels = load_body_list(options["output-mask-labels"],
                                            is_supervoxels)
        self.output_mask_labels = set(output_mask_labels)

        output_sbm = None
        if len(output_mask_labels) > 0:
            if (self.output_service.preferred_message_shape !=
                    self.input_service.preferred_message_shape).any():
                logger.warning(
                    "Not using output mask to reduce data fetching: Your input service and output service don't have the same brick shape"
                )
            elif (self.output_service.bounding_box_zyx !=
                  self.input_service.bounding_box_zyx).any():
                logger.warning(
                    "Not using output mask to reduce data fetching: Your input service and output service don't have the same bounding box"
                )
            else:
                try:
                    output_sbm = self.output_service.sparse_block_mask_for_labels(
                        output_mask_labels)
                except NotImplementedError:
                    output_sbm = None

        input_mask_labels = load_body_list(options["input-mask-labels"],
                                           is_supervoxels)

        input_sbm = None
        if len(input_mask_labels) > 0:
            try:
                input_sbm = self.input_service.sparse_block_mask_for_labels(
                    input_mask_labels)
            except NotImplementedError:
                input_sbm = None

        if self.sbm is not None:
            # An explicitly provided mask takes precedence.
            pass
        elif input_sbm is None:
            self.sbm = output_sbm
        elif output_sbm is None:
            self.sbm = input_sbm
        else:
            assert (input_sbm.resolution == output_sbm.resolution).all(), \
                "FIXME: At the moment, you can't supply both an input mask and an output "\
                "mask unless the input and output sources use the same brick shape (message-block-shape)"

            final_box = box_intersection(input_sbm.box, output_sbm.box)

            # Express the shared box in each mask's own low-res array coordinates:
            # offset by the mask's box start, then divide by its resolution.
            # (Assumes lowres_mask's first voxel corresponds to sbm.box[0] -- TODO confirm.)
            input_box = (final_box - input_sbm.box[0]) // input_sbm.resolution
            input_mask = extract_subvol(input_sbm.lowres_mask, input_box)

            output_box = (final_box - output_sbm.box[0]) // output_sbm.resolution
            output_mask = extract_subvol(output_sbm.lowres_mask, output_box)

            assert input_mask.shape == output_mask.shape
            # Note: the 'np.bool' alias no longer exists in current numpy; use 'bool'.
            assert input_mask.dtype == output_mask.dtype == bool
            final_mask = (input_mask & output_mask)

            self.sbm = SparseBlockMask(final_mask, final_box,
                                       input_sbm.resolution)

        id_offset = options["add-offset-to-ids"]
        if id_offset != 0:
            input_mask_labels = np.asarray(input_mask_labels, np.uint64)
            input_mask_labels += id_offset
        self.input_mask_labels = set(input_mask_labels)
Exemplo n.º 27
0
        def overwrite_box(box, lowres_mask):
            """
            Erase (zero out) the masked voxels within the given brick and post
            the blocks that changed to the output service.

            ``scale``, ``block_width``, ``input_service`` and ``output_service``
            are taken from the enclosing scope.

            Args:
                box:
                    The brick's box at the current ``scale``.
                    Its start must be aligned to ``block_width``.
                lowres_mask:
                    Boolean scale-5 mask of the voxels to erase within the box.
                    Must contain at least one True voxel.

            Returns:
                None if no voxel changed or if ``scale`` is nonzero.
                Otherwise, the block statistics (as returned by
                ``block_stats_for_volume()``) of the voxels that were erased.
            """
            # Note: the 'np.bool' alias no longer exists in current numpy; use 'bool'.
            assert lowres_mask.dtype == bool
            assert not (box[0] % block_width).any()
            assert lowres_mask.any(), \
                "This function is supposed to be called on bricks that actually need masking"

            # Crop box and mask to only include the extent of the masked voxels
            nonzero_mask_box = compute_nonzero_box(lowres_mask)
            nonzero_mask_box = round_box(nonzero_mask_box,
                                         (block_width * 2**scale) // 2**5)
            lowres_mask = extract_subvol(lowres_mask, nonzero_mask_box)

            # 2**(5-scale) is fractional when scale > 5, hence the cast back to int.
            box = box[0] + (nonzero_mask_box * 2**(5 - scale))
            box = box.astype(np.int32)

            if scale <= 5:
                mask = upsample(lowres_mask, 2**(5 - scale))
            else:
                # Downsample, but favor UNmasked voxels
                mask = ~view_as_blocks(~lowres_mask, 3 *
                                       (2**(scale - 5), )).any(axis=(3, 4, 5))

            old_seg = input_service.get_subvolume(box, scale)

            assert mask.dtype == bool
            new_seg = old_seg.copy()
            new_seg[mask] = 0

            if (new_seg == old_seg).all():
                # It's possible that there are no changed voxels, but only
                # at high scales where the masked voxels were downsampled away.
                #
                # So if the original downscale pyramids are perfect,
                # then the following assumption ought to hold.
                #
                # But I'm commenting it out in case the DVID pyramid at scale 5
                # isn't pixel-perfect in some places.
                #
                # assert scale > 5

                return None

            def post_changed_blocks(old_seg, new_seg):
                # If we post the whole volume, we'll be overwriting blocks that haven't changed,
                # wasting space in DVID (for duplicate blocks stored in the child uuid).
                # Instead, we need to only post the blocks that have changed.

                # So, can't just do this:
                # output_service.write_subvolume(new_seg, box[0], scale)

                seg_diff = (old_seg != new_seg)
                block_diff = view_as_blocks(seg_diff, 3 * (block_width, ))

                # Block-grid indexes (and then voxel corners) of the blocks
                # that contain at least one changed voxel
                changed_block_map = block_diff.any(axis=(3, 4, 5)).nonzero()
                changed_block_corners = box[0] + np.transpose(
                    changed_block_map) * block_width

                changed_blocks = view_as_blocks(
                    new_seg, 3 * (block_width, ))[changed_block_map]
                encoded_blocks = encode_labelarray_blocks(
                    changed_block_corners, changed_blocks)

                mgr = output_service.resource_manager_client
                with mgr.access_context(output_service.server, True, 1,
                                        changed_blocks.nbytes):
                    post_labelmap_blocks(*output_service.instance_triple,
                                         None,
                                         encoded_blocks,
                                         scale,
                                         downres=False,
                                         noindexing=True,
                                         throttle=False,
                                         is_raw=True)

            assert not (box % block_width).any(), \
                "Should not write partial blocks"

            post_changed_blocks(old_seg, new_seg)
            del new_seg

            if scale != 0:
                # Don't collect statistics for higher scales
                return None

            # Keep only the voxels that were erased, to compute their statistics.
            erased_seg = old_seg.copy()
            erased_seg[~mask] = 0

            block_shape = 3 * (input_service.block_width, )
            erased_stats_df = block_stats_for_volume(block_shape, erased_seg,
                                                     box)
            return erased_stats_df
Exemplo n.º 28
0
    def execute(self):
        """
        Compute, for each body in 'subset-bodies', the number of voxels that
        fall within each of the configured ROIs.

        Steps:

        1. Fetch the combined ROI label volume (at scale 5) for the input bounding box.
        2. Ask the input service which bricks contain the bodies of interest.
        3. Pair each such brick's box with the matching portion of the ROI volume.
        4. Process the bricks in batches via ``self._execute_batch()`` and sum the
           resulting voxel counts per (body, roi_id).

        Outputs (written to the current working directory):
            - 'brick-coords.npy': the brick coordinate table from step 2
            - 'roi-stats.npy' and 'roi-stats.csv': the final statistics table.
              If the input scale is nonzero, the 'voxels' column is named
              'voxels_s{scale}' instead.
        """
        scale = self._init_service()

        options = self.config["roistats"]
        server = self.input_service.base_service.server
        uuid = self.input_service.base_service.uuid
        rois = options["rois"]

        bodies = load_body_list(options["subset-bodies"],
                                self.input_service.base_service.supervoxels)
        assert len(
            bodies) > 0, "Please provide a list of subset-bodies to process"

        # One ROI (scale-5) voxel spans this many voxels of the input service.
        roi_voxel_width = 2**(5 - scale)

        bounding_box = self.input_service.bounding_box_zyx
        assert not (bounding_box % roi_voxel_width).any(), \
            "Make sure your configured bounding box is divisible by 32px at scale 0"
        brick_shape = self.input_service.preferred_message_shape
        assert not (brick_shape % roi_voxel_width).any(), \
            "Make sure your preferred message shape divides into 32px blocks at scale 0"

        with Timer("Fetching ROI volume", logger):
            roi_vol_s5, roi_box_s5, overlaps = fetch_combined_roi_volume(
                server, uuid, rois, False, bounding_box // roi_voxel_width)

        if len(overlaps) > 0:
            logger.warning(
                f"Some of your ROIs overlap!  Here's an incomplete list:\n{overlaps}"
            )

        with Timer("Determining brick set", logger):
            brick_coords_df = self.input_service.sparse_brick_coords_for_labels(
                bodies)
            np.save('brick-coords.npy',
                    brick_coords_df.to_records(index=False))

        with Timer("Preparing bricks", logger):
            boxes_and_roi_bricks = []

            # One entry per brick: its corner and the tuple of labels found in it.
            # (Series.items() replaces iteritems(), which newer pandas no longer has.)
            labels_per_brick = brick_coords_df.groupby([*'zyx'])['label'].agg(tuple)
            for coord, labels in labels_per_brick.items():
                box = np.array((coord, coord))
                box[1] += brick_shape
                box = box_intersection(box, bounding_box)

                # Same box, expressed in the coordinates of the ROI volume array
                roi_brick_box = ((box // roi_voxel_width) - roi_box_s5[0])
                roi_brick_s5 = extract_subvol(roi_vol_s5, roi_brick_box)
                boxes_and_roi_bricks.append((box, roi_brick_s5, labels))

        logger.info(
            f"Prepared {len(boxes_and_roi_bricks)} bricks of shape {(*brick_shape[::-1],)}"
        )

        all_stats = []
        batches = [*iter_batches(boxes_and_roi_bricks, options["batch-size"])]
        logger.info(f"Processing {len(batches)} batches")
        for i, batch_boxes_and_bricks in enumerate(batches):
            with Timer(f"Batch {i:02d}", logger):
                batch_stats = self._execute_batch(scale,
                                                  batch_boxes_and_bricks)
                all_stats.append(batch_stats)

        # A body's voxels in a given ROI may be spread over many bricks; total them.
        all_stats = pd.concat(all_stats, ignore_index=True)
        all_stats = all_stats.groupby(['body', 'roi_id'],
                                      as_index=False)['voxels'].sum()

        # roi_id 0 is named "<none>"; ids 1..N follow the order of the configured rois.
        roi_names = pd.Series(["<none>", *rois], name='roi')
        roi_names.index.name = 'roi_id'
        all_stats = all_stats.merge(roi_names, 'left', on='roi_id')
        all_stats = all_stats.sort_values(['body', 'roi_id'])

        if scale > 0:
            all_stats.rename(columns={'voxels': f'voxels_s{scale}'},
                             inplace=True)

        with Timer(f"Writing stats ({len(all_stats)} rows)", logger):
            np.save('roi-stats.npy', all_stats.to_records(index=False))
            all_stats.to_csv('roi-stats.csv', index=False, header=True)
Exemplo n.º 29
0
def split_brick(new_grid, original_brick):
    """
    Cut a single brick into fragments according to a new grid.

    Each fragment is itself a Brick: its logical_box and location_id identify
    the brick of ``new_grid`` that the fragment belongs to, while its
    physical_box and volume cover only the portion supplied by ``original_brick``.
    The fragments are meant to be assembled later into the new grid's bricks.

    Note:
        The original brick's physical_box must lie within its logical_box.
        Splitting a brick whose physical_box exceeds its logical_box would work
        here, but it implies that some voxels are represented more than once in
        the overall collection of bricks, with undefined results as to which
        copy is kept after consolidation.  That is therefore forbidden.

        The reverse is permitted: the DESTINATION grid may use a halo, in which
        case some voxels of the original brick are copied into more than one
        fragment.

    Both the fragments and the original brick are compressed before returning.

    Returns:
        [Brick, Brick, ...], one fragment per destination box
        that intersects the original brick.
    """
    src_physical_box = original_brick.physical_box
    src_logical_box = original_brick.logical_box

    # Forbid out-of-bounds physical_boxes. (See note above.)
    starts_inside = (src_physical_box[0] >= src_logical_box[0]).all()
    stops_inside = (src_physical_box[1] <= src_logical_box[1]).all()
    assert starts_inside and stops_inside, \
        f"{original_brick.physical_box[:,::-1].tolist()} extends outside of {original_brick.logical_box[:,::-1].tolist()}"

    ## FIXME:
    ## If the brick lies completely within a single grid square for the destination block,
    ## Then boxes_from_grid() will only return a single box and the brick's volume will remain unchanged.
    ## In that case, it's probably best not to uncompress/recompress the brick.
    ## Just create a new brick with the same compressed data and a different logical_box.

    halo = new_grid.halo_shape
    destination_boxes = boxes_from_grid(src_physical_box,
                                        new_grid,
                                        include_halos=True)

    fragments = []
    for dest_box in destination_boxes:
        # The region of the destination box that this brick can actually supply
        overlap_box = box_intersection(dest_box, src_physical_box)

        # Same region, relative to the corner of the original volume array
        local_box = overlap_box - src_physical_box[0]
        overlap_vol = extract_subvol(original_brick.volume, local_box)

        # Strip the halo from the destination box to obtain its logical box
        dest_logical_box = dest_box - (-halo, halo)
        dest_location_id = tuple(dest_logical_box[0] // new_grid.block_shape)

        fragment = Brick(dest_logical_box,
                         overlap_box,
                         overlap_vol,
                         location_id=dest_location_id,
                         compression=original_brick.compression)
        fragment.compress()
        fragments.append(fragment)

    original_brick.compress()
    return fragments
Exemplo n.º 30
0
 def check_brick(brick):
     """
     Assert that the brick's volume has the shape implied by its physical_box
     and holds the same voxels as that region of the reference ``volume``
     (taken from the enclosing scope).
     """
     physical_box = brick.physical_box
     expected_shape = physical_box[1] - physical_box[0]
     assert (brick.volume.shape == expected_shape).all()

     expected_voxels = extract_subvol(volume, brick.physical_box)
     assert (brick.volume == expected_voxels).all()
Exemplo n.º 31
0
def test_extract_halos():
    """Check the halos produced by ``extract_halos()`` for a 2D brick set.

    Bricks are generated from a random volume over a bounding box that is
    not aligned to the grid.  The test then verifies, for both the 'outer'
    and the 'inner' halo types:

    - how each halo's physical box differs from its logical box,
    - that the part of each halo inside the bounding box matches the source
      volume, and that everything outside it is zero,
    - that every halo box occurs exactly twice (once as an outer halo and
      once as an inner halo) with identical voxel data.
    """
    halo = 1
    grid = Grid((10, 20), (0, 0), halo)
    bounding_box = np.array([(15, 30), (95, 290)])
    volume = np.random.randint(0, 10, (100, 300))

    bricks, _num_bricks = generate_bricks_from_volume_source(
        bounding_box,
        grid,
        partial(extract_subvol, volume),
        DebugClient(),
    )

    outer_halos = extract_halos(bricks, grid, 'outer').compute()
    inner_halos = extract_halos(bricks, grid, 'inner').compute()

    # Insertion order ('outer' first, then 'inner') is the checking order.
    halos_by_type = {'outer': outer_halos, 'inner': inner_halos}

    for halo_type, halo_bricks in halos_by_type.items():
        for hb in halo_bricks:
            # A halo keeps the logical box of the brick it was taken from.
            # Its physical box matches that logical box everywhere except
            # along the halo axis -- this holds even for bricks on the edge
            # of the volume, whose own physical boxes are smaller than
            # their logical boxes.
            if halo_type == 'outer':
                # Both corners differ from the logical box on exactly one axis.
                assert (hb.physical_box[0] != hb.logical_box[0]).sum() == 1
                assert (hb.physical_box[1] != hb.logical_box[1]).sum() == 1
            else:
                # Exactly one coordinate of the whole box differs.
                assert (hb.physical_box != hb.logical_box).sum() == 1

            # Because the bounding box is not grid-aligned, edge bricks hold
            # only partial data.  Halos are nevertheless emitted at the full
            # logical-box size and zero-padded where no data exists, so only
            # the portion inside the bounding box can be compared with the
            # source volume.
            valid_box = box_intersection(bounding_box, hb.physical_box)
            valid_box_in_halo = valid_box - hb.physical_box[0]

            actual_vol = extract_subvol(hb.volume, valid_box_in_halo)
            reference_vol = extract_subvol(volume, valid_box)
            assert (actual_vol == reference_vol).all()

            # Blank out the valid portion of a copy; whatever is left is the
            # padding, which must be entirely zero.
            padding_only = hb.volume.copy()
            overwrite_subvol(padding_only, valid_box_in_halo, 0)
            assert (padding_only == 0).all()

    # One row per halo: flattened physical box, the brick itself, its type.
    rows = [[*hb.physical_box.flat, hb, 'outer'] for hb in outer_halos]
    rows += [[*hb.physical_box.flat, hb, 'inner'] for hb in inner_halos]

    box_columns = ['y0', 'x0', 'y1', 'x1']
    halo_df = pd.DataFrame(rows, columns=[*box_columns, 'brick', 'halo_type'])

    halo_counts = halo_df.groupby(box_columns).size()

    # All brick physical boxes are clipped to the overall bounding box, so
    # each outer halo must be paired with an inner halo from a neighboring
    # brick.  (Bricks initialized from a sparse mask would not necessarily
    # satisfy this.)
    assert halo_counts.min() == 2
    assert halo_counts.max() == 2

    for _box, halos_df in halo_df.groupby(box_columns):
        assert set(halos_df['halo_type']) == {'outer', 'inner'}

        # The paired outer/inner halos must carry identical voxel data.
        paired_bricks = halos_df['brick']
        brick0 = paired_bricks.iloc[0]
        brick1 = paired_bricks.iloc[1]
        assert (brick0.volume == brick1.volume).all()