Exemplo n.º 1
0
    def test_get_image_and_mask_manual_bounds(self):
        """Check get_image_and_mask_from_slide() in 'manual_bounds' mode.

        The region is requested twice -- once without handing over slide
        annotations and once with copies of the pre-fetched annotations and
        element infos from ``cfg`` -- and both results must meet the same
        expectations on keys, array shapes and contour count.
        """
        # Nothing is checked on Python 2; the test simply returns.
        if sys.version_info < (3, ):
            return

        expected_output_keys = {
            'bounds', 'ROI', 'rgb', 'contours', 'visualization'
        }
        expected_contour_keys = {
            'group', 'color', 'ymin', 'ymax', 'xmin', 'xmax', 'has_holes',
            'touches_edge-top', 'touches_edge-left', 'touches_edge-bottom',
            'touches_edge-right', 'coords_x', 'coords_y'
        }

        # specified region -- annotations are NOT provided by the caller
        without_annotations = get_image_and_mask_from_slide(
            mode='manual_bounds', **cfg.get_kwargs)

        # specified region -- annotations ARE provided by the caller (copies,
        # so the shared fixtures in cfg cannot be altered by the call)
        with_annotations = get_image_and_mask_from_slide(
            mode='manual_bounds',
            slide_annotations=copy.deepcopy(cfg.slide_annotations),
            element_infos=cfg.element_infos.copy(),
            **cfg.get_kwargs)

        for result in (without_annotations, with_annotations):
            assert set(result.keys()) == expected_output_keys

            # mask is 2-D, the two images carry three channels
            assert result['ROI'].shape == (200, 250)
            assert result['rgb'].shape == (200, 250, 3)
            assert result['visualization'].shape == (200, 250, 3)

            # contour count must lie strictly between 26 and 32
            contours = result['contours']
            assert 26 < len(contours) < 32
            assert set(contours[0].keys()) == expected_contour_keys
Exemplo n.º 2
0
def get_annotation_mask_and_image(gc, item_id, mode, gt_codes, element_infos, magnification=None, groups_to_get=None,
                                  bounds=None):
    """Wrapper around HistomicsTK get_image_and_mask_from_slide(..) function. Allows retrieval of labeled masks of
    annotations from an image.

    Parameters
    ----------
    gc : girder client
        client used for the requests; only ``gc.get(path)`` is called here directly, the client is otherwise
        forwarded to get_image_and_mask_from_slide
    item_id : str
        id of the image item, forwarded as ``slide_id``
    mode : str
        either 'manual_bounds' or 'wsi'
    gt_codes : object exposing ``.T.to_dict()``
        ground truth codes table (presumably a pandas DataFrame indexed by group name - confirm against caller),
        its transposed dict is forwarded as ``GTCodes_dict``
    element_infos : object
        forwarded unchanged as ``element_infos``
    magnification : float (optional)
        magnification to request, if None the 'magnification' value reported by ``item/<item_id>/tiles`` is used
    groups_to_get : list (optional)
        forwarded inside ``get_contours_kwargs``
    bounds : dict (optional)
        forwarded unchanged as ``bounds``

    Returns
    -------
    rgb_im
        the 'rgb' entry of the get_image_and_mask_from_slide output
    label_im
        the 'ROI' entry of the get_image_and_mask_from_slide output

    Raises
    ------
    ValueError
        if mode is not 'manual_bounds' or 'wsi'

    """
    # mode must be "manual_bounds" or "wsi"
    if mode not in ('manual_bounds', 'wsi'):
        raise ValueError('mode must be either manual bounds or wsi')

    # if magnification is not specified, get the scan magnification
    # (the tiles endpoint is addressed by the item id, not by the magnification, which is None at this point)
    if magnification is None:
        magnification = gc.get(f'item/{item_id}/tiles')['magnification']

    # format key-word arguments
    get_roi_mask_kwargs = {'crop_to_roi': True}
    get_contours_kwargs = {'groups_to_get': groups_to_get, 'get_roi_contour': False,
                           'discard_nonenclosed_background': True, 'MIN_SIZE': 0}
    get_kwargs = {'gc': gc, 'slide_id': item_id, 'GTCodes_dict': gt_codes.T.to_dict(), 'MPP': None,
                  'MAG': magnification, 'get_contours_kwargs': get_contours_kwargs, 'bounds': bounds,
                  'element_infos': element_infos, 'get_roi_mask_kwargs': get_roi_mask_kwargs}

    # get label mask and RGB image
    output = get_image_and_mask_from_slide(mode=mode, **get_kwargs)
    return output['rgb'], output['ROI']
    def test_get_image_and_mask_minbbox(self):
        """Check get_image_and_mask_from_slide() in 'min_bounding_box' mode.

        Copies of the annotations and element infos from ``cfg`` are passed
        in, and the result is checked for its keys, array shapes and number
        of contours.
        """
        # Nothing is checked on Python 2; the test simply returns.
        if sys.version_info < (3, ):
            return

        expected_output_keys = {
            'bounds', 'ROI', 'rgb', 'contours', 'visualization'}
        expected_contour_keys = {
            'group', 'color', 'ymin', 'ymax', 'xmin', 'xmax',
            'has_holes', 'touches_edge-top', 'touches_edge-left',
            'touches_edge-bottom', 'touches_edge-right', 'coords_x',
            'coords_y'
        }

        # ROI bounding everything
        result = get_image_and_mask_from_slide(
            mode='min_bounding_box',
            slide_annotations=copy.deepcopy(cfg.slide_annotations),
            element_infos=cfg.element_infos.copy(),
            **cfg.get_kwargs)

        assert set(result.keys()) == expected_output_keys

        # mask is 2-D, the two images carry three channels
        assert result['ROI'].shape == (321, 351)
        assert result['rgb'].shape == (321, 351, 3)
        assert result['visualization'].shape == (321, 351, 3)

        # contour count must lie strictly between 26 and 32
        contours = result['contours']
        assert 26 < len(contours) < 32
        assert set(contours[0].keys()) == expected_contour_keys
Exemplo n.º 4
0
def grid_tiling(gc,
                item_id,
                group_names,
                save_dir,
                save_mag=None,
                mask_mag=1.25,
                tile_size=(224, 224),
                tissue_threshold=0.3,
                annotation_threshold=0.15,
                random_seed=64,
                is_test=False,
                oversample_background=2.0,
                reinhard_stats=None):
    """Split a DSA image item (WSI) into smaller images and save locally grouped by annotations. This approach grids the
    image into equal sized small images, or tiles (i.e. a grid is placed over the WSI starting at the top left corner).
    At the bottom and right edge of the WSI the tiles are ignored if not of correct size (the case where the WSI
    dimensions are not a multiple factor of the tile size). A list of annotation group names is needed to group the
    tiles into classes of images saved in their own directories. Tiles with no tissue detected are ignored and tiles not
    containing annotations (but have tissue) are by default saved into background class. A background annotation group
    will cause issues so avoid having this annotation group name.

    Tiles can be saved at a lower magnification than source image if needed (param: save_mag). Note that tiles size
    specified should be the tile size at the save magnification not the source magnification. Image saved will be of the
    tile size specified in parameters, regardless of the save_mag used.

    Parameters
    ----------
    gc : girder_client.GirderClient
        authenticated client
    item_id : str
        DSA image item id
    group_names : list
        list of annotation group names
    save_dir : str
        directory to create group directories with images - save_dir / group_name_1, save_dir / background, etc.
    save_mag : float (optional)
        magnification to use when saving the images, if None then source magnification will be used.
    mask_mag : float (optional)
        magnification to create binary mask of tissue and annotations. Note that if your annotations are very small
        it will benefit to use a larger value than default here, but binary masks will fail to create at very high
        magnifications.
    tile_size : tuple (optional)
        size (width, height) to save tiles at, note that this is the size it will be saved at regardless of the
        magnification used to save the images at (i.e. if save_mag is 4 times less than the source magnification then
        the actual tile_size will represent 4 times the pixels at full resolution).
    tissue_threshold : float (optional)
        from 0 to 1, fraction of tile that must contain tissue to be included
    annotation_threshold : float (optional)
        from 0 to 1, fraction of tile that must contain annotation (per group) to be labeled as annotation. Note
        that a single tile may be saved twice, representing multiple classes.
    random_seed : int (optional)
        random seed to use when shuffling the background regions
    is_test : bool (optional)
        if True then all the background regions will be saved, otherwise oversample_background will be used to determine
        how many background regions to save
    oversample_background : float (optional)
        factor to oversample background class images, compared to the number of images of the annotation class
        with the most images saved. If no annotation image was saved (or group_names is empty) no background
        image is saved unless is_test is True.
    reinhard_stats : dict (optional)
        if not None then, in addition to each tile, color augmented copies are saved by color normalizing the tile
        with the Reinhard color norm method. Every value of this dict must itself be a dict with 'mu' and 'sigma'
        keys holding the target statistics to normalize to; one extra image (suffix _norm_<index>) is saved per
        entry. The source statistics of this image are computed here with reinhard_color_stats.

    """
    im_info = gc.get('item/{}/tiles'.format(item_id))
    if save_mag is None:
        save_mag = im_info['magnification']

    # color stats of the source image are only needed (and only computed) when augmenting
    mu = sigma = None
    if reinhard_stats is not None:
        mu, sigma = reinhard_color_stats(gc, item_id)

    # ----- prep work ----- #
    filename = splitext(gc.getItem(item_id)['name'])[0]

    # create dirs for each image class to save
    group_dirs = [join(save_dir, group_name) for group_name in group_names]
    for group_dir in group_dirs:
        makedirs(group_dir, exist_ok=True)
    background_dir = join(save_dir, 'background')
    makedirs(background_dir, exist_ok=True)

    # get image annotations
    annotations = gc.get('/annotation/item/' + item_id)

    # create a dataframe to use with annotation to mask handler functions (gt codes)
    # - group i is encoded with GT_code i + 1 in the label mask
    gt_data = [[group_name, 1, i + 1, 0, 0, 'rgb(0, 0, {})'.format(i), '']
               for i, group_name in enumerate(group_names)]
    gt_codes = pd.DataFrame(columns=[
        'group', 'overlay_order', 'GT_code', 'is_roi', 'is_background_class',
        'color', 'comments'
    ],
                            data=gt_data,
                            index=range(len(group_names)))
    gt_codes.index = gt_codes.loc[:, 'group']

    # get binary masks - tissue mask and annotation(s) mask
    mask_mag_factor, _ = get_scale_factor_and_appendStr(gc=gc,
                                                        slide_id=item_id,
                                                        MAG=mask_mag)
    # - scaling the annotations to lower magnification (on a copy, the originals are scaled separately below)
    mask_annotations = scale_slide_annotations(deepcopy(annotations),
                                               sf=mask_mag_factor)

    # - binary masks are for the whole image at low resolution, function returns also the RGB image which we use for
    # - getting the tissue mask
    mask_element_info = get_bboxes_from_slide_annotations(mask_annotations)
    get_kwargs = deepcopy(
        GET_KWARGS)  # avoid referencing on the global variable
    get_kwargs['gc'] = gc
    get_kwargs['slide_id'] = item_id
    get_kwargs['GTCodes_dict'] = gt_codes.T.to_dict()
    get_kwargs['bounds'] = None
    get_kwargs['MAG'] = mask_mag
    ann_mask_and_image = get_image_and_mask_from_slide(
        mode='wsi',
        slide_annotations=mask_annotations,
        element_infos=mask_element_info,
        **get_kwargs)
    tissue_mask = get_tissue_mask(ann_mask_and_image['rgb'])[0]
    annotation_mask = ann_mask_and_image['ROI']

    # convert the annotations to lower magnification
    fr_to_lr_factor, _ = get_scale_factor_and_appendStr(gc=gc,
                                                        slide_id=item_id,
                                                        MAG=save_mag)
    annotations = scale_slide_annotations(annotations, sf=fr_to_lr_factor)
    lr_element_info = get_bboxes_from_slide_annotations(annotations)

    # get full resolution information for image
    fr_mag = im_info['magnification']
    fr_width = im_info['sizeX']
    fr_height = im_info['sizeY']
    # tile size expressed in full resolution pixels
    fr_tile_width = int(tile_size[0] / fr_to_lr_factor)
    fr_tile_height = int(tile_size[1] / fr_to_lr_factor)

    # change the get_kwargs to save magnification
    get_kwargs['MAG'] = save_mag

    # ----- loop through image at full res ----- #
    group_annotation_counts = [0] * len(group_names)
    background_regions = []
    for x in range(0, fr_width, fr_tile_width):
        # skip the column of tiles that would go over the right edge of the image
        if x + fr_tile_width > fr_width:
            continue

        # columns of the low resolution masks covered by this tile
        mask_cols = slice(int(x * mask_mag / fr_mag),
                          int((x + fr_tile_width) * mask_mag / fr_mag))

        for y in range(0, fr_height, fr_tile_height):
            # skip the tile if it would go over the bottom edge of the image
            if y + fr_tile_height > fr_height:
                continue

            # rows of the low resolution masks covered by this tile
            mask_rows = slice(int(y * mask_mag / fr_mag),
                              int((y + fr_tile_height) * mask_mag / fr_mag))

            # skip if tile does not contain enough tissue, using the binary mask for tissue
            tissue_tile = tissue_mask[mask_rows, mask_cols]
            if np.count_nonzero(
                    tissue_tile) / tissue_tile.size < tissue_threshold:
                continue

            # check tile for annotations, using the label mask for annotations
            annotation_tile = annotation_mask[mask_rows, mask_cols]
            tile_stem = '{}_x_{}_y_{}'.format(filename, x, y)

            # tile is background if no annotation is present (of any group)
            background_flag = True
            # - check for each annotation group
            for i, group_dir in enumerate(group_dirs):
                group_annotation_tile = annotation_tile == i + 1

                # group is ignored for this tile if the tile does not contain enough of its annotation
                if np.count_nonzero(
                        group_annotation_tile
                ) / group_annotation_tile.size < annotation_threshold:
                    continue

                background_flag = False
                group_annotation_counts[i] += 1

                # get annotation image and save it
                get_kwargs['bounds'] = {
                    'XMIN': x,
                    'XMAX': x + fr_tile_width,
                    'YMIN': y,
                    'YMAX': y + fr_tile_height
                }

                annotation_im = get_image_and_mask_from_slide(
                    mode='manual_bounds',
                    slide_annotations=annotations,
                    element_infos=lr_element_info,
                    **get_kwargs)['rgb']

                # save the image (and its color augmentations) to correct directory
                _save_tile_images(annotation_im, group_dir, tile_stem,
                                  reinhard_stats, mu, sigma)

            if background_flag:
                # save coordinates of background image candidates
                background_regions.append({
                    'magnification': save_mag,
                    'left': x,
                    'top': y,
                    'width': fr_tile_width,
                    'height': fr_tile_height
                })

    # randomly select background class coordinates
    # - oversample the background class by a factor of the most represented annotation class
    Random(random_seed).shuffle(background_regions)
    if not is_test:
        # default=0 keeps an empty group_names list from raising in max()
        most_represented = max(group_annotation_counts, default=0)
        background_regions = background_regions[:int(
            oversample_background * most_represented)]

    for region in background_regions:
        # keep the first three channels only
        tile_im = get_region_im(gc, item_id, region)[:, :, :3]

        # save background image (and its color augmentations)
        region_stem = '{}_x_{}_y_{}'.format(filename, region['left'],
                                            region['top'])
        _save_tile_images(tile_im, background_dir, region_stem,
                          reinhard_stats, mu, sigma)


def _save_tile_images(tile_im, out_dir, stem, reinhard_stats, src_mu,
                      src_sigma):
    """Write a tile as <stem>.png and, if reinhard_stats is given, one Reinhard normalized copy per entry.

    Parameters
    ----------
    tile_im : image array
        tile to save
    out_dir : str
        directory to save the images in
    stem : str
        filename without extension
    reinhard_stats : dict or None
        if not None, each value must provide 'mu' and 'sigma' target statistics; copy number j is saved as
        <stem>_norm_<j>.png
    src_mu, src_sigma
        source image statistics passed to reinhard, only used when reinhard_stats is not None

    """
    imwrite(join(out_dir, '{}.png'.format(stem)), tile_im)

    if reinhard_stats is None:
        return

    # add color augmentation with Reinhard method
    for j, target in enumerate(reinhard_stats.values()):
        im_norm = reinhard(tile_im.copy(),
                           target['mu'],
                           target['sigma'],
                           src_mu=src_mu,
                           src_sigma=src_sigma)
        imwrite(join(out_dir, '{}_norm_{}.png'.format(stem, j)), im_norm)